Diffstat (limited to 'net/core/dev.c')
 net/core/dev.c | 588 ++++++++++++++++++++++++++++++++-----------------------
 1 file changed, 323 insertions(+), 265 deletions(-)
diff --git a/net/core/dev.c b/net/core/dev.c
index 0ce469e5ec80..b1b0c8d4d7df 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -147,6 +147,8 @@ struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
 struct list_head ptype_all __read_mostly;	/* Taps */
 static struct list_head offload_base __read_mostly;
 
+static int netif_rx_internal(struct sk_buff *skb);
+
 /*
  * The @dev_base_head list is protected by @dev_base_lock and the rtnl
  * semaphore.
@@ -480,7 +482,7 @@ EXPORT_SYMBOL(dev_add_offload);
  * and must not be freed until after all the CPU's have gone
  * through a quiescent state.
  */
-void __dev_remove_offload(struct packet_offload *po)
+static void __dev_remove_offload(struct packet_offload *po)
 {
        struct list_head *head = &offload_base;
        struct packet_offload *po1;
@@ -498,7 +500,6 @@ void __dev_remove_offload(struct packet_offload *po)
 out:
        spin_unlock(&offload_lock);
 }
-EXPORT_SYMBOL(__dev_remove_offload);
 
 /**
  * dev_remove_offload - remove packet offload handler
@@ -1118,6 +1119,8 @@ rollback:
 
        write_seqcount_end(&devnet_rename_seq);
 
+       netdev_adjacent_rename_links(dev, oldname);
+
        write_lock_bh(&dev_base_lock);
        hlist_del_rcu(&dev->name_hlist);
        write_unlock_bh(&dev_base_lock);
@@ -1137,6 +1140,7 @@ rollback:
                        err = ret;
                        write_seqcount_begin(&devnet_rename_seq);
                        memcpy(dev->name, oldname, IFNAMSIZ);
+                       memcpy(oldname, newname, IFNAMSIZ);
                        goto rollback;
                } else {
                        pr_err("%s: name change rollback failed: %d\n",
@@ -1566,14 +1570,14 @@ EXPORT_SYMBOL(unregister_netdevice_notifier);
  * are as for raw_notifier_call_chain().
  */
 
-int call_netdevice_notifiers_info(unsigned long val, struct net_device *dev,
-                                 struct netdev_notifier_info *info)
+static int call_netdevice_notifiers_info(unsigned long val,
+                                        struct net_device *dev,
+                                        struct netdev_notifier_info *info)
 {
        ASSERT_RTNL();
        netdev_notifier_info_init(info, dev);
        return raw_notifier_call_chain(&netdev_chain, val, info);
 }
-EXPORT_SYMBOL(call_netdevice_notifiers_info);
 
 /**
  * call_netdevice_notifiers - call all network notifier blocks
@@ -1699,7 +1703,7 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
        skb_scrub_packet(skb, true);
        skb->protocol = eth_type_trans(skb, dev);
 
-       return netif_rx(skb);
+       return netif_rx_internal(skb);
 }
 EXPORT_SYMBOL_GPL(dev_forward_skb);
 
@@ -2079,7 +2083,7 @@ int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
 }
 EXPORT_SYMBOL(netif_set_real_num_tx_queues);
 
-#ifdef CONFIG_RPS
+#ifdef CONFIG_SYSFS
 /**
  * netif_set_real_num_rx_queues - set actual number of RX queues used
  * @dev: Network device
@@ -2145,30 +2149,42 @@ void __netif_schedule(struct Qdisc *q)
 }
 EXPORT_SYMBOL(__netif_schedule);
 
-void dev_kfree_skb_irq(struct sk_buff *skb)
+struct dev_kfree_skb_cb {
+       enum skb_free_reason reason;
+};
+
+static struct dev_kfree_skb_cb *get_kfree_skb_cb(const struct sk_buff *skb)
 {
-       if (atomic_dec_and_test(&skb->users)) {
-               struct softnet_data *sd;
-               unsigned long flags;
+       return (struct dev_kfree_skb_cb *)skb->cb;
+}
+
+void __dev_kfree_skb_irq(struct sk_buff *skb, enum skb_free_reason reason)
+{
+       unsigned long flags;
 
-               local_irq_save(flags);
-               sd = &__get_cpu_var(softnet_data);
-               skb->next = sd->completion_queue;
-               sd->completion_queue = skb;
-               raise_softirq_irqoff(NET_TX_SOFTIRQ);
-               local_irq_restore(flags);
+       if (likely(atomic_read(&skb->users) == 1)) {
+               smp_rmb();
+               atomic_set(&skb->users, 0);
+       } else if (likely(!atomic_dec_and_test(&skb->users))) {
+               return;
        }
+       get_kfree_skb_cb(skb)->reason = reason;
+       local_irq_save(flags);
+       skb->next = __this_cpu_read(softnet_data.completion_queue);
+       __this_cpu_write(softnet_data.completion_queue, skb);
+       raise_softirq_irqoff(NET_TX_SOFTIRQ);
+       local_irq_restore(flags);
 }
-EXPORT_SYMBOL(dev_kfree_skb_irq);
+EXPORT_SYMBOL(__dev_kfree_skb_irq);
 
-void dev_kfree_skb_any(struct sk_buff *skb)
+void __dev_kfree_skb_any(struct sk_buff *skb, enum skb_free_reason reason)
 {
        if (in_irq() || irqs_disabled())
-               dev_kfree_skb_irq(skb);
+               __dev_kfree_skb_irq(skb, reason);
        else
                dev_kfree_skb(skb);
 }
-EXPORT_SYMBOL(dev_kfree_skb_any);
+EXPORT_SYMBOL(__dev_kfree_skb_any);
 
 
 /**
@@ -2404,7 +2420,7 @@ EXPORT_SYMBOL(netdev_rx_csum_fault);
  * 2. No high memory really exists on this machine.
  */
 
-static int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
+static int illegal_highdma(const struct net_device *dev, struct sk_buff *skb)
 {
 #ifdef CONFIG_HIGHMEM
        int i;
@@ -2442,13 +2458,8 @@ static void dev_gso_skb_destructor(struct sk_buff *skb)
 {
        struct dev_gso_cb *cb;
 
-       do {
-               struct sk_buff *nskb = skb->next;
-
-               skb->next = nskb->next;
-               nskb->next = NULL;
-               kfree_skb(nskb);
-       } while (skb->next);
+       kfree_skb_list(skb->next);
+       skb->next = NULL;
 
        cb = DEV_GSO_CB(skb);
        if (cb->destructor)
@@ -2484,34 +2495,36 @@ static int dev_gso_segment(struct sk_buff *skb, netdev_features_t features)
 }
 
 static netdev_features_t harmonize_features(struct sk_buff *skb,
-                                           netdev_features_t features)
+                                           const struct net_device *dev,
+                                           netdev_features_t features)
 {
        if (skb->ip_summed != CHECKSUM_NONE &&
            !can_checksum_protocol(features, skb_network_protocol(skb))) {
                features &= ~NETIF_F_ALL_CSUM;
-       } else if (illegal_highdma(skb->dev, skb)) {
+       } else if (illegal_highdma(dev, skb)) {
                features &= ~NETIF_F_SG;
        }
 
        return features;
 }
 
-netdev_features_t netif_skb_features(struct sk_buff *skb)
+netdev_features_t netif_skb_dev_features(struct sk_buff *skb,
+                                        const struct net_device *dev)
 {
        __be16 protocol = skb->protocol;
-       netdev_features_t features = skb->dev->features;
+       netdev_features_t features = dev->features;
 
-       if (skb_shinfo(skb)->gso_segs > skb->dev->gso_max_segs)
+       if (skb_shinfo(skb)->gso_segs > dev->gso_max_segs)
                features &= ~NETIF_F_GSO_MASK;
 
        if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD)) {
                struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
                protocol = veh->h_vlan_encapsulated_proto;
        } else if (!vlan_tx_tag_present(skb)) {
-               return harmonize_features(skb, features);
+               return harmonize_features(skb, dev, features);
        }
 
-       features &= (skb->dev->vlan_features | NETIF_F_HW_VLAN_CTAG_TX |
+       features &= (dev->vlan_features | NETIF_F_HW_VLAN_CTAG_TX |
                     NETIF_F_HW_VLAN_STAG_TX);
 
        if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD))
@@ -2519,24 +2532,9 @@ netdev_features_t netif_skb_features(struct sk_buff *skb)
                                NETIF_F_GEN_CSUM | NETIF_F_HW_VLAN_CTAG_TX |
                                NETIF_F_HW_VLAN_STAG_TX;
 
-       return harmonize_features(skb, features);
-}
-EXPORT_SYMBOL(netif_skb_features);
-
-/*
- * Returns true if either:
- *     1. skb has frag_list and the device doesn't support FRAGLIST, or
- *     2. skb is fragmented and the device does not support SG.
- */
-static inline int skb_needs_linearize(struct sk_buff *skb,
-                                     netdev_features_t features)
-{
-       return skb_is_nonlinear(skb) &&
-                       ((skb_has_frag_list(skb) &&
-                               !(features & NETIF_F_FRAGLIST)) ||
-                       (skb_shinfo(skb)->nr_frags &&
-                               !(features & NETIF_F_SG)));
+       return harmonize_features(skb, dev, features);
 }
+EXPORT_SYMBOL(netif_skb_dev_features);
 
 int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
                        struct netdev_queue *txq)
@@ -2605,8 +2603,8 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
                        dev_queue_xmit_nit(skb, dev);
 
                skb_len = skb->len;
-               rc = ops->ndo_start_xmit(skb, dev);
-
+               trace_net_dev_start_xmit(skb, dev);
+               rc = ops->ndo_start_xmit(skb, dev);
                trace_net_dev_xmit(skb, rc, dev, skb_len);
                if (rc == NETDEV_TX_OK)
                        txq_trans_update(txq);
@@ -2624,6 +2622,7 @@ gso:
                dev_queue_xmit_nit(nskb, dev);
 
                skb_len = nskb->len;
+               trace_net_dev_start_xmit(nskb, dev);
                rc = ops->ndo_start_xmit(nskb, dev);
                trace_net_dev_xmit(nskb, rc, dev, skb_len);
                if (unlikely(rc != NETDEV_TX_OK)) {
@@ -2744,7 +2743,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
        return rc;
 }
 
-#if IS_ENABLED(CONFIG_NETPRIO_CGROUP)
+#if IS_ENABLED(CONFIG_CGROUP_NET_PRIO)
 static void skb_update_prio(struct sk_buff *skb)
 {
        struct netprio_map *map = rcu_dereference_bh(skb->dev->priomap);
@@ -2781,8 +2780,9 @@ int dev_loopback_xmit(struct sk_buff *skb)
 EXPORT_SYMBOL(dev_loopback_xmit);
 
 /**
- *     dev_queue_xmit - transmit a buffer
+ *     __dev_queue_xmit - transmit a buffer
  *     @skb: buffer to transmit
+ *     @accel_priv: private data used for L2 forwarding offload
  *
  *     Queue a buffer for transmission to a network device. The caller must
  *     have set the device and priority and built the buffer before calling
@@ -2805,7 +2805,7 @@ EXPORT_SYMBOL(dev_loopback_xmit);
  *      the BH enable code must have IRQs enabled so that it will not deadlock.
  *          --BLG
  */
-int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv)
+static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv)
 {
        struct net_device *dev = skb->dev;
        struct netdev_queue *txq;
@@ -3014,7 +3014,7 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
        }
 
        skb_reset_network_header(skb);
-       if (!skb_get_rxhash(skb))
+       if (!skb_get_hash(skb))
                goto done;
 
        flow_table = rcu_dereference(rxqueue->rps_flow_table);
@@ -3159,7 +3159,7 @@ static bool skb_flow_limit(struct sk_buff *skb, unsigned int qlen)
        rcu_read_lock();
        fl = rcu_dereference(sd->flow_limit);
        if (fl) {
-               new_flow = skb_get_rxhash(skb) & (fl->num_buckets - 1);
+               new_flow = skb_get_hash(skb) & (fl->num_buckets - 1);
                old_flow = fl->history[fl->history_head];
                fl->history[fl->history_head] = new_flow;
 
@@ -3227,22 +3227,7 @@ enqueue:
        return NET_RX_DROP;
 }
 
-/**
- *     netif_rx        -       post buffer to the network code
- *     @skb: buffer to post
- *
- *     This function receives a packet from a device driver and queues it for
- *     the upper (protocol) levels to process.  It always succeeds. The buffer
- *     may be dropped during processing for congestion control or by the
- *     protocol layers.
- *
- *     return values:
- *     NET_RX_SUCCESS  (no congestion)
- *     NET_RX_DROP     (packet was dropped)
- *
- */
-
-int netif_rx(struct sk_buff *skb)
+static int netif_rx_internal(struct sk_buff *skb)
 {
        int ret;
 
@@ -3278,14 +3263,38 @@ int netif_rx(struct sk_buff *skb)
        }
        return ret;
 }
+
+/**
+ *     netif_rx        -       post buffer to the network code
+ *     @skb: buffer to post
+ *
+ *     This function receives a packet from a device driver and queues it for
+ *     the upper (protocol) levels to process.  It always succeeds. The buffer
+ *     may be dropped during processing for congestion control or by the
+ *     protocol layers.
+ *
+ *     return values:
+ *     NET_RX_SUCCESS  (no congestion)
+ *     NET_RX_DROP     (packet was dropped)
+ *
+ */
+
+int netif_rx(struct sk_buff *skb)
+{
+       trace_netif_rx_entry(skb);
+
+       return netif_rx_internal(skb);
+}
 EXPORT_SYMBOL(netif_rx);
 
 int netif_rx_ni(struct sk_buff *skb)
 {
        int err;
 
+       trace_netif_rx_ni_entry(skb);
+
        preempt_disable();
-       err = netif_rx(skb);
+       err = netif_rx_internal(skb);
        if (local_softirq_pending())
                do_softirq();
        preempt_enable();
@@ -3311,7 +3320,10 @@ static void net_tx_action(struct softirq_action *h)
                        clist = clist->next;
 
                        WARN_ON(atomic_read(&skb->users));
-                       trace_kfree_skb(skb, net_tx_action);
+                       if (likely(get_kfree_skb_cb(skb)->reason == SKB_REASON_CONSUMED))
+                               trace_consume_skb(skb);
+                       else
+                               trace_kfree_skb(skb, net_tx_action);
                        __kfree_skb(skb);
                }
        }
@@ -3667,22 +3679,7 @@ static int __netif_receive_skb(struct sk_buff *skb)
        return ret;
 }
 
-/**
- *     netif_receive_skb - process receive buffer from network
- *     @skb: buffer to process
- *
- *     netif_receive_skb() is the main receive data processing function.
- *     It always succeeds. The buffer may be dropped during processing
- *     for congestion control or by the protocol layers.
- *
- *     This function may only be called from softirq context and interrupts
- *     should be enabled.
- *
- *     Return values (usually ignored):
- *     NET_RX_SUCCESS: no congestion
- *     NET_RX_DROP: packet was dropped
- */
-int netif_receive_skb(struct sk_buff *skb)
+static int netif_receive_skb_internal(struct sk_buff *skb)
 {
        net_timestamp_check(netdev_tstamp_prequeue, skb);
 
@@ -3708,6 +3705,28 @@ int netif_receive_skb(struct sk_buff *skb)
 #endif
        return __netif_receive_skb(skb);
 }
+
+/**
+ *     netif_receive_skb - process receive buffer from network
+ *     @skb: buffer to process
+ *
+ *     netif_receive_skb() is the main receive data processing function.
+ *     It always succeeds. The buffer may be dropped during processing
+ *     for congestion control or by the protocol layers.
+ *
+ *     This function may only be called from softirq context and interrupts
+ *     should be enabled.
+ *
+ *     Return values (usually ignored):
+ *     NET_RX_SUCCESS: no congestion
+ *     NET_RX_DROP: packet was dropped
+ */
+int netif_receive_skb(struct sk_buff *skb)
+{
+       trace_netif_receive_skb_entry(skb);
+
+       return netif_receive_skb_internal(skb);
+}
 EXPORT_SYMBOL(netif_receive_skb);
 
 /* Network device is going away, flush any packets still pending
@@ -3757,7 +3776,7 @@ static int napi_gro_complete(struct sk_buff *skb)
                if (ptype->type != type || !ptype->callbacks.gro_complete)
                        continue;
 
-               err = ptype->callbacks.gro_complete(skb);
+               err = ptype->callbacks.gro_complete(skb, 0);
                break;
        }
        rcu_read_unlock();
@@ -3769,7 +3788,7 @@ static int napi_gro_complete(struct sk_buff *skb)
        }
 
 out:
-       return netif_receive_skb(skb);
+       return netif_receive_skb_internal(skb);
 }
 
 /* napi->gro_list contains packets ordered by age.
@@ -3805,10 +3824,18 @@ static void gro_list_prepare(struct napi_struct *napi, struct sk_buff *skb)
 {
        struct sk_buff *p;
        unsigned int maclen = skb->dev->hard_header_len;
+       u32 hash = skb_get_hash_raw(skb);
 
        for (p = napi->gro_list; p; p = p->next) {
                unsigned long diffs;
 
+               NAPI_GRO_CB(p)->flush = 0;
+
+               if (hash != skb_get_hash_raw(p)) {
+                       NAPI_GRO_CB(p)->same_flow = 0;
+                       continue;
+               }
+
                diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev;
                diffs |= p->vlan_tci ^ skb->vlan_tci;
                if (maclen == ETH_HLEN)
@@ -3819,7 +3846,23 @@ static void gro_list_prepare(struct napi_struct *napi, struct sk_buff *skb)
                                       skb_gro_mac_header(skb),
                                       maclen);
                NAPI_GRO_CB(p)->same_flow = !diffs;
-               NAPI_GRO_CB(p)->flush = 0;
+       }
+}
+
+static void skb_gro_reset_offset(struct sk_buff *skb)
+{
+       const struct skb_shared_info *pinfo = skb_shinfo(skb);
+       const skb_frag_t *frag0 = &pinfo->frags[0];
+
+       NAPI_GRO_CB(skb)->data_offset = 0;
+       NAPI_GRO_CB(skb)->frag0 = NULL;
+       NAPI_GRO_CB(skb)->frag0_len = 0;
+
+       if (skb_mac_header(skb) == skb_tail_pointer(skb) &&
+           pinfo->nr_frags &&
+           !PageHighMem(skb_frag_page(frag0))) {
+               NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0);
+               NAPI_GRO_CB(skb)->frag0_len = skb_frag_size(frag0);
        }
 }
 
@@ -3838,7 +3881,9 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
        if (skb_is_gso(skb) || skb_has_frag_list(skb))
                goto normal;
 
+       skb_gro_reset_offset(skb);
        gro_list_prepare(napi, skb);
+       NAPI_GRO_CB(skb)->csum = skb->csum; /* Needed for CHECKSUM_COMPLETE */
 
        rcu_read_lock();
        list_for_each_entry_rcu(ptype, head, list) {
@@ -3850,6 +3895,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
                NAPI_GRO_CB(skb)->same_flow = 0;
                NAPI_GRO_CB(skb)->flush = 0;
                NAPI_GRO_CB(skb)->free = 0;
+               NAPI_GRO_CB(skb)->udp_mark = 0;
 
                pp = ptype->callbacks.gro_receive(&napi->gro_list, skb);
                break;
@@ -3874,10 +3920,23 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
        if (same_flow)
                goto ok;
 
-       if (NAPI_GRO_CB(skb)->flush || napi->gro_count >= MAX_GRO_SKBS)
+       if (NAPI_GRO_CB(skb)->flush)
                goto normal;
 
-       napi->gro_count++;
+       if (unlikely(napi->gro_count >= MAX_GRO_SKBS)) {
+               struct sk_buff *nskb = napi->gro_list;
+
+               /* locate the end of the list to select the 'oldest' flow */
+               while (nskb->next) {
+                       pp = &nskb->next;
+                       nskb = *pp;
+               }
+               *pp = NULL;
+               nskb->next = NULL;
+               napi_gro_complete(nskb);
+       } else {
+               napi->gro_count++;
+       }
        NAPI_GRO_CB(skb)->count = 1;
        NAPI_GRO_CB(skb)->age = jiffies;
        skb_shinfo(skb)->gso_size = skb_gro_len(skb);
@@ -3915,12 +3974,39 @@ normal:
        goto pull;
 }
 
+struct packet_offload *gro_find_receive_by_type(__be16 type)
+{
+       struct list_head *offload_head = &offload_base;
+       struct packet_offload *ptype;
+
+       list_for_each_entry_rcu(ptype, offload_head, list) {
+               if (ptype->type != type || !ptype->callbacks.gro_receive)
+                       continue;
+               return ptype;
+       }
+       return NULL;
+}
+EXPORT_SYMBOL(gro_find_receive_by_type);
+
+struct packet_offload *gro_find_complete_by_type(__be16 type)
+{
+       struct list_head *offload_head = &offload_base;
+       struct packet_offload *ptype;
+
+       list_for_each_entry_rcu(ptype, offload_head, list) {
+               if (ptype->type != type || !ptype->callbacks.gro_complete)
+                       continue;
+               return ptype;
+       }
+       return NULL;
+}
+EXPORT_SYMBOL(gro_find_complete_by_type);
 
 static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
 {
        switch (ret) {
        case GRO_NORMAL:
-               if (netif_receive_skb(skb))
+               if (netif_receive_skb_internal(skb))
                        ret = GRO_DROP;
                break;
 
@@ -3943,26 +4029,9 @@ static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
        return ret;
 }
 
-static void skb_gro_reset_offset(struct sk_buff *skb)
-{
-       const struct skb_shared_info *pinfo = skb_shinfo(skb);
-       const skb_frag_t *frag0 = &pinfo->frags[0];
-
-       NAPI_GRO_CB(skb)->data_offset = 0;
-       NAPI_GRO_CB(skb)->frag0 = NULL;
-       NAPI_GRO_CB(skb)->frag0_len = 0;
-
-       if (skb_mac_header(skb) == skb_tail_pointer(skb) &&
-           pinfo->nr_frags &&
-           !PageHighMem(skb_frag_page(frag0))) {
-               NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0);
-               NAPI_GRO_CB(skb)->frag0_len = skb_frag_size(frag0);
-       }
-}
-
 gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 {
-       skb_gro_reset_offset(skb);
+       trace_napi_gro_receive_entry(skb);
 
        return napi_skb_finish(dev_gro_receive(napi, skb), skb);
 }
@@ -3986,8 +4055,7 @@ struct sk_buff *napi_get_frags(struct napi_struct *napi)
 
        if (!skb) {
                skb = netdev_alloc_skb_ip_align(napi->dev, GRO_MAX_HEAD);
-               if (skb)
-                       napi->skb = skb;
+               napi->skb = skb;
        }
        return skb;
 }
@@ -3998,12 +4066,7 @@ static gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *
 {
        switch (ret) {
        case GRO_NORMAL:
-       case GRO_HELD:
-               skb->protocol = eth_type_trans(skb, skb->dev);
-
-               if (ret == GRO_HELD)
-                       skb_gro_pull(skb, -ETH_HLEN);
-               else if (netif_receive_skb(skb))
+               if (netif_receive_skb_internal(skb))
                        ret = GRO_DROP;
                break;
 
@@ -4012,6 +4075,7 @@ static gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *
                napi_reuse_skb(napi, skb);
                break;
 
+       case GRO_HELD:
        case GRO_MERGED:
                break;
        }
@@ -4022,36 +4086,15 @@ static gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *
 static struct sk_buff *napi_frags_skb(struct napi_struct *napi)
 {
        struct sk_buff *skb = napi->skb;
-       struct ethhdr *eth;
-       unsigned int hlen;
-       unsigned int off;
 
        napi->skb = NULL;
 
-       skb_reset_mac_header(skb);
-       skb_gro_reset_offset(skb);
-
-       off = skb_gro_offset(skb);
-       hlen = off + sizeof(*eth);
-       eth = skb_gro_header_fast(skb, off);
-       if (skb_gro_header_hard(skb, hlen)) {
-               eth = skb_gro_header_slow(skb, hlen, off);
-               if (unlikely(!eth)) {
-                       napi_reuse_skb(napi, skb);
-                       skb = NULL;
-                       goto out;
-               }
+       if (unlikely(!pskb_may_pull(skb, sizeof(struct ethhdr)))) {
+               napi_reuse_skb(napi, skb);
+               return NULL;
        }
+       skb->protocol = eth_type_trans(skb, skb->dev);
 
-       skb_gro_pull(skb, sizeof(*eth));
-
-       /*
-        * This works because the only protocols we care about don't require
-        * special handling.  We'll fix it up properly at the end.
-        */
-       skb->protocol = eth->h_proto;
-
-out:
        return skb;
 }
 
@@ -4062,12 +4105,14 @@ gro_result_t napi_gro_frags(struct napi_struct *napi)
        if (!skb)
                return GRO_DROP;
 
+       trace_napi_gro_frags_entry(skb);
+
        return napi_frags_finish(napi, skb, dev_gro_receive(napi, skb));
 }
 EXPORT_SYMBOL(napi_gro_frags);
 
 /*
- * net_rps_action sends any pending IPI's for rps.
+ * net_rps_action_and_irq_enable sends any pending IPI's for rps.
  * Note: called with local irq disabled, but exits with local irq enabled.
  */
 static void net_rps_action_and_irq_enable(struct softnet_data *sd)
@@ -4272,17 +4317,10 @@ EXPORT_SYMBOL(netif_napi_add);
 
 void netif_napi_del(struct napi_struct *napi)
 {
-       struct sk_buff *skb, *next;
-
        list_del_init(&napi->dev_list);
        napi_free_frags(napi);
 
-       for (skb = napi->gro_list; skb; skb = next) {
-               next = skb->next;
-               skb->next = NULL;
-               kfree_skb(skb);
-       }
-
+       kfree_skb_list(napi->gro_list);
        napi->gro_list = NULL;
        napi->gro_count = 0;
 }
@@ -4399,19 +4437,6 @@ struct netdev_adjacent {
        struct rcu_head rcu;
 };
 
-static struct netdev_adjacent *__netdev_find_adj_rcu(struct net_device *dev,
-                                                    struct net_device *adj_dev,
-                                                    struct list_head *adj_list)
-{
-       struct netdev_adjacent *adj;
-
-       list_for_each_entry_rcu(adj, adj_list, list) {
-               if (adj->dev == adj_dev)
-                       return adj;
-       }
-       return NULL;
-}
-
 static struct netdev_adjacent *__netdev_find_adj(struct net_device *dev,
                                                 struct net_device *adj_dev,
                                                 struct list_head *adj_list)
@@ -4450,13 +4475,12 @@ EXPORT_SYMBOL(netdev_has_upper_dev);
  * Find out if a device is linked to an upper device and return true in case
  * it is. The caller must hold the RTNL lock.
  */
-bool netdev_has_any_upper_dev(struct net_device *dev)
+static bool netdev_has_any_upper_dev(struct net_device *dev)
 {
        ASSERT_RTNL();
 
        return !list_empty(&dev->all_adj_list.upper);
 }
-EXPORT_SYMBOL(netdev_has_any_upper_dev);
 
 /**
  * netdev_master_upper_dev_get - Get master upper device
@@ -4576,6 +4600,27 @@ void *netdev_lower_get_next_private_rcu(struct net_device *dev,
 EXPORT_SYMBOL(netdev_lower_get_next_private_rcu);
 
 /**
+ * netdev_lower_get_first_private_rcu - Get the first ->private from the
+ *                                     lower neighbour list, RCU
+ *                                     variant
+ * @dev: device
+ *
+ * Gets the first netdev_adjacent->private from the dev's lower neighbour
+ * list. The caller must hold RCU read lock.
+ */
+void *netdev_lower_get_first_private_rcu(struct net_device *dev)
+{
+       struct netdev_adjacent *lower;
+
+       lower = list_first_or_null_rcu(&dev->adj_list.lower,
+                       struct netdev_adjacent, list);
+       if (lower)
+               return lower->private;
+       return NULL;
+}
+EXPORT_SYMBOL(netdev_lower_get_first_private_rcu);
+
+/**
  * netdev_master_upper_dev_get_rcu - Get master upper device
  * @dev: device
  *
@@ -4594,13 +4639,36 @@ struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev)
 }
 EXPORT_SYMBOL(netdev_master_upper_dev_get_rcu);
 
+static int netdev_adjacent_sysfs_add(struct net_device *dev,
+                             struct net_device *adj_dev,
+                             struct list_head *dev_list)
+{
+       char linkname[IFNAMSIZ+7];
+       sprintf(linkname, dev_list == &dev->adj_list.upper ?
+               "upper_%s" : "lower_%s", adj_dev->name);
+       return sysfs_create_link(&(dev->dev.kobj), &(adj_dev->dev.kobj),
+                                linkname);
+}
+static void netdev_adjacent_sysfs_del(struct net_device *dev,
+                              char *name,
+                              struct list_head *dev_list)
+{
+       char linkname[IFNAMSIZ+7];
+       sprintf(linkname, dev_list == &dev->adj_list.upper ?
+               "upper_%s" : "lower_%s", name);
+       sysfs_remove_link(&(dev->dev.kobj), linkname);
+}
+
+#define netdev_adjacent_is_neigh_list(dev, dev_list) \
+               (dev_list == &dev->adj_list.upper || \
+                dev_list == &dev->adj_list.lower)
+
 static int __netdev_adjacent_dev_insert(struct net_device *dev,
                                        struct net_device *adj_dev,
                                        struct list_head *dev_list,
                                        void *private, bool master)
 {
        struct netdev_adjacent *adj;
-       char linkname[IFNAMSIZ+7];
        int ret;
 
        adj = __netdev_find_adj(dev, adj_dev, dev_list);
@@ -4623,16 +4691,8 @@ static int __netdev_adjacent_dev_insert(struct net_device *dev,
        pr_debug("dev_hold for %s, because of link added from %s to %s\n",
                 adj_dev->name, dev->name, adj_dev->name);
 
-       if (dev_list == &dev->adj_list.lower) {
-               sprintf(linkname, "lower_%s", adj_dev->name);
-               ret = sysfs_create_link(&(dev->dev.kobj),
-                                       &(adj_dev->dev.kobj), linkname);
-               if (ret)
-                       goto free_adj;
-       } else if (dev_list == &dev->adj_list.upper) {
-               sprintf(linkname, "upper_%s", adj_dev->name);
-               ret = sysfs_create_link(&(dev->dev.kobj),
-                                       &(adj_dev->dev.kobj), linkname);
+       if (netdev_adjacent_is_neigh_list(dev, dev_list)) {
+               ret = netdev_adjacent_sysfs_add(dev, adj_dev, dev_list);
                if (ret)
                        goto free_adj;
        }
@@ -4652,14 +4712,8 @@ static int __netdev_adjacent_dev_insert(struct net_device *dev,
        return 0;
 
 remove_symlinks:
-       if (dev_list == &dev->adj_list.lower) {
-               sprintf(linkname, "lower_%s", adj_dev->name);
-               sysfs_remove_link(&(dev->dev.kobj), linkname);
-       } else if (dev_list == &dev->adj_list.upper) {
-               sprintf(linkname, "upper_%s", adj_dev->name);
-               sysfs_remove_link(&(dev->dev.kobj), linkname);
-       }
-
+       if (netdev_adjacent_is_neigh_list(dev, dev_list))
+               netdev_adjacent_sysfs_del(dev, adj_dev->name, dev_list);
 free_adj:
        kfree(adj);
        dev_put(adj_dev);
@@ -4667,12 +4721,11 @@ free_adj:
        return ret;
 }
 
-void __netdev_adjacent_dev_remove(struct net_device *dev,
-                                 struct net_device *adj_dev,
-                                 struct list_head *dev_list)
+static void __netdev_adjacent_dev_remove(struct net_device *dev,
+                                        struct net_device *adj_dev,
+                                        struct list_head *dev_list)
 {
        struct netdev_adjacent *adj;
-       char linkname[IFNAMSIZ+7];
 
        adj = __netdev_find_adj(dev, adj_dev, dev_list);
 
@@ -4692,13 +4745,8 @@ void __netdev_adjacent_dev_remove(struct net_device *dev,
        if (adj->master)
                sysfs_remove_link(&(dev->dev.kobj), "master");
 
-       if (dev_list == &dev->adj_list.lower) {
-               sprintf(linkname, "lower_%s", adj_dev->name);
-               sysfs_remove_link(&(dev->dev.kobj), linkname);
-       } else if (dev_list == &dev->adj_list.upper) {
-               sprintf(linkname, "upper_%s", adj_dev->name);
-               sysfs_remove_link(&(dev->dev.kobj), linkname);
-       }
+       if (netdev_adjacent_is_neigh_list(dev, dev_list))
+               netdev_adjacent_sysfs_del(dev, adj_dev->name, dev_list);
 
        list_del_rcu(&adj->list);
        pr_debug("dev_put for %s, because link removed from %s to %s\n",
@@ -4707,11 +4755,11 @@ void __netdev_adjacent_dev_remove(struct net_device *dev,
        kfree_rcu(adj, rcu);
 }
 
-int __netdev_adjacent_dev_link_lists(struct net_device *dev,
-                                    struct net_device *upper_dev,
-                                    struct list_head *up_list,
-                                    struct list_head *down_list,
-                                    void *private, bool master)
+static int __netdev_adjacent_dev_link_lists(struct net_device *dev,
+                                           struct net_device *upper_dev,
+                                           struct list_head *up_list,
+                                           struct list_head *down_list,
+                                           void *private, bool master)
 {
        int ret;
 
@@ -4730,8 +4778,8 @@ int __netdev_adjacent_dev_link_lists(struct net_device *dev,
        return 0;
 }
 
-int __netdev_adjacent_dev_link(struct net_device *dev,
-                              struct net_device *upper_dev)
+static int __netdev_adjacent_dev_link(struct net_device *dev,
+                                     struct net_device *upper_dev)
 {
        return __netdev_adjacent_dev_link_lists(dev, upper_dev,
                                                &dev->all_adj_list.upper,
@@ -4739,26 +4787,26 @@ int __netdev_adjacent_dev_link(struct net_device *dev,
                                                NULL, false);
 }
 
-void __netdev_adjacent_dev_unlink_lists(struct net_device *dev,
-                                       struct net_device *upper_dev,
-                                       struct list_head *up_list,
-                                       struct list_head *down_list)
+static void __netdev_adjacent_dev_unlink_lists(struct net_device *dev,
+                                              struct net_device *upper_dev,
+                                              struct list_head *up_list,
+                                              struct list_head *down_list)
 {
        __netdev_adjacent_dev_remove(dev, upper_dev, up_list);
        __netdev_adjacent_dev_remove(upper_dev, dev, down_list);
 }
 
-void __netdev_adjacent_dev_unlink(struct net_device *dev,
-                                 struct net_device *upper_dev)
+static void __netdev_adjacent_dev_unlink(struct net_device *dev,
+                                        struct net_device *upper_dev)
 {
        __netdev_adjacent_dev_unlink_lists(dev, upper_dev,
                                           &dev->all_adj_list.upper,
                                           &upper_dev->all_adj_list.lower);
 }
 
-int __netdev_adjacent_dev_link_neighbour(struct net_device *dev,
-                                        struct net_device *upper_dev,
-                                        void *private, bool master)
+static int __netdev_adjacent_dev_link_neighbour(struct net_device *dev,
+                                               struct net_device *upper_dev,
+                                               void *private, bool master)
 {
        int ret = __netdev_adjacent_dev_link(dev, upper_dev);
 
@@ -4777,8 +4825,8 @@ int __netdev_adjacent_dev_link_neighbour(struct net_device *dev,
        return 0;
 }
 
-void __netdev_adjacent_dev_unlink_neighbour(struct net_device *dev,
-                                           struct net_device *upper_dev)
+static void __netdev_adjacent_dev_unlink_neighbour(struct net_device *dev,
+                                                  struct net_device *upper_dev)
 {
        __netdev_adjacent_dev_unlink(dev, upper_dev);
        __netdev_adjacent_dev_unlink_lists(dev, upper_dev,
@@ -4967,20 +5015,24 @@ void netdev_upper_dev_unlink(struct net_device *dev,
 }
 EXPORT_SYMBOL(netdev_upper_dev_unlink);
 
-void *netdev_lower_dev_get_private_rcu(struct net_device *dev,
-                                      struct net_device *lower_dev)
+void netdev_adjacent_rename_links(struct net_device *dev, char *oldname)
 {
-       struct netdev_adjacent *lower;
+       struct netdev_adjacent *iter;
 
-       if (!lower_dev)
-               return NULL;
-       lower = __netdev_find_adj_rcu(dev, lower_dev, &dev->adj_list.lower);
-       if (!lower)
-               return NULL;
+       list_for_each_entry(iter, &dev->adj_list.upper, list) {
+               netdev_adjacent_sysfs_del(iter->dev, oldname,
+                                         &iter->dev->adj_list.lower);
+               netdev_adjacent_sysfs_add(iter->dev, dev,
+                                         &iter->dev->adj_list.lower);
+       }
 
-       return lower->private;
+       list_for_each_entry(iter, &dev->adj_list.lower, list) {
+               netdev_adjacent_sysfs_del(iter->dev, oldname,
+                                         &iter->dev->adj_list.upper);
+               netdev_adjacent_sysfs_add(iter->dev, dev,
+                                         &iter->dev->adj_list.upper);
+       }
 }
-EXPORT_SYMBOL(netdev_lower_dev_get_private_rcu);
 
 void *netdev_lower_dev_get_private(struct net_device *dev,
                                   struct net_device *lower_dev)
@@ -5314,6 +5366,17 @@ int dev_change_flags(struct net_device *dev, unsigned int flags)
 }
 EXPORT_SYMBOL(dev_change_flags);
 
+static int __dev_set_mtu(struct net_device *dev, int new_mtu)
+{
+       const struct net_device_ops *ops = dev->netdev_ops;
+
+       if (ops->ndo_change_mtu)
+               return ops->ndo_change_mtu(dev, new_mtu);
+
+       dev->mtu = new_mtu;
+       return 0;
+}
+
 /**
  *     dev_set_mtu - Change maximum transfer unit
  *     @dev: device
@@ -5323,8 +5386,7 @@ EXPORT_SYMBOL(dev_change_flags);
  */
 int dev_set_mtu(struct net_device *dev, int new_mtu)
 {
-       const struct net_device_ops *ops = dev->netdev_ops;
-       int err;
+       int err, orig_mtu;
 
        if (new_mtu == dev->mtu)
                return 0;
@@ -5336,14 +5398,25 @@ int dev_set_mtu(struct net_device *dev, int new_mtu)
        if (!netif_device_present(dev))
                return -ENODEV;
 
-       err = 0;
-       if (ops->ndo_change_mtu)
-               err = ops->ndo_change_mtu(dev, new_mtu);
-       else
-               dev->mtu = new_mtu;
+       err = call_netdevice_notifiers(NETDEV_PRECHANGEMTU, dev);
+       err = notifier_to_errno(err);
+       if (err)
+               return err;
 
-       if (!err)
-               call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
+       orig_mtu = dev->mtu;
+       err = __dev_set_mtu(dev, new_mtu);
+
+       if (!err) {
+               err = call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
+               err = notifier_to_errno(err);
+               if (err) {
+                       /* setting mtu back and notifying everyone again,
+                        * so that they have a chance to revert changes.
+                        */
+                       __dev_set_mtu(dev, orig_mtu);
+                       call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
+               }
+       }
        return err;
 }
 EXPORT_SYMBOL(dev_set_mtu);
@@ -5697,7 +5770,7 @@ void netif_stacked_transfer_operstate(const struct net_device *rootdev,
 }
 EXPORT_SYMBOL(netif_stacked_transfer_operstate);
 
-#ifdef CONFIG_RPS
+#ifdef CONFIG_SYSFS
 static int netif_alloc_rx_queues(struct net_device *dev)
 {
        unsigned int i, count = dev->num_rx_queues;
@@ -5836,13 +5909,8 @@ int register_netdevice(struct net_device *dev)
        dev->features |= NETIF_F_SOFT_FEATURES;
        dev->wanted_features = dev->features & dev->hw_features;
 
-       /* Turn on no cache copy if HW is doing checksum */
        if (!(dev->flags & IFF_LOOPBACK)) {
                dev->hw_features |= NETIF_F_NOCACHE_COPY;
-               if (dev->features & NETIF_F_ALL_CSUM) {
-                       dev->wanted_features |= NETIF_F_NOCACHE_COPY;
-                       dev->features |= NETIF_F_NOCACHE_COPY;
-               }
        }
 
        /* Make NETIF_F_HIGHDMA inheritable to VLAN devices.
@@ -6229,7 +6297,7 @@ void netdev_freemem(struct net_device *dev)
  *     @rxqs:          the number of RX subqueues to allocate
  *
  *     Allocates a struct net_device with private data area for driver use
- *     and performs basic initialization.  Also allocates subquue structs
+ *     and performs basic initialization.  Also allocates subqueue structs
  *     for each queue on the device.
  */
 struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
@@ -6247,7 +6315,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
                return NULL;
        }
 
-#ifdef CONFIG_RPS
+#ifdef CONFIG_SYSFS
        if (rxqs < 1) {
                pr_err("alloc_netdev: Unable to allocate device with zero RX queues\n");
                return NULL;
@@ -6303,7 +6371,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
        if (netif_alloc_netdev_queues(dev))
                goto free_all;
 
-#ifdef CONFIG_RPS
+#ifdef CONFIG_SYSFS
        dev->num_rx_queues = rxqs;
        dev->real_num_rx_queues = rxqs;
        if (netif_alloc_rx_queues(dev))
@@ -6323,7 +6391,7 @@ free_all:
 free_pcpu:
        free_percpu(dev->pcpu_refcnt);
        netif_free_tx_queues(dev);
-#ifdef CONFIG_RPS
+#ifdef CONFIG_SYSFS
        kfree(dev->_rx);
 #endif
 
@@ -6348,7 +6416,7 @@ void free_netdev(struct net_device *dev)
        release_net(dev_net(dev));
 
        netif_free_tx_queues(dev);
-#ifdef CONFIG_RPS
+#ifdef CONFIG_SYSFS
        kfree(dev->_rx);
 #endif
 
@@ -6618,11 +6686,11 @@ static int dev_cpu_callback(struct notifier_block *nfb,
 
        /* Process offline CPU's input_pkt_queue */
        while ((skb = __skb_dequeue(&oldsd->process_queue))) {
-               netif_rx(skb);
+               netif_rx_internal(skb);
                input_queue_head_incr(oldsd);
        }
        while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) {
-               netif_rx(skb);
+               netif_rx_internal(skb);
                input_queue_head_incr(oldsd);
        }
 
@@ -6935,28 +7003,18 @@ static int __init net_dev_init(void)
        for_each_possible_cpu(i) {
                struct softnet_data *sd = &per_cpu(softnet_data, i);
 
-               memset(sd, 0, sizeof(*sd));
                skb_queue_head_init(&sd->input_pkt_queue);
                skb_queue_head_init(&sd->process_queue);
-               sd->completion_queue = NULL;
                INIT_LIST_HEAD(&sd->poll_list);
-               sd->output_queue = NULL;
                sd->output_queue_tailp = &sd->output_queue;
 #ifdef CONFIG_RPS
                sd->csd.func = rps_trigger_softirq;
                sd->csd.info = sd;
-               sd->csd.flags = 0;
                sd->cpu = i;
 #endif
 
                sd->backlog.poll = process_backlog;
                sd->backlog.weight = weight_p;
-               sd->backlog.gro_list = NULL;
-               sd->backlog.gro_count = 0;
-
-#ifdef CONFIG_NET_FLOW_LIMIT
-               sd->flow_limit = NULL;
-#endif
        }
 
        dev_boot_phase = 0;