author     Linus Torvalds <torvalds@linux-foundation.org>  2014-01-25 14:17:34 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>  2014-01-25 14:17:34 -0500
commit     4ba9920e5e9c0e16b5ed24292d45322907bb9035 (patch)
tree       7d023baea59ed0886ded1f0b6d1c6385690b88f7 /net/core/dev.c
parent     82c477669a4665eb4e52030792051e0559ee2a36 (diff)
parent     8b662fe70c68282f78482dc272df0c4f355e49f5 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller:

 1) BPF debugger and asm tool by Daniel Borkmann.

 2) Speed up create/bind in AF_PACKET, also from Daniel Borkmann.

 3) Correct reciprocal_divide and update users, from Hannes Frederic Sowa and Daniel Borkmann.

 4) Currently we only have a "set" operation for the hw timestamp socket ioctl, add a "get" operation to match. From Ben Hutchings.

 5) Add better trace events for debugging driver datapath problems, also from Ben Hutchings.

 6) Implement auto corking in TCP, from Eric Dumazet. Basically, if we have a small send and a previous packet is already in the qdisc or device queue, defer until TX completion or we get more data.

 7) Allow userspace to manage ipv6 temporary addresses, from Jiri Pirko.

 8) Add a qdisc bypass option for AF_PACKET sockets, from Daniel Borkmann. (A usage sketch follows the commit message below.)

 9) Share IP header compression code between Bluetooth and IEEE802154 layers, from Jukka Rissanen.

10) Fix ipv6 router reachability probing, from Jiri Benc.

11) Allow packets to be captured on macvtap devices, from Vlad Yasevich.

12) Support tunneling in GRO layer, from Jerry Chu.

13) Allow bonding to be configured fully using netlink, from Scott Feldman.

14) Allow AF_PACKET users to obtain the VLAN TPID, just like they can already get the TCI. From Atzm Watanabe.

15) New "Heavy Hitter" qdisc, from Terry Lam.

16) Significantly improve the IPSEC support in pktgen, from Fan Du.

17) Allow ipv4 tunnels to cache routes, just like sockets. From Tom Herbert.

18) Add Proportional Integral Enhanced packet scheduler, from Vijay Subramanian.

19) Allow openvswitch to use mmap'd netlink, from Thomas Graf.

20) Key TCP metrics blobs also by source address, not just destination address. From Christoph Paasch.

21) Support 10G in generic phylib. From Andy Fleming.

22) Try to short-circuit GRO flow compares using device provided RX hash, if provided. From Tom Herbert.

The wireless and netfilter folks have been busy little bees too.

* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (2064 commits)
  net/cxgb4: Fix referencing freed adapter
  ipv6: reallocate addrconf router for ipv6 address when lo device up
  fib_frontend: fix possible NULL pointer dereference
  rtnetlink: remove IFLA_BOND_SLAVE definition
  rtnetlink: remove check for fill_slave_info in rtnl_have_link_slave_info
  qlcnic: update version to 5.3.55
  qlcnic: Enhance logic to calculate msix vectors.
  qlcnic: Refactor interrupt coalescing code for all adapters.
  qlcnic: Update poll controller code path
  qlcnic: Interrupt code cleanup
  qlcnic: Enhance Tx timeout debugging.
  qlcnic: Use bool for rx_mac_learn.
  bonding: fix u64 division
  rtnetlink: add missing IFLA_BOND_AD_INFO_UNSPEC
  sfc: Use the correct maximum TX DMA ring size for SFC9100
  Add Shradha Shah as the sfc driver maintainer.
  net/vxlan: Share RX skb de-marking and checksum checks with ovs
  tulip: cleanup by using ARRAY_SIZE()
  ip_tunnel: clear IPCB in ip_tunnel_xmit() in case dst_link_failure() is called
  net/cxgb4: Don't retrieve stats during recovery
  ...
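Item 8 above refers to the new per-socket qdisc bypass for AF_PACKET. As an illustrative sketch only (userspace code, not part of this merge), a program running on a kernel that carries these changes could enable the bypass on a raw packet socket roughly as follows; it assumes a uapi header set that already exports the new PACKET_QDISC_BYPASS constant from <linux/if_packet.h>:

	/*
	 * Hypothetical usage sketch -- not code from this patch series.
	 * Requires CAP_NET_RAW and a kernel with PACKET_QDISC_BYPASS support.
	 */
	#include <stdio.h>
	#include <sys/socket.h>
	#include <arpa/inet.h>
	#include <linux/if_packet.h>
	#include <linux/if_ether.h>

	int main(void)
	{
		int one = 1;
		int fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));

		if (fd < 0) {
			perror("socket");
			return 1;
		}

		/* Hand TX frames straight to the driver, skipping the qdisc layer. */
		if (setsockopt(fd, SOL_PACKET, PACKET_QDISC_BYPASS,
			       &one, sizeof(one)) < 0)
			perror("setsockopt(PACKET_QDISC_BYPASS)");

		/* ... bind() the socket to an interface and transmit as usual ... */
		return 0;
	}

With the bypass enabled, transmitted frames skip tc queueing disciplines entirely (no shaping or fairness) in exchange for lower per-packet overhead, which is why it is an opt-in, per-socket flag rather than a global default.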
Diffstat (limited to 'net/core/dev.c')
-rw-r--r--   net/core/dev.c   562
1 file changed, 309 insertions, 253 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index 2e0c6a90f6f2..3721db716350 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -147,6 +147,8 @@ struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
147struct list_head ptype_all __read_mostly; /* Taps */ 147struct list_head ptype_all __read_mostly; /* Taps */
148static struct list_head offload_base __read_mostly; 148static struct list_head offload_base __read_mostly;
149 149
150static int netif_rx_internal(struct sk_buff *skb);
151
150/* 152/*
151 * The @dev_base_head list is protected by @dev_base_lock and the rtnl 153 * The @dev_base_head list is protected by @dev_base_lock and the rtnl
152 * semaphore. 154 * semaphore.
@@ -480,7 +482,7 @@ EXPORT_SYMBOL(dev_add_offload);
480 * and must not be freed until after all the CPU's have gone 482 * and must not be freed until after all the CPU's have gone
481 * through a quiescent state. 483 * through a quiescent state.
482 */ 484 */
483void __dev_remove_offload(struct packet_offload *po) 485static void __dev_remove_offload(struct packet_offload *po)
484{ 486{
485 struct list_head *head = &offload_base; 487 struct list_head *head = &offload_base;
486 struct packet_offload *po1; 488 struct packet_offload *po1;
@@ -498,7 +500,6 @@ void __dev_remove_offload(struct packet_offload *po)
498out: 500out:
499 spin_unlock(&offload_lock); 501 spin_unlock(&offload_lock);
500} 502}
501EXPORT_SYMBOL(__dev_remove_offload);
502 503
503/** 504/**
504 * dev_remove_offload - remove packet offload handler 505 * dev_remove_offload - remove packet offload handler
@@ -1118,6 +1119,8 @@ rollback:
1118 1119
1119 write_seqcount_end(&devnet_rename_seq); 1120 write_seqcount_end(&devnet_rename_seq);
1120 1121
1122 netdev_adjacent_rename_links(dev, oldname);
1123
1121 write_lock_bh(&dev_base_lock); 1124 write_lock_bh(&dev_base_lock);
1122 hlist_del_rcu(&dev->name_hlist); 1125 hlist_del_rcu(&dev->name_hlist);
1123 write_unlock_bh(&dev_base_lock); 1126 write_unlock_bh(&dev_base_lock);
@@ -1137,6 +1140,7 @@ rollback:
1137 err = ret; 1140 err = ret;
1138 write_seqcount_begin(&devnet_rename_seq); 1141 write_seqcount_begin(&devnet_rename_seq);
1139 memcpy(dev->name, oldname, IFNAMSIZ); 1142 memcpy(dev->name, oldname, IFNAMSIZ);
1143 memcpy(oldname, newname, IFNAMSIZ);
1140 goto rollback; 1144 goto rollback;
1141 } else { 1145 } else {
1142 pr_err("%s: name change rollback failed: %d\n", 1146 pr_err("%s: name change rollback failed: %d\n",
@@ -1566,14 +1570,14 @@ EXPORT_SYMBOL(unregister_netdevice_notifier);
1566 * are as for raw_notifier_call_chain(). 1570 * are as for raw_notifier_call_chain().
1567 */ 1571 */
1568 1572
1569int call_netdevice_notifiers_info(unsigned long val, struct net_device *dev, 1573static int call_netdevice_notifiers_info(unsigned long val,
1570 struct netdev_notifier_info *info) 1574 struct net_device *dev,
1575 struct netdev_notifier_info *info)
1571{ 1576{
1572 ASSERT_RTNL(); 1577 ASSERT_RTNL();
1573 netdev_notifier_info_init(info, dev); 1578 netdev_notifier_info_init(info, dev);
1574 return raw_notifier_call_chain(&netdev_chain, val, info); 1579 return raw_notifier_call_chain(&netdev_chain, val, info);
1575} 1580}
1576EXPORT_SYMBOL(call_netdevice_notifiers_info);
1577 1581
1578/** 1582/**
1579 * call_netdevice_notifiers - call all network notifier blocks 1583 * call_netdevice_notifiers - call all network notifier blocks
@@ -1699,7 +1703,7 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
1699 skb_scrub_packet(skb, true); 1703 skb_scrub_packet(skb, true);
1700 skb->protocol = eth_type_trans(skb, dev); 1704 skb->protocol = eth_type_trans(skb, dev);
1701 1705
1702 return netif_rx(skb); 1706 return netif_rx_internal(skb);
1703} 1707}
1704EXPORT_SYMBOL_GPL(dev_forward_skb); 1708EXPORT_SYMBOL_GPL(dev_forward_skb);
1705 1709
@@ -2079,7 +2083,7 @@ int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
2079} 2083}
2080EXPORT_SYMBOL(netif_set_real_num_tx_queues); 2084EXPORT_SYMBOL(netif_set_real_num_tx_queues);
2081 2085
2082#ifdef CONFIG_RPS 2086#ifdef CONFIG_SYSFS
2083/** 2087/**
2084 * netif_set_real_num_rx_queues - set actual number of RX queues used 2088 * netif_set_real_num_rx_queues - set actual number of RX queues used
2085 * @dev: Network device 2089 * @dev: Network device
@@ -2145,30 +2149,42 @@ void __netif_schedule(struct Qdisc *q)
2145} 2149}
2146EXPORT_SYMBOL(__netif_schedule); 2150EXPORT_SYMBOL(__netif_schedule);
2147 2151
2148void dev_kfree_skb_irq(struct sk_buff *skb) 2152struct dev_kfree_skb_cb {
2153 enum skb_free_reason reason;
2154};
2155
2156static struct dev_kfree_skb_cb *get_kfree_skb_cb(const struct sk_buff *skb)
2149{ 2157{
2150 if (atomic_dec_and_test(&skb->users)) { 2158 return (struct dev_kfree_skb_cb *)skb->cb;
2151 struct softnet_data *sd; 2159}
2152 unsigned long flags; 2160
2161void __dev_kfree_skb_irq(struct sk_buff *skb, enum skb_free_reason reason)
2162{
2163 unsigned long flags;
2153 2164
2154 local_irq_save(flags); 2165 if (likely(atomic_read(&skb->users) == 1)) {
2155 sd = &__get_cpu_var(softnet_data); 2166 smp_rmb();
2156 skb->next = sd->completion_queue; 2167 atomic_set(&skb->users, 0);
2157 sd->completion_queue = skb; 2168 } else if (likely(!atomic_dec_and_test(&skb->users))) {
2158 raise_softirq_irqoff(NET_TX_SOFTIRQ); 2169 return;
2159 local_irq_restore(flags);
2160 } 2170 }
2171 get_kfree_skb_cb(skb)->reason = reason;
2172 local_irq_save(flags);
2173 skb->next = __this_cpu_read(softnet_data.completion_queue);
2174 __this_cpu_write(softnet_data.completion_queue, skb);
2175 raise_softirq_irqoff(NET_TX_SOFTIRQ);
2176 local_irq_restore(flags);
2161} 2177}
2162EXPORT_SYMBOL(dev_kfree_skb_irq); 2178EXPORT_SYMBOL(__dev_kfree_skb_irq);
2163 2179
2164void dev_kfree_skb_any(struct sk_buff *skb) 2180void __dev_kfree_skb_any(struct sk_buff *skb, enum skb_free_reason reason)
2165{ 2181{
2166 if (in_irq() || irqs_disabled()) 2182 if (in_irq() || irqs_disabled())
2167 dev_kfree_skb_irq(skb); 2183 __dev_kfree_skb_irq(skb, reason);
2168 else 2184 else
2169 dev_kfree_skb(skb); 2185 dev_kfree_skb(skb);
2170} 2186}
2171EXPORT_SYMBOL(dev_kfree_skb_any); 2187EXPORT_SYMBOL(__dev_kfree_skb_any);
2172 2188
2173 2189
2174/** 2190/**
@@ -2442,13 +2458,8 @@ static void dev_gso_skb_destructor(struct sk_buff *skb)
2442{ 2458{
2443 struct dev_gso_cb *cb; 2459 struct dev_gso_cb *cb;
2444 2460
2445 do { 2461 kfree_skb_list(skb->next);
2446 struct sk_buff *nskb = skb->next; 2462 skb->next = NULL;
2447
2448 skb->next = nskb->next;
2449 nskb->next = NULL;
2450 kfree_skb(nskb);
2451 } while (skb->next);
2452 2463
2453 cb = DEV_GSO_CB(skb); 2464 cb = DEV_GSO_CB(skb);
2454 if (cb->destructor) 2465 if (cb->destructor)
@@ -2523,21 +2534,6 @@ netdev_features_t netif_skb_features(struct sk_buff *skb)
2523} 2534}
2524EXPORT_SYMBOL(netif_skb_features); 2535EXPORT_SYMBOL(netif_skb_features);
2525 2536
2526/*
2527 * Returns true if either:
2528 * 1. skb has frag_list and the device doesn't support FRAGLIST, or
2529 * 2. skb is fragmented and the device does not support SG.
2530 */
2531static inline int skb_needs_linearize(struct sk_buff *skb,
2532 netdev_features_t features)
2533{
2534 return skb_is_nonlinear(skb) &&
2535 ((skb_has_frag_list(skb) &&
2536 !(features & NETIF_F_FRAGLIST)) ||
2537 (skb_shinfo(skb)->nr_frags &&
2538 !(features & NETIF_F_SG)));
2539}
2540
2541int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, 2537int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
2542 struct netdev_queue *txq) 2538 struct netdev_queue *txq)
2543{ 2539{
@@ -2605,8 +2601,8 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
2605 dev_queue_xmit_nit(skb, dev); 2601 dev_queue_xmit_nit(skb, dev);
2606 2602
2607 skb_len = skb->len; 2603 skb_len = skb->len;
2608 rc = ops->ndo_start_xmit(skb, dev); 2604 trace_net_dev_start_xmit(skb, dev);
2609 2605 rc = ops->ndo_start_xmit(skb, dev);
2610 trace_net_dev_xmit(skb, rc, dev, skb_len); 2606 trace_net_dev_xmit(skb, rc, dev, skb_len);
2611 if (rc == NETDEV_TX_OK) 2607 if (rc == NETDEV_TX_OK)
2612 txq_trans_update(txq); 2608 txq_trans_update(txq);
@@ -2624,6 +2620,7 @@ gso:
2624 dev_queue_xmit_nit(nskb, dev); 2620 dev_queue_xmit_nit(nskb, dev);
2625 2621
2626 skb_len = nskb->len; 2622 skb_len = nskb->len;
2623 trace_net_dev_start_xmit(nskb, dev);
2627 rc = ops->ndo_start_xmit(nskb, dev); 2624 rc = ops->ndo_start_xmit(nskb, dev);
2628 trace_net_dev_xmit(nskb, rc, dev, skb_len); 2625 trace_net_dev_xmit(nskb, rc, dev, skb_len);
2629 if (unlikely(rc != NETDEV_TX_OK)) { 2626 if (unlikely(rc != NETDEV_TX_OK)) {
@@ -2744,7 +2741,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
2744 return rc; 2741 return rc;
2745} 2742}
2746 2743
2747#if IS_ENABLED(CONFIG_NETPRIO_CGROUP) 2744#if IS_ENABLED(CONFIG_CGROUP_NET_PRIO)
2748static void skb_update_prio(struct sk_buff *skb) 2745static void skb_update_prio(struct sk_buff *skb)
2749{ 2746{
2750 struct netprio_map *map = rcu_dereference_bh(skb->dev->priomap); 2747 struct netprio_map *map = rcu_dereference_bh(skb->dev->priomap);
@@ -2781,8 +2778,9 @@ int dev_loopback_xmit(struct sk_buff *skb)
2781EXPORT_SYMBOL(dev_loopback_xmit); 2778EXPORT_SYMBOL(dev_loopback_xmit);
2782 2779
2783/** 2780/**
2784 * dev_queue_xmit - transmit a buffer 2781 * __dev_queue_xmit - transmit a buffer
2785 * @skb: buffer to transmit 2782 * @skb: buffer to transmit
2783 * @accel_priv: private data used for L2 forwarding offload
2786 * 2784 *
2787 * Queue a buffer for transmission to a network device. The caller must 2785 * Queue a buffer for transmission to a network device. The caller must
2788 * have set the device and priority and built the buffer before calling 2786 * have set the device and priority and built the buffer before calling
@@ -3014,7 +3012,7 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
3014 } 3012 }
3015 3013
3016 skb_reset_network_header(skb); 3014 skb_reset_network_header(skb);
3017 if (!skb_get_rxhash(skb)) 3015 if (!skb_get_hash(skb))
3018 goto done; 3016 goto done;
3019 3017
3020 flow_table = rcu_dereference(rxqueue->rps_flow_table); 3018 flow_table = rcu_dereference(rxqueue->rps_flow_table);
@@ -3159,7 +3157,7 @@ static bool skb_flow_limit(struct sk_buff *skb, unsigned int qlen)
3159 rcu_read_lock(); 3157 rcu_read_lock();
3160 fl = rcu_dereference(sd->flow_limit); 3158 fl = rcu_dereference(sd->flow_limit);
3161 if (fl) { 3159 if (fl) {
3162 new_flow = skb_get_rxhash(skb) & (fl->num_buckets - 1); 3160 new_flow = skb_get_hash(skb) & (fl->num_buckets - 1);
3163 old_flow = fl->history[fl->history_head]; 3161 old_flow = fl->history[fl->history_head];
3164 fl->history[fl->history_head] = new_flow; 3162 fl->history[fl->history_head] = new_flow;
3165 3163
@@ -3227,22 +3225,7 @@ enqueue:
3227 return NET_RX_DROP; 3225 return NET_RX_DROP;
3228} 3226}
3229 3227
3230/** 3228static int netif_rx_internal(struct sk_buff *skb)
3231 * netif_rx - post buffer to the network code
3232 * @skb: buffer to post
3233 *
3234 * This function receives a packet from a device driver and queues it for
3235 * the upper (protocol) levels to process. It always succeeds. The buffer
3236 * may be dropped during processing for congestion control or by the
3237 * protocol layers.
3238 *
3239 * return values:
3240 * NET_RX_SUCCESS (no congestion)
3241 * NET_RX_DROP (packet was dropped)
3242 *
3243 */
3244
3245int netif_rx(struct sk_buff *skb)
3246{ 3229{
3247 int ret; 3230 int ret;
3248 3231
@@ -3278,14 +3261,38 @@ int netif_rx(struct sk_buff *skb)
3278 } 3261 }
3279 return ret; 3262 return ret;
3280} 3263}
3264
3265/**
3266 * netif_rx - post buffer to the network code
3267 * @skb: buffer to post
3268 *
3269 * This function receives a packet from a device driver and queues it for
3270 * the upper (protocol) levels to process. It always succeeds. The buffer
3271 * may be dropped during processing for congestion control or by the
3272 * protocol layers.
3273 *
3274 * return values:
3275 * NET_RX_SUCCESS (no congestion)
3276 * NET_RX_DROP (packet was dropped)
3277 *
3278 */
3279
3280int netif_rx(struct sk_buff *skb)
3281{
3282 trace_netif_rx_entry(skb);
3283
3284 return netif_rx_internal(skb);
3285}
3281EXPORT_SYMBOL(netif_rx); 3286EXPORT_SYMBOL(netif_rx);
3282 3287
3283int netif_rx_ni(struct sk_buff *skb) 3288int netif_rx_ni(struct sk_buff *skb)
3284{ 3289{
3285 int err; 3290 int err;
3286 3291
3292 trace_netif_rx_ni_entry(skb);
3293
3287 preempt_disable(); 3294 preempt_disable();
3288 err = netif_rx(skb); 3295 err = netif_rx_internal(skb);
3289 if (local_softirq_pending()) 3296 if (local_softirq_pending())
3290 do_softirq(); 3297 do_softirq();
3291 preempt_enable(); 3298 preempt_enable();
@@ -3311,7 +3318,10 @@ static void net_tx_action(struct softirq_action *h)
3311 clist = clist->next; 3318 clist = clist->next;
3312 3319
3313 WARN_ON(atomic_read(&skb->users)); 3320 WARN_ON(atomic_read(&skb->users));
3314 trace_kfree_skb(skb, net_tx_action); 3321 if (likely(get_kfree_skb_cb(skb)->reason == SKB_REASON_CONSUMED))
3322 trace_consume_skb(skb);
3323 else
3324 trace_kfree_skb(skb, net_tx_action);
3315 __kfree_skb(skb); 3325 __kfree_skb(skb);
3316 } 3326 }
3317 } 3327 }
@@ -3667,22 +3677,7 @@ static int __netif_receive_skb(struct sk_buff *skb)
3667 return ret; 3677 return ret;
3668} 3678}
3669 3679
3670/** 3680static int netif_receive_skb_internal(struct sk_buff *skb)
3671 * netif_receive_skb - process receive buffer from network
3672 * @skb: buffer to process
3673 *
3674 * netif_receive_skb() is the main receive data processing function.
3675 * It always succeeds. The buffer may be dropped during processing
3676 * for congestion control or by the protocol layers.
3677 *
3678 * This function may only be called from softirq context and interrupts
3679 * should be enabled.
3680 *
3681 * Return values (usually ignored):
3682 * NET_RX_SUCCESS: no congestion
3683 * NET_RX_DROP: packet was dropped
3684 */
3685int netif_receive_skb(struct sk_buff *skb)
3686{ 3681{
3687 net_timestamp_check(netdev_tstamp_prequeue, skb); 3682 net_timestamp_check(netdev_tstamp_prequeue, skb);
3688 3683
@@ -3708,6 +3703,28 @@ int netif_receive_skb(struct sk_buff *skb)
3708#endif 3703#endif
3709 return __netif_receive_skb(skb); 3704 return __netif_receive_skb(skb);
3710} 3705}
3706
3707/**
3708 * netif_receive_skb - process receive buffer from network
3709 * @skb: buffer to process
3710 *
3711 * netif_receive_skb() is the main receive data processing function.
3712 * It always succeeds. The buffer may be dropped during processing
3713 * for congestion control or by the protocol layers.
3714 *
3715 * This function may only be called from softirq context and interrupts
3716 * should be enabled.
3717 *
3718 * Return values (usually ignored):
3719 * NET_RX_SUCCESS: no congestion
3720 * NET_RX_DROP: packet was dropped
3721 */
3722int netif_receive_skb(struct sk_buff *skb)
3723{
3724 trace_netif_receive_skb_entry(skb);
3725
3726 return netif_receive_skb_internal(skb);
3727}
3711EXPORT_SYMBOL(netif_receive_skb); 3728EXPORT_SYMBOL(netif_receive_skb);
3712 3729
3713/* Network device is going away, flush any packets still pending 3730/* Network device is going away, flush any packets still pending
@@ -3757,7 +3774,7 @@ static int napi_gro_complete(struct sk_buff *skb)
3757 if (ptype->type != type || !ptype->callbacks.gro_complete) 3774 if (ptype->type != type || !ptype->callbacks.gro_complete)
3758 continue; 3775 continue;
3759 3776
3760 err = ptype->callbacks.gro_complete(skb); 3777 err = ptype->callbacks.gro_complete(skb, 0);
3761 break; 3778 break;
3762 } 3779 }
3763 rcu_read_unlock(); 3780 rcu_read_unlock();
@@ -3769,7 +3786,7 @@ static int napi_gro_complete(struct sk_buff *skb)
3769 } 3786 }
3770 3787
3771out: 3788out:
3772 return netif_receive_skb(skb); 3789 return netif_receive_skb_internal(skb);
3773} 3790}
3774 3791
3775/* napi->gro_list contains packets ordered by age. 3792/* napi->gro_list contains packets ordered by age.
@@ -3805,10 +3822,18 @@ static void gro_list_prepare(struct napi_struct *napi, struct sk_buff *skb)
3805{ 3822{
3806 struct sk_buff *p; 3823 struct sk_buff *p;
3807 unsigned int maclen = skb->dev->hard_header_len; 3824 unsigned int maclen = skb->dev->hard_header_len;
3825 u32 hash = skb_get_hash_raw(skb);
3808 3826
3809 for (p = napi->gro_list; p; p = p->next) { 3827 for (p = napi->gro_list; p; p = p->next) {
3810 unsigned long diffs; 3828 unsigned long diffs;
3811 3829
3830 NAPI_GRO_CB(p)->flush = 0;
3831
3832 if (hash != skb_get_hash_raw(p)) {
3833 NAPI_GRO_CB(p)->same_flow = 0;
3834 continue;
3835 }
3836
3812 diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev; 3837 diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev;
3813 diffs |= p->vlan_tci ^ skb->vlan_tci; 3838 diffs |= p->vlan_tci ^ skb->vlan_tci;
3814 if (maclen == ETH_HLEN) 3839 if (maclen == ETH_HLEN)
@@ -3819,7 +3844,23 @@ static void gro_list_prepare(struct napi_struct *napi, struct sk_buff *skb)
3819 skb_gro_mac_header(skb), 3844 skb_gro_mac_header(skb),
3820 maclen); 3845 maclen);
3821 NAPI_GRO_CB(p)->same_flow = !diffs; 3846 NAPI_GRO_CB(p)->same_flow = !diffs;
3822 NAPI_GRO_CB(p)->flush = 0; 3847 }
3848}
3849
3850static void skb_gro_reset_offset(struct sk_buff *skb)
3851{
3852 const struct skb_shared_info *pinfo = skb_shinfo(skb);
3853 const skb_frag_t *frag0 = &pinfo->frags[0];
3854
3855 NAPI_GRO_CB(skb)->data_offset = 0;
3856 NAPI_GRO_CB(skb)->frag0 = NULL;
3857 NAPI_GRO_CB(skb)->frag0_len = 0;
3858
3859 if (skb_mac_header(skb) == skb_tail_pointer(skb) &&
3860 pinfo->nr_frags &&
3861 !PageHighMem(skb_frag_page(frag0))) {
3862 NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0);
3863 NAPI_GRO_CB(skb)->frag0_len = skb_frag_size(frag0);
3823 } 3864 }
3824} 3865}
3825 3866
@@ -3838,7 +3879,9 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
3838 if (skb_is_gso(skb) || skb_has_frag_list(skb)) 3879 if (skb_is_gso(skb) || skb_has_frag_list(skb))
3839 goto normal; 3880 goto normal;
3840 3881
3882 skb_gro_reset_offset(skb);
3841 gro_list_prepare(napi, skb); 3883 gro_list_prepare(napi, skb);
3884 NAPI_GRO_CB(skb)->csum = skb->csum; /* Needed for CHECKSUM_COMPLETE */
3842 3885
3843 rcu_read_lock(); 3886 rcu_read_lock();
3844 list_for_each_entry_rcu(ptype, head, list) { 3887 list_for_each_entry_rcu(ptype, head, list) {
@@ -3850,6 +3893,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
3850 NAPI_GRO_CB(skb)->same_flow = 0; 3893 NAPI_GRO_CB(skb)->same_flow = 0;
3851 NAPI_GRO_CB(skb)->flush = 0; 3894 NAPI_GRO_CB(skb)->flush = 0;
3852 NAPI_GRO_CB(skb)->free = 0; 3895 NAPI_GRO_CB(skb)->free = 0;
3896 NAPI_GRO_CB(skb)->udp_mark = 0;
3853 3897
3854 pp = ptype->callbacks.gro_receive(&napi->gro_list, skb); 3898 pp = ptype->callbacks.gro_receive(&napi->gro_list, skb);
3855 break; 3899 break;
@@ -3874,10 +3918,23 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
3874 if (same_flow) 3918 if (same_flow)
3875 goto ok; 3919 goto ok;
3876 3920
3877 if (NAPI_GRO_CB(skb)->flush || napi->gro_count >= MAX_GRO_SKBS) 3921 if (NAPI_GRO_CB(skb)->flush)
3878 goto normal; 3922 goto normal;
3879 3923
3880 napi->gro_count++; 3924 if (unlikely(napi->gro_count >= MAX_GRO_SKBS)) {
3925 struct sk_buff *nskb = napi->gro_list;
3926
3927 /* locate the end of the list to select the 'oldest' flow */
3928 while (nskb->next) {
3929 pp = &nskb->next;
3930 nskb = *pp;
3931 }
3932 *pp = NULL;
3933 nskb->next = NULL;
3934 napi_gro_complete(nskb);
3935 } else {
3936 napi->gro_count++;
3937 }
3881 NAPI_GRO_CB(skb)->count = 1; 3938 NAPI_GRO_CB(skb)->count = 1;
3882 NAPI_GRO_CB(skb)->age = jiffies; 3939 NAPI_GRO_CB(skb)->age = jiffies;
3883 skb_shinfo(skb)->gso_size = skb_gro_len(skb); 3940 skb_shinfo(skb)->gso_size = skb_gro_len(skb);
@@ -3915,12 +3972,39 @@ normal:
3915 goto pull; 3972 goto pull;
3916} 3973}
3917 3974
3975struct packet_offload *gro_find_receive_by_type(__be16 type)
3976{
3977 struct list_head *offload_head = &offload_base;
3978 struct packet_offload *ptype;
3979
3980 list_for_each_entry_rcu(ptype, offload_head, list) {
3981 if (ptype->type != type || !ptype->callbacks.gro_receive)
3982 continue;
3983 return ptype;
3984 }
3985 return NULL;
3986}
3987EXPORT_SYMBOL(gro_find_receive_by_type);
3988
3989struct packet_offload *gro_find_complete_by_type(__be16 type)
3990{
3991 struct list_head *offload_head = &offload_base;
3992 struct packet_offload *ptype;
3993
3994 list_for_each_entry_rcu(ptype, offload_head, list) {
3995 if (ptype->type != type || !ptype->callbacks.gro_complete)
3996 continue;
3997 return ptype;
3998 }
3999 return NULL;
4000}
4001EXPORT_SYMBOL(gro_find_complete_by_type);
3918 4002
3919static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb) 4003static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
3920{ 4004{
3921 switch (ret) { 4005 switch (ret) {
3922 case GRO_NORMAL: 4006 case GRO_NORMAL:
3923 if (netif_receive_skb(skb)) 4007 if (netif_receive_skb_internal(skb))
3924 ret = GRO_DROP; 4008 ret = GRO_DROP;
3925 break; 4009 break;
3926 4010
@@ -3943,26 +4027,9 @@ static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
3943 return ret; 4027 return ret;
3944} 4028}
3945 4029
3946static void skb_gro_reset_offset(struct sk_buff *skb)
3947{
3948 const struct skb_shared_info *pinfo = skb_shinfo(skb);
3949 const skb_frag_t *frag0 = &pinfo->frags[0];
3950
3951 NAPI_GRO_CB(skb)->data_offset = 0;
3952 NAPI_GRO_CB(skb)->frag0 = NULL;
3953 NAPI_GRO_CB(skb)->frag0_len = 0;
3954
3955 if (skb_mac_header(skb) == skb_tail_pointer(skb) &&
3956 pinfo->nr_frags &&
3957 !PageHighMem(skb_frag_page(frag0))) {
3958 NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0);
3959 NAPI_GRO_CB(skb)->frag0_len = skb_frag_size(frag0);
3960 }
3961}
3962
3963gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) 4030gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
3964{ 4031{
3965 skb_gro_reset_offset(skb); 4032 trace_napi_gro_receive_entry(skb);
3966 4033
3967 return napi_skb_finish(dev_gro_receive(napi, skb), skb); 4034 return napi_skb_finish(dev_gro_receive(napi, skb), skb);
3968} 4035}
@@ -3986,8 +4053,7 @@ struct sk_buff *napi_get_frags(struct napi_struct *napi)
3986 4053
3987 if (!skb) { 4054 if (!skb) {
3988 skb = netdev_alloc_skb_ip_align(napi->dev, GRO_MAX_HEAD); 4055 skb = netdev_alloc_skb_ip_align(napi->dev, GRO_MAX_HEAD);
3989 if (skb) 4056 napi->skb = skb;
3990 napi->skb = skb;
3991 } 4057 }
3992 return skb; 4058 return skb;
3993} 4059}
@@ -3998,12 +4064,7 @@ static gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *
3998{ 4064{
3999 switch (ret) { 4065 switch (ret) {
4000 case GRO_NORMAL: 4066 case GRO_NORMAL:
4001 case GRO_HELD: 4067 if (netif_receive_skb_internal(skb))
4002 skb->protocol = eth_type_trans(skb, skb->dev);
4003
4004 if (ret == GRO_HELD)
4005 skb_gro_pull(skb, -ETH_HLEN);
4006 else if (netif_receive_skb(skb))
4007 ret = GRO_DROP; 4068 ret = GRO_DROP;
4008 break; 4069 break;
4009 4070
@@ -4012,6 +4073,7 @@ static gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *
4012 napi_reuse_skb(napi, skb); 4073 napi_reuse_skb(napi, skb);
4013 break; 4074 break;
4014 4075
4076 case GRO_HELD:
4015 case GRO_MERGED: 4077 case GRO_MERGED:
4016 break; 4078 break;
4017 } 4079 }
@@ -4022,36 +4084,15 @@ static gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *
4022static struct sk_buff *napi_frags_skb(struct napi_struct *napi) 4084static struct sk_buff *napi_frags_skb(struct napi_struct *napi)
4023{ 4085{
4024 struct sk_buff *skb = napi->skb; 4086 struct sk_buff *skb = napi->skb;
4025 struct ethhdr *eth;
4026 unsigned int hlen;
4027 unsigned int off;
4028 4087
4029 napi->skb = NULL; 4088 napi->skb = NULL;
4030 4089
4031 skb_reset_mac_header(skb); 4090 if (unlikely(!pskb_may_pull(skb, sizeof(struct ethhdr)))) {
4032 skb_gro_reset_offset(skb); 4091 napi_reuse_skb(napi, skb);
4033 4092 return NULL;
4034 off = skb_gro_offset(skb);
4035 hlen = off + sizeof(*eth);
4036 eth = skb_gro_header_fast(skb, off);
4037 if (skb_gro_header_hard(skb, hlen)) {
4038 eth = skb_gro_header_slow(skb, hlen, off);
4039 if (unlikely(!eth)) {
4040 napi_reuse_skb(napi, skb);
4041 skb = NULL;
4042 goto out;
4043 }
4044 } 4093 }
4094 skb->protocol = eth_type_trans(skb, skb->dev);
4045 4095
4046 skb_gro_pull(skb, sizeof(*eth));
4047
4048 /*
4049 * This works because the only protocols we care about don't require
4050 * special handling. We'll fix it up properly at the end.
4051 */
4052 skb->protocol = eth->h_proto;
4053
4054out:
4055 return skb; 4096 return skb;
4056} 4097}
4057 4098
@@ -4062,12 +4103,14 @@ gro_result_t napi_gro_frags(struct napi_struct *napi)
4062 if (!skb) 4103 if (!skb)
4063 return GRO_DROP; 4104 return GRO_DROP;
4064 4105
4106 trace_napi_gro_frags_entry(skb);
4107
4065 return napi_frags_finish(napi, skb, dev_gro_receive(napi, skb)); 4108 return napi_frags_finish(napi, skb, dev_gro_receive(napi, skb));
4066} 4109}
4067EXPORT_SYMBOL(napi_gro_frags); 4110EXPORT_SYMBOL(napi_gro_frags);
4068 4111
4069/* 4112/*
4070 * net_rps_action sends any pending IPI's for rps. 4113 * net_rps_action_and_irq_enable sends any pending IPI's for rps.
4071 * Note: called with local irq disabled, but exits with local irq enabled. 4114 * Note: called with local irq disabled, but exits with local irq enabled.
4072 */ 4115 */
4073static void net_rps_action_and_irq_enable(struct softnet_data *sd) 4116static void net_rps_action_and_irq_enable(struct softnet_data *sd)
@@ -4272,17 +4315,10 @@ EXPORT_SYMBOL(netif_napi_add);
4272 4315
4273void netif_napi_del(struct napi_struct *napi) 4316void netif_napi_del(struct napi_struct *napi)
4274{ 4317{
4275 struct sk_buff *skb, *next;
4276
4277 list_del_init(&napi->dev_list); 4318 list_del_init(&napi->dev_list);
4278 napi_free_frags(napi); 4319 napi_free_frags(napi);
4279 4320
4280 for (skb = napi->gro_list; skb; skb = next) { 4321 kfree_skb_list(napi->gro_list);
4281 next = skb->next;
4282 skb->next = NULL;
4283 kfree_skb(skb);
4284 }
4285
4286 napi->gro_list = NULL; 4322 napi->gro_list = NULL;
4287 napi->gro_count = 0; 4323 napi->gro_count = 0;
4288} 4324}
@@ -4399,19 +4435,6 @@ struct netdev_adjacent {
4399 struct rcu_head rcu; 4435 struct rcu_head rcu;
4400}; 4436};
4401 4437
4402static struct netdev_adjacent *__netdev_find_adj_rcu(struct net_device *dev,
4403 struct net_device *adj_dev,
4404 struct list_head *adj_list)
4405{
4406 struct netdev_adjacent *adj;
4407
4408 list_for_each_entry_rcu(adj, adj_list, list) {
4409 if (adj->dev == adj_dev)
4410 return adj;
4411 }
4412 return NULL;
4413}
4414
4415static struct netdev_adjacent *__netdev_find_adj(struct net_device *dev, 4438static struct netdev_adjacent *__netdev_find_adj(struct net_device *dev,
4416 struct net_device *adj_dev, 4439 struct net_device *adj_dev,
4417 struct list_head *adj_list) 4440 struct list_head *adj_list)
@@ -4450,13 +4473,12 @@ EXPORT_SYMBOL(netdev_has_upper_dev);
4450 * Find out if a device is linked to an upper device and return true in case 4473 * Find out if a device is linked to an upper device and return true in case
4451 * it is. The caller must hold the RTNL lock. 4474 * it is. The caller must hold the RTNL lock.
4452 */ 4475 */
4453bool netdev_has_any_upper_dev(struct net_device *dev) 4476static bool netdev_has_any_upper_dev(struct net_device *dev)
4454{ 4477{
4455 ASSERT_RTNL(); 4478 ASSERT_RTNL();
4456 4479
4457 return !list_empty(&dev->all_adj_list.upper); 4480 return !list_empty(&dev->all_adj_list.upper);
4458} 4481}
4459EXPORT_SYMBOL(netdev_has_any_upper_dev);
4460 4482
4461/** 4483/**
4462 * netdev_master_upper_dev_get - Get master upper device 4484 * netdev_master_upper_dev_get - Get master upper device
@@ -4576,6 +4598,27 @@ void *netdev_lower_get_next_private_rcu(struct net_device *dev,
4576EXPORT_SYMBOL(netdev_lower_get_next_private_rcu); 4598EXPORT_SYMBOL(netdev_lower_get_next_private_rcu);
4577 4599
4578/** 4600/**
4601 * netdev_lower_get_first_private_rcu - Get the first ->private from the
4602 * lower neighbour list, RCU
4603 * variant
4604 * @dev: device
4605 *
4606 * Gets the first netdev_adjacent->private from the dev's lower neighbour
4607 * list. The caller must hold RCU read lock.
4608 */
4609void *netdev_lower_get_first_private_rcu(struct net_device *dev)
4610{
4611 struct netdev_adjacent *lower;
4612
4613 lower = list_first_or_null_rcu(&dev->adj_list.lower,
4614 struct netdev_adjacent, list);
4615 if (lower)
4616 return lower->private;
4617 return NULL;
4618}
4619EXPORT_SYMBOL(netdev_lower_get_first_private_rcu);
4620
4621/**
4579 * netdev_master_upper_dev_get_rcu - Get master upper device 4622 * netdev_master_upper_dev_get_rcu - Get master upper device
4580 * @dev: device 4623 * @dev: device
4581 * 4624 *
@@ -4594,13 +4637,36 @@ struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev)
4594} 4637}
4595EXPORT_SYMBOL(netdev_master_upper_dev_get_rcu); 4638EXPORT_SYMBOL(netdev_master_upper_dev_get_rcu);
4596 4639
4640int netdev_adjacent_sysfs_add(struct net_device *dev,
4641 struct net_device *adj_dev,
4642 struct list_head *dev_list)
4643{
4644 char linkname[IFNAMSIZ+7];
4645 sprintf(linkname, dev_list == &dev->adj_list.upper ?
4646 "upper_%s" : "lower_%s", adj_dev->name);
4647 return sysfs_create_link(&(dev->dev.kobj), &(adj_dev->dev.kobj),
4648 linkname);
4649}
4650void netdev_adjacent_sysfs_del(struct net_device *dev,
4651 char *name,
4652 struct list_head *dev_list)
4653{
4654 char linkname[IFNAMSIZ+7];
4655 sprintf(linkname, dev_list == &dev->adj_list.upper ?
4656 "upper_%s" : "lower_%s", name);
4657 sysfs_remove_link(&(dev->dev.kobj), linkname);
4658}
4659
4660#define netdev_adjacent_is_neigh_list(dev, dev_list) \
4661 (dev_list == &dev->adj_list.upper || \
4662 dev_list == &dev->adj_list.lower)
4663
4597static int __netdev_adjacent_dev_insert(struct net_device *dev, 4664static int __netdev_adjacent_dev_insert(struct net_device *dev,
4598 struct net_device *adj_dev, 4665 struct net_device *adj_dev,
4599 struct list_head *dev_list, 4666 struct list_head *dev_list,
4600 void *private, bool master) 4667 void *private, bool master)
4601{ 4668{
4602 struct netdev_adjacent *adj; 4669 struct netdev_adjacent *adj;
4603 char linkname[IFNAMSIZ+7];
4604 int ret; 4670 int ret;
4605 4671
4606 adj = __netdev_find_adj(dev, adj_dev, dev_list); 4672 adj = __netdev_find_adj(dev, adj_dev, dev_list);
@@ -4623,16 +4689,8 @@ static int __netdev_adjacent_dev_insert(struct net_device *dev,
4623 pr_debug("dev_hold for %s, because of link added from %s to %s\n", 4689 pr_debug("dev_hold for %s, because of link added from %s to %s\n",
4624 adj_dev->name, dev->name, adj_dev->name); 4690 adj_dev->name, dev->name, adj_dev->name);
4625 4691
4626 if (dev_list == &dev->adj_list.lower) { 4692 if (netdev_adjacent_is_neigh_list(dev, dev_list)) {
4627 sprintf(linkname, "lower_%s", adj_dev->name); 4693 ret = netdev_adjacent_sysfs_add(dev, adj_dev, dev_list);
4628 ret = sysfs_create_link(&(dev->dev.kobj),
4629 &(adj_dev->dev.kobj), linkname);
4630 if (ret)
4631 goto free_adj;
4632 } else if (dev_list == &dev->adj_list.upper) {
4633 sprintf(linkname, "upper_%s", adj_dev->name);
4634 ret = sysfs_create_link(&(dev->dev.kobj),
4635 &(adj_dev->dev.kobj), linkname);
4636 if (ret) 4694 if (ret)
4637 goto free_adj; 4695 goto free_adj;
4638 } 4696 }
@@ -4652,14 +4710,8 @@ static int __netdev_adjacent_dev_insert(struct net_device *dev,
4652 return 0; 4710 return 0;
4653 4711
4654remove_symlinks: 4712remove_symlinks:
4655 if (dev_list == &dev->adj_list.lower) { 4713 if (netdev_adjacent_is_neigh_list(dev, dev_list))
4656 sprintf(linkname, "lower_%s", adj_dev->name); 4714 netdev_adjacent_sysfs_del(dev, adj_dev->name, dev_list);
4657 sysfs_remove_link(&(dev->dev.kobj), linkname);
4658 } else if (dev_list == &dev->adj_list.upper) {
4659 sprintf(linkname, "upper_%s", adj_dev->name);
4660 sysfs_remove_link(&(dev->dev.kobj), linkname);
4661 }
4662
4663free_adj: 4715free_adj:
4664 kfree(adj); 4716 kfree(adj);
4665 dev_put(adj_dev); 4717 dev_put(adj_dev);
@@ -4667,12 +4719,11 @@ free_adj:
4667 return ret; 4719 return ret;
4668} 4720}
4669 4721
4670void __netdev_adjacent_dev_remove(struct net_device *dev, 4722static void __netdev_adjacent_dev_remove(struct net_device *dev,
4671 struct net_device *adj_dev, 4723 struct net_device *adj_dev,
4672 struct list_head *dev_list) 4724 struct list_head *dev_list)
4673{ 4725{
4674 struct netdev_adjacent *adj; 4726 struct netdev_adjacent *adj;
4675 char linkname[IFNAMSIZ+7];
4676 4727
4677 adj = __netdev_find_adj(dev, adj_dev, dev_list); 4728 adj = __netdev_find_adj(dev, adj_dev, dev_list);
4678 4729
@@ -4692,13 +4743,8 @@ void __netdev_adjacent_dev_remove(struct net_device *dev,
4692 if (adj->master) 4743 if (adj->master)
4693 sysfs_remove_link(&(dev->dev.kobj), "master"); 4744 sysfs_remove_link(&(dev->dev.kobj), "master");
4694 4745
4695 if (dev_list == &dev->adj_list.lower) { 4746 if (netdev_adjacent_is_neigh_list(dev, dev_list))
4696 sprintf(linkname, "lower_%s", adj_dev->name); 4747 netdev_adjacent_sysfs_del(dev, adj_dev->name, dev_list);
4697 sysfs_remove_link(&(dev->dev.kobj), linkname);
4698 } else if (dev_list == &dev->adj_list.upper) {
4699 sprintf(linkname, "upper_%s", adj_dev->name);
4700 sysfs_remove_link(&(dev->dev.kobj), linkname);
4701 }
4702 4748
4703 list_del_rcu(&adj->list); 4749 list_del_rcu(&adj->list);
4704 pr_debug("dev_put for %s, because link removed from %s to %s\n", 4750 pr_debug("dev_put for %s, because link removed from %s to %s\n",
@@ -4707,11 +4753,11 @@ void __netdev_adjacent_dev_remove(struct net_device *dev,
4707 kfree_rcu(adj, rcu); 4753 kfree_rcu(adj, rcu);
4708} 4754}
4709 4755
4710int __netdev_adjacent_dev_link_lists(struct net_device *dev, 4756static int __netdev_adjacent_dev_link_lists(struct net_device *dev,
4711 struct net_device *upper_dev, 4757 struct net_device *upper_dev,
4712 struct list_head *up_list, 4758 struct list_head *up_list,
4713 struct list_head *down_list, 4759 struct list_head *down_list,
4714 void *private, bool master) 4760 void *private, bool master)
4715{ 4761{
4716 int ret; 4762 int ret;
4717 4763
@@ -4730,8 +4776,8 @@ int __netdev_adjacent_dev_link_lists(struct net_device *dev,
4730 return 0; 4776 return 0;
4731} 4777}
4732 4778
4733int __netdev_adjacent_dev_link(struct net_device *dev, 4779static int __netdev_adjacent_dev_link(struct net_device *dev,
4734 struct net_device *upper_dev) 4780 struct net_device *upper_dev)
4735{ 4781{
4736 return __netdev_adjacent_dev_link_lists(dev, upper_dev, 4782 return __netdev_adjacent_dev_link_lists(dev, upper_dev,
4737 &dev->all_adj_list.upper, 4783 &dev->all_adj_list.upper,
@@ -4739,26 +4785,26 @@ int __netdev_adjacent_dev_link(struct net_device *dev,
4739 NULL, false); 4785 NULL, false);
4740} 4786}
4741 4787
4742void __netdev_adjacent_dev_unlink_lists(struct net_device *dev, 4788static void __netdev_adjacent_dev_unlink_lists(struct net_device *dev,
4743 struct net_device *upper_dev, 4789 struct net_device *upper_dev,
4744 struct list_head *up_list, 4790 struct list_head *up_list,
4745 struct list_head *down_list) 4791 struct list_head *down_list)
4746{ 4792{
4747 __netdev_adjacent_dev_remove(dev, upper_dev, up_list); 4793 __netdev_adjacent_dev_remove(dev, upper_dev, up_list);
4748 __netdev_adjacent_dev_remove(upper_dev, dev, down_list); 4794 __netdev_adjacent_dev_remove(upper_dev, dev, down_list);
4749} 4795}
4750 4796
4751void __netdev_adjacent_dev_unlink(struct net_device *dev, 4797static void __netdev_adjacent_dev_unlink(struct net_device *dev,
4752 struct net_device *upper_dev) 4798 struct net_device *upper_dev)
4753{ 4799{
4754 __netdev_adjacent_dev_unlink_lists(dev, upper_dev, 4800 __netdev_adjacent_dev_unlink_lists(dev, upper_dev,
4755 &dev->all_adj_list.upper, 4801 &dev->all_adj_list.upper,
4756 &upper_dev->all_adj_list.lower); 4802 &upper_dev->all_adj_list.lower);
4757} 4803}
4758 4804
4759int __netdev_adjacent_dev_link_neighbour(struct net_device *dev, 4805static int __netdev_adjacent_dev_link_neighbour(struct net_device *dev,
4760 struct net_device *upper_dev, 4806 struct net_device *upper_dev,
4761 void *private, bool master) 4807 void *private, bool master)
4762{ 4808{
4763 int ret = __netdev_adjacent_dev_link(dev, upper_dev); 4809 int ret = __netdev_adjacent_dev_link(dev, upper_dev);
4764 4810
@@ -4777,8 +4823,8 @@ int __netdev_adjacent_dev_link_neighbour(struct net_device *dev,
4777 return 0; 4823 return 0;
4778} 4824}
4779 4825
4780void __netdev_adjacent_dev_unlink_neighbour(struct net_device *dev, 4826static void __netdev_adjacent_dev_unlink_neighbour(struct net_device *dev,
4781 struct net_device *upper_dev) 4827 struct net_device *upper_dev)
4782{ 4828{
4783 __netdev_adjacent_dev_unlink(dev, upper_dev); 4829 __netdev_adjacent_dev_unlink(dev, upper_dev);
4784 __netdev_adjacent_dev_unlink_lists(dev, upper_dev, 4830 __netdev_adjacent_dev_unlink_lists(dev, upper_dev,
@@ -4967,20 +5013,24 @@ void netdev_upper_dev_unlink(struct net_device *dev,
4967} 5013}
4968EXPORT_SYMBOL(netdev_upper_dev_unlink); 5014EXPORT_SYMBOL(netdev_upper_dev_unlink);
4969 5015
4970void *netdev_lower_dev_get_private_rcu(struct net_device *dev, 5016void netdev_adjacent_rename_links(struct net_device *dev, char *oldname)
4971 struct net_device *lower_dev)
4972{ 5017{
4973 struct netdev_adjacent *lower; 5018 struct netdev_adjacent *iter;
4974 5019
4975 if (!lower_dev) 5020 list_for_each_entry(iter, &dev->adj_list.upper, list) {
4976 return NULL; 5021 netdev_adjacent_sysfs_del(iter->dev, oldname,
4977 lower = __netdev_find_adj_rcu(dev, lower_dev, &dev->adj_list.lower); 5022 &iter->dev->adj_list.lower);
4978 if (!lower) 5023 netdev_adjacent_sysfs_add(iter->dev, dev,
4979 return NULL; 5024 &iter->dev->adj_list.lower);
5025 }
4980 5026
4981 return lower->private; 5027 list_for_each_entry(iter, &dev->adj_list.lower, list) {
5028 netdev_adjacent_sysfs_del(iter->dev, oldname,
5029 &iter->dev->adj_list.upper);
5030 netdev_adjacent_sysfs_add(iter->dev, dev,
5031 &iter->dev->adj_list.upper);
5032 }
4982} 5033}
4983EXPORT_SYMBOL(netdev_lower_dev_get_private_rcu);
4984 5034
4985void *netdev_lower_dev_get_private(struct net_device *dev, 5035void *netdev_lower_dev_get_private(struct net_device *dev,
4986 struct net_device *lower_dev) 5036 struct net_device *lower_dev)
@@ -5314,6 +5364,17 @@ int dev_change_flags(struct net_device *dev, unsigned int flags)
5314} 5364}
5315EXPORT_SYMBOL(dev_change_flags); 5365EXPORT_SYMBOL(dev_change_flags);
5316 5366
5367static int __dev_set_mtu(struct net_device *dev, int new_mtu)
5368{
5369 const struct net_device_ops *ops = dev->netdev_ops;
5370
5371 if (ops->ndo_change_mtu)
5372 return ops->ndo_change_mtu(dev, new_mtu);
5373
5374 dev->mtu = new_mtu;
5375 return 0;
5376}
5377
5317/** 5378/**
5318 * dev_set_mtu - Change maximum transfer unit 5379 * dev_set_mtu - Change maximum transfer unit
5319 * @dev: device 5380 * @dev: device
@@ -5323,8 +5384,7 @@ EXPORT_SYMBOL(dev_change_flags);
5323 */ 5384 */
5324int dev_set_mtu(struct net_device *dev, int new_mtu) 5385int dev_set_mtu(struct net_device *dev, int new_mtu)
5325{ 5386{
5326 const struct net_device_ops *ops = dev->netdev_ops; 5387 int err, orig_mtu;
5327 int err;
5328 5388
5329 if (new_mtu == dev->mtu) 5389 if (new_mtu == dev->mtu)
5330 return 0; 5390 return 0;
@@ -5336,14 +5396,25 @@ int dev_set_mtu(struct net_device *dev, int new_mtu)
5336 if (!netif_device_present(dev)) 5396 if (!netif_device_present(dev))
5337 return -ENODEV; 5397 return -ENODEV;
5338 5398
5339 err = 0; 5399 err = call_netdevice_notifiers(NETDEV_PRECHANGEMTU, dev);
5340 if (ops->ndo_change_mtu) 5400 err = notifier_to_errno(err);
5341 err = ops->ndo_change_mtu(dev, new_mtu); 5401 if (err)
5342 else 5402 return err;
5343 dev->mtu = new_mtu;
5344 5403
5345 if (!err) 5404 orig_mtu = dev->mtu;
5346 call_netdevice_notifiers(NETDEV_CHANGEMTU, dev); 5405 err = __dev_set_mtu(dev, new_mtu);
5406
5407 if (!err) {
5408 err = call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
5409 err = notifier_to_errno(err);
5410 if (err) {
5411 /* setting mtu back and notifying everyone again,
5412 * so that they have a chance to revert changes.
5413 */
5414 __dev_set_mtu(dev, orig_mtu);
5415 call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
5416 }
5417 }
5347 return err; 5418 return err;
5348} 5419}
5349EXPORT_SYMBOL(dev_set_mtu); 5420EXPORT_SYMBOL(dev_set_mtu);
@@ -5697,7 +5768,7 @@ void netif_stacked_transfer_operstate(const struct net_device *rootdev,
5697} 5768}
5698EXPORT_SYMBOL(netif_stacked_transfer_operstate); 5769EXPORT_SYMBOL(netif_stacked_transfer_operstate);
5699 5770
5700#ifdef CONFIG_RPS 5771#ifdef CONFIG_SYSFS
5701static int netif_alloc_rx_queues(struct net_device *dev) 5772static int netif_alloc_rx_queues(struct net_device *dev)
5702{ 5773{
5703 unsigned int i, count = dev->num_rx_queues; 5774 unsigned int i, count = dev->num_rx_queues;
@@ -5836,13 +5907,8 @@ int register_netdevice(struct net_device *dev)
5836 dev->features |= NETIF_F_SOFT_FEATURES; 5907 dev->features |= NETIF_F_SOFT_FEATURES;
5837 dev->wanted_features = dev->features & dev->hw_features; 5908 dev->wanted_features = dev->features & dev->hw_features;
5838 5909
5839 /* Turn on no cache copy if HW is doing checksum */
5840 if (!(dev->flags & IFF_LOOPBACK)) { 5910 if (!(dev->flags & IFF_LOOPBACK)) {
5841 dev->hw_features |= NETIF_F_NOCACHE_COPY; 5911 dev->hw_features |= NETIF_F_NOCACHE_COPY;
5842 if (dev->features & NETIF_F_ALL_CSUM) {
5843 dev->wanted_features |= NETIF_F_NOCACHE_COPY;
5844 dev->features |= NETIF_F_NOCACHE_COPY;
5845 }
5846 } 5912 }
5847 5913
5848 /* Make NETIF_F_HIGHDMA inheritable to VLAN devices. 5914 /* Make NETIF_F_HIGHDMA inheritable to VLAN devices.
@@ -6247,7 +6313,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
6247 return NULL; 6313 return NULL;
6248 } 6314 }
6249 6315
6250#ifdef CONFIG_RPS 6316#ifdef CONFIG_SYSFS
6251 if (rxqs < 1) { 6317 if (rxqs < 1) {
6252 pr_err("alloc_netdev: Unable to allocate device with zero RX queues\n"); 6318 pr_err("alloc_netdev: Unable to allocate device with zero RX queues\n");
6253 return NULL; 6319 return NULL;
@@ -6303,7 +6369,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
6303 if (netif_alloc_netdev_queues(dev)) 6369 if (netif_alloc_netdev_queues(dev))
6304 goto free_all; 6370 goto free_all;
6305 6371
6306#ifdef CONFIG_RPS 6372#ifdef CONFIG_SYSFS
6307 dev->num_rx_queues = rxqs; 6373 dev->num_rx_queues = rxqs;
6308 dev->real_num_rx_queues = rxqs; 6374 dev->real_num_rx_queues = rxqs;
6309 if (netif_alloc_rx_queues(dev)) 6375 if (netif_alloc_rx_queues(dev))
@@ -6323,7 +6389,7 @@ free_all:
6323free_pcpu: 6389free_pcpu:
6324 free_percpu(dev->pcpu_refcnt); 6390 free_percpu(dev->pcpu_refcnt);
6325 netif_free_tx_queues(dev); 6391 netif_free_tx_queues(dev);
6326#ifdef CONFIG_RPS 6392#ifdef CONFIG_SYSFS
6327 kfree(dev->_rx); 6393 kfree(dev->_rx);
6328#endif 6394#endif
6329 6395
@@ -6348,7 +6414,7 @@ void free_netdev(struct net_device *dev)
6348 release_net(dev_net(dev)); 6414 release_net(dev_net(dev));
6349 6415
6350 netif_free_tx_queues(dev); 6416 netif_free_tx_queues(dev);
6351#ifdef CONFIG_RPS 6417#ifdef CONFIG_SYSFS
6352 kfree(dev->_rx); 6418 kfree(dev->_rx);
6353#endif 6419#endif
6354 6420
@@ -6618,11 +6684,11 @@ static int dev_cpu_callback(struct notifier_block *nfb,
6618 6684
6619 /* Process offline CPU's input_pkt_queue */ 6685 /* Process offline CPU's input_pkt_queue */
6620 while ((skb = __skb_dequeue(&oldsd->process_queue))) { 6686 while ((skb = __skb_dequeue(&oldsd->process_queue))) {
6621 netif_rx(skb); 6687 netif_rx_internal(skb);
6622 input_queue_head_incr(oldsd); 6688 input_queue_head_incr(oldsd);
6623 } 6689 }
6624 while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) { 6690 while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) {
6625 netif_rx(skb); 6691 netif_rx_internal(skb);
6626 input_queue_head_incr(oldsd); 6692 input_queue_head_incr(oldsd);
6627 } 6693 }
6628 6694
@@ -6935,28 +7001,18 @@ static int __init net_dev_init(void)
6935 for_each_possible_cpu(i) { 7001 for_each_possible_cpu(i) {
6936 struct softnet_data *sd = &per_cpu(softnet_data, i); 7002 struct softnet_data *sd = &per_cpu(softnet_data, i);
6937 7003
6938 memset(sd, 0, sizeof(*sd));
6939 skb_queue_head_init(&sd->input_pkt_queue); 7004 skb_queue_head_init(&sd->input_pkt_queue);
6940 skb_queue_head_init(&sd->process_queue); 7005 skb_queue_head_init(&sd->process_queue);
6941 sd->completion_queue = NULL;
6942 INIT_LIST_HEAD(&sd->poll_list); 7006 INIT_LIST_HEAD(&sd->poll_list);
6943 sd->output_queue = NULL;
6944 sd->output_queue_tailp = &sd->output_queue; 7007 sd->output_queue_tailp = &sd->output_queue;
6945#ifdef CONFIG_RPS 7008#ifdef CONFIG_RPS
6946 sd->csd.func = rps_trigger_softirq; 7009 sd->csd.func = rps_trigger_softirq;
6947 sd->csd.info = sd; 7010 sd->csd.info = sd;
6948 sd->csd.flags = 0;
6949 sd->cpu = i; 7011 sd->cpu = i;
6950#endif 7012#endif
6951 7013
6952 sd->backlog.poll = process_backlog; 7014 sd->backlog.poll = process_backlog;
6953 sd->backlog.weight = weight_p; 7015 sd->backlog.weight = weight_p;
6954 sd->backlog.gro_list = NULL;
6955 sd->backlog.gro_count = 0;
6956
6957#ifdef CONFIG_NET_FLOW_LIMIT
6958 sd->flow_limit = NULL;
6959#endif
6960 } 7016 }
6961 7017
6962 dev_boot_phase = 0; 7018 dev_boot_phase = 0;