Diffstat (limited to 'net/core/dev.c')
-rw-r--r--  net/core/dev.c | 153
1 file changed, 87 insertions(+), 66 deletions(-)
diff --git a/net/core/dev.c b/net/core/dev.c
index 4a91591b30a6..757063420ce0 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1245,7 +1245,7 @@ static int __dev_open(struct net_device *dev)
 	 * If we don't do this there is a chance ndo_poll_controller
 	 * or ndo_poll may be running while we open the device
 	 */
-	netpoll_rx_disable(dev);
+	netpoll_poll_disable(dev);
 
 	ret = call_netdevice_notifiers(NETDEV_PRE_UP, dev);
 	ret = notifier_to_errno(ret);
@@ -1260,7 +1260,7 @@ static int __dev_open(struct net_device *dev)
 	if (!ret && ops->ndo_open)
 		ret = ops->ndo_open(dev);
 
-	netpoll_rx_enable(dev);
+	netpoll_poll_enable(dev);
 
 	if (ret)
 		clear_bit(__LINK_STATE_START, &dev->state);
@@ -1313,6 +1313,9 @@ static int __dev_close_many(struct list_head *head)
 	might_sleep();
 
 	list_for_each_entry(dev, head, close_list) {
+		/* Temporarily disable netpoll until the interface is down */
+		netpoll_poll_disable(dev);
+
 		call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);
 
 		clear_bit(__LINK_STATE_START, &dev->state);
@@ -1343,6 +1346,7 @@ static int __dev_close_many(struct list_head *head)
 
 		dev->flags &= ~IFF_UP;
 		net_dmaengine_put();
+		netpoll_poll_enable(dev);
 	}
 
 	return 0;
@@ -1353,14 +1357,10 @@ static int __dev_close(struct net_device *dev)
 	int retval;
 	LIST_HEAD(single);
 
-	/* Temporarily disable netpoll until the interface is down */
-	netpoll_rx_disable(dev);
-
 	list_add(&dev->close_list, &single);
 	retval = __dev_close_many(&single);
 	list_del(&single);
 
-	netpoll_rx_enable(dev);
 	return retval;
 }
 
@@ -1398,14 +1398,9 @@ int dev_close(struct net_device *dev)
 	if (dev->flags & IFF_UP) {
 		LIST_HEAD(single);
 
-		/* Block netpoll rx while the interface is going down */
-		netpoll_rx_disable(dev);
-
 		list_add(&dev->close_list, &single);
 		dev_close_many(&single);
 		list_del(&single);
-
-		netpoll_rx_enable(dev);
 	}
 	return 0;
 }
@@ -1645,8 +1640,7 @@ static inline void net_timestamp_set(struct sk_buff *skb)
 			__net_timestamp(SKB);		\
 	}						\
 
-static inline bool is_skb_forwardable(struct net_device *dev,
-				      struct sk_buff *skb)
+bool is_skb_forwardable(struct net_device *dev, struct sk_buff *skb)
 {
 	unsigned int len;
 
@@ -1665,6 +1659,7 @@ static inline bool is_skb_forwardable(struct net_device *dev,
 
 	return false;
 }
+EXPORT_SYMBOL_GPL(is_skb_forwardable);
 
 /**
  * dev_forward_skb - loopback an skb to another netif
@@ -2885,6 +2880,7 @@ recursion_alert:
 	rc = -ENETDOWN;
 	rcu_read_unlock_bh();
 
+	atomic_long_inc(&dev->tx_dropped);
 	kfree_skb(skb);
 	return rc;
 out:
@@ -2957,7 +2953,7 @@ set_rps_cpu(struct net_device *dev, struct sk_buff *skb,
 		flow_table = rcu_dereference(rxqueue->rps_flow_table);
 		if (!flow_table)
 			goto out;
-		flow_id = skb->rxhash & flow_table->mask;
+		flow_id = skb_get_hash(skb) & flow_table->mask;
 		rc = dev->netdev_ops->ndo_rx_flow_steer(dev, skb,
 							rxq_index, flow_id);
 		if (rc < 0)
@@ -2991,6 +2987,7 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
 	struct rps_sock_flow_table *sock_flow_table;
 	int cpu = -1;
 	u16 tcpu;
+	u32 hash;
 
 	if (skb_rx_queue_recorded(skb)) {
 		u16 index = skb_get_rx_queue(skb);
@@ -3019,7 +3016,8 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
 	}
 
 	skb_reset_network_header(skb);
-	if (!skb_get_hash(skb))
+	hash = skb_get_hash(skb);
+	if (!hash)
 		goto done;
 
 	flow_table = rcu_dereference(rxqueue->rps_flow_table);
@@ -3028,11 +3026,10 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
 		u16 next_cpu;
 		struct rps_dev_flow *rflow;
 
-		rflow = &flow_table->flows[skb->rxhash & flow_table->mask];
+		rflow = &flow_table->flows[hash & flow_table->mask];
 		tcpu = rflow->cpu;
 
-		next_cpu = sock_flow_table->ents[skb->rxhash &
-						 sock_flow_table->mask];
+		next_cpu = sock_flow_table->ents[hash & sock_flow_table->mask];
 
 		/*
 		 * If the desired CPU (where last recvmsg was done) is
@@ -3061,7 +3058,7 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
 	}
 
 	if (map) {
-		tcpu = map->cpus[((u64) skb->rxhash * map->len) >> 32];
+		tcpu = map->cpus[((u64) hash * map->len) >> 32];
 
 		if (cpu_online(tcpu)) {
 			cpu = tcpu;
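/*
 * Illustrative sketch (not part of this patch): the map lookup above picks a
 * CPU by scaling the 32-bit flow hash onto the CPU array with a multiply and
 * shift instead of a modulo.  A minimal stand-alone model of that trick; the
 * names pick_index/main are hypothetical and exist only for this example.
 */
#include <stdint.h>
#include <stdio.h>

static unsigned int pick_index(uint32_t hash, unsigned int len)
{
	/* (hash / 2^32) * len in integer math: result is always in [0, len) */
	return (unsigned int)(((uint64_t)hash * len) >> 32);
}

int main(void)
{
	printf("%u\n", pick_index(0x9e3779b9u, 16));	/* prints a value < 16 */
	return 0;
}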
@@ -3236,10 +3233,6 @@ static int netif_rx_internal(struct sk_buff *skb)
 {
 	int ret;
 
-	/* if netpoll wants it, pretend we never saw it */
-	if (netpoll_rx(skb))
-		return NET_RX_DROP;
-
 	net_timestamp_check(netdev_tstamp_prequeue, skb);
 
 	trace_netif_rx(skb);
@@ -3500,11 +3493,11 @@ EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister);
 static bool skb_pfmemalloc_protocol(struct sk_buff *skb)
 {
 	switch (skb->protocol) {
-	case __constant_htons(ETH_P_ARP):
-	case __constant_htons(ETH_P_IP):
-	case __constant_htons(ETH_P_IPV6):
-	case __constant_htons(ETH_P_8021Q):
-	case __constant_htons(ETH_P_8021AD):
+	case htons(ETH_P_ARP):
+	case htons(ETH_P_IP):
+	case htons(ETH_P_IPV6):
+	case htons(ETH_P_8021Q):
+	case htons(ETH_P_8021AD):
 		return true;
 	default:
 		return false;
@@ -3525,10 +3518,6 @@ static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc)
 
 	trace_netif_receive_skb(skb);
 
-	/* if we've gotten here through NAPI, check netpoll */
-	if (netpoll_receive_skb(skb))
-		goto out;
-
 	orig_dev = skb->dev;
 
 	skb_reset_network_header(skb);
@@ -3655,7 +3644,6 @@ drop:
 
 unlock:
 	rcu_read_unlock();
-out:
 	return ret;
 }
 
@@ -3845,10 +3833,10 @@ static void gro_list_prepare(struct napi_struct *napi, struct sk_buff *skb)
 		diffs |= p->vlan_tci ^ skb->vlan_tci;
 		if (maclen == ETH_HLEN)
 			diffs |= compare_ether_header(skb_mac_header(p),
-						      skb_gro_mac_header(skb));
+						      skb_mac_header(skb));
 		else if (!diffs)
 			diffs = memcmp(skb_mac_header(p),
-				       skb_gro_mac_header(skb),
+				       skb_mac_header(skb),
 				       maclen);
 		NAPI_GRO_CB(p)->same_flow = !diffs;
 	}
@@ -3871,6 +3859,27 @@ static void skb_gro_reset_offset(struct sk_buff *skb)
 	}
 }
 
+static void gro_pull_from_frag0(struct sk_buff *skb, int grow)
+{
+	struct skb_shared_info *pinfo = skb_shinfo(skb);
+
+	BUG_ON(skb->end - skb->tail < grow);
+
+	memcpy(skb_tail_pointer(skb), NAPI_GRO_CB(skb)->frag0, grow);
+
+	skb->data_len -= grow;
+	skb->tail += grow;
+
+	pinfo->frags[0].page_offset += grow;
+	skb_frag_size_sub(&pinfo->frags[0], grow);
+
+	if (unlikely(!skb_frag_size(&pinfo->frags[0]))) {
+		skb_frag_unref(skb, 0);
+		memmove(pinfo->frags, pinfo->frags + 1,
+			--pinfo->nr_frags * sizeof(pinfo->frags[0]));
+	}
+}
+
 static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 {
 	struct sk_buff **pp = NULL;
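/*
 * Illustrative sketch (not part of this patch): gro_pull_from_frag0() above
 * copies 'grow' bytes of already-parsed headers from the first page fragment
 * (frag0) into the skb's linear tail and shrinks that fragment.  A simplified
 * user-space model of the same move; every name below is hypothetical.
 */
#include <string.h>

struct toy_buf {
	unsigned char linear[256];	/* models the skb head/tail area */
	unsigned int  tail;		/* bytes currently in the linear area */
	unsigned char *frag0;		/* models the first page fragment */
	unsigned int  frag0_len;
};

static void toy_pull_from_frag0(struct toy_buf *b, unsigned int grow)
{
	memcpy(b->linear + b->tail, b->frag0, grow);	/* headers -> linear */
	b->tail += grow;
	b->frag0 += grow;				/* shrink the fragment */
	b->frag0_len -= grow;
}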
@@ -3879,14 +3888,14 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
 	struct list_head *head = &offload_base;
 	int same_flow;
 	enum gro_result ret;
+	int grow;
 
-	if (!(skb->dev->features & NETIF_F_GRO) || netpoll_rx_on(skb))
+	if (!(skb->dev->features & NETIF_F_GRO))
 		goto normal;
 
 	if (skb_is_gso(skb) || skb_has_frag_list(skb))
 		goto normal;
 
-	skb_gro_reset_offset(skb);
 	gro_list_prepare(napi, skb);
 	NAPI_GRO_CB(skb)->csum = skb->csum; /* Needed for CHECKSUM_COMPLETE */
 
@@ -3950,27 +3959,9 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
 	ret = GRO_HELD;
 
 pull:
-	if (skb_headlen(skb) < skb_gro_offset(skb)) {
-		int grow = skb_gro_offset(skb) - skb_headlen(skb);
-
-		BUG_ON(skb->end - skb->tail < grow);
-
-		memcpy(skb_tail_pointer(skb), NAPI_GRO_CB(skb)->frag0, grow);
-
-		skb->tail += grow;
-		skb->data_len -= grow;
-
-		skb_shinfo(skb)->frags[0].page_offset += grow;
-		skb_frag_size_sub(&skb_shinfo(skb)->frags[0], grow);
-
-		if (unlikely(!skb_frag_size(&skb_shinfo(skb)->frags[0]))) {
-			skb_frag_unref(skb, 0);
-			memmove(skb_shinfo(skb)->frags,
-				skb_shinfo(skb)->frags + 1,
-				--skb_shinfo(skb)->nr_frags * sizeof(skb_frag_t));
-		}
-	}
-
+	grow = skb_gro_offset(skb) - skb_headlen(skb);
+	if (grow > 0)
+		gro_pull_from_frag0(skb, grow);
 ok:
 	return ret;
 
@@ -4038,6 +4029,8 @@ gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 {
 	trace_napi_gro_receive_entry(skb);
 
+	skb_gro_reset_offset(skb);
+
 	return napi_skb_finish(dev_gro_receive(napi, skb), skb);
 }
 EXPORT_SYMBOL(napi_gro_receive);
@@ -4066,12 +4059,16 @@ struct sk_buff *napi_get_frags(struct napi_struct *napi)
 }
 EXPORT_SYMBOL(napi_get_frags);
 
-static gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb,
-				      gro_result_t ret)
+static gro_result_t napi_frags_finish(struct napi_struct *napi,
+				      struct sk_buff *skb,
+				      gro_result_t ret)
 {
 	switch (ret) {
 	case GRO_NORMAL:
-		if (netif_receive_skb_internal(skb))
+	case GRO_HELD:
+		__skb_push(skb, ETH_HLEN);
+		skb->protocol = eth_type_trans(skb, skb->dev);
+		if (ret == GRO_NORMAL && netif_receive_skb_internal(skb))
 			ret = GRO_DROP;
 		break;
 
@@ -4080,7 +4077,6 @@ static gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *
 		napi_reuse_skb(napi, skb);
 		break;
 
-	case GRO_HELD:
 	case GRO_MERGED:
 		break;
 	}
@@ -4088,17 +4084,41 @@ static gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *
 	return ret;
 }
 
+/* Upper GRO stack assumes network header starts at gro_offset=0
+ * Drivers could call both napi_gro_frags() and napi_gro_receive()
+ * We copy ethernet header into skb->data to have a common layout.
+ */
 static struct sk_buff *napi_frags_skb(struct napi_struct *napi)
 {
 	struct sk_buff *skb = napi->skb;
+	const struct ethhdr *eth;
+	unsigned int hlen = sizeof(*eth);
 
 	napi->skb = NULL;
 
-	if (unlikely(!pskb_may_pull(skb, sizeof(struct ethhdr)))) {
-		napi_reuse_skb(napi, skb);
-		return NULL;
+	skb_reset_mac_header(skb);
+	skb_gro_reset_offset(skb);
+
+	eth = skb_gro_header_fast(skb, 0);
+	if (unlikely(skb_gro_header_hard(skb, hlen))) {
+		eth = skb_gro_header_slow(skb, hlen, 0);
+		if (unlikely(!eth)) {
+			napi_reuse_skb(napi, skb);
+			return NULL;
+		}
+	} else {
+		gro_pull_from_frag0(skb, hlen);
+		NAPI_GRO_CB(skb)->frag0 += hlen;
+		NAPI_GRO_CB(skb)->frag0_len -= hlen;
 	}
-	skb->protocol = eth_type_trans(skb, skb->dev);
+	__skb_pull(skb, hlen);
+
+	/*
+	 * This works because the only protocols we care about don't require
+	 * special handling.
+	 * We'll fix it up properly in napi_frags_finish()
+	 */
+	skb->protocol = eth->h_proto;
 
 	return skb;
 }
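/*
 * Illustrative sketch (not part of this patch, hypothetical driver code): the
 * napi_frags_skb() path above is reached through napi_gro_frags(), used by
 * drivers that place the whole packet, Ethernet header included, in page
 * fragments of the skb obtained from napi_get_frags().  Rough shape only;
 * example_rx_frag and its truesize accounting are assumptions for this sketch.
 */
#include <linux/netdevice.h>
#include <linux/skbuff.h>

static void example_rx_frag(struct napi_struct *napi, struct page *page,
			    unsigned int offset, unsigned int len)
{
	struct sk_buff *skb = napi_get_frags(napi);

	if (unlikely(!skb))
		return;				/* out of memory: drop */

	skb_fill_page_desc(skb, 0, page, offset, len);
	skb->len += len;
	skb->data_len += len;
	skb->truesize += len;

	napi_gro_frags(napi);	/* ends up in napi_frags_skb() above */
}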
@@ -6251,6 +6271,7 @@ struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
 		netdev_stats_to_stats64(storage, &dev->stats);
 	}
 	storage->rx_dropped += atomic_long_read(&dev->rx_dropped);
+	storage->tx_dropped += atomic_long_read(&dev->tx_dropped);
 	return storage;
 }
 EXPORT_SYMBOL(dev_get_stats);
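/*
 * Illustrative sketch (not part of this patch): with tx_dropped folded into
 * dev_get_stats(), drops counted in __dev_queue_xmit()'s error path surface
 * through the normal per-device statistics, e.g. the sysfs counter read
 * below.  "eth0" is only an example interface name.
 */
#include <stdio.h>

int main(void)
{
	unsigned long long drops = 0;
	FILE *f = fopen("/sys/class/net/eth0/statistics/tx_dropped", "r");

	if (!f)
		return 1;
	if (fscanf(f, "%llu", &drops) == 1)
		printf("tx_dropped: %llu\n", drops);
	fclose(f);
	return 0;
}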