path: root/net/core/dev.c
author    Linus Torvalds <torvalds@linux-foundation.org>  2014-04-02 23:53:45 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>  2014-04-02 23:53:45 -0400
commit    cd6362befe4cc7bf589a5236d2a780af2d47bcc9
tree      3bd4e13ec3f92a00dc4f6c3d65e820b54dbfe46e  /net/core/dev.c
parent    0f1b1e6d73cb989ce2c071edc57deade3b084dfe
parent    b1586f099ba897542ece36e8a23c1a62907261ef
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller:
 "Here is my initial pull request for the networking subsystem during
  this merge window:

   1) Support for ESN in AH (RFC 4302) from Fan Du.
   2) Add full kernel doc for ethtool command structures, from Ben Hutchings.
   3) Add BCM7xxx PHY driver, from Florian Fainelli.
   4) Export computed TCP rate information in netlink socket dumps, from Eric Dumazet.
   5) Allow IPSEC SA to be dumped partially using a filter, from Nicolas Dichtel.
   6) Convert many drivers to pci_enable_msix_range(), from Alexander Gordeev.
   7) Record SKB timestamps more efficiently, from Eric Dumazet.
   8) Switch to microsecond resolution for TCP round trip times, also from Eric Dumazet.
   9) Clean up and fix 6lowpan fragmentation handling by making use of the existing inet_frag api for its implementation.
  10) Add TX grant mapping to xen-netback driver, from Zoltan Kiss.
  11) Auto size SKB lengths when composing netlink messages based upon past message sizes used, from Eric Dumazet.
  12) qdisc dumps can take a long time, add a cond_resched(), from Eric Dumazet.
  13) Sanitize netpoll core and drivers wrt. SKB handling semantics. Get rid of never-used-in-tree netpoll RX handling. From Eric W Biederman.
  14) Support inter-address-family and namespace changing in VTI tunnel driver(s). From Steffen Klassert.
  15) Add Altera TSE driver, from Vince Bridgers.
  16) Optimize csum_replace2() so that it doesn't adjust the checksum by checksumming the entire header, from Eric Dumazet.
  17) Expand BPF internal implementation for faster interpreting, more direct translations into JIT'd code, and much cleaner uses of BPF filtering in non-socket contexts. From Daniel Borkmann and Alexei Starovoitov"

* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1976 commits)
  netpoll: Use skb_irq_freeable to make zap_completion_queue safe.
  net: Add a test to see if a skb is freeable in irq context
  qlcnic: Fix build failure due to undefined reference to `vxlan_get_rx_port'
  net: ptp: move PTP classifier in its own file
  net: sxgbe: make "core_ops" static
  net: sxgbe: fix logical vs bitwise operation
  net: sxgbe: sxgbe_mdio_register() frees the bus
  Call efx_set_channels() before efx->type->dimension_resources()
  xen-netback: disable rogue vif in kthread context
  net/mlx4: Set proper build dependancy with vxlan
  be2net: fix build dependency on VxLAN
  mac802154: make csma/cca parameters per-wpan
  mac802154: allow only one WPAN to be up at any given time
  net: filter: minor: fix kdoc in __sk_run_filter
  netlink: don't compare the nul-termination in nla_strcmp
  can: c_can: Avoid led toggling for every packet.
  can: c_can: Simplify TX interrupt cleanup
  can: c_can: Store dlc private
  can: c_can: Reduce register access
  can: c_can: Make the code readable
  ...
Diffstat (limited to 'net/core/dev.c')
-rw-r--r--   net/core/dev.c   153
1 file changed, 87 insertions(+), 66 deletions(-)
diff --git a/net/core/dev.c b/net/core/dev.c
index 4a91591b30a6..757063420ce0 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1245,7 +1245,7 @@ static int __dev_open(struct net_device *dev)
          * If we don't do this there is a chance ndo_poll_controller
          * or ndo_poll may be running while we open the device
          */
-        netpoll_rx_disable(dev);
+        netpoll_poll_disable(dev);
 
         ret = call_netdevice_notifiers(NETDEV_PRE_UP, dev);
         ret = notifier_to_errno(ret);
@@ -1260,7 +1260,7 @@ static int __dev_open(struct net_device *dev)
         if (!ret && ops->ndo_open)
                 ret = ops->ndo_open(dev);
 
-        netpoll_rx_enable(dev);
+        netpoll_poll_enable(dev);
 
         if (ret)
                 clear_bit(__LINK_STATE_START, &dev->state);
@@ -1313,6 +1313,9 @@ static int __dev_close_many(struct list_head *head)
         might_sleep();
 
         list_for_each_entry(dev, head, close_list) {
+                /* Temporarily disable netpoll until the interface is down */
+                netpoll_poll_disable(dev);
+
                 call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);
 
                 clear_bit(__LINK_STATE_START, &dev->state);
@@ -1343,6 +1346,7 @@ static int __dev_close_many(struct list_head *head)
 
                 dev->flags &= ~IFF_UP;
                 net_dmaengine_put();
+                netpoll_poll_enable(dev);
         }
 
         return 0;
@@ -1353,14 +1357,10 @@ static int __dev_close(struct net_device *dev)
         int retval;
         LIST_HEAD(single);
 
-        /* Temporarily disable netpoll until the interface is down */
-        netpoll_rx_disable(dev);
-
         list_add(&dev->close_list, &single);
         retval = __dev_close_many(&single);
         list_del(&single);
 
-        netpoll_rx_enable(dev);
         return retval;
 }
 
@@ -1398,14 +1398,9 @@ int dev_close(struct net_device *dev)
         if (dev->flags & IFF_UP) {
                 LIST_HEAD(single);
 
-                /* Block netpoll rx while the interface is going down */
-                netpoll_rx_disable(dev);
-
                 list_add(&dev->close_list, &single);
                 dev_close_many(&single);
                 list_del(&single);
-
-                netpoll_rx_enable(dev);
         }
         return 0;
 }
@@ -1645,8 +1640,7 @@ static inline void net_timestamp_set(struct sk_buff *skb)
                         __net_timestamp(SKB);                   \
         }                                                       \
 
-static inline bool is_skb_forwardable(struct net_device *dev,
-                                      struct sk_buff *skb)
+bool is_skb_forwardable(struct net_device *dev, struct sk_buff *skb)
 {
         unsigned int len;
 
@@ -1665,6 +1659,7 @@ static inline bool is_skb_forwardable(struct net_device *dev,
 
         return false;
 }
+EXPORT_SYMBOL_GPL(is_skb_forwardable);
 
 /**
  * dev_forward_skb - loopback an skb to another netif
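Note: with is_skb_forwardable() made non-static and exported (GPL-only), code outside net/core can perform the same up-state/MTU check that dev_forward_skb() relies on. A minimal sketch of how a forwarding module might use it; my_forward() and its error handling are illustrative, not part of this commit, and it skips the header scrubbing dev_forward_skb() also does.

    #include <linux/netdevice.h>
    #include <linux/skbuff.h>

    /* Hypothetical module code: verify the target device can accept the
     * frame (device up, frame fits its MTU) before re-transmitting it.
     */
    static int my_forward(struct net_device *to, struct sk_buff *skb)
    {
            if (!is_skb_forwardable(to, skb)) {
                    kfree_skb(skb);
                    return NET_RX_DROP;
            }
            skb->dev = to;
            return dev_queue_xmit(skb);
    }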
@@ -2885,6 +2880,7 @@ recursion_alert:
         rc = -ENETDOWN;
         rcu_read_unlock_bh();
 
+        atomic_long_inc(&dev->tx_dropped);
         kfree_skb(skb);
         return rc;
 out:
@@ -2957,7 +2953,7 @@ set_rps_cpu(struct net_device *dev, struct sk_buff *skb,
         flow_table = rcu_dereference(rxqueue->rps_flow_table);
         if (!flow_table)
                 goto out;
-        flow_id = skb->rxhash & flow_table->mask;
+        flow_id = skb_get_hash(skb) & flow_table->mask;
         rc = dev->netdev_ops->ndo_rx_flow_steer(dev, skb,
                                                 rxq_index, flow_id);
         if (rc < 0)
@@ -2991,6 +2987,7 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
         struct rps_sock_flow_table *sock_flow_table;
         int cpu = -1;
         u16 tcpu;
+        u32 hash;
 
         if (skb_rx_queue_recorded(skb)) {
                 u16 index = skb_get_rx_queue(skb);
@@ -3019,7 +3016,8 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
         }
 
         skb_reset_network_header(skb);
-        if (!skb_get_hash(skb))
+        hash = skb_get_hash(skb);
+        if (!hash)
                 goto done;
 
         flow_table = rcu_dereference(rxqueue->rps_flow_table);
@@ -3028,11 +3026,10 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
                 u16 next_cpu;
                 struct rps_dev_flow *rflow;
 
-                rflow = &flow_table->flows[skb->rxhash & flow_table->mask];
+                rflow = &flow_table->flows[hash & flow_table->mask];
                 tcpu = rflow->cpu;
 
-                next_cpu = sock_flow_table->ents[skb->rxhash &
-                    sock_flow_table->mask];
+                next_cpu = sock_flow_table->ents[hash & sock_flow_table->mask];
 
                 /*
                  * If the desired CPU (where last recvmsg was done) is
@@ -3061,7 +3058,7 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
         }
 
         if (map) {
-                tcpu = map->cpus[((u64) skb->rxhash * map->len) >> 32];
+                tcpu = map->cpus[((u64) hash * map->len) >> 32];
 
                 if (cpu_online(tcpu)) {
                         cpu = tcpu;
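Note: the rps_map lookup above turns the 32-bit flow hash into an index in [0, map->len) with a multiply-and-shift instead of a modulo: the top 32 bits of the 64-bit product hash * len always land in that range. A small userspace sketch of the same arithmetic (function name and test values are illustrative, not kernel code):

    #include <stdint.h>
    #include <stdio.h>

    /* Same arithmetic as "((u64) hash * map->len) >> 32" in get_rps_cpu():
     * scale a uniformly distributed 32-bit hash into [0, len) without a
     * division or modulo.
     */
    static unsigned int scale_hash(uint32_t hash, unsigned int len)
    {
            return (unsigned int)(((uint64_t)hash * len) >> 32);
    }

    int main(void)
    {
            uint32_t hashes[] = { 0x00000000u, 0x12345678u, 0xdeadbeefu, 0xffffffffu };
            unsigned int ncpus = 4;     /* illustrative rps_map length */
            unsigned int i;

            for (i = 0; i < sizeof(hashes) / sizeof(hashes[0]); i++)
                    printf("hash %08x -> cpu index %u of %u\n",
                           hashes[i], scale_hash(hashes[i], ncpus), ncpus);
            return 0;
    }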
@@ -3236,10 +3233,6 @@ static int netif_rx_internal(struct sk_buff *skb)
 {
         int ret;
 
-        /* if netpoll wants it, pretend we never saw it */
-        if (netpoll_rx(skb))
-                return NET_RX_DROP;
-
         net_timestamp_check(netdev_tstamp_prequeue, skb);
 
         trace_netif_rx(skb);
@@ -3500,11 +3493,11 @@ EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister);
 static bool skb_pfmemalloc_protocol(struct sk_buff *skb)
 {
         switch (skb->protocol) {
-        case __constant_htons(ETH_P_ARP):
-        case __constant_htons(ETH_P_IP):
-        case __constant_htons(ETH_P_IPV6):
-        case __constant_htons(ETH_P_8021Q):
-        case __constant_htons(ETH_P_8021AD):
+        case htons(ETH_P_ARP):
+        case htons(ETH_P_IP):
+        case htons(ETH_P_IPV6):
+        case htons(ETH_P_8021Q):
+        case htons(ETH_P_8021AD):
                 return true;
         default:
                 return false;
@@ -3525,10 +3518,6 @@ static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc)
 
         trace_netif_receive_skb(skb);
 
-        /* if we've gotten here through NAPI, check netpoll */
-        if (netpoll_receive_skb(skb))
-                goto out;
-
         orig_dev = skb->dev;
 
         skb_reset_network_header(skb);
@@ -3655,7 +3644,6 @@ drop:
 
 unlock:
         rcu_read_unlock();
-out:
         return ret;
 }
 
@@ -3845,10 +3833,10 @@ static void gro_list_prepare(struct napi_struct *napi, struct sk_buff *skb)
                 diffs |= p->vlan_tci ^ skb->vlan_tci;
                 if (maclen == ETH_HLEN)
                         diffs |= compare_ether_header(skb_mac_header(p),
-                                                      skb_gro_mac_header(skb));
+                                                      skb_mac_header(skb));
                 else if (!diffs)
                         diffs = memcmp(skb_mac_header(p),
-                                       skb_gro_mac_header(skb),
+                                       skb_mac_header(skb),
                                        maclen);
                 NAPI_GRO_CB(p)->same_flow = !diffs;
         }
@@ -3871,6 +3859,27 @@ static void skb_gro_reset_offset(struct sk_buff *skb)
         }
 }
 
+static void gro_pull_from_frag0(struct sk_buff *skb, int grow)
+{
+        struct skb_shared_info *pinfo = skb_shinfo(skb);
+
+        BUG_ON(skb->end - skb->tail < grow);
+
+        memcpy(skb_tail_pointer(skb), NAPI_GRO_CB(skb)->frag0, grow);
+
+        skb->data_len -= grow;
+        skb->tail += grow;
+
+        pinfo->frags[0].page_offset += grow;
+        skb_frag_size_sub(&pinfo->frags[0], grow);
+
+        if (unlikely(!skb_frag_size(&pinfo->frags[0]))) {
+                skb_frag_unref(skb, 0);
+                memmove(pinfo->frags, pinfo->frags + 1,
+                        --pinfo->nr_frags * sizeof(pinfo->frags[0]));
+        }
+}
+
 static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 {
         struct sk_buff **pp = NULL;
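Note: gro_pull_from_frag0() copies grow bytes from the first page fragment into the skb's linear area, then shrinks (and, if emptied, removes) that fragment; it factors out the logic that previously lived inline under the pull: label in dev_gro_receive() (see the hunk below) so napi_frags_skb() can reuse it. A toy userspace model of the same bookkeeping, using simplified stand-in structures rather than the real sk_buff:

    #include <stdio.h>
    #include <string.h>

    struct toy_frag { const unsigned char *data; size_t size; };

    struct toy_skb {
            unsigned char linear[64];   /* models skb->data .. skb->tail    */
            size_t tail;                /* bytes already in the linear area */
            size_t data_len;            /* bytes still held in fragments    */
            struct toy_frag frags[4];
            unsigned int nr_frags;
    };

    static void toy_pull_from_frag0(struct toy_skb *skb, size_t grow)
    {
            struct toy_frag *f0 = &skb->frags[0];

            memcpy(skb->linear + skb->tail, f0->data, grow);
            skb->tail += grow;
            skb->data_len -= grow;

            f0->data += grow;           /* models frags[0].page_offset += grow */
            f0->size -= grow;
            if (f0->size == 0)          /* fragment emptied: drop it */
                    memmove(skb->frags, skb->frags + 1,
                            --skb->nr_frags * sizeof(skb->frags[0]));
    }

    int main(void)
    {
            static const unsigned char frame[14]; /* e.g. an Ethernet header */
            struct toy_skb skb = {
                    .tail = 0, .data_len = sizeof(frame), .nr_frags = 1,
                    .frags = { { frame, sizeof(frame) } },
            };

            toy_pull_from_frag0(&skb, sizeof(frame));
            printf("linear=%zu frag bytes=%zu nr_frags=%u\n",
                   skb.tail, skb.data_len, skb.nr_frags);
            return 0;
    }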
@@ -3879,14 +3888,14 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
         struct list_head *head = &offload_base;
         int same_flow;
         enum gro_result ret;
+        int grow;
 
-        if (!(skb->dev->features & NETIF_F_GRO) || netpoll_rx_on(skb))
+        if (!(skb->dev->features & NETIF_F_GRO))
                 goto normal;
 
         if (skb_is_gso(skb) || skb_has_frag_list(skb))
                 goto normal;
 
-        skb_gro_reset_offset(skb);
         gro_list_prepare(napi, skb);
         NAPI_GRO_CB(skb)->csum = skb->csum; /* Needed for CHECKSUM_COMPLETE */
 
@@ -3950,27 +3959,9 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
         ret = GRO_HELD;
 
 pull:
-        if (skb_headlen(skb) < skb_gro_offset(skb)) {
-                int grow = skb_gro_offset(skb) - skb_headlen(skb);
-
-                BUG_ON(skb->end - skb->tail < grow);
-
-                memcpy(skb_tail_pointer(skb), NAPI_GRO_CB(skb)->frag0, grow);
-
-                skb->tail += grow;
-                skb->data_len -= grow;
-
-                skb_shinfo(skb)->frags[0].page_offset += grow;
-                skb_frag_size_sub(&skb_shinfo(skb)->frags[0], grow);
-
-                if (unlikely(!skb_frag_size(&skb_shinfo(skb)->frags[0]))) {
-                        skb_frag_unref(skb, 0);
-                        memmove(skb_shinfo(skb)->frags,
-                                skb_shinfo(skb)->frags + 1,
-                                --skb_shinfo(skb)->nr_frags * sizeof(skb_frag_t));
-                }
-        }
-
+        grow = skb_gro_offset(skb) - skb_headlen(skb);
+        if (grow > 0)
+                gro_pull_from_frag0(skb, grow);
 ok:
         return ret;
 
@@ -4038,6 +4029,8 @@ gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 {
         trace_napi_gro_receive_entry(skb);
 
+        skb_gro_reset_offset(skb);
+
         return napi_skb_finish(dev_gro_receive(napi, skb), skb);
 }
 EXPORT_SYMBOL(napi_gro_receive);
@@ -4066,12 +4059,16 @@ struct sk_buff *napi_get_frags(struct napi_struct *napi)
 }
 EXPORT_SYMBOL(napi_get_frags);
 
-static gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb,
-                               gro_result_t ret)
+static gro_result_t napi_frags_finish(struct napi_struct *napi,
+                                      struct sk_buff *skb,
+                                      gro_result_t ret)
 {
         switch (ret) {
         case GRO_NORMAL:
-                if (netif_receive_skb_internal(skb))
+        case GRO_HELD:
+                __skb_push(skb, ETH_HLEN);
+                skb->protocol = eth_type_trans(skb, skb->dev);
+                if (ret == GRO_NORMAL && netif_receive_skb_internal(skb))
                         ret = GRO_DROP;
                 break;
 
@@ -4080,7 +4077,6 @@ static gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *
                 napi_reuse_skb(napi, skb);
                 break;
 
-        case GRO_HELD:
         case GRO_MERGED:
                 break;
         }
@@ -4088,17 +4084,41 @@ static gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *
         return ret;
 }
 
+/* Upper GRO stack assumes network header starts at gro_offset=0
+ * Drivers could call both napi_gro_frags() and napi_gro_receive()
+ * We copy ethernet header into skb->data to have a common layout.
+ */
 static struct sk_buff *napi_frags_skb(struct napi_struct *napi)
 {
         struct sk_buff *skb = napi->skb;
+        const struct ethhdr *eth;
+        unsigned int hlen = sizeof(*eth);
 
         napi->skb = NULL;
 
-        if (unlikely(!pskb_may_pull(skb, sizeof(struct ethhdr)))) {
-                napi_reuse_skb(napi, skb);
-                return NULL;
+        skb_reset_mac_header(skb);
+        skb_gro_reset_offset(skb);
+
+        eth = skb_gro_header_fast(skb, 0);
+        if (unlikely(skb_gro_header_hard(skb, hlen))) {
+                eth = skb_gro_header_slow(skb, hlen, 0);
+                if (unlikely(!eth)) {
+                        napi_reuse_skb(napi, skb);
+                        return NULL;
+                }
+        } else {
+                gro_pull_from_frag0(skb, hlen);
+                NAPI_GRO_CB(skb)->frag0 += hlen;
+                NAPI_GRO_CB(skb)->frag0_len -= hlen;
         }
-        skb->protocol = eth_type_trans(skb, skb->dev);
+        __skb_pull(skb, hlen);
+
+        /*
+         * This works because the only protocols we care about don't require
+         * special handling.
+         * We'll fix it up properly in napi_frags_finish()
+         */
+        skb->protocol = eth->h_proto;
 
         return skb;
 }
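Note: the reworked napi_frags_skb() expects the driver to have attached the whole frame, Ethernet header included, as page fragments on the skb returned by napi_get_frags(); the header is then either left in frag0 or pulled into the linear area here. A hedged sketch of the driver-side pattern this serves (function name, arguments and the truesize accounting are illustrative, modeled on existing napi_gro_frags() users rather than taken from this commit):

    #include <linux/netdevice.h>
    #include <linux/skbuff.h>

    /* Hypothetical driver RX path built on the napi_gro_frags() API:
     * hand a received page (containing the full frame) to GRO without
     * first copying it into a linear skb.
     */
    static void my_rx_frame(struct napi_struct *napi, struct page *page,
                            unsigned int offset, unsigned int len)
    {
            struct sk_buff *skb = napi_get_frags(napi);

            if (!skb)
                    return;                     /* no skb available: frame is dropped */

            skb_fill_page_desc(skb, 0, page, offset, len);
            skb->len += len;
            skb->data_len += len;
            skb->truesize += PAGE_SIZE;         /* illustrative accounting */

            napi_gro_frags(napi);               /* runs napi_frags_skb() and GRO */
    }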
@@ -6251,6 +6271,7 @@ struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
                 netdev_stats_to_stats64(storage, &dev->stats);
         }
         storage->rx_dropped += atomic_long_read(&dev->rx_dropped);
+        storage->tx_dropped += atomic_long_read(&dev->tx_dropped);
         return storage;
 }
 EXPORT_SYMBOL(dev_get_stats);
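Note: together with the atomic_long_inc() hunk earlier, this makes transmit drops from __dev_queue_xmit() visible through the normal interface statistics, e.g. `ip -s link` or the per-device sysfs counters. A minimal userspace sketch reading the counter; the sysfs statistics path is the standard one, the interface name is just an example:

    #include <stdio.h>

    int main(void)
    {
            unsigned long long tx_dropped;
            /* "eth0" is an example; substitute any interface name. */
            FILE *f = fopen("/sys/class/net/eth0/statistics/tx_dropped", "r");

            if (!f || fscanf(f, "%llu", &tx_dropped) != 1) {
                    perror("tx_dropped");
                    return 1;
            }
            fclose(f);
            printf("tx_dropped: %llu\n", tx_dropped);
            return 0;
    }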