path: root/net/core
author     Linus Torvalds <torvalds@linux-foundation.org>  2013-07-09 21:24:39 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2013-07-09 21:24:39 -0400
commit     496322bc91e35007ed754184dcd447a02b6dd685
tree       f5298d0a74c0a6e65c0e98050b594b8d020904c1  /net/core
parent     2e17c5a97e231f3cb426f4b7895eab5be5c5442e
parent     56e0ef527b184b3de2d7f88c6190812b2b2ac6bf
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller:
 "This is a re-do of the net-next pull request for the current merge window. The only difference from the one I made the other day is that this has Eliezer's interface renames and the timeout handling changes made based upon your feedback, as well as a few bug fixes that have trickled in.

  Highlights:

   1) Low latency device polling, eliminating the cost of interrupt handling and context switches. Allows direct polling of a network device from socket operations, such as recvmsg() and poll(). Currently ixgbe, mlx4, and bnx2x support this feature. Full high level description, performance numbers, and design in commit 0a4db187a999 ("Merge branch 'll_poll'"). From Eliezer Tamir.

   2) With the routing cache removed, ip_check_mc_rcu() gets exercised more than ever before in the case where we have lots of multicast addresses. Use a hash table instead of a simple linked list, from Eric Dumazet.

   3) Add driver for Atheros QCA98xx 802.11ac wireless devices, from Bartosz Markowski, Janusz Dziedzic, Kalle Valo, Marek Kwaczynski, Marek Puzyniak, Michal Kazior, and Sujith Manoharan.

   4) Support reporting the TUN device persist flag to userspace, from Pavel Emelyanov.

   5) Allow controlling network device VF link state using netlink, from Rony Efraim.

   6) Support GRE tunneling in openvswitch, from Pravin B Shelar.

   7) Adjust SOCK_MIN_RCVBUF and SOCK_MIN_SNDBUF for modern times, from Daniel Borkmann and Eric Dumazet.

   8) Allow controlling of TCP quickack behavior on a per-route basis, from Cong Wang.

   9) Several bug fixes and improvements to vxlan from Stephen Hemminger, Pravin B Shelar, and Mike Rapoport. In particular, support receiving on multiple UDP ports.

  10) Major cleanups, particularly in the area of debugging and cookie lifetime handling, to the SCTP protocol code. From Daniel Borkmann.

  11) Allow packets to cross network namespaces when traversing tunnel devices. From Nicolas Dichtel.

  12) Allow monitoring netlink traffic via AF_PACKET sockets, in a manner akin to how we monitor real network traffic via ptype_all. From Daniel Borkmann.

  13) Several bug fixes and improvements for the new alx device driver, from Johannes Berg.

  14) Fix scalability issues in the netem packet scheduler's time queue, by using an rbtree. From Eric Dumazet.

  15) Several bug fixes in TCP loss recovery handling, from Yuchung Cheng.

  16) Add support for GSO segmentation of MPLS packets, from Simon Horman.

  17) Make network notifiers have a real data type for the opaque pointer that's passed into them. Use this to properly handle network device flag changes in arp_netdev_event(). From Jiri Pirko and Timo Teräs.

  18) Convert several drivers over to module_pci_driver(), from Peter Huewe.

  19) tcp_fixup_rcvbuf() can loop 500 times over loopback, just use a O(1) calculation instead. From Eric Dumazet.

  20) Support setting of explicit tunnel peer addresses in ipv6, just like ipv4. From Nicolas Dichtel.

  21) Protect x86 BPF JIT against spraying attacks, from Eric Dumazet.

  22) Prevent a single high rate flow from overrunning an individual cpu during RX packet processing via selective flow shedding. From Willem de Bruijn.

  23) Don't use spinlocks in TCP md5 signing fast paths, from Eric Dumazet.

  24) Don't just drop GSO packets which are above the TBF scheduler's burst limit, chop them up so they are in-bounds instead. Also from Eric Dumazet.

  25) VLAN offloads are missed when configured on top of a bridge, fix from Vlad Yasevich.

  26) Support IPV6 in ping sockets. From Lorenzo Colitti.

  27) Receive flow steering targets should be updated at poll() time too, from David Majnemer.

  28) Fix several corner case regressions in PMTU/redirect handling due to the routing cache removal, from Timo Teräs.

  29) We have to be mindful of ipv4 mapped ipv6 sockets in udp_v6_push_pending_frames(). From Hannes Frederic Sowa.

  30) Fix L2TP sequence number handling bugs, from James Chapman."

* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1214 commits)
  drivers/net: caif: fix wrong rtnl_is_locked() usage
  drivers/net: enic: release rtnl_lock on error-path
  vhost-net: fix use-after-free in vhost_net_flush
  net: mv643xx_eth: do not use port number as platform device id
  net: sctp: confirm route during forward progress
  virtio_net: fix race in RX VQ processing
  virtio: support unlocked queue poll
  net/cadence/macb: fix bug/typo in extracting gem_irq_read_clear bit
  Documentation: Fix references to defunct linux-net@vger.kernel.org
  net/fs: change busy poll time accounting
  net: rename low latency sockets functions to busy poll
  bridge: fix some kernel warning in multicast timer
  sfc: Fix memory leak when discarding scattered packets
  sit: fix tunnel update via netlink
  dt:net:stmmac: Add dt specific phy reset callback support.
  dt:net:stmmac: Add support to dwmac version 3.610 and 3.710
  dt:net:stmmac: Allocate platform data only if its NULL.
  net:stmmac: fix memleak in the open method
  ipv6: rt6_check_neigh should successfully verify neigh if no NUD information are available
  net: ipv6: fix wrong ping_v6_sendmsg return value
  ...
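
Highlight 1 (busy polling) is what drives the net/core/datagram.c, sock.c and sysctl_net_core.c changes in the diff below. A minimal userspace sketch of how an application might opt in follows; the SO_BUSY_POLL socket option and the net.core.busy_read/busy_poll sysctl names are assumptions based on this series' "busy poll" renames, not something this page itself confirms.

  /*
   * Illustrative only: per-socket opt-in to busy polling. SO_BUSY_POLL and
   * the 50us budget are assumptions; older headers/kernels may not have the
   * option, in which case the (assumed) net.core.busy_read sysctl is the
   * system-wide equivalent.
   */
  #include <stdio.h>
  #include <unistd.h>
  #include <sys/socket.h>

  int main(void)
  {
      int fd = socket(AF_INET, SOCK_DGRAM, 0);
      unsigned int busy_usecs = 50;   /* spin up to ~50us before sleeping */
      char buf[2048];

      if (fd < 0)
          return 1;

      if (setsockopt(fd, SOL_SOCKET, SO_BUSY_POLL,
                     &busy_usecs, sizeof(busy_usecs)) < 0)
          perror("setsockopt(SO_BUSY_POLL)");   /* not fatal for the demo */

      /* recv()/recvmsg() on this socket now busy-polls the NIC queue
       * (sk_busy_loop() in the datagram.c hunk below) before blocking. */
      recv(fd, buf, sizeof(buf), 0);
      close(fd);
      return 0;
  }

Under the hood, sk_can_busy_loop()/sk_busy_loop() spin on the device's NAPI context, which is looked up through the napi_id hash added to dev.c in this same pull.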
Diffstat (limited to 'net/core')
-rw-r--r--  net/core/datagram.c           5
-rw-r--r--  net/core/dev.c              238
-rw-r--r--  net/core/drop_monitor.c       4
-rw-r--r--  net/core/dst.c                2
-rw-r--r--  net/core/ethtool.c           24
-rw-r--r--  net/core/fib_rules.c          4
-rw-r--r--  net/core/gen_estimator.c     12
-rw-r--r--  net/core/gen_stats.c         22
-rw-r--r--  net/core/link_watch.c         3
-rw-r--r--  net/core/neighbour.c         34
-rw-r--r--  net/core/net-procfs.c        16
-rw-r--r--  net/core/netpoll.c           16
-rw-r--r--  net/core/netprio_cgroup.c     2
-rw-r--r--  net/core/pktgen.c            81
-rw-r--r--  net/core/rtnetlink.c         32
-rw-r--r--  net/core/skbuff.c            69
-rw-r--r--  net/core/sock.c              26
-rw-r--r--  net/core/sysctl_net_core.c  139
18 files changed, 556 insertions, 173 deletions
diff --git a/net/core/datagram.c b/net/core/datagram.c
index b71423db7785..6e9ab31e457e 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -56,6 +56,7 @@
56#include <net/sock.h> 56#include <net/sock.h>
57#include <net/tcp_states.h> 57#include <net/tcp_states.h>
58#include <trace/events/skb.h> 58#include <trace/events/skb.h>
59#include <net/ll_poll.h>
59 60
60/* 61/*
61 * Is a socket 'connection oriented' ? 62 * Is a socket 'connection oriented' ?
@@ -207,6 +208,10 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags,
207 } 208 }
208 spin_unlock_irqrestore(&queue->lock, cpu_flags); 209 spin_unlock_irqrestore(&queue->lock, cpu_flags);
209 210
211 if (sk_can_busy_loop(sk) &&
212 sk_busy_loop(sk, flags & MSG_DONTWAIT))
213 continue;
214
210 /* User doesn't want to wait */ 215 /* User doesn't want to wait */
211 error = -EAGAIN; 216 error = -EAGAIN;
212 if (!timeo) 217 if (!timeo)
diff --git a/net/core/dev.c b/net/core/dev.c
index faebb398fb46..560dafd83adf 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -129,6 +129,8 @@
129#include <linux/inetdevice.h> 129#include <linux/inetdevice.h>
130#include <linux/cpu_rmap.h> 130#include <linux/cpu_rmap.h>
131#include <linux/static_key.h> 131#include <linux/static_key.h>
132#include <linux/hashtable.h>
133#include <linux/vmalloc.h>
132 134
133#include "net-sysfs.h" 135#include "net-sysfs.h"
134 136
@@ -166,6 +168,12 @@ static struct list_head offload_base __read_mostly;
166DEFINE_RWLOCK(dev_base_lock); 168DEFINE_RWLOCK(dev_base_lock);
167EXPORT_SYMBOL(dev_base_lock); 169EXPORT_SYMBOL(dev_base_lock);
168 170
171/* protects napi_hash addition/deletion and napi_gen_id */
172static DEFINE_SPINLOCK(napi_hash_lock);
173
174static unsigned int napi_gen_id;
175static DEFINE_HASHTABLE(napi_hash, 8);
176
169seqcount_t devnet_rename_seq; 177seqcount_t devnet_rename_seq;
170 178
171static inline void dev_base_seq_inc(struct net *net) 179static inline void dev_base_seq_inc(struct net *net)
@@ -1232,9 +1240,7 @@ static int __dev_open(struct net_device *dev)
1232 * If we don't do this there is a chance ndo_poll_controller 1240 * If we don't do this there is a chance ndo_poll_controller
1233 * or ndo_poll may be running while we open the device 1241 * or ndo_poll may be running while we open the device
1234 */ 1242 */
1235 ret = netpoll_rx_disable(dev); 1243 netpoll_rx_disable(dev);
1236 if (ret)
1237 return ret;
1238 1244
1239 ret = call_netdevice_notifiers(NETDEV_PRE_UP, dev); 1245 ret = call_netdevice_notifiers(NETDEV_PRE_UP, dev);
1240 ret = notifier_to_errno(ret); 1246 ret = notifier_to_errno(ret);
@@ -1343,9 +1349,7 @@ static int __dev_close(struct net_device *dev)
1343 LIST_HEAD(single); 1349 LIST_HEAD(single);
1344 1350
1345 /* Temporarily disable netpoll until the interface is down */ 1351 /* Temporarily disable netpoll until the interface is down */
1346 retval = netpoll_rx_disable(dev); 1352 netpoll_rx_disable(dev);
1347 if (retval)
1348 return retval;
1349 1353
1350 list_add(&dev->unreg_list, &single); 1354 list_add(&dev->unreg_list, &single);
1351 retval = __dev_close_many(&single); 1355 retval = __dev_close_many(&single);
@@ -1387,14 +1391,11 @@ static int dev_close_many(struct list_head *head)
1387 */ 1391 */
1388int dev_close(struct net_device *dev) 1392int dev_close(struct net_device *dev)
1389{ 1393{
1390 int ret = 0;
1391 if (dev->flags & IFF_UP) { 1394 if (dev->flags & IFF_UP) {
1392 LIST_HEAD(single); 1395 LIST_HEAD(single);
1393 1396
1394 /* Block netpoll rx while the interface is going down */ 1397 /* Block netpoll rx while the interface is going down */
1395 ret = netpoll_rx_disable(dev); 1398 netpoll_rx_disable(dev);
1396 if (ret)
1397 return ret;
1398 1399
1399 list_add(&dev->unreg_list, &single); 1400 list_add(&dev->unreg_list, &single);
1400 dev_close_many(&single); 1401 dev_close_many(&single);
@@ -1402,7 +1403,7 @@ int dev_close(struct net_device *dev)
1402 1403
1403 netpoll_rx_enable(dev); 1404 netpoll_rx_enable(dev);
1404 } 1405 }
1405 return ret; 1406 return 0;
1406} 1407}
1407EXPORT_SYMBOL(dev_close); 1408EXPORT_SYMBOL(dev_close);
1408 1409
@@ -1432,6 +1433,14 @@ void dev_disable_lro(struct net_device *dev)
1432} 1433}
1433EXPORT_SYMBOL(dev_disable_lro); 1434EXPORT_SYMBOL(dev_disable_lro);
1434 1435
1436static int call_netdevice_notifier(struct notifier_block *nb, unsigned long val,
1437 struct net_device *dev)
1438{
1439 struct netdev_notifier_info info;
1440
1441 netdev_notifier_info_init(&info, dev);
1442 return nb->notifier_call(nb, val, &info);
1443}
1435 1444
1436static int dev_boot_phase = 1; 1445static int dev_boot_phase = 1;
1437 1446
@@ -1464,7 +1473,7 @@ int register_netdevice_notifier(struct notifier_block *nb)
1464 goto unlock; 1473 goto unlock;
1465 for_each_net(net) { 1474 for_each_net(net) {
1466 for_each_netdev(net, dev) { 1475 for_each_netdev(net, dev) {
1467 err = nb->notifier_call(nb, NETDEV_REGISTER, dev); 1476 err = call_netdevice_notifier(nb, NETDEV_REGISTER, dev);
1468 err = notifier_to_errno(err); 1477 err = notifier_to_errno(err);
1469 if (err) 1478 if (err)
1470 goto rollback; 1479 goto rollback;
@@ -1472,7 +1481,7 @@ int register_netdevice_notifier(struct notifier_block *nb)
1472 if (!(dev->flags & IFF_UP)) 1481 if (!(dev->flags & IFF_UP))
1473 continue; 1482 continue;
1474 1483
1475 nb->notifier_call(nb, NETDEV_UP, dev); 1484 call_netdevice_notifier(nb, NETDEV_UP, dev);
1476 } 1485 }
1477 } 1486 }
1478 1487
@@ -1488,10 +1497,11 @@ rollback:
1488 goto outroll; 1497 goto outroll;
1489 1498
1490 if (dev->flags & IFF_UP) { 1499 if (dev->flags & IFF_UP) {
1491 nb->notifier_call(nb, NETDEV_GOING_DOWN, dev); 1500 call_netdevice_notifier(nb, NETDEV_GOING_DOWN,
1492 nb->notifier_call(nb, NETDEV_DOWN, dev); 1501 dev);
1502 call_netdevice_notifier(nb, NETDEV_DOWN, dev);
1493 } 1503 }
1494 nb->notifier_call(nb, NETDEV_UNREGISTER, dev); 1504 call_netdevice_notifier(nb, NETDEV_UNREGISTER, dev);
1495 } 1505 }
1496 } 1506 }
1497 1507
@@ -1529,10 +1539,11 @@ int unregister_netdevice_notifier(struct notifier_block *nb)
1529 for_each_net(net) { 1539 for_each_net(net) {
1530 for_each_netdev(net, dev) { 1540 for_each_netdev(net, dev) {
1531 if (dev->flags & IFF_UP) { 1541 if (dev->flags & IFF_UP) {
1532 nb->notifier_call(nb, NETDEV_GOING_DOWN, dev); 1542 call_netdevice_notifier(nb, NETDEV_GOING_DOWN,
1533 nb->notifier_call(nb, NETDEV_DOWN, dev); 1543 dev);
1544 call_netdevice_notifier(nb, NETDEV_DOWN, dev);
1534 } 1545 }
1535 nb->notifier_call(nb, NETDEV_UNREGISTER, dev); 1546 call_netdevice_notifier(nb, NETDEV_UNREGISTER, dev);
1536 } 1547 }
1537 } 1548 }
1538unlock: 1549unlock:
@@ -1542,6 +1553,25 @@ unlock:
1542EXPORT_SYMBOL(unregister_netdevice_notifier); 1553EXPORT_SYMBOL(unregister_netdevice_notifier);
1543 1554
1544/** 1555/**
1556 * call_netdevice_notifiers_info - call all network notifier blocks
1557 * @val: value passed unmodified to notifier function
1558 * @dev: net_device pointer passed unmodified to notifier function
1559 * @info: notifier information data
1560 *
1561 * Call all network notifier blocks. Parameters and return value
1562 * are as for raw_notifier_call_chain().
1563 */
1564
1565int call_netdevice_notifiers_info(unsigned long val, struct net_device *dev,
1566 struct netdev_notifier_info *info)
1567{
1568 ASSERT_RTNL();
1569 netdev_notifier_info_init(info, dev);
1570 return raw_notifier_call_chain(&netdev_chain, val, info);
1571}
1572EXPORT_SYMBOL(call_netdevice_notifiers_info);
1573
1574/**
1545 * call_netdevice_notifiers - call all network notifier blocks 1575 * call_netdevice_notifiers - call all network notifier blocks
1546 * @val: value passed unmodified to notifier function 1576 * @val: value passed unmodified to notifier function
1547 * @dev: net_device pointer passed unmodified to notifier function 1577 * @dev: net_device pointer passed unmodified to notifier function
@@ -1552,8 +1582,9 @@ EXPORT_SYMBOL(unregister_netdevice_notifier);
1552 1582
1553int call_netdevice_notifiers(unsigned long val, struct net_device *dev) 1583int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
1554{ 1584{
1555 ASSERT_RTNL(); 1585 struct netdev_notifier_info info;
1556 return raw_notifier_call_chain(&netdev_chain, val, dev); 1586
1587 return call_netdevice_notifiers_info(val, dev, &info);
1557} 1588}
1558EXPORT_SYMBOL(call_netdevice_notifiers); 1589EXPORT_SYMBOL(call_netdevice_notifiers);
1559 1590
@@ -1655,23 +1686,19 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
1655 } 1686 }
1656 } 1687 }
1657 1688
1658 skb_orphan(skb);
1659
1660 if (unlikely(!is_skb_forwardable(dev, skb))) { 1689 if (unlikely(!is_skb_forwardable(dev, skb))) {
1661 atomic_long_inc(&dev->rx_dropped); 1690 atomic_long_inc(&dev->rx_dropped);
1662 kfree_skb(skb); 1691 kfree_skb(skb);
1663 return NET_RX_DROP; 1692 return NET_RX_DROP;
1664 } 1693 }
1665 skb->skb_iif = 0; 1694 skb_scrub_packet(skb);
1666 skb->dev = dev;
1667 skb_dst_drop(skb);
1668 skb->tstamp.tv64 = 0;
1669 skb->pkt_type = PACKET_HOST;
1670 skb->protocol = eth_type_trans(skb, dev); 1695 skb->protocol = eth_type_trans(skb, dev);
1671 skb->mark = 0; 1696
1672 secpath_reset(skb); 1697 /* eth_type_trans() can set pkt_type.
1673 nf_reset(skb); 1698 * clear pkt_type _after_ calling eth_type_trans()
1674 nf_reset_trace(skb); 1699 */
1700 skb->pkt_type = PACKET_HOST;
1701
1675 return netif_rx(skb); 1702 return netif_rx(skb);
1676} 1703}
1677EXPORT_SYMBOL_GPL(dev_forward_skb); 1704EXPORT_SYMBOL_GPL(dev_forward_skb);
@@ -1736,7 +1763,7 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
1736 skb_reset_mac_header(skb2); 1763 skb_reset_mac_header(skb2);
1737 1764
1738 if (skb_network_header(skb2) < skb2->data || 1765 if (skb_network_header(skb2) < skb2->data ||
1739 skb2->network_header > skb2->tail) { 1766 skb_network_header(skb2) > skb_tail_pointer(skb2)) {
1740 net_crit_ratelimited("protocol %04x is buggy, dev %s\n", 1767 net_crit_ratelimited("protocol %04x is buggy, dev %s\n",
1741 ntohs(skb2->protocol), 1768 ntohs(skb2->protocol),
1742 dev->name); 1769 dev->name);
@@ -3099,6 +3126,46 @@ static int rps_ipi_queued(struct softnet_data *sd)
3099 return 0; 3126 return 0;
3100} 3127}
3101 3128
3129#ifdef CONFIG_NET_FLOW_LIMIT
3130int netdev_flow_limit_table_len __read_mostly = (1 << 12);
3131#endif
3132
3133static bool skb_flow_limit(struct sk_buff *skb, unsigned int qlen)
3134{
3135#ifdef CONFIG_NET_FLOW_LIMIT
3136 struct sd_flow_limit *fl;
3137 struct softnet_data *sd;
3138 unsigned int old_flow, new_flow;
3139
3140 if (qlen < (netdev_max_backlog >> 1))
3141 return false;
3142
3143 sd = &__get_cpu_var(softnet_data);
3144
3145 rcu_read_lock();
3146 fl = rcu_dereference(sd->flow_limit);
3147 if (fl) {
3148 new_flow = skb_get_rxhash(skb) & (fl->num_buckets - 1);
3149 old_flow = fl->history[fl->history_head];
3150 fl->history[fl->history_head] = new_flow;
3151
3152 fl->history_head++;
3153 fl->history_head &= FLOW_LIMIT_HISTORY - 1;
3154
3155 if (likely(fl->buckets[old_flow]))
3156 fl->buckets[old_flow]--;
3157
3158 if (++fl->buckets[new_flow] > (FLOW_LIMIT_HISTORY >> 1)) {
3159 fl->count++;
3160 rcu_read_unlock();
3161 return true;
3162 }
3163 }
3164 rcu_read_unlock();
3165#endif
3166 return false;
3167}
3168
3102/* 3169/*
3103 * enqueue_to_backlog is called to queue an skb to a per CPU backlog 3170 * enqueue_to_backlog is called to queue an skb to a per CPU backlog
3104 * queue (may be a remote CPU queue). 3171 * queue (may be a remote CPU queue).
@@ -3108,13 +3175,15 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
3108{ 3175{
3109 struct softnet_data *sd; 3176 struct softnet_data *sd;
3110 unsigned long flags; 3177 unsigned long flags;
3178 unsigned int qlen;
3111 3179
3112 sd = &per_cpu(softnet_data, cpu); 3180 sd = &per_cpu(softnet_data, cpu);
3113 3181
3114 local_irq_save(flags); 3182 local_irq_save(flags);
3115 3183
3116 rps_lock(sd); 3184 rps_lock(sd);
3117 if (skb_queue_len(&sd->input_pkt_queue) <= netdev_max_backlog) { 3185 qlen = skb_queue_len(&sd->input_pkt_queue);
3186 if (qlen <= netdev_max_backlog && !skb_flow_limit(skb, qlen)) {
3118 if (skb_queue_len(&sd->input_pkt_queue)) { 3187 if (skb_queue_len(&sd->input_pkt_queue)) {
3119enqueue: 3188enqueue:
3120 __skb_queue_tail(&sd->input_pkt_queue, skb); 3189 __skb_queue_tail(&sd->input_pkt_queue, skb);
@@ -3862,7 +3931,7 @@ static void skb_gro_reset_offset(struct sk_buff *skb)
3862 NAPI_GRO_CB(skb)->frag0 = NULL; 3931 NAPI_GRO_CB(skb)->frag0 = NULL;
3863 NAPI_GRO_CB(skb)->frag0_len = 0; 3932 NAPI_GRO_CB(skb)->frag0_len = 0;
3864 3933
3865 if (skb->mac_header == skb->tail && 3934 if (skb_mac_header(skb) == skb_tail_pointer(skb) &&
3866 pinfo->nr_frags && 3935 pinfo->nr_frags &&
3867 !PageHighMem(skb_frag_page(frag0))) { 3936 !PageHighMem(skb_frag_page(frag0))) {
3868 NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0); 3937 NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0);
@@ -4106,6 +4175,58 @@ void napi_complete(struct napi_struct *n)
4106} 4175}
4107EXPORT_SYMBOL(napi_complete); 4176EXPORT_SYMBOL(napi_complete);
4108 4177
4178/* must be called under rcu_read_lock(), as we dont take a reference */
4179struct napi_struct *napi_by_id(unsigned int napi_id)
4180{
4181 unsigned int hash = napi_id % HASH_SIZE(napi_hash);
4182 struct napi_struct *napi;
4183
4184 hlist_for_each_entry_rcu(napi, &napi_hash[hash], napi_hash_node)
4185 if (napi->napi_id == napi_id)
4186 return napi;
4187
4188 return NULL;
4189}
4190EXPORT_SYMBOL_GPL(napi_by_id);
4191
4192void napi_hash_add(struct napi_struct *napi)
4193{
4194 if (!test_and_set_bit(NAPI_STATE_HASHED, &napi->state)) {
4195
4196 spin_lock(&napi_hash_lock);
4197
4198 /* 0 is not a valid id, we also skip an id that is taken
4199 * we expect both events to be extremely rare
4200 */
4201 napi->napi_id = 0;
4202 while (!napi->napi_id) {
4203 napi->napi_id = ++napi_gen_id;
4204 if (napi_by_id(napi->napi_id))
4205 napi->napi_id = 0;
4206 }
4207
4208 hlist_add_head_rcu(&napi->napi_hash_node,
4209 &napi_hash[napi->napi_id % HASH_SIZE(napi_hash)]);
4210
4211 spin_unlock(&napi_hash_lock);
4212 }
4213}
4214EXPORT_SYMBOL_GPL(napi_hash_add);
4215
4216/* Warning : caller is responsible to make sure rcu grace period
4217 * is respected before freeing memory containing @napi
4218 */
4219void napi_hash_del(struct napi_struct *napi)
4220{
4221 spin_lock(&napi_hash_lock);
4222
4223 if (test_and_clear_bit(NAPI_STATE_HASHED, &napi->state))
4224 hlist_del_rcu(&napi->napi_hash_node);
4225
4226 spin_unlock(&napi_hash_lock);
4227}
4228EXPORT_SYMBOL_GPL(napi_hash_del);
4229
4109void netif_napi_add(struct net_device *dev, struct napi_struct *napi, 4230void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
4110 int (*poll)(struct napi_struct *, int), int weight) 4231 int (*poll)(struct napi_struct *, int), int weight)
4111{ 4232{
@@ -4404,7 +4525,7 @@ static int __netdev_upper_dev_link(struct net_device *dev,
4404 else 4525 else
4405 list_add_tail_rcu(&upper->list, &dev->upper_dev_list); 4526 list_add_tail_rcu(&upper->list, &dev->upper_dev_list);
4406 dev_hold(upper_dev); 4527 dev_hold(upper_dev);
4407 4528 call_netdevice_notifiers(NETDEV_CHANGEUPPER, dev);
4408 return 0; 4529 return 0;
4409} 4530}
4410 4531
@@ -4464,6 +4585,7 @@ void netdev_upper_dev_unlink(struct net_device *dev,
4464 list_del_rcu(&upper->list); 4585 list_del_rcu(&upper->list);
4465 dev_put(upper_dev); 4586 dev_put(upper_dev);
4466 kfree_rcu(upper, rcu); 4587 kfree_rcu(upper, rcu);
4588 call_netdevice_notifiers(NETDEV_CHANGEUPPER, dev);
4467} 4589}
4468EXPORT_SYMBOL(netdev_upper_dev_unlink); 4590EXPORT_SYMBOL(netdev_upper_dev_unlink);
4469 4591
@@ -4734,8 +4856,13 @@ void __dev_notify_flags(struct net_device *dev, unsigned int old_flags)
4734 } 4856 }
4735 4857
4736 if (dev->flags & IFF_UP && 4858 if (dev->flags & IFF_UP &&
4737 (changes & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI | IFF_VOLATILE))) 4859 (changes & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI | IFF_VOLATILE))) {
4738 call_netdevice_notifiers(NETDEV_CHANGE, dev); 4860 struct netdev_notifier_change_info change_info;
4861
4862 change_info.flags_changed = changes;
4863 call_netdevice_notifiers_info(NETDEV_CHANGE, dev,
4864 &change_info.info);
4865 }
4739} 4866}
4740 4867
4741/** 4868/**
@@ -5158,17 +5285,28 @@ static void netdev_init_one_queue(struct net_device *dev,
5158#endif 5285#endif
5159} 5286}
5160 5287
5288static void netif_free_tx_queues(struct net_device *dev)
5289{
5290 if (is_vmalloc_addr(dev->_tx))
5291 vfree(dev->_tx);
5292 else
5293 kfree(dev->_tx);
5294}
5295
5161static int netif_alloc_netdev_queues(struct net_device *dev) 5296static int netif_alloc_netdev_queues(struct net_device *dev)
5162{ 5297{
5163 unsigned int count = dev->num_tx_queues; 5298 unsigned int count = dev->num_tx_queues;
5164 struct netdev_queue *tx; 5299 struct netdev_queue *tx;
5300 size_t sz = count * sizeof(*tx);
5165 5301
5166 BUG_ON(count < 1); 5302 BUG_ON(count < 1 || count > 0xffff);
5167
5168 tx = kcalloc(count, sizeof(struct netdev_queue), GFP_KERNEL);
5169 if (!tx)
5170 return -ENOMEM;
5171 5303
5304 tx = kzalloc(sz, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT);
5305 if (!tx) {
5306 tx = vzalloc(sz);
5307 if (!tx)
5308 return -ENOMEM;
5309 }
5172 dev->_tx = tx; 5310 dev->_tx = tx;
5173 5311
5174 netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL); 5312 netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
@@ -5269,6 +5407,10 @@ int register_netdevice(struct net_device *dev)
5269 */ 5407 */
5270 dev->hw_enc_features |= NETIF_F_SG; 5408 dev->hw_enc_features |= NETIF_F_SG;
5271 5409
5410 /* Make NETIF_F_SG inheritable to MPLS.
5411 */
5412 dev->mpls_features |= NETIF_F_SG;
5413
5272 ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev); 5414 ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev);
5273 ret = notifier_to_errno(ret); 5415 ret = notifier_to_errno(ret);
5274 if (ret) 5416 if (ret)
@@ -5712,7 +5854,7 @@ free_all:
5712 5854
5713free_pcpu: 5855free_pcpu:
5714 free_percpu(dev->pcpu_refcnt); 5856 free_percpu(dev->pcpu_refcnt);
5715 kfree(dev->_tx); 5857 netif_free_tx_queues(dev);
5716#ifdef CONFIG_RPS 5858#ifdef CONFIG_RPS
5717 kfree(dev->_rx); 5859 kfree(dev->_rx);
5718#endif 5860#endif
@@ -5737,7 +5879,7 @@ void free_netdev(struct net_device *dev)
5737 5879
5738 release_net(dev_net(dev)); 5880 release_net(dev_net(dev));
5739 5881
5740 kfree(dev->_tx); 5882 netif_free_tx_queues(dev);
5741#ifdef CONFIG_RPS 5883#ifdef CONFIG_RPS
5742 kfree(dev->_rx); 5884 kfree(dev->_rx);
5743#endif 5885#endif
@@ -6048,7 +6190,7 @@ netdev_features_t netdev_increment_features(netdev_features_t all,
6048} 6190}
6049EXPORT_SYMBOL(netdev_increment_features); 6191EXPORT_SYMBOL(netdev_increment_features);
6050 6192
6051static struct hlist_head *netdev_create_hash(void) 6193static struct hlist_head * __net_init netdev_create_hash(void)
6052{ 6194{
6053 int i; 6195 int i;
6054 struct hlist_head *hash; 6196 struct hlist_head *hash;
@@ -6304,6 +6446,10 @@ static int __init net_dev_init(void)
6304 sd->backlog.weight = weight_p; 6446 sd->backlog.weight = weight_p;
6305 sd->backlog.gro_list = NULL; 6447 sd->backlog.gro_list = NULL;
6306 sd->backlog.gro_count = 0; 6448 sd->backlog.gro_count = 0;
6449
6450#ifdef CONFIG_NET_FLOW_LIMIT
6451 sd->flow_limit = NULL;
6452#endif
6307 } 6453 }
6308 6454
6309 dev_boot_phase = 0; 6455 dev_boot_phase = 0;
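
The napi_hash_add()/napi_hash_del() helpers added to dev.c above are what drivers use to make their NAPI contexts reachable from the busy-poll path (napi_by_id() is the lookup). A sketch of the expected driver-side pairing; "foo" and its ring structure are hypothetical, the helper names and signatures come from the hunk above.

  /* Hypothetical driver "foo": register each NAPI context with the new hash
   * so sockets can busy-poll it, and unregister it before teardown. */
  struct foo_ring {
      struct net_device *netdev;
      struct napi_struct napi;
  };

  static int foo_poll(struct napi_struct *napi, int budget);

  static void foo_ring_init(struct foo_ring *ring)
  {
      netif_napi_add(ring->netdev, &ring->napi, foo_poll, 64);
      napi_hash_add(&ring->napi);        /* assigns ring->napi.napi_id */
      napi_enable(&ring->napi);
  }

  static void foo_ring_fini(struct foo_ring *ring)
  {
      napi_disable(&ring->napi);
      napi_hash_del(&ring->napi);
      /* per the comment above napi_hash_del(): an RCU grace period must
       * pass before the memory holding the napi_struct is freed */
      synchronize_rcu();
      netif_napi_del(&ring->napi);
  }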
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index d23b6682f4e9..5e78d44333b9 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -295,9 +295,9 @@ static int net_dm_cmd_trace(struct sk_buff *skb,
295} 295}
296 296
297static int dropmon_net_event(struct notifier_block *ev_block, 297static int dropmon_net_event(struct notifier_block *ev_block,
298 unsigned long event, void *ptr) 298 unsigned long event, void *ptr)
299{ 299{
300 struct net_device *dev = ptr; 300 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
301 struct dm_hw_stat_delta *new_stat = NULL; 301 struct dm_hw_stat_delta *new_stat = NULL;
302 struct dm_hw_stat_delta *tmp; 302 struct dm_hw_stat_delta *tmp;
303 303
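
drop_monitor.c above is one of many notifiers converted for highlight 17: the void *ptr handed to netdevice notifiers now points to a struct netdev_notifier_info rather than to the net_device itself. A sketch of the resulting pattern for a hypothetical module's callback ("foo" is not from this page; netdev_notifier_info_to_dev() is taken from the conversions in this diff):

  static int foo_netdev_event(struct notifier_block *nb,
                              unsigned long event, void *ptr)
  {
      /* old code cast ptr directly; the accessor is now the way in */
      struct net_device *dev = netdev_notifier_info_to_dev(ptr);

      switch (event) {
      case NETDEV_UP:
          pr_info("foo: %s is up\n", dev->name);
          break;
      case NETDEV_UNREGISTER:
          /* drop any per-device state this module holds */
          break;
      }
      return NOTIFY_DONE;
  }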
diff --git a/net/core/dst.c b/net/core/dst.c
index df9cc810ec8e..ca4231ec7347 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -372,7 +372,7 @@ static void dst_ifdown(struct dst_entry *dst, struct net_device *dev,
372static int dst_dev_event(struct notifier_block *this, unsigned long event, 372static int dst_dev_event(struct notifier_block *this, unsigned long event,
373 void *ptr) 373 void *ptr)
374{ 374{
375 struct net_device *dev = ptr; 375 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
376 struct dst_entry *dst, *last = NULL; 376 struct dst_entry *dst, *last = NULL;
377 377
378 switch (event) { 378 switch (event) {
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index ce91766eeca9..ab5fa6336c84 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -82,6 +82,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN]
82 [NETIF_F_FSO_BIT] = "tx-fcoe-segmentation", 82 [NETIF_F_FSO_BIT] = "tx-fcoe-segmentation",
83 [NETIF_F_GSO_GRE_BIT] = "tx-gre-segmentation", 83 [NETIF_F_GSO_GRE_BIT] = "tx-gre-segmentation",
84 [NETIF_F_GSO_UDP_TUNNEL_BIT] = "tx-udp_tnl-segmentation", 84 [NETIF_F_GSO_UDP_TUNNEL_BIT] = "tx-udp_tnl-segmentation",
85 [NETIF_F_GSO_MPLS_BIT] = "tx-mpls-segmentation",
85 86
86 [NETIF_F_FCOE_CRC_BIT] = "tx-checksum-fcoe-crc", 87 [NETIF_F_FCOE_CRC_BIT] = "tx-checksum-fcoe-crc",
87 [NETIF_F_SCTP_CSUM_BIT] = "tx-checksum-sctp", 88 [NETIF_F_SCTP_CSUM_BIT] = "tx-checksum-sctp",
@@ -1319,10 +1320,19 @@ static int ethtool_get_dump_data(struct net_device *dev,
1319 if (ret) 1320 if (ret)
1320 return ret; 1321 return ret;
1321 1322
1322 len = (tmp.len > dump.len) ? dump.len : tmp.len; 1323 len = min(tmp.len, dump.len);
1323 if (!len) 1324 if (!len)
1324 return -EFAULT; 1325 return -EFAULT;
1325 1326
1327 /* Don't ever let the driver think there's more space available
1328 * than it requested with .get_dump_flag().
1329 */
1330 dump.len = len;
1331
1332 /* Always allocate enough space to hold the whole thing so that the
1333 * driver does not need to check the length and bother with partial
1334 * dumping.
1335 */
1326 data = vzalloc(tmp.len); 1336 data = vzalloc(tmp.len);
1327 if (!data) 1337 if (!data)
1328 return -ENOMEM; 1338 return -ENOMEM;
@@ -1330,6 +1340,16 @@ static int ethtool_get_dump_data(struct net_device *dev,
1330 if (ret) 1340 if (ret)
1331 goto out; 1341 goto out;
1332 1342
1343 /* There are two sane possibilities:
1344 * 1. The driver's .get_dump_data() does not touch dump.len.
1345 * 2. Or it may set dump.len to how much it really writes, which
1346 * should be tmp.len (or len if it can do a partial dump).
1347 * In any case respond to userspace with the actual length of data
1348 * it's receiving.
1349 */
1350 WARN_ON(dump.len != len && dump.len != tmp.len);
1351 dump.len = len;
1352
1333 if (copy_to_user(useraddr, &dump, sizeof(dump))) { 1353 if (copy_to_user(useraddr, &dump, sizeof(dump))) {
1334 ret = -EFAULT; 1354 ret = -EFAULT;
1335 goto out; 1355 goto out;
@@ -1413,7 +1433,7 @@ static int ethtool_get_module_eeprom(struct net_device *dev,
1413 modinfo.eeprom_len); 1433 modinfo.eeprom_len);
1414} 1434}
1415 1435
1416/* The main entry point in this file. Called from net/core/dev.c */ 1436/* The main entry point in this file. Called from net/core/dev_ioctl.c */
1417 1437
1418int dev_ethtool(struct net *net, struct ifreq *ifr) 1438int dev_ethtool(struct net *net, struct ifreq *ifr)
1419{ 1439{
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index d5a9f8ead0d8..21735440c44a 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -705,9 +705,9 @@ static void detach_rules(struct list_head *rules, struct net_device *dev)
705 705
706 706
707static int fib_rules_event(struct notifier_block *this, unsigned long event, 707static int fib_rules_event(struct notifier_block *this, unsigned long event,
708 void *ptr) 708 void *ptr)
709{ 709{
710 struct net_device *dev = ptr; 710 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
711 struct net *net = dev_net(dev); 711 struct net *net = dev_net(dev);
712 struct fib_rules_ops *ops; 712 struct fib_rules_ops *ops;
713 713
diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c
index d9d198aa9fed..6b5b6e7013ca 100644
--- a/net/core/gen_estimator.c
+++ b/net/core/gen_estimator.c
@@ -82,7 +82,7 @@ struct gen_estimator
82{ 82{
83 struct list_head list; 83 struct list_head list;
84 struct gnet_stats_basic_packed *bstats; 84 struct gnet_stats_basic_packed *bstats;
85 struct gnet_stats_rate_est *rate_est; 85 struct gnet_stats_rate_est64 *rate_est;
86 spinlock_t *stats_lock; 86 spinlock_t *stats_lock;
87 int ewma_log; 87 int ewma_log;
88 u64 last_bytes; 88 u64 last_bytes;
@@ -167,7 +167,7 @@ static void gen_add_node(struct gen_estimator *est)
167 167
168static 168static
169struct gen_estimator *gen_find_node(const struct gnet_stats_basic_packed *bstats, 169struct gen_estimator *gen_find_node(const struct gnet_stats_basic_packed *bstats,
170 const struct gnet_stats_rate_est *rate_est) 170 const struct gnet_stats_rate_est64 *rate_est)
171{ 171{
172 struct rb_node *p = est_root.rb_node; 172 struct rb_node *p = est_root.rb_node;
173 173
@@ -203,7 +203,7 @@ struct gen_estimator *gen_find_node(const struct gnet_stats_basic_packed *bstats
203 * 203 *
204 */ 204 */
205int gen_new_estimator(struct gnet_stats_basic_packed *bstats, 205int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
206 struct gnet_stats_rate_est *rate_est, 206 struct gnet_stats_rate_est64 *rate_est,
207 spinlock_t *stats_lock, 207 spinlock_t *stats_lock,
208 struct nlattr *opt) 208 struct nlattr *opt)
209{ 209{
@@ -258,7 +258,7 @@ EXPORT_SYMBOL(gen_new_estimator);
258 * Note : Caller should respect an RCU grace period before freeing stats_lock 258 * Note : Caller should respect an RCU grace period before freeing stats_lock
259 */ 259 */
260void gen_kill_estimator(struct gnet_stats_basic_packed *bstats, 260void gen_kill_estimator(struct gnet_stats_basic_packed *bstats,
261 struct gnet_stats_rate_est *rate_est) 261 struct gnet_stats_rate_est64 *rate_est)
262{ 262{
263 struct gen_estimator *e; 263 struct gen_estimator *e;
264 264
@@ -290,7 +290,7 @@ EXPORT_SYMBOL(gen_kill_estimator);
290 * Returns 0 on success or a negative error code. 290 * Returns 0 on success or a negative error code.
291 */ 291 */
292int gen_replace_estimator(struct gnet_stats_basic_packed *bstats, 292int gen_replace_estimator(struct gnet_stats_basic_packed *bstats,
293 struct gnet_stats_rate_est *rate_est, 293 struct gnet_stats_rate_est64 *rate_est,
294 spinlock_t *stats_lock, struct nlattr *opt) 294 spinlock_t *stats_lock, struct nlattr *opt)
295{ 295{
296 gen_kill_estimator(bstats, rate_est); 296 gen_kill_estimator(bstats, rate_est);
@@ -306,7 +306,7 @@ EXPORT_SYMBOL(gen_replace_estimator);
306 * Returns true if estimator is active, and false if not. 306 * Returns true if estimator is active, and false if not.
307 */ 307 */
308bool gen_estimator_active(const struct gnet_stats_basic_packed *bstats, 308bool gen_estimator_active(const struct gnet_stats_basic_packed *bstats,
309 const struct gnet_stats_rate_est *rate_est) 309 const struct gnet_stats_rate_est64 *rate_est)
310{ 310{
311 bool res; 311 bool res;
312 312
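
The gen_estimator.c conversion above (and the gen_stats.c hunk below) replaces struct gnet_stats_rate_est with a 64-bit variant so rates that overflow a u32 no longer wrap. Based on the u64 handling in those hunks, the new UAPI struct presumably looks like this; the layout is inferred, not quoted from this page:

  /* Presumed layout; inferred from the min_t(u64, UINT_MAX, r->bps) clamp
   * and the TCA_STATS_RATE_EST64 attribute in gen_stats.c below. */
  struct gnet_stats_rate_est64 {
      __u64   bps;    /* current byte rate */
      __u64   pps;    /* current packet rate */
  };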
diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c
index ddedf211e588..9d3d9e78397b 100644
--- a/net/core/gen_stats.c
+++ b/net/core/gen_stats.c
@@ -143,18 +143,30 @@ EXPORT_SYMBOL(gnet_stats_copy_basic);
143int 143int
144gnet_stats_copy_rate_est(struct gnet_dump *d, 144gnet_stats_copy_rate_est(struct gnet_dump *d,
145 const struct gnet_stats_basic_packed *b, 145 const struct gnet_stats_basic_packed *b,
146 struct gnet_stats_rate_est *r) 146 struct gnet_stats_rate_est64 *r)
147{ 147{
148 struct gnet_stats_rate_est est;
149 int res;
150
148 if (b && !gen_estimator_active(b, r)) 151 if (b && !gen_estimator_active(b, r))
149 return 0; 152 return 0;
150 153
154 est.bps = min_t(u64, UINT_MAX, r->bps);
155 /* we have some time before reaching 2^32 packets per second */
156 est.pps = r->pps;
157
151 if (d->compat_tc_stats) { 158 if (d->compat_tc_stats) {
152 d->tc_stats.bps = r->bps; 159 d->tc_stats.bps = est.bps;
153 d->tc_stats.pps = r->pps; 160 d->tc_stats.pps = est.pps;
154 } 161 }
155 162
156 if (d->tail) 163 if (d->tail) {
157 return gnet_stats_copy(d, TCA_STATS_RATE_EST, r, sizeof(*r)); 164 res = gnet_stats_copy(d, TCA_STATS_RATE_EST, &est, sizeof(est));
165 if (res < 0 || est.bps == r->bps)
166 return res;
167 /* emit 64bit stats only if needed */
168 return gnet_stats_copy(d, TCA_STATS_RATE_EST64, r, sizeof(*r));
169 }
158 170
159 return 0; 171 return 0;
160} 172}
diff --git a/net/core/link_watch.c b/net/core/link_watch.c
index 8f82a5cc3851..9c3a839322ba 100644
--- a/net/core/link_watch.c
+++ b/net/core/link_watch.c
@@ -92,6 +92,9 @@ static bool linkwatch_urgent_event(struct net_device *dev)
92 if (dev->ifindex != dev->iflink) 92 if (dev->ifindex != dev->iflink)
93 return true; 93 return true;
94 94
95 if (dev->priv_flags & IFF_TEAM_PORT)
96 return true;
97
95 return netif_carrier_ok(dev) && qdisc_tx_changing(dev); 98 return netif_carrier_ok(dev) && qdisc_tx_changing(dev);
96} 99}
97 100
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 5c56b217b999..b7de821f98df 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -231,7 +231,7 @@ static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
231 we must kill timers etc. and move 231 we must kill timers etc. and move
232 it to safe state. 232 it to safe state.
233 */ 233 */
234 skb_queue_purge(&n->arp_queue); 234 __skb_queue_purge(&n->arp_queue);
235 n->arp_queue_len_bytes = 0; 235 n->arp_queue_len_bytes = 0;
236 n->output = neigh_blackhole; 236 n->output = neigh_blackhole;
237 if (n->nud_state & NUD_VALID) 237 if (n->nud_state & NUD_VALID)
@@ -286,7 +286,7 @@ static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device
286 if (!n) 286 if (!n)
287 goto out_entries; 287 goto out_entries;
288 288
289 skb_queue_head_init(&n->arp_queue); 289 __skb_queue_head_init(&n->arp_queue);
290 rwlock_init(&n->lock); 290 rwlock_init(&n->lock);
291 seqlock_init(&n->ha_lock); 291 seqlock_init(&n->ha_lock);
292 n->updated = n->used = now; 292 n->updated = n->used = now;
@@ -708,7 +708,9 @@ void neigh_destroy(struct neighbour *neigh)
708 if (neigh_del_timer(neigh)) 708 if (neigh_del_timer(neigh))
709 pr_warn("Impossible event\n"); 709 pr_warn("Impossible event\n");
710 710
711 skb_queue_purge(&neigh->arp_queue); 711 write_lock_bh(&neigh->lock);
712 __skb_queue_purge(&neigh->arp_queue);
713 write_unlock_bh(&neigh->lock);
712 neigh->arp_queue_len_bytes = 0; 714 neigh->arp_queue_len_bytes = 0;
713 715
714 if (dev->netdev_ops->ndo_neigh_destroy) 716 if (dev->netdev_ops->ndo_neigh_destroy)
@@ -858,7 +860,7 @@ static void neigh_invalidate(struct neighbour *neigh)
858 neigh->ops->error_report(neigh, skb); 860 neigh->ops->error_report(neigh, skb);
859 write_lock(&neigh->lock); 861 write_lock(&neigh->lock);
860 } 862 }
861 skb_queue_purge(&neigh->arp_queue); 863 __skb_queue_purge(&neigh->arp_queue);
862 neigh->arp_queue_len_bytes = 0; 864 neigh->arp_queue_len_bytes = 0;
863} 865}
864 866
@@ -1210,7 +1212,7 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
1210 1212
1211 write_lock_bh(&neigh->lock); 1213 write_lock_bh(&neigh->lock);
1212 } 1214 }
1213 skb_queue_purge(&neigh->arp_queue); 1215 __skb_queue_purge(&neigh->arp_queue);
1214 neigh->arp_queue_len_bytes = 0; 1216 neigh->arp_queue_len_bytes = 0;
1215 } 1217 }
1216out: 1218out:
@@ -1419,7 +1421,7 @@ static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
1419 1421
1420 for (p = &tbl->parms; p; p = p->next) { 1422 for (p = &tbl->parms; p; p = p->next) {
1421 if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) || 1423 if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) ||
1422 (!p->dev && !ifindex)) 1424 (!p->dev && !ifindex && net_eq(net, &init_net)))
1423 return p; 1425 return p;
1424 } 1426 }
1425 1427
@@ -1429,15 +1431,11 @@ static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
1429struct neigh_parms *neigh_parms_alloc(struct net_device *dev, 1431struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
1430 struct neigh_table *tbl) 1432 struct neigh_table *tbl)
1431{ 1433{
1432 struct neigh_parms *p, *ref; 1434 struct neigh_parms *p;
1433 struct net *net = dev_net(dev); 1435 struct net *net = dev_net(dev);
1434 const struct net_device_ops *ops = dev->netdev_ops; 1436 const struct net_device_ops *ops = dev->netdev_ops;
1435 1437
1436 ref = lookup_neigh_parms(tbl, net, 0); 1438 p = kmemdup(&tbl->parms, sizeof(*p), GFP_KERNEL);
1437 if (!ref)
1438 return NULL;
1439
1440 p = kmemdup(ref, sizeof(*p), GFP_KERNEL);
1441 if (p) { 1439 if (p) {
1442 p->tbl = tbl; 1440 p->tbl = tbl;
1443 atomic_set(&p->refcnt, 1); 1441 atomic_set(&p->refcnt, 1);
@@ -2053,6 +2051,12 @@ static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh)
2053 } 2051 }
2054 } 2052 }
2055 2053
2054 err = -ENOENT;
2055 if ((tb[NDTA_THRESH1] || tb[NDTA_THRESH2] ||
2056 tb[NDTA_THRESH3] || tb[NDTA_GC_INTERVAL]) &&
2057 !net_eq(net, &init_net))
2058 goto errout_tbl_lock;
2059
2056 if (tb[NDTA_THRESH1]) 2060 if (tb[NDTA_THRESH1])
2057 tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]); 2061 tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]);
2058 2062
@@ -2765,11 +2769,11 @@ EXPORT_SYMBOL(neigh_app_ns);
2765static int zero; 2769static int zero;
2766static int unres_qlen_max = INT_MAX / SKB_TRUESIZE(ETH_FRAME_LEN); 2770static int unres_qlen_max = INT_MAX / SKB_TRUESIZE(ETH_FRAME_LEN);
2767 2771
2768static int proc_unres_qlen(ctl_table *ctl, int write, void __user *buffer, 2772static int proc_unres_qlen(struct ctl_table *ctl, int write,
2769 size_t *lenp, loff_t *ppos) 2773 void __user *buffer, size_t *lenp, loff_t *ppos)
2770{ 2774{
2771 int size, ret; 2775 int size, ret;
2772 ctl_table tmp = *ctl; 2776 struct ctl_table tmp = *ctl;
2773 2777
2774 tmp.extra1 = &zero; 2778 tmp.extra1 = &zero;
2775 tmp.extra2 = &unres_qlen_max; 2779 tmp.extra2 = &unres_qlen_max;
diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c
index 569d355fec3e..2bf83299600a 100644
--- a/net/core/net-procfs.c
+++ b/net/core/net-procfs.c
@@ -146,11 +146,23 @@ static void softnet_seq_stop(struct seq_file *seq, void *v)
146static int softnet_seq_show(struct seq_file *seq, void *v) 146static int softnet_seq_show(struct seq_file *seq, void *v)
147{ 147{
148 struct softnet_data *sd = v; 148 struct softnet_data *sd = v;
149 unsigned int flow_limit_count = 0;
149 150
150 seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x\n", 151#ifdef CONFIG_NET_FLOW_LIMIT
152 struct sd_flow_limit *fl;
153
154 rcu_read_lock();
155 fl = rcu_dereference(sd->flow_limit);
156 if (fl)
157 flow_limit_count = fl->count;
158 rcu_read_unlock();
159#endif
160
161 seq_printf(seq,
162 "%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
151 sd->processed, sd->dropped, sd->time_squeeze, 0, 163 sd->processed, sd->dropped, sd->time_squeeze, 0,
152 0, 0, 0, 0, /* was fastroute */ 164 0, 0, 0, 0, /* was fastroute */
153 sd->cpu_collision, sd->received_rps); 165 sd->cpu_collision, sd->received_rps, flow_limit_count);
154 return 0; 166 return 0;
155} 167}
156 168
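
softnet_seq_show() above grows an 11th column, the per-cpu flow_limit_count from the flow shedding code added to dev.c (highlight 22). A throwaway userspace sketch of reading it; the column meanings are taken from the seq_printf() arguments above, everything else is illustrative:

  #include <stdio.h>

  int main(void)
  {
      FILE *f = fopen("/proc/net/softnet_stat", "r");
      unsigned int v[11];
      int cpu = 0;

      if (!f)
          return 1;
      /* 11 hex fields per cpu: processed, dropped, time_squeeze, five zeros
       * (was fastroute), cpu_collision, received_rps, flow_limit_count */
      while (fscanf(f, "%x %x %x %x %x %x %x %x %x %x %x",
                    &v[0], &v[1], &v[2], &v[3], &v[4], &v[5],
                    &v[6], &v[7], &v[8], &v[9], &v[10]) == 11)
          printf("cpu%d: processed=%u dropped=%u flow_limit=%u\n",
                 cpu++, v[0], v[1], v[10]);
      fclose(f);
      return 0;
  }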
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 35a9f0804b6f..2c637e9a0b27 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -248,7 +248,7 @@ static void netpoll_poll_dev(struct net_device *dev)
248 zap_completion_queue(); 248 zap_completion_queue();
249} 249}
250 250
251int netpoll_rx_disable(struct net_device *dev) 251void netpoll_rx_disable(struct net_device *dev)
252{ 252{
253 struct netpoll_info *ni; 253 struct netpoll_info *ni;
254 int idx; 254 int idx;
@@ -258,7 +258,6 @@ int netpoll_rx_disable(struct net_device *dev)
258 if (ni) 258 if (ni)
259 down(&ni->dev_lock); 259 down(&ni->dev_lock);
260 srcu_read_unlock(&netpoll_srcu, idx); 260 srcu_read_unlock(&netpoll_srcu, idx);
261 return 0;
262} 261}
263EXPORT_SYMBOL(netpoll_rx_disable); 262EXPORT_SYMBOL(netpoll_rx_disable);
264 263
@@ -691,25 +690,20 @@ static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo
691 send_skb->dev = skb->dev; 690 send_skb->dev = skb->dev;
692 691
693 skb_reset_network_header(send_skb); 692 skb_reset_network_header(send_skb);
694 skb_put(send_skb, sizeof(struct ipv6hdr)); 693 hdr = (struct ipv6hdr *) skb_put(send_skb, sizeof(struct ipv6hdr));
695 hdr = ipv6_hdr(send_skb);
696
697 *(__be32*)hdr = htonl(0x60000000); 694 *(__be32*)hdr = htonl(0x60000000);
698
699 hdr->payload_len = htons(size); 695 hdr->payload_len = htons(size);
700 hdr->nexthdr = IPPROTO_ICMPV6; 696 hdr->nexthdr = IPPROTO_ICMPV6;
701 hdr->hop_limit = 255; 697 hdr->hop_limit = 255;
702 hdr->saddr = *saddr; 698 hdr->saddr = *saddr;
703 hdr->daddr = *daddr; 699 hdr->daddr = *daddr;
704 700
705 send_skb->transport_header = send_skb->tail; 701 icmp6h = (struct icmp6hdr *) skb_put(send_skb, sizeof(struct icmp6hdr));
706 skb_put(send_skb, size);
707
708 icmp6h = (struct icmp6hdr *)skb_transport_header(skb);
709 icmp6h->icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT; 702 icmp6h->icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT;
710 icmp6h->icmp6_router = 0; 703 icmp6h->icmp6_router = 0;
711 icmp6h->icmp6_solicited = 1; 704 icmp6h->icmp6_solicited = 1;
712 target = (struct in6_addr *)(skb_transport_header(send_skb) + sizeof(struct icmp6hdr)); 705
706 target = (struct in6_addr *) skb_put(send_skb, sizeof(struct in6_addr));
713 *target = msg->target; 707 *target = msg->target;
714 icmp6h->icmp6_cksum = csum_ipv6_magic(saddr, daddr, size, 708 icmp6h->icmp6_cksum = csum_ipv6_magic(saddr, daddr, size,
715 IPPROTO_ICMPV6, 709 IPPROTO_ICMPV6,
diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c
index 0777d0aa18c3..e533259dce3c 100644
--- a/net/core/netprio_cgroup.c
+++ b/net/core/netprio_cgroup.c
@@ -261,7 +261,7 @@ struct cgroup_subsys net_prio_subsys = {
261static int netprio_device_event(struct notifier_block *unused, 261static int netprio_device_event(struct notifier_block *unused,
262 unsigned long event, void *ptr) 262 unsigned long event, void *ptr)
263{ 263{
264 struct net_device *dev = ptr; 264 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
265 struct netprio_map *old; 265 struct netprio_map *old;
266 266
267 /* 267 /*
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 11f2704c3810..9640972ec50e 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -1921,7 +1921,7 @@ static void pktgen_change_name(const struct pktgen_net *pn, struct net_device *d
1921static int pktgen_device_event(struct notifier_block *unused, 1921static int pktgen_device_event(struct notifier_block *unused,
1922 unsigned long event, void *ptr) 1922 unsigned long event, void *ptr)
1923{ 1923{
1924 struct net_device *dev = ptr; 1924 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1925 struct pktgen_net *pn = net_generic(dev_net(dev), pg_net_id); 1925 struct pktgen_net *pn = net_generic(dev_net(dev), pg_net_id);
1926 1926
1927 if (pn->pktgen_exiting) 1927 if (pn->pktgen_exiting)
@@ -2627,6 +2627,29 @@ static void pktgen_finalize_skb(struct pktgen_dev *pkt_dev, struct sk_buff *skb,
2627 pgh->tv_usec = htonl(timestamp.tv_usec); 2627 pgh->tv_usec = htonl(timestamp.tv_usec);
2628} 2628}
2629 2629
2630static struct sk_buff *pktgen_alloc_skb(struct net_device *dev,
2631 struct pktgen_dev *pkt_dev,
2632 unsigned int extralen)
2633{
2634 struct sk_buff *skb = NULL;
2635 unsigned int size = pkt_dev->cur_pkt_size + 64 + extralen +
2636 pkt_dev->pkt_overhead;
2637
2638 if (pkt_dev->flags & F_NODE) {
2639 int node = pkt_dev->node >= 0 ? pkt_dev->node : numa_node_id();
2640
2641 skb = __alloc_skb(NET_SKB_PAD + size, GFP_NOWAIT, 0, node);
2642 if (likely(skb)) {
2643 skb_reserve(skb, NET_SKB_PAD);
2644 skb->dev = dev;
2645 }
2646 } else {
2647 skb = __netdev_alloc_skb(dev, size, GFP_NOWAIT);
2648 }
2649
2650 return skb;
2651}
2652
2630static struct sk_buff *fill_packet_ipv4(struct net_device *odev, 2653static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
2631 struct pktgen_dev *pkt_dev) 2654 struct pktgen_dev *pkt_dev)
2632{ 2655{
@@ -2657,32 +2680,13 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
2657 2680
2658 datalen = (odev->hard_header_len + 16) & ~0xf; 2681 datalen = (odev->hard_header_len + 16) & ~0xf;
2659 2682
2660 if (pkt_dev->flags & F_NODE) { 2683 skb = pktgen_alloc_skb(odev, pkt_dev, datalen);
2661 int node;
2662
2663 if (pkt_dev->node >= 0)
2664 node = pkt_dev->node;
2665 else
2666 node = numa_node_id();
2667
2668 skb = __alloc_skb(NET_SKB_PAD + pkt_dev->cur_pkt_size + 64
2669 + datalen + pkt_dev->pkt_overhead, GFP_NOWAIT, 0, node);
2670 if (likely(skb)) {
2671 skb_reserve(skb, NET_SKB_PAD);
2672 skb->dev = odev;
2673 }
2674 }
2675 else
2676 skb = __netdev_alloc_skb(odev,
2677 pkt_dev->cur_pkt_size + 64
2678 + datalen + pkt_dev->pkt_overhead, GFP_NOWAIT);
2679
2680 if (!skb) { 2684 if (!skb) {
2681 sprintf(pkt_dev->result, "No memory"); 2685 sprintf(pkt_dev->result, "No memory");
2682 return NULL; 2686 return NULL;
2683 } 2687 }
2684 prefetchw(skb->data);
2685 2688
2689 prefetchw(skb->data);
2686 skb_reserve(skb, datalen); 2690 skb_reserve(skb, datalen);
2687 2691
2688 /* Reserve for ethernet and IP header */ 2692 /* Reserve for ethernet and IP header */
@@ -2708,15 +2712,15 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
2708 *vlan_encapsulated_proto = htons(ETH_P_IP); 2712 *vlan_encapsulated_proto = htons(ETH_P_IP);
2709 } 2713 }
2710 2714
2711 skb->network_header = skb->tail; 2715 skb_set_mac_header(skb, 0);
2712 skb->transport_header = skb->network_header + sizeof(struct iphdr); 2716 skb_set_network_header(skb, skb->len);
2713 skb_put(skb, sizeof(struct iphdr) + sizeof(struct udphdr)); 2717 iph = (struct iphdr *) skb_put(skb, sizeof(struct iphdr));
2718
2719 skb_set_transport_header(skb, skb->len);
2720 udph = (struct udphdr *) skb_put(skb, sizeof(struct udphdr));
2714 skb_set_queue_mapping(skb, queue_map); 2721 skb_set_queue_mapping(skb, queue_map);
2715 skb->priority = pkt_dev->skb_priority; 2722 skb->priority = pkt_dev->skb_priority;
2716 2723
2717 iph = ip_hdr(skb);
2718 udph = udp_hdr(skb);
2719
2720 memcpy(eth, pkt_dev->hh, 12); 2724 memcpy(eth, pkt_dev->hh, 12);
2721 *(__be16 *) & eth[12] = protocol; 2725 *(__be16 *) & eth[12] = protocol;
2722 2726
@@ -2746,8 +2750,6 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
2746 iph->check = 0; 2750 iph->check = 0;
2747 iph->check = ip_fast_csum((void *)iph, iph->ihl); 2751 iph->check = ip_fast_csum((void *)iph, iph->ihl);
2748 skb->protocol = protocol; 2752 skb->protocol = protocol;
2749 skb->mac_header = (skb->network_header - ETH_HLEN -
2750 pkt_dev->pkt_overhead);
2751 skb->dev = odev; 2753 skb->dev = odev;
2752 skb->pkt_type = PACKET_HOST; 2754 skb->pkt_type = PACKET_HOST;
2753 pktgen_finalize_skb(pkt_dev, skb, datalen); 2755 pktgen_finalize_skb(pkt_dev, skb, datalen);
@@ -2788,15 +2790,13 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
2788 mod_cur_headers(pkt_dev); 2790 mod_cur_headers(pkt_dev);
2789 queue_map = pkt_dev->cur_queue_map; 2791 queue_map = pkt_dev->cur_queue_map;
2790 2792
2791 skb = __netdev_alloc_skb(odev, 2793 skb = pktgen_alloc_skb(odev, pkt_dev, 16);
2792 pkt_dev->cur_pkt_size + 64
2793 + 16 + pkt_dev->pkt_overhead, GFP_NOWAIT);
2794 if (!skb) { 2794 if (!skb) {
2795 sprintf(pkt_dev->result, "No memory"); 2795 sprintf(pkt_dev->result, "No memory");
2796 return NULL; 2796 return NULL;
2797 } 2797 }
2798 prefetchw(skb->data);
2799 2798
2799 prefetchw(skb->data);
2800 skb_reserve(skb, 16); 2800 skb_reserve(skb, 16);
2801 2801
2802 /* Reserve for ethernet and IP header */ 2802 /* Reserve for ethernet and IP header */
@@ -2822,13 +2822,14 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
2822 *vlan_encapsulated_proto = htons(ETH_P_IPV6); 2822 *vlan_encapsulated_proto = htons(ETH_P_IPV6);
2823 } 2823 }
2824 2824
2825 skb->network_header = skb->tail; 2825 skb_set_mac_header(skb, 0);
2826 skb->transport_header = skb->network_header + sizeof(struct ipv6hdr); 2826 skb_set_network_header(skb, skb->len);
2827 skb_put(skb, sizeof(struct ipv6hdr) + sizeof(struct udphdr)); 2827 iph = (struct ipv6hdr *) skb_put(skb, sizeof(struct ipv6hdr));
2828
2829 skb_set_transport_header(skb, skb->len);
2830 udph = (struct udphdr *) skb_put(skb, sizeof(struct udphdr));
2828 skb_set_queue_mapping(skb, queue_map); 2831 skb_set_queue_mapping(skb, queue_map);
2829 skb->priority = pkt_dev->skb_priority; 2832 skb->priority = pkt_dev->skb_priority;
2830 iph = ipv6_hdr(skb);
2831 udph = udp_hdr(skb);
2832 2833
2833 memcpy(eth, pkt_dev->hh, 12); 2834 memcpy(eth, pkt_dev->hh, 12);
2834 *(__be16 *) &eth[12] = protocol; 2835 *(__be16 *) &eth[12] = protocol;
@@ -2863,8 +2864,6 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
2863 iph->daddr = pkt_dev->cur_in6_daddr; 2864 iph->daddr = pkt_dev->cur_in6_daddr;
2864 iph->saddr = pkt_dev->cur_in6_saddr; 2865 iph->saddr = pkt_dev->cur_in6_saddr;
2865 2866
2866 skb->mac_header = (skb->network_header - ETH_HLEN -
2867 pkt_dev->pkt_overhead);
2868 skb->protocol = protocol; 2867 skb->protocol = protocol;
2869 skb->dev = odev; 2868 skb->dev = odev;
2870 skb->pkt_type = PACKET_HOST; 2869 skb->pkt_type = PACKET_HOST;
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index a08bd2b7fe3f..3de740834d1f 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -947,6 +947,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
947 struct ifla_vf_vlan vf_vlan; 947 struct ifla_vf_vlan vf_vlan;
948 struct ifla_vf_tx_rate vf_tx_rate; 948 struct ifla_vf_tx_rate vf_tx_rate;
949 struct ifla_vf_spoofchk vf_spoofchk; 949 struct ifla_vf_spoofchk vf_spoofchk;
950 struct ifla_vf_link_state vf_linkstate;
950 951
951 /* 952 /*
952 * Not all SR-IOV capable drivers support the 953 * Not all SR-IOV capable drivers support the
@@ -956,18 +957,24 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
956 */ 957 */
957 ivi.spoofchk = -1; 958 ivi.spoofchk = -1;
958 memset(ivi.mac, 0, sizeof(ivi.mac)); 959 memset(ivi.mac, 0, sizeof(ivi.mac));
960 /* The default value for VF link state is "auto"
961 * IFLA_VF_LINK_STATE_AUTO which equals zero
962 */
963 ivi.linkstate = 0;
959 if (dev->netdev_ops->ndo_get_vf_config(dev, i, &ivi)) 964 if (dev->netdev_ops->ndo_get_vf_config(dev, i, &ivi))
960 break; 965 break;
961 vf_mac.vf = 966 vf_mac.vf =
962 vf_vlan.vf = 967 vf_vlan.vf =
963 vf_tx_rate.vf = 968 vf_tx_rate.vf =
964 vf_spoofchk.vf = ivi.vf; 969 vf_spoofchk.vf =
970 vf_linkstate.vf = ivi.vf;
965 971
966 memcpy(vf_mac.mac, ivi.mac, sizeof(ivi.mac)); 972 memcpy(vf_mac.mac, ivi.mac, sizeof(ivi.mac));
967 vf_vlan.vlan = ivi.vlan; 973 vf_vlan.vlan = ivi.vlan;
968 vf_vlan.qos = ivi.qos; 974 vf_vlan.qos = ivi.qos;
969 vf_tx_rate.rate = ivi.tx_rate; 975 vf_tx_rate.rate = ivi.tx_rate;
970 vf_spoofchk.setting = ivi.spoofchk; 976 vf_spoofchk.setting = ivi.spoofchk;
977 vf_linkstate.link_state = ivi.linkstate;
971 vf = nla_nest_start(skb, IFLA_VF_INFO); 978 vf = nla_nest_start(skb, IFLA_VF_INFO);
972 if (!vf) { 979 if (!vf) {
973 nla_nest_cancel(skb, vfinfo); 980 nla_nest_cancel(skb, vfinfo);
@@ -978,7 +985,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
978 nla_put(skb, IFLA_VF_TX_RATE, sizeof(vf_tx_rate), 985 nla_put(skb, IFLA_VF_TX_RATE, sizeof(vf_tx_rate),
979 &vf_tx_rate) || 986 &vf_tx_rate) ||
980 nla_put(skb, IFLA_VF_SPOOFCHK, sizeof(vf_spoofchk), 987 nla_put(skb, IFLA_VF_SPOOFCHK, sizeof(vf_spoofchk),
981 &vf_spoofchk)) 988 &vf_spoofchk) ||
989 nla_put(skb, IFLA_VF_LINK_STATE, sizeof(vf_linkstate),
990 &vf_linkstate))
982 goto nla_put_failure; 991 goto nla_put_failure;
983 nla_nest_end(skb, vf); 992 nla_nest_end(skb, vf);
984 } 993 }
@@ -1238,6 +1247,15 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr *attr)
1238 ivs->setting); 1247 ivs->setting);
1239 break; 1248 break;
1240 } 1249 }
1250 case IFLA_VF_LINK_STATE: {
1251 struct ifla_vf_link_state *ivl;
1252 ivl = nla_data(vf);
1253 err = -EOPNOTSUPP;
1254 if (ops->ndo_set_vf_link_state)
1255 err = ops->ndo_set_vf_link_state(dev, ivl->vf,
1256 ivl->link_state);
1257 break;
1258 }
1241 default: 1259 default:
1242 err = -EINVAL; 1260 err = -EINVAL;
1243 break; 1261 break;
@@ -2091,10 +2109,6 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh)
2091 } 2109 }
2092 2110
2093 addr = nla_data(tb[NDA_LLADDR]); 2111 addr = nla_data(tb[NDA_LLADDR]);
2094 if (is_zero_ether_addr(addr)) {
2095 pr_info("PF_BRIDGE: RTM_NEWNEIGH with invalid ether address\n");
2096 return -EINVAL;
2097 }
2098 2112
2099 err = -EOPNOTSUPP; 2113 err = -EOPNOTSUPP;
2100 2114
@@ -2192,10 +2206,6 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh)
2192 } 2206 }
2193 2207
2194 addr = nla_data(tb[NDA_LLADDR]); 2208 addr = nla_data(tb[NDA_LLADDR]);
2195 if (is_zero_ether_addr(addr)) {
2196 pr_info("PF_BRIDGE: RTM_DELNEIGH with invalid ether address\n");
2197 return -EINVAL;
2198 }
2199 2209
2200 err = -EOPNOTSUPP; 2210 err = -EOPNOTSUPP;
2201 2211
@@ -2667,7 +2677,7 @@ static void rtnetlink_rcv(struct sk_buff *skb)
2667 2677
2668static int rtnetlink_event(struct notifier_block *this, unsigned long event, void *ptr) 2678static int rtnetlink_event(struct notifier_block *this, unsigned long event, void *ptr)
2669{ 2679{
2670 struct net_device *dev = ptr; 2680 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
2671 2681
2672 switch (event) { 2682 switch (event) {
2673 case NETDEV_UP: 2683 case NETDEV_UP:
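
The rtnetlink.c changes above wire up highlight 5: IFLA_VF_LINK_STATE is reported in VF info dumps and routed to a new ndo_set_vf_link_state() hook on set. A sketch of what a driver implementation might look like; "foo", its adapter structure and foo_push_vf_link_state() are hypothetical, while the IFLA_VF_LINK_STATE_* values (auto/enable/disable) are the settings this attribute is expected to carry:

  static int foo_set_vf_link_state(struct net_device *dev, int vf, int link_state)
  {
      struct foo_adapter *adapter = netdev_priv(dev);

      if (vf >= adapter->num_vfs)
          return -EINVAL;

      switch (link_state) {
      case IFLA_VF_LINK_STATE_AUTO:       /* follow the PF's physical link */
      case IFLA_VF_LINK_STATE_ENABLE:     /* force the VF link up */
      case IFLA_VF_LINK_STATE_DISABLE:    /* force the VF link down */
          adapter->vf_state[vf].link_state = link_state;
          return foo_push_vf_link_state(adapter, vf);  /* tell the firmware */
      default:
          return -EINVAL;
      }
  }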
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 1c1738cc4538..724bb7cb173f 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -199,9 +199,7 @@ struct sk_buff *__alloc_skb_head(gfp_t gfp_mask, int node)
 	skb->truesize = sizeof(struct sk_buff);
 	atomic_set(&skb->users, 1);
 
-#ifdef NET_SKBUFF_DATA_USES_OFFSET
-	skb->mac_header = ~0U;
-#endif
+	skb->mac_header = (typeof(skb->mac_header))~0U;
 out:
 	return skb;
 }
@@ -275,10 +273,8 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
 	skb->data = data;
 	skb_reset_tail_pointer(skb);
 	skb->end = skb->tail + size;
-#ifdef NET_SKBUFF_DATA_USES_OFFSET
-	skb->mac_header = ~0U;
-	skb->transport_header = ~0U;
-#endif
+	skb->mac_header = (typeof(skb->mac_header))~0U;
+	skb->transport_header = (typeof(skb->transport_header))~0U;
 
 	/* make sure we initialize shinfo sequentially */
 	shinfo = skb_shinfo(skb);
@@ -344,10 +340,8 @@ struct sk_buff *build_skb(void *data, unsigned int frag_size)
 	skb->data = data;
 	skb_reset_tail_pointer(skb);
 	skb->end = skb->tail + size;
-#ifdef NET_SKBUFF_DATA_USES_OFFSET
-	skb->mac_header = ~0U;
-	skb->transport_header = ~0U;
-#endif
+	skb->mac_header = (typeof(skb->mac_header))~0U;
+	skb->transport_header = (typeof(skb->transport_header))~0U;
 
 	/* make sure we initialize shinfo sequentially */
 	shinfo = skb_shinfo(skb);
@@ -703,6 +697,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
 	new->transport_header = old->transport_header;
 	new->network_header = old->network_header;
 	new->mac_header = old->mac_header;
+	new->inner_protocol = old->inner_protocol;
 	new->inner_transport_header = old->inner_transport_header;
 	new->inner_network_header = old->inner_network_header;
 	new->inner_mac_header = old->inner_mac_header;
@@ -743,6 +738,10 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
 	new->vlan_tci = old->vlan_tci;
 
 	skb_copy_secmark(new, old);
+
+#ifdef CONFIG_NET_LL_RX_POLL
+	new->napi_id = old->napi_id;
+#endif
 }
 
 /*
@@ -915,18 +914,8 @@ static void skb_headers_offset_update(struct sk_buff *skb, int off)
 
 static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
 {
-#ifndef NET_SKBUFF_DATA_USES_OFFSET
-	/*
-	 * Shift between the two data areas in bytes
-	 */
-	unsigned long offset = new->data - old->data;
-#endif
-
 	__copy_skb_header(new, old);
 
-#ifndef NET_SKBUFF_DATA_USES_OFFSET
-	skb_headers_offset_update(new, offset);
-#endif
 	skb_shinfo(new)->gso_size = skb_shinfo(old)->gso_size;
 	skb_shinfo(new)->gso_segs = skb_shinfo(old)->gso_segs;
 	skb_shinfo(new)->gso_type = skb_shinfo(old)->gso_type;
@@ -1118,7 +1107,7 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
 	skb->end = skb->head + size;
 #endif
 	skb->tail += off;
-	skb_headers_offset_update(skb, off);
+	skb_headers_offset_update(skb, nhead);
 	/* Only adjust this if it actually is csum_start rather than csum */
 	if (skb->ip_summed == CHECKSUM_PARTIAL)
 		skb->csum_start += nhead;
@@ -1213,9 +1202,8 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
 	off = newheadroom - oldheadroom;
 	if (n->ip_summed == CHECKSUM_PARTIAL)
 		n->csum_start += off;
-#ifdef NET_SKBUFF_DATA_USES_OFFSET
+
 	skb_headers_offset_update(n, off);
-#endif
 
 	return n;
 }
@@ -2558,8 +2546,13 @@ unsigned int skb_seq_read(unsigned int consumed, const u8 **data,
 	unsigned int block_limit, abs_offset = consumed + st->lower_offset;
 	skb_frag_t *frag;
 
-	if (unlikely(abs_offset >= st->upper_offset))
+	if (unlikely(abs_offset >= st->upper_offset)) {
+		if (st->frag_data) {
+			kunmap_atomic(st->frag_data);
+			st->frag_data = NULL;
+		}
 		return 0;
+	}
 
 next_skb:
 	block_limit = skb_headlen(st->cur_skb) + st->stepped_offset;
@@ -2857,7 +2850,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features)
 						 doffset + tnl_hlen);
 
 		if (fskb != skb_shinfo(skb)->frag_list)
-			continue;
+			goto perform_csum_check;
 
 		if (!sg) {
 			nskb->ip_summed = CHECKSUM_NONE;
@@ -2921,6 +2914,7 @@ skip_fraglist:
 	nskb->len += nskb->data_len;
 	nskb->truesize += nskb->data_len;
 
+perform_csum_check:
 	if (!csum) {
 		nskb->csum = skb_checksum(nskb, doffset,
 					  nskb->len - doffset, 0);
@@ -3503,3 +3497,26 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
 	return true;
 }
 EXPORT_SYMBOL(skb_try_coalesce);
+
+/**
+ * skb_scrub_packet - scrub an skb before sending it to another netns
+ *
+ * @skb: buffer to clean
+ *
+ * skb_scrub_packet can be used to clean an skb before injecting it in
+ * another namespace. We have to clear all information in the skb that
+ * could impact namespace isolation.
+ */
+void skb_scrub_packet(struct sk_buff *skb)
+{
+	skb_orphan(skb);
+	skb->tstamp.tv64 = 0;
+	skb->pkt_type = PACKET_HOST;
+	skb->skb_iif = 0;
+	skb_dst_drop(skb);
+	skb->mark = 0;
+	secpath_reset(skb);
+	nf_reset(skb);
+	nf_reset_trace(skb);
+}
+EXPORT_SYMBOL_GPL(skb_scrub_packet);
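skb_scrub_packet() is intended for tunnel code that moves a packet across a network-namespace boundary. A hedged sketch of a caller (the ex_* names are invented; the real users are the tunnel paths touched elsewhere in this series):

/* Before handing a decapsulated skb to a device in another netns, drop any
 * per-namespace state (mark, dst, conntrack, ...) via skb_scrub_packet(). */
static void ex_tunnel_deliver(struct sk_buff *skb, struct net_device *dst_dev)
{
	if (!net_eq(dev_net(skb->dev), dev_net(dst_dev)))
		skb_scrub_packet(skb);

	skb->dev = dst_dev;
	netif_rx(skb);
}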
diff --git a/net/core/sock.c b/net/core/sock.c
index d6d024cfaaaf..ab06b719f5b1 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -139,6 +139,8 @@
 #include <net/tcp.h>
 #endif
 
+#include <net/ll_poll.h>
+
 static DEFINE_MUTEX(proto_list_mutex);
 static LIST_HEAD(proto_list);
 
@@ -898,6 +900,19 @@ set_rcvbuf:
 		sock_valbool_flag(sk, SOCK_SELECT_ERR_QUEUE, valbool);
 		break;
 
+#ifdef CONFIG_NET_LL_RX_POLL
+	case SO_LL:
+		/* allow unprivileged users to decrease the value */
+		if ((val > sk->sk_ll_usec) && !capable(CAP_NET_ADMIN))
+			ret = -EPERM;
+		else {
+			if (val < 0)
+				ret = -EINVAL;
+			else
+				sk->sk_ll_usec = val;
+		}
+		break;
+#endif
 	default:
 		ret = -ENOPROTOOPT;
 		break;
@@ -1155,6 +1170,12 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
 		v.val = sock_flag(sk, SOCK_SELECT_ERR_QUEUE);
 		break;
 
+#ifdef CONFIG_NET_LL_RX_POLL
+	case SO_LL:
+		v.val = sk->sk_ll_usec;
+		break;
+#endif
+
 	default:
 		return -ENOPROTOOPT;
 	}
@@ -2271,6 +2292,11 @@ void sock_init_data(struct socket *sock, struct sock *sk)
 
 	sk->sk_stamp = ktime_set(-1L, 0);
 
+#ifdef CONFIG_NET_LL_RX_POLL
+	sk->sk_napi_id = 0;
+	sk->sk_ll_usec = sysctl_net_ll_read;
+#endif
+
 	/*
 	 * Before updating sk_refcnt, we must commit prior changes to memory
 	 * (Documentation/RCU/rculist_nulls.txt for details)
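The new SO_LL option exposes the per-socket busy-poll budget, in microseconds, to applications. A userspace sketch, assuming the installed headers on a CONFIG_NET_LL_RX_POLL kernel define SO_LL; as the setsockopt hunk above shows, raising the value above the socket's current setting requires CAP_NET_ADMIN, while lowering it does not:

#include <stdio.h>
#include <sys/socket.h>

static int ex_enable_busy_read(int fd, unsigned int usec)
{
#ifdef SO_LL
	/* Request up to 'usec' microseconds of busy polling on reads. */
	if (setsockopt(fd, SOL_SOCKET, SO_LL, &usec, sizeof(usec)) < 0) {
		perror("setsockopt(SO_LL)");
		return -1;
	}
	return 0;
#else
	(void)fd;
	(void)usec;
	fprintf(stderr, "SO_LL not defined by these headers\n");
	return -1;
#endif
}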
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index cfdb46ab3a7f..afc677eadd93 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -19,16 +19,17 @@
 #include <net/ip.h>
 #include <net/sock.h>
 #include <net/net_ratelimit.h>
+#include <net/ll_poll.h>
 
 static int one = 1;
 
 #ifdef CONFIG_RPS
-static int rps_sock_flow_sysctl(ctl_table *table, int write,
+static int rps_sock_flow_sysctl(struct ctl_table *table, int write,
 				void __user *buffer, size_t *lenp, loff_t *ppos)
 {
 	unsigned int orig_size, size;
 	int ret, i;
-	ctl_table tmp = {
+	struct ctl_table tmp = {
 		.data = &size,
 		.maxlen = sizeof(size),
 		.mode = table->mode
@@ -87,6 +88,109 @@ static int rps_sock_flow_sysctl(ctl_table *table, int write,
 }
 #endif /* CONFIG_RPS */
 
+#ifdef CONFIG_NET_FLOW_LIMIT
+static DEFINE_MUTEX(flow_limit_update_mutex);
+
+static int flow_limit_cpu_sysctl(struct ctl_table *table, int write,
+				 void __user *buffer, size_t *lenp,
+				 loff_t *ppos)
+{
+	struct sd_flow_limit *cur;
+	struct softnet_data *sd;
+	cpumask_var_t mask;
+	int i, len, ret = 0;
+
+	if (!alloc_cpumask_var(&mask, GFP_KERNEL))
+		return -ENOMEM;
+
+	if (write) {
+		ret = cpumask_parse_user(buffer, *lenp, mask);
+		if (ret)
+			goto done;
+
+		mutex_lock(&flow_limit_update_mutex);
+		len = sizeof(*cur) + netdev_flow_limit_table_len;
+		for_each_possible_cpu(i) {
+			sd = &per_cpu(softnet_data, i);
+			cur = rcu_dereference_protected(sd->flow_limit,
+				     lockdep_is_held(&flow_limit_update_mutex));
+			if (cur && !cpumask_test_cpu(i, mask)) {
+				RCU_INIT_POINTER(sd->flow_limit, NULL);
+				synchronize_rcu();
+				kfree(cur);
+			} else if (!cur && cpumask_test_cpu(i, mask)) {
+				cur = kzalloc(len, GFP_KERNEL);
+				if (!cur) {
+					/* not unwinding previous changes */
+					ret = -ENOMEM;
+					goto write_unlock;
+				}
+				cur->num_buckets = netdev_flow_limit_table_len;
+				rcu_assign_pointer(sd->flow_limit, cur);
+			}
+		}
+write_unlock:
+		mutex_unlock(&flow_limit_update_mutex);
+	} else {
+		char kbuf[128];
+
+		if (*ppos || !*lenp) {
+			*lenp = 0;
+			goto done;
+		}
+
+		cpumask_clear(mask);
+		rcu_read_lock();
+		for_each_possible_cpu(i) {
+			sd = &per_cpu(softnet_data, i);
+			if (rcu_dereference(sd->flow_limit))
+				cpumask_set_cpu(i, mask);
+		}
+		rcu_read_unlock();
+
+		len = min(sizeof(kbuf) - 1, *lenp);
+		len = cpumask_scnprintf(kbuf, len, mask);
+		if (!len) {
+			*lenp = 0;
+			goto done;
+		}
+		if (len < *lenp)
+			kbuf[len++] = '\n';
+		if (copy_to_user(buffer, kbuf, len)) {
+			ret = -EFAULT;
+			goto done;
+		}
+		*lenp = len;
+		*ppos += len;
+	}
+
+done:
+	free_cpumask_var(mask);
+	return ret;
+}
+
+static int flow_limit_table_len_sysctl(struct ctl_table *table, int write,
+				       void __user *buffer, size_t *lenp,
+				       loff_t *ppos)
+{
+	unsigned int old, *ptr;
+	int ret;
+
+	mutex_lock(&flow_limit_update_mutex);
+
+	ptr = table->data;
+	old = *ptr;
+	ret = proc_dointvec(table, write, buffer, lenp, ppos);
+	if (!ret && write && !is_power_of_2(*ptr)) {
+		*ptr = old;
+		ret = -EINVAL;
+	}
+
+	mutex_unlock(&flow_limit_update_mutex);
+	return ret;
+}
+#endif /* CONFIG_NET_FLOW_LIMIT */
+
 static struct ctl_table net_core_table[] = {
 #ifdef CONFIG_NET
 	{
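The two flow-limit handlers above back sysctls registered under net.core (see the next hunk). A small usage example, not part of the patch: enabling flow limiting on CPUs 0-3 amounts to writing a hex cpumask, which flow_limit_cpu_sysctl() parses with cpumask_parse_user():

#include <stdio.h>

int main(void)
{
	/* Equivalent to: echo f > /proc/sys/net/core/flow_limit_cpu_bitmap */
	FILE *f = fopen("/proc/sys/net/core/flow_limit_cpu_bitmap", "w");

	if (!f) {
		perror("flow_limit_cpu_bitmap");
		return 1;
	}
	fputs("f\n", f);	/* hex mask: CPUs 0-3 */
	return fclose(f) ? 1 : 0;
}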
@@ -180,6 +284,37 @@ static struct ctl_table net_core_table[] = {
 		.proc_handler = rps_sock_flow_sysctl
 	},
 #endif
+#ifdef CONFIG_NET_FLOW_LIMIT
+	{
+		.procname = "flow_limit_cpu_bitmap",
+		.mode = 0644,
+		.proc_handler = flow_limit_cpu_sysctl
+	},
+	{
+		.procname = "flow_limit_table_len",
+		.data = &netdev_flow_limit_table_len,
+		.maxlen = sizeof(int),
+		.mode = 0644,
+		.proc_handler = flow_limit_table_len_sysctl
+	},
+#endif /* CONFIG_NET_FLOW_LIMIT */
+#ifdef CONFIG_NET_LL_RX_POLL
+	{
+		.procname = "low_latency_poll",
+		.data = &sysctl_net_ll_poll,
+		.maxlen = sizeof(unsigned int),
+		.mode = 0644,
+		.proc_handler = proc_dointvec
+	},
+	{
+		.procname = "low_latency_read",
+		.data = &sysctl_net_ll_read,
+		.maxlen = sizeof(unsigned int),
+		.mode = 0644,
+		.proc_handler = proc_dointvec
+	},
+#
+#endif
 #endif /* CONFIG_NET */
 	{
 		.procname = "netdev_budget",