Diffstat (limited to 'net/core')
 net/core/Makefile          |    3
 net/core/datagram.c        |    2
 net/core/dev.c             | 1742
 net/core/dev_addr_lists.c  |   74
 net/core/dev_ioctl.c       |  576
 net/core/dst.c             |    1
 net/core/ethtool.c         |   46
 net/core/filter.c          |   13
 net/core/flow.c            |   13
 net/core/flow_dissector.c  |  173
 net/core/neighbour.c       |   20
 net/core/net-procfs.c      |  411
 net/core/net-sysfs.c       |  180
 net/core/net_namespace.c   |    2
 net/core/netpoll.c         |  721
 net/core/netprio_cgroup.c  |    4
 net/core/pktgen.c          |  205
 net/core/request_sock.c    |    2
 net/core/rtnetlink.c       |  217
 net/core/scm.c             |    5
 net/core/skbuff.c          |  157
 net/core/sock.c            |   26
 net/core/sock_diag.c       |   27
 net/core/sysctl_net_core.c |   14
 24 files changed, 2725 insertions(+), 1909 deletions(-)
diff --git a/net/core/Makefile b/net/core/Makefile
index 674641b13aea..b33b996f5dd6 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -9,10 +9,11 @@ obj-$(CONFIG_SYSCTL) += sysctl_net_core.o
 
 obj-y += dev.o ethtool.o dev_addr_lists.o dst.o netevent.o \
	 neighbour.o rtnetlink.o utils.o link_watch.o filter.o \
-	 sock_diag.o
+	 sock_diag.o dev_ioctl.o
 
 obj-$(CONFIG_XFRM) += flow.o
 obj-y += net-sysfs.o
+obj-$(CONFIG_PROC_FS) += net-procfs.o
 obj-$(CONFIG_NET_PKTGEN) += pktgen.o
 obj-$(CONFIG_NETPOLL) += netpoll.o
 obj-$(CONFIG_NET_DMA) += user_dma.o
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 0337e2b76862..368f9c3f9dc6 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -187,7 +187,7 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags,
 	skb_queue_walk(queue, skb) {
 		*peeked = skb->peeked;
 		if (flags & MSG_PEEK) {
-			if (*off >= skb->len) {
+			if (*off >= skb->len && skb->len) {
 				*off -= skb->len;
 				continue;
 			}
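Note: the added "&& skb->len" guards the MSG_PEEK offset walk. For a zero-length datagram the old test "*off >= skb->len" was always true, so the skb was skipped on every pass and could never be returned to a peeking reader. A hedged userspace sketch of the case this fixes, assuming a datagram socket with peek offsets enabled:

    int off = 0;    /* enable peek-with-offset semantics */
    setsockopt(fd, SOL_SOCKET, SO_PEEK_OFF, &off, sizeof(off));
    /* With the fix, a queued zero-length datagram is now returned
     * to the caller instead of being skipped forever. */
    ssize_t n = recv(fd, buf, sizeof(buf), MSG_PEEK);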
diff --git a/net/core/dev.c b/net/core/dev.c
index 515473ee52cb..a06a7a58dd11 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -97,8 +97,6 @@
 #include <net/net_namespace.h>
 #include <net/sock.h>
 #include <linux/rtnetlink.h>
-#include <linux/proc_fs.h>
-#include <linux/seq_file.h>
 #include <linux/stat.h>
 #include <net/dst.h>
 #include <net/pkt_sched.h>
@@ -106,12 +104,10 @@
 #include <net/xfrm.h>
 #include <linux/highmem.h>
 #include <linux/init.h>
-#include <linux/kmod.h>
 #include <linux/module.h>
 #include <linux/netpoll.h>
 #include <linux/rcupdate.h>
 #include <linux/delay.h>
-#include <net/wext.h>
 #include <net/iw_handler.h>
 #include <asm/current.h>
 #include <linux/audit.h>
@@ -132,9 +128,7 @@
 #include <linux/pci.h>
 #include <linux/inetdevice.h>
 #include <linux/cpu_rmap.h>
-#include <linux/net_tstamp.h>
 #include <linux/static_key.h>
-#include <net/flow_keys.h>
 
 #include "net-sysfs.h"
 
@@ -144,41 +138,10 @@
 /* This should be increased if a protocol with a bigger head is added. */
 #define GRO_MAX_HEAD (MAX_HEADER + 128)
 
-/*
- *	The list of packet types we will receive (as opposed to discard)
- *	and the routines to invoke.
- *
- *	Why 16. Because with 16 the only overlap we get on a hash of the
- *	low nibble of the protocol value is RARP/SNAP/X.25.
- *
- *	NOTE:  That is no longer true with the addition of VLAN tags.  Not
- *	       sure which should go first, but I bet it won't make much
- *	       difference if we are running VLANs.  The good news is that
- *	       this protocol won't be in the list unless compiled in, so
- *	       the average user (w/out VLANs) will not be adversely affected.
- *	       --BLG
- *
- *		0800	IP
- *		8100	802.1Q VLAN
- *		0001	802.3
- *		0002	AX.25
- *		0004	802.2
- *		8035	RARP
- *		0005	SNAP
- *		0805	X.25
- *		0806	ARP
- *		8137	IPX
- *		0009	Localtalk
- *		86DD	IPv6
- */
-
-#define PTYPE_HASH_SIZE	(16)
-#define PTYPE_HASH_MASK	(PTYPE_HASH_SIZE - 1)
-
 static DEFINE_SPINLOCK(ptype_lock);
 static DEFINE_SPINLOCK(offload_lock);
-static struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
-static struct list_head ptype_all __read_mostly;	/* Taps */
+struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
+struct list_head ptype_all __read_mostly;	/* Taps */
 static struct list_head offload_base __read_mostly;
 
 /*
@@ -695,11 +658,10 @@ __setup("netdev=", netdev_boot_setup);
 
 struct net_device *__dev_get_by_name(struct net *net, const char *name)
 {
-	struct hlist_node *p;
 	struct net_device *dev;
 	struct hlist_head *head = dev_name_hash(net, name);
 
-	hlist_for_each_entry(dev, p, head, name_hlist)
+	hlist_for_each_entry(dev, head, name_hlist)
 		if (!strncmp(dev->name, name, IFNAMSIZ))
 			return dev;
 
@@ -721,11 +683,10 @@ EXPORT_SYMBOL(__dev_get_by_name);
 
 struct net_device *dev_get_by_name_rcu(struct net *net, const char *name)
 {
-	struct hlist_node *p;
 	struct net_device *dev;
 	struct hlist_head *head = dev_name_hash(net, name);
 
-	hlist_for_each_entry_rcu(dev, p, head, name_hlist)
+	hlist_for_each_entry_rcu(dev, head, name_hlist)
 		if (!strncmp(dev->name, name, IFNAMSIZ))
 			return dev;
 
@@ -772,11 +733,10 @@ EXPORT_SYMBOL(dev_get_by_name);
 
 struct net_device *__dev_get_by_index(struct net *net, int ifindex)
 {
-	struct hlist_node *p;
 	struct net_device *dev;
 	struct hlist_head *head = dev_index_hash(net, ifindex);
 
-	hlist_for_each_entry(dev, p, head, index_hlist)
+	hlist_for_each_entry(dev, head, index_hlist)
 		if (dev->ifindex == ifindex)
 			return dev;
 
@@ -797,11 +757,10 @@ EXPORT_SYMBOL(__dev_get_by_index);
 
 struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex)
 {
-	struct hlist_node *p;
 	struct net_device *dev;
 	struct hlist_head *head = dev_index_hash(net, ifindex);
 
-	hlist_for_each_entry_rcu(dev, p, head, index_hlist)
+	hlist_for_each_entry_rcu(dev, head, index_hlist)
 		if (dev->ifindex == ifindex)
 			return dev;
 
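Note: all four lookup helpers drop their "struct hlist_node *p" cursor because hlist_for_each_entry() and hlist_for_each_entry_rcu() now take the typed entry pointer directly; this tracks the tree-wide hlist iterator cleanup merged around v3.9. A minimal sketch of the shape change (signatures only):

    /* old: hlist_for_each_entry(tpos, pos, head, member)
     *      needed a separate struct hlist_node *pos cursor.
     * new: hlist_for_each_entry(pos, head, member)
     *      derives the cursor from the entry itself. */
    struct net_device *dev;

    hlist_for_each_entry(dev, head, name_hlist)
            if (!strncmp(dev->name, name, IFNAMSIZ))
                    return dev;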
@@ -1227,36 +1186,6 @@ void netdev_notify_peers(struct net_device *dev)
 }
 EXPORT_SYMBOL(netdev_notify_peers);
 
-/**
- *	dev_load 	- load a network module
- *	@net: the applicable net namespace
- *	@name: name of interface
- *
- *	If a network interface is not present and the process has suitable
- *	privileges this function loads the module. If module loading is not
- *	available in this kernel then it becomes a nop.
- */
-
-void dev_load(struct net *net, const char *name)
-{
-	struct net_device *dev;
-	int no_module;
-
-	rcu_read_lock();
-	dev = dev_get_by_name_rcu(net, name);
-	rcu_read_unlock();
-
-	no_module = !dev;
-	if (no_module && capable(CAP_NET_ADMIN))
-		no_module = request_module("netdev-%s", name);
-	if (no_module && capable(CAP_SYS_MODULE)) {
-		if (!request_module("%s", name))
-			pr_warn("Loading kernel module for a network device with CAP_SYS_MODULE (deprecated).  Use CAP_NET_ADMIN and alias netdev-%s instead.\n",
-				name);
-	}
-}
-EXPORT_SYMBOL(dev_load);
-
 static int __dev_open(struct net_device *dev)
 {
 	const struct net_device_ops *ops = dev->netdev_ops;
@@ -1267,6 +1196,14 @@ static int __dev_open(struct net_device *dev)
 	if (!netif_device_present(dev))
 		return -ENODEV;
 
+	/* Block netpoll from trying to do any rx path servicing.
+	 * If we don't do this there is a chance ndo_poll_controller
+	 * or ndo_poll may be running while we open the device
+	 */
+	ret = netpoll_rx_disable(dev);
+	if (ret)
+		return ret;
+
 	ret = call_netdevice_notifiers(NETDEV_PRE_UP, dev);
 	ret = notifier_to_errno(ret);
 	if (ret)
@@ -1280,6 +1217,8 @@ static int __dev_open(struct net_device *dev)
 	if (!ret && ops->ndo_open)
 		ret = ops->ndo_open(dev);
 
+	netpoll_rx_enable(dev);
+
 	if (ret)
 		clear_bit(__LINK_STATE_START, &dev->state);
 	else {
@@ -1371,9 +1310,16 @@ static int __dev_close(struct net_device *dev)
 	int retval;
 	LIST_HEAD(single);
 
+	/* Temporarily disable netpoll until the interface is down */
+	retval = netpoll_rx_disable(dev);
+	if (retval)
+		return retval;
+
 	list_add(&dev->unreg_list, &single);
 	retval = __dev_close_many(&single);
 	list_del(&single);
+
+	netpoll_rx_enable(dev);
 	return retval;
 }
 
@@ -1409,14 +1355,22 @@ static int dev_close_many(struct list_head *head)
  */
 int dev_close(struct net_device *dev)
 {
+	int ret = 0;
 	if (dev->flags & IFF_UP) {
 		LIST_HEAD(single);
 
+		/* Block netpoll rx while the interface is going down */
+		ret = netpoll_rx_disable(dev);
+		if (ret)
+			return ret;
+
 		list_add(&dev->unreg_list, &single);
 		dev_close_many(&single);
 		list_del(&single);
+
+		netpoll_rx_enable(dev);
 	}
-	return 0;
+	return ret;
 }
 EXPORT_SYMBOL(dev_close);
 
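Note: __dev_open(), __dev_close() and dev_close() now bracket the state change with the netpoll helpers added by this series, so ndo_poll_controller cannot run while the device is half up or half down. The pairing always has this shape (sketch):

    ret = netpoll_rx_disable(dev);  /* may fail; checked first */
    if (ret)
            return ret;             /* device state untouched on failure */
    /* ... bring the interface up or down ... */
    netpoll_rx_enable(dev);         /* unconditionally re-arm netpoll rx */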
@@ -1621,57 +1575,6 @@ static inline void net_timestamp_set(struct sk_buff *skb)
 		__net_timestamp(SKB);			\
 	}						\
 
-static int net_hwtstamp_validate(struct ifreq *ifr)
-{
-	struct hwtstamp_config cfg;
-	enum hwtstamp_tx_types tx_type;
-	enum hwtstamp_rx_filters rx_filter;
-	int tx_type_valid = 0;
-	int rx_filter_valid = 0;
-
-	if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg)))
-		return -EFAULT;
-
-	if (cfg.flags) /* reserved for future extensions */
-		return -EINVAL;
-
-	tx_type = cfg.tx_type;
-	rx_filter = cfg.rx_filter;
-
-	switch (tx_type) {
-	case HWTSTAMP_TX_OFF:
-	case HWTSTAMP_TX_ON:
-	case HWTSTAMP_TX_ONESTEP_SYNC:
-		tx_type_valid = 1;
-		break;
-	}
-
-	switch (rx_filter) {
-	case HWTSTAMP_FILTER_NONE:
-	case HWTSTAMP_FILTER_ALL:
-	case HWTSTAMP_FILTER_SOME:
-	case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
-	case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
-	case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
-	case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
-	case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
-	case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
-	case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
-	case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
-	case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
-	case HWTSTAMP_FILTER_PTP_V2_EVENT:
-	case HWTSTAMP_FILTER_PTP_V2_SYNC:
-	case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
-		rx_filter_valid = 1;
-		break;
-	}
-
-	if (!tx_type_valid || !rx_filter_valid)
-		return -ERANGE;
-
-	return 0;
-}
-
 static inline bool is_skb_forwardable(struct net_device *dev,
 				      struct sk_buff *skb)
 {
@@ -1857,6 +1760,230 @@ static void netif_setup_tc(struct net_device *dev, unsigned int txq)
 	}
 }
 
+#ifdef CONFIG_XPS
+static DEFINE_MUTEX(xps_map_mutex);
+#define xmap_dereference(P)		\
+	rcu_dereference_protected((P), lockdep_is_held(&xps_map_mutex))
+
+static struct xps_map *remove_xps_queue(struct xps_dev_maps *dev_maps,
+					int cpu, u16 index)
+{
+	struct xps_map *map = NULL;
+	int pos;
+
+	if (dev_maps)
+		map = xmap_dereference(dev_maps->cpu_map[cpu]);
+
+	for (pos = 0; map && pos < map->len; pos++) {
+		if (map->queues[pos] == index) {
+			if (map->len > 1) {
+				map->queues[pos] = map->queues[--map->len];
+			} else {
+				RCU_INIT_POINTER(dev_maps->cpu_map[cpu], NULL);
+				kfree_rcu(map, rcu);
+				map = NULL;
+			}
+			break;
+		}
+	}
+
+	return map;
+}
+
+static void netif_reset_xps_queues_gt(struct net_device *dev, u16 index)
+{
+	struct xps_dev_maps *dev_maps;
+	int cpu, i;
+	bool active = false;
+
+	mutex_lock(&xps_map_mutex);
+	dev_maps = xmap_dereference(dev->xps_maps);
+
+	if (!dev_maps)
+		goto out_no_maps;
+
+	for_each_possible_cpu(cpu) {
+		for (i = index; i < dev->num_tx_queues; i++) {
+			if (!remove_xps_queue(dev_maps, cpu, i))
+				break;
+		}
+		if (i == dev->num_tx_queues)
+			active = true;
+	}
+
+	if (!active) {
+		RCU_INIT_POINTER(dev->xps_maps, NULL);
+		kfree_rcu(dev_maps, rcu);
+	}
+
+	for (i = index; i < dev->num_tx_queues; i++)
+		netdev_queue_numa_node_write(netdev_get_tx_queue(dev, i),
+					     NUMA_NO_NODE);
+
+out_no_maps:
+	mutex_unlock(&xps_map_mutex);
+}
+
+static struct xps_map *expand_xps_map(struct xps_map *map,
+				      int cpu, u16 index)
+{
+	struct xps_map *new_map;
+	int alloc_len = XPS_MIN_MAP_ALLOC;
+	int i, pos;
+
+	for (pos = 0; map && pos < map->len; pos++) {
+		if (map->queues[pos] != index)
+			continue;
+		return map;
+	}
+
+	/* Need to add queue to this CPU's existing map */
+	if (map) {
+		if (pos < map->alloc_len)
+			return map;
+
+		alloc_len = map->alloc_len * 2;
+	}
+
+	/* Need to allocate new map to store queue on this CPU's map */
+	new_map = kzalloc_node(XPS_MAP_SIZE(alloc_len), GFP_KERNEL,
+			       cpu_to_node(cpu));
+	if (!new_map)
+		return NULL;
+
+	for (i = 0; i < pos; i++)
+		new_map->queues[i] = map->queues[i];
+	new_map->alloc_len = alloc_len;
+	new_map->len = pos;
+
+	return new_map;
+}
+
+int netif_set_xps_queue(struct net_device *dev, struct cpumask *mask, u16 index)
+{
+	struct xps_dev_maps *dev_maps, *new_dev_maps = NULL;
+	struct xps_map *map, *new_map;
+	int maps_sz = max_t(unsigned int, XPS_DEV_MAPS_SIZE, L1_CACHE_BYTES);
+	int cpu, numa_node_id = -2;
+	bool active = false;
+
+	mutex_lock(&xps_map_mutex);
+
+	dev_maps = xmap_dereference(dev->xps_maps);
+
+	/* allocate memory for queue storage */
+	for_each_online_cpu(cpu) {
+		if (!cpumask_test_cpu(cpu, mask))
+			continue;
+
+		if (!new_dev_maps)
+			new_dev_maps = kzalloc(maps_sz, GFP_KERNEL);
+		if (!new_dev_maps) {
+			mutex_unlock(&xps_map_mutex);
+			return -ENOMEM;
+		}
+
+		map = dev_maps ? xmap_dereference(dev_maps->cpu_map[cpu]) :
+				 NULL;
+
+		map = expand_xps_map(map, cpu, index);
+		if (!map)
+			goto error;
+
+		RCU_INIT_POINTER(new_dev_maps->cpu_map[cpu], map);
+	}
+
+	if (!new_dev_maps)
+		goto out_no_new_maps;
+
+	for_each_possible_cpu(cpu) {
+		if (cpumask_test_cpu(cpu, mask) && cpu_online(cpu)) {
+			/* add queue to CPU maps */
+			int pos = 0;
+
+			map = xmap_dereference(new_dev_maps->cpu_map[cpu]);
+			while ((pos < map->len) && (map->queues[pos] != index))
+				pos++;
+
+			if (pos == map->len)
+				map->queues[map->len++] = index;
+#ifdef CONFIG_NUMA
+			if (numa_node_id == -2)
+				numa_node_id = cpu_to_node(cpu);
+			else if (numa_node_id != cpu_to_node(cpu))
+				numa_node_id = -1;
+#endif
+		} else if (dev_maps) {
+			/* fill in the new device map from the old device map */
+			map = xmap_dereference(dev_maps->cpu_map[cpu]);
+			RCU_INIT_POINTER(new_dev_maps->cpu_map[cpu], map);
+		}
+
+	}
+
+	rcu_assign_pointer(dev->xps_maps, new_dev_maps);
+
+	/* Cleanup old maps */
+	if (dev_maps) {
+		for_each_possible_cpu(cpu) {
+			new_map = xmap_dereference(new_dev_maps->cpu_map[cpu]);
+			map = xmap_dereference(dev_maps->cpu_map[cpu]);
+			if (map && map != new_map)
+				kfree_rcu(map, rcu);
+		}
+
+		kfree_rcu(dev_maps, rcu);
+	}
+
+	dev_maps = new_dev_maps;
+	active = true;
+
+out_no_new_maps:
+	/* update Tx queue numa node */
+	netdev_queue_numa_node_write(netdev_get_tx_queue(dev, index),
+				     (numa_node_id >= 0) ? numa_node_id :
+				     NUMA_NO_NODE);
+
+	if (!dev_maps)
+		goto out_no_maps;
+
+	/* removes queue from unused CPUs */
+	for_each_possible_cpu(cpu) {
+		if (cpumask_test_cpu(cpu, mask) && cpu_online(cpu))
+			continue;
+
+		if (remove_xps_queue(dev_maps, cpu, index))
+			active = true;
+	}
+
+	/* free map if not active */
+	if (!active) {
+		RCU_INIT_POINTER(dev->xps_maps, NULL);
+		kfree_rcu(dev_maps, rcu);
+	}
+
+out_no_maps:
+	mutex_unlock(&xps_map_mutex);
+
+	return 0;
+error:
+	/* remove any maps that we added */
+	for_each_possible_cpu(cpu) {
+		new_map = xmap_dereference(new_dev_maps->cpu_map[cpu]);
+		map = dev_maps ? xmap_dereference(dev_maps->cpu_map[cpu]) :
+				 NULL;
+		if (new_map && new_map != map)
+			kfree(new_map);
+	}
+
+	mutex_unlock(&xps_map_mutex);
+
+	kfree(new_dev_maps);
+	return -ENOMEM;
+}
+EXPORT_SYMBOL(netif_set_xps_queue);
+
+#endif
 /*
  * Routine to help set real_num_tx_queues. To avoid skbs mapped to queues
  * greater then real_num_tx_queues stale skbs on the qdisc must be flushed.
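Note: netif_set_xps_queue() gives one entry point for rebuilding the per-CPU transmit-queue maps under xps_map_mutex; per the diffstat, the sysfs xps_cpus store in net-sysfs.c is reworked to call it. A hedged sketch of a driver pinning tx queue 0 of "dev" to CPUs 0-3 (error handling trimmed, names from the hunk above):

    cpumask_var_t mask;
    int cpu, err;

    if (!alloc_cpumask_var(&mask, GFP_KERNEL))
            return -ENOMEM;
    cpumask_clear(mask);
    for (cpu = 0; cpu < 4; cpu++)
            cpumask_set_cpu(cpu, mask);
    err = netif_set_xps_queue(dev, mask, 0);   /* queue index 0 */
    free_cpumask_var(mask);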
@@ -1880,8 +2007,12 @@ int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
 		if (dev->num_tc)
 			netif_setup_tc(dev, txq);
 
-		if (txq < dev->real_num_tx_queues)
+		if (txq < dev->real_num_tx_queues) {
 			qdisc_reset_all_tx_gt(dev, txq);
+#ifdef CONFIG_XPS
+			netif_reset_xps_queues_gt(dev, txq);
+#endif
+		}
 	}
 
 	dev->real_num_tx_queues = txq;
@@ -2046,6 +2177,15 @@ int skb_checksum_help(struct sk_buff *skb)
 		return -EINVAL;
 	}
 
+	/* Before computing a checksum, we should make sure no frag could
+	 * be modified by an external entity : checksum could be wrong.
+	 */
+	if (skb_has_shared_frag(skb)) {
+		ret = __skb_linearize(skb);
+		if (ret)
+			goto out;
+	}
+
 	offset = skb_checksum_start_offset(skb);
 	BUG_ON(offset >= skb_headlen(skb));
 	csum = skb_checksum(skb, offset, skb->len - offset, 0);
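Note: skb_has_shared_frag() reports whether a page fragment may still be written by its producer (e.g. a zero-copy sender that still owns the pages); if the data changes after the checksum is computed, the checksum is wrong on the wire. Linearizing first copies the frags into the skb's private linear area, so any checksum-computing path wants this guard (shape mirrors the hunk above):

    if (skb_has_shared_frag(skb)) {
            ret = __skb_linearize(skb);   /* private, stable copy of frags */
            if (ret)
                    goto out;
    }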
@@ -2069,25 +2209,19 @@ out:
 EXPORT_SYMBOL(skb_checksum_help);
 
 /**
- *	skb_gso_segment - Perform segmentation on skb.
+ *	skb_mac_gso_segment - mac layer segmentation handler.
  *	@skb: buffer to segment
  *	@features: features for the output path (see dev->features)
- *
- *	This function segments the given skb and returns a list of segments.
- *
- *	It may return NULL if the skb requires no segmentation.  This is
- *	only possible when GSO is used for verifying header integrity.
  */
-struct sk_buff *skb_gso_segment(struct sk_buff *skb,
+struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb,
 	netdev_features_t features)
 {
 	struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
 	struct packet_offload *ptype;
 	__be16 type = skb->protocol;
-	int vlan_depth = ETH_HLEN;
-	int err;
 
 	while (type == htons(ETH_P_8021Q)) {
+		int vlan_depth = ETH_HLEN;
 		struct vlan_hdr *vh;
 
 		if (unlikely(!pskb_may_pull(skb, vlan_depth + VLAN_HLEN)))
@@ -2098,22 +2232,14 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb,
 		vlan_depth += VLAN_HLEN;
 	}
 
-	skb_reset_mac_header(skb);
-	skb->mac_len = skb->network_header - skb->mac_header;
 	__skb_pull(skb, skb->mac_len);
 
-	if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
-		skb_warn_bad_offload(skb);
-
-		if (skb_header_cloned(skb) &&
-		    (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
-			return ERR_PTR(err);
-	}
-
 	rcu_read_lock();
 	list_for_each_entry_rcu(ptype, &offload_base, list) {
 		if (ptype->type == type && ptype->callbacks.gso_segment) {
 			if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
+				int err;
+
 				err = ptype->callbacks.gso_send_check(skb);
 				segs = ERR_PTR(err);
 				if (err || skb_gso_ok(skb, features))
@@ -2131,7 +2257,50 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb,
 
 	return segs;
 }
-EXPORT_SYMBOL(skb_gso_segment);
+EXPORT_SYMBOL(skb_mac_gso_segment);
+
+
+/* openvswitch calls this on rx path, so we need a different check.
+ */
+static inline bool skb_needs_check(struct sk_buff *skb, bool tx_path)
+{
+	if (tx_path)
+		return skb->ip_summed != CHECKSUM_PARTIAL;
+	else
+		return skb->ip_summed == CHECKSUM_NONE;
+}
+
+/**
+ *	__skb_gso_segment - Perform segmentation on skb.
+ *	@skb: buffer to segment
+ *	@features: features for the output path (see dev->features)
+ *	@tx_path: whether it is called in TX path
+ *
+ *	This function segments the given skb and returns a list of segments.
+ *
+ *	It may return NULL if the skb requires no segmentation.  This is
+ *	only possible when GSO is used for verifying header integrity.
+ */
+struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
+				  netdev_features_t features, bool tx_path)
+{
+	if (unlikely(skb_needs_check(skb, tx_path))) {
+		int err;
+
+		skb_warn_bad_offload(skb);
+
+		if (skb_header_cloned(skb) &&
+		    (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
+			return ERR_PTR(err);
+	}
+
+	SKB_GSO_CB(skb)->mac_offset = skb_headroom(skb);
+	skb_reset_mac_header(skb);
+	skb_reset_mac_len(skb);
+
+	return skb_mac_gso_segment(skb, features);
+}
+EXPORT_SYMBOL(__skb_gso_segment);
 
 /* Take action when hardware reception checksum errors are detected. */
 #ifdef CONFIG_BUG
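Note: existing tx-path callers keep the old skb_gso_segment() name; only the core moved here so openvswitch can segment on rx with the CHECKSUM_NONE check. The header change is outside this file, but presumably the wrapper reduces to something like:

    static inline struct sk_buff *skb_gso_segment(struct sk_buff *skb,
                                                  netdev_features_t features)
    {
            return __skb_gso_segment(skb, features, true);   /* tx path */
    }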
@@ -2410,126 +2579,28 @@ out:
 	return rc;
 }
 
-static u32 hashrnd __read_mostly;
-
-/*
- * Returns a Tx hash based on the given packet descriptor a Tx queues' number
- * to be used as a distribution range.
- */
-u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb,
-		  unsigned int num_tx_queues)
-{
-	u32 hash;
-	u16 qoffset = 0;
-	u16 qcount = num_tx_queues;
-
-	if (skb_rx_queue_recorded(skb)) {
-		hash = skb_get_rx_queue(skb);
-		while (unlikely(hash >= num_tx_queues))
-			hash -= num_tx_queues;
-		return hash;
-	}
-
-	if (dev->num_tc) {
-		u8 tc = netdev_get_prio_tc_map(dev, skb->priority);
-		qoffset = dev->tc_to_txq[tc].offset;
-		qcount = dev->tc_to_txq[tc].count;
-	}
-
-	if (skb->sk && skb->sk->sk_hash)
-		hash = skb->sk->sk_hash;
-	else
-		hash = (__force u16) skb->protocol;
-	hash = jhash_1word(hash, hashrnd);
-
-	return (u16) (((u64) hash * qcount) >> 32) + qoffset;
-}
-EXPORT_SYMBOL(__skb_tx_hash);
-
-static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index)
-{
-	if (unlikely(queue_index >= dev->real_num_tx_queues)) {
-		net_warn_ratelimited("%s selects TX queue %d, but real number of TX queues is %d\n",
-				     dev->name, queue_index,
-				     dev->real_num_tx_queues);
-		return 0;
-	}
-	return queue_index;
-}
-
-static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
-{
-#ifdef CONFIG_XPS
-	struct xps_dev_maps *dev_maps;
-	struct xps_map *map;
-	int queue_index = -1;
-
-	rcu_read_lock();
-	dev_maps = rcu_dereference(dev->xps_maps);
-	if (dev_maps) {
-		map = rcu_dereference(
-		    dev_maps->cpu_map[raw_smp_processor_id()]);
-		if (map) {
-			if (map->len == 1)
-				queue_index = map->queues[0];
-			else {
-				u32 hash;
-				if (skb->sk && skb->sk->sk_hash)
-					hash = skb->sk->sk_hash;
-				else
-					hash = (__force u16) skb->protocol ^
-					    skb->rxhash;
-				hash = jhash_1word(hash, hashrnd);
-				queue_index = map->queues[
-				    ((u64)hash * map->len) >> 32];
-			}
-			if (unlikely(queue_index >= dev->real_num_tx_queues))
-				queue_index = -1;
-		}
-	}
-	rcu_read_unlock();
-
-	return queue_index;
-#else
-	return -1;
-#endif
-}
-
-struct netdev_queue *netdev_pick_tx(struct net_device *dev,
-				    struct sk_buff *skb)
-{
-	int queue_index;
-	const struct net_device_ops *ops = dev->netdev_ops;
-
-	if (dev->real_num_tx_queues == 1)
-		queue_index = 0;
-	else if (ops->ndo_select_queue) {
-		queue_index = ops->ndo_select_queue(dev, skb);
-		queue_index = dev_cap_txqueue(dev, queue_index);
-	} else {
-		struct sock *sk = skb->sk;
-		queue_index = sk_tx_queue_get(sk);
-
-		if (queue_index < 0 || skb->ooo_okay ||
-		    queue_index >= dev->real_num_tx_queues) {
-			int old_index = queue_index;
-
-			queue_index = get_xps_queue(dev, skb);
-			if (queue_index < 0)
-				queue_index = skb_tx_hash(dev, skb);
-
-			if (queue_index != old_index && sk) {
-				struct dst_entry *dst =
-				    rcu_dereference_check(sk->sk_dst_cache, 1);
-
-				if (dst && skb_dst(skb) == dst)
-					sk_tx_queue_set(sk, queue_index);
-			}
-		}
-	}
-
-	skb_set_queue_mapping(skb, queue_index);
-	return netdev_get_tx_queue(dev, queue_index);
-}
+static void qdisc_pkt_len_init(struct sk_buff *skb)
+{
+	const struct skb_shared_info *shinfo = skb_shinfo(skb);
+
+	qdisc_skb_cb(skb)->pkt_len = skb->len;
+
+	/* To get more precise estimation of bytes sent on wire,
+	 * we add to pkt_len the headers size of all segments
+	 */
+	if (shinfo->gso_size) {
+		unsigned int hdr_len;
+
+		/* mac layer + network layer */
+		hdr_len = skb_transport_header(skb) - skb_mac_header(skb);
+
+		/* + transport layer */
+		if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))
+			hdr_len += tcp_hdrlen(skb);
+		else
+			hdr_len += sizeof(struct udphdr);
+		qdisc_skb_cb(skb)->pkt_len += (shinfo->gso_segs - 1) * hdr_len;
+	}
+}
 
 static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
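Note: qdisc_pkt_len_init() makes qdisc byte accounting reflect on-wire bytes for GSO skbs. skb->len carries only one copy of the headers, so the headers of the remaining gso_segs - 1 segments are added. Worked example with illustrative numbers:

    /* TCP GSO skb: gso_segs = 3, header = 14 (mac) + 20 (ip) + 32 (tcp) = 66,
     * skb->len = 4410 (one header plus 4344 bytes of payload).
     * pkt_len = 4410 + (3 - 1) * 66 = 4542 bytes billed to the qdisc,
     * matching the three packets that actually hit the wire. */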
@@ -2540,7 +2611,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
 	bool contended;
 	int rc;
 
-	qdisc_skb_cb(skb)->pkt_len = skb->len;
+	qdisc_pkt_len_init(skb);
 	qdisc_calculate_pkt_len(skb, q);
 	/*
 	 * Heuristic to force contended enqueues to serialize on a
@@ -2663,6 +2734,8 @@ int dev_queue_xmit(struct sk_buff *skb)
 	struct Qdisc *q;
 	int rc = -ENOMEM;
 
+	skb_reset_mac_header(skb);
+
 	/* Disable soft irqs for various locks below. Also
 	 * stops preemption for RCU.
 	 */
@@ -2757,41 +2830,6 @@ static inline void ____napi_schedule(struct softnet_data *sd,
 	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
 }
 
-/*
- * __skb_get_rxhash: calculate a flow hash based on src/dst addresses
- * and src/dst port numbers.  Sets rxhash in skb to non-zero hash value
- * on success, zero indicates no valid hash.  Also, sets l4_rxhash in skb
- * if hash is a canonical 4-tuple hash over transport ports.
- */
-void __skb_get_rxhash(struct sk_buff *skb)
-{
-	struct flow_keys keys;
-	u32 hash;
-
-	if (!skb_flow_dissect(skb, &keys))
-		return;
-
-	if (keys.ports)
-		skb->l4_rxhash = 1;
-
-	/* get a consistent hash (same value on both flow directions) */
-	if (((__force u32)keys.dst < (__force u32)keys.src) ||
-	    (((__force u32)keys.dst == (__force u32)keys.src) &&
-	     ((__force u16)keys.port16[1] < (__force u16)keys.port16[0]))) {
-		swap(keys.dst, keys.src);
-		swap(keys.port16[0], keys.port16[1]);
-	}
-
-	hash = jhash_3words((__force u32)keys.dst,
-			    (__force u32)keys.src,
-			    (__force u32)keys.ports, hashrnd);
-	if (!hash)
-		hash = 1;
-
-	skb->rxhash = hash;
-}
-EXPORT_SYMBOL(__skb_get_rxhash);
-
 #ifdef CONFIG_RPS
 
 /* One global table that all flow-based protocols share. */
@@ -3318,7 +3356,7 @@ static bool skb_pfmemalloc_protocol(struct sk_buff *skb)
 	}
 }
 
-static int __netif_receive_skb(struct sk_buff *skb)
+static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc)
 {
 	struct packet_type *ptype, *pt_prev;
 	rx_handler_func_t *rx_handler;
@@ -3327,24 +3365,11 @@ static int __netif_receive_skb(struct sk_buff *skb)
 	bool deliver_exact = false;
 	int ret = NET_RX_DROP;
 	__be16 type;
-	unsigned long pflags = current->flags;
 
 	net_timestamp_check(!netdev_tstamp_prequeue, skb);
 
 	trace_netif_receive_skb(skb);
 
-	/*
-	 * PFMEMALLOC skbs are special, they should
-	 * - be delivered to SOCK_MEMALLOC sockets only
-	 * - stay away from userspace
-	 * - have bounded memory usage
-	 *
-	 * Use PF_MEMALLOC as this saves us from propagating the allocation
-	 * context down to all allocation sites.
-	 */
-	if (sk_memalloc_socks() && skb_pfmemalloc(skb))
-		current->flags |= PF_MEMALLOC;
-
 	/* if we've gotten here through NAPI, check netpoll */
 	if (netpoll_receive_skb(skb))
 		goto out;
@@ -3352,7 +3377,8 @@ static int __netif_receive_skb(struct sk_buff *skb)
 	orig_dev = skb->dev;
 
 	skb_reset_network_header(skb);
-	skb_reset_transport_header(skb);
+	if (!skb_transport_header_was_set(skb))
+		skb_reset_transport_header(skb);
 	skb_reset_mac_len(skb);
 
 	pt_prev = NULL;
@@ -3377,7 +3403,7 @@ another_round:
 	}
 #endif
 
-	if (sk_memalloc_socks() && skb_pfmemalloc(skb))
+	if (pfmemalloc)
 		goto skip_taps;
 
 	list_for_each_entry_rcu(ptype, &ptype_all, list) {
@@ -3396,8 +3422,7 @@ skip_taps:
 ncls:
 #endif
 
-	if (sk_memalloc_socks() && skb_pfmemalloc(skb)
-	    && !skb_pfmemalloc_protocol(skb))
+	if (pfmemalloc && !skb_pfmemalloc_protocol(skb))
 		goto drop;
 
 	if (vlan_tx_tag_present(skb)) {
@@ -3467,7 +3492,31 @@ drop:
 unlock:
 	rcu_read_unlock();
 out:
-	tsk_restore_flags(current, pflags, PF_MEMALLOC);
+	return ret;
+}
+
+static int __netif_receive_skb(struct sk_buff *skb)
+{
+	int ret;
+
+	if (sk_memalloc_socks() && skb_pfmemalloc(skb)) {
+		unsigned long pflags = current->flags;
+
+		/*
+		 * PFMEMALLOC skbs are special, they should
+		 * - be delivered to SOCK_MEMALLOC sockets only
+		 * - stay away from userspace
+		 * - have bounded memory usage
+		 *
+		 * Use PF_MEMALLOC as this saves us from propagating the allocation
+		 * context down to all allocation sites.
+		 */
+		current->flags |= PF_MEMALLOC;
+		ret = __netif_receive_skb_core(skb, true);
+		tsk_restore_flags(current, pflags, PF_MEMALLOC);
+	} else
+		ret = __netif_receive_skb_core(skb, false);
+
 	return ret;
 }
 
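Note: splitting __netif_receive_skb() this way keeps the PF_MEMALLOC save/restore off the common receive path; the task flag is touched only when a pfmemalloc skb actually arrives, and the core takes pfmemalloc as a plain parameter instead of re-testing the skb. The fast path reduces to:

    /* common case: no SOCK_MEMALLOC bookkeeping at all */
    ret = __netif_receive_skb_core(skb, false);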
@@ -3634,7 +3683,6 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
 	__be16 type = skb->protocol;
 	struct list_head *head = &offload_base;
 	int same_flow;
-	int mac_len;
 	enum gro_result ret;
 
 	if (!(skb->dev->features & NETIF_F_GRO) || netpoll_rx_on(skb))
@@ -3651,8 +3699,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
 			continue;
 
 		skb_set_network_header(skb, skb_gro_offset(skb));
-		mac_len = skb->network_header - skb->mac_header;
-		skb->mac_len = mac_len;
+		skb_reset_mac_len(skb);
 		NAPI_GRO_CB(skb)->same_flow = 0;
 		NAPI_GRO_CB(skb)->flush = 0;
 		NAPI_GRO_CB(skb)->free = 0;
@@ -4134,530 +4181,231 @@ softnet_break:
4134 goto out; 4181 goto out;
4135} 4182}
4136 4183
4137static gifconf_func_t *gifconf_list[NPROTO]; 4184struct netdev_upper {
4138
4139/**
4140 * register_gifconf - register a SIOCGIF handler
4141 * @family: Address family
4142 * @gifconf: Function handler
4143 *
4144 * Register protocol dependent address dumping routines. The handler
4145 * that is passed must not be freed or reused until it has been replaced
4146 * by another handler.
4147 */
4148int register_gifconf(unsigned int family, gifconf_func_t *gifconf)
4149{
4150 if (family >= NPROTO)
4151 return -EINVAL;
4152 gifconf_list[family] = gifconf;
4153 return 0;
4154}
4155EXPORT_SYMBOL(register_gifconf);
4156
4157
4158/*
4159 * Map an interface index to its name (SIOCGIFNAME)
4160 */
4161
4162/*
4163 * We need this ioctl for efficient implementation of the
4164 * if_indextoname() function required by the IPv6 API. Without
4165 * it, we would have to search all the interfaces to find a
4166 * match. --pb
4167 */
4168
4169static int dev_ifname(struct net *net, struct ifreq __user *arg)
4170{
4171 struct net_device *dev; 4185 struct net_device *dev;
4172 struct ifreq ifr; 4186 bool master;
4173 unsigned seq; 4187 struct list_head list;
4174 4188 struct rcu_head rcu;
4175 /* 4189 struct list_head search_list;
4176 * Fetch the caller's info block. 4190};
4177 */
4178 4191
4179 if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) 4192static void __append_search_uppers(struct list_head *search_list,
4180 return -EFAULT; 4193 struct net_device *dev)
4194{
4195 struct netdev_upper *upper;
4181 4196
4182retry: 4197 list_for_each_entry(upper, &dev->upper_dev_list, list) {
4183 seq = read_seqcount_begin(&devnet_rename_seq); 4198 /* check if this upper is not already in search list */
4184 rcu_read_lock(); 4199 if (list_empty(&upper->search_list))
4185 dev = dev_get_by_index_rcu(net, ifr.ifr_ifindex); 4200 list_add_tail(&upper->search_list, search_list);
4186 if (!dev) {
4187 rcu_read_unlock();
4188 return -ENODEV;
4189 } 4201 }
4190
4191 strcpy(ifr.ifr_name, dev->name);
4192 rcu_read_unlock();
4193 if (read_seqcount_retry(&devnet_rename_seq, seq))
4194 goto retry;
4195
4196 if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
4197 return -EFAULT;
4198 return 0;
4199} 4202}
4200 4203
4201/* 4204static bool __netdev_search_upper_dev(struct net_device *dev,
4202 * Perform a SIOCGIFCONF call. This structure will change 4205 struct net_device *upper_dev)
4203 * size eventually, and there is nothing I can do about it.
4204 * Thus we will need a 'compatibility mode'.
4205 */
4206
4207static int dev_ifconf(struct net *net, char __user *arg)
4208{ 4206{
4209 struct ifconf ifc; 4207 LIST_HEAD(search_list);
4210 struct net_device *dev; 4208 struct netdev_upper *upper;
4211 char __user *pos; 4209 struct netdev_upper *tmp;
4212 int len; 4210 bool ret = false;
4213 int total;
4214 int i;
4215 4211
4216 /* 4212 __append_search_uppers(&search_list, dev);
4217 * Fetch the caller's info block. 4213 list_for_each_entry(upper, &search_list, search_list) {
4218 */ 4214 if (upper->dev == upper_dev) {
4219 4215 ret = true;
4220 if (copy_from_user(&ifc, arg, sizeof(struct ifconf))) 4216 break;
4221 return -EFAULT;
4222
4223 pos = ifc.ifc_buf;
4224 len = ifc.ifc_len;
4225
4226 /*
4227 * Loop over the interfaces, and write an info block for each.
4228 */
4229
4230 total = 0;
4231 for_each_netdev(net, dev) {
4232 for (i = 0; i < NPROTO; i++) {
4233 if (gifconf_list[i]) {
4234 int done;
4235 if (!pos)
4236 done = gifconf_list[i](dev, NULL, 0);
4237 else
4238 done = gifconf_list[i](dev, pos + total,
4239 len - total);
4240 if (done < 0)
4241 return -EFAULT;
4242 total += done;
4243 }
4244 } 4217 }
4218 __append_search_uppers(&search_list, upper->dev);
4245 } 4219 }
4246 4220 list_for_each_entry_safe(upper, tmp, &search_list, search_list)
4247 /* 4221 INIT_LIST_HEAD(&upper->search_list);
4248 * All done. Write the updated control block back to the caller. 4222 return ret;
4249 */
4250 ifc.ifc_len = total;
4251
4252 /*
4253 * Both BSD and Solaris return 0 here, so we do too.
4254 */
4255 return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0;
4256} 4223}
4257 4224
4258#ifdef CONFIG_PROC_FS 4225static struct netdev_upper *__netdev_find_upper(struct net_device *dev,
4259 4226 struct net_device *upper_dev)
4260#define BUCKET_SPACE (32 - NETDEV_HASHBITS - 1)
4261
4262#define get_bucket(x) ((x) >> BUCKET_SPACE)
4263#define get_offset(x) ((x) & ((1 << BUCKET_SPACE) - 1))
4264#define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))
4265
4266static inline struct net_device *dev_from_same_bucket(struct seq_file *seq, loff_t *pos)
4267{ 4227{
4268 struct net *net = seq_file_net(seq); 4228 struct netdev_upper *upper;
4269 struct net_device *dev;
4270 struct hlist_node *p;
4271 struct hlist_head *h;
4272 unsigned int count = 0, offset = get_offset(*pos);
4273 4229
4274 h = &net->dev_name_head[get_bucket(*pos)]; 4230 list_for_each_entry(upper, &dev->upper_dev_list, list) {
4275 hlist_for_each_entry_rcu(dev, p, h, name_hlist) { 4231 if (upper->dev == upper_dev)
4276 if (++count == offset) 4232 return upper;
4277 return dev;
4278 } 4233 }
4279
4280 return NULL; 4234 return NULL;
4281} 4235}
4282 4236
4283static inline struct net_device *dev_from_bucket(struct seq_file *seq, loff_t *pos) 4237/**
4284{ 4238 * netdev_has_upper_dev - Check if device is linked to an upper device
4285 struct net_device *dev; 4239 * @dev: device
4286 unsigned int bucket; 4240 * @upper_dev: upper device to check
4287 4241 *
4288 do { 4242 * Find out if a device is linked to specified upper device and return true
4289 dev = dev_from_same_bucket(seq, pos); 4243 * in case it is. Note that this checks only immediate upper device,
4290 if (dev) 4244 * not through a complete stack of devices. The caller must hold the RTNL lock.
4291 return dev;
4292
4293 bucket = get_bucket(*pos) + 1;
4294 *pos = set_bucket_offset(bucket, 1);
4295 } while (bucket < NETDEV_HASHENTRIES);
4296
4297 return NULL;
4298}
4299
4300/*
4301 * This is invoked by the /proc filesystem handler to display a device
4302 * in detail.
4303 */ 4245 */
4304void *dev_seq_start(struct seq_file *seq, loff_t *pos) 4246bool netdev_has_upper_dev(struct net_device *dev,
4305 __acquires(RCU) 4247 struct net_device *upper_dev)
4306{
4307 rcu_read_lock();
4308 if (!*pos)
4309 return SEQ_START_TOKEN;
4310
4311 if (get_bucket(*pos) >= NETDEV_HASHENTRIES)
4312 return NULL;
4313
4314 return dev_from_bucket(seq, pos);
4315}
4316
4317void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
4318{
4319 ++*pos;
4320 return dev_from_bucket(seq, pos);
4321}
4322
4323void dev_seq_stop(struct seq_file *seq, void *v)
4324 __releases(RCU)
4325{
4326 rcu_read_unlock();
4327}
4328
4329static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
4330{ 4248{
4331 struct rtnl_link_stats64 temp; 4249 ASSERT_RTNL();
4332 const struct rtnl_link_stats64 *stats = dev_get_stats(dev, &temp);
4333 4250
4334 seq_printf(seq, "%6s: %7llu %7llu %4llu %4llu %4llu %5llu %10llu %9llu " 4251 return __netdev_find_upper(dev, upper_dev);
4335 "%8llu %7llu %4llu %4llu %4llu %5llu %7llu %10llu\n",
4336 dev->name, stats->rx_bytes, stats->rx_packets,
4337 stats->rx_errors,
4338 stats->rx_dropped + stats->rx_missed_errors,
4339 stats->rx_fifo_errors,
4340 stats->rx_length_errors + stats->rx_over_errors +
4341 stats->rx_crc_errors + stats->rx_frame_errors,
4342 stats->rx_compressed, stats->multicast,
4343 stats->tx_bytes, stats->tx_packets,
4344 stats->tx_errors, stats->tx_dropped,
4345 stats->tx_fifo_errors, stats->collisions,
4346 stats->tx_carrier_errors +
4347 stats->tx_aborted_errors +
4348 stats->tx_window_errors +
4349 stats->tx_heartbeat_errors,
4350 stats->tx_compressed);
4351} 4252}
4253EXPORT_SYMBOL(netdev_has_upper_dev);
4352 4254
4353/* 4255/**
4354 * Called from the PROCfs module. This now uses the new arbitrary sized 4256 * netdev_has_any_upper_dev - Check if device is linked to some device
4355 * /proc/net interface to create /proc/net/dev 4257 * @dev: device
4258 *
4259 * Find out if a device is linked to an upper device and return true in case
4260 * it is. The caller must hold the RTNL lock.
4356 */ 4261 */
4357static int dev_seq_show(struct seq_file *seq, void *v) 4262bool netdev_has_any_upper_dev(struct net_device *dev)
4358{
4359 if (v == SEQ_START_TOKEN)
4360 seq_puts(seq, "Inter-| Receive "
4361 " | Transmit\n"
4362 " face |bytes packets errs drop fifo frame "
4363 "compressed multicast|bytes packets errs "
4364 "drop fifo colls carrier compressed\n");
4365 else
4366 dev_seq_printf_stats(seq, v);
4367 return 0;
4368}
4369
4370static struct softnet_data *softnet_get_online(loff_t *pos)
4371{
4372 struct softnet_data *sd = NULL;
4373
4374 while (*pos < nr_cpu_ids)
4375 if (cpu_online(*pos)) {
4376 sd = &per_cpu(softnet_data, *pos);
4377 break;
4378 } else
4379 ++*pos;
4380 return sd;
4381}
4382
4383static void *softnet_seq_start(struct seq_file *seq, loff_t *pos)
4384{ 4263{
4385 return softnet_get_online(pos); 4264 ASSERT_RTNL();
4386}
4387
4388static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
4389{
4390 ++*pos;
4391 return softnet_get_online(pos);
4392}
4393 4265
4394static void softnet_seq_stop(struct seq_file *seq, void *v) 4266 return !list_empty(&dev->upper_dev_list);
4395{
4396} 4267}
4268EXPORT_SYMBOL(netdev_has_any_upper_dev);
4397 4269
4398static int softnet_seq_show(struct seq_file *seq, void *v) 4270/**
4399{ 4271 * netdev_master_upper_dev_get - Get master upper device
4400 struct softnet_data *sd = v; 4272 * @dev: device
4401 4273 *
4402 seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x\n", 4274 * Find a master upper device and return pointer to it or NULL in case
4403 sd->processed, sd->dropped, sd->time_squeeze, 0, 4275 * it's not there. The caller must hold the RTNL lock.
4404 0, 0, 0, 0, /* was fastroute */ 4276 */
4405 sd->cpu_collision, sd->received_rps); 4277struct net_device *netdev_master_upper_dev_get(struct net_device *dev)
4406 return 0;
4407}
4408
4409static const struct seq_operations dev_seq_ops = {
4410 .start = dev_seq_start,
4411 .next = dev_seq_next,
4412 .stop = dev_seq_stop,
4413 .show = dev_seq_show,
4414};
4415
4416static int dev_seq_open(struct inode *inode, struct file *file)
4417{ 4278{
4418 return seq_open_net(inode, file, &dev_seq_ops, 4279 struct netdev_upper *upper;
4419 sizeof(struct seq_net_private));
4420}
4421 4280
4422static const struct file_operations dev_seq_fops = { 4281 ASSERT_RTNL();
4423 .owner = THIS_MODULE,
4424 .open = dev_seq_open,
4425 .read = seq_read,
4426 .llseek = seq_lseek,
4427 .release = seq_release_net,
4428};
4429 4282
4430static const struct seq_operations softnet_seq_ops = { 4283 if (list_empty(&dev->upper_dev_list))
4431 .start = softnet_seq_start, 4284 return NULL;
4432 .next = softnet_seq_next,
4433 .stop = softnet_seq_stop,
4434 .show = softnet_seq_show,
4435};
4436 4285
4437static int softnet_seq_open(struct inode *inode, struct file *file) 4286 upper = list_first_entry(&dev->upper_dev_list,
4438{ 4287 struct netdev_upper, list);
4439 return seq_open(file, &softnet_seq_ops); 4288 if (likely(upper->master))
4289 return upper->dev;
4290 return NULL;
4440} 4291}
4292EXPORT_SYMBOL(netdev_master_upper_dev_get);
4441 4293
4442static const struct file_operations softnet_seq_fops = { 4294/**
4443 .owner = THIS_MODULE, 4295 * netdev_master_upper_dev_get_rcu - Get master upper device
4444 .open = softnet_seq_open, 4296 * @dev: device
4445 .read = seq_read, 4297 *
4446 .llseek = seq_lseek, 4298 * Find a master upper device and return pointer to it or NULL in case
4447 .release = seq_release, 4299 * it's not there. The caller must hold the RCU read lock.
4448}; 4300 */
4449 4301struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev)
4450static void *ptype_get_idx(loff_t pos)
4451{ 4302{
4452 struct packet_type *pt = NULL; 4303 struct netdev_upper *upper;
4453 loff_t i = 0;
4454 int t;
4455
4456 list_for_each_entry_rcu(pt, &ptype_all, list) {
4457 if (i == pos)
4458 return pt;
4459 ++i;
4460 }
4461 4304
4462 for (t = 0; t < PTYPE_HASH_SIZE; t++) { 4305 upper = list_first_or_null_rcu(&dev->upper_dev_list,
4463 list_for_each_entry_rcu(pt, &ptype_base[t], list) { 4306 struct netdev_upper, list);
4464 if (i == pos) 4307 if (upper && likely(upper->master))
4465 return pt; 4308 return upper->dev;
4466 ++i;
4467 }
4468 }
4469 return NULL; 4309 return NULL;
4470} 4310}
4311EXPORT_SYMBOL(netdev_master_upper_dev_get_rcu);
4471 4312
4472static void *ptype_seq_start(struct seq_file *seq, loff_t *pos) 4313static int __netdev_upper_dev_link(struct net_device *dev,
4473 __acquires(RCU) 4314 struct net_device *upper_dev, bool master)
4474{ 4315{
4475 rcu_read_lock(); 4316 struct netdev_upper *upper;
4476 return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN;
4477}
4478 4317
4479static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos) 4318 ASSERT_RTNL();
4480{
4481 struct packet_type *pt;
4482 struct list_head *nxt;
4483 int hash;
4484 4319
4485 ++*pos; 4320 if (dev == upper_dev)
4486 if (v == SEQ_START_TOKEN) 4321 return -EBUSY;
4487 return ptype_get_idx(0);
4488 4322
4489 pt = v; 4323 /* To prevent loops, check if dev is not upper device to upper_dev. */
4490 nxt = pt->list.next; 4324 if (__netdev_search_upper_dev(upper_dev, dev))
4491 if (pt->type == htons(ETH_P_ALL)) { 4325 return -EBUSY;
4492 if (nxt != &ptype_all)
4493 goto found;
4494 hash = 0;
4495 nxt = ptype_base[0].next;
4496 } else
4497 hash = ntohs(pt->type) & PTYPE_HASH_MASK;
4498 4326
4499 while (nxt == &ptype_base[hash]) { 4327 if (__netdev_find_upper(dev, upper_dev))
4500 if (++hash >= PTYPE_HASH_SIZE) 4328 return -EEXIST;
4501 return NULL;
4502 nxt = ptype_base[hash].next;
4503 }
4504found:
4505 return list_entry(nxt, struct packet_type, list);
4506}
4507 4329
4508static void ptype_seq_stop(struct seq_file *seq, void *v) 4330 if (master && netdev_master_upper_dev_get(dev))
4509 __releases(RCU) 4331 return -EBUSY;
4510{
4511 rcu_read_unlock();
4512}
4513 4332
4514static int ptype_seq_show(struct seq_file *seq, void *v) 4333 upper = kmalloc(sizeof(*upper), GFP_KERNEL);
4515{ 4334 if (!upper)
4516 struct packet_type *pt = v; 4335 return -ENOMEM;
4517 4336
4518 if (v == SEQ_START_TOKEN) 4337 upper->dev = upper_dev;
4519 seq_puts(seq, "Type Device Function\n"); 4338 upper->master = master;
4520 else if (pt->dev == NULL || dev_net(pt->dev) == seq_file_net(seq)) { 4339 INIT_LIST_HEAD(&upper->search_list);
4521 if (pt->type == htons(ETH_P_ALL))
4522 seq_puts(seq, "ALL ");
4523 else
4524 seq_printf(seq, "%04x", ntohs(pt->type));
4525 4340
4526 seq_printf(seq, " %-8s %pF\n", 4341 /* Ensure that master upper link is always the first item in list. */
4527 pt->dev ? pt->dev->name : "", pt->func); 4342 if (master)
4528 } 4343 list_add_rcu(&upper->list, &dev->upper_dev_list);
4344 else
4345 list_add_tail_rcu(&upper->list, &dev->upper_dev_list);
4346 dev_hold(upper_dev);
4529 4347
4530 return 0; 4348 return 0;
4531} 4349}
4532 4350
4533static const struct seq_operations ptype_seq_ops = { 4351/**
4534 .start = ptype_seq_start, 4352 * netdev_upper_dev_link - Add a link to the upper device
4535 .next = ptype_seq_next, 4353 * @dev: device
4536 .stop = ptype_seq_stop, 4354 * @upper_dev: new upper device
4537 .show = ptype_seq_show, 4355 *
4538}; 4356 * Adds a link to device which is upper to this one. The caller must hold
4539 4357 * the RTNL lock. On a failure a negative errno code is returned.
4540static int ptype_seq_open(struct inode *inode, struct file *file) 4358 * On success the reference counts are adjusted and the function
4541{ 4359 * returns zero.
4542 return seq_open_net(inode, file, &ptype_seq_ops, 4360 */
4543 sizeof(struct seq_net_private)); 4361int netdev_upper_dev_link(struct net_device *dev,
4544} 4362 struct net_device *upper_dev)
4545
4546static const struct file_operations ptype_seq_fops = {
4547 .owner = THIS_MODULE,
4548 .open = ptype_seq_open,
4549 .read = seq_read,
4550 .llseek = seq_lseek,
4551 .release = seq_release_net,
4552};
4553
4554
4555static int __net_init dev_proc_net_init(struct net *net)
4556{
4557 int rc = -ENOMEM;
4558
4559 if (!proc_net_fops_create(net, "dev", S_IRUGO, &dev_seq_fops))
4560 goto out;
4561 if (!proc_net_fops_create(net, "softnet_stat", S_IRUGO, &softnet_seq_fops))
4562 goto out_dev;
4563 if (!proc_net_fops_create(net, "ptype", S_IRUGO, &ptype_seq_fops))
4564 goto out_softnet;
4565
4566 if (wext_proc_init(net))
4567 goto out_ptype;
4568 rc = 0;
4569out:
4570 return rc;
4571out_ptype:
4572 proc_net_remove(net, "ptype");
4573out_softnet:
4574 proc_net_remove(net, "softnet_stat");
4575out_dev:
4576 proc_net_remove(net, "dev");
4577 goto out;
4578}
4579
4580static void __net_exit dev_proc_net_exit(struct net *net)
4581{
4582 wext_proc_exit(net);
4583
4584 proc_net_remove(net, "ptype");
4585 proc_net_remove(net, "softnet_stat");
4586 proc_net_remove(net, "dev");
4587}
4588
4589static struct pernet_operations __net_initdata dev_proc_ops = {
4590 .init = dev_proc_net_init,
4591 .exit = dev_proc_net_exit,
4592};
4593
4594static int __init dev_proc_init(void)
4595{ 4363{
4596 return register_pernet_subsys(&dev_proc_ops); 4364 return __netdev_upper_dev_link(dev, upper_dev, false);
4597} 4365}
4598#else 4366EXPORT_SYMBOL(netdev_upper_dev_link);
4599#define dev_proc_init() 0
4600#endif /* CONFIG_PROC_FS */
4601
4602 4367
4603/** 4368/**
4604 * netdev_set_master - set up master pointer 4369 * netdev_master_upper_dev_link - Add a master link to the upper device
4605 * @slave: slave device 4370 * @dev: device
4606 * @master: new master device 4371 * @upper_dev: new upper device
4607 * 4372 *
4608 * Changes the master device of the slave. Pass %NULL to break the 4373 * Adds a link to device which is upper to this one. In this case, only
4609 * bonding. The caller must hold the RTNL semaphore. On a failure 4374 * one master upper device can be linked, although other non-master devices
4610 * a negative errno code is returned. On success the reference counts 4375 * might be linked as well. The caller must hold the RTNL lock.
4611 * are adjusted and the function returns zero. 4376 * On a failure a negative errno code is returned. On success the reference
4377 * counts are adjusted and the function returns zero.
4612 */ 4378 */
4613int netdev_set_master(struct net_device *slave, struct net_device *master) 4379int netdev_master_upper_dev_link(struct net_device *dev,
4380 struct net_device *upper_dev)
4614{ 4381{
4615 struct net_device *old = slave->master; 4382 return __netdev_upper_dev_link(dev, upper_dev, true);
4616
4617 ASSERT_RTNL();
4618
4619 if (master) {
4620 if (old)
4621 return -EBUSY;
4622 dev_hold(master);
4623 }
4624
4625 slave->master = master;
4626
4627 if (old)
4628 dev_put(old);
4629 return 0;
4630} 4383}
4631EXPORT_SYMBOL(netdev_set_master); 4384EXPORT_SYMBOL(netdev_master_upper_dev_link);
4632 4385
4633/** 4386/**
4634 * netdev_set_bond_master - set up bonding master/slave pair 4387 * netdev_upper_dev_unlink - Removes a link to upper device
4635 * @slave: slave device 4388 * @dev: device
4636 * @master: new master device 4389 * @upper_dev: new upper device
4637 * 4390 *
4638 * Changes the master device of the slave. Pass %NULL to break the 4391 * Removes a link to device which is upper to this one. The caller must hold
4639 * bonding. The caller must hold the RTNL semaphore. On a failure 4392 * the RTNL lock.
4640 * a negative errno code is returned. On success %RTM_NEWLINK is sent
4641 * to the routing socket and the function returns zero.
4642 */ 4393 */
4643int netdev_set_bond_master(struct net_device *slave, struct net_device *master) 4394void netdev_upper_dev_unlink(struct net_device *dev,
4395 struct net_device *upper_dev)
4644{ 4396{
4645 int err; 4397 struct netdev_upper *upper;
4646 4398
4647 ASSERT_RTNL(); 4399 ASSERT_RTNL();
4648 4400
4649 err = netdev_set_master(slave, master); 4401 upper = __netdev_find_upper(dev, upper_dev);
4650 if (err) 4402 if (!upper)
4651 return err; 4403 return;
4652 if (master) 4404 list_del_rcu(&upper->list);
4653 slave->flags |= IFF_SLAVE; 4405 dev_put(upper_dev);
4654 else 4406 kfree_rcu(upper, rcu);
4655 slave->flags &= ~IFF_SLAVE;
4656
4657 rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
4658 return 0;
4659} 4407}
4660EXPORT_SYMBOL(netdev_set_bond_master); 4408EXPORT_SYMBOL(netdev_upper_dev_unlink);
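And the matching teardown under the same assumptions; note the unlink is tolerant of a missing link, which is silently ignored:

/* Hypothetical release path: drop the master link created above. */
static void example_release(struct net_device *bond_dev,
			    struct net_device *slave_dev)
{
	ASSERT_RTNL();
	netdev_upper_dev_unlink(slave_dev, bond_dev);
}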
4661 4409
4662static void dev_change_rx_flags(struct net_device *dev, int flags) 4410static void dev_change_rx_flags(struct net_device *dev, int flags)
4663{ 4411{
@@ -5020,381 +4768,33 @@ int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
5020 if (!netif_device_present(dev)) 4768 if (!netif_device_present(dev))
5021 return -ENODEV; 4769 return -ENODEV;
5022 err = ops->ndo_set_mac_address(dev, sa); 4770 err = ops->ndo_set_mac_address(dev, sa);
5023 if (!err) 4771 if (err)
5024 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); 4772 return err;
4773 dev->addr_assign_type = NET_ADDR_SET;
4774 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
5025 add_device_randomness(dev->dev_addr, dev->addr_len); 4775 add_device_randomness(dev->dev_addr, dev->addr_len);
5026 return err; 4776 return 0;
5027} 4777}
5028EXPORT_SYMBOL(dev_set_mac_address); 4778EXPORT_SYMBOL(dev_set_mac_address);
5029 4779
5030/*
5031 * Perform the SIOCxIFxxx calls, inside rcu_read_lock()
5032 */
5033static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cmd)
5034{
5035 int err;
5036 struct net_device *dev = dev_get_by_name_rcu(net, ifr->ifr_name);
5037
5038 if (!dev)
5039 return -ENODEV;
5040
5041 switch (cmd) {
5042 case SIOCGIFFLAGS: /* Get interface flags */
5043 ifr->ifr_flags = (short) dev_get_flags(dev);
5044 return 0;
5045
5046 case SIOCGIFMETRIC: /* Get the metric on the interface
5047 (currently unused) */
5048 ifr->ifr_metric = 0;
5049 return 0;
5050
5051 case SIOCGIFMTU: /* Get the MTU of a device */
5052 ifr->ifr_mtu = dev->mtu;
5053 return 0;
5054
5055 case SIOCGIFHWADDR:
5056 if (!dev->addr_len)
5057 memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data);
5058 else
5059 memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr,
5060 min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
5061 ifr->ifr_hwaddr.sa_family = dev->type;
5062 return 0;
5063
5064 case SIOCGIFSLAVE:
5065 err = -EINVAL;
5066 break;
5067
5068 case SIOCGIFMAP:
5069 ifr->ifr_map.mem_start = dev->mem_start;
5070 ifr->ifr_map.mem_end = dev->mem_end;
5071 ifr->ifr_map.base_addr = dev->base_addr;
5072 ifr->ifr_map.irq = dev->irq;
5073 ifr->ifr_map.dma = dev->dma;
5074 ifr->ifr_map.port = dev->if_port;
5075 return 0;
5076
5077 case SIOCGIFINDEX:
5078 ifr->ifr_ifindex = dev->ifindex;
5079 return 0;
5080
5081 case SIOCGIFTXQLEN:
5082 ifr->ifr_qlen = dev->tx_queue_len;
5083 return 0;
5084
5085 default:
5086 /* dev_ioctl() should ensure this case
5087 * is never reached
5088 */
5089 WARN_ON(1);
5090 err = -ENOTTY;
5091 break;
5092
5093 }
5094 return err;
5095}
5096
5097/*
5098 * Perform the SIOCxIFxxx calls, inside rtnl_lock()
5099 */
5100static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
5101{
5102 int err;
5103 struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
5104 const struct net_device_ops *ops;
5105
5106 if (!dev)
5107 return -ENODEV;
5108
5109 ops = dev->netdev_ops;
5110
5111 switch (cmd) {
5112 case SIOCSIFFLAGS: /* Set interface flags */
5113 return dev_change_flags(dev, ifr->ifr_flags);
5114
5115 case SIOCSIFMETRIC: /* Set the metric on the interface
5116 (currently unused) */
5117 return -EOPNOTSUPP;
5118
5119 case SIOCSIFMTU: /* Set the MTU of a device */
5120 return dev_set_mtu(dev, ifr->ifr_mtu);
5121
5122 case SIOCSIFHWADDR:
5123 return dev_set_mac_address(dev, &ifr->ifr_hwaddr);
5124
5125 case SIOCSIFHWBROADCAST:
5126 if (ifr->ifr_hwaddr.sa_family != dev->type)
5127 return -EINVAL;
5128 memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
5129 min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
5130 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
5131 return 0;
5132
5133 case SIOCSIFMAP:
5134 if (ops->ndo_set_config) {
5135 if (!netif_device_present(dev))
5136 return -ENODEV;
5137 return ops->ndo_set_config(dev, &ifr->ifr_map);
5138 }
5139 return -EOPNOTSUPP;
5140
5141 case SIOCADDMULTI:
5142 if (!ops->ndo_set_rx_mode ||
5143 ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
5144 return -EINVAL;
5145 if (!netif_device_present(dev))
5146 return -ENODEV;
5147 return dev_mc_add_global(dev, ifr->ifr_hwaddr.sa_data);
5148
5149 case SIOCDELMULTI:
5150 if (!ops->ndo_set_rx_mode ||
5151 ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
5152 return -EINVAL;
5153 if (!netif_device_present(dev))
5154 return -ENODEV;
5155 return dev_mc_del_global(dev, ifr->ifr_hwaddr.sa_data);
5156
5157 case SIOCSIFTXQLEN:
5158 if (ifr->ifr_qlen < 0)
5159 return -EINVAL;
5160 dev->tx_queue_len = ifr->ifr_qlen;
5161 return 0;
5162
5163 case SIOCSIFNAME:
5164 ifr->ifr_newname[IFNAMSIZ-1] = '\0';
5165 return dev_change_name(dev, ifr->ifr_newname);
5166
5167 case SIOCSHWTSTAMP:
5168 err = net_hwtstamp_validate(ifr);
5169 if (err)
5170 return err;
5171 /* fall through */
5172
5173 /*
5174 * Unknown or private ioctl
5175 */
5176 default:
5177 if ((cmd >= SIOCDEVPRIVATE &&
5178 cmd <= SIOCDEVPRIVATE + 15) ||
5179 cmd == SIOCBONDENSLAVE ||
5180 cmd == SIOCBONDRELEASE ||
5181 cmd == SIOCBONDSETHWADDR ||
5182 cmd == SIOCBONDSLAVEINFOQUERY ||
5183 cmd == SIOCBONDINFOQUERY ||
5184 cmd == SIOCBONDCHANGEACTIVE ||
5185 cmd == SIOCGMIIPHY ||
5186 cmd == SIOCGMIIREG ||
5187 cmd == SIOCSMIIREG ||
5188 cmd == SIOCBRADDIF ||
5189 cmd == SIOCBRDELIF ||
5190 cmd == SIOCSHWTSTAMP ||
5191 cmd == SIOCWANDEV) {
5192 err = -EOPNOTSUPP;
5193 if (ops->ndo_do_ioctl) {
5194 if (netif_device_present(dev))
5195 err = ops->ndo_do_ioctl(dev, ifr, cmd);
5196 else
5197 err = -ENODEV;
5198 }
5199 } else
5200 err = -EINVAL;
5201
5202 }
5203 return err;
5204}
5205
5206/*
5207 * This function handles all "interface"-type I/O control requests. The actual
5208 * 'doing' part of this is dev_ifsioc above.
5209 */
5210
5211/** 4780/**
5212 * dev_ioctl - network device ioctl 4781 * dev_change_carrier - Change device carrier
5213 * @net: the applicable net namespace 4782 * @dev: device
5214 * @cmd: command to issue 4783 * @new_carrier: new value
5215 * @arg: pointer to a struct ifreq in user space
5216 * 4784 *
5217 * Issue ioctl functions to devices. This is normally called by the 4785 * Change device carrier
5218 * user space syscall interfaces but can sometimes be useful for
5219 * other purposes. The return value is the return from the syscall if
5220 * positive or a negative errno code on error.
5221 */ 4786 */
5222 4787int dev_change_carrier(struct net_device *dev, bool new_carrier)
5223int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
5224{ 4788{
5225 struct ifreq ifr; 4789 const struct net_device_ops *ops = dev->netdev_ops;
5226 int ret;
5227 char *colon;
5228
5229 /* One special case: SIOCGIFCONF takes ifconf argument
5230 and requires shared lock, because it sleeps writing
5231 to user space.
5232 */
5233
5234 if (cmd == SIOCGIFCONF) {
5235 rtnl_lock();
5236 ret = dev_ifconf(net, (char __user *) arg);
5237 rtnl_unlock();
5238 return ret;
5239 }
5240 if (cmd == SIOCGIFNAME)
5241 return dev_ifname(net, (struct ifreq __user *)arg);
5242
5243 if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
5244 return -EFAULT;
5245
5246 ifr.ifr_name[IFNAMSIZ-1] = 0;
5247
5248 colon = strchr(ifr.ifr_name, ':');
5249 if (colon)
5250 *colon = 0;
5251
5252 /*
5253 * See which interface the caller is talking about.
5254 */
5255
5256 switch (cmd) {
5257 /*
5258 * These ioctl calls:
5259 * - can be done by all.
5260 * - atomic and do not require locking.
5261 * - return a value
5262 */
5263 case SIOCGIFFLAGS:
5264 case SIOCGIFMETRIC:
5265 case SIOCGIFMTU:
5266 case SIOCGIFHWADDR:
5267 case SIOCGIFSLAVE:
5268 case SIOCGIFMAP:
5269 case SIOCGIFINDEX:
5270 case SIOCGIFTXQLEN:
5271 dev_load(net, ifr.ifr_name);
5272 rcu_read_lock();
5273 ret = dev_ifsioc_locked(net, &ifr, cmd);
5274 rcu_read_unlock();
5275 if (!ret) {
5276 if (colon)
5277 *colon = ':';
5278 if (copy_to_user(arg, &ifr,
5279 sizeof(struct ifreq)))
5280 ret = -EFAULT;
5281 }
5282 return ret;
5283
5284 case SIOCETHTOOL:
5285 dev_load(net, ifr.ifr_name);
5286 rtnl_lock();
5287 ret = dev_ethtool(net, &ifr);
5288 rtnl_unlock();
5289 if (!ret) {
5290 if (colon)
5291 *colon = ':';
5292 if (copy_to_user(arg, &ifr,
5293 sizeof(struct ifreq)))
5294 ret = -EFAULT;
5295 }
5296 return ret;
5297
5298 /*
5299 * These ioctl calls:
5300 * - require superuser power.
5301 * - require strict serialization.
5302 * - return a value
5303 */
5304 case SIOCGMIIPHY:
5305 case SIOCGMIIREG:
5306 case SIOCSIFNAME:
5307 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
5308 return -EPERM;
5309 dev_load(net, ifr.ifr_name);
5310 rtnl_lock();
5311 ret = dev_ifsioc(net, &ifr, cmd);
5312 rtnl_unlock();
5313 if (!ret) {
5314 if (colon)
5315 *colon = ':';
5316 if (copy_to_user(arg, &ifr,
5317 sizeof(struct ifreq)))
5318 ret = -EFAULT;
5319 }
5320 return ret;
5321
5322 /*
5323 * These ioctl calls:
5324 * - require superuser power.
5325 * - require strict serialization.
5326 * - do not return a value
5327 */
5328 case SIOCSIFMAP:
5329 case SIOCSIFTXQLEN:
5330 if (!capable(CAP_NET_ADMIN))
5331 return -EPERM;
5332 /* fall through */
5333 /*
5334 * These ioctl calls:
5335 * - require local superuser power.
5336 * - require strict serialization.
5337 * - do not return a value
5338 */
5339 case SIOCSIFFLAGS:
5340 case SIOCSIFMETRIC:
5341 case SIOCSIFMTU:
5342 case SIOCSIFHWADDR:
5343 case SIOCSIFSLAVE:
5344 case SIOCADDMULTI:
5345 case SIOCDELMULTI:
5346 case SIOCSIFHWBROADCAST:
5347 case SIOCSMIIREG:
5348 case SIOCBONDENSLAVE:
5349 case SIOCBONDRELEASE:
5350 case SIOCBONDSETHWADDR:
5351 case SIOCBONDCHANGEACTIVE:
5352 case SIOCBRADDIF:
5353 case SIOCBRDELIF:
5354 case SIOCSHWTSTAMP:
5355 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
5356 return -EPERM;
5357 /* fall through */
5358 case SIOCBONDSLAVEINFOQUERY:
5359 case SIOCBONDINFOQUERY:
5360 dev_load(net, ifr.ifr_name);
5361 rtnl_lock();
5362 ret = dev_ifsioc(net, &ifr, cmd);
5363 rtnl_unlock();
5364 return ret;
5365
5366 case SIOCGIFMEM:
5367 /* Get the per device memory space. We can add this but
5368 * currently do not support it */
5369 case SIOCSIFMEM:
5370 /* Set the per device memory buffer space.
5371 * Not applicable in our case */
5372 case SIOCSIFLINK:
5373 return -ENOTTY;
5374 4790
5375 /* 4791 if (!ops->ndo_change_carrier)
5376 * Unknown or private ioctl. 4792 return -EOPNOTSUPP;
5377 */ 4793 if (!netif_device_present(dev))
5378 default: 4794 return -ENODEV;
5379 if (cmd == SIOCWANDEV || 4795 return ops->ndo_change_carrier(dev, new_carrier);
5380 (cmd >= SIOCDEVPRIVATE &&
5381 cmd <= SIOCDEVPRIVATE + 15)) {
5382 dev_load(net, ifr.ifr_name);
5383 rtnl_lock();
5384 ret = dev_ifsioc(net, &ifr, cmd);
5385 rtnl_unlock();
5386 if (!ret && copy_to_user(arg, &ifr,
5387 sizeof(struct ifreq)))
5388 ret = -EFAULT;
5389 return ret;
5390 }
5391 /* Take care of Wireless Extensions */
5392 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST)
5393 return wext_handle_ioctl(net, &ifr, cmd, arg);
5394 return -ENOTTY;
5395 }
5396} 4796}
5397 4797EXPORT_SYMBOL(dev_change_carrier);
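A software device can back this with a trivial .ndo_change_carrier, roughly what a dummy-style driver does (sketch):

static int example_change_carrier(struct net_device *dev, bool new_carrier)
{
	if (new_carrier)
		netif_carrier_on(dev);
	else
		netif_carrier_off(dev);
	return 0;
}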
5398 4798
5399/** 4799/**
5400 * dev_new_index - allocate an ifindex 4800 * dev_new_index - allocate an ifindex
@@ -5482,11 +4882,15 @@ static void rollback_registered_many(struct list_head *head)
5482 if (dev->netdev_ops->ndo_uninit) 4882 if (dev->netdev_ops->ndo_uninit)
5483 dev->netdev_ops->ndo_uninit(dev); 4883 dev->netdev_ops->ndo_uninit(dev);
5484 4884
5485 /* Notifier chain MUST detach us from master device. */ 4885 /* Notifier chain MUST detach us from all upper devices. */
5486 WARN_ON(dev->master); 4886 WARN_ON(netdev_has_any_upper_dev(dev));
5487 4887
5488 /* Remove entries from kobject tree */ 4888 /* Remove entries from kobject tree */
5489 netdev_unregister_kobject(dev); 4889 netdev_unregister_kobject(dev);
4890#ifdef CONFIG_XPS
4891 /* Remove XPS queueing entries */
4892 netif_reset_xps_queues_gt(dev, 0);
4893#endif
5490 } 4894 }
5491 4895
5492 synchronize_net(); 4896 synchronize_net();
@@ -5664,10 +5068,9 @@ static int netif_alloc_rx_queues(struct net_device *dev)
5664 BUG_ON(count < 1); 5068 BUG_ON(count < 1);
5665 5069
5666 rx = kcalloc(count, sizeof(struct netdev_rx_queue), GFP_KERNEL); 5070 rx = kcalloc(count, sizeof(struct netdev_rx_queue), GFP_KERNEL);
5667 if (!rx) { 5071 if (!rx)
5668 pr_err("netdev: Unable to allocate %u rx queues\n", count);
5669 return -ENOMEM; 5072 return -ENOMEM;
5670 } 5073
5671 dev->_rx = rx; 5074 dev->_rx = rx;
5672 5075
5673 for (i = 0; i < count; i++) 5076 for (i = 0; i < count; i++)
@@ -5698,10 +5101,9 @@ static int netif_alloc_netdev_queues(struct net_device *dev)
5698 BUG_ON(count < 1); 5101 BUG_ON(count < 1);
5699 5102
5700 tx = kcalloc(count, sizeof(struct netdev_queue), GFP_KERNEL); 5103 tx = kcalloc(count, sizeof(struct netdev_queue), GFP_KERNEL);
5701 if (!tx) { 5104 if (!tx)
5702 pr_err("netdev: Unable to allocate %u tx queues\n", count);
5703 return -ENOMEM; 5105 return -ENOMEM;
5704 } 5106
5705 dev->_tx = tx; 5107 dev->_tx = tx;
5706 5108
5707 netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL); 5109 netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
@@ -5760,6 +5162,14 @@ int register_netdevice(struct net_device *dev)
5760 } 5162 }
5761 } 5163 }
5762 5164
5165 if (((dev->hw_features | dev->features) & NETIF_F_HW_VLAN_FILTER) &&
5166 (!dev->netdev_ops->ndo_vlan_rx_add_vid ||
5167 !dev->netdev_ops->ndo_vlan_rx_kill_vid)) {
5168 netdev_WARN(dev, "Buggy VLAN acceleration in driver!\n");
5169 ret = -EINVAL;
5170 goto err_uninit;
5171 }
5172
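The check above means any driver advertising NETIF_F_HW_VLAN_FILTER must wire up both VLAN callbacks; a sketch using the signatures of this tree, with hypothetical hardware helpers:

static int example_vlan_rx_add_vid(struct net_device *dev, unsigned short vid)
{
	/* program vid into the hardware VLAN filter table (sketch) */
	return 0;
}

static int example_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid)
{
	/* remove vid from the hardware VLAN filter table (sketch) */
	return 0;
}

static const struct net_device_ops example_netdev_ops = {
	.ndo_vlan_rx_add_vid	= example_vlan_rx_add_vid,
	.ndo_vlan_rx_kill_vid	= example_vlan_rx_kill_vid,
};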
5763 ret = -EBUSY; 5173 ret = -EBUSY;
5764 if (!dev->ifindex) 5174 if (!dev->ifindex)
5765 dev->ifindex = dev_new_index(net); 5175 dev->ifindex = dev_new_index(net);
@@ -5815,6 +5225,13 @@ int register_netdevice(struct net_device *dev)
5815 list_netdevice(dev); 5225 list_netdevice(dev);
5816 add_device_randomness(dev->dev_addr, dev->addr_len); 5226 add_device_randomness(dev->dev_addr, dev->addr_len);
5817 5227
 5228 /* If the device has a permanent device address, the driver should
 5229 * set dev_addr and leave addr_assign_type at NET_ADDR_PERM (its
 5230 * default value).
5231 */
5232 if (dev->addr_assign_type == NET_ADDR_PERM)
5233 memcpy(dev->perm_addr, dev->dev_addr, dev->addr_len);
5234
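In practice that means a probe routine reading a factory MAC only fills dev_addr (sketch; rom_mac is a hypothetical buffer holding the factory address):

/* addr_assign_type defaults to NET_ADDR_PERM, so register_netdevice()
 * mirrors this address into dev->perm_addr automatically. */
static void example_set_factory_mac(struct net_device *dev,
				    const u8 rom_mac[ETH_ALEN])
{
	memcpy(dev->dev_addr, rom_mac, ETH_ALEN);
}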
5818 /* Notify protocols, that a new device appeared. */ 5235 /* Notify protocols, that a new device appeared. */
5819 ret = call_netdevice_notifiers(NETDEV_REGISTER, dev); 5236 ret = call_netdevice_notifiers(NETDEV_REGISTER, dev);
5820 ret = notifier_to_errno(ret); 5237 ret = notifier_to_errno(ret);
@@ -6121,6 +5538,14 @@ struct netdev_queue *dev_ingress_queue_create(struct net_device *dev)
6121 5538
6122static const struct ethtool_ops default_ethtool_ops; 5539static const struct ethtool_ops default_ethtool_ops;
6123 5540
5541void netdev_set_default_ethtool_ops(struct net_device *dev,
5542 const struct ethtool_ops *ops)
5543{
5544 if (dev->ethtool_ops == &default_ethtool_ops)
5545 dev->ethtool_ops = ops;
5546}
5547EXPORT_SYMBOL_GPL(netdev_set_default_ethtool_ops);
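A sketch of the intended use: a mid-layer can install fallback ethtool ops that take effect only if the driver left the default in place (names are illustrative):

static const struct ethtool_ops example_fallback_ethtool_ops = {
	.get_link = ethtool_op_get_link,
};

static void example_layer_setup(struct net_device *dev)
{
	/* no-op if the driver already installed its own ethtool_ops */
	netdev_set_default_ethtool_ops(dev, &example_fallback_ethtool_ops);
}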
5548
6124/** 5549/**
6125 * alloc_netdev_mqs - allocate network device 5550 * alloc_netdev_mqs - allocate network device
6126 * @sizeof_priv: size of private data to allocate space for 5551 * @sizeof_priv: size of private data to allocate space for
@@ -6165,10 +5590,8 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
6165 alloc_size += NETDEV_ALIGN - 1; 5590 alloc_size += NETDEV_ALIGN - 1;
6166 5591
6167 p = kzalloc(alloc_size, GFP_KERNEL); 5592 p = kzalloc(alloc_size, GFP_KERNEL);
6168 if (!p) { 5593 if (!p)
6169 pr_err("alloc_netdev: Unable to allocate device\n");
6170 return NULL; 5594 return NULL;
6171 }
6172 5595
6173 dev = PTR_ALIGN(p, NETDEV_ALIGN); 5596 dev = PTR_ALIGN(p, NETDEV_ALIGN);
6174 dev->padded = (char *)dev - (char *)p; 5597 dev->padded = (char *)dev - (char *)p;
@@ -6191,6 +5614,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
6191 INIT_LIST_HEAD(&dev->napi_list); 5614 INIT_LIST_HEAD(&dev->napi_list);
6192 INIT_LIST_HEAD(&dev->unreg_list); 5615 INIT_LIST_HEAD(&dev->unreg_list);
6193 INIT_LIST_HEAD(&dev->link_watch_list); 5616 INIT_LIST_HEAD(&dev->link_watch_list);
5617 INIT_LIST_HEAD(&dev->upper_dev_list);
6194 dev->priv_flags = IFF_XMIT_DST_RELEASE; 5618 dev->priv_flags = IFF_XMIT_DST_RELEASE;
6195 setup(dev); 5619 setup(dev);
6196 5620
@@ -6834,19 +6258,9 @@ static int __init net_dev_init(void)
6834 6258
6835 hotcpu_notifier(dev_cpu_callback, 0); 6259 hotcpu_notifier(dev_cpu_callback, 0);
6836 dst_init(); 6260 dst_init();
6837 dev_mcast_init();
6838 rc = 0; 6261 rc = 0;
6839out: 6262out:
6840 return rc; 6263 return rc;
6841} 6264}
6842 6265
6843subsys_initcall(net_dev_init); 6266subsys_initcall(net_dev_init);
6844
6845static int __init initialize_hashrnd(void)
6846{
6847 get_random_bytes(&hashrnd, sizeof(hashrnd));
6848 return 0;
6849}
6850
6851late_initcall_sync(initialize_hashrnd);
6852
diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c
index b079c7bbc157..bd2eb9d3e369 100644
--- a/net/core/dev_addr_lists.c
+++ b/net/core/dev_addr_lists.c
@@ -15,7 +15,6 @@
15#include <linux/rtnetlink.h> 15#include <linux/rtnetlink.h>
16#include <linux/export.h> 16#include <linux/export.h>
17#include <linux/list.h> 17#include <linux/list.h>
18#include <linux/proc_fs.h>
19 18
20/* 19/*
21 * General list handling functions 20 * General list handling functions
@@ -727,76 +726,3 @@ void dev_mc_init(struct net_device *dev)
727 __hw_addr_init(&dev->mc); 726 __hw_addr_init(&dev->mc);
728} 727}
729EXPORT_SYMBOL(dev_mc_init); 728EXPORT_SYMBOL(dev_mc_init);
730
731#ifdef CONFIG_PROC_FS
732#include <linux/seq_file.h>
733
734static int dev_mc_seq_show(struct seq_file *seq, void *v)
735{
736 struct netdev_hw_addr *ha;
737 struct net_device *dev = v;
738
739 if (v == SEQ_START_TOKEN)
740 return 0;
741
742 netif_addr_lock_bh(dev);
743 netdev_for_each_mc_addr(ha, dev) {
744 int i;
745
746 seq_printf(seq, "%-4d %-15s %-5d %-5d ", dev->ifindex,
747 dev->name, ha->refcount, ha->global_use);
748
749 for (i = 0; i < dev->addr_len; i++)
750 seq_printf(seq, "%02x", ha->addr[i]);
751
752 seq_putc(seq, '\n');
753 }
754 netif_addr_unlock_bh(dev);
755 return 0;
756}
757
758static const struct seq_operations dev_mc_seq_ops = {
759 .start = dev_seq_start,
760 .next = dev_seq_next,
761 .stop = dev_seq_stop,
762 .show = dev_mc_seq_show,
763};
764
765static int dev_mc_seq_open(struct inode *inode, struct file *file)
766{
767 return seq_open_net(inode, file, &dev_mc_seq_ops,
768 sizeof(struct seq_net_private));
769}
770
771static const struct file_operations dev_mc_seq_fops = {
772 .owner = THIS_MODULE,
773 .open = dev_mc_seq_open,
774 .read = seq_read,
775 .llseek = seq_lseek,
776 .release = seq_release_net,
777};
778
779#endif
780
781static int __net_init dev_mc_net_init(struct net *net)
782{
783 if (!proc_net_fops_create(net, "dev_mcast", 0, &dev_mc_seq_fops))
784 return -ENOMEM;
785 return 0;
786}
787
788static void __net_exit dev_mc_net_exit(struct net *net)
789{
790 proc_net_remove(net, "dev_mcast");
791}
792
793static struct pernet_operations __net_initdata dev_mc_net_ops = {
794 .init = dev_mc_net_init,
795 .exit = dev_mc_net_exit,
796};
797
798void __init dev_mcast_init(void)
799{
800 register_pernet_subsys(&dev_mc_net_ops);
801}
802
diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c
new file mode 100644
index 000000000000..6cc0481faade
--- /dev/null
+++ b/net/core/dev_ioctl.c
@@ -0,0 +1,576 @@
1#include <linux/kmod.h>
2#include <linux/netdevice.h>
3#include <linux/etherdevice.h>
4#include <linux/rtnetlink.h>
5#include <linux/net_tstamp.h>
6#include <linux/wireless.h>
7#include <net/wext.h>
8
9/*
10 * Map an interface index to its name (SIOCGIFNAME)
11 */
12
13/*
 14 * We need this ioctl for an efficient implementation of the
15 * if_indextoname() function required by the IPv6 API. Without
16 * it, we would have to search all the interfaces to find a
17 * match. --pb
18 */
19
20static int dev_ifname(struct net *net, struct ifreq __user *arg)
21{
22 struct net_device *dev;
23 struct ifreq ifr;
24 unsigned seq;
25
26 /*
27 * Fetch the caller's info block.
28 */
29
30 if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
31 return -EFAULT;
32
33retry:
34 seq = read_seqcount_begin(&devnet_rename_seq);
35 rcu_read_lock();
36 dev = dev_get_by_index_rcu(net, ifr.ifr_ifindex);
37 if (!dev) {
38 rcu_read_unlock();
39 return -ENODEV;
40 }
41
42 strcpy(ifr.ifr_name, dev->name);
43 rcu_read_unlock();
44 if (read_seqcount_retry(&devnet_rename_seq, seq))
45 goto retry;
46
47 if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
48 return -EFAULT;
49 return 0;
50}
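From userspace this is essentially what if_indextoname() boils down to; a sketch:

#include <net/if.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <unistd.h>

static int example_indextoname(int ifindex, char name[IFNAMSIZ])
{
	struct ifreq ifr = { .ifr_ifindex = ifindex };
	int fd = socket(AF_INET, SOCK_DGRAM, 0);
	int err;

	if (fd < 0)
		return -1;
	err = ioctl(fd, SIOCGIFNAME, &ifr);	/* handled by dev_ifname() */
	if (!err)
		memcpy(name, ifr.ifr_name, IFNAMSIZ);
	close(fd);
	return err;
}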
51
52static gifconf_func_t *gifconf_list[NPROTO];
53
54/**
55 * register_gifconf - register a SIOCGIF handler
56 * @family: Address family
57 * @gifconf: Function handler
58 *
59 * Register protocol dependent address dumping routines. The handler
60 * that is passed must not be freed or reused until it has been replaced
61 * by another handler.
62 */
63int register_gifconf(unsigned int family, gifconf_func_t *gifconf)
64{
65 if (family >= NPROTO)
66 return -EINVAL;
67 gifconf_list[family] = gifconf;
68 return 0;
69}
70EXPORT_SYMBOL(register_gifconf);
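IPv4 is the classic caller, registering inet_gifconf for PF_INET; the handler shape, sketched with illustrative names:

/* A gifconf handler must report the space it would need when called
 * with a NULL buffer, and otherwise return the bytes it wrote. */
static int example_gifconf(struct net_device *dev, char __user *buf, int len)
{
	/* walk this family's addresses on dev, emitting struct ifreq */
	return 0;
}

static int __init example_family_init(void)
{
	return register_gifconf(PF_INET, example_gifconf);
}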
71
72/*
73 * Perform a SIOCGIFCONF call. This structure will change
74 * size eventually, and there is nothing I can do about it.
75 * Thus we will need a 'compatibility mode'.
76 */
77
78static int dev_ifconf(struct net *net, char __user *arg)
79{
80 struct ifconf ifc;
81 struct net_device *dev;
82 char __user *pos;
83 int len;
84 int total;
85 int i;
86
87 /*
88 * Fetch the caller's info block.
89 */
90
91 if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
92 return -EFAULT;
93
94 pos = ifc.ifc_buf;
95 len = ifc.ifc_len;
96
97 /*
98 * Loop over the interfaces, and write an info block for each.
99 */
100
101 total = 0;
102 for_each_netdev(net, dev) {
103 for (i = 0; i < NPROTO; i++) {
104 if (gifconf_list[i]) {
105 int done;
106 if (!pos)
107 done = gifconf_list[i](dev, NULL, 0);
108 else
109 done = gifconf_list[i](dev, pos + total,
110 len - total);
111 if (done < 0)
112 return -EFAULT;
113 total += done;
114 }
115 }
116 }
117
118 /*
119 * All done. Write the updated control block back to the caller.
120 */
121 ifc.ifc_len = total;
122
123 /*
124 * Both BSD and Solaris return 0 here, so we do too.
125 */
126 return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0;
127}
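Seen from userspace, the NULL-buffer branch above enables the usual two-pass pattern (sketch; error handling elided, fd is an ordinary AF_INET datagram socket):

#include <net/if.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <sys/socket.h>

static struct ifconf example_ifconf(int fd)
{
	struct ifconf ifc = { .ifc_len = 0, .ifc_buf = NULL };

	ioctl(fd, SIOCGIFCONF, &ifc);	/* pass 1: kernel reports size */
	ifc.ifc_buf = malloc(ifc.ifc_len);
	ioctl(fd, SIOCGIFCONF, &ifc);	/* pass 2: fill the ifreq array */
	return ifc;
}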
128
129/*
130 * Perform the SIOCxIFxxx calls, inside rcu_read_lock()
131 */
132static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cmd)
133{
134 int err;
135 struct net_device *dev = dev_get_by_name_rcu(net, ifr->ifr_name);
136
137 if (!dev)
138 return -ENODEV;
139
140 switch (cmd) {
141 case SIOCGIFFLAGS: /* Get interface flags */
142 ifr->ifr_flags = (short) dev_get_flags(dev);
143 return 0;
144
145 case SIOCGIFMETRIC: /* Get the metric on the interface
146 (currently unused) */
147 ifr->ifr_metric = 0;
148 return 0;
149
150 case SIOCGIFMTU: /* Get the MTU of a device */
151 ifr->ifr_mtu = dev->mtu;
152 return 0;
153
154 case SIOCGIFHWADDR:
155 if (!dev->addr_len)
156 memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data);
157 else
158 memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr,
159 min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
160 ifr->ifr_hwaddr.sa_family = dev->type;
161 return 0;
162
163 case SIOCGIFSLAVE:
164 err = -EINVAL;
165 break;
166
167 case SIOCGIFMAP:
168 ifr->ifr_map.mem_start = dev->mem_start;
169 ifr->ifr_map.mem_end = dev->mem_end;
170 ifr->ifr_map.base_addr = dev->base_addr;
171 ifr->ifr_map.irq = dev->irq;
172 ifr->ifr_map.dma = dev->dma;
173 ifr->ifr_map.port = dev->if_port;
174 return 0;
175
176 case SIOCGIFINDEX:
177 ifr->ifr_ifindex = dev->ifindex;
178 return 0;
179
180 case SIOCGIFTXQLEN:
181 ifr->ifr_qlen = dev->tx_queue_len;
182 return 0;
183
184 default:
185 /* dev_ioctl() should ensure this case
186 * is never reached
187 */
188 WARN_ON(1);
189 err = -ENOTTY;
190 break;
191
192 }
193 return err;
194}
195
196static int net_hwtstamp_validate(struct ifreq *ifr)
197{
198 struct hwtstamp_config cfg;
199 enum hwtstamp_tx_types tx_type;
200 enum hwtstamp_rx_filters rx_filter;
201 int tx_type_valid = 0;
202 int rx_filter_valid = 0;
203
204 if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg)))
205 return -EFAULT;
206
207 if (cfg.flags) /* reserved for future extensions */
208 return -EINVAL;
209
210 tx_type = cfg.tx_type;
211 rx_filter = cfg.rx_filter;
212
213 switch (tx_type) {
214 case HWTSTAMP_TX_OFF:
215 case HWTSTAMP_TX_ON:
216 case HWTSTAMP_TX_ONESTEP_SYNC:
217 tx_type_valid = 1;
218 break;
219 }
220
221 switch (rx_filter) {
222 case HWTSTAMP_FILTER_NONE:
223 case HWTSTAMP_FILTER_ALL:
224 case HWTSTAMP_FILTER_SOME:
225 case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
226 case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
227 case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
228 case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
229 case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
230 case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
231 case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
232 case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
233 case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
234 case HWTSTAMP_FILTER_PTP_V2_EVENT:
235 case HWTSTAMP_FILTER_PTP_V2_SYNC:
236 case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
237 rx_filter_valid = 1;
238 break;
239 }
240
241 if (!tx_type_valid || !rx_filter_valid)
242 return -ERANGE;
243
244 return 0;
245}
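A userspace request that passes this validation could look like the following sketch (interface name and socket are assumptions):

#include <linux/net_tstamp.h>
#include <linux/sockios.h>
#include <net/if.h>
#include <string.h>
#include <sys/ioctl.h>

static int example_enable_hwtstamp(int fd, const char *ifname)
{
	struct hwtstamp_config cfg = {
		.flags	   = 0,			/* reserved, must stay 0 */
		.tx_type   = HWTSTAMP_TX_ON,
		.rx_filter = HWTSTAMP_FILTER_ALL,
	};
	struct ifreq ifr;

	memset(&ifr, 0, sizeof(ifr));
	strncpy(ifr.ifr_name, ifname, IFNAMSIZ - 1);
	ifr.ifr_data = (void *)&cfg;
	return ioctl(fd, SIOCSHWTSTAMP, &ifr);
}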
246
247/*
248 * Perform the SIOCxIFxxx calls, inside rtnl_lock()
249 */
250static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
251{
252 int err;
253 struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
254 const struct net_device_ops *ops;
255
256 if (!dev)
257 return -ENODEV;
258
259 ops = dev->netdev_ops;
260
261 switch (cmd) {
262 case SIOCSIFFLAGS: /* Set interface flags */
263 return dev_change_flags(dev, ifr->ifr_flags);
264
265 case SIOCSIFMETRIC: /* Set the metric on the interface
266 (currently unused) */
267 return -EOPNOTSUPP;
268
269 case SIOCSIFMTU: /* Set the MTU of a device */
270 return dev_set_mtu(dev, ifr->ifr_mtu);
271
272 case SIOCSIFHWADDR:
273 return dev_set_mac_address(dev, &ifr->ifr_hwaddr);
274
275 case SIOCSIFHWBROADCAST:
276 if (ifr->ifr_hwaddr.sa_family != dev->type)
277 return -EINVAL;
278 memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
279 min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
280 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
281 return 0;
282
283 case SIOCSIFMAP:
284 if (ops->ndo_set_config) {
285 if (!netif_device_present(dev))
286 return -ENODEV;
287 return ops->ndo_set_config(dev, &ifr->ifr_map);
288 }
289 return -EOPNOTSUPP;
290
291 case SIOCADDMULTI:
292 if (!ops->ndo_set_rx_mode ||
293 ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
294 return -EINVAL;
295 if (!netif_device_present(dev))
296 return -ENODEV;
297 return dev_mc_add_global(dev, ifr->ifr_hwaddr.sa_data);
298
299 case SIOCDELMULTI:
300 if (!ops->ndo_set_rx_mode ||
301 ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
302 return -EINVAL;
303 if (!netif_device_present(dev))
304 return -ENODEV;
305 return dev_mc_del_global(dev, ifr->ifr_hwaddr.sa_data);
306
307 case SIOCSIFTXQLEN:
308 if (ifr->ifr_qlen < 0)
309 return -EINVAL;
310 dev->tx_queue_len = ifr->ifr_qlen;
311 return 0;
312
313 case SIOCSIFNAME:
314 ifr->ifr_newname[IFNAMSIZ-1] = '\0';
315 return dev_change_name(dev, ifr->ifr_newname);
316
317 case SIOCSHWTSTAMP:
318 err = net_hwtstamp_validate(ifr);
319 if (err)
320 return err;
321 /* fall through */
322
323 /*
324 * Unknown or private ioctl
325 */
326 default:
327 if ((cmd >= SIOCDEVPRIVATE &&
328 cmd <= SIOCDEVPRIVATE + 15) ||
329 cmd == SIOCBONDENSLAVE ||
330 cmd == SIOCBONDRELEASE ||
331 cmd == SIOCBONDSETHWADDR ||
332 cmd == SIOCBONDSLAVEINFOQUERY ||
333 cmd == SIOCBONDINFOQUERY ||
334 cmd == SIOCBONDCHANGEACTIVE ||
335 cmd == SIOCGMIIPHY ||
336 cmd == SIOCGMIIREG ||
337 cmd == SIOCSMIIREG ||
338 cmd == SIOCBRADDIF ||
339 cmd == SIOCBRDELIF ||
340 cmd == SIOCSHWTSTAMP ||
341 cmd == SIOCWANDEV) {
342 err = -EOPNOTSUPP;
343 if (ops->ndo_do_ioctl) {
344 if (netif_device_present(dev))
345 err = ops->ndo_do_ioctl(dev, ifr, cmd);
346 else
347 err = -ENODEV;
348 }
349 } else
350 err = -EINVAL;
351
352 }
353 return err;
354}
355
356/**
357 * dev_load - load a network module
358 * @net: the applicable net namespace
359 * @name: name of interface
360 *
361 * If a network interface is not present and the process has suitable
 362 * privileges, this function loads the module. If module loading is not
363 * available in this kernel then it becomes a nop.
364 */
365
366void dev_load(struct net *net, const char *name)
367{
368 struct net_device *dev;
369 int no_module;
370
371 rcu_read_lock();
372 dev = dev_get_by_name_rcu(net, name);
373 rcu_read_unlock();
374
375 no_module = !dev;
376 if (no_module && capable(CAP_NET_ADMIN))
377 no_module = request_module("netdev-%s", name);
378 if (no_module && capable(CAP_SYS_MODULE)) {
379 if (!request_module("%s", name))
380 pr_warn("Loading kernel module for a network device with CAP_SYS_MODULE (deprecated). Use CAP_NET_ADMIN and alias netdev-%s instead.\n",
381 name);
382 }
383}
384EXPORT_SYMBOL(dev_load);
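The alias that the deprecation warning points at is what MODULE_ALIAS_NETDEV() emits; the tun driver, for instance, declares:

MODULE_ALIAS_NETDEV("tun");	/* expands to MODULE_ALIAS("netdev-tun") */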
385
386/*
387 * This function handles all "interface"-type I/O control requests. The actual
388 * 'doing' part of this is dev_ifsioc above.
389 */
390
391/**
392 * dev_ioctl - network device ioctl
393 * @net: the applicable net namespace
394 * @cmd: command to issue
395 * @arg: pointer to a struct ifreq in user space
396 *
397 * Issue ioctl functions to devices. This is normally called by the
398 * user space syscall interfaces but can sometimes be useful for
399 * other purposes. The return value is the return from the syscall if
400 * positive or a negative errno code on error.
401 */
402
403int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
404{
405 struct ifreq ifr;
406 int ret;
407 char *colon;
408
409 /* One special case: SIOCGIFCONF takes ifconf argument
410 and requires shared lock, because it sleeps writing
411 to user space.
412 */
413
414 if (cmd == SIOCGIFCONF) {
415 rtnl_lock();
416 ret = dev_ifconf(net, (char __user *) arg);
417 rtnl_unlock();
418 return ret;
419 }
420 if (cmd == SIOCGIFNAME)
421 return dev_ifname(net, (struct ifreq __user *)arg);
422
423 if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
424 return -EFAULT;
425
426 ifr.ifr_name[IFNAMSIZ-1] = 0;
427
428 colon = strchr(ifr.ifr_name, ':');
429 if (colon)
430 *colon = 0;
431
432 /*
433 * See which interface the caller is talking about.
434 */
435
436 switch (cmd) {
437 /*
438 * These ioctl calls:
439 * - can be done by all.
440 * - atomic and do not require locking.
441 * - return a value
442 */
443 case SIOCGIFFLAGS:
444 case SIOCGIFMETRIC:
445 case SIOCGIFMTU:
446 case SIOCGIFHWADDR:
447 case SIOCGIFSLAVE:
448 case SIOCGIFMAP:
449 case SIOCGIFINDEX:
450 case SIOCGIFTXQLEN:
451 dev_load(net, ifr.ifr_name);
452 rcu_read_lock();
453 ret = dev_ifsioc_locked(net, &ifr, cmd);
454 rcu_read_unlock();
455 if (!ret) {
456 if (colon)
457 *colon = ':';
458 if (copy_to_user(arg, &ifr,
459 sizeof(struct ifreq)))
460 ret = -EFAULT;
461 }
462 return ret;
463
464 case SIOCETHTOOL:
465 dev_load(net, ifr.ifr_name);
466 rtnl_lock();
467 ret = dev_ethtool(net, &ifr);
468 rtnl_unlock();
469 if (!ret) {
470 if (colon)
471 *colon = ':';
472 if (copy_to_user(arg, &ifr,
473 sizeof(struct ifreq)))
474 ret = -EFAULT;
475 }
476 return ret;
477
478 /*
479 * These ioctl calls:
480 * - require superuser power.
481 * - require strict serialization.
482 * - return a value
483 */
484 case SIOCGMIIPHY:
485 case SIOCGMIIREG:
486 case SIOCSIFNAME:
487 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
488 return -EPERM;
489 dev_load(net, ifr.ifr_name);
490 rtnl_lock();
491 ret = dev_ifsioc(net, &ifr, cmd);
492 rtnl_unlock();
493 if (!ret) {
494 if (colon)
495 *colon = ':';
496 if (copy_to_user(arg, &ifr,
497 sizeof(struct ifreq)))
498 ret = -EFAULT;
499 }
500 return ret;
501
502 /*
503 * These ioctl calls:
504 * - require superuser power.
505 * - require strict serialization.
506 * - do not return a value
507 */
508 case SIOCSIFMAP:
509 case SIOCSIFTXQLEN:
510 if (!capable(CAP_NET_ADMIN))
511 return -EPERM;
512 /* fall through */
513 /*
514 * These ioctl calls:
515 * - require local superuser power.
516 * - require strict serialization.
517 * - do not return a value
518 */
519 case SIOCSIFFLAGS:
520 case SIOCSIFMETRIC:
521 case SIOCSIFMTU:
522 case SIOCSIFHWADDR:
523 case SIOCSIFSLAVE:
524 case SIOCADDMULTI:
525 case SIOCDELMULTI:
526 case SIOCSIFHWBROADCAST:
527 case SIOCSMIIREG:
528 case SIOCBONDENSLAVE:
529 case SIOCBONDRELEASE:
530 case SIOCBONDSETHWADDR:
531 case SIOCBONDCHANGEACTIVE:
532 case SIOCBRADDIF:
533 case SIOCBRDELIF:
534 case SIOCSHWTSTAMP:
535 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
536 return -EPERM;
537 /* fall through */
538 case SIOCBONDSLAVEINFOQUERY:
539 case SIOCBONDINFOQUERY:
540 dev_load(net, ifr.ifr_name);
541 rtnl_lock();
542 ret = dev_ifsioc(net, &ifr, cmd);
543 rtnl_unlock();
544 return ret;
545
546 case SIOCGIFMEM:
547 /* Get the per device memory space. We can add this but
548 * currently do not support it */
549 case SIOCSIFMEM:
550 /* Set the per device memory buffer space.
551 * Not applicable in our case */
552 case SIOCSIFLINK:
553 return -ENOTTY;
554
555 /*
556 * Unknown or private ioctl.
557 */
558 default:
559 if (cmd == SIOCWANDEV ||
560 (cmd >= SIOCDEVPRIVATE &&
561 cmd <= SIOCDEVPRIVATE + 15)) {
562 dev_load(net, ifr.ifr_name);
563 rtnl_lock();
564 ret = dev_ifsioc(net, &ifr, cmd);
565 rtnl_unlock();
566 if (!ret && copy_to_user(arg, &ifr,
567 sizeof(struct ifreq)))
568 ret = -EFAULT;
569 return ret;
570 }
571 /* Take care of Wireless Extensions */
572 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST)
573 return wext_handle_ioctl(net, &ifr, cmd, arg);
574 return -ENOTTY;
575 }
576}
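For completeness, one of the lock-free "get" paths above as exercised from userspace (sketch):

#include <net/if.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <unistd.h>

static int example_get_mtu(const char *ifname)
{
	struct ifreq ifr;
	int fd = socket(AF_INET, SOCK_DGRAM, 0);
	int mtu = -1;

	if (fd < 0)
		return -1;
	memset(&ifr, 0, sizeof(ifr));
	strncpy(ifr.ifr_name, ifname, IFNAMSIZ - 1);
	if (!ioctl(fd, SIOCGIFMTU, &ifr))	/* via dev_ifsioc_locked() */
		mtu = ifr.ifr_mtu;
	close(fd);
	return mtu;
}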
diff --git a/net/core/dst.c b/net/core/dst.c
index ee6153e2cf43..35fd12f1a69c 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -179,6 +179,7 @@ void *dst_alloc(struct dst_ops *ops, struct net_device *dev,
179 dst_init_metrics(dst, dst_default_metrics, true); 179 dst_init_metrics(dst, dst_default_metrics, true);
180 dst->expires = 0UL; 180 dst->expires = 0UL;
181 dst->path = dst; 181 dst->path = dst;
182 dst->from = NULL;
182#ifdef CONFIG_XFRM 183#ifdef CONFIG_XFRM
183 dst->xfrm = NULL; 184 dst->xfrm = NULL;
184#endif 185#endif
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index a8705432e4b1..3e9b2c3e30f0 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -77,6 +77,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN]
77 [NETIF_F_TSO_ECN_BIT] = "tx-tcp-ecn-segmentation", 77 [NETIF_F_TSO_ECN_BIT] = "tx-tcp-ecn-segmentation",
78 [NETIF_F_TSO6_BIT] = "tx-tcp6-segmentation", 78 [NETIF_F_TSO6_BIT] = "tx-tcp6-segmentation",
79 [NETIF_F_FSO_BIT] = "tx-fcoe-segmentation", 79 [NETIF_F_FSO_BIT] = "tx-fcoe-segmentation",
80 [NETIF_F_GSO_GRE_BIT] = "tx-gre-segmentation",
80 81
81 [NETIF_F_FCOE_CRC_BIT] = "tx-checksum-fcoe-crc", 82 [NETIF_F_FCOE_CRC_BIT] = "tx-checksum-fcoe-crc",
82 [NETIF_F_SCTP_CSUM_BIT] = "tx-checksum-sctp", 83 [NETIF_F_SCTP_CSUM_BIT] = "tx-checksum-sctp",
@@ -175,7 +176,7 @@ static int __ethtool_get_sset_count(struct net_device *dev, int sset)
175 if (sset == ETH_SS_FEATURES) 176 if (sset == ETH_SS_FEATURES)
176 return ARRAY_SIZE(netdev_features_strings); 177 return ARRAY_SIZE(netdev_features_strings);
177 178
178 if (ops && ops->get_sset_count && ops->get_strings) 179 if (ops->get_sset_count && ops->get_strings)
179 return ops->get_sset_count(dev, sset); 180 return ops->get_sset_count(dev, sset);
180 else 181 else
181 return -EOPNOTSUPP; 182 return -EOPNOTSUPP;
@@ -311,7 +312,7 @@ int __ethtool_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
311{ 312{
312 ASSERT_RTNL(); 313 ASSERT_RTNL();
313 314
314 if (!dev->ethtool_ops || !dev->ethtool_ops->get_settings) 315 if (!dev->ethtool_ops->get_settings)
315 return -EOPNOTSUPP; 316 return -EOPNOTSUPP;
316 317
317 memset(cmd, 0, sizeof(struct ethtool_cmd)); 318 memset(cmd, 0, sizeof(struct ethtool_cmd));
@@ -355,7 +356,7 @@ static noinline_for_stack int ethtool_get_drvinfo(struct net_device *dev,
355 356
356 memset(&info, 0, sizeof(info)); 357 memset(&info, 0, sizeof(info));
357 info.cmd = ETHTOOL_GDRVINFO; 358 info.cmd = ETHTOOL_GDRVINFO;
358 if (ops && ops->get_drvinfo) { 359 if (ops->get_drvinfo) {
359 ops->get_drvinfo(dev, &info); 360 ops->get_drvinfo(dev, &info);
360 } else if (dev->dev.parent && dev->dev.parent->driver) { 361 } else if (dev->dev.parent && dev->dev.parent->driver) {
361 strlcpy(info.bus_info, dev_name(dev->dev.parent), 362 strlcpy(info.bus_info, dev_name(dev->dev.parent),
@@ -370,7 +371,7 @@ static noinline_for_stack int ethtool_get_drvinfo(struct net_device *dev,
370 * this method of obtaining string set info is deprecated; 371 * this method of obtaining string set info is deprecated;
371 * Use ETHTOOL_GSSET_INFO instead. 372 * Use ETHTOOL_GSSET_INFO instead.
372 */ 373 */
373 if (ops && ops->get_sset_count) { 374 if (ops->get_sset_count) {
374 int rc; 375 int rc;
375 376
376 rc = ops->get_sset_count(dev, ETH_SS_TEST); 377 rc = ops->get_sset_count(dev, ETH_SS_TEST);
@@ -383,9 +384,9 @@ static noinline_for_stack int ethtool_get_drvinfo(struct net_device *dev,
383 if (rc >= 0) 384 if (rc >= 0)
384 info.n_priv_flags = rc; 385 info.n_priv_flags = rc;
385 } 386 }
386 if (ops && ops->get_regs_len) 387 if (ops->get_regs_len)
387 info.regdump_len = ops->get_regs_len(dev); 388 info.regdump_len = ops->get_regs_len(dev);
388 if (ops && ops->get_eeprom_len) 389 if (ops->get_eeprom_len)
389 info.eedump_len = ops->get_eeprom_len(dev); 390 info.eedump_len = ops->get_eeprom_len(dev);
390 391
391 if (copy_to_user(useraddr, &info, sizeof(info))) 392 if (copy_to_user(useraddr, &info, sizeof(info)))
@@ -590,13 +591,14 @@ static noinline_for_stack int ethtool_set_rxfh_indir(struct net_device *dev,
590 struct ethtool_rxnfc rx_rings; 591 struct ethtool_rxnfc rx_rings;
591 u32 user_size, dev_size, i; 592 u32 user_size, dev_size, i;
592 u32 *indir; 593 u32 *indir;
594 const struct ethtool_ops *ops = dev->ethtool_ops;
593 int ret; 595 int ret;
594 596
595 if (!dev->ethtool_ops->get_rxfh_indir_size || 597 if (!ops->get_rxfh_indir_size || !ops->set_rxfh_indir ||
596 !dev->ethtool_ops->set_rxfh_indir || 598 !ops->get_rxnfc)
597 !dev->ethtool_ops->get_rxnfc)
598 return -EOPNOTSUPP; 599 return -EOPNOTSUPP;
599 dev_size = dev->ethtool_ops->get_rxfh_indir_size(dev); 600
601 dev_size = ops->get_rxfh_indir_size(dev);
600 if (dev_size == 0) 602 if (dev_size == 0)
601 return -EOPNOTSUPP; 603 return -EOPNOTSUPP;
602 604
@@ -613,7 +615,7 @@ static noinline_for_stack int ethtool_set_rxfh_indir(struct net_device *dev,
613 return -ENOMEM; 615 return -ENOMEM;
614 616
615 rx_rings.cmd = ETHTOOL_GRXRINGS; 617 rx_rings.cmd = ETHTOOL_GRXRINGS;
616 ret = dev->ethtool_ops->get_rxnfc(dev, &rx_rings, NULL); 618 ret = ops->get_rxnfc(dev, &rx_rings, NULL);
617 if (ret) 619 if (ret)
618 goto out; 620 goto out;
619 621
@@ -639,7 +641,7 @@ static noinline_for_stack int ethtool_set_rxfh_indir(struct net_device *dev,
639 } 641 }
640 } 642 }
641 643
642 ret = dev->ethtool_ops->set_rxfh_indir(dev, indir); 644 ret = ops->set_rxfh_indir(dev, indir);
643 645
644out: 646out:
645 kfree(indir); 647 kfree(indir);
@@ -1082,9 +1084,10 @@ static int ethtool_phys_id(struct net_device *dev, void __user *useraddr)
1082{ 1084{
1083 struct ethtool_value id; 1085 struct ethtool_value id;
1084 static bool busy; 1086 static bool busy;
1087 const struct ethtool_ops *ops = dev->ethtool_ops;
1085 int rc; 1088 int rc;
1086 1089
1087 if (!dev->ethtool_ops->set_phys_id) 1090 if (!ops->set_phys_id)
1088 return -EOPNOTSUPP; 1091 return -EOPNOTSUPP;
1089 1092
1090 if (busy) 1093 if (busy)
@@ -1093,7 +1096,7 @@ static int ethtool_phys_id(struct net_device *dev, void __user *useraddr)
1093 if (copy_from_user(&id, useraddr, sizeof(id))) 1096 if (copy_from_user(&id, useraddr, sizeof(id)))
1094 return -EFAULT; 1097 return -EFAULT;
1095 1098
1096 rc = dev->ethtool_ops->set_phys_id(dev, ETHTOOL_ID_ACTIVE); 1099 rc = ops->set_phys_id(dev, ETHTOOL_ID_ACTIVE);
1097 if (rc < 0) 1100 if (rc < 0)
1098 return rc; 1101 return rc;
1099 1102
@@ -1118,7 +1121,7 @@ static int ethtool_phys_id(struct net_device *dev, void __user *useraddr)
1118 i = n; 1121 i = n;
1119 do { 1122 do {
1120 rtnl_lock(); 1123 rtnl_lock();
1121 rc = dev->ethtool_ops->set_phys_id(dev, 1124 rc = ops->set_phys_id(dev,
1122 (i & 1) ? ETHTOOL_ID_OFF : ETHTOOL_ID_ON); 1125 (i & 1) ? ETHTOOL_ID_OFF : ETHTOOL_ID_ON);
1123 rtnl_unlock(); 1126 rtnl_unlock();
1124 if (rc) 1127 if (rc)
@@ -1133,7 +1136,7 @@ static int ethtool_phys_id(struct net_device *dev, void __user *useraddr)
1133 dev_put(dev); 1136 dev_put(dev);
1134 busy = false; 1137 busy = false;
1135 1138
1136 (void)dev->ethtool_ops->set_phys_id(dev, ETHTOOL_ID_INACTIVE); 1139 (void) ops->set_phys_id(dev, ETHTOOL_ID_INACTIVE);
1137 return rc; 1140 return rc;
1138} 1141}
1139 1142
@@ -1275,7 +1278,7 @@ static int ethtool_get_dump_flag(struct net_device *dev,
1275 struct ethtool_dump dump; 1278 struct ethtool_dump dump;
1276 const struct ethtool_ops *ops = dev->ethtool_ops; 1279 const struct ethtool_ops *ops = dev->ethtool_ops;
1277 1280
1278 if (!dev->ethtool_ops->get_dump_flag) 1281 if (!ops->get_dump_flag)
1279 return -EOPNOTSUPP; 1282 return -EOPNOTSUPP;
1280 1283
1281 if (copy_from_user(&dump, useraddr, sizeof(dump))) 1284 if (copy_from_user(&dump, useraddr, sizeof(dump)))
@@ -1299,8 +1302,7 @@ static int ethtool_get_dump_data(struct net_device *dev,
1299 const struct ethtool_ops *ops = dev->ethtool_ops; 1302 const struct ethtool_ops *ops = dev->ethtool_ops;
1300 void *data = NULL; 1303 void *data = NULL;
1301 1304
1302 if (!dev->ethtool_ops->get_dump_data || 1305 if (!ops->get_dump_data || !ops->get_dump_flag)
1303 !dev->ethtool_ops->get_dump_flag)
1304 return -EOPNOTSUPP; 1306 return -EOPNOTSUPP;
1305 1307
1306 if (copy_from_user(&dump, useraddr, sizeof(dump))) 1308 if (copy_from_user(&dump, useraddr, sizeof(dump)))
@@ -1346,13 +1348,9 @@ static int ethtool_get_ts_info(struct net_device *dev, void __user *useraddr)
1346 info.cmd = ETHTOOL_GET_TS_INFO; 1348 info.cmd = ETHTOOL_GET_TS_INFO;
1347 1349
1348 if (phydev && phydev->drv && phydev->drv->ts_info) { 1350 if (phydev && phydev->drv && phydev->drv->ts_info) {
1349
1350 err = phydev->drv->ts_info(phydev, &info); 1351 err = phydev->drv->ts_info(phydev, &info);
1351 1352 } else if (ops->get_ts_info) {
1352 } else if (dev->ethtool_ops && dev->ethtool_ops->get_ts_info) {
1353
1354 err = ops->get_ts_info(dev, &info); 1353 err = ops->get_ts_info(dev, &info);
1355
1356 } else { 1354 } else {
1357 info.so_timestamping = 1355 info.so_timestamping =
1358 SOF_TIMESTAMPING_RX_SOFTWARE | 1356 SOF_TIMESTAMPING_RX_SOFTWARE |
diff --git a/net/core/filter.c b/net/core/filter.c
index c23543cba132..2e20b55a7830 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -532,6 +532,7 @@ int sk_chk_filter(struct sock_filter *filter, unsigned int flen)
532 [BPF_JMP|BPF_JSET|BPF_X] = BPF_S_JMP_JSET_X, 532 [BPF_JMP|BPF_JSET|BPF_X] = BPF_S_JMP_JSET_X,
533 }; 533 };
534 int pc; 534 int pc;
535 bool anc_found;
535 536
536 if (flen == 0 || flen > BPF_MAXINSNS) 537 if (flen == 0 || flen > BPF_MAXINSNS)
537 return -EINVAL; 538 return -EINVAL;
@@ -592,8 +593,10 @@ int sk_chk_filter(struct sock_filter *filter, unsigned int flen)
592 case BPF_S_LD_W_ABS: 593 case BPF_S_LD_W_ABS:
593 case BPF_S_LD_H_ABS: 594 case BPF_S_LD_H_ABS:
594 case BPF_S_LD_B_ABS: 595 case BPF_S_LD_B_ABS:
596 anc_found = false;
595#define ANCILLARY(CODE) case SKF_AD_OFF + SKF_AD_##CODE: \ 597#define ANCILLARY(CODE) case SKF_AD_OFF + SKF_AD_##CODE: \
596 code = BPF_S_ANC_##CODE; \ 598 code = BPF_S_ANC_##CODE; \
599 anc_found = true; \
597 break 600 break
598 switch (ftest->k) { 601 switch (ftest->k) {
599 ANCILLARY(PROTOCOL); 602 ANCILLARY(PROTOCOL);
@@ -610,6 +613,10 @@ int sk_chk_filter(struct sock_filter *filter, unsigned int flen)
610 ANCILLARY(VLAN_TAG); 613 ANCILLARY(VLAN_TAG);
611 ANCILLARY(VLAN_TAG_PRESENT); 614 ANCILLARY(VLAN_TAG_PRESENT);
612 } 615 }
616
617 /* ancillary operation unknown or unsupported */
 618 if (!anc_found && ftest->k >= SKF_AD_OFF)
619 return -EINVAL;
613 } 620 }
614 ftest->code = code; 621 ftest->code = code;
615 } 622 }
@@ -714,6 +721,9 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
714 unsigned int fsize = sizeof(struct sock_filter) * fprog->len; 721 unsigned int fsize = sizeof(struct sock_filter) * fprog->len;
715 int err; 722 int err;
716 723
724 if (sock_flag(sk, SOCK_FILTER_LOCKED))
725 return -EPERM;
726
717 /* Make sure new filter is there and in the right amounts. */ 727 /* Make sure new filter is there and in the right amounts. */
718 if (fprog->filter == NULL) 728 if (fprog->filter == NULL)
719 return -EINVAL; 729 return -EINVAL;
@@ -750,6 +760,9 @@ int sk_detach_filter(struct sock *sk)
750 int ret = -ENOENT; 760 int ret = -ENOENT;
751 struct sk_filter *filter; 761 struct sk_filter *filter;
752 762
763 if (sock_flag(sk, SOCK_FILTER_LOCKED))
764 return -EPERM;
765
753 filter = rcu_dereference_protected(sk->sk_filter, 766 filter = rcu_dereference_protected(sk->sk_filter,
754 sock_owned_by_user(sk)); 767 sock_owned_by_user(sk));
755 if (filter) { 768 if (filter) {
diff --git a/net/core/flow.c b/net/core/flow.c
index b0901ee5a002..c56ea6f7f6c7 100644
--- a/net/core/flow.c
+++ b/net/core/flow.c
@@ -132,14 +132,14 @@ static void __flow_cache_shrink(struct flow_cache *fc,
132 int shrink_to) 132 int shrink_to)
133{ 133{
134 struct flow_cache_entry *fle; 134 struct flow_cache_entry *fle;
135 struct hlist_node *entry, *tmp; 135 struct hlist_node *tmp;
136 LIST_HEAD(gc_list); 136 LIST_HEAD(gc_list);
137 int i, deleted = 0; 137 int i, deleted = 0;
138 138
139 for (i = 0; i < flow_cache_hash_size(fc); i++) { 139 for (i = 0; i < flow_cache_hash_size(fc); i++) {
140 int saved = 0; 140 int saved = 0;
141 141
142 hlist_for_each_entry_safe(fle, entry, tmp, 142 hlist_for_each_entry_safe(fle, tmp,
143 &fcp->hash_table[i], u.hlist) { 143 &fcp->hash_table[i], u.hlist) {
144 if (saved < shrink_to && 144 if (saved < shrink_to &&
145 flow_entry_valid(fle)) { 145 flow_entry_valid(fle)) {
@@ -211,7 +211,6 @@ flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir,
211 struct flow_cache *fc = &flow_cache_global; 211 struct flow_cache *fc = &flow_cache_global;
212 struct flow_cache_percpu *fcp; 212 struct flow_cache_percpu *fcp;
213 struct flow_cache_entry *fle, *tfle; 213 struct flow_cache_entry *fle, *tfle;
214 struct hlist_node *entry;
215 struct flow_cache_object *flo; 214 struct flow_cache_object *flo;
216 size_t keysize; 215 size_t keysize;
217 unsigned int hash; 216 unsigned int hash;
@@ -235,7 +234,7 @@ flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir,
235 flow_new_hash_rnd(fc, fcp); 234 flow_new_hash_rnd(fc, fcp);
236 235
237 hash = flow_hash_code(fc, fcp, key, keysize); 236 hash = flow_hash_code(fc, fcp, key, keysize);
238 hlist_for_each_entry(tfle, entry, &fcp->hash_table[hash], u.hlist) { 237 hlist_for_each_entry(tfle, &fcp->hash_table[hash], u.hlist) {
239 if (tfle->net == net && 238 if (tfle->net == net &&
240 tfle->family == family && 239 tfle->family == family &&
241 tfle->dir == dir && 240 tfle->dir == dir &&
@@ -286,7 +285,7 @@ nocache:
286 else 285 else
287 fle->genid--; 286 fle->genid--;
288 } else { 287 } else {
289 if (flo && !IS_ERR(flo)) 288 if (!IS_ERR_OR_NULL(flo))
290 flo->ops->delete(flo); 289 flo->ops->delete(flo);
291 } 290 }
292ret_object: 291ret_object:
@@ -301,13 +300,13 @@ static void flow_cache_flush_tasklet(unsigned long data)
301 struct flow_cache *fc = info->cache; 300 struct flow_cache *fc = info->cache;
302 struct flow_cache_percpu *fcp; 301 struct flow_cache_percpu *fcp;
303 struct flow_cache_entry *fle; 302 struct flow_cache_entry *fle;
304 struct hlist_node *entry, *tmp; 303 struct hlist_node *tmp;
305 LIST_HEAD(gc_list); 304 LIST_HEAD(gc_list);
306 int i, deleted = 0; 305 int i, deleted = 0;
307 306
308 fcp = this_cpu_ptr(fc->percpu); 307 fcp = this_cpu_ptr(fc->percpu);
309 for (i = 0; i < flow_cache_hash_size(fc); i++) { 308 for (i = 0; i < flow_cache_hash_size(fc); i++) {
310 hlist_for_each_entry_safe(fle, entry, tmp, 309 hlist_for_each_entry_safe(fle, tmp,
311 &fcp->hash_table[i], u.hlist) { 310 &fcp->hash_table[i], u.hlist) {
312 if (flow_entry_valid(fle)) 311 if (flow_entry_valid(fle))
313 continue; 312 continue;
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 466820b6e344..9d4c7201400d 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -143,3 +143,176 @@ ipv6:
143 return true; 143 return true;
144} 144}
145EXPORT_SYMBOL(skb_flow_dissect); 145EXPORT_SYMBOL(skb_flow_dissect);
146
147static u32 hashrnd __read_mostly;
148
149/*
150 * __skb_get_rxhash: calculate a flow hash based on src/dst addresses
151 * and src/dst port numbers. Sets rxhash in skb to non-zero hash value
152 * on success, zero indicates no valid hash. Also, sets l4_rxhash in skb
153 * if hash is a canonical 4-tuple hash over transport ports.
154 */
155void __skb_get_rxhash(struct sk_buff *skb)
156{
157 struct flow_keys keys;
158 u32 hash;
159
160 if (!skb_flow_dissect(skb, &keys))
161 return;
162
163 if (keys.ports)
164 skb->l4_rxhash = 1;
165
166 /* get a consistent hash (same value on both flow directions) */
167 if (((__force u32)keys.dst < (__force u32)keys.src) ||
168 (((__force u32)keys.dst == (__force u32)keys.src) &&
169 ((__force u16)keys.port16[1] < (__force u16)keys.port16[0]))) {
170 swap(keys.dst, keys.src);
171 swap(keys.port16[0], keys.port16[1]);
172 }
173
174 hash = jhash_3words((__force u32)keys.dst,
175 (__force u32)keys.src,
176 (__force u32)keys.ports, hashrnd);
177 if (!hash)
178 hash = 1;
179
180 skb->rxhash = hash;
181}
182EXPORT_SYMBOL(__skb_get_rxhash);
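The conditional swap is what makes the hash direction-independent; restated as a standalone helper (hypothetical, for illustration):

/* Applied to the flow_keys of either direction of one flow, this
 * yields identical jhash_3words() input, hence the same rxhash. */
static void example_canonicalize(struct flow_keys *keys)
{
	if (((__force u32)keys->dst < (__force u32)keys->src) ||
	    (((__force u32)keys->dst == (__force u32)keys->src) &&
	     ((__force u16)keys->port16[1] < (__force u16)keys->port16[0]))) {
		swap(keys->dst, keys->src);
		swap(keys->port16[0], keys->port16[1]);
	}
}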
183
184/*
 185 * Returns a Tx hash based on the given packet descriptor and the number
 186 * of Tx queues to be used as a distribution range.
187 */
188u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb,
189 unsigned int num_tx_queues)
190{
191 u32 hash;
192 u16 qoffset = 0;
193 u16 qcount = num_tx_queues;
194
195 if (skb_rx_queue_recorded(skb)) {
196 hash = skb_get_rx_queue(skb);
197 while (unlikely(hash >= num_tx_queues))
198 hash -= num_tx_queues;
199 return hash;
200 }
201
202 if (dev->num_tc) {
203 u8 tc = netdev_get_prio_tc_map(dev, skb->priority);
204 qoffset = dev->tc_to_txq[tc].offset;
205 qcount = dev->tc_to_txq[tc].count;
206 }
207
208 if (skb->sk && skb->sk->sk_hash)
209 hash = skb->sk->sk_hash;
210 else
211 hash = (__force u16) skb->protocol;
212 hash = jhash_1word(hash, hashrnd);
213
214 return (u16) (((u64) hash * qcount) >> 32) + qoffset;
215}
216EXPORT_SYMBOL(__skb_tx_hash);
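The return statement maps the 32-bit hash onto [0, qcount) with a multiply-shift rather than a modulo; isolated, the trick is:

/* hash/2^32 is a fraction in [0, 1); scaling it by qcount and
 * truncating spreads hashes uniformly across the queue range. */
static inline u16 example_scale_hash(u32 hash, u16 qcount)
{
	return (u16)(((u64)hash * qcount) >> 32);
}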
217
218static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index)
219{
220 if (unlikely(queue_index >= dev->real_num_tx_queues)) {
221 net_warn_ratelimited("%s selects TX queue %d, but real number of TX queues is %d\n",
222 dev->name, queue_index,
223 dev->real_num_tx_queues);
224 return 0;
225 }
226 return queue_index;
227}
228
229static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
230{
231#ifdef CONFIG_XPS
232 struct xps_dev_maps *dev_maps;
233 struct xps_map *map;
234 int queue_index = -1;
235
236 rcu_read_lock();
237 dev_maps = rcu_dereference(dev->xps_maps);
238 if (dev_maps) {
239 map = rcu_dereference(
240 dev_maps->cpu_map[raw_smp_processor_id()]);
241 if (map) {
242 if (map->len == 1)
243 queue_index = map->queues[0];
244 else {
245 u32 hash;
246 if (skb->sk && skb->sk->sk_hash)
247 hash = skb->sk->sk_hash;
248 else
249 hash = (__force u16) skb->protocol ^
250 skb->rxhash;
251 hash = jhash_1word(hash, hashrnd);
252 queue_index = map->queues[
253 ((u64)hash * map->len) >> 32];
254 }
255 if (unlikely(queue_index >= dev->real_num_tx_queues))
256 queue_index = -1;
257 }
258 }
259 rcu_read_unlock();
260
261 return queue_index;
262#else
263 return -1;
264#endif
265}
266
267u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb)
268{
269 struct sock *sk = skb->sk;
270 int queue_index = sk_tx_queue_get(sk);
271
272 if (queue_index < 0 || skb->ooo_okay ||
273 queue_index >= dev->real_num_tx_queues) {
274 int new_index = get_xps_queue(dev, skb);
275 if (new_index < 0)
276 new_index = skb_tx_hash(dev, skb);
277
278 if (queue_index != new_index && sk) {
279 struct dst_entry *dst =
280 rcu_dereference_check(sk->sk_dst_cache, 1);
281
282 if (dst && skb_dst(skb) == dst)
 283 sk_tx_queue_set(sk, new_index);
284
285 }
286
287 queue_index = new_index;
288 }
289
290 return queue_index;
291}
292EXPORT_SYMBOL(__netdev_pick_tx);
293
294struct netdev_queue *netdev_pick_tx(struct net_device *dev,
295 struct sk_buff *skb)
296{
297 int queue_index = 0;
298
299 if (dev->real_num_tx_queues != 1) {
300 const struct net_device_ops *ops = dev->netdev_ops;
301 if (ops->ndo_select_queue)
302 queue_index = ops->ndo_select_queue(dev, skb);
303 else
304 queue_index = __netdev_pick_tx(dev, skb);
305 queue_index = dev_cap_txqueue(dev, queue_index);
306 }
307
308 skb_set_queue_mapping(skb, queue_index);
309 return netdev_get_tx_queue(dev, queue_index);
310}
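A driver wanting custom steering can still fall back to the default policy via __netdev_pick_tx(); a sketch (the reserved-queue scheme is invented):

static u16 example_select_queue(struct net_device *dev, struct sk_buff *skb)
{
	/* steer link-control traffic to a dedicated last queue */
	if (skb->priority == TC_PRIO_CONTROL)
		return dev->real_num_tx_queues - 1;
	return __netdev_pick_tx(dev, skb);
}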
311
312static int __init initialize_hashrnd(void)
313{
314 get_random_bytes(&hashrnd, sizeof(hashrnd));
315 return 0;
316}
317
318late_initcall_sync(initialize_hashrnd);
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index c815f285e5ab..3863b8f639c5 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -290,15 +290,7 @@ static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device
290 goto out_entries; 290 goto out_entries;
291 } 291 }
292 292
293 if (tbl->entry_size) 293 n = kzalloc(tbl->entry_size + dev->neigh_priv_len, GFP_ATOMIC);
294 n = kzalloc(tbl->entry_size, GFP_ATOMIC);
295 else {
296 int sz = sizeof(*n) + tbl->key_len;
297
298 sz = ALIGN(sz, NEIGH_PRIV_ALIGN);
299 sz += dev->neigh_priv_len;
300 n = kzalloc(sz, GFP_ATOMIC);
301 }
302 if (!n) 294 if (!n)
303 goto out_entries; 295 goto out_entries;
304 296
@@ -778,6 +770,9 @@ static void neigh_periodic_work(struct work_struct *work)
778 nht = rcu_dereference_protected(tbl->nht, 770 nht = rcu_dereference_protected(tbl->nht,
779 lockdep_is_held(&tbl->lock)); 771 lockdep_is_held(&tbl->lock));
780 772
773 if (atomic_read(&tbl->entries) < tbl->gc_thresh1)
774 goto out;
775
781 /* 776 /*
782 * periodically recompute ReachableTime from random function 777 * periodically recompute ReachableTime from random function
783 */ 778 */
@@ -832,6 +827,7 @@ next_elt:
832 nht = rcu_dereference_protected(tbl->nht, 827 nht = rcu_dereference_protected(tbl->nht,
833 lockdep_is_held(&tbl->lock)); 828 lockdep_is_held(&tbl->lock));
834 } 829 }
830out:
835 /* Cycle through all hash buckets every base_reachable_time/2 ticks. 831 /* Cycle through all hash buckets every base_reachable_time/2 ticks.
836 * ARP entry timeouts range from 1/2 base_reachable_time to 3/2 832 * ARP entry timeouts range from 1/2 base_reachable_time to 3/2
837 * base_reachable_time. 833 * base_reachable_time.
@@ -1542,6 +1538,12 @@ static void neigh_table_init_no_netlink(struct neigh_table *tbl)
1542 if (!tbl->nht || !tbl->phash_buckets) 1538 if (!tbl->nht || !tbl->phash_buckets)
1543 panic("cannot allocate neighbour cache hashes"); 1539 panic("cannot allocate neighbour cache hashes");
1544 1540
1541 if (!tbl->entry_size)
1542 tbl->entry_size = ALIGN(offsetof(struct neighbour, primary_key) +
1543 tbl->key_len, NEIGH_PRIV_ALIGN);
1544 else
1545 WARN_ON(tbl->entry_size % NEIGH_PRIV_ALIGN);
1546
1545 rwlock_init(&tbl->lock); 1547 rwlock_init(&tbl->lock);
1546 INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work); 1548 INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work);
1547 schedule_delayed_work(&tbl->gc_work, tbl->parms.reachable_time); 1549 schedule_delayed_work(&tbl->gc_work, tbl->parms.reachable_time);
diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c
new file mode 100644
index 000000000000..3174f1998ee6
--- /dev/null
+++ b/net/core/net-procfs.c
@@ -0,0 +1,411 @@
1#include <linux/netdevice.h>
2#include <linux/proc_fs.h>
3#include <linux/seq_file.h>
4#include <net/wext.h>
5
6#define BUCKET_SPACE (32 - NETDEV_HASHBITS - 1)
7
8#define get_bucket(x) ((x) >> BUCKET_SPACE)
9#define get_offset(x) ((x) & ((1 << BUCKET_SPACE) - 1))
10#define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))
11
12extern struct list_head ptype_all __read_mostly;
13extern struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
14
15static inline struct net_device *dev_from_same_bucket(struct seq_file *seq, loff_t *pos)
16{
17 struct net *net = seq_file_net(seq);
18 struct net_device *dev;
19 struct hlist_head *h;
20 unsigned int count = 0, offset = get_offset(*pos);
21
22 h = &net->dev_name_head[get_bucket(*pos)];
23 hlist_for_each_entry_rcu(dev, h, name_hlist) {
24 if (++count == offset)
25 return dev;
26 }
27
28 return NULL;
29}
30
31static inline struct net_device *dev_from_bucket(struct seq_file *seq, loff_t *pos)
32{
33 struct net_device *dev;
34 unsigned int bucket;
35
36 do {
37 dev = dev_from_same_bucket(seq, pos);
38 if (dev)
39 return dev;
40
41 bucket = get_bucket(*pos) + 1;
42 *pos = set_bucket_offset(bucket, 1);
43 } while (bucket < NETDEV_HASHENTRIES);
44
45 return NULL;
46}
47
48/*
49 * This is invoked by the /proc filesystem handler to display a device
50 * in detail.
51 */
52static void *dev_seq_start(struct seq_file *seq, loff_t *pos)
53 __acquires(RCU)
54{
55 rcu_read_lock();
56 if (!*pos)
57 return SEQ_START_TOKEN;
58
59 if (get_bucket(*pos) >= NETDEV_HASHENTRIES)
60 return NULL;
61
62 return dev_from_bucket(seq, pos);
63}
64
65static void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
66{
67 ++*pos;
68 return dev_from_bucket(seq, pos);
69}
70
71static void dev_seq_stop(struct seq_file *seq, void *v)
72 __releases(RCU)
73{
74 rcu_read_unlock();
75}
76
77static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
78{
79 struct rtnl_link_stats64 temp;
80 const struct rtnl_link_stats64 *stats = dev_get_stats(dev, &temp);
81
82 seq_printf(seq, "%6s: %7llu %7llu %4llu %4llu %4llu %5llu %10llu %9llu "
83 "%8llu %7llu %4llu %4llu %4llu %5llu %7llu %10llu\n",
84 dev->name, stats->rx_bytes, stats->rx_packets,
85 stats->rx_errors,
86 stats->rx_dropped + stats->rx_missed_errors,
87 stats->rx_fifo_errors,
88 stats->rx_length_errors + stats->rx_over_errors +
89 stats->rx_crc_errors + stats->rx_frame_errors,
90 stats->rx_compressed, stats->multicast,
91 stats->tx_bytes, stats->tx_packets,
92 stats->tx_errors, stats->tx_dropped,
93 stats->tx_fifo_errors, stats->collisions,
94 stats->tx_carrier_errors +
95 stats->tx_aborted_errors +
96 stats->tx_window_errors +
97 stats->tx_heartbeat_errors,
98 stats->tx_compressed);
99}
100
101/*
102 * Called from the PROCfs module. This now uses the new arbitrary sized
103 * /proc/net interface to create /proc/net/dev
104 */
105static int dev_seq_show(struct seq_file *seq, void *v)
106{
107 if (v == SEQ_START_TOKEN)
108 seq_puts(seq, "Inter-| Receive "
109 " | Transmit\n"
110 " face |bytes packets errs drop fifo frame "
111 "compressed multicast|bytes packets errs "
112 "drop fifo colls carrier compressed\n");
113 else
114 dev_seq_printf_stats(seq, v);
115 return 0;
116}
117
118static struct softnet_data *softnet_get_online(loff_t *pos)
119{
120 struct softnet_data *sd = NULL;
121
122 while (*pos < nr_cpu_ids)
123 if (cpu_online(*pos)) {
124 sd = &per_cpu(softnet_data, *pos);
125 break;
126 } else
127 ++*pos;
128 return sd;
129}
130
131static void *softnet_seq_start(struct seq_file *seq, loff_t *pos)
132{
133 return softnet_get_online(pos);
134}
135
136static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
137{
138 ++*pos;
139 return softnet_get_online(pos);
140}
141
142static void softnet_seq_stop(struct seq_file *seq, void *v)
143{
144}
145
146static int softnet_seq_show(struct seq_file *seq, void *v)
147{
148 struct softnet_data *sd = v;
149
150 seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
151 sd->processed, sd->dropped, sd->time_squeeze, 0,
152 0, 0, 0, 0, /* was fastroute */
153 sd->cpu_collision, sd->received_rps);
154 return 0;
155}
156
157static const struct seq_operations dev_seq_ops = {
158 .start = dev_seq_start,
159 .next = dev_seq_next,
160 .stop = dev_seq_stop,
161 .show = dev_seq_show,
162};
163
164static int dev_seq_open(struct inode *inode, struct file *file)
165{
166 return seq_open_net(inode, file, &dev_seq_ops,
167 sizeof(struct seq_net_private));
168}
169
170static const struct file_operations dev_seq_fops = {
171 .owner = THIS_MODULE,
172 .open = dev_seq_open,
173 .read = seq_read,
174 .llseek = seq_lseek,
175 .release = seq_release_net,
176};
177
178static const struct seq_operations softnet_seq_ops = {
179 .start = softnet_seq_start,
180 .next = softnet_seq_next,
181 .stop = softnet_seq_stop,
182 .show = softnet_seq_show,
183};
184
185static int softnet_seq_open(struct inode *inode, struct file *file)
186{
187 return seq_open(file, &softnet_seq_ops);
188}
189
190static const struct file_operations softnet_seq_fops = {
191 .owner = THIS_MODULE,
192 .open = softnet_seq_open,
193 .read = seq_read,
194 .llseek = seq_lseek,
195 .release = seq_release,
196};
197
198static void *ptype_get_idx(loff_t pos)
199{
200 struct packet_type *pt = NULL;
201 loff_t i = 0;
202 int t;
203
204 list_for_each_entry_rcu(pt, &ptype_all, list) {
205 if (i == pos)
206 return pt;
207 ++i;
208 }
209
210 for (t = 0; t < PTYPE_HASH_SIZE; t++) {
211 list_for_each_entry_rcu(pt, &ptype_base[t], list) {
212 if (i == pos)
213 return pt;
214 ++i;
215 }
216 }
217 return NULL;
218}
219
220static void *ptype_seq_start(struct seq_file *seq, loff_t *pos)
221 __acquires(RCU)
222{
223 rcu_read_lock();
224 return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN;
225}
226
227static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
228{
229 struct packet_type *pt;
230 struct list_head *nxt;
231 int hash;
232
233 ++*pos;
234 if (v == SEQ_START_TOKEN)
235 return ptype_get_idx(0);
236
237 pt = v;
238 nxt = pt->list.next;
239 if (pt->type == htons(ETH_P_ALL)) {
240 if (nxt != &ptype_all)
241 goto found;
242 hash = 0;
243 nxt = ptype_base[0].next;
244 } else
245 hash = ntohs(pt->type) & PTYPE_HASH_MASK;
246
247 while (nxt == &ptype_base[hash]) {
248 if (++hash >= PTYPE_HASH_SIZE)
249 return NULL;
250 nxt = ptype_base[hash].next;
251 }
252found:
253 return list_entry(nxt, struct packet_type, list);
254}
255
256static void ptype_seq_stop(struct seq_file *seq, void *v)
257 __releases(RCU)
258{
259 rcu_read_unlock();
260}
261
262static int ptype_seq_show(struct seq_file *seq, void *v)
263{
264 struct packet_type *pt = v;
265
266 if (v == SEQ_START_TOKEN)
267 seq_puts(seq, "Type Device Function\n");
268 else if (pt->dev == NULL || dev_net(pt->dev) == seq_file_net(seq)) {
269 if (pt->type == htons(ETH_P_ALL))
270 seq_puts(seq, "ALL ");
271 else
272 seq_printf(seq, "%04x", ntohs(pt->type));
273
274 seq_printf(seq, " %-8s %pF\n",
275 pt->dev ? pt->dev->name : "", pt->func);
276 }
277
278 return 0;
279}
280
281static const struct seq_operations ptype_seq_ops = {
282 .start = ptype_seq_start,
283 .next = ptype_seq_next,
284 .stop = ptype_seq_stop,
285 .show = ptype_seq_show,
286};
287
288static int ptype_seq_open(struct inode *inode, struct file *file)
289{
290 return seq_open_net(inode, file, &ptype_seq_ops,
291 sizeof(struct seq_net_private));
292}
293
294static const struct file_operations ptype_seq_fops = {
295 .owner = THIS_MODULE,
296 .open = ptype_seq_open,
297 .read = seq_read,
298 .llseek = seq_lseek,
299 .release = seq_release_net,
300};
301
302
303static int __net_init dev_proc_net_init(struct net *net)
304{
305 int rc = -ENOMEM;
306
307 if (!proc_create("dev", S_IRUGO, net->proc_net, &dev_seq_fops))
308 goto out;
309 if (!proc_create("softnet_stat", S_IRUGO, net->proc_net,
310 &softnet_seq_fops))
311 goto out_dev;
312 if (!proc_create("ptype", S_IRUGO, net->proc_net, &ptype_seq_fops))
313 goto out_softnet;
314
315 if (wext_proc_init(net))
316 goto out_ptype;
317 rc = 0;
318out:
319 return rc;
320out_ptype:
321 remove_proc_entry("ptype", net->proc_net);
322out_softnet:
323 remove_proc_entry("softnet_stat", net->proc_net);
324out_dev:
325 remove_proc_entry("dev", net->proc_net);
326 goto out;
327}
328
329static void __net_exit dev_proc_net_exit(struct net *net)
330{
331 wext_proc_exit(net);
332
333 remove_proc_entry("ptype", net->proc_net);
334 remove_proc_entry("softnet_stat", net->proc_net);
335 remove_proc_entry("dev", net->proc_net);
336}
337
338static struct pernet_operations __net_initdata dev_proc_ops = {
339 .init = dev_proc_net_init,
340 .exit = dev_proc_net_exit,
341};
342
343static int dev_mc_seq_show(struct seq_file *seq, void *v)
344{
345 struct netdev_hw_addr *ha;
346 struct net_device *dev = v;
347
348 if (v == SEQ_START_TOKEN)
349 return 0;
350
351 netif_addr_lock_bh(dev);
352 netdev_for_each_mc_addr(ha, dev) {
353 int i;
354
355 seq_printf(seq, "%-4d %-15s %-5d %-5d ", dev->ifindex,
356 dev->name, ha->refcount, ha->global_use);
357
358 for (i = 0; i < dev->addr_len; i++)
359 seq_printf(seq, "%02x", ha->addr[i]);
360
361 seq_putc(seq, '\n');
362 }
363 netif_addr_unlock_bh(dev);
364 return 0;
365}
366
367static const struct seq_operations dev_mc_seq_ops = {
368 .start = dev_seq_start,
369 .next = dev_seq_next,
370 .stop = dev_seq_stop,
371 .show = dev_mc_seq_show,
372};
373
374static int dev_mc_seq_open(struct inode *inode, struct file *file)
375{
376 return seq_open_net(inode, file, &dev_mc_seq_ops,
377 sizeof(struct seq_net_private));
378}
379
380static const struct file_operations dev_mc_seq_fops = {
381 .owner = THIS_MODULE,
382 .open = dev_mc_seq_open,
383 .read = seq_read,
384 .llseek = seq_lseek,
385 .release = seq_release_net,
386};
387
388static int __net_init dev_mc_net_init(struct net *net)
389{
390 if (!proc_create("dev_mcast", 0, net->proc_net, &dev_mc_seq_fops))
391 return -ENOMEM;
392 return 0;
393}
394
395static void __net_exit dev_mc_net_exit(struct net *net)
396{
397 remove_proc_entry("dev_mcast", net->proc_net);
398}
399
400static struct pernet_operations __net_initdata dev_mc_net_ops = {
401 .init = dev_mc_net_init,
402 .exit = dev_mc_net_exit,
403};
404
405int __init dev_proc_init(void)
406{
407 int ret = register_pernet_subsys(&dev_proc_ops);
408 if (!ret)
409 return register_pernet_subsys(&dev_mc_net_ops);
410 return ret;
411}
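
Worth noting about the new file: the seq_file position packs a name-hash bucket and a 1-based offset within that bucket into one loff_t, so iteration can resume mid-bucket between reads while *pos == 0 stays free for the SEQ_START_TOKEN header line. A userspace sketch of the same encoding (constants mirror the macros above; NETDEV_HASHBITS is assumed to be 8):

#include <stdio.h>

#define NETDEV_HASHBITS	8
#define BUCKET_SPACE	(32 - NETDEV_HASHBITS - 1)

static unsigned get_bucket(unsigned long long pos) { return pos >> BUCKET_SPACE; }
static unsigned get_offset(unsigned long long pos) { return pos & ((1u << BUCKET_SPACE) - 1); }

static unsigned long long set_bucket_offset(unsigned b, unsigned o)
{
	return ((unsigned long long)b << BUCKET_SPACE) | o;
}

int main(void)
{
	unsigned long long pos = set_bucket_offset(3, 17);
	/* prints: bucket=3 offset=17 */
	printf("bucket=%u offset=%u\n", get_bucket(pos), get_offset(pos));
	return 0;
}
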
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 28c5f5aa7ca7..7427ab5e27d8 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -21,6 +21,7 @@
21#include <linux/vmalloc.h> 21#include <linux/vmalloc.h>
22#include <linux/export.h> 22#include <linux/export.h>
23#include <linux/jiffies.h> 23#include <linux/jiffies.h>
24#include <linux/pm_runtime.h>
24 25
25#include "net-sysfs.h" 26#include "net-sysfs.h"
26 27
@@ -126,6 +127,19 @@ static ssize_t show_broadcast(struct device *dev,
126 return -EINVAL; 127 return -EINVAL;
127} 128}
128 129
130static int change_carrier(struct net_device *net, unsigned long new_carrier)
131{
132 if (!netif_running(net))
133 return -EINVAL;
134 return dev_change_carrier(net, (bool) new_carrier);
135}
136
137static ssize_t store_carrier(struct device *dev, struct device_attribute *attr,
138 const char *buf, size_t len)
139{
140 return netdev_store(dev, attr, buf, len, change_carrier);
141}
142
129static ssize_t show_carrier(struct device *dev, 143static ssize_t show_carrier(struct device *dev,
130 struct device_attribute *attr, char *buf) 144 struct device_attribute *attr, char *buf)
131{ 145{
@@ -331,7 +345,7 @@ static struct device_attribute net_class_attributes[] = {
331 __ATTR(link_mode, S_IRUGO, show_link_mode, NULL), 345 __ATTR(link_mode, S_IRUGO, show_link_mode, NULL),
332 __ATTR(address, S_IRUGO, show_address, NULL), 346 __ATTR(address, S_IRUGO, show_address, NULL),
333 __ATTR(broadcast, S_IRUGO, show_broadcast, NULL), 347 __ATTR(broadcast, S_IRUGO, show_broadcast, NULL),
334 __ATTR(carrier, S_IRUGO, show_carrier, NULL), 348 __ATTR(carrier, S_IRUGO | S_IWUSR, show_carrier, store_carrier),
335 __ATTR(speed, S_IRUGO, show_speed, NULL), 349 __ATTR(speed, S_IRUGO, show_speed, NULL),
336 __ATTR(duplex, S_IRUGO, show_duplex, NULL), 350 __ATTR(duplex, S_IRUGO, show_duplex, NULL),
337 __ATTR(dormant, S_IRUGO, show_dormant, NULL), 351 __ATTR(dormant, S_IRUGO, show_dormant, NULL),
@@ -989,68 +1003,14 @@ static ssize_t show_xps_map(struct netdev_queue *queue,
989 return len; 1003 return len;
990} 1004}
991 1005
992static DEFINE_MUTEX(xps_map_mutex);
993#define xmap_dereference(P) \
994 rcu_dereference_protected((P), lockdep_is_held(&xps_map_mutex))
995
996static void xps_queue_release(struct netdev_queue *queue)
997{
998 struct net_device *dev = queue->dev;
999 struct xps_dev_maps *dev_maps;
1000 struct xps_map *map;
1001 unsigned long index;
1002 int i, pos, nonempty = 0;
1003
1004 index = get_netdev_queue_index(queue);
1005
1006 mutex_lock(&xps_map_mutex);
1007 dev_maps = xmap_dereference(dev->xps_maps);
1008
1009 if (dev_maps) {
1010 for_each_possible_cpu(i) {
1011 map = xmap_dereference(dev_maps->cpu_map[i]);
1012 if (!map)
1013 continue;
1014
1015 for (pos = 0; pos < map->len; pos++)
1016 if (map->queues[pos] == index)
1017 break;
1018
1019 if (pos < map->len) {
1020 if (map->len > 1)
1021 map->queues[pos] =
1022 map->queues[--map->len];
1023 else {
1024 RCU_INIT_POINTER(dev_maps->cpu_map[i],
1025 NULL);
1026 kfree_rcu(map, rcu);
1027 map = NULL;
1028 }
1029 }
1030 if (map)
1031 nonempty = 1;
1032 }
1033
1034 if (!nonempty) {
1035 RCU_INIT_POINTER(dev->xps_maps, NULL);
1036 kfree_rcu(dev_maps, rcu);
1037 }
1038 }
1039 mutex_unlock(&xps_map_mutex);
1040}
1041
1042static ssize_t store_xps_map(struct netdev_queue *queue, 1006static ssize_t store_xps_map(struct netdev_queue *queue,
1043 struct netdev_queue_attribute *attribute, 1007 struct netdev_queue_attribute *attribute,
1044 const char *buf, size_t len) 1008 const char *buf, size_t len)
1045{ 1009{
1046 struct net_device *dev = queue->dev; 1010 struct net_device *dev = queue->dev;
1047 cpumask_var_t mask;
1048 int err, i, cpu, pos, map_len, alloc_len, need_set;
1049 unsigned long index; 1011 unsigned long index;
1050 struct xps_map *map, *new_map; 1012 cpumask_var_t mask;
1051 struct xps_dev_maps *dev_maps, *new_dev_maps; 1013 int err;
1052 int nonempty = 0;
1053 int numa_node_id = -2;
1054 1014
1055 if (!capable(CAP_NET_ADMIN)) 1015 if (!capable(CAP_NET_ADMIN))
1056 return -EPERM; 1016 return -EPERM;
@@ -1066,105 +1026,11 @@ static ssize_t store_xps_map(struct netdev_queue *queue,
1066 return err; 1026 return err;
1067 } 1027 }
1068 1028
1069 new_dev_maps = kzalloc(max_t(unsigned int, 1029 err = netif_set_xps_queue(dev, mask, index);
1070 XPS_DEV_MAPS_SIZE, L1_CACHE_BYTES), GFP_KERNEL);
1071 if (!new_dev_maps) {
1072 free_cpumask_var(mask);
1073 return -ENOMEM;
1074 }
1075
1076 mutex_lock(&xps_map_mutex);
1077
1078 dev_maps = xmap_dereference(dev->xps_maps);
1079
1080 for_each_possible_cpu(cpu) {
1081 map = dev_maps ?
1082 xmap_dereference(dev_maps->cpu_map[cpu]) : NULL;
1083 new_map = map;
1084 if (map) {
1085 for (pos = 0; pos < map->len; pos++)
1086 if (map->queues[pos] == index)
1087 break;
1088 map_len = map->len;
1089 alloc_len = map->alloc_len;
1090 } else
1091 pos = map_len = alloc_len = 0;
1092
1093 need_set = cpumask_test_cpu(cpu, mask) && cpu_online(cpu);
1094#ifdef CONFIG_NUMA
1095 if (need_set) {
1096 if (numa_node_id == -2)
1097 numa_node_id = cpu_to_node(cpu);
1098 else if (numa_node_id != cpu_to_node(cpu))
1099 numa_node_id = -1;
1100 }
1101#endif
1102 if (need_set && pos >= map_len) {
1103 /* Need to add queue to this CPU's map */
1104 if (map_len >= alloc_len) {
1105 alloc_len = alloc_len ?
1106 2 * alloc_len : XPS_MIN_MAP_ALLOC;
1107 new_map = kzalloc_node(XPS_MAP_SIZE(alloc_len),
1108 GFP_KERNEL,
1109 cpu_to_node(cpu));
1110 if (!new_map)
1111 goto error;
1112 new_map->alloc_len = alloc_len;
1113 for (i = 0; i < map_len; i++)
1114 new_map->queues[i] = map->queues[i];
1115 new_map->len = map_len;
1116 }
1117 new_map->queues[new_map->len++] = index;
1118 } else if (!need_set && pos < map_len) {
1119 /* Need to remove queue from this CPU's map */
1120 if (map_len > 1)
1121 new_map->queues[pos] =
1122 new_map->queues[--new_map->len];
1123 else
1124 new_map = NULL;
1125 }
1126 RCU_INIT_POINTER(new_dev_maps->cpu_map[cpu], new_map);
1127 }
1128
1129 /* Cleanup old maps */
1130 for_each_possible_cpu(cpu) {
1131 map = dev_maps ?
1132 xmap_dereference(dev_maps->cpu_map[cpu]) : NULL;
1133 if (map && xmap_dereference(new_dev_maps->cpu_map[cpu]) != map)
1134 kfree_rcu(map, rcu);
1135 if (new_dev_maps->cpu_map[cpu])
1136 nonempty = 1;
1137 }
1138
1139 if (nonempty) {
1140 rcu_assign_pointer(dev->xps_maps, new_dev_maps);
1141 } else {
1142 kfree(new_dev_maps);
1143 RCU_INIT_POINTER(dev->xps_maps, NULL);
1144 }
1145
1146 if (dev_maps)
1147 kfree_rcu(dev_maps, rcu);
1148
1149 netdev_queue_numa_node_write(queue, (numa_node_id >= 0) ? numa_node_id :
1150 NUMA_NO_NODE);
1151
1152 mutex_unlock(&xps_map_mutex);
1153 1030
1154 free_cpumask_var(mask); 1031 free_cpumask_var(mask);
1155 return len;
1156 1032
1157error: 1033 return err ? : len;
1158 mutex_unlock(&xps_map_mutex);
1159
1160 if (new_dev_maps)
1161 for_each_possible_cpu(i)
1162 kfree(rcu_dereference_protected(
1163 new_dev_maps->cpu_map[i],
1164 1));
1165 kfree(new_dev_maps);
1166 free_cpumask_var(mask);
1167 return -ENOMEM;
1168} 1034}
1169 1035
1170static struct netdev_queue_attribute xps_cpus_attribute = 1036static struct netdev_queue_attribute xps_cpus_attribute =
@@ -1183,10 +1049,6 @@ static void netdev_queue_release(struct kobject *kobj)
1183{ 1049{
1184 struct netdev_queue *queue = to_netdev_queue(kobj); 1050 struct netdev_queue *queue = to_netdev_queue(kobj);
1185 1051
1186#ifdef CONFIG_XPS
1187 xps_queue_release(queue);
1188#endif
1189
1190 memset(kobj, 0, sizeof(*kobj)); 1052 memset(kobj, 0, sizeof(*kobj));
1191 dev_put(queue->dev); 1053 dev_put(queue->dev);
1192} 1054}
@@ -1396,6 +1258,8 @@ void netdev_unregister_kobject(struct net_device * net)
1396 1258
1397 remove_queue_kobjects(net); 1259 remove_queue_kobjects(net);
1398 1260
1261 pm_runtime_set_memalloc_noio(dev, false);
1262
1399 device_del(dev); 1263 device_del(dev);
1400} 1264}
1401 1265
@@ -1440,6 +1304,8 @@ int netdev_register_kobject(struct net_device *net)
1440 return error; 1304 return error;
1441 } 1305 }
1442 1306
1307 pm_runtime_set_memalloc_noio(dev, true);
1308
1443 return error; 1309 return error;
1444} 1310}
1445 1311
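
The big win in net-sysfs.c is that store_xps_map() no longer hand-rolls the CPU-map rebuild; everything between parsing the cpumask and reporting the result collapses into netif_set_xps_queue(). A condensed sketch of the resulting store path (error handling trimmed; the bitmap_parse step is assumed from the elided lines above the hunk):

static ssize_t xps_store_sketch(struct net_device *dev, unsigned long index,
				const char *buf, size_t len)
{
	cpumask_var_t mask;
	int err;

	if (!alloc_cpumask_var(&mask, GFP_KERNEL))
		return -ENOMEM;

	err = bitmap_parse(buf, len, cpumask_bits(mask), nr_cpumask_bits);
	if (!err)
		err = netif_set_xps_queue(dev, mask, index);	/* replaces ~130 lines */

	free_cpumask_var(mask);
	return err ? : len;
}
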
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 8acce01b6dab..80e271d9e64b 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -344,7 +344,7 @@ struct net *get_net_ns_by_fd(int fd)
344 if (IS_ERR(file)) 344 if (IS_ERR(file))
345 return ERR_CAST(file); 345 return ERR_CAST(file);
346 346
347 ei = PROC_I(file->f_dentry->d_inode); 347 ei = PROC_I(file_inode(file));
348 if (ei->ns_ops == &netns_operations) 348 if (ei->ns_ops == &netns_operations)
349 net = get_net(ei->ns); 349 net = get_net(ei->ns);
350 else 350 else
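
file_inode() is simply the accessor that replaces the open-coded dentry chasing; the one-liner below shows the before/after shape (an illustrative wrapper, not from this patch):

static struct inode *inode_of(const struct file *file)
{
	return file_inode(file);	/* was: file->f_dentry->d_inode */
}
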
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 3151acf5ec13..fa32899006a2 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -29,6 +29,9 @@
29#include <linux/if_vlan.h> 29#include <linux/if_vlan.h>
30#include <net/tcp.h> 30#include <net/tcp.h>
31#include <net/udp.h> 31#include <net/udp.h>
32#include <net/addrconf.h>
33#include <net/ndisc.h>
34#include <net/ip6_checksum.h>
32#include <asm/unaligned.h> 35#include <asm/unaligned.h>
33#include <trace/events/napi.h> 36#include <trace/events/napi.h>
34 37
@@ -44,6 +47,8 @@ static struct sk_buff_head skb_pool;
44 47
45static atomic_t trapped; 48static atomic_t trapped;
46 49
50static struct srcu_struct netpoll_srcu;
51
47#define USEC_PER_POLL 50 52#define USEC_PER_POLL 50
48#define NETPOLL_RX_ENABLED 1 53#define NETPOLL_RX_ENABLED 1
49#define NETPOLL_RX_DROP 2 54#define NETPOLL_RX_DROP 2
@@ -55,7 +60,8 @@ static atomic_t trapped;
55 MAX_UDP_CHUNK) 60 MAX_UDP_CHUNK)
56 61
57static void zap_completion_queue(void); 62static void zap_completion_queue(void);
58static void netpoll_arp_reply(struct sk_buff *skb, struct netpoll_info *npinfo); 63static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo);
64static void netpoll_async_cleanup(struct work_struct *work);
59 65
60static unsigned int carrier_timeout = 4; 66static unsigned int carrier_timeout = 4;
61module_param(carrier_timeout, uint, 0644); 67module_param(carrier_timeout, uint, 0644);
@@ -181,13 +187,13 @@ static void poll_napi(struct net_device *dev)
181 } 187 }
182} 188}
183 189
184static void service_arp_queue(struct netpoll_info *npi) 190static void service_neigh_queue(struct netpoll_info *npi)
185{ 191{
186 if (npi) { 192 if (npi) {
187 struct sk_buff *skb; 193 struct sk_buff *skb;
188 194
189 while ((skb = skb_dequeue(&npi->arp_tx))) 195 while ((skb = skb_dequeue(&npi->neigh_tx)))
190 netpoll_arp_reply(skb, npi); 196 netpoll_neigh_reply(skb, npi);
191 } 197 }
192} 198}
193 199
@@ -196,35 +202,76 @@ static void netpoll_poll_dev(struct net_device *dev)
196 const struct net_device_ops *ops; 202 const struct net_device_ops *ops;
197 struct netpoll_info *ni = rcu_dereference_bh(dev->npinfo); 203 struct netpoll_info *ni = rcu_dereference_bh(dev->npinfo);
198 204
 199 if (!dev || !netif_running(dev)) 205 /* Don't do any rx activity if the dev_lock mutex is held;
206 * the dev_open/close paths use this to block netpoll activity
207 * while changing device state
208 */
209 if (!mutex_trylock(&ni->dev_lock))
200 return; 210 return;
201 211
212 if (!netif_running(dev)) {
213 mutex_unlock(&ni->dev_lock);
214 return;
215 }
216
202 ops = dev->netdev_ops; 217 ops = dev->netdev_ops;
203 if (!ops->ndo_poll_controller) 218 if (!ops->ndo_poll_controller) {
219 mutex_unlock(&ni->dev_lock);
204 return; 220 return;
221 }
205 222
206 /* Process pending work on NIC */ 223 /* Process pending work on NIC */
207 ops->ndo_poll_controller(dev); 224 ops->ndo_poll_controller(dev);
208 225
209 poll_napi(dev); 226 poll_napi(dev);
210 227
228 mutex_unlock(&ni->dev_lock);
229
211 if (dev->flags & IFF_SLAVE) { 230 if (dev->flags & IFF_SLAVE) {
212 if (ni) { 231 if (ni) {
213 struct net_device *bond_dev = dev->master; 232 struct net_device *bond_dev;
214 struct sk_buff *skb; 233 struct sk_buff *skb;
215 struct netpoll_info *bond_ni = rcu_dereference_bh(bond_dev->npinfo); 234 struct netpoll_info *bond_ni;
216 while ((skb = skb_dequeue(&ni->arp_tx))) { 235
236 bond_dev = netdev_master_upper_dev_get_rcu(dev);
237 bond_ni = rcu_dereference_bh(bond_dev->npinfo);
238 while ((skb = skb_dequeue(&ni->neigh_tx))) {
217 skb->dev = bond_dev; 239 skb->dev = bond_dev;
218 skb_queue_tail(&bond_ni->arp_tx, skb); 240 skb_queue_tail(&bond_ni->neigh_tx, skb);
219 } 241 }
220 } 242 }
221 } 243 }
222 244
223 service_arp_queue(ni); 245 service_neigh_queue(ni);
224 246
225 zap_completion_queue(); 247 zap_completion_queue();
226} 248}
227 249
250int netpoll_rx_disable(struct net_device *dev)
251{
252 struct netpoll_info *ni;
253 int idx;
254 might_sleep();
255 idx = srcu_read_lock(&netpoll_srcu);
256 ni = srcu_dereference(dev->npinfo, &netpoll_srcu);
257 if (ni)
258 mutex_lock(&ni->dev_lock);
259 srcu_read_unlock(&netpoll_srcu, idx);
260 return 0;
261}
262EXPORT_SYMBOL(netpoll_rx_disable);
263
264void netpoll_rx_enable(struct net_device *dev)
265{
266 struct netpoll_info *ni;
267 rcu_read_lock();
268 ni = rcu_dereference(dev->npinfo);
269 if (ni)
270 mutex_unlock(&ni->dev_lock);
271 rcu_read_unlock();
272}
273EXPORT_SYMBOL(netpoll_rx_enable);
274
228static void refill_skbs(void) 275static void refill_skbs(void)
229{ 276{
230 struct sk_buff *skb; 277 struct sk_buff *skb;
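
The dev_lock/SRCU pairing above gives dev_open()/dev_close() a way to fence off netpoll receive work: the poller only ever mutex_trylock()s, so a held dev_lock makes it back off, while netpoll_rx_disable() sleeps until the poller has drained. A sketch of the intended caller pattern (do_state_change is a hypothetical stand-in for the open/close work):

static int dev_state_change_sketch(struct net_device *dev)
{
	int err;

	netpoll_rx_disable(dev);	/* blocks until the poller is out */
	err = do_state_change(dev);	/* hypothetical open/close body */
	netpoll_rx_enable(dev);		/* poller may trylock again */
	return err;
}
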
@@ -381,9 +428,14 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
381 struct iphdr *iph; 428 struct iphdr *iph;
382 struct ethhdr *eth; 429 struct ethhdr *eth;
383 static atomic_t ip_ident; 430 static atomic_t ip_ident;
431 struct ipv6hdr *ip6h;
384 432
385 udp_len = len + sizeof(*udph); 433 udp_len = len + sizeof(*udph);
386 ip_len = udp_len + sizeof(*iph); 434 if (np->ipv6)
435 ip_len = udp_len + sizeof(*ip6h);
436 else
437 ip_len = udp_len + sizeof(*iph);
438
387 total_len = ip_len + LL_RESERVED_SPACE(np->dev); 439 total_len = ip_len + LL_RESERVED_SPACE(np->dev);
388 440
389 skb = find_skb(np, total_len + np->dev->needed_tailroom, 441 skb = find_skb(np, total_len + np->dev->needed_tailroom,
@@ -400,34 +452,66 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
400 udph->source = htons(np->local_port); 452 udph->source = htons(np->local_port);
401 udph->dest = htons(np->remote_port); 453 udph->dest = htons(np->remote_port);
402 udph->len = htons(udp_len); 454 udph->len = htons(udp_len);
403 udph->check = 0; 455
404 udph->check = csum_tcpudp_magic(np->local_ip, 456 if (np->ipv6) {
405 np->remote_ip, 457 udph->check = 0;
406 udp_len, IPPROTO_UDP, 458 udph->check = csum_ipv6_magic(&np->local_ip.in6,
407 csum_partial(udph, udp_len, 0)); 459 &np->remote_ip.in6,
408 if (udph->check == 0) 460 udp_len, IPPROTO_UDP,
409 udph->check = CSUM_MANGLED_0; 461 csum_partial(udph, udp_len, 0));
410 462 if (udph->check == 0)
411 skb_push(skb, sizeof(*iph)); 463 udph->check = CSUM_MANGLED_0;
412 skb_reset_network_header(skb); 464
413 iph = ip_hdr(skb); 465 skb_push(skb, sizeof(*ip6h));
414 466 skb_reset_network_header(skb);
415 /* iph->version = 4; iph->ihl = 5; */ 467 ip6h = ipv6_hdr(skb);
416 put_unaligned(0x45, (unsigned char *)iph); 468
417 iph->tos = 0; 469 /* ip6h->version = 6; ip6h->priority = 0; */
418 put_unaligned(htons(ip_len), &(iph->tot_len)); 470 put_unaligned(0x60, (unsigned char *)ip6h);
419 iph->id = htons(atomic_inc_return(&ip_ident)); 471 ip6h->flow_lbl[0] = 0;
420 iph->frag_off = 0; 472 ip6h->flow_lbl[1] = 0;
421 iph->ttl = 64; 473 ip6h->flow_lbl[2] = 0;
422 iph->protocol = IPPROTO_UDP; 474
423 iph->check = 0; 475 ip6h->payload_len = htons(sizeof(struct udphdr) + len);
424 put_unaligned(np->local_ip, &(iph->saddr)); 476 ip6h->nexthdr = IPPROTO_UDP;
425 put_unaligned(np->remote_ip, &(iph->daddr)); 477 ip6h->hop_limit = 32;
426 iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); 478 ip6h->saddr = np->local_ip.in6;
427 479 ip6h->daddr = np->remote_ip.in6;
428 eth = (struct ethhdr *) skb_push(skb, ETH_HLEN); 480
429 skb_reset_mac_header(skb); 481 eth = (struct ethhdr *) skb_push(skb, ETH_HLEN);
430 skb->protocol = eth->h_proto = htons(ETH_P_IP); 482 skb_reset_mac_header(skb);
483 skb->protocol = eth->h_proto = htons(ETH_P_IPV6);
484 } else {
485 udph->check = 0;
486 udph->check = csum_tcpudp_magic(np->local_ip.ip,
487 np->remote_ip.ip,
488 udp_len, IPPROTO_UDP,
489 csum_partial(udph, udp_len, 0));
490 if (udph->check == 0)
491 udph->check = CSUM_MANGLED_0;
492
493 skb_push(skb, sizeof(*iph));
494 skb_reset_network_header(skb);
495 iph = ip_hdr(skb);
496
497 /* iph->version = 4; iph->ihl = 5; */
498 put_unaligned(0x45, (unsigned char *)iph);
499 iph->tos = 0;
500 put_unaligned(htons(ip_len), &(iph->tot_len));
501 iph->id = htons(atomic_inc_return(&ip_ident));
502 iph->frag_off = 0;
503 iph->ttl = 64;
504 iph->protocol = IPPROTO_UDP;
505 iph->check = 0;
506 put_unaligned(np->local_ip.ip, &(iph->saddr));
507 put_unaligned(np->remote_ip.ip, &(iph->daddr));
508 iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
509
510 eth = (struct ethhdr *) skb_push(skb, ETH_HLEN);
511 skb_reset_mac_header(skb);
512 skb->protocol = eth->h_proto = htons(ETH_P_IP);
513 }
514
431 memcpy(eth->h_source, np->dev->dev_addr, ETH_ALEN); 515 memcpy(eth->h_source, np->dev->dev_addr, ETH_ALEN);
432 memcpy(eth->h_dest, np->remote_mac, ETH_ALEN); 516 memcpy(eth->h_dest, np->remote_mac, ETH_ALEN);
433 517
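
With the send path branching on np->ipv6, the addresses come out of a union so one netpoll instance can carry either family. Only the two members this diff actually touches are evidenced here; the in-tree definition in netpoll.h may carry more:

union inet_addr {
	__be32		ip;	/* IPv4: np->local_ip.ip  */
	struct in6_addr	in6;	/* IPv6: np->local_ip.in6 */
};
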
@@ -437,18 +521,16 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
437} 521}
438EXPORT_SYMBOL(netpoll_send_udp); 522EXPORT_SYMBOL(netpoll_send_udp);
439 523
440static void netpoll_arp_reply(struct sk_buff *skb, struct netpoll_info *npinfo) 524static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo)
441{ 525{
442 struct arphdr *arp; 526 int size, type = ARPOP_REPLY;
443 unsigned char *arp_ptr;
444 int size, type = ARPOP_REPLY, ptype = ETH_P_ARP;
445 __be32 sip, tip; 527 __be32 sip, tip;
446 unsigned char *sha; 528 unsigned char *sha;
447 struct sk_buff *send_skb; 529 struct sk_buff *send_skb;
448 struct netpoll *np, *tmp; 530 struct netpoll *np, *tmp;
449 unsigned long flags; 531 unsigned long flags;
450 int hlen, tlen; 532 int hlen, tlen;
451 int hits = 0; 533 int hits = 0, proto;
452 534
453 if (list_empty(&npinfo->rx_np)) 535 if (list_empty(&npinfo->rx_np))
454 return; 536 return;
@@ -466,94 +548,214 @@ static void netpoll_arp_reply(struct sk_buff *skb, struct netpoll_info *npinfo)
466 if (!hits) 548 if (!hits)
467 return; 549 return;
468 550
469 /* No arp on this interface */ 551 proto = ntohs(eth_hdr(skb)->h_proto);
470 if (skb->dev->flags & IFF_NOARP) 552 if (proto == ETH_P_IP) {
471 return; 553 struct arphdr *arp;
472 554 unsigned char *arp_ptr;
473 if (!pskb_may_pull(skb, arp_hdr_len(skb->dev))) 555 /* No arp on this interface */
474 return; 556 if (skb->dev->flags & IFF_NOARP)
557 return;
475 558
476 skb_reset_network_header(skb); 559 if (!pskb_may_pull(skb, arp_hdr_len(skb->dev)))
477 skb_reset_transport_header(skb); 560 return;
478 arp = arp_hdr(skb);
479 561
480 if ((arp->ar_hrd != htons(ARPHRD_ETHER) && 562 skb_reset_network_header(skb);
481 arp->ar_hrd != htons(ARPHRD_IEEE802)) || 563 skb_reset_transport_header(skb);
482 arp->ar_pro != htons(ETH_P_IP) || 564 arp = arp_hdr(skb);
483 arp->ar_op != htons(ARPOP_REQUEST))
484 return;
485 565
486 arp_ptr = (unsigned char *)(arp+1); 566 if ((arp->ar_hrd != htons(ARPHRD_ETHER) &&
487 /* save the location of the src hw addr */ 567 arp->ar_hrd != htons(ARPHRD_IEEE802)) ||
488 sha = arp_ptr; 568 arp->ar_pro != htons(ETH_P_IP) ||
489 arp_ptr += skb->dev->addr_len; 569 arp->ar_op != htons(ARPOP_REQUEST))
490 memcpy(&sip, arp_ptr, 4); 570 return;
491 arp_ptr += 4;
492 /* If we actually cared about dst hw addr,
493 it would get copied here */
494 arp_ptr += skb->dev->addr_len;
495 memcpy(&tip, arp_ptr, 4);
496
497 /* Should we ignore arp? */
498 if (ipv4_is_loopback(tip) || ipv4_is_multicast(tip))
499 return;
500 571
501 size = arp_hdr_len(skb->dev); 572 arp_ptr = (unsigned char *)(arp+1);
573 /* save the location of the src hw addr */
574 sha = arp_ptr;
575 arp_ptr += skb->dev->addr_len;
576 memcpy(&sip, arp_ptr, 4);
577 arp_ptr += 4;
578 /* If we actually cared about dst hw addr,
579 it would get copied here */
580 arp_ptr += skb->dev->addr_len;
581 memcpy(&tip, arp_ptr, 4);
502 582
503 spin_lock_irqsave(&npinfo->rx_lock, flags); 583 /* Should we ignore arp? */
504 list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) { 584 if (ipv4_is_loopback(tip) || ipv4_is_multicast(tip))
505 if (tip != np->local_ip) 585 return;
506 continue;
507 586
508 hlen = LL_RESERVED_SPACE(np->dev); 587 size = arp_hdr_len(skb->dev);
509 tlen = np->dev->needed_tailroom;
510 send_skb = find_skb(np, size + hlen + tlen, hlen);
511 if (!send_skb)
512 continue;
513 588
514 skb_reset_network_header(send_skb); 589 spin_lock_irqsave(&npinfo->rx_lock, flags);
515 arp = (struct arphdr *) skb_put(send_skb, size); 590 list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) {
516 send_skb->dev = skb->dev; 591 if (tip != np->local_ip.ip)
517 send_skb->protocol = htons(ETH_P_ARP); 592 continue;
593
594 hlen = LL_RESERVED_SPACE(np->dev);
595 tlen = np->dev->needed_tailroom;
596 send_skb = find_skb(np, size + hlen + tlen, hlen);
597 if (!send_skb)
598 continue;
599
600 skb_reset_network_header(send_skb);
601 arp = (struct arphdr *) skb_put(send_skb, size);
602 send_skb->dev = skb->dev;
603 send_skb->protocol = htons(ETH_P_ARP);
604
605 /* Fill the device header for the ARP frame */
606 if (dev_hard_header(send_skb, skb->dev, ETH_P_ARP,
607 sha, np->dev->dev_addr,
608 send_skb->len) < 0) {
609 kfree_skb(send_skb);
610 continue;
611 }
518 612
519 /* Fill the device header for the ARP frame */ 613 /*
520 if (dev_hard_header(send_skb, skb->dev, ptype, 614 * Fill out the arp protocol part.
521 sha, np->dev->dev_addr, 615 *
522 send_skb->len) < 0) { 616 * we only support ethernet device type,
523 kfree_skb(send_skb); 617 * which (according to RFC 1390) should
524 continue; 618 * always equal 1 (Ethernet).
619 */
620
621 arp->ar_hrd = htons(np->dev->type);
622 arp->ar_pro = htons(ETH_P_IP);
623 arp->ar_hln = np->dev->addr_len;
624 arp->ar_pln = 4;
625 arp->ar_op = htons(type);
626
627 arp_ptr = (unsigned char *)(arp + 1);
628 memcpy(arp_ptr, np->dev->dev_addr, np->dev->addr_len);
629 arp_ptr += np->dev->addr_len;
630 memcpy(arp_ptr, &tip, 4);
631 arp_ptr += 4;
632 memcpy(arp_ptr, sha, np->dev->addr_len);
633 arp_ptr += np->dev->addr_len;
634 memcpy(arp_ptr, &sip, 4);
635
636 netpoll_send_skb(np, send_skb);
637
638 /* If there are several rx_hooks for the same address,
639 we're fine by sending a single reply */
640 break;
525 } 641 }
642 spin_unlock_irqrestore(&npinfo->rx_lock, flags);
 643 } else if (proto == ETH_P_IPV6) {
644#if IS_ENABLED(CONFIG_IPV6)
645 struct nd_msg *msg;
646 u8 *lladdr = NULL;
647 struct ipv6hdr *hdr;
648 struct icmp6hdr *icmp6h;
649 const struct in6_addr *saddr;
650 const struct in6_addr *daddr;
651 struct inet6_dev *in6_dev = NULL;
652 struct in6_addr *target;
653
654 in6_dev = in6_dev_get(skb->dev);
655 if (!in6_dev || !in6_dev->cnf.accept_ra)
656 return;
526 657
527 /* 658 if (!pskb_may_pull(skb, skb->len))
528 * Fill out the arp protocol part. 659 return;
529 *
530 * we only support ethernet device type,
531 * which (according to RFC 1390) should
532 * always equal 1 (Ethernet).
533 */
534 660
535 arp->ar_hrd = htons(np->dev->type); 661 msg = (struct nd_msg *)skb_transport_header(skb);
536 arp->ar_pro = htons(ETH_P_IP);
537 arp->ar_hln = np->dev->addr_len;
538 arp->ar_pln = 4;
539 arp->ar_op = htons(type);
540 662
541 arp_ptr = (unsigned char *)(arp + 1); 663 __skb_push(skb, skb->data - skb_transport_header(skb));
542 memcpy(arp_ptr, np->dev->dev_addr, np->dev->addr_len);
543 arp_ptr += np->dev->addr_len;
544 memcpy(arp_ptr, &tip, 4);
545 arp_ptr += 4;
546 memcpy(arp_ptr, sha, np->dev->addr_len);
547 arp_ptr += np->dev->addr_len;
548 memcpy(arp_ptr, &sip, 4);
549 664
550 netpoll_send_skb(np, send_skb); 665 if (ipv6_hdr(skb)->hop_limit != 255)
666 return;
667 if (msg->icmph.icmp6_code != 0)
668 return;
669 if (msg->icmph.icmp6_type != NDISC_NEIGHBOUR_SOLICITATION)
670 return;
671
672 saddr = &ipv6_hdr(skb)->saddr;
673 daddr = &ipv6_hdr(skb)->daddr;
551 674
552 /* If there are several rx_hooks for the same address, 675 size = sizeof(struct icmp6hdr) + sizeof(struct in6_addr);
553 we're fine by sending a single reply */ 676
554 break; 677 spin_lock_irqsave(&npinfo->rx_lock, flags);
678 list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) {
679 if (!ipv6_addr_equal(daddr, &np->local_ip.in6))
680 continue;
681
682 hlen = LL_RESERVED_SPACE(np->dev);
683 tlen = np->dev->needed_tailroom;
684 send_skb = find_skb(np, size + hlen + tlen, hlen);
685 if (!send_skb)
686 continue;
687
688 send_skb->protocol = htons(ETH_P_IPV6);
689 send_skb->dev = skb->dev;
690
691 skb_reset_network_header(send_skb);
692 skb_put(send_skb, sizeof(struct ipv6hdr));
693 hdr = ipv6_hdr(send_skb);
694
695 *(__be32*)hdr = htonl(0x60000000);
696
697 hdr->payload_len = htons(size);
698 hdr->nexthdr = IPPROTO_ICMPV6;
699 hdr->hop_limit = 255;
700 hdr->saddr = *saddr;
701 hdr->daddr = *daddr;
702
703 send_skb->transport_header = send_skb->tail;
704 skb_put(send_skb, size);
705
 706 icmp6h = (struct icmp6hdr *)skb_transport_header(send_skb);
707 icmp6h->icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT;
708 icmp6h->icmp6_router = 0;
709 icmp6h->icmp6_solicited = 1;
710 target = (struct in6_addr *)(skb_transport_header(send_skb) + sizeof(struct icmp6hdr));
711 *target = msg->target;
712 icmp6h->icmp6_cksum = csum_ipv6_magic(saddr, daddr, size,
713 IPPROTO_ICMPV6,
714 csum_partial(icmp6h,
715 size, 0));
716
717 if (dev_hard_header(send_skb, skb->dev, ETH_P_IPV6,
718 lladdr, np->dev->dev_addr,
719 send_skb->len) < 0) {
720 kfree_skb(send_skb);
721 continue;
722 }
723
724 netpoll_send_skb(np, send_skb);
725
726 /* If there are several rx_hooks for the same address,
727 we're fine by sending a single reply */
728 break;
729 }
730 spin_unlock_irqrestore(&npinfo->rx_lock, flags);
731#endif
555 } 732 }
556 spin_unlock_irqrestore(&npinfo->rx_lock, flags); 733}
734
735static bool pkt_is_ns(struct sk_buff *skb)
736{
737 struct nd_msg *msg;
738 struct ipv6hdr *hdr;
739
740 if (skb->protocol != htons(ETH_P_ARP))
741 return false;
742 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr) + sizeof(struct nd_msg)))
743 return false;
744
745 msg = (struct nd_msg *)skb_transport_header(skb);
746 __skb_push(skb, skb->data - skb_transport_header(skb));
747 hdr = ipv6_hdr(skb);
748
749 if (hdr->nexthdr != IPPROTO_ICMPV6)
750 return false;
751 if (hdr->hop_limit != 255)
752 return false;
753 if (msg->icmph.icmp6_code != 0)
754 return false;
755 if (msg->icmph.icmp6_type != NDISC_NEIGHBOUR_SOLICITATION)
756 return false;
757
758 return true;
557} 759}
558 760
559int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo) 761int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo)
@@ -571,9 +773,11 @@ int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo)
571 goto out; 773 goto out;
572 774
573 /* check if netpoll clients need ARP */ 775 /* check if netpoll clients need ARP */
574 if (skb->protocol == htons(ETH_P_ARP) && 776 if (skb->protocol == htons(ETH_P_ARP) && atomic_read(&trapped)) {
575 atomic_read(&trapped)) { 777 skb_queue_tail(&npinfo->neigh_tx, skb);
576 skb_queue_tail(&npinfo->arp_tx, skb); 778 return 1;
779 } else if (pkt_is_ns(skb) && atomic_read(&trapped)) {
780 skb_queue_tail(&npinfo->neigh_tx, skb);
577 return 1; 781 return 1;
578 } 782 }
579 783
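
With IPv6 in the picture, "ARP" duty generalizes to neighbour discovery: while traffic is trapped, both ARP requests and NS frames are parked on the single neigh_tx queue and answered later from service_neigh_queue(). The gate in __netpoll_rx() condenses to roughly this (a sketch assuming the pkt_is_ns() helper above):

static bool trap_neigh_frame(struct sk_buff *skb, struct netpoll_info *npinfo)
{
	if (!atomic_read(&trapped))
		return false;
	if (skb->protocol != htons(ETH_P_ARP) && !pkt_is_ns(skb))
		return false;
	skb_queue_tail(&npinfo->neigh_tx, skb);	/* replied to from the poll loop */
	return true;
}
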
@@ -584,60 +788,100 @@ int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo)
584 } 788 }
585 789
586 proto = ntohs(eth_hdr(skb)->h_proto); 790 proto = ntohs(eth_hdr(skb)->h_proto);
587 if (proto != ETH_P_IP) 791 if (proto != ETH_P_IP && proto != ETH_P_IPV6)
588 goto out; 792 goto out;
589 if (skb->pkt_type == PACKET_OTHERHOST) 793 if (skb->pkt_type == PACKET_OTHERHOST)
590 goto out; 794 goto out;
591 if (skb_shared(skb)) 795 if (skb_shared(skb))
592 goto out; 796 goto out;
593 797
594 if (!pskb_may_pull(skb, sizeof(struct iphdr))) 798 if (proto == ETH_P_IP) {
595 goto out; 799 if (!pskb_may_pull(skb, sizeof(struct iphdr)))
596 iph = (struct iphdr *)skb->data; 800 goto out;
597 if (iph->ihl < 5 || iph->version != 4) 801 iph = (struct iphdr *)skb->data;
598 goto out; 802 if (iph->ihl < 5 || iph->version != 4)
599 if (!pskb_may_pull(skb, iph->ihl*4)) 803 goto out;
600 goto out; 804 if (!pskb_may_pull(skb, iph->ihl*4))
601 iph = (struct iphdr *)skb->data; 805 goto out;
602 if (ip_fast_csum((u8 *)iph, iph->ihl) != 0) 806 iph = (struct iphdr *)skb->data;
603 goto out; 807 if (ip_fast_csum((u8 *)iph, iph->ihl) != 0)
604 808 goto out;
605 len = ntohs(iph->tot_len);
606 if (skb->len < len || len < iph->ihl*4)
607 goto out;
608 809
609 /* 810 len = ntohs(iph->tot_len);
610 * Our transport medium may have padded the buffer out. 811 if (skb->len < len || len < iph->ihl*4)
 611 * Now we trim to the true length of the frame. 812 goto out;
612 */
613 if (pskb_trim_rcsum(skb, len))
614 goto out;
615 813
616 iph = (struct iphdr *)skb->data; 814 /*
617 if (iph->protocol != IPPROTO_UDP) 815 * Our transport medium may have padded the buffer out.
 618 goto out; 816 * Now we trim to the true length of the frame.
817 */
818 if (pskb_trim_rcsum(skb, len))
819 goto out;
619 820
620 len -= iph->ihl*4; 821 iph = (struct iphdr *)skb->data;
621 uh = (struct udphdr *)(((char *)iph) + iph->ihl*4); 822 if (iph->protocol != IPPROTO_UDP)
622 ulen = ntohs(uh->len); 823 goto out;
623 824
624 if (ulen != len) 825 len -= iph->ihl*4;
625 goto out; 826 uh = (struct udphdr *)(((char *)iph) + iph->ihl*4);
626 if (checksum_udp(skb, uh, ulen, iph->saddr, iph->daddr)) 827 ulen = ntohs(uh->len);
627 goto out;
628 828
629 list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) { 829 if (ulen != len)
630 if (np->local_ip && np->local_ip != iph->daddr) 830 goto out;
631 continue; 831 if (checksum_udp(skb, uh, ulen, iph->saddr, iph->daddr))
632 if (np->remote_ip && np->remote_ip != iph->saddr) 832 goto out;
633 continue; 833 list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) {
634 if (np->local_port && np->local_port != ntohs(uh->dest)) 834 if (np->local_ip.ip && np->local_ip.ip != iph->daddr)
635 continue; 835 continue;
836 if (np->remote_ip.ip && np->remote_ip.ip != iph->saddr)
837 continue;
838 if (np->local_port && np->local_port != ntohs(uh->dest))
839 continue;
840
841 np->rx_hook(np, ntohs(uh->source),
842 (char *)(uh+1),
843 ulen - sizeof(struct udphdr));
844 hits++;
845 }
846 } else {
847#if IS_ENABLED(CONFIG_IPV6)
848 const struct ipv6hdr *ip6h;
636 849
637 np->rx_hook(np, ntohs(uh->source), 850 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
638 (char *)(uh+1), 851 goto out;
639 ulen - sizeof(struct udphdr)); 852 ip6h = (struct ipv6hdr *)skb->data;
640 hits++; 853 if (ip6h->version != 6)
854 goto out;
855 len = ntohs(ip6h->payload_len);
856 if (!len)
857 goto out;
858 if (len + sizeof(struct ipv6hdr) > skb->len)
859 goto out;
860 if (pskb_trim_rcsum(skb, len + sizeof(struct ipv6hdr)))
861 goto out;
862 ip6h = ipv6_hdr(skb);
863 if (!pskb_may_pull(skb, sizeof(struct udphdr)))
864 goto out;
865 uh = udp_hdr(skb);
866 ulen = ntohs(uh->len);
867 if (ulen != skb->len)
868 goto out;
869 if (udp6_csum_init(skb, uh, IPPROTO_UDP))
870 goto out;
871 list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) {
872 if (!ipv6_addr_equal(&np->local_ip.in6, &ip6h->daddr))
873 continue;
874 if (!ipv6_addr_equal(&np->remote_ip.in6, &ip6h->saddr))
875 continue;
876 if (np->local_port && np->local_port != ntohs(uh->dest))
877 continue;
878
879 np->rx_hook(np, ntohs(uh->source),
880 (char *)(uh+1),
881 ulen - sizeof(struct udphdr));
882 hits++;
883 }
884#endif
641 } 885 }
642 886
643 if (!hits) 887 if (!hits)
@@ -658,17 +902,44 @@ out:
658void netpoll_print_options(struct netpoll *np) 902void netpoll_print_options(struct netpoll *np)
659{ 903{
660 np_info(np, "local port %d\n", np->local_port); 904 np_info(np, "local port %d\n", np->local_port);
661 np_info(np, "local IP %pI4\n", &np->local_ip); 905 if (np->ipv6)
906 np_info(np, "local IPv6 address %pI6c\n", &np->local_ip.in6);
907 else
908 np_info(np, "local IPv4 address %pI4\n", &np->local_ip.ip);
662 np_info(np, "interface '%s'\n", np->dev_name); 909 np_info(np, "interface '%s'\n", np->dev_name);
663 np_info(np, "remote port %d\n", np->remote_port); 910 np_info(np, "remote port %d\n", np->remote_port);
664 np_info(np, "remote IP %pI4\n", &np->remote_ip); 911 if (np->ipv6)
912 np_info(np, "remote IPv6 address %pI6c\n", &np->remote_ip.in6);
913 else
914 np_info(np, "remote IPv4 address %pI4\n", &np->remote_ip.ip);
665 np_info(np, "remote ethernet address %pM\n", np->remote_mac); 915 np_info(np, "remote ethernet address %pM\n", np->remote_mac);
666} 916}
667EXPORT_SYMBOL(netpoll_print_options); 917EXPORT_SYMBOL(netpoll_print_options);
668 918
919static int netpoll_parse_ip_addr(const char *str, union inet_addr *addr)
920{
921 const char *end;
922
923 if (!strchr(str, ':') &&
924 in4_pton(str, -1, (void *)addr, -1, &end) > 0) {
925 if (!*end)
926 return 0;
927 }
928 if (in6_pton(str, -1, addr->in6.s6_addr, -1, &end) > 0) {
929#if IS_ENABLED(CONFIG_IPV6)
930 if (!*end)
931 return 1;
932#else
933 return -1;
934#endif
935 }
936 return -1;
937}
938
669int netpoll_parse_options(struct netpoll *np, char *opt) 939int netpoll_parse_options(struct netpoll *np, char *opt)
670{ 940{
671 char *cur=opt, *delim; 941 char *cur=opt, *delim;
942 int ipv6;
672 943
673 if (*cur != '@') { 944 if (*cur != '@') {
674 if ((delim = strchr(cur, '@')) == NULL) 945 if ((delim = strchr(cur, '@')) == NULL)
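
netpoll_parse_ip_addr() folds family detection into a tri-state return: 0 for IPv4, 1 for IPv6, and -1 for unparsable input or IPv6 text on a kernel built without CONFIG_IPV6. The two call sites below consume it the same way, and additionally reject a local/remote family mismatch. A usage sketch (set_endpoint is an illustrative wrapper):

static int set_endpoint(struct netpoll *np, const char *txt, union inet_addr *addr)
{
	int ipv6 = netpoll_parse_ip_addr(txt, addr);

	if (ipv6 < 0)
		return -1;		/* bad text, or IPv6 without CONFIG_IPV6 */
	np->ipv6 = (bool)ipv6;		/* both endpoints must later agree */
	return 0;
}
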
@@ -684,7 +955,11 @@ int netpoll_parse_options(struct netpoll *np, char *opt)
684 if ((delim = strchr(cur, '/')) == NULL) 955 if ((delim = strchr(cur, '/')) == NULL)
685 goto parse_failed; 956 goto parse_failed;
686 *delim = 0; 957 *delim = 0;
687 np->local_ip = in_aton(cur); 958 ipv6 = netpoll_parse_ip_addr(cur, &np->local_ip);
959 if (ipv6 < 0)
960 goto parse_failed;
961 else
962 np->ipv6 = (bool)ipv6;
688 cur = delim; 963 cur = delim;
689 } 964 }
690 cur++; 965 cur++;
@@ -716,7 +991,13 @@ int netpoll_parse_options(struct netpoll *np, char *opt)
716 if ((delim = strchr(cur, '/')) == NULL) 991 if ((delim = strchr(cur, '/')) == NULL)
717 goto parse_failed; 992 goto parse_failed;
718 *delim = 0; 993 *delim = 0;
719 np->remote_ip = in_aton(cur); 994 ipv6 = netpoll_parse_ip_addr(cur, &np->remote_ip);
995 if (ipv6 < 0)
996 goto parse_failed;
997 else if (np->ipv6 != (bool)ipv6)
998 goto parse_failed;
999 else
1000 np->ipv6 = (bool)ipv6;
720 cur = delim + 1; 1001 cur = delim + 1;
721 1002
722 if (*cur != 0) { 1003 if (*cur != 0) {
@@ -744,6 +1025,7 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev, gfp_t gfp)
744 1025
745 np->dev = ndev; 1026 np->dev = ndev;
746 strlcpy(np->dev_name, ndev->name, IFNAMSIZ); 1027 strlcpy(np->dev_name, ndev->name, IFNAMSIZ);
1028 INIT_WORK(&np->cleanup_work, netpoll_async_cleanup);
747 1029
748 if ((ndev->priv_flags & IFF_DISABLE_NETPOLL) || 1030 if ((ndev->priv_flags & IFF_DISABLE_NETPOLL) ||
749 !ndev->netdev_ops->ndo_poll_controller) { 1031 !ndev->netdev_ops->ndo_poll_controller) {
@@ -764,7 +1046,8 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev, gfp_t gfp)
764 INIT_LIST_HEAD(&npinfo->rx_np); 1046 INIT_LIST_HEAD(&npinfo->rx_np);
765 1047
766 spin_lock_init(&npinfo->rx_lock); 1048 spin_lock_init(&npinfo->rx_lock);
767 skb_queue_head_init(&npinfo->arp_tx); 1049 mutex_init(&npinfo->dev_lock);
1050 skb_queue_head_init(&npinfo->neigh_tx);
768 skb_queue_head_init(&npinfo->txq); 1051 skb_queue_head_init(&npinfo->txq);
769 INIT_DELAYED_WORK(&npinfo->tx_work, queue_process); 1052 INIT_DELAYED_WORK(&npinfo->tx_work, queue_process);
770 1053
@@ -777,7 +1060,7 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev, gfp_t gfp)
777 goto free_npinfo; 1060 goto free_npinfo;
778 } 1061 }
779 } else { 1062 } else {
780 npinfo = ndev->npinfo; 1063 npinfo = rtnl_dereference(ndev->npinfo);
781 atomic_inc(&npinfo->refcnt); 1064 atomic_inc(&npinfo->refcnt);
782 } 1065 }
783 1066
@@ -808,14 +1091,19 @@ int netpoll_setup(struct netpoll *np)
808 struct in_device *in_dev; 1091 struct in_device *in_dev;
809 int err; 1092 int err;
810 1093
811 if (np->dev_name) 1094 rtnl_lock();
812 ndev = dev_get_by_name(&init_net, np->dev_name); 1095 if (np->dev_name) {
1096 struct net *net = current->nsproxy->net_ns;
1097 ndev = __dev_get_by_name(net, np->dev_name);
1098 }
813 if (!ndev) { 1099 if (!ndev) {
814 np_err(np, "%s doesn't exist, aborting\n", np->dev_name); 1100 np_err(np, "%s doesn't exist, aborting\n", np->dev_name);
815 return -ENODEV; 1101 err = -ENODEV;
1102 goto unlock;
816 } 1103 }
1104 dev_hold(ndev);
817 1105
818 if (ndev->master) { 1106 if (netdev_master_upper_dev_get(ndev)) {
819 np_err(np, "%s is a slave device, aborting\n", np->dev_name); 1107 np_err(np, "%s is a slave device, aborting\n", np->dev_name);
820 err = -EBUSY; 1108 err = -EBUSY;
821 goto put; 1109 goto put;
@@ -826,15 +1114,14 @@ int netpoll_setup(struct netpoll *np)
826 1114
827 np_info(np, "device %s not up yet, forcing it\n", np->dev_name); 1115 np_info(np, "device %s not up yet, forcing it\n", np->dev_name);
828 1116
829 rtnl_lock();
830 err = dev_open(ndev); 1117 err = dev_open(ndev);
831 rtnl_unlock();
832 1118
833 if (err) { 1119 if (err) {
834 np_err(np, "failed to open %s\n", ndev->name); 1120 np_err(np, "failed to open %s\n", ndev->name);
835 goto put; 1121 goto put;
836 } 1122 }
837 1123
1124 rtnl_unlock();
838 atleast = jiffies + HZ/10; 1125 atleast = jiffies + HZ/10;
839 atmost = jiffies + carrier_timeout * HZ; 1126 atmost = jiffies + carrier_timeout * HZ;
840 while (!netif_carrier_ok(ndev)) { 1127 while (!netif_carrier_ok(ndev)) {
@@ -854,39 +1141,70 @@ int netpoll_setup(struct netpoll *np)
854 np_notice(np, "carrier detect appears untrustworthy, waiting 4 seconds\n"); 1141 np_notice(np, "carrier detect appears untrustworthy, waiting 4 seconds\n");
855 msleep(4000); 1142 msleep(4000);
856 } 1143 }
1144 rtnl_lock();
857 } 1145 }
858 1146
859 if (!np->local_ip) { 1147 if (!np->local_ip.ip) {
860 rcu_read_lock(); 1148 if (!np->ipv6) {
861 in_dev = __in_dev_get_rcu(ndev); 1149 in_dev = __in_dev_get_rtnl(ndev);
1150
1151 if (!in_dev || !in_dev->ifa_list) {
1152 np_err(np, "no IP address for %s, aborting\n",
1153 np->dev_name);
1154 err = -EDESTADDRREQ;
1155 goto put;
1156 }
1157
1158 np->local_ip.ip = in_dev->ifa_list->ifa_local;
1159 np_info(np, "local IP %pI4\n", &np->local_ip.ip);
1160 } else {
1161#if IS_ENABLED(CONFIG_IPV6)
1162 struct inet6_dev *idev;
862 1163
863 if (!in_dev || !in_dev->ifa_list) {
864 rcu_read_unlock();
865 np_err(np, "no IP address for %s, aborting\n",
866 np->dev_name);
867 err = -EDESTADDRREQ; 1164 err = -EDESTADDRREQ;
1165 idev = __in6_dev_get(ndev);
1166 if (idev) {
1167 struct inet6_ifaddr *ifp;
1168
1169 read_lock_bh(&idev->lock);
1170 list_for_each_entry(ifp, &idev->addr_list, if_list) {
1171 if (ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL)
1172 continue;
1173 np->local_ip.in6 = ifp->addr;
1174 err = 0;
1175 break;
1176 }
1177 read_unlock_bh(&idev->lock);
1178 }
1179 if (err) {
1180 np_err(np, "no IPv6 address for %s, aborting\n",
1181 np->dev_name);
1182 goto put;
1183 } else
1184 np_info(np, "local IPv6 %pI6c\n", &np->local_ip.in6);
1185#else
 1186 np_err(np, "IPv6 is not supported on %s, aborting\n",
1187 np->dev_name);
1188 err = -EINVAL;
868 goto put; 1189 goto put;
1190#endif
869 } 1191 }
870
871 np->local_ip = in_dev->ifa_list->ifa_local;
872 rcu_read_unlock();
873 np_info(np, "local IP %pI4\n", &np->local_ip);
874 } 1192 }
875 1193
876 /* fill up the skb queue */ 1194 /* fill up the skb queue */
877 refill_skbs(); 1195 refill_skbs();
878 1196
879 rtnl_lock();
880 err = __netpoll_setup(np, ndev, GFP_KERNEL); 1197 err = __netpoll_setup(np, ndev, GFP_KERNEL);
881 rtnl_unlock();
882
883 if (err) 1198 if (err)
884 goto put; 1199 goto put;
885 1200
1201 rtnl_unlock();
886 return 0; 1202 return 0;
887 1203
888put: 1204put:
889 dev_put(ndev); 1205 dev_put(ndev);
1206unlock:
1207 rtnl_unlock();
890 return err; 1208 return err;
891} 1209}
892EXPORT_SYMBOL(netpoll_setup); 1210EXPORT_SYMBOL(netpoll_setup);
@@ -894,6 +1212,7 @@ EXPORT_SYMBOL(netpoll_setup);
894static int __init netpoll_init(void) 1212static int __init netpoll_init(void)
895{ 1213{
896 skb_queue_head_init(&skb_pool); 1214 skb_queue_head_init(&skb_pool);
1215 init_srcu_struct(&netpoll_srcu);
897 return 0; 1216 return 0;
898} 1217}
899core_initcall(netpoll_init); 1218core_initcall(netpoll_init);
@@ -903,7 +1222,7 @@ static void rcu_cleanup_netpoll_info(struct rcu_head *rcu_head)
903 struct netpoll_info *npinfo = 1222 struct netpoll_info *npinfo =
904 container_of(rcu_head, struct netpoll_info, rcu); 1223 container_of(rcu_head, struct netpoll_info, rcu);
905 1224
906 skb_queue_purge(&npinfo->arp_tx); 1225 skb_queue_purge(&npinfo->neigh_tx);
907 skb_queue_purge(&npinfo->txq); 1226 skb_queue_purge(&npinfo->txq);
908 1227
909 /* we can't call cancel_delayed_work_sync here, as we are in softirq */ 1228 /* we can't call cancel_delayed_work_sync here, as we are in softirq */
@@ -921,7 +1240,11 @@ void __netpoll_cleanup(struct netpoll *np)
921 struct netpoll_info *npinfo; 1240 struct netpoll_info *npinfo;
922 unsigned long flags; 1241 unsigned long flags;
923 1242
924 npinfo = np->dev->npinfo; 1243 /* rtnl_dereference would be preferable here but
1244 * rcu_cleanup_netpoll path can put us in here safely without
1245 * holding the rtnl, so plain rcu_dereference it is
1246 */
1247 npinfo = rtnl_dereference(np->dev->npinfo);
925 if (!npinfo) 1248 if (!npinfo)
926 return; 1249 return;
927 1250
@@ -933,6 +1256,8 @@ void __netpoll_cleanup(struct netpoll *np)
933 spin_unlock_irqrestore(&npinfo->rx_lock, flags); 1256 spin_unlock_irqrestore(&npinfo->rx_lock, flags);
934 } 1257 }
935 1258
1259 synchronize_srcu(&netpoll_srcu);
1260
936 if (atomic_dec_and_test(&npinfo->refcnt)) { 1261 if (atomic_dec_and_test(&npinfo->refcnt)) {
937 const struct net_device_ops *ops; 1262 const struct net_device_ops *ops;
938 1263
@@ -940,25 +1265,27 @@ void __netpoll_cleanup(struct netpoll *np)
940 if (ops->ndo_netpoll_cleanup) 1265 if (ops->ndo_netpoll_cleanup)
941 ops->ndo_netpoll_cleanup(np->dev); 1266 ops->ndo_netpoll_cleanup(np->dev);
942 1267
943 RCU_INIT_POINTER(np->dev->npinfo, NULL); 1268 rcu_assign_pointer(np->dev->npinfo, NULL);
944 call_rcu_bh(&npinfo->rcu, rcu_cleanup_netpoll_info); 1269 call_rcu_bh(&npinfo->rcu, rcu_cleanup_netpoll_info);
945 } 1270 }
946} 1271}
947EXPORT_SYMBOL_GPL(__netpoll_cleanup); 1272EXPORT_SYMBOL_GPL(__netpoll_cleanup);
948 1273
949static void rcu_cleanup_netpoll(struct rcu_head *rcu_head) 1274static void netpoll_async_cleanup(struct work_struct *work)
950{ 1275{
951 struct netpoll *np = container_of(rcu_head, struct netpoll, rcu); 1276 struct netpoll *np = container_of(work, struct netpoll, cleanup_work);
952 1277
1278 rtnl_lock();
953 __netpoll_cleanup(np); 1279 __netpoll_cleanup(np);
1280 rtnl_unlock();
954 kfree(np); 1281 kfree(np);
955} 1282}
956 1283
957void __netpoll_free_rcu(struct netpoll *np) 1284void __netpoll_free_async(struct netpoll *np)
958{ 1285{
959 call_rcu_bh(&np->rcu, rcu_cleanup_netpoll); 1286 schedule_work(&np->cleanup_work);
960} 1287}
961EXPORT_SYMBOL_GPL(__netpoll_free_rcu); 1288EXPORT_SYMBOL_GPL(__netpoll_free_async);
962 1289
963void netpoll_cleanup(struct netpoll *np) 1290void netpoll_cleanup(struct netpoll *np)
964{ 1291{
diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c
index 5e67defe2cb0..0777d0aa18c3 100644
--- a/net/core/netprio_cgroup.c
+++ b/net/core/netprio_cgroup.c
@@ -69,10 +69,8 @@ static int extend_netdev_table(struct net_device *dev, u32 target_idx)
69 69
70 /* allocate & copy */ 70 /* allocate & copy */
71 new = kzalloc(new_sz, GFP_KERNEL); 71 new = kzalloc(new_sz, GFP_KERNEL);
72 if (!new) { 72 if (!new)
73 pr_warn("Unable to alloc new priomap!\n");
74 return -ENOMEM; 73 return -ENOMEM;
75 }
76 74
77 if (old) 75 if (old)
78 memcpy(new->priomap, old->priomap, 76 memcpy(new->priomap, old->priomap,
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index b29dacf900f9..6048fc1da1c2 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -164,6 +164,7 @@
164#ifdef CONFIG_XFRM 164#ifdef CONFIG_XFRM
165#include <net/xfrm.h> 165#include <net/xfrm.h>
166#endif 166#endif
167#include <net/netns/generic.h>
167#include <asm/byteorder.h> 168#include <asm/byteorder.h>
168#include <linux/rcupdate.h> 169#include <linux/rcupdate.h>
169#include <linux/bitops.h> 170#include <linux/bitops.h>
@@ -212,7 +213,6 @@
212#define PKTGEN_MAGIC 0xbe9be955 213#define PKTGEN_MAGIC 0xbe9be955
213#define PG_PROC_DIR "pktgen" 214#define PG_PROC_DIR "pktgen"
214#define PGCTRL "pgctrl" 215#define PGCTRL "pgctrl"
215static struct proc_dir_entry *pg_proc_dir;
216 216
217#define MAX_CFLOWS 65536 217#define MAX_CFLOWS 65536
218 218
@@ -397,7 +397,15 @@ struct pktgen_hdr {
397 __be32 tv_usec; 397 __be32 tv_usec;
398}; 398};
399 399
400static bool pktgen_exiting __read_mostly; 400
401static int pg_net_id __read_mostly;
402
403struct pktgen_net {
404 struct net *net;
405 struct proc_dir_entry *proc_dir;
406 struct list_head pktgen_threads;
407 bool pktgen_exiting;
408};
401 409
402struct pktgen_thread { 410struct pktgen_thread {
403 spinlock_t if_lock; /* for list of devices */ 411 spinlock_t if_lock; /* for list of devices */
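
struct pktgen_net moves the thread list, the proc directory, and the exit flag out of file-scope globals so each network namespace gets an independent pktgen instance, found via net_generic(net, pg_net_id). The usual wiring for such a pernet struct looks roughly like this (a hedged reconstruction; the patch's real init/exit bodies appear in later hunks not shown here):

static int __net_init pg_net_init(struct net *net)
{
	struct pktgen_net *pn = net_generic(net, pg_net_id);

	pn->net = net;
	INIT_LIST_HEAD(&pn->pktgen_threads);
	pn->pktgen_exiting = false;
	pn->proc_dir = proc_mkdir(PG_PROC_DIR, pn->net->proc_net);
	return pn->proc_dir ? 0 : -ENODEV;
}

static struct pernet_operations pg_net_ops = {
	.init = pg_net_init,
	.id   = &pg_net_id,
	.size = sizeof(struct pktgen_net),
};
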
@@ -414,6 +422,7 @@ struct pktgen_thread {
414 422
415 wait_queue_head_t queue; 423 wait_queue_head_t queue;
416 struct completion start_done; 424 struct completion start_done;
425 struct pktgen_net *net;
417}; 426};
418 427
419#define REMOVE 1 428#define REMOVE 1
@@ -428,9 +437,9 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname);
428static struct pktgen_dev *pktgen_find_dev(struct pktgen_thread *t, 437static struct pktgen_dev *pktgen_find_dev(struct pktgen_thread *t,
429 const char *ifname, bool exact); 438 const char *ifname, bool exact);
430static int pktgen_device_event(struct notifier_block *, unsigned long, void *); 439static int pktgen_device_event(struct notifier_block *, unsigned long, void *);
431static void pktgen_run_all_threads(void); 440static void pktgen_run_all_threads(struct pktgen_net *pn);
432static void pktgen_reset_all_threads(void); 441static void pktgen_reset_all_threads(struct pktgen_net *pn);
433static void pktgen_stop_all_threads_ifs(void); 442static void pktgen_stop_all_threads_ifs(struct pktgen_net *pn);
434 443
435static void pktgen_stop(struct pktgen_thread *t); 444static void pktgen_stop(struct pktgen_thread *t);
436static void pktgen_clear_counters(struct pktgen_dev *pkt_dev); 445static void pktgen_clear_counters(struct pktgen_dev *pkt_dev);
@@ -442,7 +451,6 @@ static int pg_clone_skb_d __read_mostly;
442static int debug __read_mostly; 451static int debug __read_mostly;
443 452
444static DEFINE_MUTEX(pktgen_thread_lock); 453static DEFINE_MUTEX(pktgen_thread_lock);
445static LIST_HEAD(pktgen_threads);
446 454
447static struct notifier_block pktgen_notifier_block = { 455static struct notifier_block pktgen_notifier_block = {
448 .notifier_call = pktgen_device_event, 456 .notifier_call = pktgen_device_event,
@@ -464,6 +472,7 @@ static ssize_t pgctrl_write(struct file *file, const char __user *buf,
464{ 472{
465 int err = 0; 473 int err = 0;
466 char data[128]; 474 char data[128];
475 struct pktgen_net *pn = net_generic(current->nsproxy->net_ns, pg_net_id);
467 476
468 if (!capable(CAP_NET_ADMIN)) { 477 if (!capable(CAP_NET_ADMIN)) {
469 err = -EPERM; 478 err = -EPERM;
@@ -480,13 +489,13 @@ static ssize_t pgctrl_write(struct file *file, const char __user *buf,
480 data[count - 1] = 0; /* Make string */ 489 data[count - 1] = 0; /* Make string */
481 490
482 if (!strcmp(data, "stop")) 491 if (!strcmp(data, "stop"))
483 pktgen_stop_all_threads_ifs(); 492 pktgen_stop_all_threads_ifs(pn);
484 493
485 else if (!strcmp(data, "start")) 494 else if (!strcmp(data, "start"))
486 pktgen_run_all_threads(); 495 pktgen_run_all_threads(pn);
487 496
488 else if (!strcmp(data, "reset")) 497 else if (!strcmp(data, "reset"))
489 pktgen_reset_all_threads(); 498 pktgen_reset_all_threads(pn);
490 499
491 else 500 else
492 pr_warning("Unknown command: %s\n", data); 501 pr_warning("Unknown command: %s\n", data);
@@ -1781,10 +1790,13 @@ static ssize_t pktgen_thread_write(struct file *file,
1781 return -EFAULT; 1790 return -EFAULT;
1782 i += len; 1791 i += len;
1783 mutex_lock(&pktgen_thread_lock); 1792 mutex_lock(&pktgen_thread_lock);
1784 pktgen_add_device(t, f); 1793 ret = pktgen_add_device(t, f);
1785 mutex_unlock(&pktgen_thread_lock); 1794 mutex_unlock(&pktgen_thread_lock);
1786 ret = count; 1795 if (!ret) {
1787 sprintf(pg_result, "OK: add_device=%s", f); 1796 ret = count;
1797 sprintf(pg_result, "OK: add_device=%s", f);
1798 } else
1799 sprintf(pg_result, "ERROR: can not add device %s", f);
1788 goto out; 1800 goto out;
1789 } 1801 }
1790 1802
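
From this hunk on, a failure of pktgen_add_device() propagates to the writer instead of being swallowed, so the write() itself fails (for instance with EBUSY when the interface is already claimed, per the __pktgen_NN_threads() check later in this patch, or ENODEV for an unknown interface). A small userspace illustration of the observable change; the thread-file path and interface name are examples only:

	#include <fcntl.h>
	#include <stdio.h>
	#include <string.h>
	#include <unistd.h>

	int main(void)
	{
		int fd = open("/proc/net/pktgen/kpktgend_0", O_WRONLY);

		if (fd < 0)
			return 1;
		/* before this change the write reported success even when
		 * the device could not be added; now it returns the errno */
		if (write(fd, "add_device eth0", strlen("add_device eth0")) < 0)
			perror("add_device");	/* e.g. EBUSY or ENODEV */
		close(fd);
		return 0;
	}
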
@@ -1824,13 +1836,14 @@ static const struct file_operations pktgen_thread_fops = {
1824}; 1836};
1825 1837
1826/* Think find or remove for NN */ 1838/* Think find or remove for NN */
1827static struct pktgen_dev *__pktgen_NN_threads(const char *ifname, int remove) 1839static struct pktgen_dev *__pktgen_NN_threads(const struct pktgen_net *pn,
1840 const char *ifname, int remove)
1828{ 1841{
1829 struct pktgen_thread *t; 1842 struct pktgen_thread *t;
1830 struct pktgen_dev *pkt_dev = NULL; 1843 struct pktgen_dev *pkt_dev = NULL;
1831 bool exact = (remove == FIND); 1844 bool exact = (remove == FIND);
1832 1845
1833 list_for_each_entry(t, &pktgen_threads, th_list) { 1846 list_for_each_entry(t, &pn->pktgen_threads, th_list) {
1834 pkt_dev = pktgen_find_dev(t, ifname, exact); 1847 pkt_dev = pktgen_find_dev(t, ifname, exact);
1835 if (pkt_dev) { 1848 if (pkt_dev) {
1836 if (remove) { 1849 if (remove) {
@@ -1848,7 +1861,7 @@ static struct pktgen_dev *__pktgen_NN_threads(const char *ifname, int remove)
1848/* 1861/*
1849 * mark a device for removal 1862 * mark a device for removal
1850 */ 1863 */
1851static void pktgen_mark_device(const char *ifname) 1864static void pktgen_mark_device(const struct pktgen_net *pn, const char *ifname)
1852{ 1865{
1853 struct pktgen_dev *pkt_dev = NULL; 1866 struct pktgen_dev *pkt_dev = NULL;
1854 const int max_tries = 10, msec_per_try = 125; 1867 const int max_tries = 10, msec_per_try = 125;
@@ -1859,7 +1872,7 @@ static void pktgen_mark_device(const char *ifname)
1859 1872
1860 while (1) { 1873 while (1) {
1861 1874
1862 pkt_dev = __pktgen_NN_threads(ifname, REMOVE); 1875 pkt_dev = __pktgen_NN_threads(pn, ifname, REMOVE);
1863 if (pkt_dev == NULL) 1876 if (pkt_dev == NULL)
1864 break; /* success */ 1877 break; /* success */
1865 1878
@@ -1880,21 +1893,21 @@ static void pktgen_mark_device(const char *ifname)
1880 mutex_unlock(&pktgen_thread_lock); 1893 mutex_unlock(&pktgen_thread_lock);
1881} 1894}
1882 1895
1883static void pktgen_change_name(struct net_device *dev) 1896static void pktgen_change_name(const struct pktgen_net *pn, struct net_device *dev)
1884{ 1897{
1885 struct pktgen_thread *t; 1898 struct pktgen_thread *t;
1886 1899
1887 list_for_each_entry(t, &pktgen_threads, th_list) { 1900 list_for_each_entry(t, &pn->pktgen_threads, th_list) {
1888 struct pktgen_dev *pkt_dev; 1901 struct pktgen_dev *pkt_dev;
1889 1902
1890 list_for_each_entry(pkt_dev, &t->if_list, list) { 1903 list_for_each_entry(pkt_dev, &t->if_list, list) {
1891 if (pkt_dev->odev != dev) 1904 if (pkt_dev->odev != dev)
1892 continue; 1905 continue;
1893 1906
1894 remove_proc_entry(pkt_dev->entry->name, pg_proc_dir); 1907 remove_proc_entry(pkt_dev->entry->name, pn->proc_dir);
1895 1908
1896 pkt_dev->entry = proc_create_data(dev->name, 0600, 1909 pkt_dev->entry = proc_create_data(dev->name, 0600,
1897 pg_proc_dir, 1910 pn->proc_dir,
1898 &pktgen_if_fops, 1911 &pktgen_if_fops,
1899 pkt_dev); 1912 pkt_dev);
1900 if (!pkt_dev->entry) 1913 if (!pkt_dev->entry)
@@ -1909,8 +1922,9 @@ static int pktgen_device_event(struct notifier_block *unused,
1909 unsigned long event, void *ptr) 1922 unsigned long event, void *ptr)
1910{ 1923{
1911 struct net_device *dev = ptr; 1924 struct net_device *dev = ptr;
1925 struct pktgen_net *pn = net_generic(dev_net(dev), pg_net_id);
1912 1926
1913 if (!net_eq(dev_net(dev), &init_net) || pktgen_exiting) 1927 if (pn->pktgen_exiting)
1914 return NOTIFY_DONE; 1928 return NOTIFY_DONE;
1915 1929
1916 /* It is OK that we do not hold the group lock right now, 1930 /* It is OK that we do not hold the group lock right now,
@@ -1919,18 +1933,19 @@ static int pktgen_device_event(struct notifier_block *unused,
1919 1933
1920 switch (event) { 1934 switch (event) {
1921 case NETDEV_CHANGENAME: 1935 case NETDEV_CHANGENAME:
1922 pktgen_change_name(dev); 1936 pktgen_change_name(pn, dev);
1923 break; 1937 break;
1924 1938
1925 case NETDEV_UNREGISTER: 1939 case NETDEV_UNREGISTER:
1926 pktgen_mark_device(dev->name); 1940 pktgen_mark_device(pn, dev->name);
1927 break; 1941 break;
1928 } 1942 }
1929 1943
1930 return NOTIFY_DONE; 1944 return NOTIFY_DONE;
1931} 1945}
1932 1946
1933static struct net_device *pktgen_dev_get_by_name(struct pktgen_dev *pkt_dev, 1947static struct net_device *pktgen_dev_get_by_name(const struct pktgen_net *pn,
1948 struct pktgen_dev *pkt_dev,
1934 const char *ifname) 1949 const char *ifname)
1935{ 1950{
1936 char b[IFNAMSIZ+5]; 1951 char b[IFNAMSIZ+5];
@@ -1944,13 +1959,14 @@ static struct net_device *pktgen_dev_get_by_name(struct pktgen_dev *pkt_dev,
1944 } 1959 }
1945 b[i] = 0; 1960 b[i] = 0;
1946 1961
1947 return dev_get_by_name(&init_net, b); 1962 return dev_get_by_name(pn->net, b);
1948} 1963}
1949 1964
1950 1965
1951/* Associate pktgen_dev with a device. */ 1966/* Associate pktgen_dev with a device. */
1952 1967
1953static int pktgen_setup_dev(struct pktgen_dev *pkt_dev, const char *ifname) 1968static int pktgen_setup_dev(const struct pktgen_net *pn,
1969 struct pktgen_dev *pkt_dev, const char *ifname)
1954{ 1970{
1955 struct net_device *odev; 1971 struct net_device *odev;
1956 int err; 1972 int err;
@@ -1961,7 +1977,7 @@ static int pktgen_setup_dev(struct pktgen_dev *pkt_dev, const char *ifname)
1961 pkt_dev->odev = NULL; 1977 pkt_dev->odev = NULL;
1962 } 1978 }
1963 1979
1964 odev = pktgen_dev_get_by_name(pkt_dev, ifname); 1980 odev = pktgen_dev_get_by_name(pn, pkt_dev, ifname);
1965 if (!odev) { 1981 if (!odev) {
1966 pr_err("no such netdevice: \"%s\"\n", ifname); 1982 pr_err("no such netdevice: \"%s\"\n", ifname);
1967 return -ENODEV; 1983 return -ENODEV;
@@ -2203,9 +2219,10 @@ static inline int f_pick(struct pktgen_dev *pkt_dev)
2203static void get_ipsec_sa(struct pktgen_dev *pkt_dev, int flow) 2219static void get_ipsec_sa(struct pktgen_dev *pkt_dev, int flow)
2204{ 2220{
2205 struct xfrm_state *x = pkt_dev->flows[flow].x; 2221 struct xfrm_state *x = pkt_dev->flows[flow].x;
2222 struct pktgen_net *pn = net_generic(dev_net(pkt_dev->odev), pg_net_id);
2206 if (!x) { 2223 if (!x) {
2207 /*slow path: we dont already have xfrm_state*/ 2224 /*slow path: we dont already have xfrm_state*/
2208 x = xfrm_stateonly_find(&init_net, DUMMY_MARK, 2225 x = xfrm_stateonly_find(pn->net, DUMMY_MARK,
2209 (xfrm_address_t *)&pkt_dev->cur_daddr, 2226 (xfrm_address_t *)&pkt_dev->cur_daddr,
2210 (xfrm_address_t *)&pkt_dev->cur_saddr, 2227 (xfrm_address_t *)&pkt_dev->cur_saddr,
2211 AF_INET, 2228 AF_INET,
@@ -2912,7 +2929,7 @@ static void pktgen_run(struct pktgen_thread *t)
2912 t->control &= ~(T_STOP); 2929 t->control &= ~(T_STOP);
2913} 2930}
2914 2931
2915static void pktgen_stop_all_threads_ifs(void) 2932static void pktgen_stop_all_threads_ifs(struct pktgen_net *pn)
2916{ 2933{
2917 struct pktgen_thread *t; 2934 struct pktgen_thread *t;
2918 2935
@@ -2920,7 +2937,7 @@ static void pktgen_stop_all_threads_ifs(void)
2920 2937
2921 mutex_lock(&pktgen_thread_lock); 2938 mutex_lock(&pktgen_thread_lock);
2922 2939
2923 list_for_each_entry(t, &pktgen_threads, th_list) 2940 list_for_each_entry(t, &pn->pktgen_threads, th_list)
2924 t->control |= T_STOP; 2941 t->control |= T_STOP;
2925 2942
2926 mutex_unlock(&pktgen_thread_lock); 2943 mutex_unlock(&pktgen_thread_lock);
@@ -2956,28 +2973,28 @@ signal:
2956 return 0; 2973 return 0;
2957} 2974}
2958 2975
2959static int pktgen_wait_all_threads_run(void) 2976static int pktgen_wait_all_threads_run(struct pktgen_net *pn)
2960{ 2977{
2961 struct pktgen_thread *t; 2978 struct pktgen_thread *t;
2962 int sig = 1; 2979 int sig = 1;
2963 2980
2964 mutex_lock(&pktgen_thread_lock); 2981 mutex_lock(&pktgen_thread_lock);
2965 2982
2966 list_for_each_entry(t, &pktgen_threads, th_list) { 2983 list_for_each_entry(t, &pn->pktgen_threads, th_list) {
2967 sig = pktgen_wait_thread_run(t); 2984 sig = pktgen_wait_thread_run(t);
2968 if (sig == 0) 2985 if (sig == 0)
2969 break; 2986 break;
2970 } 2987 }
2971 2988
2972 if (sig == 0) 2989 if (sig == 0)
2973 list_for_each_entry(t, &pktgen_threads, th_list) 2990 list_for_each_entry(t, &pn->pktgen_threads, th_list)
2974 t->control |= (T_STOP); 2991 t->control |= (T_STOP);
2975 2992
2976 mutex_unlock(&pktgen_thread_lock); 2993 mutex_unlock(&pktgen_thread_lock);
2977 return sig; 2994 return sig;
2978} 2995}
2979 2996
2980static void pktgen_run_all_threads(void) 2997static void pktgen_run_all_threads(struct pktgen_net *pn)
2981{ 2998{
2982 struct pktgen_thread *t; 2999 struct pktgen_thread *t;
2983 3000
@@ -2985,7 +3002,7 @@ static void pktgen_run_all_threads(void)
2985 3002
2986 mutex_lock(&pktgen_thread_lock); 3003 mutex_lock(&pktgen_thread_lock);
2987 3004
2988 list_for_each_entry(t, &pktgen_threads, th_list) 3005 list_for_each_entry(t, &pn->pktgen_threads, th_list)
2989 t->control |= (T_RUN); 3006 t->control |= (T_RUN);
2990 3007
2991 mutex_unlock(&pktgen_thread_lock); 3008 mutex_unlock(&pktgen_thread_lock);
@@ -2993,10 +3010,10 @@ static void pktgen_run_all_threads(void)
2993 /* Propagate thread->control */ 3010 /* Propagate thread->control */
2994 schedule_timeout_interruptible(msecs_to_jiffies(125)); 3011 schedule_timeout_interruptible(msecs_to_jiffies(125));
2995 3012
2996 pktgen_wait_all_threads_run(); 3013 pktgen_wait_all_threads_run(pn);
2997} 3014}
2998 3015
2999static void pktgen_reset_all_threads(void) 3016static void pktgen_reset_all_threads(struct pktgen_net *pn)
3000{ 3017{
3001 struct pktgen_thread *t; 3018 struct pktgen_thread *t;
3002 3019
@@ -3004,7 +3021,7 @@ static void pktgen_reset_all_threads(void)
3004 3021
3005 mutex_lock(&pktgen_thread_lock); 3022 mutex_lock(&pktgen_thread_lock);
3006 3023
3007 list_for_each_entry(t, &pktgen_threads, th_list) 3024 list_for_each_entry(t, &pn->pktgen_threads, th_list)
3008 t->control |= (T_REMDEVALL); 3025 t->control |= (T_REMDEVALL);
3009 3026
3010 mutex_unlock(&pktgen_thread_lock); 3027 mutex_unlock(&pktgen_thread_lock);
@@ -3012,7 +3029,7 @@ static void pktgen_reset_all_threads(void)
3012 /* Propagate thread->control */ 3029 /* Propagate thread->control */
3013 schedule_timeout_interruptible(msecs_to_jiffies(125)); 3030 schedule_timeout_interruptible(msecs_to_jiffies(125));
3014 3031
3015 pktgen_wait_all_threads_run(); 3032 pktgen_wait_all_threads_run(pn);
3016} 3033}
3017 3034
3018static void show_results(struct pktgen_dev *pkt_dev, int nr_frags) 3035static void show_results(struct pktgen_dev *pkt_dev, int nr_frags)
@@ -3154,9 +3171,7 @@ static void pktgen_rem_all_ifs(struct pktgen_thread *t)
3154static void pktgen_rem_thread(struct pktgen_thread *t) 3171static void pktgen_rem_thread(struct pktgen_thread *t)
3155{ 3172{
3156 /* Remove from the thread list */ 3173 /* Remove from the thread list */
3157 3174 remove_proc_entry(t->tsk->comm, t->net->proc_dir);
3158 remove_proc_entry(t->tsk->comm, pg_proc_dir);
3159
3160} 3175}
3161 3176
3162static void pktgen_resched(struct pktgen_dev *pkt_dev) 3177static void pktgen_resched(struct pktgen_dev *pkt_dev)
@@ -3302,7 +3317,7 @@ static int pktgen_thread_worker(void *arg)
3302 pkt_dev = next_to_run(t); 3317 pkt_dev = next_to_run(t);
3303 3318
3304 if (unlikely(!pkt_dev && t->control == 0)) { 3319 if (unlikely(!pkt_dev && t->control == 0)) {
3305 if (pktgen_exiting) 3320 if (t->net->pktgen_exiting)
3306 break; 3321 break;
3307 wait_event_interruptible_timeout(t->queue, 3322 wait_event_interruptible_timeout(t->queue,
3308 t->control != 0, 3323 t->control != 0,
@@ -3424,7 +3439,7 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname)
3424 3439
3425 /* We don't allow a device to be on several threads */ 3440 /* We don't allow a device to be on several threads */
3426 3441
3427 pkt_dev = __pktgen_NN_threads(ifname, FIND); 3442 pkt_dev = __pktgen_NN_threads(t->net, ifname, FIND);
3428 if (pkt_dev) { 3443 if (pkt_dev) {
3429 pr_err("ERROR: interface already used\n"); 3444 pr_err("ERROR: interface already used\n");
3430 return -EBUSY; 3445 return -EBUSY;
@@ -3459,13 +3474,13 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname)
3459 pkt_dev->svlan_id = 0xffff; 3474 pkt_dev->svlan_id = 0xffff;
3460 pkt_dev->node = -1; 3475 pkt_dev->node = -1;
3461 3476
3462 err = pktgen_setup_dev(pkt_dev, ifname); 3477 err = pktgen_setup_dev(t->net, pkt_dev, ifname);
3463 if (err) 3478 if (err)
3464 goto out1; 3479 goto out1;
3465 if (pkt_dev->odev->priv_flags & IFF_TX_SKB_SHARING) 3480 if (pkt_dev->odev->priv_flags & IFF_TX_SKB_SHARING)
3466 pkt_dev->clone_skb = pg_clone_skb_d; 3481 pkt_dev->clone_skb = pg_clone_skb_d;
3467 3482
3468 pkt_dev->entry = proc_create_data(ifname, 0600, pg_proc_dir, 3483 pkt_dev->entry = proc_create_data(ifname, 0600, t->net->proc_dir,
3469 &pktgen_if_fops, pkt_dev); 3484 &pktgen_if_fops, pkt_dev);
3470 if (!pkt_dev->entry) { 3485 if (!pkt_dev->entry) {
3471 pr_err("cannot create %s/%s procfs entry\n", 3486 pr_err("cannot create %s/%s procfs entry\n",
@@ -3490,7 +3505,7 @@ out1:
3490 return err; 3505 return err;
3491} 3506}
3492 3507
3493static int __init pktgen_create_thread(int cpu) 3508static int __net_init pktgen_create_thread(int cpu, struct pktgen_net *pn)
3494{ 3509{
3495 struct pktgen_thread *t; 3510 struct pktgen_thread *t;
3496 struct proc_dir_entry *pe; 3511 struct proc_dir_entry *pe;
@@ -3508,7 +3523,7 @@ static int __init pktgen_create_thread(int cpu)
3508 3523
3509 INIT_LIST_HEAD(&t->if_list); 3524 INIT_LIST_HEAD(&t->if_list);
3510 3525
3511 list_add_tail(&t->th_list, &pktgen_threads); 3526 list_add_tail(&t->th_list, &pn->pktgen_threads);
3512 init_completion(&t->start_done); 3527 init_completion(&t->start_done);
3513 3528
3514 p = kthread_create_on_node(pktgen_thread_worker, 3529 p = kthread_create_on_node(pktgen_thread_worker,
@@ -3524,7 +3539,7 @@ static int __init pktgen_create_thread(int cpu)
3524 kthread_bind(p, cpu); 3539 kthread_bind(p, cpu);
3525 t->tsk = p; 3540 t->tsk = p;
3526 3541
3527 pe = proc_create_data(t->tsk->comm, 0600, pg_proc_dir, 3542 pe = proc_create_data(t->tsk->comm, 0600, pn->proc_dir,
3528 &pktgen_thread_fops, t); 3543 &pktgen_thread_fops, t);
3529 if (!pe) { 3544 if (!pe) {
3530 pr_err("cannot create %s/%s procfs entry\n", 3545 pr_err("cannot create %s/%s procfs entry\n",
@@ -3535,6 +3550,7 @@ static int __init pktgen_create_thread(int cpu)
3535 return -EINVAL; 3550 return -EINVAL;
3536 } 3551 }
3537 3552
3553 t->net = pn;
3538 wake_up_process(p); 3554 wake_up_process(p);
3539 wait_for_completion(&t->start_done); 3555 wait_for_completion(&t->start_done);
3540 3556
@@ -3560,6 +3576,7 @@ static void _rem_dev_from_if_list(struct pktgen_thread *t,
3560static int pktgen_remove_device(struct pktgen_thread *t, 3576static int pktgen_remove_device(struct pktgen_thread *t,
3561 struct pktgen_dev *pkt_dev) 3577 struct pktgen_dev *pkt_dev)
3562{ 3578{
3579 struct pktgen_net *pn = t->net;
3563 3580
3564 pr_debug("remove_device pkt_dev=%p\n", pkt_dev); 3581 pr_debug("remove_device pkt_dev=%p\n", pkt_dev);
3565 3582
@@ -3580,7 +3597,7 @@ static int pktgen_remove_device(struct pktgen_thread *t,
3580 _rem_dev_from_if_list(t, pkt_dev); 3597 _rem_dev_from_if_list(t, pkt_dev);
3581 3598
3582 if (pkt_dev->entry) 3599 if (pkt_dev->entry)
3583 remove_proc_entry(pkt_dev->entry->name, pg_proc_dir); 3600 remove_proc_entry(pkt_dev->entry->name, pn->proc_dir);
3584 3601
3585#ifdef CONFIG_XFRM 3602#ifdef CONFIG_XFRM
3586 free_SAs(pkt_dev); 3603 free_SAs(pkt_dev);
@@ -3592,63 +3609,63 @@ static int pktgen_remove_device(struct pktgen_thread *t,
3592 return 0; 3609 return 0;
3593} 3610}
3594 3611
3595static int __init pg_init(void) 3612static int __net_init pg_net_init(struct net *net)
3596{ 3613{
3597 int cpu; 3614 struct pktgen_net *pn = net_generic(net, pg_net_id);
3598 struct proc_dir_entry *pe; 3615 struct proc_dir_entry *pe;
3599 int ret = 0; 3616 int cpu, ret = 0;
3600 3617
3601 pr_info("%s", version); 3618 pn->net = net;
3602 3619 INIT_LIST_HEAD(&pn->pktgen_threads);
3603 pg_proc_dir = proc_mkdir(PG_PROC_DIR, init_net.proc_net); 3620 pn->pktgen_exiting = false;
3604 if (!pg_proc_dir) 3621 pn->proc_dir = proc_mkdir(PG_PROC_DIR, pn->net->proc_net);
3622 if (!pn->proc_dir) {
3623 pr_warn("cannot create /proc/net/%s\n", PG_PROC_DIR);
3605 return -ENODEV; 3624 return -ENODEV;
3606 3625 }
3607 pe = proc_create(PGCTRL, 0600, pg_proc_dir, &pktgen_fops); 3626 pe = proc_create(PGCTRL, 0600, pn->proc_dir, &pktgen_fops);
3608 if (pe == NULL) { 3627 if (pe == NULL) {
3609 pr_err("ERROR: cannot create %s procfs entry\n", PGCTRL); 3628 pr_err("cannot create %s procfs entry\n", PGCTRL);
3610 ret = -EINVAL; 3629 ret = -EINVAL;
3611 goto remove_dir; 3630 goto remove;
3612 } 3631 }
3613 3632
3614 register_netdevice_notifier(&pktgen_notifier_block);
3615
3616 for_each_online_cpu(cpu) { 3633 for_each_online_cpu(cpu) {
3617 int err; 3634 int err;
3618 3635
3619 err = pktgen_create_thread(cpu); 3636 err = pktgen_create_thread(cpu, pn);
3620 if (err) 3637 if (err)
3621 pr_warning("WARNING: Cannot create thread for cpu %d (%d)\n", 3638 pr_warn("Cannot create thread for cpu %d (%d)\n",
3622 cpu, err); 3639 cpu, err);
3623 } 3640 }
3624 3641
3625 if (list_empty(&pktgen_threads)) { 3642 if (list_empty(&pn->pktgen_threads)) {
3626 pr_err("ERROR: Initialization failed for all threads\n"); 3643 pr_err("Initialization failed for all threads\n");
3627 ret = -ENODEV; 3644 ret = -ENODEV;
3628 goto unregister; 3645 goto remove_entry;
3629 } 3646 }
3630 3647
3631 return 0; 3648 return 0;
3632 3649
3633 unregister: 3650remove_entry:
3634 unregister_netdevice_notifier(&pktgen_notifier_block); 3651 remove_proc_entry(PGCTRL, pn->proc_dir);
3635 remove_proc_entry(PGCTRL, pg_proc_dir); 3652remove:
3636 remove_dir: 3653 remove_proc_entry(PG_PROC_DIR, pn->net->proc_net);
3637 proc_net_remove(&init_net, PG_PROC_DIR);
3638 return ret; 3654 return ret;
3639} 3655}
3640 3656
3641static void __exit pg_cleanup(void) 3657static void __net_exit pg_net_exit(struct net *net)
3642{ 3658{
3659 struct pktgen_net *pn = net_generic(net, pg_net_id);
3643 struct pktgen_thread *t; 3660 struct pktgen_thread *t;
3644 struct list_head *q, *n; 3661 struct list_head *q, *n;
3645 LIST_HEAD(list); 3662 LIST_HEAD(list);
3646 3663
3647 /* Stop all interfaces & threads */ 3664 /* Stop all interfaces & threads */
3648 pktgen_exiting = true; 3665 pn->pktgen_exiting = true;
3649 3666
3650 mutex_lock(&pktgen_thread_lock); 3667 mutex_lock(&pktgen_thread_lock);
3651 list_splice_init(&pktgen_threads, &list); 3668 list_splice_init(&pn->pktgen_threads, &list);
3652 mutex_unlock(&pktgen_thread_lock); 3669 mutex_unlock(&pktgen_thread_lock);
3653 3670
3654 list_for_each_safe(q, n, &list) { 3671 list_for_each_safe(q, n, &list) {
@@ -3658,12 +3675,36 @@ static void __exit pg_cleanup(void)
3658 kfree(t); 3675 kfree(t);
3659 } 3676 }
3660 3677
3661 /* Un-register us from receiving netdevice events */ 3678 remove_proc_entry(PGCTRL, pn->proc_dir);
3662 unregister_netdevice_notifier(&pktgen_notifier_block); 3679 remove_proc_entry(PG_PROC_DIR, pn->net->proc_net);
3680}
3681
3682static struct pernet_operations pg_net_ops = {
3683 .init = pg_net_init,
3684 .exit = pg_net_exit,
3685 .id = &pg_net_id,
3686 .size = sizeof(struct pktgen_net),
3687};
3688
3689static int __init pg_init(void)
3690{
3691 int ret = 0;
3663 3692
3664 /* Clean up proc file system */ 3693 pr_info("%s", version);
3665 remove_proc_entry(PGCTRL, pg_proc_dir); 3694 ret = register_pernet_subsys(&pg_net_ops);
3666 proc_net_remove(&init_net, PG_PROC_DIR); 3695 if (ret)
3696 return ret;
3697 ret = register_netdevice_notifier(&pktgen_notifier_block);
3698 if (ret)
3699 unregister_pernet_subsys(&pg_net_ops);
3700
3701 return ret;
3702}
3703
3704static void __exit pg_cleanup(void)
3705{
3706 unregister_netdevice_notifier(&pktgen_notifier_block);
3707 unregister_pernet_subsys(&pg_net_ops);
3667} 3708}
3668 3709
3669module_init(pg_init); 3710module_init(pg_init);
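
The init/exit rework above is the standard pernet conversion: once .id and .size are set in pernet_operations, the core allocates and zeroes one private struct per namespace, and .init/.exit run as namespaces are created and torn down. A self-contained skeleton of the pattern, using hypothetical foo_* names rather than the pktgen code itself:

	#include <linux/module.h>
	#include <net/net_namespace.h>
	#include <net/netns/generic.h>

	static int foo_net_id __read_mostly;

	struct foo_net {
		int counter;			/* per-namespace state */
	};

	static int __net_init foo_net_init(struct net *net)
	{
		struct foo_net *fn = net_generic(net, foo_net_id);

		fn->counter = 0;	/* slot is pre-zeroed; shown for clarity */
		return 0;
	}

	static void __net_exit foo_net_exit(struct net *net)
	{
		/* undo whatever foo_net_init() set up for this netns */
	}

	static struct pernet_operations foo_net_ops = {
		.init = foo_net_init,
		.exit = foo_net_exit,
		.id   = &foo_net_id,
		.size = sizeof(struct foo_net),
	};

	static int __init foo_init(void)
	{
		return register_pernet_subsys(&foo_net_ops);
	}

	static void __exit foo_exit(void)
	{
		unregister_pernet_subsys(&foo_net_ops);
	}

	module_init(foo_init);
	module_exit(foo_exit);
	MODULE_LICENSE("GPL");
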
diff --git a/net/core/request_sock.c b/net/core/request_sock.c
index c31d9e8668c3..4425148d2b51 100644
--- a/net/core/request_sock.c
+++ b/net/core/request_sock.c
@@ -186,8 +186,6 @@ void reqsk_fastopen_remove(struct sock *sk, struct request_sock *req,
186 struct fastopen_queue *fastopenq = 186 struct fastopen_queue *fastopenq =
187 inet_csk(lsk)->icsk_accept_queue.fastopenq; 187 inet_csk(lsk)->icsk_accept_queue.fastopenq;
188 188
189 BUG_ON(!spin_is_locked(&sk->sk_lock.slock) && !sock_owned_by_user(sk));
190
191 tcp_sk(sk)->fastopen_rsk = NULL; 189 tcp_sk(sk)->fastopen_rsk = NULL;
192 spin_lock_bh(&fastopenq->lock); 190 spin_lock_bh(&fastopenq->lock);
193 fastopenq->qlen--; 191 fastopenq->qlen--;
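
A plausible reading of the BUG_ON() removal above: spin_is_locked() cannot back an assertion portably, because on !CONFIG_SMP builds the spinlock body compiles away and spin_is_locked() is constantly 0. The removed check is reproduced here only to annotate the problem, not as new code:

	/* On UP, !spin_is_locked(...) is always true, so this reduces to
	 * BUG_ON(!sock_owned_by_user(sk)) and fires for bh/softirq callers
	 * that hold the spinlock without owning the socket. */
	BUG_ON(!spin_is_locked(&sk->sk_lock.slock) && !sock_owned_by_user(sk));
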
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 1868625af25e..b376410ff259 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -780,6 +780,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev,
780 + nla_total_size(4) /* IFLA_MTU */ 780 + nla_total_size(4) /* IFLA_MTU */
781 + nla_total_size(4) /* IFLA_LINK */ 781 + nla_total_size(4) /* IFLA_LINK */
782 + nla_total_size(4) /* IFLA_MASTER */ 782 + nla_total_size(4) /* IFLA_MASTER */
783 + nla_total_size(1) /* IFLA_CARRIER */
783 + nla_total_size(4) /* IFLA_PROMISCUITY */ 784 + nla_total_size(4) /* IFLA_PROMISCUITY */
784 + nla_total_size(4) /* IFLA_NUM_TX_QUEUES */ 785 + nla_total_size(4) /* IFLA_NUM_TX_QUEUES */
785 + nla_total_size(4) /* IFLA_NUM_RX_QUEUES */ 786 + nla_total_size(4) /* IFLA_NUM_RX_QUEUES */
@@ -879,6 +880,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
879 const struct rtnl_link_stats64 *stats; 880 const struct rtnl_link_stats64 *stats;
880 struct nlattr *attr, *af_spec; 881 struct nlattr *attr, *af_spec;
881 struct rtnl_af_ops *af_ops; 882 struct rtnl_af_ops *af_ops;
883 struct net_device *upper_dev = netdev_master_upper_dev_get(dev);
882 884
883 ASSERT_RTNL(); 885 ASSERT_RTNL();
884 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifm), flags); 886 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifm), flags);
@@ -907,8 +909,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
907#endif 909#endif
908 (dev->ifindex != dev->iflink && 910 (dev->ifindex != dev->iflink &&
909 nla_put_u32(skb, IFLA_LINK, dev->iflink)) || 911 nla_put_u32(skb, IFLA_LINK, dev->iflink)) ||
910 (dev->master && 912 (upper_dev &&
911 nla_put_u32(skb, IFLA_MASTER, dev->master->ifindex)) || 913 nla_put_u32(skb, IFLA_MASTER, upper_dev->ifindex)) ||
914 nla_put_u8(skb, IFLA_CARRIER, netif_carrier_ok(dev)) ||
912 (dev->qdisc && 915 (dev->qdisc &&
913 nla_put_string(skb, IFLA_QDISC, dev->qdisc->ops->id)) || 916 nla_put_string(skb, IFLA_QDISC, dev->qdisc->ops->id)) ||
914 (dev->ifalias && 917 (dev->ifalias &&
@@ -1057,7 +1060,6 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
1057 int idx = 0, s_idx; 1060 int idx = 0, s_idx;
1058 struct net_device *dev; 1061 struct net_device *dev;
1059 struct hlist_head *head; 1062 struct hlist_head *head;
1060 struct hlist_node *node;
1061 struct nlattr *tb[IFLA_MAX+1]; 1063 struct nlattr *tb[IFLA_MAX+1];
1062 u32 ext_filter_mask = 0; 1064 u32 ext_filter_mask = 0;
1063 1065
@@ -1077,7 +1079,7 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
1077 for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { 1079 for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1078 idx = 0; 1080 idx = 0;
1079 head = &net->dev_index_head[h]; 1081 head = &net->dev_index_head[h];
1080 hlist_for_each_entry_rcu(dev, node, head, index_hlist) { 1082 hlist_for_each_entry_rcu(dev, head, index_hlist) {
1081 if (idx < s_idx) 1083 if (idx < s_idx)
1082 goto cont; 1084 goto cont;
1083 if (rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK, 1085 if (rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK,
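
The dropped struct hlist_node *node matches the tree-wide hlist iterator rework in this cycle: the _entry iterators now derive the traversal cursor internally (via hlist_entry_safe()), so the extra node parameter is gone. Usage sketch:

	struct net_device *dev;

	/* before: struct hlist_node *node;
	 *         hlist_for_each_entry_rcu(dev, node, head, index_hlist)
	 * after: */
	hlist_for_each_entry_rcu(dev, head, index_hlist) {
		/* dev is the typed cursor; no separate hlist_node needed */
	}
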
@@ -1108,6 +1110,7 @@ const struct nla_policy ifla_policy[IFLA_MAX+1] = {
1108 [IFLA_MTU] = { .type = NLA_U32 }, 1110 [IFLA_MTU] = { .type = NLA_U32 },
1109 [IFLA_LINK] = { .type = NLA_U32 }, 1111 [IFLA_LINK] = { .type = NLA_U32 },
1110 [IFLA_MASTER] = { .type = NLA_U32 }, 1112 [IFLA_MASTER] = { .type = NLA_U32 },
1113 [IFLA_CARRIER] = { .type = NLA_U8 },
1111 [IFLA_TXQLEN] = { .type = NLA_U32 }, 1114 [IFLA_TXQLEN] = { .type = NLA_U32 },
1112 [IFLA_WEIGHT] = { .type = NLA_U32 }, 1115 [IFLA_WEIGHT] = { .type = NLA_U32 },
1113 [IFLA_OPERSTATE] = { .type = NLA_U8 }, 1116 [IFLA_OPERSTATE] = { .type = NLA_U8 },
@@ -1270,16 +1273,16 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr *attr)
1270 1273
1271static int do_set_master(struct net_device *dev, int ifindex) 1274static int do_set_master(struct net_device *dev, int ifindex)
1272{ 1275{
1273 struct net_device *master_dev; 1276 struct net_device *upper_dev = netdev_master_upper_dev_get(dev);
1274 const struct net_device_ops *ops; 1277 const struct net_device_ops *ops;
1275 int err; 1278 int err;
1276 1279
1277 if (dev->master) { 1280 if (upper_dev) {
1278 if (dev->master->ifindex == ifindex) 1281 if (upper_dev->ifindex == ifindex)
1279 return 0; 1282 return 0;
1280 ops = dev->master->netdev_ops; 1283 ops = upper_dev->netdev_ops;
1281 if (ops->ndo_del_slave) { 1284 if (ops->ndo_del_slave) {
1282 err = ops->ndo_del_slave(dev->master, dev); 1285 err = ops->ndo_del_slave(upper_dev, dev);
1283 if (err) 1286 if (err)
1284 return err; 1287 return err;
1285 } else { 1288 } else {
@@ -1288,12 +1291,12 @@ static int do_set_master(struct net_device *dev, int ifindex)
1288 } 1291 }
1289 1292
1290 if (ifindex) { 1293 if (ifindex) {
1291 master_dev = __dev_get_by_index(dev_net(dev), ifindex); 1294 upper_dev = __dev_get_by_index(dev_net(dev), ifindex);
1292 if (!master_dev) 1295 if (!upper_dev)
1293 return -EINVAL; 1296 return -EINVAL;
1294 ops = master_dev->netdev_ops; 1297 ops = upper_dev->netdev_ops;
1295 if (ops->ndo_add_slave) { 1298 if (ops->ndo_add_slave) {
1296 err = ops->ndo_add_slave(master_dev, dev); 1299 err = ops->ndo_add_slave(upper_dev, dev);
1297 if (err) 1300 if (err)
1298 return err; 1301 return err;
1299 } else { 1302 } else {
@@ -1307,7 +1310,6 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
1307 struct nlattr **tb, char *ifname, int modified) 1310 struct nlattr **tb, char *ifname, int modified)
1308{ 1311{
1309 const struct net_device_ops *ops = dev->netdev_ops; 1312 const struct net_device_ops *ops = dev->netdev_ops;
1310 int send_addr_notify = 0;
1311 int err; 1313 int err;
1312 1314
1313 if (tb[IFLA_NET_NS_PID] || tb[IFLA_NET_NS_FD]) { 1315 if (tb[IFLA_NET_NS_PID] || tb[IFLA_NET_NS_FD]) {
@@ -1360,16 +1362,6 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
1360 struct sockaddr *sa; 1362 struct sockaddr *sa;
1361 int len; 1363 int len;
1362 1364
1363 if (!ops->ndo_set_mac_address) {
1364 err = -EOPNOTSUPP;
1365 goto errout;
1366 }
1367
1368 if (!netif_device_present(dev)) {
1369 err = -ENODEV;
1370 goto errout;
1371 }
1372
1373 len = sizeof(sa_family_t) + dev->addr_len; 1365 len = sizeof(sa_family_t) + dev->addr_len;
1374 sa = kmalloc(len, GFP_KERNEL); 1366 sa = kmalloc(len, GFP_KERNEL);
1375 if (!sa) { 1367 if (!sa) {
@@ -1379,13 +1371,11 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
1379 sa->sa_family = dev->type; 1371 sa->sa_family = dev->type;
1380 memcpy(sa->sa_data, nla_data(tb[IFLA_ADDRESS]), 1372 memcpy(sa->sa_data, nla_data(tb[IFLA_ADDRESS]),
1381 dev->addr_len); 1373 dev->addr_len);
1382 err = ops->ndo_set_mac_address(dev, sa); 1374 err = dev_set_mac_address(dev, sa);
1383 kfree(sa); 1375 kfree(sa);
1384 if (err) 1376 if (err)
1385 goto errout; 1377 goto errout;
1386 send_addr_notify = 1;
1387 modified = 1; 1378 modified = 1;
1388 add_device_randomness(dev->dev_addr, dev->addr_len);
1389 } 1379 }
1390 1380
1391 if (tb[IFLA_MTU]) { 1381 if (tb[IFLA_MTU]) {
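
The deletions around the IFLA_ADDRESS handling are not lost behavior: dev_set_mac_address() already performs the ndo_set_mac_address and netif_device_present() checks, fires NETDEV_CHANGEADDR, and mixes the new address into the entropy pool, which is exactly why send_addr_notify and the open-coded add_device_randomness() call disappear from do_setlink(). Roughly the shape of the core helper (a sketch, not a verbatim copy):

	int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
	{
		const struct net_device_ops *ops = dev->netdev_ops;
		int err;

		if (!ops->ndo_set_mac_address)
			return -EOPNOTSUPP;
		if (sa->sa_family != dev->type)
			return -EINVAL;
		if (!netif_device_present(dev))
			return -ENODEV;
		err = ops->ndo_set_mac_address(dev, sa);
		if (!err) {
			call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
			add_device_randomness(dev->dev_addr, dev->addr_len);
		}
		return err;
	}
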
@@ -1422,7 +1412,7 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
1422 1412
1423 if (tb[IFLA_BROADCAST]) { 1413 if (tb[IFLA_BROADCAST]) {
1424 nla_memcpy(dev->broadcast, tb[IFLA_BROADCAST], dev->addr_len); 1414 nla_memcpy(dev->broadcast, tb[IFLA_BROADCAST], dev->addr_len);
1425 send_addr_notify = 1; 1415 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
1426 } 1416 }
1427 1417
1428 if (ifm->ifi_flags || ifm->ifi_change) { 1418 if (ifm->ifi_flags || ifm->ifi_change) {
@@ -1438,6 +1428,13 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
1438 modified = 1; 1428 modified = 1;
1439 } 1429 }
1440 1430
1431 if (tb[IFLA_CARRIER]) {
1432 err = dev_change_carrier(dev, nla_get_u8(tb[IFLA_CARRIER]));
1433 if (err)
1434 goto errout;
1435 modified = 1;
1436 }
1437
1441 if (tb[IFLA_TXQLEN]) 1438 if (tb[IFLA_TXQLEN])
1442 dev->tx_queue_len = nla_get_u32(tb[IFLA_TXQLEN]); 1439 dev->tx_queue_len = nla_get_u32(tb[IFLA_TXQLEN]);
1443 1440
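
IFLA_CARRIER gives userspace a netlink knob for the driver's ndo_change_carrier() hook, reached through the new dev_change_carrier() helper. For a software device the hook can be as small as flipping the carrier state; a hypothetical driver stub (the bool-taking signature is the one this series uses):

	static int foo_change_carrier(struct net_device *dev, bool new_carrier)
	{
		if (new_carrier)
			netif_carrier_on(dev);
		else
			netif_carrier_off(dev);
		return 0;
	}

	static const struct net_device_ops foo_netdev_ops = {
		.ndo_change_carrier	= foo_change_carrier,
	};
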
@@ -1536,9 +1533,6 @@ errout:
1536 net_warn_ratelimited("A link change request failed with some changes committed already. Interface %s may have been left with an inconsistent configuration, please check.\n", 1533 net_warn_ratelimited("A link change request failed with some changes committed already. Interface %s may have been left with an inconsistent configuration, please check.\n",
1537 dev->name); 1534 dev->name);
1538 1535
1539 if (send_addr_notify)
1540 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
1541
1542 return err; 1536 return err;
1543} 1537}
1544 1538
@@ -1672,9 +1666,11 @@ struct net_device *rtnl_create_link(struct net *net,
1672 1666
1673 if (tb[IFLA_MTU]) 1667 if (tb[IFLA_MTU])
1674 dev->mtu = nla_get_u32(tb[IFLA_MTU]); 1668 dev->mtu = nla_get_u32(tb[IFLA_MTU]);
1675 if (tb[IFLA_ADDRESS]) 1669 if (tb[IFLA_ADDRESS]) {
1676 memcpy(dev->dev_addr, nla_data(tb[IFLA_ADDRESS]), 1670 memcpy(dev->dev_addr, nla_data(tb[IFLA_ADDRESS]),
1677 nla_len(tb[IFLA_ADDRESS])); 1671 nla_len(tb[IFLA_ADDRESS]));
1672 dev->addr_assign_type = NET_ADDR_SET;
1673 }
1678 if (tb[IFLA_BROADCAST]) 1674 if (tb[IFLA_BROADCAST])
1679 memcpy(dev->broadcast, nla_data(tb[IFLA_BROADCAST]), 1675 memcpy(dev->broadcast, nla_data(tb[IFLA_BROADCAST]),
1680 nla_len(tb[IFLA_BROADCAST])); 1676 nla_len(tb[IFLA_BROADCAST]));
@@ -1992,6 +1988,7 @@ errout:
1992 if (err < 0) 1988 if (err < 0)
1993 rtnl_set_sk_err(net, RTNLGRP_LINK, err); 1989 rtnl_set_sk_err(net, RTNLGRP_LINK, err);
1994} 1990}
1991EXPORT_SYMBOL(rtmsg_ifinfo);
1995 1992
1996static int nlmsg_populate_fdb_fill(struct sk_buff *skb, 1993static int nlmsg_populate_fdb_fill(struct sk_buff *skb,
1997 struct net_device *dev, 1994 struct net_device *dev,
@@ -2054,16 +2051,12 @@ errout:
2054static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) 2051static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
2055{ 2052{
2056 struct net *net = sock_net(skb->sk); 2053 struct net *net = sock_net(skb->sk);
2057 struct net_device *master = NULL;
2058 struct ndmsg *ndm; 2054 struct ndmsg *ndm;
2059 struct nlattr *tb[NDA_MAX+1]; 2055 struct nlattr *tb[NDA_MAX+1];
2060 struct net_device *dev; 2056 struct net_device *dev;
2061 u8 *addr; 2057 u8 *addr;
2062 int err; 2058 int err;
2063 2059
2064 if (!capable(CAP_NET_ADMIN))
2065 return -EPERM;
2066
2067 err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL); 2060 err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);
2068 if (err < 0) 2061 if (err < 0)
2069 return err; 2062 return err;
@@ -2096,10 +2089,10 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
2096 /* Support fdb on master device the net/bridge default case */ 2089 /* Support fdb on master device the net/bridge default case */
2097 if ((!ndm->ndm_flags || ndm->ndm_flags & NTF_MASTER) && 2090 if ((!ndm->ndm_flags || ndm->ndm_flags & NTF_MASTER) &&
2098 (dev->priv_flags & IFF_BRIDGE_PORT)) { 2091 (dev->priv_flags & IFF_BRIDGE_PORT)) {
2099 master = dev->master; 2092 struct net_device *br_dev = netdev_master_upper_dev_get(dev);
2100 err = master->netdev_ops->ndo_fdb_add(ndm, tb, 2093 const struct net_device_ops *ops = br_dev->netdev_ops;
2101 dev, addr, 2094
2102 nlh->nlmsg_flags); 2095 err = ops->ndo_fdb_add(ndm, tb, dev, addr, nlh->nlmsg_flags);
2103 if (err) 2096 if (err)
2104 goto out; 2097 goto out;
2105 else 2098 else
@@ -2125,7 +2118,7 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
2125{ 2118{
2126 struct net *net = sock_net(skb->sk); 2119 struct net *net = sock_net(skb->sk);
2127 struct ndmsg *ndm; 2120 struct ndmsg *ndm;
2128 struct nlattr *llattr; 2121 struct nlattr *tb[NDA_MAX+1];
2129 struct net_device *dev; 2122 struct net_device *dev;
2130 int err = -EINVAL; 2123 int err = -EINVAL;
2131 __u8 *addr; 2124 __u8 *addr;
@@ -2133,8 +2126,9 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
2133 if (!capable(CAP_NET_ADMIN)) 2126 if (!capable(CAP_NET_ADMIN))
2134 return -EPERM; 2127 return -EPERM;
2135 2128
2136 if (nlmsg_len(nlh) < sizeof(*ndm)) 2129 err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);
2137 return -EINVAL; 2130 if (err < 0)
2131 return err;
2138 2132
2139 ndm = nlmsg_data(nlh); 2133 ndm = nlmsg_data(nlh);
2140 if (ndm->ndm_ifindex == 0) { 2134 if (ndm->ndm_ifindex == 0) {
@@ -2148,22 +2142,27 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
2148 return -ENODEV; 2142 return -ENODEV;
2149 } 2143 }
2150 2144
2151 llattr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_LLADDR); 2145 if (!tb[NDA_LLADDR] || nla_len(tb[NDA_LLADDR]) != ETH_ALEN) {
2152 if (llattr == NULL || nla_len(llattr) != ETH_ALEN) { 2146 pr_info("PF_BRIDGE: RTM_DELNEIGH with invalid address\n");
2153 pr_info("PF_BRIGDE: RTM_DELNEIGH with invalid address\n"); 2147 return -EINVAL;
2148 }
2149
2150 addr = nla_data(tb[NDA_LLADDR]);
2151 if (!is_valid_ether_addr(addr)) {
2152 pr_info("PF_BRIDGE: RTM_DELNEIGH with invalid ether address\n");
2154 return -EINVAL; 2153 return -EINVAL;
2155 } 2154 }
2156 2155
2157 addr = nla_data(llattr);
2158 err = -EOPNOTSUPP; 2156 err = -EOPNOTSUPP;
2159 2157
2160 /* Support fdb on master device the net/bridge default case */ 2158 /* Support fdb on master device the net/bridge default case */
2161 if ((!ndm->ndm_flags || ndm->ndm_flags & NTF_MASTER) && 2159 if ((!ndm->ndm_flags || ndm->ndm_flags & NTF_MASTER) &&
2162 (dev->priv_flags & IFF_BRIDGE_PORT)) { 2160 (dev->priv_flags & IFF_BRIDGE_PORT)) {
2163 struct net_device *master = dev->master; 2161 struct net_device *br_dev = netdev_master_upper_dev_get(dev);
2162 const struct net_device_ops *ops = br_dev->netdev_ops;
2164 2163
2165 if (master->netdev_ops->ndo_fdb_del) 2164 if (ops->ndo_fdb_del)
2166 err = master->netdev_ops->ndo_fdb_del(ndm, dev, addr); 2165 err = ops->ndo_fdb_del(ndm, tb, dev, addr);
2167 2166
2168 if (err) 2167 if (err)
2169 goto out; 2168 goto out;
@@ -2173,7 +2172,7 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
2173 2172
2174 /* Embedded bridge, macvlan, and any other device support */ 2173 /* Embedded bridge, macvlan, and any other device support */
2175 if ((ndm->ndm_flags & NTF_SELF) && dev->netdev_ops->ndo_fdb_del) { 2174 if ((ndm->ndm_flags & NTF_SELF) && dev->netdev_ops->ndo_fdb_del) {
2176 err = dev->netdev_ops->ndo_fdb_del(ndm, dev, addr); 2175 err = dev->netdev_ops->ndo_fdb_del(ndm, tb, dev, addr);
2177 2176
2178 if (!err) { 2177 if (!err) {
2179 rtnl_fdb_notify(dev, addr, RTM_DELNEIGH); 2178 rtnl_fdb_notify(dev, addr, RTM_DELNEIGH);
@@ -2247,9 +2246,11 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb)
2247 rcu_read_lock(); 2246 rcu_read_lock();
2248 for_each_netdev_rcu(net, dev) { 2247 for_each_netdev_rcu(net, dev) {
2249 if (dev->priv_flags & IFF_BRIDGE_PORT) { 2248 if (dev->priv_flags & IFF_BRIDGE_PORT) {
2250 struct net_device *master = dev->master; 2249 struct net_device *br_dev;
2251 const struct net_device_ops *ops = master->netdev_ops; 2250 const struct net_device_ops *ops;
2252 2251
2252 br_dev = netdev_master_upper_dev_get(dev);
2253 ops = br_dev->netdev_ops;
2253 if (ops->ndo_fdb_dump) 2254 if (ops->ndo_fdb_dump)
2254 idx = ops->ndo_fdb_dump(skb, cb, dev, idx); 2255 idx = ops->ndo_fdb_dump(skb, cb, dev, idx);
2255 } 2256 }
@@ -2270,6 +2271,7 @@ int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
2270 struct ifinfomsg *ifm; 2271 struct ifinfomsg *ifm;
2271 struct nlattr *br_afspec; 2272 struct nlattr *br_afspec;
2272 u8 operstate = netif_running(dev) ? dev->operstate : IF_OPER_DOWN; 2273 u8 operstate = netif_running(dev) ? dev->operstate : IF_OPER_DOWN;
2274 struct net_device *br_dev = netdev_master_upper_dev_get(dev);
2273 2275
2274 nlh = nlmsg_put(skb, pid, seq, RTM_NEWLINK, sizeof(*ifm), NLM_F_MULTI); 2276 nlh = nlmsg_put(skb, pid, seq, RTM_NEWLINK, sizeof(*ifm), NLM_F_MULTI);
2275 if (nlh == NULL) 2277 if (nlh == NULL)
@@ -2287,8 +2289,8 @@ int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
2287 if (nla_put_string(skb, IFLA_IFNAME, dev->name) || 2289 if (nla_put_string(skb, IFLA_IFNAME, dev->name) ||
2288 nla_put_u32(skb, IFLA_MTU, dev->mtu) || 2290 nla_put_u32(skb, IFLA_MTU, dev->mtu) ||
2289 nla_put_u8(skb, IFLA_OPERSTATE, operstate) || 2291 nla_put_u8(skb, IFLA_OPERSTATE, operstate) ||
2290 (dev->master && 2292 (br_dev &&
2291 nla_put_u32(skb, IFLA_MASTER, dev->master->ifindex)) || 2293 nla_put_u32(skb, IFLA_MASTER, br_dev->ifindex)) ||
2292 (dev->addr_len && 2294 (dev->addr_len &&
2293 nla_put(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr)) || 2295 nla_put(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr)) ||
2294 (dev->ifindex != dev->iflink && 2296 (dev->ifindex != dev->iflink &&
@@ -2320,23 +2322,31 @@ static int rtnl_bridge_getlink(struct sk_buff *skb, struct netlink_callback *cb)
2320 int idx = 0; 2322 int idx = 0;
2321 u32 portid = NETLINK_CB(cb->skb).portid; 2323 u32 portid = NETLINK_CB(cb->skb).portid;
2322 u32 seq = cb->nlh->nlmsg_seq; 2324 u32 seq = cb->nlh->nlmsg_seq;
2325 struct nlattr *extfilt;
2326 u32 filter_mask = 0;
2327
2328 extfilt = nlmsg_find_attr(cb->nlh, sizeof(struct rtgenmsg),
2329 IFLA_EXT_MASK);
2330 if (extfilt)
2331 filter_mask = nla_get_u32(extfilt);
2323 2332
2324 rcu_read_lock(); 2333 rcu_read_lock();
2325 for_each_netdev_rcu(net, dev) { 2334 for_each_netdev_rcu(net, dev) {
2326 const struct net_device_ops *ops = dev->netdev_ops; 2335 const struct net_device_ops *ops = dev->netdev_ops;
2327 struct net_device *master = dev->master; 2336 struct net_device *br_dev = netdev_master_upper_dev_get(dev);
2328 2337
2329 if (master && master->netdev_ops->ndo_bridge_getlink) { 2338 if (br_dev && br_dev->netdev_ops->ndo_bridge_getlink) {
2330 if (idx >= cb->args[0] && 2339 if (idx >= cb->args[0] &&
2331 master->netdev_ops->ndo_bridge_getlink( 2340 br_dev->netdev_ops->ndo_bridge_getlink(
2332 skb, portid, seq, dev) < 0) 2341 skb, portid, seq, dev, filter_mask) < 0)
2333 break; 2342 break;
2334 idx++; 2343 idx++;
2335 } 2344 }
2336 2345
2337 if (ops->ndo_bridge_getlink) { 2346 if (ops->ndo_bridge_getlink) {
2338 if (idx >= cb->args[0] && 2347 if (idx >= cb->args[0] &&
2339 ops->ndo_bridge_getlink(skb, portid, seq, dev) < 0) 2348 ops->ndo_bridge_getlink(skb, portid, seq, dev,
2349 filter_mask) < 0)
2340 break; 2350 break;
2341 idx++; 2351 idx++;
2342 } 2352 }
@@ -2365,7 +2375,7 @@ static inline size_t bridge_nlmsg_size(void)
2365static int rtnl_bridge_notify(struct net_device *dev, u16 flags) 2375static int rtnl_bridge_notify(struct net_device *dev, u16 flags)
2366{ 2376{
2367 struct net *net = dev_net(dev); 2377 struct net *net = dev_net(dev);
2368 struct net_device *master = dev->master; 2378 struct net_device *br_dev = netdev_master_upper_dev_get(dev);
2369 struct sk_buff *skb; 2379 struct sk_buff *skb;
2370 int err = -EOPNOTSUPP; 2380 int err = -EOPNOTSUPP;
2371 2381
@@ -2376,15 +2386,15 @@ static int rtnl_bridge_notify(struct net_device *dev, u16 flags)
2376 } 2386 }
2377 2387
2378 if ((!flags || (flags & BRIDGE_FLAGS_MASTER)) && 2388 if ((!flags || (flags & BRIDGE_FLAGS_MASTER)) &&
2379 master && master->netdev_ops->ndo_bridge_getlink) { 2389 br_dev && br_dev->netdev_ops->ndo_bridge_getlink) {
2380 err = master->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev); 2390 err = br_dev->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev, 0);
2381 if (err < 0) 2391 if (err < 0)
2382 goto errout; 2392 goto errout;
2383 } 2393 }
2384 2394
2385 if ((flags & BRIDGE_FLAGS_SELF) && 2395 if ((flags & BRIDGE_FLAGS_SELF) &&
2386 dev->netdev_ops->ndo_bridge_getlink) { 2396 dev->netdev_ops->ndo_bridge_getlink) {
2387 err = dev->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev); 2397 err = dev->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev, 0);
2388 if (err < 0) 2398 if (err < 0)
2389 goto errout; 2399 goto errout;
2390 } 2400 }
@@ -2436,13 +2446,14 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh,
2436 oflags = flags; 2446 oflags = flags;
2437 2447
2438 if (!flags || (flags & BRIDGE_FLAGS_MASTER)) { 2448 if (!flags || (flags & BRIDGE_FLAGS_MASTER)) {
2439 if (!dev->master || 2449 struct net_device *br_dev = netdev_master_upper_dev_get(dev);
2440 !dev->master->netdev_ops->ndo_bridge_setlink) { 2450
2451 if (!br_dev || !br_dev->netdev_ops->ndo_bridge_setlink) {
2441 err = -EOPNOTSUPP; 2452 err = -EOPNOTSUPP;
2442 goto out; 2453 goto out;
2443 } 2454 }
2444 2455
2445 err = dev->master->netdev_ops->ndo_bridge_setlink(dev, nlh); 2456 err = br_dev->netdev_ops->ndo_bridge_setlink(dev, nlh);
2446 if (err) 2457 if (err)
2447 goto out; 2458 goto out;
2448 2459
@@ -2468,6 +2479,77 @@ out:
2468 return err; 2479 return err;
2469} 2480}
2470 2481
2482static int rtnl_bridge_dellink(struct sk_buff *skb, struct nlmsghdr *nlh,
2483 void *arg)
2484{
2485 struct net *net = sock_net(skb->sk);
2486 struct ifinfomsg *ifm;
2487 struct net_device *dev;
2488 struct nlattr *br_spec, *attr = NULL;
2489 int rem, err = -EOPNOTSUPP;
2490 u16 oflags, flags = 0;
2491 bool have_flags = false;
2492
2493 if (nlmsg_len(nlh) < sizeof(*ifm))
2494 return -EINVAL;
2495
2496 ifm = nlmsg_data(nlh);
2497 if (ifm->ifi_family != AF_BRIDGE)
2498 return -EPFNOSUPPORT;
2499
2500 dev = __dev_get_by_index(net, ifm->ifi_index);
2501 if (!dev) {
2502 pr_info("PF_BRIDGE: RTM_SETLINK with unknown ifindex\n");
2503 return -ENODEV;
2504 }
2505
2506 br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
2507 if (br_spec) {
2508 nla_for_each_nested(attr, br_spec, rem) {
2509 if (nla_type(attr) == IFLA_BRIDGE_FLAGS) {
2510 have_flags = true;
2511 flags = nla_get_u16(attr);
2512 break;
2513 }
2514 }
2515 }
2516
2517 oflags = flags;
2518
2519 if (!flags || (flags & BRIDGE_FLAGS_MASTER)) {
2520 struct net_device *br_dev = netdev_master_upper_dev_get(dev);
2521
2522 if (!br_dev || !br_dev->netdev_ops->ndo_bridge_dellink) {
2523 err = -EOPNOTSUPP;
2524 goto out;
2525 }
2526
2527 err = br_dev->netdev_ops->ndo_bridge_dellink(dev, nlh);
2528 if (err)
2529 goto out;
2530
2531 flags &= ~BRIDGE_FLAGS_MASTER;
2532 }
2533
2534 if ((flags & BRIDGE_FLAGS_SELF)) {
2535 if (!dev->netdev_ops->ndo_bridge_dellink)
2536 err = -EOPNOTSUPP;
2537 else
2538 err = dev->netdev_ops->ndo_bridge_dellink(dev, nlh);
2539
2540 if (!err)
2541 flags &= ~BRIDGE_FLAGS_SELF;
2542 }
2543
2544 if (have_flags)
2545 memcpy(nla_data(attr), &flags, sizeof(flags));
2546 /* Generate event to notify upper layer of bridge change */
2547 if (!err)
2548 err = rtnl_bridge_notify(dev, oflags);
2549out:
2550 return err;
2551}
2552
2471/* Protected by RTNL sempahore. */ 2553/* Protected by RTNL sempahore. */
2472static struct rtattr **rta_buf; 2554static struct rtattr **rta_buf;
2473static int rtattr_max; 2555static int rtattr_max;
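
rtnl_bridge_dellink() deliberately mirrors rtnl_bridge_setlink(): it dispatches to the bridge master and/or the port itself according to IFLA_BRIDGE_FLAGS, clears each flag bit once that side has handled the request, writes the residual flags back into the attribute, and finally notifies on success. A driver opting in only needs the one hook; hypothetical stub (this kernel's signature takes just the device and the nlmsghdr):

	static int foo_bridge_dellink(struct net_device *dev,
				      struct nlmsghdr *nlh)
	{
		/* undo whatever foo_bridge_setlink() applied to dev,
		 * e.g. clear hardware-offloaded bridge port state */
		return 0;
	}
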
@@ -2651,6 +2733,7 @@ void __init rtnetlink_init(void)
2651 rtnl_register(PF_BRIDGE, RTM_GETNEIGH, NULL, rtnl_fdb_dump, NULL); 2733 rtnl_register(PF_BRIDGE, RTM_GETNEIGH, NULL, rtnl_fdb_dump, NULL);
2652 2734
2653 rtnl_register(PF_BRIDGE, RTM_GETLINK, NULL, rtnl_bridge_getlink, NULL); 2735 rtnl_register(PF_BRIDGE, RTM_GETLINK, NULL, rtnl_bridge_getlink, NULL);
2736 rtnl_register(PF_BRIDGE, RTM_DELLINK, rtnl_bridge_dellink, NULL, NULL);
2654 rtnl_register(PF_BRIDGE, RTM_SETLINK, rtnl_bridge_setlink, NULL, NULL); 2737 rtnl_register(PF_BRIDGE, RTM_SETLINK, rtnl_bridge_setlink, NULL, NULL);
2655} 2738}
2656 2739
diff --git a/net/core/scm.c b/net/core/scm.c
index 57fb1ee6649f..905dcc6ad1e3 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -35,6 +35,7 @@
35#include <net/sock.h> 35#include <net/sock.h>
36#include <net/compat.h> 36#include <net/compat.h>
37#include <net/scm.h> 37#include <net/scm.h>
38#include <net/cls_cgroup.h>
38 39
39 40
40/* 41/*
@@ -302,8 +303,10 @@ void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm)
302 } 303 }
303 /* Bump the usage count and install the file. */ 304 /* Bump the usage count and install the file. */
304 sock = sock_from_file(fp[i], &err); 305 sock = sock_from_file(fp[i], &err);
305 if (sock) 306 if (sock) {
306 sock_update_netprioidx(sock->sk, current); 307 sock_update_netprioidx(sock->sk, current);
308 sock_update_classid(sock->sk, current);
309 }
307 fd_install(new_fd, get_file(fp[i])); 310 fd_install(new_fd, get_file(fp[i]));
308 } 311 }
309 312
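
The scm.c hunk closes an asymmetry for sockets passed over SCM_RIGHTS: the receiver's netprio index was already being stamped onto the socket at detach time, but its net_cls classid was not, so a passed socket kept classifying traffic under the sender's cgroup. The paired updates, annotated:

	sock = sock_from_file(fp[i], &err);
	if (sock) {
		/* re-stamp both cgroup-derived tags with the values of
		 * the task actually receiving the descriptor */
		sock_update_netprioidx(sock->sk, current);
		sock_update_classid(sock->sk, current);
	}
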
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 3ab989b0de42..33245ef54c3b 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -104,47 +104,37 @@ static const struct pipe_buf_operations sock_pipe_buf_ops = {
104 .get = sock_pipe_buf_get, 104 .get = sock_pipe_buf_get,
105}; 105};
106 106
107/*
108 * Keep out-of-line to prevent kernel bloat.
109 * __builtin_return_address is not used because it is not always
110 * reliable.
111 */
112
113/** 107/**
114 * skb_over_panic - private function 108 * skb_panic - private function for out-of-line support
115 * @skb: buffer 109 * @skb: buffer
116 * @sz: size 110 * @sz: size
117 * @here: address 111 * @addr: address
118 * 112 * @msg: skb_over_panic or skb_under_panic
119 * Out of line support code for skb_put(). Not user callable. 113 *
114 * Out-of-line support for skb_put() and skb_push().
115 * Called via the wrapper skb_over_panic() or skb_under_panic().
116 * Keep out of line to prevent kernel bloat.
117 * __builtin_return_address is not used because it is not always reliable.
120 */ 118 */
121static void skb_over_panic(struct sk_buff *skb, int sz, void *here) 119static void skb_panic(struct sk_buff *skb, unsigned int sz, void *addr,
120 const char msg[])
122{ 121{
123 pr_emerg("%s: text:%p len:%d put:%d head:%p data:%p tail:%#lx end:%#lx dev:%s\n", 122 pr_emerg("%s: text:%p len:%d put:%d head:%p data:%p tail:%#lx end:%#lx dev:%s\n",
124 __func__, here, skb->len, sz, skb->head, skb->data, 123 msg, addr, skb->len, sz, skb->head, skb->data,
125 (unsigned long)skb->tail, (unsigned long)skb->end, 124 (unsigned long)skb->tail, (unsigned long)skb->end,
126 skb->dev ? skb->dev->name : "<NULL>"); 125 skb->dev ? skb->dev->name : "<NULL>");
127 BUG(); 126 BUG();
128} 127}
129 128
130/** 129static void skb_over_panic(struct sk_buff *skb, unsigned int sz, void *addr)
131 * skb_under_panic - private function
132 * @skb: buffer
133 * @sz: size
134 * @here: address
135 *
136 * Out of line support code for skb_push(). Not user callable.
137 */
138
139static void skb_under_panic(struct sk_buff *skb, int sz, void *here)
140{ 130{
141 pr_emerg("%s: text:%p len:%d put:%d head:%p data:%p tail:%#lx end:%#lx dev:%s\n", 131 skb_panic(skb, sz, addr, __func__);
142 __func__, here, skb->len, sz, skb->head, skb->data,
143 (unsigned long)skb->tail, (unsigned long)skb->end,
144 skb->dev ? skb->dev->name : "<NULL>");
145 BUG();
146} 132}
147 133
134static void skb_under_panic(struct sk_buff *skb, unsigned int sz, void *addr)
135{
136 skb_panic(skb, sz, addr, __func__);
137}
148 138
149/* 139/*
150 * kmalloc_reserve is a wrapper around kmalloc_node_track_caller that tells 140 * kmalloc_reserve is a wrapper around kmalloc_node_track_caller that tells
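
The consolidation keeps the log output distinguishable: each thin wrapper passes its own __func__ as msg, so the shared out-of-line body still prints "skb_over_panic" or "skb_under_panic" exactly as before. The general shape of the trick, with hypothetical foo_* names:

	static void foo_panic(const char *msg, const void *addr)
	{
		pr_emerg("%s: called from %pS\n", msg, addr);
		BUG();
	}

	static void foo_over_panic(void *addr)
	{
		foo_panic(__func__, addr);	/* msg == "foo_over_panic" */
	}

	static void foo_under_panic(void *addr)
	{
		foo_panic(__func__, addr);	/* msg == "foo_under_panic" */
	}
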
@@ -155,8 +145,9 @@ static void skb_under_panic(struct sk_buff *skb, int sz, void *here)
155 */ 145 */
156#define kmalloc_reserve(size, gfp, node, pfmemalloc) \ 146#define kmalloc_reserve(size, gfp, node, pfmemalloc) \
157 __kmalloc_reserve(size, gfp, node, _RET_IP_, pfmemalloc) 147 __kmalloc_reserve(size, gfp, node, _RET_IP_, pfmemalloc)
158void *__kmalloc_reserve(size_t size, gfp_t flags, int node, unsigned long ip, 148
159 bool *pfmemalloc) 149static void *__kmalloc_reserve(size_t size, gfp_t flags, int node,
150 unsigned long ip, bool *pfmemalloc)
160{ 151{
161 void *obj; 152 void *obj;
162 bool ret_pfmemalloc = false; 153 bool ret_pfmemalloc = false;
@@ -259,6 +250,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
259 skb->end = skb->tail + size; 250 skb->end = skb->tail + size;
260#ifdef NET_SKBUFF_DATA_USES_OFFSET 251#ifdef NET_SKBUFF_DATA_USES_OFFSET
261 skb->mac_header = ~0U; 252 skb->mac_header = ~0U;
253 skb->transport_header = ~0U;
262#endif 254#endif
263 255
264 /* make sure we initialize shinfo sequentially */ 256 /* make sure we initialize shinfo sequentially */
@@ -327,6 +319,7 @@ struct sk_buff *build_skb(void *data, unsigned int frag_size)
327 skb->end = skb->tail + size; 319 skb->end = skb->tail + size;
328#ifdef NET_SKBUFF_DATA_USES_OFFSET 320#ifdef NET_SKBUFF_DATA_USES_OFFSET
329 skb->mac_header = ~0U; 321 skb->mac_header = ~0U;
322 skb->transport_header = ~0U;
330#endif 323#endif
331 324
332 /* make sure we initialize shinfo sequentially */ 325 /* make sure we initialize shinfo sequentially */
@@ -348,10 +341,6 @@ struct netdev_alloc_cache {
348}; 341};
349static DEFINE_PER_CPU(struct netdev_alloc_cache, netdev_alloc_cache); 342static DEFINE_PER_CPU(struct netdev_alloc_cache, netdev_alloc_cache);
350 343
351#define NETDEV_FRAG_PAGE_MAX_ORDER get_order(32768)
352#define NETDEV_FRAG_PAGE_MAX_SIZE (PAGE_SIZE << NETDEV_FRAG_PAGE_MAX_ORDER)
353#define NETDEV_PAGECNT_MAX_BIAS NETDEV_FRAG_PAGE_MAX_SIZE
354
355static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask) 344static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
356{ 345{
357 struct netdev_alloc_cache *nc; 346 struct netdev_alloc_cache *nc;
@@ -683,7 +672,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
683 new->network_header = old->network_header; 672 new->network_header = old->network_header;
684 new->mac_header = old->mac_header; 673 new->mac_header = old->mac_header;
685 new->inner_transport_header = old->inner_transport_header; 674 new->inner_transport_header = old->inner_transport_header;
686 new->inner_network_header = old->inner_transport_header; 675 new->inner_network_header = old->inner_network_header;
687 skb_dst_copy(new, old); 676 skb_dst_copy(new, old);
688 new->rxhash = old->rxhash; 677 new->rxhash = old->rxhash;
689 new->ooo_okay = old->ooo_okay; 678 new->ooo_okay = old->ooo_okay;
@@ -1649,7 +1638,7 @@ static void sock_spd_release(struct splice_pipe_desc *spd, unsigned int i)
1649 1638
1650static struct page *linear_to_page(struct page *page, unsigned int *len, 1639static struct page *linear_to_page(struct page *page, unsigned int *len,
1651 unsigned int *offset, 1640 unsigned int *offset,
1652 struct sk_buff *skb, struct sock *sk) 1641 struct sock *sk)
1653{ 1642{
1654 struct page_frag *pfrag = sk_page_frag(sk); 1643 struct page_frag *pfrag = sk_page_frag(sk);
1655 1644
@@ -1682,14 +1671,14 @@ static bool spd_can_coalesce(const struct splice_pipe_desc *spd,
1682static bool spd_fill_page(struct splice_pipe_desc *spd, 1671static bool spd_fill_page(struct splice_pipe_desc *spd,
1683 struct pipe_inode_info *pipe, struct page *page, 1672 struct pipe_inode_info *pipe, struct page *page,
1684 unsigned int *len, unsigned int offset, 1673 unsigned int *len, unsigned int offset,
1685 struct sk_buff *skb, bool linear, 1674 bool linear,
1686 struct sock *sk) 1675 struct sock *sk)
1687{ 1676{
1688 if (unlikely(spd->nr_pages == MAX_SKB_FRAGS)) 1677 if (unlikely(spd->nr_pages == MAX_SKB_FRAGS))
1689 return true; 1678 return true;
1690 1679
1691 if (linear) { 1680 if (linear) {
1692 page = linear_to_page(page, len, &offset, skb, sk); 1681 page = linear_to_page(page, len, &offset, sk);
1693 if (!page) 1682 if (!page)
1694 return true; 1683 return true;
1695 } 1684 }
@@ -1706,23 +1695,9 @@ static bool spd_fill_page(struct splice_pipe_desc *spd,
1706 return false; 1695 return false;
1707} 1696}
1708 1697
1709static inline void __segment_seek(struct page **page, unsigned int *poff,
1710 unsigned int *plen, unsigned int off)
1711{
1712 unsigned long n;
1713
1714 *poff += off;
1715 n = *poff / PAGE_SIZE;
1716 if (n)
1717 *page = nth_page(*page, n);
1718
1719 *poff = *poff % PAGE_SIZE;
1720 *plen -= off;
1721}
1722
1723static bool __splice_segment(struct page *page, unsigned int poff, 1698static bool __splice_segment(struct page *page, unsigned int poff,
1724 unsigned int plen, unsigned int *off, 1699 unsigned int plen, unsigned int *off,
1725 unsigned int *len, struct sk_buff *skb, 1700 unsigned int *len,
1726 struct splice_pipe_desc *spd, bool linear, 1701 struct splice_pipe_desc *spd, bool linear,
1727 struct sock *sk, 1702 struct sock *sk,
1728 struct pipe_inode_info *pipe) 1703 struct pipe_inode_info *pipe)
@@ -1737,23 +1712,19 @@ static bool __splice_segment(struct page *page, unsigned int poff,
1737 } 1712 }
1738 1713
1739 /* ignore any bits we already processed */ 1714 /* ignore any bits we already processed */
1740 if (*off) { 1715 poff += *off;
1741 __segment_seek(&page, &poff, &plen, *off); 1716 plen -= *off;
1742 *off = 0; 1717 *off = 0;
1743 }
1744 1718
1745 do { 1719 do {
1746 unsigned int flen = min(*len, plen); 1720 unsigned int flen = min(*len, plen);
1747 1721
1748 /* the linear region may spread across several pages */ 1722 if (spd_fill_page(spd, pipe, page, &flen, poff,
1749 flen = min_t(unsigned int, flen, PAGE_SIZE - poff); 1723 linear, sk))
1750
1751 if (spd_fill_page(spd, pipe, page, &flen, poff, skb, linear, sk))
1752 return true; 1724 return true;
1753 1725 poff += flen;
1754 __segment_seek(&page, &poff, &plen, flen); 1726 plen -= flen;
1755 *len -= flen; 1727 *len -= flen;
1756
1757 } while (*len && plen); 1728 } while (*len && plen);
1758 1729
1759 return false; 1730 return false;
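
With __segment_seek() gone, skipping the already-consumed prefix becomes plain arithmetic. That is only safe because nothing here slices a segment at page boundaries any more: the linear head is bounced through a single page fragment by linear_to_page(), and each skb frag is handed to splice as one page reference. A small userspace model of the simplified walk, for illustration only:

	#include <stdio.h>

	/* emit up to *len bytes from a segment [poff, poff + plen),
	 * first discarding *off bytes of already-processed data */
	static void walk_segment(unsigned int poff, unsigned int plen,
				 unsigned int *off, unsigned int *len)
	{
		unsigned int flen;

		if (*off >= plen) {		/* segment fully consumed */
			*off -= plen;
			return;
		}
		poff += *off;			/* skip the prefix in place */
		plen -= *off;
		*off = 0;

		while (*len && plen) {
			flen = *len < plen ? *len : plen;
			printf("emit %u bytes at offset %u\n", flen, poff);
			poff += flen;
			plen -= flen;
			*len -= flen;
		}
	}

	int main(void)
	{
		unsigned int off = 3, len = 10;

		walk_segment(0, 8, &off, &len);	/* 5 bytes at offset 3 */
		walk_segment(0, 8, &off, &len);	/* 5 bytes at offset 0 */
		return 0;
	}
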
@@ -1777,7 +1748,7 @@ static bool __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe,
1777 if (__splice_segment(virt_to_page(skb->data), 1748 if (__splice_segment(virt_to_page(skb->data),
1778 (unsigned long) skb->data & (PAGE_SIZE - 1), 1749 (unsigned long) skb->data & (PAGE_SIZE - 1),
1779 skb_headlen(skb), 1750 skb_headlen(skb),
1780 offset, len, skb, spd, 1751 offset, len, spd,
1781 skb_head_is_locked(skb), 1752 skb_head_is_locked(skb),
1782 sk, pipe)) 1753 sk, pipe))
1783 return true; 1754 return true;
@@ -1790,7 +1761,7 @@ static bool __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe,
1790 1761
1791 if (__splice_segment(skb_frag_page(f), 1762 if (__splice_segment(skb_frag_page(f),
1792 f->page_offset, skb_frag_size(f), 1763 f->page_offset, skb_frag_size(f),
1793 offset, len, skb, spd, false, sk, pipe)) 1764 offset, len, spd, false, sk, pipe))
1794 return true; 1765 return true;
1795 } 1766 }
1796 1767
@@ -2355,6 +2326,7 @@ void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len)
2355{ 2326{
2356 int pos = skb_headlen(skb); 2327 int pos = skb_headlen(skb);
2357 2328
2329 skb_shinfo(skb1)->tx_flags = skb_shinfo(skb)->tx_flags & SKBTX_SHARED_FRAG;
2358 if (len < pos) /* Split line is inside header. */ 2330 if (len < pos) /* Split line is inside header. */
2359 skb_split_inside_header(skb, skb1, len, pos); 2331 skb_split_inside_header(skb, skb1, len, pos);
2360 else /* Second chunk has no header, nothing to copy. */ 2332 else /* Second chunk has no header, nothing to copy. */
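
skb_split() now carries the SKBTX_SHARED_FRAG bit over to skb1, since after a split both halves can reference the same user-supplied (zero-copy) pages. Consumers test the bit through a helper along these lines (a sketch, assuming the skb_has_shared_frag() form introduced by this series):

        static inline bool skb_has_shared_frag(const struct sk_buff *skb)
        {
                /* true when paged data may still be written by user space
                 * and must be copied before the stack modifies it
                 * (e.g. in the checksum paths)
                 */
                return skb_is_nonlinear(skb) &&
                       skb_shinfo(skb)->tx_flags & SKBTX_SHARED_FRAG;
        }
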
@@ -2686,48 +2658,37 @@ int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb,
2686 int len, int odd, struct sk_buff *skb), 2658 int len, int odd, struct sk_buff *skb),
2687 void *from, int length) 2659 void *from, int length)
2688{ 2660{
2689 int frg_cnt = 0; 2661 int frg_cnt = skb_shinfo(skb)->nr_frags;
2690 skb_frag_t *frag = NULL; 2662 int copy;
2691 struct page *page = NULL;
2692 int copy, left;
2693 int offset = 0; 2663 int offset = 0;
2694 int ret; 2664 int ret;
2665 struct page_frag *pfrag = &current->task_frag;
2695 2666
2696 do { 2667 do {
2697 /* Return error if we don't have space for new frag */ 2668 /* Return error if we don't have space for new frag */
2698 frg_cnt = skb_shinfo(skb)->nr_frags;
2699 if (frg_cnt >= MAX_SKB_FRAGS) 2669 if (frg_cnt >= MAX_SKB_FRAGS)
2700 return -EFAULT; 2670 return -EMSGSIZE;
2701 2671
2702 /* allocate a new page for next frag */ 2672 if (!sk_page_frag_refill(sk, pfrag))
2703 page = alloc_pages(sk->sk_allocation, 0);
2704
2705 /* If alloc_page fails just return failure and caller will
2706 * free previous allocated pages by doing kfree_skb()
2707 */
2708 if (page == NULL)
2709 return -ENOMEM; 2673 return -ENOMEM;
2710 2674
2711 /* initialize the next frag */
2712 skb_fill_page_desc(skb, frg_cnt, page, 0, 0);
2713 skb->truesize += PAGE_SIZE;
2714 atomic_add(PAGE_SIZE, &sk->sk_wmem_alloc);
2715
2716 /* get the new initialized frag */
2717 frg_cnt = skb_shinfo(skb)->nr_frags;
2718 frag = &skb_shinfo(skb)->frags[frg_cnt - 1];
2719
2720 /* copy the user data to page */ 2675 /* copy the user data to page */
2721 left = PAGE_SIZE - frag->page_offset; 2676 copy = min_t(int, length, pfrag->size - pfrag->offset);
2722 copy = (length > left)? left : length;
2723 2677
2724 ret = getfrag(from, skb_frag_address(frag) + skb_frag_size(frag), 2678 ret = getfrag(from, page_address(pfrag->page) + pfrag->offset,
2725 offset, copy, 0, skb); 2679 offset, copy, 0, skb);
2726 if (ret < 0) 2680 if (ret < 0)
2727 return -EFAULT; 2681 return -EFAULT;
2728 2682
2729 /* copy was successful so update the size parameters */ 2683 /* copy was successful so update the size parameters */
2730 skb_frag_size_add(frag, copy); 2684 skb_fill_page_desc(skb, frg_cnt, pfrag->page, pfrag->offset,
2685 copy);
2686 frg_cnt++;
2687 pfrag->offset += copy;
2688 get_page(pfrag->page);
2689
2690 skb->truesize += copy;
2691 atomic_add(copy, &sk->sk_wmem_alloc);
2731 skb->len += copy; 2692 skb->len += copy;
2732 skb->data_len += copy; 2693 skb->data_len += copy;
2733 offset += copy; 2694 offset += copy;
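
skb_append_datato_frags() switches from one freshly allocated page per fragment to the per-task page_frag allocator, so truesize and sk_wmem_alloc grow by the bytes actually copied rather than by PAGE_SIZE per chunk, and a full frag array now reports -EMSGSIZE instead of -EFAULT. A condensed sketch of the refill/copy/attach pattern used above:

        struct page_frag *pfrag = &current->task_frag;

        if (!sk_page_frag_refill(sk, pfrag))    /* may reuse the old page */
                return -ENOMEM;

        copy = min_t(int, length, pfrag->size - pfrag->offset);
        /* ... copy `copy` bytes to page_address(pfrag->page) + pfrag->offset ... */
        skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
                           pfrag->page, pfrag->offset, copy);
        get_page(pfrag->page);          /* the new frag holds its own ref */
        pfrag->offset += copy;
        skb->truesize += copy;          /* account only what was used */
        atomic_add(copy, &sk->sk_wmem_alloc);
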
@@ -2777,6 +2738,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features)
2777 unsigned int mss = skb_shinfo(skb)->gso_size; 2738 unsigned int mss = skb_shinfo(skb)->gso_size;
2778 unsigned int doffset = skb->data - skb_mac_header(skb); 2739 unsigned int doffset = skb->data - skb_mac_header(skb);
2779 unsigned int offset = doffset; 2740 unsigned int offset = doffset;
2741 unsigned int tnl_hlen = skb_tnl_header_len(skb);
2780 unsigned int headroom; 2742 unsigned int headroom;
2781 unsigned int len; 2743 unsigned int len;
2782 int sg = !!(features & NETIF_F_SG); 2744 int sg = !!(features & NETIF_F_SG);
@@ -2853,7 +2815,10 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features)
2853 skb_set_network_header(nskb, skb->mac_len); 2815 skb_set_network_header(nskb, skb->mac_len);
2854 nskb->transport_header = (nskb->network_header + 2816 nskb->transport_header = (nskb->network_header +
2855 skb_network_header_len(skb)); 2817 skb_network_header_len(skb));
2856 skb_copy_from_linear_data(skb, nskb->data, doffset); 2818
2819 skb_copy_from_linear_data_offset(skb, -tnl_hlen,
2820 nskb->data - tnl_hlen,
2821 doffset + tnl_hlen);
2857 2822
2858 if (fskb != skb_shinfo(skb)->frag_list) 2823 if (fskb != skb_shinfo(skb)->frag_list)
2859 continue; 2824 continue;
@@ -2871,6 +2836,8 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features)
2871 skb_copy_from_linear_data_offset(skb, offset, 2836 skb_copy_from_linear_data_offset(skb, offset,
2872 skb_put(nskb, hsize), hsize); 2837 skb_put(nskb, hsize), hsize);
2873 2838
2839 skb_shinfo(nskb)->tx_flags = skb_shinfo(skb)->tx_flags & SKBTX_SHARED_FRAG;
2840
2874 while (pos < offset + len && i < nfrags) { 2841 while (pos < offset + len && i < nfrags) {
2875 *frag = skb_shinfo(skb)->frags[i]; 2842 *frag = skb_shinfo(skb)->frags[i];
2876 __skb_frag_ref(frag); 2843 __skb_frag_ref(frag);
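
skb_segment() picks up two related changes: SKBTX_SHARED_FRAG is propagated to every nskb (same rationale as the skb_split() hunk), and each segment's header copy starts tnl_hlen bytes before the mac header so that tunnel GSO packets keep their outer encapsulation headers. Condensed, the copy in the hunk above does:

        unsigned int tnl_hlen = skb_tnl_header_len(skb); /* outer hdr bytes */

        /* one copy covers the outer (tunnel) headers plus the usual doffset */
        skb_copy_from_linear_data_offset(skb, -tnl_hlen,
                                         nskb->data - tnl_hlen,
                                         doffset + tnl_hlen);
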
diff --git a/net/core/sock.c b/net/core/sock.c
index bc131d419683..b261a7977746 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -186,8 +186,10 @@ void mem_cgroup_sockets_destroy(struct mem_cgroup *memcg)
186static struct lock_class_key af_family_keys[AF_MAX]; 186static struct lock_class_key af_family_keys[AF_MAX];
187static struct lock_class_key af_family_slock_keys[AF_MAX]; 187static struct lock_class_key af_family_slock_keys[AF_MAX];
188 188
189#if defined(CONFIG_MEMCG_KMEM)
189struct static_key memcg_socket_limit_enabled; 190struct static_key memcg_socket_limit_enabled;
190EXPORT_SYMBOL(memcg_socket_limit_enabled); 191EXPORT_SYMBOL(memcg_socket_limit_enabled);
192#endif
191 193
192/* 194/*
193 * Make lock validator output more readable. (we pre-construct these 195 * Make lock validator output more readable. (we pre-construct these
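
memcg_socket_limit_enabled is only flipped by the cgroup socket-pressure code, so its definition is now compiled out along with its users when CONFIG_MEMCG_KMEM is unset. The point of keeping it a static key is the disabled fast path; a consumer-side sketch (sk_charge_memcg() is a hypothetical stand-in for the real charging logic):

        /* sketch: when the key is off this test compiles to a single nop */
        if (static_key_false(&memcg_socket_limit_enabled))
                sk_charge_memcg(sk);    /* hypothetical charging helper */
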
@@ -665,6 +667,9 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
665 case SO_REUSEADDR: 667 case SO_REUSEADDR:
666 sk->sk_reuse = (valbool ? SK_CAN_REUSE : SK_NO_REUSE); 668 sk->sk_reuse = (valbool ? SK_CAN_REUSE : SK_NO_REUSE);
667 break; 669 break;
670 case SO_REUSEPORT:
671 sk->sk_reuseport = valbool;
672 break;
668 case SO_TYPE: 673 case SO_TYPE:
669 case SO_PROTOCOL: 674 case SO_PROTOCOL:
670 case SO_DOMAIN: 675 case SO_DOMAIN:
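
SO_REUSEPORT is new in this series: when every socket bound to an address:port pair sets it (and they share an owning uid), the kernel load-balances incoming connections or datagrams across them. A self-contained userspace sketch, with error handling elided and assuming headers new enough to define SO_REUSEPORT; the matching sock_getsockopt() hunk below lets applications read the flag back:

        #include <stdint.h>
        #include <sys/socket.h>
        #include <netinet/in.h>
        #include <arpa/inet.h>

        int reuseport_listener(uint16_t port)
        {
                int one = 1;
                int fd = socket(AF_INET, SOCK_STREAM, 0);
                struct sockaddr_in addr = {
                        .sin_family      = AF_INET,
                        .sin_port        = htons(port),
                        .sin_addr.s_addr = htonl(INADDR_ANY),
                };

                /* every socket sharing the port must set this before bind() */
                setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one));
                bind(fd, (struct sockaddr *)&addr, sizeof(addr));
                listen(fd, 128);
                return fd;      /* each worker process opens its own */
        }
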
@@ -861,6 +866,13 @@ set_rcvbuf:
861 ret = sk_detach_filter(sk); 866 ret = sk_detach_filter(sk);
862 break; 867 break;
863 868
869 case SO_LOCK_FILTER:
870 if (sock_flag(sk, SOCK_FILTER_LOCKED) && !valbool)
871 ret = -EPERM;
872 else
873 sock_valbool_flag(sk, SOCK_FILTER_LOCKED, valbool);
874 break;
875
864 case SO_PASSSEC: 876 case SO_PASSSEC:
865 if (valbool) 877 if (valbool)
866 set_bit(SOCK_PASSSEC, &sock->flags); 878 set_bit(SOCK_PASSSEC, &sock->flags);
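
SO_LOCK_FILTER is the other new option: once set, clearing it again fails with -EPERM (the hunk above), and elsewhere in the series the attach/detach paths refuse to touch a locked filter. The typical use is pinning a filter before dropping privileges; a userspace sketch (the one-instruction program is illustrative only):

        #include <sys/socket.h>
        #include <linux/filter.h>

        /* classic BPF program that accepts every packet */
        static struct sock_filter insns[] = {
                BPF_STMT(BPF_RET | BPF_K, 0xffffffff),
        };

        void pin_filter(int fd)
        {
                struct sock_fprog prog = { .len = 1, .filter = insns };
                int one = 1;

                setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog));
                /* from here on, replacing or detaching the filter fails
                 * with EPERM; the flag is readable via getsockopt(), as
                 * the later hunk shows
                 */
                setsockopt(fd, SOL_SOCKET, SO_LOCK_FILTER, &one, sizeof(one));
        }
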
@@ -965,6 +977,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
965 v.val = sk->sk_reuse; 977 v.val = sk->sk_reuse;
966 break; 978 break;
967 979
980 case SO_REUSEPORT:
981 v.val = sk->sk_reuseport;
982 break;
983
968 case SO_KEEPALIVE: 984 case SO_KEEPALIVE:
969 v.val = sock_flag(sk, SOCK_KEEPOPEN); 985 v.val = sock_flag(sk, SOCK_KEEPOPEN);
970 break; 986 break;
@@ -1140,6 +1156,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
1140 1156
1141 goto lenout; 1157 goto lenout;
1142 1158
1159 case SO_LOCK_FILTER:
1160 v.val = sock_flag(sk, SOCK_FILTER_LOCKED);
1161 break;
1162
1143 default: 1163 default:
1144 return -ENOPROTOOPT; 1164 return -ENOPROTOOPT;
1145 } 1165 }
@@ -2212,7 +2232,7 @@ EXPORT_SYMBOL(sk_reset_timer);
2212 2232
2213void sk_stop_timer(struct sock *sk, struct timer_list* timer) 2233void sk_stop_timer(struct sock *sk, struct timer_list* timer)
2214{ 2234{
2215 if (timer_pending(timer) && del_timer(timer)) 2235 if (del_timer(timer))
2216 __sock_put(sk); 2236 __sock_put(sk);
2217} 2237}
2218EXPORT_SYMBOL(sk_stop_timer); 2238EXPORT_SYMBOL(sk_stop_timer);
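
The sk_stop_timer() simplification leans on del_timer() returning 1 exactly when it deactivates a pending timer, so the separate timer_pending() test bought nothing. The reference being dropped is the one sk_reset_timer() takes when arming the timer; its counterpart in the same file looks like this (quoted from the same tree, so treat as a sketch):

        void sk_reset_timer(struct sock *sk, struct timer_list *timer,
                            unsigned long expires)
        {
                /* mod_timer() returns 0 when the timer was not already
                 * pending, i.e. exactly when arming it must take a new
                 * reference on the socket
                 */
                if (!mod_timer(timer, expires))
                        sock_hold(sk);
        }
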
@@ -2818,7 +2838,7 @@ static const struct file_operations proto_seq_fops = {
2818 2838
2819static __net_init int proto_init_net(struct net *net) 2839static __net_init int proto_init_net(struct net *net)
2820{ 2840{
2821 if (!proc_net_fops_create(net, "protocols", S_IRUGO, &proto_seq_fops)) 2841 if (!proc_create("protocols", S_IRUGO, net->proc_net, &proto_seq_fops))
2822 return -ENOMEM; 2842 return -ENOMEM;
2823 2843
2824 return 0; 2844 return 0;
@@ -2826,7 +2846,7 @@ static __net_init int proto_init_net(struct net *net)
2826 2846
2827static __net_exit void proto_exit_net(struct net *net) 2847static __net_exit void proto_exit_net(struct net *net)
2828{ 2848{
2829 proc_net_remove(net, "protocols"); 2849 remove_proc_entry("protocols", net->proc_net);
2830} 2850}
2831 2851
2832 2852
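
The proto_init_net()/proto_exit_net() hunks are part of the tree-wide move from the proc_net_fops_create()/proc_net_remove() wrappers to plain proc_create()/remove_proc_entry() against the per-namespace net->proc_net directory. A sketch of the resulting pernet boilerplate (all example_* names are hypothetical):

        static const struct file_operations example_seq_fops; /* hypothetical */

        static __net_init int example_net_init(struct net *net)
        {
                if (!proc_create("example", S_IRUGO, net->proc_net,
                                 &example_seq_fops))
                        return -ENOMEM;
                return 0;
        }

        static __net_exit void example_net_exit(struct net *net)
        {
                remove_proc_entry("example", net->proc_net);
        }

        static struct pernet_operations example_net_ops = {
                .init = example_net_init,
                .exit = example_net_exit,
        };
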
diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c
index 602cd637182e..a29e90cf36b7 100644
--- a/net/core/sock_diag.c
+++ b/net/core/sock_diag.c
@@ -97,21 +97,6 @@ void sock_diag_unregister(const struct sock_diag_handler *hnld)
97} 97}
98EXPORT_SYMBOL_GPL(sock_diag_unregister); 98EXPORT_SYMBOL_GPL(sock_diag_unregister);
99 99
100static const inline struct sock_diag_handler *sock_diag_lock_handler(int family)
101{
102 if (sock_diag_handlers[family] == NULL)
103 request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK,
104 NETLINK_SOCK_DIAG, family);
105
106 mutex_lock(&sock_diag_table_mutex);
107 return sock_diag_handlers[family];
108}
109
110static inline void sock_diag_unlock_handler(const struct sock_diag_handler *h)
111{
112 mutex_unlock(&sock_diag_table_mutex);
113}
114
115static int __sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) 100static int __sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
116{ 101{
117 int err; 102 int err;
@@ -121,12 +106,20 @@ static int __sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
121 if (nlmsg_len(nlh) < sizeof(*req)) 106 if (nlmsg_len(nlh) < sizeof(*req))
122 return -EINVAL; 107 return -EINVAL;
123 108
124 hndl = sock_diag_lock_handler(req->sdiag_family); 109 if (req->sdiag_family >= AF_MAX)
110 return -EINVAL;
111
112 if (sock_diag_handlers[req->sdiag_family] == NULL)
113 request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK,
114 NETLINK_SOCK_DIAG, req->sdiag_family);
115
116 mutex_lock(&sock_diag_table_mutex);
117 hndl = sock_diag_handlers[req->sdiag_family];
125 if (hndl == NULL) 118 if (hndl == NULL)
126 err = -ENOENT; 119 err = -ENOENT;
127 else 120 else
128 err = hndl->dump(skb, nlh); 121 err = hndl->dump(skb, nlh);
129 sock_diag_unlock_handler(hndl); 122 mutex_unlock(&sock_diag_table_mutex);
130 123
131 return err; 124 return err;
132} 125}
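
Folding sock_diag_lock_handler()/sock_diag_unlock_handler() into the caller also adds the missing bounds check: req->sdiag_family comes straight from a netlink message, and indexing the AF_MAX-sized sock_diag_handlers[] array without validating it allowed an out-of-bounds read. The ordering matters too: request_module() can sleep, so it runs before sock_diag_table_mutex is taken. Condensed sketch of the pattern:

        if (req->sdiag_family >= AF_MAX) /* untrusted index from userspace */
                return -EINVAL;

        if (sock_diag_handlers[req->sdiag_family] == NULL)
                request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK,
                               NETLINK_SOCK_DIAG, req->sdiag_family);

        mutex_lock(&sock_diag_table_mutex);     /* only now take the lock */
        hndl = sock_diag_handlers[req->sdiag_family];
        err = hndl ? hndl->dump(skb, nlh) : -ENOENT;
        mutex_unlock(&sock_diag_table_mutex);
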
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index d1b08045a9df..cfdb46ab3a7f 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -20,6 +20,8 @@
20#include <net/sock.h> 20#include <net/sock.h>
21#include <net/net_ratelimit.h> 21#include <net/net_ratelimit.h>
22 22
23static int one = 1;
24
23#ifdef CONFIG_RPS 25#ifdef CONFIG_RPS
24static int rps_sock_flow_sysctl(ctl_table *table, int write, 26static int rps_sock_flow_sysctl(ctl_table *table, int write,
25 void __user *buffer, size_t *lenp, loff_t *ppos) 27 void __user *buffer, size_t *lenp, loff_t *ppos)
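
The new `one` above feeds the hunk that follows: the four rmem/wmem sysctls move from proc_dointvec to proc_dointvec_minmax with .extra1 = &one, so a write of 0 or a negative value is rejected with -EINVAL instead of being stored and later wedging socket buffer allocation. A minimal entry in the same shape (example_core_table and example_val are illustrative):

        static int one = 1;
        static int example_val = 212992;

        static struct ctl_table example_core_table[] = {
                {
                        .procname     = "wmem_max",
                        .data         = &example_val,
                        .maxlen       = sizeof(int),
                        .mode         = 0644,
                        .proc_handler = proc_dointvec_minmax,
                        .extra1       = &one,   /* reject values below 1 */
                },
                { }     /* sentinel */
        };
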
@@ -92,28 +94,32 @@ static struct ctl_table net_core_table[] = {
92 .data = &sysctl_wmem_max, 94 .data = &sysctl_wmem_max,
93 .maxlen = sizeof(int), 95 .maxlen = sizeof(int),
94 .mode = 0644, 96 .mode = 0644,
95 .proc_handler = proc_dointvec 97 .proc_handler = proc_dointvec_minmax,
98 .extra1 = &one,
96 }, 99 },
97 { 100 {
98 .procname = "rmem_max", 101 .procname = "rmem_max",
99 .data = &sysctl_rmem_max, 102 .data = &sysctl_rmem_max,
100 .maxlen = sizeof(int), 103 .maxlen = sizeof(int),
101 .mode = 0644, 104 .mode = 0644,
102 .proc_handler = proc_dointvec 105 .proc_handler = proc_dointvec_minmax,
106 .extra1 = &one,
103 }, 107 },
104 { 108 {
105 .procname = "wmem_default", 109 .procname = "wmem_default",
106 .data = &sysctl_wmem_default, 110 .data = &sysctl_wmem_default,
107 .maxlen = sizeof(int), 111 .maxlen = sizeof(int),
108 .mode = 0644, 112 .mode = 0644,
109 .proc_handler = proc_dointvec 113 .proc_handler = proc_dointvec_minmax,
114 .extra1 = &one,
110 }, 115 },
111 { 116 {
112 .procname = "rmem_default", 117 .procname = "rmem_default",
113 .data = &sysctl_rmem_default, 118 .data = &sysctl_rmem_default,
114 .maxlen = sizeof(int), 119 .maxlen = sizeof(int),
115 .mode = 0644, 120 .mode = 0644,
116 .proc_handler = proc_dointvec 121 .proc_handler = proc_dointvec_minmax,
122 .extra1 = &one,
117 }, 123 },
118 { 124 {
119 .procname = "dev_weight", 125 .procname = "dev_weight",