path: root/net/core
Diffstat (limited to 'net/core')
-rw-r--r--  net/core/Makefile          |    3
-rw-r--r--  net/core/datagram.c        |    2
-rw-r--r--  net/core/dev.c             | 1742
-rw-r--r--  net/core/dev_addr_lists.c  |   74
-rw-r--r--  net/core/dev_ioctl.c       |  576
-rw-r--r--  net/core/dst.c             |    1
-rw-r--r--  net/core/ethtool.c         |   46
-rw-r--r--  net/core/filter.c          |   13
-rw-r--r--  net/core/flow.c            |    2
-rw-r--r--  net/core/flow_dissector.c  |  173
-rw-r--r--  net/core/neighbour.c       |   20
-rw-r--r--  net/core/net-procfs.c      |  412
-rw-r--r--  net/core/net-sysfs.c       |  184
-rw-r--r--  net/core/net_namespace.c   |   32
-rw-r--r--  net/core/netpoll.c         |  721
-rw-r--r--  net/core/netprio_cgroup.c  |    4
-rw-r--r--  net/core/pktgen.c          |  205
-rw-r--r--  net/core/request_sock.c    |    2
-rw-r--r--  net/core/rtnetlink.c       |  214
-rw-r--r--  net/core/scm.c             |    5
-rw-r--r--  net/core/skbuff.c          |  157
-rw-r--r--  net/core/sock.c            |   28
-rw-r--r--  net/core/sysctl_net_core.c |   14
23 files changed, 2741 insertions, 1889 deletions
diff --git a/net/core/Makefile b/net/core/Makefile
index 674641b13aea..b33b996f5dd6 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -9,10 +9,11 @@ obj-$(CONFIG_SYSCTL) += sysctl_net_core.o
 
 obj-y += dev.o ethtool.o dev_addr_lists.o dst.o netevent.o \
	 neighbour.o rtnetlink.o utils.o link_watch.o filter.o \
-	 sock_diag.o
+	 sock_diag.o dev_ioctl.o
 
 obj-$(CONFIG_XFRM) += flow.o
 obj-y += net-sysfs.o
+obj-$(CONFIG_PROC_FS) += net-procfs.o
 obj-$(CONFIG_NET_PKTGEN) += pktgen.o
 obj-$(CONFIG_NETPOLL) += netpoll.o
 obj-$(CONFIG_NET_DMA) += user_dma.o
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 0337e2b76862..368f9c3f9dc6 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -187,7 +187,7 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags,
 		skb_queue_walk(queue, skb) {
 			*peeked = skb->peeked;
 			if (flags & MSG_PEEK) {
-				if (*off >= skb->len) {
+				if (*off >= skb->len && skb->len) {
 					*off -= skb->len;
 					continue;
 				}
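
The added `&& skb->len` guards zero-length datagrams: for an skb of length 0 the old test `*off >= skb->len` was always true, so a peek at a non-zero offset skipped the packet on every pass instead of delivering it. A minimal userspace sketch of the affected pattern, assuming an AF_UNIX datagram socket (SO_PEEK_OFF is only honoured there at this point):

	#include <sys/types.h>
	#include <sys/socket.h>

	/* Peek the next datagram at the kernel-maintained offset.  With the
	 * fix above, a zero-length datagram is returned (recv() == 0) rather
	 * than being skipped forever.
	 */
	static ssize_t peek_next(int sock)
	{
		char buf[2048];
		int off = 0;

		if (setsockopt(sock, SOL_SOCKET, SO_PEEK_OFF, &off, sizeof(off)) < 0)
			return -1;
		return recv(sock, buf, sizeof(buf), MSG_PEEK);
	}
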
diff --git a/net/core/dev.c b/net/core/dev.c
index d0cbc93fcf32..17bc535115d3 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -97,8 +97,6 @@
 #include <net/net_namespace.h>
 #include <net/sock.h>
 #include <linux/rtnetlink.h>
-#include <linux/proc_fs.h>
-#include <linux/seq_file.h>
 #include <linux/stat.h>
 #include <net/dst.h>
 #include <net/pkt_sched.h>
@@ -106,12 +104,10 @@
 #include <net/xfrm.h>
 #include <linux/highmem.h>
 #include <linux/init.h>
-#include <linux/kmod.h>
 #include <linux/module.h>
 #include <linux/netpoll.h>
 #include <linux/rcupdate.h>
 #include <linux/delay.h>
-#include <net/wext.h>
 #include <net/iw_handler.h>
 #include <asm/current.h>
 #include <linux/audit.h>
@@ -132,9 +128,7 @@
 #include <linux/pci.h>
 #include <linux/inetdevice.h>
 #include <linux/cpu_rmap.h>
-#include <linux/net_tstamp.h>
 #include <linux/static_key.h>
-#include <net/flow_keys.h>
 
 #include "net-sysfs.h"
 
@@ -144,41 +138,10 @@
 /* This should be increased if a protocol with a bigger head is added. */
 #define GRO_MAX_HEAD (MAX_HEADER + 128)
 
-/*
- *	The list of packet types we will receive (as opposed to discard)
- *	and the routines to invoke.
- *
- *	Why 16. Because with 16 the only overlap we get on a hash of the
- *	low nibble of the protocol value is RARP/SNAP/X.25.
- *
- *	NOTE:  That is no longer true with the addition of VLAN tags.  Not
- *	       sure which should go first, but I bet it won't make much
- *	       difference if we are running VLANs.  The good news is that
- *	       this protocol won't be in the list unless compiled in, so
- *	       the average user (w/out VLANs) will not be adversely affected.
- *	       --BLG
- *
- *		0800	IP
- *		8100	802.1Q VLAN
- *		0001	802.3
- *		0002	AX.25
- *		0004	802.2
- *		8035	RARP
- *		0005	SNAP
- *		0805	X.25
- *		0806	ARP
- *		8137	IPX
- *		0009	Localtalk
- *		86DD	IPv6
- */
-
-#define PTYPE_HASH_SIZE	(16)
-#define PTYPE_HASH_MASK	(PTYPE_HASH_SIZE - 1)
-
 static DEFINE_SPINLOCK(ptype_lock);
 static DEFINE_SPINLOCK(offload_lock);
-static struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
-static struct list_head ptype_all __read_mostly;	/* Taps */
+struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
+struct list_head ptype_all __read_mostly;	/* Taps */
 static struct list_head offload_base __read_mostly;
 
 /*
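
ptype_base and ptype_all lose their `static` here because the /proc/net/ptype code that walks them now lives in the new net/core/net-procfs.c. Roughly the declarations the other file picks up (they move into a shared header together with the PTYPE_HASH_* constants; a sketch, not verbatim):

	#define PTYPE_HASH_SIZE	(16)
	#define PTYPE_HASH_MASK	(PTYPE_HASH_SIZE - 1)

	extern struct list_head ptype_all;			/* taps (ETH_P_ALL) */
	extern struct list_head ptype_base[PTYPE_HASH_SIZE];	/* hashed by proto */
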
@@ -203,7 +166,7 @@ static struct list_head offload_base __read_mostly;
 DEFINE_RWLOCK(dev_base_lock);
 EXPORT_SYMBOL(dev_base_lock);
 
-DEFINE_SEQLOCK(devnet_rename_seq);
+seqcount_t devnet_rename_seq;
 
 static inline void dev_base_seq_inc(struct net *net)
 {
@@ -1093,10 +1056,10 @@ int dev_change_name(struct net_device *dev, const char *newname)
 	if (dev->flags & IFF_UP)
 		return -EBUSY;
 
-	write_seqlock(&devnet_rename_seq);
+	write_seqcount_begin(&devnet_rename_seq);
 
 	if (strncmp(newname, dev->name, IFNAMSIZ) == 0) {
-		write_sequnlock(&devnet_rename_seq);
+		write_seqcount_end(&devnet_rename_seq);
 		return 0;
 	}
 
@@ -1104,7 +1067,7 @@ int dev_change_name(struct net_device *dev, const char *newname)
 
 	err = dev_get_valid_name(net, dev, newname);
 	if (err < 0) {
-		write_sequnlock(&devnet_rename_seq);
+		write_seqcount_end(&devnet_rename_seq);
 		return err;
 	}
 
@@ -1112,11 +1075,11 @@ rollback:
 	ret = device_rename(&dev->dev, dev->name);
 	if (ret) {
 		memcpy(dev->name, oldname, IFNAMSIZ);
-		write_sequnlock(&devnet_rename_seq);
+		write_seqcount_end(&devnet_rename_seq);
 		return ret;
 	}
 
-	write_sequnlock(&devnet_rename_seq);
+	write_seqcount_end(&devnet_rename_seq);
 
 	write_lock_bh(&dev_base_lock);
 	hlist_del_rcu(&dev->name_hlist);
@@ -1135,7 +1098,7 @@ rollback:
 		/* err >= 0 after dev_alloc_name() or stores the first errno */
 		if (err >= 0) {
 			err = ret;
-			write_seqlock(&devnet_rename_seq);
+			write_seqcount_begin(&devnet_rename_seq);
 			memcpy(dev->name, oldname, IFNAMSIZ);
 			goto rollback;
 		} else {
@@ -1227,36 +1190,6 @@ void netdev_notify_peers(struct net_device *dev)
 }
 EXPORT_SYMBOL(netdev_notify_peers);
 
-/**
- *	dev_load	- load a network module
- *	@net: the applicable net namespace
- *	@name: name of interface
- *
- *	If a network interface is not present and the process has suitable
- *	privileges this function loads the module. If module loading is not
- *	available in this kernel then it becomes a nop.
- */
-
-void dev_load(struct net *net, const char *name)
-{
-	struct net_device *dev;
-	int no_module;
-
-	rcu_read_lock();
-	dev = dev_get_by_name_rcu(net, name);
-	rcu_read_unlock();
-
-	no_module = !dev;
-	if (no_module && capable(CAP_NET_ADMIN))
-		no_module = request_module("netdev-%s", name);
-	if (no_module && capable(CAP_SYS_MODULE)) {
-		if (!request_module("%s", name))
-			pr_warn("Loading kernel module for a network device with CAP_SYS_MODULE (deprecated).  Use CAP_NET_ADMIN and alias netdev-%s instead.\n",
-				name);
-	}
-}
-EXPORT_SYMBOL(dev_load);
-
 static int __dev_open(struct net_device *dev)
 {
 	const struct net_device_ops *ops = dev->netdev_ops;
@@ -1267,6 +1200,14 @@ static int __dev_open(struct net_device *dev)
 	if (!netif_device_present(dev))
 		return -ENODEV;
 
+	/* Block netpoll from trying to do any rx path servicing.
+	 * If we don't do this there is a chance ndo_poll_controller
+	 * or ndo_poll may be running while we open the device
+	 */
+	ret = netpoll_rx_disable(dev);
+	if (ret)
+		return ret;
+
 	ret = call_netdevice_notifiers(NETDEV_PRE_UP, dev);
 	ret = notifier_to_errno(ret);
 	if (ret)
@@ -1280,6 +1221,8 @@ static int __dev_open(struct net_device *dev)
 	if (!ret && ops->ndo_open)
 		ret = ops->ndo_open(dev);
 
+	netpoll_rx_enable(dev);
+
 	if (ret)
 		clear_bit(__LINK_STATE_START, &dev->state);
 	else {
@@ -1371,9 +1314,16 @@ static int __dev_close(struct net_device *dev)
 	int retval;
 	LIST_HEAD(single);
 
+	/* Temporarily disable netpoll until the interface is down */
+	retval = netpoll_rx_disable(dev);
+	if (retval)
+		return retval;
+
 	list_add(&dev->unreg_list, &single);
 	retval = __dev_close_many(&single);
 	list_del(&single);
+
+	netpoll_rx_enable(dev);
 	return retval;
 }
 
@@ -1409,14 +1359,22 @@ static int dev_close_many(struct list_head *head)
  */
 int dev_close(struct net_device *dev)
 {
+	int ret = 0;
 	if (dev->flags & IFF_UP) {
 		LIST_HEAD(single);
 
+		/* Block netpoll rx while the interface is going down */
+		ret = netpoll_rx_disable(dev);
+		if (ret)
+			return ret;
+
 		list_add(&dev->unreg_list, &single);
 		dev_close_many(&single);
 		list_del(&single);
+
+		netpoll_rx_enable(dev);
 	}
-	return 0;
+	return ret;
 }
 EXPORT_SYMBOL(dev_close);
 
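
dev_open()/dev_close() now quiesce netpoll around the state change so the poll path cannot run concurrently with ndo_open/ndo_stop. The shape of the pattern, as a hedged stand-alone sketch (guarded_state_change is illustrative, not from this diff):

	static int guarded_state_change(struct net_device *dev,
					int (*change)(struct net_device *dev))
	{
		int ret;

		ret = netpoll_rx_disable(dev);	/* blocks netpoll rx servicing */
		if (ret)
			return ret;
		ret = change(dev);		/* e.g. the ndo_open/ndo_stop work */
		netpoll_rx_enable(dev);		/* re-enable on success and error */
		return ret;
	}
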
@@ -1621,57 +1579,6 @@ static inline void net_timestamp_set(struct sk_buff *skb)
 			__net_timestamp(SKB);			\
 	}						\
 
-static int net_hwtstamp_validate(struct ifreq *ifr)
-{
-	struct hwtstamp_config cfg;
-	enum hwtstamp_tx_types tx_type;
-	enum hwtstamp_rx_filters rx_filter;
-	int tx_type_valid = 0;
-	int rx_filter_valid = 0;
-
-	if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg)))
-		return -EFAULT;
-
-	if (cfg.flags) /* reserved for future extensions */
-		return -EINVAL;
-
-	tx_type = cfg.tx_type;
-	rx_filter = cfg.rx_filter;
-
-	switch (tx_type) {
-	case HWTSTAMP_TX_OFF:
-	case HWTSTAMP_TX_ON:
-	case HWTSTAMP_TX_ONESTEP_SYNC:
-		tx_type_valid = 1;
-		break;
-	}
-
-	switch (rx_filter) {
-	case HWTSTAMP_FILTER_NONE:
-	case HWTSTAMP_FILTER_ALL:
-	case HWTSTAMP_FILTER_SOME:
-	case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
-	case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
-	case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
-	case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
-	case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
-	case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
-	case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
-	case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
-	case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
-	case HWTSTAMP_FILTER_PTP_V2_EVENT:
-	case HWTSTAMP_FILTER_PTP_V2_SYNC:
-	case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
-		rx_filter_valid = 1;
-		break;
-	}
-
-	if (!tx_type_valid || !rx_filter_valid)
-		return -ERANGE;
-
-	return 0;
-}
-
 static inline bool is_skb_forwardable(struct net_device *dev,
 				      struct sk_buff *skb)
 {
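
net_hwtstamp_validate() is not gone; per the diffstat it moves with the rest of the ioctl plumbing into the new net/core/dev_ioctl.c. For orientation, a hedged userspace sketch of the SIOCSHWTSTAMP request that this validation vets:

	#include <linux/net_tstamp.h>
	#include <linux/sockios.h>
	#include <net/if.h>
	#include <string.h>
	#include <sys/ioctl.h>

	static int enable_rx_timestamps(int sock, const char *ifname)
	{
		struct hwtstamp_config cfg = {
			.tx_type   = HWTSTAMP_TX_OFF,
			.rx_filter = HWTSTAMP_FILTER_ALL,	/* must pass validation */
		};
		struct ifreq ifr;

		memset(&ifr, 0, sizeof(ifr));
		strncpy(ifr.ifr_name, ifname, IFNAMSIZ - 1);
		ifr.ifr_data = (char *)&cfg;
		return ioctl(sock, SIOCSHWTSTAMP, &ifr);
	}
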
@@ -1857,6 +1764,228 @@ static void netif_setup_tc(struct net_device *dev, unsigned int txq)
 	}
 }
 
+#ifdef CONFIG_XPS
+static DEFINE_MUTEX(xps_map_mutex);
+#define xmap_dereference(P)		\
+	rcu_dereference_protected((P), lockdep_is_held(&xps_map_mutex))
+
+static struct xps_map *remove_xps_queue(struct xps_dev_maps *dev_maps,
+					int cpu, u16 index)
+{
+	struct xps_map *map = NULL;
+	int pos;
+
+	if (dev_maps)
+		map = xmap_dereference(dev_maps->cpu_map[cpu]);
+
+	for (pos = 0; map && pos < map->len; pos++) {
+		if (map->queues[pos] == index) {
+			if (map->len > 1) {
+				map->queues[pos] = map->queues[--map->len];
+			} else {
+				RCU_INIT_POINTER(dev_maps->cpu_map[cpu], NULL);
+				kfree_rcu(map, rcu);
+				map = NULL;
+			}
+			break;
+		}
+	}
+
+	return map;
+}
+
+static void netif_reset_xps_queues_gt(struct net_device *dev, u16 index)
+{
+	struct xps_dev_maps *dev_maps;
+	int cpu, i;
+	bool active = false;
+
+	mutex_lock(&xps_map_mutex);
+	dev_maps = xmap_dereference(dev->xps_maps);
+
+	if (!dev_maps)
+		goto out_no_maps;
+
+	for_each_possible_cpu(cpu) {
+		for (i = index; i < dev->num_tx_queues; i++) {
+			if (!remove_xps_queue(dev_maps, cpu, i))
+				break;
+		}
+		if (i == dev->num_tx_queues)
+			active = true;
+	}
+
+	if (!active) {
+		RCU_INIT_POINTER(dev->xps_maps, NULL);
+		kfree_rcu(dev_maps, rcu);
+	}
+
+	for (i = index; i < dev->num_tx_queues; i++)
+		netdev_queue_numa_node_write(netdev_get_tx_queue(dev, i),
+					     NUMA_NO_NODE);
+
+out_no_maps:
+	mutex_unlock(&xps_map_mutex);
+}
+
+static struct xps_map *expand_xps_map(struct xps_map *map,
+				      int cpu, u16 index)
+{
+	struct xps_map *new_map;
+	int alloc_len = XPS_MIN_MAP_ALLOC;
+	int i, pos;
+
+	for (pos = 0; map && pos < map->len; pos++) {
+		if (map->queues[pos] != index)
+			continue;
+		return map;
+	}
+
+	/* Need to add queue to this CPU's existing map */
+	if (map) {
+		if (pos < map->alloc_len)
+			return map;
+
+		alloc_len = map->alloc_len * 2;
+	}
+
+	/* Need to allocate new map to store queue on this CPU's map */
+	new_map = kzalloc_node(XPS_MAP_SIZE(alloc_len), GFP_KERNEL,
+			       cpu_to_node(cpu));
+	if (!new_map)
+		return NULL;
+
+	for (i = 0; i < pos; i++)
+		new_map->queues[i] = map->queues[i];
+	new_map->alloc_len = alloc_len;
+	new_map->len = pos;
+
+	return new_map;
+}
+
+int netif_set_xps_queue(struct net_device *dev, struct cpumask *mask, u16 index)
+{
+	struct xps_dev_maps *dev_maps, *new_dev_maps = NULL;
+	struct xps_map *map, *new_map;
+	int maps_sz = max_t(unsigned int, XPS_DEV_MAPS_SIZE, L1_CACHE_BYTES);
+	int cpu, numa_node_id = -2;
+	bool active = false;
+
+	mutex_lock(&xps_map_mutex);
+
+	dev_maps = xmap_dereference(dev->xps_maps);
+
+	/* allocate memory for queue storage */
+	for_each_online_cpu(cpu) {
+		if (!cpumask_test_cpu(cpu, mask))
+			continue;
+
+		if (!new_dev_maps)
+			new_dev_maps = kzalloc(maps_sz, GFP_KERNEL);
+		if (!new_dev_maps)
+			return -ENOMEM;
+
+		map = dev_maps ? xmap_dereference(dev_maps->cpu_map[cpu]) :
+				 NULL;
+
+		map = expand_xps_map(map, cpu, index);
+		if (!map)
+			goto error;
+
+		RCU_INIT_POINTER(new_dev_maps->cpu_map[cpu], map);
+	}
+
+	if (!new_dev_maps)
+		goto out_no_new_maps;
+
+	for_each_possible_cpu(cpu) {
+		if (cpumask_test_cpu(cpu, mask) && cpu_online(cpu)) {
+			/* add queue to CPU maps */
+			int pos = 0;
+
+			map = xmap_dereference(new_dev_maps->cpu_map[cpu]);
+			while ((pos < map->len) && (map->queues[pos] != index))
+				pos++;
+
+			if (pos == map->len)
+				map->queues[map->len++] = index;
+#ifdef CONFIG_NUMA
+			if (numa_node_id == -2)
+				numa_node_id = cpu_to_node(cpu);
+			else if (numa_node_id != cpu_to_node(cpu))
+				numa_node_id = -1;
+#endif
+		} else if (dev_maps) {
+			/* fill in the new device map from the old device map */
+			map = xmap_dereference(dev_maps->cpu_map[cpu]);
+			RCU_INIT_POINTER(new_dev_maps->cpu_map[cpu], map);
+		}
+
+	}
+
+	rcu_assign_pointer(dev->xps_maps, new_dev_maps);
+
+	/* Cleanup old maps */
+	if (dev_maps) {
+		for_each_possible_cpu(cpu) {
+			new_map = xmap_dereference(new_dev_maps->cpu_map[cpu]);
+			map = xmap_dereference(dev_maps->cpu_map[cpu]);
+			if (map && map != new_map)
+				kfree_rcu(map, rcu);
+		}
+
+		kfree_rcu(dev_maps, rcu);
+	}
+
+	dev_maps = new_dev_maps;
+	active = true;
+
+out_no_new_maps:
+	/* update Tx queue numa node */
+	netdev_queue_numa_node_write(netdev_get_tx_queue(dev, index),
+				     (numa_node_id >= 0) ? numa_node_id :
+				     NUMA_NO_NODE);
+
+	if (!dev_maps)
+		goto out_no_maps;
+
+	/* removes queue from unused CPUs */
+	for_each_possible_cpu(cpu) {
+		if (cpumask_test_cpu(cpu, mask) && cpu_online(cpu))
+			continue;
+
+		if (remove_xps_queue(dev_maps, cpu, index))
+			active = true;
+	}
+
+	/* free map if not active */
+	if (!active) {
+		RCU_INIT_POINTER(dev->xps_maps, NULL);
+		kfree_rcu(dev_maps, rcu);
+	}
+
+out_no_maps:
+	mutex_unlock(&xps_map_mutex);
+
+	return 0;
+error:
+	/* remove any maps that we added */
+	for_each_possible_cpu(cpu) {
+		new_map = xmap_dereference(new_dev_maps->cpu_map[cpu]);
+		map = dev_maps ? xmap_dereference(dev_maps->cpu_map[cpu]) :
+				 NULL;
+		if (new_map && new_map != map)
+			kfree(new_map);
+	}
+
+	mutex_unlock(&xps_map_mutex);
+
+	kfree(new_dev_maps);
+	return -ENOMEM;
+}
+EXPORT_SYMBOL(netif_set_xps_queue);
+
+#endif
 /*
  * Routine to help set real_num_tx_queues. To avoid skbs mapped to queues
  * greater then real_num_tx_queues stale skbs on the qdisc must be flushed.
@@ -1880,8 +2009,12 @@ int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
 		if (dev->num_tc)
 			netif_setup_tc(dev, txq);
 
-		if (txq < dev->real_num_tx_queues)
+		if (txq < dev->real_num_tx_queues) {
 			qdisc_reset_all_tx_gt(dev, txq);
+#ifdef CONFIG_XPS
+			netif_reset_xps_queues_gt(dev, txq);
+#endif
+		}
 	}
 
 	dev->real_num_tx_queues = txq;
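
netif_set_xps_queue() is the new kernel-side entry point behind the sysfs xps_cpus knob: it binds one TX queue to a CPU set under xps_map_mutex and republishes the RCU-protected maps. A hedged driver-side sketch (example_setup_xps is illustrative, not from this diff), pinning queue i to CPU i:

	static int example_setup_xps(struct net_device *dev)
	{
		cpumask_var_t mask;
		int i, err = 0;

		if (!alloc_cpumask_var(&mask, GFP_KERNEL))
			return -ENOMEM;

		for (i = 0; i < dev->real_num_tx_queues; i++) {
			if (!cpu_online(i))
				break;
			cpumask_clear(mask);
			cpumask_set_cpu(i, mask);
			err = netif_set_xps_queue(dev, mask, i);  /* queue i -> CPU i */
			if (err)
				break;
		}
		free_cpumask_var(mask);
		return err;
	}
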
@@ -2046,6 +2179,15 @@ int skb_checksum_help(struct sk_buff *skb)
 		return -EINVAL;
 	}
 
+	/* Before computing a checksum, we should make sure no frag could
+	 * be modified by an external entity : checksum could be wrong.
+	 */
+	if (skb_has_shared_frag(skb)) {
+		ret = __skb_linearize(skb);
+		if (ret)
+			goto out;
+	}
+
 	offset = skb_checksum_start_offset(skb);
 	BUG_ON(offset >= skb_headlen(skb));
 	csum = skb_checksum(skb, offset, skb->len - offset, 0);
@@ -2069,25 +2211,19 @@ out:
 EXPORT_SYMBOL(skb_checksum_help);
 
 /**
- *	skb_gso_segment - Perform segmentation on skb.
+ *	skb_mac_gso_segment - mac layer segmentation handler.
  *	@skb: buffer to segment
  *	@features: features for the output path (see dev->features)
- *
- *	This function segments the given skb and returns a list of segments.
- *
- *	It may return NULL if the skb requires no segmentation.  This is
- *	only possible when GSO is used for verifying header integrity.
  */
-struct sk_buff *skb_gso_segment(struct sk_buff *skb,
+struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb,
 	netdev_features_t features)
 {
 	struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
 	struct packet_offload *ptype;
 	__be16 type = skb->protocol;
-	int vlan_depth = ETH_HLEN;
-	int err;
 
 	while (type == htons(ETH_P_8021Q)) {
+		int vlan_depth = ETH_HLEN;
 		struct vlan_hdr *vh;
 
 		if (unlikely(!pskb_may_pull(skb, vlan_depth + VLAN_HLEN)))
@@ -2098,22 +2234,14 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb,
 		vlan_depth += VLAN_HLEN;
 	}
 
-	skb_reset_mac_header(skb);
-	skb->mac_len = skb->network_header - skb->mac_header;
 	__skb_pull(skb, skb->mac_len);
 
-	if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
-		skb_warn_bad_offload(skb);
-
-		if (skb_header_cloned(skb) &&
-		    (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
-			return ERR_PTR(err);
-	}
-
 	rcu_read_lock();
 	list_for_each_entry_rcu(ptype, &offload_base, list) {
 		if (ptype->type == type && ptype->callbacks.gso_segment) {
 			if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
+				int err;
+
 				err = ptype->callbacks.gso_send_check(skb);
 				segs = ERR_PTR(err);
 				if (err || skb_gso_ok(skb, features))
@@ -2131,7 +2259,50 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb,
 
 	return segs;
 }
-EXPORT_SYMBOL(skb_gso_segment);
+EXPORT_SYMBOL(skb_mac_gso_segment);
+
+
+/* openvswitch calls this on rx path, so we need a different check.
+ */
+static inline bool skb_needs_check(struct sk_buff *skb, bool tx_path)
+{
+	if (tx_path)
+		return skb->ip_summed != CHECKSUM_PARTIAL;
+	else
+		return skb->ip_summed == CHECKSUM_NONE;
+}
+
+/**
+ *	__skb_gso_segment - Perform segmentation on skb.
+ *	@skb: buffer to segment
+ *	@features: features for the output path (see dev->features)
+ *	@tx_path: whether it is called in TX path
+ *
+ *	This function segments the given skb and returns a list of segments.
+ *
+ *	It may return NULL if the skb requires no segmentation.  This is
+ *	only possible when GSO is used for verifying header integrity.
+ */
+struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
+				  netdev_features_t features, bool tx_path)
+{
+	if (unlikely(skb_needs_check(skb, tx_path))) {
+		int err;
+
+		skb_warn_bad_offload(skb);
+
+		if (skb_header_cloned(skb) &&
+		    (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
+			return ERR_PTR(err);
+	}
+
+	SKB_GSO_CB(skb)->mac_offset = skb_headroom(skb);
+	skb_reset_mac_header(skb);
+	skb_reset_mac_len(skb);
+
+	return skb_mac_gso_segment(skb, features);
+}
+EXPORT_SYMBOL(__skb_gso_segment);
 
 /* Take action when hardware reception checksum errors are detected. */
 #ifdef CONFIG_BUG
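
The tx_path flag exists because openvswitch now segments on the receive path, where CHECKSUM_PARTIAL is not the expected state; on RX only CHECKSUM_NONE warrants the bad-offload warning. Existing callers keep the old name through a header wrapper, roughly (it lives in include/linux/netdevice.h, not shown in this diff):

	static inline struct sk_buff *skb_gso_segment(struct sk_buff *skb,
						      netdev_features_t features)
	{
		return __skb_gso_segment(skb, features, true);	/* TX path */
	}
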
@@ -2410,126 +2581,28 @@ out:
 	return rc;
 }
 
-static u32 hashrnd __read_mostly;
-
-/*
- * Returns a Tx hash based on the given packet descriptor a Tx queues' number
- * to be used as a distribution range.
- */
-u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb,
-		  unsigned int num_tx_queues)
-{
-	u32 hash;
-	u16 qoffset = 0;
-	u16 qcount = num_tx_queues;
-
-	if (skb_rx_queue_recorded(skb)) {
-		hash = skb_get_rx_queue(skb);
-		while (unlikely(hash >= num_tx_queues))
-			hash -= num_tx_queues;
-		return hash;
-	}
-
-	if (dev->num_tc) {
-		u8 tc = netdev_get_prio_tc_map(dev, skb->priority);
-		qoffset = dev->tc_to_txq[tc].offset;
-		qcount = dev->tc_to_txq[tc].count;
-	}
-
-	if (skb->sk && skb->sk->sk_hash)
-		hash = skb->sk->sk_hash;
-	else
-		hash = (__force u16) skb->protocol;
-	hash = jhash_1word(hash, hashrnd);
-
-	return (u16) (((u64) hash * qcount) >> 32) + qoffset;
-}
-EXPORT_SYMBOL(__skb_tx_hash);
-
-static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index)
-{
-	if (unlikely(queue_index >= dev->real_num_tx_queues)) {
-		net_warn_ratelimited("%s selects TX queue %d, but real number of TX queues is %d\n",
-				     dev->name, queue_index,
-				     dev->real_num_tx_queues);
-		return 0;
-	}
-	return queue_index;
-}
-
-static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
-{
-#ifdef CONFIG_XPS
-	struct xps_dev_maps *dev_maps;
-	struct xps_map *map;
-	int queue_index = -1;
-
-	rcu_read_lock();
-	dev_maps = rcu_dereference(dev->xps_maps);
-	if (dev_maps) {
-		map = rcu_dereference(
-		    dev_maps->cpu_map[raw_smp_processor_id()]);
-		if (map) {
-			if (map->len == 1)
-				queue_index = map->queues[0];
-			else {
-				u32 hash;
-				if (skb->sk && skb->sk->sk_hash)
-					hash = skb->sk->sk_hash;
-				else
-					hash = (__force u16) skb->protocol ^
-					    skb->rxhash;
-				hash = jhash_1word(hash, hashrnd);
-				queue_index = map->queues[
-				    ((u64)hash * map->len) >> 32];
-			}
-			if (unlikely(queue_index >= dev->real_num_tx_queues))
-				queue_index = -1;
-		}
-	}
-	rcu_read_unlock();
-
-	return queue_index;
-#else
-	return -1;
-#endif
-}
-
-struct netdev_queue *netdev_pick_tx(struct net_device *dev,
-				    struct sk_buff *skb)
-{
-	int queue_index;
-	const struct net_device_ops *ops = dev->netdev_ops;
-
-	if (dev->real_num_tx_queues == 1)
-		queue_index = 0;
-	else if (ops->ndo_select_queue) {
-		queue_index = ops->ndo_select_queue(dev, skb);
-		queue_index = dev_cap_txqueue(dev, queue_index);
-	} else {
-		struct sock *sk = skb->sk;
-		queue_index = sk_tx_queue_get(sk);
-
-		if (queue_index < 0 || skb->ooo_okay ||
-		    queue_index >= dev->real_num_tx_queues) {
-			int old_index = queue_index;
-
-			queue_index = get_xps_queue(dev, skb);
-			if (queue_index < 0)
-				queue_index = skb_tx_hash(dev, skb);
-
-			if (queue_index != old_index && sk) {
-				struct dst_entry *dst =
-				    rcu_dereference_check(sk->sk_dst_cache, 1);
-
-				if (dst && skb_dst(skb) == dst)
-					sk_tx_queue_set(sk, queue_index);
-			}
-		}
-	}
-
-	skb_set_queue_mapping(skb, queue_index);
-	return netdev_get_tx_queue(dev, queue_index);
-}
+static void qdisc_pkt_len_init(struct sk_buff *skb)
+{
+	const struct skb_shared_info *shinfo = skb_shinfo(skb);
+
+	qdisc_skb_cb(skb)->pkt_len = skb->len;
+
+	/* To get more precise estimation of bytes sent on wire,
+	 * we add to pkt_len the headers size of all segments
+	 */
+	if (shinfo->gso_size) {
+		unsigned int hdr_len;
+
+		/* mac layer + network layer */
+		hdr_len = skb_transport_header(skb) - skb_mac_header(skb);
+
+		/* + transport layer */
+		if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))
+			hdr_len += tcp_hdrlen(skb);
+		else
+			hdr_len += sizeof(struct udphdr);
+		qdisc_skb_cb(skb)->pkt_len += (shinfo->gso_segs - 1) * hdr_len;
+	}
+}
 
 static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
@@ -2540,7 +2613,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
 	bool contended;
 	int rc;
 
-	qdisc_skb_cb(skb)->pkt_len = skb->len;
+	qdisc_pkt_len_init(skb);
 	qdisc_calculate_pkt_len(skb, q);
 	/*
 	 * Heuristic to force contended enqueues to serialize on a
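
qdisc_pkt_len_init() charges a GSO skb for the header bytes every segment will carry on the wire, not just the first copy. A worked example with illustrative numbers:

	/* TCP GSO skb carrying 3 segments of 1448 bytes behind one header copy:
	 *
	 *	skb->len = 66 + 3 * 1448 = 4410		(one copy of the headers)
	 *	hdr_len  = 66				(14 Eth + 20 IPv4 + 32 TCP)
	 *	gso_segs = 3
	 *
	 *	pkt_len  = 4410 + (3 - 1) * 66 = 4542 = 3 * 1514
	 *
	 * i.e. exactly the bytes the three resulting frames occupy on the wire,
	 * which is what byte-based qdiscs (HTB, TBF, fq_codel) want to account.
	 */
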
@@ -2663,6 +2736,8 @@ int dev_queue_xmit(struct sk_buff *skb)
 	struct Qdisc *q;
 	int rc = -ENOMEM;
 
+	skb_reset_mac_header(skb);
+
 	/* Disable soft irqs for various locks below. Also
 	 * stops preemption for RCU.
 	 */
@@ -2757,41 +2832,6 @@ static inline void ____napi_schedule(struct softnet_data *sd,
 	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
 }
 
-/*
- * __skb_get_rxhash: calculate a flow hash based on src/dst addresses
- * and src/dst port numbers.  Sets rxhash in skb to non-zero hash value
- * on success, zero indicates no valid hash.  Also, sets l4_rxhash in skb
- * if hash is a canonical 4-tuple hash over transport ports.
- */
-void __skb_get_rxhash(struct sk_buff *skb)
-{
-	struct flow_keys keys;
-	u32 hash;
-
-	if (!skb_flow_dissect(skb, &keys))
-		return;
-
-	if (keys.ports)
-		skb->l4_rxhash = 1;
-
-	/* get a consistent hash (same value on both flow directions) */
-	if (((__force u32)keys.dst < (__force u32)keys.src) ||
-	    (((__force u32)keys.dst == (__force u32)keys.src) &&
-	     ((__force u16)keys.port16[1] < (__force u16)keys.port16[0]))) {
-		swap(keys.dst, keys.src);
-		swap(keys.port16[0], keys.port16[1]);
-	}
-
-	hash = jhash_3words((__force u32)keys.dst,
-			    (__force u32)keys.src,
-			    (__force u32)keys.ports, hashrnd);
-	if (!hash)
-		hash = 1;
-
-	skb->rxhash = hash;
-}
-EXPORT_SYMBOL(__skb_get_rxhash);
-
 #ifdef CONFIG_RPS
 
 /* One global table that all flow-based protocols share. */
@@ -3318,7 +3358,7 @@ static bool skb_pfmemalloc_protocol(struct sk_buff *skb)
 	}
 }
 
-static int __netif_receive_skb(struct sk_buff *skb)
+static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc)
 {
 	struct packet_type *ptype, *pt_prev;
 	rx_handler_func_t *rx_handler;
@@ -3327,24 +3367,11 @@ static int __netif_receive_skb(struct sk_buff *skb)
 	bool deliver_exact = false;
 	int ret = NET_RX_DROP;
 	__be16 type;
-	unsigned long pflags = current->flags;
 
 	net_timestamp_check(!netdev_tstamp_prequeue, skb);
 
 	trace_netif_receive_skb(skb);
 
-	/*
-	 * PFMEMALLOC skbs are special, they should
-	 * - be delivered to SOCK_MEMALLOC sockets only
-	 * - stay away from userspace
-	 * - have bounded memory usage
-	 *
-	 * Use PF_MEMALLOC as this saves us from propagating the allocation
-	 * context down to all allocation sites.
-	 */
-	if (sk_memalloc_socks() && skb_pfmemalloc(skb))
-		current->flags |= PF_MEMALLOC;
-
 	/* if we've gotten here through NAPI, check netpoll */
 	if (netpoll_receive_skb(skb))
 		goto out;
@@ -3352,7 +3379,8 @@ static int __netif_receive_skb(struct sk_buff *skb)
 	orig_dev = skb->dev;
 
 	skb_reset_network_header(skb);
-	skb_reset_transport_header(skb);
+	if (!skb_transport_header_was_set(skb))
+		skb_reset_transport_header(skb);
 	skb_reset_mac_len(skb);
 
 	pt_prev = NULL;
@@ -3377,7 +3405,7 @@ another_round:
 	}
 #endif
 
-	if (sk_memalloc_socks() && skb_pfmemalloc(skb))
+	if (pfmemalloc)
 		goto skip_taps;
 
 	list_for_each_entry_rcu(ptype, &ptype_all, list) {
@@ -3396,8 +3424,7 @@ skip_taps:
 ncls:
 #endif
 
-	if (sk_memalloc_socks() && skb_pfmemalloc(skb)
-	    && !skb_pfmemalloc_protocol(skb))
+	if (pfmemalloc && !skb_pfmemalloc_protocol(skb))
 		goto drop;
 
 	if (vlan_tx_tag_present(skb)) {
@@ -3467,7 +3494,31 @@ drop:
 unlock:
 	rcu_read_unlock();
 out:
-	tsk_restore_flags(current, pflags, PF_MEMALLOC);
+	return ret;
+}
+
+static int __netif_receive_skb(struct sk_buff *skb)
+{
+	int ret;
+
+	if (sk_memalloc_socks() && skb_pfmemalloc(skb)) {
+		unsigned long pflags = current->flags;
+
+		/*
+		 * PFMEMALLOC skbs are special, they should
+		 * - be delivered to SOCK_MEMALLOC sockets only
+		 * - stay away from userspace
+		 * - have bounded memory usage
+		 *
+		 * Use PF_MEMALLOC as this saves us from propagating the allocation
+		 * context down to all allocation sites.
+		 */
+		current->flags |= PF_MEMALLOC;
+		ret = __netif_receive_skb_core(skb, true);
+		tsk_restore_flags(current, pflags, PF_MEMALLOC);
+	} else
+		ret = __netif_receive_skb_core(skb, false);
+
 	return ret;
 }
 
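
Splitting __netif_receive_skb() this way keeps the PF_MEMALLOC save/set/restore off the common path; only pfmemalloc skbs pay for it. The idiom in isolation, as a hedged sketch (with_memalloc is illustrative, not from this diff):

	static int with_memalloc(int (*fn)(void *arg), void *arg)
	{
		unsigned long pflags = current->flags;
		int ret;

		current->flags |= PF_MEMALLOC;
		ret = fn(arg);
		/* clears PF_MEMALLOC only if it was clear on entry */
		tsk_restore_flags(current, pflags, PF_MEMALLOC);
		return ret;
	}
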
@@ -3634,7 +3685,6 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
 	__be16 type = skb->protocol;
 	struct list_head *head = &offload_base;
 	int same_flow;
-	int mac_len;
 	enum gro_result ret;
 
 	if (!(skb->dev->features & NETIF_F_GRO) || netpoll_rx_on(skb))
@@ -3651,8 +3701,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
 			continue;
 
 		skb_set_network_header(skb, skb_gro_offset(skb));
-		mac_len = skb->network_header - skb->mac_header;
-		skb->mac_len = mac_len;
+		skb_reset_mac_len(skb);
 		NAPI_GRO_CB(skb)->same_flow = 0;
 		NAPI_GRO_CB(skb)->flush = 0;
 		NAPI_GRO_CB(skb)->free = 0;
@@ -4134,530 +4183,231 @@ softnet_break:
4134 goto out; 4183 goto out;
4135} 4184}
4136 4185
4137static gifconf_func_t *gifconf_list[NPROTO]; 4186struct netdev_upper {
4138
4139/**
4140 * register_gifconf - register a SIOCGIF handler
4141 * @family: Address family
4142 * @gifconf: Function handler
4143 *
4144 * Register protocol dependent address dumping routines. The handler
4145 * that is passed must not be freed or reused until it has been replaced
4146 * by another handler.
4147 */
4148int register_gifconf(unsigned int family, gifconf_func_t *gifconf)
4149{
4150 if (family >= NPROTO)
4151 return -EINVAL;
4152 gifconf_list[family] = gifconf;
4153 return 0;
4154}
4155EXPORT_SYMBOL(register_gifconf);
4156
4157
4158/*
4159 * Map an interface index to its name (SIOCGIFNAME)
4160 */
4161
4162/*
4163 * We need this ioctl for efficient implementation of the
4164 * if_indextoname() function required by the IPv6 API. Without
4165 * it, we would have to search all the interfaces to find a
4166 * match. --pb
4167 */
4168
4169static int dev_ifname(struct net *net, struct ifreq __user *arg)
4170{
4171 struct net_device *dev; 4187 struct net_device *dev;
4172 struct ifreq ifr; 4188 bool master;
4173 unsigned seq; 4189 struct list_head list;
4174 4190 struct rcu_head rcu;
4175 /* 4191 struct list_head search_list;
4176 * Fetch the caller's info block. 4192};
4177 */
4178 4193
4179 if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) 4194static void __append_search_uppers(struct list_head *search_list,
4180 return -EFAULT; 4195 struct net_device *dev)
4196{
4197 struct netdev_upper *upper;
4181 4198
4182retry: 4199 list_for_each_entry(upper, &dev->upper_dev_list, list) {
4183 seq = read_seqbegin(&devnet_rename_seq); 4200 /* check if this upper is not already in search list */
4184 rcu_read_lock(); 4201 if (list_empty(&upper->search_list))
4185 dev = dev_get_by_index_rcu(net, ifr.ifr_ifindex); 4202 list_add_tail(&upper->search_list, search_list);
4186 if (!dev) {
4187 rcu_read_unlock();
4188 return -ENODEV;
4189 } 4203 }
4190
4191 strcpy(ifr.ifr_name, dev->name);
4192 rcu_read_unlock();
4193 if (read_seqretry(&devnet_rename_seq, seq))
4194 goto retry;
4195
4196 if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
4197 return -EFAULT;
4198 return 0;
4199} 4204}
4200 4205
4201/* 4206static bool __netdev_search_upper_dev(struct net_device *dev,
4202 * Perform a SIOCGIFCONF call. This structure will change 4207 struct net_device *upper_dev)
4203 * size eventually, and there is nothing I can do about it.
4204 * Thus we will need a 'compatibility mode'.
4205 */
4206
4207static int dev_ifconf(struct net *net, char __user *arg)
4208{ 4208{
4209 struct ifconf ifc; 4209 LIST_HEAD(search_list);
4210 struct net_device *dev; 4210 struct netdev_upper *upper;
4211 char __user *pos; 4211 struct netdev_upper *tmp;
4212 int len; 4212 bool ret = false;
4213 int total;
4214 int i;
4215
4216 /*
4217 * Fetch the caller's info block.
4218 */
4219 4213
4220 if (copy_from_user(&ifc, arg, sizeof(struct ifconf))) 4214 __append_search_uppers(&search_list, dev);
4221 return -EFAULT; 4215 list_for_each_entry(upper, &search_list, search_list) {
4222 4216 if (upper->dev == upper_dev) {
4223 pos = ifc.ifc_buf; 4217 ret = true;
4224 len = ifc.ifc_len; 4218 break;
4225
4226 /*
4227 * Loop over the interfaces, and write an info block for each.
4228 */
4229
4230 total = 0;
4231 for_each_netdev(net, dev) {
4232 for (i = 0; i < NPROTO; i++) {
4233 if (gifconf_list[i]) {
4234 int done;
4235 if (!pos)
4236 done = gifconf_list[i](dev, NULL, 0);
4237 else
4238 done = gifconf_list[i](dev, pos + total,
4239 len - total);
4240 if (done < 0)
4241 return -EFAULT;
4242 total += done;
4243 }
4244 } 4219 }
4220 __append_search_uppers(&search_list, upper->dev);
4245 } 4221 }
4246 4222 list_for_each_entry_safe(upper, tmp, &search_list, search_list)
4247 /* 4223 INIT_LIST_HEAD(&upper->search_list);
4248 * All done. Write the updated control block back to the caller. 4224 return ret;
4249 */
4250 ifc.ifc_len = total;
4251
4252 /*
4253 * Both BSD and Solaris return 0 here, so we do too.
4254 */
4255 return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0;
4256} 4225}
4257 4226
4258#ifdef CONFIG_PROC_FS 4227static struct netdev_upper *__netdev_find_upper(struct net_device *dev,
4259 4228 struct net_device *upper_dev)
4260#define BUCKET_SPACE (32 - NETDEV_HASHBITS - 1)
4261
4262#define get_bucket(x) ((x) >> BUCKET_SPACE)
4263#define get_offset(x) ((x) & ((1 << BUCKET_SPACE) - 1))
4264#define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))
4265
4266static inline struct net_device *dev_from_same_bucket(struct seq_file *seq, loff_t *pos)
4267{ 4229{
4268 struct net *net = seq_file_net(seq); 4230 struct netdev_upper *upper;
4269 struct net_device *dev;
4270 struct hlist_node *p;
4271 struct hlist_head *h;
4272 unsigned int count = 0, offset = get_offset(*pos);
4273 4231
4274 h = &net->dev_name_head[get_bucket(*pos)]; 4232 list_for_each_entry(upper, &dev->upper_dev_list, list) {
4275 hlist_for_each_entry_rcu(dev, p, h, name_hlist) { 4233 if (upper->dev == upper_dev)
4276 if (++count == offset) 4234 return upper;
4277 return dev;
4278 } 4235 }
4279
4280 return NULL; 4236 return NULL;
4281} 4237}
4282 4238
4283static inline struct net_device *dev_from_bucket(struct seq_file *seq, loff_t *pos) 4239/**
4284{ 4240 * netdev_has_upper_dev - Check if device is linked to an upper device
4285 struct net_device *dev; 4241 * @dev: device
4286 unsigned int bucket; 4242 * @upper_dev: upper device to check
4287 4243 *
4288 do { 4244 * Find out if a device is linked to specified upper device and return true
4289 dev = dev_from_same_bucket(seq, pos); 4245 * in case it is. Note that this checks only immediate upper device,
4290 if (dev) 4246 * not through a complete stack of devices. The caller must hold the RTNL lock.
4291 return dev;
4292
4293 bucket = get_bucket(*pos) + 1;
4294 *pos = set_bucket_offset(bucket, 1);
4295 } while (bucket < NETDEV_HASHENTRIES);
4296
4297 return NULL;
4298}
4299
4300/*
4301 * This is invoked by the /proc filesystem handler to display a device
4302 * in detail.
4303 */ 4247 */
4304void *dev_seq_start(struct seq_file *seq, loff_t *pos) 4248bool netdev_has_upper_dev(struct net_device *dev,
4305 __acquires(RCU) 4249 struct net_device *upper_dev)
4306{
4307 rcu_read_lock();
4308 if (!*pos)
4309 return SEQ_START_TOKEN;
4310
4311 if (get_bucket(*pos) >= NETDEV_HASHENTRIES)
4312 return NULL;
4313
4314 return dev_from_bucket(seq, pos);
4315}
4316
4317void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
4318{
4319 ++*pos;
4320 return dev_from_bucket(seq, pos);
4321}
4322
4323void dev_seq_stop(struct seq_file *seq, void *v)
4324 __releases(RCU)
4325{
4326 rcu_read_unlock();
4327}
4328
4329static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
4330{ 4250{
4331 struct rtnl_link_stats64 temp; 4251 ASSERT_RTNL();
4332 const struct rtnl_link_stats64 *stats = dev_get_stats(dev, &temp);
4333 4252
4334 seq_printf(seq, "%6s: %7llu %7llu %4llu %4llu %4llu %5llu %10llu %9llu " 4253 return __netdev_find_upper(dev, upper_dev);
4335 "%8llu %7llu %4llu %4llu %4llu %5llu %7llu %10llu\n",
4336 dev->name, stats->rx_bytes, stats->rx_packets,
4337 stats->rx_errors,
4338 stats->rx_dropped + stats->rx_missed_errors,
4339 stats->rx_fifo_errors,
4340 stats->rx_length_errors + stats->rx_over_errors +
4341 stats->rx_crc_errors + stats->rx_frame_errors,
4342 stats->rx_compressed, stats->multicast,
4343 stats->tx_bytes, stats->tx_packets,
4344 stats->tx_errors, stats->tx_dropped,
4345 stats->tx_fifo_errors, stats->collisions,
4346 stats->tx_carrier_errors +
4347 stats->tx_aborted_errors +
4348 stats->tx_window_errors +
4349 stats->tx_heartbeat_errors,
4350 stats->tx_compressed);
4351} 4254}
4255EXPORT_SYMBOL(netdev_has_upper_dev);
4352 4256
4353/* 4257/**
4354 * Called from the PROCfs module. This now uses the new arbitrary sized 4258 * netdev_has_any_upper_dev - Check if device is linked to some device
4355 * /proc/net interface to create /proc/net/dev 4259 * @dev: device
4260 *
4261 * Find out if a device is linked to an upper device and return true in case
4262 * it is. The caller must hold the RTNL lock.
4356 */ 4263 */
4357static int dev_seq_show(struct seq_file *seq, void *v) 4264bool netdev_has_any_upper_dev(struct net_device *dev)
4358{
4359 if (v == SEQ_START_TOKEN)
4360 seq_puts(seq, "Inter-| Receive "
4361 " | Transmit\n"
4362 " face |bytes packets errs drop fifo frame "
4363 "compressed multicast|bytes packets errs "
4364 "drop fifo colls carrier compressed\n");
4365 else
4366 dev_seq_printf_stats(seq, v);
4367 return 0;
4368}
4369
4370static struct softnet_data *softnet_get_online(loff_t *pos)
4371{
4372 struct softnet_data *sd = NULL;
4373
4374 while (*pos < nr_cpu_ids)
4375 if (cpu_online(*pos)) {
4376 sd = &per_cpu(softnet_data, *pos);
4377 break;
4378 } else
4379 ++*pos;
4380 return sd;
4381}
4382
4383static void *softnet_seq_start(struct seq_file *seq, loff_t *pos)
4384{ 4265{
4385 return softnet_get_online(pos); 4266 ASSERT_RTNL();
4386}
4387
4388static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
4389{
4390 ++*pos;
4391 return softnet_get_online(pos);
4392}
4393 4267
4394static void softnet_seq_stop(struct seq_file *seq, void *v) 4268 return !list_empty(&dev->upper_dev_list);
4395{
4396} 4269}
4270EXPORT_SYMBOL(netdev_has_any_upper_dev);
4397 4271
4398static int softnet_seq_show(struct seq_file *seq, void *v) 4272/**
4399{ 4273 * netdev_master_upper_dev_get - Get master upper device
4400 struct softnet_data *sd = v; 4274 * @dev: device
4401 4275 *
4402 seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x\n", 4276 * Find a master upper device and return pointer to it or NULL in case
4403 sd->processed, sd->dropped, sd->time_squeeze, 0, 4277 * it's not there. The caller must hold the RTNL lock.
4404 0, 0, 0, 0, /* was fastroute */ 4278 */
4405 sd->cpu_collision, sd->received_rps); 4279struct net_device *netdev_master_upper_dev_get(struct net_device *dev)
4406 return 0;
4407}
4408
4409static const struct seq_operations dev_seq_ops = {
4410 .start = dev_seq_start,
4411 .next = dev_seq_next,
4412 .stop = dev_seq_stop,
4413 .show = dev_seq_show,
4414};
4415
4416static int dev_seq_open(struct inode *inode, struct file *file)
4417{ 4280{
4418 return seq_open_net(inode, file, &dev_seq_ops, 4281 struct netdev_upper *upper;
4419 sizeof(struct seq_net_private));
4420}
4421 4282
4422static const struct file_operations dev_seq_fops = { 4283 ASSERT_RTNL();
4423 .owner = THIS_MODULE,
4424 .open = dev_seq_open,
4425 .read = seq_read,
4426 .llseek = seq_lseek,
4427 .release = seq_release_net,
4428};
4429 4284
4430static const struct seq_operations softnet_seq_ops = { 4285 if (list_empty(&dev->upper_dev_list))
4431 .start = softnet_seq_start, 4286 return NULL;
4432 .next = softnet_seq_next,
4433 .stop = softnet_seq_stop,
4434 .show = softnet_seq_show,
4435};
4436 4287
4437static int softnet_seq_open(struct inode *inode, struct file *file) 4288 upper = list_first_entry(&dev->upper_dev_list,
4438{ 4289 struct netdev_upper, list);
4439 return seq_open(file, &softnet_seq_ops); 4290 if (likely(upper->master))
4291 return upper->dev;
4292 return NULL;
4440} 4293}
4294EXPORT_SYMBOL(netdev_master_upper_dev_get);
4441 4295
4442static const struct file_operations softnet_seq_fops = { 4296/**
4443 .owner = THIS_MODULE, 4297 * netdev_master_upper_dev_get_rcu - Get master upper device
4444 .open = softnet_seq_open, 4298 * @dev: device
4445 .read = seq_read, 4299 *
4446 .llseek = seq_lseek, 4300 * Find a master upper device and return pointer to it or NULL in case
4447 .release = seq_release, 4301 * it's not there. The caller must hold the RCU read lock.
4448}; 4302 */
4449 4303struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev)
4450static void *ptype_get_idx(loff_t pos)
4451{ 4304{
4452 struct packet_type *pt = NULL; 4305 struct netdev_upper *upper;
4453 loff_t i = 0;
4454 int t;
4455
4456 list_for_each_entry_rcu(pt, &ptype_all, list) {
4457 if (i == pos)
4458 return pt;
4459 ++i;
4460 }
4461 4306
4462 for (t = 0; t < PTYPE_HASH_SIZE; t++) { 4307 upper = list_first_or_null_rcu(&dev->upper_dev_list,
4463 list_for_each_entry_rcu(pt, &ptype_base[t], list) { 4308 struct netdev_upper, list);
4464 if (i == pos) 4309 if (upper && likely(upper->master))
4465 return pt; 4310 return upper->dev;
4466 ++i;
4467 }
4468 }
4469 return NULL; 4311 return NULL;
4470} 4312}
4313EXPORT_SYMBOL(netdev_master_upper_dev_get_rcu);
4471 4314
4472static void *ptype_seq_start(struct seq_file *seq, loff_t *pos) 4315static int __netdev_upper_dev_link(struct net_device *dev,
4473 __acquires(RCU) 4316 struct net_device *upper_dev, bool master)
4474{ 4317{
4475 rcu_read_lock(); 4318 struct netdev_upper *upper;
4476 return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN;
4477}
4478 4319
4479static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos) 4320 ASSERT_RTNL();
4480{
4481 struct packet_type *pt;
4482 struct list_head *nxt;
4483 int hash;
4484 4321
4485 ++*pos; 4322 if (dev == upper_dev)
4486 if (v == SEQ_START_TOKEN) 4323 return -EBUSY;
4487 return ptype_get_idx(0);
4488 4324
4489 pt = v; 4325 /* To prevent loops, check if dev is not upper device to upper_dev. */
4490 nxt = pt->list.next; 4326 if (__netdev_search_upper_dev(upper_dev, dev))
4491 if (pt->type == htons(ETH_P_ALL)) { 4327 return -EBUSY;
4492 if (nxt != &ptype_all)
4493 goto found;
4494 hash = 0;
4495 nxt = ptype_base[0].next;
4496 } else
4497 hash = ntohs(pt->type) & PTYPE_HASH_MASK;
4498 4328
4499 while (nxt == &ptype_base[hash]) { 4329 if (__netdev_find_upper(dev, upper_dev))
4500 if (++hash >= PTYPE_HASH_SIZE) 4330 return -EEXIST;
4501 return NULL;
4502 nxt = ptype_base[hash].next;
4503 }
4504found:
4505 return list_entry(nxt, struct packet_type, list);
4506}
4507 4331
4508static void ptype_seq_stop(struct seq_file *seq, void *v) 4332 if (master && netdev_master_upper_dev_get(dev))
4509 __releases(RCU) 4333 return -EBUSY;
4510{
4511 rcu_read_unlock();
4512}
4513 4334
4514static int ptype_seq_show(struct seq_file *seq, void *v) 4335 upper = kmalloc(sizeof(*upper), GFP_KERNEL);
4515{ 4336 if (!upper)
4516 struct packet_type *pt = v; 4337 return -ENOMEM;
4517 4338
4518 if (v == SEQ_START_TOKEN) 4339 upper->dev = upper_dev;
4519 seq_puts(seq, "Type Device Function\n"); 4340 upper->master = master;
4520 else if (pt->dev == NULL || dev_net(pt->dev) == seq_file_net(seq)) { 4341 INIT_LIST_HEAD(&upper->search_list);
4521 if (pt->type == htons(ETH_P_ALL))
4522 seq_puts(seq, "ALL ");
4523 else
4524 seq_printf(seq, "%04x", ntohs(pt->type));
4525 4342
4526 seq_printf(seq, " %-8s %pF\n", 4343 /* Ensure that master upper link is always the first item in list. */
4527 pt->dev ? pt->dev->name : "", pt->func); 4344 if (master)
4528 } 4345 list_add_rcu(&upper->list, &dev->upper_dev_list);
4346 else
4347 list_add_tail_rcu(&upper->list, &dev->upper_dev_list);
4348 dev_hold(upper_dev);
4529 4349
4530 return 0; 4350 return 0;
4531} 4351}
4532 4352
4533static const struct seq_operations ptype_seq_ops = { 4353/**
4534 .start = ptype_seq_start, 4354 * netdev_upper_dev_link - Add a link to the upper device
4535 .next = ptype_seq_next, 4355 * @dev: device
4536 .stop = ptype_seq_stop, 4356 * @upper_dev: new upper device
4537 .show = ptype_seq_show, 4357 *
4538}; 4358 * Adds a link to device which is upper to this one. The caller must hold
4539 4359 * the RTNL lock. On a failure a negative errno code is returned.
4540static int ptype_seq_open(struct inode *inode, struct file *file) 4360 * On success the reference counts are adjusted and the function
4541{ 4361 * returns zero.
4542 return seq_open_net(inode, file, &ptype_seq_ops, 4362 */
4543 sizeof(struct seq_net_private)); 4363int netdev_upper_dev_link(struct net_device *dev,
4544} 4364 struct net_device *upper_dev)
4545
4546static const struct file_operations ptype_seq_fops = {
4547 .owner = THIS_MODULE,
4548 .open = ptype_seq_open,
4549 .read = seq_read,
4550 .llseek = seq_lseek,
4551 .release = seq_release_net,
4552};
4553
4554
4555static int __net_init dev_proc_net_init(struct net *net)
4556{
4557 int rc = -ENOMEM;
4558
4559 if (!proc_net_fops_create(net, "dev", S_IRUGO, &dev_seq_fops))
4560 goto out;
4561 if (!proc_net_fops_create(net, "softnet_stat", S_IRUGO, &softnet_seq_fops))
4562 goto out_dev;
4563 if (!proc_net_fops_create(net, "ptype", S_IRUGO, &ptype_seq_fops))
4564 goto out_softnet;
4565
4566 if (wext_proc_init(net))
4567 goto out_ptype;
4568 rc = 0;
4569out:
4570 return rc;
4571out_ptype:
4572 proc_net_remove(net, "ptype");
4573out_softnet:
4574 proc_net_remove(net, "softnet_stat");
4575out_dev:
4576 proc_net_remove(net, "dev");
4577 goto out;
4578}
4579
4580static void __net_exit dev_proc_net_exit(struct net *net)
4581{
4582 wext_proc_exit(net);
4583
4584 proc_net_remove(net, "ptype");
4585 proc_net_remove(net, "softnet_stat");
4586 proc_net_remove(net, "dev");
4587}
4588
4589static struct pernet_operations __net_initdata dev_proc_ops = {
4590 .init = dev_proc_net_init,
4591 .exit = dev_proc_net_exit,
4592};
4593
4594static int __init dev_proc_init(void)
4595{ 4365{
4596 return register_pernet_subsys(&dev_proc_ops); 4366 return __netdev_upper_dev_link(dev, upper_dev, false);
4597} 4367}
4598#else 4368EXPORT_SYMBOL(netdev_upper_dev_link);
4599#define dev_proc_init() 0
4600#endif /* CONFIG_PROC_FS */
4601
4602 4369
4603/** 4370/**
4604 * netdev_set_master - set up master pointer 4371 * netdev_master_upper_dev_link - Add a master link to the upper device
4605 * @slave: slave device 4372 * @dev: device
4606 * @master: new master device 4373 * @upper_dev: new upper device
4607 * 4374 *
4608 * Changes the master device of the slave. Pass %NULL to break the 4375 * Adds a link to device which is upper to this one. In this case, only
4609 * bonding. The caller must hold the RTNL semaphore. On a failure 4376 * one master upper device can be linked, although other non-master devices
4610 * a negative errno code is returned. On success the reference counts 4377 * might be linked as well. The caller must hold the RTNL lock.
4611 * are adjusted and the function returns zero. 4378 * On a failure a negative errno code is returned. On success the reference
4379 * counts are adjusted and the function returns zero.
4612 */ 4380 */
4613int netdev_set_master(struct net_device *slave, struct net_device *master) 4381int netdev_master_upper_dev_link(struct net_device *dev,
4382 struct net_device *upper_dev)
4614{ 4383{
4615 struct net_device *old = slave->master; 4384 return __netdev_upper_dev_link(dev, upper_dev, true);
4616
4617 ASSERT_RTNL();
4618
4619 if (master) {
4620 if (old)
4621 return -EBUSY;
4622 dev_hold(master);
4623 }
4624
4625 slave->master = master;
4626
4627 if (old)
4628 dev_put(old);
4629 return 0;
4630} 4385}
4631EXPORT_SYMBOL(netdev_set_master); 4386EXPORT_SYMBOL(netdev_master_upper_dev_link);
4632 4387
4633/** 4388/**
4634 * netdev_set_bond_master - set up bonding master/slave pair 4389 * netdev_upper_dev_unlink - Removes a link to upper device
4635 * @slave: slave device 4390 * @dev: device
4636 * @master: new master device 4391 * @upper_dev: new upper device
4637 * 4392 *
4638 * Changes the master device of the slave. Pass %NULL to break the 4393 * Removes a link to device which is upper to this one. The caller must hold
4639 * bonding. The caller must hold the RTNL semaphore. On a failure 4394 * the RTNL lock.
4640 * a negative errno code is returned. On success %RTM_NEWLINK is sent
4641 * to the routing socket and the function returns zero.
4642 */ 4395 */
4643int netdev_set_bond_master(struct net_device *slave, struct net_device *master) 4396void netdev_upper_dev_unlink(struct net_device *dev,
4397 struct net_device *upper_dev)
4644{ 4398{
4645 int err; 4399 struct netdev_upper *upper;
4646 4400
4647 ASSERT_RTNL(); 4401 ASSERT_RTNL();
4648 4402
4649 err = netdev_set_master(slave, master); 4403 upper = __netdev_find_upper(dev, upper_dev);
4650 if (err) 4404 if (!upper)
4651 return err; 4405 return;
4652 if (master) 4406 list_del_rcu(&upper->list);
4653 slave->flags |= IFF_SLAVE; 4407 dev_put(upper_dev);
4654 else 4408 kfree_rcu(upper, rcu);
4655 slave->flags &= ~IFF_SLAVE;
4656
4657 rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
4658 return 0;
4659} 4409}
4660EXPORT_SYMBOL(netdev_set_bond_master); 4410EXPORT_SYMBOL(netdev_upper_dev_unlink);
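The new calls above replace the single dev->master pointer. A minimal sketch of how a bonding-style driver might drive them, under the same RTNL rule the old netdev_set_bond_master() enforced (the example_* names are hypothetical, not from any in-tree driver):

static int example_enslave(struct net_device *bond_dev,
			   struct net_device *slave_dev)
{
	int err;

	ASSERT_RTNL();

	/* Only one master upper device may be linked at a time. */
	err = netdev_master_upper_dev_link(slave_dev, bond_dev);
	if (err)
		return err;

	slave_dev->flags |= IFF_SLAVE;
	rtmsg_ifinfo(RTM_NEWLINK, slave_dev, IFF_SLAVE);
	return 0;
}

static void example_release(struct net_device *bond_dev,
			    struct net_device *slave_dev)
{
	ASSERT_RTNL();

	netdev_upper_dev_unlink(slave_dev, bond_dev);
	slave_dev->flags &= ~IFF_SLAVE;
	rtmsg_ifinfo(RTM_NEWLINK, slave_dev, IFF_SLAVE);
}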
4661 4411
4662static void dev_change_rx_flags(struct net_device *dev, int flags) 4412static void dev_change_rx_flags(struct net_device *dev, int flags)
4663{ 4413{
@@ -5020,381 +4770,33 @@ int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
5020 if (!netif_device_present(dev)) 4770 if (!netif_device_present(dev))
5021 return -ENODEV; 4771 return -ENODEV;
5022 err = ops->ndo_set_mac_address(dev, sa); 4772 err = ops->ndo_set_mac_address(dev, sa);
5023 if (!err) 4773 if (err)
5024 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); 4774 return err;
4775 dev->addr_assign_type = NET_ADDR_SET;
4776 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
5025 add_device_randomness(dev->dev_addr, dev->addr_len); 4777 add_device_randomness(dev->dev_addr, dev->addr_len);
5026 return err; 4778 return 0;
5027} 4779}
5028EXPORT_SYMBOL(dev_set_mac_address); 4780EXPORT_SYMBOL(dev_set_mac_address);
5029 4781
5030/*
5031 * Perform the SIOCxIFxxx calls, inside rcu_read_lock()
5032 */
5033static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cmd)
5034{
5035 int err;
5036 struct net_device *dev = dev_get_by_name_rcu(net, ifr->ifr_name);
5037
5038 if (!dev)
5039 return -ENODEV;
5040
5041 switch (cmd) {
5042 case SIOCGIFFLAGS: /* Get interface flags */
5043 ifr->ifr_flags = (short) dev_get_flags(dev);
5044 return 0;
5045
5046 case SIOCGIFMETRIC: /* Get the metric on the interface
5047 (currently unused) */
5048 ifr->ifr_metric = 0;
5049 return 0;
5050
5051 case SIOCGIFMTU: /* Get the MTU of a device */
5052 ifr->ifr_mtu = dev->mtu;
5053 return 0;
5054
5055 case SIOCGIFHWADDR:
5056 if (!dev->addr_len)
5057 memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data);
5058 else
5059 memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr,
5060 min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
5061 ifr->ifr_hwaddr.sa_family = dev->type;
5062 return 0;
5063
5064 case SIOCGIFSLAVE:
5065 err = -EINVAL;
5066 break;
5067
5068 case SIOCGIFMAP:
5069 ifr->ifr_map.mem_start = dev->mem_start;
5070 ifr->ifr_map.mem_end = dev->mem_end;
5071 ifr->ifr_map.base_addr = dev->base_addr;
5072 ifr->ifr_map.irq = dev->irq;
5073 ifr->ifr_map.dma = dev->dma;
5074 ifr->ifr_map.port = dev->if_port;
5075 return 0;
5076
5077 case SIOCGIFINDEX:
5078 ifr->ifr_ifindex = dev->ifindex;
5079 return 0;
5080
5081 case SIOCGIFTXQLEN:
5082 ifr->ifr_qlen = dev->tx_queue_len;
5083 return 0;
5084
5085 default:
5086 /* dev_ioctl() should ensure this case
5087 * is never reached
5088 */
5089 WARN_ON(1);
5090 err = -ENOTTY;
5091 break;
5092
5093 }
5094 return err;
5095}
5096
5097/*
5098 * Perform the SIOCxIFxxx calls, inside rtnl_lock()
5099 */
5100static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
5101{
5102 int err;
5103 struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
5104 const struct net_device_ops *ops;
5105
5106 if (!dev)
5107 return -ENODEV;
5108
5109 ops = dev->netdev_ops;
5110
5111 switch (cmd) {
5112 case SIOCSIFFLAGS: /* Set interface flags */
5113 return dev_change_flags(dev, ifr->ifr_flags);
5114
5115 case SIOCSIFMETRIC: /* Set the metric on the interface
5116 (currently unused) */
5117 return -EOPNOTSUPP;
5118
5119 case SIOCSIFMTU: /* Set the MTU of a device */
5120 return dev_set_mtu(dev, ifr->ifr_mtu);
5121
5122 case SIOCSIFHWADDR:
5123 return dev_set_mac_address(dev, &ifr->ifr_hwaddr);
5124
5125 case SIOCSIFHWBROADCAST:
5126 if (ifr->ifr_hwaddr.sa_family != dev->type)
5127 return -EINVAL;
5128 memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
5129 min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
5130 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
5131 return 0;
5132
5133 case SIOCSIFMAP:
5134 if (ops->ndo_set_config) {
5135 if (!netif_device_present(dev))
5136 return -ENODEV;
5137 return ops->ndo_set_config(dev, &ifr->ifr_map);
5138 }
5139 return -EOPNOTSUPP;
5140
5141 case SIOCADDMULTI:
5142 if (!ops->ndo_set_rx_mode ||
5143 ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
5144 return -EINVAL;
5145 if (!netif_device_present(dev))
5146 return -ENODEV;
5147 return dev_mc_add_global(dev, ifr->ifr_hwaddr.sa_data);
5148
5149 case SIOCDELMULTI:
5150 if (!ops->ndo_set_rx_mode ||
5151 ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
5152 return -EINVAL;
5153 if (!netif_device_present(dev))
5154 return -ENODEV;
5155 return dev_mc_del_global(dev, ifr->ifr_hwaddr.sa_data);
5156
5157 case SIOCSIFTXQLEN:
5158 if (ifr->ifr_qlen < 0)
5159 return -EINVAL;
5160 dev->tx_queue_len = ifr->ifr_qlen;
5161 return 0;
5162
5163 case SIOCSIFNAME:
5164 ifr->ifr_newname[IFNAMSIZ-1] = '\0';
5165 return dev_change_name(dev, ifr->ifr_newname);
5166
5167 case SIOCSHWTSTAMP:
5168 err = net_hwtstamp_validate(ifr);
5169 if (err)
5170 return err;
5171 /* fall through */
5172
5173 /*
5174 * Unknown or private ioctl
5175 */
5176 default:
5177 if ((cmd >= SIOCDEVPRIVATE &&
5178 cmd <= SIOCDEVPRIVATE + 15) ||
5179 cmd == SIOCBONDENSLAVE ||
5180 cmd == SIOCBONDRELEASE ||
5181 cmd == SIOCBONDSETHWADDR ||
5182 cmd == SIOCBONDSLAVEINFOQUERY ||
5183 cmd == SIOCBONDINFOQUERY ||
5184 cmd == SIOCBONDCHANGEACTIVE ||
5185 cmd == SIOCGMIIPHY ||
5186 cmd == SIOCGMIIREG ||
5187 cmd == SIOCSMIIREG ||
5188 cmd == SIOCBRADDIF ||
5189 cmd == SIOCBRDELIF ||
5190 cmd == SIOCSHWTSTAMP ||
5191 cmd == SIOCWANDEV) {
5192 err = -EOPNOTSUPP;
5193 if (ops->ndo_do_ioctl) {
5194 if (netif_device_present(dev))
5195 err = ops->ndo_do_ioctl(dev, ifr, cmd);
5196 else
5197 err = -ENODEV;
5198 }
5199 } else
5200 err = -EINVAL;
5201
5202 }
5203 return err;
5204}
5205
5206/*
5207 * This function handles all "interface"-type I/O control requests. The actual
5208 * 'doing' part of this is dev_ifsioc above.
5209 */
5210
5211/** 4782/**
5212 * dev_ioctl - network device ioctl 4783 * dev_change_carrier - Change device carrier
5213 * @net: the applicable net namespace 4784 * @dev: device
5214 * @cmd: command to issue 4785 * @new_carrier: new value
5215 * @arg: pointer to a struct ifreq in user space
5216 * 4786 *
5217 * Issue ioctl functions to devices. This is normally called by the 4787 * Change the device carrier state.
5218 * user space syscall interfaces but can sometimes be useful for
5219 * other purposes. The return value is the return from the syscall if
5220 * positive or a negative errno code on error.
5221 */ 4788 */
5222 4789int dev_change_carrier(struct net_device *dev, bool new_carrier)
5223int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
5224{ 4790{
5225 struct ifreq ifr; 4791 const struct net_device_ops *ops = dev->netdev_ops;
5226 int ret;
5227 char *colon;
5228
5229 /* One special case: SIOCGIFCONF takes ifconf argument
5230 and requires shared lock, because it sleeps writing
5231 to user space.
5232 */
5233
5234 if (cmd == SIOCGIFCONF) {
5235 rtnl_lock();
5236 ret = dev_ifconf(net, (char __user *) arg);
5237 rtnl_unlock();
5238 return ret;
5239 }
5240 if (cmd == SIOCGIFNAME)
5241 return dev_ifname(net, (struct ifreq __user *)arg);
5242
5243 if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
5244 return -EFAULT;
5245
5246 ifr.ifr_name[IFNAMSIZ-1] = 0;
5247
5248 colon = strchr(ifr.ifr_name, ':');
5249 if (colon)
5250 *colon = 0;
5251
5252 /*
5253 * See which interface the caller is talking about.
5254 */
5255
5256 switch (cmd) {
5257 /*
5258 * These ioctl calls:
5259 * - can be done by all.
5260 * - atomic and do not require locking.
5261 * - return a value
5262 */
5263 case SIOCGIFFLAGS:
5264 case SIOCGIFMETRIC:
5265 case SIOCGIFMTU:
5266 case SIOCGIFHWADDR:
5267 case SIOCGIFSLAVE:
5268 case SIOCGIFMAP:
5269 case SIOCGIFINDEX:
5270 case SIOCGIFTXQLEN:
5271 dev_load(net, ifr.ifr_name);
5272 rcu_read_lock();
5273 ret = dev_ifsioc_locked(net, &ifr, cmd);
5274 rcu_read_unlock();
5275 if (!ret) {
5276 if (colon)
5277 *colon = ':';
5278 if (copy_to_user(arg, &ifr,
5279 sizeof(struct ifreq)))
5280 ret = -EFAULT;
5281 }
5282 return ret;
5283
5284 case SIOCETHTOOL:
5285 dev_load(net, ifr.ifr_name);
5286 rtnl_lock();
5287 ret = dev_ethtool(net, &ifr);
5288 rtnl_unlock();
5289 if (!ret) {
5290 if (colon)
5291 *colon = ':';
5292 if (copy_to_user(arg, &ifr,
5293 sizeof(struct ifreq)))
5294 ret = -EFAULT;
5295 }
5296 return ret;
5297
5298 /*
5299 * These ioctl calls:
5300 * - require superuser power.
5301 * - require strict serialization.
5302 * - return a value
5303 */
5304 case SIOCGMIIPHY:
5305 case SIOCGMIIREG:
5306 case SIOCSIFNAME:
5307 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
5308 return -EPERM;
5309 dev_load(net, ifr.ifr_name);
5310 rtnl_lock();
5311 ret = dev_ifsioc(net, &ifr, cmd);
5312 rtnl_unlock();
5313 if (!ret) {
5314 if (colon)
5315 *colon = ':';
5316 if (copy_to_user(arg, &ifr,
5317 sizeof(struct ifreq)))
5318 ret = -EFAULT;
5319 }
5320 return ret;
5321
5322 /*
5323 * These ioctl calls:
5324 * - require superuser power.
5325 * - require strict serialization.
5326 * - do not return a value
5327 */
5328 case SIOCSIFMAP:
5329 case SIOCSIFTXQLEN:
5330 if (!capable(CAP_NET_ADMIN))
5331 return -EPERM;
5332 /* fall through */
5333 /*
5334 * These ioctl calls:
5335 * - require local superuser power.
5336 * - require strict serialization.
5337 * - do not return a value
5338 */
5339 case SIOCSIFFLAGS:
5340 case SIOCSIFMETRIC:
5341 case SIOCSIFMTU:
5342 case SIOCSIFHWADDR:
5343 case SIOCSIFSLAVE:
5344 case SIOCADDMULTI:
5345 case SIOCDELMULTI:
5346 case SIOCSIFHWBROADCAST:
5347 case SIOCSMIIREG:
5348 case SIOCBONDENSLAVE:
5349 case SIOCBONDRELEASE:
5350 case SIOCBONDSETHWADDR:
5351 case SIOCBONDCHANGEACTIVE:
5352 case SIOCBRADDIF:
5353 case SIOCBRDELIF:
5354 case SIOCSHWTSTAMP:
5355 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
5356 return -EPERM;
5357 /* fall through */
5358 case SIOCBONDSLAVEINFOQUERY:
5359 case SIOCBONDINFOQUERY:
5360 dev_load(net, ifr.ifr_name);
5361 rtnl_lock();
5362 ret = dev_ifsioc(net, &ifr, cmd);
5363 rtnl_unlock();
5364 return ret;
5365
5366 case SIOCGIFMEM:
5367 /* Get the per device memory space. We can add this but
5368 * currently do not support it */
5369 case SIOCSIFMEM:
5370 /* Set the per device memory buffer space.
5371 * Not applicable in our case */
5372 case SIOCSIFLINK:
5373 return -ENOTTY;
5374 4792
5375 /* 4793 if (!ops->ndo_change_carrier)
5376 * Unknown or private ioctl. 4794 return -EOPNOTSUPP;
5377 */ 4795 if (!netif_device_present(dev))
5378 default: 4796 return -ENODEV;
5379 if (cmd == SIOCWANDEV || 4797 return ops->ndo_change_carrier(dev, new_carrier);
5380 (cmd >= SIOCDEVPRIVATE &&
5381 cmd <= SIOCDEVPRIVATE + 15)) {
5382 dev_load(net, ifr.ifr_name);
5383 rtnl_lock();
5384 ret = dev_ifsioc(net, &ifr, cmd);
5385 rtnl_unlock();
5386 if (!ret && copy_to_user(arg, &ifr,
5387 sizeof(struct ifreq)))
5388 ret = -EFAULT;
5389 return ret;
5390 }
5391 /* Take care of Wireless Extensions */
5392 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST)
5393 return wext_handle_ioctl(net, &ifr, cmd, arg);
5394 return -ENOTTY;
5395 }
5396} 4798}
5397 4799EXPORT_SYMBOL(dev_change_carrier);
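dev_change_carrier() only validates and forwards to the driver; a driver that wants software-controlled carrier would implement the new hook roughly like this (a sketch; the example_* names are illustrative):

/* Toggle carrier purely in software, as a virtual device might. */
static int example_change_carrier(struct net_device *dev, bool new_carrier)
{
	if (new_carrier)
		netif_carrier_on(dev);
	else
		netif_carrier_off(dev);
	return 0;
}

static const struct net_device_ops example_netdev_ops = {
	.ndo_change_carrier	= example_change_carrier,
	/* ... other hooks ... */
};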
5398 4800
5399/** 4801/**
5400 * dev_new_index - allocate an ifindex 4802 * dev_new_index - allocate an ifindex
@@ -5482,11 +4884,15 @@ static void rollback_registered_many(struct list_head *head)
5482 if (dev->netdev_ops->ndo_uninit) 4884 if (dev->netdev_ops->ndo_uninit)
5483 dev->netdev_ops->ndo_uninit(dev); 4885 dev->netdev_ops->ndo_uninit(dev);
5484 4886
5485 /* Notifier chain MUST detach us from master device. */ 4887 /* Notifier chain MUST detach us from all upper devices. */
5486 WARN_ON(dev->master); 4888 WARN_ON(netdev_has_any_upper_dev(dev));
5487 4889
5488 /* Remove entries from kobject tree */ 4890 /* Remove entries from kobject tree */
5489 netdev_unregister_kobject(dev); 4891 netdev_unregister_kobject(dev);
4892#ifdef CONFIG_XPS
4893 /* Remove XPS queueing entries */
4894 netif_reset_xps_queues_gt(dev, 0);
4895#endif
5490 } 4896 }
5491 4897
5492 synchronize_net(); 4898 synchronize_net();
@@ -5664,10 +5070,9 @@ static int netif_alloc_rx_queues(struct net_device *dev)
5664 BUG_ON(count < 1); 5070 BUG_ON(count < 1);
5665 5071
5666 rx = kcalloc(count, sizeof(struct netdev_rx_queue), GFP_KERNEL); 5072 rx = kcalloc(count, sizeof(struct netdev_rx_queue), GFP_KERNEL);
5667 if (!rx) { 5073 if (!rx)
5668 pr_err("netdev: Unable to allocate %u rx queues\n", count);
5669 return -ENOMEM; 5074 return -ENOMEM;
5670 } 5075
5671 dev->_rx = rx; 5076 dev->_rx = rx;
5672 5077
5673 for (i = 0; i < count; i++) 5078 for (i = 0; i < count; i++)
@@ -5698,10 +5103,9 @@ static int netif_alloc_netdev_queues(struct net_device *dev)
5698 BUG_ON(count < 1); 5103 BUG_ON(count < 1);
5699 5104
5700 tx = kcalloc(count, sizeof(struct netdev_queue), GFP_KERNEL); 5105 tx = kcalloc(count, sizeof(struct netdev_queue), GFP_KERNEL);
5701 if (!tx) { 5106 if (!tx)
5702 pr_err("netdev: Unable to allocate %u tx queues\n", count);
5703 return -ENOMEM; 5107 return -ENOMEM;
5704 } 5108
5705 dev->_tx = tx; 5109 dev->_tx = tx;
5706 5110
5707 netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL); 5111 netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
@@ -5760,6 +5164,14 @@ int register_netdevice(struct net_device *dev)
5760 } 5164 }
5761 } 5165 }
5762 5166
5167 if (((dev->hw_features | dev->features) & NETIF_F_HW_VLAN_FILTER) &&
5168 (!dev->netdev_ops->ndo_vlan_rx_add_vid ||
5169 !dev->netdev_ops->ndo_vlan_rx_kill_vid)) {
5170 netdev_WARN(dev, "Buggy VLAN acceleration in driver!\n");
5171 ret = -EINVAL;
5172 goto err_uninit;
5173 }
5174
5763 ret = -EBUSY; 5175 ret = -EBUSY;
5764 if (!dev->ifindex) 5176 if (!dev->ifindex)
5765 dev->ifindex = dev_new_index(net); 5177 dev->ifindex = dev_new_index(net);
@@ -5815,6 +5227,13 @@ int register_netdevice(struct net_device *dev)
5815 list_netdevice(dev); 5227 list_netdevice(dev);
5816 add_device_randomness(dev->dev_addr, dev->addr_len); 5228 add_device_randomness(dev->dev_addr, dev->addr_len);
5817 5229
5230 /* If the device has a permanent device address, the driver should
5231 * set dev_addr and leave addr_assign_type at NET_ADDR_PERM
5232 * (the default value).
5233 */
5234 if (dev->addr_assign_type == NET_ADDR_PERM)
5235 memcpy(dev->perm_addr, dev->dev_addr, dev->addr_len);
5236
5818 /* Notify protocols, that a new device appeared. */ 5237 /* Notify protocols, that a new device appeared. */
5819 ret = call_netdevice_notifiers(NETDEV_REGISTER, dev); 5238 ret = call_netdevice_notifiers(NETDEV_REGISTER, dev);
5820 ret = notifier_to_errno(ret); 5239 ret = notifier_to_errno(ret);
@@ -6121,6 +5540,14 @@ struct netdev_queue *dev_ingress_queue_create(struct net_device *dev)
6121 5540
6122static const struct ethtool_ops default_ethtool_ops; 5541static const struct ethtool_ops default_ethtool_ops;
6123 5542
5543void netdev_set_default_ethtool_ops(struct net_device *dev,
5544 const struct ethtool_ops *ops)
5545{
5546 if (dev->ethtool_ops == &default_ethtool_ops)
5547 dev->ethtool_ops = ops;
5548}
5549EXPORT_SYMBOL_GPL(netdev_set_default_ethtool_ops);
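The helper installs the ops only while the device still points at the shared default_ethtool_ops, so a stacking layer can provide fallbacks without clobbering a driver's own ethtool_ops. A hedged usage sketch (the example_* names are made up):

static const struct ethtool_ops example_default_ethtool_ops = {
	.get_link	= ethtool_op_get_link,
};

static void example_setup(struct net_device *dev)
{
	/* No-op if the driver already set dev->ethtool_ops. */
	netdev_set_default_ethtool_ops(dev, &example_default_ethtool_ops);
}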
5550
6124/** 5551/**
6125 * alloc_netdev_mqs - allocate network device 5552 * alloc_netdev_mqs - allocate network device
6126 * @sizeof_priv: size of private data to allocate space for 5553 * @sizeof_priv: size of private data to allocate space for
@@ -6165,10 +5592,8 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
6165 alloc_size += NETDEV_ALIGN - 1; 5592 alloc_size += NETDEV_ALIGN - 1;
6166 5593
6167 p = kzalloc(alloc_size, GFP_KERNEL); 5594 p = kzalloc(alloc_size, GFP_KERNEL);
6168 if (!p) { 5595 if (!p)
6169 pr_err("alloc_netdev: Unable to allocate device\n");
6170 return NULL; 5596 return NULL;
6171 }
6172 5597
6173 dev = PTR_ALIGN(p, NETDEV_ALIGN); 5598 dev = PTR_ALIGN(p, NETDEV_ALIGN);
6174 dev->padded = (char *)dev - (char *)p; 5599 dev->padded = (char *)dev - (char *)p;
@@ -6191,6 +5616,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
6191 INIT_LIST_HEAD(&dev->napi_list); 5616 INIT_LIST_HEAD(&dev->napi_list);
6192 INIT_LIST_HEAD(&dev->unreg_list); 5617 INIT_LIST_HEAD(&dev->unreg_list);
6193 INIT_LIST_HEAD(&dev->link_watch_list); 5618 INIT_LIST_HEAD(&dev->link_watch_list);
5619 INIT_LIST_HEAD(&dev->upper_dev_list);
6194 dev->priv_flags = IFF_XMIT_DST_RELEASE; 5620 dev->priv_flags = IFF_XMIT_DST_RELEASE;
6195 setup(dev); 5621 setup(dev);
6196 5622
@@ -6834,19 +6260,9 @@ static int __init net_dev_init(void)
6834 6260
6835 hotcpu_notifier(dev_cpu_callback, 0); 6261 hotcpu_notifier(dev_cpu_callback, 0);
6836 dst_init(); 6262 dst_init();
6837 dev_mcast_init();
6838 rc = 0; 6263 rc = 0;
6839out: 6264out:
6840 return rc; 6265 return rc;
6841} 6266}
6842 6267
6843subsys_initcall(net_dev_init); 6268subsys_initcall(net_dev_init);
6844
6845static int __init initialize_hashrnd(void)
6846{
6847 get_random_bytes(&hashrnd, sizeof(hashrnd));
6848 return 0;
6849}
6850
6851late_initcall_sync(initialize_hashrnd);
6852
diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c
index b079c7bbc157..bd2eb9d3e369 100644
--- a/net/core/dev_addr_lists.c
+++ b/net/core/dev_addr_lists.c
@@ -15,7 +15,6 @@
15#include <linux/rtnetlink.h> 15#include <linux/rtnetlink.h>
16#include <linux/export.h> 16#include <linux/export.h>
17#include <linux/list.h> 17#include <linux/list.h>
18#include <linux/proc_fs.h>
19 18
20/* 19/*
21 * General list handling functions 20 * General list handling functions
@@ -727,76 +726,3 @@ void dev_mc_init(struct net_device *dev)
727 __hw_addr_init(&dev->mc); 726 __hw_addr_init(&dev->mc);
728} 727}
729EXPORT_SYMBOL(dev_mc_init); 728EXPORT_SYMBOL(dev_mc_init);
730
731#ifdef CONFIG_PROC_FS
732#include <linux/seq_file.h>
733
734static int dev_mc_seq_show(struct seq_file *seq, void *v)
735{
736 struct netdev_hw_addr *ha;
737 struct net_device *dev = v;
738
739 if (v == SEQ_START_TOKEN)
740 return 0;
741
742 netif_addr_lock_bh(dev);
743 netdev_for_each_mc_addr(ha, dev) {
744 int i;
745
746 seq_printf(seq, "%-4d %-15s %-5d %-5d ", dev->ifindex,
747 dev->name, ha->refcount, ha->global_use);
748
749 for (i = 0; i < dev->addr_len; i++)
750 seq_printf(seq, "%02x", ha->addr[i]);
751
752 seq_putc(seq, '\n');
753 }
754 netif_addr_unlock_bh(dev);
755 return 0;
756}
757
758static const struct seq_operations dev_mc_seq_ops = {
759 .start = dev_seq_start,
760 .next = dev_seq_next,
761 .stop = dev_seq_stop,
762 .show = dev_mc_seq_show,
763};
764
765static int dev_mc_seq_open(struct inode *inode, struct file *file)
766{
767 return seq_open_net(inode, file, &dev_mc_seq_ops,
768 sizeof(struct seq_net_private));
769}
770
771static const struct file_operations dev_mc_seq_fops = {
772 .owner = THIS_MODULE,
773 .open = dev_mc_seq_open,
774 .read = seq_read,
775 .llseek = seq_lseek,
776 .release = seq_release_net,
777};
778
779#endif
780
781static int __net_init dev_mc_net_init(struct net *net)
782{
783 if (!proc_net_fops_create(net, "dev_mcast", 0, &dev_mc_seq_fops))
784 return -ENOMEM;
785 return 0;
786}
787
788static void __net_exit dev_mc_net_exit(struct net *net)
789{
790 proc_net_remove(net, "dev_mcast");
791}
792
793static struct pernet_operations __net_initdata dev_mc_net_ops = {
794 .init = dev_mc_net_init,
795 .exit = dev_mc_net_exit,
796};
797
798void __init dev_mcast_init(void)
799{
800 register_pernet_subsys(&dev_mc_net_ops);
801}
802
diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c
new file mode 100644
index 000000000000..6cc0481faade
--- /dev/null
+++ b/net/core/dev_ioctl.c
@@ -0,0 +1,576 @@
1#include <linux/kmod.h>
2#include <linux/netdevice.h>
3#include <linux/etherdevice.h>
4#include <linux/rtnetlink.h>
5#include <linux/net_tstamp.h>
6#include <linux/wireless.h>
7#include <net/wext.h>
8
9/*
10 * Map an interface index to its name (SIOCGIFNAME)
11 */
12
13/*
14 * We need this ioctl for efficient implementation of the
15 * if_indextoname() function required by the IPv6 API. Without
16 * it, we would have to search all the interfaces to find a
17 * match. --pb
18 */
19
20static int dev_ifname(struct net *net, struct ifreq __user *arg)
21{
22 struct net_device *dev;
23 struct ifreq ifr;
24 unsigned seq;
25
26 /*
27 * Fetch the caller's info block.
28 */
29
30 if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
31 return -EFAULT;
32
33retry:
34 seq = read_seqcount_begin(&devnet_rename_seq);
35 rcu_read_lock();
36 dev = dev_get_by_index_rcu(net, ifr.ifr_ifindex);
37 if (!dev) {
38 rcu_read_unlock();
39 return -ENODEV;
40 }
41
42 strcpy(ifr.ifr_name, dev->name);
43 rcu_read_unlock();
44 if (read_seqcount_retry(&devnet_rename_seq, seq))
45 goto retry;
46
47 if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
48 return -EFAULT;
49 return 0;
50}
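From user space this is exactly the path behind if_indextoname(); glibc issues the SIOCGIFNAME ioctl handled above. A compilable demonstration (assuming ifindex 1 exists, which is normally "lo"):

#include <stdio.h>
#include <net/if.h>

int main(void)
{
	char name[IF_NAMESIZE];

	if (if_indextoname(1, name))	/* resolved via SIOCGIFNAME */
		printf("ifindex 1 is %s\n", name);
	return 0;
}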
51
52static gifconf_func_t *gifconf_list[NPROTO];
53
54/**
55 * register_gifconf - register a SIOCGIF handler
56 * @family: Address family
57 * @gifconf: Function handler
58 *
59 * Register protocol-dependent address dumping routines. The handler
60 * that is passed must not be freed or reused until it has been replaced
61 * by another handler.
62 */
63int register_gifconf(unsigned int family, gifconf_func_t *gifconf)
64{
65 if (family >= NPROTO)
66 return -EINVAL;
67 gifconf_list[family] = gifconf;
68 return 0;
69}
70EXPORT_SYMBOL(register_gifconf);
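IPv4, for instance, hooks in here with register_gifconf(PF_INET, inet_gifconf) from net/ipv4/devinet.c. A handler follows this shape (sketch only; the example names are hypothetical, and registering PF_INET here would displace the real IPv4 handler):

/* A gifconf handler writes one struct ifreq per address into the
 * user buffer and returns the bytes written; called with a NULL
 * buffer it returns the size it would need.
 */
static int example_gifconf(struct net_device *dev,
			   char __user *buf, int len)
{
	/* ... copy struct ifreq entries for dev's addresses ... */
	return 0;
}

static int __init example_proto_init(void)
{
	return register_gifconf(PF_INET, example_gifconf);
}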
71
72/*
73 * Perform a SIOCGIFCONF call. This structure will change
74 * size eventually, and there is nothing I can do about it.
75 * Thus we will need a 'compatibility mode'.
76 */
77
78static int dev_ifconf(struct net *net, char __user *arg)
79{
80 struct ifconf ifc;
81 struct net_device *dev;
82 char __user *pos;
83 int len;
84 int total;
85 int i;
86
87 /*
88 * Fetch the caller's info block.
89 */
90
91 if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
92 return -EFAULT;
93
94 pos = ifc.ifc_buf;
95 len = ifc.ifc_len;
96
97 /*
98 * Loop over the interfaces, and write an info block for each.
99 */
100
101 total = 0;
102 for_each_netdev(net, dev) {
103 for (i = 0; i < NPROTO; i++) {
104 if (gifconf_list[i]) {
105 int done;
106 if (!pos)
107 done = gifconf_list[i](dev, NULL, 0);
108 else
109 done = gifconf_list[i](dev, pos + total,
110 len - total);
111 if (done < 0)
112 return -EFAULT;
113 total += done;
114 }
115 }
116 }
117
118 /*
119 * All done. Write the updated control block back to the caller.
120 */
121 ifc.ifc_len = total;
122
123 /*
124 * Both BSD and Solaris return 0 here, so we do too.
125 */
126 return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0;
127}
128
129/*
130 * Perform the SIOCxIFxxx calls, inside rcu_read_lock()
131 */
132static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cmd)
133{
134 int err;
135 struct net_device *dev = dev_get_by_name_rcu(net, ifr->ifr_name);
136
137 if (!dev)
138 return -ENODEV;
139
140 switch (cmd) {
141 case SIOCGIFFLAGS: /* Get interface flags */
142 ifr->ifr_flags = (short) dev_get_flags(dev);
143 return 0;
144
145 case SIOCGIFMETRIC: /* Get the metric on the interface
146 (currently unused) */
147 ifr->ifr_metric = 0;
148 return 0;
149
150 case SIOCGIFMTU: /* Get the MTU of a device */
151 ifr->ifr_mtu = dev->mtu;
152 return 0;
153
154 case SIOCGIFHWADDR:
155 if (!dev->addr_len)
156 memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data);
157 else
158 memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr,
159 min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
160 ifr->ifr_hwaddr.sa_family = dev->type;
161 return 0;
162
163 case SIOCGIFSLAVE:
164 err = -EINVAL;
165 break;
166
167 case SIOCGIFMAP:
168 ifr->ifr_map.mem_start = dev->mem_start;
169 ifr->ifr_map.mem_end = dev->mem_end;
170 ifr->ifr_map.base_addr = dev->base_addr;
171 ifr->ifr_map.irq = dev->irq;
172 ifr->ifr_map.dma = dev->dma;
173 ifr->ifr_map.port = dev->if_port;
174 return 0;
175
176 case SIOCGIFINDEX:
177 ifr->ifr_ifindex = dev->ifindex;
178 return 0;
179
180 case SIOCGIFTXQLEN:
181 ifr->ifr_qlen = dev->tx_queue_len;
182 return 0;
183
184 default:
185 /* dev_ioctl() should ensure this case
186 * is never reached
187 */
188 WARN_ON(1);
189 err = -ENOTTY;
190 break;
191
192 }
193 return err;
194}
195
196static int net_hwtstamp_validate(struct ifreq *ifr)
197{
198 struct hwtstamp_config cfg;
199 enum hwtstamp_tx_types tx_type;
200 enum hwtstamp_rx_filters rx_filter;
201 int tx_type_valid = 0;
202 int rx_filter_valid = 0;
203
204 if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg)))
205 return -EFAULT;
206
207 if (cfg.flags) /* reserved for future extensions */
208 return -EINVAL;
209
210 tx_type = cfg.tx_type;
211 rx_filter = cfg.rx_filter;
212
213 switch (tx_type) {
214 case HWTSTAMP_TX_OFF:
215 case HWTSTAMP_TX_ON:
216 case HWTSTAMP_TX_ONESTEP_SYNC:
217 tx_type_valid = 1;
218 break;
219 }
220
221 switch (rx_filter) {
222 case HWTSTAMP_FILTER_NONE:
223 case HWTSTAMP_FILTER_ALL:
224 case HWTSTAMP_FILTER_SOME:
225 case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
226 case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
227 case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
228 case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
229 case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
230 case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
231 case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
232 case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
233 case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
234 case HWTSTAMP_FILTER_PTP_V2_EVENT:
235 case HWTSTAMP_FILTER_PTP_V2_SYNC:
236 case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
237 rx_filter_valid = 1;
238 break;
239 }
240
241 if (!tx_type_valid || !rx_filter_valid)
242 return -ERANGE;
243
244 return 0;
245}
246
247/*
248 * Perform the SIOCxIFxxx calls, inside rtnl_lock()
249 */
250static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
251{
252 int err;
253 struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
254 const struct net_device_ops *ops;
255
256 if (!dev)
257 return -ENODEV;
258
259 ops = dev->netdev_ops;
260
261 switch (cmd) {
262 case SIOCSIFFLAGS: /* Set interface flags */
263 return dev_change_flags(dev, ifr->ifr_flags);
264
265 case SIOCSIFMETRIC: /* Set the metric on the interface
266 (currently unused) */
267 return -EOPNOTSUPP;
268
269 case SIOCSIFMTU: /* Set the MTU of a device */
270 return dev_set_mtu(dev, ifr->ifr_mtu);
271
272 case SIOCSIFHWADDR:
273 return dev_set_mac_address(dev, &ifr->ifr_hwaddr);
274
275 case SIOCSIFHWBROADCAST:
276 if (ifr->ifr_hwaddr.sa_family != dev->type)
277 return -EINVAL;
278 memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
279 min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
280 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
281 return 0;
282
283 case SIOCSIFMAP:
284 if (ops->ndo_set_config) {
285 if (!netif_device_present(dev))
286 return -ENODEV;
287 return ops->ndo_set_config(dev, &ifr->ifr_map);
288 }
289 return -EOPNOTSUPP;
290
291 case SIOCADDMULTI:
292 if (!ops->ndo_set_rx_mode ||
293 ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
294 return -EINVAL;
295 if (!netif_device_present(dev))
296 return -ENODEV;
297 return dev_mc_add_global(dev, ifr->ifr_hwaddr.sa_data);
298
299 case SIOCDELMULTI:
300 if (!ops->ndo_set_rx_mode ||
301 ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
302 return -EINVAL;
303 if (!netif_device_present(dev))
304 return -ENODEV;
305 return dev_mc_del_global(dev, ifr->ifr_hwaddr.sa_data);
306
307 case SIOCSIFTXQLEN:
308 if (ifr->ifr_qlen < 0)
309 return -EINVAL;
310 dev->tx_queue_len = ifr->ifr_qlen;
311 return 0;
312
313 case SIOCSIFNAME:
314 ifr->ifr_newname[IFNAMSIZ-1] = '\0';
315 return dev_change_name(dev, ifr->ifr_newname);
316
317 case SIOCSHWTSTAMP:
318 err = net_hwtstamp_validate(ifr);
319 if (err)
320 return err;
321 /* fall through */
322
323 /*
324 * Unknown or private ioctl
325 */
326 default:
327 if ((cmd >= SIOCDEVPRIVATE &&
328 cmd <= SIOCDEVPRIVATE + 15) ||
329 cmd == SIOCBONDENSLAVE ||
330 cmd == SIOCBONDRELEASE ||
331 cmd == SIOCBONDSETHWADDR ||
332 cmd == SIOCBONDSLAVEINFOQUERY ||
333 cmd == SIOCBONDINFOQUERY ||
334 cmd == SIOCBONDCHANGEACTIVE ||
335 cmd == SIOCGMIIPHY ||
336 cmd == SIOCGMIIREG ||
337 cmd == SIOCSMIIREG ||
338 cmd == SIOCBRADDIF ||
339 cmd == SIOCBRDELIF ||
340 cmd == SIOCSHWTSTAMP ||
341 cmd == SIOCWANDEV) {
342 err = -EOPNOTSUPP;
343 if (ops->ndo_do_ioctl) {
344 if (netif_device_present(dev))
345 err = ops->ndo_do_ioctl(dev, ifr, cmd);
346 else
347 err = -ENODEV;
348 }
349 } else
350 err = -EINVAL;
351
352 }
353 return err;
354}
355
356/**
357 * dev_load - load a network module
358 * @net: the applicable net namespace
359 * @name: name of interface
360 *
361 * If a network interface is not present and the process has suitable
362 * privileges, this function loads the module. If module loading is not
363 * available in this kernel, it becomes a no-op.
364 */
365
366void dev_load(struct net *net, const char *name)
367{
368 struct net_device *dev;
369 int no_module;
370
371 rcu_read_lock();
372 dev = dev_get_by_name_rcu(net, name);
373 rcu_read_unlock();
374
375 no_module = !dev;
376 if (no_module && capable(CAP_NET_ADMIN))
377 no_module = request_module("netdev-%s", name);
378 if (no_module && capable(CAP_SYS_MODULE)) {
379 if (!request_module("%s", name))
380 pr_warn("Loading kernel module for a network device with CAP_SYS_MODULE (deprecated). Use CAP_NET_ADMIN and alias netdev-%s instead.\n",
381 name);
382 }
383}
384EXPORT_SYMBOL(dev_load);
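The privileged "netdev-%s" form means a driver opts in to ioctl-time auto-loading with a module alias; the helper macro from netdevice.h expands to exactly that prefix (the device name "foo0" here is an assumption):

/* Lets dev_load() auto-load this module via request_module("netdev-foo0");
 * expands to MODULE_ALIAS("netdev-foo0").
 */
MODULE_ALIAS_NETDEV("foo0");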
385
386/*
387 * This function handles all "interface"-type I/O control requests. The actual
388 * 'doing' part of this is dev_ifsioc above.
389 */
390
391/**
392 * dev_ioctl - network device ioctl
393 * @net: the applicable net namespace
394 * @cmd: command to issue
395 * @arg: pointer to a struct ifreq in user space
396 *
397 * Issue ioctl functions to devices. This is normally called by the
398 * user space syscall interfaces but can sometimes be useful for
399 * other purposes. The return value is the return from the syscall if
400 * positive or a negative errno code on error.
401 */
402
403int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
404{
405 struct ifreq ifr;
406 int ret;
407 char *colon;
408
409 /* One special case: SIOCGIFCONF takes ifconf argument
410 and requires shared lock, because it sleeps writing
411 to user space.
412 */
413
414 if (cmd == SIOCGIFCONF) {
415 rtnl_lock();
416 ret = dev_ifconf(net, (char __user *) arg);
417 rtnl_unlock();
418 return ret;
419 }
420 if (cmd == SIOCGIFNAME)
421 return dev_ifname(net, (struct ifreq __user *)arg);
422
423 if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
424 return -EFAULT;
425
426 ifr.ifr_name[IFNAMSIZ-1] = 0;
427
428 colon = strchr(ifr.ifr_name, ':');
429 if (colon)
430 *colon = 0;
431
432 /*
433 * See which interface the caller is talking about.
434 */
435
436 switch (cmd) {
437 /*
438 * These ioctl calls:
439 * - can be done by all.
440 * - atomic and do not require locking.
441 * - return a value
442 */
443 case SIOCGIFFLAGS:
444 case SIOCGIFMETRIC:
445 case SIOCGIFMTU:
446 case SIOCGIFHWADDR:
447 case SIOCGIFSLAVE:
448 case SIOCGIFMAP:
449 case SIOCGIFINDEX:
450 case SIOCGIFTXQLEN:
451 dev_load(net, ifr.ifr_name);
452 rcu_read_lock();
453 ret = dev_ifsioc_locked(net, &ifr, cmd);
454 rcu_read_unlock();
455 if (!ret) {
456 if (colon)
457 *colon = ':';
458 if (copy_to_user(arg, &ifr,
459 sizeof(struct ifreq)))
460 ret = -EFAULT;
461 }
462 return ret;
463
464 case SIOCETHTOOL:
465 dev_load(net, ifr.ifr_name);
466 rtnl_lock();
467 ret = dev_ethtool(net, &ifr);
468 rtnl_unlock();
469 if (!ret) {
470 if (colon)
471 *colon = ':';
472 if (copy_to_user(arg, &ifr,
473 sizeof(struct ifreq)))
474 ret = -EFAULT;
475 }
476 return ret;
477
478 /*
479 * These ioctl calls:
480 * - require superuser power.
481 * - require strict serialization.
482 * - return a value
483 */
484 case SIOCGMIIPHY:
485 case SIOCGMIIREG:
486 case SIOCSIFNAME:
487 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
488 return -EPERM;
489 dev_load(net, ifr.ifr_name);
490 rtnl_lock();
491 ret = dev_ifsioc(net, &ifr, cmd);
492 rtnl_unlock();
493 if (!ret) {
494 if (colon)
495 *colon = ':';
496 if (copy_to_user(arg, &ifr,
497 sizeof(struct ifreq)))
498 ret = -EFAULT;
499 }
500 return ret;
501
502 /*
503 * These ioctl calls:
504 * - require superuser power.
505 * - require strict serialization.
506 * - do not return a value
507 */
508 case SIOCSIFMAP:
509 case SIOCSIFTXQLEN:
510 if (!capable(CAP_NET_ADMIN))
511 return -EPERM;
512 /* fall through */
513 /*
514 * These ioctl calls:
515 * - require local superuser power.
516 * - require strict serialization.
517 * - do not return a value
518 */
519 case SIOCSIFFLAGS:
520 case SIOCSIFMETRIC:
521 case SIOCSIFMTU:
522 case SIOCSIFHWADDR:
523 case SIOCSIFSLAVE:
524 case SIOCADDMULTI:
525 case SIOCDELMULTI:
526 case SIOCSIFHWBROADCAST:
527 case SIOCSMIIREG:
528 case SIOCBONDENSLAVE:
529 case SIOCBONDRELEASE:
530 case SIOCBONDSETHWADDR:
531 case SIOCBONDCHANGEACTIVE:
532 case SIOCBRADDIF:
533 case SIOCBRDELIF:
534 case SIOCSHWTSTAMP:
535 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
536 return -EPERM;
537 /* fall through */
538 case SIOCBONDSLAVEINFOQUERY:
539 case SIOCBONDINFOQUERY:
540 dev_load(net, ifr.ifr_name);
541 rtnl_lock();
542 ret = dev_ifsioc(net, &ifr, cmd);
543 rtnl_unlock();
544 return ret;
545
546 case SIOCGIFMEM:
547 /* Get the per device memory space. We can add this but
548 * currently do not support it */
549 case SIOCSIFMEM:
550 /* Set the per device memory buffer space.
551 * Not applicable in our case */
552 case SIOCSIFLINK:
553 return -ENOTTY;
554
555 /*
556 * Unknown or private ioctl.
557 */
558 default:
559 if (cmd == SIOCWANDEV ||
560 (cmd >= SIOCDEVPRIVATE &&
561 cmd <= SIOCDEVPRIVATE + 15)) {
562 dev_load(net, ifr.ifr_name);
563 rtnl_lock();
564 ret = dev_ifsioc(net, &ifr, cmd);
565 rtnl_unlock();
566 if (!ret && copy_to_user(arg, &ifr,
567 sizeof(struct ifreq)))
568 ret = -EFAULT;
569 return ret;
570 }
571 /* Take care of Wireless Extensions */
572 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST)
573 return wext_handle_ioctl(net, &ifr, cmd, arg);
574 return -ENOTTY;
575 }
576}
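End to end, the dispatch above is what an ordinary user-space ioctl reaches; fetching an MTU, for example, lands in dev_ifsioc_locked() under rcu_read_lock() (compilable user-space C; the interface name "eth0" is an assumption):

#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <unistd.h>

int main(void)
{
	struct ifreq ifr;
	int fd = socket(AF_INET, SOCK_DGRAM, 0);

	if (fd < 0)
		return 1;
	memset(&ifr, 0, sizeof(ifr));
	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);
	if (ioctl(fd, SIOCGIFMTU, &ifr) == 0)
		printf("%s mtu %d\n", ifr.ifr_name, ifr.ifr_mtu);
	close(fd);
	return 0;
}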
diff --git a/net/core/dst.c b/net/core/dst.c
index ee6153e2cf43..35fd12f1a69c 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -179,6 +179,7 @@ void *dst_alloc(struct dst_ops *ops, struct net_device *dev,
179 dst_init_metrics(dst, dst_default_metrics, true); 179 dst_init_metrics(dst, dst_default_metrics, true);
180 dst->expires = 0UL; 180 dst->expires = 0UL;
181 dst->path = dst; 181 dst->path = dst;
182 dst->from = NULL;
182#ifdef CONFIG_XFRM 183#ifdef CONFIG_XFRM
183 dst->xfrm = NULL; 184 dst->xfrm = NULL;
184#endif 185#endif
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index a8705432e4b1..3e9b2c3e30f0 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -77,6 +77,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN]
77 [NETIF_F_TSO_ECN_BIT] = "tx-tcp-ecn-segmentation", 77 [NETIF_F_TSO_ECN_BIT] = "tx-tcp-ecn-segmentation",
78 [NETIF_F_TSO6_BIT] = "tx-tcp6-segmentation", 78 [NETIF_F_TSO6_BIT] = "tx-tcp6-segmentation",
79 [NETIF_F_FSO_BIT] = "tx-fcoe-segmentation", 79 [NETIF_F_FSO_BIT] = "tx-fcoe-segmentation",
80 [NETIF_F_GSO_GRE_BIT] = "tx-gre-segmentation",
80 81
81 [NETIF_F_FCOE_CRC_BIT] = "tx-checksum-fcoe-crc", 82 [NETIF_F_FCOE_CRC_BIT] = "tx-checksum-fcoe-crc",
82 [NETIF_F_SCTP_CSUM_BIT] = "tx-checksum-sctp", 83 [NETIF_F_SCTP_CSUM_BIT] = "tx-checksum-sctp",
@@ -175,7 +176,7 @@ static int __ethtool_get_sset_count(struct net_device *dev, int sset)
175 if (sset == ETH_SS_FEATURES) 176 if (sset == ETH_SS_FEATURES)
176 return ARRAY_SIZE(netdev_features_strings); 177 return ARRAY_SIZE(netdev_features_strings);
177 178
178 if (ops && ops->get_sset_count && ops->get_strings) 179 if (ops->get_sset_count && ops->get_strings)
179 return ops->get_sset_count(dev, sset); 180 return ops->get_sset_count(dev, sset);
180 else 181 else
181 return -EOPNOTSUPP; 182 return -EOPNOTSUPP;
@@ -311,7 +312,7 @@ int __ethtool_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
311{ 312{
312 ASSERT_RTNL(); 313 ASSERT_RTNL();
313 314
314 if (!dev->ethtool_ops || !dev->ethtool_ops->get_settings) 315 if (!dev->ethtool_ops->get_settings)
315 return -EOPNOTSUPP; 316 return -EOPNOTSUPP;
316 317
317 memset(cmd, 0, sizeof(struct ethtool_cmd)); 318 memset(cmd, 0, sizeof(struct ethtool_cmd));
@@ -355,7 +356,7 @@ static noinline_for_stack int ethtool_get_drvinfo(struct net_device *dev,
355 356
356 memset(&info, 0, sizeof(info)); 357 memset(&info, 0, sizeof(info));
357 info.cmd = ETHTOOL_GDRVINFO; 358 info.cmd = ETHTOOL_GDRVINFO;
358 if (ops && ops->get_drvinfo) { 359 if (ops->get_drvinfo) {
359 ops->get_drvinfo(dev, &info); 360 ops->get_drvinfo(dev, &info);
360 } else if (dev->dev.parent && dev->dev.parent->driver) { 361 } else if (dev->dev.parent && dev->dev.parent->driver) {
361 strlcpy(info.bus_info, dev_name(dev->dev.parent), 362 strlcpy(info.bus_info, dev_name(dev->dev.parent),
@@ -370,7 +371,7 @@ static noinline_for_stack int ethtool_get_drvinfo(struct net_device *dev,
370 * this method of obtaining string set info is deprecated; 371 * this method of obtaining string set info is deprecated;
371 * Use ETHTOOL_GSSET_INFO instead. 372 * Use ETHTOOL_GSSET_INFO instead.
372 */ 373 */
373 if (ops && ops->get_sset_count) { 374 if (ops->get_sset_count) {
374 int rc; 375 int rc;
375 376
376 rc = ops->get_sset_count(dev, ETH_SS_TEST); 377 rc = ops->get_sset_count(dev, ETH_SS_TEST);
@@ -383,9 +384,9 @@ static noinline_for_stack int ethtool_get_drvinfo(struct net_device *dev,
383 if (rc >= 0) 384 if (rc >= 0)
384 info.n_priv_flags = rc; 385 info.n_priv_flags = rc;
385 } 386 }
386 if (ops && ops->get_regs_len) 387 if (ops->get_regs_len)
387 info.regdump_len = ops->get_regs_len(dev); 388 info.regdump_len = ops->get_regs_len(dev);
388 if (ops && ops->get_eeprom_len) 389 if (ops->get_eeprom_len)
389 info.eedump_len = ops->get_eeprom_len(dev); 390 info.eedump_len = ops->get_eeprom_len(dev);
390 391
391 if (copy_to_user(useraddr, &info, sizeof(info))) 392 if (copy_to_user(useraddr, &info, sizeof(info)))
@@ -590,13 +591,14 @@ static noinline_for_stack int ethtool_set_rxfh_indir(struct net_device *dev,
590 struct ethtool_rxnfc rx_rings; 591 struct ethtool_rxnfc rx_rings;
591 u32 user_size, dev_size, i; 592 u32 user_size, dev_size, i;
592 u32 *indir; 593 u32 *indir;
594 const struct ethtool_ops *ops = dev->ethtool_ops;
593 int ret; 595 int ret;
594 596
595 if (!dev->ethtool_ops->get_rxfh_indir_size || 597 if (!ops->get_rxfh_indir_size || !ops->set_rxfh_indir ||
596 !dev->ethtool_ops->set_rxfh_indir || 598 !ops->get_rxnfc)
597 !dev->ethtool_ops->get_rxnfc)
598 return -EOPNOTSUPP; 599 return -EOPNOTSUPP;
599 dev_size = dev->ethtool_ops->get_rxfh_indir_size(dev); 600
601 dev_size = ops->get_rxfh_indir_size(dev);
600 if (dev_size == 0) 602 if (dev_size == 0)
601 return -EOPNOTSUPP; 603 return -EOPNOTSUPP;
602 604
@@ -613,7 +615,7 @@ static noinline_for_stack int ethtool_set_rxfh_indir(struct net_device *dev,
613 return -ENOMEM; 615 return -ENOMEM;
614 616
615 rx_rings.cmd = ETHTOOL_GRXRINGS; 617 rx_rings.cmd = ETHTOOL_GRXRINGS;
616 ret = dev->ethtool_ops->get_rxnfc(dev, &rx_rings, NULL); 618 ret = ops->get_rxnfc(dev, &rx_rings, NULL);
617 if (ret) 619 if (ret)
618 goto out; 620 goto out;
619 621
@@ -639,7 +641,7 @@ static noinline_for_stack int ethtool_set_rxfh_indir(struct net_device *dev,
639 } 641 }
640 } 642 }
641 643
642 ret = dev->ethtool_ops->set_rxfh_indir(dev, indir); 644 ret = ops->set_rxfh_indir(dev, indir);
643 645
644out: 646out:
645 kfree(indir); 647 kfree(indir);
@@ -1082,9 +1084,10 @@ static int ethtool_phys_id(struct net_device *dev, void __user *useraddr)
1082{ 1084{
1083 struct ethtool_value id; 1085 struct ethtool_value id;
1084 static bool busy; 1086 static bool busy;
1087 const struct ethtool_ops *ops = dev->ethtool_ops;
1085 int rc; 1088 int rc;
1086 1089
1087 if (!dev->ethtool_ops->set_phys_id) 1090 if (!ops->set_phys_id)
1088 return -EOPNOTSUPP; 1091 return -EOPNOTSUPP;
1089 1092
1090 if (busy) 1093 if (busy)
@@ -1093,7 +1096,7 @@ static int ethtool_phys_id(struct net_device *dev, void __user *useraddr)
1093 if (copy_from_user(&id, useraddr, sizeof(id))) 1096 if (copy_from_user(&id, useraddr, sizeof(id)))
1094 return -EFAULT; 1097 return -EFAULT;
1095 1098
1096 rc = dev->ethtool_ops->set_phys_id(dev, ETHTOOL_ID_ACTIVE); 1099 rc = ops->set_phys_id(dev, ETHTOOL_ID_ACTIVE);
1097 if (rc < 0) 1100 if (rc < 0)
1098 return rc; 1101 return rc;
1099 1102
@@ -1118,7 +1121,7 @@ static int ethtool_phys_id(struct net_device *dev, void __user *useraddr)
1118 i = n; 1121 i = n;
1119 do { 1122 do {
1120 rtnl_lock(); 1123 rtnl_lock();
1121 rc = dev->ethtool_ops->set_phys_id(dev, 1124 rc = ops->set_phys_id(dev,
1122 (i & 1) ? ETHTOOL_ID_OFF : ETHTOOL_ID_ON); 1125 (i & 1) ? ETHTOOL_ID_OFF : ETHTOOL_ID_ON);
1123 rtnl_unlock(); 1126 rtnl_unlock();
1124 if (rc) 1127 if (rc)
@@ -1133,7 +1136,7 @@ static int ethtool_phys_id(struct net_device *dev, void __user *useraddr)
1133 dev_put(dev); 1136 dev_put(dev);
1134 busy = false; 1137 busy = false;
1135 1138
1136 (void)dev->ethtool_ops->set_phys_id(dev, ETHTOOL_ID_INACTIVE); 1139 (void) ops->set_phys_id(dev, ETHTOOL_ID_INACTIVE);
1137 return rc; 1140 return rc;
1138} 1141}
1139 1142
@@ -1275,7 +1278,7 @@ static int ethtool_get_dump_flag(struct net_device *dev,
1275 struct ethtool_dump dump; 1278 struct ethtool_dump dump;
1276 const struct ethtool_ops *ops = dev->ethtool_ops; 1279 const struct ethtool_ops *ops = dev->ethtool_ops;
1277 1280
1278 if (!dev->ethtool_ops->get_dump_flag) 1281 if (!ops->get_dump_flag)
1279 return -EOPNOTSUPP; 1282 return -EOPNOTSUPP;
1280 1283
1281 if (copy_from_user(&dump, useraddr, sizeof(dump))) 1284 if (copy_from_user(&dump, useraddr, sizeof(dump)))
@@ -1299,8 +1302,7 @@ static int ethtool_get_dump_data(struct net_device *dev,
1299 const struct ethtool_ops *ops = dev->ethtool_ops; 1302 const struct ethtool_ops *ops = dev->ethtool_ops;
1300 void *data = NULL; 1303 void *data = NULL;
1301 1304
1302 if (!dev->ethtool_ops->get_dump_data || 1305 if (!ops->get_dump_data || !ops->get_dump_flag)
1303 !dev->ethtool_ops->get_dump_flag)
1304 return -EOPNOTSUPP; 1306 return -EOPNOTSUPP;
1305 1307
1306 if (copy_from_user(&dump, useraddr, sizeof(dump))) 1308 if (copy_from_user(&dump, useraddr, sizeof(dump)))
@@ -1346,13 +1348,9 @@ static int ethtool_get_ts_info(struct net_device *dev, void __user *useraddr)
1346 info.cmd = ETHTOOL_GET_TS_INFO; 1348 info.cmd = ETHTOOL_GET_TS_INFO;
1347 1349
1348 if (phydev && phydev->drv && phydev->drv->ts_info) { 1350 if (phydev && phydev->drv && phydev->drv->ts_info) {
1349
1350 err = phydev->drv->ts_info(phydev, &info); 1351 err = phydev->drv->ts_info(phydev, &info);
1351 1352 } else if (ops->get_ts_info) {
1352 } else if (dev->ethtool_ops && dev->ethtool_ops->get_ts_info) {
1353
1354 err = ops->get_ts_info(dev, &info); 1353 err = ops->get_ts_info(dev, &info);
1355
1356 } else { 1354 } else {
1357 info.so_timestamping = 1355 info.so_timestamping =
1358 SOF_TIMESTAMPING_RX_SOFTWARE | 1356 SOF_TIMESTAMPING_RX_SOFTWARE |
diff --git a/net/core/filter.c b/net/core/filter.c
index c23543cba132..2e20b55a7830 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -532,6 +532,7 @@ int sk_chk_filter(struct sock_filter *filter, unsigned int flen)
532 [BPF_JMP|BPF_JSET|BPF_X] = BPF_S_JMP_JSET_X, 532 [BPF_JMP|BPF_JSET|BPF_X] = BPF_S_JMP_JSET_X,
533 }; 533 };
534 int pc; 534 int pc;
535 bool anc_found;
535 536
536 if (flen == 0 || flen > BPF_MAXINSNS) 537 if (flen == 0 || flen > BPF_MAXINSNS)
537 return -EINVAL; 538 return -EINVAL;
@@ -592,8 +593,10 @@ int sk_chk_filter(struct sock_filter *filter, unsigned int flen)
592 case BPF_S_LD_W_ABS: 593 case BPF_S_LD_W_ABS:
593 case BPF_S_LD_H_ABS: 594 case BPF_S_LD_H_ABS:
594 case BPF_S_LD_B_ABS: 595 case BPF_S_LD_B_ABS:
596 anc_found = false;
595#define ANCILLARY(CODE) case SKF_AD_OFF + SKF_AD_##CODE: \ 597#define ANCILLARY(CODE) case SKF_AD_OFF + SKF_AD_##CODE: \
596 code = BPF_S_ANC_##CODE; \ 598 code = BPF_S_ANC_##CODE; \
599 anc_found = true; \
597 break 600 break
598 switch (ftest->k) { 601 switch (ftest->k) {
599 ANCILLARY(PROTOCOL); 602 ANCILLARY(PROTOCOL);
@@ -610,6 +613,10 @@ int sk_chk_filter(struct sock_filter *filter, unsigned int flen)
610 ANCILLARY(VLAN_TAG); 613 ANCILLARY(VLAN_TAG);
611 ANCILLARY(VLAN_TAG_PRESENT); 614 ANCILLARY(VLAN_TAG_PRESENT);
612 } 615 }
616
617 /* ancillary operation unknown or unsupported */
618 if (anc_found == false && ftest->k >= SKF_AD_OFF)
619 return -EINVAL;
613 } 620 }
614 ftest->code = code; 621 ftest->code = code;
615 } 622 }
@@ -714,6 +721,9 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
714 unsigned int fsize = sizeof(struct sock_filter) * fprog->len; 721 unsigned int fsize = sizeof(struct sock_filter) * fprog->len;
715 int err; 722 int err;
716 723
724 if (sock_flag(sk, SOCK_FILTER_LOCKED))
725 return -EPERM;
726
717 /* Make sure new filter is there and in the right amounts. */ 727 /* Make sure new filter is there and in the right amounts. */
718 if (fprog->filter == NULL) 728 if (fprog->filter == NULL)
719 return -EINVAL; 729 return -EINVAL;
@@ -750,6 +760,9 @@ int sk_detach_filter(struct sock *sk)
750 int ret = -ENOENT; 760 int ret = -ENOENT;
751 struct sk_filter *filter; 761 struct sk_filter *filter;
752 762
763 if (sock_flag(sk, SOCK_FILTER_LOCKED))
764 return -EPERM;
765
753 filter = rcu_dereference_protected(sk->sk_filter, 766 filter = rcu_dereference_protected(sk->sk_filter,
754 sock_owned_by_user(sk)); 767 sock_owned_by_user(sk));
755 if (filter) { 768 if (filter) {
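The new SOCK_FILTER_LOCKED checks pair with the SO_LOCK_FILTER socket option introduced in the same series: once a process sets it, later attach/detach attempts fail with -EPERM. A user-space sketch (the fallback define assumes the asm-generic value):

#include <stdio.h>
#include <sys/socket.h>

#ifndef SO_LOCK_FILTER
#define SO_LOCK_FILTER 44	/* asm-generic/socket.h; assumption for old headers */
#endif

int main(void)
{
	int one = 1;
	int fd = socket(AF_INET, SOCK_DGRAM, 0);

	if (fd < 0)
		return 1;
	/* From here on, SO_ATTACH_FILTER / SO_DETACH_FILTER get -EPERM. */
	if (setsockopt(fd, SOL_SOCKET, SO_LOCK_FILTER, &one, sizeof(one)))
		perror("SO_LOCK_FILTER");
	return 0;
}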
diff --git a/net/core/flow.c b/net/core/flow.c
index b0901ee5a002..43f7495df27a 100644
--- a/net/core/flow.c
+++ b/net/core/flow.c
@@ -286,7 +286,7 @@ nocache:
286 else 286 else
287 fle->genid--; 287 fle->genid--;
288 } else { 288 } else {
289 if (flo && !IS_ERR(flo)) 289 if (!IS_ERR_OR_NULL(flo))
290 flo->ops->delete(flo); 290 flo->ops->delete(flo);
291 } 291 }
292ret_object: 292ret_object:
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 466820b6e344..9d4c7201400d 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -143,3 +143,176 @@ ipv6:
143 return true; 143 return true;
144} 144}
145EXPORT_SYMBOL(skb_flow_dissect); 145EXPORT_SYMBOL(skb_flow_dissect);
146
147static u32 hashrnd __read_mostly;
148
149/*
150 * __skb_get_rxhash: calculate a flow hash based on src/dst addresses
151 * and src/dst port numbers. Sets rxhash in skb to a non-zero hash value
152 * on success; zero indicates no valid hash. Also sets l4_rxhash in skb
153 * if the hash is a canonical 4-tuple hash over transport ports.
154 */
155void __skb_get_rxhash(struct sk_buff *skb)
156{
157 struct flow_keys keys;
158 u32 hash;
159
160 if (!skb_flow_dissect(skb, &keys))
161 return;
162
163 if (keys.ports)
164 skb->l4_rxhash = 1;
165
166 /* get a consistent hash (same value on both flow directions) */
167 if (((__force u32)keys.dst < (__force u32)keys.src) ||
168 (((__force u32)keys.dst == (__force u32)keys.src) &&
169 ((__force u16)keys.port16[1] < (__force u16)keys.port16[0]))) {
170 swap(keys.dst, keys.src);
171 swap(keys.port16[0], keys.port16[1]);
172 }
173
174 hash = jhash_3words((__force u32)keys.dst,
175 (__force u32)keys.src,
176 (__force u32)keys.ports, hashrnd);
177 if (!hash)
178 hash = 1;
179
180 skb->rxhash = hash;
181}
182EXPORT_SYMBOL(__skb_get_rxhash);
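The swap above is the whole trick: both directions of a flow are forced into one canonical (src, dst, ports) order before hashing, so A->B and B->A land on the same value. The same idea free-standing (plain C; 'mix' stands in for jhash_3words(), and the port packing order is illustrative):

#include <stdint.h>

static uint32_t flow_hash(uint32_t src, uint32_t dst,
			  uint16_t sport, uint16_t dport, uint32_t seed,
			  uint32_t (*mix)(uint32_t, uint32_t, uint32_t, uint32_t))
{
	/* Order the 4-tuple canonically so both flow directions agree. */
	if (dst < src || (dst == src && dport < sport)) {
		uint32_t tmp_addr = src;
		uint16_t tmp_port = sport;

		src = dst;
		dst = tmp_addr;
		sport = dport;
		dport = tmp_port;
	}
	return mix(dst, src, ((uint32_t)sport << 16) | dport, seed);
}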
183
184/*
185 * Returns a Tx hash based on the given packet descriptor and the number of
186 * Tx queues to be used as a distribution range.
187 */
188u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb,
189 unsigned int num_tx_queues)
190{
191 u32 hash;
192 u16 qoffset = 0;
193 u16 qcount = num_tx_queues;
194
195 if (skb_rx_queue_recorded(skb)) {
196 hash = skb_get_rx_queue(skb);
197 while (unlikely(hash >= num_tx_queues))
198 hash -= num_tx_queues;
199 return hash;
200 }
201
202 if (dev->num_tc) {
203 u8 tc = netdev_get_prio_tc_map(dev, skb->priority);
204 qoffset = dev->tc_to_txq[tc].offset;
205 qcount = dev->tc_to_txq[tc].count;
206 }
207
208 if (skb->sk && skb->sk->sk_hash)
209 hash = skb->sk->sk_hash;
210 else
211 hash = (__force u16) skb->protocol;
212 hash = jhash_1word(hash, hashrnd);
213
214 return (u16) (((u64) hash * qcount) >> 32) + qoffset;
215}
216EXPORT_SYMBOL(__skb_tx_hash);
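The return line is the multiply-shift trick: (hash * qcount) >> 32 scales a full-range 32-bit hash by qcount / 2^32, mapping it uniformly onto [0, qcount) with no division. Stand-alone:

#include <stdint.h>

/* Map a 32-bit hash uniformly onto [0, n) without '%'. */
static inline uint16_t scale_hash(uint32_t hash, uint16_t n)
{
	return (uint16_t)(((uint64_t)hash * n) >> 32);
}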
217
218static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index)
219{
220 if (unlikely(queue_index >= dev->real_num_tx_queues)) {
221 net_warn_ratelimited("%s selects TX queue %d, but real number of TX queues is %d\n",
222 dev->name, queue_index,
223 dev->real_num_tx_queues);
224 return 0;
225 }
226 return queue_index;
227}
228
229static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
230{
231#ifdef CONFIG_XPS
232 struct xps_dev_maps *dev_maps;
233 struct xps_map *map;
234 int queue_index = -1;
235
236 rcu_read_lock();
237 dev_maps = rcu_dereference(dev->xps_maps);
238 if (dev_maps) {
239 map = rcu_dereference(
240 dev_maps->cpu_map[raw_smp_processor_id()]);
241 if (map) {
242 if (map->len == 1)
243 queue_index = map->queues[0];
244 else {
245 u32 hash;
246 if (skb->sk && skb->sk->sk_hash)
247 hash = skb->sk->sk_hash;
248 else
249 hash = (__force u16) skb->protocol ^
250 skb->rxhash;
251 hash = jhash_1word(hash, hashrnd);
252 queue_index = map->queues[
253 ((u64)hash * map->len) >> 32];
254 }
255 if (unlikely(queue_index >= dev->real_num_tx_queues))
256 queue_index = -1;
257 }
258 }
259 rcu_read_unlock();
260
261 return queue_index;
262#else
263 return -1;
264#endif
265}
266
267u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb)
268{
269 struct sock *sk = skb->sk;
270 int queue_index = sk_tx_queue_get(sk);
271
272 if (queue_index < 0 || skb->ooo_okay ||
273 queue_index >= dev->real_num_tx_queues) {
274 int new_index = get_xps_queue(dev, skb);
275 if (new_index < 0)
276 new_index = skb_tx_hash(dev, skb);
277
278 if (queue_index != new_index && sk) {
279 struct dst_entry *dst =
280 rcu_dereference_check(sk->sk_dst_cache, 1);
281
282 if (dst && skb_dst(skb) == dst)
283 sk_tx_queue_set(sk, queue_index);
284
285 }
286
287 queue_index = new_index;
288 }
289
290 return queue_index;
291}
292EXPORT_SYMBOL(__netdev_pick_tx);
293
294struct netdev_queue *netdev_pick_tx(struct net_device *dev,
295 struct sk_buff *skb)
296{
297 int queue_index = 0;
298
299 if (dev->real_num_tx_queues != 1) {
300 const struct net_device_ops *ops = dev->netdev_ops;
301 if (ops->ndo_select_queue)
302 queue_index = ops->ndo_select_queue(dev, skb);
303 else
304 queue_index = __netdev_pick_tx(dev, skb);
305 queue_index = dev_cap_txqueue(dev, queue_index);
306 }
307
308 skb_set_queue_mapping(skb, queue_index);
309 return netdev_get_tx_queue(dev, queue_index);
310}
311
312static int __init initialize_hashrnd(void)
313{
314 get_random_bytes(&hashrnd, sizeof(hashrnd));
315 return 0;
316}
317
318late_initcall_sync(initialize_hashrnd);
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index c815f285e5ab..3863b8f639c5 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -290,15 +290,7 @@ static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device
290 goto out_entries; 290 goto out_entries;
291 } 291 }
292 292
293 if (tbl->entry_size) 293 n = kzalloc(tbl->entry_size + dev->neigh_priv_len, GFP_ATOMIC);
294 n = kzalloc(tbl->entry_size, GFP_ATOMIC);
295 else {
296 int sz = sizeof(*n) + tbl->key_len;
297
298 sz = ALIGN(sz, NEIGH_PRIV_ALIGN);
299 sz += dev->neigh_priv_len;
300 n = kzalloc(sz, GFP_ATOMIC);
301 }
302 if (!n) 294 if (!n)
303 goto out_entries; 295 goto out_entries;
304 296
@@ -778,6 +770,9 @@ static void neigh_periodic_work(struct work_struct *work)
778 nht = rcu_dereference_protected(tbl->nht, 770 nht = rcu_dereference_protected(tbl->nht,
779 lockdep_is_held(&tbl->lock)); 771 lockdep_is_held(&tbl->lock));
780 772
773 if (atomic_read(&tbl->entries) < tbl->gc_thresh1)
774 goto out;
775
781 /* 776 /*
782 * periodically recompute ReachableTime from random function 777 * periodically recompute ReachableTime from random function
783 */ 778 */
@@ -832,6 +827,7 @@ next_elt:
832 nht = rcu_dereference_protected(tbl->nht, 827 nht = rcu_dereference_protected(tbl->nht,
833 lockdep_is_held(&tbl->lock)); 828 lockdep_is_held(&tbl->lock));
834 } 829 }
830out:
835 /* Cycle through all hash buckets every base_reachable_time/2 ticks. 831 /* Cycle through all hash buckets every base_reachable_time/2 ticks.
836 * ARP entry timeouts range from 1/2 base_reachable_time to 3/2 832 * ARP entry timeouts range from 1/2 base_reachable_time to 3/2
837 * base_reachable_time. 833 * base_reachable_time.
@@ -1542,6 +1538,12 @@ static void neigh_table_init_no_netlink(struct neigh_table *tbl)
1542 if (!tbl->nht || !tbl->phash_buckets) 1538 if (!tbl->nht || !tbl->phash_buckets)
1543 panic("cannot allocate neighbour cache hashes"); 1539 panic("cannot allocate neighbour cache hashes");
1544 1540
1541 if (!tbl->entry_size)
1542 tbl->entry_size = ALIGN(offsetof(struct neighbour, primary_key) +
1543 tbl->key_len, NEIGH_PRIV_ALIGN);
1544 else
1545 WARN_ON(tbl->entry_size % NEIGH_PRIV_ALIGN);
1546
1545 rwlock_init(&tbl->lock); 1547 rwlock_init(&tbl->lock);
1546 INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work); 1548 INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work);
1547 schedule_delayed_work(&tbl->gc_work, tbl->parms.reachable_time); 1549 schedule_delayed_work(&tbl->gc_work, tbl->parms.reachable_time);
diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c
new file mode 100644
index 000000000000..0f6bb6f8d391
--- /dev/null
+++ b/net/core/net-procfs.c
@@ -0,0 +1,412 @@
1#include <linux/netdevice.h>
2#include <linux/proc_fs.h>
3#include <linux/seq_file.h>
4#include <net/wext.h>
5
6#define BUCKET_SPACE (32 - NETDEV_HASHBITS - 1)
7
8#define get_bucket(x) ((x) >> BUCKET_SPACE)
9#define get_offset(x) ((x) & ((1 << BUCKET_SPACE) - 1))
10#define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))
11
12extern struct list_head ptype_all __read_mostly;
13extern struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
14
15static inline struct net_device *dev_from_same_bucket(struct seq_file *seq, loff_t *pos)
16{
17 struct net *net = seq_file_net(seq);
18 struct net_device *dev;
19 struct hlist_node *p;
20 struct hlist_head *h;
21 unsigned int count = 0, offset = get_offset(*pos);
22
23 h = &net->dev_name_head[get_bucket(*pos)];
24 hlist_for_each_entry_rcu(dev, p, h, name_hlist) {
25 if (++count == offset)
26 return dev;
27 }
28
29 return NULL;
30}
31
32static inline struct net_device *dev_from_bucket(struct seq_file *seq, loff_t *pos)
33{
34 struct net_device *dev;
35 unsigned int bucket;
36
37 do {
38 dev = dev_from_same_bucket(seq, pos);
39 if (dev)
40 return dev;
41
42 bucket = get_bucket(*pos) + 1;
43 *pos = set_bucket_offset(bucket, 1);
44 } while (bucket < NETDEV_HASHENTRIES);
45
46 return NULL;
47}
48
49/*
50 * This is invoked by the /proc filesystem handler to display a device
51 * in detail.
52 */
53static void *dev_seq_start(struct seq_file *seq, loff_t *pos)
54 __acquires(RCU)
55{
56 rcu_read_lock();
57 if (!*pos)
58 return SEQ_START_TOKEN;
59
60 if (get_bucket(*pos) >= NETDEV_HASHENTRIES)
61 return NULL;
62
63 return dev_from_bucket(seq, pos);
64}
65
66static void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
67{
68 ++*pos;
69 return dev_from_bucket(seq, pos);
70}
71
72static void dev_seq_stop(struct seq_file *seq, void *v)
73 __releases(RCU)
74{
75 rcu_read_unlock();
76}
77
78static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
79{
80 struct rtnl_link_stats64 temp;
81 const struct rtnl_link_stats64 *stats = dev_get_stats(dev, &temp);
82
83 seq_printf(seq, "%6s: %7llu %7llu %4llu %4llu %4llu %5llu %10llu %9llu "
84 "%8llu %7llu %4llu %4llu %4llu %5llu %7llu %10llu\n",
85 dev->name, stats->rx_bytes, stats->rx_packets,
86 stats->rx_errors,
87 stats->rx_dropped + stats->rx_missed_errors,
88 stats->rx_fifo_errors,
89 stats->rx_length_errors + stats->rx_over_errors +
90 stats->rx_crc_errors + stats->rx_frame_errors,
91 stats->rx_compressed, stats->multicast,
92 stats->tx_bytes, stats->tx_packets,
93 stats->tx_errors, stats->tx_dropped,
94 stats->tx_fifo_errors, stats->collisions,
95 stats->tx_carrier_errors +
96 stats->tx_aborted_errors +
97 stats->tx_window_errors +
98 stats->tx_heartbeat_errors,
99 stats->tx_compressed);
100}
101
102/*
103 * Called from the PROCfs module. This now uses the new arbitrary sized
104 * /proc/net interface to create /proc/net/dev
105 */
106static int dev_seq_show(struct seq_file *seq, void *v)
107{
108 if (v == SEQ_START_TOKEN)
109 seq_puts(seq, "Inter-| Receive "
110 " | Transmit\n"
111 " face |bytes packets errs drop fifo frame "
112 "compressed multicast|bytes packets errs "
113 "drop fifo colls carrier compressed\n");
114 else
115 dev_seq_printf_stats(seq, v);
116 return 0;
117}
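The file keeps the classic two header lines followed by one row per device; a minimal reader matching the dev_seq_printf_stats() format above (user-space C, first two counters only):

#include <stdio.h>

int main(void)
{
	char line[512];
	FILE *f = fopen("/proc/net/dev", "r");

	if (!f)
		return 1;
	fgets(line, sizeof(line), f);	/* "Inter-| Receive ..." */
	fgets(line, sizeof(line), f);	/* " face |bytes ..." */
	while (fgets(line, sizeof(line), f)) {
		char name[32];
		unsigned long long rx_bytes, rx_packets;

		if (sscanf(line, " %31[^:]: %llu %llu",
			   name, &rx_bytes, &rx_packets) == 3)
			printf("%s: rx %llu bytes, %llu packets\n",
			       name, rx_bytes, rx_packets);
	}
	fclose(f);
	return 0;
}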
118
119static struct softnet_data *softnet_get_online(loff_t *pos)
120{
121 struct softnet_data *sd = NULL;
122
123 while (*pos < nr_cpu_ids)
124 if (cpu_online(*pos)) {
125 sd = &per_cpu(softnet_data, *pos);
126 break;
127 } else
128 ++*pos;
129 return sd;
130}
131
132static void *softnet_seq_start(struct seq_file *seq, loff_t *pos)
133{
134 return softnet_get_online(pos);
135}
136
137static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
138{
139 ++*pos;
140 return softnet_get_online(pos);
141}
142
143static void softnet_seq_stop(struct seq_file *seq, void *v)
144{
145}
146
147static int softnet_seq_show(struct seq_file *seq, void *v)
148{
149 struct softnet_data *sd = v;
150
151 seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
152 sd->processed, sd->dropped, sd->time_squeeze, 0,
153 0, 0, 0, 0, /* was fastroute */
154 sd->cpu_collision, sd->received_rps);
155 return 0;
156}
157
158static const struct seq_operations dev_seq_ops = {
159 .start = dev_seq_start,
160 .next = dev_seq_next,
161 .stop = dev_seq_stop,
162 .show = dev_seq_show,
163};
164
165static int dev_seq_open(struct inode *inode, struct file *file)
166{
167 return seq_open_net(inode, file, &dev_seq_ops,
168 sizeof(struct seq_net_private));
169}
170
171static const struct file_operations dev_seq_fops = {
172 .owner = THIS_MODULE,
173 .open = dev_seq_open,
174 .read = seq_read,
175 .llseek = seq_lseek,
176 .release = seq_release_net,
177};
178
179static const struct seq_operations softnet_seq_ops = {
180 .start = softnet_seq_start,
181 .next = softnet_seq_next,
182 .stop = softnet_seq_stop,
183 .show = softnet_seq_show,
184};
185
186static int softnet_seq_open(struct inode *inode, struct file *file)
187{
188 return seq_open(file, &softnet_seq_ops);
189}
190
191static const struct file_operations softnet_seq_fops = {
192 .owner = THIS_MODULE,
193 .open = softnet_seq_open,
194 .read = seq_read,
195 .llseek = seq_lseek,
196 .release = seq_release,
197};
198
199static void *ptype_get_idx(loff_t pos)
200{
201 struct packet_type *pt = NULL;
202 loff_t i = 0;
203 int t;
204
205 list_for_each_entry_rcu(pt, &ptype_all, list) {
206 if (i == pos)
207 return pt;
208 ++i;
209 }
210
211 for (t = 0; t < PTYPE_HASH_SIZE; t++) {
212 list_for_each_entry_rcu(pt, &ptype_base[t], list) {
213 if (i == pos)
214 return pt;
215 ++i;
216 }
217 }
218 return NULL;
219}
220
221static void *ptype_seq_start(struct seq_file *seq, loff_t *pos)
222 __acquires(RCU)
223{
224 rcu_read_lock();
225 return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN;
226}
227
228static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
229{
230 struct packet_type *pt;
231 struct list_head *nxt;
232 int hash;
233
234 ++*pos;
235 if (v == SEQ_START_TOKEN)
236 return ptype_get_idx(0);
237
238 pt = v;
239 nxt = pt->list.next;
240 if (pt->type == htons(ETH_P_ALL)) {
241 if (nxt != &ptype_all)
242 goto found;
243 hash = 0;
244 nxt = ptype_base[0].next;
245 } else
246 hash = ntohs(pt->type) & PTYPE_HASH_MASK;
247
248 while (nxt == &ptype_base[hash]) {
249 if (++hash >= PTYPE_HASH_SIZE)
250 return NULL;
251 nxt = ptype_base[hash].next;
252 }
253found:
254 return list_entry(nxt, struct packet_type, list);
255}
256
257static void ptype_seq_stop(struct seq_file *seq, void *v)
258 __releases(RCU)
259{
260 rcu_read_unlock();
261}
262
263static int ptype_seq_show(struct seq_file *seq, void *v)
264{
265 struct packet_type *pt = v;
266
267 if (v == SEQ_START_TOKEN)
268 seq_puts(seq, "Type Device Function\n");
269 else if (pt->dev == NULL || dev_net(pt->dev) == seq_file_net(seq)) {
270 if (pt->type == htons(ETH_P_ALL))
271 seq_puts(seq, "ALL ");
272 else
273 seq_printf(seq, "%04x", ntohs(pt->type));
274
275 seq_printf(seq, " %-8s %pF\n",
276 pt->dev ? pt->dev->name : "", pt->func);
277 }
278
279 return 0;
280}
281
282static const struct seq_operations ptype_seq_ops = {
283 .start = ptype_seq_start,
284 .next = ptype_seq_next,
285 .stop = ptype_seq_stop,
286 .show = ptype_seq_show,
287};
288
289static int ptype_seq_open(struct inode *inode, struct file *file)
290{
291 return seq_open_net(inode, file, &ptype_seq_ops,
292 sizeof(struct seq_net_private));
293}
294
295static const struct file_operations ptype_seq_fops = {
296 .owner = THIS_MODULE,
297 .open = ptype_seq_open,
298 .read = seq_read,
299 .llseek = seq_lseek,
300 .release = seq_release_net,
301};
302
303
304static int __net_init dev_proc_net_init(struct net *net)
305{
306 int rc = -ENOMEM;
307
308 if (!proc_create("dev", S_IRUGO, net->proc_net, &dev_seq_fops))
309 goto out;
310 if (!proc_create("softnet_stat", S_IRUGO, net->proc_net,
311 &softnet_seq_fops))
312 goto out_dev;
313 if (!proc_create("ptype", S_IRUGO, net->proc_net, &ptype_seq_fops))
314 goto out_softnet;
315
316 if (wext_proc_init(net))
317 goto out_ptype;
318 rc = 0;
319out:
320 return rc;
321out_ptype:
322 remove_proc_entry("ptype", net->proc_net);
323out_softnet:
324 remove_proc_entry("softnet_stat", net->proc_net);
325out_dev:
326 remove_proc_entry("dev", net->proc_net);
327 goto out;
328}
329
330static void __net_exit dev_proc_net_exit(struct net *net)
331{
332 wext_proc_exit(net);
333
334 remove_proc_entry("ptype", net->proc_net);
335 remove_proc_entry("softnet_stat", net->proc_net);
336 remove_proc_entry("dev", net->proc_net);
337}
338
339static struct pernet_operations __net_initdata dev_proc_ops = {
340 .init = dev_proc_net_init,
341 .exit = dev_proc_net_exit,
342};
343
344static int dev_mc_seq_show(struct seq_file *seq, void *v)
345{
346 struct netdev_hw_addr *ha;
347 struct net_device *dev = v;
348
349 if (v == SEQ_START_TOKEN)
350 return 0;
351
352 netif_addr_lock_bh(dev);
353 netdev_for_each_mc_addr(ha, dev) {
354 int i;
355
356 seq_printf(seq, "%-4d %-15s %-5d %-5d ", dev->ifindex,
357 dev->name, ha->refcount, ha->global_use);
358
359 for (i = 0; i < dev->addr_len; i++)
360 seq_printf(seq, "%02x", ha->addr[i]);
361
362 seq_putc(seq, '\n');
363 }
364 netif_addr_unlock_bh(dev);
365 return 0;
366}
367
368static const struct seq_operations dev_mc_seq_ops = {
369 .start = dev_seq_start,
370 .next = dev_seq_next,
371 .stop = dev_seq_stop,
372 .show = dev_mc_seq_show,
373};
374
375static int dev_mc_seq_open(struct inode *inode, struct file *file)
376{
377 return seq_open_net(inode, file, &dev_mc_seq_ops,
378 sizeof(struct seq_net_private));
379}
380
381static const struct file_operations dev_mc_seq_fops = {
382 .owner = THIS_MODULE,
383 .open = dev_mc_seq_open,
384 .read = seq_read,
385 .llseek = seq_lseek,
386 .release = seq_release_net,
387};
388
389static int __net_init dev_mc_net_init(struct net *net)
390{
391 if (!proc_create("dev_mcast", 0, net->proc_net, &dev_mc_seq_fops))
392 return -ENOMEM;
393 return 0;
394}
395
396static void __net_exit dev_mc_net_exit(struct net *net)
397{
398 remove_proc_entry("dev_mcast", net->proc_net);
399}
400
401static struct pernet_operations __net_initdata dev_mc_net_ops = {
402 .init = dev_mc_net_init,
403 .exit = dev_mc_net_exit,
404};
405
406int __init dev_proc_init(void)
407{
408 int ret = register_pernet_subsys(&dev_proc_ops);
409 if (!ret)
410 return register_pernet_subsys(&dev_mc_net_ops);
411 return ret;
412}
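
The dev_seq_* iterators above pack a name-hash bucket index and a 1-based offset within that bucket into the single loff_t seq_file position, so a partially read /proc/net/dev can resume exactly where it stopped. A standalone sketch of the encoding, assuming NETDEV_HASHBITS is 8 as in kernels of this era:

#include <stdio.h>

#define NETDEV_HASHBITS	8	/* assumed; matches this kernel era */
#define BUCKET_SPACE (32 - NETDEV_HASHBITS - 1)

#define get_bucket(x) ((x) >> BUCKET_SPACE)
#define get_offset(x) ((x) & ((1 << BUCKET_SPACE) - 1))
#define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))

int main(void)
{
	/* position 0 is reserved for SEQ_START_TOKEN, so the first real
	 * entry of a bucket sits at offset 1, exactly as dev_from_bucket()
	 * encodes with set_bucket_offset(bucket, 1) */
	unsigned int pos = set_bucket_offset(3, 42);

	printf("bucket=%u offset=%u\n", get_bucket(pos), get_offset(pos));
	return 0;
}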
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 334efd5d67a9..7427ab5e27d8 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -21,6 +21,7 @@
21#include <linux/vmalloc.h> 21#include <linux/vmalloc.h>
22#include <linux/export.h> 22#include <linux/export.h>
23#include <linux/jiffies.h> 23#include <linux/jiffies.h>
24#include <linux/pm_runtime.h>
24 25
25#include "net-sysfs.h" 26#include "net-sysfs.h"
26 27
@@ -126,6 +127,19 @@ static ssize_t show_broadcast(struct device *dev,
126 return -EINVAL; 127 return -EINVAL;
127} 128}
128 129
130static int change_carrier(struct net_device *net, unsigned long new_carrier)
131{
132 if (!netif_running(net))
133 return -EINVAL;
134 return dev_change_carrier(net, (bool) new_carrier);
135}
136
137static ssize_t store_carrier(struct device *dev, struct device_attribute *attr,
138 const char *buf, size_t len)
139{
140 return netdev_store(dev, attr, buf, len, change_carrier);
141}
142
129static ssize_t show_carrier(struct device *dev, 143static ssize_t show_carrier(struct device *dev,
130 struct device_attribute *attr, char *buf) 144 struct device_attribute *attr, char *buf)
131{ 145{
@@ -331,7 +345,7 @@ static struct device_attribute net_class_attributes[] = {
331 __ATTR(link_mode, S_IRUGO, show_link_mode, NULL), 345 __ATTR(link_mode, S_IRUGO, show_link_mode, NULL),
332 __ATTR(address, S_IRUGO, show_address, NULL), 346 __ATTR(address, S_IRUGO, show_address, NULL),
333 __ATTR(broadcast, S_IRUGO, show_broadcast, NULL), 347 __ATTR(broadcast, S_IRUGO, show_broadcast, NULL),
334 __ATTR(carrier, S_IRUGO, show_carrier, NULL), 348 __ATTR(carrier, S_IRUGO | S_IWUSR, show_carrier, store_carrier),
335 __ATTR(speed, S_IRUGO, show_speed, NULL), 349 __ATTR(speed, S_IRUGO, show_speed, NULL),
336 __ATTR(duplex, S_IRUGO, show_duplex, NULL), 350 __ATTR(duplex, S_IRUGO, show_duplex, NULL),
337 __ATTR(dormant, S_IRUGO, show_dormant, NULL), 351 __ATTR(dormant, S_IRUGO, show_dormant, NULL),
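
The carrier attribute becomes writable so a CAP_NET_ADMIN process can force carrier state on a running device whose driver implements ndo_change_carrier. A minimal userspace sketch; the device name eth0 is purely illustrative:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	/* fails with -EINVAL if the interface is down, matching the
	 * netif_running() test in change_carrier() above */
	int fd = open("/sys/class/net/eth0/carrier", O_WRONLY);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	if (write(fd, "0", 1) != 1)	/* force carrier off; "1" restores it */
		perror("write");
	close(fd);
	return 0;
}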
@@ -989,68 +1003,14 @@ static ssize_t show_xps_map(struct netdev_queue *queue,
989 return len; 1003 return len;
990} 1004}
991 1005
992static DEFINE_MUTEX(xps_map_mutex);
993#define xmap_dereference(P) \
994 rcu_dereference_protected((P), lockdep_is_held(&xps_map_mutex))
995
996static void xps_queue_release(struct netdev_queue *queue)
997{
998 struct net_device *dev = queue->dev;
999 struct xps_dev_maps *dev_maps;
1000 struct xps_map *map;
1001 unsigned long index;
1002 int i, pos, nonempty = 0;
1003
1004 index = get_netdev_queue_index(queue);
1005
1006 mutex_lock(&xps_map_mutex);
1007 dev_maps = xmap_dereference(dev->xps_maps);
1008
1009 if (dev_maps) {
1010 for_each_possible_cpu(i) {
1011 map = xmap_dereference(dev_maps->cpu_map[i]);
1012 if (!map)
1013 continue;
1014
1015 for (pos = 0; pos < map->len; pos++)
1016 if (map->queues[pos] == index)
1017 break;
1018
1019 if (pos < map->len) {
1020 if (map->len > 1)
1021 map->queues[pos] =
1022 map->queues[--map->len];
1023 else {
1024 RCU_INIT_POINTER(dev_maps->cpu_map[i],
1025 NULL);
1026 kfree_rcu(map, rcu);
1027 map = NULL;
1028 }
1029 }
1030 if (map)
1031 nonempty = 1;
1032 }
1033
1034 if (!nonempty) {
1035 RCU_INIT_POINTER(dev->xps_maps, NULL);
1036 kfree_rcu(dev_maps, rcu);
1037 }
1038 }
1039 mutex_unlock(&xps_map_mutex);
1040}
1041
1042static ssize_t store_xps_map(struct netdev_queue *queue, 1006static ssize_t store_xps_map(struct netdev_queue *queue,
1043 struct netdev_queue_attribute *attribute, 1007 struct netdev_queue_attribute *attribute,
1044 const char *buf, size_t len) 1008 const char *buf, size_t len)
1045{ 1009{
1046 struct net_device *dev = queue->dev; 1010 struct net_device *dev = queue->dev;
1047 cpumask_var_t mask;
1048 int err, i, cpu, pos, map_len, alloc_len, need_set;
1049 unsigned long index; 1011 unsigned long index;
1050 struct xps_map *map, *new_map; 1012 cpumask_var_t mask;
1051 struct xps_dev_maps *dev_maps, *new_dev_maps; 1013 int err;
1052 int nonempty = 0;
1053 int numa_node_id = -2;
1054 1014
1055 if (!capable(CAP_NET_ADMIN)) 1015 if (!capable(CAP_NET_ADMIN))
1056 return -EPERM; 1016 return -EPERM;
@@ -1066,105 +1026,11 @@ static ssize_t store_xps_map(struct netdev_queue *queue,
1066 return err; 1026 return err;
1067 } 1027 }
1068 1028
1069 new_dev_maps = kzalloc(max_t(unsigned int, 1029 err = netif_set_xps_queue(dev, mask, index);
1070 XPS_DEV_MAPS_SIZE, L1_CACHE_BYTES), GFP_KERNEL);
1071 if (!new_dev_maps) {
1072 free_cpumask_var(mask);
1073 return -ENOMEM;
1074 }
1075
1076 mutex_lock(&xps_map_mutex);
1077
1078 dev_maps = xmap_dereference(dev->xps_maps);
1079
1080 for_each_possible_cpu(cpu) {
1081 map = dev_maps ?
1082 xmap_dereference(dev_maps->cpu_map[cpu]) : NULL;
1083 new_map = map;
1084 if (map) {
1085 for (pos = 0; pos < map->len; pos++)
1086 if (map->queues[pos] == index)
1087 break;
1088 map_len = map->len;
1089 alloc_len = map->alloc_len;
1090 } else
1091 pos = map_len = alloc_len = 0;
1092
1093 need_set = cpumask_test_cpu(cpu, mask) && cpu_online(cpu);
1094#ifdef CONFIG_NUMA
1095 if (need_set) {
1096 if (numa_node_id == -2)
1097 numa_node_id = cpu_to_node(cpu);
1098 else if (numa_node_id != cpu_to_node(cpu))
1099 numa_node_id = -1;
1100 }
1101#endif
1102 if (need_set && pos >= map_len) {
1103 /* Need to add queue to this CPU's map */
1104 if (map_len >= alloc_len) {
1105 alloc_len = alloc_len ?
1106 2 * alloc_len : XPS_MIN_MAP_ALLOC;
1107 new_map = kzalloc_node(XPS_MAP_SIZE(alloc_len),
1108 GFP_KERNEL,
1109 cpu_to_node(cpu));
1110 if (!new_map)
1111 goto error;
1112 new_map->alloc_len = alloc_len;
1113 for (i = 0; i < map_len; i++)
1114 new_map->queues[i] = map->queues[i];
1115 new_map->len = map_len;
1116 }
1117 new_map->queues[new_map->len++] = index;
1118 } else if (!need_set && pos < map_len) {
1119 /* Need to remove queue from this CPU's map */
1120 if (map_len > 1)
1121 new_map->queues[pos] =
1122 new_map->queues[--new_map->len];
1123 else
1124 new_map = NULL;
1125 }
1126 RCU_INIT_POINTER(new_dev_maps->cpu_map[cpu], new_map);
1127 }
1128
1129 /* Cleanup old maps */
1130 for_each_possible_cpu(cpu) {
1131 map = dev_maps ?
1132 xmap_dereference(dev_maps->cpu_map[cpu]) : NULL;
1133 if (map && xmap_dereference(new_dev_maps->cpu_map[cpu]) != map)
1134 kfree_rcu(map, rcu);
1135 if (new_dev_maps->cpu_map[cpu])
1136 nonempty = 1;
1137 }
1138
1139 if (nonempty) {
1140 rcu_assign_pointer(dev->xps_maps, new_dev_maps);
1141 } else {
1142 kfree(new_dev_maps);
1143 RCU_INIT_POINTER(dev->xps_maps, NULL);
1144 }
1145
1146 if (dev_maps)
1147 kfree_rcu(dev_maps, rcu);
1148
1149 netdev_queue_numa_node_write(queue, (numa_node_id >= 0) ? numa_node_id :
1150 NUMA_NO_NODE);
1151
1152 mutex_unlock(&xps_map_mutex);
1153 1030
1154 free_cpumask_var(mask); 1031 free_cpumask_var(mask);
1155 return len;
1156 1032
1157error: 1033 return err ? : len;
1158 mutex_unlock(&xps_map_mutex);
1159
1160 if (new_dev_maps)
1161 for_each_possible_cpu(i)
1162 kfree(rcu_dereference_protected(
1163 new_dev_maps->cpu_map[i],
1164 1));
1165 kfree(new_dev_maps);
1166 free_cpumask_var(mask);
1167 return -ENOMEM;
1168} 1034}
1169 1035
1170static struct netdev_queue_attribute xps_cpus_attribute = 1036static struct netdev_queue_attribute xps_cpus_attribute =
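
store_xps_map() is reduced to a thin wrapper around netif_set_xps_queue(), which now owns the per-CPU map rebuild in dev.c. A driver can call the same helper directly; a sketch under the assumptions that CONFIG_XPS is set and there is one TX queue per CPU (example_setup_xps is not a real kernel function):

static void example_setup_xps(struct net_device *dev)
{
	cpumask_var_t mask;
	unsigned int i;

	if (!alloc_cpumask_var(&mask, GFP_KERNEL))
		return;

	for (i = 0; i < dev->real_num_tx_queues; i++) {
		cpumask_clear(mask);
		cpumask_set_cpu(i, mask);
		/* steer transmits from CPU i to TX queue i;
		 * return value ignored for brevity */
		netif_set_xps_queue(dev, mask, i);
	}
	free_cpumask_var(mask);
}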
@@ -1183,10 +1049,6 @@ static void netdev_queue_release(struct kobject *kobj)
1183{ 1049{
1184 struct netdev_queue *queue = to_netdev_queue(kobj); 1050 struct netdev_queue *queue = to_netdev_queue(kobj);
1185 1051
1186#ifdef CONFIG_XPS
1187 xps_queue_release(queue);
1188#endif
1189
1190 memset(kobj, 0, sizeof(*kobj)); 1052 memset(kobj, 0, sizeof(*kobj));
1191 dev_put(queue->dev); 1053 dev_put(queue->dev);
1192} 1054}
@@ -1334,7 +1196,6 @@ struct kobj_ns_type_operations net_ns_type_operations = {
1334}; 1196};
1335EXPORT_SYMBOL_GPL(net_ns_type_operations); 1197EXPORT_SYMBOL_GPL(net_ns_type_operations);
1336 1198
1337#ifdef CONFIG_HOTPLUG
1338static int netdev_uevent(struct device *d, struct kobj_uevent_env *env) 1199static int netdev_uevent(struct device *d, struct kobj_uevent_env *env)
1339{ 1200{
1340 struct net_device *dev = to_net_dev(d); 1201 struct net_device *dev = to_net_dev(d);
@@ -1353,7 +1214,6 @@ static int netdev_uevent(struct device *d, struct kobj_uevent_env *env)
1353exit: 1214exit:
1354 return retval; 1215 return retval;
1355} 1216}
1356#endif
1357 1217
1358/* 1218/*
1359 * netdev_release -- destroy and free a dead device. 1219 * netdev_release -- destroy and free a dead device.
@@ -1382,9 +1242,7 @@ static struct class net_class = {
1382#ifdef CONFIG_SYSFS 1242#ifdef CONFIG_SYSFS
1383 .dev_attrs = net_class_attributes, 1243 .dev_attrs = net_class_attributes,
1384#endif /* CONFIG_SYSFS */ 1244#endif /* CONFIG_SYSFS */
1385#ifdef CONFIG_HOTPLUG
1386 .dev_uevent = netdev_uevent, 1245 .dev_uevent = netdev_uevent,
1387#endif
1388 .ns_type = &net_ns_type_operations, 1246 .ns_type = &net_ns_type_operations,
1389 .namespace = net_namespace, 1247 .namespace = net_namespace,
1390}; 1248};
@@ -1400,6 +1258,8 @@ void netdev_unregister_kobject(struct net_device * net)
1400 1258
1401 remove_queue_kobjects(net); 1259 remove_queue_kobjects(net);
1402 1260
1261 pm_runtime_set_memalloc_noio(dev, false);
1262
1403 device_del(dev); 1263 device_del(dev);
1404} 1264}
1405 1265
@@ -1444,6 +1304,8 @@ int netdev_register_kobject(struct net_device *net)
1444 return error; 1304 return error;
1445 } 1305 }
1446 1306
1307 pm_runtime_set_memalloc_noio(dev, true);
1308
1447 return error; 1309 return error;
1448} 1310}
1449 1311
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 6456439cbbd9..8acce01b6dab 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -381,6 +381,21 @@ struct net *get_net_ns_by_pid(pid_t pid)
381} 381}
382EXPORT_SYMBOL_GPL(get_net_ns_by_pid); 382EXPORT_SYMBOL_GPL(get_net_ns_by_pid);
383 383
384static __net_init int net_ns_net_init(struct net *net)
385{
386 return proc_alloc_inum(&net->proc_inum);
387}
388
389static __net_exit void net_ns_net_exit(struct net *net)
390{
391 proc_free_inum(net->proc_inum);
392}
393
394static struct pernet_operations __net_initdata net_ns_ops = {
395 .init = net_ns_net_init,
396 .exit = net_ns_net_exit,
397};
398
384static int __init net_ns_init(void) 399static int __init net_ns_init(void)
385{ 400{
386 struct net_generic *ng; 401 struct net_generic *ng;
@@ -412,6 +427,8 @@ static int __init net_ns_init(void)
412 427
413 mutex_unlock(&net_mutex); 428 mutex_unlock(&net_mutex);
414 429
430 register_pernet_subsys(&net_ns_ops);
431
415 return 0; 432 return 0;
416} 433}
417 434
@@ -630,16 +647,29 @@ static void netns_put(void *ns)
630 647
631static int netns_install(struct nsproxy *nsproxy, void *ns) 648static int netns_install(struct nsproxy *nsproxy, void *ns)
632{ 649{
650 struct net *net = ns;
651
652 if (!ns_capable(net->user_ns, CAP_SYS_ADMIN) ||
653 !nsown_capable(CAP_SYS_ADMIN))
654 return -EPERM;
655
633 put_net(nsproxy->net_ns); 656 put_net(nsproxy->net_ns);
634 nsproxy->net_ns = get_net(ns); 657 nsproxy->net_ns = get_net(net);
635 return 0; 658 return 0;
636} 659}
637 660
661static unsigned int netns_inum(void *ns)
662{
663 struct net *net = ns;
664 return net->proc_inum;
665}
666
638const struct proc_ns_operations netns_operations = { 667const struct proc_ns_operations netns_operations = {
639 .name = "net", 668 .name = "net",
640 .type = CLONE_NEWNET, 669 .type = CLONE_NEWNET,
641 .get = netns_get, 670 .get = netns_get,
642 .put = netns_put, 671 .put = netns_put,
643 .install = netns_install, 672 .install = netns_install,
673 .inum = netns_inum,
644}; 674};
645#endif 675#endif
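
proc_alloc_inum() gives every net namespace a stable inode number, reported through the new .inum hook and visible as the inode of /proc/<pid>/ns/net. Userspace can compare namespaces by fstat()ing those files and join one with setns(2); a sketch, where PID 1234 is illustrative and CAP_SYS_ADMIN is required per the netns_install() check above:

#define _GNU_SOURCE
#include <fcntl.h>
#include <sched.h>
#include <stdio.h>
#include <sys/stat.h>
#include <unistd.h>

int main(void)
{
	struct stat st;
	int fd = open("/proc/1234/ns/net", O_RDONLY);

	if (fd < 0 || fstat(fd, &st) < 0) {
		perror("ns/net");
		return 1;
	}
	/* st_ino is the proc_inum allocated in net_ns_net_init() */
	printf("netns inum: %lu\n", (unsigned long)st.st_ino);

	if (setns(fd, CLONE_NEWNET) < 0)	/* enter that namespace */
		perror("setns");
	close(fd);
	return 0;
}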
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 3151acf5ec13..fa32899006a2 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -29,6 +29,9 @@
29#include <linux/if_vlan.h> 29#include <linux/if_vlan.h>
30#include <net/tcp.h> 30#include <net/tcp.h>
31#include <net/udp.h> 31#include <net/udp.h>
32#include <net/addrconf.h>
33#include <net/ndisc.h>
34#include <net/ip6_checksum.h>
32#include <asm/unaligned.h> 35#include <asm/unaligned.h>
33#include <trace/events/napi.h> 36#include <trace/events/napi.h>
34 37
@@ -44,6 +47,8 @@ static struct sk_buff_head skb_pool;
44 47
45static atomic_t trapped; 48static atomic_t trapped;
46 49
50static struct srcu_struct netpoll_srcu;
51
47#define USEC_PER_POLL 50 52#define USEC_PER_POLL 50
48#define NETPOLL_RX_ENABLED 1 53#define NETPOLL_RX_ENABLED 1
49#define NETPOLL_RX_DROP 2 54#define NETPOLL_RX_DROP 2
@@ -55,7 +60,8 @@ static atomic_t trapped;
55 MAX_UDP_CHUNK) 60 MAX_UDP_CHUNK)
56 61
57static void zap_completion_queue(void); 62static void zap_completion_queue(void);
58static void netpoll_arp_reply(struct sk_buff *skb, struct netpoll_info *npinfo); 63static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo);
64static void netpoll_async_cleanup(struct work_struct *work);
59 65
60static unsigned int carrier_timeout = 4; 66static unsigned int carrier_timeout = 4;
61module_param(carrier_timeout, uint, 0644); 67module_param(carrier_timeout, uint, 0644);
@@ -181,13 +187,13 @@ static void poll_napi(struct net_device *dev)
181 } 187 }
182} 188}
183 189
184static void service_arp_queue(struct netpoll_info *npi) 190static void service_neigh_queue(struct netpoll_info *npi)
185{ 191{
186 if (npi) { 192 if (npi) {
187 struct sk_buff *skb; 193 struct sk_buff *skb;
188 194
189 while ((skb = skb_dequeue(&npi->arp_tx))) 195 while ((skb = skb_dequeue(&npi->neigh_tx)))
190 netpoll_arp_reply(skb, npi); 196 netpoll_neigh_reply(skb, npi);
191 } 197 }
192} 198}
193 199
@@ -196,35 +202,76 @@ static void netpoll_poll_dev(struct net_device *dev)
196 const struct net_device_ops *ops; 202 const struct net_device_ops *ops;
197 struct netpoll_info *ni = rcu_dereference_bh(dev->npinfo); 203 struct netpoll_info *ni = rcu_dereference_bh(dev->npinfo);
198 204
199 if (!dev || !netif_running(dev)) 205 /* Don't do any rx activity if the dev_lock mutex is held;
206 * the dev_open/close paths use this to block netpoll activity
207 * while changing device state
208 */
209 if (!mutex_trylock(&ni->dev_lock))
200 return; 210 return;
201 211
212 if (!netif_running(dev)) {
213 mutex_unlock(&ni->dev_lock);
214 return;
215 }
216
202 ops = dev->netdev_ops; 217 ops = dev->netdev_ops;
203 if (!ops->ndo_poll_controller) 218 if (!ops->ndo_poll_controller) {
219 mutex_unlock(&ni->dev_lock);
204 return; 220 return;
221 }
205 222
206 /* Process pending work on NIC */ 223 /* Process pending work on NIC */
207 ops->ndo_poll_controller(dev); 224 ops->ndo_poll_controller(dev);
208 225
209 poll_napi(dev); 226 poll_napi(dev);
210 227
228 mutex_unlock(&ni->dev_lock);
229
211 if (dev->flags & IFF_SLAVE) { 230 if (dev->flags & IFF_SLAVE) {
212 if (ni) { 231 if (ni) {
213 struct net_device *bond_dev = dev->master; 232 struct net_device *bond_dev;
214 struct sk_buff *skb; 233 struct sk_buff *skb;
215 struct netpoll_info *bond_ni = rcu_dereference_bh(bond_dev->npinfo); 234 struct netpoll_info *bond_ni;
216 while ((skb = skb_dequeue(&ni->arp_tx))) { 235
236 bond_dev = netdev_master_upper_dev_get_rcu(dev);
237 bond_ni = rcu_dereference_bh(bond_dev->npinfo);
238 while ((skb = skb_dequeue(&ni->neigh_tx))) {
217 skb->dev = bond_dev; 239 skb->dev = bond_dev;
218 skb_queue_tail(&bond_ni->arp_tx, skb); 240 skb_queue_tail(&bond_ni->neigh_tx, skb);
219 } 241 }
220 } 242 }
221 } 243 }
222 244
223 service_arp_queue(ni); 245 service_neigh_queue(ni);
224 246
225 zap_completion_queue(); 247 zap_completion_queue();
226} 248}
227 249
250int netpoll_rx_disable(struct net_device *dev)
251{
252 struct netpoll_info *ni;
253 int idx;
254 might_sleep();
255 idx = srcu_read_lock(&netpoll_srcu);
256 ni = srcu_dereference(dev->npinfo, &netpoll_srcu);
257 if (ni)
258 mutex_lock(&ni->dev_lock);
259 srcu_read_unlock(&netpoll_srcu, idx);
260 return 0;
261}
262EXPORT_SYMBOL(netpoll_rx_disable);
263
264void netpoll_rx_enable(struct net_device *dev)
265{
266 struct netpoll_info *ni;
267 rcu_read_lock();
268 ni = rcu_dereference(dev->npinfo);
269 if (ni)
270 mutex_unlock(&ni->dev_lock);
271 rcu_read_unlock();
272}
273EXPORT_SYMBOL(netpoll_rx_enable);
274
228static void refill_skbs(void) 275static void refill_skbs(void)
229{ 276{
230 struct sk_buff *skb; 277 struct sk_buff *skb;
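
netpoll_rx_disable() and netpoll_rx_enable() let the device open/close paths fence off netpoll while state changes: disable acquires the per-device npinfo mutex under an SRCU read lock, enable releases it. The dev.c callers land elsewhere in this series; a sketch of the intended bracketing (example_open and the direct __dev_open call are illustrative):

static int example_open(struct net_device *dev)
{
	int err;

	err = netpoll_rx_disable(dev);	/* may sleep on ni->dev_lock */
	if (err)
		return err;

	err = __dev_open(dev);		/* device state changes safely */

	netpoll_rx_enable(dev);		/* netpoll may poll again */
	return err;
}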
@@ -381,9 +428,14 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
381 struct iphdr *iph; 428 struct iphdr *iph;
382 struct ethhdr *eth; 429 struct ethhdr *eth;
383 static atomic_t ip_ident; 430 static atomic_t ip_ident;
431 struct ipv6hdr *ip6h;
384 432
385 udp_len = len + sizeof(*udph); 433 udp_len = len + sizeof(*udph);
386 ip_len = udp_len + sizeof(*iph); 434 if (np->ipv6)
435 ip_len = udp_len + sizeof(*ip6h);
436 else
437 ip_len = udp_len + sizeof(*iph);
438
387 total_len = ip_len + LL_RESERVED_SPACE(np->dev); 439 total_len = ip_len + LL_RESERVED_SPACE(np->dev);
388 440
389 skb = find_skb(np, total_len + np->dev->needed_tailroom, 441 skb = find_skb(np, total_len + np->dev->needed_tailroom,
@@ -400,34 +452,66 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
400 udph->source = htons(np->local_port); 452 udph->source = htons(np->local_port);
401 udph->dest = htons(np->remote_port); 453 udph->dest = htons(np->remote_port);
402 udph->len = htons(udp_len); 454 udph->len = htons(udp_len);
403 udph->check = 0; 455
404 udph->check = csum_tcpudp_magic(np->local_ip, 456 if (np->ipv6) {
405 np->remote_ip, 457 udph->check = 0;
406 udp_len, IPPROTO_UDP, 458 udph->check = csum_ipv6_magic(&np->local_ip.in6,
407 csum_partial(udph, udp_len, 0)); 459 &np->remote_ip.in6,
408 if (udph->check == 0) 460 udp_len, IPPROTO_UDP,
409 udph->check = CSUM_MANGLED_0; 461 csum_partial(udph, udp_len, 0));
410 462 if (udph->check == 0)
411 skb_push(skb, sizeof(*iph)); 463 udph->check = CSUM_MANGLED_0;
412 skb_reset_network_header(skb); 464
413 iph = ip_hdr(skb); 465 skb_push(skb, sizeof(*ip6h));
414 466 skb_reset_network_header(skb);
415 /* iph->version = 4; iph->ihl = 5; */ 467 ip6h = ipv6_hdr(skb);
416 put_unaligned(0x45, (unsigned char *)iph); 468
417 iph->tos = 0; 469 /* ip6h->version = 6; ip6h->priority = 0; */
418 put_unaligned(htons(ip_len), &(iph->tot_len)); 470 put_unaligned(0x60, (unsigned char *)ip6h);
419 iph->id = htons(atomic_inc_return(&ip_ident)); 471 ip6h->flow_lbl[0] = 0;
420 iph->frag_off = 0; 472 ip6h->flow_lbl[1] = 0;
421 iph->ttl = 64; 473 ip6h->flow_lbl[2] = 0;
422 iph->protocol = IPPROTO_UDP; 474
423 iph->check = 0; 475 ip6h->payload_len = htons(sizeof(struct udphdr) + len);
424 put_unaligned(np->local_ip, &(iph->saddr)); 476 ip6h->nexthdr = IPPROTO_UDP;
425 put_unaligned(np->remote_ip, &(iph->daddr)); 477 ip6h->hop_limit = 32;
426 iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); 478 ip6h->saddr = np->local_ip.in6;
427 479 ip6h->daddr = np->remote_ip.in6;
428 eth = (struct ethhdr *) skb_push(skb, ETH_HLEN); 480
429 skb_reset_mac_header(skb); 481 eth = (struct ethhdr *) skb_push(skb, ETH_HLEN);
430 skb->protocol = eth->h_proto = htons(ETH_P_IP); 482 skb_reset_mac_header(skb);
483 skb->protocol = eth->h_proto = htons(ETH_P_IPV6);
484 } else {
485 udph->check = 0;
486 udph->check = csum_tcpudp_magic(np->local_ip.ip,
487 np->remote_ip.ip,
488 udp_len, IPPROTO_UDP,
489 csum_partial(udph, udp_len, 0));
490 if (udph->check == 0)
491 udph->check = CSUM_MANGLED_0;
492
493 skb_push(skb, sizeof(*iph));
494 skb_reset_network_header(skb);
495 iph = ip_hdr(skb);
496
497 /* iph->version = 4; iph->ihl = 5; */
498 put_unaligned(0x45, (unsigned char *)iph);
499 iph->tos = 0;
500 put_unaligned(htons(ip_len), &(iph->tot_len));
501 iph->id = htons(atomic_inc_return(&ip_ident));
502 iph->frag_off = 0;
503 iph->ttl = 64;
504 iph->protocol = IPPROTO_UDP;
505 iph->check = 0;
506 put_unaligned(np->local_ip.ip, &(iph->saddr));
507 put_unaligned(np->remote_ip.ip, &(iph->daddr));
508 iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
509
510 eth = (struct ethhdr *) skb_push(skb, ETH_HLEN);
511 skb_reset_mac_header(skb);
512 skb->protocol = eth->h_proto = htons(ETH_P_IP);
513 }
514
431 memcpy(eth->h_source, np->dev->dev_addr, ETH_ALEN); 515 memcpy(eth->h_source, np->dev->dev_addr, ETH_ALEN);
432 memcpy(eth->h_dest, np->remote_mac, ETH_ALEN); 516 memcpy(eth->h_dest, np->remote_mac, ETH_ALEN);
433 517
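
netpoll_send_udp() now builds either an IPv4 or an IPv6 header from the same union inet_addr fields. A minimal client sketch in the style of netconsole; the module, names, addresses, and option string are all illustrative:

static struct netpoll example_np = {
	.name = "example",
};
static char example_opts[] =
	"6665@10.0.0.1/eth0,6666@10.0.0.2/00:11:22:33:44:55";

static int __init example_init(void)
{
	int err;

	/* fills local/remote ip, ports, dev_name and np->ipv6 */
	err = netpoll_parse_options(&example_np, example_opts);
	if (err)
		return err;
	err = netpoll_setup(&example_np);
	if (err)
		return err;

	netpoll_send_udp(&example_np, "hello\n", 6);
	return 0;
}
module_init(example_init);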
@@ -437,18 +521,16 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
437} 521}
438EXPORT_SYMBOL(netpoll_send_udp); 522EXPORT_SYMBOL(netpoll_send_udp);
439 523
440static void netpoll_arp_reply(struct sk_buff *skb, struct netpoll_info *npinfo) 524static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo)
441{ 525{
442 struct arphdr *arp; 526 int size, type = ARPOP_REPLY;
443 unsigned char *arp_ptr;
444 int size, type = ARPOP_REPLY, ptype = ETH_P_ARP;
445 __be32 sip, tip; 527 __be32 sip, tip;
446 unsigned char *sha; 528 unsigned char *sha;
447 struct sk_buff *send_skb; 529 struct sk_buff *send_skb;
448 struct netpoll *np, *tmp; 530 struct netpoll *np, *tmp;
449 unsigned long flags; 531 unsigned long flags;
450 int hlen, tlen; 532 int hlen, tlen;
451 int hits = 0; 533 int hits = 0, proto;
452 534
453 if (list_empty(&npinfo->rx_np)) 535 if (list_empty(&npinfo->rx_np))
454 return; 536 return;
@@ -466,94 +548,214 @@ static void netpoll_arp_reply(struct sk_buff *skb, struct netpoll_info *npinfo)
466 if (!hits) 548 if (!hits)
467 return; 549 return;
468 550
469 /* No arp on this interface */ 551 proto = ntohs(eth_hdr(skb)->h_proto);
470 if (skb->dev->flags & IFF_NOARP) 552 if (proto == ETH_P_IP) {
471 return; 553 struct arphdr *arp;
472 554 unsigned char *arp_ptr;
473 if (!pskb_may_pull(skb, arp_hdr_len(skb->dev))) 555 /* No arp on this interface */
474 return; 556 if (skb->dev->flags & IFF_NOARP)
557 return;
475 558
476 skb_reset_network_header(skb); 559 if (!pskb_may_pull(skb, arp_hdr_len(skb->dev)))
477 skb_reset_transport_header(skb); 560 return;
478 arp = arp_hdr(skb);
479 561
480 if ((arp->ar_hrd != htons(ARPHRD_ETHER) && 562 skb_reset_network_header(skb);
481 arp->ar_hrd != htons(ARPHRD_IEEE802)) || 563 skb_reset_transport_header(skb);
482 arp->ar_pro != htons(ETH_P_IP) || 564 arp = arp_hdr(skb);
483 arp->ar_op != htons(ARPOP_REQUEST))
484 return;
485 565
486 arp_ptr = (unsigned char *)(arp+1); 566 if ((arp->ar_hrd != htons(ARPHRD_ETHER) &&
487 /* save the location of the src hw addr */ 567 arp->ar_hrd != htons(ARPHRD_IEEE802)) ||
488 sha = arp_ptr; 568 arp->ar_pro != htons(ETH_P_IP) ||
489 arp_ptr += skb->dev->addr_len; 569 arp->ar_op != htons(ARPOP_REQUEST))
490 memcpy(&sip, arp_ptr, 4); 570 return;
491 arp_ptr += 4;
492 /* If we actually cared about dst hw addr,
493 it would get copied here */
494 arp_ptr += skb->dev->addr_len;
495 memcpy(&tip, arp_ptr, 4);
496
497 /* Should we ignore arp? */
498 if (ipv4_is_loopback(tip) || ipv4_is_multicast(tip))
499 return;
500 571
501 size = arp_hdr_len(skb->dev); 572 arp_ptr = (unsigned char *)(arp+1);
573 /* save the location of the src hw addr */
574 sha = arp_ptr;
575 arp_ptr += skb->dev->addr_len;
576 memcpy(&sip, arp_ptr, 4);
577 arp_ptr += 4;
578 /* If we actually cared about dst hw addr,
579 it would get copied here */
580 arp_ptr += skb->dev->addr_len;
581 memcpy(&tip, arp_ptr, 4);
502 582
503 spin_lock_irqsave(&npinfo->rx_lock, flags); 583 /* Should we ignore arp? */
504 list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) { 584 if (ipv4_is_loopback(tip) || ipv4_is_multicast(tip))
505 if (tip != np->local_ip) 585 return;
506 continue;
507 586
508 hlen = LL_RESERVED_SPACE(np->dev); 587 size = arp_hdr_len(skb->dev);
509 tlen = np->dev->needed_tailroom;
510 send_skb = find_skb(np, size + hlen + tlen, hlen);
511 if (!send_skb)
512 continue;
513 588
514 skb_reset_network_header(send_skb); 589 spin_lock_irqsave(&npinfo->rx_lock, flags);
515 arp = (struct arphdr *) skb_put(send_skb, size); 590 list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) {
516 send_skb->dev = skb->dev; 591 if (tip != np->local_ip.ip)
517 send_skb->protocol = htons(ETH_P_ARP); 592 continue;
593
594 hlen = LL_RESERVED_SPACE(np->dev);
595 tlen = np->dev->needed_tailroom;
596 send_skb = find_skb(np, size + hlen + tlen, hlen);
597 if (!send_skb)
598 continue;
599
600 skb_reset_network_header(send_skb);
601 arp = (struct arphdr *) skb_put(send_skb, size);
602 send_skb->dev = skb->dev;
603 send_skb->protocol = htons(ETH_P_ARP);
604
605 /* Fill the device header for the ARP frame */
606 if (dev_hard_header(send_skb, skb->dev, ETH_P_ARP,
607 sha, np->dev->dev_addr,
608 send_skb->len) < 0) {
609 kfree_skb(send_skb);
610 continue;
611 }
518 612
519 /* Fill the device header for the ARP frame */ 613 /*
520 if (dev_hard_header(send_skb, skb->dev, ptype, 614 * Fill out the arp protocol part.
521 sha, np->dev->dev_addr, 615 *
522 send_skb->len) < 0) { 616 * we only support ethernet device type,
523 kfree_skb(send_skb); 617 * which (according to RFC 1390) should
524 continue; 618 * always equal 1 (Ethernet).
619 */
620
621 arp->ar_hrd = htons(np->dev->type);
622 arp->ar_pro = htons(ETH_P_IP);
623 arp->ar_hln = np->dev->addr_len;
624 arp->ar_pln = 4;
625 arp->ar_op = htons(type);
626
627 arp_ptr = (unsigned char *)(arp + 1);
628 memcpy(arp_ptr, np->dev->dev_addr, np->dev->addr_len);
629 arp_ptr += np->dev->addr_len;
630 memcpy(arp_ptr, &tip, 4);
631 arp_ptr += 4;
632 memcpy(arp_ptr, sha, np->dev->addr_len);
633 arp_ptr += np->dev->addr_len;
634 memcpy(arp_ptr, &sip, 4);
635
636 netpoll_send_skb(np, send_skb);
637
638 /* If there are several rx_hooks for the same address,
639 we're fine by sending a single reply */
640 break;
525 } 641 }
642 spin_unlock_irqrestore(&npinfo->rx_lock, flags);
643 } else if (proto == ETH_P_IPV6) {
644#if IS_ENABLED(CONFIG_IPV6)
645 struct nd_msg *msg;
646 u8 *lladdr = NULL;
647 struct ipv6hdr *hdr;
648 struct icmp6hdr *icmp6h;
649 const struct in6_addr *saddr;
650 const struct in6_addr *daddr;
651 struct inet6_dev *in6_dev = NULL;
652 struct in6_addr *target;
653
654 in6_dev = in6_dev_get(skb->dev);
655 if (!in6_dev || !in6_dev->cnf.accept_ra)
656 return;
526 657
527 /* 658 if (!pskb_may_pull(skb, skb->len))
528 * Fill out the arp protocol part. 659 return;
529 *
530 * we only support ethernet device type,
531 * which (according to RFC 1390) should
532 * always equal 1 (Ethernet).
533 */
534 660
535 arp->ar_hrd = htons(np->dev->type); 661 msg = (struct nd_msg *)skb_transport_header(skb);
536 arp->ar_pro = htons(ETH_P_IP);
537 arp->ar_hln = np->dev->addr_len;
538 arp->ar_pln = 4;
539 arp->ar_op = htons(type);
540 662
541 arp_ptr = (unsigned char *)(arp + 1); 663 __skb_push(skb, skb->data - skb_transport_header(skb));
542 memcpy(arp_ptr, np->dev->dev_addr, np->dev->addr_len);
543 arp_ptr += np->dev->addr_len;
544 memcpy(arp_ptr, &tip, 4);
545 arp_ptr += 4;
546 memcpy(arp_ptr, sha, np->dev->addr_len);
547 arp_ptr += np->dev->addr_len;
548 memcpy(arp_ptr, &sip, 4);
549 664
550 netpoll_send_skb(np, send_skb); 665 if (ipv6_hdr(skb)->hop_limit != 255)
666 return;
667 if (msg->icmph.icmp6_code != 0)
668 return;
669 if (msg->icmph.icmp6_type != NDISC_NEIGHBOUR_SOLICITATION)
670 return;
671
672 saddr = &ipv6_hdr(skb)->saddr;
673 daddr = &ipv6_hdr(skb)->daddr;
551 674
552 /* If there are several rx_hooks for the same address, 675 size = sizeof(struct icmp6hdr) + sizeof(struct in6_addr);
553 we're fine by sending a single reply */ 676
554 break; 677 spin_lock_irqsave(&npinfo->rx_lock, flags);
678 list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) {
679 if (!ipv6_addr_equal(daddr, &np->local_ip.in6))
680 continue;
681
682 hlen = LL_RESERVED_SPACE(np->dev);
683 tlen = np->dev->needed_tailroom;
684 send_skb = find_skb(np, size + hlen + tlen, hlen);
685 if (!send_skb)
686 continue;
687
688 send_skb->protocol = htons(ETH_P_IPV6);
689 send_skb->dev = skb->dev;
690
691 skb_reset_network_header(send_skb);
692 skb_put(send_skb, sizeof(struct ipv6hdr));
693 hdr = ipv6_hdr(send_skb);
694
695 *(__be32*)hdr = htonl(0x60000000);
696
697 hdr->payload_len = htons(size);
698 hdr->nexthdr = IPPROTO_ICMPV6;
699 hdr->hop_limit = 255;
700 hdr->saddr = *saddr;
701 hdr->daddr = *daddr;
702
703 send_skb->transport_header = send_skb->tail;
704 skb_put(send_skb, size);
705
706 icmp6h = (struct icmp6hdr *)skb_transport_header(send_skb);
707 icmp6h->icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT;
708 icmp6h->icmp6_router = 0;
709 icmp6h->icmp6_solicited = 1;
710 target = (struct in6_addr *)(skb_transport_header(send_skb) + sizeof(struct icmp6hdr));
711 *target = msg->target;
712 icmp6h->icmp6_cksum = csum_ipv6_magic(saddr, daddr, size,
713 IPPROTO_ICMPV6,
714 csum_partial(icmp6h,
715 size, 0));
716
717 if (dev_hard_header(send_skb, skb->dev, ETH_P_IPV6,
718 lladdr, np->dev->dev_addr,
719 send_skb->len) < 0) {
720 kfree_skb(send_skb);
721 continue;
722 }
723
724 netpoll_send_skb(np, send_skb);
725
726 /* If there are several rx_hooks for the same address,
727 we're fine by sending a single reply */
728 break;
729 }
730 spin_unlock_irqrestore(&npinfo->rx_lock, flags);
731#endif
555 } 732 }
556 spin_unlock_irqrestore(&npinfo->rx_lock, flags); 733}
734
735static bool pkt_is_ns(struct sk_buff *skb)
736{
737 struct nd_msg *msg;
738 struct ipv6hdr *hdr;
739
740 if (skb->protocol != htons(ETH_P_IPV6))
741 return false;
742 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr) + sizeof(struct nd_msg)))
743 return false;
744
745 msg = (struct nd_msg *)skb_transport_header(skb);
746 __skb_push(skb, skb->data - skb_transport_header(skb));
747 hdr = ipv6_hdr(skb);
748
749 if (hdr->nexthdr != IPPROTO_ICMPV6)
750 return false;
751 if (hdr->hop_limit != 255)
752 return false;
753 if (msg->icmph.icmp6_code != 0)
754 return false;
755 if (msg->icmph.icmp6_type != NDISC_NEIGHBOUR_SOLICITATION)
756 return false;
757
758 return true;
557} 759}
558 760
559int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo) 761int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo)
@@ -571,9 +773,11 @@ int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo)
571 goto out; 773 goto out;
572 774
573 /* check if netpoll clients need ARP */ 775 /* check if netpoll clients need ARP */
574 if (skb->protocol == htons(ETH_P_ARP) && 776 if (skb->protocol == htons(ETH_P_ARP) && atomic_read(&trapped)) {
575 atomic_read(&trapped)) { 777 skb_queue_tail(&npinfo->neigh_tx, skb);
576 skb_queue_tail(&npinfo->arp_tx, skb); 778 return 1;
779 } else if (pkt_is_ns(skb) && atomic_read(&trapped)) {
780 skb_queue_tail(&npinfo->neigh_tx, skb);
577 return 1; 781 return 1;
578 } 782 }
579 783
@@ -584,60 +788,100 @@ int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo)
584 } 788 }
585 789
586 proto = ntohs(eth_hdr(skb)->h_proto); 790 proto = ntohs(eth_hdr(skb)->h_proto);
587 if (proto != ETH_P_IP) 791 if (proto != ETH_P_IP && proto != ETH_P_IPV6)
588 goto out; 792 goto out;
589 if (skb->pkt_type == PACKET_OTHERHOST) 793 if (skb->pkt_type == PACKET_OTHERHOST)
590 goto out; 794 goto out;
591 if (skb_shared(skb)) 795 if (skb_shared(skb))
592 goto out; 796 goto out;
593 797
594 if (!pskb_may_pull(skb, sizeof(struct iphdr))) 798 if (proto == ETH_P_IP) {
595 goto out; 799 if (!pskb_may_pull(skb, sizeof(struct iphdr)))
596 iph = (struct iphdr *)skb->data; 800 goto out;
597 if (iph->ihl < 5 || iph->version != 4) 801 iph = (struct iphdr *)skb->data;
598 goto out; 802 if (iph->ihl < 5 || iph->version != 4)
599 if (!pskb_may_pull(skb, iph->ihl*4)) 803 goto out;
600 goto out; 804 if (!pskb_may_pull(skb, iph->ihl*4))
601 iph = (struct iphdr *)skb->data; 805 goto out;
602 if (ip_fast_csum((u8 *)iph, iph->ihl) != 0) 806 iph = (struct iphdr *)skb->data;
603 goto out; 807 if (ip_fast_csum((u8 *)iph, iph->ihl) != 0)
604 808 goto out;
605 len = ntohs(iph->tot_len);
606 if (skb->len < len || len < iph->ihl*4)
607 goto out;
608 809
609 /* 810 len = ntohs(iph->tot_len);
610 * Our transport medium may have padded the buffer out. 811 if (skb->len < len || len < iph->ihl*4)
611 * Now we trim to the true length of the frame. 812 goto out;
612 */
613 if (pskb_trim_rcsum(skb, len))
614 goto out;
615 813
616 iph = (struct iphdr *)skb->data; 814 /*
617 if (iph->protocol != IPPROTO_UDP) 815 * Our transport medium may have padded the buffer out.
618 goto out; 816 * Now we trim to the true length of the frame.
817 */
818 if (pskb_trim_rcsum(skb, len))
819 goto out;
619 820
620 len -= iph->ihl*4; 821 iph = (struct iphdr *)skb->data;
621 uh = (struct udphdr *)(((char *)iph) + iph->ihl*4); 822 if (iph->protocol != IPPROTO_UDP)
622 ulen = ntohs(uh->len); 823 goto out;
623 824
624 if (ulen != len) 825 len -= iph->ihl*4;
625 goto out; 826 uh = (struct udphdr *)(((char *)iph) + iph->ihl*4);
626 if (checksum_udp(skb, uh, ulen, iph->saddr, iph->daddr)) 827 ulen = ntohs(uh->len);
627 goto out;
628 828
629 list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) { 829 if (ulen != len)
630 if (np->local_ip && np->local_ip != iph->daddr) 830 goto out;
631 continue; 831 if (checksum_udp(skb, uh, ulen, iph->saddr, iph->daddr))
632 if (np->remote_ip && np->remote_ip != iph->saddr) 832 goto out;
633 continue; 833 list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) {
634 if (np->local_port && np->local_port != ntohs(uh->dest)) 834 if (np->local_ip.ip && np->local_ip.ip != iph->daddr)
635 continue; 835 continue;
836 if (np->remote_ip.ip && np->remote_ip.ip != iph->saddr)
837 continue;
838 if (np->local_port && np->local_port != ntohs(uh->dest))
839 continue;
840
841 np->rx_hook(np, ntohs(uh->source),
842 (char *)(uh+1),
843 ulen - sizeof(struct udphdr));
844 hits++;
845 }
846 } else {
847#if IS_ENABLED(CONFIG_IPV6)
848 const struct ipv6hdr *ip6h;
636 849
637 np->rx_hook(np, ntohs(uh->source), 850 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
638 (char *)(uh+1), 851 goto out;
639 ulen - sizeof(struct udphdr)); 852 ip6h = (struct ipv6hdr *)skb->data;
640 hits++; 853 if (ip6h->version != 6)
854 goto out;
855 len = ntohs(ip6h->payload_len);
856 if (!len)
857 goto out;
858 if (len + sizeof(struct ipv6hdr) > skb->len)
859 goto out;
860 if (pskb_trim_rcsum(skb, len + sizeof(struct ipv6hdr)))
861 goto out;
862 ip6h = ipv6_hdr(skb);
863 if (!pskb_may_pull(skb, sizeof(struct udphdr)))
864 goto out;
865 uh = udp_hdr(skb);
866 ulen = ntohs(uh->len);
867 if (ulen != skb->len)
868 goto out;
869 if (udp6_csum_init(skb, uh, IPPROTO_UDP))
870 goto out;
871 list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) {
872 if (!ipv6_addr_equal(&np->local_ip.in6, &ip6h->daddr))
873 continue;
874 if (!ipv6_addr_equal(&np->remote_ip.in6, &ip6h->saddr))
875 continue;
876 if (np->local_port && np->local_port != ntohs(uh->dest))
877 continue;
878
879 np->rx_hook(np, ntohs(uh->source),
880 (char *)(uh+1),
881 ulen - sizeof(struct udphdr));
882 hits++;
883 }
884#endif
641 } 885 }
642 886
643 if (!hits) 887 if (!hits)
@@ -658,17 +902,44 @@ out:
658void netpoll_print_options(struct netpoll *np) 902void netpoll_print_options(struct netpoll *np)
659{ 903{
660 np_info(np, "local port %d\n", np->local_port); 904 np_info(np, "local port %d\n", np->local_port);
661 np_info(np, "local IP %pI4\n", &np->local_ip); 905 if (np->ipv6)
906 np_info(np, "local IPv6 address %pI6c\n", &np->local_ip.in6);
907 else
908 np_info(np, "local IPv4 address %pI4\n", &np->local_ip.ip);
662 np_info(np, "interface '%s'\n", np->dev_name); 909 np_info(np, "interface '%s'\n", np->dev_name);
663 np_info(np, "remote port %d\n", np->remote_port); 910 np_info(np, "remote port %d\n", np->remote_port);
664 np_info(np, "remote IP %pI4\n", &np->remote_ip); 911 if (np->ipv6)
912 np_info(np, "remote IPv6 address %pI6c\n", &np->remote_ip.in6);
913 else
914 np_info(np, "remote IPv4 address %pI4\n", &np->remote_ip.ip);
665 np_info(np, "remote ethernet address %pM\n", np->remote_mac); 915 np_info(np, "remote ethernet address %pM\n", np->remote_mac);
666} 916}
667EXPORT_SYMBOL(netpoll_print_options); 917EXPORT_SYMBOL(netpoll_print_options);
668 918
919static int netpoll_parse_ip_addr(const char *str, union inet_addr *addr)
920{
921 const char *end;
922
923 if (!strchr(str, ':') &&
924 in4_pton(str, -1, (void *)addr, -1, &end) > 0) {
925 if (!*end)
926 return 0;
927 }
928 if (in6_pton(str, -1, addr->in6.s6_addr, -1, &end) > 0) {
929#if IS_ENABLED(CONFIG_IPV6)
930 if (!*end)
931 return 1;
932#else
933 return -1;
934#endif
935 }
936 return -1;
937}
938
669int netpoll_parse_options(struct netpoll *np, char *opt) 939int netpoll_parse_options(struct netpoll *np, char *opt)
670{ 940{
671 char *cur=opt, *delim; 941 char *cur=opt, *delim;
942 int ipv6;
672 943
673 if (*cur != '@') { 944 if (*cur != '@') {
674 if ((delim = strchr(cur, '@')) == NULL) 945 if ((delim = strchr(cur, '@')) == NULL)
@@ -684,7 +955,11 @@ int netpoll_parse_options(struct netpoll *np, char *opt)
684 if ((delim = strchr(cur, '/')) == NULL) 955 if ((delim = strchr(cur, '/')) == NULL)
685 goto parse_failed; 956 goto parse_failed;
686 *delim = 0; 957 *delim = 0;
687 np->local_ip = in_aton(cur); 958 ipv6 = netpoll_parse_ip_addr(cur, &np->local_ip);
959 if (ipv6 < 0)
960 goto parse_failed;
961 else
962 np->ipv6 = (bool)ipv6;
688 cur = delim; 963 cur = delim;
689 } 964 }
690 cur++; 965 cur++;
@@ -716,7 +991,13 @@ int netpoll_parse_options(struct netpoll *np, char *opt)
716 if ((delim = strchr(cur, '/')) == NULL) 991 if ((delim = strchr(cur, '/')) == NULL)
717 goto parse_failed; 992 goto parse_failed;
718 *delim = 0; 993 *delim = 0;
719 np->remote_ip = in_aton(cur); 994 ipv6 = netpoll_parse_ip_addr(cur, &np->remote_ip);
995 if (ipv6 < 0)
996 goto parse_failed;
997 else if (np->ipv6 != (bool)ipv6)
998 goto parse_failed;
999 else
1000 np->ipv6 = (bool)ipv6;
720 cur = delim + 1; 1001 cur = delim + 1;
721 1002
722 if (*cur != 0) { 1003 if (*cur != 0) {
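
Either address in the local_port@local_ip/dev,remote_port@remote_ip/remote_mac option string may now be IPv6, but the np->ipv6 consistency check above rejects mixed families. Illustrative netconsole parameters (addresses and MAC are placeholders):

	netconsole=6665@10.0.0.1/eth0,6666@10.0.0.2/00:11:22:33:44:55
	netconsole=6665@fd00::1/eth0,6666@fd00::2/00:11:22:33:44:55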
@@ -744,6 +1025,7 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev, gfp_t gfp)
744 1025
745 np->dev = ndev; 1026 np->dev = ndev;
746 strlcpy(np->dev_name, ndev->name, IFNAMSIZ); 1027 strlcpy(np->dev_name, ndev->name, IFNAMSIZ);
1028 INIT_WORK(&np->cleanup_work, netpoll_async_cleanup);
747 1029
748 if ((ndev->priv_flags & IFF_DISABLE_NETPOLL) || 1030 if ((ndev->priv_flags & IFF_DISABLE_NETPOLL) ||
749 !ndev->netdev_ops->ndo_poll_controller) { 1031 !ndev->netdev_ops->ndo_poll_controller) {
@@ -764,7 +1046,8 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev, gfp_t gfp)
764 INIT_LIST_HEAD(&npinfo->rx_np); 1046 INIT_LIST_HEAD(&npinfo->rx_np);
765 1047
766 spin_lock_init(&npinfo->rx_lock); 1048 spin_lock_init(&npinfo->rx_lock);
767 skb_queue_head_init(&npinfo->arp_tx); 1049 mutex_init(&npinfo->dev_lock);
1050 skb_queue_head_init(&npinfo->neigh_tx);
768 skb_queue_head_init(&npinfo->txq); 1051 skb_queue_head_init(&npinfo->txq);
769 INIT_DELAYED_WORK(&npinfo->tx_work, queue_process); 1052 INIT_DELAYED_WORK(&npinfo->tx_work, queue_process);
770 1053
@@ -777,7 +1060,7 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev, gfp_t gfp)
777 goto free_npinfo; 1060 goto free_npinfo;
778 } 1061 }
779 } else { 1062 } else {
780 npinfo = ndev->npinfo; 1063 npinfo = rtnl_dereference(ndev->npinfo);
781 atomic_inc(&npinfo->refcnt); 1064 atomic_inc(&npinfo->refcnt);
782 } 1065 }
783 1066
@@ -808,14 +1091,19 @@ int netpoll_setup(struct netpoll *np)
808 struct in_device *in_dev; 1091 struct in_device *in_dev;
809 int err; 1092 int err;
810 1093
811 if (np->dev_name) 1094 rtnl_lock();
812 ndev = dev_get_by_name(&init_net, np->dev_name); 1095 if (np->dev_name) {
1096 struct net *net = current->nsproxy->net_ns;
1097 ndev = __dev_get_by_name(net, np->dev_name);
1098 }
813 if (!ndev) { 1099 if (!ndev) {
814 np_err(np, "%s doesn't exist, aborting\n", np->dev_name); 1100 np_err(np, "%s doesn't exist, aborting\n", np->dev_name);
815 return -ENODEV; 1101 err = -ENODEV;
1102 goto unlock;
816 } 1103 }
1104 dev_hold(ndev);
817 1105
818 if (ndev->master) { 1106 if (netdev_master_upper_dev_get(ndev)) {
819 np_err(np, "%s is a slave device, aborting\n", np->dev_name); 1107 np_err(np, "%s is a slave device, aborting\n", np->dev_name);
820 err = -EBUSY; 1108 err = -EBUSY;
821 goto put; 1109 goto put;
@@ -826,15 +1114,14 @@ int netpoll_setup(struct netpoll *np)
826 1114
827 np_info(np, "device %s not up yet, forcing it\n", np->dev_name); 1115 np_info(np, "device %s not up yet, forcing it\n", np->dev_name);
828 1116
829 rtnl_lock();
830 err = dev_open(ndev); 1117 err = dev_open(ndev);
831 rtnl_unlock();
832 1118
833 if (err) { 1119 if (err) {
834 np_err(np, "failed to open %s\n", ndev->name); 1120 np_err(np, "failed to open %s\n", ndev->name);
835 goto put; 1121 goto put;
836 } 1122 }
837 1123
1124 rtnl_unlock();
838 atleast = jiffies + HZ/10; 1125 atleast = jiffies + HZ/10;
839 atmost = jiffies + carrier_timeout * HZ; 1126 atmost = jiffies + carrier_timeout * HZ;
840 while (!netif_carrier_ok(ndev)) { 1127 while (!netif_carrier_ok(ndev)) {
@@ -854,39 +1141,70 @@ int netpoll_setup(struct netpoll *np)
854 np_notice(np, "carrier detect appears untrustworthy, waiting 4 seconds\n"); 1141 np_notice(np, "carrier detect appears untrustworthy, waiting 4 seconds\n");
855 msleep(4000); 1142 msleep(4000);
856 } 1143 }
1144 rtnl_lock();
857 } 1145 }
858 1146
859 if (!np->local_ip) { 1147 if (!np->local_ip.ip) {
860 rcu_read_lock(); 1148 if (!np->ipv6) {
861 in_dev = __in_dev_get_rcu(ndev); 1149 in_dev = __in_dev_get_rtnl(ndev);
1150
1151 if (!in_dev || !in_dev->ifa_list) {
1152 np_err(np, "no IP address for %s, aborting\n",
1153 np->dev_name);
1154 err = -EDESTADDRREQ;
1155 goto put;
1156 }
1157
1158 np->local_ip.ip = in_dev->ifa_list->ifa_local;
1159 np_info(np, "local IP %pI4\n", &np->local_ip.ip);
1160 } else {
1161#if IS_ENABLED(CONFIG_IPV6)
1162 struct inet6_dev *idev;
862 1163
863 if (!in_dev || !in_dev->ifa_list) {
864 rcu_read_unlock();
865 np_err(np, "no IP address for %s, aborting\n",
866 np->dev_name);
867 err = -EDESTADDRREQ; 1164 err = -EDESTADDRREQ;
1165 idev = __in6_dev_get(ndev);
1166 if (idev) {
1167 struct inet6_ifaddr *ifp;
1168
1169 read_lock_bh(&idev->lock);
1170 list_for_each_entry(ifp, &idev->addr_list, if_list) {
1171 if (ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL)
1172 continue;
1173 np->local_ip.in6 = ifp->addr;
1174 err = 0;
1175 break;
1176 }
1177 read_unlock_bh(&idev->lock);
1178 }
1179 if (err) {
1180 np_err(np, "no IPv6 address for %s, aborting\n",
1181 np->dev_name);
1182 goto put;
1183 } else
1184 np_info(np, "local IPv6 %pI6c\n", &np->local_ip.in6);
1185#else
1186 np_err(np, "IPv6 is not supported on %s, aborting\n",
1187 np->dev_name);
1188 err = -EINVAL;
868 goto put; 1189 goto put;
1190#endif
869 } 1191 }
870
871 np->local_ip = in_dev->ifa_list->ifa_local;
872 rcu_read_unlock();
873 np_info(np, "local IP %pI4\n", &np->local_ip);
874 } 1192 }
875 1193
876 /* fill up the skb queue */ 1194 /* fill up the skb queue */
877 refill_skbs(); 1195 refill_skbs();
878 1196
879 rtnl_lock();
880 err = __netpoll_setup(np, ndev, GFP_KERNEL); 1197 err = __netpoll_setup(np, ndev, GFP_KERNEL);
881 rtnl_unlock();
882
883 if (err) 1198 if (err)
884 goto put; 1199 goto put;
885 1200
1201 rtnl_unlock();
886 return 0; 1202 return 0;
887 1203
888put: 1204put:
889 dev_put(ndev); 1205 dev_put(ndev);
1206unlock:
1207 rtnl_unlock();
890 return err; 1208 return err;
891} 1209}
892EXPORT_SYMBOL(netpoll_setup); 1210EXPORT_SYMBOL(netpoll_setup);
@@ -894,6 +1212,7 @@ EXPORT_SYMBOL(netpoll_setup);
894static int __init netpoll_init(void) 1212static int __init netpoll_init(void)
895{ 1213{
896 skb_queue_head_init(&skb_pool); 1214 skb_queue_head_init(&skb_pool);
1215 init_srcu_struct(&netpoll_srcu);
897 return 0; 1216 return 0;
898} 1217}
899core_initcall(netpoll_init); 1218core_initcall(netpoll_init);
@@ -903,7 +1222,7 @@ static void rcu_cleanup_netpoll_info(struct rcu_head *rcu_head)
903 struct netpoll_info *npinfo = 1222 struct netpoll_info *npinfo =
904 container_of(rcu_head, struct netpoll_info, rcu); 1223 container_of(rcu_head, struct netpoll_info, rcu);
905 1224
906 skb_queue_purge(&npinfo->arp_tx); 1225 skb_queue_purge(&npinfo->neigh_tx);
907 skb_queue_purge(&npinfo->txq); 1226 skb_queue_purge(&npinfo->txq);
908 1227
909 /* we can't call cancel_delayed_work_sync here, as we are in softirq */ 1228 /* we can't call cancel_delayed_work_sync here, as we are in softirq */
@@ -921,7 +1240,11 @@ void __netpoll_cleanup(struct netpoll *np)
921 struct netpoll_info *npinfo; 1240 struct netpoll_info *npinfo;
922 unsigned long flags; 1241 unsigned long flags;
923 1242
924 npinfo = np->dev->npinfo; 1243 /* all paths into this function now hold the rtnl
1244 * (netpoll_async_cleanup takes it explicitly), so
1245 * rtnl_dereference is the right accessor here
1246 */
1247 npinfo = rtnl_dereference(np->dev->npinfo);
925 if (!npinfo) 1248 if (!npinfo)
926 return; 1249 return;
927 1250
@@ -933,6 +1256,8 @@ void __netpoll_cleanup(struct netpoll *np)
933 spin_unlock_irqrestore(&npinfo->rx_lock, flags); 1256 spin_unlock_irqrestore(&npinfo->rx_lock, flags);
934 } 1257 }
935 1258
1259 synchronize_srcu(&netpoll_srcu);
1260
936 if (atomic_dec_and_test(&npinfo->refcnt)) { 1261 if (atomic_dec_and_test(&npinfo->refcnt)) {
937 const struct net_device_ops *ops; 1262 const struct net_device_ops *ops;
938 1263
@@ -940,25 +1265,27 @@ void __netpoll_cleanup(struct netpoll *np)
940 if (ops->ndo_netpoll_cleanup) 1265 if (ops->ndo_netpoll_cleanup)
941 ops->ndo_netpoll_cleanup(np->dev); 1266 ops->ndo_netpoll_cleanup(np->dev);
942 1267
943 RCU_INIT_POINTER(np->dev->npinfo, NULL); 1268 rcu_assign_pointer(np->dev->npinfo, NULL);
944 call_rcu_bh(&npinfo->rcu, rcu_cleanup_netpoll_info); 1269 call_rcu_bh(&npinfo->rcu, rcu_cleanup_netpoll_info);
945 } 1270 }
946} 1271}
947EXPORT_SYMBOL_GPL(__netpoll_cleanup); 1272EXPORT_SYMBOL_GPL(__netpoll_cleanup);
948 1273
949static void rcu_cleanup_netpoll(struct rcu_head *rcu_head) 1274static void netpoll_async_cleanup(struct work_struct *work)
950{ 1275{
951 struct netpoll *np = container_of(rcu_head, struct netpoll, rcu); 1276 struct netpoll *np = container_of(work, struct netpoll, cleanup_work);
952 1277
1278 rtnl_lock();
953 __netpoll_cleanup(np); 1279 __netpoll_cleanup(np);
1280 rtnl_unlock();
954 kfree(np); 1281 kfree(np);
955} 1282}
956 1283
957void __netpoll_free_rcu(struct netpoll *np) 1284void __netpoll_free_async(struct netpoll *np)
958{ 1285{
959 call_rcu_bh(&np->rcu, rcu_cleanup_netpoll); 1286 schedule_work(&np->cleanup_work);
960} 1287}
961EXPORT_SYMBOL_GPL(__netpoll_free_rcu); 1288EXPORT_SYMBOL_GPL(__netpoll_free_async);
962 1289
963void netpoll_cleanup(struct netpoll *np) 1290void netpoll_cleanup(struct netpoll *np)
964{ 1291{
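
__netpoll_free_rcu() becomes __netpoll_free_async(): cleanup now needs the rtnl, which cannot be taken from an RCU callback, so it is deferred to a work item. A sketch of the caller-side pattern, modeled on how bonding detaches a slave's netpoll; the example_slave structure is illustrative:

static void example_slave_disable_netpoll(struct example_slave *slave)
{
	struct netpoll *np = slave->np;

	if (!np)
		return;
	slave->np = NULL;

	/* defers rtnl_lock + __netpoll_cleanup + kfree to a workqueue,
	 * so this is safe from contexts that cannot take the rtnl */
	__netpoll_free_async(np);
}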
diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c
index 5e67defe2cb0..0777d0aa18c3 100644
--- a/net/core/netprio_cgroup.c
+++ b/net/core/netprio_cgroup.c
@@ -69,10 +69,8 @@ static int extend_netdev_table(struct net_device *dev, u32 target_idx)
69 69
70 /* allocate & copy */ 70 /* allocate & copy */
71 new = kzalloc(new_sz, GFP_KERNEL); 71 new = kzalloc(new_sz, GFP_KERNEL);
72 if (!new) { 72 if (!new)
73 pr_warn("Unable to alloc new priomap!\n");
74 return -ENOMEM; 73 return -ENOMEM;
75 }
76 74
77 if (old) 75 if (old)
78 memcpy(new->priomap, old->priomap, 76 memcpy(new->priomap, old->priomap,
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index b29dacf900f9..6048fc1da1c2 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -164,6 +164,7 @@
164#ifdef CONFIG_XFRM 164#ifdef CONFIG_XFRM
165#include <net/xfrm.h> 165#include <net/xfrm.h>
166#endif 166#endif
167#include <net/netns/generic.h>
167#include <asm/byteorder.h> 168#include <asm/byteorder.h>
168#include <linux/rcupdate.h> 169#include <linux/rcupdate.h>
169#include <linux/bitops.h> 170#include <linux/bitops.h>
@@ -212,7 +213,6 @@
212#define PKTGEN_MAGIC 0xbe9be955 213#define PKTGEN_MAGIC 0xbe9be955
213#define PG_PROC_DIR "pktgen" 214#define PG_PROC_DIR "pktgen"
214#define PGCTRL "pgctrl" 215#define PGCTRL "pgctrl"
215static struct proc_dir_entry *pg_proc_dir;
216 216
217#define MAX_CFLOWS 65536 217#define MAX_CFLOWS 65536
218 218
@@ -397,7 +397,15 @@ struct pktgen_hdr {
397 __be32 tv_usec; 397 __be32 tv_usec;
398}; 398};
399 399
400static bool pktgen_exiting __read_mostly; 400
401static int pg_net_id __read_mostly;
402
403struct pktgen_net {
404 struct net *net;
405 struct proc_dir_entry *proc_dir;
406 struct list_head pktgen_threads;
407 bool pktgen_exiting;
408};
401 409
402struct pktgen_thread { 410struct pktgen_thread {
403 spinlock_t if_lock; /* for list of devices */ 411 spinlock_t if_lock; /* for list of devices */
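
pktgen's globals move into a per-namespace struct pktgen_net reached via net_generic(). The conversion implies pernet_operations registered with .id and .size so the core allocates and indexes the structure; a sketch of that wiring (the actual registration hunk is outside this excerpt, and pg_net_init here is a simplified stand-in):

static int __net_init pg_net_init(struct net *net)
{
	struct pktgen_net *pn = net_generic(net, pg_net_id);

	pn->net = net;
	INIT_LIST_HEAD(&pn->pktgen_threads);
	pn->pktgen_exiting = false;
	pn->proc_dir = proc_mkdir(PG_PROC_DIR, pn->net->proc_net);
	return pn->proc_dir ? 0 : -ENODEV;
}

static struct pernet_operations pg_net_ops = {
	.init = pg_net_init,
	.id   = &pg_net_id,
	.size = sizeof(struct pktgen_net),
};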
@@ -414,6 +422,7 @@ struct pktgen_thread {
414 422
415 wait_queue_head_t queue; 423 wait_queue_head_t queue;
416 struct completion start_done; 424 struct completion start_done;
425 struct pktgen_net *net;
417}; 426};
418 427
419#define REMOVE 1 428#define REMOVE 1
@@ -428,9 +437,9 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname);
428static struct pktgen_dev *pktgen_find_dev(struct pktgen_thread *t, 437static struct pktgen_dev *pktgen_find_dev(struct pktgen_thread *t,
429 const char *ifname, bool exact); 438 const char *ifname, bool exact);
430static int pktgen_device_event(struct notifier_block *, unsigned long, void *); 439static int pktgen_device_event(struct notifier_block *, unsigned long, void *);
431static void pktgen_run_all_threads(void); 440static void pktgen_run_all_threads(struct pktgen_net *pn);
432static void pktgen_reset_all_threads(void); 441static void pktgen_reset_all_threads(struct pktgen_net *pn);
433static void pktgen_stop_all_threads_ifs(void); 442static void pktgen_stop_all_threads_ifs(struct pktgen_net *pn);
434 443
435static void pktgen_stop(struct pktgen_thread *t); 444static void pktgen_stop(struct pktgen_thread *t);
436static void pktgen_clear_counters(struct pktgen_dev *pkt_dev); 445static void pktgen_clear_counters(struct pktgen_dev *pkt_dev);
@@ -442,7 +451,6 @@ static int pg_clone_skb_d __read_mostly;
442static int debug __read_mostly; 451static int debug __read_mostly;
443 452
444static DEFINE_MUTEX(pktgen_thread_lock); 453static DEFINE_MUTEX(pktgen_thread_lock);
445static LIST_HEAD(pktgen_threads);
446 454
447static struct notifier_block pktgen_notifier_block = { 455static struct notifier_block pktgen_notifier_block = {
448 .notifier_call = pktgen_device_event, 456 .notifier_call = pktgen_device_event,
@@ -464,6 +472,7 @@ static ssize_t pgctrl_write(struct file *file, const char __user *buf,
464{ 472{
465 int err = 0; 473 int err = 0;
466 char data[128]; 474 char data[128];
475 struct pktgen_net *pn = net_generic(current->nsproxy->net_ns, pg_net_id);
467 476
468 if (!capable(CAP_NET_ADMIN)) { 477 if (!capable(CAP_NET_ADMIN)) {
469 err = -EPERM; 478 err = -EPERM;
@@ -480,13 +489,13 @@ static ssize_t pgctrl_write(struct file *file, const char __user *buf,
480 data[count - 1] = 0; /* Make string */ 489 data[count - 1] = 0; /* Make string */
481 490
482 if (!strcmp(data, "stop")) 491 if (!strcmp(data, "stop"))
483 pktgen_stop_all_threads_ifs(); 492 pktgen_stop_all_threads_ifs(pn);
484 493
485 else if (!strcmp(data, "start")) 494 else if (!strcmp(data, "start"))
486 pktgen_run_all_threads(); 495 pktgen_run_all_threads(pn);
487 496
488 else if (!strcmp(data, "reset")) 497 else if (!strcmp(data, "reset"))
489 pktgen_reset_all_threads(); 498 pktgen_reset_all_threads(pn);
490 499
491 else 500 else
492 pr_warning("Unknown command: %s\n", data); 501 pr_warning("Unknown command: %s\n", data);
@@ -1781,10 +1790,13 @@ static ssize_t pktgen_thread_write(struct file *file,
1781 return -EFAULT; 1790 return -EFAULT;
1782 i += len; 1791 i += len;
1783 mutex_lock(&pktgen_thread_lock); 1792 mutex_lock(&pktgen_thread_lock);
1784 pktgen_add_device(t, f); 1793 ret = pktgen_add_device(t, f);
1785 mutex_unlock(&pktgen_thread_lock); 1794 mutex_unlock(&pktgen_thread_lock);
1786 ret = count; 1795 if (!ret) {
1787 sprintf(pg_result, "OK: add_device=%s", f); 1796 ret = count;
1797 sprintf(pg_result, "OK: add_device=%s", f);
1798 } else
1799 sprintf(pg_result, "ERROR: can not add device %s", f);
1788 goto out; 1800 goto out;
1789 } 1801 }
1790 1802
@@ -1824,13 +1836,14 @@ static const struct file_operations pktgen_thread_fops = {
1824}; 1836};
1825 1837
1826/* Think find or remove for NN */ 1838/* Think find or remove for NN */
1827static struct pktgen_dev *__pktgen_NN_threads(const char *ifname, int remove) 1839static struct pktgen_dev *__pktgen_NN_threads(const struct pktgen_net *pn,
1840 const char *ifname, int remove)
1828{ 1841{
1829 struct pktgen_thread *t; 1842 struct pktgen_thread *t;
1830 struct pktgen_dev *pkt_dev = NULL; 1843 struct pktgen_dev *pkt_dev = NULL;
1831 bool exact = (remove == FIND); 1844 bool exact = (remove == FIND);
1832 1845
1833 list_for_each_entry(t, &pktgen_threads, th_list) { 1846 list_for_each_entry(t, &pn->pktgen_threads, th_list) {
1834 pkt_dev = pktgen_find_dev(t, ifname, exact); 1847 pkt_dev = pktgen_find_dev(t, ifname, exact);
1835 if (pkt_dev) { 1848 if (pkt_dev) {
1836 if (remove) { 1849 if (remove) {
@@ -1848,7 +1861,7 @@ static struct pktgen_dev *__pktgen_NN_threads(const char *ifname, int remove)
1848/* 1861/*
1849 * mark a device for removal 1862 * mark a device for removal
1850 */ 1863 */
1851static void pktgen_mark_device(const char *ifname) 1864static void pktgen_mark_device(const struct pktgen_net *pn, const char *ifname)
1852{ 1865{
1853 struct pktgen_dev *pkt_dev = NULL; 1866 struct pktgen_dev *pkt_dev = NULL;
1854 const int max_tries = 10, msec_per_try = 125; 1867 const int max_tries = 10, msec_per_try = 125;
@@ -1859,7 +1872,7 @@ static void pktgen_mark_device(const char *ifname)
1859 1872
1860 while (1) { 1873 while (1) {
1861 1874
1862 pkt_dev = __pktgen_NN_threads(ifname, REMOVE); 1875 pkt_dev = __pktgen_NN_threads(pn, ifname, REMOVE);
1863 if (pkt_dev == NULL) 1876 if (pkt_dev == NULL)
1864 break; /* success */ 1877 break; /* success */
1865 1878
@@ -1880,21 +1893,21 @@ static void pktgen_mark_device(const char *ifname)
1880 mutex_unlock(&pktgen_thread_lock); 1893 mutex_unlock(&pktgen_thread_lock);
1881} 1894}
1882 1895
1883static void pktgen_change_name(struct net_device *dev) 1896static void pktgen_change_name(const struct pktgen_net *pn, struct net_device *dev)
1884{ 1897{
1885 struct pktgen_thread *t; 1898 struct pktgen_thread *t;
1886 1899
1887 list_for_each_entry(t, &pktgen_threads, th_list) { 1900 list_for_each_entry(t, &pn->pktgen_threads, th_list) {
1888 struct pktgen_dev *pkt_dev; 1901 struct pktgen_dev *pkt_dev;
1889 1902
1890 list_for_each_entry(pkt_dev, &t->if_list, list) { 1903 list_for_each_entry(pkt_dev, &t->if_list, list) {
1891 if (pkt_dev->odev != dev) 1904 if (pkt_dev->odev != dev)
1892 continue; 1905 continue;
1893 1906
1894 remove_proc_entry(pkt_dev->entry->name, pg_proc_dir); 1907 remove_proc_entry(pkt_dev->entry->name, pn->proc_dir);
1895 1908
1896 pkt_dev->entry = proc_create_data(dev->name, 0600, 1909 pkt_dev->entry = proc_create_data(dev->name, 0600,
1897 pg_proc_dir, 1910 pn->proc_dir,
1898 &pktgen_if_fops, 1911 &pktgen_if_fops,
1899 pkt_dev); 1912 pkt_dev);
1900 if (!pkt_dev->entry) 1913 if (!pkt_dev->entry)
@@ -1909,8 +1922,9 @@ static int pktgen_device_event(struct notifier_block *unused,
1909 unsigned long event, void *ptr) 1922 unsigned long event, void *ptr)
1910{ 1923{
1911 struct net_device *dev = ptr; 1924 struct net_device *dev = ptr;
1925 struct pktgen_net *pn = net_generic(dev_net(dev), pg_net_id);
1912 1926
1913 if (!net_eq(dev_net(dev), &init_net) || pktgen_exiting) 1927 if (pn->pktgen_exiting)
1914 return NOTIFY_DONE; 1928 return NOTIFY_DONE;
1915 1929
1916 /* It is OK that we do not hold the group lock right now, 1930 /* It is OK that we do not hold the group lock right now,
@@ -1919,18 +1933,19 @@ static int pktgen_device_event(struct notifier_block *unused,
1919 1933
1920 switch (event) { 1934 switch (event) {
1921 case NETDEV_CHANGENAME: 1935 case NETDEV_CHANGENAME:
1922 pktgen_change_name(dev); 1936 pktgen_change_name(pn, dev);
1923 break; 1937 break;
1924 1938
1925 case NETDEV_UNREGISTER: 1939 case NETDEV_UNREGISTER:
1926 pktgen_mark_device(dev->name); 1940 pktgen_mark_device(pn, dev->name);
1927 break; 1941 break;
1928 } 1942 }
1929 1943
1930 return NOTIFY_DONE; 1944 return NOTIFY_DONE;
1931} 1945}
1932 1946
1933static struct net_device *pktgen_dev_get_by_name(struct pktgen_dev *pkt_dev, 1947static struct net_device *pktgen_dev_get_by_name(const struct pktgen_net *pn,
1948 struct pktgen_dev *pkt_dev,
1934 const char *ifname) 1949 const char *ifname)
1935{ 1950{
1936 char b[IFNAMSIZ+5]; 1951 char b[IFNAMSIZ+5];
@@ -1944,13 +1959,14 @@ static struct net_device *pktgen_dev_get_by_name(struct pktgen_dev *pkt_dev,
1944 } 1959 }
1945 b[i] = 0; 1960 b[i] = 0;
1946 1961
1947 return dev_get_by_name(&init_net, b); 1962 return dev_get_by_name(pn->net, b);
1948} 1963}
1949 1964
1950 1965
1951/* Associate pktgen_dev with a device. */ 1966/* Associate pktgen_dev with a device. */
1952 1967
1953static int pktgen_setup_dev(struct pktgen_dev *pkt_dev, const char *ifname) 1968static int pktgen_setup_dev(const struct pktgen_net *pn,
1969 struct pktgen_dev *pkt_dev, const char *ifname)
1954{ 1970{
1955 struct net_device *odev; 1971 struct net_device *odev;
1956 int err; 1972 int err;
@@ -1961,7 +1977,7 @@ static int pktgen_setup_dev(struct pktgen_dev *pkt_dev, const char *ifname)
1961 pkt_dev->odev = NULL; 1977 pkt_dev->odev = NULL;
1962 } 1978 }
1963 1979
1964 odev = pktgen_dev_get_by_name(pkt_dev, ifname); 1980 odev = pktgen_dev_get_by_name(pn, pkt_dev, ifname);
1965 if (!odev) { 1981 if (!odev) {
1966 pr_err("no such netdevice: \"%s\"\n", ifname); 1982 pr_err("no such netdevice: \"%s\"\n", ifname);
1967 return -ENODEV; 1983 return -ENODEV;
@@ -2203,9 +2219,10 @@ static inline int f_pick(struct pktgen_dev *pkt_dev)
2203static void get_ipsec_sa(struct pktgen_dev *pkt_dev, int flow) 2219static void get_ipsec_sa(struct pktgen_dev *pkt_dev, int flow)
2204{ 2220{
2205 struct xfrm_state *x = pkt_dev->flows[flow].x; 2221 struct xfrm_state *x = pkt_dev->flows[flow].x;
2222 struct pktgen_net *pn = net_generic(dev_net(pkt_dev->odev), pg_net_id);
2206 if (!x) { 2223 if (!x) {
2207 /*slow path: we dont already have xfrm_state*/ 2224 /*slow path: we dont already have xfrm_state*/
2208 x = xfrm_stateonly_find(&init_net, DUMMY_MARK, 2225 x = xfrm_stateonly_find(pn->net, DUMMY_MARK,
2209 (xfrm_address_t *)&pkt_dev->cur_daddr, 2226 (xfrm_address_t *)&pkt_dev->cur_daddr,
2210 (xfrm_address_t *)&pkt_dev->cur_saddr, 2227 (xfrm_address_t *)&pkt_dev->cur_saddr,
2211 AF_INET, 2228 AF_INET,
@@ -2912,7 +2929,7 @@ static void pktgen_run(struct pktgen_thread *t)
2912 t->control &= ~(T_STOP); 2929 t->control &= ~(T_STOP);
2913} 2930}
2914 2931
2915static void pktgen_stop_all_threads_ifs(void) 2932static void pktgen_stop_all_threads_ifs(struct pktgen_net *pn)
2916{ 2933{
2917 struct pktgen_thread *t; 2934 struct pktgen_thread *t;
2918 2935
@@ -2920,7 +2937,7 @@ static void pktgen_stop_all_threads_ifs(void)
2920 2937
2921 mutex_lock(&pktgen_thread_lock); 2938 mutex_lock(&pktgen_thread_lock);
2922 2939
2923 list_for_each_entry(t, &pktgen_threads, th_list) 2940 list_for_each_entry(t, &pn->pktgen_threads, th_list)
2924 t->control |= T_STOP; 2941 t->control |= T_STOP;
2925 2942
2926 mutex_unlock(&pktgen_thread_lock); 2943 mutex_unlock(&pktgen_thread_lock);
@@ -2956,28 +2973,28 @@ signal:
2956 return 0; 2973 return 0;
2957} 2974}
2958 2975
2959static int pktgen_wait_all_threads_run(void) 2976static int pktgen_wait_all_threads_run(struct pktgen_net *pn)
2960{ 2977{
2961 struct pktgen_thread *t; 2978 struct pktgen_thread *t;
2962 int sig = 1; 2979 int sig = 1;
2963 2980
2964 mutex_lock(&pktgen_thread_lock); 2981 mutex_lock(&pktgen_thread_lock);
2965 2982
2966 list_for_each_entry(t, &pktgen_threads, th_list) { 2983 list_for_each_entry(t, &pn->pktgen_threads, th_list) {
2967 sig = pktgen_wait_thread_run(t); 2984 sig = pktgen_wait_thread_run(t);
2968 if (sig == 0) 2985 if (sig == 0)
2969 break; 2986 break;
2970 } 2987 }
2971 2988
2972 if (sig == 0) 2989 if (sig == 0)
2973 list_for_each_entry(t, &pktgen_threads, th_list) 2990 list_for_each_entry(t, &pn->pktgen_threads, th_list)
2974 t->control |= (T_STOP); 2991 t->control |= (T_STOP);
2975 2992
2976 mutex_unlock(&pktgen_thread_lock); 2993 mutex_unlock(&pktgen_thread_lock);
2977 return sig; 2994 return sig;
2978} 2995}
2979 2996
2980static void pktgen_run_all_threads(void) 2997static void pktgen_run_all_threads(struct pktgen_net *pn)
2981{ 2998{
2982 struct pktgen_thread *t; 2999 struct pktgen_thread *t;
2983 3000
@@ -2985,7 +3002,7 @@ static void pktgen_run_all_threads(void)
2985 3002
2986 mutex_lock(&pktgen_thread_lock); 3003 mutex_lock(&pktgen_thread_lock);
2987 3004
2988 list_for_each_entry(t, &pktgen_threads, th_list) 3005 list_for_each_entry(t, &pn->pktgen_threads, th_list)
2989 t->control |= (T_RUN); 3006 t->control |= (T_RUN);
2990 3007
2991 mutex_unlock(&pktgen_thread_lock); 3008 mutex_unlock(&pktgen_thread_lock);
@@ -2993,10 +3010,10 @@ static void pktgen_run_all_threads(void)
2993 /* Propagate thread->control */ 3010 /* Propagate thread->control */
2994 schedule_timeout_interruptible(msecs_to_jiffies(125)); 3011 schedule_timeout_interruptible(msecs_to_jiffies(125));
2995 3012
2996 pktgen_wait_all_threads_run(); 3013 pktgen_wait_all_threads_run(pn);
2997} 3014}
2998 3015
2999static void pktgen_reset_all_threads(void) 3016static void pktgen_reset_all_threads(struct pktgen_net *pn)
3000{ 3017{
3001 struct pktgen_thread *t; 3018 struct pktgen_thread *t;
3002 3019
@@ -3004,7 +3021,7 @@ static void pktgen_reset_all_threads(void)
3004 3021
3005 mutex_lock(&pktgen_thread_lock); 3022 mutex_lock(&pktgen_thread_lock);
3006 3023
3007 list_for_each_entry(t, &pktgen_threads, th_list) 3024 list_for_each_entry(t, &pn->pktgen_threads, th_list)
3008 t->control |= (T_REMDEVALL); 3025 t->control |= (T_REMDEVALL);
3009 3026
3010 mutex_unlock(&pktgen_thread_lock); 3027 mutex_unlock(&pktgen_thread_lock);
@@ -3012,7 +3029,7 @@ static void pktgen_reset_all_threads(void)
3012 /* Propagate thread->control */ 3029 /* Propagate thread->control */
3013 schedule_timeout_interruptible(msecs_to_jiffies(125)); 3030 schedule_timeout_interruptible(msecs_to_jiffies(125));
3014 3031
3015 pktgen_wait_all_threads_run(); 3032 pktgen_wait_all_threads_run(pn);
3016} 3033}
3017 3034
3018static void show_results(struct pktgen_dev *pkt_dev, int nr_frags) 3035static void show_results(struct pktgen_dev *pkt_dev, int nr_frags)
@@ -3154,9 +3171,7 @@ static void pktgen_rem_all_ifs(struct pktgen_thread *t)
3154static void pktgen_rem_thread(struct pktgen_thread *t) 3171static void pktgen_rem_thread(struct pktgen_thread *t)
3155{ 3172{
3156 /* Remove from the thread list */ 3173 /* Remove from the thread list */
3157 3174 remove_proc_entry(t->tsk->comm, t->net->proc_dir);
3158 remove_proc_entry(t->tsk->comm, pg_proc_dir);
3159
3160} 3175}
3161 3176
3162static void pktgen_resched(struct pktgen_dev *pkt_dev) 3177static void pktgen_resched(struct pktgen_dev *pkt_dev)
@@ -3302,7 +3317,7 @@ static int pktgen_thread_worker(void *arg)
3302 pkt_dev = next_to_run(t); 3317 pkt_dev = next_to_run(t);
3303 3318
3304 if (unlikely(!pkt_dev && t->control == 0)) { 3319 if (unlikely(!pkt_dev && t->control == 0)) {
3305 if (pktgen_exiting) 3320 if (t->net->pktgen_exiting)
3306 break; 3321 break;
3307 wait_event_interruptible_timeout(t->queue, 3322 wait_event_interruptible_timeout(t->queue,
3308 t->control != 0, 3323 t->control != 0,
@@ -3424,7 +3439,7 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname)
3424 3439
3425 /* We don't allow a device to be on several threads */ 3440 /* We don't allow a device to be on several threads */
3426 3441
3427 pkt_dev = __pktgen_NN_threads(ifname, FIND); 3442 pkt_dev = __pktgen_NN_threads(t->net, ifname, FIND);
3428 if (pkt_dev) { 3443 if (pkt_dev) {
3429 pr_err("ERROR: interface already used\n"); 3444 pr_err("ERROR: interface already used\n");
3430 return -EBUSY; 3445 return -EBUSY;
@@ -3459,13 +3474,13 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname)
3459 pkt_dev->svlan_id = 0xffff; 3474 pkt_dev->svlan_id = 0xffff;
3460 pkt_dev->node = -1; 3475 pkt_dev->node = -1;
3461 3476
3462 err = pktgen_setup_dev(pkt_dev, ifname); 3477 err = pktgen_setup_dev(t->net, pkt_dev, ifname);
3463 if (err) 3478 if (err)
3464 goto out1; 3479 goto out1;
3465 if (pkt_dev->odev->priv_flags & IFF_TX_SKB_SHARING) 3480 if (pkt_dev->odev->priv_flags & IFF_TX_SKB_SHARING)
3466 pkt_dev->clone_skb = pg_clone_skb_d; 3481 pkt_dev->clone_skb = pg_clone_skb_d;
3467 3482
3468 pkt_dev->entry = proc_create_data(ifname, 0600, pg_proc_dir, 3483 pkt_dev->entry = proc_create_data(ifname, 0600, t->net->proc_dir,
3469 &pktgen_if_fops, pkt_dev); 3484 &pktgen_if_fops, pkt_dev);
3470 if (!pkt_dev->entry) { 3485 if (!pkt_dev->entry) {
3471 pr_err("cannot create %s/%s procfs entry\n", 3486 pr_err("cannot create %s/%s procfs entry\n",
@@ -3490,7 +3505,7 @@ out1:
3490 return err; 3505 return err;
3491} 3506}
3492 3507
3493static int __init pktgen_create_thread(int cpu) 3508static int __net_init pktgen_create_thread(int cpu, struct pktgen_net *pn)
3494{ 3509{
3495 struct pktgen_thread *t; 3510 struct pktgen_thread *t;
3496 struct proc_dir_entry *pe; 3511 struct proc_dir_entry *pe;
@@ -3508,7 +3523,7 @@ static int __init pktgen_create_thread(int cpu)
3508 3523
3509 INIT_LIST_HEAD(&t->if_list); 3524 INIT_LIST_HEAD(&t->if_list);
3510 3525
3511 list_add_tail(&t->th_list, &pktgen_threads); 3526 list_add_tail(&t->th_list, &pn->pktgen_threads);
3512 init_completion(&t->start_done); 3527 init_completion(&t->start_done);
3513 3528
3514 p = kthread_create_on_node(pktgen_thread_worker, 3529 p = kthread_create_on_node(pktgen_thread_worker,
@@ -3524,7 +3539,7 @@ static int __init pktgen_create_thread(int cpu)
3524 kthread_bind(p, cpu); 3539 kthread_bind(p, cpu);
3525 t->tsk = p; 3540 t->tsk = p;
3526 3541
3527 pe = proc_create_data(t->tsk->comm, 0600, pg_proc_dir, 3542 pe = proc_create_data(t->tsk->comm, 0600, pn->proc_dir,
3528 &pktgen_thread_fops, t); 3543 &pktgen_thread_fops, t);
3529 if (!pe) { 3544 if (!pe) {
3530 pr_err("cannot create %s/%s procfs entry\n", 3545 pr_err("cannot create %s/%s procfs entry\n",
@@ -3535,6 +3550,7 @@ static int __init pktgen_create_thread(int cpu)
3535 return -EINVAL; 3550 return -EINVAL;
3536 } 3551 }
3537 3552
3553 t->net = pn;
3538 wake_up_process(p); 3554 wake_up_process(p);
3539 wait_for_completion(&t->start_done); 3555 wait_for_completion(&t->start_done);
3540 3556
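For context, the lines around this hunk follow the standard per-CPU kthread idiom; roughly, as a sketch of the surrounding (mostly unchanged) creation code:

/* sketch: create the worker on the CPU's memory node, pin it to
 * that CPU, then let it run */
p = kthread_create_on_node(pktgen_thread_worker, t, cpu_to_node(cpu),
			   "kpktgend_%d", cpu);
if (IS_ERR(p))
	return PTR_ERR(p);
kthread_bind(p, cpu);
wake_up_process(p);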
@@ -3560,6 +3576,7 @@ static void _rem_dev_from_if_list(struct pktgen_thread *t,
3560static int pktgen_remove_device(struct pktgen_thread *t, 3576static int pktgen_remove_device(struct pktgen_thread *t,
3561 struct pktgen_dev *pkt_dev) 3577 struct pktgen_dev *pkt_dev)
3562{ 3578{
3579 struct pktgen_net *pn = t->net;
3563 3580
3564 pr_debug("remove_device pkt_dev=%p\n", pkt_dev); 3581 pr_debug("remove_device pkt_dev=%p\n", pkt_dev);
3565 3582
@@ -3580,7 +3597,7 @@ static int pktgen_remove_device(struct pktgen_thread *t,
3580 _rem_dev_from_if_list(t, pkt_dev); 3597 _rem_dev_from_if_list(t, pkt_dev);
3581 3598
3582 if (pkt_dev->entry) 3599 if (pkt_dev->entry)
3583 remove_proc_entry(pkt_dev->entry->name, pg_proc_dir); 3600 remove_proc_entry(pkt_dev->entry->name, pn->proc_dir);
3584 3601
3585#ifdef CONFIG_XFRM 3602#ifdef CONFIG_XFRM
3586 free_SAs(pkt_dev); 3603 free_SAs(pkt_dev);
@@ -3592,63 +3609,63 @@ static int pktgen_remove_device(struct pktgen_thread *t,
3592 return 0; 3609 return 0;
3593} 3610}
3594 3611
3595static int __init pg_init(void) 3612static int __net_init pg_net_init(struct net *net)
3596{ 3613{
3597 int cpu; 3614 struct pktgen_net *pn = net_generic(net, pg_net_id);
3598 struct proc_dir_entry *pe; 3615 struct proc_dir_entry *pe;
3599 int ret = 0; 3616 int cpu, ret = 0;
3600 3617
3601 pr_info("%s", version); 3618 pn->net = net;
3602 3619 INIT_LIST_HEAD(&pn->pktgen_threads);
3603 pg_proc_dir = proc_mkdir(PG_PROC_DIR, init_net.proc_net); 3620 pn->pktgen_exiting = false;
3604 if (!pg_proc_dir) 3621 pn->proc_dir = proc_mkdir(PG_PROC_DIR, pn->net->proc_net);
3622 if (!pn->proc_dir) {
3623 pr_warn("cannot create /proc/net/%s\n", PG_PROC_DIR);
3605 return -ENODEV; 3624 return -ENODEV;
3606 3625 }
3607 pe = proc_create(PGCTRL, 0600, pg_proc_dir, &pktgen_fops); 3626 pe = proc_create(PGCTRL, 0600, pn->proc_dir, &pktgen_fops);
3608 if (pe == NULL) { 3627 if (pe == NULL) {
3609 pr_err("ERROR: cannot create %s procfs entry\n", PGCTRL); 3628 pr_err("cannot create %s procfs entry\n", PGCTRL);
3610 ret = -EINVAL; 3629 ret = -EINVAL;
3611 goto remove_dir; 3630 goto remove;
3612 } 3631 }
3613 3632
3614 register_netdevice_notifier(&pktgen_notifier_block);
3615
3616 for_each_online_cpu(cpu) { 3633 for_each_online_cpu(cpu) {
3617 int err; 3634 int err;
3618 3635
3619 err = pktgen_create_thread(cpu); 3636 err = pktgen_create_thread(cpu, pn);
3620 if (err) 3637 if (err)
3621 pr_warning("WARNING: Cannot create thread for cpu %d (%d)\n", 3638 pr_warn("Cannot create thread for cpu %d (%d)\n",
3622 cpu, err); 3639 cpu, err);
3623 } 3640 }
3624 3641
3625 if (list_empty(&pktgen_threads)) { 3642 if (list_empty(&pn->pktgen_threads)) {
3626 pr_err("ERROR: Initialization failed for all threads\n"); 3643 pr_err("Initialization failed for all threads\n");
3627 ret = -ENODEV; 3644 ret = -ENODEV;
3628 goto unregister; 3645 goto remove_entry;
3629 } 3646 }
3630 3647
3631 return 0; 3648 return 0;
3632 3649
3633 unregister: 3650remove_entry:
3634 unregister_netdevice_notifier(&pktgen_notifier_block); 3651 remove_proc_entry(PGCTRL, pn->proc_dir);
3635 remove_proc_entry(PGCTRL, pg_proc_dir); 3652remove:
3636 remove_dir: 3653 remove_proc_entry(PG_PROC_DIR, pn->net->proc_net);
3637 proc_net_remove(&init_net, PG_PROC_DIR);
3638 return ret; 3654 return ret;
3639} 3655}
3640 3656
3641static void __exit pg_cleanup(void) 3657static void __net_exit pg_net_exit(struct net *net)
3642{ 3658{
3659 struct pktgen_net *pn = net_generic(net, pg_net_id);
3643 struct pktgen_thread *t; 3660 struct pktgen_thread *t;
3644 struct list_head *q, *n; 3661 struct list_head *q, *n;
3645 LIST_HEAD(list); 3662 LIST_HEAD(list);
3646 3663
3647 /* Stop all interfaces & threads */ 3664 /* Stop all interfaces & threads */
3648 pktgen_exiting = true; 3665 pn->pktgen_exiting = true;
3649 3666
3650 mutex_lock(&pktgen_thread_lock); 3667 mutex_lock(&pktgen_thread_lock);
3651 list_splice_init(&pktgen_threads, &list); 3668 list_splice_init(&pn->pktgen_threads, &list);
3652 mutex_unlock(&pktgen_thread_lock); 3669 mutex_unlock(&pktgen_thread_lock);
3653 3670
3654 list_for_each_safe(q, n, &list) { 3671 list_for_each_safe(q, n, &list) {
@@ -3658,12 +3675,36 @@ static void __exit pg_cleanup(void)
3658 kfree(t); 3675 kfree(t);
3659 } 3676 }
3660 3677
3661 /* Un-register us from receiving netdevice events */ 3678 remove_proc_entry(PGCTRL, pn->proc_dir);
3662 unregister_netdevice_notifier(&pktgen_notifier_block); 3679 remove_proc_entry(PG_PROC_DIR, pn->net->proc_net);
3680}
3681
3682static struct pernet_operations pg_net_ops = {
3683 .init = pg_net_init,
3684 .exit = pg_net_exit,
3685 .id = &pg_net_id,
3686 .size = sizeof(struct pktgen_net),
3687};
3688
3689static int __init pg_init(void)
3690{
3691 int ret = 0;
3663 3692
3664 /* Clean up proc file system */ 3693 pr_info("%s", version);
3665 remove_proc_entry(PGCTRL, pg_proc_dir); 3694 ret = register_pernet_subsys(&pg_net_ops);
3666 proc_net_remove(&init_net, PG_PROC_DIR); 3695 if (ret)
3696 return ret;
3697 ret = register_netdevice_notifier(&pktgen_notifier_block);
3698 if (ret)
3699 unregister_pernet_subsys(&pg_net_ops);
3700
3701 return ret;
3702}
3703
3704static void __exit pg_cleanup(void)
3705{
3706 unregister_netdevice_notifier(&pktgen_notifier_block);
3707 unregister_pernet_subsys(&pg_net_ops);
3667} 3708}
3668 3709
3669module_init(pg_init); 3710module_init(pg_init);
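Note the asymmetry the conversion leaves behind: proc entries and thread lists are now per-namespace, but the netdevice notifier remains a single global registration, now done in pg_init() after the pernet subsystem registers. That is sound because every event carries the device, and the device pins its namespace; a sketch of the dispatch, with example_device_event() as an illustrative name:

#include <linux/netdevice.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>

/* sketch: one global notifier serves all namespaces, because
 * dev_net(dev) recovers the right per-net state for each event */
static int example_device_event(struct notifier_block *unused,
				unsigned long event, void *ptr)
{
	struct net_device *dev = ptr;	/* 3.8-era notifiers pass the device */
	struct pktgen_net *pn = net_generic(dev_net(dev), pg_net_id);

	if (pn->pktgen_exiting)
		return NOTIFY_DONE;
	/* ... dispatch on event as pktgen_device_event() does ... */
	return NOTIFY_DONE;
}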
diff --git a/net/core/request_sock.c b/net/core/request_sock.c
index c31d9e8668c3..4425148d2b51 100644
--- a/net/core/request_sock.c
+++ b/net/core/request_sock.c
@@ -186,8 +186,6 @@ void reqsk_fastopen_remove(struct sock *sk, struct request_sock *req,
186 struct fastopen_queue *fastopenq = 186 struct fastopen_queue *fastopenq =
187 inet_csk(lsk)->icsk_accept_queue.fastopenq; 187 inet_csk(lsk)->icsk_accept_queue.fastopenq;
188 188
189 BUG_ON(!spin_is_locked(&sk->sk_lock.slock) && !sock_owned_by_user(sk));
190
191 tcp_sk(sk)->fastopen_rsk = NULL; 189 tcp_sk(sk)->fastopen_rsk = NULL;
192 spin_lock_bh(&fastopenq->lock); 190 spin_lock_bh(&fastopenq->lock);
193 fastopenq->qlen--; 191 fastopenq->qlen--;
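The deleted assertion was unreliable rather than merely noisy: on uniprocessor builds without spinlock debugging, spin_is_locked() is constant-false, so the BUG_ON could fire on a perfectly legal call path. If an assertion were still wanted here, a lockdep-based form is the usual substitute; a sketch, assuming CONFIG_LOCKDEP:

/* sketch: lockdep_sock_is_held() is true when either the socket is
 * owned by user context or its bh spinlock is held, matching the
 * intent of the removed BUG_ON without the UP false positive */
WARN_ON_ONCE(!lockdep_sock_is_held(sk));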
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 1868625af25e..d8aa20f6a46e 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -780,6 +780,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev,
780 + nla_total_size(4) /* IFLA_MTU */ 780 + nla_total_size(4) /* IFLA_MTU */
781 + nla_total_size(4) /* IFLA_LINK */ 781 + nla_total_size(4) /* IFLA_LINK */
782 + nla_total_size(4) /* IFLA_MASTER */ 782 + nla_total_size(4) /* IFLA_MASTER */
783 + nla_total_size(1) /* IFLA_CARRIER */
783 + nla_total_size(4) /* IFLA_PROMISCUITY */ 784 + nla_total_size(4) /* IFLA_PROMISCUITY */
784 + nla_total_size(4) /* IFLA_NUM_TX_QUEUES */ 785 + nla_total_size(4) /* IFLA_NUM_TX_QUEUES */
785 + nla_total_size(4) /* IFLA_NUM_RX_QUEUES */ 786 + nla_total_size(4) /* IFLA_NUM_RX_QUEUES */
@@ -879,6 +880,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
879 const struct rtnl_link_stats64 *stats; 880 const struct rtnl_link_stats64 *stats;
880 struct nlattr *attr, *af_spec; 881 struct nlattr *attr, *af_spec;
881 struct rtnl_af_ops *af_ops; 882 struct rtnl_af_ops *af_ops;
883 struct net_device *upper_dev = netdev_master_upper_dev_get(dev);
882 884
883 ASSERT_RTNL(); 885 ASSERT_RTNL();
884 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifm), flags); 886 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifm), flags);
@@ -907,8 +909,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
907#endif 909#endif
908 (dev->ifindex != dev->iflink && 910 (dev->ifindex != dev->iflink &&
909 nla_put_u32(skb, IFLA_LINK, dev->iflink)) || 911 nla_put_u32(skb, IFLA_LINK, dev->iflink)) ||
910 (dev->master && 912 (upper_dev &&
911 nla_put_u32(skb, IFLA_MASTER, dev->master->ifindex)) || 913 nla_put_u32(skb, IFLA_MASTER, upper_dev->ifindex)) ||
914 nla_put_u8(skb, IFLA_CARRIER, netif_carrier_ok(dev)) ||
912 (dev->qdisc && 915 (dev->qdisc &&
913 nla_put_string(skb, IFLA_QDISC, dev->qdisc->ops->id)) || 916 nla_put_string(skb, IFLA_QDISC, dev->qdisc->ops->id)) ||
914 (dev->ifalias && 917 (dev->ifalias &&
@@ -1108,6 +1111,7 @@ const struct nla_policy ifla_policy[IFLA_MAX+1] = {
1108 [IFLA_MTU] = { .type = NLA_U32 }, 1111 [IFLA_MTU] = { .type = NLA_U32 },
1109 [IFLA_LINK] = { .type = NLA_U32 }, 1112 [IFLA_LINK] = { .type = NLA_U32 },
1110 [IFLA_MASTER] = { .type = NLA_U32 }, 1113 [IFLA_MASTER] = { .type = NLA_U32 },
1114 [IFLA_CARRIER] = { .type = NLA_U8 },
1111 [IFLA_TXQLEN] = { .type = NLA_U32 }, 1115 [IFLA_TXQLEN] = { .type = NLA_U32 },
1112 [IFLA_WEIGHT] = { .type = NLA_U32 }, 1116 [IFLA_WEIGHT] = { .type = NLA_U32 },
1113 [IFLA_OPERSTATE] = { .type = NLA_U8 }, 1117 [IFLA_OPERSTATE] = { .type = NLA_U8 },
@@ -1270,16 +1274,16 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr *attr)
1270 1274
1271static int do_set_master(struct net_device *dev, int ifindex) 1275static int do_set_master(struct net_device *dev, int ifindex)
1272{ 1276{
1273 struct net_device *master_dev; 1277 struct net_device *upper_dev = netdev_master_upper_dev_get(dev);
1274 const struct net_device_ops *ops; 1278 const struct net_device_ops *ops;
1275 int err; 1279 int err;
1276 1280
1277 if (dev->master) { 1281 if (upper_dev) {
1278 if (dev->master->ifindex == ifindex) 1282 if (upper_dev->ifindex == ifindex)
1279 return 0; 1283 return 0;
1280 ops = dev->master->netdev_ops; 1284 ops = upper_dev->netdev_ops;
1281 if (ops->ndo_del_slave) { 1285 if (ops->ndo_del_slave) {
1282 err = ops->ndo_del_slave(dev->master, dev); 1286 err = ops->ndo_del_slave(upper_dev, dev);
1283 if (err) 1287 if (err)
1284 return err; 1288 return err;
1285 } else { 1289 } else {
@@ -1288,12 +1292,12 @@ static int do_set_master(struct net_device *dev, int ifindex)
1288 } 1292 }
1289 1293
1290 if (ifindex) { 1294 if (ifindex) {
1291 master_dev = __dev_get_by_index(dev_net(dev), ifindex); 1295 upper_dev = __dev_get_by_index(dev_net(dev), ifindex);
1292 if (!master_dev) 1296 if (!upper_dev)
1293 return -EINVAL; 1297 return -EINVAL;
1294 ops = master_dev->netdev_ops; 1298 ops = upper_dev->netdev_ops;
1295 if (ops->ndo_add_slave) { 1299 if (ops->ndo_add_slave) {
1296 err = ops->ndo_add_slave(master_dev, dev); 1300 err = ops->ndo_add_slave(upper_dev, dev);
1297 if (err) 1301 if (err)
1298 return err; 1302 return err;
1299 } else { 1303 } else {
@@ -1307,7 +1311,6 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
1307 struct nlattr **tb, char *ifname, int modified) 1311 struct nlattr **tb, char *ifname, int modified)
1308{ 1312{
1309 const struct net_device_ops *ops = dev->netdev_ops; 1313 const struct net_device_ops *ops = dev->netdev_ops;
1310 int send_addr_notify = 0;
1311 int err; 1314 int err;
1312 1315
1313 if (tb[IFLA_NET_NS_PID] || tb[IFLA_NET_NS_FD]) { 1316 if (tb[IFLA_NET_NS_PID] || tb[IFLA_NET_NS_FD]) {
@@ -1360,16 +1363,6 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
1360 struct sockaddr *sa; 1363 struct sockaddr *sa;
1361 int len; 1364 int len;
1362 1365
1363 if (!ops->ndo_set_mac_address) {
1364 err = -EOPNOTSUPP;
1365 goto errout;
1366 }
1367
1368 if (!netif_device_present(dev)) {
1369 err = -ENODEV;
1370 goto errout;
1371 }
1372
1373 len = sizeof(sa_family_t) + dev->addr_len; 1366 len = sizeof(sa_family_t) + dev->addr_len;
1374 sa = kmalloc(len, GFP_KERNEL); 1367 sa = kmalloc(len, GFP_KERNEL);
1375 if (!sa) { 1368 if (!sa) {
@@ -1379,13 +1372,11 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
1379 sa->sa_family = dev->type; 1372 sa->sa_family = dev->type;
1380 memcpy(sa->sa_data, nla_data(tb[IFLA_ADDRESS]), 1373 memcpy(sa->sa_data, nla_data(tb[IFLA_ADDRESS]),
1381 dev->addr_len); 1374 dev->addr_len);
1382 err = ops->ndo_set_mac_address(dev, sa); 1375 err = dev_set_mac_address(dev, sa);
1383 kfree(sa); 1376 kfree(sa);
1384 if (err) 1377 if (err)
1385 goto errout; 1378 goto errout;
1386 send_addr_notify = 1;
1387 modified = 1; 1379 modified = 1;
1388 add_device_randomness(dev->dev_addr, dev->addr_len);
1389 } 1380 }
1390 1381
1391 if (tb[IFLA_MTU]) { 1382 if (tb[IFLA_MTU]) {
@@ -1422,7 +1413,7 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
1422 1413
1423 if (tb[IFLA_BROADCAST]) { 1414 if (tb[IFLA_BROADCAST]) {
1424 nla_memcpy(dev->broadcast, tb[IFLA_BROADCAST], dev->addr_len); 1415 nla_memcpy(dev->broadcast, tb[IFLA_BROADCAST], dev->addr_len);
1425 send_addr_notify = 1; 1416 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
1426 } 1417 }
1427 1418
1428 if (ifm->ifi_flags || ifm->ifi_change) { 1419 if (ifm->ifi_flags || ifm->ifi_change) {
@@ -1438,6 +1429,13 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
1438 modified = 1; 1429 modified = 1;
1439 } 1430 }
1440 1431
1432 if (tb[IFLA_CARRIER]) {
1433 err = dev_change_carrier(dev, nla_get_u8(tb[IFLA_CARRIER]));
1434 if (err)
1435 goto errout;
1436 modified = 1;
1437 }
1438
1441 if (tb[IFLA_TXQLEN]) 1439 if (tb[IFLA_TXQLEN])
1442 dev->tx_queue_len = nla_get_u32(tb[IFLA_TXQLEN]); 1440 dev->tx_queue_len = nla_get_u32(tb[IFLA_TXQLEN]);
1443 1441
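The new IFLA_CARRIER path funnels into dev_change_carrier(), added to net/core/dev.c in the same series; the helper is essentially a guarded forward to the driver hook. A sketch of its shape (the dev.c part of the series holds the authoritative version):

#include <linux/netdevice.h>

int dev_change_carrier(struct net_device *dev, bool new_carrier)
{
	const struct net_device_ops *ops = dev->netdev_ops;

	if (!ops->ndo_change_carrier)
		return -EOPNOTSUPP;	/* driver does not model carrier */
	if (!netif_device_present(dev))
		return -ENODEV;
	return ops->ndo_change_carrier(dev, new_carrier);
}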
@@ -1536,9 +1534,6 @@ errout:
1536 net_warn_ratelimited("A link change request failed with some changes committed already. Interface %s may have been left with an inconsistent configuration, please check.\n", 1534 net_warn_ratelimited("A link change request failed with some changes committed already. Interface %s may have been left with an inconsistent configuration, please check.\n",
1537 dev->name); 1535 dev->name);
1538 1536
1539 if (send_addr_notify)
1540 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
1541
1542 return err; 1537 return err;
1543} 1538}
1544 1539
@@ -1672,9 +1667,11 @@ struct net_device *rtnl_create_link(struct net *net,
1672 1667
1673 if (tb[IFLA_MTU]) 1668 if (tb[IFLA_MTU])
1674 dev->mtu = nla_get_u32(tb[IFLA_MTU]); 1669 dev->mtu = nla_get_u32(tb[IFLA_MTU]);
1675 if (tb[IFLA_ADDRESS]) 1670 if (tb[IFLA_ADDRESS]) {
1676 memcpy(dev->dev_addr, nla_data(tb[IFLA_ADDRESS]), 1671 memcpy(dev->dev_addr, nla_data(tb[IFLA_ADDRESS]),
1677 nla_len(tb[IFLA_ADDRESS])); 1672 nla_len(tb[IFLA_ADDRESS]));
1673 dev->addr_assign_type = NET_ADDR_SET;
1674 }
1678 if (tb[IFLA_BROADCAST]) 1675 if (tb[IFLA_BROADCAST])
1679 memcpy(dev->broadcast, nla_data(tb[IFLA_BROADCAST]), 1676 memcpy(dev->broadcast, nla_data(tb[IFLA_BROADCAST]),
1680 nla_len(tb[IFLA_BROADCAST])); 1677 nla_len(tb[IFLA_BROADCAST]));
@@ -1992,6 +1989,7 @@ errout:
1992 if (err < 0) 1989 if (err < 0)
1993 rtnl_set_sk_err(net, RTNLGRP_LINK, err); 1990 rtnl_set_sk_err(net, RTNLGRP_LINK, err);
1994} 1991}
1992EXPORT_SYMBOL(rtmsg_ifinfo);
1995 1993
1996static int nlmsg_populate_fdb_fill(struct sk_buff *skb, 1994static int nlmsg_populate_fdb_fill(struct sk_buff *skb,
1997 struct net_device *dev, 1995 struct net_device *dev,
@@ -2054,16 +2052,12 @@ errout:
2054static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) 2052static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
2055{ 2053{
2056 struct net *net = sock_net(skb->sk); 2054 struct net *net = sock_net(skb->sk);
2057 struct net_device *master = NULL;
2058 struct ndmsg *ndm; 2055 struct ndmsg *ndm;
2059 struct nlattr *tb[NDA_MAX+1]; 2056 struct nlattr *tb[NDA_MAX+1];
2060 struct net_device *dev; 2057 struct net_device *dev;
2061 u8 *addr; 2058 u8 *addr;
2062 int err; 2059 int err;
2063 2060
2064 if (!capable(CAP_NET_ADMIN))
2065 return -EPERM;
2066
2067 err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL); 2061 err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);
2068 if (err < 0) 2062 if (err < 0)
2069 return err; 2063 return err;
@@ -2096,10 +2090,10 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
2096 /* Support fdb on master device the net/bridge default case */ 2090 /* Support fdb on master device the net/bridge default case */
2097 if ((!ndm->ndm_flags || ndm->ndm_flags & NTF_MASTER) && 2091 if ((!ndm->ndm_flags || ndm->ndm_flags & NTF_MASTER) &&
2098 (dev->priv_flags & IFF_BRIDGE_PORT)) { 2092 (dev->priv_flags & IFF_BRIDGE_PORT)) {
2099 master = dev->master; 2093 struct net_device *br_dev = netdev_master_upper_dev_get(dev);
2100 err = master->netdev_ops->ndo_fdb_add(ndm, tb, 2094 const struct net_device_ops *ops = br_dev->netdev_ops;
2101 dev, addr, 2095
2102 nlh->nlmsg_flags); 2096 err = ops->ndo_fdb_add(ndm, tb, dev, addr, nlh->nlmsg_flags);
2103 if (err) 2097 if (err)
2104 goto out; 2098 goto out;
2105 else 2099 else
@@ -2125,7 +2119,7 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
2125{ 2119{
2126 struct net *net = sock_net(skb->sk); 2120 struct net *net = sock_net(skb->sk);
2127 struct ndmsg *ndm; 2121 struct ndmsg *ndm;
2128 struct nlattr *llattr; 2122 struct nlattr *tb[NDA_MAX+1];
2129 struct net_device *dev; 2123 struct net_device *dev;
2130 int err = -EINVAL; 2124 int err = -EINVAL;
2131 __u8 *addr; 2125 __u8 *addr;
@@ -2133,8 +2127,9 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
2133 if (!capable(CAP_NET_ADMIN)) 2127 if (!capable(CAP_NET_ADMIN))
2134 return -EPERM; 2128 return -EPERM;
2135 2129
2136 if (nlmsg_len(nlh) < sizeof(*ndm)) 2130 err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);
2137 return -EINVAL; 2131 if (err < 0)
2132 return err;
2138 2133
2139 ndm = nlmsg_data(nlh); 2134 ndm = nlmsg_data(nlh);
2140 if (ndm->ndm_ifindex == 0) { 2135 if (ndm->ndm_ifindex == 0) {
@@ -2148,22 +2143,27 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
2148 return -ENODEV; 2143 return -ENODEV;
2149 } 2144 }
2150 2145
2151 llattr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_LLADDR); 2146 if (!tb[NDA_LLADDR] || nla_len(tb[NDA_LLADDR]) != ETH_ALEN) {
2152 if (llattr == NULL || nla_len(llattr) != ETH_ALEN) { 2147 pr_info("PF_BRIDGE: RTM_DELNEIGH with invalid address\n");
2153 pr_info("PF_BRIGDE: RTM_DELNEIGH with invalid address\n"); 2148 return -EINVAL;
2149 }
2150
2151 addr = nla_data(tb[NDA_LLADDR]);
2152 if (!is_valid_ether_addr(addr)) {
2153 pr_info("PF_BRIDGE: RTM_DELNEIGH with invalid ether address\n");
2154 return -EINVAL; 2154 return -EINVAL;
2155 } 2155 }
2156 2156
2157 addr = nla_data(llattr);
2158 err = -EOPNOTSUPP; 2157 err = -EOPNOTSUPP;
2159 2158
2160 /* Support fdb on master device the net/bridge default case */ 2159 /* Support fdb on master device the net/bridge default case */
2161 if ((!ndm->ndm_flags || ndm->ndm_flags & NTF_MASTER) && 2160 if ((!ndm->ndm_flags || ndm->ndm_flags & NTF_MASTER) &&
2162 (dev->priv_flags & IFF_BRIDGE_PORT)) { 2161 (dev->priv_flags & IFF_BRIDGE_PORT)) {
2163 struct net_device *master = dev->master; 2162 struct net_device *br_dev = netdev_master_upper_dev_get(dev);
2163 const struct net_device_ops *ops = br_dev->netdev_ops;
2164 2164
2165 if (master->netdev_ops->ndo_fdb_del) 2165 if (ops->ndo_fdb_del)
2166 err = master->netdev_ops->ndo_fdb_del(ndm, dev, addr); 2166 err = ops->ndo_fdb_del(ndm, tb, dev, addr);
2167 2167
2168 if (err) 2168 if (err)
2169 goto out; 2169 goto out;
@@ -2173,7 +2173,7 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
2173 2173
2174 /* Embedded bridge, macvlan, and any other device support */ 2174 /* Embedded bridge, macvlan, and any other device support */
2175 if ((ndm->ndm_flags & NTF_SELF) && dev->netdev_ops->ndo_fdb_del) { 2175 if ((ndm->ndm_flags & NTF_SELF) && dev->netdev_ops->ndo_fdb_del) {
2176 err = dev->netdev_ops->ndo_fdb_del(ndm, dev, addr); 2176 err = dev->netdev_ops->ndo_fdb_del(ndm, tb, dev, addr);
2177 2177
2178 if (!err) { 2178 if (!err) {
2179 rtnl_fdb_notify(dev, addr, RTM_DELNEIGH); 2179 rtnl_fdb_notify(dev, addr, RTM_DELNEIGH);
@@ -2247,9 +2247,11 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb)
2247 rcu_read_lock(); 2247 rcu_read_lock();
2248 for_each_netdev_rcu(net, dev) { 2248 for_each_netdev_rcu(net, dev) {
2249 if (dev->priv_flags & IFF_BRIDGE_PORT) { 2249 if (dev->priv_flags & IFF_BRIDGE_PORT) {
2250 struct net_device *master = dev->master; 2250 struct net_device *br_dev;
2251 const struct net_device_ops *ops = master->netdev_ops; 2251 const struct net_device_ops *ops;
2252 2252
2253 br_dev = netdev_master_upper_dev_get(dev);
2254 ops = br_dev->netdev_ops;
2253 if (ops->ndo_fdb_dump) 2255 if (ops->ndo_fdb_dump)
2254 idx = ops->ndo_fdb_dump(skb, cb, dev, idx); 2256 idx = ops->ndo_fdb_dump(skb, cb, dev, idx);
2255 } 2257 }
@@ -2270,6 +2272,7 @@ int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
2270 struct ifinfomsg *ifm; 2272 struct ifinfomsg *ifm;
2271 struct nlattr *br_afspec; 2273 struct nlattr *br_afspec;
2272 u8 operstate = netif_running(dev) ? dev->operstate : IF_OPER_DOWN; 2274 u8 operstate = netif_running(dev) ? dev->operstate : IF_OPER_DOWN;
2275 struct net_device *br_dev = netdev_master_upper_dev_get(dev);
2273 2276
2274 nlh = nlmsg_put(skb, pid, seq, RTM_NEWLINK, sizeof(*ifm), NLM_F_MULTI); 2277 nlh = nlmsg_put(skb, pid, seq, RTM_NEWLINK, sizeof(*ifm), NLM_F_MULTI);
2275 if (nlh == NULL) 2278 if (nlh == NULL)
@@ -2287,8 +2290,8 @@ int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
2287 if (nla_put_string(skb, IFLA_IFNAME, dev->name) || 2290 if (nla_put_string(skb, IFLA_IFNAME, dev->name) ||
2288 nla_put_u32(skb, IFLA_MTU, dev->mtu) || 2291 nla_put_u32(skb, IFLA_MTU, dev->mtu) ||
2289 nla_put_u8(skb, IFLA_OPERSTATE, operstate) || 2292 nla_put_u8(skb, IFLA_OPERSTATE, operstate) ||
2290 (dev->master && 2293 (br_dev &&
2291 nla_put_u32(skb, IFLA_MASTER, dev->master->ifindex)) || 2294 nla_put_u32(skb, IFLA_MASTER, br_dev->ifindex)) ||
2292 (dev->addr_len && 2295 (dev->addr_len &&
2293 nla_put(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr)) || 2296 nla_put(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr)) ||
2294 (dev->ifindex != dev->iflink && 2297 (dev->ifindex != dev->iflink &&
@@ -2320,23 +2323,31 @@ static int rtnl_bridge_getlink(struct sk_buff *skb, struct netlink_callback *cb)
2320 int idx = 0; 2323 int idx = 0;
2321 u32 portid = NETLINK_CB(cb->skb).portid; 2324 u32 portid = NETLINK_CB(cb->skb).portid;
2322 u32 seq = cb->nlh->nlmsg_seq; 2325 u32 seq = cb->nlh->nlmsg_seq;
2326 struct nlattr *extfilt;
2327 u32 filter_mask = 0;
2328
2329 extfilt = nlmsg_find_attr(cb->nlh, sizeof(struct rtgenmsg),
2330 IFLA_EXT_MASK);
2331 if (extfilt)
2332 filter_mask = nla_get_u32(extfilt);
2323 2333
2324 rcu_read_lock(); 2334 rcu_read_lock();
2325 for_each_netdev_rcu(net, dev) { 2335 for_each_netdev_rcu(net, dev) {
2326 const struct net_device_ops *ops = dev->netdev_ops; 2336 const struct net_device_ops *ops = dev->netdev_ops;
2327 struct net_device *master = dev->master; 2337 struct net_device *br_dev = netdev_master_upper_dev_get(dev);
2328 2338
2329 if (master && master->netdev_ops->ndo_bridge_getlink) { 2339 if (br_dev && br_dev->netdev_ops->ndo_bridge_getlink) {
2330 if (idx >= cb->args[0] && 2340 if (idx >= cb->args[0] &&
2331 master->netdev_ops->ndo_bridge_getlink( 2341 br_dev->netdev_ops->ndo_bridge_getlink(
2332 skb, portid, seq, dev) < 0) 2342 skb, portid, seq, dev, filter_mask) < 0)
2333 break; 2343 break;
2334 idx++; 2344 idx++;
2335 } 2345 }
2336 2346
2337 if (ops->ndo_bridge_getlink) { 2347 if (ops->ndo_bridge_getlink) {
2338 if (idx >= cb->args[0] && 2348 if (idx >= cb->args[0] &&
2339 ops->ndo_bridge_getlink(skb, portid, seq, dev) < 0) 2349 ops->ndo_bridge_getlink(skb, portid, seq, dev,
2350 filter_mask) < 0)
2340 break; 2351 break;
2341 idx++; 2352 idx++;
2342 } 2353 }
@@ -2365,7 +2376,7 @@ static inline size_t bridge_nlmsg_size(void)
2365static int rtnl_bridge_notify(struct net_device *dev, u16 flags) 2376static int rtnl_bridge_notify(struct net_device *dev, u16 flags)
2366{ 2377{
2367 struct net *net = dev_net(dev); 2378 struct net *net = dev_net(dev);
2368 struct net_device *master = dev->master; 2379 struct net_device *br_dev = netdev_master_upper_dev_get(dev);
2369 struct sk_buff *skb; 2380 struct sk_buff *skb;
2370 int err = -EOPNOTSUPP; 2381 int err = -EOPNOTSUPP;
2371 2382
@@ -2376,15 +2387,15 @@ static int rtnl_bridge_notify(struct net_device *dev, u16 flags)
2376 } 2387 }
2377 2388
2378 if ((!flags || (flags & BRIDGE_FLAGS_MASTER)) && 2389 if ((!flags || (flags & BRIDGE_FLAGS_MASTER)) &&
2379 master && master->netdev_ops->ndo_bridge_getlink) { 2390 br_dev && br_dev->netdev_ops->ndo_bridge_getlink) {
2380 err = master->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev); 2391 err = br_dev->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev, 0);
2381 if (err < 0) 2392 if (err < 0)
2382 goto errout; 2393 goto errout;
2383 } 2394 }
2384 2395
2385 if ((flags & BRIDGE_FLAGS_SELF) && 2396 if ((flags & BRIDGE_FLAGS_SELF) &&
2386 dev->netdev_ops->ndo_bridge_getlink) { 2397 dev->netdev_ops->ndo_bridge_getlink) {
2387 err = dev->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev); 2398 err = dev->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev, 0);
2388 if (err < 0) 2399 if (err < 0)
2389 goto errout; 2400 goto errout;
2390 } 2401 }
@@ -2436,13 +2447,14 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh,
2436 oflags = flags; 2447 oflags = flags;
2437 2448
2438 if (!flags || (flags & BRIDGE_FLAGS_MASTER)) { 2449 if (!flags || (flags & BRIDGE_FLAGS_MASTER)) {
2439 if (!dev->master || 2450 struct net_device *br_dev = netdev_master_upper_dev_get(dev);
2440 !dev->master->netdev_ops->ndo_bridge_setlink) { 2451
2452 if (!br_dev || !br_dev->netdev_ops->ndo_bridge_setlink) {
2441 err = -EOPNOTSUPP; 2453 err = -EOPNOTSUPP;
2442 goto out; 2454 goto out;
2443 } 2455 }
2444 2456
2445 err = dev->master->netdev_ops->ndo_bridge_setlink(dev, nlh); 2457 err = br_dev->netdev_ops->ndo_bridge_setlink(dev, nlh);
2446 if (err) 2458 if (err)
2447 goto out; 2459 goto out;
2448 2460
@@ -2468,6 +2480,77 @@ out:
2468 return err; 2480 return err;
2469} 2481}
2470 2482
2483static int rtnl_bridge_dellink(struct sk_buff *skb, struct nlmsghdr *nlh,
2484 void *arg)
2485{
2486 struct net *net = sock_net(skb->sk);
2487 struct ifinfomsg *ifm;
2488 struct net_device *dev;
2489 struct nlattr *br_spec, *attr = NULL;
2490 int rem, err = -EOPNOTSUPP;
2491 u16 oflags, flags = 0;
2492 bool have_flags = false;
2493
2494 if (nlmsg_len(nlh) < sizeof(*ifm))
2495 return -EINVAL;
2496
2497 ifm = nlmsg_data(nlh);
2498 if (ifm->ifi_family != AF_BRIDGE)
2499 return -EPFNOSUPPORT;
2500
2501 dev = __dev_get_by_index(net, ifm->ifi_index);
2502 if (!dev) {
2503 pr_info("PF_BRIDGE: RTM_SETLINK with unknown ifindex\n");
2504 return -ENODEV;
2505 }
2506
2507 br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
2508 if (br_spec) {
2509 nla_for_each_nested(attr, br_spec, rem) {
2510 if (nla_type(attr) == IFLA_BRIDGE_FLAGS) {
2511 have_flags = true;
2512 flags = nla_get_u16(attr);
2513 break;
2514 }
2515 }
2516 }
2517
2518 oflags = flags;
2519
2520 if (!flags || (flags & BRIDGE_FLAGS_MASTER)) {
2521 struct net_device *br_dev = netdev_master_upper_dev_get(dev);
2522
2523 if (!br_dev || !br_dev->netdev_ops->ndo_bridge_dellink) {
2524 err = -EOPNOTSUPP;
2525 goto out;
2526 }
2527
2528 err = br_dev->netdev_ops->ndo_bridge_dellink(dev, nlh);
2529 if (err)
2530 goto out;
2531
2532 flags &= ~BRIDGE_FLAGS_MASTER;
2533 }
2534
2535 if ((flags & BRIDGE_FLAGS_SELF)) {
2536 if (!dev->netdev_ops->ndo_bridge_dellink)
2537 err = -EOPNOTSUPP;
2538 else
2539 err = dev->netdev_ops->ndo_bridge_dellink(dev, nlh);
2540
2541 if (!err)
2542 flags &= ~BRIDGE_FLAGS_SELF;
2543 }
2544
2545 if (have_flags)
2546 memcpy(nla_data(attr), &flags, sizeof(flags));
2547 /* Generate event to notify upper layer of bridge change */
2548 if (!err)
2549 err = rtnl_bridge_notify(dev, oflags);
2550out:
2551 return err;
2552}
2553
2471/* Protected by RTNL semaphore. */ 2554
2472static struct rtattr **rta_buf; 2555static struct rtattr **rta_buf;
2473static int rtattr_max; 2556static int rtattr_max;
@@ -2651,6 +2734,7 @@ void __init rtnetlink_init(void)
2651 rtnl_register(PF_BRIDGE, RTM_GETNEIGH, NULL, rtnl_fdb_dump, NULL); 2734 rtnl_register(PF_BRIDGE, RTM_GETNEIGH, NULL, rtnl_fdb_dump, NULL);
2652 2735
2653 rtnl_register(PF_BRIDGE, RTM_GETLINK, NULL, rtnl_bridge_getlink, NULL); 2736 rtnl_register(PF_BRIDGE, RTM_GETLINK, NULL, rtnl_bridge_getlink, NULL);
2737 rtnl_register(PF_BRIDGE, RTM_DELLINK, rtnl_bridge_dellink, NULL, NULL);
2654 rtnl_register(PF_BRIDGE, RTM_SETLINK, rtnl_bridge_setlink, NULL, NULL); 2738 rtnl_register(PF_BRIDGE, RTM_SETLINK, rtnl_bridge_setlink, NULL, NULL);
2655} 2739}
2656 2740
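The substitution running through this whole file is mechanical: each dev->master dereference becomes netdev_master_upper_dev_get(), which must be called under RTNL and returns NULL when no master upper device is linked. The idiom in isolation, as a sketch reusing the fill-message case above:

/* sketch: RTNL protects the upper-device list here; a NULL return
 * simply means the device is not enslaved */
ASSERT_RTNL();
upper_dev = netdev_master_upper_dev_get(dev);
if (upper_dev &&
    nla_put_u32(skb, IFLA_MASTER, upper_dev->ifindex))
	goto nla_put_failure;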
diff --git a/net/core/scm.c b/net/core/scm.c
index 57fb1ee6649f..905dcc6ad1e3 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -35,6 +35,7 @@
35#include <net/sock.h> 35#include <net/sock.h>
36#include <net/compat.h> 36#include <net/compat.h>
37#include <net/scm.h> 37#include <net/scm.h>
38#include <net/cls_cgroup.h>
38 39
39 40
40/* 41/*
@@ -302,8 +303,10 @@ void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm)
302 } 303 }
303 /* Bump the usage count and install the file. */ 304 /* Bump the usage count and install the file. */
304 sock = sock_from_file(fp[i], &err); 305 sock = sock_from_file(fp[i], &err);
305 if (sock) 306 if (sock) {
306 sock_update_netprioidx(sock->sk, current); 307 sock_update_netprioidx(sock->sk, current);
308 sock_update_classid(sock->sk, current);
309 }
307 fd_install(new_fd, get_file(fp[i])); 310 fd_install(new_fd, get_file(fp[i]));
308 } 311 }
309 312
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 3ab989b0de42..33245ef54c3b 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -104,47 +104,37 @@ static const struct pipe_buf_operations sock_pipe_buf_ops = {
104 .get = sock_pipe_buf_get, 104 .get = sock_pipe_buf_get,
105}; 105};
106 106
107/*
108 * Keep out-of-line to prevent kernel bloat.
109 * __builtin_return_address is not used because it is not always
110 * reliable.
111 */
112
113/** 107/**
114 * skb_over_panic - private function 108 * skb_panic - private function for out-of-line support
115 * @skb: buffer 109 * @skb: buffer
116 * @sz: size 110 * @sz: size
117 * @here: address 111 * @addr: address
118 * 112 * @msg: skb_over_panic or skb_under_panic
119 * Out of line support code for skb_put(). Not user callable. 113 *
114 * Out-of-line support for skb_put() and skb_push().
115 * Called via the wrapper skb_over_panic() or skb_under_panic().
116 * Keep out of line to prevent kernel bloat.
117 * __builtin_return_address is not used because it is not always reliable.
120 */ 118 */
121static void skb_over_panic(struct sk_buff *skb, int sz, void *here) 119static void skb_panic(struct sk_buff *skb, unsigned int sz, void *addr,
120 const char msg[])
122{ 121{
123 pr_emerg("%s: text:%p len:%d put:%d head:%p data:%p tail:%#lx end:%#lx dev:%s\n", 122 pr_emerg("%s: text:%p len:%d put:%d head:%p data:%p tail:%#lx end:%#lx dev:%s\n",
124 __func__, here, skb->len, sz, skb->head, skb->data, 123 msg, addr, skb->len, sz, skb->head, skb->data,
125 (unsigned long)skb->tail, (unsigned long)skb->end, 124 (unsigned long)skb->tail, (unsigned long)skb->end,
126 skb->dev ? skb->dev->name : "<NULL>"); 125 skb->dev ? skb->dev->name : "<NULL>");
127 BUG(); 126 BUG();
128} 127}
129 128
130/** 129static void skb_over_panic(struct sk_buff *skb, unsigned int sz, void *addr)
131 * skb_under_panic - private function
132 * @skb: buffer
133 * @sz: size
134 * @here: address
135 *
136 * Out of line support code for skb_push(). Not user callable.
137 */
138
139static void skb_under_panic(struct sk_buff *skb, int sz, void *here)
140{ 130{
141 pr_emerg("%s: text:%p len:%d put:%d head:%p data:%p tail:%#lx end:%#lx dev:%s\n", 131 skb_panic(skb, sz, addr, __func__);
142 __func__, here, skb->len, sz, skb->head, skb->data,
143 (unsigned long)skb->tail, (unsigned long)skb->end,
144 skb->dev ? skb->dev->name : "<NULL>");
145 BUG();
146} 132}
147 133
134static void skb_under_panic(struct sk_buff *skb, unsigned int sz, void *addr)
135{
136 skb_panic(skb, sz, addr, __func__);
137}
148 138
149/* 139/*
150 * kmalloc_reserve is a wrapper around kmalloc_node_track_caller that tells 140 * kmalloc_reserve is a wrapper around kmalloc_node_track_caller that tells
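For context on how skb_panic() is reached: skb_put(), untouched by this patch, is the canonical caller, handing over its own caller's address so the report names the function that overran the buffer. Roughly, in the 3.8-era source:

#include <linux/skbuff.h>

/* sketch of the era's skb_put(): growing past skb->end lands in
 * skb_over_panic() with the offender's return address */
unsigned char *skb_put(struct sk_buff *skb, unsigned int len)
{
	unsigned char *tmp = skb_tail_pointer(skb);

	SKB_LINEAR_ASSERT(skb);
	skb->tail += len;
	skb->len  += len;
	if (unlikely(skb->tail > skb->end))
		skb_over_panic(skb, len, __builtin_return_address(0));
	return tmp;
}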
@@ -155,8 +145,9 @@ static void skb_under_panic(struct sk_buff *skb, int sz, void *here)
155 */ 145 */
156#define kmalloc_reserve(size, gfp, node, pfmemalloc) \ 146#define kmalloc_reserve(size, gfp, node, pfmemalloc) \
157 __kmalloc_reserve(size, gfp, node, _RET_IP_, pfmemalloc) 147 __kmalloc_reserve(size, gfp, node, _RET_IP_, pfmemalloc)
158void *__kmalloc_reserve(size_t size, gfp_t flags, int node, unsigned long ip, 148
159 bool *pfmemalloc) 149static void *__kmalloc_reserve(size_t size, gfp_t flags, int node,
150 unsigned long ip, bool *pfmemalloc)
160{ 151{
161 void *obj; 152 void *obj;
162 bool ret_pfmemalloc = false; 153 bool ret_pfmemalloc = false;
@@ -259,6 +250,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
259 skb->end = skb->tail + size; 250 skb->end = skb->tail + size;
260#ifdef NET_SKBUFF_DATA_USES_OFFSET 251#ifdef NET_SKBUFF_DATA_USES_OFFSET
261 skb->mac_header = ~0U; 252 skb->mac_header = ~0U;
253 skb->transport_header = ~0U;
262#endif 254#endif
263 255
264 /* make sure we initialize shinfo sequentially */ 256 /* make sure we initialize shinfo sequentially */
@@ -327,6 +319,7 @@ struct sk_buff *build_skb(void *data, unsigned int frag_size)
327 skb->end = skb->tail + size; 319 skb->end = skb->tail + size;
328#ifdef NET_SKBUFF_DATA_USES_OFFSET 320#ifdef NET_SKBUFF_DATA_USES_OFFSET
329 skb->mac_header = ~0U; 321 skb->mac_header = ~0U;
322 skb->transport_header = ~0U;
330#endif 323#endif
331 324
332 /* make sure we initialize shinfo sequentially */ 325 /* make sure we initialize shinfo sequentially */
@@ -348,10 +341,6 @@ struct netdev_alloc_cache {
348}; 341};
349static DEFINE_PER_CPU(struct netdev_alloc_cache, netdev_alloc_cache); 342static DEFINE_PER_CPU(struct netdev_alloc_cache, netdev_alloc_cache);
350 343
351#define NETDEV_FRAG_PAGE_MAX_ORDER get_order(32768)
352#define NETDEV_FRAG_PAGE_MAX_SIZE (PAGE_SIZE << NETDEV_FRAG_PAGE_MAX_ORDER)
353#define NETDEV_PAGECNT_MAX_BIAS NETDEV_FRAG_PAGE_MAX_SIZE
354
355static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask) 344static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
356{ 345{
357 struct netdev_alloc_cache *nc; 346 struct netdev_alloc_cache *nc;
@@ -683,7 +672,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
683 new->network_header = old->network_header; 672 new->network_header = old->network_header;
684 new->mac_header = old->mac_header; 673 new->mac_header = old->mac_header;
685 new->inner_transport_header = old->inner_transport_header; 674 new->inner_transport_header = old->inner_transport_header;
686 new->inner_network_header = old->inner_transport_header; 675 new->inner_network_header = old->inner_network_header;
687 skb_dst_copy(new, old); 676 skb_dst_copy(new, old);
688 new->rxhash = old->rxhash; 677 new->rxhash = old->rxhash;
689 new->ooo_okay = old->ooo_okay; 678 new->ooo_okay = old->ooo_okay;
@@ -1649,7 +1638,7 @@ static void sock_spd_release(struct splice_pipe_desc *spd, unsigned int i)
1649 1638
1650static struct page *linear_to_page(struct page *page, unsigned int *len, 1639static struct page *linear_to_page(struct page *page, unsigned int *len,
1651 unsigned int *offset, 1640 unsigned int *offset,
1652 struct sk_buff *skb, struct sock *sk) 1641 struct sock *sk)
1653{ 1642{
1654 struct page_frag *pfrag = sk_page_frag(sk); 1643 struct page_frag *pfrag = sk_page_frag(sk);
1655 1644
@@ -1682,14 +1671,14 @@ static bool spd_can_coalesce(const struct splice_pipe_desc *spd,
1682static bool spd_fill_page(struct splice_pipe_desc *spd, 1671static bool spd_fill_page(struct splice_pipe_desc *spd,
1683 struct pipe_inode_info *pipe, struct page *page, 1672 struct pipe_inode_info *pipe, struct page *page,
1684 unsigned int *len, unsigned int offset, 1673 unsigned int *len, unsigned int offset,
1685 struct sk_buff *skb, bool linear, 1674 bool linear,
1686 struct sock *sk) 1675 struct sock *sk)
1687{ 1676{
1688 if (unlikely(spd->nr_pages == MAX_SKB_FRAGS)) 1677 if (unlikely(spd->nr_pages == MAX_SKB_FRAGS))
1689 return true; 1678 return true;
1690 1679
1691 if (linear) { 1680 if (linear) {
1692 page = linear_to_page(page, len, &offset, skb, sk); 1681 page = linear_to_page(page, len, &offset, sk);
1693 if (!page) 1682 if (!page)
1694 return true; 1683 return true;
1695 } 1684 }
@@ -1706,23 +1695,9 @@ static bool spd_fill_page(struct splice_pipe_desc *spd,
1706 return false; 1695 return false;
1707} 1696}
1708 1697
1709static inline void __segment_seek(struct page **page, unsigned int *poff,
1710 unsigned int *plen, unsigned int off)
1711{
1712 unsigned long n;
1713
1714 *poff += off;
1715 n = *poff / PAGE_SIZE;
1716 if (n)
1717 *page = nth_page(*page, n);
1718
1719 *poff = *poff % PAGE_SIZE;
1720 *plen -= off;
1721}
1722
1723static bool __splice_segment(struct page *page, unsigned int poff, 1698static bool __splice_segment(struct page *page, unsigned int poff,
1724 unsigned int plen, unsigned int *off, 1699 unsigned int plen, unsigned int *off,
1725 unsigned int *len, struct sk_buff *skb, 1700 unsigned int *len,
1726 struct splice_pipe_desc *spd, bool linear, 1701 struct splice_pipe_desc *spd, bool linear,
1727 struct sock *sk, 1702 struct sock *sk,
1728 struct pipe_inode_info *pipe) 1703 struct pipe_inode_info *pipe)
@@ -1737,23 +1712,19 @@ static bool __splice_segment(struct page *page, unsigned int poff,
1737 } 1712 }
1738 1713
1739 /* ignore any bits we already processed */ 1714 /* ignore any bits we already processed */
1740 if (*off) { 1715 poff += *off;
1741 __segment_seek(&page, &poff, &plen, *off); 1716 plen -= *off;
1742 *off = 0; 1717 *off = 0;
1743 }
1744 1718
1745 do { 1719 do {
1746 unsigned int flen = min(*len, plen); 1720 unsigned int flen = min(*len, plen);
1747 1721
1748 /* the linear region may spread across several pages */ 1722 if (spd_fill_page(spd, pipe, page, &flen, poff,
1749 flen = min_t(unsigned int, flen, PAGE_SIZE - poff); 1723 linear, sk))
1750
1751 if (spd_fill_page(spd, pipe, page, &flen, poff, skb, linear, sk))
1752 return true; 1724 return true;
1753 1725 poff += flen;
1754 __segment_seek(&page, &poff, &plen, flen); 1726 plen -= flen;
1755 *len -= flen; 1727 *len -= flen;
1756
1757 } while (*len && plen); 1728 } while (*len && plen);
1758 1729
1759 return false; 1730 return false;
@@ -1777,7 +1748,7 @@ static bool __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe,
1777 if (__splice_segment(virt_to_page(skb->data), 1748 if (__splice_segment(virt_to_page(skb->data),
1778 (unsigned long) skb->data & (PAGE_SIZE - 1), 1749 (unsigned long) skb->data & (PAGE_SIZE - 1),
1779 skb_headlen(skb), 1750 skb_headlen(skb),
1780 offset, len, skb, spd, 1751 offset, len, spd,
1781 skb_head_is_locked(skb), 1752 skb_head_is_locked(skb),
1782 sk, pipe)) 1753 sk, pipe))
1783 return true; 1754 return true;
@@ -1790,7 +1761,7 @@ static bool __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe,
1790 1761
1791 if (__splice_segment(skb_frag_page(f), 1762 if (__splice_segment(skb_frag_page(f),
1792 f->page_offset, skb_frag_size(f), 1763 f->page_offset, skb_frag_size(f),
1793 offset, len, skb, spd, false, sk, pipe)) 1764 offset, len, spd, false, sk, pipe))
1794 return true; 1765 return true;
1795 } 1766 }
1796 1767
@@ -2355,6 +2326,7 @@ void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len)
2355{ 2326{
2356 int pos = skb_headlen(skb); 2327 int pos = skb_headlen(skb);
2357 2328
2329 skb_shinfo(skb1)->tx_flags = skb_shinfo(skb)->tx_flags & SKBTX_SHARED_FRAG;
2358 if (len < pos) /* Split line is inside header. */ 2330 if (len < pos) /* Split line is inside header. */
2359 skb_split_inside_header(skb, skb1, len, pos); 2331 skb_split_inside_header(skb, skb1, len, pos);
2360 else /* Second chunk has no header, nothing to copy. */ 2332 else /* Second chunk has no header, nothing to copy. */
@@ -2686,48 +2658,37 @@ int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb,
2686 int len, int odd, struct sk_buff *skb), 2658 int len, int odd, struct sk_buff *skb),
2687 void *from, int length) 2659 void *from, int length)
2688{ 2660{
2689 int frg_cnt = 0; 2661 int frg_cnt = skb_shinfo(skb)->nr_frags;
2690 skb_frag_t *frag = NULL; 2662 int copy;
2691 struct page *page = NULL;
2692 int copy, left;
2693 int offset = 0; 2663 int offset = 0;
2694 int ret; 2664 int ret;
2665 struct page_frag *pfrag = &current->task_frag;
2695 2666
2696 do { 2667 do {
2697 /* Return error if we don't have space for new frag */ 2668 /* Return error if we don't have space for new frag */
2698 frg_cnt = skb_shinfo(skb)->nr_frags;
2699 if (frg_cnt >= MAX_SKB_FRAGS) 2669 if (frg_cnt >= MAX_SKB_FRAGS)
2700 return -EFAULT; 2670 return -EMSGSIZE;
2701 2671
2702 /* allocate a new page for next frag */ 2672 if (!sk_page_frag_refill(sk, pfrag))
2703 page = alloc_pages(sk->sk_allocation, 0);
2704
2705 /* If alloc_page fails just return failure and caller will
2706 * free previous allocated pages by doing kfree_skb()
2707 */
2708 if (page == NULL)
2709 return -ENOMEM; 2673 return -ENOMEM;
2710 2674
2711 /* initialize the next frag */
2712 skb_fill_page_desc(skb, frg_cnt, page, 0, 0);
2713 skb->truesize += PAGE_SIZE;
2714 atomic_add(PAGE_SIZE, &sk->sk_wmem_alloc);
2715
2716 /* get the new initialized frag */
2717 frg_cnt = skb_shinfo(skb)->nr_frags;
2718 frag = &skb_shinfo(skb)->frags[frg_cnt - 1];
2719
2720 /* copy the user data to page */ 2675 /* copy the user data to page */
2721 left = PAGE_SIZE - frag->page_offset; 2676 copy = min_t(int, length, pfrag->size - pfrag->offset);
2722 copy = (length > left)? left : length;
2723 2677
2724 ret = getfrag(from, skb_frag_address(frag) + skb_frag_size(frag), 2678 ret = getfrag(from, page_address(pfrag->page) + pfrag->offset,
2725 offset, copy, 0, skb); 2679 offset, copy, 0, skb);
2726 if (ret < 0) 2680 if (ret < 0)
2727 return -EFAULT; 2681 return -EFAULT;
2728 2682
2729 /* copy was successful so update the size parameters */ 2683 /* copy was successful so update the size parameters */
2730 skb_frag_size_add(frag, copy); 2684 skb_fill_page_desc(skb, frg_cnt, pfrag->page, pfrag->offset,
2685 copy);
2686 frg_cnt++;
2687 pfrag->offset += copy;
2688 get_page(pfrag->page);
2689
2690 skb->truesize += copy;
2691 atomic_add(copy, &sk->sk_wmem_alloc);
2731 skb->len += copy; 2692 skb->len += copy;
2732 skb->data_len += copy; 2693 skb->data_len += copy;
2733 offset += copy; 2694 offset += copy;
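The rewrite replaces one freshly allocated page per fragment with the per-task page_frag allocator, so each copied chunk is charged to the socket at its actual size and small appends can share a page. A hedged sketch of the pattern (names as in the function above; memcpy stands in for the getfrag callback, and error handling is reduced to the two failure paths):

	struct page_frag *pfrag = &current->task_frag;

	if (!sk_page_frag_refill(sk, pfrag))	/* reuse or refill the page */
		return -ENOMEM;

	copy = min_t(int, length, pfrag->size - pfrag->offset);
	memcpy(page_address(pfrag->page) + pfrag->offset, from, copy);

	/* publish the chunk as a new frag and keep the page alive */
	skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
			   pfrag->page, pfrag->offset, copy);
	get_page(pfrag->page);
	pfrag->offset += copy;

Note the error-code change as well: running out of fragment slots now reports -EMSGSIZE (message too long) rather than the misleading -EFAULT.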
@@ -2777,6 +2738,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features)
2777 unsigned int mss = skb_shinfo(skb)->gso_size; 2738 unsigned int mss = skb_shinfo(skb)->gso_size;
2778 unsigned int doffset = skb->data - skb_mac_header(skb); 2739 unsigned int doffset = skb->data - skb_mac_header(skb);
2779 unsigned int offset = doffset; 2740 unsigned int offset = doffset;
2741 unsigned int tnl_hlen = skb_tnl_header_len(skb);
2780 unsigned int headroom; 2742 unsigned int headroom;
2781 unsigned int len; 2743 unsigned int len;
2782 int sg = !!(features & NETIF_F_SG); 2744 int sg = !!(features & NETIF_F_SG);
@@ -2853,7 +2815,10 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features)
2853 skb_set_network_header(nskb, skb->mac_len); 2815 skb_set_network_header(nskb, skb->mac_len);
2854 nskb->transport_header = (nskb->network_header + 2816 nskb->transport_header = (nskb->network_header +
2855 skb_network_header_len(skb)); 2817 skb_network_header_len(skb));
2856 skb_copy_from_linear_data(skb, nskb->data, doffset); 2818
2819 skb_copy_from_linear_data_offset(skb, -tnl_hlen,
2820 nskb->data - tnl_hlen,
2821 doffset + tnl_hlen);
2857 2822
2858 if (fskb != skb_shinfo(skb)->frag_list) 2823 if (fskb != skb_shinfo(skb)->frag_list)
2859 continue; 2824 continue;
@@ -2871,6 +2836,8 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features)
2871 skb_copy_from_linear_data_offset(skb, offset, 2836 skb_copy_from_linear_data_offset(skb, offset,
2872 skb_put(nskb, hsize), hsize); 2837 skb_put(nskb, hsize), hsize);
2873 2838
2839 skb_shinfo(nskb)->tx_flags = skb_shinfo(skb)->tx_flags & SKBTX_SHARED_FRAG;
2840
2874 while (pos < offset + len && i < nfrags) { 2841 while (pos < offset + len && i < nfrags) {
2875 *frag = skb_shinfo(skb)->frags[i]; 2842 *frag = skb_shinfo(skb)->frags[i];
2876 __skb_frag_ref(frag); 2843 __skb_frag_ref(frag);
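Two GSO changes land in skb_segment(). First, each segment's header copy now starts tnl_hlen bytes before skb->data, so outer encapsulation headers (e.g. a GRE wrapper) are replicated onto every segment; this assumes skb_tnl_header_len(), new in this series, reports the outer-header bytes sitting immediately in front of the inner MAC header. Second, SKBTX_SHARED_FRAG is propagated to each nskb, mirroring the skb_split() hunk above, so segments whose page frags may still be shared with user space keep that marking. An annotated restatement of the copy:

	/* src spans [skb->data - tnl_hlen, skb->data + doffset):
	 * the outer encapsulation headers plus the inner headers.
	 * dst lands tnl_hlen bytes back into nskb's reserved headroom.
	 * With tnl_hlen == 0 this degenerates to the old
	 * skb_copy_from_linear_data(skb, nskb->data, doffset).
	 */
	skb_copy_from_linear_data_offset(skb, -tnl_hlen,
					 nskb->data - tnl_hlen,
					 doffset + tnl_hlen);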
diff --git a/net/core/sock.c b/net/core/sock.c
index a692ef49c9bb..fe96c5d34299 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -583,7 +583,7 @@ static int sock_getbindtodevice(struct sock *sk, char __user *optval,
583 goto out; 583 goto out;
584 584
585retry: 585retry:
586 seq = read_seqbegin(&devnet_rename_seq); 586 seq = read_seqcount_begin(&devnet_rename_seq);
587 rcu_read_lock(); 587 rcu_read_lock();
588 dev = dev_get_by_index_rcu(net, sk->sk_bound_dev_if); 588 dev = dev_get_by_index_rcu(net, sk->sk_bound_dev_if);
589 ret = -ENODEV; 589 ret = -ENODEV;
@@ -594,7 +594,7 @@ retry:
594 594
595 strcpy(devname, dev->name); 595 strcpy(devname, dev->name);
596 rcu_read_unlock(); 596 rcu_read_unlock();
597 if (read_seqretry(&devnet_rename_seq, seq)) 597 if (read_seqcount_retry(&devnet_rename_seq, seq))
598 goto retry; 598 goto retry;
599 599
600 len = strlen(devname) + 1; 600 len = strlen(devname) + 1;
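devnet_rename_seq is now a plain seqcount_t rather than a seqlock_t, so readers switch to the seqcount API: they take a snapshot and retry whenever a rename ran concurrently (writers, presumably dev_change_name() under RTNL, bump the count around the update). A generic sketch of the lockless-reader pattern used above:

	unsigned int seq;

	do {
		seq = read_seqcount_begin(&devnet_rename_seq);
		rcu_read_lock();
		/* snapshot dev->name into a local buffer ... */
		rcu_read_unlock();
	} while (read_seqcount_retry(&devnet_rename_seq, seq));
	/* the snapshot is consistent: no rename ran in between */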
@@ -665,6 +665,9 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
665 case SO_REUSEADDR: 665 case SO_REUSEADDR:
666 sk->sk_reuse = (valbool ? SK_CAN_REUSE : SK_NO_REUSE); 666 sk->sk_reuse = (valbool ? SK_CAN_REUSE : SK_NO_REUSE);
667 break; 667 break;
668 case SO_REUSEPORT:
669 sk->sk_reuseport = valbool;
670 break;
668 case SO_TYPE: 671 case SO_TYPE:
669 case SO_PROTOCOL: 672 case SO_PROTOCOL:
670 case SO_DOMAIN: 673 case SO_DOMAIN:
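SO_REUSEPORT lets several sockets bind the same address/port pair, with the kernel distributing incoming packets among them; every socket must set the option before bind(), and on Linux all of them must be created by the same effective UID. A minimal userspace sketch (illustrative helper, error handling omitted):

	#include <sys/socket.h>
	#include <netinet/in.h>
	#include <arpa/inet.h>
	#include <string.h>
	#include <stdint.h>

	static int reuseport_udp_socket(uint16_t port)
	{
		struct sockaddr_in addr;
		int fd = socket(AF_INET, SOCK_DGRAM, 0);
		int one = 1;

		/* must precede bind(); every socket sharing the port
		 * does the same */
		setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one));

		memset(&addr, 0, sizeof(addr));
		addr.sin_family = AF_INET;
		addr.sin_addr.s_addr = htonl(INADDR_ANY);
		addr.sin_port = htons(port);
		bind(fd, (struct sockaddr *)&addr, sizeof(addr));
		return fd;
	}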
@@ -861,6 +864,13 @@ set_rcvbuf:
861 ret = sk_detach_filter(sk); 864 ret = sk_detach_filter(sk);
862 break; 865 break;
863 866
867 case SO_LOCK_FILTER:
868 if (sock_flag(sk, SOCK_FILTER_LOCKED) && !valbool)
869 ret = -EPERM;
870 else
871 sock_valbool_flag(sk, SOCK_FILTER_LOCKED, valbool);
872 break;
873
864 case SO_PASSSEC: 874 case SO_PASSSEC:
865 if (valbool) 875 if (valbool)
866 set_bit(SOCK_PASSSEC, &sock->flags); 876 set_bit(SOCK_PASSSEC, &sock->flags);
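SO_LOCK_FILTER pins the socket's BPF filter in place: once the flag is set, attaching, detaching, or replacing the filter is refused, and, as the EPERM branch above shows, the lock itself cannot be cleared (the attach/detach enforcement lives in this series' filter.c changes). Typical use is locking a filter before dropping privileges. A userspace sketch, assuming fd is an already-created socket:

	#include <sys/socket.h>
	#include <linux/filter.h>

	static void lock_accept_all_filter(int fd)
	{
		/* classic BPF: accept every packet (up to 0xffff bytes) */
		struct sock_filter insns[] = {
			{ BPF_RET | BPF_K, 0, 0, 0xffff },
		};
		struct sock_fprog prog = { .len = 1, .filter = insns };
		int one = 1;

		setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog));
		setsockopt(fd, SOL_SOCKET, SO_LOCK_FILTER, &one, sizeof(one));
		/* further SO_ATTACH_FILTER / SO_DETACH_FILTER calls on fd,
		 * and clearing SO_LOCK_FILTER itself, now fail with EPERM */
	}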
@@ -965,6 +975,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
965 v.val = sk->sk_reuse; 975 v.val = sk->sk_reuse;
966 break; 976 break;
967 977
978 case SO_REUSEPORT:
979 v.val = sk->sk_reuseport;
980 break;
981
968 case SO_KEEPALIVE: 982 case SO_KEEPALIVE:
969 v.val = sock_flag(sk, SOCK_KEEPOPEN); 983 v.val = sock_flag(sk, SOCK_KEEPOPEN);
970 break; 984 break;
@@ -1140,6 +1154,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
1140 1154
1141 goto lenout; 1155 goto lenout;
1142 1156
1157 case SO_LOCK_FILTER:
1158 v.val = sock_flag(sk, SOCK_FILTER_LOCKED);
1159 break;
1160
1143 default: 1161 default:
1144 return -ENOPROTOOPT; 1162 return -ENOPROTOOPT;
1145 } 1163 }
@@ -2212,7 +2230,7 @@ EXPORT_SYMBOL(sk_reset_timer);
2212 2230
2213void sk_stop_timer(struct sock *sk, struct timer_list* timer) 2231void sk_stop_timer(struct sock *sk, struct timer_list* timer)
2214{ 2232{
2215 if (timer_pending(timer) && del_timer(timer)) 2233 if (del_timer(timer))
2216 __sock_put(sk); 2234 __sock_put(sk);
2217} 2235}
2218EXPORT_SYMBOL(sk_stop_timer); 2236EXPORT_SYMBOL(sk_stop_timer);
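The dropped timer_pending() pre-check was redundant: del_timer() already returns nonzero only when it removed a pending timer. The resulting function, annotated:

	void sk_stop_timer(struct sock *sk, struct timer_list *timer)
	{
		if (del_timer(timer))	/* nonzero iff a pending timer was removed */
			__sock_put(sk);	/* drop the reference that timer held */
	}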
@@ -2818,7 +2836,7 @@ static const struct file_operations proto_seq_fops = {
2818 2836
2819static __net_init int proto_init_net(struct net *net) 2837static __net_init int proto_init_net(struct net *net)
2820{ 2838{
2821 if (!proc_net_fops_create(net, "protocols", S_IRUGO, &proto_seq_fops)) 2839 if (!proc_create("protocols", S_IRUGO, net->proc_net, &proto_seq_fops))
2822 return -ENOMEM; 2840 return -ENOMEM;
2823 2841
2824 return 0; 2842 return 0;
@@ -2826,7 +2844,7 @@ static __net_init int proto_init_net(struct net *net)
2826 2844
2827static __net_exit void proto_exit_net(struct net *net) 2845static __net_exit void proto_exit_net(struct net *net)
2828{ 2846{
2829 proc_net_remove(net, "protocols"); 2847 remove_proc_entry("protocols", net->proc_net);
2830} 2848}
2831 2849
2832 2850
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index d1b08045a9df..cfdb46ab3a7f 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -20,6 +20,8 @@
20#include <net/sock.h> 20#include <net/sock.h>
21#include <net/net_ratelimit.h> 21#include <net/net_ratelimit.h>
22 22
23static int one = 1;
24
23#ifdef CONFIG_RPS 25#ifdef CONFIG_RPS
24static int rps_sock_flow_sysctl(ctl_table *table, int write, 26static int rps_sock_flow_sysctl(ctl_table *table, int write,
25 void __user *buffer, size_t *lenp, loff_t *ppos) 27 void __user *buffer, size_t *lenp, loff_t *ppos)
@@ -92,28 +94,32 @@ static struct ctl_table net_core_table[] = {
92 .data = &sysctl_wmem_max, 94 .data = &sysctl_wmem_max,
93 .maxlen = sizeof(int), 95 .maxlen = sizeof(int),
94 .mode = 0644, 96 .mode = 0644,
95 .proc_handler = proc_dointvec 97 .proc_handler = proc_dointvec_minmax,
98 .extra1 = &one,
96 }, 99 },
97 { 100 {
98 .procname = "rmem_max", 101 .procname = "rmem_max",
99 .data = &sysctl_rmem_max, 102 .data = &sysctl_rmem_max,
100 .maxlen = sizeof(int), 103 .maxlen = sizeof(int),
101 .mode = 0644, 104 .mode = 0644,
102 .proc_handler = proc_dointvec 105 .proc_handler = proc_dointvec_minmax,
106 .extra1 = &one,
103 }, 107 },
104 { 108 {
105 .procname = "wmem_default", 109 .procname = "wmem_default",
106 .data = &sysctl_wmem_default, 110 .data = &sysctl_wmem_default,
107 .maxlen = sizeof(int), 111 .maxlen = sizeof(int),
108 .mode = 0644, 112 .mode = 0644,
109 .proc_handler = proc_dointvec 113 .proc_handler = proc_dointvec_minmax,
114 .extra1 = &one,
110 }, 115 },
111 { 116 {
112 .procname = "rmem_default", 117 .procname = "rmem_default",
113 .data = &sysctl_rmem_default, 118 .data = &sysctl_rmem_default,
114 .maxlen = sizeof(int), 119 .maxlen = sizeof(int),
115 .mode = 0644, 120 .mode = 0644,
116 .proc_handler = proc_dointvec 121 .proc_handler = proc_dointvec_minmax,
122 .extra1 = &one,
117 }, 123 },
118 { 124 {
119 .procname = "dev_weight", 125 .procname = "dev_weight",
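All four buffer-size sysctls switch from proc_dointvec to proc_dointvec_minmax with extra1 = &one, so writes below 1 are now rejected with -EINVAL instead of silently installing a nonsensical size. A hedged sketch of the general pattern (illustrative entry and bounds, not from this diff):

	static int one = 1;
	static int example_max = 65535;		/* illustrative upper bound */
	static int example_value = 4096;

	static struct ctl_table example_table[] = {
		{
			.procname	= "example_value",
			.data		= &example_value,
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_minmax,
			.extra1		= &one,		/* reject writes < 1 */
			.extra2		= &example_max,	/* reject writes > 65535 */
		},
		{ }
	};

The hunks above set only extra1, so just the lower bound is enforced; extra2 stays NULL and the values remain unbounded above.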