Diffstat (limited to 'net/core')
-rw-r--r--  net/core/Makefile | 4
-rw-r--r--  net/core/datagram.c | 32
-rw-r--r--  net/core/dev.c | 2115
-rw-r--r--  net/core/dev_addr_lists.c | 741
-rw-r--r--  net/core/dev_mcast.c | 229
-rw-r--r--  net/core/drop_monitor.c | 47
-rw-r--r--  net/core/dst.c | 48
-rw-r--r--  net/core/ethtool.c | 596
-rw-r--r--  net/core/fib_rules.c | 34
-rw-r--r--  net/core/filter.c | 228
-rw-r--r--  net/core/flow.c | 408
-rw-r--r--  net/core/gen_estimator.c | 17
-rw-r--r--  net/core/gen_stats.c | 14
-rw-r--r--  net/core/iovec.c | 15
-rw-r--r--  net/core/link_watch.c | 2
-rw-r--r--  net/core/neighbour.c | 27
-rw-r--r--  net/core/net-sysfs.c | 400
-rw-r--r--  net/core/net-sysfs.h | 1
-rw-r--r--  net/core/net-traces.c | 1
-rw-r--r--  net/core/net_namespace.c | 95
-rw-r--r--  net/core/netevent.c | 5
-rw-r--r--  net/core/netpoll.c | 335
-rw-r--r--  net/core/pktgen.c | 274
-rw-r--r--  net/core/rtnetlink.c | 446
-rw-r--r--  net/core/scm.c | 36
-rw-r--r--  net/core/skbuff.c | 130
-rw-r--r--  net/core/sock.c | 223
-rw-r--r--  net/core/stream.c | 36
-rw-r--r--  net/core/sysctl_net_core.c | 76
-rw-r--r--  net/core/timestamping.c | 126
-rw-r--r--  net/core/utils.c | 3
31 files changed, 4688 insertions(+), 2056 deletions(-)
diff --git a/net/core/Makefile b/net/core/Makefile
index 08791ac3e05a..8a04dd22cf77 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -7,7 +7,7 @@ obj-y := sock.o request_sock.o skbuff.o iovec.o datagram.o stream.o scm.o \
 
 obj-$(CONFIG_SYSCTL) += sysctl_net_core.o
 
-obj-y += dev.o ethtool.o dev_mcast.o dst.o netevent.o \
+obj-y += dev.o ethtool.o dev_addr_lists.o dst.o netevent.o \
 	neighbour.o rtnetlink.o utils.o link_watch.o filter.o
 
 obj-$(CONFIG_XFRM) += flow.o
@@ -18,4 +18,4 @@ obj-$(CONFIG_NET_DMA) += user_dma.o
 obj-$(CONFIG_FIB_RULES) += fib_rules.o
 obj-$(CONFIG_TRACEPOINTS) += net-traces.o
 obj-$(CONFIG_NET_DROP_MONITOR) += drop_monitor.o
-
+obj-$(CONFIG_NETWORK_PHY_TIMESTAMPING) += timestamping.o
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 95c2e0840d0d..251997a95483 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -48,6 +48,7 @@
 #include <linux/poll.h>
 #include <linux/highmem.h>
 #include <linux/spinlock.h>
+#include <linux/slab.h>
 
 #include <net/protocol.h>
 #include <linux/skbuff.h>
@@ -85,7 +86,7 @@ static int wait_for_packet(struct sock *sk, int *err, long *timeo_p)
 	int error;
 	DEFINE_WAIT_FUNC(wait, receiver_wake_function);
 
-	prepare_to_wait_exclusive(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+	prepare_to_wait_exclusive(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
 
 	/* Socket errors? */
 	error = sock_error(sk);
@@ -114,7 +115,7 @@ static int wait_for_packet(struct sock *sk, int *err, long *timeo_p)
 	error = 0;
 	*timeo_p = schedule_timeout(*timeo_p);
 out:
-	finish_wait(sk->sk_sleep, &wait);
+	finish_wait(sk_sleep(sk), &wait);
 	return error;
 interrupted:
 	error = sock_intr_errno(*timeo_p);
@@ -218,6 +219,7 @@ struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags,
 	return __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
 				   &peeked, err);
 }
+EXPORT_SYMBOL(skb_recv_datagram);
 
 void skb_free_datagram(struct sock *sk, struct sk_buff *skb)
 {
@@ -228,9 +230,20 @@ EXPORT_SYMBOL(skb_free_datagram);
 
 void skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb)
 {
-	lock_sock(sk);
-	skb_free_datagram(sk, skb);
-	release_sock(sk);
+	bool slow;
+
+	if (likely(atomic_read(&skb->users) == 1))
+		smp_rmb();
+	else if (likely(!atomic_dec_and_test(&skb->users)))
+		return;
+
+	slow = lock_sock_fast(sk);
+	skb_orphan(skb);
+	sk_mem_reclaim_partial(sk);
+	unlock_sock_fast(sk, slow);
+
+	/* skb is now orphaned, can be freed outside of locked section */
+	__kfree_skb(skb);
 }
 EXPORT_SYMBOL(skb_free_datagram_locked);
 
@@ -276,7 +289,6 @@ int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags)
 
 	return err;
 }
-
 EXPORT_SYMBOL(skb_kill_datagram);
 
 /**
@@ -361,6 +373,7 @@ int skb_copy_datagram_iovec(const struct sk_buff *skb, int offset,
 fault:
 	return -EFAULT;
 }
+EXPORT_SYMBOL(skb_copy_datagram_iovec);
 
 /**
  * skb_copy_datagram_const_iovec - Copy a datagram to an iovec.
@@ -704,6 +717,7 @@ csum_error:
 fault:
 	return -EFAULT;
 }
+EXPORT_SYMBOL(skb_copy_and_csum_datagram_iovec);
 
 /**
  * datagram_poll - generic datagram poll
@@ -725,7 +739,7 @@ unsigned int datagram_poll(struct file *file, struct socket *sock,
 	struct sock *sk = sock->sk;
 	unsigned int mask;
 
-	sock_poll_wait(file, sk->sk_sleep, wait);
+	sock_poll_wait(file, sk_sleep(sk), wait);
 	mask = 0;
 
 	/* exceptional events? */
@@ -758,8 +772,4 @@ unsigned int datagram_poll(struct file *file, struct socket *sock,
 
 	return mask;
 }
-
 EXPORT_SYMBOL(datagram_poll);
-EXPORT_SYMBOL(skb_copy_and_csum_datagram_iovec);
-EXPORT_SYMBOL(skb_copy_datagram_iovec);
-EXPORT_SYMBOL(skb_recv_datagram);
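The reworked skb_free_datagram_locked() above only takes the new lightweight socket lock when the skb still needs orphaning. Below is a minimal sketch of how a datagram protocol's recvmsg path would use the helpers exported in this file; example_recvmsg() and its surrounding error handling are illustrative assumptions, not part of this patch.

static int example_recvmsg(struct sock *sk, struct msghdr *msg,
			   size_t len, int noblock)
{
	struct sk_buff *skb;
	int peeked, copied, err;

	skb = __skb_recv_datagram(sk, noblock ? MSG_DONTWAIT : 0,
				  &peeked, &err);
	if (!skb)
		return err;

	copied = skb->len;
	if (copied > len)
		copied = len;
	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);

	/* grabs the fast socket lock only if the skb is still shared */
	skb_free_datagram_locked(sk, skb);
	return err ? err : copied;
}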
diff --git a/net/core/dev.c b/net/core/dev.c
index be9924f60ec3..660dd41aaaa6 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -80,6 +80,7 @@
80#include <linux/types.h> 80#include <linux/types.h>
81#include <linux/kernel.h> 81#include <linux/kernel.h>
82#include <linux/hash.h> 82#include <linux/hash.h>
83#include <linux/slab.h>
83#include <linux/sched.h> 84#include <linux/sched.h>
84#include <linux/mutex.h> 85#include <linux/mutex.h>
85#include <linux/string.h> 86#include <linux/string.h>
@@ -100,8 +101,6 @@
100#include <linux/proc_fs.h> 101#include <linux/proc_fs.h>
101#include <linux/seq_file.h> 102#include <linux/seq_file.h>
102#include <linux/stat.h> 103#include <linux/stat.h>
103#include <linux/if_bridge.h>
104#include <linux/if_macvlan.h>
105#include <net/dst.h> 104#include <net/dst.h>
106#include <net/pkt_sched.h> 105#include <net/pkt_sched.h>
107#include <net/checksum.h> 106#include <net/checksum.h>
@@ -129,6 +128,7 @@
129#include <linux/jhash.h> 128#include <linux/jhash.h>
130#include <linux/random.h> 129#include <linux/random.h>
131#include <trace/events/napi.h> 130#include <trace/events/napi.h>
131#include <linux/pci.h>
132 132
133#include "net-sysfs.h" 133#include "net-sysfs.h"
134 134
@@ -206,6 +206,20 @@ static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
206 return &net->dev_index_head[ifindex & (NETDEV_HASHENTRIES - 1)]; 206 return &net->dev_index_head[ifindex & (NETDEV_HASHENTRIES - 1)];
207} 207}
208 208
209static inline void rps_lock(struct softnet_data *sd)
210{
211#ifdef CONFIG_RPS
212 spin_lock(&sd->input_pkt_queue.lock);
213#endif
214}
215
216static inline void rps_unlock(struct softnet_data *sd)
217{
218#ifdef CONFIG_RPS
219 spin_unlock(&sd->input_pkt_queue.lock);
220#endif
221}
222
209/* Device list insertion */ 223/* Device list insertion */
210static int list_netdevice(struct net_device *dev) 224static int list_netdevice(struct net_device *dev)
211{ 225{
@@ -248,7 +262,7 @@ static RAW_NOTIFIER_HEAD(netdev_chain);
248 * queue in the local softnet handler. 262 * queue in the local softnet handler.
249 */ 263 */
250 264
251DEFINE_PER_CPU(struct softnet_data, softnet_data); 265DEFINE_PER_CPU_ALIGNED(struct softnet_data, softnet_data);
252EXPORT_PER_CPU_SYMBOL(softnet_data); 266EXPORT_PER_CPU_SYMBOL(softnet_data);
253 267
254#ifdef CONFIG_LOCKDEP 268#ifdef CONFIG_LOCKDEP
@@ -772,47 +786,46 @@ EXPORT_SYMBOL(__dev_getfirstbyhwtype);
772 786
773struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type) 787struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type)
774{ 788{
775 struct net_device *dev; 789 struct net_device *dev, *ret = NULL;
776 790
777 rtnl_lock(); 791 rcu_read_lock();
778 dev = __dev_getfirstbyhwtype(net, type); 792 for_each_netdev_rcu(net, dev)
779 if (dev) 793 if (dev->type == type) {
780 dev_hold(dev); 794 dev_hold(dev);
781 rtnl_unlock(); 795 ret = dev;
782 return dev; 796 break;
797 }
798 rcu_read_unlock();
799 return ret;
783} 800}
784EXPORT_SYMBOL(dev_getfirstbyhwtype); 801EXPORT_SYMBOL(dev_getfirstbyhwtype);
785 802
786/** 803/**
787 * dev_get_by_flags - find any device with given flags 804 * dev_get_by_flags_rcu - find any device with given flags
788 * @net: the applicable net namespace 805 * @net: the applicable net namespace
789 * @if_flags: IFF_* values 806 * @if_flags: IFF_* values
790 * @mask: bitmask of bits in if_flags to check 807 * @mask: bitmask of bits in if_flags to check
791 * 808 *
792 * Search for any interface with the given flags. Returns NULL if a device 809 * Search for any interface with the given flags. Returns NULL if a device
793 * is not found or a pointer to the device. The device returned has 810 * is not found or a pointer to the device. Must be called inside
794 * had a reference added and the pointer is safe until the user calls 811 * rcu_read_lock(), and result refcount is unchanged.
795 * dev_put to indicate they have finished with it.
796 */ 812 */
797 813
798struct net_device *dev_get_by_flags(struct net *net, unsigned short if_flags, 814struct net_device *dev_get_by_flags_rcu(struct net *net, unsigned short if_flags,
799 unsigned short mask) 815 unsigned short mask)
800{ 816{
801 struct net_device *dev, *ret; 817 struct net_device *dev, *ret;
802 818
803 ret = NULL; 819 ret = NULL;
804 rcu_read_lock();
805 for_each_netdev_rcu(net, dev) { 820 for_each_netdev_rcu(net, dev) {
806 if (((dev->flags ^ if_flags) & mask) == 0) { 821 if (((dev->flags ^ if_flags) & mask) == 0) {
807 dev_hold(dev);
808 ret = dev; 822 ret = dev;
809 break; 823 break;
810 } 824 }
811 } 825 }
812 rcu_read_unlock();
813 return ret; 826 return ret;
814} 827}
815EXPORT_SYMBOL(dev_get_by_flags); 828EXPORT_SYMBOL(dev_get_by_flags_rcu);
816 829
817/** 830/**
818 * dev_valid_name - check if name is okay for network device 831 * dev_valid_name - check if name is okay for network device
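Since dev_getfirstbyhwtype() now walks the device list under RCU and dev_get_by_flags_rcu() no longer takes a reference, a caller that keeps the device past the read-side section must pin it itself, as the new kernel-doc says. A minimal sketch (helper name hypothetical):

static struct net_device *example_find_up_loopback(struct net *net)
{
	struct net_device *dev;

	rcu_read_lock();
	dev = dev_get_by_flags_rcu(net, IFF_LOOPBACK | IFF_UP,
				   IFF_LOOPBACK | IFF_UP);
	if (dev)
		dev_hold(dev);		/* pin it beyond rcu_read_unlock() */
	rcu_read_unlock();

	return dev;			/* caller does dev_put() when done */
}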
@@ -935,18 +948,22 @@ int dev_alloc_name(struct net_device *dev, const char *name)
935} 948}
936EXPORT_SYMBOL(dev_alloc_name); 949EXPORT_SYMBOL(dev_alloc_name);
937 950
938static int dev_get_valid_name(struct net *net, const char *name, char *buf, 951static int dev_get_valid_name(struct net_device *dev, const char *name, bool fmt)
939 bool fmt)
940{ 952{
953 struct net *net;
954
955 BUG_ON(!dev_net(dev));
956 net = dev_net(dev);
957
941 if (!dev_valid_name(name)) 958 if (!dev_valid_name(name))
942 return -EINVAL; 959 return -EINVAL;
943 960
944 if (fmt && strchr(name, '%')) 961 if (fmt && strchr(name, '%'))
945 return __dev_alloc_name(net, name, buf); 962 return dev_alloc_name(dev, name);
946 else if (__dev_get_by_name(net, name)) 963 else if (__dev_get_by_name(net, name))
947 return -EEXIST; 964 return -EEXIST;
948 else if (buf != name) 965 else if (dev->name != name)
949 strlcpy(buf, name, IFNAMSIZ); 966 strlcpy(dev->name, name, IFNAMSIZ);
950 967
951 return 0; 968 return 0;
952} 969}
@@ -978,20 +995,15 @@ int dev_change_name(struct net_device *dev, const char *newname)
978 995
979 memcpy(oldname, dev->name, IFNAMSIZ); 996 memcpy(oldname, dev->name, IFNAMSIZ);
980 997
981 err = dev_get_valid_name(net, newname, dev->name, 1); 998 err = dev_get_valid_name(dev, newname, 1);
982 if (err < 0) 999 if (err < 0)
983 return err; 1000 return err;
984 1001
985rollback: 1002rollback:
986 /* For now only devices in the initial network namespace 1003 ret = device_rename(&dev->dev, dev->name);
987 * are in sysfs. 1004 if (ret) {
988 */ 1005 memcpy(dev->name, oldname, IFNAMSIZ);
989 if (net_eq(net, &init_net)) { 1006 return ret;
990 ret = device_rename(&dev->dev, dev->name);
991 if (ret) {
992 memcpy(dev->name, oldname, IFNAMSIZ);
993 return ret;
994 }
995 } 1007 }
996 1008
997 write_lock_bh(&dev_base_lock); 1009 write_lock_bh(&dev_base_lock);
@@ -1084,9 +1096,9 @@ void netdev_state_change(struct net_device *dev)
1084} 1096}
1085EXPORT_SYMBOL(netdev_state_change); 1097EXPORT_SYMBOL(netdev_state_change);
1086 1098
1087void netdev_bonding_change(struct net_device *dev, unsigned long event) 1099int netdev_bonding_change(struct net_device *dev, unsigned long event)
1088{ 1100{
1089 call_netdevice_notifiers(event, dev); 1101 return call_netdevice_notifiers(event, dev);
1090} 1102}
1091EXPORT_SYMBOL(netdev_bonding_change); 1103EXPORT_SYMBOL(netdev_bonding_change);
1092 1104
@@ -1113,19 +1125,7 @@ void dev_load(struct net *net, const char *name)
1113} 1125}
1114EXPORT_SYMBOL(dev_load); 1126EXPORT_SYMBOL(dev_load);
1115 1127
1116/** 1128static int __dev_open(struct net_device *dev)
1117 * dev_open - prepare an interface for use.
1118 * @dev: device to open
1119 *
1120 * Takes a device from down to up state. The device's private open
1121 * function is invoked and then the multicast lists are loaded. Finally
1122 * the device is moved into the up state and a %NETDEV_UP message is
1123 * sent to the netdev notifier chain.
1124 *
1125 * Calling this function on an active interface is a nop. On a failure
1126 * a negative errno code is returned.
1127 */
1128int dev_open(struct net_device *dev)
1129{ 1129{
1130 const struct net_device_ops *ops = dev->netdev_ops; 1130 const struct net_device_ops *ops = dev->netdev_ops;
1131 int ret; 1131 int ret;
@@ -1133,13 +1133,6 @@ int dev_open(struct net_device *dev)
1133 ASSERT_RTNL(); 1133 ASSERT_RTNL();
1134 1134
1135 /* 1135 /*
1136 * Is it already up?
1137 */
1138
1139 if (dev->flags & IFF_UP)
1140 return 0;
1141
1142 /*
1143 * Is it even present? 1136 * Is it even present?
1144 */ 1137 */
1145 if (!netif_device_present(dev)) 1138 if (!netif_device_present(dev))
@@ -1187,36 +1180,57 @@ int dev_open(struct net_device *dev)
1187 * Wakeup transmit queue engine 1180 * Wakeup transmit queue engine
1188 */ 1181 */
1189 dev_activate(dev); 1182 dev_activate(dev);
1190
1191 /*
1192 * ... and announce new interface.
1193 */
1194 call_netdevice_notifiers(NETDEV_UP, dev);
1195 } 1183 }
1196 1184
1197 return ret; 1185 return ret;
1198} 1186}
1199EXPORT_SYMBOL(dev_open);
1200 1187
1201/** 1188/**
1202 * dev_close - shutdown an interface. 1189 * dev_open - prepare an interface for use.
1203 * @dev: device to shutdown 1190 * @dev: device to open
1204 * 1191 *
1205 * This function moves an active device into down state. A 1192 * Takes a device from down to up state. The device's private open
1206 * %NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device 1193 * function is invoked and then the multicast lists are loaded. Finally
1207 * is then deactivated and finally a %NETDEV_DOWN is sent to the notifier 1194 * the device is moved into the up state and a %NETDEV_UP message is
1208 * chain. 1195 * sent to the netdev notifier chain.
1196 *
1197 * Calling this function on an active interface is a nop. On a failure
1198 * a negative errno code is returned.
1209 */ 1199 */
1210int dev_close(struct net_device *dev) 1200int dev_open(struct net_device *dev)
1201{
1202 int ret;
1203
1204 /*
1205 * Is it already up?
1206 */
1207 if (dev->flags & IFF_UP)
1208 return 0;
1209
1210 /*
1211 * Open device
1212 */
1213 ret = __dev_open(dev);
1214 if (ret < 0)
1215 return ret;
1216
1217 /*
1218 * ... and announce new interface.
1219 */
1220 rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING);
1221 call_netdevice_notifiers(NETDEV_UP, dev);
1222
1223 return ret;
1224}
1225EXPORT_SYMBOL(dev_open);
1226
1227static int __dev_close(struct net_device *dev)
1211{ 1228{
1212 const struct net_device_ops *ops = dev->netdev_ops; 1229 const struct net_device_ops *ops = dev->netdev_ops;
1213 ASSERT_RTNL();
1214 1230
1231 ASSERT_RTNL();
1215 might_sleep(); 1232 might_sleep();
1216 1233
1217 if (!(dev->flags & IFF_UP))
1218 return 0;
1219
1220 /* 1234 /*
1221 * Tell people we are going down, so that they can 1235 * Tell people we are going down, so that they can
1222 * prepare to death, when device is still operating. 1236 * prepare to death, when device is still operating.
@@ -1252,14 +1266,34 @@ int dev_close(struct net_device *dev)
1252 dev->flags &= ~IFF_UP; 1266 dev->flags &= ~IFF_UP;
1253 1267
1254 /* 1268 /*
1255 * Tell people we are down 1269 * Shutdown NET_DMA
1256 */ 1270 */
1257 call_netdevice_notifiers(NETDEV_DOWN, dev); 1271 net_dmaengine_put();
1272
1273 return 0;
1274}
1275
1276/**
1277 * dev_close - shutdown an interface.
1278 * @dev: device to shutdown
1279 *
1280 * This function moves an active device into down state. A
1281 * %NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
1282 * is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
1283 * chain.
1284 */
1285int dev_close(struct net_device *dev)
1286{
1287 if (!(dev->flags & IFF_UP))
1288 return 0;
1289
1290 __dev_close(dev);
1258 1291
1259 /* 1292 /*
1260 * Shutdown NET_DMA 1293 * Tell people we are down
1261 */ 1294 */
1262 net_dmaengine_put(); 1295 rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING);
1296 call_netdevice_notifiers(NETDEV_DOWN, dev);
1263 1297
1264 return 0; 1298 return 0;
1265} 1299}
@@ -1394,6 +1428,7 @@ EXPORT_SYMBOL(unregister_netdevice_notifier);
1394 1428
1395int call_netdevice_notifiers(unsigned long val, struct net_device *dev) 1429int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
1396{ 1430{
1431 ASSERT_RTNL();
1397 return raw_notifier_call_chain(&netdev_chain, val, dev); 1432 return raw_notifier_call_chain(&netdev_chain, val, dev);
1398} 1433}
1399 1434
@@ -1412,7 +1447,7 @@ void net_disable_timestamp(void)
1412} 1447}
1413EXPORT_SYMBOL(net_disable_timestamp); 1448EXPORT_SYMBOL(net_disable_timestamp);
1414 1449
1415static inline void net_timestamp(struct sk_buff *skb) 1450static inline void net_timestamp_set(struct sk_buff *skb)
1416{ 1451{
1417 if (atomic_read(&netstamp_needed)) 1452 if (atomic_read(&netstamp_needed))
1418 __net_timestamp(skb); 1453 __net_timestamp(skb);
@@ -1420,6 +1455,12 @@ static inline void net_timestamp(struct sk_buff *skb)
1420 skb->tstamp.tv64 = 0; 1455 skb->tstamp.tv64 = 0;
1421} 1456}
1422 1457
1458static inline void net_timestamp_check(struct sk_buff *skb)
1459{
1460 if (!skb->tstamp.tv64 && atomic_read(&netstamp_needed))
1461 __net_timestamp(skb);
1462}
1463
1423/** 1464/**
1424 * dev_forward_skb - loopback an skb to another netif 1465 * dev_forward_skb - loopback an skb to another netif
1425 * 1466 *
@@ -1428,7 +1469,7 @@ static inline void net_timestamp(struct sk_buff *skb)
1428 * 1469 *
1429 * return values: 1470 * return values:
1430 * NET_RX_SUCCESS (no congestion) 1471 * NET_RX_SUCCESS (no congestion)
1431 * NET_RX_DROP (packet was dropped) 1472 * NET_RX_DROP (packet was dropped, but freed)
1432 * 1473 *
1433 * dev_forward_skb can be used for injecting an skb from the 1474 * dev_forward_skb can be used for injecting an skb from the
1434 * start_xmit function of one device into the receive queue 1475 * start_xmit function of one device into the receive queue
@@ -1441,20 +1482,17 @@ static inline void net_timestamp(struct sk_buff *skb)
1441int dev_forward_skb(struct net_device *dev, struct sk_buff *skb) 1482int dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
1442{ 1483{
1443 skb_orphan(skb); 1484 skb_orphan(skb);
1485 nf_reset(skb);
1444 1486
1445 if (!(dev->flags & IFF_UP)) 1487 if (!(dev->flags & IFF_UP) ||
1446 return NET_RX_DROP; 1488 (skb->len > (dev->mtu + dev->hard_header_len))) {
1447 1489 kfree_skb(skb);
1448 if (skb->len > (dev->mtu + dev->hard_header_len))
1449 return NET_RX_DROP; 1490 return NET_RX_DROP;
1450 1491 }
1451 skb_dst_drop(skb); 1492 skb_set_dev(skb, dev);
1452 skb->tstamp.tv64 = 0; 1493 skb->tstamp.tv64 = 0;
1453 skb->pkt_type = PACKET_HOST; 1494 skb->pkt_type = PACKET_HOST;
1454 skb->protocol = eth_type_trans(skb, dev); 1495 skb->protocol = eth_type_trans(skb, dev);
1455 skb->mark = 0;
1456 secpath_reset(skb);
1457 nf_reset(skb);
1458 return netif_rx(skb); 1496 return netif_rx(skb);
1459} 1497}
1460EXPORT_SYMBOL_GPL(dev_forward_skb); 1498EXPORT_SYMBOL_GPL(dev_forward_skb);
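With this change dev_forward_skb() frees oversized packets and packets aimed at a downed peer itself, so a virtual driver no longer has to clean up on NET_RX_DROP. A rough sketch of a veth-style transmit path; example_get_peer() is a placeholder for however the driver locates its peer device:

static netdev_tx_t example_peer_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct net_device *peer = example_get_peer(dev);	/* hypothetical */
	unsigned int len = skb->len;

	if (dev_forward_skb(peer, skb) == NET_RX_SUCCESS) {
		dev->stats.tx_packets++;
		dev->stats.tx_bytes += len;
	} else {
		dev->stats.tx_dropped++;	/* skb already freed for us */
	}
	return NETDEV_TX_OK;
}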
@@ -1470,9 +1508,9 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
1470 1508
1471#ifdef CONFIG_NET_CLS_ACT 1509#ifdef CONFIG_NET_CLS_ACT
1472 if (!(skb->tstamp.tv64 && (G_TC_FROM(skb->tc_verd) & AT_INGRESS))) 1510 if (!(skb->tstamp.tv64 && (G_TC_FROM(skb->tc_verd) & AT_INGRESS)))
1473 net_timestamp(skb); 1511 net_timestamp_set(skb);
1474#else 1512#else
1475 net_timestamp(skb); 1513 net_timestamp_set(skb);
1476#endif 1514#endif
1477 1515
1478 rcu_read_lock(); 1516 rcu_read_lock();
@@ -1498,7 +1536,8 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
1498 if (net_ratelimit()) 1536 if (net_ratelimit())
1499 printk(KERN_CRIT "protocol %04x is " 1537 printk(KERN_CRIT "protocol %04x is "
1500 "buggy, dev %s\n", 1538 "buggy, dev %s\n",
1501 skb2->protocol, dev->name); 1539 ntohs(skb2->protocol),
1540 dev->name);
1502 skb_reset_network_header(skb2); 1541 skb_reset_network_header(skb2);
1503 } 1542 }
1504 1543
@@ -1510,6 +1549,24 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
1510 rcu_read_unlock(); 1549 rcu_read_unlock();
1511} 1550}
1512 1551
1552/*
1553 * Routine to help set real_num_tx_queues. To avoid skbs mapped to queues
1554 * greater then real_num_tx_queues stale skbs on the qdisc must be flushed.
1555 */
1556void netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
1557{
1558 unsigned int real_num = dev->real_num_tx_queues;
1559
1560 if (unlikely(txq > dev->num_tx_queues))
1561 ;
1562 else if (txq > real_num)
1563 dev->real_num_tx_queues = txq;
1564 else if (txq < real_num) {
1565 dev->real_num_tx_queues = txq;
1566 qdisc_reset_all_tx_gt(dev, txq);
1567 }
1568}
1569EXPORT_SYMBOL(netif_set_real_num_tx_queues);
1513 1570
1514static inline void __netif_reschedule(struct Qdisc *q) 1571static inline void __netif_reschedule(struct Qdisc *q)
1515{ 1572{
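A multiqueue driver is expected to call the new netif_set_real_num_tx_queues() whenever its count of usable TX rings changes, for instance from its open routine; skbs left on qdiscs for queues that disappear are flushed by the helper. Sketch only, the ring-count helper is hypothetical:

static int example_open(struct net_device *dev)
{
	unsigned int txq = example_usable_tx_rings(dev);	/* hypothetical */

	/* must stay <= the count passed to alloc_netdev_mq() */
	netif_set_real_num_tx_queues(dev, txq);
	netif_tx_start_all_queues(dev);
	return 0;
}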
@@ -1518,8 +1575,9 @@ static inline void __netif_reschedule(struct Qdisc *q)
1518 1575
1519 local_irq_save(flags); 1576 local_irq_save(flags);
1520 sd = &__get_cpu_var(softnet_data); 1577 sd = &__get_cpu_var(softnet_data);
1521 q->next_sched = sd->output_queue; 1578 q->next_sched = NULL;
1522 sd->output_queue = q; 1579 *sd->output_queue_tailp = q;
1580 sd->output_queue_tailp = &q->next_sched;
1523 raise_softirq_irqoff(NET_TX_SOFTIRQ); 1581 raise_softirq_irqoff(NET_TX_SOFTIRQ);
1524 local_irq_restore(flags); 1582 local_irq_restore(flags);
1525} 1583}
@@ -1614,6 +1672,36 @@ static bool dev_can_checksum(struct net_device *dev, struct sk_buff *skb)
1614 return false; 1672 return false;
1615} 1673}
1616 1674
1675/**
1676 * skb_dev_set -- assign a new device to a buffer
1677 * @skb: buffer for the new device
1678 * @dev: network device
1679 *
1680 * If an skb is owned by a device already, we have to reset
1681 * all data private to the namespace a device belongs to
1682 * before assigning it a new device.
1683 */
1684#ifdef CONFIG_NET_NS
1685void skb_set_dev(struct sk_buff *skb, struct net_device *dev)
1686{
1687 skb_dst_drop(skb);
1688 if (skb->dev && !net_eq(dev_net(skb->dev), dev_net(dev))) {
1689 secpath_reset(skb);
1690 nf_reset(skb);
1691 skb_init_secmark(skb);
1692 skb->mark = 0;
1693 skb->priority = 0;
1694 skb->nf_trace = 0;
1695 skb->ipvs_property = 0;
1696#ifdef CONFIG_NET_SCHED
1697 skb->tc_index = 0;
1698#endif
1699 }
1700 skb->dev = dev;
1701}
1702EXPORT_SYMBOL(skb_set_dev);
1703#endif /* CONFIG_NET_NS */
1704
1617/* 1705/*
1618 * Invalidate hardware checksum when packet is to be mangled, and 1706 * Invalidate hardware checksum when packet is to be mangled, and
1619 * complete checksum manually on outgoing path. 1707 * complete checksum manually on outgoing path.
@@ -1734,18 +1822,27 @@ EXPORT_SYMBOL(netdev_rx_csum_fault);
1734 * 2. No high memory really exists on this machine. 1822 * 2. No high memory really exists on this machine.
1735 */ 1823 */
1736 1824
1737static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb) 1825static int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
1738{ 1826{
1739#ifdef CONFIG_HIGHMEM 1827#ifdef CONFIG_HIGHMEM
1740 int i; 1828 int i;
1829 if (!(dev->features & NETIF_F_HIGHDMA)) {
1830 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
1831 if (PageHighMem(skb_shinfo(skb)->frags[i].page))
1832 return 1;
1833 }
1741 1834
1742 if (dev->features & NETIF_F_HIGHDMA) 1835 if (PCI_DMA_BUS_IS_PHYS) {
1743 return 0; 1836 struct device *pdev = dev->dev.parent;
1744
1745 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
1746 if (PageHighMem(skb_shinfo(skb)->frags[i].page))
1747 return 1;
1748 1837
1838 if (!pdev)
1839 return 0;
1840 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
1841 dma_addr_t addr = page_to_phys(skb_shinfo(skb)->frags[i].page);
1842 if (!pdev->dma_mask || addr + PAGE_SIZE - 1 > *pdev->dma_mask)
1843 return 1;
1844 }
1845 }
1749#endif 1846#endif
1750 return 0; 1847 return 0;
1751} 1848}
@@ -1803,6 +1900,41 @@ static int dev_gso_segment(struct sk_buff *skb)
1803 return 0; 1900 return 0;
1804} 1901}
1805 1902
1903/*
1904 * Try to orphan skb early, right before transmission by the device.
1905 * We cannot orphan skb if tx timestamp is requested, since
1906 * drivers need to call skb_tstamp_tx() to send the timestamp.
1907 */
1908static inline void skb_orphan_try(struct sk_buff *skb)
1909{
1910 struct sock *sk = skb->sk;
1911
1912 if (sk && !skb_tx(skb)->flags) {
1913 /* skb_tx_hash() wont be able to get sk.
1914 * We copy sk_hash into skb->rxhash
1915 */
1916 if (!skb->rxhash)
1917 skb->rxhash = sk->sk_hash;
1918 skb_orphan(skb);
1919 }
1920}
1921
1922/*
1923 * Returns true if either:
1924 * 1. skb has frag_list and the device doesn't support FRAGLIST, or
1925 * 2. skb is fragmented and the device does not support SG, or if
1926 * at least one of fragments is in highmem and device does not
1927 * support DMA from it.
1928 */
1929static inline int skb_needs_linearize(struct sk_buff *skb,
1930 struct net_device *dev)
1931{
1932 return skb_is_nonlinear(skb) &&
1933 ((skb_has_frags(skb) && !(dev->features & NETIF_F_FRAGLIST)) ||
1934 (skb_shinfo(skb)->nr_frags && (!(dev->features & NETIF_F_SG) ||
1935 illegal_highdma(dev, skb))));
1936}
1937
1806int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, 1938int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
1807 struct netdev_queue *txq) 1939 struct netdev_queue *txq)
1808{ 1940{
@@ -1813,13 +1945,6 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
1813 if (!list_empty(&ptype_all)) 1945 if (!list_empty(&ptype_all))
1814 dev_queue_xmit_nit(skb, dev); 1946 dev_queue_xmit_nit(skb, dev);
1815 1947
1816 if (netif_needs_gso(dev, skb)) {
1817 if (unlikely(dev_gso_segment(skb)))
1818 goto out_kfree_skb;
1819 if (skb->next)
1820 goto gso;
1821 }
1822
1823 /* 1948 /*
1824 * If device doesnt need skb->dst, release it right now while 1949 * If device doesnt need skb->dst, release it right now while
1825 * its hot in this cpu cache 1950 * its hot in this cpu cache
@@ -1827,23 +1952,34 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
1827 if (dev->priv_flags & IFF_XMIT_DST_RELEASE) 1952 if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
1828 skb_dst_drop(skb); 1953 skb_dst_drop(skb);
1829 1954
1955 skb_orphan_try(skb);
1956
1957 if (netif_needs_gso(dev, skb)) {
1958 if (unlikely(dev_gso_segment(skb)))
1959 goto out_kfree_skb;
1960 if (skb->next)
1961 goto gso;
1962 } else {
1963 if (skb_needs_linearize(skb, dev) &&
1964 __skb_linearize(skb))
1965 goto out_kfree_skb;
1966
1967 /* If packet is not checksummed and device does not
1968 * support checksumming for this protocol, complete
1969 * checksumming here.
1970 */
1971 if (skb->ip_summed == CHECKSUM_PARTIAL) {
1972 skb_set_transport_header(skb, skb->csum_start -
1973 skb_headroom(skb));
1974 if (!dev_can_checksum(dev, skb) &&
1975 skb_checksum_help(skb))
1976 goto out_kfree_skb;
1977 }
1978 }
1979
1830 rc = ops->ndo_start_xmit(skb, dev); 1980 rc = ops->ndo_start_xmit(skb, dev);
1831 if (rc == NETDEV_TX_OK) 1981 if (rc == NETDEV_TX_OK)
1832 txq_trans_update(txq); 1982 txq_trans_update(txq);
1833 /*
1834 * TODO: if skb_orphan() was called by
1835 * dev->hard_start_xmit() (for example, the unmodified
1836 * igb driver does that; bnx2 doesn't), then
1837 * skb_tx_software_timestamp() will be unable to send
1838 * back the time stamp.
1839 *
1840 * How can this be prevented? Always create another
1841 * reference to the socket before calling
1842 * dev->hard_start_xmit()? Prevent that skb_orphan()
1843 * does anything in dev->hard_start_xmit() by clearing
1844 * the skb destructor before the call and restoring it
1845 * afterwards, then doing the skb_orphan() ourselves?
1846 */
1847 return rc; 1983 return rc;
1848 } 1984 }
1849 1985
@@ -1853,6 +1989,14 @@ gso:
1853 1989
1854 skb->next = nskb->next; 1990 skb->next = nskb->next;
1855 nskb->next = NULL; 1991 nskb->next = NULL;
1992
1993 /*
1994 * If device doesnt need nskb->dst, release it right now while
1995 * its hot in this cpu cache
1996 */
1997 if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
1998 skb_dst_drop(nskb);
1999
1856 rc = ops->ndo_start_xmit(nskb, dev); 2000 rc = ops->ndo_start_xmit(nskb, dev);
1857 if (unlikely(rc != NETDEV_TX_OK)) { 2001 if (unlikely(rc != NETDEV_TX_OK)) {
1858 if (rc & ~NETDEV_TX_MASK) 2002 if (rc & ~NETDEV_TX_MASK)
@@ -1874,7 +2018,7 @@ out_kfree_skb:
1874 return rc; 2018 return rc;
1875} 2019}
1876 2020
1877static u32 skb_tx_hashrnd; 2021static u32 hashrnd __read_mostly;
1878 2022
1879u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb) 2023u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb)
1880{ 2024{
@@ -1890,9 +2034,8 @@ u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb)
1890 if (skb->sk && skb->sk->sk_hash) 2034 if (skb->sk && skb->sk->sk_hash)
1891 hash = skb->sk->sk_hash; 2035 hash = skb->sk->sk_hash;
1892 else 2036 else
1893 hash = skb->protocol; 2037 hash = (__force u16) skb->protocol ^ skb->rxhash;
1894 2038 hash = jhash_1word(hash, hashrnd);
1895 hash = jhash_1word(hash, skb_tx_hashrnd);
1896 2039
1897 return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32); 2040 return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32);
1898} 2041}
@@ -1902,10 +2045,9 @@ static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index)
1902{ 2045{
1903 if (unlikely(queue_index >= dev->real_num_tx_queues)) { 2046 if (unlikely(queue_index >= dev->real_num_tx_queues)) {
1904 if (net_ratelimit()) { 2047 if (net_ratelimit()) {
1905 WARN(1, "%s selects TX queue %d, but " 2048 pr_warning("%s selects TX queue %d, but "
1906 "real number of TX queues is %d\n", 2049 "real number of TX queues is %d\n",
1907 dev->name, queue_index, 2050 dev->name, queue_index, dev->real_num_tx_queues);
1908 dev->real_num_tx_queues);
1909 } 2051 }
1910 return 0; 2052 return 0;
1911 } 2053 }
@@ -1915,24 +2057,27 @@ static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index)
1915static struct netdev_queue *dev_pick_tx(struct net_device *dev, 2057static struct netdev_queue *dev_pick_tx(struct net_device *dev,
1916 struct sk_buff *skb) 2058 struct sk_buff *skb)
1917{ 2059{
1918 u16 queue_index; 2060 int queue_index;
1919 struct sock *sk = skb->sk; 2061 const struct net_device_ops *ops = dev->netdev_ops;
1920 2062
1921 if (sk_tx_queue_recorded(sk)) { 2063 if (ops->ndo_select_queue) {
1922 queue_index = sk_tx_queue_get(sk); 2064 queue_index = ops->ndo_select_queue(dev, skb);
2065 queue_index = dev_cap_txqueue(dev, queue_index);
1923 } else { 2066 } else {
1924 const struct net_device_ops *ops = dev->netdev_ops; 2067 struct sock *sk = skb->sk;
2068 queue_index = sk_tx_queue_get(sk);
2069 if (queue_index < 0) {
1925 2070
1926 if (ops->ndo_select_queue) {
1927 queue_index = ops->ndo_select_queue(dev, skb);
1928 queue_index = dev_cap_txqueue(dev, queue_index);
1929 } else {
1930 queue_index = 0; 2071 queue_index = 0;
1931 if (dev->real_num_tx_queues > 1) 2072 if (dev->real_num_tx_queues > 1)
1932 queue_index = skb_tx_hash(dev, skb); 2073 queue_index = skb_tx_hash(dev, skb);
1933 2074
1934 if (sk && sk->sk_dst_cache) 2075 if (sk) {
1935 sk_tx_queue_set(sk, queue_index); 2076 struct dst_entry *dst = rcu_dereference_check(sk->sk_dst_cache, 1);
2077
2078 if (dst && skb_dst(skb) == dst)
2079 sk_tx_queue_set(sk, queue_index);
2080 }
1936 } 2081 }
1937 } 2082 }
1938 2083
@@ -1945,32 +2090,56 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
1945 struct netdev_queue *txq) 2090 struct netdev_queue *txq)
1946{ 2091{
1947 spinlock_t *root_lock = qdisc_lock(q); 2092 spinlock_t *root_lock = qdisc_lock(q);
2093 bool contended = qdisc_is_running(q);
1948 int rc; 2094 int rc;
1949 2095
2096 /*
2097 * Heuristic to force contended enqueues to serialize on a
2098 * separate lock before trying to get qdisc main lock.
2099 * This permits __QDISC_STATE_RUNNING owner to get the lock more often
2100 * and dequeue packets faster.
2101 */
2102 if (unlikely(contended))
2103 spin_lock(&q->busylock);
2104
1950 spin_lock(root_lock); 2105 spin_lock(root_lock);
1951 if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) { 2106 if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
1952 kfree_skb(skb); 2107 kfree_skb(skb);
1953 rc = NET_XMIT_DROP; 2108 rc = NET_XMIT_DROP;
1954 } else if ((q->flags & TCQ_F_CAN_BYPASS) && !qdisc_qlen(q) && 2109 } else if ((q->flags & TCQ_F_CAN_BYPASS) && !qdisc_qlen(q) &&
1955 !test_and_set_bit(__QDISC_STATE_RUNNING, &q->state)) { 2110 qdisc_run_begin(q)) {
1956 /* 2111 /*
1957 * This is a work-conserving queue; there are no old skbs 2112 * This is a work-conserving queue; there are no old skbs
1958 * waiting to be sent out; and the qdisc is not running - 2113 * waiting to be sent out; and the qdisc is not running -
1959 * xmit the skb directly. 2114 * xmit the skb directly.
1960 */ 2115 */
2116 if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE))
2117 skb_dst_force(skb);
1961 __qdisc_update_bstats(q, skb->len); 2118 __qdisc_update_bstats(q, skb->len);
1962 if (sch_direct_xmit(skb, q, dev, txq, root_lock)) 2119 if (sch_direct_xmit(skb, q, dev, txq, root_lock)) {
2120 if (unlikely(contended)) {
2121 spin_unlock(&q->busylock);
2122 contended = false;
2123 }
1963 __qdisc_run(q); 2124 __qdisc_run(q);
1964 else 2125 } else
1965 clear_bit(__QDISC_STATE_RUNNING, &q->state); 2126 qdisc_run_end(q);
1966 2127
1967 rc = NET_XMIT_SUCCESS; 2128 rc = NET_XMIT_SUCCESS;
1968 } else { 2129 } else {
2130 skb_dst_force(skb);
1969 rc = qdisc_enqueue_root(skb, q); 2131 rc = qdisc_enqueue_root(skb, q);
1970 qdisc_run(q); 2132 if (qdisc_run_begin(q)) {
2133 if (unlikely(contended)) {
2134 spin_unlock(&q->busylock);
2135 contended = false;
2136 }
2137 __qdisc_run(q);
2138 }
1971 } 2139 }
1972 spin_unlock(root_lock); 2140 spin_unlock(root_lock);
1973 2141 if (unlikely(contended))
2142 spin_unlock(&q->busylock);
1974 return rc; 2143 return rc;
1975} 2144}
1976 2145
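The busylock taken in __dev_xmit_skb() is a general trick: contended enqueuers first serialize on a secondary spinlock so the CPU that currently owns the qdisc run state can retake the main lock with little competition and keep dequeueing. A stripped-down sketch of the same idea on a made-up queue structure (all names hypothetical, not the qdisc code itself):

struct example_queue {
	spinlock_t	lock;		/* protects the queue proper */
	spinlock_t	busylock;	/* funnel for contended enqueuers */
	bool		running;	/* an owner is currently dequeueing */
};

static void example_enqueue(struct example_queue *q, struct sk_buff *skb)
{
	bool contended = q->running;	/* heuristic, intentionally racy */

	if (contended)
		spin_lock(&q->busylock);

	spin_lock(&q->lock);
	example_add_tail(q, skb);	/* hypothetical list helper */
	spin_unlock(&q->lock);

	if (contended)
		spin_unlock(&q->busylock);
}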
@@ -2006,42 +2175,13 @@ int dev_queue_xmit(struct sk_buff *skb)
2006 struct Qdisc *q; 2175 struct Qdisc *q;
2007 int rc = -ENOMEM; 2176 int rc = -ENOMEM;
2008 2177
2009 /* GSO will handle the following emulations directly. */
2010 if (netif_needs_gso(dev, skb))
2011 goto gso;
2012
2013 if (skb_has_frags(skb) &&
2014 !(dev->features & NETIF_F_FRAGLIST) &&
2015 __skb_linearize(skb))
2016 goto out_kfree_skb;
2017
2018 /* Fragmented skb is linearized if device does not support SG,
2019 * or if at least one of fragments is in highmem and device
2020 * does not support DMA from it.
2021 */
2022 if (skb_shinfo(skb)->nr_frags &&
2023 (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) &&
2024 __skb_linearize(skb))
2025 goto out_kfree_skb;
2026
2027 /* If packet is not checksummed and device does not support
2028 * checksumming for this protocol, complete checksumming here.
2029 */
2030 if (skb->ip_summed == CHECKSUM_PARTIAL) {
2031 skb_set_transport_header(skb, skb->csum_start -
2032 skb_headroom(skb));
2033 if (!dev_can_checksum(dev, skb) && skb_checksum_help(skb))
2034 goto out_kfree_skb;
2035 }
2036
2037gso:
2038 /* Disable soft irqs for various locks below. Also 2178 /* Disable soft irqs for various locks below. Also
2039 * stops preemption for RCU. 2179 * stops preemption for RCU.
2040 */ 2180 */
2041 rcu_read_lock_bh(); 2181 rcu_read_lock_bh();
2042 2182
2043 txq = dev_pick_tx(dev, skb); 2183 txq = dev_pick_tx(dev, skb);
2044 q = rcu_dereference(txq->qdisc); 2184 q = rcu_dereference_bh(txq->qdisc);
2045 2185
2046#ifdef CONFIG_NET_CLS_ACT 2186#ifdef CONFIG_NET_CLS_ACT
2047 skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS); 2187 skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS);
@@ -2093,7 +2233,6 @@ gso:
2093 rc = -ENETDOWN; 2233 rc = -ENETDOWN;
2094 rcu_read_unlock_bh(); 2234 rcu_read_unlock_bh();
2095 2235
2096out_kfree_skb:
2097 kfree_skb(skb); 2236 kfree_skb(skb);
2098 return rc; 2237 return rc;
2099out: 2238out:
@@ -2108,11 +2247,244 @@ EXPORT_SYMBOL(dev_queue_xmit);
2108 =======================================================================*/ 2247 =======================================================================*/
2109 2248
2110int netdev_max_backlog __read_mostly = 1000; 2249int netdev_max_backlog __read_mostly = 1000;
2250int netdev_tstamp_prequeue __read_mostly = 1;
2111int netdev_budget __read_mostly = 300; 2251int netdev_budget __read_mostly = 300;
2112int weight_p __read_mostly = 64; /* old backlog weight */ 2252int weight_p __read_mostly = 64; /* old backlog weight */
2113 2253
2114DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, }; 2254/* Called with irq disabled */
2255static inline void ____napi_schedule(struct softnet_data *sd,
2256 struct napi_struct *napi)
2257{
2258 list_add_tail(&napi->poll_list, &sd->poll_list);
2259 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
2260}
2261
2262#ifdef CONFIG_RPS
2263
2264/* One global table that all flow-based protocols share. */
2265struct rps_sock_flow_table *rps_sock_flow_table __read_mostly;
2266EXPORT_SYMBOL(rps_sock_flow_table);
2267
2268/*
2269 * get_rps_cpu is called from netif_receive_skb and returns the target
2270 * CPU from the RPS map of the receiving queue for a given skb.
2271 * rcu_read_lock must be held on entry.
2272 */
2273static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
2274 struct rps_dev_flow **rflowp)
2275{
2276 struct ipv6hdr *ip6;
2277 struct iphdr *ip;
2278 struct netdev_rx_queue *rxqueue;
2279 struct rps_map *map;
2280 struct rps_dev_flow_table *flow_table;
2281 struct rps_sock_flow_table *sock_flow_table;
2282 int cpu = -1;
2283 u8 ip_proto;
2284 u16 tcpu;
2285 u32 addr1, addr2, ihl;
2286 union {
2287 u32 v32;
2288 u16 v16[2];
2289 } ports;
2290
2291 if (skb_rx_queue_recorded(skb)) {
2292 u16 index = skb_get_rx_queue(skb);
2293 if (unlikely(index >= dev->num_rx_queues)) {
2294 WARN_ONCE(dev->num_rx_queues > 1, "%s received packet "
2295 "on queue %u, but number of RX queues is %u\n",
2296 dev->name, index, dev->num_rx_queues);
2297 goto done;
2298 }
2299 rxqueue = dev->_rx + index;
2300 } else
2301 rxqueue = dev->_rx;
2302
2303 if (!rxqueue->rps_map && !rxqueue->rps_flow_table)
2304 goto done;
2305
2306 if (skb->rxhash)
2307 goto got_hash; /* Skip hash computation on packet header */
2308
2309 switch (skb->protocol) {
2310 case __constant_htons(ETH_P_IP):
2311 if (!pskb_may_pull(skb, sizeof(*ip)))
2312 goto done;
2313
2314 ip = (struct iphdr *) skb->data;
2315 ip_proto = ip->protocol;
2316 addr1 = (__force u32) ip->saddr;
2317 addr2 = (__force u32) ip->daddr;
2318 ihl = ip->ihl;
2319 break;
2320 case __constant_htons(ETH_P_IPV6):
2321 if (!pskb_may_pull(skb, sizeof(*ip6)))
2322 goto done;
2323
2324 ip6 = (struct ipv6hdr *) skb->data;
2325 ip_proto = ip6->nexthdr;
2326 addr1 = (__force u32) ip6->saddr.s6_addr32[3];
2327 addr2 = (__force u32) ip6->daddr.s6_addr32[3];
2328 ihl = (40 >> 2);
2329 break;
2330 default:
2331 goto done;
2332 }
2333 switch (ip_proto) {
2334 case IPPROTO_TCP:
2335 case IPPROTO_UDP:
2336 case IPPROTO_DCCP:
2337 case IPPROTO_ESP:
2338 case IPPROTO_AH:
2339 case IPPROTO_SCTP:
2340 case IPPROTO_UDPLITE:
2341 if (pskb_may_pull(skb, (ihl * 4) + 4)) {
2342 ports.v32 = * (__force u32 *) (skb->data + (ihl * 4));
2343 if (ports.v16[1] < ports.v16[0])
2344 swap(ports.v16[0], ports.v16[1]);
2345 break;
2346 }
2347 default:
2348 ports.v32 = 0;
2349 break;
2350 }
2351
2352 /* get a consistent hash (same value on both flow directions) */
2353 if (addr2 < addr1)
2354 swap(addr1, addr2);
2355 skb->rxhash = jhash_3words(addr1, addr2, ports.v32, hashrnd);
2356 if (!skb->rxhash)
2357 skb->rxhash = 1;
2358
2359got_hash:
2360 flow_table = rcu_dereference(rxqueue->rps_flow_table);
2361 sock_flow_table = rcu_dereference(rps_sock_flow_table);
2362 if (flow_table && sock_flow_table) {
2363 u16 next_cpu;
2364 struct rps_dev_flow *rflow;
2365
2366 rflow = &flow_table->flows[skb->rxhash & flow_table->mask];
2367 tcpu = rflow->cpu;
2368
2369 next_cpu = sock_flow_table->ents[skb->rxhash &
2370 sock_flow_table->mask];
2371
2372 /*
2373 * If the desired CPU (where last recvmsg was done) is
2374 * different from current CPU (one in the rx-queue flow
2375 * table entry), switch if one of the following holds:
2376 * - Current CPU is unset (equal to RPS_NO_CPU).
2377 * - Current CPU is offline.
2378 * - The current CPU's queue tail has advanced beyond the
2379 * last packet that was enqueued using this table entry.
2380 * This guarantees that all previous packets for the flow
2381 * have been dequeued, thus preserving in order delivery.
2382 */
2383 if (unlikely(tcpu != next_cpu) &&
2384 (tcpu == RPS_NO_CPU || !cpu_online(tcpu) ||
2385 ((int)(per_cpu(softnet_data, tcpu).input_queue_head -
2386 rflow->last_qtail)) >= 0)) {
2387 tcpu = rflow->cpu = next_cpu;
2388 if (tcpu != RPS_NO_CPU)
2389 rflow->last_qtail = per_cpu(softnet_data,
2390 tcpu).input_queue_head;
2391 }
2392 if (tcpu != RPS_NO_CPU && cpu_online(tcpu)) {
2393 *rflowp = rflow;
2394 cpu = tcpu;
2395 goto done;
2396 }
2397 }
2115 2398
2399 map = rcu_dereference(rxqueue->rps_map);
2400 if (map) {
2401 tcpu = map->cpus[((u64) skb->rxhash * map->len) >> 32];
2402
2403 if (cpu_online(tcpu)) {
2404 cpu = tcpu;
2405 goto done;
2406 }
2407 }
2408
2409done:
2410 return cpu;
2411}
2412
2413/* Called from hardirq (IPI) context */
2414static void rps_trigger_softirq(void *data)
2415{
2416 struct softnet_data *sd = data;
2417
2418 ____napi_schedule(sd, &sd->backlog);
2419 sd->received_rps++;
2420}
2421
2422#endif /* CONFIG_RPS */
2423
2424/*
2425 * Check if this softnet_data structure is another cpu one
2426 * If yes, queue it to our IPI list and return 1
2427 * If no, return 0
2428 */
2429static int rps_ipi_queued(struct softnet_data *sd)
2430{
2431#ifdef CONFIG_RPS
2432 struct softnet_data *mysd = &__get_cpu_var(softnet_data);
2433
2434 if (sd != mysd) {
2435 sd->rps_ipi_next = mysd->rps_ipi_list;
2436 mysd->rps_ipi_list = sd;
2437
2438 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
2439 return 1;
2440 }
2441#endif /* CONFIG_RPS */
2442 return 0;
2443}
2444
2445/*
2446 * enqueue_to_backlog is called to queue an skb to a per CPU backlog
2447 * queue (may be a remote CPU queue).
2448 */
2449static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
2450 unsigned int *qtail)
2451{
2452 struct softnet_data *sd;
2453 unsigned long flags;
2454
2455 sd = &per_cpu(softnet_data, cpu);
2456
2457 local_irq_save(flags);
2458
2459 rps_lock(sd);
2460 if (skb_queue_len(&sd->input_pkt_queue) <= netdev_max_backlog) {
2461 if (skb_queue_len(&sd->input_pkt_queue)) {
2462enqueue:
2463 __skb_queue_tail(&sd->input_pkt_queue, skb);
2464 input_queue_tail_incr_save(sd, qtail);
2465 rps_unlock(sd);
2466 local_irq_restore(flags);
2467 return NET_RX_SUCCESS;
2468 }
2469
2470 /* Schedule NAPI for backlog device
2471 * We can use non atomic operation since we own the queue lock
2472 */
2473 if (!__test_and_set_bit(NAPI_STATE_SCHED, &sd->backlog.state)) {
2474 if (!rps_ipi_queued(sd))
2475 ____napi_schedule(sd, &sd->backlog);
2476 }
2477 goto enqueue;
2478 }
2479
2480 sd->dropped++;
2481 rps_unlock(sd);
2482
2483 local_irq_restore(flags);
2484
2485 kfree_skb(skb);
2486 return NET_RX_DROP;
2487}
2116 2488
2117/** 2489/**
2118 * netif_rx - post buffer to the network code 2490 * netif_rx - post buffer to the network code
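The heart of get_rps_cpu() above is a direction-independent flow hash plus a divisionless mapping of that hash onto the configured CPU list. The simplified sketch below mirrors those two steps without the queue and flow-table bookkeeping; it is illustrative only and packs the ports slightly differently than the code above:

static u32 example_flow_hash(u32 saddr, u32 daddr, u16 sport, u16 dport, u32 rnd)
{
	u32 ports;

	if (daddr < saddr)		/* same hash for both directions */
		swap(saddr, daddr);
	if (dport < sport)
		swap(sport, dport);
	ports = ((u32)sport << 16) | dport;

	return jhash_3words(saddr, daddr, ports, rnd) ? : 1;	/* 0 means "no hash" */
}

static u16 example_pick_cpu(u32 rxhash, const u16 *cpus, unsigned int len)
{
	/* multiply-shift spreads the hash over len CPUs without a modulo */
	return cpus[((u64)rxhash * len) >> 32];
}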
@@ -2131,41 +2503,40 @@ DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
2131 2503
2132int netif_rx(struct sk_buff *skb) 2504int netif_rx(struct sk_buff *skb)
2133{ 2505{
2134 struct softnet_data *queue; 2506 int ret;
2135 unsigned long flags;
2136 2507
2137 /* if netpoll wants it, pretend we never saw it */ 2508 /* if netpoll wants it, pretend we never saw it */
2138 if (netpoll_rx(skb)) 2509 if (netpoll_rx(skb))
2139 return NET_RX_DROP; 2510 return NET_RX_DROP;
2140 2511
2141 if (!skb->tstamp.tv64) 2512 if (netdev_tstamp_prequeue)
2142 net_timestamp(skb); 2513 net_timestamp_check(skb);
2143 2514
2144 /* 2515#ifdef CONFIG_RPS
2145 * The code is rearranged so that the path is the most 2516 {
2146 * short when CPU is congested, but is still operating. 2517 struct rps_dev_flow voidflow, *rflow = &voidflow;
2147 */ 2518 int cpu;
2148 local_irq_save(flags);
2149 queue = &__get_cpu_var(softnet_data);
2150 2519
2151 __get_cpu_var(netdev_rx_stat).total++; 2520 preempt_disable();
2152 if (queue->input_pkt_queue.qlen <= netdev_max_backlog) { 2521 rcu_read_lock();
2153 if (queue->input_pkt_queue.qlen) {
2154enqueue:
2155 __skb_queue_tail(&queue->input_pkt_queue, skb);
2156 local_irq_restore(flags);
2157 return NET_RX_SUCCESS;
2158 }
2159 2522
2160 napi_schedule(&queue->backlog); 2523 cpu = get_rps_cpu(skb->dev, skb, &rflow);
2161 goto enqueue; 2524 if (cpu < 0)
2162 } 2525 cpu = smp_processor_id();
2163 2526
2164 __get_cpu_var(netdev_rx_stat).dropped++; 2527 ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
2165 local_irq_restore(flags);
2166 2528
2167 kfree_skb(skb); 2529 rcu_read_unlock();
2168 return NET_RX_DROP; 2530 preempt_enable();
2531 }
2532#else
2533 {
2534 unsigned int qtail;
2535 ret = enqueue_to_backlog(skb, get_cpu(), &qtail);
2536 put_cpu();
2537 }
2538#endif
2539 return ret;
2169} 2540}
2170EXPORT_SYMBOL(netif_rx); 2541EXPORT_SYMBOL(netif_rx);
2171 2542
@@ -2210,6 +2581,7 @@ static void net_tx_action(struct softirq_action *h)
2210 local_irq_disable(); 2581 local_irq_disable();
2211 head = sd->output_queue; 2582 head = sd->output_queue;
2212 sd->output_queue = NULL; 2583 sd->output_queue = NULL;
2584 sd->output_queue_tailp = &sd->output_queue;
2213 local_irq_enable(); 2585 local_irq_enable();
2214 2586
2215 while (head) { 2587 while (head) {
@@ -2247,66 +2619,14 @@ static inline int deliver_skb(struct sk_buff *skb,
2247 return pt_prev->func(skb, skb->dev, pt_prev, orig_dev); 2619 return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
2248} 2620}
2249 2621
2250#if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE) 2622#if (defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)) && \
2251 2623 (defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE))
2252#if defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE)
2253/* This hook is defined here for ATM LANE */ 2624/* This hook is defined here for ATM LANE */
2254int (*br_fdb_test_addr_hook)(struct net_device *dev, 2625int (*br_fdb_test_addr_hook)(struct net_device *dev,
2255 unsigned char *addr) __read_mostly; 2626 unsigned char *addr) __read_mostly;
2256EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook); 2627EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook);
2257#endif 2628#endif
2258 2629
2259/*
2260 * If bridge module is loaded call bridging hook.
2261 * returns NULL if packet was consumed.
2262 */
2263struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p,
2264 struct sk_buff *skb) __read_mostly;
2265EXPORT_SYMBOL_GPL(br_handle_frame_hook);
2266
2267static inline struct sk_buff *handle_bridge(struct sk_buff *skb,
2268 struct packet_type **pt_prev, int *ret,
2269 struct net_device *orig_dev)
2270{
2271 struct net_bridge_port *port;
2272
2273 if (skb->pkt_type == PACKET_LOOPBACK ||
2274 (port = rcu_dereference(skb->dev->br_port)) == NULL)
2275 return skb;
2276
2277 if (*pt_prev) {
2278 *ret = deliver_skb(skb, *pt_prev, orig_dev);
2279 *pt_prev = NULL;
2280 }
2281
2282 return br_handle_frame_hook(port, skb);
2283}
2284#else
2285#define handle_bridge(skb, pt_prev, ret, orig_dev) (skb)
2286#endif
2287
2288#if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE)
2289struct sk_buff *(*macvlan_handle_frame_hook)(struct sk_buff *skb) __read_mostly;
2290EXPORT_SYMBOL_GPL(macvlan_handle_frame_hook);
2291
2292static inline struct sk_buff *handle_macvlan(struct sk_buff *skb,
2293 struct packet_type **pt_prev,
2294 int *ret,
2295 struct net_device *orig_dev)
2296{
2297 if (skb->dev->macvlan_port == NULL)
2298 return skb;
2299
2300 if (*pt_prev) {
2301 *ret = deliver_skb(skb, *pt_prev, orig_dev);
2302 *pt_prev = NULL;
2303 }
2304 return macvlan_handle_frame_hook(skb);
2305}
2306#else
2307#define handle_macvlan(skb, pt_prev, ret, orig_dev) (skb)
2308#endif
2309
2310#ifdef CONFIG_NET_CLS_ACT 2630#ifdef CONFIG_NET_CLS_ACT
2311/* TODO: Maybe we should just force sch_ingress to be compiled in 2631/* TODO: Maybe we should just force sch_ingress to be compiled in
2312 * when CONFIG_NET_CLS_ACT is? otherwise some useless instructions 2632 * when CONFIG_NET_CLS_ACT is? otherwise some useless instructions
@@ -2324,10 +2644,10 @@ static int ing_filter(struct sk_buff *skb)
2324 int result = TC_ACT_OK; 2644 int result = TC_ACT_OK;
2325 struct Qdisc *q; 2645 struct Qdisc *q;
2326 2646
2327 if (MAX_RED_LOOP < ttl++) { 2647 if (unlikely(MAX_RED_LOOP < ttl++)) {
2328 printk(KERN_WARNING 2648 if (net_ratelimit())
2329 "Redir loop detected Dropping packet (%d->%d)\n", 2649 pr_warning( "Redir loop detected Dropping packet (%d->%d)\n",
2330 skb->skb_iif, dev->ifindex); 2650 skb->skb_iif, dev->ifindex);
2331 return TC_ACT_SHOT; 2651 return TC_ACT_SHOT;
2332 } 2652 }
2333 2653
@@ -2357,9 +2677,6 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb,
2357 if (*pt_prev) { 2677 if (*pt_prev) {
2358 *ret = deliver_skb(skb, *pt_prev, orig_dev); 2678 *ret = deliver_skb(skb, *pt_prev, orig_dev);
2359 *pt_prev = NULL; 2679 *pt_prev = NULL;
2360 } else {
2361 /* Huh? Why does turning on AF_PACKET affect this? */
2362 skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
2363 } 2680 }
2364 2681
2365 switch (ing_filter(skb)) { 2682 switch (ing_filter(skb)) {
@@ -2403,30 +2720,113 @@ void netif_nit_deliver(struct sk_buff *skb)
2403} 2720}
2404 2721
2405/** 2722/**
2406 * netif_receive_skb - process receive buffer from network 2723 * netdev_rx_handler_register - register receive handler
2407 * @skb: buffer to process 2724 * @dev: device to register a handler for
2725 * @rx_handler: receive handler to register
2726 * @rx_handler_data: data pointer that is used by rx handler
2408 * 2727 *
2409 * netif_receive_skb() is the main receive data processing function. 2728 * Register a receive hander for a device. This handler will then be
2410 * It always succeeds. The buffer may be dropped during processing 2729 * called from __netif_receive_skb. A negative errno code is returned
2411 * for congestion control or by the protocol layers. 2730 * on a failure.
2412 * 2731 *
2413 * This function may only be called from softirq context and interrupts 2732 * The caller must hold the rtnl_mutex.
2414 * should be enabled. 2733 */
2734int netdev_rx_handler_register(struct net_device *dev,
2735 rx_handler_func_t *rx_handler,
2736 void *rx_handler_data)
2737{
2738 ASSERT_RTNL();
2739
2740 if (dev->rx_handler)
2741 return -EBUSY;
2742
2743 rcu_assign_pointer(dev->rx_handler_data, rx_handler_data);
2744 rcu_assign_pointer(dev->rx_handler, rx_handler);
2745
2746 return 0;
2747}
2748EXPORT_SYMBOL_GPL(netdev_rx_handler_register);
2749
2750/**
2751 * netdev_rx_handler_unregister - unregister receive handler
2752 * @dev: device to unregister a handler from
2415 * 2753 *
2416 * Return values (usually ignored): 2754 * Unregister a receive hander from a device.
2417 * NET_RX_SUCCESS: no congestion 2755 *
2418 * NET_RX_DROP: packet was dropped 2756 * The caller must hold the rtnl_mutex.
2419 */ 2757 */
2420int netif_receive_skb(struct sk_buff *skb) 2758void netdev_rx_handler_unregister(struct net_device *dev)
2759{
2760
2761 ASSERT_RTNL();
2762 rcu_assign_pointer(dev->rx_handler, NULL);
2763 rcu_assign_pointer(dev->rx_handler_data, NULL);
2764}
2765EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister);
2766
2767static inline void skb_bond_set_mac_by_master(struct sk_buff *skb,
2768 struct net_device *master)
2769{
2770 if (skb->pkt_type == PACKET_HOST) {
2771 u16 *dest = (u16 *) eth_hdr(skb)->h_dest;
2772
2773 memcpy(dest, master->dev_addr, ETH_ALEN);
2774 }
2775}
2776
2777/* On bonding slaves other than the currently active slave, suppress
2778 * duplicates except for 802.3ad ETH_P_SLOW, alb non-mcast/bcast, and
2779 * ARP on active-backup slaves with arp_validate enabled.
2780 */
2781int __skb_bond_should_drop(struct sk_buff *skb, struct net_device *master)
2782{
2783 struct net_device *dev = skb->dev;
2784
2785 if (master->priv_flags & IFF_MASTER_ARPMON)
2786 dev->last_rx = jiffies;
2787
2788 if ((master->priv_flags & IFF_MASTER_ALB) &&
2789 (master->priv_flags & IFF_BRIDGE_PORT)) {
2790 /* Do address unmangle. The local destination address
2791 * will be always the one master has. Provides the right
2792 * functionality in a bridge.
2793 */
2794 skb_bond_set_mac_by_master(skb, master);
2795 }
2796
2797 if (dev->priv_flags & IFF_SLAVE_INACTIVE) {
2798 if ((dev->priv_flags & IFF_SLAVE_NEEDARP) &&
2799 skb->protocol == __cpu_to_be16(ETH_P_ARP))
2800 return 0;
2801
2802 if (master->priv_flags & IFF_MASTER_ALB) {
2803 if (skb->pkt_type != PACKET_BROADCAST &&
2804 skb->pkt_type != PACKET_MULTICAST)
2805 return 0;
2806 }
2807 if (master->priv_flags & IFF_MASTER_8023AD &&
2808 skb->protocol == __cpu_to_be16(ETH_P_SLOW))
2809 return 0;
2810
2811 return 1;
2812 }
2813 return 0;
2814}
2815EXPORT_SYMBOL(__skb_bond_should_drop);
2816
2817static int __netif_receive_skb(struct sk_buff *skb)
2421{ 2818{
2422 struct packet_type *ptype, *pt_prev; 2819 struct packet_type *ptype, *pt_prev;
2820 rx_handler_func_t *rx_handler;
2423 struct net_device *orig_dev; 2821 struct net_device *orig_dev;
2822 struct net_device *master;
2424 struct net_device *null_or_orig; 2823 struct net_device *null_or_orig;
2824 struct net_device *orig_or_bond;
2425 int ret = NET_RX_DROP; 2825 int ret = NET_RX_DROP;
2426 __be16 type; 2826 __be16 type;
2427 2827
2428 if (!skb->tstamp.tv64) 2828 if (!netdev_tstamp_prequeue)
2429 net_timestamp(skb); 2829 net_timestamp_check(skb);
2430 2830
2431 if (vlan_tx_tag_present(skb) && vlan_hwaccel_do_receive(skb)) 2831 if (vlan_tx_tag_present(skb) && vlan_hwaccel_do_receive(skb))
2432 return NET_RX_SUCCESS; 2832 return NET_RX_SUCCESS;
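netdev_rx_handler_register() replaces the hard-coded bridge and macvlan hooks removed earlier in this patch. A sketch of how a port-based module would attach to a device with the new hook; the example_* type and helpers are placeholders:

static struct sk_buff *example_handle_frame(struct sk_buff *skb)
{
	struct example_port *port = rcu_dereference(skb->dev->rx_handler_data);

	if (example_port_claims(port, skb)) {	/* hypothetical predicate */
		example_deliver(port, skb);	/* consumes the skb */
		return NULL;			/* tell __netif_receive_skb we took it */
	}
	return skb;				/* fall through to normal delivery */
}

static int example_add_port(struct net_device *port_dev, struct example_port *port)
{
	int err;

	rtnl_lock();
	err = netdev_rx_handler_register(port_dev, example_handle_frame, port);
	rtnl_unlock();
	return err;
}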
@@ -2438,17 +2838,28 @@ int netif_receive_skb(struct sk_buff *skb)
2438 if (!skb->skb_iif) 2838 if (!skb->skb_iif)
2439 skb->skb_iif = skb->dev->ifindex; 2839 skb->skb_iif = skb->dev->ifindex;
2440 2840
2841 /*
2842 * bonding note: skbs received on inactive slaves should only
2843 * be delivered to pkt handlers that are exact matches. Also
2844 * the deliver_no_wcard flag will be set. If packet handlers
2845 * are sensitive to duplicate packets these skbs will need to
2846 * be dropped at the handler. The vlan accel path may have
2847 * already set the deliver_no_wcard flag.
2848 */
2441 null_or_orig = NULL; 2849 null_or_orig = NULL;
2442 orig_dev = skb->dev; 2850 orig_dev = skb->dev;
2443 if (orig_dev->master) { 2851 master = ACCESS_ONCE(orig_dev->master);
2444 if (skb_bond_should_drop(skb)) 2852 if (skb->deliver_no_wcard)
2853 null_or_orig = orig_dev;
2854 else if (master) {
2855 if (skb_bond_should_drop(skb, master)) {
2856 skb->deliver_no_wcard = 1;
2445 null_or_orig = orig_dev; /* deliver only exact match */ 2857 null_or_orig = orig_dev; /* deliver only exact match */
2446 else 2858 } else
2447 skb->dev = orig_dev->master; 2859 skb->dev = master;
2448 } 2860 }
2449 2861
2450 __get_cpu_var(netdev_rx_stat).total++; 2862 __this_cpu_inc(softnet_data.processed);
2451
2452 skb_reset_network_header(skb); 2863 skb_reset_network_header(skb);
2453 skb_reset_transport_header(skb); 2864 skb_reset_transport_header(skb);
2454 skb->mac_len = skb->network_header - skb->mac_header; 2865 skb->mac_len = skb->network_header - skb->mac_header;
@@ -2480,19 +2891,36 @@ int netif_receive_skb(struct sk_buff *skb)
2480ncls: 2891ncls:
2481#endif 2892#endif
2482 2893
2483 skb = handle_bridge(skb, &pt_prev, &ret, orig_dev); 2894 /* Handle special case of bridge or macvlan */
2484 if (!skb) 2895 rx_handler = rcu_dereference(skb->dev->rx_handler);
2485 goto out; 2896 if (rx_handler) {
2486 skb = handle_macvlan(skb, &pt_prev, &ret, orig_dev); 2897 if (pt_prev) {
2487 if (!skb) 2898 ret = deliver_skb(skb, pt_prev, orig_dev);
2488 goto out; 2899 pt_prev = NULL;
2900 }
2901 skb = rx_handler(skb);
2902 if (!skb)
2903 goto out;
2904 }
2905
2906 /*
2907 * Make sure frames received on VLAN interfaces stacked on
2908 * bonding interfaces still make their way to any base bonding
2909 * device that may have registered for a specific ptype. The
2910 * handler may have to adjust skb->dev and orig_dev.
2911 */
2912 orig_or_bond = orig_dev;
2913 if ((skb->dev->priv_flags & IFF_802_1Q_VLAN) &&
2914 (vlan_dev_real_dev(skb->dev)->priv_flags & IFF_BONDING)) {
2915 orig_or_bond = vlan_dev_real_dev(skb->dev);
2916 }
2489 2917
2490 type = skb->protocol; 2918 type = skb->protocol;
2491 list_for_each_entry_rcu(ptype, 2919 list_for_each_entry_rcu(ptype,
2492 &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) { 2920 &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
2493 if (ptype->type == type && 2921 if (ptype->type == type && (ptype->dev == null_or_orig ||
2494 (ptype->dev == null_or_orig || ptype->dev == skb->dev || 2922 ptype->dev == skb->dev || ptype->dev == orig_dev ||
2495 ptype->dev == orig_dev)) { 2923 ptype->dev == orig_or_bond)) {
2496 if (pt_prev) 2924 if (pt_prev)
2497 ret = deliver_skb(skb, pt_prev, orig_dev); 2925 ret = deliver_skb(skb, pt_prev, orig_dev);
2498 pt_prev = ptype; 2926 pt_prev = ptype;
@@ -2513,20 +2941,81 @@ out:
2513 rcu_read_unlock(); 2941 rcu_read_unlock();
2514 return ret; 2942 return ret;
2515} 2943}
2944
2945/**
2946 * netif_receive_skb - process receive buffer from network
2947 * @skb: buffer to process
2948 *
2949 * netif_receive_skb() is the main receive data processing function.
2950 * It always succeeds. The buffer may be dropped during processing
2951 * for congestion control or by the protocol layers.
2952 *
2953 * This function may only be called from softirq context and interrupts
2954 * should be enabled.
2955 *
2956 * Return values (usually ignored):
2957 * NET_RX_SUCCESS: no congestion
2958 * NET_RX_DROP: packet was dropped
2959 */
2960int netif_receive_skb(struct sk_buff *skb)
2961{
2962 if (netdev_tstamp_prequeue)
2963 net_timestamp_check(skb);
2964
2965 if (skb_defer_rx_timestamp(skb))
2966 return NET_RX_SUCCESS;
2967
2968#ifdef CONFIG_RPS
2969 {
2970 struct rps_dev_flow voidflow, *rflow = &voidflow;
2971 int cpu, ret;
2972
2973 rcu_read_lock();
2974
2975 cpu = get_rps_cpu(skb->dev, skb, &rflow);
2976
2977 if (cpu >= 0) {
2978 ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
2979 rcu_read_unlock();
2980 } else {
2981 rcu_read_unlock();
2982 ret = __netif_receive_skb(skb);
2983 }
2984
2985 return ret;
2986 }
2987#else
2988 return __netif_receive_skb(skb);
2989#endif
2990}
2516EXPORT_SYMBOL(netif_receive_skb); 2991EXPORT_SYMBOL(netif_receive_skb);
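Illustrative sketch, not part of the patch: a typical driver NAPI poll routine delivering frames through netif_receive_skb() from softirq context, which is the calling context the kernel-doc above requires. example_priv and example_rx_one() are hypothetical driver internals.

static int example_poll(struct napi_struct *napi, int budget)
{
	struct example_priv *priv = container_of(napi, struct example_priv, napi);
	int work = 0;

	while (work < budget) {
		struct sk_buff *skb = example_rx_one(priv); /* next completed RX frame */

		if (!skb)
			break;
		skb->protocol = eth_type_trans(skb, priv->netdev);
		netif_receive_skb(skb);
		work++;
	}

	if (work < budget)
		napi_complete(napi);
	return work;
}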
2517 2992
2518/* Network device is going away, flush any packets still pending */ 2993/* Network device is going away, flush any packets still pending
2994 * Called with irqs disabled.
2995 */
2519static void flush_backlog(void *arg) 2996static void flush_backlog(void *arg)
2520{ 2997{
2521 struct net_device *dev = arg; 2998 struct net_device *dev = arg;
2522 struct softnet_data *queue = &__get_cpu_var(softnet_data); 2999 struct softnet_data *sd = &__get_cpu_var(softnet_data);
2523 struct sk_buff *skb, *tmp; 3000 struct sk_buff *skb, *tmp;
2524 3001
2525 skb_queue_walk_safe(&queue->input_pkt_queue, skb, tmp) 3002 rps_lock(sd);
3003 skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) {
3004 if (skb->dev == dev) {
3005 __skb_unlink(skb, &sd->input_pkt_queue);
3006 kfree_skb(skb);
3007 input_queue_head_incr(sd);
3008 }
3009 }
3010 rps_unlock(sd);
3011
3012 skb_queue_walk_safe(&sd->process_queue, skb, tmp) {
2526 if (skb->dev == dev) { 3013 if (skb->dev == dev) {
2527 __skb_unlink(skb, &queue->input_pkt_queue); 3014 __skb_unlink(skb, &sd->process_queue);
2528 kfree_skb(skb); 3015 kfree_skb(skb);
3016 input_queue_head_incr(sd);
2529 } 3017 }
3018 }
2530} 3019}
2531 3020
2532static int napi_gro_complete(struct sk_buff *skb) 3021static int napi_gro_complete(struct sk_buff *skb)
@@ -2561,7 +3050,7 @@ out:
2561 return netif_receive_skb(skb); 3050 return netif_receive_skb(skb);
2562} 3051}
2563 3052
2564void napi_gro_flush(struct napi_struct *napi) 3053static void napi_gro_flush(struct napi_struct *napi)
2565{ 3054{
2566 struct sk_buff *skb, *next; 3055 struct sk_buff *skb, *next;
2567 3056
@@ -2574,7 +3063,6 @@ void napi_gro_flush(struct napi_struct *napi)
2574 napi->gro_count = 0; 3063 napi->gro_count = 0;
2575 napi->gro_list = NULL; 3064 napi->gro_list = NULL;
2576} 3065}
2577EXPORT_SYMBOL(napi_gro_flush);
2578 3066
2579enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) 3067enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
2580{ 3068{
@@ -2586,7 +3074,7 @@ enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
2586 int mac_len; 3074 int mac_len;
2587 enum gro_result ret; 3075 enum gro_result ret;
2588 3076
2589 if (!(skb->dev->features & NETIF_F_GRO)) 3077 if (!(skb->dev->features & NETIF_F_GRO) || netpoll_rx_on(skb))
2590 goto normal; 3078 goto normal;
2591 3079
2592 if (skb_is_gso(skb) || skb_has_frags(skb)) 3080 if (skb_is_gso(skb) || skb_has_frags(skb))
@@ -2655,7 +3143,7 @@ pull:
2655 put_page(skb_shinfo(skb)->frags[0].page); 3143 put_page(skb_shinfo(skb)->frags[0].page);
2656 memmove(skb_shinfo(skb)->frags, 3144 memmove(skb_shinfo(skb)->frags,
2657 skb_shinfo(skb)->frags + 1, 3145 skb_shinfo(skb)->frags + 1,
2658 --skb_shinfo(skb)->nr_frags); 3146 --skb_shinfo(skb)->nr_frags * sizeof(skb_frag_t));
2659 } 3147 }
2660 } 3148 }
2661 3149
@@ -2673,9 +3161,6 @@ __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
2673{ 3161{
2674 struct sk_buff *p; 3162 struct sk_buff *p;
2675 3163
2676 if (netpoll_rx_on(skb))
2677 return GRO_NORMAL;
2678
2679 for (p = napi->gro_list; p; p = p->next) { 3164 for (p = napi->gro_list; p; p = p->next) {
2680 NAPI_GRO_CB(p)->same_flow = 3165 NAPI_GRO_CB(p)->same_flow =
2681 (p->dev == skb->dev) && 3166 (p->dev == skb->dev) &&
@@ -2761,7 +3246,7 @@ gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb,
2761 switch (ret) { 3246 switch (ret) {
2762 case GRO_NORMAL: 3247 case GRO_NORMAL:
2763 case GRO_HELD: 3248 case GRO_HELD:
2764 skb->protocol = eth_type_trans(skb, napi->dev); 3249 skb->protocol = eth_type_trans(skb, skb->dev);
2765 3250
2766 if (ret == GRO_HELD) 3251 if (ret == GRO_HELD)
2767 skb_gro_pull(skb, -ETH_HLEN); 3252 skb_gro_pull(skb, -ETH_HLEN);
@@ -2830,27 +3315,87 @@ gro_result_t napi_gro_frags(struct napi_struct *napi)
2830} 3315}
2831EXPORT_SYMBOL(napi_gro_frags); 3316EXPORT_SYMBOL(napi_gro_frags);
2832 3317
3318/*
 3319 * net_rps_action sends any pending IPIs for RPS.
3320 * Note: called with local irq disabled, but exits with local irq enabled.
3321 */
3322static void net_rps_action_and_irq_enable(struct softnet_data *sd)
3323{
3324#ifdef CONFIG_RPS
3325 struct softnet_data *remsd = sd->rps_ipi_list;
3326
3327 if (remsd) {
3328 sd->rps_ipi_list = NULL;
3329
3330 local_irq_enable();
3331
3332 /* Send pending IPI's to kick RPS processing on remote cpus. */
3333 while (remsd) {
3334 struct softnet_data *next = remsd->rps_ipi_next;
3335
3336 if (cpu_online(remsd->cpu))
3337 __smp_call_function_single(remsd->cpu,
3338 &remsd->csd, 0);
3339 remsd = next;
3340 }
3341 } else
3342#endif
3343 local_irq_enable();
3344}
3345
2833static int process_backlog(struct napi_struct *napi, int quota) 3346static int process_backlog(struct napi_struct *napi, int quota)
2834{ 3347{
2835 int work = 0; 3348 int work = 0;
2836 struct softnet_data *queue = &__get_cpu_var(softnet_data); 3349 struct softnet_data *sd = container_of(napi, struct softnet_data, backlog);
2837 unsigned long start_time = jiffies;
2838 3350
3351#ifdef CONFIG_RPS
 3352	/* Check if we have pending IPIs; it's better to send them now
 3353	 * rather than waiting for net_rx_action() to end.
3354 */
3355 if (sd->rps_ipi_list) {
3356 local_irq_disable();
3357 net_rps_action_and_irq_enable(sd);
3358 }
3359#endif
2839 napi->weight = weight_p; 3360 napi->weight = weight_p;
2840 do { 3361 local_irq_disable();
3362 while (work < quota) {
2841 struct sk_buff *skb; 3363 struct sk_buff *skb;
3364 unsigned int qlen;
2842 3365
2843 local_irq_disable(); 3366 while ((skb = __skb_dequeue(&sd->process_queue))) {
2844 skb = __skb_dequeue(&queue->input_pkt_queue);
2845 if (!skb) {
2846 __napi_complete(napi);
2847 local_irq_enable(); 3367 local_irq_enable();
2848 break; 3368 __netif_receive_skb(skb);
3369 local_irq_disable();
3370 input_queue_head_incr(sd);
3371 if (++work >= quota) {
3372 local_irq_enable();
3373 return work;
3374 }
2849 } 3375 }
2850 local_irq_enable();
2851 3376
2852 netif_receive_skb(skb); 3377 rps_lock(sd);
2853 } while (++work < quota && jiffies == start_time); 3378 qlen = skb_queue_len(&sd->input_pkt_queue);
3379 if (qlen)
3380 skb_queue_splice_tail_init(&sd->input_pkt_queue,
3381 &sd->process_queue);
3382
3383 if (qlen < quota - work) {
3384 /*
3385 * Inline a custom version of __napi_complete().
 3386				 * Only the current cpu owns and manipulates this napi,
 3387				 * and NAPI_STATE_SCHED is the only possible flag set on backlog,
 3388				 * so we can use a plain write instead of clear_bit()
 3389				 * and we don't need an smp_mb() memory barrier.
3390 */
3391 list_del(&napi->poll_list);
3392 napi->state = 0;
3393
3394 quota = work + qlen;
3395 }
3396 rps_unlock(sd);
3397 }
3398 local_irq_enable();
2854 3399
2855 return work; 3400 return work;
2856} 3401}
@@ -2866,8 +3411,7 @@ void __napi_schedule(struct napi_struct *n)
2866 unsigned long flags; 3411 unsigned long flags;
2867 3412
2868 local_irq_save(flags); 3413 local_irq_save(flags);
2869 list_add_tail(&n->poll_list, &__get_cpu_var(softnet_data).poll_list); 3414 ____napi_schedule(&__get_cpu_var(softnet_data), n);
2870 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
2871 local_irq_restore(flags); 3415 local_irq_restore(flags);
2872} 3416}
2873EXPORT_SYMBOL(__napi_schedule); 3417EXPORT_SYMBOL(__napi_schedule);
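Illustrative sketch, not part of the patch: drivers normally reach __napi_schedule() through napi_schedule(), which performs the napi_schedule_prep() test first. The example_priv layout and example_disable_rx_irq() helper are hypothetical.

static irqreturn_t example_interrupt(int irq, void *dev_id)
{
	struct example_priv *priv = dev_id;

	if (napi_schedule_prep(&priv->napi)) {
		example_disable_rx_irq(priv);	/* mask RX interrupts until poll completes */
		__napi_schedule(&priv->napi);
	}
	return IRQ_HANDLED;
}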
@@ -2938,17 +3482,16 @@ void netif_napi_del(struct napi_struct *napi)
2938} 3482}
2939EXPORT_SYMBOL(netif_napi_del); 3483EXPORT_SYMBOL(netif_napi_del);
2940 3484
2941
2942static void net_rx_action(struct softirq_action *h) 3485static void net_rx_action(struct softirq_action *h)
2943{ 3486{
2944 struct list_head *list = &__get_cpu_var(softnet_data).poll_list; 3487 struct softnet_data *sd = &__get_cpu_var(softnet_data);
2945 unsigned long time_limit = jiffies + 2; 3488 unsigned long time_limit = jiffies + 2;
2946 int budget = netdev_budget; 3489 int budget = netdev_budget;
2947 void *have; 3490 void *have;
2948 3491
2949 local_irq_disable(); 3492 local_irq_disable();
2950 3493
2951 while (!list_empty(list)) { 3494 while (!list_empty(&sd->poll_list)) {
2952 struct napi_struct *n; 3495 struct napi_struct *n;
2953 int work, weight; 3496 int work, weight;
2954 3497
@@ -2966,7 +3509,7 @@ static void net_rx_action(struct softirq_action *h)
2966 * entries to the tail of this list, and only ->poll() 3509 * entries to the tail of this list, and only ->poll()
2967 * calls can remove this head entry from the list. 3510 * calls can remove this head entry from the list.
2968 */ 3511 */
2969 n = list_entry(list->next, struct napi_struct, poll_list); 3512 n = list_first_entry(&sd->poll_list, struct napi_struct, poll_list);
2970 3513
2971 have = netpoll_poll_lock(n); 3514 have = netpoll_poll_lock(n);
2972 3515
@@ -3001,13 +3544,13 @@ static void net_rx_action(struct softirq_action *h)
3001 napi_complete(n); 3544 napi_complete(n);
3002 local_irq_disable(); 3545 local_irq_disable();
3003 } else 3546 } else
3004 list_move_tail(&n->poll_list, list); 3547 list_move_tail(&n->poll_list, &sd->poll_list);
3005 } 3548 }
3006 3549
3007 netpoll_poll_unlock(have); 3550 netpoll_poll_unlock(have);
3008 } 3551 }
3009out: 3552out:
3010 local_irq_enable(); 3553 net_rps_action_and_irq_enable(sd);
3011 3554
3012#ifdef CONFIG_NET_DMA 3555#ifdef CONFIG_NET_DMA
3013 /* 3556 /*
@@ -3020,7 +3563,7 @@ out:
3020 return; 3563 return;
3021 3564
3022softnet_break: 3565softnet_break:
3023 __get_cpu_var(netdev_rx_stat).time_squeeze++; 3566 sd->time_squeeze++;
3024 __raise_softirq_irqoff(NET_RX_SOFTIRQ); 3567 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
3025 goto out; 3568 goto out;
3026} 3569}
@@ -3183,10 +3726,11 @@ void dev_seq_stop(struct seq_file *seq, void *v)
3183 3726
3184static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev) 3727static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
3185{ 3728{
3186 const struct net_device_stats *stats = dev_get_stats(dev); 3729 struct rtnl_link_stats64 temp;
3730 const struct rtnl_link_stats64 *stats = dev_get_stats(dev, &temp);
3187 3731
3188 seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu " 3732 seq_printf(seq, "%6s: %7llu %7llu %4llu %4llu %4llu %5llu %10llu %9llu "
3189 "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n", 3733 "%8llu %7llu %4llu %4llu %4llu %5llu %7llu %10llu\n",
3190 dev->name, stats->rx_bytes, stats->rx_packets, 3734 dev->name, stats->rx_bytes, stats->rx_packets,
3191 stats->rx_errors, 3735 stats->rx_errors,
3192 stats->rx_dropped + stats->rx_missed_errors, 3736 stats->rx_dropped + stats->rx_missed_errors,
@@ -3221,17 +3765,17 @@ static int dev_seq_show(struct seq_file *seq, void *v)
3221 return 0; 3765 return 0;
3222} 3766}
3223 3767
3224static struct netif_rx_stats *softnet_get_online(loff_t *pos) 3768static struct softnet_data *softnet_get_online(loff_t *pos)
3225{ 3769{
3226 struct netif_rx_stats *rc = NULL; 3770 struct softnet_data *sd = NULL;
3227 3771
3228 while (*pos < nr_cpu_ids) 3772 while (*pos < nr_cpu_ids)
3229 if (cpu_online(*pos)) { 3773 if (cpu_online(*pos)) {
3230 rc = &per_cpu(netdev_rx_stat, *pos); 3774 sd = &per_cpu(softnet_data, *pos);
3231 break; 3775 break;
3232 } else 3776 } else
3233 ++*pos; 3777 ++*pos;
3234 return rc; 3778 return sd;
3235} 3779}
3236 3780
3237static void *softnet_seq_start(struct seq_file *seq, loff_t *pos) 3781static void *softnet_seq_start(struct seq_file *seq, loff_t *pos)
@@ -3251,12 +3795,12 @@ static void softnet_seq_stop(struct seq_file *seq, void *v)
3251 3795
3252static int softnet_seq_show(struct seq_file *seq, void *v) 3796static int softnet_seq_show(struct seq_file *seq, void *v)
3253{ 3797{
3254 struct netif_rx_stats *s = v; 3798 struct softnet_data *sd = v;
3255 3799
3256 seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n", 3800 seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
3257 s->total, s->dropped, s->time_squeeze, 0, 3801 sd->processed, sd->dropped, sd->time_squeeze, 0,
3258 0, 0, 0, 0, /* was fastroute */ 3802 0, 0, 0, 0, /* was fastroute */
3259 s->cpu_collision); 3803 sd->cpu_collision, sd->received_rps);
3260 return 0; 3804 return 0;
3261} 3805}
3262 3806
@@ -3479,11 +4023,10 @@ int netdev_set_master(struct net_device *slave, struct net_device *master)
3479 4023
3480 slave->master = master; 4024 slave->master = master;
3481 4025
3482 synchronize_net(); 4026 if (old) {
3483 4027 synchronize_net();
3484 if (old)
3485 dev_put(old); 4028 dev_put(old);
3486 4029 }
3487 if (master) 4030 if (master)
3488 slave->flags |= IFF_SLAVE; 4031 slave->flags |= IFF_SLAVE;
3489 else 4032 else
@@ -3640,10 +4183,10 @@ void __dev_set_rx_mode(struct net_device *dev)
3640 /* Unicast addresses changes may only happen under the rtnl, 4183 /* Unicast addresses changes may only happen under the rtnl,
3641 * therefore calling __dev_set_promiscuity here is safe. 4184 * therefore calling __dev_set_promiscuity here is safe.
3642 */ 4185 */
3643 if (dev->uc.count > 0 && !dev->uc_promisc) { 4186 if (!netdev_uc_empty(dev) && !dev->uc_promisc) {
3644 __dev_set_promiscuity(dev, 1); 4187 __dev_set_promiscuity(dev, 1);
3645 dev->uc_promisc = 1; 4188 dev->uc_promisc = 1;
3646 } else if (dev->uc.count == 0 && dev->uc_promisc) { 4189 } else if (netdev_uc_empty(dev) && dev->uc_promisc) {
3647 __dev_set_promiscuity(dev, -1); 4190 __dev_set_promiscuity(dev, -1);
3648 dev->uc_promisc = 0; 4191 dev->uc_promisc = 0;
3649 } 4192 }
@@ -3660,562 +4203,6 @@ void dev_set_rx_mode(struct net_device *dev)
3660 netif_addr_unlock_bh(dev); 4203 netif_addr_unlock_bh(dev);
3661} 4204}
3662 4205
3663/* hw addresses list handling functions */
3664
3665static int __hw_addr_add(struct netdev_hw_addr_list *list, unsigned char *addr,
3666 int addr_len, unsigned char addr_type)
3667{
3668 struct netdev_hw_addr *ha;
3669 int alloc_size;
3670
3671 if (addr_len > MAX_ADDR_LEN)
3672 return -EINVAL;
3673
3674 list_for_each_entry(ha, &list->list, list) {
3675 if (!memcmp(ha->addr, addr, addr_len) &&
3676 ha->type == addr_type) {
3677 ha->refcount++;
3678 return 0;
3679 }
3680 }
3681
3682
3683 alloc_size = sizeof(*ha);
3684 if (alloc_size < L1_CACHE_BYTES)
3685 alloc_size = L1_CACHE_BYTES;
3686 ha = kmalloc(alloc_size, GFP_ATOMIC);
3687 if (!ha)
3688 return -ENOMEM;
3689 memcpy(ha->addr, addr, addr_len);
3690 ha->type = addr_type;
3691 ha->refcount = 1;
3692 ha->synced = false;
3693 list_add_tail_rcu(&ha->list, &list->list);
3694 list->count++;
3695 return 0;
3696}
3697
3698static void ha_rcu_free(struct rcu_head *head)
3699{
3700 struct netdev_hw_addr *ha;
3701
3702 ha = container_of(head, struct netdev_hw_addr, rcu_head);
3703 kfree(ha);
3704}
3705
3706static int __hw_addr_del(struct netdev_hw_addr_list *list, unsigned char *addr,
3707 int addr_len, unsigned char addr_type)
3708{
3709 struct netdev_hw_addr *ha;
3710
3711 list_for_each_entry(ha, &list->list, list) {
3712 if (!memcmp(ha->addr, addr, addr_len) &&
3713 (ha->type == addr_type || !addr_type)) {
3714 if (--ha->refcount)
3715 return 0;
3716 list_del_rcu(&ha->list);
3717 call_rcu(&ha->rcu_head, ha_rcu_free);
3718 list->count--;
3719 return 0;
3720 }
3721 }
3722 return -ENOENT;
3723}
3724
3725static int __hw_addr_add_multiple(struct netdev_hw_addr_list *to_list,
3726 struct netdev_hw_addr_list *from_list,
3727 int addr_len,
3728 unsigned char addr_type)
3729{
3730 int err;
3731 struct netdev_hw_addr *ha, *ha2;
3732 unsigned char type;
3733
3734 list_for_each_entry(ha, &from_list->list, list) {
3735 type = addr_type ? addr_type : ha->type;
3736 err = __hw_addr_add(to_list, ha->addr, addr_len, type);
3737 if (err)
3738 goto unroll;
3739 }
3740 return 0;
3741
3742unroll:
3743 list_for_each_entry(ha2, &from_list->list, list) {
3744 if (ha2 == ha)
3745 break;
3746 type = addr_type ? addr_type : ha2->type;
3747 __hw_addr_del(to_list, ha2->addr, addr_len, type);
3748 }
3749 return err;
3750}
3751
3752static void __hw_addr_del_multiple(struct netdev_hw_addr_list *to_list,
3753 struct netdev_hw_addr_list *from_list,
3754 int addr_len,
3755 unsigned char addr_type)
3756{
3757 struct netdev_hw_addr *ha;
3758 unsigned char type;
3759
3760 list_for_each_entry(ha, &from_list->list, list) {
3761 type = addr_type ? addr_type : ha->type;
3762 __hw_addr_del(to_list, ha->addr, addr_len, addr_type);
3763 }
3764}
3765
3766static int __hw_addr_sync(struct netdev_hw_addr_list *to_list,
3767 struct netdev_hw_addr_list *from_list,
3768 int addr_len)
3769{
3770 int err = 0;
3771 struct netdev_hw_addr *ha, *tmp;
3772
3773 list_for_each_entry_safe(ha, tmp, &from_list->list, list) {
3774 if (!ha->synced) {
3775 err = __hw_addr_add(to_list, ha->addr,
3776 addr_len, ha->type);
3777 if (err)
3778 break;
3779 ha->synced = true;
3780 ha->refcount++;
3781 } else if (ha->refcount == 1) {
3782 __hw_addr_del(to_list, ha->addr, addr_len, ha->type);
3783 __hw_addr_del(from_list, ha->addr, addr_len, ha->type);
3784 }
3785 }
3786 return err;
3787}
3788
3789static void __hw_addr_unsync(struct netdev_hw_addr_list *to_list,
3790 struct netdev_hw_addr_list *from_list,
3791 int addr_len)
3792{
3793 struct netdev_hw_addr *ha, *tmp;
3794
3795 list_for_each_entry_safe(ha, tmp, &from_list->list, list) {
3796 if (ha->synced) {
3797 __hw_addr_del(to_list, ha->addr,
3798 addr_len, ha->type);
3799 ha->synced = false;
3800 __hw_addr_del(from_list, ha->addr,
3801 addr_len, ha->type);
3802 }
3803 }
3804}
3805
3806static void __hw_addr_flush(struct netdev_hw_addr_list *list)
3807{
3808 struct netdev_hw_addr *ha, *tmp;
3809
3810 list_for_each_entry_safe(ha, tmp, &list->list, list) {
3811 list_del_rcu(&ha->list);
3812 call_rcu(&ha->rcu_head, ha_rcu_free);
3813 }
3814 list->count = 0;
3815}
3816
3817static void __hw_addr_init(struct netdev_hw_addr_list *list)
3818{
3819 INIT_LIST_HEAD(&list->list);
3820 list->count = 0;
3821}
3822
3823/* Device addresses handling functions */
3824
3825static void dev_addr_flush(struct net_device *dev)
3826{
3827 /* rtnl_mutex must be held here */
3828
3829 __hw_addr_flush(&dev->dev_addrs);
3830 dev->dev_addr = NULL;
3831}
3832
3833static int dev_addr_init(struct net_device *dev)
3834{
3835 unsigned char addr[MAX_ADDR_LEN];
3836 struct netdev_hw_addr *ha;
3837 int err;
3838
3839 /* rtnl_mutex must be held here */
3840
3841 __hw_addr_init(&dev->dev_addrs);
3842 memset(addr, 0, sizeof(addr));
3843 err = __hw_addr_add(&dev->dev_addrs, addr, sizeof(addr),
3844 NETDEV_HW_ADDR_T_LAN);
3845 if (!err) {
3846 /*
3847 * Get the first (previously created) address from the list
3848 * and set dev_addr pointer to this location.
3849 */
3850 ha = list_first_entry(&dev->dev_addrs.list,
3851 struct netdev_hw_addr, list);
3852 dev->dev_addr = ha->addr;
3853 }
3854 return err;
3855}
3856
3857/**
3858 * dev_addr_add - Add a device address
3859 * @dev: device
3860 * @addr: address to add
3861 * @addr_type: address type
3862 *
3863 * Add a device address to the device or increase the reference count if
3864 * it already exists.
3865 *
3866 * The caller must hold the rtnl_mutex.
3867 */
3868int dev_addr_add(struct net_device *dev, unsigned char *addr,
3869 unsigned char addr_type)
3870{
3871 int err;
3872
3873 ASSERT_RTNL();
3874
3875 err = __hw_addr_add(&dev->dev_addrs, addr, dev->addr_len, addr_type);
3876 if (!err)
3877 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
3878 return err;
3879}
3880EXPORT_SYMBOL(dev_addr_add);
3881
3882/**
3883 * dev_addr_del - Release a device address.
3884 * @dev: device
3885 * @addr: address to delete
3886 * @addr_type: address type
3887 *
3888 * Release reference to a device address and remove it from the device
3889 * if the reference count drops to zero.
3890 *
3891 * The caller must hold the rtnl_mutex.
3892 */
3893int dev_addr_del(struct net_device *dev, unsigned char *addr,
3894 unsigned char addr_type)
3895{
3896 int err;
3897 struct netdev_hw_addr *ha;
3898
3899 ASSERT_RTNL();
3900
3901 /*
3902 * We cannot remove the first address from the list because
3903 * dev->dev_addr points to it.
3904 */
3905 ha = list_first_entry(&dev->dev_addrs.list,
3906 struct netdev_hw_addr, list);
3907 if (ha->addr == dev->dev_addr && ha->refcount == 1)
3908 return -ENOENT;
3909
3910 err = __hw_addr_del(&dev->dev_addrs, addr, dev->addr_len,
3911 addr_type);
3912 if (!err)
3913 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
3914 return err;
3915}
3916EXPORT_SYMBOL(dev_addr_del);
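Illustrative sketch, not part of the patch: adding and then releasing a secondary hardware address under the RTNL, as the kernel-doc above requires. The address bytes are arbitrary example data.

static int example_toggle_secondary_addr(struct net_device *dev)
{
	unsigned char addr[ETH_ALEN] = { 0x02, 0x00, 0x00, 0xaa, 0xbb, 0xcc };
	int err;

	rtnl_lock();
	err = dev_addr_add(dev, addr, NETDEV_HW_ADDR_T_LAN);
	if (!err)
		err = dev_addr_del(dev, addr, NETDEV_HW_ADDR_T_LAN);
	rtnl_unlock();
	return err;
}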
3917
3918/**
3919 * dev_addr_add_multiple - Add device addresses from another device
3920 * @to_dev: device to which addresses will be added
3921 * @from_dev: device from which addresses will be added
3922 * @addr_type: address type - 0 means type will be used from from_dev
3923 *
3924 * Add the device addresses of one device to another.
3925 *
3926 * The caller must hold the rtnl_mutex.
3927 */
3928int dev_addr_add_multiple(struct net_device *to_dev,
3929 struct net_device *from_dev,
3930 unsigned char addr_type)
3931{
3932 int err;
3933
3934 ASSERT_RTNL();
3935
3936 if (from_dev->addr_len != to_dev->addr_len)
3937 return -EINVAL;
3938 err = __hw_addr_add_multiple(&to_dev->dev_addrs, &from_dev->dev_addrs,
3939 to_dev->addr_len, addr_type);
3940 if (!err)
3941 call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev);
3942 return err;
3943}
3944EXPORT_SYMBOL(dev_addr_add_multiple);
3945
3946/**
3947 * dev_addr_del_multiple - Delete device addresses by another device
3948 * @to_dev: device where the addresses will be deleted
3949 * @from_dev: device supplying the addresses to be deleted
3950 * @addr_type: address type - 0 means the type will be taken from from_dev
3951 *
3952 * Deletes the addresses in the to device that are listed in the from device.
3953 *
3954 * The caller must hold the rtnl_mutex.
3955 */
3956int dev_addr_del_multiple(struct net_device *to_dev,
3957 struct net_device *from_dev,
3958 unsigned char addr_type)
3959{
3960 ASSERT_RTNL();
3961
3962 if (from_dev->addr_len != to_dev->addr_len)
3963 return -EINVAL;
3964 __hw_addr_del_multiple(&to_dev->dev_addrs, &from_dev->dev_addrs,
3965 to_dev->addr_len, addr_type);
3966 call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev);
3967 return 0;
3968}
3969EXPORT_SYMBOL(dev_addr_del_multiple);
3970
3971/* multicast addresses handling functions */
3972
3973int __dev_addr_delete(struct dev_addr_list **list, int *count,
3974 void *addr, int alen, int glbl)
3975{
3976 struct dev_addr_list *da;
3977
3978 for (; (da = *list) != NULL; list = &da->next) {
3979 if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
3980 alen == da->da_addrlen) {
3981 if (glbl) {
3982 int old_glbl = da->da_gusers;
3983 da->da_gusers = 0;
3984 if (old_glbl == 0)
3985 break;
3986 }
3987 if (--da->da_users)
3988 return 0;
3989
3990 *list = da->next;
3991 kfree(da);
3992 (*count)--;
3993 return 0;
3994 }
3995 }
3996 return -ENOENT;
3997}
3998
3999int __dev_addr_add(struct dev_addr_list **list, int *count,
4000 void *addr, int alen, int glbl)
4001{
4002 struct dev_addr_list *da;
4003
4004 for (da = *list; da != NULL; da = da->next) {
4005 if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
4006 da->da_addrlen == alen) {
4007 if (glbl) {
4008 int old_glbl = da->da_gusers;
4009 da->da_gusers = 1;
4010 if (old_glbl)
4011 return 0;
4012 }
4013 da->da_users++;
4014 return 0;
4015 }
4016 }
4017
4018 da = kzalloc(sizeof(*da), GFP_ATOMIC);
4019 if (da == NULL)
4020 return -ENOMEM;
4021 memcpy(da->da_addr, addr, alen);
4022 da->da_addrlen = alen;
4023 da->da_users = 1;
4024 da->da_gusers = glbl ? 1 : 0;
4025 da->next = *list;
4026 *list = da;
4027 (*count)++;
4028 return 0;
4029}
4030
4031/**
4032 * dev_unicast_delete - Release secondary unicast address.
4033 * @dev: device
4034 * @addr: address to delete
4035 *
4036 * Release reference to a secondary unicast address and remove it
4037 * from the device if the reference count drops to zero.
4038 *
4039 * The caller must hold the rtnl_mutex.
4040 */
4041int dev_unicast_delete(struct net_device *dev, void *addr)
4042{
4043 int err;
4044
4045 ASSERT_RTNL();
4046
4047 netif_addr_lock_bh(dev);
4048 err = __hw_addr_del(&dev->uc, addr, dev->addr_len,
4049 NETDEV_HW_ADDR_T_UNICAST);
4050 if (!err)
4051 __dev_set_rx_mode(dev);
4052 netif_addr_unlock_bh(dev);
4053 return err;
4054}
4055EXPORT_SYMBOL(dev_unicast_delete);
4056
4057/**
4058 * dev_unicast_add - add a secondary unicast address
4059 * @dev: device
4060 * @addr: address to add
4061 *
4062 * Add a secondary unicast address to the device or increase
4063 * the reference count if it already exists.
4064 *
4065 * The caller must hold the rtnl_mutex.
4066 */
4067int dev_unicast_add(struct net_device *dev, void *addr)
4068{
4069 int err;
4070
4071 ASSERT_RTNL();
4072
4073 netif_addr_lock_bh(dev);
4074 err = __hw_addr_add(&dev->uc, addr, dev->addr_len,
4075 NETDEV_HW_ADDR_T_UNICAST);
4076 if (!err)
4077 __dev_set_rx_mode(dev);
4078 netif_addr_unlock_bh(dev);
4079 return err;
4080}
4081EXPORT_SYMBOL(dev_unicast_add);
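Illustrative sketch, not part of the patch: a stacked device (macvlan-like) claiming its own MAC as a secondary unicast address on the lower device when opened and releasing it on close; ndo_open/ndo_stop already run under the RTNL as the kernel-doc requires. example_get_lower() is an assumed helper.

static int example_open(struct net_device *dev)
{
	return dev_unicast_add(example_get_lower(dev), dev->dev_addr);
}

static int example_close(struct net_device *dev)
{
	return dev_unicast_delete(example_get_lower(dev), dev->dev_addr);
}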
4082
4083int __dev_addr_sync(struct dev_addr_list **to, int *to_count,
4084 struct dev_addr_list **from, int *from_count)
4085{
4086 struct dev_addr_list *da, *next;
4087 int err = 0;
4088
4089 da = *from;
4090 while (da != NULL) {
4091 next = da->next;
4092 if (!da->da_synced) {
4093 err = __dev_addr_add(to, to_count,
4094 da->da_addr, da->da_addrlen, 0);
4095 if (err < 0)
4096 break;
4097 da->da_synced = 1;
4098 da->da_users++;
4099 } else if (da->da_users == 1) {
4100 __dev_addr_delete(to, to_count,
4101 da->da_addr, da->da_addrlen, 0);
4102 __dev_addr_delete(from, from_count,
4103 da->da_addr, da->da_addrlen, 0);
4104 }
4105 da = next;
4106 }
4107 return err;
4108}
4109EXPORT_SYMBOL_GPL(__dev_addr_sync);
4110
4111void __dev_addr_unsync(struct dev_addr_list **to, int *to_count,
4112 struct dev_addr_list **from, int *from_count)
4113{
4114 struct dev_addr_list *da, *next;
4115
4116 da = *from;
4117 while (da != NULL) {
4118 next = da->next;
4119 if (da->da_synced) {
4120 __dev_addr_delete(to, to_count,
4121 da->da_addr, da->da_addrlen, 0);
4122 da->da_synced = 0;
4123 __dev_addr_delete(from, from_count,
4124 da->da_addr, da->da_addrlen, 0);
4125 }
4126 da = next;
4127 }
4128}
4129EXPORT_SYMBOL_GPL(__dev_addr_unsync);
4130
4131/**
4132 * dev_unicast_sync - Synchronize device's unicast list to another device
4133 * @to: destination device
4134 * @from: source device
4135 *
4136 * Add newly added addresses to the destination device and release
4137 * addresses that have no users left. The source device must be
4138 * locked by netif_tx_lock_bh.
4139 *
4140 * This function is intended to be called from the dev->set_rx_mode
4141 * function of layered software devices.
4142 */
4143int dev_unicast_sync(struct net_device *to, struct net_device *from)
4144{
4145 int err = 0;
4146
4147 if (to->addr_len != from->addr_len)
4148 return -EINVAL;
4149
4150 netif_addr_lock_bh(to);
4151 err = __hw_addr_sync(&to->uc, &from->uc, to->addr_len);
4152 if (!err)
4153 __dev_set_rx_mode(to);
4154 netif_addr_unlock_bh(to);
4155 return err;
4156}
4157EXPORT_SYMBOL(dev_unicast_sync);
4158
4159/**
4160 * dev_unicast_unsync - Remove synchronized addresses from the destination device
4161 * @to: destination device
4162 * @from: source device
4163 *
4164 * Remove all addresses that were added to the destination device by
4165 * dev_unicast_sync(). This function is intended to be called from the
4166 * dev->stop function of layered software devices.
4167 */
4168void dev_unicast_unsync(struct net_device *to, struct net_device *from)
4169{
4170 if (to->addr_len != from->addr_len)
4171 return;
4172
4173 netif_addr_lock_bh(from);
4174 netif_addr_lock(to);
4175 __hw_addr_unsync(&to->uc, &from->uc, to->addr_len);
4176 __dev_set_rx_mode(to);
4177 netif_addr_unlock(to);
4178 netif_addr_unlock_bh(from);
4179}
4180EXPORT_SYMBOL(dev_unicast_unsync);
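Illustrative sketch, not part of the patch: how a layered software device might wire dev_unicast_sync()/dev_unicast_unsync() into its rx-mode and stop paths, as the kernel-doc above suggests. example_get_lower() and the example_* ops are hypothetical.

static void example_set_rx_mode(struct net_device *dev)
{
	/* push any newly added unicast addresses down to the real device */
	dev_unicast_sync(example_get_lower(dev), dev);
}

static int example_stop(struct net_device *dev)
{
	/* drop the addresses this device had synced onto the real device */
	dev_unicast_unsync(example_get_lower(dev), dev);
	return 0;
}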
4181
4182static void dev_unicast_flush(struct net_device *dev)
4183{
4184 netif_addr_lock_bh(dev);
4185 __hw_addr_flush(&dev->uc);
4186 netif_addr_unlock_bh(dev);
4187}
4188
4189static void dev_unicast_init(struct net_device *dev)
4190{
4191 __hw_addr_init(&dev->uc);
4192}
4193
4194
4195static void __dev_addr_discard(struct dev_addr_list **list)
4196{
4197 struct dev_addr_list *tmp;
4198
4199 while (*list != NULL) {
4200 tmp = *list;
4201 *list = tmp->next;
4202 if (tmp->da_users > tmp->da_gusers)
4203 printk("__dev_addr_discard: address leakage! "
4204 "da_users=%d\n", tmp->da_users);
4205 kfree(tmp);
4206 }
4207}
4208
4209static void dev_addr_discard(struct net_device *dev)
4210{
4211 netif_addr_lock_bh(dev);
4212
4213 __dev_addr_discard(&dev->mc_list);
4214 dev->mc_count = 0;
4215
4216 netif_addr_unlock_bh(dev);
4217}
4218
4219/** 4206/**
4220 * dev_get_flags - get flags reported to userspace 4207 * dev_get_flags - get flags reported to userspace
4221 * @dev: device 4208 * @dev: device
@@ -4247,18 +4234,10 @@ unsigned dev_get_flags(const struct net_device *dev)
4247} 4234}
4248EXPORT_SYMBOL(dev_get_flags); 4235EXPORT_SYMBOL(dev_get_flags);
4249 4236
4250/** 4237int __dev_change_flags(struct net_device *dev, unsigned int flags)
4251 * dev_change_flags - change device settings
4252 * @dev: device
4253 * @flags: device state flags
4254 *
4255 * Change settings on device based state flags. The flags are
4256 * in the userspace exported format.
4257 */
4258int dev_change_flags(struct net_device *dev, unsigned flags)
4259{ 4238{
4260 int ret, changes;
4261 int old_flags = dev->flags; 4239 int old_flags = dev->flags;
4240 int ret;
4262 4241
4263 ASSERT_RTNL(); 4242 ASSERT_RTNL();
4264 4243
@@ -4289,17 +4268,12 @@ int dev_change_flags(struct net_device *dev, unsigned flags)
4289 4268
4290 ret = 0; 4269 ret = 0;
4291 if ((old_flags ^ flags) & IFF_UP) { /* Bit is different ? */ 4270 if ((old_flags ^ flags) & IFF_UP) { /* Bit is different ? */
4292 ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev); 4271 ret = ((old_flags & IFF_UP) ? __dev_close : __dev_open)(dev);
4293 4272
4294 if (!ret) 4273 if (!ret)
4295 dev_set_rx_mode(dev); 4274 dev_set_rx_mode(dev);
4296 } 4275 }
4297 4276
4298 if (dev->flags & IFF_UP &&
4299 ((old_flags ^ dev->flags) & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI |
4300 IFF_VOLATILE)))
4301 call_netdevice_notifiers(NETDEV_CHANGE, dev);
4302
4303 if ((flags ^ dev->gflags) & IFF_PROMISC) { 4277 if ((flags ^ dev->gflags) & IFF_PROMISC) {
4304 int inc = (flags & IFF_PROMISC) ? 1 : -1; 4278 int inc = (flags & IFF_PROMISC) ? 1 : -1;
4305 4279
@@ -4318,11 +4292,47 @@ int dev_change_flags(struct net_device *dev, unsigned flags)
4318 dev_set_allmulti(dev, inc); 4292 dev_set_allmulti(dev, inc);
4319 } 4293 }
4320 4294
4321 /* Exclude state transition flags, already notified */ 4295 return ret;
4322 changes = (old_flags ^ dev->flags) & ~(IFF_UP | IFF_RUNNING); 4296}
4297
4298void __dev_notify_flags(struct net_device *dev, unsigned int old_flags)
4299{
4300 unsigned int changes = dev->flags ^ old_flags;
4301
4302 if (changes & IFF_UP) {
4303 if (dev->flags & IFF_UP)
4304 call_netdevice_notifiers(NETDEV_UP, dev);
4305 else
4306 call_netdevice_notifiers(NETDEV_DOWN, dev);
4307 }
4308
4309 if (dev->flags & IFF_UP &&
4310 (changes & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI | IFF_VOLATILE)))
4311 call_netdevice_notifiers(NETDEV_CHANGE, dev);
4312}
4313
4314/**
4315 * dev_change_flags - change device settings
4316 * @dev: device
4317 * @flags: device state flags
4318 *
4319 * Change settings on device based state flags. The flags are
4320 * in the userspace exported format.
4321 */
4322int dev_change_flags(struct net_device *dev, unsigned flags)
4323{
4324 int ret, changes;
4325 int old_flags = dev->flags;
4326
4327 ret = __dev_change_flags(dev, flags);
4328 if (ret < 0)
4329 return ret;
4330
4331 changes = old_flags ^ dev->flags;
4323 if (changes) 4332 if (changes)
4324 rtmsg_ifinfo(RTM_NEWLINK, dev, changes); 4333 rtmsg_ifinfo(RTM_NEWLINK, dev, changes);
4325 4334
4335 __dev_notify_flags(dev, old_flags);
4326 return ret; 4336 return ret;
4327} 4337}
4328EXPORT_SYMBOL(dev_change_flags); 4338EXPORT_SYMBOL(dev_change_flags);
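Illustrative sketch, not part of the patch: toggling IFF_UP through dev_change_flags() under the RTNL, the same path the ioctl and netlink handlers use; the notifier and rtmsg_ifinfo() calls shown above then fire automatically.

static int example_bring_up(struct net_device *dev)
{
	int err;

	rtnl_lock();
	err = dev_change_flags(dev, dev->flags | IFF_UP);
	rtnl_unlock();
	return err;
}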
@@ -4503,8 +4513,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
4503 return -EINVAL; 4513 return -EINVAL;
4504 if (!netif_device_present(dev)) 4514 if (!netif_device_present(dev))
4505 return -ENODEV; 4515 return -ENODEV;
4506 return dev_mc_add(dev, ifr->ifr_hwaddr.sa_data, 4516 return dev_mc_add_global(dev, ifr->ifr_hwaddr.sa_data);
4507 dev->addr_len, 1);
4508 4517
4509 case SIOCDELMULTI: 4518 case SIOCDELMULTI:
4510 if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) || 4519 if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) ||
@@ -4512,8 +4521,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
4512 return -EINVAL; 4521 return -EINVAL;
4513 if (!netif_device_present(dev)) 4522 if (!netif_device_present(dev))
4514 return -ENODEV; 4523 return -ENODEV;
4515 return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data, 4524 return dev_mc_del_global(dev, ifr->ifr_hwaddr.sa_data);
4516 dev->addr_len, 1);
4517 4525
4518 case SIOCSIFTXQLEN: 4526 case SIOCSIFTXQLEN:
4519 if (ifr->ifr_qlen < 0) 4527 if (ifr->ifr_qlen < 0)
@@ -4813,11 +4821,15 @@ static void rollback_registered_many(struct list_head *head)
4813 */ 4821 */
4814 call_netdevice_notifiers(NETDEV_UNREGISTER, dev); 4822 call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
4815 4823
4824 if (!dev->rtnl_link_ops ||
4825 dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
4826 rtmsg_ifinfo(RTM_DELLINK, dev, ~0U);
4827
4816 /* 4828 /*
4817 * Flush the unicast and multicast chains 4829 * Flush the unicast and multicast chains
4818 */ 4830 */
4819 dev_unicast_flush(dev); 4831 dev_uc_flush(dev);
4820 dev_addr_discard(dev); 4832 dev_mc_flush(dev);
4821 4833
4822 if (dev->netdev_ops->ndo_uninit) 4834 if (dev->netdev_ops->ndo_uninit)
4823 dev->netdev_ops->ndo_uninit(dev); 4835 dev->netdev_ops->ndo_uninit(dev);
@@ -4830,10 +4842,10 @@ static void rollback_registered_many(struct list_head *head)
4830 } 4842 }
4831 4843
4832 /* Process any work delayed until the end of the batch */ 4844 /* Process any work delayed until the end of the batch */
4833 dev = list_entry(head->next, struct net_device, unreg_list); 4845 dev = list_first_entry(head, struct net_device, unreg_list);
4834 call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev); 4846 call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev);
4835 4847
4836 synchronize_net(); 4848 rcu_barrier();
4837 4849
4838 list_for_each_entry(dev, head, unreg_list) 4850 list_for_each_entry(dev, head, unreg_list)
4839 dev_put(dev); 4851 dev_put(dev);
@@ -4966,6 +4978,24 @@ int register_netdevice(struct net_device *dev)
4966 4978
4967 dev->iflink = -1; 4979 dev->iflink = -1;
4968 4980
4981#ifdef CONFIG_RPS
4982 if (!dev->num_rx_queues) {
4983 /*
4984 * Allocate a single RX queue if driver never called
4985 * alloc_netdev_mq
4986 */
4987
4988 dev->_rx = kzalloc(sizeof(struct netdev_rx_queue), GFP_KERNEL);
4989 if (!dev->_rx) {
4990 ret = -ENOMEM;
4991 goto out;
4992 }
4993
4994 dev->_rx->first = dev->_rx;
4995 atomic_set(&dev->_rx->count, 1);
4996 dev->num_rx_queues = 1;
4997 }
4998#endif
4969 /* Init, if this function is available */ 4999 /* Init, if this function is available */
4970 if (dev->netdev_ops->ndo_init) { 5000 if (dev->netdev_ops->ndo_init) {
4971 ret = dev->netdev_ops->ndo_init(dev); 5001 ret = dev->netdev_ops->ndo_init(dev);
@@ -4976,7 +5006,7 @@ int register_netdevice(struct net_device *dev)
4976 } 5006 }
4977 } 5007 }
4978 5008
4979 ret = dev_get_valid_name(net, dev->name, dev->name, 0); 5009 ret = dev_get_valid_name(dev, dev->name, 0);
4980 if (ret) 5010 if (ret)
4981 goto err_uninit; 5011 goto err_uninit;
4982 5012
@@ -5005,8 +5035,6 @@ int register_netdevice(struct net_device *dev)
5005 if (dev->features & NETIF_F_SG) 5035 if (dev->features & NETIF_F_SG)
5006 dev->features |= NETIF_F_GSO; 5036 dev->features |= NETIF_F_GSO;
5007 5037
5008 netdev_initialize_kobject(dev);
5009
5010 ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev); 5038 ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev);
5011 ret = notifier_to_errno(ret); 5039 ret = notifier_to_errno(ret);
5012 if (ret) 5040 if (ret)
@@ -5039,7 +5067,9 @@ int register_netdevice(struct net_device *dev)
5039 * Prevent userspace races by waiting until the network 5067 * Prevent userspace races by waiting until the network
5040 * device is fully setup before sending notifications. 5068 * device is fully setup before sending notifications.
5041 */ 5069 */
5042 rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U); 5070 if (!dev->rtnl_link_ops ||
5071 dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
5072 rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U);
5043 5073
5044out: 5074out:
5045 return ret; 5075 return ret;
@@ -5216,7 +5246,7 @@ void netdev_run_todo(void)
5216 5246
5217 while (!list_empty(&list)) { 5247 while (!list_empty(&list)) {
5218 struct net_device *dev 5248 struct net_device *dev
5219 = list_entry(list.next, struct net_device, todo_list); 5249 = list_first_entry(&list, struct net_device, todo_list);
5220 list_del(&dev->todo_list); 5250 list_del(&dev->todo_list);
5221 5251
5222 if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) { 5252 if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
@@ -5249,20 +5279,22 @@ void netdev_run_todo(void)
5249/** 5279/**
5250 * dev_txq_stats_fold - fold tx_queues stats 5280 * dev_txq_stats_fold - fold tx_queues stats
5251 * @dev: device to get statistics from 5281 * @dev: device to get statistics from
5252 * @stats: struct net_device_stats to hold results 5282 * @stats: struct rtnl_link_stats64 to hold results
5253 */ 5283 */
5254void dev_txq_stats_fold(const struct net_device *dev, 5284void dev_txq_stats_fold(const struct net_device *dev,
5255 struct net_device_stats *stats) 5285 struct rtnl_link_stats64 *stats)
5256{ 5286{
5257 unsigned long tx_bytes = 0, tx_packets = 0, tx_dropped = 0; 5287 u64 tx_bytes = 0, tx_packets = 0, tx_dropped = 0;
5258 unsigned int i; 5288 unsigned int i;
5259 struct netdev_queue *txq; 5289 struct netdev_queue *txq;
5260 5290
5261 for (i = 0; i < dev->num_tx_queues; i++) { 5291 for (i = 0; i < dev->num_tx_queues; i++) {
5262 txq = netdev_get_tx_queue(dev, i); 5292 txq = netdev_get_tx_queue(dev, i);
5293 spin_lock_bh(&txq->_xmit_lock);
5263 tx_bytes += txq->tx_bytes; 5294 tx_bytes += txq->tx_bytes;
5264 tx_packets += txq->tx_packets; 5295 tx_packets += txq->tx_packets;
5265 tx_dropped += txq->tx_dropped; 5296 tx_dropped += txq->tx_dropped;
5297 spin_unlock_bh(&txq->_xmit_lock);
5266 } 5298 }
5267 if (tx_bytes || tx_packets || tx_dropped) { 5299 if (tx_bytes || tx_packets || tx_dropped) {
5268 stats->tx_bytes = tx_bytes; 5300 stats->tx_bytes = tx_bytes;
@@ -5272,23 +5304,53 @@ void dev_txq_stats_fold(const struct net_device *dev,
5272} 5304}
5273EXPORT_SYMBOL(dev_txq_stats_fold); 5305EXPORT_SYMBOL(dev_txq_stats_fold);
5274 5306
5307/* Convert net_device_stats to rtnl_link_stats64. They have the same
5308 * fields in the same order, with only the type differing.
5309 */
5310static void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64,
5311 const struct net_device_stats *netdev_stats)
5312{
5313#if BITS_PER_LONG == 64
5314 BUILD_BUG_ON(sizeof(*stats64) != sizeof(*netdev_stats));
5315 memcpy(stats64, netdev_stats, sizeof(*stats64));
5316#else
5317 size_t i, n = sizeof(*stats64) / sizeof(u64);
5318 const unsigned long *src = (const unsigned long *)netdev_stats;
5319 u64 *dst = (u64 *)stats64;
5320
5321 BUILD_BUG_ON(sizeof(*netdev_stats) / sizeof(unsigned long) !=
5322 sizeof(*stats64) / sizeof(u64));
5323 for (i = 0; i < n; i++)
5324 dst[i] = src[i];
5325#endif
5326}
5327
5275/** 5328/**
5276 * dev_get_stats - get network device statistics 5329 * dev_get_stats - get network device statistics
5277 * @dev: device to get statistics from 5330 * @dev: device to get statistics from
5331 * @storage: place to store stats
5278 * 5332 *
5279 * Get network statistics from device. The device driver may provide 5333 * Get network statistics from device. Return @storage.
5280 * its own method by setting dev->netdev_ops->get_stats; otherwise 5334 * The device driver may provide its own method by setting
5281 * the internal statistics structure is used. 5335 * dev->netdev_ops->get_stats64 or dev->netdev_ops->get_stats;
5336 * otherwise the internal statistics structure is used.
5282 */ 5337 */
5283const struct net_device_stats *dev_get_stats(struct net_device *dev) 5338struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
5339 struct rtnl_link_stats64 *storage)
5284{ 5340{
5285 const struct net_device_ops *ops = dev->netdev_ops; 5341 const struct net_device_ops *ops = dev->netdev_ops;
5286 5342
5287 if (ops->ndo_get_stats) 5343 if (ops->ndo_get_stats64) {
5288 return ops->ndo_get_stats(dev); 5344 memset(storage, 0, sizeof(*storage));
5289 5345 return ops->ndo_get_stats64(dev, storage);
5290 dev_txq_stats_fold(dev, &dev->stats); 5346 }
5291 return &dev->stats; 5347 if (ops->ndo_get_stats) {
5348 netdev_stats_to_stats64(storage, ops->ndo_get_stats(dev));
5349 return storage;
5350 }
5351 netdev_stats_to_stats64(storage, &dev->stats);
5352 dev_txq_stats_fold(dev, storage);
5353 return storage;
5292} 5354}
5293EXPORT_SYMBOL(dev_get_stats); 5355EXPORT_SYMBOL(dev_get_stats);
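Illustrative sketch, not part of the patch: a driver supplying ndo_get_stats64 so that dev_get_stats() above fills the caller's storage with 64-bit counters; the storage is zeroed before the callback, so only non-zero fields need to be set. The example_priv counters are hypothetical.

static struct rtnl_link_stats64 *example_get_stats64(struct net_device *dev,
						     struct rtnl_link_stats64 *storage)
{
	struct example_priv *priv = netdev_priv(dev);

	storage->rx_packets = priv->rx_packets;
	storage->rx_bytes   = priv->rx_bytes;
	storage->tx_packets = priv->tx_packets;
	storage->tx_bytes   = priv->tx_bytes;
	return storage;
}

static const struct net_device_ops example_netdev_ops = {
	.ndo_get_stats64	= example_get_stats64,
	/* remaining ops omitted */
};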
5294 5356
@@ -5324,6 +5386,10 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
5324 struct net_device *dev; 5386 struct net_device *dev;
5325 size_t alloc_size; 5387 size_t alloc_size;
5326 struct net_device *p; 5388 struct net_device *p;
5389#ifdef CONFIG_RPS
5390 struct netdev_rx_queue *rx;
5391 int i;
5392#endif
5327 5393
5328 BUG_ON(strlen(name) >= sizeof(dev->name)); 5394 BUG_ON(strlen(name) >= sizeof(dev->name));
5329 5395
@@ -5349,13 +5415,32 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
5349 goto free_p; 5415 goto free_p;
5350 } 5416 }
5351 5417
5418#ifdef CONFIG_RPS
5419 rx = kcalloc(queue_count, sizeof(struct netdev_rx_queue), GFP_KERNEL);
5420 if (!rx) {
5421 printk(KERN_ERR "alloc_netdev: Unable to allocate "
5422 "rx queues.\n");
5423 goto free_tx;
5424 }
5425
5426 atomic_set(&rx->count, queue_count);
5427
5428 /*
5429 * Set a pointer to first element in the array which holds the
5430 * reference count.
5431 */
5432 for (i = 0; i < queue_count; i++)
5433 rx[i].first = rx;
5434#endif
5435
5352 dev = PTR_ALIGN(p, NETDEV_ALIGN); 5436 dev = PTR_ALIGN(p, NETDEV_ALIGN);
5353 dev->padded = (char *)dev - (char *)p; 5437 dev->padded = (char *)dev - (char *)p;
5354 5438
5355 if (dev_addr_init(dev)) 5439 if (dev_addr_init(dev))
5356 goto free_tx; 5440 goto free_rx;
5357 5441
5358 dev_unicast_init(dev); 5442 dev_mc_init(dev);
5443 dev_uc_init(dev);
5359 5444
5360 dev_net_set(dev, &init_net); 5445 dev_net_set(dev, &init_net);
5361 5446
@@ -5363,10 +5448,17 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
5363 dev->num_tx_queues = queue_count; 5448 dev->num_tx_queues = queue_count;
5364 dev->real_num_tx_queues = queue_count; 5449 dev->real_num_tx_queues = queue_count;
5365 5450
5451#ifdef CONFIG_RPS
5452 dev->_rx = rx;
5453 dev->num_rx_queues = queue_count;
5454#endif
5455
5366 dev->gso_max_size = GSO_MAX_SIZE; 5456 dev->gso_max_size = GSO_MAX_SIZE;
5367 5457
5368 netdev_init_queues(dev); 5458 netdev_init_queues(dev);
5369 5459
5460 INIT_LIST_HEAD(&dev->ethtool_ntuple_list.list);
5461 dev->ethtool_ntuple_list.count = 0;
5370 INIT_LIST_HEAD(&dev->napi_list); 5462 INIT_LIST_HEAD(&dev->napi_list);
5371 INIT_LIST_HEAD(&dev->unreg_list); 5463 INIT_LIST_HEAD(&dev->unreg_list);
5372 INIT_LIST_HEAD(&dev->link_watch_list); 5464 INIT_LIST_HEAD(&dev->link_watch_list);
@@ -5375,9 +5467,12 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
5375 strcpy(dev->name, name); 5467 strcpy(dev->name, name);
5376 return dev; 5468 return dev;
5377 5469
5470free_rx:
5471#ifdef CONFIG_RPS
5472 kfree(rx);
5378free_tx: 5473free_tx:
5474#endif
5379 kfree(tx); 5475 kfree(tx);
5380
5381free_p: 5476free_p:
5382 kfree(p); 5477 kfree(p);
5383 return NULL; 5478 return NULL;
@@ -5403,6 +5498,9 @@ void free_netdev(struct net_device *dev)
5403 /* Flush device addresses */ 5498 /* Flush device addresses */
5404 dev_addr_flush(dev); 5499 dev_addr_flush(dev);
5405 5500
5501 /* Clear ethtool n-tuple list */
5502 ethtool_ntuple_flush(dev);
5503
5406 list_for_each_entry_safe(p, n, &dev->napi_list, dev_list) 5504 list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)
5407 netif_napi_del(p); 5505 netif_napi_del(p);
5408 5506
@@ -5520,15 +5618,6 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
5520 if (dev->features & NETIF_F_NETNS_LOCAL) 5618 if (dev->features & NETIF_F_NETNS_LOCAL)
5521 goto out; 5619 goto out;
5522 5620
5523#ifdef CONFIG_SYSFS
5524 /* Don't allow real devices to be moved when sysfs
5525 * is enabled.
5526 */
5527 err = -EINVAL;
5528 if (dev->dev.parent)
5529 goto out;
5530#endif
5531
 5532	/* Ensure the device has been registered */ 5621
5533 err = -EINVAL; 5622 err = -EINVAL;
5534 if (dev->reg_state != NETREG_REGISTERED) 5623 if (dev->reg_state != NETREG_REGISTERED)
@@ -5547,7 +5636,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
5547 /* We get here if we can't use the current device name */ 5636 /* We get here if we can't use the current device name */
5548 if (!pat) 5637 if (!pat)
5549 goto out; 5638 goto out;
5550 if (dev_get_valid_name(net, pat, dev->name, 1)) 5639 if (dev_get_valid_name(dev, pat, 1))
5551 goto out; 5640 goto out;
5552 } 5641 }
5553 5642
@@ -5576,10 +5665,8 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
5576 /* 5665 /*
5577 * Flush the unicast and multicast chains 5666 * Flush the unicast and multicast chains
5578 */ 5667 */
5579 dev_unicast_flush(dev); 5668 dev_uc_flush(dev);
5580 dev_addr_discard(dev); 5669 dev_mc_flush(dev);
5581
5582 netdev_unregister_kobject(dev);
5583 5670
5584 /* Actually switch the network namespace */ 5671 /* Actually switch the network namespace */
5585 dev_net_set(dev, net); 5672 dev_net_set(dev, net);
@@ -5593,7 +5680,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
5593 } 5680 }
5594 5681
5595 /* Fixup kobjects */ 5682 /* Fixup kobjects */
5596 err = netdev_register_kobject(dev); 5683 err = device_rename(&dev->dev, dev->name);
5597 WARN_ON(err); 5684 WARN_ON(err);
5598 5685
5599 /* Add the device back in the hashes */ 5686 /* Add the device back in the hashes */
@@ -5620,7 +5707,6 @@ static int dev_cpu_callback(struct notifier_block *nfb,
5620 void *ocpu) 5707 void *ocpu)
5621{ 5708{
5622 struct sk_buff **list_skb; 5709 struct sk_buff **list_skb;
5623 struct Qdisc **list_net;
5624 struct sk_buff *skb; 5710 struct sk_buff *skb;
5625 unsigned int cpu, oldcpu = (unsigned long)ocpu; 5711 unsigned int cpu, oldcpu = (unsigned long)ocpu;
5626 struct softnet_data *sd, *oldsd; 5712 struct softnet_data *sd, *oldsd;
@@ -5641,20 +5727,26 @@ static int dev_cpu_callback(struct notifier_block *nfb,
5641 *list_skb = oldsd->completion_queue; 5727 *list_skb = oldsd->completion_queue;
5642 oldsd->completion_queue = NULL; 5728 oldsd->completion_queue = NULL;
5643 5729
5644 /* Find end of our output_queue. */
5645 list_net = &sd->output_queue;
5646 while (*list_net)
5647 list_net = &(*list_net)->next_sched;
5648 /* Append output queue from offline CPU. */ 5730 /* Append output queue from offline CPU. */
5649 *list_net = oldsd->output_queue; 5731 if (oldsd->output_queue) {
5650 oldsd->output_queue = NULL; 5732 *sd->output_queue_tailp = oldsd->output_queue;
5733 sd->output_queue_tailp = oldsd->output_queue_tailp;
5734 oldsd->output_queue = NULL;
5735 oldsd->output_queue_tailp = &oldsd->output_queue;
5736 }
5651 5737
5652 raise_softirq_irqoff(NET_TX_SOFTIRQ); 5738 raise_softirq_irqoff(NET_TX_SOFTIRQ);
5653 local_irq_enable(); 5739 local_irq_enable();
5654 5740
5655 /* Process offline CPU's input_pkt_queue */ 5741 /* Process offline CPU's input_pkt_queue */
5656 while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) 5742 while ((skb = __skb_dequeue(&oldsd->process_queue))) {
5743 netif_rx(skb);
5744 input_queue_head_incr(oldsd);
5745 }
5746 while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) {
5657 netif_rx(skb); 5747 netif_rx(skb);
5748 input_queue_head_incr(oldsd);
5749 }
5658 5750
5659 return NOTIFY_OK; 5751 return NOTIFY_OK;
5660} 5752}
@@ -5763,6 +5855,68 @@ char *netdev_drivername(const struct net_device *dev, char *buffer, int len)
5763 return buffer; 5855 return buffer;
5764} 5856}
5765 5857
5858static int __netdev_printk(const char *level, const struct net_device *dev,
5859 struct va_format *vaf)
5860{
5861 int r;
5862
5863 if (dev && dev->dev.parent)
5864 r = dev_printk(level, dev->dev.parent, "%s: %pV",
5865 netdev_name(dev), vaf);
5866 else if (dev)
5867 r = printk("%s%s: %pV", level, netdev_name(dev), vaf);
5868 else
5869 r = printk("%s(NULL net_device): %pV", level, vaf);
5870
5871 return r;
5872}
5873
5874int netdev_printk(const char *level, const struct net_device *dev,
5875 const char *format, ...)
5876{
5877 struct va_format vaf;
5878 va_list args;
5879 int r;
5880
5881 va_start(args, format);
5882
5883 vaf.fmt = format;
5884 vaf.va = &args;
5885
5886 r = __netdev_printk(level, dev, &vaf);
5887 va_end(args);
5888
5889 return r;
5890}
5891EXPORT_SYMBOL(netdev_printk);
5892
5893#define define_netdev_printk_level(func, level) \
5894int func(const struct net_device *dev, const char *fmt, ...) \
5895{ \
5896 int r; \
5897 struct va_format vaf; \
5898 va_list args; \
5899 \
5900 va_start(args, fmt); \
5901 \
5902 vaf.fmt = fmt; \
5903 vaf.va = &args; \
5904 \
5905 r = __netdev_printk(level, dev, &vaf); \
5906 va_end(args); \
5907 \
5908 return r; \
5909} \
5910EXPORT_SYMBOL(func);
5911
5912define_netdev_printk_level(netdev_emerg, KERN_EMERG);
5913define_netdev_printk_level(netdev_alert, KERN_ALERT);
5914define_netdev_printk_level(netdev_crit, KERN_CRIT);
5915define_netdev_printk_level(netdev_err, KERN_ERR);
5916define_netdev_printk_level(netdev_warn, KERN_WARNING);
5917define_netdev_printk_level(netdev_notice, KERN_NOTICE);
5918define_netdev_printk_level(netdev_info, KERN_INFO);
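Illustrative sketch, not part of the patch: typical driver messages using the per-level helpers generated above; output is prefixed with the parent device (when present) and the netdev name.

static void example_report_link(struct net_device *dev, bool up)
{
	if (up)
		netdev_info(dev, "link is up\n");
	else
		netdev_warn(dev, "link is down\n");
}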
5919
5766static void __net_exit netdev_exit(struct net *net) 5920static void __net_exit netdev_exit(struct net *net)
5767{ 5921{
5768 kfree(net->dev_name_head); 5922 kfree(net->dev_name_head);
@@ -5870,17 +6024,26 @@ static int __init net_dev_init(void)
5870 */ 6024 */
5871 6025
5872 for_each_possible_cpu(i) { 6026 for_each_possible_cpu(i) {
5873 struct softnet_data *queue; 6027 struct softnet_data *sd = &per_cpu(softnet_data, i);
5874 6028
5875 queue = &per_cpu(softnet_data, i); 6029 memset(sd, 0, sizeof(*sd));
5876 skb_queue_head_init(&queue->input_pkt_queue); 6030 skb_queue_head_init(&sd->input_pkt_queue);
5877 queue->completion_queue = NULL; 6031 skb_queue_head_init(&sd->process_queue);
5878 INIT_LIST_HEAD(&queue->poll_list); 6032 sd->completion_queue = NULL;
6033 INIT_LIST_HEAD(&sd->poll_list);
6034 sd->output_queue = NULL;
6035 sd->output_queue_tailp = &sd->output_queue;
6036#ifdef CONFIG_RPS
6037 sd->csd.func = rps_trigger_softirq;
6038 sd->csd.info = sd;
6039 sd->csd.flags = 0;
6040 sd->cpu = i;
6041#endif
5879 6042
5880 queue->backlog.poll = process_backlog; 6043 sd->backlog.poll = process_backlog;
5881 queue->backlog.weight = weight_p; 6044 sd->backlog.weight = weight_p;
5882 queue->backlog.gro_list = NULL; 6045 sd->backlog.gro_list = NULL;
5883 queue->backlog.gro_count = 0; 6046 sd->backlog.gro_count = 0;
5884 } 6047 }
5885 6048
5886 dev_boot_phase = 0; 6049 dev_boot_phase = 0;
@@ -5915,7 +6078,7 @@ subsys_initcall(net_dev_init);
5915 6078
5916static int __init initialize_hashrnd(void) 6079static int __init initialize_hashrnd(void)
5917{ 6080{
5918 get_random_bytes(&skb_tx_hashrnd, sizeof(skb_tx_hashrnd)); 6081 get_random_bytes(&hashrnd, sizeof(hashrnd));
5919 return 0; 6082 return 0;
5920} 6083}
5921 6084
diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c
new file mode 100644
index 000000000000..508f9c18992f
--- /dev/null
+++ b/net/core/dev_addr_lists.c
@@ -0,0 +1,741 @@
1/*
2 * net/core/dev_addr_lists.c - Functions for handling net device lists
3 * Copyright (c) 2010 Jiri Pirko <jpirko@redhat.com>
4 *
5 * This file contains functions for working with unicast, multicast and device
  6 * address lists.
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 */
13
14#include <linux/netdevice.h>
15#include <linux/rtnetlink.h>
16#include <linux/list.h>
17#include <linux/proc_fs.h>
18
19/*
20 * General list handling functions
21 */
22
23static int __hw_addr_add_ex(struct netdev_hw_addr_list *list,
24 unsigned char *addr, int addr_len,
25 unsigned char addr_type, bool global)
26{
27 struct netdev_hw_addr *ha;
28 int alloc_size;
29
30 if (addr_len > MAX_ADDR_LEN)
31 return -EINVAL;
32
33 list_for_each_entry(ha, &list->list, list) {
34 if (!memcmp(ha->addr, addr, addr_len) &&
35 ha->type == addr_type) {
36 if (global) {
37 /* check if addr is already used as global */
38 if (ha->global_use)
39 return 0;
40 else
41 ha->global_use = true;
42 }
43 ha->refcount++;
44 return 0;
45 }
46 }
47
48
49 alloc_size = sizeof(*ha);
50 if (alloc_size < L1_CACHE_BYTES)
51 alloc_size = L1_CACHE_BYTES;
52 ha = kmalloc(alloc_size, GFP_ATOMIC);
53 if (!ha)
54 return -ENOMEM;
55 memcpy(ha->addr, addr, addr_len);
56 ha->type = addr_type;
57 ha->refcount = 1;
58 ha->global_use = global;
59 ha->synced = false;
60 list_add_tail_rcu(&ha->list, &list->list);
61 list->count++;
62 return 0;
63}
64
65static int __hw_addr_add(struct netdev_hw_addr_list *list, unsigned char *addr,
66 int addr_len, unsigned char addr_type)
67{
68 return __hw_addr_add_ex(list, addr, addr_len, addr_type, false);
69}
70
71static void ha_rcu_free(struct rcu_head *head)
72{
73 struct netdev_hw_addr *ha;
74
75 ha = container_of(head, struct netdev_hw_addr, rcu_head);
76 kfree(ha);
77}
78
79static int __hw_addr_del_ex(struct netdev_hw_addr_list *list,
80 unsigned char *addr, int addr_len,
81 unsigned char addr_type, bool global)
82{
83 struct netdev_hw_addr *ha;
84
85 list_for_each_entry(ha, &list->list, list) {
86 if (!memcmp(ha->addr, addr, addr_len) &&
87 (ha->type == addr_type || !addr_type)) {
88 if (global) {
89 if (!ha->global_use)
90 break;
91 else
92 ha->global_use = false;
93 }
94 if (--ha->refcount)
95 return 0;
96 list_del_rcu(&ha->list);
97 call_rcu(&ha->rcu_head, ha_rcu_free);
98 list->count--;
99 return 0;
100 }
101 }
102 return -ENOENT;
103}
104
105static int __hw_addr_del(struct netdev_hw_addr_list *list, unsigned char *addr,
106 int addr_len, unsigned char addr_type)
107{
108 return __hw_addr_del_ex(list, addr, addr_len, addr_type, false);
109}
110
111int __hw_addr_add_multiple(struct netdev_hw_addr_list *to_list,
112 struct netdev_hw_addr_list *from_list,
113 int addr_len, unsigned char addr_type)
114{
115 int err;
116 struct netdev_hw_addr *ha, *ha2;
117 unsigned char type;
118
119 list_for_each_entry(ha, &from_list->list, list) {
120 type = addr_type ? addr_type : ha->type;
121 err = __hw_addr_add(to_list, ha->addr, addr_len, type);
122 if (err)
123 goto unroll;
124 }
125 return 0;
126
127unroll:
128 list_for_each_entry(ha2, &from_list->list, list) {
129 if (ha2 == ha)
130 break;
131 type = addr_type ? addr_type : ha2->type;
132 __hw_addr_del(to_list, ha2->addr, addr_len, type);
133 }
134 return err;
135}
136EXPORT_SYMBOL(__hw_addr_add_multiple);
137
138void __hw_addr_del_multiple(struct netdev_hw_addr_list *to_list,
139 struct netdev_hw_addr_list *from_list,
140 int addr_len, unsigned char addr_type)
141{
142 struct netdev_hw_addr *ha;
143 unsigned char type;
144
145 list_for_each_entry(ha, &from_list->list, list) {
146 type = addr_type ? addr_type : ha->type;
147 		__hw_addr_del(to_list, ha->addr, addr_len, type);
148 }
149}
150EXPORT_SYMBOL(__hw_addr_del_multiple);
151
152int __hw_addr_sync(struct netdev_hw_addr_list *to_list,
153 struct netdev_hw_addr_list *from_list,
154 int addr_len)
155{
156 int err = 0;
157 struct netdev_hw_addr *ha, *tmp;
158
159 list_for_each_entry_safe(ha, tmp, &from_list->list, list) {
160 if (!ha->synced) {
161 err = __hw_addr_add(to_list, ha->addr,
162 addr_len, ha->type);
163 if (err)
164 break;
165 ha->synced = true;
166 ha->refcount++;
167 } else if (ha->refcount == 1) {
168 __hw_addr_del(to_list, ha->addr, addr_len, ha->type);
169 __hw_addr_del(from_list, ha->addr, addr_len, ha->type);
170 }
171 }
172 return err;
173}
174EXPORT_SYMBOL(__hw_addr_sync);
175
176void __hw_addr_unsync(struct netdev_hw_addr_list *to_list,
177 struct netdev_hw_addr_list *from_list,
178 int addr_len)
179{
180 struct netdev_hw_addr *ha, *tmp;
181
182 list_for_each_entry_safe(ha, tmp, &from_list->list, list) {
183 if (ha->synced) {
184 __hw_addr_del(to_list, ha->addr,
185 addr_len, ha->type);
186 ha->synced = false;
187 __hw_addr_del(from_list, ha->addr,
188 addr_len, ha->type);
189 }
190 }
191}
192EXPORT_SYMBOL(__hw_addr_unsync);
193
194void __hw_addr_flush(struct netdev_hw_addr_list *list)
195{
196 struct netdev_hw_addr *ha, *tmp;
197
198 list_for_each_entry_safe(ha, tmp, &list->list, list) {
199 list_del_rcu(&ha->list);
200 call_rcu(&ha->rcu_head, ha_rcu_free);
201 }
202 list->count = 0;
203}
204EXPORT_SYMBOL(__hw_addr_flush);
205
206void __hw_addr_init(struct netdev_hw_addr_list *list)
207{
208 INIT_LIST_HEAD(&list->list);
209 list->count = 0;
210}
211EXPORT_SYMBOL(__hw_addr_init);
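
[Illustrative sketch, not part of this commit: the exported __hw_addr_* helpers above work on any struct netdev_hw_addr_list, so a driver can keep its own reference-counted copy of another device's addresses. The "my_port" structure and functions below are hypothetical and only show the intended calling pattern.]

	#include <linux/netdevice.h>

	struct my_port {
		struct netdev_hw_addr_list addrs;	/* private, refcounted copy */
	};

	static void my_port_init(struct my_port *port)
	{
		__hw_addr_init(&port->addrs);		/* empty list, count = 0 */
	}

	/* Take a snapshot of another device's unicast addresses.  Duplicates
	 * are not added twice; their reference count is bumped instead. */
	static int my_port_copy_uc(struct my_port *port, struct net_device *dev)
	{
		int err;

		netif_addr_lock_bh(dev);
		err = __hw_addr_add_multiple(&port->addrs, &dev->uc,
					     dev->addr_len,
					     NETDEV_HW_ADDR_T_UNICAST);
		netif_addr_unlock_bh(dev);
		return err;
	}

	static void my_port_destroy(struct my_port *port)
	{
		__hw_addr_flush(&port->addrs);	/* entries freed via call_rcu() */
	}
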
212
213/*
214 * Device addresses handling functions
215 */
216
217/**
218 * dev_addr_flush - Flush device address list
219 * @dev: device
220 *
221 * Flush device address list and reset ->dev_addr.
222 *
223 * The caller must hold the rtnl_mutex.
224 */
225void dev_addr_flush(struct net_device *dev)
226{
227 /* rtnl_mutex must be held here */
228
229 __hw_addr_flush(&dev->dev_addrs);
230 dev->dev_addr = NULL;
231}
232EXPORT_SYMBOL(dev_addr_flush);
233
234/**
235 * dev_addr_init - Init device address list
236 * @dev: device
237 *
238 * Init device address list and create the first element,
239 * used by ->dev_addr.
240 *
241 * The caller must hold the rtnl_mutex.
242 */
243int dev_addr_init(struct net_device *dev)
244{
245 unsigned char addr[MAX_ADDR_LEN];
246 struct netdev_hw_addr *ha;
247 int err;
248
249 /* rtnl_mutex must be held here */
250
251 __hw_addr_init(&dev->dev_addrs);
252 memset(addr, 0, sizeof(addr));
253 err = __hw_addr_add(&dev->dev_addrs, addr, sizeof(addr),
254 NETDEV_HW_ADDR_T_LAN);
255 if (!err) {
256 /*
257 * Get the first (previously created) address from the list
258 * and set dev_addr pointer to this location.
259 */
260 ha = list_first_entry(&dev->dev_addrs.list,
261 struct netdev_hw_addr, list);
262 dev->dev_addr = ha->addr;
263 }
264 return err;
265}
266EXPORT_SYMBOL(dev_addr_init);
267
268/**
269 * dev_addr_add - Add a device address
270 * @dev: device
271 * @addr: address to add
272 * @addr_type: address type
273 *
274 * Add a device address to the device or increase the reference count if
275 * it already exists.
276 *
277 * The caller must hold the rtnl_mutex.
278 */
279int dev_addr_add(struct net_device *dev, unsigned char *addr,
280 unsigned char addr_type)
281{
282 int err;
283
284 ASSERT_RTNL();
285
286 err = __hw_addr_add(&dev->dev_addrs, addr, dev->addr_len, addr_type);
287 if (!err)
288 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
289 return err;
290}
291EXPORT_SYMBOL(dev_addr_add);
292
293/**
294 * dev_addr_del - Release a device address.
295 * @dev: device
296 * @addr: address to delete
297 * @addr_type: address type
298 *
299 * Release reference to a device address and remove it from the device
300 * if the reference count drops to zero.
301 *
302 * The caller must hold the rtnl_mutex.
303 */
304int dev_addr_del(struct net_device *dev, unsigned char *addr,
305 unsigned char addr_type)
306{
307 int err;
308 struct netdev_hw_addr *ha;
309
310 ASSERT_RTNL();
311
312 /*
313 * We can not remove the first address from the list because
314 * dev->dev_addr points to that.
315 */
316 ha = list_first_entry(&dev->dev_addrs.list,
317 struct netdev_hw_addr, list);
318 if (ha->addr == dev->dev_addr && ha->refcount == 1)
319 return -ENOENT;
320
321 err = __hw_addr_del(&dev->dev_addrs, addr, dev->addr_len,
322 addr_type);
323 if (!err)
324 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
325 return err;
326}
327EXPORT_SYMBOL(dev_addr_del);
328
329/**
330 * dev_addr_add_multiple - Add device addresses from another device
331 * @to_dev: device to which addresses will be added
332 * @from_dev: device from which addresses will be added
333 * @addr_type: address type - 0 means type will be used from from_dev
334 *
335 * Add device addresses of one device to another.
336 *
337 * The caller must hold the rtnl_mutex.
338 */
339int dev_addr_add_multiple(struct net_device *to_dev,
340 struct net_device *from_dev,
341 unsigned char addr_type)
342{
343 int err;
344
345 ASSERT_RTNL();
346
347 if (from_dev->addr_len != to_dev->addr_len)
348 return -EINVAL;
349 err = __hw_addr_add_multiple(&to_dev->dev_addrs, &from_dev->dev_addrs,
350 to_dev->addr_len, addr_type);
351 if (!err)
352 call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev);
353 return err;
354}
355EXPORT_SYMBOL(dev_addr_add_multiple);
356
357/**
358 * dev_addr_del_multiple - Delete device addresses by another device
359 * @to_dev: device where the addresses will be deleted
360 * @from_dev: device by which addresses will be deleted
361 * @addr_type: address type - 0 means type will be used from from_dev
362 *
363 * Deletes addresses in the to device that are listed in the from device.
364 *
365 * The caller must hold the rtnl_mutex.
366 */
367int dev_addr_del_multiple(struct net_device *to_dev,
368 struct net_device *from_dev,
369 unsigned char addr_type)
370{
371 ASSERT_RTNL();
372
373 if (from_dev->addr_len != to_dev->addr_len)
374 return -EINVAL;
375 __hw_addr_del_multiple(&to_dev->dev_addrs, &from_dev->dev_addrs,
376 to_dev->addr_len, addr_type);
377 call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev);
378 return 0;
379}
380EXPORT_SYMBOL(dev_addr_del_multiple);
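
[Illustrative sketch, not part of this commit: adding and removing a secondary hardware address with the dev_addr_* helpers above. The MAC value is made up, and the rtnl lock must be held around the calls, as the kernel-doc notes.]

	#include <linux/netdevice.h>
	#include <linux/rtnetlink.h>

	static int example_secondary_addr(struct net_device *dev)
	{
		/* hypothetical locally administered address, illustration only */
		unsigned char addr[ETH_ALEN] = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x01 };
		int err;

		rtnl_lock();
		err = dev_addr_add(dev, addr, NETDEV_HW_ADDR_T_SAN);
		if (!err)
			/* reference count drops to zero, entry is removed again */
			err = dev_addr_del(dev, addr, NETDEV_HW_ADDR_T_SAN);
		rtnl_unlock();
		return err;
	}
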
381
382/*
383 * Unicast list handling functions
384 */
385
386/**
387 * dev_uc_add - Add a secondary unicast address
388 * @dev: device
389 * @addr: address to add
390 *
391 * Add a secondary unicast address to the device or increase
392 * the reference count if it already exists.
393 */
394int dev_uc_add(struct net_device *dev, unsigned char *addr)
395{
396 int err;
397
398 netif_addr_lock_bh(dev);
399 err = __hw_addr_add(&dev->uc, addr, dev->addr_len,
400 NETDEV_HW_ADDR_T_UNICAST);
401 if (!err)
402 __dev_set_rx_mode(dev);
403 netif_addr_unlock_bh(dev);
404 return err;
405}
406EXPORT_SYMBOL(dev_uc_add);
407
408/**
409 * dev_uc_del - Release secondary unicast address.
410 * @dev: device
411 * @addr: address to delete
412 *
413 * Release reference to a secondary unicast address and remove it
414 * from the device if the reference count drops to zero.
415 */
416int dev_uc_del(struct net_device *dev, unsigned char *addr)
417{
418 int err;
419
420 netif_addr_lock_bh(dev);
421 err = __hw_addr_del(&dev->uc, addr, dev->addr_len,
422 NETDEV_HW_ADDR_T_UNICAST);
423 if (!err)
424 __dev_set_rx_mode(dev);
425 netif_addr_unlock_bh(dev);
426 return err;
427}
428EXPORT_SYMBOL(dev_uc_del);
429
430/**
431 * dev_uc_sync - Synchronize device's unicast list to another device
432 * @to: destination device
433 * @from: source device
434 *
435 * Add newly added addresses to the destination device and release
436 * addresses that have no users left. The source device must be
437 * locked by netif_addr_lock_bh.
438 *
439 * This function is intended to be called from the dev->set_rx_mode
440 * function of layered software devices.
441 */
442int dev_uc_sync(struct net_device *to, struct net_device *from)
443{
444 int err = 0;
445
446 if (to->addr_len != from->addr_len)
447 return -EINVAL;
448
449 netif_addr_lock_bh(to);
450 err = __hw_addr_sync(&to->uc, &from->uc, to->addr_len);
451 if (!err)
452 __dev_set_rx_mode(to);
453 netif_addr_unlock_bh(to);
454 return err;
455}
456EXPORT_SYMBOL(dev_uc_sync);
457
458/**
459 * dev_uc_unsync - Remove synchronized addresses from the destination device
460 * @to: destination device
461 * @from: source device
462 *
463 * Remove all addresses that were added to the destination device by
464 * dev_uc_sync(). This function is intended to be called from the
465 * dev->stop function of layered software devices.
466 */
467void dev_uc_unsync(struct net_device *to, struct net_device *from)
468{
469 if (to->addr_len != from->addr_len)
470 return;
471
472 netif_addr_lock_bh(from);
473 netif_addr_lock(to);
474 __hw_addr_unsync(&to->uc, &from->uc, to->addr_len);
475 __dev_set_rx_mode(to);
476 netif_addr_unlock(to);
477 netif_addr_unlock_bh(from);
478}
479EXPORT_SYMBOL(dev_uc_unsync);
480
481/**
482 * dev_uc_flush - Flush unicast addresses
483 * @dev: device
484 *
485 * Flush unicast addresses.
486 */
487void dev_uc_flush(struct net_device *dev)
488{
489 netif_addr_lock_bh(dev);
490 __hw_addr_flush(&dev->uc);
491 netif_addr_unlock_bh(dev);
492}
493EXPORT_SYMBOL(dev_uc_flush);
494
495/**
496 * dev_uc_init - Init unicast address list
497 * @dev: device
498 *
499 * Init unicast address list.
500 */
501void dev_uc_init(struct net_device *dev)
502{
503 __hw_addr_init(&dev->uc);
504}
505EXPORT_SYMBOL(dev_uc_init);
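
[Illustrative sketch, not part of this commit: how a layered device might use dev_uc_sync()/dev_uc_unsync() to mirror its own unicast list onto the real device below it. "upper" and "lower" are hypothetical net_device pointers; the upper device's address lock is already held when the core invokes ndo_set_rx_mode.]

	/* called from the upper device's ndo_set_rx_mode */
	static void example_upper_set_rx_mode(struct net_device *upper,
					      struct net_device *lower)
	{
		/* add addresses new on "upper", release ones no longer used */
		dev_uc_sync(lower, upper);
	}

	/* called from the upper device's ndo_stop */
	static void example_upper_stop(struct net_device *upper,
				       struct net_device *lower)
	{
		/* drop everything example_upper_set_rx_mode() pushed down */
		dev_uc_unsync(lower, upper);
	}
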
506
507/*
508 * Multicast list handling functions
509 */
510
511static int __dev_mc_add(struct net_device *dev, unsigned char *addr,
512 bool global)
513{
514 int err;
515
516 netif_addr_lock_bh(dev);
517 err = __hw_addr_add_ex(&dev->mc, addr, dev->addr_len,
518 NETDEV_HW_ADDR_T_MULTICAST, global);
519 if (!err)
520 __dev_set_rx_mode(dev);
521 netif_addr_unlock_bh(dev);
522 return err;
523}
524/**
525 * dev_mc_add - Add a multicast address
526 * @dev: device
527 * @addr: address to add
528 *
529 * Add a multicast address to the device or increase
530 * the reference count if it already exists.
531 */
532int dev_mc_add(struct net_device *dev, unsigned char *addr)
533{
534 return __dev_mc_add(dev, addr, false);
535}
536EXPORT_SYMBOL(dev_mc_add);
537
538/**
539 * dev_mc_add_global - Add a global multicast address
540 * @dev: device
541 * @addr: address to add
542 *
543 * Add a global multicast address to the device.
544 */
545int dev_mc_add_global(struct net_device *dev, unsigned char *addr)
546{
547 return __dev_mc_add(dev, addr, true);
548}
549EXPORT_SYMBOL(dev_mc_add_global);
550
551static int __dev_mc_del(struct net_device *dev, unsigned char *addr,
552 bool global)
553{
554 int err;
555
556 netif_addr_lock_bh(dev);
557 err = __hw_addr_del_ex(&dev->mc, addr, dev->addr_len,
558 NETDEV_HW_ADDR_T_MULTICAST, global);
559 if (!err)
560 __dev_set_rx_mode(dev);
561 netif_addr_unlock_bh(dev);
562 return err;
563}
564
565/**
566 * dev_mc_del - Delete a multicast address.
567 * @dev: device
568 * @addr: address to delete
569 *
570 * Release reference to a multicast address and remove it
571 * from the device if the reference count drops to zero.
572 */
573int dev_mc_del(struct net_device *dev, unsigned char *addr)
574{
575 return __dev_mc_del(dev, addr, false);
576}
577EXPORT_SYMBOL(dev_mc_del);
578
579/**
580 * dev_mc_del_global - Delete a global multicast address.
581 * @dev: device
582 * @addr: address to delete
583 *
584 * Release reference to a multicast address and remove it
585 * from the device if the reference count drops to zero.
586 */
587int dev_mc_del_global(struct net_device *dev, unsigned char *addr)
588{
589 return __dev_mc_del(dev, addr, true);
590}
591EXPORT_SYMBOL(dev_mc_del_global);
592
593/**
594 * dev_mc_sync - Synchronize device's multicast list to another device
595 * @to: destination device
596 * @from: source device
597 *
598 * Add newly added addresses to the destination device and release
599 * addresses that have no users left. The source device must be
600 * locked by netif_addr_lock_bh.
601 *
602 * This function is intended to be called from the dev->set_multicast_list
603 * or dev->set_rx_mode function of layered software devices.
604 */
605int dev_mc_sync(struct net_device *to, struct net_device *from)
606{
607 int err = 0;
608
609 if (to->addr_len != from->addr_len)
610 return -EINVAL;
611
612 netif_addr_lock_bh(to);
613 err = __hw_addr_sync(&to->mc, &from->mc, to->addr_len);
614 if (!err)
615 __dev_set_rx_mode(to);
616 netif_addr_unlock_bh(to);
617 return err;
618}
619EXPORT_SYMBOL(dev_mc_sync);
620
621/**
622 * dev_mc_unsync - Remove synchronized addresses from the destination device
623 * @to: destination device
624 * @from: source device
625 *
626 * Remove all addresses that were added to the destination device by
627 * dev_mc_sync(). This function is intended to be called from the
628 * dev->stop function of layered software devices.
629 */
630void dev_mc_unsync(struct net_device *to, struct net_device *from)
631{
632 if (to->addr_len != from->addr_len)
633 return;
634
635 netif_addr_lock_bh(from);
636 netif_addr_lock(to);
637 __hw_addr_unsync(&to->mc, &from->mc, to->addr_len);
638 __dev_set_rx_mode(to);
639 netif_addr_unlock(to);
640 netif_addr_unlock_bh(from);
641}
642EXPORT_SYMBOL(dev_mc_unsync);
643
644/**
645 * dev_mc_flush - Flush multicast addresses
646 * @dev: device
647 *
648 * Flush multicast addresses.
649 */
650void dev_mc_flush(struct net_device *dev)
651{
652 netif_addr_lock_bh(dev);
653 __hw_addr_flush(&dev->mc);
654 netif_addr_unlock_bh(dev);
655}
656EXPORT_SYMBOL(dev_mc_flush);
657
658/**
659 * dev_mc_init - Init multicast address list
660 * @dev: device
661 *
662 * Init multicast address list.
663 */
664void dev_mc_init(struct net_device *dev)
665{
666 __hw_addr_init(&dev->mc);
667}
668EXPORT_SYMBOL(dev_mc_init);
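
[Illustrative sketch, not part of this commit: joining and leaving a link-layer multicast group with dev_mc_add()/dev_mc_del(). The group address is the Ethernet mapping of 224.0.0.1, chosen only as an example; locking is handled inside the helpers.]

	#include <linux/netdevice.h>
	#include <linux/if_ether.h>

	static int example_join_all_hosts(struct net_device *dev)
	{
		unsigned char grp[ETH_ALEN] = { 0x01, 0x00, 0x5e, 0x00, 0x00, 0x01 };

		/* adds the address, or bumps its refcount if already present */
		return dev_mc_add(dev, grp);
	}

	static void example_leave_all_hosts(struct net_device *dev)
	{
		unsigned char grp[ETH_ALEN] = { 0x01, 0x00, 0x5e, 0x00, 0x00, 0x01 };

		/* drops the reference; the entry is freed when the count hits zero */
		dev_mc_del(dev, grp);
	}
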
669
670#ifdef CONFIG_PROC_FS
671#include <linux/seq_file.h>
672
673static int dev_mc_seq_show(struct seq_file *seq, void *v)
674{
675 struct netdev_hw_addr *ha;
676 struct net_device *dev = v;
677
678 if (v == SEQ_START_TOKEN)
679 return 0;
680
681 netif_addr_lock_bh(dev);
682 netdev_for_each_mc_addr(ha, dev) {
683 int i;
684
685 seq_printf(seq, "%-4d %-15s %-5d %-5d ", dev->ifindex,
686 dev->name, ha->refcount, ha->global_use);
687
688 for (i = 0; i < dev->addr_len; i++)
689 seq_printf(seq, "%02x", ha->addr[i]);
690
691 seq_putc(seq, '\n');
692 }
693 netif_addr_unlock_bh(dev);
694 return 0;
695}
696
697static const struct seq_operations dev_mc_seq_ops = {
698 .start = dev_seq_start,
699 .next = dev_seq_next,
700 .stop = dev_seq_stop,
701 .show = dev_mc_seq_show,
702};
703
704static int dev_mc_seq_open(struct inode *inode, struct file *file)
705{
706 return seq_open_net(inode, file, &dev_mc_seq_ops,
707 sizeof(struct seq_net_private));
708}
709
710static const struct file_operations dev_mc_seq_fops = {
711 .owner = THIS_MODULE,
712 .open = dev_mc_seq_open,
713 .read = seq_read,
714 .llseek = seq_lseek,
715 .release = seq_release_net,
716};
717
718#endif
719
720static int __net_init dev_mc_net_init(struct net *net)
721{
722 if (!proc_net_fops_create(net, "dev_mcast", 0, &dev_mc_seq_fops))
723 return -ENOMEM;
724 return 0;
725}
726
727static void __net_exit dev_mc_net_exit(struct net *net)
728{
729 proc_net_remove(net, "dev_mcast");
730}
731
732static struct pernet_operations __net_initdata dev_mc_net_ops = {
733 .init = dev_mc_net_init,
734 .exit = dev_mc_net_exit,
735};
736
737void __init dev_mcast_init(void)
738{
739 register_pernet_subsys(&dev_mc_net_ops);
740}
741
diff --git a/net/core/dev_mcast.c b/net/core/dev_mcast.c
deleted file mode 100644
index 9e2fa39f22a3..000000000000
--- a/net/core/dev_mcast.c
+++ /dev/null
@@ -1,229 +0,0 @@
1/*
2 * Linux NET3: Multicast List maintenance.
3 *
4 * Authors:
5 * Tim Kordas <tjk@nostromo.eeap.cwru.edu>
6 * Richard Underwood <richard@wuzz.demon.co.uk>
7 *
8 * Stir fried together from the IP multicast and CAP patches above
9 * Alan Cox <alan@lxorguk.ukuu.org.uk>
10 *
11 * Fixes:
12 * Alan Cox : Update the device on a real delete
13 * rather than any time but...
14 * Alan Cox : IFF_ALLMULTI support.
15 * Alan Cox : New format set_multicast_list() calls.
16 * Gleb Natapov : Remove dev_mc_lock.
17 *
18 * This program is free software; you can redistribute it and/or
19 * modify it under the terms of the GNU General Public License
20 * as published by the Free Software Foundation; either version
21 * 2 of the License, or (at your option) any later version.
22 */
23
24#include <linux/module.h>
25#include <asm/uaccess.h>
26#include <asm/system.h>
27#include <linux/bitops.h>
28#include <linux/types.h>
29#include <linux/kernel.h>
30#include <linux/string.h>
31#include <linux/mm.h>
32#include <linux/socket.h>
33#include <linux/sockios.h>
34#include <linux/in.h>
35#include <linux/errno.h>
36#include <linux/interrupt.h>
37#include <linux/if_ether.h>
38#include <linux/inet.h>
39#include <linux/netdevice.h>
40#include <linux/etherdevice.h>
41#include <linux/proc_fs.h>
42#include <linux/seq_file.h>
43#include <linux/init.h>
44#include <net/net_namespace.h>
45#include <net/ip.h>
46#include <net/route.h>
47#include <linux/skbuff.h>
48#include <net/sock.h>
49#include <net/arp.h>
50
51
52/*
53 * Device multicast list maintenance.
54 *
55 * This is used both by IP and by the user level maintenance functions.
56 * Unlike BSD we maintain a usage count on a given multicast address so
57 * that a casual user application can add/delete multicasts used by
58 * protocols without doing damage to the protocols when it deletes the
59 * entries. It also helps IP as it tracks overlapping maps.
60 *
61 * Device mc lists are changed by bh at least if IPv6 is enabled,
62 * so that it must be bh protected.
63 *
64 * We block accesses to device mc filters with netif_tx_lock.
65 */
66
67/*
68 * Delete a device level multicast
69 */
70
71int dev_mc_delete(struct net_device *dev, void *addr, int alen, int glbl)
72{
73 int err;
74
75 netif_addr_lock_bh(dev);
76 err = __dev_addr_delete(&dev->mc_list, &dev->mc_count,
77 addr, alen, glbl);
78 if (!err) {
79 /*
80 * We have altered the list, so the card
81 * loaded filter is now wrong. Fix it
82 */
83
84 __dev_set_rx_mode(dev);
85 }
86 netif_addr_unlock_bh(dev);
87 return err;
88}
89
90/*
91 * Add a device level multicast
92 */
93
94int dev_mc_add(struct net_device *dev, void *addr, int alen, int glbl)
95{
96 int err;
97
98 netif_addr_lock_bh(dev);
99 err = __dev_addr_add(&dev->mc_list, &dev->mc_count, addr, alen, glbl);
100 if (!err)
101 __dev_set_rx_mode(dev);
102 netif_addr_unlock_bh(dev);
103 return err;
104}
105
106/**
107 * dev_mc_sync - Synchronize device's multicast list to another device
108 * @to: destination device
109 * @from: source device
110 *
111 * Add newly added addresses to the destination device and release
112 * addresses that have no users left. The source device must be
113 * locked by netif_tx_lock_bh.
114 *
115 * This function is intended to be called from the dev->set_multicast_list
116 * or dev->set_rx_mode function of layered software devices.
117 */
118int dev_mc_sync(struct net_device *to, struct net_device *from)
119{
120 int err = 0;
121
122 netif_addr_lock_bh(to);
123 err = __dev_addr_sync(&to->mc_list, &to->mc_count,
124 &from->mc_list, &from->mc_count);
125 if (!err)
126 __dev_set_rx_mode(to);
127 netif_addr_unlock_bh(to);
128
129 return err;
130}
131EXPORT_SYMBOL(dev_mc_sync);
132
133
134/**
135 * dev_mc_unsync - Remove synchronized addresses from the destination
136 * device
137 * @to: destination device
138 * @from: source device
139 *
140 * Remove all addresses that were added to the destination device by
141 * dev_mc_sync(). This function is intended to be called from the
142 * dev->stop function of layered software devices.
143 */
144void dev_mc_unsync(struct net_device *to, struct net_device *from)
145{
146 netif_addr_lock_bh(from);
147 netif_addr_lock(to);
148
149 __dev_addr_unsync(&to->mc_list, &to->mc_count,
150 &from->mc_list, &from->mc_count);
151 __dev_set_rx_mode(to);
152
153 netif_addr_unlock(to);
154 netif_addr_unlock_bh(from);
155}
156EXPORT_SYMBOL(dev_mc_unsync);
157
158#ifdef CONFIG_PROC_FS
159static int dev_mc_seq_show(struct seq_file *seq, void *v)
160{
161 struct dev_addr_list *m;
162 struct net_device *dev = v;
163
164 if (v == SEQ_START_TOKEN)
165 return 0;
166
167 netif_addr_lock_bh(dev);
168 for (m = dev->mc_list; m; m = m->next) {
169 int i;
170
171 seq_printf(seq, "%-4d %-15s %-5d %-5d ", dev->ifindex,
172 dev->name, m->dmi_users, m->dmi_gusers);
173
174 for (i = 0; i < m->dmi_addrlen; i++)
175 seq_printf(seq, "%02x", m->dmi_addr[i]);
176
177 seq_putc(seq, '\n');
178 }
179 netif_addr_unlock_bh(dev);
180 return 0;
181}
182
183static const struct seq_operations dev_mc_seq_ops = {
184 .start = dev_seq_start,
185 .next = dev_seq_next,
186 .stop = dev_seq_stop,
187 .show = dev_mc_seq_show,
188};
189
190static int dev_mc_seq_open(struct inode *inode, struct file *file)
191{
192 return seq_open_net(inode, file, &dev_mc_seq_ops,
193 sizeof(struct seq_net_private));
194}
195
196static const struct file_operations dev_mc_seq_fops = {
197 .owner = THIS_MODULE,
198 .open = dev_mc_seq_open,
199 .read = seq_read,
200 .llseek = seq_lseek,
201 .release = seq_release_net,
202};
203
204#endif
205
206static int __net_init dev_mc_net_init(struct net *net)
207{
208 if (!proc_net_fops_create(net, "dev_mcast", 0, &dev_mc_seq_fops))
209 return -ENOMEM;
210 return 0;
211}
212
213static void __net_exit dev_mc_net_exit(struct net *net)
214{
215 proc_net_remove(net, "dev_mcast");
216}
217
218static struct pernet_operations __net_initdata dev_mc_net_ops = {
219 .init = dev_mc_net_init,
220 .exit = dev_mc_net_exit,
221};
222
223void __init dev_mcast_init(void)
224{
225 register_pernet_subsys(&dev_mc_net_ops);
226}
227
228EXPORT_SYMBOL(dev_mc_add);
229EXPORT_SYMBOL(dev_mc_delete);
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index b8e9d3a86887..36e603c78ce9 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -21,6 +21,7 @@
21#include <linux/percpu.h> 21#include <linux/percpu.h>
22#include <linux/timer.h> 22#include <linux/timer.h>
23#include <linux/bitops.h> 23#include <linux/bitops.h>
24#include <linux/slab.h>
24#include <net/genetlink.h> 25#include <net/genetlink.h>
25#include <net/netevent.h> 26#include <net/netevent.h>
26 27
@@ -171,12 +172,12 @@ out:
171 return; 172 return;
172} 173}
173 174
174static void trace_kfree_skb_hit(struct sk_buff *skb, void *location) 175static void trace_kfree_skb_hit(void *ignore, struct sk_buff *skb, void *location)
175{ 176{
176 trace_drop_common(skb, location); 177 trace_drop_common(skb, location);
177} 178}
178 179
179static void trace_napi_poll_hit(struct napi_struct *napi) 180static void trace_napi_poll_hit(void *ignore, struct napi_struct *napi)
180{ 181{
181 struct dm_hw_stat_delta *new_stat; 182 struct dm_hw_stat_delta *new_stat;
182 183
@@ -222,14 +223,19 @@ static int set_all_monitor_traces(int state)
222 223
223 spin_lock(&trace_state_lock); 224 spin_lock(&trace_state_lock);
224 225
226 if (state == trace_state) {
227 rc = -EAGAIN;
228 goto out_unlock;
229 }
230
225 switch (state) { 231 switch (state) {
226 case TRACE_ON: 232 case TRACE_ON:
227 rc |= register_trace_kfree_skb(trace_kfree_skb_hit); 233 rc |= register_trace_kfree_skb(trace_kfree_skb_hit, NULL);
228 rc |= register_trace_napi_poll(trace_napi_poll_hit); 234 rc |= register_trace_napi_poll(trace_napi_poll_hit, NULL);
229 break; 235 break;
230 case TRACE_OFF: 236 case TRACE_OFF:
231 rc |= unregister_trace_kfree_skb(trace_kfree_skb_hit); 237 rc |= unregister_trace_kfree_skb(trace_kfree_skb_hit, NULL);
232 rc |= unregister_trace_napi_poll(trace_napi_poll_hit); 238 rc |= unregister_trace_napi_poll(trace_napi_poll_hit, NULL);
233 239
234 tracepoint_synchronize_unregister(); 240 tracepoint_synchronize_unregister();
235 241
@@ -250,11 +256,12 @@ static int set_all_monitor_traces(int state)
250 256
251 if (!rc) 257 if (!rc)
252 trace_state = state; 258 trace_state = state;
259 else
260 rc = -EINPROGRESS;
253 261
262out_unlock:
254 spin_unlock(&trace_state_lock); 263 spin_unlock(&trace_state_lock);
255 264
256 if (rc)
257 return -EINPROGRESS;
258 return rc; 265 return rc;
259} 266}
260 267
@@ -296,7 +303,6 @@ static int dropmon_net_event(struct notifier_block *ev_block,
296 303
297 new_stat->dev = dev; 304 new_stat->dev = dev;
298 new_stat->last_rx = jiffies; 305 new_stat->last_rx = jiffies;
299 INIT_RCU_HEAD(&new_stat->rcu);
300 spin_lock(&trace_state_lock); 306 spin_lock(&trace_state_lock);
301 list_add_rcu(&new_stat->list, &hw_stats_list); 307 list_add_rcu(&new_stat->list, &hw_stats_list);
302 spin_unlock(&trace_state_lock); 308 spin_unlock(&trace_state_lock);
@@ -341,9 +347,9 @@ static struct notifier_block dropmon_net_notifier = {
341 347
342static int __init init_net_drop_monitor(void) 348static int __init init_net_drop_monitor(void)
343{ 349{
344 int cpu;
345 int rc, i, ret;
346 struct per_cpu_dm_data *data; 350 struct per_cpu_dm_data *data;
351 int cpu, rc;
352
347 printk(KERN_INFO "Initalizing network drop monitor service\n"); 353 printk(KERN_INFO "Initalizing network drop monitor service\n");
348 354
349 if (sizeof(void *) > 8) { 355 if (sizeof(void *) > 8) {
@@ -351,21 +357,12 @@ static int __init init_net_drop_monitor(void)
351 return -ENOSPC; 357 return -ENOSPC;
352 } 358 }
353 359
354 if (genl_register_family(&net_drop_monitor_family) < 0) { 360 rc = genl_register_family_with_ops(&net_drop_monitor_family,
361 dropmon_ops,
362 ARRAY_SIZE(dropmon_ops));
363 if (rc) {
355 printk(KERN_ERR "Could not create drop monitor netlink family\n"); 364 printk(KERN_ERR "Could not create drop monitor netlink family\n");
356 return -EFAULT; 365 return rc;
357 }
358
359 rc = -EFAULT;
360
361 for (i = 0; i < ARRAY_SIZE(dropmon_ops); i++) {
362 ret = genl_register_ops(&net_drop_monitor_family,
363 &dropmon_ops[i]);
364 if (ret) {
365 printk(KERN_CRIT "Failed to register operation %d\n",
366 dropmon_ops[i].cmd);
367 goto out_unreg;
368 }
369 } 366 }
370 367
371 rc = register_netdevice_notifier(&dropmon_net_notifier); 368 rc = register_netdevice_notifier(&dropmon_net_notifier);
diff --git a/net/core/dst.c b/net/core/dst.c
index 57bc4d5b8d08..6c41b1fac3db 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -12,11 +12,13 @@
12#include <linux/workqueue.h> 12#include <linux/workqueue.h>
13#include <linux/mm.h> 13#include <linux/mm.h>
14#include <linux/module.h> 14#include <linux/module.h>
15#include <linux/slab.h>
15#include <linux/netdevice.h> 16#include <linux/netdevice.h>
16#include <linux/skbuff.h> 17#include <linux/skbuff.h>
17#include <linux/string.h> 18#include <linux/string.h>
18#include <linux/types.h> 19#include <linux/types.h>
19#include <net/net_namespace.h> 20#include <net/net_namespace.h>
21#include <linux/sched.h>
20 22
21#include <net/dst.h> 23#include <net/dst.h>
22 24
@@ -42,7 +44,7 @@ static atomic_t dst_total = ATOMIC_INIT(0);
42 */ 44 */
43static struct { 45static struct {
44 spinlock_t lock; 46 spinlock_t lock;
45 struct dst_entry *list; 47 struct dst_entry *list;
46 unsigned long timer_inc; 48 unsigned long timer_inc;
47 unsigned long timer_expires; 49 unsigned long timer_expires;
48} dst_garbage = { 50} dst_garbage = {
@@ -50,7 +52,7 @@ static struct {
50 .timer_inc = DST_GC_MAX, 52 .timer_inc = DST_GC_MAX,
51}; 53};
52static void dst_gc_task(struct work_struct *work); 54static void dst_gc_task(struct work_struct *work);
53static void ___dst_free(struct dst_entry * dst); 55static void ___dst_free(struct dst_entry *dst);
54 56
55static DECLARE_DELAYED_WORK(dst_gc_work, dst_gc_task); 57static DECLARE_DELAYED_WORK(dst_gc_work, dst_gc_task);
56 58
@@ -79,6 +81,7 @@ loop:
79 while ((dst = next) != NULL) { 81 while ((dst = next) != NULL) {
80 next = dst->next; 82 next = dst->next;
81 prefetch(&next->next); 83 prefetch(&next->next);
84 cond_resched();
82 if (likely(atomic_read(&dst->__refcnt))) { 85 if (likely(atomic_read(&dst->__refcnt))) {
83 last->next = dst; 86 last->next = dst;
84 last = dst; 87 last = dst;
@@ -133,8 +136,8 @@ loop:
133 } 136 }
134 expires = dst_garbage.timer_expires; 137 expires = dst_garbage.timer_expires;
135 /* 138 /*
136 * if the next desired timer is more than 4 seconds in the future 139 * if the next desired timer is more than 4 seconds in the
137 * then round the timer to whole seconds 140 * future then round the timer to whole seconds
138 */ 141 */
139 if (expires > 4*HZ) 142 if (expires > 4*HZ)
140 expires = round_jiffies_relative(expires); 143 expires = round_jiffies_relative(expires);
@@ -149,7 +152,8 @@ loop:
149 " expires: %lu elapsed: %lu us\n", 152 " expires: %lu elapsed: %lu us\n",
150 atomic_read(&dst_total), delayed, work_performed, 153 atomic_read(&dst_total), delayed, work_performed,
151 expires, 154 expires,
152 elapsed.tv_sec * USEC_PER_SEC + elapsed.tv_nsec / NSEC_PER_USEC); 155 elapsed.tv_sec * USEC_PER_SEC +
156 elapsed.tv_nsec / NSEC_PER_USEC);
153#endif 157#endif
154} 158}
155 159
@@ -160,9 +164,9 @@ int dst_discard(struct sk_buff *skb)
160} 164}
161EXPORT_SYMBOL(dst_discard); 165EXPORT_SYMBOL(dst_discard);
162 166
163void * dst_alloc(struct dst_ops * ops) 167void *dst_alloc(struct dst_ops *ops)
164{ 168{
165 struct dst_entry * dst; 169 struct dst_entry *dst;
166 170
167 if (ops->gc && atomic_read(&ops->entries) > ops->gc_thresh) { 171 if (ops->gc && atomic_read(&ops->entries) > ops->gc_thresh) {
168 if (ops->gc(ops)) 172 if (ops->gc(ops))
@@ -182,19 +186,19 @@ void * dst_alloc(struct dst_ops * ops)
182 atomic_inc(&ops->entries); 186 atomic_inc(&ops->entries);
183 return dst; 187 return dst;
184} 188}
189EXPORT_SYMBOL(dst_alloc);
185 190
186static void ___dst_free(struct dst_entry * dst) 191static void ___dst_free(struct dst_entry *dst)
187{ 192{
188 /* The first case (dev==NULL) is required, when 193 /* The first case (dev==NULL) is required, when
189 protocol module is unloaded. 194 protocol module is unloaded.
190 */ 195 */
191 if (dst->dev == NULL || !(dst->dev->flags&IFF_UP)) { 196 if (dst->dev == NULL || !(dst->dev->flags&IFF_UP))
192 dst->input = dst->output = dst_discard; 197 dst->input = dst->output = dst_discard;
193 }
194 dst->obsolete = 2; 198 dst->obsolete = 2;
195} 199}
196 200
197void __dst_free(struct dst_entry * dst) 201void __dst_free(struct dst_entry *dst)
198{ 202{
199 spin_lock_bh(&dst_garbage.lock); 203 spin_lock_bh(&dst_garbage.lock);
200 ___dst_free(dst); 204 ___dst_free(dst);
@@ -208,6 +212,7 @@ void __dst_free(struct dst_entry * dst)
208 } 212 }
209 spin_unlock_bh(&dst_garbage.lock); 213 spin_unlock_bh(&dst_garbage.lock);
210} 214}
215EXPORT_SYMBOL(__dst_free);
211 216
212struct dst_entry *dst_destroy(struct dst_entry * dst) 217struct dst_entry *dst_destroy(struct dst_entry * dst)
213{ 218{
@@ -259,15 +264,16 @@ again:
259 } 264 }
260 return NULL; 265 return NULL;
261} 266}
267EXPORT_SYMBOL(dst_destroy);
262 268
263void dst_release(struct dst_entry *dst) 269void dst_release(struct dst_entry *dst)
264{ 270{
265 if (dst) { 271 if (dst) {
266 int newrefcnt; 272 int newrefcnt;
267 273
268 smp_mb__before_atomic_dec(); 274 smp_mb__before_atomic_dec();
269 newrefcnt = atomic_dec_return(&dst->__refcnt); 275 newrefcnt = atomic_dec_return(&dst->__refcnt);
270 WARN_ON(newrefcnt < 0); 276 WARN_ON(newrefcnt < 0);
271 } 277 }
272} 278}
273EXPORT_SYMBOL(dst_release); 279EXPORT_SYMBOL(dst_release);
@@ -280,8 +286,8 @@ EXPORT_SYMBOL(dst_release);
280 * 286 *
281 * Commented and originally written by Alexey. 287 * Commented and originally written by Alexey.
282 */ 288 */
283static inline void dst_ifdown(struct dst_entry *dst, struct net_device *dev, 289static void dst_ifdown(struct dst_entry *dst, struct net_device *dev,
284 int unregister) 290 int unregister)
285{ 291{
286 if (dst->ops->ifdown) 292 if (dst->ops->ifdown)
287 dst->ops->ifdown(dst, dev, unregister); 293 dst->ops->ifdown(dst, dev, unregister);
@@ -303,7 +309,8 @@ static inline void dst_ifdown(struct dst_entry *dst, struct net_device *dev,
303 } 309 }
304} 310}
305 311
306static int dst_dev_event(struct notifier_block *this, unsigned long event, void *ptr) 312static int dst_dev_event(struct notifier_block *this, unsigned long event,
313 void *ptr)
307{ 314{
308 struct net_device *dev = ptr; 315 struct net_device *dev = ptr;
309 struct dst_entry *dst, *last = NULL; 316 struct dst_entry *dst, *last = NULL;
@@ -326,9 +333,8 @@ static int dst_dev_event(struct notifier_block *this, unsigned long event, void
326 last->next = dst; 333 last->next = dst;
327 else 334 else
328 dst_busy_list = dst; 335 dst_busy_list = dst;
329 for (; dst; dst = dst->next) { 336 for (; dst; dst = dst->next)
330 dst_ifdown(dst, dev, event != NETDEV_DOWN); 337 dst_ifdown(dst, dev, event != NETDEV_DOWN);
331 }
332 mutex_unlock(&dst_gc_mutex); 338 mutex_unlock(&dst_gc_mutex);
333 break; 339 break;
334 } 340 }
@@ -343,7 +349,3 @@ void __init dst_init(void)
343{ 349{
344 register_netdevice_notifier(&dst_dev_notifier); 350 register_netdevice_notifier(&dst_dev_notifier);
345} 351}
346
347EXPORT_SYMBOL(__dst_free);
348EXPORT_SYMBOL(dst_alloc);
349EXPORT_SYMBOL(dst_destroy);
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index d8aee584e8d1..8451ab481095 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -17,7 +17,9 @@
17#include <linux/errno.h> 17#include <linux/errno.h>
18#include <linux/ethtool.h> 18#include <linux/ethtool.h>
19#include <linux/netdevice.h> 19#include <linux/netdevice.h>
20#include <asm/uaccess.h> 20#include <linux/bitops.h>
21#include <linux/uaccess.h>
22#include <linux/slab.h>
21 23
22/* 24/*
23 * Some useful ethtool_ops methods that're device independent. 25 * Some useful ethtool_ops methods that're device independent.
@@ -29,6 +31,7 @@ u32 ethtool_op_get_link(struct net_device *dev)
29{ 31{
30 return netif_carrier_ok(dev) ? 1 : 0; 32 return netif_carrier_ok(dev) ? 1 : 0;
31} 33}
34EXPORT_SYMBOL(ethtool_op_get_link);
32 35
33u32 ethtool_op_get_rx_csum(struct net_device *dev) 36u32 ethtool_op_get_rx_csum(struct net_device *dev)
34{ 37{
@@ -61,6 +64,7 @@ int ethtool_op_set_tx_hw_csum(struct net_device *dev, u32 data)
61 64
62 return 0; 65 return 0;
63} 66}
67EXPORT_SYMBOL(ethtool_op_set_tx_hw_csum);
64 68
65int ethtool_op_set_tx_ipv6_csum(struct net_device *dev, u32 data) 69int ethtool_op_set_tx_ipv6_csum(struct net_device *dev, u32 data)
66{ 70{
@@ -71,11 +75,13 @@ int ethtool_op_set_tx_ipv6_csum(struct net_device *dev, u32 data)
71 75
72 return 0; 76 return 0;
73} 77}
78EXPORT_SYMBOL(ethtool_op_set_tx_ipv6_csum);
74 79
75u32 ethtool_op_get_sg(struct net_device *dev) 80u32 ethtool_op_get_sg(struct net_device *dev)
76{ 81{
77 return (dev->features & NETIF_F_SG) != 0; 82 return (dev->features & NETIF_F_SG) != 0;
78} 83}
84EXPORT_SYMBOL(ethtool_op_get_sg);
79 85
80int ethtool_op_set_sg(struct net_device *dev, u32 data) 86int ethtool_op_set_sg(struct net_device *dev, u32 data)
81{ 87{
@@ -86,11 +92,13 @@ int ethtool_op_set_sg(struct net_device *dev, u32 data)
86 92
87 return 0; 93 return 0;
88} 94}
95EXPORT_SYMBOL(ethtool_op_set_sg);
89 96
90u32 ethtool_op_get_tso(struct net_device *dev) 97u32 ethtool_op_get_tso(struct net_device *dev)
91{ 98{
92 return (dev->features & NETIF_F_TSO) != 0; 99 return (dev->features & NETIF_F_TSO) != 0;
93} 100}
101EXPORT_SYMBOL(ethtool_op_get_tso);
94 102
95int ethtool_op_set_tso(struct net_device *dev, u32 data) 103int ethtool_op_set_tso(struct net_device *dev, u32 data)
96{ 104{
@@ -101,11 +109,13 @@ int ethtool_op_set_tso(struct net_device *dev, u32 data)
101 109
102 return 0; 110 return 0;
103} 111}
112EXPORT_SYMBOL(ethtool_op_set_tso);
104 113
105u32 ethtool_op_get_ufo(struct net_device *dev) 114u32 ethtool_op_get_ufo(struct net_device *dev)
106{ 115{
107 return (dev->features & NETIF_F_UFO) != 0; 116 return (dev->features & NETIF_F_UFO) != 0;
108} 117}
118EXPORT_SYMBOL(ethtool_op_get_ufo);
109 119
110int ethtool_op_set_ufo(struct net_device *dev, u32 data) 120int ethtool_op_set_ufo(struct net_device *dev, u32 data)
111{ 121{
@@ -115,12 +125,13 @@ int ethtool_op_set_ufo(struct net_device *dev, u32 data)
115 dev->features &= ~NETIF_F_UFO; 125 dev->features &= ~NETIF_F_UFO;
116 return 0; 126 return 0;
117} 127}
128EXPORT_SYMBOL(ethtool_op_set_ufo);
118 129
119/* the following list of flags are the same as their associated 130/* the following list of flags are the same as their associated
120 * NETIF_F_xxx values in include/linux/netdevice.h 131 * NETIF_F_xxx values in include/linux/netdevice.h
121 */ 132 */
122static const u32 flags_dup_features = 133static const u32 flags_dup_features =
123 ETH_FLAG_LRO; 134 (ETH_FLAG_LRO | ETH_FLAG_NTUPLE | ETH_FLAG_RXHASH);
124 135
125u32 ethtool_op_get_flags(struct net_device *dev) 136u32 ethtool_op_get_flags(struct net_device *dev)
126{ 137{
@@ -131,22 +142,36 @@ u32 ethtool_op_get_flags(struct net_device *dev)
131 142
132 return dev->features & flags_dup_features; 143 return dev->features & flags_dup_features;
133} 144}
145EXPORT_SYMBOL(ethtool_op_get_flags);
134 146
135int ethtool_op_set_flags(struct net_device *dev, u32 data) 147int ethtool_op_set_flags(struct net_device *dev, u32 data, u32 supported)
136{ 148{
137 if (data & ETH_FLAG_LRO) 149 if (data & ~supported)
138 dev->features |= NETIF_F_LRO; 150 return -EINVAL;
139 else
140 dev->features &= ~NETIF_F_LRO;
141 151
152 dev->features = ((dev->features & ~flags_dup_features) |
153 (data & flags_dup_features));
142 return 0; 154 return 0;
143} 155}
156EXPORT_SYMBOL(ethtool_op_set_flags);
157
158void ethtool_ntuple_flush(struct net_device *dev)
159{
160 struct ethtool_rx_ntuple_flow_spec_container *fsc, *f;
161
162 list_for_each_entry_safe(fsc, f, &dev->ethtool_ntuple_list.list, list) {
163 list_del(&fsc->list);
164 kfree(fsc);
165 }
166 dev->ethtool_ntuple_list.count = 0;
167}
168EXPORT_SYMBOL(ethtool_ntuple_flush);
144 169
145/* Handlers for each ethtool command */ 170/* Handlers for each ethtool command */
146 171
147static int ethtool_get_settings(struct net_device *dev, void __user *useraddr) 172static int ethtool_get_settings(struct net_device *dev, void __user *useraddr)
148{ 173{
149 struct ethtool_cmd cmd = { ETHTOOL_GSET }; 174 struct ethtool_cmd cmd = { .cmd = ETHTOOL_GSET };
150 int err; 175 int err;
151 176
152 if (!dev->ethtool_ops->get_settings) 177 if (!dev->ethtool_ops->get_settings)
@@ -174,7 +199,8 @@ static int ethtool_set_settings(struct net_device *dev, void __user *useraddr)
174 return dev->ethtool_ops->set_settings(dev, &cmd); 199 return dev->ethtool_ops->set_settings(dev, &cmd);
175} 200}
176 201
177static int ethtool_get_drvinfo(struct net_device *dev, void __user *useraddr) 202static noinline_for_stack int ethtool_get_drvinfo(struct net_device *dev,
203 void __user *useraddr)
178{ 204{
179 struct ethtool_drvinfo info; 205 struct ethtool_drvinfo info;
180 const struct ethtool_ops *ops = dev->ethtool_ops; 206 const struct ethtool_ops *ops = dev->ethtool_ops;
@@ -186,6 +212,10 @@ static int ethtool_get_drvinfo(struct net_device *dev, void __user *useraddr)
186 info.cmd = ETHTOOL_GDRVINFO; 212 info.cmd = ETHTOOL_GDRVINFO;
187 ops->get_drvinfo(dev, &info); 213 ops->get_drvinfo(dev, &info);
188 214
215 /*
216 * this method of obtaining string set info is deprecated;
217 * Use ETHTOOL_GSSET_INFO instead.
218 */
189 if (ops->get_sset_count) { 219 if (ops->get_sset_count) {
190 int rc; 220 int rc;
191 221
@@ -209,22 +239,94 @@ static int ethtool_get_drvinfo(struct net_device *dev, void __user *useraddr)
209 return 0; 239 return 0;
210} 240}
211 241
212static int ethtool_set_rxnfc(struct net_device *dev, void __user *useraddr) 242static noinline_for_stack int ethtool_get_sset_info(struct net_device *dev,
243 void __user *useraddr)
213{ 244{
214 struct ethtool_rxnfc cmd; 245 struct ethtool_sset_info info;
246 const struct ethtool_ops *ops = dev->ethtool_ops;
247 u64 sset_mask;
248 int i, idx = 0, n_bits = 0, ret, rc;
249 u32 *info_buf = NULL;
250
251 if (!ops->get_sset_count)
252 return -EOPNOTSUPP;
253
254 if (copy_from_user(&info, useraddr, sizeof(info)))
255 return -EFAULT;
256
257 /* store copy of mask, because we zero struct later on */
258 sset_mask = info.sset_mask;
259 if (!sset_mask)
260 return 0;
261
262 /* calculate size of return buffer */
263 n_bits = hweight64(sset_mask);
264
265 memset(&info, 0, sizeof(info));
266 info.cmd = ETHTOOL_GSSET_INFO;
267
268 info_buf = kzalloc(n_bits * sizeof(u32), GFP_USER);
269 if (!info_buf)
270 return -ENOMEM;
271
272 /*
273 * fill return buffer based on input bitmask and successful
274 * get_sset_count return
275 */
276 for (i = 0; i < 64; i++) {
277 if (!(sset_mask & (1ULL << i)))
278 continue;
279
280 rc = ops->get_sset_count(dev, i);
281 if (rc >= 0) {
282 info.sset_mask |= (1ULL << i);
283 info_buf[idx++] = rc;
284 }
285 }
286
287 ret = -EFAULT;
288 if (copy_to_user(useraddr, &info, sizeof(info)))
289 goto out;
290
291 useraddr += offsetof(struct ethtool_sset_info, data);
292 if (copy_to_user(useraddr, info_buf, idx * sizeof(u32)))
293 goto out;
294
295 ret = 0;
296
297out:
298 kfree(info_buf);
299 return ret;
300}
301
302static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev,
303 u32 cmd, void __user *useraddr)
304{
305 struct ethtool_rxnfc info;
306 size_t info_size = sizeof(info);
215 307
216 if (!dev->ethtool_ops->set_rxnfc) 308 if (!dev->ethtool_ops->set_rxnfc)
217 return -EOPNOTSUPP; 309 return -EOPNOTSUPP;
218 310
219 if (copy_from_user(&cmd, useraddr, sizeof(cmd))) 311 /* struct ethtool_rxnfc was originally defined for
312 * ETHTOOL_{G,S}RXFH with only the cmd, flow_type and data
313 * members. User-space might still be using that
314 * definition. */
315 if (cmd == ETHTOOL_SRXFH)
316 info_size = (offsetof(struct ethtool_rxnfc, data) +
317 sizeof(info.data));
318
319 if (copy_from_user(&info, useraddr, info_size))
220 return -EFAULT; 320 return -EFAULT;
221 321
222 return dev->ethtool_ops->set_rxnfc(dev, &cmd); 322 return dev->ethtool_ops->set_rxnfc(dev, &info);
223} 323}
224 324
225static int ethtool_get_rxnfc(struct net_device *dev, void __user *useraddr) 325static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev,
326 u32 cmd, void __user *useraddr)
226{ 327{
227 struct ethtool_rxnfc info; 328 struct ethtool_rxnfc info;
329 size_t info_size = sizeof(info);
228 const struct ethtool_ops *ops = dev->ethtool_ops; 330 const struct ethtool_ops *ops = dev->ethtool_ops;
229 int ret; 331 int ret;
230 void *rule_buf = NULL; 332 void *rule_buf = NULL;
@@ -232,13 +334,22 @@ static int ethtool_get_rxnfc(struct net_device *dev, void __user *useraddr)
232 if (!ops->get_rxnfc) 334 if (!ops->get_rxnfc)
233 return -EOPNOTSUPP; 335 return -EOPNOTSUPP;
234 336
235 if (copy_from_user(&info, useraddr, sizeof(info))) 337 /* struct ethtool_rxnfc was originally defined for
338 * ETHTOOL_{G,S}RXFH with only the cmd, flow_type and data
339 * members. User-space might still be using that
340 * definition. */
341 if (cmd == ETHTOOL_GRXFH)
342 info_size = (offsetof(struct ethtool_rxnfc, data) +
343 sizeof(info.data));
344
345 if (copy_from_user(&info, useraddr, info_size))
236 return -EFAULT; 346 return -EFAULT;
237 347
238 if (info.cmd == ETHTOOL_GRXCLSRLALL) { 348 if (info.cmd == ETHTOOL_GRXCLSRLALL) {
239 if (info.rule_cnt > 0) { 349 if (info.rule_cnt > 0) {
240 rule_buf = kmalloc(info.rule_cnt * sizeof(u32), 350 if (info.rule_cnt <= KMALLOC_MAX_SIZE / sizeof(u32))
241 GFP_USER); 351 rule_buf = kzalloc(info.rule_cnt * sizeof(u32),
352 GFP_USER);
242 if (!rule_buf) 353 if (!rule_buf)
243 return -ENOMEM; 354 return -ENOMEM;
244 } 355 }
@@ -249,7 +360,7 @@ static int ethtool_get_rxnfc(struct net_device *dev, void __user *useraddr)
249 goto err_out; 360 goto err_out;
250 361
251 ret = -EFAULT; 362 ret = -EFAULT;
252 if (copy_to_user(useraddr, &info, sizeof(info))) 363 if (copy_to_user(useraddr, &info, info_size))
253 goto err_out; 364 goto err_out;
254 365
255 if (rule_buf) { 366 if (rule_buf) {
@@ -266,6 +377,387 @@ err_out:
266 return ret; 377 return ret;
267} 378}
268 379
380static noinline_for_stack int ethtool_get_rxfh_indir(struct net_device *dev,
381 void __user *useraddr)
382{
383 struct ethtool_rxfh_indir *indir;
384 u32 table_size;
385 size_t full_size;
386 int ret;
387
388 if (!dev->ethtool_ops->get_rxfh_indir)
389 return -EOPNOTSUPP;
390
391 if (copy_from_user(&table_size,
392 useraddr + offsetof(struct ethtool_rxfh_indir, size),
393 sizeof(table_size)))
394 return -EFAULT;
395
396 if (table_size >
397 (KMALLOC_MAX_SIZE - sizeof(*indir)) / sizeof(*indir->ring_index))
398 return -ENOMEM;
399 full_size = sizeof(*indir) + sizeof(*indir->ring_index) * table_size;
400 indir = kzalloc(full_size, GFP_USER);
401 if (!indir)
402 return -ENOMEM;
403
404 indir->cmd = ETHTOOL_GRXFHINDIR;
405 indir->size = table_size;
406 ret = dev->ethtool_ops->get_rxfh_indir(dev, indir);
407 if (ret)
408 goto out;
409
410 if (copy_to_user(useraddr, indir, full_size))
411 ret = -EFAULT;
412
413out:
414 kfree(indir);
415 return ret;
416}
417
418static noinline_for_stack int ethtool_set_rxfh_indir(struct net_device *dev,
419 void __user *useraddr)
420{
421 struct ethtool_rxfh_indir *indir;
422 u32 table_size;
423 size_t full_size;
424 int ret;
425
426 if (!dev->ethtool_ops->set_rxfh_indir)
427 return -EOPNOTSUPP;
428
429 if (copy_from_user(&table_size,
430 useraddr + offsetof(struct ethtool_rxfh_indir, size),
431 sizeof(table_size)))
432 return -EFAULT;
433
434 if (table_size >
435 (KMALLOC_MAX_SIZE - sizeof(*indir)) / sizeof(*indir->ring_index))
436 return -ENOMEM;
437 full_size = sizeof(*indir) + sizeof(*indir->ring_index) * table_size;
438 indir = kmalloc(full_size, GFP_USER);
439 if (!indir)
440 return -ENOMEM;
441
442 if (copy_from_user(indir, useraddr, full_size)) {
443 ret = -EFAULT;
444 goto out;
445 }
446
447 ret = dev->ethtool_ops->set_rxfh_indir(dev, indir);
448
449out:
450 kfree(indir);
451 return ret;
452}
453
454static void __rx_ntuple_filter_add(struct ethtool_rx_ntuple_list *list,
455 struct ethtool_rx_ntuple_flow_spec *spec,
456 struct ethtool_rx_ntuple_flow_spec_container *fsc)
457{
458
459 /* don't add filters forever */
460 if (list->count >= ETHTOOL_MAX_NTUPLE_LIST_ENTRY) {
461 /* free the container */
462 kfree(fsc);
463 return;
464 }
465
466 /* Copy the whole filter over */
467 fsc->fs.flow_type = spec->flow_type;
468 memcpy(&fsc->fs.h_u, &spec->h_u, sizeof(spec->h_u));
469 memcpy(&fsc->fs.m_u, &spec->m_u, sizeof(spec->m_u));
470
471 fsc->fs.vlan_tag = spec->vlan_tag;
472 fsc->fs.vlan_tag_mask = spec->vlan_tag_mask;
473 fsc->fs.data = spec->data;
474 fsc->fs.data_mask = spec->data_mask;
475 fsc->fs.action = spec->action;
476
477 /* add to the list */
478 list_add_tail_rcu(&fsc->list, &list->list);
479 list->count++;
480}
481
482static noinline_for_stack int ethtool_set_rx_ntuple(struct net_device *dev,
483 void __user *useraddr)
484{
485 struct ethtool_rx_ntuple cmd;
486 const struct ethtool_ops *ops = dev->ethtool_ops;
487 struct ethtool_rx_ntuple_flow_spec_container *fsc = NULL;
488 int ret;
489
490 if (!(dev->features & NETIF_F_NTUPLE))
491 return -EINVAL;
492
493 if (copy_from_user(&cmd, useraddr, sizeof(cmd)))
494 return -EFAULT;
495
496 /*
497 * Cache filter in dev struct for GET operation only if
498 * the underlying driver doesn't have its own GET operation, and
499 * only if the filter was added successfully. First make sure we
500 * can allocate the filter, then continue if successful.
501 */
502 if (!ops->get_rx_ntuple) {
503 fsc = kmalloc(sizeof(*fsc), GFP_ATOMIC);
504 if (!fsc)
505 return -ENOMEM;
506 }
507
508 ret = ops->set_rx_ntuple(dev, &cmd);
509 if (ret) {
510 kfree(fsc);
511 return ret;
512 }
513
514 if (!ops->get_rx_ntuple)
515 __rx_ntuple_filter_add(&dev->ethtool_ntuple_list, &cmd.fs, fsc);
516
517 return ret;
518}
519
520static int ethtool_get_rx_ntuple(struct net_device *dev, void __user *useraddr)
521{
522 struct ethtool_gstrings gstrings;
523 const struct ethtool_ops *ops = dev->ethtool_ops;
524 struct ethtool_rx_ntuple_flow_spec_container *fsc;
525 u8 *data;
526 char *p;
527 int ret, i, num_strings = 0;
528
529 if (!ops->get_sset_count)
530 return -EOPNOTSUPP;
531
532 if (copy_from_user(&gstrings, useraddr, sizeof(gstrings)))
533 return -EFAULT;
534
535 ret = ops->get_sset_count(dev, gstrings.string_set);
536 if (ret < 0)
537 return ret;
538
539 gstrings.len = ret;
540
541 data = kzalloc(gstrings.len * ETH_GSTRING_LEN, GFP_USER);
542 if (!data)
543 return -ENOMEM;
544
545 if (ops->get_rx_ntuple) {
546 /* driver-specific filter grab */
547 ret = ops->get_rx_ntuple(dev, gstrings.string_set, data);
548 goto copy;
549 }
550
551 /* default ethtool filter grab */
552 i = 0;
553 p = (char *)data;
554 list_for_each_entry(fsc, &dev->ethtool_ntuple_list.list, list) {
555 sprintf(p, "Filter %d:\n", i);
556 p += ETH_GSTRING_LEN;
557 num_strings++;
558
559 switch (fsc->fs.flow_type) {
560 case TCP_V4_FLOW:
561 sprintf(p, "\tFlow Type: TCP\n");
562 p += ETH_GSTRING_LEN;
563 num_strings++;
564 break;
565 case UDP_V4_FLOW:
566 sprintf(p, "\tFlow Type: UDP\n");
567 p += ETH_GSTRING_LEN;
568 num_strings++;
569 break;
570 case SCTP_V4_FLOW:
571 sprintf(p, "\tFlow Type: SCTP\n");
572 p += ETH_GSTRING_LEN;
573 num_strings++;
574 break;
575 case AH_ESP_V4_FLOW:
576 sprintf(p, "\tFlow Type: AH ESP\n");
577 p += ETH_GSTRING_LEN;
578 num_strings++;
579 break;
580 case ESP_V4_FLOW:
581 sprintf(p, "\tFlow Type: ESP\n");
582 p += ETH_GSTRING_LEN;
583 num_strings++;
584 break;
585 case IP_USER_FLOW:
586 sprintf(p, "\tFlow Type: Raw IP\n");
587 p += ETH_GSTRING_LEN;
588 num_strings++;
589 break;
590 case IPV4_FLOW:
591 sprintf(p, "\tFlow Type: IPv4\n");
592 p += ETH_GSTRING_LEN;
593 num_strings++;
594 break;
595 default:
596 sprintf(p, "\tFlow Type: Unknown\n");
597 p += ETH_GSTRING_LEN;
598 num_strings++;
599 goto unknown_filter;
600 }
601
602 /* now the rest of the filters */
603 switch (fsc->fs.flow_type) {
604 case TCP_V4_FLOW:
605 case UDP_V4_FLOW:
606 case SCTP_V4_FLOW:
607 sprintf(p, "\tSrc IP addr: 0x%x\n",
608 fsc->fs.h_u.tcp_ip4_spec.ip4src);
609 p += ETH_GSTRING_LEN;
610 num_strings++;
611 sprintf(p, "\tSrc IP mask: 0x%x\n",
612 fsc->fs.m_u.tcp_ip4_spec.ip4src);
613 p += ETH_GSTRING_LEN;
614 num_strings++;
615 sprintf(p, "\tDest IP addr: 0x%x\n",
616 fsc->fs.h_u.tcp_ip4_spec.ip4dst);
617 p += ETH_GSTRING_LEN;
618 num_strings++;
619 sprintf(p, "\tDest IP mask: 0x%x\n",
620 fsc->fs.m_u.tcp_ip4_spec.ip4dst);
621 p += ETH_GSTRING_LEN;
622 num_strings++;
623 sprintf(p, "\tSrc Port: %d, mask: 0x%x\n",
624 fsc->fs.h_u.tcp_ip4_spec.psrc,
625 fsc->fs.m_u.tcp_ip4_spec.psrc);
626 p += ETH_GSTRING_LEN;
627 num_strings++;
628 sprintf(p, "\tDest Port: %d, mask: 0x%x\n",
629 fsc->fs.h_u.tcp_ip4_spec.pdst,
630 fsc->fs.m_u.tcp_ip4_spec.pdst);
631 p += ETH_GSTRING_LEN;
632 num_strings++;
633 sprintf(p, "\tTOS: %d, mask: 0x%x\n",
634 fsc->fs.h_u.tcp_ip4_spec.tos,
635 fsc->fs.m_u.tcp_ip4_spec.tos);
636 p += ETH_GSTRING_LEN;
637 num_strings++;
638 break;
639 case AH_ESP_V4_FLOW:
640 case ESP_V4_FLOW:
641 sprintf(p, "\tSrc IP addr: 0x%x\n",
642 fsc->fs.h_u.ah_ip4_spec.ip4src);
643 p += ETH_GSTRING_LEN;
644 num_strings++;
645 sprintf(p, "\tSrc IP mask: 0x%x\n",
646 fsc->fs.m_u.ah_ip4_spec.ip4src);
647 p += ETH_GSTRING_LEN;
648 num_strings++;
649 sprintf(p, "\tDest IP addr: 0x%x\n",
650 fsc->fs.h_u.ah_ip4_spec.ip4dst);
651 p += ETH_GSTRING_LEN;
652 num_strings++;
653 sprintf(p, "\tDest IP mask: 0x%x\n",
654 fsc->fs.m_u.ah_ip4_spec.ip4dst);
655 p += ETH_GSTRING_LEN;
656 num_strings++;
657 sprintf(p, "\tSPI: %d, mask: 0x%x\n",
658 fsc->fs.h_u.ah_ip4_spec.spi,
659 fsc->fs.m_u.ah_ip4_spec.spi);
660 p += ETH_GSTRING_LEN;
661 num_strings++;
662 sprintf(p, "\tTOS: %d, mask: 0x%x\n",
663 fsc->fs.h_u.ah_ip4_spec.tos,
664 fsc->fs.m_u.ah_ip4_spec.tos);
665 p += ETH_GSTRING_LEN;
666 num_strings++;
667 break;
668 case IP_USER_FLOW:
669 sprintf(p, "\tSrc IP addr: 0x%x\n",
670 fsc->fs.h_u.raw_ip4_spec.ip4src);
671 p += ETH_GSTRING_LEN;
672 num_strings++;
673 sprintf(p, "\tSrc IP mask: 0x%x\n",
674 fsc->fs.m_u.raw_ip4_spec.ip4src);
675 p += ETH_GSTRING_LEN;
676 num_strings++;
677 sprintf(p, "\tDest IP addr: 0x%x\n",
678 fsc->fs.h_u.raw_ip4_spec.ip4dst);
679 p += ETH_GSTRING_LEN;
680 num_strings++;
681 sprintf(p, "\tDest IP mask: 0x%x\n",
682 fsc->fs.m_u.raw_ip4_spec.ip4dst);
683 p += ETH_GSTRING_LEN;
684 num_strings++;
685 break;
686 case IPV4_FLOW:
687 sprintf(p, "\tSrc IP addr: 0x%x\n",
688 fsc->fs.h_u.usr_ip4_spec.ip4src);
689 p += ETH_GSTRING_LEN;
690 num_strings++;
691 sprintf(p, "\tSrc IP mask: 0x%x\n",
692 fsc->fs.m_u.usr_ip4_spec.ip4src);
693 p += ETH_GSTRING_LEN;
694 num_strings++;
695 sprintf(p, "\tDest IP addr: 0x%x\n",
696 fsc->fs.h_u.usr_ip4_spec.ip4dst);
697 p += ETH_GSTRING_LEN;
698 num_strings++;
699 sprintf(p, "\tDest IP mask: 0x%x\n",
700 fsc->fs.m_u.usr_ip4_spec.ip4dst);
701 p += ETH_GSTRING_LEN;
702 num_strings++;
703 sprintf(p, "\tL4 bytes: 0x%x, mask: 0x%x\n",
704 fsc->fs.h_u.usr_ip4_spec.l4_4_bytes,
705 fsc->fs.m_u.usr_ip4_spec.l4_4_bytes);
706 p += ETH_GSTRING_LEN;
707 num_strings++;
708 sprintf(p, "\tTOS: %d, mask: 0x%x\n",
709 fsc->fs.h_u.usr_ip4_spec.tos,
710 fsc->fs.m_u.usr_ip4_spec.tos);
711 p += ETH_GSTRING_LEN;
712 num_strings++;
713 sprintf(p, "\tIP Version: %d, mask: 0x%x\n",
714 fsc->fs.h_u.usr_ip4_spec.ip_ver,
715 fsc->fs.m_u.usr_ip4_spec.ip_ver);
716 p += ETH_GSTRING_LEN;
717 num_strings++;
718 sprintf(p, "\tProtocol: %d, mask: 0x%x\n",
719 fsc->fs.h_u.usr_ip4_spec.proto,
720 fsc->fs.m_u.usr_ip4_spec.proto);
721 p += ETH_GSTRING_LEN;
722 num_strings++;
723 break;
724 }
725 sprintf(p, "\tVLAN: %d, mask: 0x%x\n",
726 fsc->fs.vlan_tag, fsc->fs.vlan_tag_mask);
727 p += ETH_GSTRING_LEN;
728 num_strings++;
729 sprintf(p, "\tUser-defined: 0x%Lx\n", fsc->fs.data);
730 p += ETH_GSTRING_LEN;
731 num_strings++;
732 sprintf(p, "\tUser-defined mask: 0x%Lx\n", fsc->fs.data_mask);
733 p += ETH_GSTRING_LEN;
734 num_strings++;
735 if (fsc->fs.action == ETHTOOL_RXNTUPLE_ACTION_DROP)
736 sprintf(p, "\tAction: Drop\n");
737 else
738 sprintf(p, "\tAction: Direct to queue %d\n",
739 fsc->fs.action);
740 p += ETH_GSTRING_LEN;
741 num_strings++;
742unknown_filter:
743 i++;
744 }
745copy:
746 /* indicate to userspace how many strings we actually have */
747 gstrings.len = num_strings;
748 ret = -EFAULT;
749 if (copy_to_user(useraddr, &gstrings, sizeof(gstrings)))
750 goto out;
751 useraddr += sizeof(gstrings);
752 if (copy_to_user(useraddr, data, gstrings.len * ETH_GSTRING_LEN))
753 goto out;
754 ret = 0;
755
756out:
757 kfree(data);
758 return ret;
759}
760
269static int ethtool_get_regs(struct net_device *dev, char __user *useraddr) 761static int ethtool_get_regs(struct net_device *dev, char __user *useraddr)
270{ 762{
271 struct ethtool_regs regs; 763 struct ethtool_regs regs;
@@ -283,7 +775,7 @@ static int ethtool_get_regs(struct net_device *dev, char __user *useraddr)
283 if (regs.len > reglen) 775 if (regs.len > reglen)
284 regs.len = reglen; 776 regs.len = reglen;
285 777
286 regbuf = kmalloc(reglen, GFP_USER); 778 regbuf = kzalloc(reglen, GFP_USER);
287 if (!regbuf) 779 if (!regbuf)
288 return -ENOMEM; 780 return -ENOMEM;
289 781
@@ -324,7 +816,7 @@ static int ethtool_reset(struct net_device *dev, char __user *useraddr)
324 816
325static int ethtool_get_wol(struct net_device *dev, char __user *useraddr) 817static int ethtool_get_wol(struct net_device *dev, char __user *useraddr)
326{ 818{
327 struct ethtool_wolinfo wol = { ETHTOOL_GWOL }; 819 struct ethtool_wolinfo wol = { .cmd = ETHTOOL_GWOL };
328 820
329 if (!dev->ethtool_ops->get_wol) 821 if (!dev->ethtool_ops->get_wol)
330 return -EOPNOTSUPP; 822 return -EOPNOTSUPP;
@@ -456,9 +948,10 @@ static int ethtool_set_eeprom(struct net_device *dev, void __user *useraddr)
456 return ret; 948 return ret;
457} 949}
458 950
459static int ethtool_get_coalesce(struct net_device *dev, void __user *useraddr) 951static noinline_for_stack int ethtool_get_coalesce(struct net_device *dev,
952 void __user *useraddr)
460{ 953{
461 struct ethtool_coalesce coalesce = { ETHTOOL_GCOALESCE }; 954 struct ethtool_coalesce coalesce = { .cmd = ETHTOOL_GCOALESCE };
462 955
463 if (!dev->ethtool_ops->get_coalesce) 956 if (!dev->ethtool_ops->get_coalesce)
464 return -EOPNOTSUPP; 957 return -EOPNOTSUPP;
@@ -470,7 +963,8 @@ static int ethtool_get_coalesce(struct net_device *dev, void __user *useraddr)
470 return 0; 963 return 0;
471} 964}
472 965
473static int ethtool_set_coalesce(struct net_device *dev, void __user *useraddr) 966static noinline_for_stack int ethtool_set_coalesce(struct net_device *dev,
967 void __user *useraddr)
474{ 968{
475 struct ethtool_coalesce coalesce; 969 struct ethtool_coalesce coalesce;
476 970
@@ -485,7 +979,7 @@ static int ethtool_set_coalesce(struct net_device *dev, void __user *useraddr)
485 979
486static int ethtool_get_ringparam(struct net_device *dev, void __user *useraddr) 980static int ethtool_get_ringparam(struct net_device *dev, void __user *useraddr)
487{ 981{
488 struct ethtool_ringparam ringparam = { ETHTOOL_GRINGPARAM }; 982 struct ethtool_ringparam ringparam = { .cmd = ETHTOOL_GRINGPARAM };
489 983
490 if (!dev->ethtool_ops->get_ringparam) 984 if (!dev->ethtool_ops->get_ringparam)
491 return -EOPNOTSUPP; 985 return -EOPNOTSUPP;
@@ -574,6 +1068,7 @@ static int ethtool_set_tx_csum(struct net_device *dev, char __user *useraddr)
574 1068
575 return dev->ethtool_ops->set_tx_csum(dev, edata.data); 1069 return dev->ethtool_ops->set_tx_csum(dev, edata.data);
576} 1070}
1071EXPORT_SYMBOL(ethtool_op_set_tx_csum);
577 1072
578static int ethtool_set_rx_csum(struct net_device *dev, char __user *useraddr) 1073static int ethtool_set_rx_csum(struct net_device *dev, char __user *useraddr)
579{ 1074{
@@ -645,7 +1140,7 @@ static int ethtool_get_gso(struct net_device *dev, char __user *useraddr)
645 1140
646 edata.data = dev->features & NETIF_F_GSO; 1141 edata.data = dev->features & NETIF_F_GSO;
647 if (copy_to_user(useraddr, &edata, sizeof(edata))) 1142 if (copy_to_user(useraddr, &edata, sizeof(edata)))
648 return -EFAULT; 1143 return -EFAULT;
649 return 0; 1144 return 0;
650} 1145}
651 1146
@@ -668,7 +1163,7 @@ static int ethtool_get_gro(struct net_device *dev, char __user *useraddr)
668 1163
669 edata.data = dev->features & NETIF_F_GRO; 1164 edata.data = dev->features & NETIF_F_GRO;
670 if (copy_to_user(useraddr, &edata, sizeof(edata))) 1165 if (copy_to_user(useraddr, &edata, sizeof(edata)))
671 return -EFAULT; 1166 return -EFAULT;
672 return 0; 1167 return 0;
673} 1168}
674 1169
@@ -839,7 +1334,7 @@ static int ethtool_get_perm_addr(struct net_device *dev, void __user *useraddr)
839static int ethtool_get_value(struct net_device *dev, char __user *useraddr, 1334static int ethtool_get_value(struct net_device *dev, char __user *useraddr,
840 u32 cmd, u32 (*actor)(struct net_device *)) 1335 u32 cmd, u32 (*actor)(struct net_device *))
841{ 1336{
842 struct ethtool_value edata = { cmd }; 1337 struct ethtool_value edata = { .cmd = cmd };
843 1338
844 if (!actor) 1339 if (!actor)
845 return -EOPNOTSUPP; 1340 return -EOPNOTSUPP;
@@ -880,7 +1375,8 @@ static int ethtool_set_value(struct net_device *dev, char __user *useraddr,
880 return actor(dev, edata.data); 1375 return actor(dev, edata.data);
881} 1376}
882 1377
883static int ethtool_flash_device(struct net_device *dev, char __user *useraddr) 1378static noinline_for_stack int ethtool_flash_device(struct net_device *dev,
1379 char __user *useraddr)
884{ 1380{
885 struct ethtool_flash efl; 1381 struct ethtool_flash efl;
886 1382
@@ -909,11 +1405,11 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
909 if (!dev->ethtool_ops) 1405 if (!dev->ethtool_ops)
910 return -EOPNOTSUPP; 1406 return -EOPNOTSUPP;
911 1407
912 if (copy_from_user(&ethcmd, useraddr, sizeof (ethcmd))) 1408 if (copy_from_user(&ethcmd, useraddr, sizeof(ethcmd)))
913 return -EFAULT; 1409 return -EFAULT;
914 1410
915 /* Allow some commands to be done by anyone */ 1411 /* Allow some commands to be done by anyone */
916 switch(ethcmd) { 1412 switch (ethcmd) {
917 case ETHTOOL_GDRVINFO: 1413 case ETHTOOL_GDRVINFO:
918 case ETHTOOL_GMSGLVL: 1414 case ETHTOOL_GMSGLVL:
919 case ETHTOOL_GCOALESCE: 1415 case ETHTOOL_GCOALESCE:
@@ -927,6 +1423,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
927 case ETHTOOL_GPERMADDR: 1423 case ETHTOOL_GPERMADDR:
928 case ETHTOOL_GUFO: 1424 case ETHTOOL_GUFO:
929 case ETHTOOL_GGSO: 1425 case ETHTOOL_GGSO:
1426 case ETHTOOL_GGRO:
930 case ETHTOOL_GFLAGS: 1427 case ETHTOOL_GFLAGS:
931 case ETHTOOL_GPFLAGS: 1428 case ETHTOOL_GPFLAGS:
932 case ETHTOOL_GRXFH: 1429 case ETHTOOL_GRXFH:
@@ -940,10 +1437,11 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
940 return -EPERM; 1437 return -EPERM;
941 } 1438 }
942 1439
943 if (dev->ethtool_ops->begin) 1440 if (dev->ethtool_ops->begin) {
944 if ((rc = dev->ethtool_ops->begin(dev)) < 0) 1441 rc = dev->ethtool_ops->begin(dev);
1442 if (rc < 0)
945 return rc; 1443 return rc;
946 1444 }
947 old_features = dev->features; 1445 old_features = dev->features;
948 1446
949 switch (ethcmd) { 1447 switch (ethcmd) {
@@ -1093,12 +1591,12 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
1093 case ETHTOOL_GRXCLSRLCNT: 1591 case ETHTOOL_GRXCLSRLCNT:
1094 case ETHTOOL_GRXCLSRULE: 1592 case ETHTOOL_GRXCLSRULE:
1095 case ETHTOOL_GRXCLSRLALL: 1593 case ETHTOOL_GRXCLSRLALL:
1096 rc = ethtool_get_rxnfc(dev, useraddr); 1594 rc = ethtool_get_rxnfc(dev, ethcmd, useraddr);
1097 break; 1595 break;
1098 case ETHTOOL_SRXFH: 1596 case ETHTOOL_SRXFH:
1099 case ETHTOOL_SRXCLSRLDEL: 1597 case ETHTOOL_SRXCLSRLDEL:
1100 case ETHTOOL_SRXCLSRLINS: 1598 case ETHTOOL_SRXCLSRLINS:
1101 rc = ethtool_set_rxnfc(dev, useraddr); 1599 rc = ethtool_set_rxnfc(dev, ethcmd, useraddr);
1102 break; 1600 break;
1103 case ETHTOOL_GGRO: 1601 case ETHTOOL_GGRO:
1104 rc = ethtool_get_gro(dev, useraddr); 1602 rc = ethtool_get_gro(dev, useraddr);
@@ -1112,6 +1610,21 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
1112 case ETHTOOL_RESET: 1610 case ETHTOOL_RESET:
1113 rc = ethtool_reset(dev, useraddr); 1611 rc = ethtool_reset(dev, useraddr);
1114 break; 1612 break;
1613 case ETHTOOL_SRXNTUPLE:
1614 rc = ethtool_set_rx_ntuple(dev, useraddr);
1615 break;
1616 case ETHTOOL_GRXNTUPLE:
1617 rc = ethtool_get_rx_ntuple(dev, useraddr);
1618 break;
1619 case ETHTOOL_GSSET_INFO:
1620 rc = ethtool_get_sset_info(dev, useraddr);
1621 break;
1622 case ETHTOOL_GRXFHINDIR:
1623 rc = ethtool_get_rxfh_indir(dev, useraddr);
1624 break;
1625 case ETHTOOL_SRXFHINDIR:
1626 rc = ethtool_set_rxfh_indir(dev, useraddr);
1627 break;
1115 default: 1628 default:
1116 rc = -EOPNOTSUPP; 1629 rc = -EOPNOTSUPP;
1117 } 1630 }
@@ -1124,16 +1637,3 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
1124 1637
1125 return rc; 1638 return rc;
1126} 1639}
1127
1128EXPORT_SYMBOL(ethtool_op_get_link);
1129EXPORT_SYMBOL(ethtool_op_get_sg);
1130EXPORT_SYMBOL(ethtool_op_get_tso);
1131EXPORT_SYMBOL(ethtool_op_set_sg);
1132EXPORT_SYMBOL(ethtool_op_set_tso);
1133EXPORT_SYMBOL(ethtool_op_set_tx_csum);
1134EXPORT_SYMBOL(ethtool_op_set_tx_hw_csum);
1135EXPORT_SYMBOL(ethtool_op_set_tx_ipv6_csum);
1136EXPORT_SYMBOL(ethtool_op_set_ufo);
1137EXPORT_SYMBOL(ethtool_op_get_ufo);
1138EXPORT_SYMBOL(ethtool_op_set_flags);
1139EXPORT_SYMBOL(ethtool_op_get_flags);
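
A side note on the initializer changes in the ethtool hunks above: positional initializers such as { ETHTOOL_GWOL } are repeatedly rewritten as designated ones such as { .cmd = ETHTOOL_GWOL }. Both forms zero the remaining members; the designated form simply stops depending on .cmd being the first field. A minimal user-space sketch of that difference, using a stand-in struct (toy_ethtool_value and TOY_GWOL are illustrative names, not the kernel definitions):

#include <stdio.h>
#include <stdint.h>

struct toy_ethtool_value {
	uint32_t cmd;
	uint32_t data;
};

enum { TOY_GWOL = 5 };	/* placeholder command number, not necessarily the real ETHTOOL_GWOL */

int main(void)
{
	/* positional: only correct while 'cmd' stays the first member */
	struct toy_ethtool_value a = { TOY_GWOL };
	/* designated: robust against member reordering, same zero-fill of 'data' */
	struct toy_ethtool_value b = { .cmd = TOY_GWOL };

	printf("a = {cmd=%u, data=%u}\n", (unsigned)a.cmd, (unsigned)a.data);
	printf("b = {cmd=%u, data=%u}\n", (unsigned)b.cmd, (unsigned)b.data);
	return 0;
}
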
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 02a3b2c69c1e..42e84e08a1be 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -10,6 +10,7 @@
10 10
11#include <linux/types.h> 11#include <linux/types.h>
12#include <linux/kernel.h> 12#include <linux/kernel.h>
13#include <linux/slab.h>
13#include <linux/list.h> 14#include <linux/list.h>
14#include <net/net_namespace.h> 15#include <net/net_namespace.h>
15#include <net/sock.h> 16#include <net/sock.h>
@@ -38,6 +39,24 @@ int fib_default_rule_add(struct fib_rules_ops *ops,
38} 39}
39EXPORT_SYMBOL(fib_default_rule_add); 40EXPORT_SYMBOL(fib_default_rule_add);
40 41
42u32 fib_default_rule_pref(struct fib_rules_ops *ops)
43{
44 struct list_head *pos;
45 struct fib_rule *rule;
46
47 if (!list_empty(&ops->rules_list)) {
48 pos = ops->rules_list.next;
49 if (pos->next != &ops->rules_list) {
50 rule = list_entry(pos->next, struct fib_rule, list);
51 if (rule->pref)
52 return rule->pref - 1;
53 }
54 }
55
56 return 0;
57}
58EXPORT_SYMBOL(fib_default_rule_pref);
59
41static void notify_rule_change(int event, struct fib_rule *rule, 60static void notify_rule_change(int event, struct fib_rule *rule,
42 struct fib_rules_ops *ops, struct nlmsghdr *nlh, 61 struct fib_rules_ops *ops, struct nlmsghdr *nlh,
43 u32 pid); 62 u32 pid);
@@ -103,12 +122,12 @@ errout:
103} 122}
104 123
105struct fib_rules_ops * 124struct fib_rules_ops *
106fib_rules_register(struct fib_rules_ops *tmpl, struct net *net) 125fib_rules_register(const struct fib_rules_ops *tmpl, struct net *net)
107{ 126{
108 struct fib_rules_ops *ops; 127 struct fib_rules_ops *ops;
109 int err; 128 int err;
110 129
111 ops = kmemdup(tmpl, sizeof (*ops), GFP_KERNEL); 130 ops = kmemdup(tmpl, sizeof(*ops), GFP_KERNEL);
112 if (ops == NULL) 131 if (ops == NULL)
113 return ERR_PTR(-ENOMEM); 132 return ERR_PTR(-ENOMEM);
114 133
@@ -123,7 +142,6 @@ fib_rules_register(struct fib_rules_ops *tmpl, struct net *net)
123 142
124 return ops; 143 return ops;
125} 144}
126
127EXPORT_SYMBOL_GPL(fib_rules_register); 145EXPORT_SYMBOL_GPL(fib_rules_register);
128 146
129void fib_rules_cleanup_ops(struct fib_rules_ops *ops) 147void fib_rules_cleanup_ops(struct fib_rules_ops *ops)
@@ -157,7 +175,6 @@ void fib_rules_unregister(struct fib_rules_ops *ops)
157 175
158 call_rcu(&ops->rcu, fib_rules_put_rcu); 176 call_rcu(&ops->rcu, fib_rules_put_rcu);
159} 177}
160
161EXPORT_SYMBOL_GPL(fib_rules_unregister); 178EXPORT_SYMBOL_GPL(fib_rules_unregister);
162 179
163static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops, 180static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops,
@@ -220,7 +237,6 @@ out:
220 237
221 return err; 238 return err;
222} 239}
223
224EXPORT_SYMBOL_GPL(fib_rules_lookup); 240EXPORT_SYMBOL_GPL(fib_rules_lookup);
225 241
226static int validate_rulemsg(struct fib_rule_hdr *frh, struct nlattr **tb, 242static int validate_rulemsg(struct fib_rule_hdr *frh, struct nlattr **tb,
@@ -519,6 +535,7 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
519 return -EMSGSIZE; 535 return -EMSGSIZE;
520 536
521 frh = nlmsg_data(nlh); 537 frh = nlmsg_data(nlh);
538 frh->family = ops->family;
522 frh->table = rule->table; 539 frh->table = rule->table;
523 NLA_PUT_U32(skb, FRA_TABLE, rule->table); 540 NLA_PUT_U32(skb, FRA_TABLE, rule->table);
524 frh->res1 = 0; 541 frh->res1 = 0;
@@ -613,7 +630,7 @@ static int fib_nl_dumprule(struct sk_buff *skb, struct netlink_callback *cb)
613 break; 630 break;
614 631
615 cb->args[1] = 0; 632 cb->args[1] = 0;
616 skip: 633skip:
617 idx++; 634 idx++;
618 } 635 }
619 rcu_read_unlock(); 636 rcu_read_unlock();
@@ -685,7 +702,6 @@ static int fib_rules_event(struct notifier_block *this, unsigned long event,
685 struct fib_rules_ops *ops; 702 struct fib_rules_ops *ops;
686 703
687 ASSERT_RTNL(); 704 ASSERT_RTNL();
688 rcu_read_lock();
689 705
690 switch (event) { 706 switch (event) {
691 case NETDEV_REGISTER: 707 case NETDEV_REGISTER:
@@ -699,8 +715,6 @@ static int fib_rules_event(struct notifier_block *this, unsigned long event,
699 break; 715 break;
700 } 716 }
701 717
702 rcu_read_unlock();
703
704 return NOTIFY_DONE; 718 return NOTIFY_DONE;
705} 719}
706 720
@@ -708,7 +722,7 @@ static struct notifier_block fib_rules_notifier = {
708 .notifier_call = fib_rules_event, 722 .notifier_call = fib_rules_event,
709}; 723};
710 724
711static int fib_rules_net_init(struct net *net) 725static int __net_init fib_rules_net_init(struct net *net)
712{ 726{
713 INIT_LIST_HEAD(&net->rules_ops); 727 INIT_LIST_HEAD(&net->rules_ops);
714 spin_lock_init(&net->rules_mod_lock); 728 spin_lock_init(&net->rules_mod_lock);
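
The new fib_default_rule_pref() helper added above derives a preference for a default rule from whatever is already installed: it inspects the second entry on ops->rules_list and returns that rule's preference minus one, falling back to 0 when that cannot be computed. A toy sketch of the same selection on a plain array (toy_default_rule_pref is an illustrative name; 0/32766/32767 are the conventional local/main/default preferences):

#include <stdio.h>
#include <stddef.h>
#include <stdint.h>

static uint32_t toy_default_rule_pref(const uint32_t *prefs, size_t n)
{
	/* prefs[0] is the head of the list; a new default rule slots in
	 * just below the second entry, mirroring the helper above. */
	if (n >= 2 && prefs[1] != 0)
		return prefs[1] - 1;
	return 0;
}

int main(void)
{
	uint32_t prefs[] = { 0, 32766, 32767 };

	printf("new pref = %u\n", (unsigned)toy_default_rule_pref(prefs, 3));	/* prints 32765 */
	return 0;
}
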
diff --git a/net/core/filter.c b/net/core/filter.c
index 08db7b9143a3..52b051f82a01 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -25,6 +25,7 @@
25#include <linux/inet.h> 25#include <linux/inet.h>
26#include <linux/netdevice.h> 26#include <linux/netdevice.h>
27#include <linux/if_packet.h> 27#include <linux/if_packet.h>
28#include <linux/gfp.h>
28#include <net/ip.h> 29#include <net/ip.h>
29#include <net/protocol.h> 30#include <net/protocol.h>
30#include <net/netlink.h> 31#include <net/netlink.h>
@@ -86,7 +87,7 @@ int sk_filter(struct sock *sk, struct sk_buff *skb)
86 return err; 87 return err;
87 88
88 rcu_read_lock_bh(); 89 rcu_read_lock_bh();
89 filter = rcu_dereference(sk->sk_filter); 90 filter = rcu_dereference_bh(sk->sk_filter);
90 if (filter) { 91 if (filter) {
91 unsigned int pkt_len = sk_run_filter(skb, filter->insns, 92 unsigned int pkt_len = sk_run_filter(skb, filter->insns,
92 filter->len); 93 filter->len);
@@ -127,87 +128,87 @@ unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int
127 fentry = &filter[pc]; 128 fentry = &filter[pc];
128 129
129 switch (fentry->code) { 130 switch (fentry->code) {
130 case BPF_ALU|BPF_ADD|BPF_X: 131 case BPF_S_ALU_ADD_X:
131 A += X; 132 A += X;
132 continue; 133 continue;
133 case BPF_ALU|BPF_ADD|BPF_K: 134 case BPF_S_ALU_ADD_K:
134 A += fentry->k; 135 A += fentry->k;
135 continue; 136 continue;
136 case BPF_ALU|BPF_SUB|BPF_X: 137 case BPF_S_ALU_SUB_X:
137 A -= X; 138 A -= X;
138 continue; 139 continue;
139 case BPF_ALU|BPF_SUB|BPF_K: 140 case BPF_S_ALU_SUB_K:
140 A -= fentry->k; 141 A -= fentry->k;
141 continue; 142 continue;
142 case BPF_ALU|BPF_MUL|BPF_X: 143 case BPF_S_ALU_MUL_X:
143 A *= X; 144 A *= X;
144 continue; 145 continue;
145 case BPF_ALU|BPF_MUL|BPF_K: 146 case BPF_S_ALU_MUL_K:
146 A *= fentry->k; 147 A *= fentry->k;
147 continue; 148 continue;
148 case BPF_ALU|BPF_DIV|BPF_X: 149 case BPF_S_ALU_DIV_X:
149 if (X == 0) 150 if (X == 0)
150 return 0; 151 return 0;
151 A /= X; 152 A /= X;
152 continue; 153 continue;
153 case BPF_ALU|BPF_DIV|BPF_K: 154 case BPF_S_ALU_DIV_K:
154 A /= fentry->k; 155 A /= fentry->k;
155 continue; 156 continue;
156 case BPF_ALU|BPF_AND|BPF_X: 157 case BPF_S_ALU_AND_X:
157 A &= X; 158 A &= X;
158 continue; 159 continue;
159 case BPF_ALU|BPF_AND|BPF_K: 160 case BPF_S_ALU_AND_K:
160 A &= fentry->k; 161 A &= fentry->k;
161 continue; 162 continue;
162 case BPF_ALU|BPF_OR|BPF_X: 163 case BPF_S_ALU_OR_X:
163 A |= X; 164 A |= X;
164 continue; 165 continue;
165 case BPF_ALU|BPF_OR|BPF_K: 166 case BPF_S_ALU_OR_K:
166 A |= fentry->k; 167 A |= fentry->k;
167 continue; 168 continue;
168 case BPF_ALU|BPF_LSH|BPF_X: 169 case BPF_S_ALU_LSH_X:
169 A <<= X; 170 A <<= X;
170 continue; 171 continue;
171 case BPF_ALU|BPF_LSH|BPF_K: 172 case BPF_S_ALU_LSH_K:
172 A <<= fentry->k; 173 A <<= fentry->k;
173 continue; 174 continue;
174 case BPF_ALU|BPF_RSH|BPF_X: 175 case BPF_S_ALU_RSH_X:
175 A >>= X; 176 A >>= X;
176 continue; 177 continue;
177 case BPF_ALU|BPF_RSH|BPF_K: 178 case BPF_S_ALU_RSH_K:
178 A >>= fentry->k; 179 A >>= fentry->k;
179 continue; 180 continue;
180 case BPF_ALU|BPF_NEG: 181 case BPF_S_ALU_NEG:
181 A = -A; 182 A = -A;
182 continue; 183 continue;
183 case BPF_JMP|BPF_JA: 184 case BPF_S_JMP_JA:
184 pc += fentry->k; 185 pc += fentry->k;
185 continue; 186 continue;
186 case BPF_JMP|BPF_JGT|BPF_K: 187 case BPF_S_JMP_JGT_K:
187 pc += (A > fentry->k) ? fentry->jt : fentry->jf; 188 pc += (A > fentry->k) ? fentry->jt : fentry->jf;
188 continue; 189 continue;
189 case BPF_JMP|BPF_JGE|BPF_K: 190 case BPF_S_JMP_JGE_K:
190 pc += (A >= fentry->k) ? fentry->jt : fentry->jf; 191 pc += (A >= fentry->k) ? fentry->jt : fentry->jf;
191 continue; 192 continue;
192 case BPF_JMP|BPF_JEQ|BPF_K: 193 case BPF_S_JMP_JEQ_K:
193 pc += (A == fentry->k) ? fentry->jt : fentry->jf; 194 pc += (A == fentry->k) ? fentry->jt : fentry->jf;
194 continue; 195 continue;
195 case BPF_JMP|BPF_JSET|BPF_K: 196 case BPF_S_JMP_JSET_K:
196 pc += (A & fentry->k) ? fentry->jt : fentry->jf; 197 pc += (A & fentry->k) ? fentry->jt : fentry->jf;
197 continue; 198 continue;
198 case BPF_JMP|BPF_JGT|BPF_X: 199 case BPF_S_JMP_JGT_X:
199 pc += (A > X) ? fentry->jt : fentry->jf; 200 pc += (A > X) ? fentry->jt : fentry->jf;
200 continue; 201 continue;
201 case BPF_JMP|BPF_JGE|BPF_X: 202 case BPF_S_JMP_JGE_X:
202 pc += (A >= X) ? fentry->jt : fentry->jf; 203 pc += (A >= X) ? fentry->jt : fentry->jf;
203 continue; 204 continue;
204 case BPF_JMP|BPF_JEQ|BPF_X: 205 case BPF_S_JMP_JEQ_X:
205 pc += (A == X) ? fentry->jt : fentry->jf; 206 pc += (A == X) ? fentry->jt : fentry->jf;
206 continue; 207 continue;
207 case BPF_JMP|BPF_JSET|BPF_X: 208 case BPF_S_JMP_JSET_X:
208 pc += (A & X) ? fentry->jt : fentry->jf; 209 pc += (A & X) ? fentry->jt : fentry->jf;
209 continue; 210 continue;
210 case BPF_LD|BPF_W|BPF_ABS: 211 case BPF_S_LD_W_ABS:
211 k = fentry->k; 212 k = fentry->k;
212load_w: 213load_w:
213 ptr = load_pointer(skb, k, 4, &tmp); 214 ptr = load_pointer(skb, k, 4, &tmp);
@@ -216,7 +217,7 @@ load_w:
216 continue; 217 continue;
217 } 218 }
218 break; 219 break;
219 case BPF_LD|BPF_H|BPF_ABS: 220 case BPF_S_LD_H_ABS:
220 k = fentry->k; 221 k = fentry->k;
221load_h: 222load_h:
222 ptr = load_pointer(skb, k, 2, &tmp); 223 ptr = load_pointer(skb, k, 2, &tmp);
@@ -225,7 +226,7 @@ load_h:
225 continue; 226 continue;
226 } 227 }
227 break; 228 break;
228 case BPF_LD|BPF_B|BPF_ABS: 229 case BPF_S_LD_B_ABS:
229 k = fentry->k; 230 k = fentry->k;
230load_b: 231load_b:
231 ptr = load_pointer(skb, k, 1, &tmp); 232 ptr = load_pointer(skb, k, 1, &tmp);
@@ -234,54 +235,54 @@ load_b:
234 continue; 235 continue;
235 } 236 }
236 break; 237 break;
237 case BPF_LD|BPF_W|BPF_LEN: 238 case BPF_S_LD_W_LEN:
238 A = skb->len; 239 A = skb->len;
239 continue; 240 continue;
240 case BPF_LDX|BPF_W|BPF_LEN: 241 case BPF_S_LDX_W_LEN:
241 X = skb->len; 242 X = skb->len;
242 continue; 243 continue;
243 case BPF_LD|BPF_W|BPF_IND: 244 case BPF_S_LD_W_IND:
244 k = X + fentry->k; 245 k = X + fentry->k;
245 goto load_w; 246 goto load_w;
246 case BPF_LD|BPF_H|BPF_IND: 247 case BPF_S_LD_H_IND:
247 k = X + fentry->k; 248 k = X + fentry->k;
248 goto load_h; 249 goto load_h;
249 case BPF_LD|BPF_B|BPF_IND: 250 case BPF_S_LD_B_IND:
250 k = X + fentry->k; 251 k = X + fentry->k;
251 goto load_b; 252 goto load_b;
252 case BPF_LDX|BPF_B|BPF_MSH: 253 case BPF_S_LDX_B_MSH:
253 ptr = load_pointer(skb, fentry->k, 1, &tmp); 254 ptr = load_pointer(skb, fentry->k, 1, &tmp);
254 if (ptr != NULL) { 255 if (ptr != NULL) {
255 X = (*(u8 *)ptr & 0xf) << 2; 256 X = (*(u8 *)ptr & 0xf) << 2;
256 continue; 257 continue;
257 } 258 }
258 return 0; 259 return 0;
259 case BPF_LD|BPF_IMM: 260 case BPF_S_LD_IMM:
260 A = fentry->k; 261 A = fentry->k;
261 continue; 262 continue;
262 case BPF_LDX|BPF_IMM: 263 case BPF_S_LDX_IMM:
263 X = fentry->k; 264 X = fentry->k;
264 continue; 265 continue;
265 case BPF_LD|BPF_MEM: 266 case BPF_S_LD_MEM:
266 A = mem[fentry->k]; 267 A = mem[fentry->k];
267 continue; 268 continue;
268 case BPF_LDX|BPF_MEM: 269 case BPF_S_LDX_MEM:
269 X = mem[fentry->k]; 270 X = mem[fentry->k];
270 continue; 271 continue;
271 case BPF_MISC|BPF_TAX: 272 case BPF_S_MISC_TAX:
272 X = A; 273 X = A;
273 continue; 274 continue;
274 case BPF_MISC|BPF_TXA: 275 case BPF_S_MISC_TXA:
275 A = X; 276 A = X;
276 continue; 277 continue;
277 case BPF_RET|BPF_K: 278 case BPF_S_RET_K:
278 return fentry->k; 279 return fentry->k;
279 case BPF_RET|BPF_A: 280 case BPF_S_RET_A:
280 return A; 281 return A;
281 case BPF_ST: 282 case BPF_S_ST:
282 mem[fentry->k] = A; 283 mem[fentry->k] = A;
283 continue; 284 continue;
284 case BPF_STX: 285 case BPF_S_STX:
285 mem[fentry->k] = X; 286 mem[fentry->k] = X;
286 continue; 287 continue;
287 default: 288 default:
@@ -301,6 +302,8 @@ load_b:
301 A = skb->pkt_type; 302 A = skb->pkt_type;
302 continue; 303 continue;
303 case SKF_AD_IFINDEX: 304 case SKF_AD_IFINDEX:
305 if (!skb->dev)
306 return 0;
304 A = skb->dev->ifindex; 307 A = skb->dev->ifindex;
305 continue; 308 continue;
306 case SKF_AD_MARK: 309 case SKF_AD_MARK:
@@ -309,6 +312,11 @@ load_b:
309 case SKF_AD_QUEUE: 312 case SKF_AD_QUEUE:
310 A = skb->queue_mapping; 313 A = skb->queue_mapping;
311 continue; 314 continue;
315 case SKF_AD_HATYPE:
316 if (!skb->dev)
317 return 0;
318 A = skb->dev->type;
319 continue;
312 case SKF_AD_NLATTR: { 320 case SKF_AD_NLATTR: {
313 struct nlattr *nla; 321 struct nlattr *nla;
314 322
@@ -382,53 +390,128 @@ int sk_chk_filter(struct sock_filter *filter, int flen)
382 /* Only allow valid instructions */ 390 /* Only allow valid instructions */
383 switch (ftest->code) { 391 switch (ftest->code) {
384 case BPF_ALU|BPF_ADD|BPF_K: 392 case BPF_ALU|BPF_ADD|BPF_K:
393 ftest->code = BPF_S_ALU_ADD_K;
394 break;
385 case BPF_ALU|BPF_ADD|BPF_X: 395 case BPF_ALU|BPF_ADD|BPF_X:
396 ftest->code = BPF_S_ALU_ADD_X;
397 break;
386 case BPF_ALU|BPF_SUB|BPF_K: 398 case BPF_ALU|BPF_SUB|BPF_K:
399 ftest->code = BPF_S_ALU_SUB_K;
400 break;
387 case BPF_ALU|BPF_SUB|BPF_X: 401 case BPF_ALU|BPF_SUB|BPF_X:
402 ftest->code = BPF_S_ALU_SUB_X;
403 break;
388 case BPF_ALU|BPF_MUL|BPF_K: 404 case BPF_ALU|BPF_MUL|BPF_K:
405 ftest->code = BPF_S_ALU_MUL_K;
406 break;
389 case BPF_ALU|BPF_MUL|BPF_X: 407 case BPF_ALU|BPF_MUL|BPF_X:
408 ftest->code = BPF_S_ALU_MUL_X;
409 break;
390 case BPF_ALU|BPF_DIV|BPF_X: 410 case BPF_ALU|BPF_DIV|BPF_X:
411 ftest->code = BPF_S_ALU_DIV_X;
412 break;
391 case BPF_ALU|BPF_AND|BPF_K: 413 case BPF_ALU|BPF_AND|BPF_K:
414 ftest->code = BPF_S_ALU_AND_K;
415 break;
392 case BPF_ALU|BPF_AND|BPF_X: 416 case BPF_ALU|BPF_AND|BPF_X:
417 ftest->code = BPF_S_ALU_AND_X;
418 break;
393 case BPF_ALU|BPF_OR|BPF_K: 419 case BPF_ALU|BPF_OR|BPF_K:
420 ftest->code = BPF_S_ALU_OR_K;
421 break;
394 case BPF_ALU|BPF_OR|BPF_X: 422 case BPF_ALU|BPF_OR|BPF_X:
423 ftest->code = BPF_S_ALU_OR_X;
424 break;
395 case BPF_ALU|BPF_LSH|BPF_K: 425 case BPF_ALU|BPF_LSH|BPF_K:
426 ftest->code = BPF_S_ALU_LSH_K;
427 break;
396 case BPF_ALU|BPF_LSH|BPF_X: 428 case BPF_ALU|BPF_LSH|BPF_X:
429 ftest->code = BPF_S_ALU_LSH_X;
430 break;
397 case BPF_ALU|BPF_RSH|BPF_K: 431 case BPF_ALU|BPF_RSH|BPF_K:
432 ftest->code = BPF_S_ALU_RSH_K;
433 break;
398 case BPF_ALU|BPF_RSH|BPF_X: 434 case BPF_ALU|BPF_RSH|BPF_X:
435 ftest->code = BPF_S_ALU_RSH_X;
436 break;
399 case BPF_ALU|BPF_NEG: 437 case BPF_ALU|BPF_NEG:
438 ftest->code = BPF_S_ALU_NEG;
439 break;
400 case BPF_LD|BPF_W|BPF_ABS: 440 case BPF_LD|BPF_W|BPF_ABS:
441 ftest->code = BPF_S_LD_W_ABS;
442 break;
401 case BPF_LD|BPF_H|BPF_ABS: 443 case BPF_LD|BPF_H|BPF_ABS:
444 ftest->code = BPF_S_LD_H_ABS;
445 break;
402 case BPF_LD|BPF_B|BPF_ABS: 446 case BPF_LD|BPF_B|BPF_ABS:
447 ftest->code = BPF_S_LD_B_ABS;
448 break;
403 case BPF_LD|BPF_W|BPF_LEN: 449 case BPF_LD|BPF_W|BPF_LEN:
450 ftest->code = BPF_S_LD_W_LEN;
451 break;
404 case BPF_LD|BPF_W|BPF_IND: 452 case BPF_LD|BPF_W|BPF_IND:
453 ftest->code = BPF_S_LD_W_IND;
454 break;
405 case BPF_LD|BPF_H|BPF_IND: 455 case BPF_LD|BPF_H|BPF_IND:
456 ftest->code = BPF_S_LD_H_IND;
457 break;
406 case BPF_LD|BPF_B|BPF_IND: 458 case BPF_LD|BPF_B|BPF_IND:
459 ftest->code = BPF_S_LD_B_IND;
460 break;
407 case BPF_LD|BPF_IMM: 461 case BPF_LD|BPF_IMM:
462 ftest->code = BPF_S_LD_IMM;
463 break;
408 case BPF_LDX|BPF_W|BPF_LEN: 464 case BPF_LDX|BPF_W|BPF_LEN:
465 ftest->code = BPF_S_LDX_W_LEN;
466 break;
409 case BPF_LDX|BPF_B|BPF_MSH: 467 case BPF_LDX|BPF_B|BPF_MSH:
468 ftest->code = BPF_S_LDX_B_MSH;
469 break;
410 case BPF_LDX|BPF_IMM: 470 case BPF_LDX|BPF_IMM:
471 ftest->code = BPF_S_LDX_IMM;
472 break;
411 case BPF_MISC|BPF_TAX: 473 case BPF_MISC|BPF_TAX:
474 ftest->code = BPF_S_MISC_TAX;
475 break;
412 case BPF_MISC|BPF_TXA: 476 case BPF_MISC|BPF_TXA:
477 ftest->code = BPF_S_MISC_TXA;
478 break;
413 case BPF_RET|BPF_K: 479 case BPF_RET|BPF_K:
480 ftest->code = BPF_S_RET_K;
481 break;
414 case BPF_RET|BPF_A: 482 case BPF_RET|BPF_A:
483 ftest->code = BPF_S_RET_A;
415 break; 484 break;
416 485
417 /* Some instructions need special checks */ 486 /* Some instructions need special checks */
418 487
419 case BPF_ALU|BPF_DIV|BPF_K:
420 /* check for division by zero */ 488 /* check for division by zero */
489 case BPF_ALU|BPF_DIV|BPF_K:
421 if (ftest->k == 0) 490 if (ftest->k == 0)
422 return -EINVAL; 491 return -EINVAL;
492 ftest->code = BPF_S_ALU_DIV_K;
423 break; 493 break;
424 494
495 /* check for invalid memory addresses */
425 case BPF_LD|BPF_MEM: 496 case BPF_LD|BPF_MEM:
497 if (ftest->k >= BPF_MEMWORDS)
498 return -EINVAL;
499 ftest->code = BPF_S_LD_MEM;
500 break;
426 case BPF_LDX|BPF_MEM: 501 case BPF_LDX|BPF_MEM:
502 if (ftest->k >= BPF_MEMWORDS)
503 return -EINVAL;
504 ftest->code = BPF_S_LDX_MEM;
505 break;
427 case BPF_ST: 506 case BPF_ST:
507 if (ftest->k >= BPF_MEMWORDS)
508 return -EINVAL;
509 ftest->code = BPF_S_ST;
510 break;
428 case BPF_STX: 511 case BPF_STX:
429 /* check for invalid memory addresses */
430 if (ftest->k >= BPF_MEMWORDS) 512 if (ftest->k >= BPF_MEMWORDS)
431 return -EINVAL; 513 return -EINVAL;
514 ftest->code = BPF_S_STX;
432 break; 515 break;
433 516
434 case BPF_JMP|BPF_JA: 517 case BPF_JMP|BPF_JA:
@@ -439,28 +522,63 @@ int sk_chk_filter(struct sock_filter *filter, int flen)
439 */ 522 */
440 if (ftest->k >= (unsigned)(flen-pc-1)) 523 if (ftest->k >= (unsigned)(flen-pc-1))
441 return -EINVAL; 524 return -EINVAL;
525 ftest->code = BPF_S_JMP_JA;
442 break; 526 break;
443 527
444 case BPF_JMP|BPF_JEQ|BPF_K: 528 case BPF_JMP|BPF_JEQ|BPF_K:
529 ftest->code = BPF_S_JMP_JEQ_K;
530 break;
445 case BPF_JMP|BPF_JEQ|BPF_X: 531 case BPF_JMP|BPF_JEQ|BPF_X:
532 ftest->code = BPF_S_JMP_JEQ_X;
533 break;
446 case BPF_JMP|BPF_JGE|BPF_K: 534 case BPF_JMP|BPF_JGE|BPF_K:
535 ftest->code = BPF_S_JMP_JGE_K;
536 break;
447 case BPF_JMP|BPF_JGE|BPF_X: 537 case BPF_JMP|BPF_JGE|BPF_X:
538 ftest->code = BPF_S_JMP_JGE_X;
539 break;
448 case BPF_JMP|BPF_JGT|BPF_K: 540 case BPF_JMP|BPF_JGT|BPF_K:
541 ftest->code = BPF_S_JMP_JGT_K;
542 break;
449 case BPF_JMP|BPF_JGT|BPF_X: 543 case BPF_JMP|BPF_JGT|BPF_X:
544 ftest->code = BPF_S_JMP_JGT_X;
545 break;
450 case BPF_JMP|BPF_JSET|BPF_K: 546 case BPF_JMP|BPF_JSET|BPF_K:
547 ftest->code = BPF_S_JMP_JSET_K;
548 break;
451 case BPF_JMP|BPF_JSET|BPF_X: 549 case BPF_JMP|BPF_JSET|BPF_X:
550 ftest->code = BPF_S_JMP_JSET_X;
551 break;
552
553 default:
554 return -EINVAL;
555 }
556
452 /* for conditionals both must be safe */ 557 /* for conditionals both must be safe */
558 switch (ftest->code) {
559 case BPF_S_JMP_JEQ_K:
560 case BPF_S_JMP_JEQ_X:
561 case BPF_S_JMP_JGE_K:
562 case BPF_S_JMP_JGE_X:
563 case BPF_S_JMP_JGT_K:
564 case BPF_S_JMP_JGT_X:
565 case BPF_S_JMP_JSET_X:
566 case BPF_S_JMP_JSET_K:
453 if (pc + ftest->jt + 1 >= flen || 567 if (pc + ftest->jt + 1 >= flen ||
454 pc + ftest->jf + 1 >= flen) 568 pc + ftest->jf + 1 >= flen)
455 return -EINVAL; 569 return -EINVAL;
456 break; 570 }
571 }
457 572
573 /* last instruction must be a RET code */
574 switch (filter[flen - 1].code) {
575 case BPF_S_RET_K:
576 case BPF_S_RET_A:
577 return 0;
578 break;
458 default: 579 default:
459 return -EINVAL; 580 return -EINVAL;
460 } 581 }
461 }
462
463 return (BPF_CLASS(filter[flen - 1].code) == BPF_RET) ? 0 : -EINVAL;
464} 582}
465EXPORT_SYMBOL(sk_chk_filter); 583EXPORT_SYMBOL(sk_chk_filter);
466 584
@@ -521,7 +639,7 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
521 } 639 }
522 640
523 rcu_read_lock_bh(); 641 rcu_read_lock_bh();
524 old_fp = rcu_dereference(sk->sk_filter); 642 old_fp = rcu_dereference_bh(sk->sk_filter);
525 rcu_assign_pointer(sk->sk_filter, fp); 643 rcu_assign_pointer(sk->sk_filter, fp);
526 rcu_read_unlock_bh(); 644 rcu_read_unlock_bh();
527 645
@@ -529,6 +647,7 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
529 sk_filter_delayed_uncharge(sk, old_fp); 647 sk_filter_delayed_uncharge(sk, old_fp);
530 return 0; 648 return 0;
531} 649}
650EXPORT_SYMBOL_GPL(sk_attach_filter);
532 651
533int sk_detach_filter(struct sock *sk) 652int sk_detach_filter(struct sock *sk)
534{ 653{
@@ -536,7 +655,7 @@ int sk_detach_filter(struct sock *sk)
536 struct sk_filter *filter; 655 struct sk_filter *filter;
537 656
538 rcu_read_lock_bh(); 657 rcu_read_lock_bh();
539 filter = rcu_dereference(sk->sk_filter); 658 filter = rcu_dereference_bh(sk->sk_filter);
540 if (filter) { 659 if (filter) {
541 rcu_assign_pointer(sk->sk_filter, NULL); 660 rcu_assign_pointer(sk->sk_filter, NULL);
542 sk_filter_delayed_uncharge(sk, filter); 661 sk_filter_delayed_uncharge(sk, filter);
@@ -545,3 +664,4 @@ int sk_detach_filter(struct sock *sk)
545 rcu_read_unlock_bh(); 664 rcu_read_unlock_bh();
546 return ret; 665 return ret;
547} 666}
667EXPORT_SYMBOL_GPL(sk_detach_filter);
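
The filter.c changes above make sk_chk_filter() rewrite each raw BPF opcode (e.g. BPF_ALU|BPF_ADD|BPF_K) into a dense internal code (BPF_S_ALU_ADD_K) while it validates the program, so sk_run_filter() only ever switches over values it has already vetted, and per-instruction checks (division by zero, scratch-memory bounds) run once up front. A self-contained sketch of that validate-then-translate pattern, shrunk to a two-opcode toy (all names below are illustrative, not the kernel's):

#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

enum raw_op  { RAW_ADD_K = 0x04, RAW_RET_A = 0x16 };	/* classic BPF encodings of ALU|ADD|K and RET|A */
enum kern_op { K_ADD_K, K_RET_A };			/* dense internal numbering */

struct insn { uint16_t code; uint32_t k; };

/* Reject unknown opcodes and rewrite the rest in place, once. */
static int toy_check(struct insn *prog, size_t len)
{
	for (size_t pc = 0; pc < len; pc++) {
		switch (prog[pc].code) {
		case RAW_ADD_K: prog[pc].code = K_ADD_K; break;
		case RAW_RET_A: prog[pc].code = K_RET_A; break;
		default:        return -1;	/* -EINVAL in the kernel */
		}
	}
	/* last instruction must return, as in sk_chk_filter() */
	return (prog[len - 1].code == K_RET_A) ? 0 : -1;
}

/* The run loop only sees codes that toy_check() has already translated. */
static uint32_t toy_run(const struct insn *prog, size_t len)
{
	uint32_t A = 0;

	for (size_t pc = 0; pc < len; pc++) {
		switch (prog[pc].code) {
		case K_ADD_K: A += prog[pc].k; break;
		case K_RET_A: return A;
		}
	}
	return 0;
}

int main(void)
{
	struct insn prog[] = { { RAW_ADD_K, 40 }, { RAW_ADD_K, 2 }, { RAW_RET_A, 0 } };

	if (toy_check(prog, 3) == 0)
		printf("result = %u\n", (unsigned)toy_run(prog, 3));	/* prints 42 */
	return 0;
}
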
diff --git a/net/core/flow.c b/net/core/flow.c
index 96015871ecea..f67dcbfe54ef 100644
--- a/net/core/flow.c
+++ b/net/core/flow.c
@@ -26,113 +26,159 @@
26#include <linux/security.h> 26#include <linux/security.h>
27 27
28struct flow_cache_entry { 28struct flow_cache_entry {
29 struct flow_cache_entry *next; 29 union {
30 u16 family; 30 struct hlist_node hlist;
31 u8 dir; 31 struct list_head gc_list;
32 u32 genid; 32 } u;
33 struct flowi key; 33 u16 family;
34 void *object; 34 u8 dir;
35 atomic_t *object_ref; 35 u32 genid;
36 struct flowi key;
37 struct flow_cache_object *object;
36}; 38};
37 39
38atomic_t flow_cache_genid = ATOMIC_INIT(0); 40struct flow_cache_percpu {
39 41 struct hlist_head *hash_table;
40static u32 flow_hash_shift; 42 int hash_count;
41#define flow_hash_size (1 << flow_hash_shift) 43 u32 hash_rnd;
42static DEFINE_PER_CPU(struct flow_cache_entry **, flow_tables) = { NULL }; 44 int hash_rnd_recalc;
43 45 struct tasklet_struct flush_tasklet;
44#define flow_table(cpu) (per_cpu(flow_tables, cpu))
45
46static struct kmem_cache *flow_cachep __read_mostly;
47
48static int flow_lwm, flow_hwm;
49
50struct flow_percpu_info {
51 int hash_rnd_recalc;
52 u32 hash_rnd;
53 int count;
54}; 46};
55static DEFINE_PER_CPU(struct flow_percpu_info, flow_hash_info) = { 0 };
56 47
57#define flow_hash_rnd_recalc(cpu) \ 48struct flow_flush_info {
58 (per_cpu(flow_hash_info, cpu).hash_rnd_recalc) 49 struct flow_cache *cache;
59#define flow_hash_rnd(cpu) \ 50 atomic_t cpuleft;
60 (per_cpu(flow_hash_info, cpu).hash_rnd) 51 struct completion completion;
61#define flow_count(cpu) \ 52};
62 (per_cpu(flow_hash_info, cpu).count)
63 53
64static struct timer_list flow_hash_rnd_timer; 54struct flow_cache {
55 u32 hash_shift;
56 unsigned long order;
57 struct flow_cache_percpu *percpu;
58 struct notifier_block hotcpu_notifier;
59 int low_watermark;
60 int high_watermark;
61 struct timer_list rnd_timer;
62};
65 63
66#define FLOW_HASH_RND_PERIOD (10 * 60 * HZ) 64atomic_t flow_cache_genid = ATOMIC_INIT(0);
65EXPORT_SYMBOL(flow_cache_genid);
66static struct flow_cache flow_cache_global;
67static struct kmem_cache *flow_cachep;
67 68
68struct flow_flush_info { 69static DEFINE_SPINLOCK(flow_cache_gc_lock);
69 atomic_t cpuleft; 70static LIST_HEAD(flow_cache_gc_list);
70 struct completion completion;
71};
72static DEFINE_PER_CPU(struct tasklet_struct, flow_flush_tasklets) = { NULL };
73 71
74#define flow_flush_tasklet(cpu) (&per_cpu(flow_flush_tasklets, cpu)) 72#define flow_cache_hash_size(cache) (1 << (cache)->hash_shift)
73#define FLOW_HASH_RND_PERIOD (10 * 60 * HZ)
75 74
76static void flow_cache_new_hashrnd(unsigned long arg) 75static void flow_cache_new_hashrnd(unsigned long arg)
77{ 76{
77 struct flow_cache *fc = (void *) arg;
78 int i; 78 int i;
79 79
80 for_each_possible_cpu(i) 80 for_each_possible_cpu(i)
81 flow_hash_rnd_recalc(i) = 1; 81 per_cpu_ptr(fc->percpu, i)->hash_rnd_recalc = 1;
82
83 fc->rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD;
84 add_timer(&fc->rnd_timer);
85}
82 86
83 flow_hash_rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD; 87static int flow_entry_valid(struct flow_cache_entry *fle)
84 add_timer(&flow_hash_rnd_timer); 88{
89 if (atomic_read(&flow_cache_genid) != fle->genid)
90 return 0;
91 if (fle->object && !fle->object->ops->check(fle->object))
92 return 0;
93 return 1;
85} 94}
86 95
87static void flow_entry_kill(int cpu, struct flow_cache_entry *fle) 96static void flow_entry_kill(struct flow_cache_entry *fle)
88{ 97{
89 if (fle->object) 98 if (fle->object)
90 atomic_dec(fle->object_ref); 99 fle->object->ops->delete(fle->object);
91 kmem_cache_free(flow_cachep, fle); 100 kmem_cache_free(flow_cachep, fle);
92 flow_count(cpu)--;
93} 101}
94 102
95static void __flow_cache_shrink(int cpu, int shrink_to) 103static void flow_cache_gc_task(struct work_struct *work)
96{ 104{
97 struct flow_cache_entry *fle, **flp; 105 struct list_head gc_list;
98 int i; 106 struct flow_cache_entry *fce, *n;
99 107
100 for (i = 0; i < flow_hash_size; i++) { 108 INIT_LIST_HEAD(&gc_list);
101 int k = 0; 109 spin_lock_bh(&flow_cache_gc_lock);
110 list_splice_tail_init(&flow_cache_gc_list, &gc_list);
111 spin_unlock_bh(&flow_cache_gc_lock);
102 112
103 flp = &flow_table(cpu)[i]; 113 list_for_each_entry_safe(fce, n, &gc_list, u.gc_list)
104 while ((fle = *flp) != NULL && k < shrink_to) { 114 flow_entry_kill(fce);
105 k++; 115}
106 flp = &fle->next; 116static DECLARE_WORK(flow_cache_gc_work, flow_cache_gc_task);
107 } 117
108 while ((fle = *flp) != NULL) { 118static void flow_cache_queue_garbage(struct flow_cache_percpu *fcp,
109 *flp = fle->next; 119 int deleted, struct list_head *gc_list)
110 flow_entry_kill(cpu, fle); 120{
111 } 121 if (deleted) {
122 fcp->hash_count -= deleted;
123 spin_lock_bh(&flow_cache_gc_lock);
124 list_splice_tail(gc_list, &flow_cache_gc_list);
125 spin_unlock_bh(&flow_cache_gc_lock);
126 schedule_work(&flow_cache_gc_work);
112 } 127 }
113} 128}
114 129
115static void flow_cache_shrink(int cpu) 130static void __flow_cache_shrink(struct flow_cache *fc,
131 struct flow_cache_percpu *fcp,
132 int shrink_to)
116{ 133{
117 int shrink_to = flow_lwm / flow_hash_size; 134 struct flow_cache_entry *fle;
135 struct hlist_node *entry, *tmp;
136 LIST_HEAD(gc_list);
137 int i, deleted = 0;
138
139 for (i = 0; i < flow_cache_hash_size(fc); i++) {
140 int saved = 0;
141
142 hlist_for_each_entry_safe(fle, entry, tmp,
143 &fcp->hash_table[i], u.hlist) {
144 if (saved < shrink_to &&
145 flow_entry_valid(fle)) {
146 saved++;
147 } else {
148 deleted++;
149 hlist_del(&fle->u.hlist);
150 list_add_tail(&fle->u.gc_list, &gc_list);
151 }
152 }
153 }
118 154
119 __flow_cache_shrink(cpu, shrink_to); 155 flow_cache_queue_garbage(fcp, deleted, &gc_list);
120} 156}
121 157
122static void flow_new_hash_rnd(int cpu) 158static void flow_cache_shrink(struct flow_cache *fc,
159 struct flow_cache_percpu *fcp)
123{ 160{
124 get_random_bytes(&flow_hash_rnd(cpu), sizeof(u32)); 161 int shrink_to = fc->low_watermark / flow_cache_hash_size(fc);
125 flow_hash_rnd_recalc(cpu) = 0;
126 162
127 __flow_cache_shrink(cpu, 0); 163 __flow_cache_shrink(fc, fcp, shrink_to);
128} 164}
129 165
130static u32 flow_hash_code(struct flowi *key, int cpu) 166static void flow_new_hash_rnd(struct flow_cache *fc,
167 struct flow_cache_percpu *fcp)
168{
169 get_random_bytes(&fcp->hash_rnd, sizeof(u32));
170 fcp->hash_rnd_recalc = 0;
171 __flow_cache_shrink(fc, fcp, 0);
172}
173
174static u32 flow_hash_code(struct flow_cache *fc,
175 struct flow_cache_percpu *fcp,
176 struct flowi *key)
131{ 177{
132 u32 *k = (u32 *) key; 178 u32 *k = (u32 *) key;
133 179
134 return (jhash2(k, (sizeof(*key) / sizeof(u32)), flow_hash_rnd(cpu)) & 180 return (jhash2(k, (sizeof(*key) / sizeof(u32)), fcp->hash_rnd)
135 (flow_hash_size - 1)); 181 & (flow_cache_hash_size(fc) - 1));
136} 182}
137 183
138#if (BITS_PER_LONG == 64) 184#if (BITS_PER_LONG == 64)
@@ -165,114 +211,118 @@ static int flow_key_compare(struct flowi *key1, struct flowi *key2)
165 return 0; 211 return 0;
166} 212}
167 213
168void *flow_cache_lookup(struct net *net, struct flowi *key, u16 family, u8 dir, 214struct flow_cache_object *
169 flow_resolve_t resolver) 215flow_cache_lookup(struct net *net, struct flowi *key, u16 family, u8 dir,
216 flow_resolve_t resolver, void *ctx)
170{ 217{
171 struct flow_cache_entry *fle, **head; 218 struct flow_cache *fc = &flow_cache_global;
219 struct flow_cache_percpu *fcp;
220 struct flow_cache_entry *fle, *tfle;
221 struct hlist_node *entry;
222 struct flow_cache_object *flo;
172 unsigned int hash; 223 unsigned int hash;
173 int cpu;
174 224
175 local_bh_disable(); 225 local_bh_disable();
176 cpu = smp_processor_id(); 226 fcp = this_cpu_ptr(fc->percpu);
177 227
178 fle = NULL; 228 fle = NULL;
229 flo = NULL;
179 /* Packet really early in init? Making flow_cache_init a 230 /* Packet really early in init? Making flow_cache_init a
180 * pre-smp initcall would solve this. --RR */ 231 * pre-smp initcall would solve this. --RR */
181 if (!flow_table(cpu)) 232 if (!fcp->hash_table)
182 goto nocache; 233 goto nocache;
183 234
184 if (flow_hash_rnd_recalc(cpu)) 235 if (fcp->hash_rnd_recalc)
185 flow_new_hash_rnd(cpu); 236 flow_new_hash_rnd(fc, fcp);
186 hash = flow_hash_code(key, cpu);
187 237
188 head = &flow_table(cpu)[hash]; 238 hash = flow_hash_code(fc, fcp, key);
189 for (fle = *head; fle; fle = fle->next) { 239 hlist_for_each_entry(tfle, entry, &fcp->hash_table[hash], u.hlist) {
190 if (fle->family == family && 240 if (tfle->family == family &&
191 fle->dir == dir && 241 tfle->dir == dir &&
192 flow_key_compare(key, &fle->key) == 0) { 242 flow_key_compare(key, &tfle->key) == 0) {
193 if (fle->genid == atomic_read(&flow_cache_genid)) { 243 fle = tfle;
194 void *ret = fle->object;
195
196 if (ret)
197 atomic_inc(fle->object_ref);
198 local_bh_enable();
199
200 return ret;
201 }
202 break; 244 break;
203 } 245 }
204 } 246 }
205 247
206 if (!fle) { 248 if (unlikely(!fle)) {
207 if (flow_count(cpu) > flow_hwm) 249 if (fcp->hash_count > fc->high_watermark)
208 flow_cache_shrink(cpu); 250 flow_cache_shrink(fc, fcp);
209 251
210 fle = kmem_cache_alloc(flow_cachep, GFP_ATOMIC); 252 fle = kmem_cache_alloc(flow_cachep, GFP_ATOMIC);
211 if (fle) { 253 if (fle) {
212 fle->next = *head;
213 *head = fle;
214 fle->family = family; 254 fle->family = family;
215 fle->dir = dir; 255 fle->dir = dir;
216 memcpy(&fle->key, key, sizeof(*key)); 256 memcpy(&fle->key, key, sizeof(*key));
217 fle->object = NULL; 257 fle->object = NULL;
218 flow_count(cpu)++; 258 hlist_add_head(&fle->u.hlist, &fcp->hash_table[hash]);
259 fcp->hash_count++;
219 } 260 }
261 } else if (likely(fle->genid == atomic_read(&flow_cache_genid))) {
262 flo = fle->object;
263 if (!flo)
264 goto ret_object;
265 flo = flo->ops->get(flo);
266 if (flo)
267 goto ret_object;
268 } else if (fle->object) {
269 flo = fle->object;
270 flo->ops->delete(flo);
271 fle->object = NULL;
220 } 272 }
221 273
222nocache: 274nocache:
223 { 275 flo = NULL;
224 int err; 276 if (fle) {
225 void *obj; 277 flo = fle->object;
226 atomic_t *obj_ref; 278 fle->object = NULL;
227
228 err = resolver(net, key, family, dir, &obj, &obj_ref);
229
230 if (fle && !err) {
231 fle->genid = atomic_read(&flow_cache_genid);
232
233 if (fle->object)
234 atomic_dec(fle->object_ref);
235
236 fle->object = obj;
237 fle->object_ref = obj_ref;
238 if (obj)
239 atomic_inc(fle->object_ref);
240 }
241 local_bh_enable();
242
243 if (err)
244 obj = ERR_PTR(err);
245 return obj;
246 } 279 }
280 flo = resolver(net, key, family, dir, flo, ctx);
281 if (fle) {
282 fle->genid = atomic_read(&flow_cache_genid);
283 if (!IS_ERR(flo))
284 fle->object = flo;
285 else
286 fle->genid--;
287 } else {
288 if (flo && !IS_ERR(flo))
289 flo->ops->delete(flo);
290 }
291ret_object:
292 local_bh_enable();
293 return flo;
247} 294}
295EXPORT_SYMBOL(flow_cache_lookup);
248 296
249static void flow_cache_flush_tasklet(unsigned long data) 297static void flow_cache_flush_tasklet(unsigned long data)
250{ 298{
251 struct flow_flush_info *info = (void *)data; 299 struct flow_flush_info *info = (void *)data;
252 int i; 300 struct flow_cache *fc = info->cache;
253 int cpu; 301 struct flow_cache_percpu *fcp;
254 302 struct flow_cache_entry *fle;
255 cpu = smp_processor_id(); 303 struct hlist_node *entry, *tmp;
256 for (i = 0; i < flow_hash_size; i++) { 304 LIST_HEAD(gc_list);
257 struct flow_cache_entry *fle; 305 int i, deleted = 0;
258 306
259 fle = flow_table(cpu)[i]; 307 fcp = this_cpu_ptr(fc->percpu);
260 for (; fle; fle = fle->next) { 308 for (i = 0; i < flow_cache_hash_size(fc); i++) {
261 unsigned genid = atomic_read(&flow_cache_genid); 309 hlist_for_each_entry_safe(fle, entry, tmp,
262 310 &fcp->hash_table[i], u.hlist) {
263 if (!fle->object || fle->genid == genid) 311 if (flow_entry_valid(fle))
264 continue; 312 continue;
265 313
266 fle->object = NULL; 314 deleted++;
267 atomic_dec(fle->object_ref); 315 hlist_del(&fle->u.hlist);
316 list_add_tail(&fle->u.gc_list, &gc_list);
268 } 317 }
269 } 318 }
270 319
320 flow_cache_queue_garbage(fcp, deleted, &gc_list);
321
271 if (atomic_dec_and_test(&info->cpuleft)) 322 if (atomic_dec_and_test(&info->cpuleft))
272 complete(&info->completion); 323 complete(&info->completion);
273} 324}
274 325
275static void flow_cache_flush_per_cpu(void *) __attribute__((__unused__));
276static void flow_cache_flush_per_cpu(void *data) 326static void flow_cache_flush_per_cpu(void *data)
277{ 327{
278 struct flow_flush_info *info = data; 328 struct flow_flush_info *info = data;
@@ -280,8 +330,7 @@ static void flow_cache_flush_per_cpu(void *data)
280 struct tasklet_struct *tasklet; 330 struct tasklet_struct *tasklet;
281 331
282 cpu = smp_processor_id(); 332 cpu = smp_processor_id();
283 333 tasklet = &per_cpu_ptr(info->cache->percpu, cpu)->flush_tasklet;
284 tasklet = flow_flush_tasklet(cpu);
285 tasklet->data = (unsigned long)info; 334 tasklet->data = (unsigned long)info;
286 tasklet_schedule(tasklet); 335 tasklet_schedule(tasklet);
287} 336}
@@ -294,6 +343,7 @@ void flow_cache_flush(void)
294 /* Don't want cpus going down or up during this. */ 343 /* Don't want cpus going down or up during this. */
295 get_online_cpus(); 344 get_online_cpus();
296 mutex_lock(&flow_flush_sem); 345 mutex_lock(&flow_flush_sem);
346 info.cache = &flow_cache_global;
297 atomic_set(&info.cpuleft, num_online_cpus()); 347 atomic_set(&info.cpuleft, num_online_cpus());
298 init_completion(&info.completion); 348 init_completion(&info.completion);
299 349
@@ -307,62 +357,72 @@ void flow_cache_flush(void)
307 put_online_cpus(); 357 put_online_cpus();
308} 358}
309 359
310static void __init flow_cache_cpu_prepare(int cpu) 360static void __init flow_cache_cpu_prepare(struct flow_cache *fc,
361 struct flow_cache_percpu *fcp)
311{ 362{
312 struct tasklet_struct *tasklet; 363 fcp->hash_table = (struct hlist_head *)
313 unsigned long order; 364 __get_free_pages(GFP_KERNEL|__GFP_ZERO, fc->order);
314 365 if (!fcp->hash_table)
315 for (order = 0; 366 panic("NET: failed to allocate flow cache order %lu\n", fc->order);
316 (PAGE_SIZE << order) < 367
317 (sizeof(struct flow_cache_entry *)*flow_hash_size); 368 fcp->hash_rnd_recalc = 1;
318 order++) 369 fcp->hash_count = 0;
319 /* NOTHING */; 370 tasklet_init(&fcp->flush_tasklet, flow_cache_flush_tasklet, 0);
320
321 flow_table(cpu) = (struct flow_cache_entry **)
322 __get_free_pages(GFP_KERNEL|__GFP_ZERO, order);
323 if (!flow_table(cpu))
324 panic("NET: failed to allocate flow cache order %lu\n", order);
325
326 flow_hash_rnd_recalc(cpu) = 1;
327 flow_count(cpu) = 0;
328
329 tasklet = flow_flush_tasklet(cpu);
330 tasklet_init(tasklet, flow_cache_flush_tasklet, 0);
331} 371}
332 372
333static int flow_cache_cpu(struct notifier_block *nfb, 373static int flow_cache_cpu(struct notifier_block *nfb,
334 unsigned long action, 374 unsigned long action,
335 void *hcpu) 375 void *hcpu)
336{ 376{
377 struct flow_cache *fc = container_of(nfb, struct flow_cache, hotcpu_notifier);
378 int cpu = (unsigned long) hcpu;
379 struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, cpu);
380
337 if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) 381 if (action == CPU_DEAD || action == CPU_DEAD_FROZEN)
338 __flow_cache_shrink((unsigned long)hcpu, 0); 382 __flow_cache_shrink(fc, fcp, 0);
339 return NOTIFY_OK; 383 return NOTIFY_OK;
340} 384}
341 385
342static int __init flow_cache_init(void) 386static int flow_cache_init(struct flow_cache *fc)
343{ 387{
388 unsigned long order;
344 int i; 389 int i;
345 390
346 flow_cachep = kmem_cache_create("flow_cache", 391 fc->hash_shift = 10;
347 sizeof(struct flow_cache_entry), 392 fc->low_watermark = 2 * flow_cache_hash_size(fc);
348 0, SLAB_PANIC, 393 fc->high_watermark = 4 * flow_cache_hash_size(fc);
349 NULL);
350 flow_hash_shift = 10;
351 flow_lwm = 2 * flow_hash_size;
352 flow_hwm = 4 * flow_hash_size;
353 394
354 setup_timer(&flow_hash_rnd_timer, flow_cache_new_hashrnd, 0); 395 for (order = 0;
355 flow_hash_rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD; 396 (PAGE_SIZE << order) <
356 add_timer(&flow_hash_rnd_timer); 397 (sizeof(struct hlist_head)*flow_cache_hash_size(fc));
398 order++)
399 /* NOTHING */;
400 fc->order = order;
401 fc->percpu = alloc_percpu(struct flow_cache_percpu);
402
403 setup_timer(&fc->rnd_timer, flow_cache_new_hashrnd,
404 (unsigned long) fc);
405 fc->rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD;
406 add_timer(&fc->rnd_timer);
357 407
358 for_each_possible_cpu(i) 408 for_each_possible_cpu(i)
359 flow_cache_cpu_prepare(i); 409 flow_cache_cpu_prepare(fc, per_cpu_ptr(fc->percpu, i));
410
411 fc->hotcpu_notifier = (struct notifier_block){
412 .notifier_call = flow_cache_cpu,
413 };
414 register_hotcpu_notifier(&fc->hotcpu_notifier);
360 415
361 hotcpu_notifier(flow_cache_cpu, 0);
362 return 0; 416 return 0;
363} 417}
364 418
365module_init(flow_cache_init); 419static int __init flow_cache_init_global(void)
420{
421 flow_cachep = kmem_cache_create("flow_cache",
422 sizeof(struct flow_cache_entry),
423 0, SLAB_PANIC, NULL);
366 424
367EXPORT_SYMBOL(flow_cache_genid); 425 return flow_cache_init(&flow_cache_global);
368EXPORT_SYMBOL(flow_cache_lookup); 426}
427
428module_init(flow_cache_init_global);
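
The rewritten flow cache above drops per-entry refcounting and instead unhooks stale entries from the per-CPU hash, parks them on a shared list under flow_cache_gc_lock, and frees them later from flow_cache_gc_task() via schedule_work(). A much-simplified, single-threaded sketch of that park-then-free pattern (no spinlock or work queue here, just the list handling; all names are illustrative):

#include <stdio.h>
#include <stdlib.h>

struct entry {
	int key;
	struct entry *next;	/* doubles as hash-chain link and gc-list link */
};

static struct entry *gc_list;	/* the kernel guards its gc list with a spinlock */

/* Fast path: unhook the chain head and park it on the gc list. */
static void queue_for_gc(struct entry **chain, struct entry *e)
{
	*chain = e->next;
	e->next = gc_list;
	gc_list = e;
}

/* Deferred work: take the pending list and free everything in one batch. */
static void gc_work(void)
{
	struct entry *e = gc_list;

	gc_list = NULL;
	while (e) {
		struct entry *n = e->next;

		printf("freeing entry %d\n", e->key);
		free(e);
		e = n;
	}
}

int main(void)
{
	struct entry *chain = NULL;

	for (int i = 0; i < 3; i++) {
		struct entry *e = malloc(sizeof(*e));

		if (!e)
			return 1;
		e->key = i;
		e->next = chain;
		chain = e;
	}
	while (chain)
		queue_for_gc(&chain, chain);	/* evict every entry */
	gc_work();				/* free them later, in one batch */
	return 0;
}
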
diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c
index 493775f4f2f1..6743146e4d6b 100644
--- a/net/core/gen_estimator.c
+++ b/net/core/gen_estimator.c
@@ -32,6 +32,7 @@
32#include <linux/rtnetlink.h> 32#include <linux/rtnetlink.h>
33#include <linux/init.h> 33#include <linux/init.h>
34#include <linux/rbtree.h> 34#include <linux/rbtree.h>
35#include <linux/slab.h>
35#include <net/sock.h> 36#include <net/sock.h>
36#include <net/gen_stats.h> 37#include <net/gen_stats.h>
37 38
@@ -106,6 +107,7 @@ static DEFINE_RWLOCK(est_lock);
106 107
107/* Protects against soft lockup during large deletion */ 108/* Protects against soft lockup during large deletion */
108static struct rb_root est_root = RB_ROOT; 109static struct rb_root est_root = RB_ROOT;
110static DEFINE_SPINLOCK(est_tree_lock);
109 111
110static void est_timer(unsigned long arg) 112static void est_timer(unsigned long arg)
111{ 113{
@@ -200,7 +202,6 @@ struct gen_estimator *gen_find_node(const struct gnet_stats_basic_packed *bstats
200 * 202 *
201 * Returns 0 on success or a negative error code. 203 * Returns 0 on success or a negative error code.
202 * 204 *
203 * NOTE: Called under rtnl_mutex
204 */ 205 */
205int gen_new_estimator(struct gnet_stats_basic_packed *bstats, 206int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
206 struct gnet_stats_rate_est *rate_est, 207 struct gnet_stats_rate_est *rate_est,
@@ -231,6 +232,7 @@ int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
231 est->last_packets = bstats->packets; 232 est->last_packets = bstats->packets;
232 est->avpps = rate_est->pps<<10; 233 est->avpps = rate_est->pps<<10;
233 234
235 spin_lock_bh(&est_tree_lock);
234 if (!elist[idx].timer.function) { 236 if (!elist[idx].timer.function) {
235 INIT_LIST_HEAD(&elist[idx].list); 237 INIT_LIST_HEAD(&elist[idx].list);
236 setup_timer(&elist[idx].timer, est_timer, idx); 238 setup_timer(&elist[idx].timer, est_timer, idx);
@@ -241,6 +243,7 @@ int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
241 243
242 list_add_rcu(&est->list, &elist[idx].list); 244 list_add_rcu(&est->list, &elist[idx].list);
243 gen_add_node(est); 245 gen_add_node(est);
246 spin_unlock_bh(&est_tree_lock);
244 247
245 return 0; 248 return 0;
246} 249}
@@ -260,13 +263,14 @@ static void __gen_kill_estimator(struct rcu_head *head)
260 * 263 *
261 * Removes the rate estimator specified by &bstats and &rate_est. 264 * Removes the rate estimator specified by &bstats and &rate_est.
262 * 265 *
263 * NOTE: Called under rtnl_mutex 266 * Note : Caller should respect an RCU grace period before freeing stats_lock
264 */ 267 */
265void gen_kill_estimator(struct gnet_stats_basic_packed *bstats, 268void gen_kill_estimator(struct gnet_stats_basic_packed *bstats,
266 struct gnet_stats_rate_est *rate_est) 269 struct gnet_stats_rate_est *rate_est)
267{ 270{
268 struct gen_estimator *e; 271 struct gen_estimator *e;
269 272
273 spin_lock_bh(&est_tree_lock);
270 while ((e = gen_find_node(bstats, rate_est))) { 274 while ((e = gen_find_node(bstats, rate_est))) {
271 rb_erase(&e->node, &est_root); 275 rb_erase(&e->node, &est_root);
272 276
@@ -277,6 +281,7 @@ void gen_kill_estimator(struct gnet_stats_basic_packed *bstats,
277 list_del_rcu(&e->list); 281 list_del_rcu(&e->list);
278 call_rcu(&e->e_rcu, __gen_kill_estimator); 282 call_rcu(&e->e_rcu, __gen_kill_estimator);
279 } 283 }
284 spin_unlock_bh(&est_tree_lock);
280} 285}
281EXPORT_SYMBOL(gen_kill_estimator); 286EXPORT_SYMBOL(gen_kill_estimator);
282 287
@@ -311,8 +316,14 @@ EXPORT_SYMBOL(gen_replace_estimator);
311bool gen_estimator_active(const struct gnet_stats_basic_packed *bstats, 316bool gen_estimator_active(const struct gnet_stats_basic_packed *bstats,
312 const struct gnet_stats_rate_est *rate_est) 317 const struct gnet_stats_rate_est *rate_est)
313{ 318{
319 bool res;
320
314 ASSERT_RTNL(); 321 ASSERT_RTNL();
315 322
316 return gen_find_node(bstats, rate_est) != NULL; 323 spin_lock_bh(&est_tree_lock);
324 res = gen_find_node(bstats, rate_est) != NULL;
325 spin_unlock_bh(&est_tree_lock);
326
327 return res;
317} 328}
318EXPORT_SYMBOL(gen_estimator_active); 329EXPORT_SYMBOL(gen_estimator_active);
diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c
index 393b1d8618e2..0452eb27a272 100644
--- a/net/core/gen_stats.c
+++ b/net/core/gen_stats.c
@@ -73,6 +73,7 @@ gnet_stats_start_copy_compat(struct sk_buff *skb, int type, int tc_stats_type,
73 73
74 return 0; 74 return 0;
75} 75}
76EXPORT_SYMBOL(gnet_stats_start_copy_compat);
76 77
77/** 78/**
78 * gnet_stats_start_copy_compat - start dumping procedure in compatibility mode 79 * gnet_stats_start_copy_compat - start dumping procedure in compatibility mode
@@ -93,6 +94,7 @@ gnet_stats_start_copy(struct sk_buff *skb, int type, spinlock_t *lock,
93{ 94{
94 return gnet_stats_start_copy_compat(skb, type, 0, 0, lock, d); 95 return gnet_stats_start_copy_compat(skb, type, 0, 0, lock, d);
95} 96}
97EXPORT_SYMBOL(gnet_stats_start_copy);
96 98
97/** 99/**
98 * gnet_stats_copy_basic - copy basic statistics into statistic TLV 100 * gnet_stats_copy_basic - copy basic statistics into statistic TLV
@@ -123,6 +125,7 @@ gnet_stats_copy_basic(struct gnet_dump *d, struct gnet_stats_basic_packed *b)
123 } 125 }
124 return 0; 126 return 0;
125} 127}
128EXPORT_SYMBOL(gnet_stats_copy_basic);
126 129
127/** 130/**
128 * gnet_stats_copy_rate_est - copy rate estimator statistics into statistics TLV 131 * gnet_stats_copy_rate_est - copy rate estimator statistics into statistics TLV
@@ -154,6 +157,7 @@ gnet_stats_copy_rate_est(struct gnet_dump *d,
154 157
155 return 0; 158 return 0;
156} 159}
160EXPORT_SYMBOL(gnet_stats_copy_rate_est);
157 161
158/** 162/**
159 * gnet_stats_copy_queue - copy queue statistics into statistics TLV 163 * gnet_stats_copy_queue - copy queue statistics into statistics TLV
@@ -181,6 +185,7 @@ gnet_stats_copy_queue(struct gnet_dump *d, struct gnet_stats_queue *q)
181 185
182 return 0; 186 return 0;
183} 187}
188EXPORT_SYMBOL(gnet_stats_copy_queue);
184 189
185/** 190/**
186 * gnet_stats_copy_app - copy application specific statistics into statistics TLV 191 * gnet_stats_copy_app - copy application specific statistics into statistics TLV
@@ -208,6 +213,7 @@ gnet_stats_copy_app(struct gnet_dump *d, void *st, int len)
208 213
209 return 0; 214 return 0;
210} 215}
216EXPORT_SYMBOL(gnet_stats_copy_app);
211 217
212/** 218/**
213 * gnet_stats_finish_copy - finish dumping procedure 219 * gnet_stats_finish_copy - finish dumping procedure
@@ -241,12 +247,4 @@ gnet_stats_finish_copy(struct gnet_dump *d)
241 spin_unlock_bh(d->lock); 247 spin_unlock_bh(d->lock);
242 return 0; 248 return 0;
243} 249}
244
245
246EXPORT_SYMBOL(gnet_stats_start_copy);
247EXPORT_SYMBOL(gnet_stats_start_copy_compat);
248EXPORT_SYMBOL(gnet_stats_copy_basic);
249EXPORT_SYMBOL(gnet_stats_copy_rate_est);
250EXPORT_SYMBOL(gnet_stats_copy_queue);
251EXPORT_SYMBOL(gnet_stats_copy_app);
252EXPORT_SYMBOL(gnet_stats_finish_copy); 250EXPORT_SYMBOL(gnet_stats_finish_copy);
diff --git a/net/core/iovec.c b/net/core/iovec.c
index 16ad45d4882b..e6b133b77ccb 100644
--- a/net/core/iovec.c
+++ b/net/core/iovec.c
@@ -20,7 +20,6 @@
20#include <linux/module.h> 20#include <linux/module.h>
21#include <linux/kernel.h> 21#include <linux/kernel.h>
22#include <linux/mm.h> 22#include <linux/mm.h>
23#include <linux/slab.h>
24#include <linux/net.h> 23#include <linux/net.h>
25#include <linux/in6.h> 24#include <linux/in6.h>
26#include <asm/uaccess.h> 25#include <asm/uaccess.h>
@@ -36,9 +35,10 @@
36 * in any case. 35 * in any case.
37 */ 36 */
38 37
39int verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr *address, int mode) 38long verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr *address, int mode)
40{ 39{
41 int size, err, ct; 40 int size, ct;
41 long err;
42 42
43 if (m->msg_namelen) { 43 if (m->msg_namelen) {
44 if (mode == VERIFY_READ) { 44 if (mode == VERIFY_READ) {
@@ -96,6 +96,7 @@ int memcpy_toiovec(struct iovec *iov, unsigned char *kdata, int len)
96 96
97 return 0; 97 return 0;
98} 98}
99EXPORT_SYMBOL(memcpy_toiovec);
99 100
100/* 101/*
101 * Copy kernel to iovec. Returns -EFAULT on error. 102 * Copy kernel to iovec. Returns -EFAULT on error.
@@ -121,6 +122,7 @@ int memcpy_toiovecend(const struct iovec *iov, unsigned char *kdata,
121 122
122 return 0; 123 return 0;
123} 124}
125EXPORT_SYMBOL(memcpy_toiovecend);
124 126
125/* 127/*
126 * Copy iovec to kernel. Returns -EFAULT on error. 128 * Copy iovec to kernel. Returns -EFAULT on error.
@@ -145,6 +147,7 @@ int memcpy_fromiovec(unsigned char *kdata, struct iovec *iov, int len)
145 147
146 return 0; 148 return 0;
147} 149}
150EXPORT_SYMBOL(memcpy_fromiovec);
148 151
149/* 152/*
150 * Copy iovec from kernel. Returns -EFAULT on error. 153 * Copy iovec from kernel. Returns -EFAULT on error.
@@ -173,6 +176,7 @@ int memcpy_fromiovecend(unsigned char *kdata, const struct iovec *iov,
173 176
174 return 0; 177 return 0;
175} 178}
179EXPORT_SYMBOL(memcpy_fromiovecend);
176 180
177/* 181/*
178 * And now for the all-in-one: copy and checksum from a user iovec 182 * And now for the all-in-one: copy and checksum from a user iovec
@@ -257,9 +261,4 @@ out_fault:
257 err = -EFAULT; 261 err = -EFAULT;
258 goto out; 262 goto out;
259} 263}
260
261EXPORT_SYMBOL(csum_partial_copy_fromiovecend); 264EXPORT_SYMBOL(csum_partial_copy_fromiovecend);
262EXPORT_SYMBOL(memcpy_fromiovec);
263EXPORT_SYMBOL(memcpy_fromiovecend);
264EXPORT_SYMBOL(memcpy_toiovec);
265EXPORT_SYMBOL(memcpy_toiovecend);
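A minimal caller sketch for the retyped verify_iovec() above; the helper name and the VERIFY_READ mode choice are illustrative assumptions, not taken from this patch. On success the function returns the total byte count of the iovec, now as a long:

#include <linux/socket.h>
#include <linux/uio.h>

static long example_prepare_msg(struct msghdr *msg, struct iovec *iov,
				struct sockaddr_storage *addr)
{
	long err;	/* was int before this change */

	err = verify_iovec(msg, iov, (struct sockaddr *)addr, VERIFY_READ);
	if (err < 0)
		return err;	/* bad user iovec or address */

	return err;		/* total number of bytes described by iov */
}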
diff --git a/net/core/link_watch.c b/net/core/link_watch.c
index 5910b555a54a..01a1101b5936 100644
--- a/net/core/link_watch.c
+++ b/net/core/link_watch.c
@@ -19,7 +19,6 @@
19#include <linux/rtnetlink.h> 19#include <linux/rtnetlink.h>
20#include <linux/jiffies.h> 20#include <linux/jiffies.h>
21#include <linux/spinlock.h> 21#include <linux/spinlock.h>
22#include <linux/slab.h>
23#include <linux/workqueue.h> 22#include <linux/workqueue.h>
24#include <linux/bitops.h> 23#include <linux/bitops.h>
25#include <asm/types.h> 24#include <asm/types.h>
@@ -244,5 +243,4 @@ void linkwatch_fire_event(struct net_device *dev)
244 243
245 linkwatch_schedule_work(urgent); 244 linkwatch_schedule_work(urgent);
246} 245}
247
248EXPORT_SYMBOL(linkwatch_fire_event); 246EXPORT_SYMBOL(linkwatch_fire_event);
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index f35377b643e4..a4e0a7482c2b 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -15,6 +15,7 @@
15 * Harald Welte Add neighbour cache statistics like rtstat 15 * Harald Welte Add neighbour cache statistics like rtstat
16 */ 16 */
17 17
18#include <linux/slab.h>
18#include <linux/types.h> 19#include <linux/types.h>
19#include <linux/kernel.h> 20#include <linux/kernel.h>
20#include <linux/module.h> 21#include <linux/module.h>
@@ -771,6 +772,8 @@ static __inline__ int neigh_max_probes(struct neighbour *n)
771} 772}
772 773
773static void neigh_invalidate(struct neighbour *neigh) 774static void neigh_invalidate(struct neighbour *neigh)
775 __releases(neigh->lock)
776 __acquires(neigh->lock)
774{ 777{
775 struct sk_buff *skb; 778 struct sk_buff *skb;
776 779
@@ -931,6 +934,7 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
931 kfree_skb(buff); 934 kfree_skb(buff);
932 NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards); 935 NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
933 } 936 }
937 skb_dst_force(skb);
934 __skb_queue_tail(&neigh->arp_queue, skb); 938 __skb_queue_tail(&neigh->arp_queue, skb);
935 } 939 }
936 rc = 1; 940 rc = 1;
@@ -945,7 +949,10 @@ static void neigh_update_hhs(struct neighbour *neigh)
945{ 949{
946 struct hh_cache *hh; 950 struct hh_cache *hh;
947 void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *) 951 void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
948 = neigh->dev->header_ops->cache_update; 952 = NULL;
953
954 if (neigh->dev->header_ops)
955 update = neigh->dev->header_ops->cache_update;
949 956
950 if (update) { 957 if (update) {
951 for (hh = neigh->hh; hh; hh = hh->hh_next) { 958 for (hh = neigh->hh; hh; hh = hh->hh_next) {
@@ -2417,8 +2424,7 @@ EXPORT_SYMBOL(neigh_seq_stop);
2417 2424
2418static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos) 2425static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
2419{ 2426{
2420 struct proc_dir_entry *pde = seq->private; 2427 struct neigh_table *tbl = seq->private;
2421 struct neigh_table *tbl = pde->data;
2422 int cpu; 2428 int cpu;
2423 2429
2424 if (*pos == 0) 2430 if (*pos == 0)
@@ -2435,8 +2441,7 @@ static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
2435 2441
2436static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos) 2442static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2437{ 2443{
2438 struct proc_dir_entry *pde = seq->private; 2444 struct neigh_table *tbl = seq->private;
2439 struct neigh_table *tbl = pde->data;
2440 int cpu; 2445 int cpu;
2441 2446
2442 for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) { 2447 for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
@@ -2455,8 +2460,7 @@ static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
2455 2460
2456static int neigh_stat_seq_show(struct seq_file *seq, void *v) 2461static int neigh_stat_seq_show(struct seq_file *seq, void *v)
2457{ 2462{
2458 struct proc_dir_entry *pde = seq->private; 2463 struct neigh_table *tbl = seq->private;
2459 struct neigh_table *tbl = pde->data;
2460 struct neigh_statistics *st = v; 2464 struct neigh_statistics *st = v;
2461 2465
2462 if (v == SEQ_START_TOKEN) { 2466 if (v == SEQ_START_TOKEN) {
@@ -2501,7 +2505,7 @@ static int neigh_stat_seq_open(struct inode *inode, struct file *file)
2501 2505
2502 if (!ret) { 2506 if (!ret) {
2503 struct seq_file *sf = file->private_data; 2507 struct seq_file *sf = file->private_data;
2504 sf->private = PDE(inode); 2508 sf->private = PDE(inode)->data;
2505 } 2509 }
2506 return ret; 2510 return ret;
2507}; 2511};
@@ -2559,9 +2563,11 @@ EXPORT_SYMBOL(neigh_app_ns);
2559 2563
2560#ifdef CONFIG_SYSCTL 2564#ifdef CONFIG_SYSCTL
2561 2565
2566#define NEIGH_VARS_MAX 19
2567
2562static struct neigh_sysctl_table { 2568static struct neigh_sysctl_table {
2563 struct ctl_table_header *sysctl_header; 2569 struct ctl_table_header *sysctl_header;
2564 struct ctl_table neigh_vars[__NET_NEIGH_MAX]; 2570 struct ctl_table neigh_vars[NEIGH_VARS_MAX];
2565 char *dev_name; 2571 char *dev_name;
2566} neigh_sysctl_template __read_mostly = { 2572} neigh_sysctl_template __read_mostly = {
2567 .neigh_vars = { 2573 .neigh_vars = {
@@ -2678,8 +2684,7 @@ static struct neigh_sysctl_table {
2678}; 2684};
2679 2685
2680int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p, 2686int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
2681 int p_id, int pdev_id, char *p_name, 2687 char *p_name, proc_handler *handler)
2682 proc_handler *handler)
2683{ 2688{
2684 struct neigh_sysctl_table *t; 2689 struct neigh_sysctl_table *t;
2685 const char *dev_name_source = NULL; 2690 const char *dev_name_source = NULL;
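A hedged sketch of a caller adapting to the trimmed neigh_sysctl_register() signature above; the function name and the "ipv4" table name are placeholders, and passing a NULL proc_handler is an assumption based on the template's built-in handlers:

#include <linux/netdevice.h>
#include <net/neighbour.h>

static int example_neigh_sysctl_init(struct net_device *dev,
				     struct neigh_parms *p)
{
	/* The sysctl binary ids (p_id, pdev_id) are gone; only the
	 * procfs directory name and an optional handler remain. */
	return neigh_sysctl_register(dev, p, "ipv4", NULL);
}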
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index fbc1c7472c5e..af4dfbadf2a0 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -13,9 +13,13 @@
13#include <linux/kernel.h> 13#include <linux/kernel.h>
14#include <linux/netdevice.h> 14#include <linux/netdevice.h>
15#include <linux/if_arp.h> 15#include <linux/if_arp.h>
16#include <linux/slab.h>
17#include <linux/nsproxy.h>
16#include <net/sock.h> 18#include <net/sock.h>
19#include <net/net_namespace.h>
17#include <linux/rtnetlink.h> 20#include <linux/rtnetlink.h>
18#include <linux/wireless.h> 21#include <linux/wireless.h>
22#include <linux/vmalloc.h>
19#include <net/wext.h> 23#include <net/wext.h>
20 24
21#include "net-sysfs.h" 25#include "net-sysfs.h"
@@ -25,6 +29,7 @@ static const char fmt_hex[] = "%#x\n";
25static const char fmt_long_hex[] = "%#lx\n"; 29static const char fmt_long_hex[] = "%#lx\n";
26static const char fmt_dec[] = "%d\n"; 30static const char fmt_dec[] = "%d\n";
27static const char fmt_ulong[] = "%lu\n"; 31static const char fmt_ulong[] = "%lu\n";
32static const char fmt_u64[] = "%llu\n";
28 33
29static inline int dev_isalive(const struct net_device *dev) 34static inline int dev_isalive(const struct net_device *dev)
30{ 35{
@@ -90,6 +95,7 @@ static ssize_t netdev_store(struct device *dev, struct device_attribute *attr,
90} 95}
91 96
92NETDEVICE_SHOW(dev_id, fmt_hex); 97NETDEVICE_SHOW(dev_id, fmt_hex);
98NETDEVICE_SHOW(addr_assign_type, fmt_dec);
93NETDEVICE_SHOW(addr_len, fmt_dec); 99NETDEVICE_SHOW(addr_len, fmt_dec);
94NETDEVICE_SHOW(iflink, fmt_dec); 100NETDEVICE_SHOW(iflink, fmt_dec);
95NETDEVICE_SHOW(ifindex, fmt_dec); 101NETDEVICE_SHOW(ifindex, fmt_dec);
@@ -290,6 +296,7 @@ static ssize_t show_ifalias(struct device *dev,
290} 296}
291 297
292static struct device_attribute net_class_attributes[] = { 298static struct device_attribute net_class_attributes[] = {
299 __ATTR(addr_assign_type, S_IRUGO, show_addr_assign_type, NULL),
293 __ATTR(addr_len, S_IRUGO, show_addr_len, NULL), 300 __ATTR(addr_len, S_IRUGO, show_addr_len, NULL),
294 __ATTR(dev_id, S_IRUGO, show_dev_id, NULL), 301 __ATTR(dev_id, S_IRUGO, show_dev_id, NULL),
295 __ATTR(ifalias, S_IRUGO | S_IWUSR, show_ifalias, store_ifalias), 302 __ATTR(ifalias, S_IRUGO | S_IWUSR, show_ifalias, store_ifalias),
@@ -320,14 +327,15 @@ static ssize_t netstat_show(const struct device *d,
320 struct net_device *dev = to_net_dev(d); 327 struct net_device *dev = to_net_dev(d);
321 ssize_t ret = -EINVAL; 328 ssize_t ret = -EINVAL;
322 329
323 WARN_ON(offset > sizeof(struct net_device_stats) || 330 WARN_ON(offset > sizeof(struct rtnl_link_stats64) ||
324 offset % sizeof(unsigned long) != 0); 331 offset % sizeof(u64) != 0);
325 332
326 read_lock(&dev_base_lock); 333 read_lock(&dev_base_lock);
327 if (dev_isalive(dev)) { 334 if (dev_isalive(dev)) {
328 const struct net_device_stats *stats = dev_get_stats(dev); 335 struct rtnl_link_stats64 temp;
329 ret = sprintf(buf, fmt_ulong, 336 const struct rtnl_link_stats64 *stats = dev_get_stats(dev, &temp);
330 *(unsigned long *)(((u8 *) stats) + offset)); 337
338 ret = sprintf(buf, fmt_u64, *(u64 *)(((u8 *) stats) + offset));
331 } 339 }
332 read_unlock(&dev_base_lock); 340 read_unlock(&dev_base_lock);
333 return ret; 341 return ret;
@@ -339,7 +347,7 @@ static ssize_t show_##name(struct device *d, \
339 struct device_attribute *attr, char *buf) \ 347 struct device_attribute *attr, char *buf) \
340{ \ 348{ \
341 return netstat_show(d, attr, buf, \ 349 return netstat_show(d, attr, buf, \
342 offsetof(struct net_device_stats, name)); \ 350 offsetof(struct rtnl_link_stats64, name)); \
343} \ 351} \
344static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL) 352static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL)
345 353
@@ -410,7 +418,8 @@ static ssize_t wireless_show(struct device *d, char *buf,
410 const struct iw_statistics *iw; 418 const struct iw_statistics *iw;
411 ssize_t ret = -EINVAL; 419 ssize_t ret = -EINVAL;
412 420
413 rtnl_lock(); 421 if (!rtnl_trylock())
422 return restart_syscall();
414 if (dev_isalive(dev)) { 423 if (dev_isalive(dev)) {
415 iw = get_wireless_stats(dev); 424 iw = get_wireless_stats(dev);
416 if (iw) 425 if (iw)
@@ -464,18 +473,345 @@ static struct attribute_group wireless_group = {
464 .attrs = wireless_attrs, 473 .attrs = wireless_attrs,
465}; 474};
466#endif 475#endif
467
468#endif /* CONFIG_SYSFS */ 476#endif /* CONFIG_SYSFS */
469 477
478#ifdef CONFIG_RPS
479/*
480 * RX queue sysfs structures and functions.
481 */
482struct rx_queue_attribute {
483 struct attribute attr;
484 ssize_t (*show)(struct netdev_rx_queue *queue,
485 struct rx_queue_attribute *attr, char *buf);
486 ssize_t (*store)(struct netdev_rx_queue *queue,
487 struct rx_queue_attribute *attr, const char *buf, size_t len);
488};
489#define to_rx_queue_attr(_attr) container_of(_attr, \
490 struct rx_queue_attribute, attr)
491
492#define to_rx_queue(obj) container_of(obj, struct netdev_rx_queue, kobj)
493
494static ssize_t rx_queue_attr_show(struct kobject *kobj, struct attribute *attr,
495 char *buf)
496{
497 struct rx_queue_attribute *attribute = to_rx_queue_attr(attr);
498 struct netdev_rx_queue *queue = to_rx_queue(kobj);
499
500 if (!attribute->show)
501 return -EIO;
502
503 return attribute->show(queue, attribute, buf);
504}
505
506static ssize_t rx_queue_attr_store(struct kobject *kobj, struct attribute *attr,
507 const char *buf, size_t count)
508{
509 struct rx_queue_attribute *attribute = to_rx_queue_attr(attr);
510 struct netdev_rx_queue *queue = to_rx_queue(kobj);
511
512 if (!attribute->store)
513 return -EIO;
514
515 return attribute->store(queue, attribute, buf, count);
516}
517
518static struct sysfs_ops rx_queue_sysfs_ops = {
519 .show = rx_queue_attr_show,
520 .store = rx_queue_attr_store,
521};
522
523static ssize_t show_rps_map(struct netdev_rx_queue *queue,
524 struct rx_queue_attribute *attribute, char *buf)
525{
526 struct rps_map *map;
527 cpumask_var_t mask;
528 size_t len = 0;
529 int i;
530
531 if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
532 return -ENOMEM;
533
534 rcu_read_lock();
535 map = rcu_dereference(queue->rps_map);
536 if (map)
537 for (i = 0; i < map->len; i++)
538 cpumask_set_cpu(map->cpus[i], mask);
539
540 len += cpumask_scnprintf(buf + len, PAGE_SIZE, mask);
541 if (PAGE_SIZE - len < 3) {
542 rcu_read_unlock();
543 free_cpumask_var(mask);
544 return -EINVAL;
545 }
546 rcu_read_unlock();
547
548 free_cpumask_var(mask);
549 len += sprintf(buf + len, "\n");
550 return len;
551}
552
553static void rps_map_release(struct rcu_head *rcu)
554{
555 struct rps_map *map = container_of(rcu, struct rps_map, rcu);
556
557 kfree(map);
558}
559
560static ssize_t store_rps_map(struct netdev_rx_queue *queue,
561 struct rx_queue_attribute *attribute,
562 const char *buf, size_t len)
563{
564 struct rps_map *old_map, *map;
565 cpumask_var_t mask;
566 int err, cpu, i;
567 static DEFINE_SPINLOCK(rps_map_lock);
568
569 if (!capable(CAP_NET_ADMIN))
570 return -EPERM;
571
572 if (!alloc_cpumask_var(&mask, GFP_KERNEL))
573 return -ENOMEM;
574
575 err = bitmap_parse(buf, len, cpumask_bits(mask), nr_cpumask_bits);
576 if (err) {
577 free_cpumask_var(mask);
578 return err;
579 }
580
581 map = kzalloc(max_t(unsigned,
582 RPS_MAP_SIZE(cpumask_weight(mask)), L1_CACHE_BYTES),
583 GFP_KERNEL);
584 if (!map) {
585 free_cpumask_var(mask);
586 return -ENOMEM;
587 }
588
589 i = 0;
590 for_each_cpu_and(cpu, mask, cpu_online_mask)
591 map->cpus[i++] = cpu;
592
593 if (i)
594 map->len = i;
595 else {
596 kfree(map);
597 map = NULL;
598 }
599
600 spin_lock(&rps_map_lock);
601 old_map = queue->rps_map;
602 rcu_assign_pointer(queue->rps_map, map);
603 spin_unlock(&rps_map_lock);
604
605 if (old_map)
606 call_rcu(&old_map->rcu, rps_map_release);
607
608 free_cpumask_var(mask);
609 return len;
610}
611
612static ssize_t show_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue,
613 struct rx_queue_attribute *attr,
614 char *buf)
615{
616 struct rps_dev_flow_table *flow_table;
617 unsigned int val = 0;
618
619 rcu_read_lock();
620 flow_table = rcu_dereference(queue->rps_flow_table);
621 if (flow_table)
622 val = flow_table->mask + 1;
623 rcu_read_unlock();
624
625 return sprintf(buf, "%u\n", val);
626}
627
628static void rps_dev_flow_table_release_work(struct work_struct *work)
629{
630 struct rps_dev_flow_table *table = container_of(work,
631 struct rps_dev_flow_table, free_work);
632
633 vfree(table);
634}
635
636static void rps_dev_flow_table_release(struct rcu_head *rcu)
637{
638 struct rps_dev_flow_table *table = container_of(rcu,
639 struct rps_dev_flow_table, rcu);
640
641 INIT_WORK(&table->free_work, rps_dev_flow_table_release_work);
642 schedule_work(&table->free_work);
643}
644
645static ssize_t store_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue,
646 struct rx_queue_attribute *attr,
647 const char *buf, size_t len)
648{
649 unsigned int count;
650 char *endp;
651 struct rps_dev_flow_table *table, *old_table;
652 static DEFINE_SPINLOCK(rps_dev_flow_lock);
653
654 if (!capable(CAP_NET_ADMIN))
655 return -EPERM;
656
657 count = simple_strtoul(buf, &endp, 0);
658 if (endp == buf)
659 return -EINVAL;
660
661 if (count) {
662 int i;
663
664 if (count > 1<<30) {
665 /* Enforce a limit to prevent overflow */
666 return -EINVAL;
667 }
668 count = roundup_pow_of_two(count);
669 table = vmalloc(RPS_DEV_FLOW_TABLE_SIZE(count));
670 if (!table)
671 return -ENOMEM;
672
673 table->mask = count - 1;
674 for (i = 0; i < count; i++)
675 table->flows[i].cpu = RPS_NO_CPU;
676 } else
677 table = NULL;
678
679 spin_lock(&rps_dev_flow_lock);
680 old_table = queue->rps_flow_table;
681 rcu_assign_pointer(queue->rps_flow_table, table);
682 spin_unlock(&rps_dev_flow_lock);
683
684 if (old_table)
685 call_rcu(&old_table->rcu, rps_dev_flow_table_release);
686
687 return len;
688}
689
690static struct rx_queue_attribute rps_cpus_attribute =
691 __ATTR(rps_cpus, S_IRUGO | S_IWUSR, show_rps_map, store_rps_map);
692
693
694static struct rx_queue_attribute rps_dev_flow_table_cnt_attribute =
695 __ATTR(rps_flow_cnt, S_IRUGO | S_IWUSR,
696 show_rps_dev_flow_table_cnt, store_rps_dev_flow_table_cnt);
697
698static struct attribute *rx_queue_default_attrs[] = {
699 &rps_cpus_attribute.attr,
700 &rps_dev_flow_table_cnt_attribute.attr,
701 NULL
702};
703
704static void rx_queue_release(struct kobject *kobj)
705{
706 struct netdev_rx_queue *queue = to_rx_queue(kobj);
707 struct netdev_rx_queue *first = queue->first;
708
709 if (queue->rps_map)
710 call_rcu(&queue->rps_map->rcu, rps_map_release);
711
712 if (queue->rps_flow_table)
713 call_rcu(&queue->rps_flow_table->rcu,
714 rps_dev_flow_table_release);
715
716 if (atomic_dec_and_test(&first->count))
717 kfree(first);
718}
719
720static struct kobj_type rx_queue_ktype = {
721 .sysfs_ops = &rx_queue_sysfs_ops,
722 .release = rx_queue_release,
723 .default_attrs = rx_queue_default_attrs,
724};
725
726static int rx_queue_add_kobject(struct net_device *net, int index)
727{
728 struct netdev_rx_queue *queue = net->_rx + index;
729 struct kobject *kobj = &queue->kobj;
730 int error = 0;
731
732 kobj->kset = net->queues_kset;
733 error = kobject_init_and_add(kobj, &rx_queue_ktype, NULL,
734 "rx-%u", index);
735 if (error) {
736 kobject_put(kobj);
737 return error;
738 }
739
740 kobject_uevent(kobj, KOBJ_ADD);
741
742 return error;
743}
744
745static int rx_queue_register_kobjects(struct net_device *net)
746{
747 int i;
748 int error = 0;
749
750 net->queues_kset = kset_create_and_add("queues",
751 NULL, &net->dev.kobj);
752 if (!net->queues_kset)
753 return -ENOMEM;
754 for (i = 0; i < net->num_rx_queues; i++) {
755 error = rx_queue_add_kobject(net, i);
756 if (error)
757 break;
758 }
759
760 if (error)
761 while (--i >= 0)
762 kobject_put(&net->_rx[i].kobj);
763
764 return error;
765}
766
767static void rx_queue_remove_kobjects(struct net_device *net)
768{
769 int i;
770
771 for (i = 0; i < net->num_rx_queues; i++)
772 kobject_put(&net->_rx[i].kobj);
773 kset_unregister(net->queues_kset);
774}
775#endif /* CONFIG_RPS */
776
777static const void *net_current_ns(void)
778{
779 return current->nsproxy->net_ns;
780}
781
782static const void *net_initial_ns(void)
783{
784 return &init_net;
785}
786
787static const void *net_netlink_ns(struct sock *sk)
788{
789 return sock_net(sk);
790}
791
792static struct kobj_ns_type_operations net_ns_type_operations = {
793 .type = KOBJ_NS_TYPE_NET,
794 .current_ns = net_current_ns,
795 .netlink_ns = net_netlink_ns,
796 .initial_ns = net_initial_ns,
797};
798
799static void net_kobj_ns_exit(struct net *net)
800{
801 kobj_ns_exit(KOBJ_NS_TYPE_NET, net);
802}
803
804static struct pernet_operations kobj_net_ops = {
805 .exit = net_kobj_ns_exit,
806};
807
808
470#ifdef CONFIG_HOTPLUG 809#ifdef CONFIG_HOTPLUG
471static int netdev_uevent(struct device *d, struct kobj_uevent_env *env) 810static int netdev_uevent(struct device *d, struct kobj_uevent_env *env)
472{ 811{
473 struct net_device *dev = to_net_dev(d); 812 struct net_device *dev = to_net_dev(d);
474 int retval; 813 int retval;
475 814
476 if (!net_eq(dev_net(dev), &init_net))
477 return 0;
478
479 /* pass interface to uevent. */ 815 /* pass interface to uevent. */
480 retval = add_uevent_var(env, "INTERFACE=%s", dev->name); 816 retval = add_uevent_var(env, "INTERFACE=%s", dev->name);
481 if (retval) 817 if (retval)
@@ -505,6 +841,13 @@ static void netdev_release(struct device *d)
505 kfree((char *)dev - dev->padded); 841 kfree((char *)dev - dev->padded);
506} 842}
507 843
844static const void *net_namespace(struct device *d)
845{
846 struct net_device *dev;
847 dev = container_of(d, struct net_device, dev);
848 return dev_net(dev);
849}
850
508static struct class net_class = { 851static struct class net_class = {
509 .name = "net", 852 .name = "net",
510 .dev_release = netdev_release, 853 .dev_release = netdev_release,
@@ -514,6 +857,8 @@ static struct class net_class = {
514#ifdef CONFIG_HOTPLUG 857#ifdef CONFIG_HOTPLUG
515 .dev_uevent = netdev_uevent, 858 .dev_uevent = netdev_uevent,
516#endif 859#endif
860 .ns_type = &net_ns_type_operations,
861 .namespace = net_namespace,
517}; 862};
518 863
519/* Delete sysfs entries but hold kobject reference until after all 864/* Delete sysfs entries but hold kobject reference until after all
@@ -525,8 +870,9 @@ void netdev_unregister_kobject(struct net_device * net)
525 870
526 kobject_get(&dev->kobj); 871 kobject_get(&dev->kobj);
527 872
528 if (!net_eq(dev_net(net), &init_net)) 873#ifdef CONFIG_RPS
529 return; 874 rx_queue_remove_kobjects(net);
875#endif
530 876
531 device_del(dev); 877 device_del(dev);
532} 878}
@@ -536,7 +882,9 @@ int netdev_register_kobject(struct net_device *net)
536{ 882{
537 struct device *dev = &(net->dev); 883 struct device *dev = &(net->dev);
538 const struct attribute_group **groups = net->sysfs_groups; 884 const struct attribute_group **groups = net->sysfs_groups;
885 int error = 0;
539 886
887 device_initialize(dev);
540 dev->class = &net_class; 888 dev->class = &net_class;
541 dev->platform_data = net; 889 dev->platform_data = net;
542 dev->groups = groups; 890 dev->groups = groups;
@@ -559,32 +907,36 @@ int netdev_register_kobject(struct net_device *net)
559#endif 907#endif
560#endif /* CONFIG_SYSFS */ 908#endif /* CONFIG_SYSFS */
561 909
562 if (!net_eq(dev_net(net), &init_net)) 910 error = device_add(dev);
563 return 0; 911 if (error)
912 return error;
913
914#ifdef CONFIG_RPS
915 error = rx_queue_register_kobjects(net);
916 if (error) {
917 device_del(dev);
918 return error;
919 }
920#endif
564 921
565 return device_add(dev); 922 return error;
566} 923}
567 924
568int netdev_class_create_file(struct class_attribute *class_attr) 925int netdev_class_create_file(struct class_attribute *class_attr)
569{ 926{
570 return class_create_file(&net_class, class_attr); 927 return class_create_file(&net_class, class_attr);
571} 928}
929EXPORT_SYMBOL(netdev_class_create_file);
572 930
573void netdev_class_remove_file(struct class_attribute *class_attr) 931void netdev_class_remove_file(struct class_attribute *class_attr)
574{ 932{
575 class_remove_file(&net_class, class_attr); 933 class_remove_file(&net_class, class_attr);
576} 934}
577
578EXPORT_SYMBOL(netdev_class_create_file);
579EXPORT_SYMBOL(netdev_class_remove_file); 935EXPORT_SYMBOL(netdev_class_remove_file);
580 936
581void netdev_initialize_kobject(struct net_device *net)
582{
583 struct device *device = &(net->dev);
584 device_initialize(device);
585}
586
587int netdev_kobject_init(void) 937int netdev_kobject_init(void)
588{ 938{
939 kobj_ns_type_register(&net_ns_type_operations);
940 register_pernet_subsys(&kobj_net_ops);
589 return class_register(&net_class); 941 return class_register(&net_class);
590} 942}
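The new per-rx-queue attributes land in the netdev's "queues" kset, so they appear as /sys/class/net/<dev>/queues/rx-<n>/rps_cpus and rps_flow_cnt. A small user-space sketch of writing the RPS cpumask; the interface name "eth0" and the mask value are examples only:

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	const char *path = "/sys/class/net/eth0/queues/rx-0/rps_cpus";
	const char *mask = "f\n";	/* hex cpumask: CPUs 0-3 */
	int fd = open(path, O_WRONLY);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	/* store_rps_map() parses this with bitmap_parse() and swaps the
	 * queue's rps_map in under RCU. */
	if (write(fd, mask, strlen(mask)) < 0)
		perror("write");
	close(fd);
	return 0;
}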
diff --git a/net/core/net-sysfs.h b/net/core/net-sysfs.h
index 14e7524260b3..805555e8b187 100644
--- a/net/core/net-sysfs.h
+++ b/net/core/net-sysfs.h
@@ -4,5 +4,4 @@
4int netdev_kobject_init(void); 4int netdev_kobject_init(void);
5int netdev_register_kobject(struct net_device *); 5int netdev_register_kobject(struct net_device *);
6void netdev_unregister_kobject(struct net_device *); 6void netdev_unregister_kobject(struct net_device *);
7void netdev_initialize_kobject(struct net_device *);
8#endif 7#endif
diff --git a/net/core/net-traces.c b/net/core/net-traces.c
index f1e982c508bb..afa6380ed88a 100644
--- a/net/core/net-traces.c
+++ b/net/core/net-traces.c
@@ -19,6 +19,7 @@
19#include <linux/workqueue.h> 19#include <linux/workqueue.h>
20#include <linux/netlink.h> 20#include <linux/netlink.h>
21#include <linux/net_dropmon.h> 21#include <linux/net_dropmon.h>
22#include <linux/slab.h>
22 23
23#include <asm/unaligned.h> 24#include <asm/unaligned.h>
24#include <asm/bitops.h> 25#include <asm/bitops.h>
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index bd8c4712ea24..c988e685433a 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -27,6 +27,51 @@ EXPORT_SYMBOL(init_net);
27 27
28#define INITIAL_NET_GEN_PTRS 13 /* +1 for len +2 for rcu_head */ 28#define INITIAL_NET_GEN_PTRS 13 /* +1 for len +2 for rcu_head */
29 29
30static void net_generic_release(struct rcu_head *rcu)
31{
32 struct net_generic *ng;
33
34 ng = container_of(rcu, struct net_generic, rcu);
35 kfree(ng);
36}
37
38static int net_assign_generic(struct net *net, int id, void *data)
39{
40 struct net_generic *ng, *old_ng;
41
42 BUG_ON(!mutex_is_locked(&net_mutex));
43 BUG_ON(id == 0);
44
45 ng = old_ng = net->gen;
46 if (old_ng->len >= id)
47 goto assign;
48
49 ng = kzalloc(sizeof(struct net_generic) +
50 id * sizeof(void *), GFP_KERNEL);
51 if (ng == NULL)
52 return -ENOMEM;
53
54 /*
55 * Some synchronisation notes:
56 *
57 * The net_generic explores the net->gen array inside rcu
58 * read section. Besides once set the net->gen->ptr[x]
59 * pointer never changes (see rules in netns/generic.h).
60 *
61 * That said, we simply duplicate this array and schedule
62 * the old copy for kfree after a grace period.
63 */
64
65 ng->len = id;
66 memcpy(&ng->ptr, &old_ng->ptr, old_ng->len * sizeof(void*));
67
68 rcu_assign_pointer(net->gen, ng);
69 call_rcu(&old_ng->rcu, net_generic_release);
70assign:
71 ng->ptr[id - 1] = data;
72 return 0;
73}
74
30static int ops_init(const struct pernet_operations *ops, struct net *net) 75static int ops_init(const struct pernet_operations *ops, struct net *net)
31{ 76{
32 int err; 77 int err;
@@ -469,10 +514,10 @@ EXPORT_SYMBOL_GPL(register_pernet_subsys);
469 * addition run the exit method for all existing network 514 * addition run the exit method for all existing network
470 * namespaces. 515 * namespaces.
471 */ 516 */
472void unregister_pernet_subsys(struct pernet_operations *module) 517void unregister_pernet_subsys(struct pernet_operations *ops)
473{ 518{
474 mutex_lock(&net_mutex); 519 mutex_lock(&net_mutex);
475 unregister_pernet_operations(module); 520 unregister_pernet_operations(ops);
476 mutex_unlock(&net_mutex); 521 mutex_unlock(&net_mutex);
477} 522}
478EXPORT_SYMBOL_GPL(unregister_pernet_subsys); 523EXPORT_SYMBOL_GPL(unregister_pernet_subsys);
@@ -526,49 +571,3 @@ void unregister_pernet_device(struct pernet_operations *ops)
526 mutex_unlock(&net_mutex); 571 mutex_unlock(&net_mutex);
527} 572}
528EXPORT_SYMBOL_GPL(unregister_pernet_device); 573EXPORT_SYMBOL_GPL(unregister_pernet_device);
529
530static void net_generic_release(struct rcu_head *rcu)
531{
532 struct net_generic *ng;
533
534 ng = container_of(rcu, struct net_generic, rcu);
535 kfree(ng);
536}
537
538int net_assign_generic(struct net *net, int id, void *data)
539{
540 struct net_generic *ng, *old_ng;
541
542 BUG_ON(!mutex_is_locked(&net_mutex));
543 BUG_ON(id == 0);
544
545 ng = old_ng = net->gen;
546 if (old_ng->len >= id)
547 goto assign;
548
549 ng = kzalloc(sizeof(struct net_generic) +
550 id * sizeof(void *), GFP_KERNEL);
551 if (ng == NULL)
552 return -ENOMEM;
553
554 /*
555 * Some synchronisation notes:
556 *
557 * The net_generic explores the net->gen array inside rcu
558 * read section. Besides once set the net->gen->ptr[x]
559 * pointer never changes (see rules in netns/generic.h).
560 *
561 * That said, we simply duplicate this array and schedule
562 * the old copy for kfree after a grace period.
563 */
564
565 ng->len = id;
566 memcpy(&ng->ptr, &old_ng->ptr, old_ng->len * sizeof(void*));
567
568 rcu_assign_pointer(net->gen, ng);
569 call_rcu(&old_ng->rcu, net_generic_release);
570assign:
571 ng->ptr[id - 1] = data;
572 return 0;
573}
574EXPORT_SYMBOL_GPL(net_assign_generic);
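With net_assign_generic() now static, per-netns state is obtained through the .id/.size fields of pernet_operations and read back with net_generic(). A hedged sketch; "foo" and its field are illustrative names only:

#include <net/net_namespace.h>
#include <net/netns/generic.h>

struct foo_net {
	int sessions;
};

static int foo_net_id __read_mostly;

static int __net_init foo_net_init(struct net *net)
{
	struct foo_net *fn = net_generic(net, foo_net_id);

	fn->sessions = 0;	/* storage allocated from .size by ops_init() */
	return 0;
}

static struct pernet_operations foo_net_ops = {
	.init = foo_net_init,
	.id   = &foo_net_id,
	.size = sizeof(struct foo_net),
};

/* module init: return register_pernet_subsys(&foo_net_ops); */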
diff --git a/net/core/netevent.c b/net/core/netevent.c
index 95f81de87502..865f0ceb81fb 100644
--- a/net/core/netevent.c
+++ b/net/core/netevent.c
@@ -35,6 +35,7 @@ int register_netevent_notifier(struct notifier_block *nb)
35 err = atomic_notifier_chain_register(&netevent_notif_chain, nb); 35 err = atomic_notifier_chain_register(&netevent_notif_chain, nb);
36 return err; 36 return err;
37} 37}
38EXPORT_SYMBOL_GPL(register_netevent_notifier);
38 39
39/** 40/**
40 * netevent_unregister_notifier - unregister a netevent notifier block 41 * netevent_unregister_notifier - unregister a netevent notifier block
@@ -50,6 +51,7 @@ int unregister_netevent_notifier(struct notifier_block *nb)
50{ 51{
51 return atomic_notifier_chain_unregister(&netevent_notif_chain, nb); 52 return atomic_notifier_chain_unregister(&netevent_notif_chain, nb);
52} 53}
54EXPORT_SYMBOL_GPL(unregister_netevent_notifier);
53 55
54/** 56/**
55 * call_netevent_notifiers - call all netevent notifier blocks 57 * call_netevent_notifiers - call all netevent notifier blocks
@@ -64,7 +66,4 @@ int call_netevent_notifiers(unsigned long val, void *v)
64{ 66{
65 return atomic_notifier_call_chain(&netevent_notif_chain, val, v); 67 return atomic_notifier_call_chain(&netevent_notif_chain, val, v);
66} 68}
67
68EXPORT_SYMBOL_GPL(register_netevent_notifier);
69EXPORT_SYMBOL_GPL(unregister_netevent_notifier);
70EXPORT_SYMBOL_GPL(call_netevent_notifiers); 69EXPORT_SYMBOL_GPL(call_netevent_notifiers);
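A hedged consumer sketch for the notifier API whose exports move above; the callback name and the event it reacts to are illustrative:

#include <linux/kernel.h>
#include <linux/notifier.h>
#include <net/netevent.h>

static int example_netevent_cb(struct notifier_block *nb,
			       unsigned long event, void *ptr)
{
	if (event == NETEVENT_NEIGH_UPDATE)
		pr_debug("neighbour %p updated\n", ptr);
	return NOTIFY_DONE;
}

static struct notifier_block example_netevent_nb = {
	.notifier_call = example_netevent_cb,
};

/* register_netevent_notifier(&example_netevent_nb);
 * ...
 * unregister_netevent_notifier(&example_netevent_nb); */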
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 0b4d0d35ef40..537e01afd81b 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -22,6 +22,7 @@
22#include <linux/delay.h> 22#include <linux/delay.h>
23#include <linux/rcupdate.h> 23#include <linux/rcupdate.h>
24#include <linux/workqueue.h> 24#include <linux/workqueue.h>
25#include <linux/slab.h>
25#include <net/tcp.h> 26#include <net/tcp.h>
26#include <net/udp.h> 27#include <net/udp.h>
27#include <asm/unaligned.h> 28#include <asm/unaligned.h>
@@ -178,9 +179,8 @@ static void service_arp_queue(struct netpoll_info *npi)
178 } 179 }
179} 180}
180 181
181void netpoll_poll(struct netpoll *np) 182void netpoll_poll_dev(struct net_device *dev)
182{ 183{
183 struct net_device *dev = np->dev;
184 const struct net_device_ops *ops; 184 const struct net_device_ops *ops;
185 185
186 if (!dev || !netif_running(dev)) 186 if (!dev || !netif_running(dev))
@@ -199,6 +199,13 @@ void netpoll_poll(struct netpoll *np)
199 199
200 zap_completion_queue(); 200 zap_completion_queue();
201} 201}
202EXPORT_SYMBOL(netpoll_poll_dev);
203
204void netpoll_poll(struct netpoll *np)
205{
206 netpoll_poll_dev(np->dev);
207}
208EXPORT_SYMBOL(netpoll_poll);
202 209
203static void refill_skbs(void) 210static void refill_skbs(void)
204{ 211{
@@ -281,12 +288,13 @@ static int netpoll_owner_active(struct net_device *dev)
281 return 0; 288 return 0;
282} 289}
283 290
284static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) 291void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
285{ 292{
286 int status = NETDEV_TX_BUSY; 293 int status = NETDEV_TX_BUSY;
287 unsigned long tries; 294 unsigned long tries;
288 struct net_device *dev = np->dev; 295 struct net_device *dev = np->dev;
289 const struct net_device_ops *ops = dev->netdev_ops; 296 const struct net_device_ops *ops = dev->netdev_ops;
297 /* It is up to the caller to keep npinfo alive. */
290 struct netpoll_info *npinfo = np->dev->npinfo; 298 struct netpoll_info *npinfo = np->dev->npinfo;
291 299
292 if (!npinfo || !netif_running(dev) || !netif_device_present(dev)) { 300 if (!npinfo || !netif_running(dev) || !netif_device_present(dev)) {
@@ -307,7 +315,9 @@ static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
307 tries > 0; --tries) { 315 tries > 0; --tries) {
308 if (__netif_tx_trylock(txq)) { 316 if (__netif_tx_trylock(txq)) {
309 if (!netif_tx_queue_stopped(txq)) { 317 if (!netif_tx_queue_stopped(txq)) {
318 dev->priv_flags |= IFF_IN_NETPOLL;
310 status = ops->ndo_start_xmit(skb, dev); 319 status = ops->ndo_start_xmit(skb, dev);
320 dev->priv_flags &= ~IFF_IN_NETPOLL;
311 if (status == NETDEV_TX_OK) 321 if (status == NETDEV_TX_OK)
312 txq_trans_update(txq); 322 txq_trans_update(txq);
313 } 323 }
@@ -336,6 +346,7 @@ static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
336 schedule_delayed_work(&npinfo->tx_work,0); 346 schedule_delayed_work(&npinfo->tx_work,0);
337 } 347 }
338} 348}
349EXPORT_SYMBOL(netpoll_send_skb);
339 350
340void netpoll_send_udp(struct netpoll *np, const char *msg, int len) 351void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
341{ 352{
@@ -397,6 +408,7 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
397 408
398 netpoll_send_skb(np, skb); 409 netpoll_send_skb(np, skb);
399} 410}
411EXPORT_SYMBOL(netpoll_send_udp);
400 412
401static void arp_reply(struct sk_buff *skb) 413static void arp_reply(struct sk_buff *skb)
402{ 414{
@@ -407,11 +419,24 @@ static void arp_reply(struct sk_buff *skb)
407 __be32 sip, tip; 419 __be32 sip, tip;
408 unsigned char *sha; 420 unsigned char *sha;
409 struct sk_buff *send_skb; 421 struct sk_buff *send_skb;
410 struct netpoll *np = NULL; 422 struct netpoll *np, *tmp;
423 unsigned long flags;
424 int hits = 0;
425
426 if (list_empty(&npinfo->rx_np))
427 return;
428
429 /* Before checking the packet, we do some early
430 inspection whether this is interesting at all */
431 spin_lock_irqsave(&npinfo->rx_lock, flags);
432 list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) {
433 if (np->dev == skb->dev)
434 hits++;
435 }
436 spin_unlock_irqrestore(&npinfo->rx_lock, flags);
411 437
412 if (npinfo->rx_np && npinfo->rx_np->dev == skb->dev) 438 /* No netpoll struct is using this dev */
413 np = npinfo->rx_np; 439 if (!hits)
414 if (!np)
415 return; 440 return;
416 441
417 /* No arp on this interface */ 442 /* No arp on this interface */
@@ -437,77 +462,91 @@ static void arp_reply(struct sk_buff *skb)
437 arp_ptr += skb->dev->addr_len; 462 arp_ptr += skb->dev->addr_len;
438 memcpy(&sip, arp_ptr, 4); 463 memcpy(&sip, arp_ptr, 4);
439 arp_ptr += 4; 464 arp_ptr += 4;
440 /* if we actually cared about dst hw addr, it would get copied here */ 465 /* If we actually cared about dst hw addr,
466 it would get copied here */
441 arp_ptr += skb->dev->addr_len; 467 arp_ptr += skb->dev->addr_len;
442 memcpy(&tip, arp_ptr, 4); 468 memcpy(&tip, arp_ptr, 4);
443 469
444 /* Should we ignore arp? */ 470 /* Should we ignore arp? */
445 if (tip != np->local_ip || 471 if (ipv4_is_loopback(tip) || ipv4_is_multicast(tip))
446 ipv4_is_loopback(tip) || ipv4_is_multicast(tip))
447 return; 472 return;
448 473
449 size = arp_hdr_len(skb->dev); 474 size = arp_hdr_len(skb->dev);
450 send_skb = find_skb(np, size + LL_ALLOCATED_SPACE(np->dev),
451 LL_RESERVED_SPACE(np->dev));
452
453 if (!send_skb)
454 return;
455 475
456 skb_reset_network_header(send_skb); 476 spin_lock_irqsave(&npinfo->rx_lock, flags);
457 arp = (struct arphdr *) skb_put(send_skb, size); 477 list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) {
458 send_skb->dev = skb->dev; 478 if (tip != np->local_ip)
459 send_skb->protocol = htons(ETH_P_ARP); 479 continue;
460 480
461 /* Fill the device header for the ARP frame */ 481 send_skb = find_skb(np, size + LL_ALLOCATED_SPACE(np->dev),
462 if (dev_hard_header(send_skb, skb->dev, ptype, 482 LL_RESERVED_SPACE(np->dev));
463 sha, np->dev->dev_addr, 483 if (!send_skb)
464 send_skb->len) < 0) { 484 continue;
465 kfree_skb(send_skb);
466 return;
467 }
468 485
469 /* 486 skb_reset_network_header(send_skb);
470 * Fill out the arp protocol part. 487 arp = (struct arphdr *) skb_put(send_skb, size);
471 * 488 send_skb->dev = skb->dev;
472 * we only support ethernet device type, 489 send_skb->protocol = htons(ETH_P_ARP);
473 * which (according to RFC 1390) should always equal 1 (Ethernet).
474 */
475 490
476 arp->ar_hrd = htons(np->dev->type); 491 /* Fill the device header for the ARP frame */
477 arp->ar_pro = htons(ETH_P_IP); 492 if (dev_hard_header(send_skb, skb->dev, ptype,
478 arp->ar_hln = np->dev->addr_len; 493 sha, np->dev->dev_addr,
479 arp->ar_pln = 4; 494 send_skb->len) < 0) {
480 arp->ar_op = htons(type); 495 kfree_skb(send_skb);
496 continue;
497 }
481 498
482 arp_ptr=(unsigned char *)(arp + 1); 499 /*
483 memcpy(arp_ptr, np->dev->dev_addr, np->dev->addr_len); 500 * Fill out the arp protocol part.
484 arp_ptr += np->dev->addr_len; 501 *
485 memcpy(arp_ptr, &tip, 4); 502 * we only support ethernet device type,
486 arp_ptr += 4; 503 * which (according to RFC 1390) should
487 memcpy(arp_ptr, sha, np->dev->addr_len); 504 * always equal 1 (Ethernet).
488 arp_ptr += np->dev->addr_len; 505 */
489 memcpy(arp_ptr, &sip, 4);
490 506
491 netpoll_send_skb(np, send_skb); 507 arp->ar_hrd = htons(np->dev->type);
508 arp->ar_pro = htons(ETH_P_IP);
509 arp->ar_hln = np->dev->addr_len;
510 arp->ar_pln = 4;
511 arp->ar_op = htons(type);
512
513 arp_ptr = (unsigned char *)(arp + 1);
514 memcpy(arp_ptr, np->dev->dev_addr, np->dev->addr_len);
515 arp_ptr += np->dev->addr_len;
516 memcpy(arp_ptr, &tip, 4);
517 arp_ptr += 4;
518 memcpy(arp_ptr, sha, np->dev->addr_len);
519 arp_ptr += np->dev->addr_len;
520 memcpy(arp_ptr, &sip, 4);
521
522 netpoll_send_skb(np, send_skb);
523
524 /* If there are several rx_hooks for the same address,
525 we're fine by sending a single reply */
526 break;
527 }
528 spin_unlock_irqrestore(&npinfo->rx_lock, flags);
492} 529}
493 530
494int __netpoll_rx(struct sk_buff *skb) 531int __netpoll_rx(struct sk_buff *skb)
495{ 532{
496 int proto, len, ulen; 533 int proto, len, ulen;
534 int hits = 0;
497 struct iphdr *iph; 535 struct iphdr *iph;
498 struct udphdr *uh; 536 struct udphdr *uh;
499 struct netpoll_info *npi = skb->dev->npinfo; 537 struct netpoll_info *npinfo = skb->dev->npinfo;
500 struct netpoll *np = npi->rx_np; 538 struct netpoll *np, *tmp;
501 539
502 if (!np) 540 if (list_empty(&npinfo->rx_np))
503 goto out; 541 goto out;
542
504 if (skb->dev->type != ARPHRD_ETHER) 543 if (skb->dev->type != ARPHRD_ETHER)
505 goto out; 544 goto out;
506 545
507 /* check if netpoll clients need ARP */ 546 /* check if netpoll clients need ARP */
508 if (skb->protocol == htons(ETH_P_ARP) && 547 if (skb->protocol == htons(ETH_P_ARP) &&
509 atomic_read(&trapped)) { 548 atomic_read(&trapped)) {
510 skb_queue_tail(&npi->arp_tx, skb); 549 skb_queue_tail(&npinfo->arp_tx, skb);
511 return 1; 550 return 1;
512 } 551 }
513 552
@@ -551,16 +590,23 @@ int __netpoll_rx(struct sk_buff *skb)
551 goto out; 590 goto out;
552 if (checksum_udp(skb, uh, ulen, iph->saddr, iph->daddr)) 591 if (checksum_udp(skb, uh, ulen, iph->saddr, iph->daddr))
553 goto out; 592 goto out;
554 if (np->local_ip && np->local_ip != iph->daddr)
555 goto out;
556 if (np->remote_ip && np->remote_ip != iph->saddr)
557 goto out;
558 if (np->local_port && np->local_port != ntohs(uh->dest))
559 goto out;
560 593
561 np->rx_hook(np, ntohs(uh->source), 594 list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) {
562 (char *)(uh+1), 595 if (np->local_ip && np->local_ip != iph->daddr)
563 ulen - sizeof(struct udphdr)); 596 continue;
597 if (np->remote_ip && np->remote_ip != iph->saddr)
598 continue;
599 if (np->local_port && np->local_port != ntohs(uh->dest))
600 continue;
601
602 np->rx_hook(np, ntohs(uh->source),
603 (char *)(uh+1),
604 ulen - sizeof(struct udphdr));
605 hits++;
606 }
607
608 if (!hits)
609 goto out;
564 610
565 kfree_skb(skb); 611 kfree_skb(skb);
566 return 1; 612 return 1;
@@ -580,7 +626,7 @@ void netpoll_print_options(struct netpoll *np)
580 np->name, np->local_port); 626 np->name, np->local_port);
581 printk(KERN_INFO "%s: local IP %pI4\n", 627 printk(KERN_INFO "%s: local IP %pI4\n",
582 np->name, &np->local_ip); 628 np->name, &np->local_ip);
583 printk(KERN_INFO "%s: interface %s\n", 629 printk(KERN_INFO "%s: interface '%s'\n",
584 np->name, np->dev_name); 630 np->name, np->dev_name);
585 printk(KERN_INFO "%s: remote port %d\n", 631 printk(KERN_INFO "%s: remote port %d\n",
586 np->name, np->remote_port); 632 np->name, np->remote_port);
@@ -589,6 +635,7 @@ void netpoll_print_options(struct netpoll *np)
589 printk(KERN_INFO "%s: remote ethernet address %pM\n", 635 printk(KERN_INFO "%s: remote ethernet address %pM\n",
590 np->name, np->remote_mac); 636 np->name, np->remote_mac);
591} 637}
638EXPORT_SYMBOL(netpoll_print_options);
592 639
593int netpoll_parse_options(struct netpoll *np, char *opt) 640int netpoll_parse_options(struct netpoll *np, char *opt)
594{ 641{
@@ -627,6 +674,9 @@ int netpoll_parse_options(struct netpoll *np, char *opt)
627 if ((delim = strchr(cur, '@')) == NULL) 674 if ((delim = strchr(cur, '@')) == NULL)
628 goto parse_failed; 675 goto parse_failed;
629 *delim = 0; 676 *delim = 0;
677 if (*cur == ' ' || *cur == '\t')
678 printk(KERN_INFO "%s: warning: whitespace"
679 "is not allowed\n", np->name);
630 np->remote_port = simple_strtol(cur, NULL, 10); 680 np->remote_port = simple_strtol(cur, NULL, 10);
631 cur = delim; 681 cur = delim;
632 } 682 }
@@ -674,37 +724,37 @@ int netpoll_parse_options(struct netpoll *np, char *opt)
674 return 0; 724 return 0;
675 725
676 parse_failed: 726 parse_failed:
677 printk(KERN_INFO "%s: couldn't parse config at %s!\n", 727 printk(KERN_INFO "%s: couldn't parse config at '%s'!\n",
678 np->name, cur); 728 np->name, cur);
679 return -1; 729 return -1;
680} 730}
731EXPORT_SYMBOL(netpoll_parse_options);
681 732
682int netpoll_setup(struct netpoll *np) 733int __netpoll_setup(struct netpoll *np)
683{ 734{
684 struct net_device *ndev = NULL; 735 struct net_device *ndev = np->dev;
685 struct in_device *in_dev;
686 struct netpoll_info *npinfo; 736 struct netpoll_info *npinfo;
737 const struct net_device_ops *ops;
687 unsigned long flags; 738 unsigned long flags;
688 int err; 739 int err;
689 740
690 if (np->dev_name) 741 if ((ndev->priv_flags & IFF_DISABLE_NETPOLL) ||
691 ndev = dev_get_by_name(&init_net, np->dev_name); 742 !ndev->netdev_ops->ndo_poll_controller) {
692 if (!ndev) { 743 printk(KERN_ERR "%s: %s doesn't support polling, aborting.\n",
693 printk(KERN_ERR "%s: %s doesn't exist, aborting.\n",
694 np->name, np->dev_name); 744 np->name, np->dev_name);
695 return -ENODEV; 745 err = -ENOTSUPP;
746 goto out;
696 } 747 }
697 748
698 np->dev = ndev;
699 if (!ndev->npinfo) { 749 if (!ndev->npinfo) {
700 npinfo = kmalloc(sizeof(*npinfo), GFP_KERNEL); 750 npinfo = kmalloc(sizeof(*npinfo), GFP_KERNEL);
701 if (!npinfo) { 751 if (!npinfo) {
702 err = -ENOMEM; 752 err = -ENOMEM;
703 goto release; 753 goto out;
704 } 754 }
705 755
706 npinfo->rx_flags = 0; 756 npinfo->rx_flags = 0;
707 npinfo->rx_np = NULL; 757 INIT_LIST_HEAD(&npinfo->rx_np);
708 758
709 spin_lock_init(&npinfo->rx_lock); 759 spin_lock_init(&npinfo->rx_lock);
710 skb_queue_head_init(&npinfo->arp_tx); 760 skb_queue_head_init(&npinfo->arp_tx);
@@ -712,16 +762,51 @@ int netpoll_setup(struct netpoll *np)
712 INIT_DELAYED_WORK(&npinfo->tx_work, queue_process); 762 INIT_DELAYED_WORK(&npinfo->tx_work, queue_process);
713 763
714 atomic_set(&npinfo->refcnt, 1); 764 atomic_set(&npinfo->refcnt, 1);
765
766 ops = np->dev->netdev_ops;
767 if (ops->ndo_netpoll_setup) {
768 err = ops->ndo_netpoll_setup(ndev, npinfo);
769 if (err)
770 goto free_npinfo;
771 }
715 } else { 772 } else {
716 npinfo = ndev->npinfo; 773 npinfo = ndev->npinfo;
717 atomic_inc(&npinfo->refcnt); 774 atomic_inc(&npinfo->refcnt);
718 } 775 }
719 776
720 if (!ndev->netdev_ops->ndo_poll_controller) { 777 npinfo->netpoll = np;
721 printk(KERN_ERR "%s: %s doesn't support polling, aborting.\n", 778
779 if (np->rx_hook) {
780 spin_lock_irqsave(&npinfo->rx_lock, flags);
781 npinfo->rx_flags |= NETPOLL_RX_ENABLED;
782 list_add_tail(&np->rx, &npinfo->rx_np);
783 spin_unlock_irqrestore(&npinfo->rx_lock, flags);
784 }
785
786 /* last thing to do is link it to the net device structure */
787 rcu_assign_pointer(ndev->npinfo, npinfo);
788
789 return 0;
790
791free_npinfo:
792 kfree(npinfo);
793out:
794 return err;
795}
796EXPORT_SYMBOL_GPL(__netpoll_setup);
797
798int netpoll_setup(struct netpoll *np)
799{
800 struct net_device *ndev = NULL;
801 struct in_device *in_dev;
802 int err;
803
804 if (np->dev_name)
805 ndev = dev_get_by_name(&init_net, np->dev_name);
806 if (!ndev) {
807 printk(KERN_ERR "%s: %s doesn't exist, aborting.\n",
722 np->name, np->dev_name); 808 np->name, np->dev_name);
723 err = -ENOTSUPP; 809 return -ENODEV;
724 goto release;
725 } 810 }
726 811
727 if (!netif_running(ndev)) { 812 if (!netif_running(ndev)) {
@@ -737,7 +822,7 @@ int netpoll_setup(struct netpoll *np)
737 if (err) { 822 if (err) {
738 printk(KERN_ERR "%s: failed to open %s\n", 823 printk(KERN_ERR "%s: failed to open %s\n",
739 np->name, ndev->name); 824 np->name, ndev->name);
740 goto release; 825 goto put;
741 } 826 }
742 827
743 atleast = jiffies + HZ/10; 828 atleast = jiffies + HZ/10;
@@ -774,7 +859,7 @@ int netpoll_setup(struct netpoll *np)
774 printk(KERN_ERR "%s: no IP address for %s, aborting\n", 859 printk(KERN_ERR "%s: no IP address for %s, aborting\n",
775 np->name, np->dev_name); 860 np->name, np->dev_name);
776 err = -EDESTADDRREQ; 861 err = -EDESTADDRREQ;
777 goto release; 862 goto put;
778 } 863 }
779 864
780 np->local_ip = in_dev->ifa_list->ifa_local; 865 np->local_ip = in_dev->ifa_list->ifa_local;
@@ -782,31 +867,25 @@ int netpoll_setup(struct netpoll *np)
782 printk(KERN_INFO "%s: local IP %pI4\n", np->name, &np->local_ip); 867 printk(KERN_INFO "%s: local IP %pI4\n", np->name, &np->local_ip);
783 } 868 }
784 869
785 if (np->rx_hook) { 870 np->dev = ndev;
786 spin_lock_irqsave(&npinfo->rx_lock, flags);
787 npinfo->rx_flags |= NETPOLL_RX_ENABLED;
788 npinfo->rx_np = np;
789 spin_unlock_irqrestore(&npinfo->rx_lock, flags);
790 }
791 871
792 /* fill up the skb queue */ 872 /* fill up the skb queue */
793 refill_skbs(); 873 refill_skbs();
794 874
795 /* last thing to do is link it to the net device structure */ 875 rtnl_lock();
796 ndev->npinfo = npinfo; 876 err = __netpoll_setup(np);
877 rtnl_unlock();
797 878
798 /* avoid racing with NAPI reading npinfo */ 879 if (err)
799 synchronize_rcu(); 880 goto put;
800 881
801 return 0; 882 return 0;
802 883
803 release: 884put:
804 if (!ndev->npinfo)
805 kfree(npinfo);
806 np->dev = NULL;
807 dev_put(ndev); 885 dev_put(ndev);
808 return err; 886 return err;
809} 887}
888EXPORT_SYMBOL(netpoll_setup);
810 889
811static int __init netpoll_init(void) 890static int __init netpoll_init(void)
812{ 891{
@@ -815,43 +894,65 @@ static int __init netpoll_init(void)
815} 894}
816core_initcall(netpoll_init); 895core_initcall(netpoll_init);
817 896
818void netpoll_cleanup(struct netpoll *np) 897void __netpoll_cleanup(struct netpoll *np)
819{ 898{
820 struct netpoll_info *npinfo; 899 struct netpoll_info *npinfo;
821 unsigned long flags; 900 unsigned long flags;
822 901
823 if (np->dev) { 902 npinfo = np->dev->npinfo;
824 npinfo = np->dev->npinfo; 903 if (!npinfo)
825 if (npinfo) { 904 return;
826 if (npinfo->rx_np == np) {
827 spin_lock_irqsave(&npinfo->rx_lock, flags);
828 npinfo->rx_np = NULL;
829 npinfo->rx_flags &= ~NETPOLL_RX_ENABLED;
830 spin_unlock_irqrestore(&npinfo->rx_lock, flags);
831 }
832 905
833 if (atomic_dec_and_test(&npinfo->refcnt)) { 906 if (!list_empty(&npinfo->rx_np)) {
834 skb_queue_purge(&npinfo->arp_tx); 907 spin_lock_irqsave(&npinfo->rx_lock, flags);
835 skb_queue_purge(&npinfo->txq); 908 list_del(&np->rx);
836 cancel_rearming_delayed_work(&npinfo->tx_work); 909 if (list_empty(&npinfo->rx_np))
910 npinfo->rx_flags &= ~NETPOLL_RX_ENABLED;
911 spin_unlock_irqrestore(&npinfo->rx_lock, flags);
912 }
837 913
838 /* clean after last, unfinished work */ 914 if (atomic_dec_and_test(&npinfo->refcnt)) {
839 __skb_queue_purge(&npinfo->txq); 915 const struct net_device_ops *ops;
840 kfree(npinfo); 916
841 np->dev->npinfo = NULL; 917 ops = np->dev->netdev_ops;
842 } 918 if (ops->ndo_netpoll_cleanup)
843 } 919 ops->ndo_netpoll_cleanup(np->dev);
920
921 rcu_assign_pointer(np->dev->npinfo, NULL);
922
923 /* avoid racing with NAPI reading npinfo */
924 synchronize_rcu_bh();
844 925
845 dev_put(np->dev); 926 skb_queue_purge(&npinfo->arp_tx);
927 skb_queue_purge(&npinfo->txq);
928 cancel_rearming_delayed_work(&npinfo->tx_work);
929
930 /* clean after last, unfinished work */
931 __skb_queue_purge(&npinfo->txq);
932 kfree(npinfo);
846 } 933 }
934}
935EXPORT_SYMBOL_GPL(__netpoll_cleanup);
847 936
937void netpoll_cleanup(struct netpoll *np)
938{
939 if (!np->dev)
940 return;
941
942 rtnl_lock();
943 __netpoll_cleanup(np);
944 rtnl_unlock();
945
946 dev_put(np->dev);
848 np->dev = NULL; 947 np->dev = NULL;
849} 948}
949EXPORT_SYMBOL(netpoll_cleanup);
850 950
851int netpoll_trap(void) 951int netpoll_trap(void)
852{ 952{
853 return atomic_read(&trapped); 953 return atomic_read(&trapped);
854} 954}
955EXPORT_SYMBOL(netpoll_trap);
855 956
856void netpoll_set_trap(int trap) 957void netpoll_set_trap(int trap)
857{ 958{
@@ -860,12 +961,4 @@ void netpoll_set_trap(int trap)
860 else 961 else
861 atomic_dec(&trapped); 962 atomic_dec(&trapped);
862} 963}
863
864EXPORT_SYMBOL(netpoll_set_trap); 964EXPORT_SYMBOL(netpoll_set_trap);
865EXPORT_SYMBOL(netpoll_trap);
866EXPORT_SYMBOL(netpoll_print_options);
867EXPORT_SYMBOL(netpoll_parse_options);
868EXPORT_SYMBOL(netpoll_setup);
869EXPORT_SYMBOL(netpoll_cleanup);
870EXPORT_SYMBOL(netpoll_send_udp);
871EXPORT_SYMBOL(netpoll_poll);
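A hedged sketch of a netpoll client built on the reorganized API above; "eth0", the ports, and the broadcast remote MAC are placeholder values:

#include <linux/netpoll.h>

static void example_rx_hook(struct netpoll *np, int src_port,
			    char *msg, int len)
{
	/* called for UDP packets matching local_ip/local_port */
}

static struct netpoll example_np = {
	.name        = "example",
	.dev_name    = "eth0",
	.local_port  = 6665,
	.remote_port = 6666,
	.remote_mac  = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff},
	.rx_hook     = example_rx_hook,
};

/* netpoll_setup(&example_np) looks up the device, picks up local_ip from
 * its first address, then calls __netpoll_setup() under rtnl_lock();
 * netpoll_cleanup(&example_np) tears this down via __netpoll_cleanup(). */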
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index de0c2c726420..10a1ea72010d 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -115,6 +115,9 @@
115 * command by Adit Ranadive <adit.262@gmail.com> 115 * command by Adit Ranadive <adit.262@gmail.com>
116 * 116 *
117 */ 117 */
118
119#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
120
118#include <linux/sys.h> 121#include <linux/sys.h>
119#include <linux/types.h> 122#include <linux/types.h>
120#include <linux/module.h> 123#include <linux/module.h>
@@ -169,11 +172,13 @@
169#include <asm/dma.h> 172#include <asm/dma.h>
170#include <asm/div64.h> /* do_div */ 173#include <asm/div64.h> /* do_div */
171 174
172#define VERSION "2.72" 175#define VERSION "2.74"
173#define IP_NAME_SZ 32 176#define IP_NAME_SZ 32
174#define MAX_MPLS_LABELS 16 /* This is the max label stack depth */ 177#define MAX_MPLS_LABELS 16 /* This is the max label stack depth */
175#define MPLS_STACK_BOTTOM htonl(0x00000100) 178#define MPLS_STACK_BOTTOM htonl(0x00000100)
176 179
180#define func_enter() pr_debug("entering %s\n", __func__);
181
177/* Device flag bits */ 182/* Device flag bits */
178#define F_IPSRC_RND (1<<0) /* IP-Src Random */ 183#define F_IPSRC_RND (1<<0) /* IP-Src Random */
179#define F_IPDST_RND (1<<1) /* IP-Dst Random */ 184#define F_IPDST_RND (1<<1) /* IP-Dst Random */
@@ -190,6 +195,7 @@
190#define F_IPSEC_ON (1<<12) /* ipsec on for flows */ 195#define F_IPSEC_ON (1<<12) /* ipsec on for flows */
191#define F_QUEUE_MAP_RND (1<<13) /* queue map Random */ 196#define F_QUEUE_MAP_RND (1<<13) /* queue map Random */
192#define F_QUEUE_MAP_CPU (1<<14) /* queue map mirrors smp_processor_id() */ 197#define F_QUEUE_MAP_CPU (1<<14) /* queue map mirrors smp_processor_id() */
198#define F_NODE (1<<15) /* Node memory alloc*/
193 199
194/* Thread control flag bits */ 200/* Thread control flag bits */
195#define T_STOP (1<<0) /* Stop run */ 201#define T_STOP (1<<0) /* Stop run */
@@ -372,6 +378,7 @@ struct pktgen_dev {
372 378
373 u16 queue_map_min; 379 u16 queue_map_min;
374 u16 queue_map_max; 380 u16 queue_map_max;
381 int node; /* Memory node */
375 382
376#ifdef CONFIG_XFRM 383#ifdef CONFIG_XFRM
377 __u8 ipsmode; /* IPSEC mode (config) */ 384 __u8 ipsmode; /* IPSEC mode (config) */
@@ -422,7 +429,8 @@ static inline int ktime_lt(const ktime_t cmp1, const ktime_t cmp2)
422} 429}
423 430
424static const char version[] = 431static const char version[] =
425 "pktgen " VERSION ": Packet Generator for packet performance testing.\n"; 432 "Packet Generator for packet performance testing. "
433 "Version: " VERSION "\n";
426 434
427static int pktgen_remove_device(struct pktgen_thread *t, struct pktgen_dev *i); 435static int pktgen_remove_device(struct pktgen_thread *t, struct pktgen_dev *i);
428static int pktgen_add_device(struct pktgen_thread *t, const char *ifname); 436static int pktgen_add_device(struct pktgen_thread *t, const char *ifname);
@@ -493,7 +501,7 @@ static ssize_t pgctrl_write(struct file *file, const char __user *buf,
493 pktgen_reset_all_threads(); 501 pktgen_reset_all_threads();
494 502
495 else 503 else
496 printk(KERN_WARNING "pktgen: Unknown command: %s\n", data); 504 pr_warning("Unknown command: %s\n", data);
497 505
498 err = count; 506 err = count;
499 507
@@ -607,6 +615,9 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
607 if (pkt_dev->traffic_class) 615 if (pkt_dev->traffic_class)
608 seq_printf(seq, " traffic_class: 0x%02x\n", pkt_dev->traffic_class); 616 seq_printf(seq, " traffic_class: 0x%02x\n", pkt_dev->traffic_class);
609 617
618 if (pkt_dev->node >= 0)
619 seq_printf(seq, " node: %d\n", pkt_dev->node);
620
610 seq_printf(seq, " Flags: "); 621 seq_printf(seq, " Flags: ");
611 622
612 if (pkt_dev->flags & F_IPV6) 623 if (pkt_dev->flags & F_IPV6)
@@ -660,6 +671,9 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
660 if (pkt_dev->flags & F_SVID_RND) 671 if (pkt_dev->flags & F_SVID_RND)
661 seq_printf(seq, "SVID_RND "); 672 seq_printf(seq, "SVID_RND ");
662 673
674 if (pkt_dev->flags & F_NODE)
675 seq_printf(seq, "NODE_ALLOC ");
676
663 seq_puts(seq, "\n"); 677 seq_puts(seq, "\n");
664 678
665 /* not really stopped, more like last-running-at */ 679 /* not really stopped, more like last-running-at */
@@ -832,7 +846,7 @@ static ssize_t pktgen_if_write(struct file *file,
832 const char __user * user_buffer, size_t count, 846 const char __user * user_buffer, size_t count,
833 loff_t * offset) 847 loff_t * offset)
834{ 848{
835 struct seq_file *seq = (struct seq_file *)file->private_data; 849 struct seq_file *seq = file->private_data;
836 struct pktgen_dev *pkt_dev = seq->private; 850 struct pktgen_dev *pkt_dev = seq->private;
837 int i = 0, max, len; 851 int i = 0, max, len;
838 char name[16], valstr[32]; 852 char name[16], valstr[32];
@@ -844,14 +858,14 @@ static ssize_t pktgen_if_write(struct file *file,
844 pg_result = &(pkt_dev->result[0]); 858 pg_result = &(pkt_dev->result[0]);
845 859
846 if (count < 1) { 860 if (count < 1) {
847 printk(KERN_WARNING "pktgen: wrong command format\n"); 861 pr_warning("wrong command format\n");
848 return -EINVAL; 862 return -EINVAL;
849 } 863 }
850 864
851 max = count - i; 865 max = count - i;
852 tmp = count_trail_chars(&user_buffer[i], max); 866 tmp = count_trail_chars(&user_buffer[i], max);
853 if (tmp < 0) { 867 if (tmp < 0) {
854 printk(KERN_WARNING "pktgen: illegal format\n"); 868 pr_warning("illegal format\n");
855 return tmp; 869 return tmp;
856 } 870 }
857 i += tmp; 871 i += tmp;
@@ -972,6 +986,36 @@ static ssize_t pktgen_if_write(struct file *file,
972 (unsigned long long) pkt_dev->delay); 986 (unsigned long long) pkt_dev->delay);
973 return count; 987 return count;
974 } 988 }
989 if (!strcmp(name, "rate")) {
990 len = num_arg(&user_buffer[i], 10, &value);
991 if (len < 0)
992 return len;
993
994 i += len;
995 if (!value)
996 return len;
997 pkt_dev->delay = pkt_dev->min_pkt_size*8*NSEC_PER_USEC/value;
998 if (debug)
999 pr_info("Delay set at: %llu ns\n", pkt_dev->delay);
1000
1001 sprintf(pg_result, "OK: rate=%lu", value);
1002 return count;
1003 }
1004 if (!strcmp(name, "ratep")) {
1005 len = num_arg(&user_buffer[i], 10, &value);
1006 if (len < 0)
1007 return len;
1008
1009 i += len;
1010 if (!value)
1011 return len;
1012 pkt_dev->delay = NSEC_PER_SEC/value;
1013 if (debug)
1014 pr_info("Delay set at: %llu ns\n", pkt_dev->delay);
1015
1016 sprintf(pg_result, "OK: rate=%lu", value);
1017 return count;
1018 }
975 if (!strcmp(name, "udp_src_min")) { 1019 if (!strcmp(name, "udp_src_min")) {
976 len = num_arg(&user_buffer[i], 10, &value); 1020 len = num_arg(&user_buffer[i], 10, &value);
977 if (len < 0) 1021 if (len < 0)
@@ -1074,6 +1118,21 @@ static ssize_t pktgen_if_write(struct file *file,
1074 pkt_dev->dst_mac_count); 1118 pkt_dev->dst_mac_count);
1075 return count; 1119 return count;
1076 } 1120 }
1121 if (!strcmp(name, "node")) {
1122 len = num_arg(&user_buffer[i], 10, &value);
1123 if (len < 0)
1124 return len;
1125
1126 i += len;
1127
1128 if (node_possible(value)) {
1129 pkt_dev->node = value;
1130 sprintf(pg_result, "OK: node=%d", pkt_dev->node);
1131 }
1132 else
1133 sprintf(pg_result, "ERROR: node not possible");
1134 return count;
1135 }
1077 if (!strcmp(name, "flag")) { 1136 if (!strcmp(name, "flag")) {
1078 char f[32]; 1137 char f[32];
1079 memset(f, 0, 32); 1138 memset(f, 0, 32);
@@ -1166,12 +1225,18 @@ static ssize_t pktgen_if_write(struct file *file,
1166 else if (strcmp(f, "!IPV6") == 0) 1225 else if (strcmp(f, "!IPV6") == 0)
1167 pkt_dev->flags &= ~F_IPV6; 1226 pkt_dev->flags &= ~F_IPV6;
1168 1227
1228 else if (strcmp(f, "NODE_ALLOC") == 0)
1229 pkt_dev->flags |= F_NODE;
1230
1231 else if (strcmp(f, "!NODE_ALLOC") == 0)
1232 pkt_dev->flags &= ~F_NODE;
1233
1169 else { 1234 else {
1170 sprintf(pg_result, 1235 sprintf(pg_result,
1171 "Flag -:%s:- unknown\nAvailable flags, (prepend ! to un-set flag):\n%s", 1236 "Flag -:%s:- unknown\nAvailable flags, (prepend ! to un-set flag):\n%s",
1172 f, 1237 f,
1173 "IPSRC_RND, IPDST_RND, UDPSRC_RND, UDPDST_RND, " 1238 "IPSRC_RND, IPDST_RND, UDPSRC_RND, UDPDST_RND, "
1174 "MACSRC_RND, MACDST_RND, TXSIZE_RND, IPV6, MPLS_RND, VID_RND, SVID_RND, FLOW_SEQ, IPSEC\n"); 1239 "MACSRC_RND, MACDST_RND, TXSIZE_RND, IPV6, MPLS_RND, VID_RND, SVID_RND, FLOW_SEQ, IPSEC, NODE_ALLOC\n");
1175 return count; 1240 return count;
1176 } 1241 }
1177 sprintf(pg_result, "OK: flags=0x%x", pkt_dev->flags); 1242 sprintf(pg_result, "OK: flags=0x%x", pkt_dev->flags);
@@ -1369,18 +1434,12 @@ static ssize_t pktgen_if_write(struct file *file,
1369 i += len; 1434 i += len;
1370 1435
1371 for (*m = 0; *v && m < pkt_dev->dst_mac + 6; v++) { 1436 for (*m = 0; *v && m < pkt_dev->dst_mac + 6; v++) {
1372 if (*v >= '0' && *v <= '9') { 1437 int value;
1373 *m *= 16; 1438
1374 *m += *v - '0'; 1439 value = hex_to_bin(*v);
1375 } 1440 if (value >= 0)
1376 if (*v >= 'A' && *v <= 'F') { 1441 *m = *m * 16 + value;
1377 *m *= 16; 1442
1378 *m += *v - 'A' + 10;
1379 }
1380 if (*v >= 'a' && *v <= 'f') {
1381 *m *= 16;
1382 *m += *v - 'a' + 10;
1383 }
1384 if (*v == ':') { 1443 if (*v == ':') {
1385 m++; 1444 m++;
1386 *m = 0; 1445 *m = 0;
@@ -1411,18 +1470,12 @@ static ssize_t pktgen_if_write(struct file *file,
1411 i += len; 1470 i += len;
1412 1471
1413 for (*m = 0; *v && m < pkt_dev->src_mac + 6; v++) { 1472 for (*m = 0; *v && m < pkt_dev->src_mac + 6; v++) {
1414 if (*v >= '0' && *v <= '9') { 1473 int value;
1415 *m *= 16; 1474
1416 *m += *v - '0'; 1475 value = hex_to_bin(*v);
1417 } 1476 if (value >= 0)
1418 if (*v >= 'A' && *v <= 'F') { 1477 *m = *m * 16 + value;
1419 *m *= 16; 1478
1420 *m += *v - 'A' + 10;
1421 }
1422 if (*v >= 'a' && *v <= 'f') {
1423 *m *= 16;
1424 *m += *v - 'a' + 10;
1425 }
1426 if (*v == ':') { 1479 if (*v == ':') {
1427 m++; 1480 m++;
1428 *m = 0; 1481 *m = 0;
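Both MAC parsers above now lean on the kernel's hex_to_bin() helper instead of three open-coded digit ranges. The following stand-alone sketch shows the same loop with a local hex_to_bin() equivalent, since the kernel helper is not available in user space; the sample address is arbitrary and a bound check is added that the kernel loop does not need here.

#include <stdio.h>
#include <ctype.h>

/* local stand-in for the kernel's hex_to_bin() */
static int hex_to_bin(char ch)
{
	if (ch >= '0' && ch <= '9')
		return ch - '0';
	ch = (char)tolower((unsigned char)ch);
	if (ch >= 'a' && ch <= 'f')
		return ch - 'a' + 10;
	return -1;
}

int main(void)
{
	const char *v = "00:1b:21:3a:4c:5d";	/* arbitrary sample address */
	unsigned char mac[6] = { 0 };
	unsigned char *m = mac;

	for (*m = 0; *v && m < mac + 6; v++) {
		int value = hex_to_bin(*v);

		if (value >= 0)
			*m = *m * 16 + value;
		if (*v == ':' && m < mac + 5) {	/* bound check added for the sketch */
			m++;
			*m = 0;
		}
	}
	printf("%02x:%02x:%02x:%02x:%02x:%02x\n",
	       mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]);
	return 0;
}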
@@ -1711,7 +1764,7 @@ static ssize_t pktgen_thread_write(struct file *file,
1711 const char __user * user_buffer, 1764 const char __user * user_buffer,
1712 size_t count, loff_t * offset) 1765 size_t count, loff_t * offset)
1713{ 1766{
1714 struct seq_file *seq = (struct seq_file *)file->private_data; 1767 struct seq_file *seq = file->private_data;
1715 struct pktgen_thread *t = seq->private; 1768 struct pktgen_thread *t = seq->private;
1716 int i = 0, max, len, ret; 1769 int i = 0, max, len, ret;
1717 char name[40]; 1770 char name[40];
@@ -1752,7 +1805,7 @@ static ssize_t pktgen_thread_write(struct file *file,
1752 name, (unsigned long)count); 1805 name, (unsigned long)count);
1753 1806
1754 if (!t) { 1807 if (!t) {
1755 printk(KERN_ERR "pktgen: ERROR: No thread\n"); 1808 pr_err("ERROR: No thread\n");
1756 ret = -EINVAL; 1809 ret = -EINVAL;
1757 goto out; 1810 goto out;
1758 } 1811 }
@@ -1845,7 +1898,7 @@ static void pktgen_mark_device(const char *ifname)
1845 int i = 0; 1898 int i = 0;
1846 1899
1847 mutex_lock(&pktgen_thread_lock); 1900 mutex_lock(&pktgen_thread_lock);
1848 pr_debug("pktgen: pktgen_mark_device marking %s for removal\n", ifname); 1901 pr_debug("%s: marking %s for removal\n", __func__, ifname);
1849 1902
1850 while (1) { 1903 while (1) {
1851 1904
@@ -1854,15 +1907,14 @@ static void pktgen_mark_device(const char *ifname)
1854 break; /* success */ 1907 break; /* success */
1855 1908
1856 mutex_unlock(&pktgen_thread_lock); 1909 mutex_unlock(&pktgen_thread_lock);
1857 pr_debug("pktgen: pktgen_mark_device waiting for %s " 1910 pr_debug("%s: waiting for %s to disappear....\n",
1858 "to disappear....\n", ifname); 1911 __func__, ifname);
1859 schedule_timeout_interruptible(msecs_to_jiffies(msec_per_try)); 1912 schedule_timeout_interruptible(msecs_to_jiffies(msec_per_try));
1860 mutex_lock(&pktgen_thread_lock); 1913 mutex_lock(&pktgen_thread_lock);
1861 1914
1862 if (++i >= max_tries) { 1915 if (++i >= max_tries) {
1863 printk(KERN_ERR "pktgen_mark_device: timed out after " 1916 pr_err("%s: timed out after waiting %d msec for device %s to be removed\n",
1864 "waiting %d msec for device %s to be removed\n", 1917 __func__, msec_per_try * i, ifname);
1865 msec_per_try * i, ifname);
1866 break; 1918 break;
1867 } 1919 }
1868 1920
@@ -1889,8 +1941,8 @@ static void pktgen_change_name(struct net_device *dev)
1889 &pktgen_if_fops, 1941 &pktgen_if_fops,
1890 pkt_dev); 1942 pkt_dev);
1891 if (!pkt_dev->entry) 1943 if (!pkt_dev->entry)
1892 printk(KERN_ERR "pktgen: can't move proc " 1944 pr_err("can't move proc entry for '%s'\n",
1893 " entry for '%s'\n", dev->name); 1945 dev->name);
1894 break; 1946 break;
1895 } 1947 }
1896 } 1948 }
@@ -1954,15 +2006,15 @@ static int pktgen_setup_dev(struct pktgen_dev *pkt_dev, const char *ifname)
1954 2006
1955 odev = pktgen_dev_get_by_name(pkt_dev, ifname); 2007 odev = pktgen_dev_get_by_name(pkt_dev, ifname);
1956 if (!odev) { 2008 if (!odev) {
1957 printk(KERN_ERR "pktgen: no such netdevice: \"%s\"\n", ifname); 2009 pr_err("no such netdevice: \"%s\"\n", ifname);
1958 return -ENODEV; 2010 return -ENODEV;
1959 } 2011 }
1960 2012
1961 if (odev->type != ARPHRD_ETHER) { 2013 if (odev->type != ARPHRD_ETHER) {
1962 printk(KERN_ERR "pktgen: not an ethernet device: \"%s\"\n", ifname); 2014 pr_err("not an ethernet device: \"%s\"\n", ifname);
1963 err = -EINVAL; 2015 err = -EINVAL;
1964 } else if (!netif_running(odev)) { 2016 } else if (!netif_running(odev)) {
1965 printk(KERN_ERR "pktgen: device is down: \"%s\"\n", ifname); 2017 pr_err("device is down: \"%s\"\n", ifname);
1966 err = -ENETDOWN; 2018 err = -ENETDOWN;
1967 } else { 2019 } else {
1968 pkt_dev->odev = odev; 2020 pkt_dev->odev = odev;
@@ -1981,8 +2033,7 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev)
1981 int ntxq; 2033 int ntxq;
1982 2034
1983 if (!pkt_dev->odev) { 2035 if (!pkt_dev->odev) {
1984 printk(KERN_ERR "pktgen: ERROR: pkt_dev->odev == NULL in " 2036 pr_err("ERROR: pkt_dev->odev == NULL in setup_inject\n");
1985 "setup_inject.\n");
1986 sprintf(pkt_dev->result, 2037 sprintf(pkt_dev->result,
1987 "ERROR: pkt_dev->odev == NULL in setup_inject.\n"); 2038 "ERROR: pkt_dev->odev == NULL in setup_inject.\n");
1988 return; 2039 return;
@@ -1992,19 +2043,15 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev)
1992 ntxq = pkt_dev->odev->real_num_tx_queues; 2043 ntxq = pkt_dev->odev->real_num_tx_queues;
1993 2044
1994 if (ntxq <= pkt_dev->queue_map_min) { 2045 if (ntxq <= pkt_dev->queue_map_min) {
1995 printk(KERN_WARNING "pktgen: WARNING: Requested " 2046 pr_warning("WARNING: Requested queue_map_min (zero-based) (%d) exceeds valid range [0 - %d] for (%d) queues on %s, resetting\n",
1996 "queue_map_min (zero-based) (%d) exceeds valid range " 2047 pkt_dev->queue_map_min, (ntxq ?: 1) - 1, ntxq,
1997 "[0 - %d] for (%d) queues on %s, resetting\n", 2048 pkt_dev->odevname);
1998 pkt_dev->queue_map_min, (ntxq ?: 1) - 1, ntxq,
1999 pkt_dev->odevname);
2000 pkt_dev->queue_map_min = ntxq - 1; 2049 pkt_dev->queue_map_min = ntxq - 1;
2001 } 2050 }
2002 if (pkt_dev->queue_map_max >= ntxq) { 2051 if (pkt_dev->queue_map_max >= ntxq) {
2003 printk(KERN_WARNING "pktgen: WARNING: Requested " 2052 pr_warning("WARNING: Requested queue_map_max (zero-based) (%d) exceeds valid range [0 - %d] for (%d) queues on %s, resetting\n",
2004 "queue_map_max (zero-based) (%d) exceeds valid range " 2053 pkt_dev->queue_map_max, (ntxq ?: 1) - 1, ntxq,
2005 "[0 - %d] for (%d) queues on %s, resetting\n", 2054 pkt_dev->odevname);
2006 pkt_dev->queue_map_max, (ntxq ?: 1) - 1, ntxq,
2007 pkt_dev->odevname);
2008 pkt_dev->queue_map_max = ntxq - 1; 2055 pkt_dev->queue_map_max = ntxq - 1;
2009 } 2056 }
2010 2057
@@ -2064,8 +2111,7 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev)
2064 } 2111 }
2065 rcu_read_unlock(); 2112 rcu_read_unlock();
2066 if (err) 2113 if (err)
2067 printk(KERN_ERR "pktgen: ERROR: IPv6 link " 2114 pr_err("ERROR: IPv6 link address not available\n");
2068 "address not availble.\n");
2069 } 2115 }
2070#endif 2116#endif
2071 } else { 2117 } else {
@@ -2113,15 +2159,15 @@ static void spin(struct pktgen_dev *pkt_dev, ktime_t spin_until)
2113 hrtimer_init_on_stack(&t.timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); 2159 hrtimer_init_on_stack(&t.timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
2114 hrtimer_set_expires(&t.timer, spin_until); 2160 hrtimer_set_expires(&t.timer, spin_until);
2115 2161
2116 remaining = ktime_to_us(hrtimer_expires_remaining(&t.timer)); 2162 remaining = ktime_to_ns(hrtimer_expires_remaining(&t.timer));
2117 if (remaining <= 0) { 2163 if (remaining <= 0) {
2118 pkt_dev->next_tx = ktime_add_ns(spin_until, pkt_dev->delay); 2164 pkt_dev->next_tx = ktime_add_ns(spin_until, pkt_dev->delay);
2119 return; 2165 return;
2120 } 2166 }
2121 2167
2122 start_time = ktime_now(); 2168 start_time = ktime_now();
2123 if (remaining < 100) 2169 if (remaining < 100000)
2124 udelay(remaining); /* really small just spin */ 2170 ndelay(remaining); /* really small just spin */
2125 else { 2171 else {
2126 /* see do_nanosleep */ 2172 /* see do_nanosleep */
2127 hrtimer_init_sleeper(&t, current); 2173 hrtimer_init_sleeper(&t, current);
@@ -2141,7 +2187,7 @@ static void spin(struct pktgen_dev *pkt_dev, ktime_t spin_until)
2141 end_time = ktime_now(); 2187 end_time = ktime_now();
2142 2188
2143 pkt_dev->idle_acc += ktime_to_ns(ktime_sub(end_time, start_time)); 2189 pkt_dev->idle_acc += ktime_to_ns(ktime_sub(end_time, start_time));
2144 pkt_dev->next_tx = ktime_add_ns(end_time, pkt_dev->delay); 2190 pkt_dev->next_tx = ktime_add_ns(spin_until, pkt_dev->delay);
2145} 2191}
2146 2192
2147static inline void set_pkt_overhead(struct pktgen_dev *pkt_dev) 2193static inline void set_pkt_overhead(struct pktgen_dev *pkt_dev)
@@ -2188,12 +2234,13 @@ static inline int f_pick(struct pktgen_dev *pkt_dev)
2188/* If there was already an IPSEC SA, we keep it as is, else 2234/* If there was already an IPSEC SA, we keep it as is, else
2189 * we go look for it ... 2235 * we go look for it ...
2190*/ 2236*/
2237#define DUMMY_MARK 0
2191static void get_ipsec_sa(struct pktgen_dev *pkt_dev, int flow) 2238static void get_ipsec_sa(struct pktgen_dev *pkt_dev, int flow)
2192{ 2239{
2193 struct xfrm_state *x = pkt_dev->flows[flow].x; 2240 struct xfrm_state *x = pkt_dev->flows[flow].x;
2194 if (!x) { 2241 if (!x) {
2195 /*slow path: we dont already have xfrm_state*/ 2242 /*slow path: we dont already have xfrm_state*/
2196 x = xfrm_stateonly_find(&init_net, 2243 x = xfrm_stateonly_find(&init_net, DUMMY_MARK,
2197 (xfrm_address_t *)&pkt_dev->cur_daddr, 2244 (xfrm_address_t *)&pkt_dev->cur_daddr,
2198 (xfrm_address_t *)&pkt_dev->cur_saddr, 2245 (xfrm_address_t *)&pkt_dev->cur_saddr,
2199 AF_INET, 2246 AF_INET,
@@ -2498,8 +2545,8 @@ static int process_ipsec(struct pktgen_dev *pkt_dev,
2498 if (nhead > 0) { 2545 if (nhead > 0) {
2499 ret = pskb_expand_head(skb, nhead, 0, GFP_ATOMIC); 2546 ret = pskb_expand_head(skb, nhead, 0, GFP_ATOMIC);
2500 if (ret < 0) { 2547 if (ret < 0) {
2501 printk(KERN_ERR "Error expanding " 2548 pr_err("Error expanding ipsec packet %d\n",
2502 "ipsec packet %d\n", ret); 2549 ret);
2503 goto err; 2550 goto err;
2504 } 2551 }
2505 } 2552 }
@@ -2508,8 +2555,7 @@ static int process_ipsec(struct pktgen_dev *pkt_dev,
2508 skb_pull(skb, ETH_HLEN); 2555 skb_pull(skb, ETH_HLEN);
2509 ret = pktgen_output_ipsec(skb, pkt_dev); 2556 ret = pktgen_output_ipsec(skb, pkt_dev);
2510 if (ret) { 2557 if (ret) {
2511 printk(KERN_ERR "Error creating ipsec " 2558 pr_err("Error creating ipsec packet %d\n", ret);
2512 "packet %d\n", ret);
2513 goto err; 2559 goto err;
2514 } 2560 }
2515 /* restore ll */ 2561 /* restore ll */
@@ -2571,9 +2617,27 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
2571 mod_cur_headers(pkt_dev); 2617 mod_cur_headers(pkt_dev);
2572 2618
2573 datalen = (odev->hard_header_len + 16) & ~0xf; 2619 datalen = (odev->hard_header_len + 16) & ~0xf;
2574 skb = __netdev_alloc_skb(odev, 2620
2575 pkt_dev->cur_pkt_size + 64 2621 if (pkt_dev->flags & F_NODE) {
2576 + datalen + pkt_dev->pkt_overhead, GFP_NOWAIT); 2622 int node;
2623
2624 if (pkt_dev->node >= 0)
2625 node = pkt_dev->node;
2626 else
2627 node = numa_node_id();
2628
2629 skb = __alloc_skb(NET_SKB_PAD + pkt_dev->cur_pkt_size + 64
2630 + datalen + pkt_dev->pkt_overhead, GFP_NOWAIT, 0, node);
2631 if (likely(skb)) {
2632 skb_reserve(skb, NET_SKB_PAD);
2633 skb->dev = odev;
2634 }
2635 }
2636 else
2637 skb = __netdev_alloc_skb(odev,
2638 pkt_dev->cur_pkt_size + 64
2639 + datalen + pkt_dev->pkt_overhead, GFP_NOWAIT);
2640
2577 if (!skb) { 2641 if (!skb) {
2578 sprintf(pkt_dev->result, "No memory"); 2642 sprintf(pkt_dev->result, "No memory");
2579 return NULL; 2643 return NULL;
@@ -2967,8 +3031,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
2967 if (datalen < sizeof(struct pktgen_hdr)) { 3031 if (datalen < sizeof(struct pktgen_hdr)) {
2968 datalen = sizeof(struct pktgen_hdr); 3032 datalen = sizeof(struct pktgen_hdr);
2969 if (net_ratelimit()) 3033 if (net_ratelimit())
2970 printk(KERN_INFO "pktgen: increased datalen to %d\n", 3034 pr_info("increased datalen to %d\n", datalen);
2971 datalen);
2972 } 3035 }
2973 3036
2974 udph->source = htons(pkt_dev->cur_udp_src); 3037 udph->source = htons(pkt_dev->cur_udp_src);
@@ -3095,7 +3158,7 @@ static void pktgen_run(struct pktgen_thread *t)
3095 struct pktgen_dev *pkt_dev; 3158 struct pktgen_dev *pkt_dev;
3096 int started = 0; 3159 int started = 0;
3097 3160
3098 pr_debug("pktgen: entering pktgen_run. %p\n", t); 3161 func_enter();
3099 3162
3100 if_lock(t); 3163 if_lock(t);
3101 list_for_each_entry(pkt_dev, &t->if_list, list) { 3164 list_for_each_entry(pkt_dev, &t->if_list, list) {
@@ -3128,7 +3191,7 @@ static void pktgen_stop_all_threads_ifs(void)
3128{ 3191{
3129 struct pktgen_thread *t; 3192 struct pktgen_thread *t;
3130 3193
3131 pr_debug("pktgen: entering pktgen_stop_all_threads_ifs.\n"); 3194 func_enter();
3132 3195
3133 mutex_lock(&pktgen_thread_lock); 3196 mutex_lock(&pktgen_thread_lock);
3134 3197
@@ -3193,7 +3256,7 @@ static void pktgen_run_all_threads(void)
3193{ 3256{
3194 struct pktgen_thread *t; 3257 struct pktgen_thread *t;
3195 3258
3196 pr_debug("pktgen: entering pktgen_run_all_threads.\n"); 3259 func_enter();
3197 3260
3198 mutex_lock(&pktgen_thread_lock); 3261 mutex_lock(&pktgen_thread_lock);
3199 3262
@@ -3212,7 +3275,7 @@ static void pktgen_reset_all_threads(void)
3212{ 3275{
3213 struct pktgen_thread *t; 3276 struct pktgen_thread *t;
3214 3277
3215 pr_debug("pktgen: entering pktgen_reset_all_threads.\n"); 3278 func_enter();
3216 3279
3217 mutex_lock(&pktgen_thread_lock); 3280 mutex_lock(&pktgen_thread_lock);
3218 3281
@@ -3262,8 +3325,8 @@ static int pktgen_stop_device(struct pktgen_dev *pkt_dev)
3262 int nr_frags = pkt_dev->skb ? skb_shinfo(pkt_dev->skb)->nr_frags : -1; 3325 int nr_frags = pkt_dev->skb ? skb_shinfo(pkt_dev->skb)->nr_frags : -1;
3263 3326
3264 if (!pkt_dev->running) { 3327 if (!pkt_dev->running) {
3265 printk(KERN_WARNING "pktgen: interface: %s is already " 3328 pr_warning("interface: %s is already stopped\n",
3266 "stopped\n", pkt_dev->odevname); 3329 pkt_dev->odevname);
3267 return -EINVAL; 3330 return -EINVAL;
3268 } 3331 }
3269 3332
@@ -3299,7 +3362,7 @@ static void pktgen_stop(struct pktgen_thread *t)
3299{ 3362{
3300 struct pktgen_dev *pkt_dev; 3363 struct pktgen_dev *pkt_dev;
3301 3364
3302 pr_debug("pktgen: entering pktgen_stop\n"); 3365 func_enter();
3303 3366
3304 if_lock(t); 3367 if_lock(t);
3305 3368
@@ -3319,7 +3382,7 @@ static void pktgen_rem_one_if(struct pktgen_thread *t)
3319 struct list_head *q, *n; 3382 struct list_head *q, *n;
3320 struct pktgen_dev *cur; 3383 struct pktgen_dev *cur;
3321 3384
3322 pr_debug("pktgen: entering pktgen_rem_one_if\n"); 3385 func_enter();
3323 3386
3324 if_lock(t); 3387 if_lock(t);
3325 3388
@@ -3345,9 +3408,10 @@ static void pktgen_rem_all_ifs(struct pktgen_thread *t)
3345 struct list_head *q, *n; 3408 struct list_head *q, *n;
3346 struct pktgen_dev *cur; 3409 struct pktgen_dev *cur;
3347 3410
3411 func_enter();
3412
3348 /* Remove all devices, free mem */ 3413 /* Remove all devices, free mem */
3349 3414
3350 pr_debug("pktgen: entering pktgen_rem_all_ifs\n");
3351 if_lock(t); 3415 if_lock(t);
3352 3416
3353 list_for_each_safe(q, n, &t->if_list) { 3417 list_for_each_safe(q, n, &t->if_list) {
@@ -3429,8 +3493,7 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev)
3429 3493
3430 pkt_dev->skb = fill_packet(odev, pkt_dev); 3494 pkt_dev->skb = fill_packet(odev, pkt_dev);
3431 if (pkt_dev->skb == NULL) { 3495 if (pkt_dev->skb == NULL) {
3432 printk(KERN_ERR "pktgen: ERROR: couldn't " 3496 pr_err("ERROR: couldn't allocate skb in fill_packet\n");
3433 "allocate skb in fill_packet.\n");
3434 schedule(); 3497 schedule();
3435 pkt_dev->clone_count--; /* back out increment, OOM */ 3498 pkt_dev->clone_count--; /* back out increment, OOM */
3436 return; 3499 return;
@@ -3510,8 +3573,7 @@ static int pktgen_thread_worker(void *arg)
3510 init_waitqueue_head(&t->queue); 3573 init_waitqueue_head(&t->queue);
3511 complete(&t->start_done); 3574 complete(&t->start_done);
3512 3575
3513 pr_debug("pktgen: starting pktgen/%d: pid=%d\n", 3576 pr_debug("starting pktgen/%d: pid=%d\n", cpu, task_pid_nr(current));
3514 cpu, task_pid_nr(current));
3515 3577
3516 set_current_state(TASK_INTERRUPTIBLE); 3578 set_current_state(TASK_INTERRUPTIBLE);
3517 3579
@@ -3524,6 +3586,7 @@ static int pktgen_thread_worker(void *arg)
3524 wait_event_interruptible_timeout(t->queue, 3586 wait_event_interruptible_timeout(t->queue,
3525 t->control != 0, 3587 t->control != 0,
3526 HZ/10); 3588 HZ/10);
3589 try_to_freeze();
3527 continue; 3590 continue;
3528 } 3591 }
3529 3592
@@ -3563,13 +3626,13 @@ static int pktgen_thread_worker(void *arg)
3563 set_current_state(TASK_INTERRUPTIBLE); 3626 set_current_state(TASK_INTERRUPTIBLE);
3564 } 3627 }
3565 3628
3566 pr_debug("pktgen: %s stopping all device\n", t->tsk->comm); 3629 pr_debug("%s stopping all device\n", t->tsk->comm);
3567 pktgen_stop(t); 3630 pktgen_stop(t);
3568 3631
3569 pr_debug("pktgen: %s removing all device\n", t->tsk->comm); 3632 pr_debug("%s removing all device\n", t->tsk->comm);
3570 pktgen_rem_all_ifs(t); 3633 pktgen_rem_all_ifs(t);
3571 3634
3572 pr_debug("pktgen: %s removing thread.\n", t->tsk->comm); 3635 pr_debug("%s removing thread\n", t->tsk->comm);
3573 pktgen_rem_thread(t); 3636 pktgen_rem_thread(t);
3574 3637
3575 return 0; 3638 return 0;
@@ -3593,7 +3656,7 @@ static struct pktgen_dev *pktgen_find_dev(struct pktgen_thread *t,
3593 } 3656 }
3594 3657
3595 if_unlock(t); 3658 if_unlock(t);
3596 pr_debug("pktgen: find_dev(%s) returning %p\n", ifname, pkt_dev); 3659 pr_debug("find_dev(%s) returning %p\n", ifname, pkt_dev);
3597 return pkt_dev; 3660 return pkt_dev;
3598} 3661}
3599 3662
@@ -3609,8 +3672,7 @@ static int add_dev_to_thread(struct pktgen_thread *t,
3609 if_lock(t); 3672 if_lock(t);
3610 3673
3611 if (pkt_dev->pg_thread) { 3674 if (pkt_dev->pg_thread) {
3612 printk(KERN_ERR "pktgen: ERROR: already assigned " 3675 pr_err("ERROR: already assigned to a thread\n");
3613 "to a thread.\n");
3614 rv = -EBUSY; 3676 rv = -EBUSY;
3615 goto out; 3677 goto out;
3616 } 3678 }
@@ -3636,7 +3698,7 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname)
3636 3698
3637 pkt_dev = __pktgen_NN_threads(ifname, FIND); 3699 pkt_dev = __pktgen_NN_threads(ifname, FIND);
3638 if (pkt_dev) { 3700 if (pkt_dev) {
3639 printk(KERN_ERR "pktgen: ERROR: interface already used.\n"); 3701 pr_err("ERROR: interface already used\n");
3640 return -EBUSY; 3702 return -EBUSY;
3641 } 3703 }
3642 3704
@@ -3672,6 +3734,7 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname)
3672 pkt_dev->svlan_p = 0; 3734 pkt_dev->svlan_p = 0;
3673 pkt_dev->svlan_cfi = 0; 3735 pkt_dev->svlan_cfi = 0;
3674 pkt_dev->svlan_id = 0xffff; 3736 pkt_dev->svlan_id = 0xffff;
3737 pkt_dev->node = -1;
3675 3738
3676 err = pktgen_setup_dev(pkt_dev, ifname); 3739 err = pktgen_setup_dev(pkt_dev, ifname);
3677 if (err) 3740 if (err)
@@ -3680,7 +3743,7 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname)
3680 pkt_dev->entry = proc_create_data(ifname, 0600, pg_proc_dir, 3743 pkt_dev->entry = proc_create_data(ifname, 0600, pg_proc_dir,
3681 &pktgen_if_fops, pkt_dev); 3744 &pktgen_if_fops, pkt_dev);
3682 if (!pkt_dev->entry) { 3745 if (!pkt_dev->entry) {
3683 printk(KERN_ERR "pktgen: cannot create %s/%s procfs entry.\n", 3746 pr_err("cannot create %s/%s procfs entry\n",
3684 PG_PROC_DIR, ifname); 3747 PG_PROC_DIR, ifname);
3685 err = -EINVAL; 3748 err = -EINVAL;
3686 goto out2; 3749 goto out2;
@@ -3711,8 +3774,7 @@ static int __init pktgen_create_thread(int cpu)
3711 t = kzalloc_node(sizeof(struct pktgen_thread), GFP_KERNEL, 3774 t = kzalloc_node(sizeof(struct pktgen_thread), GFP_KERNEL,
3712 cpu_to_node(cpu)); 3775 cpu_to_node(cpu));
3713 if (!t) { 3776 if (!t) {
3714 printk(KERN_ERR "pktgen: ERROR: out of memory, can't " 3777 pr_err("ERROR: out of memory, can't create new thread\n");
3715 "create new thread.\n");
3716 return -ENOMEM; 3778 return -ENOMEM;
3717 } 3779 }
3718 3780
@@ -3726,8 +3788,7 @@ static int __init pktgen_create_thread(int cpu)
3726 3788
3727 p = kthread_create(pktgen_thread_worker, t, "kpktgend_%d", cpu); 3789 p = kthread_create(pktgen_thread_worker, t, "kpktgend_%d", cpu);
3728 if (IS_ERR(p)) { 3790 if (IS_ERR(p)) {
3729 printk(KERN_ERR "pktgen: kernel_thread() failed " 3791 pr_err("kernel_thread() failed for cpu %d\n", t->cpu);
3730 "for cpu %d\n", t->cpu);
3731 list_del(&t->th_list); 3792 list_del(&t->th_list);
3732 kfree(t); 3793 kfree(t);
3733 return PTR_ERR(p); 3794 return PTR_ERR(p);
@@ -3738,7 +3799,7 @@ static int __init pktgen_create_thread(int cpu)
3738 pe = proc_create_data(t->tsk->comm, 0600, pg_proc_dir, 3799 pe = proc_create_data(t->tsk->comm, 0600, pg_proc_dir,
3739 &pktgen_thread_fops, t); 3800 &pktgen_thread_fops, t);
3740 if (!pe) { 3801 if (!pe) {
3741 printk(KERN_ERR "pktgen: cannot create %s/%s procfs entry.\n", 3802 pr_err("cannot create %s/%s procfs entry\n",
3742 PG_PROC_DIR, t->tsk->comm); 3803 PG_PROC_DIR, t->tsk->comm);
3743 kthread_stop(p); 3804 kthread_stop(p);
3744 list_del(&t->th_list); 3805 list_del(&t->th_list);
@@ -3772,11 +3833,10 @@ static int pktgen_remove_device(struct pktgen_thread *t,
3772 struct pktgen_dev *pkt_dev) 3833 struct pktgen_dev *pkt_dev)
3773{ 3834{
3774 3835
3775 pr_debug("pktgen: remove_device pkt_dev=%p\n", pkt_dev); 3836 pr_debug("remove_device pkt_dev=%p\n", pkt_dev);
3776 3837
3777 if (pkt_dev->running) { 3838 if (pkt_dev->running) {
3778 printk(KERN_WARNING "pktgen: WARNING: trying to remove a " 3839 pr_warning("WARNING: trying to remove a running interface, stopping it now\n");
3779 "running interface, stopping it now.\n");
3780 pktgen_stop_device(pkt_dev); 3840 pktgen_stop_device(pkt_dev);
3781 } 3841 }
3782 3842
@@ -3807,7 +3867,7 @@ static int __init pg_init(void)
3807 int cpu; 3867 int cpu;
3808 struct proc_dir_entry *pe; 3868 struct proc_dir_entry *pe;
3809 3869
3810 printk(KERN_INFO "%s", version); 3870 pr_info("%s", version);
3811 3871
3812 pg_proc_dir = proc_mkdir(PG_PROC_DIR, init_net.proc_net); 3872 pg_proc_dir = proc_mkdir(PG_PROC_DIR, init_net.proc_net);
3813 if (!pg_proc_dir) 3873 if (!pg_proc_dir)
@@ -3815,8 +3875,7 @@ static int __init pg_init(void)
3815 3875
3816 pe = proc_create(PGCTRL, 0600, pg_proc_dir, &pktgen_fops); 3876 pe = proc_create(PGCTRL, 0600, pg_proc_dir, &pktgen_fops);
3817 if (pe == NULL) { 3877 if (pe == NULL) {
3818 printk(KERN_ERR "pktgen: ERROR: cannot create %s " 3878 pr_err("ERROR: cannot create %s procfs entry\n", PGCTRL);
3819 "procfs entry.\n", PGCTRL);
3820 proc_net_remove(&init_net, PG_PROC_DIR); 3879 proc_net_remove(&init_net, PG_PROC_DIR);
3821 return -EINVAL; 3880 return -EINVAL;
3822 } 3881 }
@@ -3829,13 +3888,12 @@ static int __init pg_init(void)
3829 3888
3830 err = pktgen_create_thread(cpu); 3889 err = pktgen_create_thread(cpu);
3831 if (err) 3890 if (err)
3832 printk(KERN_WARNING "pktgen: WARNING: Cannot create " 3891 pr_warning("WARNING: Cannot create thread for cpu %d (%d)\n",
3833 "thread for cpu %d (%d)\n", cpu, err); 3892 cpu, err);
3834 } 3893 }
3835 3894
3836 if (list_empty(&pktgen_threads)) { 3895 if (list_empty(&pktgen_threads)) {
3837 printk(KERN_ERR "pktgen: ERROR: Initialization failed for " 3896 pr_err("ERROR: Initialization failed for all threads\n");
3838 "all threads\n");
3839 unregister_netdevice_notifier(&pktgen_notifier_block); 3897 unregister_netdevice_notifier(&pktgen_notifier_block);
3840 remove_proc_entry(PGCTRL, pg_proc_dir); 3898 remove_proc_entry(PGCTRL, pg_proc_dir);
3841 proc_net_remove(&init_net, PG_PROC_DIR); 3899 proc_net_remove(&init_net, PG_PROC_DIR);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 794bcb897ff0..f78d821bd935 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -35,6 +35,7 @@
35#include <linux/security.h> 35#include <linux/security.h>
36#include <linux/mutex.h> 36#include <linux/mutex.h>
37#include <linux/if_addr.h> 37#include <linux/if_addr.h>
38#include <linux/pci.h>
38 39
39#include <asm/uaccess.h> 40#include <asm/uaccess.h>
40#include <asm/system.h> 41#include <asm/system.h>
@@ -89,7 +90,15 @@ int rtnl_is_locked(void)
89} 90}
90EXPORT_SYMBOL(rtnl_is_locked); 91EXPORT_SYMBOL(rtnl_is_locked);
91 92
92static struct rtnl_link *rtnl_msg_handlers[NPROTO]; 93#ifdef CONFIG_PROVE_LOCKING
94int lockdep_rtnl_is_held(void)
95{
96 return lockdep_is_held(&rtnl_mutex);
97}
98EXPORT_SYMBOL(lockdep_rtnl_is_held);
99#endif /* #ifdef CONFIG_PROVE_LOCKING */
100
101static struct rtnl_link *rtnl_msg_handlers[RTNL_FAMILY_MAX + 1];
93 102
94static inline int rtm_msgindex(int msgtype) 103static inline int rtm_msgindex(int msgtype)
95{ 104{
@@ -109,7 +118,11 @@ static rtnl_doit_func rtnl_get_doit(int protocol, int msgindex)
109{ 118{
110 struct rtnl_link *tab; 119 struct rtnl_link *tab;
111 120
112 tab = rtnl_msg_handlers[protocol]; 121 if (protocol <= RTNL_FAMILY_MAX)
122 tab = rtnl_msg_handlers[protocol];
123 else
124 tab = NULL;
125
113 if (tab == NULL || tab[msgindex].doit == NULL) 126 if (tab == NULL || tab[msgindex].doit == NULL)
114 tab = rtnl_msg_handlers[PF_UNSPEC]; 127 tab = rtnl_msg_handlers[PF_UNSPEC];
115 128
@@ -120,7 +133,11 @@ static rtnl_dumpit_func rtnl_get_dumpit(int protocol, int msgindex)
120{ 133{
121 struct rtnl_link *tab; 134 struct rtnl_link *tab;
122 135
123 tab = rtnl_msg_handlers[protocol]; 136 if (protocol <= RTNL_FAMILY_MAX)
137 tab = rtnl_msg_handlers[protocol];
138 else
139 tab = NULL;
140
124 if (tab == NULL || tab[msgindex].dumpit == NULL) 141 if (tab == NULL || tab[msgindex].dumpit == NULL)
125 tab = rtnl_msg_handlers[PF_UNSPEC]; 142 tab = rtnl_msg_handlers[PF_UNSPEC];
126 143
@@ -150,7 +167,7 @@ int __rtnl_register(int protocol, int msgtype,
150 struct rtnl_link *tab; 167 struct rtnl_link *tab;
151 int msgindex; 168 int msgindex;
152 169
153 BUG_ON(protocol < 0 || protocol >= NPROTO); 170 BUG_ON(protocol < 0 || protocol > RTNL_FAMILY_MAX);
154 msgindex = rtm_msgindex(msgtype); 171 msgindex = rtm_msgindex(msgtype);
155 172
156 tab = rtnl_msg_handlers[protocol]; 173 tab = rtnl_msg_handlers[protocol];
@@ -202,7 +219,7 @@ int rtnl_unregister(int protocol, int msgtype)
202{ 219{
203 int msgindex; 220 int msgindex;
204 221
205 BUG_ON(protocol < 0 || protocol >= NPROTO); 222 BUG_ON(protocol < 0 || protocol > RTNL_FAMILY_MAX);
206 msgindex = rtm_msgindex(msgtype); 223 msgindex = rtm_msgindex(msgtype);
207 224
208 if (rtnl_msg_handlers[protocol] == NULL) 225 if (rtnl_msg_handlers[protocol] == NULL)
@@ -224,7 +241,7 @@ EXPORT_SYMBOL_GPL(rtnl_unregister);
224 */ 241 */
225void rtnl_unregister_all(int protocol) 242void rtnl_unregister_all(int protocol)
226{ 243{
227 BUG_ON(protocol < 0 || protocol >= NPROTO); 244 BUG_ON(protocol < 0 || protocol > RTNL_FAMILY_MAX);
228 245
229 kfree(rtnl_msg_handlers[protocol]); 246 kfree(rtnl_msg_handlers[protocol]);
230 rtnl_msg_handlers[protocol] = NULL; 247 rtnl_msg_handlers[protocol] = NULL;
@@ -548,8 +565,21 @@ static void set_operstate(struct net_device *dev, unsigned char transition)
548 } 565 }
549} 566}
550 567
568static unsigned int rtnl_dev_combine_flags(const struct net_device *dev,
569 const struct ifinfomsg *ifm)
570{
571 unsigned int flags = ifm->ifi_flags;
572
573 /* bugwards compatibility: ifi_change == 0 is treated as ~0 */
574 if (ifm->ifi_change)
575 flags = (flags & ifm->ifi_change) |
576 (dev->flags & ~ifm->ifi_change);
577
578 return flags;
579}
580
551static void copy_rtnl_link_stats(struct rtnl_link_stats *a, 581static void copy_rtnl_link_stats(struct rtnl_link_stats *a,
552 const struct net_device_stats *b) 582 const struct rtnl_link_stats64 *b)
553{ 583{
554 a->rx_packets = b->rx_packets; 584 a->rx_packets = b->rx_packets;
555 a->tx_packets = b->tx_packets; 585 a->tx_packets = b->tx_packets;
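rtnl_dev_combine_flags() above factors out the rule do_setlink() already applied: bits set in ifi_change are taken from ifi_flags, every other bit keeps its current value on the device, and ifi_change == 0 still means "change everything". A minimal user-space sketch of that rule, mirroring the logic only (it is not the kernel function); the IFF_* values are the standard ones from net/if.h.

#include <stdio.h>
#include <net/if.h>

static unsigned int combine_flags(unsigned int dev_flags,
				  unsigned int ifi_flags,
				  unsigned int ifi_change)
{
	if (!ifi_change)	/* 0 is treated as "change all bits" */
		return ifi_flags;
	return (ifi_flags & ifi_change) | (dev_flags & ~ifi_change);
}

int main(void)
{
	unsigned int dev_flags = IFF_BROADCAST | IFF_MULTICAST;

	/* request: set IFF_UP, leave every other flag untouched */
	unsigned int res = combine_flags(dev_flags, IFF_UP, IFF_UP);

	printf("0x%x\n", res);	/* IFF_UP | IFF_BROADCAST | IFF_MULTICAST */
	return 0;
}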
@@ -578,9 +608,85 @@ static void copy_rtnl_link_stats(struct rtnl_link_stats *a,
578 608
579 a->rx_compressed = b->rx_compressed; 609 a->rx_compressed = b->rx_compressed;
580 a->tx_compressed = b->tx_compressed; 610 a->tx_compressed = b->tx_compressed;
581}; 611}
582 612
583static inline size_t if_nlmsg_size(const struct net_device *dev) 613static void copy_rtnl_link_stats64(void *v, const struct rtnl_link_stats64 *b)
614{
615 struct rtnl_link_stats64 a;
616
617 a.rx_packets = b->rx_packets;
618 a.tx_packets = b->tx_packets;
619 a.rx_bytes = b->rx_bytes;
620 a.tx_bytes = b->tx_bytes;
621 a.rx_errors = b->rx_errors;
622 a.tx_errors = b->tx_errors;
623 a.rx_dropped = b->rx_dropped;
624 a.tx_dropped = b->tx_dropped;
625
626 a.multicast = b->multicast;
627 a.collisions = b->collisions;
628
629 a.rx_length_errors = b->rx_length_errors;
630 a.rx_over_errors = b->rx_over_errors;
631 a.rx_crc_errors = b->rx_crc_errors;
632 a.rx_frame_errors = b->rx_frame_errors;
633 a.rx_fifo_errors = b->rx_fifo_errors;
634 a.rx_missed_errors = b->rx_missed_errors;
635
636 a.tx_aborted_errors = b->tx_aborted_errors;
637 a.tx_carrier_errors = b->tx_carrier_errors;
638 a.tx_fifo_errors = b->tx_fifo_errors;
639 a.tx_heartbeat_errors = b->tx_heartbeat_errors;
640 a.tx_window_errors = b->tx_window_errors;
641
642 a.rx_compressed = b->rx_compressed;
643 a.tx_compressed = b->tx_compressed;
644 memcpy(v, &a, sizeof(a));
645}
646
647/* All VF info */
648static inline int rtnl_vfinfo_size(const struct net_device *dev)
649{
650 if (dev->dev.parent && dev_is_pci(dev->dev.parent)) {
651
652 int num_vfs = dev_num_vf(dev->dev.parent);
653 size_t size = nla_total_size(sizeof(struct nlattr));
654 size += nla_total_size(num_vfs * sizeof(struct nlattr));
655 size += num_vfs *
656 (nla_total_size(sizeof(struct ifla_vf_mac)) +
657 nla_total_size(sizeof(struct ifla_vf_vlan)) +
658 nla_total_size(sizeof(struct ifla_vf_tx_rate)));
659 return size;
660 } else
661 return 0;
662}
663
664static size_t rtnl_port_size(const struct net_device *dev)
665{
666 size_t port_size = nla_total_size(4) /* PORT_VF */
667 + nla_total_size(PORT_PROFILE_MAX) /* PORT_PROFILE */
668 + nla_total_size(sizeof(struct ifla_port_vsi))
669 /* PORT_VSI_TYPE */
670 + nla_total_size(PORT_UUID_MAX) /* PORT_INSTANCE_UUID */
671 + nla_total_size(PORT_UUID_MAX) /* PORT_HOST_UUID */
672 + nla_total_size(1) /* PROT_VDP_REQUEST */
673 + nla_total_size(2); /* PORT_VDP_RESPONSE */
674 size_t vf_ports_size = nla_total_size(sizeof(struct nlattr));
675 size_t vf_port_size = nla_total_size(sizeof(struct nlattr))
676 + port_size;
677 size_t port_self_size = nla_total_size(sizeof(struct nlattr))
678 + port_size;
679
680 if (!dev->netdev_ops->ndo_get_vf_port || !dev->dev.parent)
681 return 0;
682 if (dev_num_vf(dev->dev.parent))
683 return port_self_size + vf_ports_size +
684 vf_port_size * dev_num_vf(dev->dev.parent);
685 else
686 return port_self_size;
687}
688
689static noinline size_t if_nlmsg_size(const struct net_device *dev)
584{ 690{
585 return NLMSG_ALIGN(sizeof(struct ifinfomsg)) 691 return NLMSG_ALIGN(sizeof(struct ifinfomsg))
586 + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */ 692 + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */
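The new rtnl_vfinfo_size() and rtnl_port_size() above pre-account netlink attribute space so if_nlmsg_size() can reserve a large enough skb. For reference, nla_total_size(payload) is the 4-byte attribute header plus the payload, rounded up to 4-byte alignment. A hedged stand-alone sketch of the per-VF part of that accounting; the MY_ prefixed macros restate the alignment rule locally, and the ifla_vf_* structs are assumed to be available from linux/if_link.h.

#include <stdio.h>
#include <linux/if_link.h>

#define MY_NLA_ALIGNTO		4
#define MY_NLA_ALIGN(len)	(((len) + MY_NLA_ALIGNTO - 1) & ~(MY_NLA_ALIGNTO - 1))
#define MY_NLA_HDRLEN		MY_NLA_ALIGN(4)
#define my_nla_total_size(p)	MY_NLA_ALIGN(MY_NLA_HDRLEN + (p))

int main(void)
{
	/* bytes reserved per VF for IFLA_VF_MAC, IFLA_VF_VLAN, IFLA_VF_TX_RATE */
	size_t per_vf = my_nla_total_size(sizeof(struct ifla_vf_mac)) +
			my_nla_total_size(sizeof(struct ifla_vf_vlan)) +
			my_nla_total_size(sizeof(struct ifla_vf_tx_rate));

	printf("bytes reserved per VF: %zu\n", per_vf);
	return 0;
}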
@@ -588,6 +694,7 @@ static inline size_t if_nlmsg_size(const struct net_device *dev)
588 + nla_total_size(IFNAMSIZ) /* IFLA_QDISC */ 694 + nla_total_size(IFNAMSIZ) /* IFLA_QDISC */
589 + nla_total_size(sizeof(struct rtnl_link_ifmap)) 695 + nla_total_size(sizeof(struct rtnl_link_ifmap))
590 + nla_total_size(sizeof(struct rtnl_link_stats)) 696 + nla_total_size(sizeof(struct rtnl_link_stats))
697 + nla_total_size(sizeof(struct rtnl_link_stats64))
591 + nla_total_size(MAX_ADDR_LEN) /* IFLA_ADDRESS */ 698 + nla_total_size(MAX_ADDR_LEN) /* IFLA_ADDRESS */
592 + nla_total_size(MAX_ADDR_LEN) /* IFLA_BROADCAST */ 699 + nla_total_size(MAX_ADDR_LEN) /* IFLA_BROADCAST */
593 + nla_total_size(4) /* IFLA_TXQLEN */ 700 + nla_total_size(4) /* IFLA_TXQLEN */
@@ -597,16 +704,95 @@ static inline size_t if_nlmsg_size(const struct net_device *dev)
597 + nla_total_size(4) /* IFLA_MASTER */ 704 + nla_total_size(4) /* IFLA_MASTER */
598 + nla_total_size(1) /* IFLA_OPERSTATE */ 705 + nla_total_size(1) /* IFLA_OPERSTATE */
599 + nla_total_size(1) /* IFLA_LINKMODE */ 706 + nla_total_size(1) /* IFLA_LINKMODE */
707 + nla_total_size(4) /* IFLA_NUM_VF */
708 + rtnl_vfinfo_size(dev) /* IFLA_VFINFO_LIST */
709 + rtnl_port_size(dev) /* IFLA_VF_PORTS + IFLA_PORT_SELF */
600 + rtnl_link_get_size(dev); /* IFLA_LINKINFO */ 710 + rtnl_link_get_size(dev); /* IFLA_LINKINFO */
601} 711}
602 712
713static int rtnl_vf_ports_fill(struct sk_buff *skb, struct net_device *dev)
714{
715 struct nlattr *vf_ports;
716 struct nlattr *vf_port;
717 int vf;
718 int err;
719
720 vf_ports = nla_nest_start(skb, IFLA_VF_PORTS);
721 if (!vf_ports)
722 return -EMSGSIZE;
723
724 for (vf = 0; vf < dev_num_vf(dev->dev.parent); vf++) {
725 vf_port = nla_nest_start(skb, IFLA_VF_PORT);
726 if (!vf_port)
727 goto nla_put_failure;
728 NLA_PUT_U32(skb, IFLA_PORT_VF, vf);
729 err = dev->netdev_ops->ndo_get_vf_port(dev, vf, skb);
730 if (err == -EMSGSIZE)
731 goto nla_put_failure;
732 if (err) {
733 nla_nest_cancel(skb, vf_port);
734 continue;
735 }
736 nla_nest_end(skb, vf_port);
737 }
738
739 nla_nest_end(skb, vf_ports);
740
741 return 0;
742
743nla_put_failure:
744 nla_nest_cancel(skb, vf_ports);
745 return -EMSGSIZE;
746}
747
748static int rtnl_port_self_fill(struct sk_buff *skb, struct net_device *dev)
749{
750 struct nlattr *port_self;
751 int err;
752
753 port_self = nla_nest_start(skb, IFLA_PORT_SELF);
754 if (!port_self)
755 return -EMSGSIZE;
756
757 err = dev->netdev_ops->ndo_get_vf_port(dev, PORT_SELF_VF, skb);
758 if (err) {
759 nla_nest_cancel(skb, port_self);
760 return (err == -EMSGSIZE) ? err : 0;
761 }
762
763 nla_nest_end(skb, port_self);
764
765 return 0;
766}
767
768static int rtnl_port_fill(struct sk_buff *skb, struct net_device *dev)
769{
770 int err;
771
772 if (!dev->netdev_ops->ndo_get_vf_port || !dev->dev.parent)
773 return 0;
774
775 err = rtnl_port_self_fill(skb, dev);
776 if (err)
777 return err;
778
779 if (dev_num_vf(dev->dev.parent)) {
780 err = rtnl_vf_ports_fill(skb, dev);
781 if (err)
782 return err;
783 }
784
785 return 0;
786}
787
603static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, 788static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
604 int type, u32 pid, u32 seq, u32 change, 789 int type, u32 pid, u32 seq, u32 change,
605 unsigned int flags) 790 unsigned int flags)
606{ 791{
607 struct ifinfomsg *ifm; 792 struct ifinfomsg *ifm;
608 struct nlmsghdr *nlh; 793 struct nlmsghdr *nlh;
609 const struct net_device_stats *stats; 794 struct rtnl_link_stats64 temp;
795 const struct rtnl_link_stats64 *stats;
610 struct nlattr *attr; 796 struct nlattr *attr;
611 797
612 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifm), flags); 798 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifm), flags);
@@ -662,9 +848,55 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
662 if (attr == NULL) 848 if (attr == NULL)
663 goto nla_put_failure; 849 goto nla_put_failure;
664 850
665 stats = dev_get_stats(dev); 851 stats = dev_get_stats(dev, &temp);
666 copy_rtnl_link_stats(nla_data(attr), stats); 852 copy_rtnl_link_stats(nla_data(attr), stats);
667 853
854 attr = nla_reserve(skb, IFLA_STATS64,
855 sizeof(struct rtnl_link_stats64));
856 if (attr == NULL)
857 goto nla_put_failure;
858 copy_rtnl_link_stats64(nla_data(attr), stats);
859
860 if (dev->dev.parent)
861 NLA_PUT_U32(skb, IFLA_NUM_VF, dev_num_vf(dev->dev.parent));
862
863 if (dev->netdev_ops->ndo_get_vf_config && dev->dev.parent) {
864 int i;
865
866 struct nlattr *vfinfo, *vf;
867 int num_vfs = dev_num_vf(dev->dev.parent);
868
869 vfinfo = nla_nest_start(skb, IFLA_VFINFO_LIST);
870 if (!vfinfo)
871 goto nla_put_failure;
872 for (i = 0; i < num_vfs; i++) {
873 struct ifla_vf_info ivi;
874 struct ifla_vf_mac vf_mac;
875 struct ifla_vf_vlan vf_vlan;
876 struct ifla_vf_tx_rate vf_tx_rate;
877 if (dev->netdev_ops->ndo_get_vf_config(dev, i, &ivi))
878 break;
879 vf_mac.vf = vf_vlan.vf = vf_tx_rate.vf = ivi.vf;
880 memcpy(vf_mac.mac, ivi.mac, sizeof(ivi.mac));
881 vf_vlan.vlan = ivi.vlan;
882 vf_vlan.qos = ivi.qos;
883 vf_tx_rate.rate = ivi.tx_rate;
884 vf = nla_nest_start(skb, IFLA_VF_INFO);
885 if (!vf) {
886 nla_nest_cancel(skb, vfinfo);
887 goto nla_put_failure;
888 }
889 NLA_PUT(skb, IFLA_VF_MAC, sizeof(vf_mac), &vf_mac);
890 NLA_PUT(skb, IFLA_VF_VLAN, sizeof(vf_vlan), &vf_vlan);
891 NLA_PUT(skb, IFLA_VF_TX_RATE, sizeof(vf_tx_rate), &vf_tx_rate);
892 nla_nest_end(skb, vf);
893 }
894 nla_nest_end(skb, vfinfo);
895 }
896
897 if (rtnl_port_fill(skb, dev))
898 goto nla_put_failure;
899
668 if (dev->rtnl_link_ops) { 900 if (dev->rtnl_link_ops) {
669 if (rtnl_link_fill(skb, dev) < 0) 901 if (rtnl_link_fill(skb, dev) < 0)
670 goto nla_put_failure; 902 goto nla_put_failure;
@@ -725,6 +957,9 @@ const struct nla_policy ifla_policy[IFLA_MAX+1] = {
725 [IFLA_LINKINFO] = { .type = NLA_NESTED }, 957 [IFLA_LINKINFO] = { .type = NLA_NESTED },
726 [IFLA_NET_NS_PID] = { .type = NLA_U32 }, 958 [IFLA_NET_NS_PID] = { .type = NLA_U32 },
727 [IFLA_IFALIAS] = { .type = NLA_STRING, .len = IFALIASZ-1 }, 959 [IFLA_IFALIAS] = { .type = NLA_STRING, .len = IFALIASZ-1 },
960 [IFLA_VFINFO_LIST] = {. type = NLA_NESTED },
961 [IFLA_VF_PORTS] = { .type = NLA_NESTED },
962 [IFLA_PORT_SELF] = { .type = NLA_NESTED },
728}; 963};
729EXPORT_SYMBOL(ifla_policy); 964EXPORT_SYMBOL(ifla_policy);
730 965
@@ -733,6 +968,33 @@ static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = {
733 [IFLA_INFO_DATA] = { .type = NLA_NESTED }, 968 [IFLA_INFO_DATA] = { .type = NLA_NESTED },
734}; 969};
735 970
971static const struct nla_policy ifla_vfinfo_policy[IFLA_VF_INFO_MAX+1] = {
972 [IFLA_VF_INFO] = { .type = NLA_NESTED },
973};
974
975static const struct nla_policy ifla_vf_policy[IFLA_VF_MAX+1] = {
976 [IFLA_VF_MAC] = { .type = NLA_BINARY,
977 .len = sizeof(struct ifla_vf_mac) },
978 [IFLA_VF_VLAN] = { .type = NLA_BINARY,
979 .len = sizeof(struct ifla_vf_vlan) },
980 [IFLA_VF_TX_RATE] = { .type = NLA_BINARY,
981 .len = sizeof(struct ifla_vf_tx_rate) },
982};
983
984static const struct nla_policy ifla_port_policy[IFLA_PORT_MAX+1] = {
985 [IFLA_PORT_VF] = { .type = NLA_U32 },
986 [IFLA_PORT_PROFILE] = { .type = NLA_STRING,
987 .len = PORT_PROFILE_MAX },
988 [IFLA_PORT_VSI_TYPE] = { .type = NLA_BINARY,
989 .len = sizeof(struct ifla_port_vsi)},
990 [IFLA_PORT_INSTANCE_UUID] = { .type = NLA_BINARY,
991 .len = PORT_UUID_MAX },
992 [IFLA_PORT_HOST_UUID] = { .type = NLA_STRING,
993 .len = PORT_UUID_MAX },
994 [IFLA_PORT_REQUEST] = { .type = NLA_U8, },
995 [IFLA_PORT_RESPONSE] = { .type = NLA_U16, },
996};
997
736struct net *rtnl_link_get_net(struct net *src_net, struct nlattr *tb[]) 998struct net *rtnl_link_get_net(struct net *src_net, struct nlattr *tb[])
737{ 999{
738 struct net *net; 1000 struct net *net;
@@ -762,6 +1024,52 @@ static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[])
762 return 0; 1024 return 0;
763} 1025}
764 1026
1027static int do_setvfinfo(struct net_device *dev, struct nlattr *attr)
1028{
1029 int rem, err = -EINVAL;
1030 struct nlattr *vf;
1031 const struct net_device_ops *ops = dev->netdev_ops;
1032
1033 nla_for_each_nested(vf, attr, rem) {
1034 switch (nla_type(vf)) {
1035 case IFLA_VF_MAC: {
1036 struct ifla_vf_mac *ivm;
1037 ivm = nla_data(vf);
1038 err = -EOPNOTSUPP;
1039 if (ops->ndo_set_vf_mac)
1040 err = ops->ndo_set_vf_mac(dev, ivm->vf,
1041 ivm->mac);
1042 break;
1043 }
1044 case IFLA_VF_VLAN: {
1045 struct ifla_vf_vlan *ivv;
1046 ivv = nla_data(vf);
1047 err = -EOPNOTSUPP;
1048 if (ops->ndo_set_vf_vlan)
1049 err = ops->ndo_set_vf_vlan(dev, ivv->vf,
1050 ivv->vlan,
1051 ivv->qos);
1052 break;
1053 }
1054 case IFLA_VF_TX_RATE: {
1055 struct ifla_vf_tx_rate *ivt;
1056 ivt = nla_data(vf);
1057 err = -EOPNOTSUPP;
1058 if (ops->ndo_set_vf_tx_rate)
1059 err = ops->ndo_set_vf_tx_rate(dev, ivt->vf,
1060 ivt->rate);
1061 break;
1062 }
1063 default:
1064 err = -EINVAL;
1065 break;
1066 }
1067 if (err)
1068 break;
1069 }
1070 return err;
1071}
1072
765static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, 1073static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
766 struct nlattr **tb, char *ifname, int modified) 1074 struct nlattr **tb, char *ifname, int modified)
767{ 1075{
@@ -875,13 +1183,7 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
875 } 1183 }
876 1184
877 if (ifm->ifi_flags || ifm->ifi_change) { 1185 if (ifm->ifi_flags || ifm->ifi_change) {
878 unsigned int flags = ifm->ifi_flags; 1186 err = dev_change_flags(dev, rtnl_dev_combine_flags(dev, ifm));
879
880 /* bugwards compatibility: ifi_change == 0 is treated as ~0 */
881 if (ifm->ifi_change)
882 flags = (flags & ifm->ifi_change) |
883 (dev->flags & ~ifm->ifi_change);
884 err = dev_change_flags(dev, flags);
885 if (err < 0) 1187 if (err < 0)
886 goto errout; 1188 goto errout;
887 } 1189 }
@@ -898,6 +1200,67 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
898 write_unlock_bh(&dev_base_lock); 1200 write_unlock_bh(&dev_base_lock);
899 } 1201 }
900 1202
1203 if (tb[IFLA_VFINFO_LIST]) {
1204 struct nlattr *attr;
1205 int rem;
1206 nla_for_each_nested(attr, tb[IFLA_VFINFO_LIST], rem) {
1207 if (nla_type(attr) != IFLA_VF_INFO) {
1208 err = -EINVAL;
1209 goto errout;
1210 }
1211 err = do_setvfinfo(dev, attr);
1212 if (err < 0)
1213 goto errout;
1214 modified = 1;
1215 }
1216 }
1217 err = 0;
1218
1219 if (tb[IFLA_VF_PORTS]) {
1220 struct nlattr *port[IFLA_PORT_MAX+1];
1221 struct nlattr *attr;
1222 int vf;
1223 int rem;
1224
1225 err = -EOPNOTSUPP;
1226 if (!ops->ndo_set_vf_port)
1227 goto errout;
1228
1229 nla_for_each_nested(attr, tb[IFLA_VF_PORTS], rem) {
1230 if (nla_type(attr) != IFLA_VF_PORT)
1231 continue;
1232 err = nla_parse_nested(port, IFLA_PORT_MAX,
1233 attr, ifla_port_policy);
1234 if (err < 0)
1235 goto errout;
1236 if (!port[IFLA_PORT_VF]) {
1237 err = -EOPNOTSUPP;
1238 goto errout;
1239 }
1240 vf = nla_get_u32(port[IFLA_PORT_VF]);
1241 err = ops->ndo_set_vf_port(dev, vf, port);
1242 if (err < 0)
1243 goto errout;
1244 modified = 1;
1245 }
1246 }
1247 err = 0;
1248
1249 if (tb[IFLA_PORT_SELF]) {
1250 struct nlattr *port[IFLA_PORT_MAX+1];
1251
1252 err = nla_parse_nested(port, IFLA_PORT_MAX,
1253 tb[IFLA_PORT_SELF], ifla_port_policy);
1254 if (err < 0)
1255 goto errout;
1256
1257 err = -EOPNOTSUPP;
1258 if (ops->ndo_set_vf_port)
1259 err = ops->ndo_set_vf_port(dev, PORT_SELF_VF, port);
1260 if (err < 0)
1261 goto errout;
1262 modified = 1;
1263 }
901 err = 0; 1264 err = 0;
902 1265
903errout: 1266errout:
@@ -989,6 +1352,26 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
989 return 0; 1352 return 0;
990} 1353}
991 1354
1355int rtnl_configure_link(struct net_device *dev, const struct ifinfomsg *ifm)
1356{
1357 unsigned int old_flags;
1358 int err;
1359
1360 old_flags = dev->flags;
1361 if (ifm && (ifm->ifi_flags || ifm->ifi_change)) {
1362 err = __dev_change_flags(dev, rtnl_dev_combine_flags(dev, ifm));
1363 if (err < 0)
1364 return err;
1365 }
1366
1367 dev->rtnl_link_state = RTNL_LINK_INITIALIZED;
1368 rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U);
1369
1370 __dev_notify_flags(dev, old_flags);
1371 return 0;
1372}
1373EXPORT_SYMBOL(rtnl_configure_link);
1374
992struct net_device *rtnl_create_link(struct net *src_net, struct net *net, 1375struct net_device *rtnl_create_link(struct net *src_net, struct net *net,
993 char *ifname, const struct rtnl_link_ops *ops, struct nlattr *tb[]) 1376 char *ifname, const struct rtnl_link_ops *ops, struct nlattr *tb[])
994{ 1377{
@@ -1010,6 +1393,7 @@ struct net_device *rtnl_create_link(struct net *src_net, struct net *net,
1010 1393
1011 dev_net_set(dev, net); 1394 dev_net_set(dev, net);
1012 dev->rtnl_link_ops = ops; 1395 dev->rtnl_link_ops = ops;
1396 dev->rtnl_link_state = RTNL_LINK_INITIALIZING;
1013 dev->real_num_tx_queues = real_num_queues; 1397 dev->real_num_tx_queues = real_num_queues;
1014 1398
1015 if (strchr(dev->name, '%')) { 1399 if (strchr(dev->name, '%')) {
@@ -1139,7 +1523,7 @@ replay:
1139 if (!(nlh->nlmsg_flags & NLM_F_CREATE)) 1523 if (!(nlh->nlmsg_flags & NLM_F_CREATE))
1140 return -ENODEV; 1524 return -ENODEV;
1141 1525
1142 if (ifm->ifi_index || ifm->ifi_flags || ifm->ifi_change) 1526 if (ifm->ifi_index)
1143 return -EOPNOTSUPP; 1527 return -EOPNOTSUPP;
1144 if (tb[IFLA_MAP] || tb[IFLA_MASTER] || tb[IFLA_PROTINFO]) 1528 if (tb[IFLA_MAP] || tb[IFLA_MASTER] || tb[IFLA_PROTINFO])
1145 return -EOPNOTSUPP; 1529 return -EOPNOTSUPP;
@@ -1170,9 +1554,16 @@ replay:
1170 err = ops->newlink(net, dev, tb, data); 1554 err = ops->newlink(net, dev, tb, data);
1171 else 1555 else
1172 err = register_netdevice(dev); 1556 err = register_netdevice(dev);
1557
1173 if (err < 0 && !IS_ERR(dev)) 1558 if (err < 0 && !IS_ERR(dev))
1174 free_netdev(dev); 1559 free_netdev(dev);
1560 if (err < 0)
1561 goto out;
1175 1562
1563 err = rtnl_configure_link(dev, ifm);
1564 if (err < 0)
1565 unregister_netdevice(dev);
1566out:
1176 put_net(dest_net); 1567 put_net(dest_net);
1177 return err; 1568 return err;
1178 } 1569 }
@@ -1229,7 +1620,7 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb)
1229 1620
1230 if (s_idx == 0) 1621 if (s_idx == 0)
1231 s_idx = 1; 1622 s_idx = 1;
1232 for (idx = 1; idx < NPROTO; idx++) { 1623 for (idx = 1; idx <= RTNL_FAMILY_MAX; idx++) {
1233 int type = cb->nlh->nlmsg_type-RTM_BASE; 1624 int type = cb->nlh->nlmsg_type-RTM_BASE;
1234 if (idx < s_idx || idx == PF_PACKET) 1625 if (idx < s_idx || idx == PF_PACKET)
1235 continue; 1626 continue;
@@ -1297,9 +1688,6 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
1297 return 0; 1688 return 0;
1298 1689
1299 family = ((struct rtgenmsg *)NLMSG_DATA(nlh))->rtgen_family; 1690 family = ((struct rtgenmsg *)NLMSG_DATA(nlh))->rtgen_family;
1300 if (family >= NPROTO)
1301 return -EAFNOSUPPORT;
1302
1303 sz_idx = type>>2; 1691 sz_idx = type>>2;
1304 kind = type&3; 1692 kind = type&3;
1305 1693
@@ -1361,17 +1749,15 @@ static int rtnetlink_event(struct notifier_block *this, unsigned long event, voi
1361 struct net_device *dev = ptr; 1749 struct net_device *dev = ptr;
1362 1750
1363 switch (event) { 1751 switch (event) {
1364 case NETDEV_UNREGISTER:
1365 rtmsg_ifinfo(RTM_DELLINK, dev, ~0U);
1366 break;
1367 case NETDEV_UP: 1752 case NETDEV_UP:
1368 case NETDEV_DOWN: 1753 case NETDEV_DOWN:
1369 rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING); 1754 case NETDEV_PRE_UP:
1370 break;
1371 case NETDEV_POST_INIT: 1755 case NETDEV_POST_INIT:
1372 case NETDEV_REGISTER: 1756 case NETDEV_REGISTER:
1373 case NETDEV_CHANGE: 1757 case NETDEV_CHANGE:
1758 case NETDEV_PRE_TYPE_CHANGE:
1374 case NETDEV_GOING_DOWN: 1759 case NETDEV_GOING_DOWN:
1760 case NETDEV_UNREGISTER:
1375 case NETDEV_UNREGISTER_BATCH: 1761 case NETDEV_UNREGISTER_BATCH:
1376 break; 1762 break;
1377 default: 1763 default:
@@ -1386,7 +1772,7 @@ static struct notifier_block rtnetlink_dev_notifier = {
1386}; 1772};
1387 1773
1388 1774
1389static int rtnetlink_net_init(struct net *net) 1775static int __net_init rtnetlink_net_init(struct net *net)
1390{ 1776{
1391 struct sock *sk; 1777 struct sock *sk;
1392 sk = netlink_kernel_create(net, NETLINK_ROUTE, RTNLGRP_MAX, 1778 sk = netlink_kernel_create(net, NETLINK_ROUTE, RTNLGRP_MAX,
@@ -1397,7 +1783,7 @@ static int rtnetlink_net_init(struct net *net)
1397 return 0; 1783 return 0;
1398} 1784}
1399 1785
1400static void rtnetlink_net_exit(struct net *net) 1786static void __net_exit rtnetlink_net_exit(struct net *net)
1401{ 1787{
1402 netlink_kernel_release(net->rtnl); 1788 netlink_kernel_release(net->rtnl);
1403 net->rtnl = NULL; 1789 net->rtnl = NULL;
diff --git a/net/core/scm.c b/net/core/scm.c
index b7ba91b074b3..413cab89017d 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -26,6 +26,7 @@
26#include <linux/security.h> 26#include <linux/security.h>
27#include <linux/pid.h> 27#include <linux/pid.h>
28#include <linux/nsproxy.h> 28#include <linux/nsproxy.h>
29#include <linux/slab.h>
29 30
30#include <asm/system.h> 31#include <asm/system.h>
31#include <asm/uaccess.h> 32#include <asm/uaccess.h>
@@ -129,6 +130,7 @@ void __scm_destroy(struct scm_cookie *scm)
129 } 130 }
130 } 131 }
131} 132}
133EXPORT_SYMBOL(__scm_destroy);
132 134
133int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p) 135int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p)
134{ 136{
@@ -156,6 +158,8 @@ int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p)
156 switch (cmsg->cmsg_type) 158 switch (cmsg->cmsg_type)
157 { 159 {
158 case SCM_RIGHTS: 160 case SCM_RIGHTS:
161 if (!sock->ops || sock->ops->family != PF_UNIX)
162 goto error;
159 err=scm_fp_copy(cmsg, &p->fp); 163 err=scm_fp_copy(cmsg, &p->fp);
160 if (err<0) 164 if (err<0)
161 goto error; 165 goto error;
@@ -167,6 +171,30 @@ int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p)
167 err = scm_check_creds(&p->creds); 171 err = scm_check_creds(&p->creds);
168 if (err) 172 if (err)
169 goto error; 173 goto error;
174
175 if (pid_vnr(p->pid) != p->creds.pid) {
176 struct pid *pid;
177 err = -ESRCH;
178 pid = find_get_pid(p->creds.pid);
179 if (!pid)
180 goto error;
181 put_pid(p->pid);
182 p->pid = pid;
183 }
184
185 if ((p->cred->euid != p->creds.uid) ||
186 (p->cred->egid != p->creds.gid)) {
187 struct cred *cred;
188 err = -ENOMEM;
189 cred = prepare_creds();
190 if (!cred)
191 goto error;
192
193 cred->uid = cred->euid = p->creds.uid;
194 cred->gid = cred->egid = p->creds.uid;
195 put_cred(p->cred);
196 p->cred = cred;
197 }
170 break; 198 break;
171 default: 199 default:
172 goto error; 200 goto error;
@@ -184,6 +212,7 @@ error:
184 scm_destroy(p); 212 scm_destroy(p);
185 return err; 213 return err;
186} 214}
215EXPORT_SYMBOL(__scm_send);
187 216
188int put_cmsg(struct msghdr * msg, int level, int type, int len, void *data) 217int put_cmsg(struct msghdr * msg, int level, int type, int len, void *data)
189{ 218{
@@ -222,6 +251,7 @@ int put_cmsg(struct msghdr * msg, int level, int type, int len, void *data)
222out: 251out:
223 return err; 252 return err;
224} 253}
254EXPORT_SYMBOL(put_cmsg);
225 255
226void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm) 256void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm)
227{ 257{
@@ -291,6 +321,7 @@ void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm)
291 */ 321 */
292 __scm_destroy(scm); 322 __scm_destroy(scm);
293} 323}
324EXPORT_SYMBOL(scm_detach_fds);
294 325
295struct scm_fp_list *scm_fp_dup(struct scm_fp_list *fpl) 326struct scm_fp_list *scm_fp_dup(struct scm_fp_list *fpl)
296{ 327{
@@ -308,9 +339,4 @@ struct scm_fp_list *scm_fp_dup(struct scm_fp_list *fpl)
308 } 339 }
309 return new_fpl; 340 return new_fpl;
310} 341}
311
312EXPORT_SYMBOL(__scm_destroy);
313EXPORT_SYMBOL(__scm_send);
314EXPORT_SYMBOL(put_cmsg);
315EXPORT_SYMBOL(scm_detach_fds);
316EXPORT_SYMBOL(scm_fp_dup); 342EXPORT_SYMBOL(scm_fp_dup);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 93c4e060c91e..c83b421341c0 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -117,7 +117,7 @@ static const struct pipe_buf_operations sock_pipe_buf_ops = {
117 * 117 *
118 * Out of line support code for skb_put(). Not user callable. 118 * Out of line support code for skb_put(). Not user callable.
119 */ 119 */
120void skb_over_panic(struct sk_buff *skb, int sz, void *here) 120static void skb_over_panic(struct sk_buff *skb, int sz, void *here)
121{ 121{
122 printk(KERN_EMERG "skb_over_panic: text:%p len:%d put:%d head:%p " 122 printk(KERN_EMERG "skb_over_panic: text:%p len:%d put:%d head:%p "
123 "data:%p tail:%#lx end:%#lx dev:%s\n", 123 "data:%p tail:%#lx end:%#lx dev:%s\n",
@@ -126,7 +126,6 @@ void skb_over_panic(struct sk_buff *skb, int sz, void *here)
126 skb->dev ? skb->dev->name : "<NULL>"); 126 skb->dev ? skb->dev->name : "<NULL>");
127 BUG(); 127 BUG();
128} 128}
129EXPORT_SYMBOL(skb_over_panic);
130 129
131/** 130/**
132 * skb_under_panic - private function 131 * skb_under_panic - private function
@@ -137,7 +136,7 @@ EXPORT_SYMBOL(skb_over_panic);
137 * Out of line support code for skb_push(). Not user callable. 136 * Out of line support code for skb_push(). Not user callable.
138 */ 137 */
139 138
140void skb_under_panic(struct sk_buff *skb, int sz, void *here) 139static void skb_under_panic(struct sk_buff *skb, int sz, void *here)
141{ 140{
142 printk(KERN_EMERG "skb_under_panic: text:%p len:%d put:%d head:%p " 141 printk(KERN_EMERG "skb_under_panic: text:%p len:%d put:%d head:%p "
143 "data:%p tail:%#lx end:%#lx dev:%s\n", 142 "data:%p tail:%#lx end:%#lx dev:%s\n",
@@ -146,7 +145,6 @@ void skb_under_panic(struct sk_buff *skb, int sz, void *here)
146 skb->dev ? skb->dev->name : "<NULL>"); 145 skb->dev ? skb->dev->name : "<NULL>");
147 BUG(); 146 BUG();
148} 147}
149EXPORT_SYMBOL(skb_under_panic);
150 148
151/* Allocate a new skbuff. We do this ourselves so we can fill in a few 149/* Allocate a new skbuff. We do this ourselves so we can fill in a few
152 * 'private' fields and also do memory statistics to find all the 150 * 'private' fields and also do memory statistics to find all the
@@ -183,12 +181,14 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
183 skb = kmem_cache_alloc_node(cache, gfp_mask & ~__GFP_DMA, node); 181 skb = kmem_cache_alloc_node(cache, gfp_mask & ~__GFP_DMA, node);
184 if (!skb) 182 if (!skb)
185 goto out; 183 goto out;
184 prefetchw(skb);
186 185
187 size = SKB_DATA_ALIGN(size); 186 size = SKB_DATA_ALIGN(size);
188 data = kmalloc_node_track_caller(size + sizeof(struct skb_shared_info), 187 data = kmalloc_node_track_caller(size + sizeof(struct skb_shared_info),
189 gfp_mask, node); 188 gfp_mask, node);
190 if (!data) 189 if (!data)
191 goto nodata; 190 goto nodata;
191 prefetchw(data + size);
192 192
193 /* 193 /*
194 * Only clear those fields we need to clear, not those that we will 194 * Only clear those fields we need to clear, not those that we will
@@ -210,15 +210,8 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
210 210
211 /* make sure we initialize shinfo sequentially */ 211 /* make sure we initialize shinfo sequentially */
212 shinfo = skb_shinfo(skb); 212 shinfo = skb_shinfo(skb);
213 memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
213 atomic_set(&shinfo->dataref, 1); 214 atomic_set(&shinfo->dataref, 1);
214 shinfo->nr_frags = 0;
215 shinfo->gso_size = 0;
216 shinfo->gso_segs = 0;
217 shinfo->gso_type = 0;
218 shinfo->ip6_frag_id = 0;
219 shinfo->tx_flags.flags = 0;
220 skb_frag_list_init(skb);
221 memset(&shinfo->hwtstamps, 0, sizeof(shinfo->hwtstamps));
222 215
223 if (fclone) { 216 if (fclone) {
224 struct sk_buff *child = skb + 1; 217 struct sk_buff *child = skb + 1;
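The shinfo setup above now uses a single memset() up to offsetof(struct skb_shared_info, dataref) instead of clearing each field by hand, so any field added ahead of dataref is cleared automatically. A small stand-alone sketch of the offsetof()-based partial clear; the struct below is only a stand-in for skb_shared_info, not its real layout.

#include <stdio.h>
#include <string.h>
#include <stddef.h>

struct shinfo_like {
	unsigned short nr_frags;
	unsigned short gso_size;
	unsigned int   gso_segs;
	unsigned int   gso_type;
	int            dataref;		/* deliberately left out of the memset */
};

int main(void)
{
	struct shinfo_like s = { 1, 2, 3, 4, 5 };

	/* zero everything up to, but not including, dataref */
	memset(&s, 0, offsetof(struct shinfo_like, dataref));
	s.dataref = 1;			/* then set it explicitly, as __alloc_skb() does */

	printf("%hu %hu %u %u %d\n",
	       s.nr_frags, s.gso_size, s.gso_segs, s.gso_type, s.dataref);
	return 0;
}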
@@ -489,40 +482,34 @@ EXPORT_SYMBOL(consume_skb);
489 * reference count dropping and cleans up the skbuff as if it 482 * reference count dropping and cleans up the skbuff as if it
490 * just came from __alloc_skb(). 483 * just came from __alloc_skb().
491 */ 484 */
492int skb_recycle_check(struct sk_buff *skb, int skb_size) 485bool skb_recycle_check(struct sk_buff *skb, int skb_size)
493{ 486{
494 struct skb_shared_info *shinfo; 487 struct skb_shared_info *shinfo;
495 488
496 if (irqs_disabled()) 489 if (irqs_disabled())
497 return 0; 490 return false;
498 491
499 if (skb_is_nonlinear(skb) || skb->fclone != SKB_FCLONE_UNAVAILABLE) 492 if (skb_is_nonlinear(skb) || skb->fclone != SKB_FCLONE_UNAVAILABLE)
500 return 0; 493 return false;
501 494
502 skb_size = SKB_DATA_ALIGN(skb_size + NET_SKB_PAD); 495 skb_size = SKB_DATA_ALIGN(skb_size + NET_SKB_PAD);
503 if (skb_end_pointer(skb) - skb->head < skb_size) 496 if (skb_end_pointer(skb) - skb->head < skb_size)
504 return 0; 497 return false;
505 498
506 if (skb_shared(skb) || skb_cloned(skb)) 499 if (skb_shared(skb) || skb_cloned(skb))
507 return 0; 500 return false;
508 501
509 skb_release_head_state(skb); 502 skb_release_head_state(skb);
503
510 shinfo = skb_shinfo(skb); 504 shinfo = skb_shinfo(skb);
505 memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
511 atomic_set(&shinfo->dataref, 1); 506 atomic_set(&shinfo->dataref, 1);
512 shinfo->nr_frags = 0;
513 shinfo->gso_size = 0;
514 shinfo->gso_segs = 0;
515 shinfo->gso_type = 0;
516 shinfo->ip6_frag_id = 0;
517 shinfo->tx_flags.flags = 0;
518 skb_frag_list_init(skb);
519 memset(&shinfo->hwtstamps, 0, sizeof(shinfo->hwtstamps));
520 507
521 memset(skb, 0, offsetof(struct sk_buff, tail)); 508 memset(skb, 0, offsetof(struct sk_buff, tail));
522 skb->data = skb->head + NET_SKB_PAD; 509 skb->data = skb->head + NET_SKB_PAD;
523 skb_reset_tail_pointer(skb); 510 skb_reset_tail_pointer(skb);
524 511
525 return 1; 512 return true;
526} 513}
527EXPORT_SYMBOL(skb_recycle_check); 514EXPORT_SYMBOL(skb_recycle_check);
528 515
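The two hunks above collapse the field-by-field reset of struct skb_shared_info into a single memset() that clears everything laid out before ->dataref, with dataref re-set explicitly afterwards. A minimal, hedged sketch of the idiom (plain userspace C; struct example and reset_prefix() are made-up names, not kernel code):

#include <stddef.h>
#include <string.h>

struct example {
	int a;			/* zeroed by reset_prefix() */
	int b;			/* zeroed by reset_prefix() */
	long refcount;		/* left alone, set explicitly afterwards */
};

static void reset_prefix(struct example *e)
{
	/* offsetof() gives the byte offset of ->refcount, so this clears
	 * every member declared before it and nothing at or after it */
	memset(e, 0, offsetof(struct example, refcount));
	e->refcount = 1;
}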
@@ -533,7 +520,8 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
533 new->transport_header = old->transport_header; 520 new->transport_header = old->transport_header;
534 new->network_header = old->network_header; 521 new->network_header = old->network_header;
535 new->mac_header = old->mac_header; 522 new->mac_header = old->mac_header;
536 skb_dst_set(new, dst_clone(skb_dst(old))); 523 skb_dst_copy(new, old);
524 new->rxhash = old->rxhash;
537#ifdef CONFIG_XFRM 525#ifdef CONFIG_XFRM
538 new->sp = secpath_get(old->sp); 526 new->sp = secpath_get(old->sp);
539#endif 527#endif
@@ -544,6 +532,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
544 new->ip_summed = old->ip_summed; 532 new->ip_summed = old->ip_summed;
545 skb_copy_queue_mapping(new, old); 533 skb_copy_queue_mapping(new, old);
546 new->priority = old->priority; 534 new->priority = old->priority;
535 new->deliver_no_wcard = old->deliver_no_wcard;
547#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE) 536#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
548 new->ipvs_property = old->ipvs_property; 537 new->ipvs_property = old->ipvs_property;
549#endif 538#endif
@@ -828,7 +817,7 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
828 memcpy(data + nhead, skb->head, skb->tail - skb->head); 817 memcpy(data + nhead, skb->head, skb->tail - skb->head);
829#endif 818#endif
830 memcpy(data + size, skb_end_pointer(skb), 819 memcpy(data + size, skb_end_pointer(skb),
831 sizeof(struct skb_shared_info)); 820 offsetof(struct skb_shared_info, frags[skb_shinfo(skb)->nr_frags]));
832 821
833 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) 822 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
834 get_page(skb_shinfo(skb)->frags[i].page); 823 get_page(skb_shinfo(skb)->frags[i].page);
@@ -854,7 +843,9 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
854 skb->network_header += off; 843 skb->network_header += off;
855 if (skb_mac_header_was_set(skb)) 844 if (skb_mac_header_was_set(skb))
856 skb->mac_header += off; 845 skb->mac_header += off;
857 skb->csum_start += nhead; 846 /* Only adjust this if it actually is csum_start rather than csum */
847 if (skb->ip_summed == CHECKSUM_PARTIAL)
848 skb->csum_start += nhead;
858 skb->cloned = 0; 849 skb->cloned = 0;
859 skb->hdr_len = 0; 850 skb->hdr_len = 0;
860 skb->nohdr = 0; 851 skb->nohdr = 0;
@@ -941,7 +932,8 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
941 copy_skb_header(n, skb); 932 copy_skb_header(n, skb);
942 933
943 off = newheadroom - oldheadroom; 934 off = newheadroom - oldheadroom;
944 n->csum_start += off; 935 if (n->ip_summed == CHECKSUM_PARTIAL)
936 n->csum_start += off;
945#ifdef NET_SKBUFF_DATA_USES_OFFSET 937#ifdef NET_SKBUFF_DATA_USES_OFFSET
946 n->transport_header += off; 938 n->transport_header += off;
947 n->network_header += off; 939 n->network_header += off;
@@ -1051,7 +1043,7 @@ EXPORT_SYMBOL(skb_push);
1051 */ 1043 */
1052unsigned char *skb_pull(struct sk_buff *skb, unsigned int len) 1044unsigned char *skb_pull(struct sk_buff *skb, unsigned int len)
1053{ 1045{
1054 return unlikely(len > skb->len) ? NULL : __skb_pull(skb, len); 1046 return skb_pull_inline(skb, len);
1055} 1047}
1056EXPORT_SYMBOL(skb_pull); 1048EXPORT_SYMBOL(skb_pull);
1057 1049
@@ -1417,12 +1409,13 @@ new_page:
1417/* 1409/*
1418 * Fill page/offset/length into spd, if it can hold more pages. 1410 * Fill page/offset/length into spd, if it can hold more pages.
1419 */ 1411 */
1420static inline int spd_fill_page(struct splice_pipe_desc *spd, struct page *page, 1412static inline int spd_fill_page(struct splice_pipe_desc *spd,
1413 struct pipe_inode_info *pipe, struct page *page,
1421 unsigned int *len, unsigned int offset, 1414 unsigned int *len, unsigned int offset,
1422 struct sk_buff *skb, int linear, 1415 struct sk_buff *skb, int linear,
1423 struct sock *sk) 1416 struct sock *sk)
1424{ 1417{
1425 if (unlikely(spd->nr_pages == PIPE_BUFFERS)) 1418 if (unlikely(spd->nr_pages == pipe->buffers))
1426 return 1; 1419 return 1;
1427 1420
1428 if (linear) { 1421 if (linear) {
@@ -1458,7 +1451,8 @@ static inline int __splice_segment(struct page *page, unsigned int poff,
1458 unsigned int plen, unsigned int *off, 1451 unsigned int plen, unsigned int *off,
1459 unsigned int *len, struct sk_buff *skb, 1452 unsigned int *len, struct sk_buff *skb,
1460 struct splice_pipe_desc *spd, int linear, 1453 struct splice_pipe_desc *spd, int linear,
1461 struct sock *sk) 1454 struct sock *sk,
1455 struct pipe_inode_info *pipe)
1462{ 1456{
1463 if (!*len) 1457 if (!*len)
1464 return 1; 1458 return 1;
@@ -1481,7 +1475,7 @@ static inline int __splice_segment(struct page *page, unsigned int poff,
1481 /* the linear region may spread across several pages */ 1475 /* the linear region may spread across several pages */
1482 flen = min_t(unsigned int, flen, PAGE_SIZE - poff); 1476 flen = min_t(unsigned int, flen, PAGE_SIZE - poff);
1483 1477
1484 if (spd_fill_page(spd, page, &flen, poff, skb, linear, sk)) 1478 if (spd_fill_page(spd, pipe, page, &flen, poff, skb, linear, sk))
1485 return 1; 1479 return 1;
1486 1480
1487 __segment_seek(&page, &poff, &plen, flen); 1481 __segment_seek(&page, &poff, &plen, flen);
@@ -1496,9 +1490,9 @@ static inline int __splice_segment(struct page *page, unsigned int poff,
1496 * Map linear and fragment data from the skb to spd. It reports failure if the 1490 * Map linear and fragment data from the skb to spd. It reports failure if the
1497 * pipe is full or if we already spliced the requested length. 1491 * pipe is full or if we already spliced the requested length.
1498 */ 1492 */
1499static int __skb_splice_bits(struct sk_buff *skb, unsigned int *offset, 1493static int __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe,
1500 unsigned int *len, struct splice_pipe_desc *spd, 1494 unsigned int *offset, unsigned int *len,
1501 struct sock *sk) 1495 struct splice_pipe_desc *spd, struct sock *sk)
1502{ 1496{
1503 int seg; 1497 int seg;
1504 1498
@@ -1508,7 +1502,7 @@ static int __skb_splice_bits(struct sk_buff *skb, unsigned int *offset,
1508 if (__splice_segment(virt_to_page(skb->data), 1502 if (__splice_segment(virt_to_page(skb->data),
1509 (unsigned long) skb->data & (PAGE_SIZE - 1), 1503 (unsigned long) skb->data & (PAGE_SIZE - 1),
1510 skb_headlen(skb), 1504 skb_headlen(skb),
1511 offset, len, skb, spd, 1, sk)) 1505 offset, len, skb, spd, 1, sk, pipe))
1512 return 1; 1506 return 1;
1513 1507
1514 /* 1508 /*
@@ -1518,7 +1512,7 @@ static int __skb_splice_bits(struct sk_buff *skb, unsigned int *offset,
1518 const skb_frag_t *f = &skb_shinfo(skb)->frags[seg]; 1512 const skb_frag_t *f = &skb_shinfo(skb)->frags[seg];
1519 1513
1520 if (__splice_segment(f->page, f->page_offset, f->size, 1514 if (__splice_segment(f->page, f->page_offset, f->size,
1521 offset, len, skb, spd, 0, sk)) 1515 offset, len, skb, spd, 0, sk, pipe))
1522 return 1; 1516 return 1;
1523 } 1517 }
1524 1518
@@ -1535,8 +1529,8 @@ int skb_splice_bits(struct sk_buff *skb, unsigned int offset,
1535 struct pipe_inode_info *pipe, unsigned int tlen, 1529 struct pipe_inode_info *pipe, unsigned int tlen,
1536 unsigned int flags) 1530 unsigned int flags)
1537{ 1531{
1538 struct partial_page partial[PIPE_BUFFERS]; 1532 struct partial_page partial[PIPE_DEF_BUFFERS];
1539 struct page *pages[PIPE_BUFFERS]; 1533 struct page *pages[PIPE_DEF_BUFFERS];
1540 struct splice_pipe_desc spd = { 1534 struct splice_pipe_desc spd = {
1541 .pages = pages, 1535 .pages = pages,
1542 .partial = partial, 1536 .partial = partial,
@@ -1546,12 +1540,16 @@ int skb_splice_bits(struct sk_buff *skb, unsigned int offset,
1546 }; 1540 };
1547 struct sk_buff *frag_iter; 1541 struct sk_buff *frag_iter;
1548 struct sock *sk = skb->sk; 1542 struct sock *sk = skb->sk;
1543 int ret = 0;
1544
1545 if (splice_grow_spd(pipe, &spd))
1546 return -ENOMEM;
1549 1547
1550 /* 1548 /*
1551 * __skb_splice_bits() only fails if the output has no room left, 1549 * __skb_splice_bits() only fails if the output has no room left,
1552 * so no point in going over the frag_list for the error case. 1550 * so no point in going over the frag_list for the error case.
1553 */ 1551 */
1554 if (__skb_splice_bits(skb, &offset, &tlen, &spd, sk)) 1552 if (__skb_splice_bits(skb, pipe, &offset, &tlen, &spd, sk))
1555 goto done; 1553 goto done;
1556 else if (!tlen) 1554 else if (!tlen)
1557 goto done; 1555 goto done;
@@ -1562,14 +1560,12 @@ int skb_splice_bits(struct sk_buff *skb, unsigned int offset,
1562 skb_walk_frags(skb, frag_iter) { 1560 skb_walk_frags(skb, frag_iter) {
1563 if (!tlen) 1561 if (!tlen)
1564 break; 1562 break;
1565 if (__skb_splice_bits(frag_iter, &offset, &tlen, &spd, sk)) 1563 if (__skb_splice_bits(frag_iter, pipe, &offset, &tlen, &spd, sk))
1566 break; 1564 break;
1567 } 1565 }
1568 1566
1569done: 1567done:
1570 if (spd.nr_pages) { 1568 if (spd.nr_pages) {
1571 int ret;
1572
1573 /* 1569 /*
1574 * Drop the socket lock, otherwise we have reverse 1570 * Drop the socket lock, otherwise we have reverse
1575 * locking dependencies between sk_lock and i_mutex 1571 * locking dependencies between sk_lock and i_mutex
@@ -1582,10 +1578,10 @@ done:
1582 release_sock(sk); 1578 release_sock(sk);
1583 ret = splice_to_pipe(pipe, &spd); 1579 ret = splice_to_pipe(pipe, &spd);
1584 lock_sock(sk); 1580 lock_sock(sk);
1585 return ret;
1586 } 1581 }
1587 1582
1588 return 0; 1583 splice_shrink_spd(pipe, &spd);
1584 return ret;
1589} 1585}
1590 1586
1591/** 1587/**
@@ -2490,7 +2486,6 @@ unsigned char *skb_pull_rcsum(struct sk_buff *skb, unsigned int len)
2490 skb_postpull_rcsum(skb, skb->data, len); 2486 skb_postpull_rcsum(skb, skb->data, len);
2491 return skb->data += len; 2487 return skb->data += len;
2492} 2488}
2493
2494EXPORT_SYMBOL_GPL(skb_pull_rcsum); 2489EXPORT_SYMBOL_GPL(skb_pull_rcsum);
2495 2490
2496/** 2491/**
@@ -2578,6 +2573,10 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features)
2578 __copy_skb_header(nskb, skb); 2573 __copy_skb_header(nskb, skb);
2579 nskb->mac_len = skb->mac_len; 2574 nskb->mac_len = skb->mac_len;
2580 2575
2576 /* nskb and skb might have different headroom */
2577 if (nskb->ip_summed == CHECKSUM_PARTIAL)
2578 nskb->csum_start += skb_headroom(nskb) - headroom;
2579
2581 skb_reset_mac_header(nskb); 2580 skb_reset_mac_header(nskb);
2582 skb_set_network_header(nskb, skb->mac_len); 2581 skb_set_network_header(nskb, skb->mac_len);
2583 nskb->transport_header = (nskb->network_header + 2582 nskb->transport_header = (nskb->network_header +
@@ -2708,7 +2707,7 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
2708 return -E2BIG; 2707 return -E2BIG;
2709 2708
2710 headroom = skb_headroom(p); 2709 headroom = skb_headroom(p);
2711 nskb = netdev_alloc_skb(p->dev, headroom + skb_gro_offset(p)); 2710 nskb = alloc_skb(headroom + skb_gro_offset(p), GFP_ATOMIC);
2712 if (unlikely(!nskb)) 2711 if (unlikely(!nskb))
2713 return -ENOMEM; 2712 return -ENOMEM;
2714 2713
@@ -2729,6 +2728,7 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
2729 *NAPI_GRO_CB(nskb) = *NAPI_GRO_CB(p); 2728 *NAPI_GRO_CB(nskb) = *NAPI_GRO_CB(p);
2730 skb_shinfo(nskb)->frag_list = p; 2729 skb_shinfo(nskb)->frag_list = p;
2731 skb_shinfo(nskb)->gso_size = pinfo->gso_size; 2730 skb_shinfo(nskb)->gso_size = pinfo->gso_size;
2731 pinfo->gso_size = 0;
2732 skb_header_release(p); 2732 skb_header_release(p);
2733 nskb->prev = p; 2733 nskb->prev = p;
2734 2734
@@ -2971,6 +2971,34 @@ int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer)
2971} 2971}
2972EXPORT_SYMBOL_GPL(skb_cow_data); 2972EXPORT_SYMBOL_GPL(skb_cow_data);
2973 2973
2974static void sock_rmem_free(struct sk_buff *skb)
2975{
2976 struct sock *sk = skb->sk;
2977
2978 atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
2979}
2980
2981/*
 2982 * Note: We don't mem-charge error packets (no sk_forward_alloc changes)
2983 */
2984int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb)
2985{
2986 if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
2987 (unsigned)sk->sk_rcvbuf)
2988 return -ENOMEM;
2989
2990 skb_orphan(skb);
2991 skb->sk = sk;
2992 skb->destructor = sock_rmem_free;
2993 atomic_add(skb->truesize, &sk->sk_rmem_alloc);
2994
2995 skb_queue_tail(&sk->sk_error_queue, skb);
2996 if (!sock_flag(sk, SOCK_DEAD))
2997 sk->sk_data_ready(sk, skb->len);
2998 return 0;
2999}
3000EXPORT_SYMBOL(sock_queue_err_skb);
3001
2974void skb_tstamp_tx(struct sk_buff *orig_skb, 3002void skb_tstamp_tx(struct sk_buff *orig_skb,
2975 struct skb_shared_hwtstamps *hwtstamps) 3003 struct skb_shared_hwtstamps *hwtstamps)
2976{ 3004{
@@ -3002,7 +3030,9 @@ void skb_tstamp_tx(struct sk_buff *orig_skb,
3002 memset(serr, 0, sizeof(*serr)); 3030 memset(serr, 0, sizeof(*serr));
3003 serr->ee.ee_errno = ENOMSG; 3031 serr->ee.ee_errno = ENOMSG;
3004 serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING; 3032 serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING;
3033
3005 err = sock_queue_err_skb(sk, skb); 3034 err = sock_queue_err_skb(sk, skb);
3035
3006 if (err) 3036 if (err)
3007 kfree_skb(skb); 3037 kfree_skb(skb);
3008} 3038}
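sock_queue_err_skb() as added above charges skb->truesize to sk_rmem_alloc through the sock_rmem_free destructor, queues the skb on sk_error_queue and wakes the socket via sk_data_ready(); on failure the skb is not consumed. A hedged caller-side sketch mirroring the skb_tstamp_tx() usage shown above (report_async_error() is a hypothetical wrapper, not part of the patch):

#include <linux/skbuff.h>
#include <net/sock.h>

static void report_async_error(struct sock *sk, struct sk_buff *skb)
{
	/* on failure the skb was not queued, so the caller still owns it
	 * and must free it */
	if (sock_queue_err_skb(sk, skb))
		kfree_skb(skb);
}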
diff --git a/net/core/sock.c b/net/core/sock.c
index e1f6f225f012..ef30e9d286e7 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -110,6 +110,7 @@
110#include <linux/tcp.h> 110#include <linux/tcp.h>
111#include <linux/init.h> 111#include <linux/init.h>
112#include <linux/highmem.h> 112#include <linux/highmem.h>
113#include <linux/user_namespace.h>
113 114
114#include <asm/uaccess.h> 115#include <asm/uaccess.h>
115#include <asm/system.h> 116#include <asm/system.h>
@@ -123,6 +124,7 @@
123#include <linux/net_tstamp.h> 124#include <linux/net_tstamp.h>
124#include <net/xfrm.h> 125#include <net/xfrm.h>
125#include <linux/ipsec.h> 126#include <linux/ipsec.h>
127#include <net/cls_cgroup.h>
126 128
127#include <linux/filter.h> 129#include <linux/filter.h>
128 130
@@ -155,7 +157,7 @@ static const char *const af_family_key_strings[AF_MAX+1] = {
155 "sk_lock-27" , "sk_lock-28" , "sk_lock-AF_CAN" , 157 "sk_lock-27" , "sk_lock-28" , "sk_lock-AF_CAN" ,
156 "sk_lock-AF_TIPC" , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV" , 158 "sk_lock-AF_TIPC" , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV" ,
157 "sk_lock-AF_RXRPC" , "sk_lock-AF_ISDN" , "sk_lock-AF_PHONET" , 159 "sk_lock-AF_RXRPC" , "sk_lock-AF_ISDN" , "sk_lock-AF_PHONET" ,
158 "sk_lock-AF_IEEE802154", 160 "sk_lock-AF_IEEE802154", "sk_lock-AF_CAIF" ,
159 "sk_lock-AF_MAX" 161 "sk_lock-AF_MAX"
160}; 162};
161static const char *const af_family_slock_key_strings[AF_MAX+1] = { 163static const char *const af_family_slock_key_strings[AF_MAX+1] = {
@@ -171,7 +173,7 @@ static const char *const af_family_slock_key_strings[AF_MAX+1] = {
171 "slock-27" , "slock-28" , "slock-AF_CAN" , 173 "slock-27" , "slock-28" , "slock-AF_CAN" ,
172 "slock-AF_TIPC" , "slock-AF_BLUETOOTH", "slock-AF_IUCV" , 174 "slock-AF_TIPC" , "slock-AF_BLUETOOTH", "slock-AF_IUCV" ,
173 "slock-AF_RXRPC" , "slock-AF_ISDN" , "slock-AF_PHONET" , 175 "slock-AF_RXRPC" , "slock-AF_ISDN" , "slock-AF_PHONET" ,
174 "slock-AF_IEEE802154", 176 "slock-AF_IEEE802154", "slock-AF_CAIF" ,
175 "slock-AF_MAX" 177 "slock-AF_MAX"
176}; 178};
177static const char *const af_family_clock_key_strings[AF_MAX+1] = { 179static const char *const af_family_clock_key_strings[AF_MAX+1] = {
@@ -187,7 +189,7 @@ static const char *const af_family_clock_key_strings[AF_MAX+1] = {
187 "clock-27" , "clock-28" , "clock-AF_CAN" , 189 "clock-27" , "clock-28" , "clock-AF_CAN" ,
188 "clock-AF_TIPC" , "clock-AF_BLUETOOTH", "clock-AF_IUCV" , 190 "clock-AF_TIPC" , "clock-AF_BLUETOOTH", "clock-AF_IUCV" ,
189 "clock-AF_RXRPC" , "clock-AF_ISDN" , "clock-AF_PHONET" , 191 "clock-AF_RXRPC" , "clock-AF_ISDN" , "clock-AF_PHONET" ,
190 "clock-AF_IEEE802154", 192 "clock-AF_IEEE802154", "clock-AF_CAIF" ,
191 "clock-AF_MAX" 193 "clock-AF_MAX"
192}; 194};
193 195
@@ -217,6 +219,11 @@ __u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;
217int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512); 219int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512);
218EXPORT_SYMBOL(sysctl_optmem_max); 220EXPORT_SYMBOL(sysctl_optmem_max);
219 221
222#if defined(CONFIG_CGROUPS) && !defined(CONFIG_NET_CLS_CGROUP)
223int net_cls_subsys_id = -1;
224EXPORT_SYMBOL_GPL(net_cls_subsys_id);
225#endif
226
220static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen) 227static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
221{ 228{
222 struct timeval tv; 229 struct timeval tv;
@@ -307,6 +314,11 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
307 */ 314 */
308 skb_len = skb->len; 315 skb_len = skb->len;
309 316
 317 /* we escape from the rcu protected region, make sure we don't leak
 318 * a non-refcounted dst
319 */
320 skb_dst_force(skb);
321
310 spin_lock_irqsave(&list->lock, flags); 322 spin_lock_irqsave(&list->lock, flags);
311 skb->dropcount = atomic_read(&sk->sk_drops); 323 skb->dropcount = atomic_read(&sk->sk_drops);
312 __skb_queue_tail(list, skb); 324 __skb_queue_tail(list, skb);
@@ -327,6 +339,10 @@ int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested)
327 339
328 skb->dev = NULL; 340 skb->dev = NULL;
329 341
342 if (sk_rcvqueues_full(sk, skb)) {
343 atomic_inc(&sk->sk_drops);
344 goto discard_and_relse;
345 }
330 if (nested) 346 if (nested)
331 bh_lock_sock_nested(sk); 347 bh_lock_sock_nested(sk);
332 else 348 else
@@ -340,8 +356,12 @@ int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested)
340 rc = sk_backlog_rcv(sk, skb); 356 rc = sk_backlog_rcv(sk, skb);
341 357
342 mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_); 358 mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
343 } else 359 } else if (sk_add_backlog(sk, skb)) {
344 sk_add_backlog(sk, skb); 360 bh_unlock_sock(sk);
361 atomic_inc(&sk->sk_drops);
362 goto discard_and_relse;
363 }
364
345 bh_unlock_sock(sk); 365 bh_unlock_sock(sk);
346out: 366out:
347 sock_put(sk); 367 sock_put(sk);
@@ -360,11 +380,11 @@ EXPORT_SYMBOL(sk_reset_txq);
360 380
361struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie) 381struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie)
362{ 382{
363 struct dst_entry *dst = sk->sk_dst_cache; 383 struct dst_entry *dst = __sk_dst_get(sk);
364 384
365 if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) { 385 if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
366 sk_tx_queue_clear(sk); 386 sk_tx_queue_clear(sk);
367 sk->sk_dst_cache = NULL; 387 rcu_assign_pointer(sk->sk_dst_cache, NULL);
368 dst_release(dst); 388 dst_release(dst);
369 return NULL; 389 return NULL;
370 } 390 }
@@ -730,6 +750,20 @@ set_rcvbuf:
730EXPORT_SYMBOL(sock_setsockopt); 750EXPORT_SYMBOL(sock_setsockopt);
731 751
732 752
753void cred_to_ucred(struct pid *pid, const struct cred *cred,
754 struct ucred *ucred)
755{
756 ucred->pid = pid_vnr(pid);
757 ucred->uid = ucred->gid = -1;
758 if (cred) {
759 struct user_namespace *current_ns = current_user_ns();
760
761 ucred->uid = user_ns_map_uid(current_ns, cred, cred->euid);
762 ucred->gid = user_ns_map_gid(current_ns, cred, cred->egid);
763 }
764}
765EXPORT_SYMBOL_GPL(cred_to_ucred);
766
733int sock_getsockopt(struct socket *sock, int level, int optname, 767int sock_getsockopt(struct socket *sock, int level, int optname,
734 char __user *optval, int __user *optlen) 768 char __user *optval, int __user *optlen)
735{ 769{
@@ -741,7 +775,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
741 struct timeval tm; 775 struct timeval tm;
742 } v; 776 } v;
743 777
744 unsigned int lv = sizeof(int); 778 int lv = sizeof(int);
745 int len; 779 int len;
746 780
747 if (get_user(len, optlen)) 781 if (get_user(len, optlen))
@@ -882,11 +916,15 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
882 break; 916 break;
883 917
884 case SO_PEERCRED: 918 case SO_PEERCRED:
885 if (len > sizeof(sk->sk_peercred)) 919 {
886 len = sizeof(sk->sk_peercred); 920 struct ucred peercred;
887 if (copy_to_user(optval, &sk->sk_peercred, len)) 921 if (len > sizeof(peercred))
922 len = sizeof(peercred);
923 cred_to_ucred(sk->sk_peer_pid, sk->sk_peer_cred, &peercred);
924 if (copy_to_user(optval, &peercred, len))
888 return -EFAULT; 925 return -EFAULT;
889 goto lenout; 926 goto lenout;
927 }
890 928
891 case SO_PEERNAME: 929 case SO_PEERNAME:
892 { 930 {
@@ -1037,6 +1075,17 @@ static void sk_prot_free(struct proto *prot, struct sock *sk)
1037 module_put(owner); 1075 module_put(owner);
1038} 1076}
1039 1077
1078#ifdef CONFIG_CGROUPS
1079void sock_update_classid(struct sock *sk)
1080{
1081 u32 classid = task_cls_classid(current);
1082
1083 if (classid && classid != sk->sk_classid)
1084 sk->sk_classid = classid;
1085}
1086EXPORT_SYMBOL(sock_update_classid);
1087#endif
1088
1040/** 1089/**
1041 * sk_alloc - All socket objects are allocated here 1090 * sk_alloc - All socket objects are allocated here
1042 * @net: the applicable net namespace 1091 * @net: the applicable net namespace
@@ -1060,6 +1109,8 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
1060 sock_lock_init(sk); 1109 sock_lock_init(sk);
1061 sock_net_set(sk, get_net(net)); 1110 sock_net_set(sk, get_net(net));
1062 atomic_set(&sk->sk_wmem_alloc, 1); 1111 atomic_set(&sk->sk_wmem_alloc, 1);
1112
1113 sock_update_classid(sk);
1063 } 1114 }
1064 1115
1065 return sk; 1116 return sk;
@@ -1073,7 +1124,8 @@ static void __sk_free(struct sock *sk)
1073 if (sk->sk_destruct) 1124 if (sk->sk_destruct)
1074 sk->sk_destruct(sk); 1125 sk->sk_destruct(sk);
1075 1126
1076 filter = rcu_dereference(sk->sk_filter); 1127 filter = rcu_dereference_check(sk->sk_filter,
1128 atomic_read(&sk->sk_wmem_alloc) == 0);
1077 if (filter) { 1129 if (filter) {
1078 sk_filter_uncharge(sk, filter); 1130 sk_filter_uncharge(sk, filter);
1079 rcu_assign_pointer(sk->sk_filter, NULL); 1131 rcu_assign_pointer(sk->sk_filter, NULL);
@@ -1086,6 +1138,9 @@ static void __sk_free(struct sock *sk)
1086 printk(KERN_DEBUG "%s: optmem leakage (%d bytes) detected.\n", 1138 printk(KERN_DEBUG "%s: optmem leakage (%d bytes) detected.\n",
1087 __func__, atomic_read(&sk->sk_omem_alloc)); 1139 __func__, atomic_read(&sk->sk_omem_alloc));
1088 1140
1141 if (sk->sk_peer_cred)
1142 put_cred(sk->sk_peer_cred);
1143 put_pid(sk->sk_peer_pid);
1089 put_net(sock_net(sk)); 1144 put_net(sock_net(sk));
1090 sk_prot_free(sk->sk_prot_creator, sk); 1145 sk_prot_free(sk->sk_prot_creator, sk);
1091} 1146}
@@ -1138,6 +1193,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
1138 sock_lock_init(newsk); 1193 sock_lock_init(newsk);
1139 bh_lock_sock(newsk); 1194 bh_lock_sock(newsk);
1140 newsk->sk_backlog.head = newsk->sk_backlog.tail = NULL; 1195 newsk->sk_backlog.head = newsk->sk_backlog.tail = NULL;
1196 newsk->sk_backlog.len = 0;
1141 1197
1142 atomic_set(&newsk->sk_rmem_alloc, 0); 1198 atomic_set(&newsk->sk_rmem_alloc, 0);
1143 /* 1199 /*
@@ -1151,7 +1207,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
1151 skb_queue_head_init(&newsk->sk_async_wait_queue); 1207 skb_queue_head_init(&newsk->sk_async_wait_queue);
1152#endif 1208#endif
1153 1209
1154 rwlock_init(&newsk->sk_dst_lock); 1210 spin_lock_init(&newsk->sk_dst_lock);
1155 rwlock_init(&newsk->sk_callback_lock); 1211 rwlock_init(&newsk->sk_callback_lock);
1156 lockdep_set_class_and_name(&newsk->sk_callback_lock, 1212 lockdep_set_class_and_name(&newsk->sk_callback_lock,
1157 af_callback_keys + newsk->sk_family, 1213 af_callback_keys + newsk->sk_family,
@@ -1201,7 +1257,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
1201 */ 1257 */
1202 sk_refcnt_debug_inc(newsk); 1258 sk_refcnt_debug_inc(newsk);
1203 sk_set_socket(newsk, NULL); 1259 sk_set_socket(newsk, NULL);
1204 newsk->sk_sleep = NULL; 1260 newsk->sk_wq = NULL;
1205 1261
1206 if (newsk->sk_prot->sockets_allocated) 1262 if (newsk->sk_prot->sockets_allocated)
1207 percpu_counter_inc(newsk->sk_prot->sockets_allocated); 1263 percpu_counter_inc(newsk->sk_prot->sockets_allocated);
@@ -1221,6 +1277,7 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
1221 sk->sk_route_caps = dst->dev->features; 1277 sk->sk_route_caps = dst->dev->features;
1222 if (sk->sk_route_caps & NETIF_F_GSO) 1278 if (sk->sk_route_caps & NETIF_F_GSO)
1223 sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE; 1279 sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE;
1280 sk->sk_route_caps &= ~sk->sk_route_nocaps;
1224 if (sk_can_gso(sk)) { 1281 if (sk_can_gso(sk)) {
1225 if (dst->header_len) { 1282 if (dst->header_len) {
1226 sk->sk_route_caps &= ~NETIF_F_GSO_MASK; 1283 sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
@@ -1282,9 +1339,10 @@ EXPORT_SYMBOL(sock_wfree);
1282void sock_rfree(struct sk_buff *skb) 1339void sock_rfree(struct sk_buff *skb)
1283{ 1340{
1284 struct sock *sk = skb->sk; 1341 struct sock *sk = skb->sk;
1342 unsigned int len = skb->truesize;
1285 1343
1286 atomic_sub(skb->truesize, &sk->sk_rmem_alloc); 1344 atomic_sub(len, &sk->sk_rmem_alloc);
1287 sk_mem_uncharge(skb->sk, skb->truesize); 1345 sk_mem_uncharge(sk, len);
1288} 1346}
1289EXPORT_SYMBOL(sock_rfree); 1347EXPORT_SYMBOL(sock_rfree);
1290 1348
@@ -1293,9 +1351,9 @@ int sock_i_uid(struct sock *sk)
1293{ 1351{
1294 int uid; 1352 int uid;
1295 1353
1296 read_lock(&sk->sk_callback_lock); 1354 read_lock_bh(&sk->sk_callback_lock);
1297 uid = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : 0; 1355 uid = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : 0;
1298 read_unlock(&sk->sk_callback_lock); 1356 read_unlock_bh(&sk->sk_callback_lock);
1299 return uid; 1357 return uid;
1300} 1358}
1301EXPORT_SYMBOL(sock_i_uid); 1359EXPORT_SYMBOL(sock_i_uid);
@@ -1304,9 +1362,9 @@ unsigned long sock_i_ino(struct sock *sk)
1304{ 1362{
1305 unsigned long ino; 1363 unsigned long ino;
1306 1364
1307 read_lock(&sk->sk_callback_lock); 1365 read_lock_bh(&sk->sk_callback_lock);
1308 ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0; 1366 ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0;
1309 read_unlock(&sk->sk_callback_lock); 1367 read_unlock_bh(&sk->sk_callback_lock);
1310 return ino; 1368 return ino;
1311} 1369}
1312EXPORT_SYMBOL(sock_i_ino); 1370EXPORT_SYMBOL(sock_i_ino);
@@ -1389,7 +1447,7 @@ static long sock_wait_for_wmem(struct sock *sk, long timeo)
1389 if (signal_pending(current)) 1447 if (signal_pending(current))
1390 break; 1448 break;
1391 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); 1449 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
1392 prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); 1450 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
1393 if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) 1451 if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf)
1394 break; 1452 break;
1395 if (sk->sk_shutdown & SEND_SHUTDOWN) 1453 if (sk->sk_shutdown & SEND_SHUTDOWN)
@@ -1398,7 +1456,7 @@ static long sock_wait_for_wmem(struct sock *sk, long timeo)
1398 break; 1456 break;
1399 timeo = schedule_timeout(timeo); 1457 timeo = schedule_timeout(timeo);
1400 } 1458 }
1401 finish_wait(sk->sk_sleep, &wait); 1459 finish_wait(sk_sleep(sk), &wait);
1402 return timeo; 1460 return timeo;
1403} 1461}
1404 1462
@@ -1525,6 +1583,7 @@ static void __release_sock(struct sock *sk)
1525 do { 1583 do {
1526 struct sk_buff *next = skb->next; 1584 struct sk_buff *next = skb->next;
1527 1585
1586 WARN_ON_ONCE(skb_dst_is_noref(skb));
1528 skb->next = NULL; 1587 skb->next = NULL;
1529 sk_backlog_rcv(sk, skb); 1588 sk_backlog_rcv(sk, skb);
1530 1589
@@ -1541,6 +1600,12 @@ static void __release_sock(struct sock *sk)
1541 1600
1542 bh_lock_sock(sk); 1601 bh_lock_sock(sk);
1543 } while ((skb = sk->sk_backlog.head) != NULL); 1602 } while ((skb = sk->sk_backlog.head) != NULL);
1603
1604 /*
1605 * Doing the zeroing here guarantee we can not loop forever
1606 * while a wild producer attempts to flood us.
1607 */
1608 sk->sk_backlog.len = 0;
1544} 1609}
1545 1610
1546/** 1611/**
@@ -1558,11 +1623,11 @@ int sk_wait_data(struct sock *sk, long *timeo)
1558 int rc; 1623 int rc;
1559 DEFINE_WAIT(wait); 1624 DEFINE_WAIT(wait);
1560 1625
1561 prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); 1626 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
1562 set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); 1627 set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1563 rc = sk_wait_event(sk, timeo, !skb_queue_empty(&sk->sk_receive_queue)); 1628 rc = sk_wait_event(sk, timeo, !skb_queue_empty(&sk->sk_receive_queue));
1564 clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); 1629 clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1565 finish_wait(sk->sk_sleep, &wait); 1630 finish_wait(sk_sleep(sk), &wait);
1566 return rc; 1631 return rc;
1567} 1632}
1568EXPORT_SYMBOL(sk_wait_data); 1633EXPORT_SYMBOL(sk_wait_data);
@@ -1784,41 +1849,53 @@ EXPORT_SYMBOL(sock_no_sendpage);
1784 1849
1785static void sock_def_wakeup(struct sock *sk) 1850static void sock_def_wakeup(struct sock *sk)
1786{ 1851{
1787 read_lock(&sk->sk_callback_lock); 1852 struct socket_wq *wq;
1788 if (sk_has_sleeper(sk)) 1853
1789 wake_up_interruptible_all(sk->sk_sleep); 1854 rcu_read_lock();
1790 read_unlock(&sk->sk_callback_lock); 1855 wq = rcu_dereference(sk->sk_wq);
1856 if (wq_has_sleeper(wq))
1857 wake_up_interruptible_all(&wq->wait);
1858 rcu_read_unlock();
1791} 1859}
1792 1860
1793static void sock_def_error_report(struct sock *sk) 1861static void sock_def_error_report(struct sock *sk)
1794{ 1862{
1795 read_lock(&sk->sk_callback_lock); 1863 struct socket_wq *wq;
1796 if (sk_has_sleeper(sk)) 1864
1797 wake_up_interruptible_poll(sk->sk_sleep, POLLERR); 1865 rcu_read_lock();
1866 wq = rcu_dereference(sk->sk_wq);
1867 if (wq_has_sleeper(wq))
1868 wake_up_interruptible_poll(&wq->wait, POLLERR);
1798 sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR); 1869 sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR);
1799 read_unlock(&sk->sk_callback_lock); 1870 rcu_read_unlock();
1800} 1871}
1801 1872
1802static void sock_def_readable(struct sock *sk, int len) 1873static void sock_def_readable(struct sock *sk, int len)
1803{ 1874{
1804 read_lock(&sk->sk_callback_lock); 1875 struct socket_wq *wq;
1805 if (sk_has_sleeper(sk)) 1876
1806 wake_up_interruptible_sync_poll(sk->sk_sleep, POLLIN | 1877 rcu_read_lock();
1878 wq = rcu_dereference(sk->sk_wq);
1879 if (wq_has_sleeper(wq))
1880 wake_up_interruptible_sync_poll(&wq->wait, POLLIN |
1807 POLLRDNORM | POLLRDBAND); 1881 POLLRDNORM | POLLRDBAND);
1808 sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN); 1882 sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
1809 read_unlock(&sk->sk_callback_lock); 1883 rcu_read_unlock();
1810} 1884}
1811 1885
1812static void sock_def_write_space(struct sock *sk) 1886static void sock_def_write_space(struct sock *sk)
1813{ 1887{
1814 read_lock(&sk->sk_callback_lock); 1888 struct socket_wq *wq;
1889
1890 rcu_read_lock();
1815 1891
1816 /* Do not wake up a writer until he can make "significant" 1892 /* Do not wake up a writer until he can make "significant"
1817 * progress. --DaveM 1893 * progress. --DaveM
1818 */ 1894 */
1819 if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) { 1895 if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
1820 if (sk_has_sleeper(sk)) 1896 wq = rcu_dereference(sk->sk_wq);
1821 wake_up_interruptible_sync_poll(sk->sk_sleep, POLLOUT | 1897 if (wq_has_sleeper(wq))
1898 wake_up_interruptible_sync_poll(&wq->wait, POLLOUT |
1822 POLLWRNORM | POLLWRBAND); 1899 POLLWRNORM | POLLWRBAND);
1823 1900
1824 /* Should agree with poll, otherwise some programs break */ 1901 /* Should agree with poll, otherwise some programs break */
@@ -1826,7 +1903,7 @@ static void sock_def_write_space(struct sock *sk)
1826 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); 1903 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
1827 } 1904 }
1828 1905
1829 read_unlock(&sk->sk_callback_lock); 1906 rcu_read_unlock();
1830} 1907}
1831 1908
1832static void sock_def_destruct(struct sock *sk) 1909static void sock_def_destruct(struct sock *sk)
@@ -1880,12 +1957,12 @@ void sock_init_data(struct socket *sock, struct sock *sk)
1880 1957
1881 if (sock) { 1958 if (sock) {
1882 sk->sk_type = sock->type; 1959 sk->sk_type = sock->type;
1883 sk->sk_sleep = &sock->wait; 1960 sk->sk_wq = sock->wq;
1884 sock->sk = sk; 1961 sock->sk = sk;
1885 } else 1962 } else
1886 sk->sk_sleep = NULL; 1963 sk->sk_wq = NULL;
1887 1964
1888 rwlock_init(&sk->sk_dst_lock); 1965 spin_lock_init(&sk->sk_dst_lock);
1889 rwlock_init(&sk->sk_callback_lock); 1966 rwlock_init(&sk->sk_callback_lock);
1890 lockdep_set_class_and_name(&sk->sk_callback_lock, 1967 lockdep_set_class_and_name(&sk->sk_callback_lock,
1891 af_callback_keys + sk->sk_family, 1968 af_callback_keys + sk->sk_family,
@@ -1900,9 +1977,8 @@ void sock_init_data(struct socket *sock, struct sock *sk)
1900 sk->sk_sndmsg_page = NULL; 1977 sk->sk_sndmsg_page = NULL;
1901 sk->sk_sndmsg_off = 0; 1978 sk->sk_sndmsg_off = 0;
1902 1979
1903 sk->sk_peercred.pid = 0; 1980 sk->sk_peer_pid = NULL;
1904 sk->sk_peercred.uid = -1; 1981 sk->sk_peer_cred = NULL;
1905 sk->sk_peercred.gid = -1;
1906 sk->sk_write_pending = 0; 1982 sk->sk_write_pending = 0;
1907 sk->sk_rcvlowat = 1; 1983 sk->sk_rcvlowat = 1;
1908 sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT; 1984 sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
@@ -1953,6 +2029,39 @@ void release_sock(struct sock *sk)
1953} 2029}
1954EXPORT_SYMBOL(release_sock); 2030EXPORT_SYMBOL(release_sock);
1955 2031
2032/**
2033 * lock_sock_fast - fast version of lock_sock
2034 * @sk: socket
2035 *
 2036 * This version should be used for very small sections, where the process won't block.
 2037 * Returns false if the fast path is taken:
 2038 *  sk_lock.slock locked, owned = 0, BH disabled
 2039 * Returns true if the slow path is taken:
2040 * sk_lock.slock unlocked, owned = 1, BH enabled
2041 */
2042bool lock_sock_fast(struct sock *sk)
2043{
2044 might_sleep();
2045 spin_lock_bh(&sk->sk_lock.slock);
2046
2047 if (!sk->sk_lock.owned)
2048 /*
2049 * Note : We must disable BH
2050 */
2051 return false;
2052
2053 __lock_sock(sk);
2054 sk->sk_lock.owned = 1;
2055 spin_unlock(&sk->sk_lock.slock);
2056 /*
2057 * The sk_lock has mutex_lock() semantics here:
2058 */
2059 mutex_acquire(&sk->sk_lock.dep_map, 0, 0, _RET_IP_);
2060 local_bh_enable();
2061 return true;
2062}
2063EXPORT_SYMBOL(lock_sock_fast);
2064
1956int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp) 2065int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
1957{ 2066{
1958 struct timeval tv; 2067 struct timeval tv;
@@ -2123,8 +2232,7 @@ static DECLARE_BITMAP(proto_inuse_idx, PROTO_INUSE_NR);
2123#ifdef CONFIG_NET_NS 2232#ifdef CONFIG_NET_NS
2124void sock_prot_inuse_add(struct net *net, struct proto *prot, int val) 2233void sock_prot_inuse_add(struct net *net, struct proto *prot, int val)
2125{ 2234{
2126 int cpu = smp_processor_id(); 2235 __this_cpu_add(net->core.inuse->val[prot->inuse_idx], val);
2127 per_cpu_ptr(net->core.inuse, cpu)->val[prot->inuse_idx] += val;
2128} 2236}
2129EXPORT_SYMBOL_GPL(sock_prot_inuse_add); 2237EXPORT_SYMBOL_GPL(sock_prot_inuse_add);
2130 2238
@@ -2140,13 +2248,13 @@ int sock_prot_inuse_get(struct net *net, struct proto *prot)
2140} 2248}
2141EXPORT_SYMBOL_GPL(sock_prot_inuse_get); 2249EXPORT_SYMBOL_GPL(sock_prot_inuse_get);
2142 2250
2143static int sock_inuse_init_net(struct net *net) 2251static int __net_init sock_inuse_init_net(struct net *net)
2144{ 2252{
2145 net->core.inuse = alloc_percpu(struct prot_inuse); 2253 net->core.inuse = alloc_percpu(struct prot_inuse);
2146 return net->core.inuse ? 0 : -ENOMEM; 2254 return net->core.inuse ? 0 : -ENOMEM;
2147} 2255}
2148 2256
2149static void sock_inuse_exit_net(struct net *net) 2257static void __net_exit sock_inuse_exit_net(struct net *net)
2150{ 2258{
2151 free_percpu(net->core.inuse); 2259 free_percpu(net->core.inuse);
2152} 2260}
@@ -2170,7 +2278,7 @@ static DEFINE_PER_CPU(struct prot_inuse, prot_inuse);
2170 2278
2171void sock_prot_inuse_add(struct net *net, struct proto *prot, int val) 2279void sock_prot_inuse_add(struct net *net, struct proto *prot, int val)
2172{ 2280{
2173 __get_cpu_var(prot_inuse).val[prot->inuse_idx] += val; 2281 __this_cpu_add(prot_inuse.val[prot->inuse_idx], val);
2174} 2282}
2175EXPORT_SYMBOL_GPL(sock_prot_inuse_add); 2283EXPORT_SYMBOL_GPL(sock_prot_inuse_add);
2176 2284
@@ -2228,13 +2336,10 @@ int proto_register(struct proto *prot, int alloc_slab)
2228 } 2336 }
2229 2337
2230 if (prot->rsk_prot != NULL) { 2338 if (prot->rsk_prot != NULL) {
2231 static const char mask[] = "request_sock_%s"; 2339 prot->rsk_prot->slab_name = kasprintf(GFP_KERNEL, "request_sock_%s", prot->name);
2232
2233 prot->rsk_prot->slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL);
2234 if (prot->rsk_prot->slab_name == NULL) 2340 if (prot->rsk_prot->slab_name == NULL)
2235 goto out_free_sock_slab; 2341 goto out_free_sock_slab;
2236 2342
2237 sprintf(prot->rsk_prot->slab_name, mask, prot->name);
2238 prot->rsk_prot->slab = kmem_cache_create(prot->rsk_prot->slab_name, 2343 prot->rsk_prot->slab = kmem_cache_create(prot->rsk_prot->slab_name,
2239 prot->rsk_prot->obj_size, 0, 2344 prot->rsk_prot->obj_size, 0,
2240 SLAB_HWCACHE_ALIGN, NULL); 2345 SLAB_HWCACHE_ALIGN, NULL);
@@ -2247,14 +2352,11 @@ int proto_register(struct proto *prot, int alloc_slab)
2247 } 2352 }
2248 2353
2249 if (prot->twsk_prot != NULL) { 2354 if (prot->twsk_prot != NULL) {
2250 static const char mask[] = "tw_sock_%s"; 2355 prot->twsk_prot->twsk_slab_name = kasprintf(GFP_KERNEL, "tw_sock_%s", prot->name);
2251
2252 prot->twsk_prot->twsk_slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL);
2253 2356
2254 if (prot->twsk_prot->twsk_slab_name == NULL) 2357 if (prot->twsk_prot->twsk_slab_name == NULL)
2255 goto out_free_request_sock_slab; 2358 goto out_free_request_sock_slab;
2256 2359
2257 sprintf(prot->twsk_prot->twsk_slab_name, mask, prot->name);
2258 prot->twsk_prot->twsk_slab = 2360 prot->twsk_prot->twsk_slab =
2259 kmem_cache_create(prot->twsk_prot->twsk_slab_name, 2361 kmem_cache_create(prot->twsk_prot->twsk_slab_name,
2260 prot->twsk_prot->twsk_obj_size, 2362 prot->twsk_prot->twsk_obj_size,
@@ -2281,7 +2383,8 @@ out_free_request_sock_slab:
2281 prot->rsk_prot->slab = NULL; 2383 prot->rsk_prot->slab = NULL;
2282 } 2384 }
2283out_free_request_sock_slab_name: 2385out_free_request_sock_slab_name:
2284 kfree(prot->rsk_prot->slab_name); 2386 if (prot->rsk_prot)
2387 kfree(prot->rsk_prot->slab_name);
2285out_free_sock_slab: 2388out_free_sock_slab:
2286 kmem_cache_destroy(prot->slab); 2389 kmem_cache_destroy(prot->slab);
2287 prot->slab = NULL; 2390 prot->slab = NULL;
diff --git a/net/core/stream.c b/net/core/stream.c
index a37debfeb1b2..f5df85dcd20b 100644
--- a/net/core/stream.c
+++ b/net/core/stream.c
@@ -28,18 +28,21 @@
28void sk_stream_write_space(struct sock *sk) 28void sk_stream_write_space(struct sock *sk)
29{ 29{
30 struct socket *sock = sk->sk_socket; 30 struct socket *sock = sk->sk_socket;
31 struct socket_wq *wq;
31 32
32 if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk) && sock) { 33 if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk) && sock) {
33 clear_bit(SOCK_NOSPACE, &sock->flags); 34 clear_bit(SOCK_NOSPACE, &sock->flags);
34 35
35 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) 36 rcu_read_lock();
36 wake_up_interruptible_poll(sk->sk_sleep, POLLOUT | 37 wq = rcu_dereference(sk->sk_wq);
38 if (wq_has_sleeper(wq))
39 wake_up_interruptible_poll(&wq->wait, POLLOUT |
37 POLLWRNORM | POLLWRBAND); 40 POLLWRNORM | POLLWRBAND);
38 if (sock->fasync_list && !(sk->sk_shutdown & SEND_SHUTDOWN)) 41 if (wq && wq->fasync_list && !(sk->sk_shutdown & SEND_SHUTDOWN))
39 sock_wake_async(sock, SOCK_WAKE_SPACE, POLL_OUT); 42 sock_wake_async(sock, SOCK_WAKE_SPACE, POLL_OUT);
43 rcu_read_unlock();
40 } 44 }
41} 45}
42
43EXPORT_SYMBOL(sk_stream_write_space); 46EXPORT_SYMBOL(sk_stream_write_space);
44 47
45/** 48/**
@@ -66,18 +69,17 @@ int sk_stream_wait_connect(struct sock *sk, long *timeo_p)
66 if (signal_pending(tsk)) 69 if (signal_pending(tsk))
67 return sock_intr_errno(*timeo_p); 70 return sock_intr_errno(*timeo_p);
68 71
69 prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); 72 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
70 sk->sk_write_pending++; 73 sk->sk_write_pending++;
71 done = sk_wait_event(sk, timeo_p, 74 done = sk_wait_event(sk, timeo_p,
72 !sk->sk_err && 75 !sk->sk_err &&
73 !((1 << sk->sk_state) & 76 !((1 << sk->sk_state) &
74 ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT))); 77 ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)));
75 finish_wait(sk->sk_sleep, &wait); 78 finish_wait(sk_sleep(sk), &wait);
76 sk->sk_write_pending--; 79 sk->sk_write_pending--;
77 } while (!done); 80 } while (!done);
78 return 0; 81 return 0;
79} 82}
80
81EXPORT_SYMBOL(sk_stream_wait_connect); 83EXPORT_SYMBOL(sk_stream_wait_connect);
82 84
83/** 85/**
@@ -96,16 +98,15 @@ void sk_stream_wait_close(struct sock *sk, long timeout)
96 DEFINE_WAIT(wait); 98 DEFINE_WAIT(wait);
97 99
98 do { 100 do {
99 prepare_to_wait(sk->sk_sleep, &wait, 101 prepare_to_wait(sk_sleep(sk), &wait,
100 TASK_INTERRUPTIBLE); 102 TASK_INTERRUPTIBLE);
101 if (sk_wait_event(sk, &timeout, !sk_stream_closing(sk))) 103 if (sk_wait_event(sk, &timeout, !sk_stream_closing(sk)))
102 break; 104 break;
103 } while (!signal_pending(current) && timeout); 105 } while (!signal_pending(current) && timeout);
104 106
105 finish_wait(sk->sk_sleep, &wait); 107 finish_wait(sk_sleep(sk), &wait);
106 } 108 }
107} 109}
108
109EXPORT_SYMBOL(sk_stream_wait_close); 110EXPORT_SYMBOL(sk_stream_wait_close);
110 111
111/** 112/**
@@ -126,7 +127,7 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p)
126 while (1) { 127 while (1) {
127 set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); 128 set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
128 129
129 prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); 130 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
130 131
131 if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) 132 if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
132 goto do_error; 133 goto do_error;
@@ -140,10 +141,10 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p)
140 141
141 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); 142 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
142 sk->sk_write_pending++; 143 sk->sk_write_pending++;
143 sk_wait_event(sk, &current_timeo, !sk->sk_err && 144 sk_wait_event(sk, &current_timeo, sk->sk_err ||
144 !(sk->sk_shutdown & SEND_SHUTDOWN) && 145 (sk->sk_shutdown & SEND_SHUTDOWN) ||
145 sk_stream_memory_free(sk) && 146 (sk_stream_memory_free(sk) &&
146 vm_wait); 147 !vm_wait));
147 sk->sk_write_pending--; 148 sk->sk_write_pending--;
148 149
149 if (vm_wait) { 150 if (vm_wait) {
@@ -157,7 +158,7 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p)
157 *timeo_p = current_timeo; 158 *timeo_p = current_timeo;
158 } 159 }
159out: 160out:
160 finish_wait(sk->sk_sleep, &wait); 161 finish_wait(sk_sleep(sk), &wait);
161 return err; 162 return err;
162 163
163do_error: 164do_error:
@@ -170,7 +171,6 @@ do_interrupted:
170 err = sock_intr_errno(*timeo_p); 171 err = sock_intr_errno(*timeo_p);
171 goto out; 172 goto out;
172} 173}
173
174EXPORT_SYMBOL(sk_stream_wait_memory); 174EXPORT_SYMBOL(sk_stream_wait_memory);
175 175
176int sk_stream_error(struct sock *sk, int flags, int err) 176int sk_stream_error(struct sock *sk, int flags, int err)
@@ -181,7 +181,6 @@ int sk_stream_error(struct sock *sk, int flags, int err)
181 send_sig(SIGPIPE, current, 0); 181 send_sig(SIGPIPE, current, 0);
182 return err; 182 return err;
183} 183}
184
185EXPORT_SYMBOL(sk_stream_error); 184EXPORT_SYMBOL(sk_stream_error);
186 185
187void sk_stream_kill_queues(struct sock *sk) 186void sk_stream_kill_queues(struct sock *sk)
@@ -206,5 +205,4 @@ void sk_stream_kill_queues(struct sock *sk)
 206 * have gone away; only the net layer knows and can touch it. 205 * have gone away; only the net layer knows and can touch it.
207 */ 206 */
208} 207}
209
210EXPORT_SYMBOL(sk_stream_kill_queues); 208EXPORT_SYMBOL(sk_stream_kill_queues);
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 06124872af5b..01eee5d984be 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -11,11 +11,72 @@
11#include <linux/socket.h> 11#include <linux/socket.h>
12#include <linux/netdevice.h> 12#include <linux/netdevice.h>
13#include <linux/ratelimit.h> 13#include <linux/ratelimit.h>
14#include <linux/vmalloc.h>
14#include <linux/init.h> 15#include <linux/init.h>
16#include <linux/slab.h>
15 17
16#include <net/ip.h> 18#include <net/ip.h>
17#include <net/sock.h> 19#include <net/sock.h>
18 20
21#ifdef CONFIG_RPS
22static int rps_sock_flow_sysctl(ctl_table *table, int write,
23 void __user *buffer, size_t *lenp, loff_t *ppos)
24{
25 unsigned int orig_size, size;
26 int ret, i;
27 ctl_table tmp = {
28 .data = &size,
29 .maxlen = sizeof(size),
30 .mode = table->mode
31 };
32 struct rps_sock_flow_table *orig_sock_table, *sock_table;
33 static DEFINE_MUTEX(sock_flow_mutex);
34
35 mutex_lock(&sock_flow_mutex);
36
37 orig_sock_table = rps_sock_flow_table;
38 size = orig_size = orig_sock_table ? orig_sock_table->mask + 1 : 0;
39
40 ret = proc_dointvec(&tmp, write, buffer, lenp, ppos);
41
42 if (write) {
43 if (size) {
44 if (size > 1<<30) {
45 /* Enforce limit to prevent overflow */
46 mutex_unlock(&sock_flow_mutex);
47 return -EINVAL;
48 }
49 size = roundup_pow_of_two(size);
50 if (size != orig_size) {
51 sock_table =
52 vmalloc(RPS_SOCK_FLOW_TABLE_SIZE(size));
53 if (!sock_table) {
54 mutex_unlock(&sock_flow_mutex);
55 return -ENOMEM;
56 }
57
58 sock_table->mask = size - 1;
59 } else
60 sock_table = orig_sock_table;
61
62 for (i = 0; i < size; i++)
63 sock_table->ents[i] = RPS_NO_CPU;
64 } else
65 sock_table = NULL;
66
67 if (sock_table != orig_sock_table) {
68 rcu_assign_pointer(rps_sock_flow_table, sock_table);
69 synchronize_rcu();
70 vfree(orig_sock_table);
71 }
72 }
73
74 mutex_unlock(&sock_flow_mutex);
75
76 return ret;
77}
78#endif /* CONFIG_RPS */
79
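rps_sock_flow_sysctl() above rounds the requested size up to a power of two, sets mask to size - 1 and fills the entries with RPS_NO_CPU. A hedged sketch of how a flow hash would then index the table, assuming the rps_sock_flow_table layout implied here (an unsigned mask plus a u16 ents[] array); rps_flow_cpu() is an illustrative name only:

#include <linux/netdevice.h>

static u16 rps_flow_cpu(const struct rps_sock_flow_table *table, u32 hash)
{
	/* power-of-two table: masking the hash selects a slot; slots hold
	 * RPS_NO_CPU until a socket records the CPU handling that flow */
	return table->ents[hash & table->mask];
}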
19static struct ctl_table net_core_table[] = { 80static struct ctl_table net_core_table[] = {
20#ifdef CONFIG_NET 81#ifdef CONFIG_NET
21 { 82 {
@@ -61,6 +122,13 @@ static struct ctl_table net_core_table[] = {
61 .proc_handler = proc_dointvec 122 .proc_handler = proc_dointvec
62 }, 123 },
63 { 124 {
125 .procname = "netdev_tstamp_prequeue",
126 .data = &netdev_tstamp_prequeue,
127 .maxlen = sizeof(int),
128 .mode = 0644,
129 .proc_handler = proc_dointvec
130 },
131 {
64 .procname = "message_cost", 132 .procname = "message_cost",
65 .data = &net_ratelimit_state.interval, 133 .data = &net_ratelimit_state.interval,
66 .maxlen = sizeof(int), 134 .maxlen = sizeof(int),
@@ -81,6 +149,14 @@ static struct ctl_table net_core_table[] = {
81 .mode = 0644, 149 .mode = 0644,
82 .proc_handler = proc_dointvec 150 .proc_handler = proc_dointvec
83 }, 151 },
152#ifdef CONFIG_RPS
153 {
154 .procname = "rps_sock_flow_entries",
155 .maxlen = sizeof(int),
156 .mode = 0644,
157 .proc_handler = rps_sock_flow_sysctl
158 },
159#endif
84#endif /* CONFIG_NET */ 160#endif /* CONFIG_NET */
85 { 161 {
86 .procname = "netdev_budget", 162 .procname = "netdev_budget",
diff --git a/net/core/timestamping.c b/net/core/timestamping.c
new file mode 100644
index 000000000000..0ae6c22da85b
--- /dev/null
+++ b/net/core/timestamping.c
@@ -0,0 +1,126 @@
1/*
2 * PTP 1588 clock support - support for timestamping in PHY devices
3 *
4 * Copyright (C) 2010 OMICRON electronics GmbH
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 */
20#include <linux/errqueue.h>
21#include <linux/phy.h>
22#include <linux/ptp_classify.h>
23#include <linux/skbuff.h>
24
25static struct sock_filter ptp_filter[] = {
26 PTP_FILTER
27};
28
29static unsigned int classify(struct sk_buff *skb)
30{
31 if (likely(skb->dev &&
32 skb->dev->phydev &&
33 skb->dev->phydev->drv))
34 return sk_run_filter(skb, ptp_filter, ARRAY_SIZE(ptp_filter));
35 else
36 return PTP_CLASS_NONE;
37}
38
39void skb_clone_tx_timestamp(struct sk_buff *skb)
40{
41 struct phy_device *phydev;
42 struct sk_buff *clone;
43 struct sock *sk = skb->sk;
44 unsigned int type;
45
46 if (!sk)
47 return;
48
49 type = classify(skb);
50
51 switch (type) {
52 case PTP_CLASS_V1_IPV4:
53 case PTP_CLASS_V1_IPV6:
54 case PTP_CLASS_V2_IPV4:
55 case PTP_CLASS_V2_IPV6:
56 case PTP_CLASS_V2_L2:
57 case PTP_CLASS_V2_VLAN:
58 phydev = skb->dev->phydev;
59 if (likely(phydev->drv->txtstamp)) {
60 clone = skb_clone(skb, GFP_ATOMIC);
61 if (!clone)
62 return;
63 clone->sk = sk;
64 phydev->drv->txtstamp(phydev, clone, type);
65 }
66 break;
67 default:
68 break;
69 }
70}
71
72void skb_complete_tx_timestamp(struct sk_buff *skb,
73 struct skb_shared_hwtstamps *hwtstamps)
74{
75 struct sock *sk = skb->sk;
76 struct sock_exterr_skb *serr;
77 int err;
78
79 if (!hwtstamps)
80 return;
81
82 *skb_hwtstamps(skb) = *hwtstamps;
83 serr = SKB_EXT_ERR(skb);
84 memset(serr, 0, sizeof(*serr));
85 serr->ee.ee_errno = ENOMSG;
86 serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING;
87 skb->sk = NULL;
88 err = sock_queue_err_skb(sk, skb);
89 if (err)
90 kfree_skb(skb);
91}
92EXPORT_SYMBOL_GPL(skb_complete_tx_timestamp);
93
94bool skb_defer_rx_timestamp(struct sk_buff *skb)
95{
96 struct phy_device *phydev;
97 unsigned int type;
98
99 skb_push(skb, ETH_HLEN);
100
101 type = classify(skb);
102
103 skb_pull(skb, ETH_HLEN);
104
105 switch (type) {
106 case PTP_CLASS_V1_IPV4:
107 case PTP_CLASS_V1_IPV6:
108 case PTP_CLASS_V2_IPV4:
109 case PTP_CLASS_V2_IPV6:
110 case PTP_CLASS_V2_L2:
111 case PTP_CLASS_V2_VLAN:
112 phydev = skb->dev->phydev;
113 if (likely(phydev->drv->rxtstamp))
114 return phydev->drv->rxtstamp(phydev, skb, type);
115 break;
116 default:
117 break;
118 }
119
120 return false;
121}
122
123void __init skb_timestamping_init(void)
124{
125 BUG_ON(sk_chk_filter(ptp_filter, ARRAY_SIZE(ptp_filter)));
126}
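The new timestamping.c hands PTP-classified skbs to phydev->drv->txtstamp() and phydev->drv->rxtstamp(). A hedged skeleton of the driver side those calls expect; the example_* names and exact signatures are assumptions for illustration, not defined by this patch:

#include <linux/phy.h>
#include <linux/skbuff.h>

static void example_txtstamp(struct phy_device *phydev,
			     struct sk_buff *skb, int type)
{
	/* latch the hardware TX timestamp for this clone and, once it is
	 * available, complete it with skb_complete_tx_timestamp() */
}

static bool example_rxtstamp(struct phy_device *phydev,
			     struct sk_buff *skb, int type)
{
	/* return true only when taking ownership of the skb so it can be
	 * delivered later with the RX timestamp attached */
	return false;
}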
diff --git a/net/core/utils.c b/net/core/utils.c
index 838250241d26..f41854470539 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -77,7 +77,6 @@ __be32 in_aton(const char *str)
77 } 77 }
78 return(htonl(l)); 78 return(htonl(l));
79} 79}
80
81EXPORT_SYMBOL(in_aton); 80EXPORT_SYMBOL(in_aton);
82 81
83#define IN6PTON_XDIGIT 0x00010000 82#define IN6PTON_XDIGIT 0x00010000
@@ -162,7 +161,6 @@ out:
162 *end = s; 161 *end = s;
163 return ret; 162 return ret;
164} 163}
165
166EXPORT_SYMBOL(in4_pton); 164EXPORT_SYMBOL(in4_pton);
167 165
168int in6_pton(const char *src, int srclen, 166int in6_pton(const char *src, int srclen,
@@ -280,7 +278,6 @@ out:
280 *end = s; 278 *end = s;
281 return ret; 279 return ret;
282} 280}
283
284EXPORT_SYMBOL(in6_pton); 281EXPORT_SYMBOL(in6_pton);
285 282
286void inet_proto_csum_replace4(__sum16 *sum, struct sk_buff *skb, 283void inet_proto_csum_replace4(__sum16 *sum, struct sk_buff *skb,