author     Andrea Bastoni <bastoni@cs.unc.edu>   2010-05-30 19:16:45 -0400
committer  Andrea Bastoni <bastoni@cs.unc.edu>   2010-05-30 19:16:45 -0400
commit     ada47b5fe13d89735805b566185f4885f5a3f750 (patch)
tree       644b88f8a71896307d71438e9b3af49126ffb22b /net/core
parent     43e98717ad40a4ae64545b5ba047c7b86aa44f4f (diff)
parent     3280f21d43ee541f97f8cda5792150d2dbec20d5 (diff)

Merge branch 'wip-2.6.34' into old-private-master
Diffstat (limited to 'net/core')
-rw-r--r--  net/core/Makefile             1
-rw-r--r--  net/core/datagram.c           2
-rw-r--r--  net/core/dev.c              955
-rw-r--r--  net/core/dev_mcast.c          5
-rw-r--r--  net/core/drop_monitor.c       4
-rw-r--r--  net/core/dst.c                3
-rw-r--r--  net/core/ethtool.c          514
-rw-r--r--  net/core/fib_rules.c        110
-rw-r--r--  net/core/filter.c            15
-rw-r--r--  net/core/gen_estimator.c      1
-rw-r--r--  net/core/gen_stats.c          8
-rw-r--r--  net/core/iovec.c              1
-rw-r--r--  net/core/link_watch.c        95
-rw-r--r--  net/core/neighbour.c         68
-rw-r--r--  net/core/net-sysfs.c         63
-rw-r--r--  net/core/net-traces.c         1
-rw-r--r--  net/core/net_namespace.c    272
-rw-r--r--  net/core/netpoll.c          179
-rw-r--r--  net/core/pktgen.c            43
-rw-r--r--  net/core/rtnetlink.c        378
-rw-r--r--  net/core/scm.c                3
-rw-r--r--  net/core/skb_dma_map.c       65
-rw-r--r--  net/core/skbuff.c             7
-rw-r--r--  net/core/sock.c             100
-rw-r--r--  net/core/sysctl_net_core.c   26
-rw-r--r--  net/core/utils.c              2
26 files changed, 2020 insertions, 901 deletions
diff --git a/net/core/Makefile b/net/core/Makefile
index 796f46eece5f..08791ac3e05a 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -6,7 +6,6 @@ obj-y := sock.o request_sock.o skbuff.o iovec.o datagram.o stream.o scm.o \
         gen_stats.o gen_estimator.o net_namespace.o
 
 obj-$(CONFIG_SYSCTL) += sysctl_net_core.o
-obj-$(CONFIG_HAS_DMA) += skb_dma_map.o
 
 obj-y += dev.o ethtool.o dev_mcast.o dst.o netevent.o \
         neighbour.o rtnetlink.o utils.o link_watch.o filter.o
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 4ade3011bb3c..2dccd4ee591b 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -48,6 +48,7 @@
 #include <linux/poll.h>
 #include <linux/highmem.h>
 #include <linux/spinlock.h>
+#include <linux/slab.h>
 
 #include <net/protocol.h>
 #include <linux/skbuff.h>
@@ -271,6 +272,7 @@ int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags)
        }
 
        kfree_skb(skb);
+       atomic_inc(&sk->sk_drops);
        sk_mem_reclaim_partial(sk);
 
        return err;
diff --git a/net/core/dev.c b/net/core/dev.c
index fe10551d3671..264137fce3a2 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -79,6 +79,8 @@
 #include <linux/cpu.h>
 #include <linux/types.h>
 #include <linux/kernel.h>
+#include <linux/hash.h>
+#include <linux/slab.h>
 #include <linux/sched.h>
 #include <linux/mutex.h>
 #include <linux/string.h>
@@ -104,6 +106,7 @@
 #include <net/dst.h>
 #include <net/pkt_sched.h>
 #include <net/checksum.h>
+#include <net/xfrm.h>
 #include <linux/highmem.h>
 #include <linux/init.h>
 #include <linux/kmod.h>
@@ -175,7 +178,7 @@ static struct list_head ptype_all __read_mostly; /* Taps */
 * The @dev_base_head list is protected by @dev_base_lock and the rtnl
 * semaphore.
 *
- * Pure readers hold dev_base_lock for reading.
+ * Pure readers hold dev_base_lock for reading, or rcu_read_lock()
 *
 * Writers must hold the rtnl semaphore while they loop through the
 * dev_base_head list, and hold dev_base_lock for writing when they do the
@@ -193,18 +196,15 @@ static struct list_head ptype_all __read_mostly; /* Taps */
 DEFINE_RWLOCK(dev_base_lock);
 EXPORT_SYMBOL(dev_base_lock);
 
-#define NETDEV_HASHBITS        8
-#define NETDEV_HASHENTRIES (1 << NETDEV_HASHBITS)
-
 static inline struct hlist_head *dev_name_hash(struct net *net, const char *name)
 {
        unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
-       return &net->dev_name_head[hash & ((1 << NETDEV_HASHBITS) - 1)];
+       return &net->dev_name_head[hash_32(hash, NETDEV_HASHBITS)];
 }
 
 static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
 {
-       return &net->dev_index_head[ifindex & ((1 << NETDEV_HASHBITS) - 1)];
+       return &net->dev_index_head[ifindex & (NETDEV_HASHENTRIES - 1)];
 }
 
 /* Device list insertion */
@@ -215,23 +215,26 @@ static int list_netdevice(struct net_device *dev)
        ASSERT_RTNL();
 
        write_lock_bh(&dev_base_lock);
-       list_add_tail(&dev->dev_list, &net->dev_base_head);
-       hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name));
-       hlist_add_head(&dev->index_hlist, dev_index_hash(net, dev->ifindex));
+       list_add_tail_rcu(&dev->dev_list, &net->dev_base_head);
+       hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name));
+       hlist_add_head_rcu(&dev->index_hlist,
+                          dev_index_hash(net, dev->ifindex));
        write_unlock_bh(&dev_base_lock);
        return 0;
 }
 
-/* Device list removal */
+/* Device list removal
+ * caller must respect a RCU grace period before freeing/reusing dev
+ */
 static void unlist_netdevice(struct net_device *dev)
 {
        ASSERT_RTNL();
 
        /* Unlink dev from the device chain */
        write_lock_bh(&dev_base_lock);
-       list_del(&dev->dev_list);
-       hlist_del(&dev->name_hlist);
-       hlist_del(&dev->index_hlist);
+       list_del_rcu(&dev->dev_list);
+       hlist_del_rcu(&dev->name_hlist);
+       hlist_del_rcu(&dev->index_hlist);
        write_unlock_bh(&dev_base_lock);
 }
 
@@ -587,18 +590,44 @@ __setup("netdev=", netdev_boot_setup);
 struct net_device *__dev_get_by_name(struct net *net, const char *name)
 {
        struct hlist_node *p;
+       struct net_device *dev;
+       struct hlist_head *head = dev_name_hash(net, name);
 
-       hlist_for_each(p, dev_name_hash(net, name)) {
-               struct net_device *dev
-                       = hlist_entry(p, struct net_device, name_hlist);
+       hlist_for_each_entry(dev, p, head, name_hlist)
                if (!strncmp(dev->name, name, IFNAMSIZ))
                        return dev;
-       }
+
        return NULL;
 }
 EXPORT_SYMBOL(__dev_get_by_name);
 
 /**
+ *     dev_get_by_name_rcu - find a device by its name
+ *     @net: the applicable net namespace
+ *     @name: name to find
+ *
+ *     Find an interface by name.
+ *     If the name is found a pointer to the device is returned.
+ *     If the name is not found then %NULL is returned.
+ *     The reference counters are not incremented so the caller must be
+ *     careful with locks. The caller must hold RCU lock.
+ */
+
+struct net_device *dev_get_by_name_rcu(struct net *net, const char *name)
+{
+       struct hlist_node *p;
+       struct net_device *dev;
+       struct hlist_head *head = dev_name_hash(net, name);
+
+       hlist_for_each_entry_rcu(dev, p, head, name_hlist)
+               if (!strncmp(dev->name, name, IFNAMSIZ))
+                       return dev;
+
+       return NULL;
+}
+EXPORT_SYMBOL(dev_get_by_name_rcu);
+
+/**
 *     dev_get_by_name - find a device by its name
 *     @net: the applicable net namespace
 *     @name: name to find
@@ -614,11 +643,11 @@ struct net_device *dev_get_by_name(struct net *net, const char *name)
 {
        struct net_device *dev;
 
-       read_lock(&dev_base_lock);
-       dev = __dev_get_by_name(net, name);
+       rcu_read_lock();
+       dev = dev_get_by_name_rcu(net, name);
        if (dev)
                dev_hold(dev);
-       read_unlock(&dev_base_lock);
+       rcu_read_unlock();
        return dev;
 }
 EXPORT_SYMBOL(dev_get_by_name);
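A note on the RCU conversion above: dev_get_by_name_rcu() takes no reference, so a caller that wants the pointer to outlive the read-side critical section must pin it itself, exactly as the rewritten dev_get_by_name() now does. A minimal caller-side sketch (illustrative only, not part of the patch; "eth0" and the surrounding code are assumptions):

    struct net_device *dev;

    rcu_read_lock();
    dev = dev_get_by_name_rcu(net, "eth0");  /* lockless hash lookup */
    if (dev)
            dev_hold(dev);                   /* pin before leaving the RCU section */
    rcu_read_unlock();

    if (dev) {
            /* ... use dev without holding dev_base_lock ... */
            dev_put(dev);
    }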
@@ -638,17 +667,42 @@ EXPORT_SYMBOL(dev_get_by_name);
 struct net_device *__dev_get_by_index(struct net *net, int ifindex)
 {
        struct hlist_node *p;
+       struct net_device *dev;
+       struct hlist_head *head = dev_index_hash(net, ifindex);
 
-       hlist_for_each(p, dev_index_hash(net, ifindex)) {
-               struct net_device *dev
-                       = hlist_entry(p, struct net_device, index_hlist);
+       hlist_for_each_entry(dev, p, head, index_hlist)
                if (dev->ifindex == ifindex)
                        return dev;
-       }
+
        return NULL;
 }
 EXPORT_SYMBOL(__dev_get_by_index);
 
+/**
+ *     dev_get_by_index_rcu - find a device by its ifindex
+ *     @net: the applicable net namespace
+ *     @ifindex: index of device
+ *
+ *     Search for an interface by index. Returns %NULL if the device
+ *     is not found or a pointer to the device. The device has not
+ *     had its reference counter increased so the caller must be careful
+ *     about locking. The caller must hold RCU lock.
+ */
+
+struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex)
+{
+       struct hlist_node *p;
+       struct net_device *dev;
+       struct hlist_head *head = dev_index_hash(net, ifindex);
+
+       hlist_for_each_entry_rcu(dev, p, head, index_hlist)
+               if (dev->ifindex == ifindex)
+                       return dev;
+
+       return NULL;
+}
+EXPORT_SYMBOL(dev_get_by_index_rcu);
+
 
 /**
 *     dev_get_by_index - find a device by its ifindex
@@ -665,11 +719,11 @@ struct net_device *dev_get_by_index(struct net *net, int ifindex)
 {
        struct net_device *dev;
 
-       read_lock(&dev_base_lock);
-       dev = __dev_get_by_index(net, ifindex);
+       rcu_read_lock();
+       dev = dev_get_by_index_rcu(net, ifindex);
        if (dev)
                dev_hold(dev);
-       read_unlock(&dev_base_lock);
+       rcu_read_unlock();
        return dev;
 }
 EXPORT_SYMBOL(dev_get_by_index);
@@ -748,15 +802,15 @@ struct net_device *dev_get_by_flags(struct net *net, unsigned short if_flags,
        struct net_device *dev, *ret;
 
        ret = NULL;
-       read_lock(&dev_base_lock);
-       for_each_netdev(net, dev) {
+       rcu_read_lock();
+       for_each_netdev_rcu(net, dev) {
                if (((dev->flags ^ if_flags) & mask) == 0) {
                        dev_hold(dev);
                        ret = dev;
                        break;
                }
        }
-       read_unlock(&dev_base_lock);
+       rcu_read_unlock();
        return ret;
 }
 EXPORT_SYMBOL(dev_get_by_flags);
@@ -841,7 +895,8 @@ static int __dev_alloc_name(struct net *net, const char *name, char *buf)
                free_page((unsigned long) inuse);
        }
 
-       snprintf(buf, IFNAMSIZ, name, i);
+       if (buf != name)
+               snprintf(buf, IFNAMSIZ, name, i);
        if (!__dev_get_by_name(net, buf))
                return i;
 
@@ -881,6 +936,21 @@ int dev_alloc_name(struct net_device *dev, const char *name)
 }
 EXPORT_SYMBOL(dev_alloc_name);
 
+static int dev_get_valid_name(struct net *net, const char *name, char *buf,
+                             bool fmt)
+{
+       if (!dev_valid_name(name))
+               return -EINVAL;
+
+       if (fmt && strchr(name, '%'))
+               return __dev_alloc_name(net, name, buf);
+       else if (__dev_get_by_name(net, name))
+               return -EEXIST;
+       else if (buf != name)
+               strlcpy(buf, name, IFNAMSIZ);
+
+       return 0;
+}
 
 /**
 *     dev_change_name - change name of a device
@@ -904,28 +974,20 @@ int dev_change_name(struct net_device *dev, const char *newname)
        if (dev->flags & IFF_UP)
                return -EBUSY;
 
-       if (!dev_valid_name(newname))
-               return -EINVAL;
-
        if (strncmp(newname, dev->name, IFNAMSIZ) == 0)
                return 0;
 
        memcpy(oldname, dev->name, IFNAMSIZ);
 
-       if (strchr(newname, '%')) {
-               err = dev_alloc_name(dev, newname);
-               if (err < 0)
-                       return err;
-       } else if (__dev_get_by_name(net, newname))
-               return -EEXIST;
-       else
-               strlcpy(dev->name, newname, IFNAMSIZ);
+       err = dev_get_valid_name(net, newname, dev->name, 1);
+       if (err < 0)
+               return err;
 
 rollback:
        /* For now only devices in the initial network namespace
        * are in sysfs.
        */
-       if (net == &init_net) {
+       if (net_eq(net, &init_net)) {
                ret = device_rename(&dev->dev, dev->name);
                if (ret) {
                        memcpy(dev->name, oldname, IFNAMSIZ);
@@ -935,7 +997,12 @@ rollback:
 
        write_lock_bh(&dev_base_lock);
        hlist_del(&dev->name_hlist);
-       hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name));
+       write_unlock_bh(&dev_base_lock);
+
+       synchronize_rcu();
+
+       write_lock_bh(&dev_base_lock);
+       hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name));
        write_unlock_bh(&dev_base_lock);
 
        ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev);
@@ -1038,28 +1105,16 @@ void dev_load(struct net *net, const char *name)
 {
        struct net_device *dev;
 
-       read_lock(&dev_base_lock);
-       dev = __dev_get_by_name(net, name);
-       read_unlock(&dev_base_lock);
+       rcu_read_lock();
+       dev = dev_get_by_name_rcu(net, name);
+       rcu_read_unlock();
 
        if (!dev && capable(CAP_NET_ADMIN))
                request_module("%s", name);
 }
 EXPORT_SYMBOL(dev_load);
 
-/**
- *     dev_open        - prepare an interface for use.
- *     @dev:   device to open
- *
- *     Takes a device from down to up state. The device's private open
- *     function is invoked and then the multicast lists are loaded. Finally
- *     the device is moved into the up state and a %NETDEV_UP message is
- *     sent to the netdev notifier chain.
- *
- *     Calling this function on an active interface is a nop. On a failure
- *     a negative errno code is returned.
- */
-int dev_open(struct net_device *dev)
+static int __dev_open(struct net_device *dev)
 {
        const struct net_device_ops *ops = dev->netdev_ops;
        int ret;
@@ -1067,13 +1122,6 @@ int dev_open(struct net_device *dev)
        ASSERT_RTNL();
 
        /*
-        *      Is it already up?
-        */
-
-       if (dev->flags & IFF_UP)
-               return 0;
-
-       /*
        *      Is it even present?
        */
        if (!netif_device_present(dev))
@@ -1121,36 +1169,57 @@
                *      Wakeup transmit queue engine
                */
                dev_activate(dev);
-
-               /*
-                *      ... and announce new interface.
-                */
-               call_netdevice_notifiers(NETDEV_UP, dev);
        }
 
        return ret;
 }
-EXPORT_SYMBOL(dev_open);
 
 /**
- *     dev_close - shutdown an interface.
- *     @dev: device to shutdown
+ *     dev_open        - prepare an interface for use.
+ *     @dev:   device to open
 *
- *     This function moves an active device into down state. A
- *     %NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
- *     is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
- *     chain.
+ *     Takes a device from down to up state. The device's private open
+ *     function is invoked and then the multicast lists are loaded. Finally
+ *     the device is moved into the up state and a %NETDEV_UP message is
+ *     sent to the netdev notifier chain.
+ *
+ *     Calling this function on an active interface is a nop. On a failure
+ *     a negative errno code is returned.
 */
-int dev_close(struct net_device *dev)
+int dev_open(struct net_device *dev)
+{
+       int ret;
+
+       /*
+        *      Is it already up?
+        */
+       if (dev->flags & IFF_UP)
+               return 0;
+
+       /*
+        *      Open device
+        */
+       ret = __dev_open(dev);
+       if (ret < 0)
+               return ret;
+
+       /*
+        *      ... and announce new interface.
+        */
+       rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING);
+       call_netdevice_notifiers(NETDEV_UP, dev);
+
+       return ret;
+}
+EXPORT_SYMBOL(dev_open);
+
+static int __dev_close(struct net_device *dev)
 {
        const struct net_device_ops *ops = dev->netdev_ops;
-       ASSERT_RTNL();
 
+       ASSERT_RTNL();
        might_sleep();
 
-       if (!(dev->flags & IFF_UP))
-               return 0;
-
        /*
        *      Tell people we are going down, so that they can
        *      prepare to death, when device is still operating.
@@ -1186,14 +1255,34 @@ int dev_close(struct net_device *dev)
        dev->flags &= ~IFF_UP;
 
        /*
-        *      Tell people we are down
+        *      Shutdown NET_DMA
        */
-       call_netdevice_notifiers(NETDEV_DOWN, dev);
+       net_dmaengine_put();
+
+       return 0;
+}
+
+/**
+ *     dev_close - shutdown an interface.
+ *     @dev: device to shutdown
+ *
+ *     This function moves an active device into down state. A
+ *     %NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
+ *     is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
+ *     chain.
+ */
+int dev_close(struct net_device *dev)
+{
+       if (!(dev->flags & IFF_UP))
+               return 0;
+
+       __dev_close(dev);
 
        /*
-        *      Shutdown NET_DMA
+        *      Tell people we are down
        */
-       net_dmaengine_put();
+       rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING);
+       call_netdevice_notifiers(NETDEV_DOWN, dev);
 
        return 0;
 }
@@ -1287,6 +1376,7 @@ rollback:
                        nb->notifier_call(nb, NETDEV_DOWN, dev);
                }
                nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
+               nb->notifier_call(nb, NETDEV_UNREGISTER_BATCH, dev);
        }
 }
 
@@ -1353,6 +1443,41 @@ static inline void net_timestamp(struct sk_buff *skb)
                skb->tstamp.tv64 = 0;
 }
 
+/**
+ * dev_forward_skb - loopback an skb to another netif
+ *
+ * @dev: destination network device
+ * @skb: buffer to forward
+ *
+ * return values:
+ *     NET_RX_SUCCESS  (no congestion)
+ *     NET_RX_DROP     (packet was dropped, but freed)
+ *
+ * dev_forward_skb can be used for injecting an skb from the
+ * start_xmit function of one device into the receive queue
+ * of another device.
+ *
+ * The receiving device may be in another namespace, so
+ * we have to clear all information in the skb that could
+ * impact namespace isolation.
+ */
+int dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
+{
+       skb_orphan(skb);
+
+       if (!(dev->flags & IFF_UP) ||
+           (skb->len > (dev->mtu + dev->hard_header_len))) {
+               kfree_skb(skb);
+               return NET_RX_DROP;
+       }
+       skb_set_dev(skb, dev);
+       skb->tstamp.tv64 = 0;
+       skb->pkt_type = PACKET_HOST;
+       skb->protocol = eth_type_trans(skb, dev);
+       return netif_rx(skb);
+}
+EXPORT_SYMBOL_GPL(dev_forward_skb);
+
 /*
 *      Support routine. Sends outgoing frames to any network
 *      taps currently in use.
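Per the kerneldoc above, dev_forward_skb() is meant to be called from one device's transmit routine to inject the frame into a peer's receive path (the veth-style pairing case). A hedged sketch of such an ndo_start_xmit hook; pair_get_peer() and the stats handling are hypothetical, not from this patch:

    static netdev_tx_t pair_start_xmit(struct sk_buff *skb, struct net_device *dev)
    {
            struct net_device *peer = pair_get_peer(dev);  /* hypothetical lookup */
            unsigned int len = skb->len;                   /* skb is consumed below */

            if (dev_forward_skb(peer, skb) == NET_RX_SUCCESS) {
                    dev->stats.tx_packets++;
                    dev->stats.tx_bytes += len;
            } else {
                    dev->stats.tx_dropped++;
            }
            return NETDEV_TX_OK;
    }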
@@ -1508,6 +1633,36 @@ static bool dev_can_checksum(struct net_device *dev, struct sk_buff *skb)
        return false;
 }
 
+/**
+ * skb_dev_set -- assign a new device to a buffer
+ * @skb: buffer for the new device
+ * @dev: network device
+ *
+ * If an skb is owned by a device already, we have to reset
+ * all data private to the namespace a device belongs to
+ * before assigning it a new device.
+ */
+#ifdef CONFIG_NET_NS
+void skb_set_dev(struct sk_buff *skb, struct net_device *dev)
+{
+       skb_dst_drop(skb);
+       if (skb->dev && !net_eq(dev_net(skb->dev), dev_net(dev))) {
+               secpath_reset(skb);
+               nf_reset(skb);
+               skb_init_secmark(skb);
+               skb->mark = 0;
+               skb->priority = 0;
+               skb->nf_trace = 0;
+               skb->ipvs_property = 0;
+#ifdef CONFIG_NET_SCHED
+               skb->tc_index = 0;
+#endif
+       }
+       skb->dev = dev;
+}
+EXPORT_SYMBOL(skb_set_dev);
+#endif /* CONFIG_NET_NS */
+
 /*
 * Invalidate hardware checksum when packet is to be mangled, and
 * complete checksum manually on outgoing path.
@@ -1701,7 +1856,7 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
                        struct netdev_queue *txq)
 {
        const struct net_device_ops *ops = dev->netdev_ops;
-       int rc;
+       int rc = NETDEV_TX_OK;
 
        if (likely(!skb->next)) {
                if (!list_empty(&ptype_all))
@@ -1747,8 +1902,18 @@ gso:
 
                skb->next = nskb->next;
                nskb->next = NULL;
+
+               /*
+                * If device doesnt need nskb->dst, release it right now while
+                * its hot in this cpu cache
+                */
+               if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
+                       skb_dst_drop(nskb);
+
                rc = ops->ndo_start_xmit(nskb, dev);
                if (unlikely(rc != NETDEV_TX_OK)) {
+                       if (rc & ~NETDEV_TX_MASK)
+                               goto out_kfree_gso_skb;
                        nskb->next = skb->next;
                        skb->next = nskb;
                        return rc;
@@ -1758,11 +1923,12 @@ gso:
                return NETDEV_TX_BUSY;
        } while (skb->next);
 
-       skb->destructor = DEV_GSO_CB(skb)->destructor;
-
+out_kfree_gso_skb:
+       if (likely(skb->next == NULL))
+               skb->destructor = DEV_GSO_CB(skb)->destructor;
 out_kfree_skb:
        kfree_skb(skb);
-       return NETDEV_TX_OK;
+       return rc;
 }
 
 static u32 skb_tx_hashrnd;
@@ -1789,16 +1955,47 @@ u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb)
 }
 EXPORT_SYMBOL(skb_tx_hash);
 
+static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index)
+{
+       if (unlikely(queue_index >= dev->real_num_tx_queues)) {
+               if (net_ratelimit()) {
+                       WARN(1, "%s selects TX queue %d, but "
+                            "real number of TX queues is %d\n",
+                            dev->name, queue_index,
+                            dev->real_num_tx_queues);
+               }
+               return 0;
+       }
+       return queue_index;
+}
+
 static struct netdev_queue *dev_pick_tx(struct net_device *dev,
                                        struct sk_buff *skb)
 {
-       const struct net_device_ops *ops = dev->netdev_ops;
-       u16 queue_index = 0;
+       u16 queue_index;
+       struct sock *sk = skb->sk;
+
+       if (sk_tx_queue_recorded(sk)) {
+               queue_index = sk_tx_queue_get(sk);
+       } else {
+               const struct net_device_ops *ops = dev->netdev_ops;
+
+               if (ops->ndo_select_queue) {
+                       queue_index = ops->ndo_select_queue(dev, skb);
+                       queue_index = dev_cap_txqueue(dev, queue_index);
+               } else {
+                       queue_index = 0;
+                       if (dev->real_num_tx_queues > 1)
+                               queue_index = skb_tx_hash(dev, skb);
 
-       if (ops->ndo_select_queue)
-               queue_index = ops->ndo_select_queue(dev, skb);
-       else if (dev->real_num_tx_queues > 1)
-               queue_index = skb_tx_hash(dev, skb);
+                       if (sk) {
+                               struct dst_entry *dst = rcu_dereference_bh(sk->sk_dst_cache);
+
+                               if (dst && skb_dst(skb) == dst)
+                                       sk_tx_queue_set(sk, queue_index);
+                       }
+               }
+       }
 
        skb_set_queue_mapping(skb, queue_index);
        return netdev_get_tx_queue(dev, queue_index);
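With dev_cap_txqueue() above, a queue index coming back from a driver's ndo_select_queue() is range-checked by the core instead of being trusted blindly. For context, a sketch of the driver-side hook this guards (illustrative policy, hypothetical driver; skb_tx_hash() itself already returns an in-range value):

    static u16 mydrv_select_queue(struct net_device *dev, struct sk_buff *skb)
    {
            /* keep control-priority traffic on queue 0, hash the rest */
            if (skb->priority == TC_PRIO_CONTROL)
                    return 0;
            return skb_tx_hash(dev, skb);
    }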
@@ -1838,6 +2035,21 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
        return rc;
 }
 
+/*
+ * Returns true if either:
+ *     1. skb has frag_list and the device doesn't support FRAGLIST, or
+ *     2. skb is fragmented and the device does not support SG, or if
+ *        at least one of fragments is in highmem and device does not
+ *        support DMA from it.
+ */
+static inline int skb_needs_linearize(struct sk_buff *skb,
+                                     struct net_device *dev)
+{
+       return (skb_has_frags(skb) && !(dev->features & NETIF_F_FRAGLIST)) ||
+              (skb_shinfo(skb)->nr_frags && (!(dev->features & NETIF_F_SG) ||
+                                             illegal_highdma(dev, skb)));
+}
+
 /**
 *     dev_queue_xmit - transmit a buffer
 *     @skb: buffer to transmit
@@ -1874,18 +2086,8 @@ int dev_queue_xmit(struct sk_buff *skb)
        if (netif_needs_gso(dev, skb))
                goto gso;
 
-       if (skb_has_frags(skb) &&
-           !(dev->features & NETIF_F_FRAGLIST) &&
-           __skb_linearize(skb))
-               goto out_kfree_skb;
-
-       /* Fragmented skb is linearized if device does not support SG,
-        * or if at least one of fragments is in highmem and device
-        * does not support DMA from it.
-        */
-       if (skb_shinfo(skb)->nr_frags &&
-           (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) &&
-           __skb_linearize(skb))
+       /* Convert a paged skb to linear, if required */
+       if (skb_needs_linearize(skb, dev) && __skb_linearize(skb))
                goto out_kfree_skb;
 
        /* If packet is not checksummed and device does not support
@@ -1905,7 +2107,7 @@ gso:
        rcu_read_lock_bh();
 
        txq = dev_pick_tx(dev, skb);
-       q = rcu_dereference(txq->qdisc);
+       q = rcu_dereference_bh(txq->qdisc);
 
 #ifdef CONFIG_NET_CLS_ACT
        skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS);
@@ -1935,8 +2137,8 @@ gso:
                        HARD_TX_LOCK(dev, txq, cpu);
 
                        if (!netif_tx_queue_stopped(txq)) {
-                               rc = NET_XMIT_SUCCESS;
-                               if (!dev_hard_start_xmit(skb, dev, txq)) {
+                               rc = dev_hard_start_xmit(skb, dev, txq);
+                               if (dev_xmit_complete(rc)) {
                                        HARD_TX_UNLOCK(dev, txq);
                                        goto out;
                                }
@@ -2191,7 +2393,7 @@ static int ing_filter(struct sk_buff *skb)
        if (MAX_RED_LOOP < ttl++) {
                printk(KERN_WARNING
                       "Redir loop detected Dropping packet (%d->%d)\n",
-                      skb->iif, dev->ifindex);
+                      skb->skb_iif, dev->ifindex);
                return TC_ACT_SHOT;
        }
 
@@ -2285,30 +2487,33 @@ int netif_receive_skb(struct sk_buff *skb)
 {
        struct packet_type *ptype, *pt_prev;
        struct net_device *orig_dev;
+       struct net_device *master;
        struct net_device *null_or_orig;
+       struct net_device *null_or_bond;
        int ret = NET_RX_DROP;
        __be16 type;
 
        if (!skb->tstamp.tv64)
                net_timestamp(skb);
 
-       if (skb->vlan_tci && vlan_hwaccel_do_receive(skb))
+       if (vlan_tx_tag_present(skb) && vlan_hwaccel_do_receive(skb))
                return NET_RX_SUCCESS;
 
        /* if we've gotten here through NAPI, check netpoll */
        if (netpoll_receive_skb(skb))
                return NET_RX_DROP;
 
-       if (!skb->iif)
-               skb->iif = skb->dev->ifindex;
+       if (!skb->skb_iif)
+               skb->skb_iif = skb->dev->ifindex;
 
        null_or_orig = NULL;
        orig_dev = skb->dev;
-       if (orig_dev->master) {
-               if (skb_bond_should_drop(skb))
+       master = ACCESS_ONCE(orig_dev->master);
+       if (master) {
+               if (skb_bond_should_drop(skb, master))
                        null_or_orig = orig_dev; /* deliver only exact match */
                else
-                       skb->dev = orig_dev->master;
+                       skb->dev = master;
        }
 
        __get_cpu_var(netdev_rx_stat).total++;
@@ -2351,12 +2556,24 @@ ncls:
        if (!skb)
                goto out;
 
+       /*
+        * Make sure frames received on VLAN interfaces stacked on
+        * bonding interfaces still make their way to any base bonding
+        * device that may have registered for a specific ptype.  The
+        * handler may have to adjust skb->dev and orig_dev.
+        */
+       null_or_bond = NULL;
+       if ((skb->dev->priv_flags & IFF_802_1Q_VLAN) &&
+           (vlan_dev_real_dev(skb->dev)->priv_flags & IFF_BONDING)) {
+               null_or_bond = vlan_dev_real_dev(skb->dev);
+       }
+
        type = skb->protocol;
        list_for_each_entry_rcu(ptype,
                        &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
-               if (ptype->type == type &&
-                   (ptype->dev == null_or_orig || ptype->dev == skb->dev ||
-                    ptype->dev == orig_dev)) {
+               if (ptype->type == type && (ptype->dev == null_or_orig ||
+                   ptype->dev == skb->dev || ptype->dev == orig_dev ||
+                   ptype->dev == null_or_bond)) {
                        if (pt_prev)
                                ret = deliver_skb(skb, pt_prev, orig_dev);
                        pt_prev = ptype;
@@ -2425,7 +2642,7 @@ out:
        return netif_receive_skb(skb);
 }
 
-void napi_gro_flush(struct napi_struct *napi)
+static void napi_gro_flush(struct napi_struct *napi)
 {
        struct sk_buff *skb, *next;
 
@@ -2438,9 +2655,8 @@ void napi_gro_flush(struct napi_struct *napi)
        napi->gro_count = 0;
        napi->gro_list = NULL;
 }
-EXPORT_SYMBOL(napi_gro_flush);
 
-int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
+enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 {
        struct sk_buff **pp = NULL;
        struct packet_type *ptype;
@@ -2448,7 +2664,7 @@ int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
        struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
        int same_flow;
        int mac_len;
-       int ret;
+       enum gro_result ret;
 
        if (!(skb->dev->features & NETIF_F_GRO))
                goto normal;
@@ -2532,7 +2748,8 @@ normal:
 }
 EXPORT_SYMBOL(dev_gro_receive);
 
-static int __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
+static gro_result_t
+__napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 {
        struct sk_buff *p;
 
@@ -2540,33 +2757,35 @@ static int __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
                return GRO_NORMAL;
 
        for (p = napi->gro_list; p; p = p->next) {
-               NAPI_GRO_CB(p)->same_flow = (p->dev == skb->dev)
-                       && !compare_ether_header(skb_mac_header(p),
-                                                skb_gro_mac_header(skb));
+               NAPI_GRO_CB(p)->same_flow =
+                       (p->dev == skb->dev) &&
+                       !compare_ether_header(skb_mac_header(p),
+                                             skb_gro_mac_header(skb));
                NAPI_GRO_CB(p)->flush = 0;
        }
 
        return dev_gro_receive(napi, skb);
 }
 
-int napi_skb_finish(int ret, struct sk_buff *skb)
+gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
 {
-       int err = NET_RX_SUCCESS;
-
        switch (ret) {
        case GRO_NORMAL:
-               return netif_receive_skb(skb);
+               if (netif_receive_skb(skb))
+                       ret = GRO_DROP;
+               break;
 
        case GRO_DROP:
-               err = NET_RX_DROP;
-               /* fall through */
-
        case GRO_MERGED_FREE:
                kfree_skb(skb);
                break;
+
+       case GRO_HELD:
+       case GRO_MERGED:
+               break;
        }
 
-       return err;
+       return ret;
 }
 EXPORT_SYMBOL(napi_skb_finish);
 
@@ -2586,7 +2805,7 @@ void skb_gro_reset_offset(struct sk_buff *skb)
 }
 EXPORT_SYMBOL(skb_gro_reset_offset);
 
-int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
+gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 {
        skb_gro_reset_offset(skb);
 
@@ -2605,49 +2824,41 @@ EXPORT_SYMBOL(napi_reuse_skb);
 
 struct sk_buff *napi_get_frags(struct napi_struct *napi)
 {
-       struct net_device *dev = napi->dev;
        struct sk_buff *skb = napi->skb;
 
        if (!skb) {
-               skb = netdev_alloc_skb(dev, GRO_MAX_HEAD + NET_IP_ALIGN);
-               if (!skb)
-                       goto out;
-
-               skb_reserve(skb, NET_IP_ALIGN);
-
-               napi->skb = skb;
+               skb = netdev_alloc_skb_ip_align(napi->dev, GRO_MAX_HEAD);
+               if (skb)
+                       napi->skb = skb;
        }
-
-out:
        return skb;
 }
 EXPORT_SYMBOL(napi_get_frags);
 
-int napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, int ret)
+gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb,
+                              gro_result_t ret)
 {
-       int err = NET_RX_SUCCESS;
-
        switch (ret) {
        case GRO_NORMAL:
        case GRO_HELD:
-               skb->protocol = eth_type_trans(skb, napi->dev);
+               skb->protocol = eth_type_trans(skb, skb->dev);
 
-               if (ret == GRO_NORMAL)
-                       return netif_receive_skb(skb);
-
-               skb_gro_pull(skb, -ETH_HLEN);
+               if (ret == GRO_HELD)
+                       skb_gro_pull(skb, -ETH_HLEN);
+               else if (netif_receive_skb(skb))
+                       ret = GRO_DROP;
                break;
 
        case GRO_DROP:
-               err = NET_RX_DROP;
-               /* fall through */
-
        case GRO_MERGED_FREE:
                napi_reuse_skb(napi, skb);
                break;
+
+       case GRO_MERGED:
+               break;
        }
 
-       return err;
+       return ret;
 }
 EXPORT_SYMBOL(napi_frags_finish);
 
@@ -2688,12 +2899,12 @@ out:
 }
 EXPORT_SYMBOL(napi_frags_skb);
 
-int napi_gro_frags(struct napi_struct *napi)
+gro_result_t napi_gro_frags(struct napi_struct *napi)
 {
        struct sk_buff *skb = napi_frags_skb(napi);
 
        if (!skb)
-               return NET_RX_DROP;
+               return GRO_DROP;
 
        return napi_frags_finish(napi, skb, __napi_gro_receive(napi, skb));
 }
@@ -2835,7 +3046,7 @@ static void net_rx_action(struct softirq_action *h)
                * entries to the tail of this list, and only ->poll()
                * calls can remove this head entry from the list.
                */
-               n = list_entry(list->next, struct napi_struct, poll_list);
+               n = list_first_entry(list, struct napi_struct, poll_list);
 
                have = netpoll_poll_lock(n);
 
@@ -2938,15 +3149,15 @@ static int dev_ifname(struct net *net, struct ifreq __user *arg)
        if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
                return -EFAULT;
 
-       read_lock(&dev_base_lock);
-       dev = __dev_get_by_index(net, ifr.ifr_ifindex);
+       rcu_read_lock();
+       dev = dev_get_by_index_rcu(net, ifr.ifr_ifindex);
        if (!dev) {
-               read_unlock(&dev_base_lock);
+               rcu_read_unlock();
                return -ENODEV;
        }
 
        strcpy(ifr.ifr_name, dev->name);
-       read_unlock(&dev_base_lock);
+       rcu_read_unlock();
 
        if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
                return -EFAULT;
@@ -3016,18 +3227,18 @@ static int dev_ifconf(struct net *net, char __user *arg)
 *      in detail.
 */
 void *dev_seq_start(struct seq_file *seq, loff_t *pos)
-       __acquires(dev_base_lock)
+       __acquires(RCU)
 {
        struct net *net = seq_file_net(seq);
        loff_t off;
        struct net_device *dev;
 
-       read_lock(&dev_base_lock);
+       rcu_read_lock();
        if (!*pos)
                return SEQ_START_TOKEN;
 
        off = 1;
-       for_each_netdev(net, dev)
+       for_each_netdev_rcu(net, dev)
                if (off++ == *pos)
                        return dev;
 
@@ -3036,23 +3247,25 @@ void *dev_seq_start(struct seq_file *seq, loff_t *pos)
 
 void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
-       struct net *net = seq_file_net(seq);
+       struct net_device *dev = (v == SEQ_START_TOKEN) ?
+                                 first_net_device(seq_file_net(seq)) :
+                                 next_net_device((struct net_device *)v);
+
        ++*pos;
-       return v == SEQ_START_TOKEN ?
-               first_net_device(net) : next_net_device((struct net_device *)v);
+       return rcu_dereference(dev);
 }
 
 void dev_seq_stop(struct seq_file *seq, void *v)
-       __releases(dev_base_lock)
+       __releases(RCU)
 {
-       read_unlock(&dev_base_lock);
+       rcu_read_unlock();
 }
 
 static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
 {
        const struct net_device_stats *stats = dev_get_stats(dev);
 
-       seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu "
+       seq_printf(seq, "%6s: %7lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu "
                   "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
                   dev->name, stats->rx_bytes, stats->rx_packets,
                   stats->rx_errors,
@@ -3507,10 +3720,10 @@ void __dev_set_rx_mode(struct net_device *dev)
        /* Unicast addresses changes may only happen under the rtnl,
        * therefore calling __dev_set_promiscuity here is safe.
        */
-       if (dev->uc.count > 0 && !dev->uc_promisc) {
+       if (!netdev_uc_empty(dev) && !dev->uc_promisc) {
                __dev_set_promiscuity(dev, 1);
                dev->uc_promisc = 1;
-       } else if (dev->uc.count == 0 && dev->uc_promisc) {
+       } else if (netdev_uc_empty(dev) && dev->uc_promisc) {
                __dev_set_promiscuity(dev, -1);
                dev->uc_promisc = 0;
        }
@@ -4078,7 +4291,7 @@ static void dev_addr_discard(struct net_device *dev)
        netif_addr_lock_bh(dev);
 
        __dev_addr_discard(&dev->mc_list);
-       dev->mc_count = 0;
+       netdev_mc_count(dev) = 0;
 
        netif_addr_unlock_bh(dev);
 }
@@ -4114,18 +4327,10 @@ unsigned dev_get_flags(const struct net_device *dev)
 }
 EXPORT_SYMBOL(dev_get_flags);
 
-/**
- *     dev_change_flags - change device settings
- *     @dev: device
- *     @flags: device state flags
- *
- *     Change settings on device based state flags. The flags are
- *     in the userspace exported format.
- */
-int dev_change_flags(struct net_device *dev, unsigned flags)
+int __dev_change_flags(struct net_device *dev, unsigned int flags)
 {
-       int ret, changes;
        int old_flags = dev->flags;
+       int ret;
 
        ASSERT_RTNL();
 
@@ -4156,17 +4361,12 @@ int dev_change_flags(struct net_device *dev, unsigned flags)
 
        ret = 0;
        if ((old_flags ^ flags) & IFF_UP) {     /* Bit is different  ? */
-               ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev);
+               ret = ((old_flags & IFF_UP) ? __dev_close : __dev_open)(dev);
 
                if (!ret)
                        dev_set_rx_mode(dev);
        }
 
-       if (dev->flags & IFF_UP &&
-           ((old_flags ^ dev->flags) & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI |
-                                         IFF_VOLATILE)))
-               call_netdevice_notifiers(NETDEV_CHANGE, dev);
-
        if ((flags ^ dev->gflags) & IFF_PROMISC) {
                int inc = (flags & IFF_PROMISC) ? 1 : -1;
 
@@ -4185,11 +4385,47 @@ int dev_change_flags(struct net_device *dev, unsigned flags)
                dev_set_allmulti(dev, inc);
        }
 
-       /* Exclude state transition flags, already notified */
-       changes = (old_flags ^ dev->flags) & ~(IFF_UP | IFF_RUNNING);
+       return ret;
+}
+
+void __dev_notify_flags(struct net_device *dev, unsigned int old_flags)
+{
+       unsigned int changes = dev->flags ^ old_flags;
+
+       if (changes & IFF_UP) {
+               if (dev->flags & IFF_UP)
+                       call_netdevice_notifiers(NETDEV_UP, dev);
+               else
+                       call_netdevice_notifiers(NETDEV_DOWN, dev);
+       }
+
+       if (dev->flags & IFF_UP &&
+           (changes & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI | IFF_VOLATILE)))
+               call_netdevice_notifiers(NETDEV_CHANGE, dev);
+}
+
+/**
+ *     dev_change_flags - change device settings
+ *     @dev: device
+ *     @flags: device state flags
+ *
+ *     Change settings on device based state flags. The flags are
+ *     in the userspace exported format.
+ */
+int dev_change_flags(struct net_device *dev, unsigned flags)
+{
+       int ret, changes;
+       int old_flags = dev->flags;
+
+       ret = __dev_change_flags(dev, flags);
+       if (ret < 0)
+               return ret;
+
+       changes = old_flags ^ dev->flags;
        if (changes)
                rtmsg_ifinfo(RTM_NEWLINK, dev, changes);
 
+       __dev_notify_flags(dev, old_flags);
        return ret;
 }
 EXPORT_SYMBOL(dev_change_flags);
@@ -4254,12 +4490,12 @@ int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
 EXPORT_SYMBOL(dev_set_mac_address);
 
 /*
- *     Perform the SIOCxIFxxx calls, inside read_lock(dev_base_lock)
+ *     Perform the SIOCxIFxxx calls, inside rcu_read_lock()
 */
 static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cmd)
 {
        int err;
-       struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
+       struct net_device *dev = dev_get_by_name_rcu(net, ifr->ifr_name);
 
        if (!dev)
                return -ENODEV;
@@ -4491,9 +4727,9 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
        case SIOCGIFINDEX:
        case SIOCGIFTXQLEN:
                dev_load(net, ifr.ifr_name);
-               read_lock(&dev_base_lock);
+               rcu_read_lock();
                ret = dev_ifsioc_locked(net, &ifr, cmd);
-               read_unlock(&dev_base_lock);
+               rcu_read_unlock();
                if (!ret) {
                        if (colon)
                                *colon = ':';
@@ -4636,59 +4872,86 @@ static void net_set_todo(struct net_device *dev)
        list_add_tail(&dev->todo_list, &net_todo_list);
 }
 
-static void rollback_registered(struct net_device *dev)
+static void rollback_registered_many(struct list_head *head)
 {
+       struct net_device *dev, *tmp;
+
        BUG_ON(dev_boot_phase);
        ASSERT_RTNL();
 
-       /* Some devices call without registering for initialization unwind. */
-       if (dev->reg_state == NETREG_UNINITIALIZED) {
-               printk(KERN_DEBUG "unregister_netdevice: device %s/%p never "
-                                 "was registered\n", dev->name, dev);
+       list_for_each_entry_safe(dev, tmp, head, unreg_list) {
+               /* Some devices call without registering
+                * for initialization unwind. Remove those
+                * devices and proceed with the remaining.
+                */
+               if (dev->reg_state == NETREG_UNINITIALIZED) {
+                       pr_debug("unregister_netdevice: device %s/%p never "
+                                "was registered\n", dev->name, dev);
 
-               WARN_ON(1);
-               return;
-       }
+                       WARN_ON(1);
+                       list_del(&dev->unreg_list);
+                       continue;
+               }
 
-       BUG_ON(dev->reg_state != NETREG_REGISTERED);
+               BUG_ON(dev->reg_state != NETREG_REGISTERED);
 
-       /* If device is running, close it first. */
-       dev_close(dev);
+               /* If device is running, close it first. */
+               dev_close(dev);
 
-       /* And unlink it from device chain. */
-       unlist_netdevice(dev);
+               /* And unlink it from device chain. */
+               unlist_netdevice(dev);
 
-       dev->reg_state = NETREG_UNREGISTERING;
+               dev->reg_state = NETREG_UNREGISTERING;
+       }
 
        synchronize_net();
 
-       /* Shutdown queueing discipline. */
-       dev_shutdown(dev);
+       list_for_each_entry(dev, head, unreg_list) {
+               /* Shutdown queueing discipline. */
+               dev_shutdown(dev);
 
 
-       /* Notify protocols, that we are about to destroy
-          this device. They should clean all the things.
-       */
-       call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
+               /* Notify protocols, that we are about to destroy
+                  this device. They should clean all the things.
+               */
+               call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
 
-       /*
-        *      Flush the unicast and multicast chains
-        */
-       dev_unicast_flush(dev);
-       dev_addr_discard(dev);
+               if (!dev->rtnl_link_ops ||
+                   dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
+                       rtmsg_ifinfo(RTM_DELLINK, dev, ~0U);
 
-       if (dev->netdev_ops->ndo_uninit)
-               dev->netdev_ops->ndo_uninit(dev);
+               /*
+                *      Flush the unicast and multicast chains
+                */
+               dev_unicast_flush(dev);
+               dev_addr_discard(dev);
 
-       /* Notifier chain MUST detach us from master device. */
-       WARN_ON(dev->master);
+               if (dev->netdev_ops->ndo_uninit)
+                       dev->netdev_ops->ndo_uninit(dev);
 
-       /* Remove entries from kobject tree */
-       netdev_unregister_kobject(dev);
+               /* Notifier chain MUST detach us from master device. */
+               WARN_ON(dev->master);
+
+               /* Remove entries from kobject tree */
+               netdev_unregister_kobject(dev);
+       }
+
+       /* Process any work delayed until the end of the batch */
+       dev = list_first_entry(head, struct net_device, unreg_list);
+       call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev);
 
        synchronize_net();
 
-       dev_put(dev);
+       list_for_each_entry(dev, head, unreg_list)
+               dev_put(dev);
+}
+
+static void rollback_registered(struct net_device *dev)
+{
+       LIST_HEAD(single);
+
+       list_add(&dev->unreg_list, &single);
+       rollback_registered_many(&single);
 }
 
 static void __netdev_init_queue_locks_one(struct net_device *dev,
@@ -4747,6 +5010,33 @@ unsigned long netdev_fix_features(unsigned long features, const char *name)
 EXPORT_SYMBOL(netdev_fix_features);
 
 /**
+ *     netif_stacked_transfer_operstate -      transfer operstate
+ *     @rootdev: the root or lower level device to transfer state from
+ *     @dev: the device to transfer operstate to
+ *
+ *     Transfer operational state from root to device. This is normally
+ *     called when a stacking relationship exists between the root
+ *     device and the device(a leaf device).
+ */
+void netif_stacked_transfer_operstate(const struct net_device *rootdev,
+                                     struct net_device *dev)
+{
+       if (rootdev->operstate == IF_OPER_DORMANT)
+               netif_dormant_on(dev);
+       else
+               netif_dormant_off(dev);
+
+       if (netif_carrier_ok(rootdev)) {
+               if (!netif_carrier_ok(dev))
+                       netif_carrier_on(dev);
+       } else {
+               if (netif_carrier_ok(dev))
+                       netif_carrier_off(dev);
+       }
+}
+EXPORT_SYMBOL(netif_stacked_transfer_operstate);
+
+/**
 *     register_netdevice      - register a network device
 *     @dev: device to register
 *
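netif_stacked_transfer_operstate() is intended to be driven from a stacking driver's netdevice notifier when the lower device changes state. A rough sketch under that assumption (stack_find_upper() is hypothetical; in this kernel generation the notifier's ptr argument is the net_device itself):

    static int stack_device_event(struct notifier_block *nb,
                                  unsigned long event, void *ptr)
    {
            struct net_device *lower = ptr;
            struct net_device *upper = stack_find_upper(lower);  /* hypothetical */

            if (upper && event == NETDEV_CHANGE)
                    netif_stacked_transfer_operstate(lower, upper);
            return NOTIFY_DONE;
    }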
@@ -4765,8 +5055,6 @@ EXPORT_SYMBOL(netdev_fix_features);
4765 5055
4766int register_netdevice(struct net_device *dev) 5056int register_netdevice(struct net_device *dev)
4767{ 5057{
4768 struct hlist_head *head;
4769 struct hlist_node *p;
4770 int ret; 5058 int ret;
4771 struct net *net = dev_net(dev); 5059 struct net *net = dev_net(dev);
4772 5060
@@ -4795,26 +5083,14 @@ int register_netdevice(struct net_device *dev)
4795 } 5083 }
4796 } 5084 }
4797 5085
4798 if (!dev_valid_name(dev->name)) { 5086 ret = dev_get_valid_name(net, dev->name, dev->name, 0);
4799 ret = -EINVAL; 5087 if (ret)
4800 goto err_uninit; 5088 goto err_uninit;
4801 }
4802 5089
4803 dev->ifindex = dev_new_index(net); 5090 dev->ifindex = dev_new_index(net);
4804 if (dev->iflink == -1) 5091 if (dev->iflink == -1)
4805 dev->iflink = dev->ifindex; 5092 dev->iflink = dev->ifindex;
4806 5093
4807 /* Check for existence of name */
4808 head = dev_name_hash(net, dev->name);
4809 hlist_for_each(p, head) {
4810 struct net_device *d
4811 = hlist_entry(p, struct net_device, name_hlist);
4812 if (!strncmp(d->name, dev->name, IFNAMSIZ)) {
4813 ret = -EEXIST;
4814 goto err_uninit;
4815 }
4816 }
4817
4818 /* Fix illegal checksum combinations */ 5094 /* Fix illegal checksum combinations */
4819 if ((dev->features & NETIF_F_HW_CSUM) && 5095 if ((dev->features & NETIF_F_HW_CSUM) &&
4820 (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { 5096 (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
@@ -4837,6 +5113,12 @@ int register_netdevice(struct net_device *dev)
4837 dev->features |= NETIF_F_GSO; 5113 dev->features |= NETIF_F_GSO;
4838 5114
4839 netdev_initialize_kobject(dev); 5115 netdev_initialize_kobject(dev);
5116
5117 ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev);
5118 ret = notifier_to_errno(ret);
5119 if (ret)
5120 goto err_uninit;
5121
4840 ret = netdev_register_kobject(dev); 5122 ret = netdev_register_kobject(dev);
4841 if (ret) 5123 if (ret)
4842 goto err_uninit; 5124 goto err_uninit;
@@ -4860,6 +5142,13 @@ int register_netdevice(struct net_device *dev)
4860 rollback_registered(dev); 5142 rollback_registered(dev);
4861 dev->reg_state = NETREG_UNREGISTERED; 5143 dev->reg_state = NETREG_UNREGISTERED;
4862 } 5144 }
5145 /*
5146 * Prevent userspace races by waiting until the network
5147 * device is fully setup before sending notifications.
5148 */
5149 if (!dev->rtnl_link_ops ||
5150 dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
5151 rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U);
4863 5152
4864out: 5153out:
4865 return ret; 5154 return ret;
@@ -4961,6 +5250,8 @@ static void netdev_wait_allrefs(struct net_device *dev)
4961{ 5250{
4962 unsigned long rebroadcast_time, warning_time; 5251 unsigned long rebroadcast_time, warning_time;
4963 5252
5253 linkwatch_forget_dev(dev);
5254
4964 rebroadcast_time = warning_time = jiffies; 5255 rebroadcast_time = warning_time = jiffies;
4965 while (atomic_read(&dev->refcnt) != 0) { 5256 while (atomic_read(&dev->refcnt) != 0) {
4966 if (time_after(jiffies, rebroadcast_time + 1 * HZ)) { 5257 if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
@@ -4968,6 +5259,8 @@ static void netdev_wait_allrefs(struct net_device *dev)
4968 5259
4969 /* Rebroadcast unregister notification */ 5260 /* Rebroadcast unregister notification */
4970 call_netdevice_notifiers(NETDEV_UNREGISTER, dev); 5261 call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
5262 /* don't resend NETDEV_UNREGISTER_BATCH, _BATCH users
5263 * should have already handled it the first time */
4971 5264
4972 if (test_bit(__LINK_STATE_LINKWATCH_PENDING, 5265 if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
4973 &dev->state)) { 5266 &dev->state)) {
@@ -5032,7 +5325,7 @@ void netdev_run_todo(void)
5032 5325
5033 while (!list_empty(&list)) { 5326 while (!list_empty(&list)) {
5034 struct net_device *dev 5327 struct net_device *dev
5035 = list_entry(list.next, struct net_device, todo_list); 5328 = list_first_entry(&list, struct net_device, todo_list);
5036 list_del(&dev->todo_list); 5329 list_del(&dev->todo_list);
5037 5330
5038 if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) { 5331 if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
@@ -5063,6 +5356,32 @@ void netdev_run_todo(void)
5063} 5356}
5064 5357
5065/** 5358/**
5359 * dev_txq_stats_fold - fold tx_queues stats
5360 * @dev: device to get statistics from
5361 * @stats: struct net_device_stats to hold results
5362 */
5363void dev_txq_stats_fold(const struct net_device *dev,
5364 struct net_device_stats *stats)
5365{
5366 unsigned long tx_bytes = 0, tx_packets = 0, tx_dropped = 0;
5367 unsigned int i;
5368 struct netdev_queue *txq;
5369
5370 for (i = 0; i < dev->num_tx_queues; i++) {
5371 txq = netdev_get_tx_queue(dev, i);
5372 tx_bytes += txq->tx_bytes;
5373 tx_packets += txq->tx_packets;
5374 tx_dropped += txq->tx_dropped;
5375 }
5376 if (tx_bytes || tx_packets || tx_dropped) {
5377 stats->tx_bytes = tx_bytes;
5378 stats->tx_packets = tx_packets;
5379 stats->tx_dropped = tx_dropped;
5380 }
5381}
5382EXPORT_SYMBOL(dev_txq_stats_fold);
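Multiqueue drivers that let the core count tx bytes/packets in each netdev_queue can now call this helper from their own ndo_get_stats instead of open-coding the loop, as dev_get_stats does below. A minimal sketch; foo_get_stats is a hypothetical driver hook:

static struct net_device_stats *foo_get_stats(struct net_device *dev)
{
        struct net_device_stats *stats = &dev->stats;

        /* rx counters are assumed to be updated elsewhere by the driver;
         * fold in the per-queue tx counters maintained by the core */
        dev_txq_stats_fold(dev, stats);
        return stats;
}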
5383
5384/**
5066 * dev_get_stats - get network device statistics 5385 * dev_get_stats - get network device statistics
5067 * @dev: device to get statistics from 5386 * @dev: device to get statistics from
5068 * 5387 *
@@ -5076,25 +5395,9 @@ const struct net_device_stats *dev_get_stats(struct net_device *dev)
5076 5395
5077 if (ops->ndo_get_stats) 5396 if (ops->ndo_get_stats)
5078 return ops->ndo_get_stats(dev); 5397 return ops->ndo_get_stats(dev);
5079 else { 5398
5080 unsigned long tx_bytes = 0, tx_packets = 0, tx_dropped = 0; 5399 dev_txq_stats_fold(dev, &dev->stats);
5081 struct net_device_stats *stats = &dev->stats; 5400 return &dev->stats;
5082 unsigned int i;
5083 struct netdev_queue *txq;
5084
5085 for (i = 0; i < dev->num_tx_queues; i++) {
5086 txq = netdev_get_tx_queue(dev, i);
5087 tx_bytes += txq->tx_bytes;
5088 tx_packets += txq->tx_packets;
5089 tx_dropped += txq->tx_dropped;
5090 }
5091 if (tx_bytes || tx_packets || tx_dropped) {
5092 stats->tx_bytes = tx_bytes;
5093 stats->tx_packets = tx_packets;
5094 stats->tx_dropped = tx_dropped;
5095 }
5096 return stats;
5097 }
5098} 5401}
5099EXPORT_SYMBOL(dev_get_stats); 5402EXPORT_SYMBOL(dev_get_stats);
5100 5403
@@ -5173,7 +5476,11 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
5173 5476
5174 netdev_init_queues(dev); 5477 netdev_init_queues(dev);
5175 5478
5479 INIT_LIST_HEAD(&dev->ethtool_ntuple_list.list);
5480 dev->ethtool_ntuple_list.count = 0;
5176 INIT_LIST_HEAD(&dev->napi_list); 5481 INIT_LIST_HEAD(&dev->napi_list);
5482 INIT_LIST_HEAD(&dev->unreg_list);
5483 INIT_LIST_HEAD(&dev->link_watch_list);
5177 dev->priv_flags = IFF_XMIT_DST_RELEASE; 5484 dev->priv_flags = IFF_XMIT_DST_RELEASE;
5178 setup(dev); 5485 setup(dev);
5179 strcpy(dev->name, name); 5486 strcpy(dev->name, name);
@@ -5207,6 +5514,9 @@ void free_netdev(struct net_device *dev)
5207 /* Flush device addresses */ 5514 /* Flush device addresses */
5208 dev_addr_flush(dev); 5515 dev_addr_flush(dev);
5209 5516
5517 /* Clear ethtool n-tuple list */
5518 ethtool_ntuple_flush(dev);
5519
5210 list_for_each_entry_safe(p, n, &dev->napi_list, dev_list) 5520 list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)
5211 netif_napi_del(p); 5521 netif_napi_del(p);
5212 5522
@@ -5238,25 +5548,47 @@ void synchronize_net(void)
5238EXPORT_SYMBOL(synchronize_net); 5548EXPORT_SYMBOL(synchronize_net);
5239 5549
5240/** 5550/**
5241 * unregister_netdevice - remove device from the kernel 5551 * unregister_netdevice_queue - remove device from the kernel
5242 * @dev: device 5552 * @dev: device
5553 * @head: list
5243 * 5554 *
5244 * This function shuts down a device interface and removes it 5555 * This function shuts down a device interface and removes it
5245 * from the kernel tables. 5556 * from the kernel tables.
5557 * If head is not NULL, the device is queued to be unregistered later.
5246 * 5558 *
5247 * Callers must hold the rtnl semaphore. You may want 5559 * Callers must hold the rtnl semaphore. You may want
5248 * unregister_netdev() instead of this. 5560 * unregister_netdev() instead of this.
5249 */ 5561 */
5250 5562
5251void unregister_netdevice(struct net_device *dev) 5563void unregister_netdevice_queue(struct net_device *dev, struct list_head *head)
5252{ 5564{
5253 ASSERT_RTNL(); 5565 ASSERT_RTNL();
5254 5566
5255 rollback_registered(dev); 5567 if (head) {
5256 /* Finish processing unregister after unlock */ 5568 list_move_tail(&dev->unreg_list, head);
5257 net_set_todo(dev); 5569 } else {
5570 rollback_registered(dev);
5571 /* Finish processing unregister after unlock */
5572 net_set_todo(dev);
5573 }
5258} 5574}
5259EXPORT_SYMBOL(unregister_netdevice); 5575EXPORT_SYMBOL(unregister_netdevice_queue);
5576
5577/**
5578 * unregister_netdevice_many - unregister many devices
5579 * @head: list of devices
5580 */
5581void unregister_netdevice_many(struct list_head *head)
5582{
5583 struct net_device *dev;
5584
5585 if (!list_empty(head)) {
5586 rollback_registered_many(head);
5587 list_for_each_entry(dev, head, unreg_list)
5588 net_set_todo(dev);
5589 }
5590}
5591EXPORT_SYMBOL(unregister_netdevice_many);
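A driver (or a netns teardown path, as in default_device_exit_batch further down) that destroys many devices can queue them on a private list and pay the notifier/RCU synchronization cost once. A sketch under rtnl_lock; struct foo_priv and foo_dev_list are hypothetical driver state:

static void foo_destroy_all(void)
{
        struct foo_priv *p, *tmp;
        LIST_HEAD(kill_list);

        rtnl_lock();
        list_for_each_entry_safe(p, tmp, &foo_dev_list, list)
                unregister_netdevice_queue(p->dev, &kill_list);
        unregister_netdevice_many(&kill_list);  /* one rollback pass for all */
        rtnl_unlock();
}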
5260 5592
5261/** 5593/**
5262 * unregister_netdev - remove device from the kernel 5594 * unregister_netdev - remove device from the kernel
@@ -5293,8 +5625,6 @@ EXPORT_SYMBOL(unregister_netdev);
5293 5625
5294int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat) 5626int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat)
5295{ 5627{
5296 char buf[IFNAMSIZ];
5297 const char *destname;
5298 int err; 5628 int err;
5299 5629
5300 ASSERT_RTNL(); 5630 ASSERT_RTNL();
@@ -5327,20 +5657,11 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
5327 * we can use it in the destination network namespace. 5657 * we can use it in the destination network namespace.
5328 */ 5658 */
5329 err = -EEXIST; 5659 err = -EEXIST;
5330 destname = dev->name; 5660 if (__dev_get_by_name(net, dev->name)) {
5331 if (__dev_get_by_name(net, destname)) {
5332 /* We get here if we can't use the current device name */ 5661 /* We get here if we can't use the current device name */
5333 if (!pat) 5662 if (!pat)
5334 goto out; 5663 goto out;
5335 if (!dev_valid_name(pat)) 5664 if (dev_get_valid_name(net, pat, dev->name, 1))
5336 goto out;
5337 if (strchr(pat, '%')) {
5338 if (__dev_alloc_name(net, pat, buf) < 0)
5339 goto out;
5340 destname = buf;
5341 } else
5342 destname = pat;
5343 if (__dev_get_by_name(net, destname))
5344 goto out; 5665 goto out;
5345 } 5666 }
5346 5667
@@ -5364,6 +5685,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
5364 this device. They should clean all the things. 5685 this device. They should clean all the things.
5365 */ 5686 */
5366 call_netdevice_notifiers(NETDEV_UNREGISTER, dev); 5687 call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
5688 call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev);
5367 5689
5368 /* 5690 /*
5369 * Flush the unicast and multicast chains 5691 * Flush the unicast and multicast chains
@@ -5376,10 +5698,6 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
5376 /* Actually switch the network namespace */ 5698 /* Actually switch the network namespace */
5377 dev_net_set(dev, net); 5699 dev_net_set(dev, net);
5378 5700
5379 /* Assign the new device name */
5380 if (destname != dev->name)
5381 strcpy(dev->name, destname);
5382
5383 /* If there is an ifindex conflict assign a new one */ 5701 /* If there is an ifindex conflict assign a new one */
5384 if (__dev_get_by_index(net, dev->ifindex)) { 5702 if (__dev_get_by_index(net, dev->ifindex)) {
5385 int iflink = (dev->iflink == dev->ifindex); 5703 int iflink = (dev->iflink == dev->ifindex);
@@ -5398,6 +5716,12 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
5398 /* Notify protocols, that a new device appeared. */ 5716 /* Notify protocols, that a new device appeared. */
5399 call_netdevice_notifiers(NETDEV_REGISTER, dev); 5717 call_netdevice_notifiers(NETDEV_REGISTER, dev);
5400 5718
5719 /*
5720 * Prevent userspace races by waiting until the network
5721 * device is fully set up before sending notifications.
5722 */
5723 rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U);
5724
5401 synchronize_net(); 5725 synchronize_net();
5402 err = 0; 5726 err = 0;
5403out: 5727out:
@@ -5484,7 +5808,7 @@ unsigned long netdev_increment_features(unsigned long all, unsigned long one,
5484 one |= NETIF_F_ALL_CSUM; 5808 one |= NETIF_F_ALL_CSUM;
5485 5809
5486 one |= all & NETIF_F_ONE_FOR_ALL; 5810 one |= all & NETIF_F_ONE_FOR_ALL;
5487 all &= one | NETIF_F_LLTX | NETIF_F_GSO; 5811 all &= one | NETIF_F_LLTX | NETIF_F_GSO | NETIF_F_UFO;
5488 all |= one & mask & NETIF_F_ONE_FOR_ALL; 5812 all |= one & mask & NETIF_F_ONE_FOR_ALL;
5489 5813
5490 return all; 5814 return all;
@@ -5566,14 +5890,13 @@ static struct pernet_operations __net_initdata netdev_net_ops = {
5566 5890
5567static void __net_exit default_device_exit(struct net *net) 5891static void __net_exit default_device_exit(struct net *net)
5568{ 5892{
5569 struct net_device *dev; 5893 struct net_device *dev, *aux;
5570 /* 5894 /*
5571 * Push all migratable of the network devices back to the 5895 * Push all migratable network devices back to the
5572 * initial network namespace 5896 * initial network namespace
5573 */ 5897 */
5574 rtnl_lock(); 5898 rtnl_lock();
5575restart: 5899 for_each_netdev_safe(net, dev, aux) {
5576 for_each_netdev(net, dev) {
5577 int err; 5900 int err;
5578 char fb_name[IFNAMSIZ]; 5901 char fb_name[IFNAMSIZ];
5579 5902
@@ -5581,11 +5904,9 @@ restart:
5581 if (dev->features & NETIF_F_NETNS_LOCAL) 5904 if (dev->features & NETIF_F_NETNS_LOCAL)
5582 continue; 5905 continue;
5583 5906
5584 /* Delete virtual devices */ 5907 /* Leave virtual devices for the generic cleanup */
5585 if (dev->rtnl_link_ops && dev->rtnl_link_ops->dellink) { 5908 if (dev->rtnl_link_ops)
5586 dev->rtnl_link_ops->dellink(dev); 5909 continue;
5587 goto restart;
5588 }
5589 5910
5590 /* Push remaining network devices to init_net */ 5911 /* Push remaining network devices to init_net */
5591 snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex); 5912 snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex);
@@ -5595,13 +5916,37 @@ restart:
5595 __func__, dev->name, err); 5916 __func__, dev->name, err);
5596 BUG(); 5917 BUG();
5597 } 5918 }
5598 goto restart;
5599 } 5919 }
5600 rtnl_unlock(); 5920 rtnl_unlock();
5601} 5921}
5602 5922
5923static void __net_exit default_device_exit_batch(struct list_head *net_list)
5924{
5925 /* At exit, all network devices must be removed from a network
5926 * namespace. Do this in the reverse order of registration.
5927 * Do this across as many network namespaces as possible to
5928 * improve batching efficiency.
5929 */
5930 struct net_device *dev;
5931 struct net *net;
5932 LIST_HEAD(dev_kill_list);
5933
5934 rtnl_lock();
5935 list_for_each_entry(net, net_list, exit_list) {
5936 for_each_netdev_reverse(net, dev) {
5937 if (dev->rtnl_link_ops)
5938 dev->rtnl_link_ops->dellink(dev, &dev_kill_list);
5939 else
5940 unregister_netdevice_queue(dev, &dev_kill_list);
5941 }
5942 }
5943 unregister_netdevice_many(&dev_kill_list);
5944 rtnl_unlock();
5945}
5946
5603static struct pernet_operations __net_initdata default_device_ops = { 5947static struct pernet_operations __net_initdata default_device_ops = {
5604 .exit = default_device_exit, 5948 .exit = default_device_exit,
5949 .exit_batch = default_device_exit_batch,
5605}; 5950};
5606 5951
5607/* 5952/*
diff --git a/net/core/dev_mcast.c b/net/core/dev_mcast.c
index 9e2fa39f22a3..3dc295beb483 100644
--- a/net/core/dev_mcast.c
+++ b/net/core/dev_mcast.c
@@ -96,7 +96,10 @@ int dev_mc_add(struct net_device *dev, void *addr, int alen, int glbl)
96 int err; 96 int err;
97 97
98 netif_addr_lock_bh(dev); 98 netif_addr_lock_bh(dev);
99 err = __dev_addr_add(&dev->mc_list, &dev->mc_count, addr, alen, glbl); 99 if (alen != dev->addr_len)
100 err = -EINVAL;
101 else
102 err = __dev_addr_add(&dev->mc_list, &dev->mc_count, addr, alen, glbl);
100 if (!err) 103 if (!err)
101 __dev_set_rx_mode(dev); 104 __dev_set_rx_mode(dev);
102 netif_addr_unlock_bh(dev); 105 netif_addr_unlock_bh(dev);
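With the added check, a caller passing an address length that does not match dev->addr_len now gets -EINVAL instead of a malformed list entry. A short sketch, assuming dev is an Ethernet device already held by the caller:

u8 mc[ETH_ALEN] = { 0x01, 0x00, 0x5e, 0x00, 0x00, 0x01 };
int err = dev_mc_add(dev, mc, ETH_ALEN, 0);  /* ok: alen == dev->addr_len */
/* dev_mc_add(dev, mc, 4, 0) would now fail with -EINVAL on Ethernet */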
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index 0a113f26bc9f..cf208d8042b1 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -21,6 +21,7 @@
21#include <linux/percpu.h> 21#include <linux/percpu.h>
22#include <linux/timer.h> 22#include <linux/timer.h>
23#include <linux/bitops.h> 23#include <linux/bitops.h>
24#include <linux/slab.h>
24#include <net/genetlink.h> 25#include <net/genetlink.h>
25#include <net/netevent.h> 26#include <net/netevent.h>
26 27
@@ -41,7 +42,7 @@ static void send_dm_alert(struct work_struct *unused);
41 * netlink alerts 42 * netlink alerts
42 */ 43 */
43static int trace_state = TRACE_OFF; 44static int trace_state = TRACE_OFF;
44static spinlock_t trace_state_lock = SPIN_LOCK_UNLOCKED; 45static DEFINE_SPINLOCK(trace_state_lock);
45 46
46struct per_cpu_dm_data { 47struct per_cpu_dm_data {
47 struct work_struct dm_alert_work; 48 struct work_struct dm_alert_work;
@@ -296,7 +297,6 @@ static int dropmon_net_event(struct notifier_block *ev_block,
296 297
297 new_stat->dev = dev; 298 new_stat->dev = dev;
298 new_stat->last_rx = jiffies; 299 new_stat->last_rx = jiffies;
299 INIT_RCU_HEAD(&new_stat->rcu);
300 spin_lock(&trace_state_lock); 300 spin_lock(&trace_state_lock);
301 list_add_rcu(&new_stat->list, &hw_stats_list); 301 list_add_rcu(&new_stat->list, &hw_stats_list);
302 spin_unlock(&trace_state_lock); 302 spin_unlock(&trace_state_lock);
diff --git a/net/core/dst.c b/net/core/dst.c
index 57bc4d5b8d08..f307bc18f6a0 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -12,11 +12,13 @@
12#include <linux/workqueue.h> 12#include <linux/workqueue.h>
13#include <linux/mm.h> 13#include <linux/mm.h>
14#include <linux/module.h> 14#include <linux/module.h>
15#include <linux/slab.h>
15#include <linux/netdevice.h> 16#include <linux/netdevice.h>
16#include <linux/skbuff.h> 17#include <linux/skbuff.h>
17#include <linux/string.h> 18#include <linux/string.h>
18#include <linux/types.h> 19#include <linux/types.h>
19#include <net/net_namespace.h> 20#include <net/net_namespace.h>
21#include <linux/sched.h>
20 22
21#include <net/dst.h> 23#include <net/dst.h>
22 24
@@ -79,6 +81,7 @@ loop:
79 while ((dst = next) != NULL) { 81 while ((dst = next) != NULL) {
80 next = dst->next; 82 next = dst->next;
81 prefetch(&next->next); 83 prefetch(&next->next);
84 cond_resched();
82 if (likely(atomic_read(&dst->__refcnt))) { 85 if (likely(atomic_read(&dst->__refcnt))) {
83 last->next = dst; 86 last->next = dst;
84 last = dst; 87 last = dst;
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 4c12ddb5f5ee..9d55c57f318a 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -17,6 +17,8 @@
17#include <linux/errno.h> 17#include <linux/errno.h>
18#include <linux/ethtool.h> 18#include <linux/ethtool.h>
19#include <linux/netdevice.h> 19#include <linux/netdevice.h>
20#include <linux/bitops.h>
21#include <linux/slab.h>
20#include <asm/uaccess.h> 22#include <asm/uaccess.h>
21 23
22/* 24/*
@@ -120,7 +122,7 @@ int ethtool_op_set_ufo(struct net_device *dev, u32 data)
120 * NETIF_F_xxx values in include/linux/netdevice.h 122 * NETIF_F_xxx values in include/linux/netdevice.h
121 */ 123 */
122static const u32 flags_dup_features = 124static const u32 flags_dup_features =
123 ETH_FLAG_LRO; 125 (ETH_FLAG_LRO | ETH_FLAG_NTUPLE);
124 126
125u32 ethtool_op_get_flags(struct net_device *dev) 127u32 ethtool_op_get_flags(struct net_device *dev)
126{ 128{
@@ -134,19 +136,44 @@ u32 ethtool_op_get_flags(struct net_device *dev)
134 136
135int ethtool_op_set_flags(struct net_device *dev, u32 data) 137int ethtool_op_set_flags(struct net_device *dev, u32 data)
136{ 138{
139 const struct ethtool_ops *ops = dev->ethtool_ops;
140 unsigned long features = dev->features;
141
137 if (data & ETH_FLAG_LRO) 142 if (data & ETH_FLAG_LRO)
138 dev->features |= NETIF_F_LRO; 143 features |= NETIF_F_LRO;
139 else 144 else
140 dev->features &= ~NETIF_F_LRO; 145 features &= ~NETIF_F_LRO;
146
147 if (data & ETH_FLAG_NTUPLE) {
148 if (!ops->set_rx_ntuple)
149 return -EOPNOTSUPP;
150 features |= NETIF_F_NTUPLE;
151 } else {
152 /* safe to clear regardless */
153 features &= ~NETIF_F_NTUPLE;
154 }
141 155
156 dev->features = features;
142 return 0; 157 return 0;
143} 158}
144 159
160void ethtool_ntuple_flush(struct net_device *dev)
161{
162 struct ethtool_rx_ntuple_flow_spec_container *fsc, *f;
163
164 list_for_each_entry_safe(fsc, f, &dev->ethtool_ntuple_list.list, list) {
165 list_del(&fsc->list);
166 kfree(fsc);
167 }
168 dev->ethtool_ntuple_list.count = 0;
169}
170EXPORT_SYMBOL(ethtool_ntuple_flush);
171
145/* Handlers for each ethtool command */ 172/* Handlers for each ethtool command */
146 173
147static int ethtool_get_settings(struct net_device *dev, void __user *useraddr) 174static int ethtool_get_settings(struct net_device *dev, void __user *useraddr)
148{ 175{
149 struct ethtool_cmd cmd = { ETHTOOL_GSET }; 176 struct ethtool_cmd cmd = { .cmd = ETHTOOL_GSET };
150 int err; 177 int err;
151 178
152 if (!dev->ethtool_ops->get_settings) 179 if (!dev->ethtool_ops->get_settings)
@@ -174,7 +201,7 @@ static int ethtool_set_settings(struct net_device *dev, void __user *useraddr)
174 return dev->ethtool_ops->set_settings(dev, &cmd); 201 return dev->ethtool_ops->set_settings(dev, &cmd);
175} 202}
176 203
177static int ethtool_get_drvinfo(struct net_device *dev, void __user *useraddr) 204static noinline_for_stack int ethtool_get_drvinfo(struct net_device *dev, void __user *useraddr)
178{ 205{
179 struct ethtool_drvinfo info; 206 struct ethtool_drvinfo info;
180 const struct ethtool_ops *ops = dev->ethtool_ops; 207 const struct ethtool_ops *ops = dev->ethtool_ops;
@@ -186,6 +213,10 @@ static int ethtool_get_drvinfo(struct net_device *dev, void __user *useraddr)
186 info.cmd = ETHTOOL_GDRVINFO; 213 info.cmd = ETHTOOL_GDRVINFO;
187 ops->get_drvinfo(dev, &info); 214 ops->get_drvinfo(dev, &info);
188 215
216 /*
217 * This method of obtaining string set info is deprecated;
218 * use ETHTOOL_GSSET_INFO instead.
219 */
189 if (ops->get_sset_count) { 220 if (ops->get_sset_count) {
190 int rc; 221 int rc;
191 222
@@ -198,13 +229,6 @@ static int ethtool_get_drvinfo(struct net_device *dev, void __user *useraddr)
198 rc = ops->get_sset_count(dev, ETH_SS_PRIV_FLAGS); 229 rc = ops->get_sset_count(dev, ETH_SS_PRIV_FLAGS);
199 if (rc >= 0) 230 if (rc >= 0)
200 info.n_priv_flags = rc; 231 info.n_priv_flags = rc;
201 } else {
202 /* code path for obsolete hooks */
203
204 if (ops->self_test_count)
205 info.testinfo_len = ops->self_test_count(dev);
206 if (ops->get_stats_count)
207 info.n_stats = ops->get_stats_count(dev);
208 } 232 }
209 if (ops->get_regs_len) 233 if (ops->get_regs_len)
210 info.regdump_len = ops->get_regs_len(dev); 234 info.regdump_len = ops->get_regs_len(dev);
@@ -216,7 +240,67 @@ static int ethtool_get_drvinfo(struct net_device *dev, void __user *useraddr)
216 return 0; 240 return 0;
217} 241}
218 242
219static int ethtool_set_rxnfc(struct net_device *dev, void __user *useraddr) 243static noinline_for_stack int ethtool_get_sset_info(struct net_device *dev,
244 void __user *useraddr)
245{
246 struct ethtool_sset_info info;
247 const struct ethtool_ops *ops = dev->ethtool_ops;
248 u64 sset_mask;
249 int i, idx = 0, n_bits = 0, ret, rc;
250 u32 *info_buf = NULL;
251
252 if (!ops->get_sset_count)
253 return -EOPNOTSUPP;
254
255 if (copy_from_user(&info, useraddr, sizeof(info)))
256 return -EFAULT;
257
258 /* store copy of mask, because we zero struct later on */
259 sset_mask = info.sset_mask;
260 if (!sset_mask)
261 return 0;
262
263 /* calculate size of return buffer */
264 n_bits = hweight64(sset_mask);
265
266 memset(&info, 0, sizeof(info));
267 info.cmd = ETHTOOL_GSSET_INFO;
268
269 info_buf = kzalloc(n_bits * sizeof(u32), GFP_USER);
270 if (!info_buf)
271 return -ENOMEM;
272
273 /*
274 * fill return buffer based on input bitmask and successful
275 * get_sset_count return
276 */
277 for (i = 0; i < 64; i++) {
278 if (!(sset_mask & (1ULL << i)))
279 continue;
280
281 rc = ops->get_sset_count(dev, i);
282 if (rc >= 0) {
283 info.sset_mask |= (1ULL << i);
284 info_buf[idx++] = rc;
285 }
286 }
287
288 ret = -EFAULT;
289 if (copy_to_user(useraddr, &info, sizeof(info)))
290 goto out;
291
292 useraddr += offsetof(struct ethtool_sset_info, data);
293 if (copy_to_user(useraddr, info_buf, idx * sizeof(u32)))
294 goto out;
295
296 ret = 0;
297
298out:
299 kfree(info_buf);
300 return ret;
301}
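From userspace the new command is reached through the usual SIOCETHTOOL ioctl. A hedged sketch that asks for the ETH_SS_STATS count on eth0; the layout (cmd, reserved, sset_mask, then one u32 per set bit in the returned mask) follows the copy logic above, and the struct requires installed headers at least as new as this patch:

#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <linux/ethtool.h>
#include <linux/sockios.h>

int main(void)
{
        struct {
                struct ethtool_sset_info hdr;
                __u32 buf[1];                /* room for one count */
        } req;
        struct ifreq ifr;
        int fd = socket(AF_INET, SOCK_DGRAM, 0);

        memset(&req, 0, sizeof(req));
        req.hdr.cmd = ETHTOOL_GSSET_INFO;
        req.hdr.sset_mask = 1ULL << ETH_SS_STATS;

        memset(&ifr, 0, sizeof(ifr));
        strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);
        ifr.ifr_data = (void *)&req;

        if (ioctl(fd, SIOCETHTOOL, &ifr) == 0 && req.hdr.sset_mask)
                printf("eth0 exports %u stats\n", req.buf[0]);
        return 0;
}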
302
303static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev, void __user *useraddr)
220{ 304{
221 struct ethtool_rxnfc cmd; 305 struct ethtool_rxnfc cmd;
222 306
@@ -229,7 +313,7 @@ static int ethtool_set_rxnfc(struct net_device *dev, void __user *useraddr)
229 return dev->ethtool_ops->set_rxnfc(dev, &cmd); 313 return dev->ethtool_ops->set_rxnfc(dev, &cmd);
230} 314}
231 315
232static int ethtool_get_rxnfc(struct net_device *dev, void __user *useraddr) 316static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev, void __user *useraddr)
233{ 317{
234 struct ethtool_rxnfc info; 318 struct ethtool_rxnfc info;
235 const struct ethtool_ops *ops = dev->ethtool_ops; 319 const struct ethtool_ops *ops = dev->ethtool_ops;
@@ -273,6 +357,312 @@ err_out:
273 return ret; 357 return ret;
274} 358}
275 359
360static void __rx_ntuple_filter_add(struct ethtool_rx_ntuple_list *list,
361 struct ethtool_rx_ntuple_flow_spec *spec,
362 struct ethtool_rx_ntuple_flow_spec_container *fsc)
363{
364
365 /* don't add filters forever */
366 if (list->count >= ETHTOOL_MAX_NTUPLE_LIST_ENTRY) {
367 /* free the container */
368 kfree(fsc);
369 return;
370 }
371
372 /* Copy the whole filter over */
373 fsc->fs.flow_type = spec->flow_type;
374 memcpy(&fsc->fs.h_u, &spec->h_u, sizeof(spec->h_u));
375 memcpy(&fsc->fs.m_u, &spec->m_u, sizeof(spec->m_u));
376
377 fsc->fs.vlan_tag = spec->vlan_tag;
378 fsc->fs.vlan_tag_mask = spec->vlan_tag_mask;
379 fsc->fs.data = spec->data;
380 fsc->fs.data_mask = spec->data_mask;
381 fsc->fs.action = spec->action;
382
383 /* add to the list */
384 list_add_tail_rcu(&fsc->list, &list->list);
385 list->count++;
386}
387
388static noinline_for_stack int ethtool_set_rx_ntuple(struct net_device *dev, void __user *useraddr)
389{
390 struct ethtool_rx_ntuple cmd;
391 const struct ethtool_ops *ops = dev->ethtool_ops;
392 struct ethtool_rx_ntuple_flow_spec_container *fsc = NULL;
393 int ret;
394
395 if (!(dev->features & NETIF_F_NTUPLE))
396 return -EINVAL;
397
398 if (copy_from_user(&cmd, useraddr, sizeof(cmd)))
399 return -EFAULT;
400
401 /*
402 * Cache filter in dev struct for GET operation only if
403 * the underlying driver doesn't have its own GET operation, and
404 * only if the filter was added successfully. First make sure we
405 * can allocate the filter, then continue if successful.
406 */
407 if (!ops->get_rx_ntuple) {
408 fsc = kmalloc(sizeof(*fsc), GFP_ATOMIC);
409 if (!fsc)
410 return -ENOMEM;
411 }
412
413 ret = ops->set_rx_ntuple(dev, &cmd);
414 if (ret) {
415 kfree(fsc);
416 return ret;
417 }
418
419 if (!ops->get_rx_ntuple)
420 __rx_ntuple_filter_add(&dev->ethtool_ntuple_list, &cmd.fs, fsc);
421
422 return ret;
423}
424
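Setting a filter from userspace uses the same ioctl with struct ethtool_rx_ntuple. A hedged sketch steering TCP traffic for destination port 80 to rx queue 0; field names follow the spec-container copy above, and the mask semantics are driver-defined in this API, so the masks are simply left zero here:

#include <string.h>
#include <arpa/inet.h>
#include <sys/ioctl.h>
#include <net/if.h>
#include <linux/ethtool.h>
#include <linux/sockios.h>

int set_port80_filter(int fd, const char *ifname)
{
        struct ethtool_rx_ntuple nt;
        struct ifreq ifr;

        memset(&nt, 0, sizeof(nt));
        nt.cmd = ETHTOOL_SRXNTUPLE;
        nt.fs.flow_type = TCP_V4_FLOW;
        nt.fs.h_u.tcp_ip4_spec.pdst = htons(80);  /* match dest port 80 */
        nt.fs.action = 0;                         /* direct to rx queue 0 */

        memset(&ifr, 0, sizeof(ifr));
        strncpy(ifr.ifr_name, ifname, IFNAMSIZ - 1);
        ifr.ifr_data = (void *)&nt;
        return ioctl(fd, SIOCETHTOOL, &ifr);      /* fd: any AF_INET socket */
}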
425static int ethtool_get_rx_ntuple(struct net_device *dev, void __user *useraddr)
426{
427 struct ethtool_gstrings gstrings;
428 const struct ethtool_ops *ops = dev->ethtool_ops;
429 struct ethtool_rx_ntuple_flow_spec_container *fsc;
430 u8 *data;
431 char *p;
432 int ret, i, num_strings = 0;
433
434 if (!ops->get_sset_count)
435 return -EOPNOTSUPP;
436
437 if (copy_from_user(&gstrings, useraddr, sizeof(gstrings)))
438 return -EFAULT;
439
440 ret = ops->get_sset_count(dev, gstrings.string_set);
441 if (ret < 0)
442 return ret;
443
444 gstrings.len = ret;
445
446 data = kmalloc(gstrings.len * ETH_GSTRING_LEN, GFP_USER);
447 if (!data)
448 return -ENOMEM;
449
450 if (ops->get_rx_ntuple) {
451 /* driver-specific filter grab */
452 ret = ops->get_rx_ntuple(dev, gstrings.string_set, data);
453 goto copy;
454 }
455
456 /* default ethtool filter grab */
457 i = 0;
458 p = (char *)data;
459 list_for_each_entry(fsc, &dev->ethtool_ntuple_list.list, list) {
460 sprintf(p, "Filter %d:\n", i);
461 p += ETH_GSTRING_LEN;
462 num_strings++;
463
464 switch (fsc->fs.flow_type) {
465 case TCP_V4_FLOW:
466 sprintf(p, "\tFlow Type: TCP\n");
467 p += ETH_GSTRING_LEN;
468 num_strings++;
469 break;
470 case UDP_V4_FLOW:
471 sprintf(p, "\tFlow Type: UDP\n");
472 p += ETH_GSTRING_LEN;
473 num_strings++;
474 break;
475 case SCTP_V4_FLOW:
476 sprintf(p, "\tFlow Type: SCTP\n");
477 p += ETH_GSTRING_LEN;
478 num_strings++;
479 break;
480 case AH_ESP_V4_FLOW:
481 sprintf(p, "\tFlow Type: AH ESP\n");
482 p += ETH_GSTRING_LEN;
483 num_strings++;
484 break;
485 case ESP_V4_FLOW:
486 sprintf(p, "\tFlow Type: ESP\n");
487 p += ETH_GSTRING_LEN;
488 num_strings++;
489 break;
490 case IP_USER_FLOW:
491 sprintf(p, "\tFlow Type: Raw IP\n");
492 p += ETH_GSTRING_LEN;
493 num_strings++;
494 break;
495 case IPV4_FLOW:
496 sprintf(p, "\tFlow Type: IPv4\n");
497 p += ETH_GSTRING_LEN;
498 num_strings++;
499 break;
500 default:
501 sprintf(p, "\tFlow Type: Unknown\n");
502 p += ETH_GSTRING_LEN;
503 num_strings++;
504 goto unknown_filter;
505 }
506
507 /* now the rest of the filters */
508 switch (fsc->fs.flow_type) {
509 case TCP_V4_FLOW:
510 case UDP_V4_FLOW:
511 case SCTP_V4_FLOW:
512 sprintf(p, "\tSrc IP addr: 0x%x\n",
513 fsc->fs.h_u.tcp_ip4_spec.ip4src);
514 p += ETH_GSTRING_LEN;
515 num_strings++;
516 sprintf(p, "\tSrc IP mask: 0x%x\n",
517 fsc->fs.m_u.tcp_ip4_spec.ip4src);
518 p += ETH_GSTRING_LEN;
519 num_strings++;
520 sprintf(p, "\tDest IP addr: 0x%x\n",
521 fsc->fs.h_u.tcp_ip4_spec.ip4dst);
522 p += ETH_GSTRING_LEN;
523 num_strings++;
524 sprintf(p, "\tDest IP mask: 0x%x\n",
525 fsc->fs.m_u.tcp_ip4_spec.ip4dst);
526 p += ETH_GSTRING_LEN;
527 num_strings++;
528 sprintf(p, "\tSrc Port: %d, mask: 0x%x\n",
529 fsc->fs.h_u.tcp_ip4_spec.psrc,
530 fsc->fs.m_u.tcp_ip4_spec.psrc);
531 p += ETH_GSTRING_LEN;
532 num_strings++;
533 sprintf(p, "\tDest Port: %d, mask: 0x%x\n",
534 fsc->fs.h_u.tcp_ip4_spec.pdst,
535 fsc->fs.m_u.tcp_ip4_spec.pdst);
536 p += ETH_GSTRING_LEN;
537 num_strings++;
538 sprintf(p, "\tTOS: %d, mask: 0x%x\n",
539 fsc->fs.h_u.tcp_ip4_spec.tos,
540 fsc->fs.m_u.tcp_ip4_spec.tos);
541 p += ETH_GSTRING_LEN;
542 num_strings++;
543 break;
544 case AH_ESP_V4_FLOW:
545 case ESP_V4_FLOW:
546 sprintf(p, "\tSrc IP addr: 0x%x\n",
547 fsc->fs.h_u.ah_ip4_spec.ip4src);
548 p += ETH_GSTRING_LEN;
549 num_strings++;
550 sprintf(p, "\tSrc IP mask: 0x%x\n",
551 fsc->fs.m_u.ah_ip4_spec.ip4src);
552 p += ETH_GSTRING_LEN;
553 num_strings++;
554 sprintf(p, "\tDest IP addr: 0x%x\n",
555 fsc->fs.h_u.ah_ip4_spec.ip4dst);
556 p += ETH_GSTRING_LEN;
557 num_strings++;
558 sprintf(p, "\tDest IP mask: 0x%x\n",
559 fsc->fs.m_u.ah_ip4_spec.ip4dst);
560 p += ETH_GSTRING_LEN;
561 num_strings++;
562 sprintf(p, "\tSPI: %d, mask: 0x%x\n",
563 fsc->fs.h_u.ah_ip4_spec.spi,
564 fsc->fs.m_u.ah_ip4_spec.spi);
565 p += ETH_GSTRING_LEN;
566 num_strings++;
567 sprintf(p, "\tTOS: %d, mask: 0x%x\n",
568 fsc->fs.h_u.ah_ip4_spec.tos,
569 fsc->fs.m_u.ah_ip4_spec.tos);
570 p += ETH_GSTRING_LEN;
571 num_strings++;
572 break;
573 case IP_USER_FLOW:
574 sprintf(p, "\tSrc IP addr: 0x%x\n",
575 fsc->fs.h_u.raw_ip4_spec.ip4src);
576 p += ETH_GSTRING_LEN;
577 num_strings++;
578 sprintf(p, "\tSrc IP mask: 0x%x\n",
579 fsc->fs.m_u.raw_ip4_spec.ip4src);
580 p += ETH_GSTRING_LEN;
581 num_strings++;
582 sprintf(p, "\tDest IP addr: 0x%x\n",
583 fsc->fs.h_u.raw_ip4_spec.ip4dst);
584 p += ETH_GSTRING_LEN;
585 num_strings++;
586 sprintf(p, "\tDest IP mask: 0x%x\n",
587 fsc->fs.m_u.raw_ip4_spec.ip4dst);
588 p += ETH_GSTRING_LEN;
589 num_strings++;
590 break;
591 case IPV4_FLOW:
592 sprintf(p, "\tSrc IP addr: 0x%x\n",
593 fsc->fs.h_u.usr_ip4_spec.ip4src);
594 p += ETH_GSTRING_LEN;
595 num_strings++;
596 sprintf(p, "\tSrc IP mask: 0x%x\n",
597 fsc->fs.m_u.usr_ip4_spec.ip4src);
598 p += ETH_GSTRING_LEN;
599 num_strings++;
600 sprintf(p, "\tDest IP addr: 0x%x\n",
601 fsc->fs.h_u.usr_ip4_spec.ip4dst);
602 p += ETH_GSTRING_LEN;
603 num_strings++;
604 sprintf(p, "\tDest IP mask: 0x%x\n",
605 fsc->fs.m_u.usr_ip4_spec.ip4dst);
606 p += ETH_GSTRING_LEN;
607 num_strings++;
608 sprintf(p, "\tL4 bytes: 0x%x, mask: 0x%x\n",
609 fsc->fs.h_u.usr_ip4_spec.l4_4_bytes,
610 fsc->fs.m_u.usr_ip4_spec.l4_4_bytes);
611 p += ETH_GSTRING_LEN;
612 num_strings++;
613 sprintf(p, "\tTOS: %d, mask: 0x%x\n",
614 fsc->fs.h_u.usr_ip4_spec.tos,
615 fsc->fs.m_u.usr_ip4_spec.tos);
616 p += ETH_GSTRING_LEN;
617 num_strings++;
618 sprintf(p, "\tIP Version: %d, mask: 0x%x\n",
619 fsc->fs.h_u.usr_ip4_spec.ip_ver,
620 fsc->fs.m_u.usr_ip4_spec.ip_ver);
621 p += ETH_GSTRING_LEN;
622 num_strings++;
623 sprintf(p, "\tProtocol: %d, mask: 0x%x\n",
624 fsc->fs.h_u.usr_ip4_spec.proto,
625 fsc->fs.m_u.usr_ip4_spec.proto);
626 p += ETH_GSTRING_LEN;
627 num_strings++;
628 break;
629 }
630 sprintf(p, "\tVLAN: %d, mask: 0x%x\n",
631 fsc->fs.vlan_tag, fsc->fs.vlan_tag_mask);
632 p += ETH_GSTRING_LEN;
633 num_strings++;
634 sprintf(p, "\tUser-defined: 0x%Lx\n", fsc->fs.data);
635 p += ETH_GSTRING_LEN;
636 num_strings++;
637 sprintf(p, "\tUser-defined mask: 0x%Lx\n", fsc->fs.data_mask);
638 p += ETH_GSTRING_LEN;
639 num_strings++;
640 if (fsc->fs.action == ETHTOOL_RXNTUPLE_ACTION_DROP)
641 sprintf(p, "\tAction: Drop\n");
642 else
643 sprintf(p, "\tAction: Direct to queue %d\n",
644 fsc->fs.action);
645 p += ETH_GSTRING_LEN;
646 num_strings++;
647unknown_filter:
648 i++;
649 }
650copy:
651 /* indicate to userspace how many strings we actually have */
652 gstrings.len = num_strings;
653 ret = -EFAULT;
654 if (copy_to_user(useraddr, &gstrings, sizeof(gstrings)))
655 goto out;
656 useraddr += sizeof(gstrings);
657 if (copy_to_user(useraddr, data, gstrings.len * ETH_GSTRING_LEN))
658 goto out;
659 ret = 0;
660
661out:
662 kfree(data);
663 return ret;
664}
665
276static int ethtool_get_regs(struct net_device *dev, char __user *useraddr) 666static int ethtool_get_regs(struct net_device *dev, char __user *useraddr)
277{ 667{
278 struct ethtool_regs regs; 668 struct ethtool_regs regs;
@@ -309,9 +699,29 @@ static int ethtool_get_regs(struct net_device *dev, char __user *useraddr)
309 return ret; 699 return ret;
310} 700}
311 701
702static int ethtool_reset(struct net_device *dev, char __user *useraddr)
703{
704 struct ethtool_value reset;
705 int ret;
706
707 if (!dev->ethtool_ops->reset)
708 return -EOPNOTSUPP;
709
710 if (copy_from_user(&reset, useraddr, sizeof(reset)))
711 return -EFAULT;
712
713 ret = dev->ethtool_ops->reset(dev, &reset.data);
714 if (ret)
715 return ret;
716
717 if (copy_to_user(useraddr, &reset, sizeof(reset)))
718 return -EFAULT;
719 return 0;
720}
721
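The reset command reuses struct ethtool_value: userspace passes a bitmask of ETH_RESET_* components, and the driver clears the bits it actually handled, so bits still set on return were not reset. A sketch requesting a full reset:

#include <string.h>
#include <sys/ioctl.h>
#include <net/if.h>
#include <linux/ethtool.h>
#include <linux/sockios.h>

int reset_nic(int fd, const char *ifname)
{
        struct ethtool_value reset = { .cmd = ETHTOOL_RESET, .data = ETH_RESET_ALL };
        struct ifreq ifr;

        memset(&ifr, 0, sizeof(ifr));
        strncpy(ifr.ifr_name, ifname, IFNAMSIZ - 1);
        ifr.ifr_data = (void *)&reset;
        if (ioctl(fd, SIOCETHTOOL, &ifr))
                return -1;
        return reset.data ? 1 : 0;  /* 1: some components were not reset */
}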
312static int ethtool_get_wol(struct net_device *dev, char __user *useraddr) 722static int ethtool_get_wol(struct net_device *dev, char __user *useraddr)
313{ 723{
314 struct ethtool_wolinfo wol = { ETHTOOL_GWOL }; 724 struct ethtool_wolinfo wol = { .cmd = ETHTOOL_GWOL };
315 725
316 if (!dev->ethtool_ops->get_wol) 726 if (!dev->ethtool_ops->get_wol)
317 return -EOPNOTSUPP; 727 return -EOPNOTSUPP;
@@ -443,9 +853,9 @@ static int ethtool_set_eeprom(struct net_device *dev, void __user *useraddr)
443 return ret; 853 return ret;
444} 854}
445 855
446static int ethtool_get_coalesce(struct net_device *dev, void __user *useraddr) 856static noinline_for_stack int ethtool_get_coalesce(struct net_device *dev, void __user *useraddr)
447{ 857{
448 struct ethtool_coalesce coalesce = { ETHTOOL_GCOALESCE }; 858 struct ethtool_coalesce coalesce = { .cmd = ETHTOOL_GCOALESCE };
449 859
450 if (!dev->ethtool_ops->get_coalesce) 860 if (!dev->ethtool_ops->get_coalesce)
451 return -EOPNOTSUPP; 861 return -EOPNOTSUPP;
@@ -457,7 +867,7 @@ static int ethtool_get_coalesce(struct net_device *dev, void __user *useraddr)
457 return 0; 867 return 0;
458} 868}
459 869
460static int ethtool_set_coalesce(struct net_device *dev, void __user *useraddr) 870static noinline_for_stack int ethtool_set_coalesce(struct net_device *dev, void __user *useraddr)
461{ 871{
462 struct ethtool_coalesce coalesce; 872 struct ethtool_coalesce coalesce;
463 873
@@ -472,7 +882,7 @@ static int ethtool_set_coalesce(struct net_device *dev, void __user *useraddr)
472 882
473static int ethtool_get_ringparam(struct net_device *dev, void __user *useraddr) 883static int ethtool_get_ringparam(struct net_device *dev, void __user *useraddr)
474{ 884{
475 struct ethtool_ringparam ringparam = { ETHTOOL_GRINGPARAM }; 885 struct ethtool_ringparam ringparam = { .cmd = ETHTOOL_GRINGPARAM };
476 886
477 if (!dev->ethtool_ops->get_ringparam) 887 if (!dev->ethtool_ops->get_ringparam)
478 return -EOPNOTSUPP; 888 return -EOPNOTSUPP;
@@ -684,16 +1094,10 @@ static int ethtool_self_test(struct net_device *dev, char __user *useraddr)
684 u64 *data; 1094 u64 *data;
685 int ret, test_len; 1095 int ret, test_len;
686 1096
687 if (!ops->self_test) 1097 if (!ops->self_test || !ops->get_sset_count)
688 return -EOPNOTSUPP;
689 if (!ops->get_sset_count && !ops->self_test_count)
690 return -EOPNOTSUPP; 1098 return -EOPNOTSUPP;
691 1099
692 if (ops->get_sset_count) 1100 test_len = ops->get_sset_count(dev, ETH_SS_TEST);
693 test_len = ops->get_sset_count(dev, ETH_SS_TEST);
694 else
695 /* code path for obsolete hook */
696 test_len = ops->self_test_count(dev);
697 if (test_len < 0) 1101 if (test_len < 0)
698 return test_len; 1102 return test_len;
699 WARN_ON(test_len == 0); 1103 WARN_ON(test_len == 0);
@@ -728,36 +1132,17 @@ static int ethtool_get_strings(struct net_device *dev, void __user *useraddr)
728 u8 *data; 1132 u8 *data;
729 int ret; 1133 int ret;
730 1134
731 if (!ops->get_strings) 1135 if (!ops->get_strings || !ops->get_sset_count)
732 return -EOPNOTSUPP; 1136 return -EOPNOTSUPP;
733 1137
734 if (copy_from_user(&gstrings, useraddr, sizeof(gstrings))) 1138 if (copy_from_user(&gstrings, useraddr, sizeof(gstrings)))
735 return -EFAULT; 1139 return -EFAULT;
736 1140
737 if (ops->get_sset_count) { 1141 ret = ops->get_sset_count(dev, gstrings.string_set);
738 ret = ops->get_sset_count(dev, gstrings.string_set); 1142 if (ret < 0)
739 if (ret < 0) 1143 return ret;
740 return ret;
741 1144
742 gstrings.len = ret; 1145 gstrings.len = ret;
743 } else {
744 /* code path for obsolete hooks */
745
746 switch (gstrings.string_set) {
747 case ETH_SS_TEST:
748 if (!ops->self_test_count)
749 return -EOPNOTSUPP;
750 gstrings.len = ops->self_test_count(dev);
751 break;
752 case ETH_SS_STATS:
753 if (!ops->get_stats_count)
754 return -EOPNOTSUPP;
755 gstrings.len = ops->get_stats_count(dev);
756 break;
757 default:
758 return -EINVAL;
759 }
760 }
761 1146
762 data = kmalloc(gstrings.len * ETH_GSTRING_LEN, GFP_USER); 1147 data = kmalloc(gstrings.len * ETH_GSTRING_LEN, GFP_USER);
763 if (!data) 1148 if (!data)
@@ -798,16 +1183,10 @@ static int ethtool_get_stats(struct net_device *dev, void __user *useraddr)
798 u64 *data; 1183 u64 *data;
799 int ret, n_stats; 1184 int ret, n_stats;
800 1185
801 if (!ops->get_ethtool_stats) 1186 if (!ops->get_ethtool_stats || !ops->get_sset_count)
802 return -EOPNOTSUPP;
803 if (!ops->get_sset_count && !ops->get_stats_count)
804 return -EOPNOTSUPP; 1187 return -EOPNOTSUPP;
805 1188
806 if (ops->get_sset_count) 1189 n_stats = ops->get_sset_count(dev, ETH_SS_STATS);
807 n_stats = ops->get_sset_count(dev, ETH_SS_STATS);
808 else
809 /* code path for obsolete hook */
810 n_stats = ops->get_stats_count(dev);
811 if (n_stats < 0) 1190 if (n_stats < 0)
812 return n_stats; 1191 return n_stats;
813 WARN_ON(n_stats == 0); 1192 WARN_ON(n_stats == 0);
@@ -857,7 +1236,7 @@ static int ethtool_get_perm_addr(struct net_device *dev, void __user *useraddr)
857static int ethtool_get_value(struct net_device *dev, char __user *useraddr, 1236static int ethtool_get_value(struct net_device *dev, char __user *useraddr,
858 u32 cmd, u32 (*actor)(struct net_device *)) 1237 u32 cmd, u32 (*actor)(struct net_device *))
859{ 1238{
860 struct ethtool_value edata = { cmd }; 1239 struct ethtool_value edata = { .cmd = cmd };
861 1240
862 if (!actor) 1241 if (!actor)
863 return -EOPNOTSUPP; 1242 return -EOPNOTSUPP;
@@ -898,7 +1277,7 @@ static int ethtool_set_value(struct net_device *dev, char __user *useraddr,
898 return actor(dev, edata.data); 1277 return actor(dev, edata.data);
899} 1278}
900 1279
901static int ethtool_flash_device(struct net_device *dev, char __user *useraddr) 1280static noinline_for_stack int ethtool_flash_device(struct net_device *dev, char __user *useraddr)
902{ 1281{
903 struct ethtool_flash efl; 1282 struct ethtool_flash efl;
904 1283
@@ -945,6 +1324,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
945 case ETHTOOL_GPERMADDR: 1324 case ETHTOOL_GPERMADDR:
946 case ETHTOOL_GUFO: 1325 case ETHTOOL_GUFO:
947 case ETHTOOL_GGSO: 1326 case ETHTOOL_GGSO:
1327 case ETHTOOL_GGRO:
948 case ETHTOOL_GFLAGS: 1328 case ETHTOOL_GFLAGS:
949 case ETHTOOL_GPFLAGS: 1329 case ETHTOOL_GPFLAGS:
950 case ETHTOOL_GRXFH: 1330 case ETHTOOL_GRXFH:
@@ -1127,6 +1507,18 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
1127 case ETHTOOL_FLASHDEV: 1507 case ETHTOOL_FLASHDEV:
1128 rc = ethtool_flash_device(dev, useraddr); 1508 rc = ethtool_flash_device(dev, useraddr);
1129 break; 1509 break;
1510 case ETHTOOL_RESET:
1511 rc = ethtool_reset(dev, useraddr);
1512 break;
1513 case ETHTOOL_SRXNTUPLE:
1514 rc = ethtool_set_rx_ntuple(dev, useraddr);
1515 break;
1516 case ETHTOOL_GRXNTUPLE:
1517 rc = ethtool_get_rx_ntuple(dev, useraddr);
1518 break;
1519 case ETHTOOL_GSSET_INFO:
1520 rc = ethtool_get_sset_info(dev, useraddr);
1521 break;
1130 default: 1522 default:
1131 rc = -EOPNOTSUPP; 1523 rc = -EOPNOTSUPP;
1132 } 1524 }
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index bd309384f8b8..d2c3e7dc2e5f 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -10,6 +10,7 @@
10 10
11#include <linux/types.h> 11#include <linux/types.h>
12#include <linux/kernel.h> 12#include <linux/kernel.h>
13#include <linux/slab.h>
13#include <linux/list.h> 14#include <linux/list.h>
14#include <net/net_namespace.h> 15#include <net/net_namespace.h>
15#include <net/sock.h> 16#include <net/sock.h>
@@ -72,7 +73,7 @@ static void flush_route_cache(struct fib_rules_ops *ops)
72 ops->flush_cache(ops); 73 ops->flush_cache(ops);
73} 74}
74 75
75int fib_rules_register(struct fib_rules_ops *ops) 76static int __fib_rules_register(struct fib_rules_ops *ops)
76{ 77{
77 int err = -EEXIST; 78 int err = -EEXIST;
78 struct fib_rules_ops *o; 79 struct fib_rules_ops *o;
@@ -102,6 +103,28 @@ errout:
102 return err; 103 return err;
103} 104}
104 105
106struct fib_rules_ops *
107fib_rules_register(struct fib_rules_ops *tmpl, struct net *net)
108{
109 struct fib_rules_ops *ops;
110 int err;
111
112 ops = kmemdup(tmpl, sizeof (*ops), GFP_KERNEL);
113 if (ops == NULL)
114 return ERR_PTR(-ENOMEM);
115
116 INIT_LIST_HEAD(&ops->rules_list);
117 ops->fro_net = net;
118
119 err = __fib_rules_register(ops);
120 if (err) {
121 kfree(ops);
122 ops = ERR_PTR(err);
123 }
124
125 return ops;
126}
127
105EXPORT_SYMBOL_GPL(fib_rules_register); 128EXPORT_SYMBOL_GPL(fib_rules_register);
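Address families now pass a template that the core duplicates with kmemdup for each namespace, so the original stays untouched across netns. Typical per-net init, loosely modeled on the IPv4 caller; the names and the storage field are illustrative:

static struct fib_rules_ops foo_rules_ops_template = {
        .family = AF_INET,
        /* .rule_size, .action, .match, ... hooks omitted */
};

static int __net_init foo_rules_net_init(struct net *net)
{
        struct fib_rules_ops *ops;

        ops = fib_rules_register(&foo_rules_ops_template, net);
        if (IS_ERR(ops))
                return PTR_ERR(ops);
        net->ipv4.rules_ops = ops;  /* stash the per-net copy */
        return 0;
}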
106 129
107void fib_rules_cleanup_ops(struct fib_rules_ops *ops) 130void fib_rules_cleanup_ops(struct fib_rules_ops *ops)
@@ -115,6 +138,15 @@ void fib_rules_cleanup_ops(struct fib_rules_ops *ops)
115} 138}
116EXPORT_SYMBOL_GPL(fib_rules_cleanup_ops); 139EXPORT_SYMBOL_GPL(fib_rules_cleanup_ops);
117 140
141static void fib_rules_put_rcu(struct rcu_head *head)
142{
143 struct fib_rules_ops *ops = container_of(head, struct fib_rules_ops, rcu);
144 struct net *net = ops->fro_net;
145
146 release_net(net);
147 kfree(ops);
148}
149
118void fib_rules_unregister(struct fib_rules_ops *ops) 150void fib_rules_unregister(struct fib_rules_ops *ops)
119{ 151{
120 struct net *net = ops->fro_net; 152 struct net *net = ops->fro_net;
@@ -124,8 +156,7 @@ void fib_rules_unregister(struct fib_rules_ops *ops)
124 fib_rules_cleanup_ops(ops); 156 fib_rules_cleanup_ops(ops);
125 spin_unlock(&net->rules_mod_lock); 157 spin_unlock(&net->rules_mod_lock);
126 158
127 synchronize_rcu(); 159 call_rcu(&ops->rcu, fib_rules_put_rcu);
128 release_net(net);
129} 160}
130 161
131EXPORT_SYMBOL_GPL(fib_rules_unregister); 162EXPORT_SYMBOL_GPL(fib_rules_unregister);
@@ -135,7 +166,10 @@ static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops,
135{ 166{
136 int ret = 0; 167 int ret = 0;
137 168
138 if (rule->ifindex && (rule->ifindex != fl->iif)) 169 if (rule->iifindex && (rule->iifindex != fl->iif))
170 goto out;
171
172 if (rule->oifindex && (rule->oifindex != fl->oif))
139 goto out; 173 goto out;
140 174
141 if ((rule->mark ^ fl->mark) & rule->mark_mask) 175 if ((rule->mark ^ fl->mark) & rule->mark_mask)
@@ -248,14 +282,24 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
248 if (tb[FRA_PRIORITY]) 282 if (tb[FRA_PRIORITY])
249 rule->pref = nla_get_u32(tb[FRA_PRIORITY]); 283 rule->pref = nla_get_u32(tb[FRA_PRIORITY]);
250 284
251 if (tb[FRA_IFNAME]) { 285 if (tb[FRA_IIFNAME]) {
286 struct net_device *dev;
287
288 rule->iifindex = -1;
289 nla_strlcpy(rule->iifname, tb[FRA_IIFNAME], IFNAMSIZ);
290 dev = __dev_get_by_name(net, rule->iifname);
291 if (dev)
292 rule->iifindex = dev->ifindex;
293 }
294
295 if (tb[FRA_OIFNAME]) {
252 struct net_device *dev; 296 struct net_device *dev;
253 297
254 rule->ifindex = -1; 298 rule->oifindex = -1;
255 nla_strlcpy(rule->ifname, tb[FRA_IFNAME], IFNAMSIZ); 299 nla_strlcpy(rule->oifname, tb[FRA_OIFNAME], IFNAMSIZ);
256 dev = __dev_get_by_name(net, rule->ifname); 300 dev = __dev_get_by_name(net, rule->oifname);
257 if (dev) 301 if (dev)
258 rule->ifindex = dev->ifindex; 302 rule->oifindex = dev->ifindex;
259 } 303 }
260 304
261 if (tb[FRA_FWMARK]) { 305 if (tb[FRA_FWMARK]) {
@@ -274,7 +318,7 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
274 rule->flags = frh->flags; 318 rule->flags = frh->flags;
275 rule->table = frh_get_table(frh, tb); 319 rule->table = frh_get_table(frh, tb);
276 320
277 if (!rule->pref && ops->default_pref) 321 if (!tb[FRA_PRIORITY] && ops->default_pref)
278 rule->pref = ops->default_pref(ops); 322 rule->pref = ops->default_pref(ops);
279 323
280 err = -EINVAL; 324 err = -EINVAL;
@@ -388,8 +432,12 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
388 (rule->pref != nla_get_u32(tb[FRA_PRIORITY]))) 432 (rule->pref != nla_get_u32(tb[FRA_PRIORITY])))
389 continue; 433 continue;
390 434
391 if (tb[FRA_IFNAME] && 435 if (tb[FRA_IIFNAME] &&
392 nla_strcmp(tb[FRA_IFNAME], rule->ifname)) 436 nla_strcmp(tb[FRA_IIFNAME], rule->iifname))
437 continue;
438
439 if (tb[FRA_OIFNAME] &&
440 nla_strcmp(tb[FRA_OIFNAME], rule->oifname))
393 continue; 441 continue;
394 442
395 if (tb[FRA_FWMARK] && 443 if (tb[FRA_FWMARK] &&
@@ -447,7 +495,8 @@ static inline size_t fib_rule_nlmsg_size(struct fib_rules_ops *ops,
447 struct fib_rule *rule) 495 struct fib_rule *rule)
448{ 496{
449 size_t payload = NLMSG_ALIGN(sizeof(struct fib_rule_hdr)) 497 size_t payload = NLMSG_ALIGN(sizeof(struct fib_rule_hdr))
450 + nla_total_size(IFNAMSIZ) /* FRA_IFNAME */ 498 + nla_total_size(IFNAMSIZ) /* FRA_IIFNAME */
499 + nla_total_size(IFNAMSIZ) /* FRA_OIFNAME */
451 + nla_total_size(4) /* FRA_PRIORITY */ 500 + nla_total_size(4) /* FRA_PRIORITY */
452 + nla_total_size(4) /* FRA_TABLE */ 501 + nla_total_size(4) /* FRA_TABLE */
453 + nla_total_size(4) /* FRA_FWMARK */ 502 + nla_total_size(4) /* FRA_FWMARK */
@@ -481,11 +530,18 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
481 if (rule->action == FR_ACT_GOTO && rule->ctarget == NULL) 530 if (rule->action == FR_ACT_GOTO && rule->ctarget == NULL)
482 frh->flags |= FIB_RULE_UNRESOLVED; 531 frh->flags |= FIB_RULE_UNRESOLVED;
483 532
484 if (rule->ifname[0]) { 533 if (rule->iifname[0]) {
485 NLA_PUT_STRING(skb, FRA_IFNAME, rule->ifname); 534 NLA_PUT_STRING(skb, FRA_IIFNAME, rule->iifname);
486 535
487 if (rule->ifindex == -1) 536 if (rule->iifindex == -1)
488 frh->flags |= FIB_RULE_DEV_DETACHED; 537 frh->flags |= FIB_RULE_IIF_DETACHED;
538 }
539
540 if (rule->oifname[0]) {
541 NLA_PUT_STRING(skb, FRA_OIFNAME, rule->oifname);
542
543 if (rule->oifindex == -1)
544 frh->flags |= FIB_RULE_OIF_DETACHED;
489 } 545 }
490 546
491 if (rule->pref) 547 if (rule->pref)
@@ -600,9 +656,12 @@ static void attach_rules(struct list_head *rules, struct net_device *dev)
600 struct fib_rule *rule; 656 struct fib_rule *rule;
601 657
602 list_for_each_entry(rule, rules, list) { 658 list_for_each_entry(rule, rules, list) {
603 if (rule->ifindex == -1 && 659 if (rule->iifindex == -1 &&
604 strcmp(dev->name, rule->ifname) == 0) 660 strcmp(dev->name, rule->iifname) == 0)
605 rule->ifindex = dev->ifindex; 661 rule->iifindex = dev->ifindex;
662 if (rule->oifindex == -1 &&
663 strcmp(dev->name, rule->oifname) == 0)
664 rule->oifindex = dev->ifindex;
606 } 665 }
607} 666}
608 667
@@ -610,9 +669,12 @@ static void detach_rules(struct list_head *rules, struct net_device *dev)
610{ 669{
611 struct fib_rule *rule; 670 struct fib_rule *rule;
612 671
613 list_for_each_entry(rule, rules, list) 672 list_for_each_entry(rule, rules, list) {
614 if (rule->ifindex == dev->ifindex) 673 if (rule->iifindex == dev->ifindex)
615 rule->ifindex = -1; 674 rule->iifindex = -1;
675 if (rule->oifindex == dev->ifindex)
676 rule->oifindex = -1;
677 }
616} 678}
617 679
618 680
@@ -647,7 +709,7 @@ static struct notifier_block fib_rules_notifier = {
647 .notifier_call = fib_rules_event, 709 .notifier_call = fib_rules_event,
648}; 710};
649 711
650static int fib_rules_net_init(struct net *net) 712static int __net_init fib_rules_net_init(struct net *net)
651{ 713{
652 INIT_LIST_HEAD(&net->rules_ops); 714 INIT_LIST_HEAD(&net->rules_ops);
653 spin_lock_init(&net->rules_mod_lock); 715 spin_lock_init(&net->rules_mod_lock);
diff --git a/net/core/filter.c b/net/core/filter.c
index d1d779ca096d..ff943bed21af 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -25,6 +25,7 @@
25#include <linux/inet.h> 25#include <linux/inet.h>
26#include <linux/netdevice.h> 26#include <linux/netdevice.h>
27#include <linux/if_packet.h> 27#include <linux/if_packet.h>
28#include <linux/gfp.h>
28#include <net/ip.h> 29#include <net/ip.h>
29#include <net/protocol.h> 30#include <net/protocol.h>
30#include <net/netlink.h> 31#include <net/netlink.h>
@@ -86,7 +87,7 @@ int sk_filter(struct sock *sk, struct sk_buff *skb)
86 return err; 87 return err;
87 88
88 rcu_read_lock_bh(); 89 rcu_read_lock_bh();
89 filter = rcu_dereference(sk->sk_filter); 90 filter = rcu_dereference_bh(sk->sk_filter);
90 if (filter) { 91 if (filter) {
91 unsigned int pkt_len = sk_run_filter(skb, filter->insns, 92 unsigned int pkt_len = sk_run_filter(skb, filter->insns,
92 filter->len); 93 filter->len);
@@ -303,6 +304,12 @@ load_b:
303 case SKF_AD_IFINDEX: 304 case SKF_AD_IFINDEX:
304 A = skb->dev->ifindex; 305 A = skb->dev->ifindex;
305 continue; 306 continue;
307 case SKF_AD_MARK:
308 A = skb->mark;
309 continue;
310 case SKF_AD_QUEUE:
311 A = skb->queue_mapping;
312 continue;
306 case SKF_AD_NLATTR: { 313 case SKF_AD_NLATTR: {
307 struct nlattr *nla; 314 struct nlattr *nla;
308 315
@@ -515,7 +522,7 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
515 } 522 }
516 523
517 rcu_read_lock_bh(); 524 rcu_read_lock_bh();
518 old_fp = rcu_dereference(sk->sk_filter); 525 old_fp = rcu_dereference_bh(sk->sk_filter);
519 rcu_assign_pointer(sk->sk_filter, fp); 526 rcu_assign_pointer(sk->sk_filter, fp);
520 rcu_read_unlock_bh(); 527 rcu_read_unlock_bh();
521 528
@@ -523,6 +530,7 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
523 sk_filter_delayed_uncharge(sk, old_fp); 530 sk_filter_delayed_uncharge(sk, old_fp);
524 return 0; 531 return 0;
525} 532}
533EXPORT_SYMBOL_GPL(sk_attach_filter);
526 534
527int sk_detach_filter(struct sock *sk) 535int sk_detach_filter(struct sock *sk)
528{ 536{
@@ -530,7 +538,7 @@ int sk_detach_filter(struct sock *sk)
530 struct sk_filter *filter; 538 struct sk_filter *filter;
531 539
532 rcu_read_lock_bh(); 540 rcu_read_lock_bh();
533 filter = rcu_dereference(sk->sk_filter); 541 filter = rcu_dereference_bh(sk->sk_filter);
534 if (filter) { 542 if (filter) {
535 rcu_assign_pointer(sk->sk_filter, NULL); 543 rcu_assign_pointer(sk->sk_filter, NULL);
536 sk_filter_delayed_uncharge(sk, filter); 544 sk_filter_delayed_uncharge(sk, filter);
@@ -539,3 +547,4 @@ int sk_detach_filter(struct sock *sk)
539 rcu_read_unlock_bh(); 547 rcu_read_unlock_bh();
540 return ret; 548 return ret;
541} 549}
550EXPORT_SYMBOL_GPL(sk_detach_filter);
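SKF_AD_MARK and SKF_AD_QUEUE (added in the sk_run_filter hunk above) let classic BPF read skb->mark and skb->queue_mapping via ancillary loads, and the new exports let in-kernel users attach filters the same way setsockopt(SO_ATTACH_FILTER) does. A userspace sketch that accepts only packets with mark 1; SKF_AD_MARK needs headers at least this new:

#include <linux/filter.h>
#include <sys/socket.h>

static struct sock_filter insns[] = {
        /* A = skb->mark (ancillary load) */
        BPF_STMT(BPF_LD | BPF_W | BPF_ABS, SKF_AD_OFF + SKF_AD_MARK),
        /* accept up to 64K bytes if mark == 1, else drop */
        BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 1, 0, 1),
        BPF_STMT(BPF_RET | BPF_K, 0xffff),
        BPF_STMT(BPF_RET | BPF_K, 0),
};
static struct sock_fprog prog = { .len = 4, .filter = insns };

int attach_mark_filter(int fd)
{
        return setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER,
                          &prog, sizeof(prog));
}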
diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c
index 493775f4f2f1..cf8e70392fe0 100644
--- a/net/core/gen_estimator.c
+++ b/net/core/gen_estimator.c
@@ -32,6 +32,7 @@
32#include <linux/rtnetlink.h> 32#include <linux/rtnetlink.h>
33#include <linux/init.h> 33#include <linux/init.h>
34#include <linux/rbtree.h> 34#include <linux/rbtree.h>
35#include <linux/slab.h>
35#include <net/sock.h> 36#include <net/sock.h>
36#include <net/gen_stats.h> 37#include <net/gen_stats.h>
37 38
diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c
index 8569310268ab..393b1d8618e2 100644
--- a/net/core/gen_stats.c
+++ b/net/core/gen_stats.c
@@ -127,6 +127,7 @@ gnet_stats_copy_basic(struct gnet_dump *d, struct gnet_stats_basic_packed *b)
127/** 127/**
128 * gnet_stats_copy_rate_est - copy rate estimator statistics into statistics TLV 128 * gnet_stats_copy_rate_est - copy rate estimator statistics into statistics TLV
129 * @d: dumping handle 129 * @d: dumping handle
130 * @b: basic statistics
130 * @r: rate estimator statistics 131 * @r: rate estimator statistics
131 * 132 *
132 * Appends the rate estimator statistics to the top level TLV created by 133 * Appends the rate estimator statistics to the top level TLV created by
@@ -136,8 +137,13 @@ gnet_stats_copy_basic(struct gnet_dump *d, struct gnet_stats_basic_packed *b)
136 * if the room in the socket buffer was not sufficient. 137 * if the room in the socket buffer was not sufficient.
137 */ 138 */
138int 139int
139gnet_stats_copy_rate_est(struct gnet_dump *d, struct gnet_stats_rate_est *r) 140gnet_stats_copy_rate_est(struct gnet_dump *d,
141 const struct gnet_stats_basic_packed *b,
142 struct gnet_stats_rate_est *r)
140{ 143{
144 if (b && !gen_estimator_active(b, r))
145 return 0;
146
141 if (d->compat_tc_stats) { 147 if (d->compat_tc_stats) {
142 d->tc_stats.bps = r->bps; 148 d->tc_stats.bps = r->bps;
143 d->tc_stats.pps = r->pps; 149 d->tc_stats.pps = r->pps;
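Dump paths now hand in the matching basic counters so the estimator TLV is emitted only when an estimator is actually running on them. A qdisc dump would call it roughly as below (sketch; q is a struct Qdisc, d an already-opened gnet_dump):

if (gnet_stats_copy_basic(&d, &q->bstats) < 0 ||
    gnet_stats_copy_rate_est(&d, &q->bstats, &q->rate_est) < 0)
        goto nla_put_failure;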
diff --git a/net/core/iovec.c b/net/core/iovec.c
index 16ad45d4882b..1e7f4e91a935 100644
--- a/net/core/iovec.c
+++ b/net/core/iovec.c
@@ -20,7 +20,6 @@
20#include <linux/module.h> 20#include <linux/module.h>
21#include <linux/kernel.h> 21#include <linux/kernel.h>
22#include <linux/mm.h> 22#include <linux/mm.h>
23#include <linux/slab.h>
24#include <linux/net.h> 23#include <linux/net.h>
25#include <linux/in6.h> 24#include <linux/in6.h>
26#include <asm/uaccess.h> 25#include <asm/uaccess.h>
diff --git a/net/core/link_watch.c b/net/core/link_watch.c
index bf8f7af699d7..bdbce2f5875b 100644
--- a/net/core/link_watch.c
+++ b/net/core/link_watch.c
@@ -19,7 +19,6 @@
19#include <linux/rtnetlink.h> 19#include <linux/rtnetlink.h>
20#include <linux/jiffies.h> 20#include <linux/jiffies.h>
21#include <linux/spinlock.h> 21#include <linux/spinlock.h>
22#include <linux/slab.h>
23#include <linux/workqueue.h> 22#include <linux/workqueue.h>
24#include <linux/bitops.h> 23#include <linux/bitops.h>
25#include <asm/types.h> 24#include <asm/types.h>
@@ -35,7 +34,7 @@ static unsigned long linkwatch_nextevent;
35static void linkwatch_event(struct work_struct *dummy); 34static void linkwatch_event(struct work_struct *dummy);
36static DECLARE_DELAYED_WORK(linkwatch_work, linkwatch_event); 35static DECLARE_DELAYED_WORK(linkwatch_work, linkwatch_event);
37 36
38static struct net_device *lweventlist; 37static LIST_HEAD(lweventlist);
39static DEFINE_SPINLOCK(lweventlist_lock); 38static DEFINE_SPINLOCK(lweventlist_lock);
40 39
41static unsigned char default_operstate(const struct net_device *dev) 40static unsigned char default_operstate(const struct net_device *dev)
@@ -89,8 +88,10 @@ static void linkwatch_add_event(struct net_device *dev)
89 unsigned long flags; 88 unsigned long flags;
90 89
91 spin_lock_irqsave(&lweventlist_lock, flags); 90 spin_lock_irqsave(&lweventlist_lock, flags);
92 dev->link_watch_next = lweventlist; 91 if (list_empty(&dev->link_watch_list)) {
93 lweventlist = dev; 92 list_add_tail(&dev->link_watch_list, &lweventlist);
93 dev_hold(dev);
94 }
94 spin_unlock_irqrestore(&lweventlist_lock, flags); 95 spin_unlock_irqrestore(&lweventlist_lock, flags);
95} 96}
96 97
@@ -133,9 +134,35 @@ static void linkwatch_schedule_work(int urgent)
133} 134}
134 135
135 136
137static void linkwatch_do_dev(struct net_device *dev)
138{
139 /*
140 * Make sure the above read is complete since it can be
141 * rewritten as soon as we clear the bit below.
142 */
143 smp_mb__before_clear_bit();
144
145 /* We are about to handle this device,
146 * so new events can be accepted
147 */
148 clear_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state);
149
150 rfc2863_policy(dev);
151 if (dev->flags & IFF_UP) {
152 if (netif_carrier_ok(dev))
153 dev_activate(dev);
154 else
155 dev_deactivate(dev);
156
157 netdev_state_change(dev);
158 }
159 dev_put(dev);
160}
161
136static void __linkwatch_run_queue(int urgent_only) 162static void __linkwatch_run_queue(int urgent_only)
137{ 163{
138 struct net_device *next; 164 struct net_device *dev;
165 LIST_HEAD(wrk);
139 166
140 /* 167 /*
141 * Limit the number of linkwatch events to one 168 * Limit the number of linkwatch events to one
@@ -153,46 +180,40 @@ static void __linkwatch_run_queue(int urgent_only)
153 clear_bit(LW_URGENT, &linkwatch_flags); 180 clear_bit(LW_URGENT, &linkwatch_flags);
154 181
155 spin_lock_irq(&lweventlist_lock); 182 spin_lock_irq(&lweventlist_lock);
156 next = lweventlist; 183 list_splice_init(&lweventlist, &wrk);
157 lweventlist = NULL;
158 spin_unlock_irq(&lweventlist_lock);
159 184
160 while (next) { 185 while (!list_empty(&wrk)) {
161 struct net_device *dev = next;
162 186
163 next = dev->link_watch_next; 187 dev = list_first_entry(&wrk, struct net_device, link_watch_list);
188 list_del_init(&dev->link_watch_list);
164 189
165 if (urgent_only && !linkwatch_urgent_event(dev)) { 190 if (urgent_only && !linkwatch_urgent_event(dev)) {
166 linkwatch_add_event(dev); 191 list_add_tail(&dev->link_watch_list, &lweventlist);
167 continue; 192 continue;
168 } 193 }
169 194 spin_unlock_irq(&lweventlist_lock);
170 /* 195 linkwatch_do_dev(dev);
171 * Make sure the above read is complete since it can be 196 spin_lock_irq(&lweventlist_lock);
172 * rewritten as soon as we clear the bit below.
173 */
174 smp_mb__before_clear_bit();
175
176 /* We are about to handle this device,
177 * so new events can be accepted
178 */
179 clear_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state);
180
181 rfc2863_policy(dev);
182 if (dev->flags & IFF_UP) {
183 if (netif_carrier_ok(dev))
184 dev_activate(dev);
185 else
186 dev_deactivate(dev);
187
188 netdev_state_change(dev);
189 }
190
191 dev_put(dev);
192 } 197 }
193 198
194 if (lweventlist) 199 if (!list_empty(&lweventlist))
195 linkwatch_schedule_work(0); 200 linkwatch_schedule_work(0);
201 spin_unlock_irq(&lweventlist_lock);
202}
203
204void linkwatch_forget_dev(struct net_device *dev)
205{
206 unsigned long flags;
207 int clean = 0;
208
209 spin_lock_irqsave(&lweventlist_lock, flags);
210 if (!list_empty(&dev->link_watch_list)) {
211 list_del_init(&dev->link_watch_list);
212 clean = 1;
213 }
214 spin_unlock_irqrestore(&lweventlist_lock, flags);
215 if (clean)
216 linkwatch_do_dev(dev);
196} 217}
197 218
198 219
@@ -216,8 +237,6 @@ void linkwatch_fire_event(struct net_device *dev)
216 bool urgent = linkwatch_urgent_event(dev); 237 bool urgent = linkwatch_urgent_event(dev);
217 238
218 if (!test_and_set_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state)) { 239 if (!test_and_set_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state)) {
219 dev_hold(dev);
220
221 linkwatch_add_event(dev); 240 linkwatch_add_event(dev);
222 } else if (!urgent) 241 } else if (!urgent)
223 return; 242 return;
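
Editor's note: the link_watch rework above replaces the open-coded link_watch_next pointer chain with a standard list_head, which is what makes linkwatch_forget_dev() possible — a device can now unhook itself from the pending queue in O(1) at unregister time. Below is a minimal sketch of the splice-then-process idiom the new __linkwatch_run_queue() uses; the list and spinlock primitives are the real <linux/list.h>/<linux/spinlock.h> APIs, but the item type and process() body are placeholders.

#include <linux/list.h>
#include <linux/spinlock.h>

static LIST_HEAD(pending);		/* shared, lock-protected */
static DEFINE_SPINLOCK(pending_lock);

struct item {
	struct list_head node;		/* empty <=> not queued */
};

static void process(struct item *it)
{
	/* placeholder: real per-item work goes here, may sleep */
}

static void run_pending(void)
{
	struct item *it, *tmp;
	LIST_HEAD(wrk);

	/* Snapshot the whole queue in O(1); producers can keep
	 * queueing on 'pending' without being lost or blocked. */
	spin_lock_irq(&pending_lock);
	list_splice_init(&pending, &wrk);
	spin_unlock_irq(&pending_lock);

	list_for_each_entry_safe(it, tmp, &wrk, node) {
		list_del_init(&it->node);
		process(it);		/* lock already dropped */
	}
}
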
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index e587e6819698..bff37908bd55 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -15,6 +15,7 @@
15 * Harald Welte Add neighbour cache statistics like rtstat 15 * Harald Welte Add neighbour cache statistics like rtstat
16 */ 16 */
17 17
18#include <linux/slab.h>
18#include <linux/types.h> 19#include <linux/types.h>
19#include <linux/kernel.h> 20#include <linux/kernel.h>
20#include <linux/module.h> 21#include <linux/module.h>
@@ -771,6 +772,8 @@ static __inline__ int neigh_max_probes(struct neighbour *n)
771} 772}
772 773
773static void neigh_invalidate(struct neighbour *neigh) 774static void neigh_invalidate(struct neighbour *neigh)
775 __releases(neigh->lock)
776 __acquires(neigh->lock)
774{ 777{
775 struct sk_buff *skb; 778 struct sk_buff *skb;
776 779
@@ -2092,7 +2095,7 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2092 if (h > s_h) 2095 if (h > s_h)
2093 s_idx = 0; 2096 s_idx = 0;
2094 for (n = tbl->hash_buckets[h], idx = 0; n; n = n->next) { 2097 for (n = tbl->hash_buckets[h], idx = 0; n; n = n->next) {
2095 if (dev_net(n->dev) != net) 2098 if (!net_eq(dev_net(n->dev), net))
2096 continue; 2099 continue;
2097 if (idx < s_idx) 2100 if (idx < s_idx)
2098 goto next; 2101 goto next;
@@ -2417,8 +2420,7 @@ EXPORT_SYMBOL(neigh_seq_stop);
2417 2420
2418static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos) 2421static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
2419{ 2422{
2420 struct proc_dir_entry *pde = seq->private; 2423 struct neigh_table *tbl = seq->private;
2421 struct neigh_table *tbl = pde->data;
2422 int cpu; 2424 int cpu;
2423 2425
2424 if (*pos == 0) 2426 if (*pos == 0)
@@ -2435,8 +2437,7 @@ static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
2435 2437
2436static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos) 2438static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2437{ 2439{
2438 struct proc_dir_entry *pde = seq->private; 2440 struct neigh_table *tbl = seq->private;
2439 struct neigh_table *tbl = pde->data;
2440 int cpu; 2441 int cpu;
2441 2442
2442 for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) { 2443 for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
@@ -2455,8 +2456,7 @@ static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
2455 2456
2456static int neigh_stat_seq_show(struct seq_file *seq, void *v) 2457static int neigh_stat_seq_show(struct seq_file *seq, void *v)
2457{ 2458{
2458 struct proc_dir_entry *pde = seq->private; 2459 struct neigh_table *tbl = seq->private;
2459 struct neigh_table *tbl = pde->data;
2460 struct neigh_statistics *st = v; 2460 struct neigh_statistics *st = v;
2461 2461
2462 if (v == SEQ_START_TOKEN) { 2462 if (v == SEQ_START_TOKEN) {
@@ -2501,7 +2501,7 @@ static int neigh_stat_seq_open(struct inode *inode, struct file *file)
2501 2501
2502 if (!ret) { 2502 if (!ret) {
2503 struct seq_file *sf = file->private_data; 2503 struct seq_file *sf = file->private_data;
2504 sf->private = PDE(inode); 2504 sf->private = PDE(inode)->data;
2505 } 2505 }
2506 return ret; 2506 return ret;
2507}; 2507};
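
Editor's note: the three neigh_stat_seq_* hunks above share one shape — instead of stashing the proc_dir_entry in seq->private and chasing pde->data on every callback, the open routine resolves PDE(inode)->data once. A hedged sketch of the open-side idiom; the seq_operations table and its handlers are assumed to exist elsewhere.

static int my_stat_seq_open(struct inode *inode, struct file *file)
{
	int ret = seq_open(file, &my_stat_seq_ops);

	if (!ret) {
		struct seq_file *sf = file->private_data;
		/* resolve the proc entry's data pointer once, at
		 * open time; callbacks then load it directly */
		sf->private = PDE(inode)->data;
	}
	return ret;
}
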
@@ -2559,28 +2559,27 @@ EXPORT_SYMBOL(neigh_app_ns);
2559 2559
2560#ifdef CONFIG_SYSCTL 2560#ifdef CONFIG_SYSCTL
2561 2561
2562#define NEIGH_VARS_MAX 19
2563
2562static struct neigh_sysctl_table { 2564static struct neigh_sysctl_table {
2563 struct ctl_table_header *sysctl_header; 2565 struct ctl_table_header *sysctl_header;
2564 struct ctl_table neigh_vars[__NET_NEIGH_MAX]; 2566 struct ctl_table neigh_vars[NEIGH_VARS_MAX];
2565 char *dev_name; 2567 char *dev_name;
2566} neigh_sysctl_template __read_mostly = { 2568} neigh_sysctl_template __read_mostly = {
2567 .neigh_vars = { 2569 .neigh_vars = {
2568 { 2570 {
2569 .ctl_name = NET_NEIGH_MCAST_SOLICIT,
2570 .procname = "mcast_solicit", 2571 .procname = "mcast_solicit",
2571 .maxlen = sizeof(int), 2572 .maxlen = sizeof(int),
2572 .mode = 0644, 2573 .mode = 0644,
2573 .proc_handler = proc_dointvec, 2574 .proc_handler = proc_dointvec,
2574 }, 2575 },
2575 { 2576 {
2576 .ctl_name = NET_NEIGH_UCAST_SOLICIT,
2577 .procname = "ucast_solicit", 2577 .procname = "ucast_solicit",
2578 .maxlen = sizeof(int), 2578 .maxlen = sizeof(int),
2579 .mode = 0644, 2579 .mode = 0644,
2580 .proc_handler = proc_dointvec, 2580 .proc_handler = proc_dointvec,
2581 }, 2581 },
2582 { 2582 {
2583 .ctl_name = NET_NEIGH_APP_SOLICIT,
2584 .procname = "app_solicit", 2583 .procname = "app_solicit",
2585 .maxlen = sizeof(int), 2584 .maxlen = sizeof(int),
2586 .mode = 0644, 2585 .mode = 0644,
@@ -2593,38 +2592,30 @@ static struct neigh_sysctl_table {
2593 .proc_handler = proc_dointvec_userhz_jiffies, 2592 .proc_handler = proc_dointvec_userhz_jiffies,
2594 }, 2593 },
2595 { 2594 {
2596 .ctl_name = NET_NEIGH_REACHABLE_TIME,
2597 .procname = "base_reachable_time", 2595 .procname = "base_reachable_time",
2598 .maxlen = sizeof(int), 2596 .maxlen = sizeof(int),
2599 .mode = 0644, 2597 .mode = 0644,
2600 .proc_handler = proc_dointvec_jiffies, 2598 .proc_handler = proc_dointvec_jiffies,
2601 .strategy = sysctl_jiffies,
2602 }, 2599 },
2603 { 2600 {
2604 .ctl_name = NET_NEIGH_DELAY_PROBE_TIME,
2605 .procname = "delay_first_probe_time", 2601 .procname = "delay_first_probe_time",
2606 .maxlen = sizeof(int), 2602 .maxlen = sizeof(int),
2607 .mode = 0644, 2603 .mode = 0644,
2608 .proc_handler = proc_dointvec_jiffies, 2604 .proc_handler = proc_dointvec_jiffies,
2609 .strategy = sysctl_jiffies,
2610 }, 2605 },
2611 { 2606 {
2612 .ctl_name = NET_NEIGH_GC_STALE_TIME,
2613 .procname = "gc_stale_time", 2607 .procname = "gc_stale_time",
2614 .maxlen = sizeof(int), 2608 .maxlen = sizeof(int),
2615 .mode = 0644, 2609 .mode = 0644,
2616 .proc_handler = proc_dointvec_jiffies, 2610 .proc_handler = proc_dointvec_jiffies,
2617 .strategy = sysctl_jiffies,
2618 }, 2611 },
2619 { 2612 {
2620 .ctl_name = NET_NEIGH_UNRES_QLEN,
2621 .procname = "unres_qlen", 2613 .procname = "unres_qlen",
2622 .maxlen = sizeof(int), 2614 .maxlen = sizeof(int),
2623 .mode = 0644, 2615 .mode = 0644,
2624 .proc_handler = proc_dointvec, 2616 .proc_handler = proc_dointvec,
2625 }, 2617 },
2626 { 2618 {
2627 .ctl_name = NET_NEIGH_PROXY_QLEN,
2628 .procname = "proxy_qlen", 2619 .procname = "proxy_qlen",
2629 .maxlen = sizeof(int), 2620 .maxlen = sizeof(int),
2630 .mode = 0644, 2621 .mode = 0644,
@@ -2649,45 +2640,36 @@ static struct neigh_sysctl_table {
2649 .proc_handler = proc_dointvec_userhz_jiffies, 2640 .proc_handler = proc_dointvec_userhz_jiffies,
2650 }, 2641 },
2651 { 2642 {
2652 .ctl_name = NET_NEIGH_RETRANS_TIME_MS,
2653 .procname = "retrans_time_ms", 2643 .procname = "retrans_time_ms",
2654 .maxlen = sizeof(int), 2644 .maxlen = sizeof(int),
2655 .mode = 0644, 2645 .mode = 0644,
2656 .proc_handler = proc_dointvec_ms_jiffies, 2646 .proc_handler = proc_dointvec_ms_jiffies,
2657 .strategy = sysctl_ms_jiffies,
2658 }, 2647 },
2659 { 2648 {
2660 .ctl_name = NET_NEIGH_REACHABLE_TIME_MS,
2661 .procname = "base_reachable_time_ms", 2649 .procname = "base_reachable_time_ms",
2662 .maxlen = sizeof(int), 2650 .maxlen = sizeof(int),
2663 .mode = 0644, 2651 .mode = 0644,
2664 .proc_handler = proc_dointvec_ms_jiffies, 2652 .proc_handler = proc_dointvec_ms_jiffies,
2665 .strategy = sysctl_ms_jiffies,
2666 }, 2653 },
2667 { 2654 {
2668 .ctl_name = NET_NEIGH_GC_INTERVAL,
2669 .procname = "gc_interval", 2655 .procname = "gc_interval",
2670 .maxlen = sizeof(int), 2656 .maxlen = sizeof(int),
2671 .mode = 0644, 2657 .mode = 0644,
2672 .proc_handler = proc_dointvec_jiffies, 2658 .proc_handler = proc_dointvec_jiffies,
2673 .strategy = sysctl_jiffies,
2674 }, 2659 },
2675 { 2660 {
2676 .ctl_name = NET_NEIGH_GC_THRESH1,
2677 .procname = "gc_thresh1", 2661 .procname = "gc_thresh1",
2678 .maxlen = sizeof(int), 2662 .maxlen = sizeof(int),
2679 .mode = 0644, 2663 .mode = 0644,
2680 .proc_handler = proc_dointvec, 2664 .proc_handler = proc_dointvec,
2681 }, 2665 },
2682 { 2666 {
2683 .ctl_name = NET_NEIGH_GC_THRESH2,
2684 .procname = "gc_thresh2", 2667 .procname = "gc_thresh2",
2685 .maxlen = sizeof(int), 2668 .maxlen = sizeof(int),
2686 .mode = 0644, 2669 .mode = 0644,
2687 .proc_handler = proc_dointvec, 2670 .proc_handler = proc_dointvec,
2688 }, 2671 },
2689 { 2672 {
2690 .ctl_name = NET_NEIGH_GC_THRESH3,
2691 .procname = "gc_thresh3", 2673 .procname = "gc_thresh3",
2692 .maxlen = sizeof(int), 2674 .maxlen = sizeof(int),
2693 .mode = 0644, 2675 .mode = 0644,
@@ -2698,8 +2680,7 @@ static struct neigh_sysctl_table {
2698}; 2680};
2699 2681
2700int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p, 2682int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
2701 int p_id, int pdev_id, char *p_name, 2683 char *p_name, proc_handler *handler)
2702 proc_handler *handler, ctl_handler *strategy)
2703{ 2684{
2704 struct neigh_sysctl_table *t; 2685 struct neigh_sysctl_table *t;
2705 const char *dev_name_source = NULL; 2686 const char *dev_name_source = NULL;
@@ -2710,10 +2691,10 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
2710#define NEIGH_CTL_PATH_DEV 3 2691#define NEIGH_CTL_PATH_DEV 3
2711 2692
2712 struct ctl_path neigh_path[] = { 2693 struct ctl_path neigh_path[] = {
2713 { .procname = "net", .ctl_name = CTL_NET, }, 2694 { .procname = "net", },
2714 { .procname = "proto", .ctl_name = 0, }, 2695 { .procname = "proto", },
2715 { .procname = "neigh", .ctl_name = 0, }, 2696 { .procname = "neigh", },
2716 { .procname = "default", .ctl_name = NET_PROTO_CONF_DEFAULT, }, 2697 { .procname = "default", },
2717 { }, 2698 { },
2718 }; 2699 };
2719 2700
@@ -2738,7 +2719,6 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
2738 2719
2739 if (dev) { 2720 if (dev) {
2740 dev_name_source = dev->name; 2721 dev_name_source = dev->name;
2741 neigh_path[NEIGH_CTL_PATH_DEV].ctl_name = dev->ifindex;
2742 /* Terminate the table early */ 2722 /* Terminate the table early */
2743 memset(&t->neigh_vars[14], 0, sizeof(t->neigh_vars[14])); 2723 memset(&t->neigh_vars[14], 0, sizeof(t->neigh_vars[14]));
2744 } else { 2724 } else {
@@ -2750,31 +2730,19 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
2750 } 2730 }
2751 2731
2752 2732
2753 if (handler || strategy) { 2733 if (handler) {
2754 /* RetransTime */ 2734 /* RetransTime */
2755 t->neigh_vars[3].proc_handler = handler; 2735 t->neigh_vars[3].proc_handler = handler;
2756 t->neigh_vars[3].strategy = strategy;
2757 t->neigh_vars[3].extra1 = dev; 2736 t->neigh_vars[3].extra1 = dev;
2758 if (!strategy)
2759 t->neigh_vars[3].ctl_name = CTL_UNNUMBERED;
2760 /* ReachableTime */ 2737 /* ReachableTime */
2761 t->neigh_vars[4].proc_handler = handler; 2738 t->neigh_vars[4].proc_handler = handler;
2762 t->neigh_vars[4].strategy = strategy;
2763 t->neigh_vars[4].extra1 = dev; 2739 t->neigh_vars[4].extra1 = dev;
2764 if (!strategy)
2765 t->neigh_vars[4].ctl_name = CTL_UNNUMBERED;
2766 /* RetransTime (in milliseconds)*/ 2740 /* RetransTime (in milliseconds)*/
2767 t->neigh_vars[12].proc_handler = handler; 2741 t->neigh_vars[12].proc_handler = handler;
2768 t->neigh_vars[12].strategy = strategy;
2769 t->neigh_vars[12].extra1 = dev; 2742 t->neigh_vars[12].extra1 = dev;
2770 if (!strategy)
2771 t->neigh_vars[12].ctl_name = CTL_UNNUMBERED;
2772 /* ReachableTime (in milliseconds) */ 2743 /* ReachableTime (in milliseconds) */
2773 t->neigh_vars[13].proc_handler = handler; 2744 t->neigh_vars[13].proc_handler = handler;
2774 t->neigh_vars[13].strategy = strategy;
2775 t->neigh_vars[13].extra1 = dev; 2745 t->neigh_vars[13].extra1 = dev;
2776 if (!strategy)
2777 t->neigh_vars[13].ctl_name = CTL_UNNUMBERED;
2778 } 2746 }
2779 2747
2780 t->dev_name = kstrdup(dev_name_source, GFP_KERNEL); 2748 t->dev_name = kstrdup(dev_name_source, GFP_KERNEL);
@@ -2782,9 +2750,7 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
2782 goto free; 2750 goto free;
2783 2751
2784 neigh_path[NEIGH_CTL_PATH_DEV].procname = t->dev_name; 2752 neigh_path[NEIGH_CTL_PATH_DEV].procname = t->dev_name;
2785 neigh_path[NEIGH_CTL_PATH_NEIGH].ctl_name = pdev_id;
2786 neigh_path[NEIGH_CTL_PATH_PROTO].procname = p_name; 2753 neigh_path[NEIGH_CTL_PATH_PROTO].procname = p_name;
2787 neigh_path[NEIGH_CTL_PATH_PROTO].ctl_name = p_id;
2788 2754
2789 t->sysctl_header = 2755 t->sysctl_header =
2790 register_net_sysctl_table(neigh_parms_net(p), neigh_path, t->neigh_vars); 2756 register_net_sysctl_table(neigh_parms_net(p), neigh_path, t->neigh_vars);
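
Editor's note: every neigh sysctl hunk above is the same mechanical change — with the binary sysctl(2) interface being retired, the ctl_name and strategy fields disappear and a table entry is identified by procname alone, which is also why neigh_path[] shrinks to bare names and the table can be capped at NEIGH_VARS_MAX instead of __NET_NEIGH_MAX. A minimal sketch of registering such a procname-only table; all names and the backing variable are illustrative.

#include <linux/sysctl.h>

static int my_value;
static struct ctl_table_header *my_hdr;

static struct ctl_path my_path[] = {
	{ .procname = "net", },
	{ .procname = "myproto", },	/* illustrative component */
	{ },
};

static struct ctl_table my_vars[] = {
	{
		.procname	= "my_value",
		.data		= &my_value,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{ },	/* zeroed sentinel terminates the table */
};

static int __net_init my_sysctl_init(struct net *net)
{
	my_hdr = register_net_sysctl_table(net, my_path, my_vars);
	return my_hdr ? 0 : -ENOMEM;
}
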
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 427ded841224..59cfc7d8fc45 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -13,6 +13,7 @@
13#include <linux/kernel.h> 13#include <linux/kernel.h>
14#include <linux/netdevice.h> 14#include <linux/netdevice.h>
15#include <linux/if_arp.h> 15#include <linux/if_arp.h>
16#include <linux/slab.h>
16#include <net/sock.h> 17#include <net/sock.h>
17#include <linux/rtnetlink.h> 18#include <linux/rtnetlink.h>
18#include <linux/wireless.h> 19#include <linux/wireless.h>
@@ -130,6 +131,48 @@ static ssize_t show_carrier(struct device *dev,
130 return -EINVAL; 131 return -EINVAL;
131} 132}
132 133
134static ssize_t show_speed(struct device *dev,
135 struct device_attribute *attr, char *buf)
136{
137 struct net_device *netdev = to_net_dev(dev);
138 int ret = -EINVAL;
139
140 if (!rtnl_trylock())
141 return restart_syscall();
142
143 if (netif_running(netdev) &&
144 netdev->ethtool_ops &&
145 netdev->ethtool_ops->get_settings) {
146 struct ethtool_cmd cmd = { ETHTOOL_GSET };
147
148 if (!netdev->ethtool_ops->get_settings(netdev, &cmd))
149 ret = sprintf(buf, fmt_dec, ethtool_cmd_speed(&cmd));
150 }
151 rtnl_unlock();
152 return ret;
153}
154
155static ssize_t show_duplex(struct device *dev,
156 struct device_attribute *attr, char *buf)
157{
158 struct net_device *netdev = to_net_dev(dev);
159 int ret = -EINVAL;
160
161 if (!rtnl_trylock())
162 return restart_syscall();
163
164 if (netif_running(netdev) &&
165 netdev->ethtool_ops &&
166 netdev->ethtool_ops->get_settings) {
167 struct ethtool_cmd cmd = { ETHTOOL_GSET };
168
169 if (!netdev->ethtool_ops->get_settings(netdev, &cmd))
170 ret = sprintf(buf, "%s\n", cmd.duplex ? "full" : "half");
171 }
172 rtnl_unlock();
173 return ret;
174}
175
133static ssize_t show_dormant(struct device *dev, 176static ssize_t show_dormant(struct device *dev,
134 struct device_attribute *attr, char *buf) 177 struct device_attribute *attr, char *buf)
135{ 178{
@@ -259,6 +302,8 @@ static struct device_attribute net_class_attributes[] = {
259 __ATTR(address, S_IRUGO, show_address, NULL), 302 __ATTR(address, S_IRUGO, show_address, NULL),
260 __ATTR(broadcast, S_IRUGO, show_broadcast, NULL), 303 __ATTR(broadcast, S_IRUGO, show_broadcast, NULL),
261 __ATTR(carrier, S_IRUGO, show_carrier, NULL), 304 __ATTR(carrier, S_IRUGO, show_carrier, NULL),
305 __ATTR(speed, S_IRUGO, show_speed, NULL),
306 __ATTR(duplex, S_IRUGO, show_duplex, NULL),
262 __ATTR(dormant, S_IRUGO, show_dormant, NULL), 307 __ATTR(dormant, S_IRUGO, show_dormant, NULL),
263 __ATTR(operstate, S_IRUGO, show_operstate, NULL), 308 __ATTR(operstate, S_IRUGO, show_operstate, NULL),
264 __ATTR(mtu, S_IRUGO | S_IWUSR, show_mtu, store_mtu), 309 __ATTR(mtu, S_IRUGO | S_IWUSR, show_mtu, store_mtu),
@@ -366,7 +411,8 @@ static ssize_t wireless_show(struct device *d, char *buf,
366 const struct iw_statistics *iw; 411 const struct iw_statistics *iw;
367 ssize_t ret = -EINVAL; 412 ssize_t ret = -EINVAL;
368 413
369 rtnl_lock(); 414 if (!rtnl_trylock())
415 return restart_syscall();
370 if (dev_isalive(dev)) { 416 if (dev_isalive(dev)) {
371 iw = get_wireless_stats(dev); 417 iw = get_wireless_stats(dev);
372 if (iw) 418 if (iw)
@@ -481,7 +527,7 @@ void netdev_unregister_kobject(struct net_device * net)
481 527
482 kobject_get(&dev->kobj); 528 kobject_get(&dev->kobj);
483 529
484 if (dev_net(net) != &init_net) 530 if (!net_eq(dev_net(net), &init_net))
485 return; 531 return;
486 532
487 device_del(dev); 533 device_del(dev);
@@ -500,15 +546,22 @@ int netdev_register_kobject(struct net_device *net)
500 dev_set_name(dev, "%s", net->name); 546 dev_set_name(dev, "%s", net->name);
501 547
502#ifdef CONFIG_SYSFS 548#ifdef CONFIG_SYSFS
503 *groups++ = &netstat_group; 549 /* Allow for a device specific group */
550 if (*groups)
551 groups++;
504 552
553 *groups++ = &netstat_group;
505#ifdef CONFIG_WIRELESS_EXT_SYSFS 554#ifdef CONFIG_WIRELESS_EXT_SYSFS
506 if (net->wireless_handlers || net->ieee80211_ptr) 555 if (net->ieee80211_ptr)
507 *groups++ = &wireless_group; 556 *groups++ = &wireless_group;
557#ifdef CONFIG_WIRELESS_EXT
558 else if (net->wireless_handlers)
559 *groups++ = &wireless_group;
560#endif
508#endif 561#endif
509#endif /* CONFIG_SYSFS */ 562#endif /* CONFIG_SYSFS */
510 563
511 if (dev_net(net) != &init_net) 564 if (!net_eq(dev_net(net), &init_net))
512 return 0; 565 return 0;
513 566
514 return device_add(dev); 567 return device_add(dev);
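
Editor's note: the recurring pattern in this file — show_speed(), show_duplex(), and the converted wireless_show() — deserves a callout. A sysfs read must never block unconditionally on the RTNL, because unregistration paths hold the RTNL while waiting for sysfs readers to drain, and the combination deadlocks; rtnl_trylock() plus restart_syscall() backs the reader out and retries the syscall instead. A hedged sketch of the shape, with a hypothetical attribute and placeholder value:

static ssize_t show_example(struct device *dev,
			    struct device_attribute *attr, char *buf)
{
	struct net_device *netdev = to_net_dev(dev);
	ssize_t ret = -EINVAL;

	/* Don't sleep on the RTNL here: back out and have the
	 * read(2) restarted, so unregister can't deadlock on us. */
	if (!rtnl_trylock())
		return restart_syscall();

	if (netif_running(netdev))
		ret = sprintf(buf, "%d\n", 42);	/* placeholder value */

	rtnl_unlock();
	return ret;
}
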
diff --git a/net/core/net-traces.c b/net/core/net-traces.c
index f1e982c508bb..afa6380ed88a 100644
--- a/net/core/net-traces.c
+++ b/net/core/net-traces.c
@@ -19,6 +19,7 @@
19#include <linux/workqueue.h> 19#include <linux/workqueue.h>
20#include <linux/netlink.h> 20#include <linux/netlink.h>
21#include <linux/net_dropmon.h> 21#include <linux/net_dropmon.h>
22#include <linux/slab.h>
22 23
23#include <asm/unaligned.h> 24#include <asm/unaligned.h>
24#include <asm/bitops.h> 25#include <asm/bitops.h>
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 1c1af2756f38..bd8c4712ea24 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -27,14 +27,64 @@ EXPORT_SYMBOL(init_net);
27 27
28#define INITIAL_NET_GEN_PTRS 13 /* +1 for len +2 for rcu_head */ 28#define INITIAL_NET_GEN_PTRS 13 /* +1 for len +2 for rcu_head */
29 29
30static int ops_init(const struct pernet_operations *ops, struct net *net)
31{
32 int err;
33 if (ops->id && ops->size) {
34 void *data = kzalloc(ops->size, GFP_KERNEL);
35 if (!data)
36 return -ENOMEM;
37
38 err = net_assign_generic(net, *ops->id, data);
39 if (err) {
40 kfree(data);
41 return err;
42 }
43 }
44 if (ops->init)
45 return ops->init(net);
46 return 0;
47}
48
49static void ops_free(const struct pernet_operations *ops, struct net *net)
50{
51 if (ops->id && ops->size) {
52 int id = *ops->id;
53 kfree(net_generic(net, id));
54 }
55}
56
57static void ops_exit_list(const struct pernet_operations *ops,
58 struct list_head *net_exit_list)
59{
60 struct net *net;
61 if (ops->exit) {
62 list_for_each_entry(net, net_exit_list, exit_list)
63 ops->exit(net);
64 }
65 if (ops->exit_batch)
66 ops->exit_batch(net_exit_list);
67}
68
69static void ops_free_list(const struct pernet_operations *ops,
70 struct list_head *net_exit_list)
71{
72 struct net *net;
73 if (ops->size && ops->id) {
74 list_for_each_entry(net, net_exit_list, exit_list)
75 ops_free(ops, net);
76 }
77}
78
30/* 79/*
31 * setup_net runs the initializers for the network namespace object. 80 * setup_net runs the initializers for the network namespace object.
32 */ 81 */
33static __net_init int setup_net(struct net *net) 82static __net_init int setup_net(struct net *net)
34{ 83{
35 /* Must be called with net_mutex held */ 84 /* Must be called with net_mutex held */
36 struct pernet_operations *ops; 85 const struct pernet_operations *ops, *saved_ops;
37 int error = 0; 86 int error = 0;
87 LIST_HEAD(net_exit_list);
38 88
39 atomic_set(&net->count, 1); 89 atomic_set(&net->count, 1);
40 90
@@ -43,11 +93,9 @@ static __net_init int setup_net(struct net *net)
43#endif 93#endif
44 94
45 list_for_each_entry(ops, &pernet_list, list) { 95 list_for_each_entry(ops, &pernet_list, list) {
46 if (ops->init) { 96 error = ops_init(ops, net);
47 error = ops->init(net); 97 if (error < 0)
48 if (error < 0) 98 goto out_undo;
49 goto out_undo;
50 }
51 } 99 }
52out: 100out:
53 return error; 101 return error;
@@ -56,10 +104,14 @@ out_undo:
56 /* Walk through the list backwards calling the exit functions 104 /* Walk through the list backwards calling the exit functions
57 * for the pernet modules whose init functions did not fail. 105 * for the pernet modules whose init functions did not fail.
58 */ 106 */
59 list_for_each_entry_continue_reverse(ops, &pernet_list, list) { 107 list_add(&net->exit_list, &net_exit_list);
60 if (ops->exit) 108 saved_ops = ops;
61 ops->exit(net); 109 list_for_each_entry_continue_reverse(ops, &pernet_list, list)
62 } 110 ops_exit_list(ops, &net_exit_list);
111
112 ops = saved_ops;
113 list_for_each_entry_continue_reverse(ops, &pernet_list, list)
114 ops_free_list(ops, &net_exit_list);
63 115
64 rcu_barrier(); 116 rcu_barrier();
65 goto out; 117 goto out;
@@ -147,18 +199,29 @@ struct net *copy_net_ns(unsigned long flags, struct net *old_net)
147 return net_create(); 199 return net_create();
148} 200}
149 201
202static DEFINE_SPINLOCK(cleanup_list_lock);
203static LIST_HEAD(cleanup_list); /* Must hold cleanup_list_lock to touch */
204
150static void cleanup_net(struct work_struct *work) 205static void cleanup_net(struct work_struct *work)
151{ 206{
152 struct pernet_operations *ops; 207 const struct pernet_operations *ops;
153 struct net *net; 208 struct net *net, *tmp;
209 LIST_HEAD(net_kill_list);
210 LIST_HEAD(net_exit_list);
154 211
155 net = container_of(work, struct net, work); 212 /* Atomically snapshot the list of namespaces to cleanup */
213 spin_lock_irq(&cleanup_list_lock);
214 list_replace_init(&cleanup_list, &net_kill_list);
215 spin_unlock_irq(&cleanup_list_lock);
156 216
157 mutex_lock(&net_mutex); 217 mutex_lock(&net_mutex);
158 218
159 /* Don't let anyone else find us. */ 219 /* Don't let anyone else find us. */
160 rtnl_lock(); 220 rtnl_lock();
161 list_del_rcu(&net->list); 221 list_for_each_entry(net, &net_kill_list, cleanup_list) {
222 list_del_rcu(&net->list);
223 list_add_tail(&net->exit_list, &net_exit_list);
224 }
162 rtnl_unlock(); 225 rtnl_unlock();
163 226
164 /* 227 /*
@@ -169,10 +232,12 @@ static void cleanup_net(struct work_struct *work)
169 synchronize_rcu(); 232 synchronize_rcu();
170 233
171 /* Run all of the network namespace exit methods */ 234 /* Run all of the network namespace exit methods */
172 list_for_each_entry_reverse(ops, &pernet_list, list) { 235 list_for_each_entry_reverse(ops, &pernet_list, list)
173 if (ops->exit) 236 ops_exit_list(ops, &net_exit_list);
174 ops->exit(net); 237
175 } 238 /* Free the net generic variables */
239 list_for_each_entry_reverse(ops, &pernet_list, list)
240 ops_free_list(ops, &net_exit_list);
176 241
177 mutex_unlock(&net_mutex); 242 mutex_unlock(&net_mutex);
178 243
@@ -182,14 +247,23 @@ static void cleanup_net(struct work_struct *work)
182 rcu_barrier(); 247 rcu_barrier();
183 248
184 /* Finally it is safe to free my network namespace structure */ 249 /* Finally it is safe to free my network namespace structure */
185 net_free(net); 250 list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) {
251 list_del_init(&net->exit_list);
252 net_free(net);
253 }
186} 254}
255static DECLARE_WORK(net_cleanup_work, cleanup_net);
187 256
188void __put_net(struct net *net) 257void __put_net(struct net *net)
189{ 258{
190 /* Cleanup the network namespace in process context */ 259 /* Cleanup the network namespace in process context */
191 INIT_WORK(&net->work, cleanup_net); 260 unsigned long flags;
192 queue_work(netns_wq, &net->work); 261
262 spin_lock_irqsave(&cleanup_list_lock, flags);
263 list_add(&net->cleanup_list, &cleanup_list);
264 spin_unlock_irqrestore(&cleanup_list_lock, flags);
265
266 queue_work(netns_wq, &net_cleanup_work);
193} 267}
194EXPORT_SYMBOL_GPL(__put_net); 268EXPORT_SYMBOL_GPL(__put_net);
195 269
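
Editor's note: __put_net() above switches from one work item embedded in every struct net to a single static work plus a spinlock-protected list, so one cleanup_net() invocation tears down every namespace that died since the last run and pays the rtnl_lock()/synchronize_rcu() cost once per batch rather than once per namespace. A minimal sketch of the batching idiom itself:

#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/workqueue.h>

static DEFINE_SPINLOCK(deferred_lock);
static LIST_HEAD(deferred);		/* guarded by deferred_lock */

static void batch_worker(struct work_struct *work)
{
	LIST_HEAD(batch);

	/* atomically take everything queued so far */
	spin_lock_irq(&deferred_lock);
	list_replace_init(&deferred, &batch);
	spin_unlock_irq(&deferred_lock);

	/* ... pay the expensive global costs once for 'batch' ... */
}
static DECLARE_WORK(batch_work, batch_worker);

static void defer(struct list_head *entry)
{
	unsigned long flags;

	spin_lock_irqsave(&deferred_lock, flags);
	list_add(entry, &deferred);
	spin_unlock_irqrestore(&deferred_lock, flags);
	schedule_work(&batch_work);	/* no-op if already pending */
}
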
@@ -259,18 +333,20 @@ static int __init net_ns_init(void)
259pure_initcall(net_ns_init); 333pure_initcall(net_ns_init);
260 334
261#ifdef CONFIG_NET_NS 335#ifdef CONFIG_NET_NS
262static int register_pernet_operations(struct list_head *list, 336static int __register_pernet_operations(struct list_head *list,
263 struct pernet_operations *ops) 337 struct pernet_operations *ops)
264{ 338{
265 struct net *net, *undo_net; 339 struct net *net;
266 int error; 340 int error;
341 LIST_HEAD(net_exit_list);
267 342
268 list_add_tail(&ops->list, list); 343 list_add_tail(&ops->list, list);
269 if (ops->init) { 344 if (ops->init || (ops->id && ops->size)) {
270 for_each_net(net) { 345 for_each_net(net) {
271 error = ops->init(net); 346 error = ops_init(ops, net);
272 if (error) 347 if (error)
273 goto out_undo; 348 goto out_undo;
349 list_add_tail(&net->exit_list, &net_exit_list);
274 } 350 }
275 } 351 }
276 return 0; 352 return 0;
@@ -278,45 +354,82 @@ static int register_pernet_operations(struct list_head *list,
278out_undo: 354out_undo:
279 /* If I have an error cleanup all namespaces I initialized */ 355 /* If I have an error cleanup all namespaces I initialized */
280 list_del(&ops->list); 356 list_del(&ops->list);
281 if (ops->exit) { 357 ops_exit_list(ops, &net_exit_list);
282 for_each_net(undo_net) { 358 ops_free_list(ops, &net_exit_list);
283 if (undo_net == net)
284 goto undone;
285 ops->exit(undo_net);
286 }
287 }
288undone:
289 return error; 359 return error;
290} 360}
291 361
292static void unregister_pernet_operations(struct pernet_operations *ops) 362static void __unregister_pernet_operations(struct pernet_operations *ops)
293{ 363{
294 struct net *net; 364 struct net *net;
365 LIST_HEAD(net_exit_list);
295 366
296 list_del(&ops->list); 367 list_del(&ops->list);
297 if (ops->exit) 368 for_each_net(net)
298 for_each_net(net) 369 list_add_tail(&net->exit_list, &net_exit_list);
299 ops->exit(net); 370 ops_exit_list(ops, &net_exit_list);
371 ops_free_list(ops, &net_exit_list);
300} 372}
301 373
302#else 374#else
303 375
376static int __register_pernet_operations(struct list_head *list,
377 struct pernet_operations *ops)
378{
379 int err = 0;
380 err = ops_init(ops, &init_net);
381 if (err)
382 ops_free(ops, &init_net);
383 return err;
384
385}
386
387static void __unregister_pernet_operations(struct pernet_operations *ops)
388{
389 LIST_HEAD(net_exit_list);
390 list_add(&init_net.exit_list, &net_exit_list);
391 ops_exit_list(ops, &net_exit_list);
392 ops_free_list(ops, &net_exit_list);
393}
394
395#endif /* CONFIG_NET_NS */
396
397static DEFINE_IDA(net_generic_ids);
398
304static int register_pernet_operations(struct list_head *list, 399static int register_pernet_operations(struct list_head *list,
305 struct pernet_operations *ops) 400 struct pernet_operations *ops)
306{ 401{
307 if (ops->init == NULL) 402 int error;
308 return 0; 403
309 return ops->init(&init_net); 404 if (ops->id) {
405again:
406 error = ida_get_new_above(&net_generic_ids, 1, ops->id);
407 if (error < 0) {
408 if (error == -EAGAIN) {
409 ida_pre_get(&net_generic_ids, GFP_KERNEL);
410 goto again;
411 }
412 return error;
413 }
414 }
415 error = __register_pernet_operations(list, ops);
416 if (error) {
417 rcu_barrier();
418 if (ops->id)
419 ida_remove(&net_generic_ids, *ops->id);
420 }
421
422 return error;
310} 423}
311 424
312static void unregister_pernet_operations(struct pernet_operations *ops) 425static void unregister_pernet_operations(struct pernet_operations *ops)
313{ 426{
314 if (ops->exit) 427
315 ops->exit(&init_net); 428 __unregister_pernet_operations(ops);
429 rcu_barrier();
430 if (ops->id)
431 ida_remove(&net_generic_ids, *ops->id);
316} 432}
317#endif
318
319static DEFINE_IDA(net_generic_ids);
320 433
321/** 434/**
322 * register_pernet_subsys - register a network namespace subsystem 435 * register_pernet_subsys - register a network namespace subsystem
@@ -364,38 +477,6 @@ void unregister_pernet_subsys(struct pernet_operations *module)
364} 477}
365EXPORT_SYMBOL_GPL(unregister_pernet_subsys); 478EXPORT_SYMBOL_GPL(unregister_pernet_subsys);
366 479
367int register_pernet_gen_subsys(int *id, struct pernet_operations *ops)
368{
369 int rv;
370
371 mutex_lock(&net_mutex);
372again:
373 rv = ida_get_new_above(&net_generic_ids, 1, id);
374 if (rv < 0) {
375 if (rv == -EAGAIN) {
376 ida_pre_get(&net_generic_ids, GFP_KERNEL);
377 goto again;
378 }
379 goto out;
380 }
381 rv = register_pernet_operations(first_device, ops);
382 if (rv < 0)
383 ida_remove(&net_generic_ids, *id);
384out:
385 mutex_unlock(&net_mutex);
386 return rv;
387}
388EXPORT_SYMBOL_GPL(register_pernet_gen_subsys);
389
390void unregister_pernet_gen_subsys(int id, struct pernet_operations *ops)
391{
392 mutex_lock(&net_mutex);
393 unregister_pernet_operations(ops);
394 ida_remove(&net_generic_ids, id);
395 mutex_unlock(&net_mutex);
396}
397EXPORT_SYMBOL_GPL(unregister_pernet_gen_subsys);
398
399/** 480/**
400 * register_pernet_device - register a network namespace device 481 * register_pernet_device - register a network namespace device
401 * @ops: pernet operations structure for the subsystem 482 * @ops: pernet operations structure for the subsystem
@@ -427,30 +508,6 @@ int register_pernet_device(struct pernet_operations *ops)
427} 508}
428EXPORT_SYMBOL_GPL(register_pernet_device); 509EXPORT_SYMBOL_GPL(register_pernet_device);
429 510
430int register_pernet_gen_device(int *id, struct pernet_operations *ops)
431{
432 int error;
433 mutex_lock(&net_mutex);
434again:
435 error = ida_get_new_above(&net_generic_ids, 1, id);
436 if (error) {
437 if (error == -EAGAIN) {
438 ida_pre_get(&net_generic_ids, GFP_KERNEL);
439 goto again;
440 }
441 goto out;
442 }
443 error = register_pernet_operations(&pernet_list, ops);
444 if (error)
445 ida_remove(&net_generic_ids, *id);
446 else if (first_device == &pernet_list)
447 first_device = &ops->list;
448out:
449 mutex_unlock(&net_mutex);
450 return error;
451}
452EXPORT_SYMBOL_GPL(register_pernet_gen_device);
453
454/** 511/**
455 * unregister_pernet_device - unregister a network namespace netdevice 512 * unregister_pernet_device - unregister a network namespace netdevice
456 * @ops: pernet operations structure to manipulate 513 * @ops: pernet operations structure to manipulate
@@ -470,17 +527,6 @@ void unregister_pernet_device(struct pernet_operations *ops)
470} 527}
471EXPORT_SYMBOL_GPL(unregister_pernet_device); 528EXPORT_SYMBOL_GPL(unregister_pernet_device);
472 529
473void unregister_pernet_gen_device(int id, struct pernet_operations *ops)
474{
475 mutex_lock(&net_mutex);
476 if (&ops->list == first_device)
477 first_device = first_device->next;
478 unregister_pernet_operations(ops);
479 ida_remove(&net_generic_ids, id);
480 mutex_unlock(&net_mutex);
481}
482EXPORT_SYMBOL_GPL(unregister_pernet_gen_device);
483
484static void net_generic_release(struct rcu_head *rcu) 530static void net_generic_release(struct rcu_head *rcu)
485{ 531{
486 struct net_generic *ng; 532 struct net_generic *ng;
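
Editor's note: with the *_gen_* registration variants deleted above, the net-generic id allocation now lives in register_pernet_operations() itself, using the retry idiom this generation of the IDA API required — ida_get_new_above() returns -EAGAIN when its internal cache needs refilling. A hedged, self-contained sketch of that idiom:

#include <linux/idr.h>

static DEFINE_IDA(my_ids);

static int my_get_id(int *id)
{
	int err;
again:
	err = ida_get_new_above(&my_ids, 1, id);
	if (err == -EAGAIN) {
		/* refill the IDA's internal cache, then retry */
		if (!ida_pre_get(&my_ids, GFP_KERNEL))
			return -ENOMEM;
		goto again;
	}
	return err;
}

/* on teardown: ida_remove(&my_ids, *id); */
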
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 0b4d0d35ef40..a58f59b97597 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -22,6 +22,7 @@
22#include <linux/delay.h> 22#include <linux/delay.h>
23#include <linux/rcupdate.h> 23#include <linux/rcupdate.h>
24#include <linux/workqueue.h> 24#include <linux/workqueue.h>
25#include <linux/slab.h>
25#include <net/tcp.h> 26#include <net/tcp.h>
26#include <net/udp.h> 27#include <net/udp.h>
27#include <asm/unaligned.h> 28#include <asm/unaligned.h>
@@ -407,11 +408,24 @@ static void arp_reply(struct sk_buff *skb)
407 __be32 sip, tip; 408 __be32 sip, tip;
408 unsigned char *sha; 409 unsigned char *sha;
409 struct sk_buff *send_skb; 410 struct sk_buff *send_skb;
410 struct netpoll *np = NULL; 411 struct netpoll *np, *tmp;
412 unsigned long flags;
413 int hits = 0;
414
415 if (list_empty(&npinfo->rx_np))
416 return;
417
418 /* Before checking the packet, we do some early
 419 inspection of whether it is interesting at all */
420 spin_lock_irqsave(&npinfo->rx_lock, flags);
421 list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) {
422 if (np->dev == skb->dev)
423 hits++;
424 }
425 spin_unlock_irqrestore(&npinfo->rx_lock, flags);
411 426
412 if (npinfo->rx_np && npinfo->rx_np->dev == skb->dev) 427 /* No netpoll struct is using this dev */
413 np = npinfo->rx_np; 428 if (!hits)
414 if (!np)
415 return; 429 return;
416 430
417 /* No arp on this interface */ 431 /* No arp on this interface */
@@ -437,77 +451,91 @@ static void arp_reply(struct sk_buff *skb)
437 arp_ptr += skb->dev->addr_len; 451 arp_ptr += skb->dev->addr_len;
438 memcpy(&sip, arp_ptr, 4); 452 memcpy(&sip, arp_ptr, 4);
439 arp_ptr += 4; 453 arp_ptr += 4;
440 /* if we actually cared about dst hw addr, it would get copied here */ 454 /* If we actually cared about dst hw addr,
455 it would get copied here */
441 arp_ptr += skb->dev->addr_len; 456 arp_ptr += skb->dev->addr_len;
442 memcpy(&tip, arp_ptr, 4); 457 memcpy(&tip, arp_ptr, 4);
443 458
444 /* Should we ignore arp? */ 459 /* Should we ignore arp? */
445 if (tip != np->local_ip || 460 if (ipv4_is_loopback(tip) || ipv4_is_multicast(tip))
446 ipv4_is_loopback(tip) || ipv4_is_multicast(tip))
447 return; 461 return;
448 462
449 size = arp_hdr_len(skb->dev); 463 size = arp_hdr_len(skb->dev);
450 send_skb = find_skb(np, size + LL_ALLOCATED_SPACE(np->dev),
451 LL_RESERVED_SPACE(np->dev));
452 464
453 if (!send_skb) 465 spin_lock_irqsave(&npinfo->rx_lock, flags);
454 return; 466 list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) {
455 467 if (tip != np->local_ip)
456 skb_reset_network_header(send_skb); 468 continue;
457 arp = (struct arphdr *) skb_put(send_skb, size);
458 send_skb->dev = skb->dev;
459 send_skb->protocol = htons(ETH_P_ARP);
460 469
461 /* Fill the device header for the ARP frame */ 470 send_skb = find_skb(np, size + LL_ALLOCATED_SPACE(np->dev),
462 if (dev_hard_header(send_skb, skb->dev, ptype, 471 LL_RESERVED_SPACE(np->dev));
463 sha, np->dev->dev_addr, 472 if (!send_skb)
464 send_skb->len) < 0) { 473 continue;
465 kfree_skb(send_skb);
466 return;
467 }
468 474
469 /* 475 skb_reset_network_header(send_skb);
470 * Fill out the arp protocol part. 476 arp = (struct arphdr *) skb_put(send_skb, size);
471 * 477 send_skb->dev = skb->dev;
472 * we only support ethernet device type, 478 send_skb->protocol = htons(ETH_P_ARP);
473 * which (according to RFC 1390) should always equal 1 (Ethernet).
474 */
475 479
476 arp->ar_hrd = htons(np->dev->type); 480 /* Fill the device header for the ARP frame */
477 arp->ar_pro = htons(ETH_P_IP); 481 if (dev_hard_header(send_skb, skb->dev, ptype,
478 arp->ar_hln = np->dev->addr_len; 482 sha, np->dev->dev_addr,
479 arp->ar_pln = 4; 483 send_skb->len) < 0) {
480 arp->ar_op = htons(type); 484 kfree_skb(send_skb);
485 continue;
486 }
481 487
482 arp_ptr=(unsigned char *)(arp + 1); 488 /*
483 memcpy(arp_ptr, np->dev->dev_addr, np->dev->addr_len); 489 * Fill out the arp protocol part.
484 arp_ptr += np->dev->addr_len; 490 *
485 memcpy(arp_ptr, &tip, 4); 491 * we only support ethernet device type,
486 arp_ptr += 4; 492 * which (according to RFC 1390) should
487 memcpy(arp_ptr, sha, np->dev->addr_len); 493 * always equal 1 (Ethernet).
488 arp_ptr += np->dev->addr_len; 494 */
489 memcpy(arp_ptr, &sip, 4);
490 495
491 netpoll_send_skb(np, send_skb); 496 arp->ar_hrd = htons(np->dev->type);
497 arp->ar_pro = htons(ETH_P_IP);
498 arp->ar_hln = np->dev->addr_len;
499 arp->ar_pln = 4;
500 arp->ar_op = htons(type);
501
502 arp_ptr = (unsigned char *)(arp + 1);
503 memcpy(arp_ptr, np->dev->dev_addr, np->dev->addr_len);
504 arp_ptr += np->dev->addr_len;
505 memcpy(arp_ptr, &tip, 4);
506 arp_ptr += 4;
507 memcpy(arp_ptr, sha, np->dev->addr_len);
508 arp_ptr += np->dev->addr_len;
509 memcpy(arp_ptr, &sip, 4);
510
511 netpoll_send_skb(np, send_skb);
512
513 /* If there are several rx_hooks for the same address,
514 we're fine by sending a single reply */
515 break;
516 }
517 spin_unlock_irqrestore(&npinfo->rx_lock, flags);
492} 518}
493 519
494int __netpoll_rx(struct sk_buff *skb) 520int __netpoll_rx(struct sk_buff *skb)
495{ 521{
496 int proto, len, ulen; 522 int proto, len, ulen;
523 int hits = 0;
497 struct iphdr *iph; 524 struct iphdr *iph;
498 struct udphdr *uh; 525 struct udphdr *uh;
499 struct netpoll_info *npi = skb->dev->npinfo; 526 struct netpoll_info *npinfo = skb->dev->npinfo;
500 struct netpoll *np = npi->rx_np; 527 struct netpoll *np, *tmp;
501 528
502 if (!np) 529 if (list_empty(&npinfo->rx_np))
503 goto out; 530 goto out;
531
504 if (skb->dev->type != ARPHRD_ETHER) 532 if (skb->dev->type != ARPHRD_ETHER)
505 goto out; 533 goto out;
506 534
507 /* check if netpoll clients need ARP */ 535 /* check if netpoll clients need ARP */
508 if (skb->protocol == htons(ETH_P_ARP) && 536 if (skb->protocol == htons(ETH_P_ARP) &&
509 atomic_read(&trapped)) { 537 atomic_read(&trapped)) {
510 skb_queue_tail(&npi->arp_tx, skb); 538 skb_queue_tail(&npinfo->arp_tx, skb);
511 return 1; 539 return 1;
512 } 540 }
513 541
@@ -551,16 +579,23 @@ int __netpoll_rx(struct sk_buff *skb)
551 goto out; 579 goto out;
552 if (checksum_udp(skb, uh, ulen, iph->saddr, iph->daddr)) 580 if (checksum_udp(skb, uh, ulen, iph->saddr, iph->daddr))
553 goto out; 581 goto out;
554 if (np->local_ip && np->local_ip != iph->daddr)
555 goto out;
556 if (np->remote_ip && np->remote_ip != iph->saddr)
557 goto out;
558 if (np->local_port && np->local_port != ntohs(uh->dest))
559 goto out;
560 582
561 np->rx_hook(np, ntohs(uh->source), 583 list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) {
562 (char *)(uh+1), 584 if (np->local_ip && np->local_ip != iph->daddr)
563 ulen - sizeof(struct udphdr)); 585 continue;
586 if (np->remote_ip && np->remote_ip != iph->saddr)
587 continue;
588 if (np->local_port && np->local_port != ntohs(uh->dest))
589 continue;
590
591 np->rx_hook(np, ntohs(uh->source),
592 (char *)(uh+1),
593 ulen - sizeof(struct udphdr));
594 hits++;
595 }
596
597 if (!hits)
598 goto out;
564 599
565 kfree_skb(skb); 600 kfree_skb(skb);
566 return 1; 601 return 1;
@@ -580,7 +615,7 @@ void netpoll_print_options(struct netpoll *np)
580 np->name, np->local_port); 615 np->name, np->local_port);
581 printk(KERN_INFO "%s: local IP %pI4\n", 616 printk(KERN_INFO "%s: local IP %pI4\n",
582 np->name, &np->local_ip); 617 np->name, &np->local_ip);
583 printk(KERN_INFO "%s: interface %s\n", 618 printk(KERN_INFO "%s: interface '%s'\n",
584 np->name, np->dev_name); 619 np->name, np->dev_name);
585 printk(KERN_INFO "%s: remote port %d\n", 620 printk(KERN_INFO "%s: remote port %d\n",
586 np->name, np->remote_port); 621 np->name, np->remote_port);
@@ -627,6 +662,9 @@ int netpoll_parse_options(struct netpoll *np, char *opt)
627 if ((delim = strchr(cur, '@')) == NULL) 662 if ((delim = strchr(cur, '@')) == NULL)
628 goto parse_failed; 663 goto parse_failed;
629 *delim = 0; 664 *delim = 0;
665 if (*cur == ' ' || *cur == '\t')
666 printk(KERN_INFO "%s: warning: whitespace"
667 "is not allowed\n", np->name);
630 np->remote_port = simple_strtol(cur, NULL, 10); 668 np->remote_port = simple_strtol(cur, NULL, 10);
631 cur = delim; 669 cur = delim;
632 } 670 }
@@ -674,7 +712,7 @@ int netpoll_parse_options(struct netpoll *np, char *opt)
674 return 0; 712 return 0;
675 713
676 parse_failed: 714 parse_failed:
677 printk(KERN_INFO "%s: couldn't parse config at %s!\n", 715 printk(KERN_INFO "%s: couldn't parse config at '%s'!\n",
678 np->name, cur); 716 np->name, cur);
679 return -1; 717 return -1;
680} 718}
@@ -684,6 +722,7 @@ int netpoll_setup(struct netpoll *np)
684 struct net_device *ndev = NULL; 722 struct net_device *ndev = NULL;
685 struct in_device *in_dev; 723 struct in_device *in_dev;
686 struct netpoll_info *npinfo; 724 struct netpoll_info *npinfo;
725 struct netpoll *npe, *tmp;
687 unsigned long flags; 726 unsigned long flags;
688 int err; 727 int err;
689 728
@@ -700,11 +739,11 @@ int netpoll_setup(struct netpoll *np)
700 npinfo = kmalloc(sizeof(*npinfo), GFP_KERNEL); 739 npinfo = kmalloc(sizeof(*npinfo), GFP_KERNEL);
701 if (!npinfo) { 740 if (!npinfo) {
702 err = -ENOMEM; 741 err = -ENOMEM;
703 goto release; 742 goto put;
704 } 743 }
705 744
706 npinfo->rx_flags = 0; 745 npinfo->rx_flags = 0;
707 npinfo->rx_np = NULL; 746 INIT_LIST_HEAD(&npinfo->rx_np);
708 747
709 spin_lock_init(&npinfo->rx_lock); 748 spin_lock_init(&npinfo->rx_lock);
710 skb_queue_head_init(&npinfo->arp_tx); 749 skb_queue_head_init(&npinfo->arp_tx);
@@ -785,7 +824,7 @@ int netpoll_setup(struct netpoll *np)
785 if (np->rx_hook) { 824 if (np->rx_hook) {
786 spin_lock_irqsave(&npinfo->rx_lock, flags); 825 spin_lock_irqsave(&npinfo->rx_lock, flags);
787 npinfo->rx_flags |= NETPOLL_RX_ENABLED; 826 npinfo->rx_flags |= NETPOLL_RX_ENABLED;
788 npinfo->rx_np = np; 827 list_add_tail(&np->rx, &npinfo->rx_np);
789 spin_unlock_irqrestore(&npinfo->rx_lock, flags); 828 spin_unlock_irqrestore(&npinfo->rx_lock, flags);
790 } 829 }
791 830
@@ -801,9 +840,16 @@ int netpoll_setup(struct netpoll *np)
801 return 0; 840 return 0;
802 841
803 release: 842 release:
804 if (!ndev->npinfo) 843 if (!ndev->npinfo) {
844 spin_lock_irqsave(&npinfo->rx_lock, flags);
845 list_for_each_entry_safe(npe, tmp, &npinfo->rx_np, rx) {
846 npe->dev = NULL;
847 }
848 spin_unlock_irqrestore(&npinfo->rx_lock, flags);
849
805 kfree(npinfo); 850 kfree(npinfo);
806 np->dev = NULL; 851 }
852put:
807 dev_put(ndev); 853 dev_put(ndev);
808 return err; 854 return err;
809} 855}
@@ -823,10 +869,11 @@ void netpoll_cleanup(struct netpoll *np)
823 if (np->dev) { 869 if (np->dev) {
824 npinfo = np->dev->npinfo; 870 npinfo = np->dev->npinfo;
825 if (npinfo) { 871 if (npinfo) {
826 if (npinfo->rx_np == np) { 872 if (!list_empty(&npinfo->rx_np)) {
827 spin_lock_irqsave(&npinfo->rx_lock, flags); 873 spin_lock_irqsave(&npinfo->rx_lock, flags);
828 npinfo->rx_np = NULL; 874 list_del(&np->rx);
829 npinfo->rx_flags &= ~NETPOLL_RX_ENABLED; 875 if (list_empty(&npinfo->rx_np))
876 npinfo->rx_flags &= ~NETPOLL_RX_ENABLED;
830 spin_unlock_irqrestore(&npinfo->rx_lock, flags); 877 spin_unlock_irqrestore(&npinfo->rx_lock, flags);
831 } 878 }
832 879
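
Editor's note: the theme of the netpoll diff is converting the single npinfo->rx_np pointer into the rx_np list, so several netpoll clients — each with its own rx_hook — can share one interface; arp_reply() and __netpoll_rx() now walk the list and count hits to decide whether the packet was consumed. A hedged sketch of that match-and-count shape, with placeholder types rather than the kernel's:

#include <linux/list.h>

struct listener {
	struct list_head node;
	unsigned short port;		/* 0 = wildcard */
	void (*hook)(struct listener *l, const void *data, int len);
};

/* Returns how many listeners took the packet; 0 tells the caller
 * to fall back to the normal receive path. */
static int deliver(struct list_head *listeners,
		   unsigned short dport, const void *data, int len)
{
	struct listener *l, *tmp;
	int hits = 0;

	list_for_each_entry_safe(l, tmp, listeners, node) {
		if (l->port && l->port != dport)
			continue;
		l->hook(l, data, len);
		hits++;
	}
	return hits;
}
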
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 6e79e96cb4f2..43923811bd6a 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -250,8 +250,7 @@ struct pktgen_dev {
250 __u64 count; /* Default No packets to send */ 250 __u64 count; /* Default No packets to send */
251 __u64 sofar; /* How many pkts we've sent so far */ 251 __u64 sofar; /* How many pkts we've sent so far */
252 __u64 tx_bytes; /* How many bytes we've transmitted */ 252 __u64 tx_bytes; /* How many bytes we've transmitted */
 253 __u64 errors; /* Errors when trying to transmit, 253 __u64 errors; /* Errors when trying to transmit */
 254 pkts will be re-sent
255 254
256 /* runtime counters relating to clone_skb */ 255 /* runtime counters relating to clone_skb */
257 256
@@ -340,6 +339,7 @@ struct pktgen_dev {
340 __u16 cur_udp_src; 339 __u16 cur_udp_src;
341 __u16 cur_queue_map; 340 __u16 cur_queue_map;
342 __u32 cur_pkt_size; 341 __u32 cur_pkt_size;
342 __u32 last_pkt_size;
343 343
344 __u8 hh[14]; 344 __u8 hh[14];
345 /* = { 345 /* = {
@@ -2051,9 +2051,8 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev)
2051 read_lock_bh(&idev->lock); 2051 read_lock_bh(&idev->lock);
2052 for (ifp = idev->addr_list; ifp; 2052 for (ifp = idev->addr_list; ifp;
2053 ifp = ifp->if_next) { 2053 ifp = ifp->if_next) {
2054 if (ifp->scope == IFA_LINK 2054 if (ifp->scope == IFA_LINK &&
2055 && !(ifp-> 2055 !(ifp->flags & IFA_F_TENTATIVE)) {
2056 flags & IFA_F_TENTATIVE)) {
2057 ipv6_addr_copy(&pkt_dev-> 2056 ipv6_addr_copy(&pkt_dev->
2058 cur_in6_saddr, 2057 cur_in6_saddr,
2059 &ifp->addr); 2058 &ifp->addr);
@@ -2189,12 +2188,13 @@ static inline int f_pick(struct pktgen_dev *pkt_dev)
2189/* If there was already an IPSEC SA, we keep it as is, else 2188/* If there was already an IPSEC SA, we keep it as is, else
2190 * we go look for it ... 2189 * we go look for it ...
2191*/ 2190*/
2191#define DUMMY_MARK 0
2192static void get_ipsec_sa(struct pktgen_dev *pkt_dev, int flow) 2192static void get_ipsec_sa(struct pktgen_dev *pkt_dev, int flow)
2193{ 2193{
2194 struct xfrm_state *x = pkt_dev->flows[flow].x; 2194 struct xfrm_state *x = pkt_dev->flows[flow].x;
2195 if (!x) { 2195 if (!x) {
2196 /*slow path: we dont already have xfrm_state*/ 2196 /*slow path: we dont already have xfrm_state*/
2197 x = xfrm_stateonly_find(&init_net, 2197 x = xfrm_stateonly_find(&init_net, DUMMY_MARK,
2198 (xfrm_address_t *)&pkt_dev->cur_daddr, 2198 (xfrm_address_t *)&pkt_dev->cur_daddr,
2199 (xfrm_address_t *)&pkt_dev->cur_saddr, 2199 (xfrm_address_t *)&pkt_dev->cur_saddr,
2200 AF_INET, 2200 AF_INET,
@@ -3436,7 +3436,7 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev)
3436 pkt_dev->clone_count--; /* back out increment, OOM */ 3436 pkt_dev->clone_count--; /* back out increment, OOM */
3437 return; 3437 return;
3438 } 3438 }
3439 3439 pkt_dev->last_pkt_size = pkt_dev->skb->len;
3440 pkt_dev->allocated_skbs++; 3440 pkt_dev->allocated_skbs++;
3441 pkt_dev->clone_count = 0; /* reset counter */ 3441 pkt_dev->clone_count = 0; /* reset counter */
3442 } 3442 }
@@ -3448,12 +3448,14 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev)
3448 txq = netdev_get_tx_queue(odev, queue_map); 3448 txq = netdev_get_tx_queue(odev, queue_map);
3449 3449
3450 __netif_tx_lock_bh(txq); 3450 __netif_tx_lock_bh(txq);
3451 atomic_inc(&(pkt_dev->skb->users));
3452 3451
3453 if (unlikely(netif_tx_queue_stopped(txq) || netif_tx_queue_frozen(txq))) 3452 if (unlikely(netif_tx_queue_stopped(txq) || netif_tx_queue_frozen(txq))) {
3454 ret = NETDEV_TX_BUSY; 3453 ret = NETDEV_TX_BUSY;
3455 else 3454 pkt_dev->last_ok = 0;
3456 ret = (*xmit)(pkt_dev->skb, odev); 3455 goto unlock;
3456 }
3457 atomic_inc(&(pkt_dev->skb->users));
3458 ret = (*xmit)(pkt_dev->skb, odev);
3457 3459
3458 switch (ret) { 3460 switch (ret) {
3459 case NETDEV_TX_OK: 3461 case NETDEV_TX_OK:
@@ -3461,7 +3463,13 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev)
3461 pkt_dev->last_ok = 1; 3463 pkt_dev->last_ok = 1;
3462 pkt_dev->sofar++; 3464 pkt_dev->sofar++;
3463 pkt_dev->seq_num++; 3465 pkt_dev->seq_num++;
3464 pkt_dev->tx_bytes += pkt_dev->cur_pkt_size; 3466 pkt_dev->tx_bytes += pkt_dev->last_pkt_size;
3467 break;
3468 case NET_XMIT_DROP:
3469 case NET_XMIT_CN:
3470 case NET_XMIT_POLICED:
3471 /* skb has been consumed */
3472 pkt_dev->errors++;
3465 break; 3473 break;
3466 default: /* Drivers are not supposed to return other values! */ 3474 default: /* Drivers are not supposed to return other values! */
3467 if (net_ratelimit()) 3475 if (net_ratelimit())
@@ -3475,6 +3483,7 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev)
3475 atomic_dec(&(pkt_dev->skb->users)); 3483 atomic_dec(&(pkt_dev->skb->users));
3476 pkt_dev->last_ok = 0; 3484 pkt_dev->last_ok = 0;
3477 } 3485 }
3486unlock:
3478 __netif_tx_unlock_bh(txq); 3487 __netif_tx_unlock_bh(txq);
3479 3488
3480 /* If pkt_dev->count is zero, then run forever */ 3489 /* If pkt_dev->count is zero, then run forever */
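
Editor's note: the last_pkt_size change above fixes a use-after-free flavor of bug — once ndo_start_xmit() returns NETDEV_TX_OK the driver owns the skb and may already have freed it, so pkt_dev->skb->len must not be read afterwards for the tx_bytes accounting. A hedged sketch of the general shape (names illustrative):

#include <linux/netdevice.h>
#include <linux/skbuff.h>

static netdev_tx_t xmit_and_count(struct sk_buff *skb,
				  struct net_device *dev,
				  u64 *tx_bytes)
{
	/* snapshot before the handoff: on NETDEV_TX_OK the driver
	 * owns (and may already have freed) the skb */
	unsigned int len = skb->len;
	netdev_tx_t ret = dev->netdev_ops->ndo_start_xmit(skb, dev);

	if (ret == NETDEV_TX_OK)
		*tx_bytes += len;	/* never touch skb here */
	return ret;
}
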
@@ -3516,6 +3525,7 @@ static int pktgen_thread_worker(void *arg)
3516 wait_event_interruptible_timeout(t->queue, 3525 wait_event_interruptible_timeout(t->queue,
3517 t->control != 0, 3526 t->control != 0,
3518 HZ/10); 3527 HZ/10);
3528 try_to_freeze();
3519 continue; 3529 continue;
3520 } 3530 }
3521 3531
@@ -3622,6 +3632,7 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname)
3622{ 3632{
3623 struct pktgen_dev *pkt_dev; 3633 struct pktgen_dev *pkt_dev;
3624 int err; 3634 int err;
3635 int node = cpu_to_node(t->cpu);
3625 3636
3626 /* We don't allow a device to be on several threads */ 3637 /* We don't allow a device to be on several threads */
3627 3638
@@ -3631,12 +3642,13 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname)
3631 return -EBUSY; 3642 return -EBUSY;
3632 } 3643 }
3633 3644
3634 pkt_dev = kzalloc(sizeof(struct pktgen_dev), GFP_KERNEL); 3645 pkt_dev = kzalloc_node(sizeof(struct pktgen_dev), GFP_KERNEL, node);
3635 if (!pkt_dev) 3646 if (!pkt_dev)
3636 return -ENOMEM; 3647 return -ENOMEM;
3637 3648
3638 strcpy(pkt_dev->odevname, ifname); 3649 strcpy(pkt_dev->odevname, ifname);
3639 pkt_dev->flows = vmalloc(MAX_CFLOWS * sizeof(struct flow_state)); 3650 pkt_dev->flows = vmalloc_node(MAX_CFLOWS * sizeof(struct flow_state),
3651 node);
3640 if (pkt_dev->flows == NULL) { 3652 if (pkt_dev->flows == NULL) {
3641 kfree(pkt_dev); 3653 kfree(pkt_dev);
3642 return -ENOMEM; 3654 return -ENOMEM;
@@ -3698,7 +3710,8 @@ static int __init pktgen_create_thread(int cpu)
3698 struct proc_dir_entry *pe; 3710 struct proc_dir_entry *pe;
3699 struct task_struct *p; 3711 struct task_struct *p;
3700 3712
3701 t = kzalloc(sizeof(struct pktgen_thread), GFP_KERNEL); 3713 t = kzalloc_node(sizeof(struct pktgen_thread), GFP_KERNEL,
3714 cpu_to_node(cpu));
3702 if (!t) { 3715 if (!t) {
3703 printk(KERN_ERR "pktgen: ERROR: out of memory, can't " 3716 printk(KERN_ERR "pktgen: ERROR: out of memory, can't "
3704 "create new thread.\n"); 3717 "create new thread.\n");
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index eb42873f2a3a..31e85d327aa2 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -35,10 +35,10 @@
35#include <linux/security.h> 35#include <linux/security.h>
36#include <linux/mutex.h> 36#include <linux/mutex.h>
37#include <linux/if_addr.h> 37#include <linux/if_addr.h>
38#include <linux/pci.h>
38 39
39#include <asm/uaccess.h> 40#include <asm/uaccess.h>
40#include <asm/system.h> 41#include <asm/system.h>
41#include <asm/string.h>
42 42
43#include <linux/inet.h> 43#include <linux/inet.h>
44#include <linux/netdevice.h> 44#include <linux/netdevice.h>
@@ -53,8 +53,7 @@
53#include <net/rtnetlink.h> 53#include <net/rtnetlink.h>
54#include <net/net_namespace.h> 54#include <net/net_namespace.h>
55 55
56struct rtnl_link 56struct rtnl_link {
57{
58 rtnl_doit_func doit; 57 rtnl_doit_func doit;
59 rtnl_dumpit_func dumpit; 58 rtnl_dumpit_func dumpit;
60}; 59};
@@ -65,6 +64,7 @@ void rtnl_lock(void)
65{ 64{
66 mutex_lock(&rtnl_mutex); 65 mutex_lock(&rtnl_mutex);
67} 66}
67EXPORT_SYMBOL(rtnl_lock);
68 68
69void __rtnl_unlock(void) 69void __rtnl_unlock(void)
70{ 70{
@@ -76,16 +76,27 @@ void rtnl_unlock(void)
76 /* This fellow will unlock it for us. */ 76 /* This fellow will unlock it for us. */
77 netdev_run_todo(); 77 netdev_run_todo();
78} 78}
79EXPORT_SYMBOL(rtnl_unlock);
79 80
80int rtnl_trylock(void) 81int rtnl_trylock(void)
81{ 82{
82 return mutex_trylock(&rtnl_mutex); 83 return mutex_trylock(&rtnl_mutex);
83} 84}
85EXPORT_SYMBOL(rtnl_trylock);
84 86
85int rtnl_is_locked(void) 87int rtnl_is_locked(void)
86{ 88{
87 return mutex_is_locked(&rtnl_mutex); 89 return mutex_is_locked(&rtnl_mutex);
88} 90}
91EXPORT_SYMBOL(rtnl_is_locked);
92
93#ifdef CONFIG_PROVE_LOCKING
94int lockdep_rtnl_is_held(void)
95{
96 return lockdep_is_held(&rtnl_mutex);
97}
98EXPORT_SYMBOL(lockdep_rtnl_is_held);
99#endif /* #ifdef CONFIG_PROVE_LOCKING */
89 100
90static struct rtnl_link *rtnl_msg_handlers[NPROTO]; 101static struct rtnl_link *rtnl_msg_handlers[NPROTO];
91 102
@@ -168,7 +179,6 @@ int __rtnl_register(int protocol, int msgtype,
168 179
169 return 0; 180 return 0;
170} 181}
171
172EXPORT_SYMBOL_GPL(__rtnl_register); 182EXPORT_SYMBOL_GPL(__rtnl_register);
173 183
174/** 184/**
@@ -188,7 +198,6 @@ void rtnl_register(int protocol, int msgtype,
188 "protocol = %d, message type = %d\n", 198 "protocol = %d, message type = %d\n",
189 protocol, msgtype); 199 protocol, msgtype);
190} 200}
191
192EXPORT_SYMBOL_GPL(rtnl_register); 201EXPORT_SYMBOL_GPL(rtnl_register);
193 202
194/** 203/**
@@ -213,7 +222,6 @@ int rtnl_unregister(int protocol, int msgtype)
213 222
214 return 0; 223 return 0;
215} 224}
216
217EXPORT_SYMBOL_GPL(rtnl_unregister); 225EXPORT_SYMBOL_GPL(rtnl_unregister);
218 226
219/** 227/**
@@ -230,7 +238,6 @@ void rtnl_unregister_all(int protocol)
230 kfree(rtnl_msg_handlers[protocol]); 238 kfree(rtnl_msg_handlers[protocol]);
231 rtnl_msg_handlers[protocol] = NULL; 239 rtnl_msg_handlers[protocol] = NULL;
232} 240}
233
234EXPORT_SYMBOL_GPL(rtnl_unregister_all); 241EXPORT_SYMBOL_GPL(rtnl_unregister_all);
235 242
236static LIST_HEAD(link_ops); 243static LIST_HEAD(link_ops);
@@ -248,12 +255,11 @@ static LIST_HEAD(link_ops);
248int __rtnl_link_register(struct rtnl_link_ops *ops) 255int __rtnl_link_register(struct rtnl_link_ops *ops)
249{ 256{
250 if (!ops->dellink) 257 if (!ops->dellink)
251 ops->dellink = unregister_netdevice; 258 ops->dellink = unregister_netdevice_queue;
252 259
253 list_add_tail(&ops->list, &link_ops); 260 list_add_tail(&ops->list, &link_ops);
254 return 0; 261 return 0;
255} 262}
256
257EXPORT_SYMBOL_GPL(__rtnl_link_register); 263EXPORT_SYMBOL_GPL(__rtnl_link_register);
258 264
259/** 265/**
@@ -271,19 +277,18 @@ int rtnl_link_register(struct rtnl_link_ops *ops)
271 rtnl_unlock(); 277 rtnl_unlock();
272 return err; 278 return err;
273} 279}
274
275EXPORT_SYMBOL_GPL(rtnl_link_register); 280EXPORT_SYMBOL_GPL(rtnl_link_register);
276 281
277static void __rtnl_kill_links(struct net *net, struct rtnl_link_ops *ops) 282static void __rtnl_kill_links(struct net *net, struct rtnl_link_ops *ops)
278{ 283{
279 struct net_device *dev; 284 struct net_device *dev;
280restart: 285 LIST_HEAD(list_kill);
286
281 for_each_netdev(net, dev) { 287 for_each_netdev(net, dev) {
282 if (dev->rtnl_link_ops == ops) { 288 if (dev->rtnl_link_ops == ops)
283 ops->dellink(dev); 289 ops->dellink(dev, &list_kill);
284 goto restart;
285 }
286 } 290 }
291 unregister_netdevice_many(&list_kill);
287} 292}
288 293
289void rtnl_kill_links(struct net *net, struct rtnl_link_ops *ops) 294void rtnl_kill_links(struct net *net, struct rtnl_link_ops *ops)
@@ -309,7 +314,6 @@ void __rtnl_link_unregister(struct rtnl_link_ops *ops)
309 } 314 }
310 list_del(&ops->list); 315 list_del(&ops->list);
311} 316}
312
313EXPORT_SYMBOL_GPL(__rtnl_link_unregister); 317EXPORT_SYMBOL_GPL(__rtnl_link_unregister);
314 318
315/** 319/**
@@ -322,7 +326,6 @@ void rtnl_link_unregister(struct rtnl_link_ops *ops)
322 __rtnl_link_unregister(ops); 326 __rtnl_link_unregister(ops);
323 rtnl_unlock(); 327 rtnl_unlock();
324} 328}
325
326EXPORT_SYMBOL_GPL(rtnl_link_unregister); 329EXPORT_SYMBOL_GPL(rtnl_link_unregister);
327 330
328static const struct rtnl_link_ops *rtnl_link_ops_get(const char *kind) 331static const struct rtnl_link_ops *rtnl_link_ops_get(const char *kind)
@@ -427,12 +430,13 @@ void __rta_fill(struct sk_buff *skb, int attrtype, int attrlen, const void *data
427 struct rtattr *rta; 430 struct rtattr *rta;
428 int size = RTA_LENGTH(attrlen); 431 int size = RTA_LENGTH(attrlen);
429 432
430 rta = (struct rtattr*)skb_put(skb, RTA_ALIGN(size)); 433 rta = (struct rtattr *)skb_put(skb, RTA_ALIGN(size));
431 rta->rta_type = attrtype; 434 rta->rta_type = attrtype;
432 rta->rta_len = size; 435 rta->rta_len = size;
433 memcpy(RTA_DATA(rta), data, attrlen); 436 memcpy(RTA_DATA(rta), data, attrlen);
434 memset(RTA_DATA(rta) + attrlen, 0, RTA_ALIGN(size) - size); 437 memset(RTA_DATA(rta) + attrlen, 0, RTA_ALIGN(size) - size);
435} 438}
439EXPORT_SYMBOL(__rta_fill);
436 440
437int rtnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, unsigned group, int echo) 441int rtnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, unsigned group, int echo)
438{ 442{
@@ -454,6 +458,7 @@ int rtnl_unicast(struct sk_buff *skb, struct net *net, u32 pid)
454 458
455 return nlmsg_unicast(rtnl, skb, pid); 459 return nlmsg_unicast(rtnl, skb, pid);
456} 460}
461EXPORT_SYMBOL(rtnl_unicast);
457 462
458void rtnl_notify(struct sk_buff *skb, struct net *net, u32 pid, u32 group, 463void rtnl_notify(struct sk_buff *skb, struct net *net, u32 pid, u32 group,
459 struct nlmsghdr *nlh, gfp_t flags) 464 struct nlmsghdr *nlh, gfp_t flags)
@@ -466,6 +471,7 @@ void rtnl_notify(struct sk_buff *skb, struct net *net, u32 pid, u32 group,
466 471
467 nlmsg_notify(rtnl, skb, pid, group, report, flags); 472 nlmsg_notify(rtnl, skb, pid, group, report, flags);
468} 473}
474EXPORT_SYMBOL(rtnl_notify);
469 475
470void rtnl_set_sk_err(struct net *net, u32 group, int error) 476void rtnl_set_sk_err(struct net *net, u32 group, int error)
471{ 477{
@@ -473,6 +479,7 @@ void rtnl_set_sk_err(struct net *net, u32 group, int error)
473 479
474 netlink_set_err(rtnl, 0, group, error); 480 netlink_set_err(rtnl, 0, group, error);
475} 481}
482EXPORT_SYMBOL(rtnl_set_sk_err);
476 483
477int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics) 484int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics)
478{ 485{
@@ -501,6 +508,7 @@ nla_put_failure:
501 nla_nest_cancel(skb, mx); 508 nla_nest_cancel(skb, mx);
502 return -EMSGSIZE; 509 return -EMSGSIZE;
503} 510}
511EXPORT_SYMBOL(rtnetlink_put_metrics);
504 512
505int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, u32 id, 513int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, u32 id,
506 u32 ts, u32 tsage, long expires, u32 error) 514 u32 ts, u32 tsage, long expires, u32 error)
@@ -520,14 +528,13 @@ int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, u32 id,
520 528
521 return nla_put(skb, RTA_CACHEINFO, sizeof(ci), &ci); 529 return nla_put(skb, RTA_CACHEINFO, sizeof(ci), &ci);
522} 530}
523
524EXPORT_SYMBOL_GPL(rtnl_put_cacheinfo); 531EXPORT_SYMBOL_GPL(rtnl_put_cacheinfo);
525 532
526static void set_operstate(struct net_device *dev, unsigned char transition) 533static void set_operstate(struct net_device *dev, unsigned char transition)
527{ 534{
528 unsigned char operstate = dev->operstate; 535 unsigned char operstate = dev->operstate;
529 536
530 switch(transition) { 537 switch (transition) {
531 case IF_OPER_UP: 538 case IF_OPER_UP:
532 if ((operstate == IF_OPER_DORMANT || 539 if ((operstate == IF_OPER_DORMANT ||
533 operstate == IF_OPER_UNKNOWN) && 540 operstate == IF_OPER_UNKNOWN) &&
@@ -550,6 +557,19 @@ static void set_operstate(struct net_device *dev, unsigned char transition)
550 } 557 }
551} 558}
552 559
560static unsigned int rtnl_dev_combine_flags(const struct net_device *dev,
561 const struct ifinfomsg *ifm)
562{
563 unsigned int flags = ifm->ifi_flags;
564
565 /* bugwards compatibility: ifi_change == 0 is treated as ~0 */
566 if (ifm->ifi_change)
567 flags = (flags & ifm->ifi_change) |
568 (dev->flags & ~ifm->ifi_change);
569
570 return flags;
571}
572
553static void copy_rtnl_link_stats(struct rtnl_link_stats *a, 573static void copy_rtnl_link_stats(struct rtnl_link_stats *a,
554 const struct net_device_stats *b) 574 const struct net_device_stats *b)
555{ 575{
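rtnl_dev_combine_flags() factors out the ifi_flags/ifi_change merge so do_setlink() and the new rtnl_configure_link() below compute it identically: bits selected by ifi_change come from the request, every other bit keeps the device's current value, and ifi_change == 0 keeps its historical meaning of "change everything". The arithmetic, as a small self-contained demo:

    #include <stdio.h>

    /* Same mask merge as rtnl_dev_combine_flags(). */
    static unsigned int combine(unsigned int dev_flags,
                                unsigned int req_flags, unsigned int change)
    {
            if (!change)                    /* ifi_change == 0 means change all */
                    return req_flags;
            return (req_flags & change) | (dev_flags & ~change);
    }

    int main(void)
    {
            /* set bit 0 (IFF_UP) while leaving an unrelated bit 3 alone */
            printf("%#x\n", combine(0x8, 0x1, 0x1));        /* prints 0x9 */
            return 0;
    }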
@@ -582,6 +602,22 @@ static void copy_rtnl_link_stats(struct rtnl_link_stats *a,
582 a->tx_compressed = b->tx_compressed; 602 a->tx_compressed = b->tx_compressed;
583}; 603};
584 604
605/* All VF info */
606static inline int rtnl_vfinfo_size(const struct net_device *dev)
607{
608 if (dev->dev.parent && dev_is_pci(dev->dev.parent)) {
609
610 int num_vfs = dev_num_vf(dev->dev.parent);
611 size_t size = nlmsg_total_size(sizeof(struct nlattr));
612 size += nlmsg_total_size(num_vfs * sizeof(struct nlattr));
613 size += num_vfs * (sizeof(struct ifla_vf_mac) +
614 sizeof(struct ifla_vf_vlan) +
615 sizeof(struct ifla_vf_tx_rate));
616 return size;
617 } else
618 return 0;
619}
620
585static inline size_t if_nlmsg_size(const struct net_device *dev) 621static inline size_t if_nlmsg_size(const struct net_device *dev)
586{ 622{
587 return NLMSG_ALIGN(sizeof(struct ifinfomsg)) 623 return NLMSG_ALIGN(sizeof(struct ifinfomsg))
@@ -599,6 +635,8 @@ static inline size_t if_nlmsg_size(const struct net_device *dev)
599 + nla_total_size(4) /* IFLA_MASTER */ 635 + nla_total_size(4) /* IFLA_MASTER */
600 + nla_total_size(1) /* IFLA_OPERSTATE */ 636 + nla_total_size(1) /* IFLA_OPERSTATE */
601 + nla_total_size(1) /* IFLA_LINKMODE */ 637 + nla_total_size(1) /* IFLA_LINKMODE */
638 + nla_total_size(4) /* IFLA_NUM_VF */
639 + rtnl_vfinfo_size(dev) /* IFLA_VFINFO_LIST */
602 + rtnl_link_get_size(dev); /* IFLA_LINKINFO */ 640 + rtnl_link_get_size(dev); /* IFLA_LINKINFO */
603} 641}
604 642
@@ -667,6 +705,40 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
667 stats = dev_get_stats(dev); 705 stats = dev_get_stats(dev);
668 copy_rtnl_link_stats(nla_data(attr), stats); 706 copy_rtnl_link_stats(nla_data(attr), stats);
669 707
708 if (dev->netdev_ops->ndo_get_vf_config && dev->dev.parent) {
709 int i;
710
711 struct nlattr *vfinfo, *vf;
712 int num_vfs = dev_num_vf(dev->dev.parent);
713
714 NLA_PUT_U32(skb, IFLA_NUM_VF, num_vfs);
715 vfinfo = nla_nest_start(skb, IFLA_VFINFO_LIST);
716 if (!vfinfo)
717 goto nla_put_failure;
718 for (i = 0; i < num_vfs; i++) {
719 struct ifla_vf_info ivi;
720 struct ifla_vf_mac vf_mac;
721 struct ifla_vf_vlan vf_vlan;
722 struct ifla_vf_tx_rate vf_tx_rate;
723 if (dev->netdev_ops->ndo_get_vf_config(dev, i, &ivi))
724 break;
725 vf_mac.vf = vf_vlan.vf = vf_tx_rate.vf = ivi.vf;
726 memcpy(vf_mac.mac, ivi.mac, sizeof(ivi.mac));
727 vf_vlan.vlan = ivi.vlan;
728 vf_vlan.qos = ivi.qos;
729 vf_tx_rate.rate = ivi.tx_rate;
730 vf = nla_nest_start(skb, IFLA_VF_INFO);
731 if (!vf) {
732 nla_nest_cancel(skb, vfinfo);
733 goto nla_put_failure;
734 }
735 NLA_PUT(skb, IFLA_VF_MAC, sizeof(vf_mac), &vf_mac);
736 NLA_PUT(skb, IFLA_VF_VLAN, sizeof(vf_vlan), &vf_vlan);
737 NLA_PUT(skb, IFLA_VF_TX_RATE, sizeof(vf_tx_rate), &vf_tx_rate);
738 nla_nest_end(skb, vf);
739 }
740 nla_nest_end(skb, vfinfo);
741 }
670 if (dev->rtnl_link_ops) { 742 if (dev->rtnl_link_ops) {
671 if (rtnl_link_fill(skb, dev) < 0) 743 if (rtnl_link_fill(skb, dev) < 0)
672 goto nla_put_failure; 744 goto nla_put_failure;
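The VF dump above follows the standard netlink nesting discipline: open an outer IFLA_VFINFO_LIST nest, open one IFLA_VF_INFO nest per VF, and on any mid-stream failure cancel the outer nest so no half-built attribute reaches userspace (cancelling the outer nest rewinds the inner one too, since it lies inside). A generic sketch with hypothetical attribute types MY_LIST, MY_ITEM and MY_VALUE:

    static int fill_list(struct sk_buff *skb, int n)
    {
            struct nlattr *list, *item;
            int i;

            list = nla_nest_start(skb, MY_LIST);
            if (!list)
                    return -EMSGSIZE;
            for (i = 0; i < n; i++) {
                    item = nla_nest_start(skb, MY_ITEM);
                    if (!item)
                            goto cancel;
                    if (nla_put_u32(skb, MY_VALUE, i))
                            goto cancel;
                    nla_nest_end(skb, item);
            }
            nla_nest_end(skb, list);
            return 0;

    cancel:
            nla_nest_cancel(skb, list);     /* trims the skb back past the nest */
            return -EMSGSIZE;
    }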
@@ -682,22 +754,33 @@ nla_put_failure:
682static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) 754static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
683{ 755{
684 struct net *net = sock_net(skb->sk); 756 struct net *net = sock_net(skb->sk);
685 int idx; 757 int h, s_h;
686 int s_idx = cb->args[0]; 758 int idx = 0, s_idx;
687 struct net_device *dev; 759 struct net_device *dev;
688 760 struct hlist_head *head;
689 idx = 0; 761 struct hlist_node *node;
690 for_each_netdev(net, dev) { 762
691 if (idx < s_idx) 763 s_h = cb->args[0];
692 goto cont; 764 s_idx = cb->args[1];
693 if (rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK, 765
694 NETLINK_CB(cb->skb).pid, 766 for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
695 cb->nlh->nlmsg_seq, 0, NLM_F_MULTI) <= 0) 767 idx = 0;
696 break; 768 head = &net->dev_index_head[h];
769 hlist_for_each_entry(dev, node, head, index_hlist) {
770 if (idx < s_idx)
771 goto cont;
772 if (rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK,
773 NETLINK_CB(cb->skb).pid,
774 cb->nlh->nlmsg_seq, 0,
775 NLM_F_MULTI) <= 0)
776 goto out;
697cont: 777cont:
698 idx++; 778 idx++;
779 }
699 } 780 }
700 cb->args[0] = idx; 781out:
782 cb->args[1] = idx;
783 cb->args[0] = h;
701 784
702 return skb->len; 785 return skb->len;
703} 786}
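rtnl_dump_ifinfo() now walks the per-namespace ifindex hash instead of the flat device list, and resumes with two cursors: cb->args[0] holds the bucket, cb->args[1] the position inside it, reset to zero whenever a bucket completes. The resume scheme in miniature, as a runnable demo that "fills" a three-record buffer per callback:

    #include <stdio.h>

    #define BUCKETS  4
    #define PER_CALL 3                      /* pretend one skb holds 3 records */

    static const int bucket_len[BUCKETS] = { 2, 5, 0, 1 };

    /* One dump callback: resume from (args[0], args[1]), like cb->args. */
    static int dump_once(long args[2])
    {
            int emitted = 0, h, idx = 0;

            for (h = args[0]; h < BUCKETS; h++, args[1] = 0) {
                    for (idx = 0; idx < bucket_len[h]; idx++) {
                            if (idx < args[1])
                                    continue;       /* sent on an earlier call */
                            if (emitted == PER_CALL)
                                    goto out;       /* buffer full: save cursors */
                            printf("record %d.%d\n", h, idx);
                            emitted++;
                    }
            }
    out:
            args[0] = h;
            args[1] = idx;
            return emitted;
    }

    int main(void)
    {
            long args[2] = { 0, 0 };

            while (dump_once(args) > 0)
                    printf("-- next callback --\n");
            return 0;
    }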
@@ -716,13 +799,42 @@ const struct nla_policy ifla_policy[IFLA_MAX+1] = {
716 [IFLA_LINKINFO] = { .type = NLA_NESTED }, 799 [IFLA_LINKINFO] = { .type = NLA_NESTED },
717 [IFLA_NET_NS_PID] = { .type = NLA_U32 }, 800 [IFLA_NET_NS_PID] = { .type = NLA_U32 },
718 [IFLA_IFALIAS] = { .type = NLA_STRING, .len = IFALIASZ-1 }, 801 [IFLA_IFALIAS] = { .type = NLA_STRING, .len = IFALIASZ-1 },
 802 [IFLA_VFINFO_LIST] = { .type = NLA_NESTED },
719}; 803};
804EXPORT_SYMBOL(ifla_policy);
720 805
721static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = { 806static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = {
722 [IFLA_INFO_KIND] = { .type = NLA_STRING }, 807 [IFLA_INFO_KIND] = { .type = NLA_STRING },
723 [IFLA_INFO_DATA] = { .type = NLA_NESTED }, 808 [IFLA_INFO_DATA] = { .type = NLA_NESTED },
724}; 809};
725 810
811static const struct nla_policy ifla_vfinfo_policy[IFLA_VF_INFO_MAX+1] = {
812 [IFLA_VF_INFO] = { .type = NLA_NESTED },
813};
814
815static const struct nla_policy ifla_vf_policy[IFLA_VF_MAX+1] = {
816 [IFLA_VF_MAC] = { .type = NLA_BINARY,
817 .len = sizeof(struct ifla_vf_mac) },
818 [IFLA_VF_VLAN] = { .type = NLA_BINARY,
819 .len = sizeof(struct ifla_vf_vlan) },
820 [IFLA_VF_TX_RATE] = { .type = NLA_BINARY,
821 .len = sizeof(struct ifla_vf_tx_rate) },
822};
823
824struct net *rtnl_link_get_net(struct net *src_net, struct nlattr *tb[])
825{
826 struct net *net;
827 /* Examine the link attributes and figure out which
828 * network namespace we are talking about.
829 */
830 if (tb[IFLA_NET_NS_PID])
831 net = get_net_ns_by_pid(nla_get_u32(tb[IFLA_NET_NS_PID]));
832 else
833 net = get_net(src_net);
834 return net;
835}
836EXPORT_SYMBOL(rtnl_link_get_net);
837
726static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[]) 838static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[])
727{ 839{
728 if (dev) { 840 if (dev) {
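rtnl_link_get_net() centralises the "which namespace did the request name?" decision, and both of its branches return a namespace with an elevated reference, so every caller owes exactly one put_net() regardless of whether IFLA_NET_NS_PID was present. A sketch of the contract (function name hypothetical):

    static int act_in_dest_net(struct net *src_net, struct nlattr *tb[])
    {
            struct net *dest_net = rtnl_link_get_net(src_net, tb);

            if (IS_ERR(dest_net))           /* get_net_ns_by_pid() may fail */
                    return PTR_ERR(dest_net);
            /* ... create the device in, or move it into, dest_net ... */
            put_net(dest_net);              /* balances either branch */
            return 0;
    }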
@@ -738,6 +850,52 @@ static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[])
738 return 0; 850 return 0;
739} 851}
740 852
853static int do_setvfinfo(struct net_device *dev, struct nlattr *attr)
854{
855 int rem, err = -EINVAL;
856 struct nlattr *vf;
857 const struct net_device_ops *ops = dev->netdev_ops;
858
859 nla_for_each_nested(vf, attr, rem) {
860 switch (nla_type(vf)) {
861 case IFLA_VF_MAC: {
862 struct ifla_vf_mac *ivm;
863 ivm = nla_data(vf);
864 err = -EOPNOTSUPP;
865 if (ops->ndo_set_vf_mac)
866 err = ops->ndo_set_vf_mac(dev, ivm->vf,
867 ivm->mac);
868 break;
869 }
870 case IFLA_VF_VLAN: {
871 struct ifla_vf_vlan *ivv;
872 ivv = nla_data(vf);
873 err = -EOPNOTSUPP;
874 if (ops->ndo_set_vf_vlan)
875 err = ops->ndo_set_vf_vlan(dev, ivv->vf,
876 ivv->vlan,
877 ivv->qos);
878 break;
879 }
880 case IFLA_VF_TX_RATE: {
881 struct ifla_vf_tx_rate *ivt;
882 ivt = nla_data(vf);
883 err = -EOPNOTSUPP;
884 if (ops->ndo_set_vf_tx_rate)
885 err = ops->ndo_set_vf_tx_rate(dev, ivt->vf,
886 ivt->rate);
887 break;
888 }
889 default:
890 err = -EINVAL;
891 break;
892 }
893 if (err)
894 break;
895 }
896 return err;
897}
898
741static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, 899static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
742 struct nlattr **tb, char *ifname, int modified) 900 struct nlattr **tb, char *ifname, int modified)
743{ 901{
@@ -746,8 +904,7 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
746 int err; 904 int err;
747 905
748 if (tb[IFLA_NET_NS_PID]) { 906 if (tb[IFLA_NET_NS_PID]) {
749 struct net *net; 907 struct net *net = rtnl_link_get_net(dev_net(dev), tb);
750 net = get_net_ns_by_pid(nla_get_u32(tb[IFLA_NET_NS_PID]));
751 if (IS_ERR(net)) { 908 if (IS_ERR(net)) {
752 err = PTR_ERR(net); 909 err = PTR_ERR(net);
753 goto errout; 910 goto errout;
@@ -852,13 +1009,7 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
852 } 1009 }
853 1010
854 if (ifm->ifi_flags || ifm->ifi_change) { 1011 if (ifm->ifi_flags || ifm->ifi_change) {
855 unsigned int flags = ifm->ifi_flags; 1012 err = dev_change_flags(dev, rtnl_dev_combine_flags(dev, ifm));
856
857 /* bugwards compatibility: ifi_change == 0 is treated as ~0 */
858 if (ifm->ifi_change)
859 flags = (flags & ifm->ifi_change) |
860 (dev->flags & ~ifm->ifi_change);
861 err = dev_change_flags(dev, flags);
862 if (err < 0) 1013 if (err < 0)
863 goto errout; 1014 goto errout;
864 } 1015 }
@@ -875,6 +1026,18 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
875 write_unlock_bh(&dev_base_lock); 1026 write_unlock_bh(&dev_base_lock);
876 } 1027 }
877 1028
1029 if (tb[IFLA_VFINFO_LIST]) {
1030 struct nlattr *attr;
1031 int rem;
1032 nla_for_each_nested(attr, tb[IFLA_VFINFO_LIST], rem) {
1033 if (nla_type(attr) != IFLA_VF_INFO)
1034 goto errout;
1035 err = do_setvfinfo(dev, attr);
1036 if (err < 0)
1037 goto errout;
1038 modified = 1;
1039 }
1040 }
878 err = 0; 1041 err = 0;
879 1042
880errout: 1043errout:
@@ -910,9 +1073,9 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
910 err = -EINVAL; 1073 err = -EINVAL;
911 ifm = nlmsg_data(nlh); 1074 ifm = nlmsg_data(nlh);
912 if (ifm->ifi_index > 0) 1075 if (ifm->ifi_index > 0)
913 dev = dev_get_by_index(net, ifm->ifi_index); 1076 dev = __dev_get_by_index(net, ifm->ifi_index);
914 else if (tb[IFLA_IFNAME]) 1077 else if (tb[IFLA_IFNAME])
915 dev = dev_get_by_name(net, ifname); 1078 dev = __dev_get_by_name(net, ifname);
916 else 1079 else
917 goto errout; 1080 goto errout;
918 1081
@@ -921,12 +1084,11 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
921 goto errout; 1084 goto errout;
922 } 1085 }
923 1086
924 if ((err = validate_linkmsg(dev, tb)) < 0) 1087 err = validate_linkmsg(dev, tb);
925 goto errout_dev; 1088 if (err < 0)
1089 goto errout;
926 1090
927 err = do_setlink(dev, ifm, tb, ifname, 0); 1091 err = do_setlink(dev, ifm, tb, ifname, 0);
928errout_dev:
929 dev_put(dev);
930errout: 1092errout:
931 return err; 1093 return err;
932} 1094}
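rtnl_setlink() switches from dev_get_by_index() to __dev_get_by_index(): the handler runs with RTNL held, so the device cannot be unregistered underneath it and the refcount round-trip (and the old errout_dev/dev_put() exit path) becomes unnecessary. The locked-lookup idiom:

    /* RTNL-held lookup: no refcount, so no dev_put() on any exit path. */
    static int poke_device(struct net *net, int ifindex)
    {
            struct net_device *dev;

            ASSERT_RTNL();
            dev = __dev_get_by_index(net, ifindex);
            if (!dev)
                    return -ENODEV;
            /* ... use dev freely until rtnl_unlock() ... */
            return 0;
    }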
@@ -963,12 +1125,32 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
963 if (!ops) 1125 if (!ops)
964 return -EOPNOTSUPP; 1126 return -EOPNOTSUPP;
965 1127
966 ops->dellink(dev); 1128 ops->dellink(dev, NULL);
967 return 0; 1129 return 0;
968} 1130}
969 1131
970struct net_device *rtnl_create_link(struct net *net, char *ifname, 1132int rtnl_configure_link(struct net_device *dev, const struct ifinfomsg *ifm)
971 const struct rtnl_link_ops *ops, struct nlattr *tb[]) 1133{
1134 unsigned int old_flags;
1135 int err;
1136
1137 old_flags = dev->flags;
1138 if (ifm && (ifm->ifi_flags || ifm->ifi_change)) {
1139 err = __dev_change_flags(dev, rtnl_dev_combine_flags(dev, ifm));
1140 if (err < 0)
1141 return err;
1142 }
1143
1144 dev->rtnl_link_state = RTNL_LINK_INITIALIZED;
1145 rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U);
1146
1147 __dev_notify_flags(dev, old_flags);
1148 return 0;
1149}
1150EXPORT_SYMBOL(rtnl_configure_link);
1151
1152struct net_device *rtnl_create_link(struct net *src_net, struct net *net,
1153 char *ifname, const struct rtnl_link_ops *ops, struct nlattr *tb[])
972{ 1154{
973 int err; 1155 int err;
974 struct net_device *dev; 1156 struct net_device *dev;
@@ -976,7 +1158,8 @@ struct net_device *rtnl_create_link(struct net *net, char *ifname,
976 unsigned int real_num_queues = 1; 1158 unsigned int real_num_queues = 1;
977 1159
978 if (ops->get_tx_queues) { 1160 if (ops->get_tx_queues) {
979 err = ops->get_tx_queues(net, tb, &num_queues, &real_num_queues); 1161 err = ops->get_tx_queues(src_net, tb, &num_queues,
1162 &real_num_queues);
980 if (err) 1163 if (err)
981 goto err; 1164 goto err;
982 } 1165 }
@@ -985,16 +1168,17 @@ struct net_device *rtnl_create_link(struct net *net, char *ifname,
985 if (!dev) 1168 if (!dev)
986 goto err; 1169 goto err;
987 1170
1171 dev_net_set(dev, net);
1172 dev->rtnl_link_ops = ops;
1173 dev->rtnl_link_state = RTNL_LINK_INITIALIZING;
988 dev->real_num_tx_queues = real_num_queues; 1174 dev->real_num_tx_queues = real_num_queues;
1175
989 if (strchr(dev->name, '%')) { 1176 if (strchr(dev->name, '%')) {
990 err = dev_alloc_name(dev, dev->name); 1177 err = dev_alloc_name(dev, dev->name);
991 if (err < 0) 1178 if (err < 0)
992 goto err_free; 1179 goto err_free;
993 } 1180 }
994 1181
995 dev_net_set(dev, net);
996 dev->rtnl_link_ops = ops;
997
998 if (tb[IFLA_MTU]) 1182 if (tb[IFLA_MTU])
999 dev->mtu = nla_get_u32(tb[IFLA_MTU]); 1183 dev->mtu = nla_get_u32(tb[IFLA_MTU]);
1000 if (tb[IFLA_ADDRESS]) 1184 if (tb[IFLA_ADDRESS])
@@ -1017,6 +1201,7 @@ err_free:
1017err: 1201err:
1018 return ERR_PTR(err); 1202 return ERR_PTR(err);
1019} 1203}
1204EXPORT_SYMBOL(rtnl_create_link);
1020 1205
1021static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) 1206static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1022{ 1207{
@@ -1050,7 +1235,8 @@ replay:
1050 else 1235 else
1051 dev = NULL; 1236 dev = NULL;
1052 1237
1053 if ((err = validate_linkmsg(dev, tb)) < 0) 1238 err = validate_linkmsg(dev, tb);
1239 if (err < 0)
1054 return err; 1240 return err;
1055 1241
1056 if (tb[IFLA_LINKINFO]) { 1242 if (tb[IFLA_LINKINFO]) {
@@ -1071,6 +1257,7 @@ replay:
1071 1257
1072 if (1) { 1258 if (1) {
1073 struct nlattr *attr[ops ? ops->maxtype + 1 : 0], **data = NULL; 1259 struct nlattr *attr[ops ? ops->maxtype + 1 : 0], **data = NULL;
1260 struct net *dest_net;
1074 1261
1075 if (ops) { 1262 if (ops) {
1076 if (ops->maxtype && linkinfo[IFLA_INFO_DATA]) { 1263 if (ops->maxtype && linkinfo[IFLA_INFO_DATA]) {
@@ -1113,7 +1300,7 @@ replay:
1113 if (!(nlh->nlmsg_flags & NLM_F_CREATE)) 1300 if (!(nlh->nlmsg_flags & NLM_F_CREATE))
1114 return -ENODEV; 1301 return -ENODEV;
1115 1302
1116 if (ifm->ifi_index || ifm->ifi_flags || ifm->ifi_change) 1303 if (ifm->ifi_index)
1117 return -EOPNOTSUPP; 1304 return -EOPNOTSUPP;
1118 if (tb[IFLA_MAP] || tb[IFLA_MASTER] || tb[IFLA_PROTINFO]) 1305 if (tb[IFLA_MAP] || tb[IFLA_MASTER] || tb[IFLA_PROTINFO])
1119 return -EOPNOTSUPP; 1306 return -EOPNOTSUPP;
@@ -1135,17 +1322,26 @@ replay:
1135 if (!ifname[0]) 1322 if (!ifname[0])
1136 snprintf(ifname, IFNAMSIZ, "%s%%d", ops->kind); 1323 snprintf(ifname, IFNAMSIZ, "%s%%d", ops->kind);
1137 1324
1138 dev = rtnl_create_link(net, ifname, ops, tb); 1325 dest_net = rtnl_link_get_net(net, tb);
1326 dev = rtnl_create_link(net, dest_net, ifname, ops, tb);
1139 1327
1140 if (IS_ERR(dev)) 1328 if (IS_ERR(dev))
1141 err = PTR_ERR(dev); 1329 err = PTR_ERR(dev);
1142 else if (ops->newlink) 1330 else if (ops->newlink)
1143 err = ops->newlink(dev, tb, data); 1331 err = ops->newlink(net, dev, tb, data);
1144 else 1332 else
1145 err = register_netdevice(dev); 1333 err = register_netdevice(dev);
1146 1334
1147 if (err < 0 && !IS_ERR(dev)) 1335 if (err < 0 && !IS_ERR(dev))
1148 free_netdev(dev); 1336 free_netdev(dev);
1337 if (err < 0)
1338 goto out;
1339
1340 err = rtnl_configure_link(dev, ifm);
1341 if (err < 0)
1342 unregister_netdevice(dev);
1343out:
1344 put_net(dest_net);
1149 return err; 1345 return err;
1150 } 1346 }
1151} 1347}
@@ -1154,6 +1350,7 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1154{ 1350{
1155 struct net *net = sock_net(skb->sk); 1351 struct net *net = sock_net(skb->sk);
1156 struct ifinfomsg *ifm; 1352 struct ifinfomsg *ifm;
1353 char ifname[IFNAMSIZ];
1157 struct nlattr *tb[IFLA_MAX+1]; 1354 struct nlattr *tb[IFLA_MAX+1];
1158 struct net_device *dev = NULL; 1355 struct net_device *dev = NULL;
1159 struct sk_buff *nskb; 1356 struct sk_buff *nskb;
@@ -1163,19 +1360,23 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1163 if (err < 0) 1360 if (err < 0)
1164 return err; 1361 return err;
1165 1362
1363 if (tb[IFLA_IFNAME])
1364 nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ);
1365
1166 ifm = nlmsg_data(nlh); 1366 ifm = nlmsg_data(nlh);
1167 if (ifm->ifi_index > 0) { 1367 if (ifm->ifi_index > 0)
1168 dev = dev_get_by_index(net, ifm->ifi_index); 1368 dev = __dev_get_by_index(net, ifm->ifi_index);
1169 if (dev == NULL) 1369 else if (tb[IFLA_IFNAME])
1170 return -ENODEV; 1370 dev = __dev_get_by_name(net, ifname);
1171 } else 1371 else
1172 return -EINVAL; 1372 return -EINVAL;
1173 1373
1374 if (dev == NULL)
1375 return -ENODEV;
1376
1174 nskb = nlmsg_new(if_nlmsg_size(dev), GFP_KERNEL); 1377 nskb = nlmsg_new(if_nlmsg_size(dev), GFP_KERNEL);
1175 if (nskb == NULL) { 1378 if (nskb == NULL)
1176 err = -ENOBUFS; 1379 return -ENOBUFS;
1177 goto errout;
1178 }
1179 1380
1180 err = rtnl_fill_ifinfo(nskb, dev, RTM_NEWLINK, NETLINK_CB(skb).pid, 1381 err = rtnl_fill_ifinfo(nskb, dev, RTM_NEWLINK, NETLINK_CB(skb).pid,
1181 nlh->nlmsg_seq, 0, 0); 1382 nlh->nlmsg_seq, 0, 0);
@@ -1183,11 +1384,8 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1183 /* -EMSGSIZE implies BUG in if_nlmsg_size */ 1384 /* -EMSGSIZE implies BUG in if_nlmsg_size */
1184 WARN_ON(err == -EMSGSIZE); 1385 WARN_ON(err == -EMSGSIZE);
1185 kfree_skb(nskb); 1386 kfree_skb(nskb);
1186 goto errout; 1387 } else
1187 } 1388 err = rtnl_unicast(nskb, net, NETLINK_CB(skb).pid);
1188 err = rtnl_unicast(nskb, net, NETLINK_CB(skb).pid);
1189errout:
1190 dev_put(dev);
1191 1389
1192 return err; 1390 return err;
1193} 1391}
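rtnl_getlink() gains lookup by IFLA_IFNAME, so userspace can query a single link by name without resolving the ifindex first. A minimal self-contained query (error handling kept terse; "lo" assumed to exist):

    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>
    #include <sys/socket.h>
    #include <linux/netlink.h>
    #include <linux/rtnetlink.h>

    int main(void)
    {
            struct {
                    struct nlmsghdr nlh;
                    struct ifinfomsg ifm;
                    char attrbuf[64];
            } req;
            struct rtattr *rta;
            const char *name = "lo";
            char ans[8192];
            int fd, len;

            fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
            if (fd < 0)
                    return 1;
            memset(&req, 0, sizeof(req));
            req.nlh.nlmsg_len = NLMSG_LENGTH(sizeof(req.ifm));
            req.nlh.nlmsg_type = RTM_GETLINK;
            req.nlh.nlmsg_flags = NLM_F_REQUEST;
            rta = (struct rtattr *)((char *)&req + NLMSG_ALIGN(req.nlh.nlmsg_len));
            rta->rta_type = IFLA_IFNAME;
            rta->rta_len = RTA_LENGTH(strlen(name) + 1);
            memcpy(RTA_DATA(rta), name, strlen(name) + 1);
            req.nlh.nlmsg_len = NLMSG_ALIGN(req.nlh.nlmsg_len) + RTA_ALIGN(rta->rta_len);

            if (send(fd, &req, req.nlh.nlmsg_len, 0) < 0)
                    return 1;
            len = recv(fd, ans, sizeof(ans), 0);    /* RTM_NEWLINK or an error */
            printf("reply: %d bytes\n", len);
            close(fd);
            return 0;
    }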
@@ -1199,7 +1397,7 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb)
1199 1397
1200 if (s_idx == 0) 1398 if (s_idx == 0)
1201 s_idx = 1; 1399 s_idx = 1;
1202 for (idx=1; idx<NPROTO; idx++) { 1400 for (idx = 1; idx < NPROTO; idx++) {
1203 int type = cb->nlh->nlmsg_type-RTM_BASE; 1401 int type = cb->nlh->nlmsg_type-RTM_BASE;
1204 if (idx < s_idx || idx == PF_PACKET) 1402 if (idx < s_idx || idx == PF_PACKET)
1205 continue; 1403 continue;
@@ -1266,7 +1464,7 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
1266 if (nlh->nlmsg_len < NLMSG_LENGTH(sizeof(struct rtgenmsg))) 1464 if (nlh->nlmsg_len < NLMSG_LENGTH(sizeof(struct rtgenmsg)))
1267 return 0; 1465 return 0;
1268 1466
1269 family = ((struct rtgenmsg*)NLMSG_DATA(nlh))->rtgen_family; 1467 family = ((struct rtgenmsg *)NLMSG_DATA(nlh))->rtgen_family;
1270 if (family >= NPROTO) 1468 if (family >= NPROTO)
1271 return -EAFNOSUPPORT; 1469 return -EAFNOSUPPORT;
1272 1470
@@ -1299,7 +1497,7 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
1299 1497
1300 if (nlh->nlmsg_len > min_len) { 1498 if (nlh->nlmsg_len > min_len) {
1301 int attrlen = nlh->nlmsg_len - NLMSG_ALIGN(min_len); 1499 int attrlen = nlh->nlmsg_len - NLMSG_ALIGN(min_len);
1302 struct rtattr *attr = (void*)nlh + NLMSG_ALIGN(min_len); 1500 struct rtattr *attr = (void *)nlh + NLMSG_ALIGN(min_len);
1303 1501
1304 while (RTA_OK(attr, attrlen)) { 1502 while (RTA_OK(attr, attrlen)) {
1305 unsigned flavor = attr->rta_type; 1503 unsigned flavor = attr->rta_type;
@@ -1331,18 +1529,15 @@ static int rtnetlink_event(struct notifier_block *this, unsigned long event, voi
1331 struct net_device *dev = ptr; 1529 struct net_device *dev = ptr;
1332 1530
1333 switch (event) { 1531 switch (event) {
1334 case NETDEV_UNREGISTER:
1335 rtmsg_ifinfo(RTM_DELLINK, dev, ~0U);
1336 break;
1337 case NETDEV_REGISTER:
1338 rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U);
1339 break;
1340 case NETDEV_UP: 1532 case NETDEV_UP:
1341 case NETDEV_DOWN: 1533 case NETDEV_DOWN:
1342 rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING); 1534 case NETDEV_PRE_UP:
1343 break; 1535 case NETDEV_POST_INIT:
1536 case NETDEV_REGISTER:
1344 case NETDEV_CHANGE: 1537 case NETDEV_CHANGE:
1345 case NETDEV_GOING_DOWN: 1538 case NETDEV_GOING_DOWN:
1539 case NETDEV_UNREGISTER:
1540 case NETDEV_UNREGISTER_BATCH:
1346 break; 1541 break;
1347 default: 1542 default:
1348 rtmsg_ifinfo(RTM_NEWLINK, dev, 0); 1543 rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
@@ -1356,7 +1551,7 @@ static struct notifier_block rtnetlink_dev_notifier = {
1356}; 1551};
1357 1552
1358 1553
1359static int rtnetlink_net_init(struct net *net) 1554static int __net_init rtnetlink_net_init(struct net *net)
1360{ 1555{
1361 struct sock *sk; 1556 struct sock *sk;
1362 sk = netlink_kernel_create(net, NETLINK_ROUTE, RTNLGRP_MAX, 1557 sk = netlink_kernel_create(net, NETLINK_ROUTE, RTNLGRP_MAX,
@@ -1367,7 +1562,7 @@ static int rtnetlink_net_init(struct net *net)
1367 return 0; 1562 return 0;
1368} 1563}
1369 1564
1370static void rtnetlink_net_exit(struct net *net) 1565static void __net_exit rtnetlink_net_exit(struct net *net)
1371{ 1566{
1372 netlink_kernel_release(net->rtnl); 1567 netlink_kernel_release(net->rtnl);
1373 net->rtnl = NULL; 1568 net->rtnl = NULL;
@@ -1405,14 +1600,3 @@ void __init rtnetlink_init(void)
1405 rtnl_register(PF_UNSPEC, RTM_GETROUTE, NULL, rtnl_dump_all); 1600 rtnl_register(PF_UNSPEC, RTM_GETROUTE, NULL, rtnl_dump_all);
1406} 1601}
1407 1602
1408EXPORT_SYMBOL(__rta_fill);
1409EXPORT_SYMBOL(rtnetlink_put_metrics);
1410EXPORT_SYMBOL(rtnl_lock);
1411EXPORT_SYMBOL(rtnl_trylock);
1412EXPORT_SYMBOL(rtnl_unlock);
1413EXPORT_SYMBOL(rtnl_is_locked);
1414EXPORT_SYMBOL(rtnl_unicast);
1415EXPORT_SYMBOL(rtnl_notify);
1416EXPORT_SYMBOL(rtnl_set_sk_err);
1417EXPORT_SYMBOL(rtnl_create_link);
1418EXPORT_SYMBOL(ifla_policy);
diff --git a/net/core/scm.c b/net/core/scm.c
index b7ba91b074b3..b88f6f9d0b97 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -26,6 +26,7 @@
26#include <linux/security.h> 26#include <linux/security.h>
27#include <linux/pid.h> 27#include <linux/pid.h>
28#include <linux/nsproxy.h> 28#include <linux/nsproxy.h>
29#include <linux/slab.h>
29 30
30#include <asm/system.h> 31#include <asm/system.h>
31#include <asm/uaccess.h> 32#include <asm/uaccess.h>
@@ -156,6 +157,8 @@ int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p)
156 switch (cmsg->cmsg_type) 157 switch (cmsg->cmsg_type)
157 { 158 {
158 case SCM_RIGHTS: 159 case SCM_RIGHTS:
160 if (!sock->ops || sock->ops->family != PF_UNIX)
161 goto error;
159 err=scm_fp_copy(cmsg, &p->fp); 162 err=scm_fp_copy(cmsg, &p->fp);
160 if (err<0) 163 if (err<0)
161 goto error; 164 goto error;
diff --git a/net/core/skb_dma_map.c b/net/core/skb_dma_map.c
deleted file mode 100644
index 79687dfd6957..000000000000
--- a/net/core/skb_dma_map.c
+++ /dev/null
@@ -1,65 +0,0 @@
1/* skb_dma_map.c: DMA mapping helpers for socket buffers.
2 *
3 * Copyright (C) David S. Miller <davem@davemloft.net>
4 */
5
6#include <linux/kernel.h>
7#include <linux/module.h>
8#include <linux/dma-mapping.h>
9#include <linux/skbuff.h>
10
11int skb_dma_map(struct device *dev, struct sk_buff *skb,
12 enum dma_data_direction dir)
13{
14 struct skb_shared_info *sp = skb_shinfo(skb);
15 dma_addr_t map;
16 int i;
17
18 map = dma_map_single(dev, skb->data,
19 skb_headlen(skb), dir);
20 if (dma_mapping_error(dev, map))
21 goto out_err;
22
23 sp->dma_head = map;
24 for (i = 0; i < sp->nr_frags; i++) {
25 skb_frag_t *fp = &sp->frags[i];
26
27 map = dma_map_page(dev, fp->page, fp->page_offset,
28 fp->size, dir);
29 if (dma_mapping_error(dev, map))
30 goto unwind;
31 sp->dma_maps[i] = map;
32 }
33
34 return 0;
35
36unwind:
37 while (--i >= 0) {
38 skb_frag_t *fp = &sp->frags[i];
39
40 dma_unmap_page(dev, sp->dma_maps[i],
41 fp->size, dir);
42 }
43 dma_unmap_single(dev, sp->dma_head,
44 skb_headlen(skb), dir);
45out_err:
46 return -ENOMEM;
47}
48EXPORT_SYMBOL(skb_dma_map);
49
50void skb_dma_unmap(struct device *dev, struct sk_buff *skb,
51 enum dma_data_direction dir)
52{
53 struct skb_shared_info *sp = skb_shinfo(skb);
54 int i;
55
56 dma_unmap_single(dev, sp->dma_head,
57 skb_headlen(skb), dir);
58 for (i = 0; i < sp->nr_frags; i++) {
59 skb_frag_t *fp = &sp->frags[i];
60
61 dma_unmap_page(dev, sp->dma_maps[i],
62 fp->size, dir);
63 }
64}
65EXPORT_SYMBOL(skb_dma_unmap);
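With skb_dma_map.c gone, the dma_head/dma_maps[] scratch space leaves the shared skb info and drivers map the head and each fragment themselves, keeping the dma_addr_t values in their own descriptor rings. A rough sketch of the replacement pattern, with the ring bookkeeping reduced to two caller-supplied arrays:

    /* Map head + frags for TX; the caller stores the addresses. */
    static int map_skb(struct device *dev, struct sk_buff *skb,
                       dma_addr_t *head, dma_addr_t *frags)
    {
            int i;

            *head = dma_map_single(dev, skb->data, skb_headlen(skb),
                                   DMA_TO_DEVICE);
            if (dma_mapping_error(dev, *head))
                    return -ENOMEM;
            for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
                    skb_frag_t *f = &skb_shinfo(skb)->frags[i];

                    frags[i] = dma_map_page(dev, f->page, f->page_offset,
                                            f->size, DMA_TO_DEVICE);
                    if (dma_mapping_error(dev, frags[i]))
                            goto unwind;
            }
            return 0;

    unwind:
            while (--i >= 0)
                    dma_unmap_page(dev, frags[i],
                                   skb_shinfo(skb)->frags[i].size,
                                   DMA_TO_DEVICE);
            dma_unmap_single(dev, *head, skb_headlen(skb), DMA_TO_DEVICE);
            return -ENOMEM;
    }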
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index ec85681a7dd8..93c4e060c91e 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -93,7 +93,7 @@ static int sock_pipe_buf_steal(struct pipe_inode_info *pipe,
93 93
94 94
95/* Pipe buffer operations for a socket. */ 95/* Pipe buffer operations for a socket. */
96static struct pipe_buf_operations sock_pipe_buf_ops = { 96static const struct pipe_buf_operations sock_pipe_buf_ops = {
97 .can_merge = 0, 97 .can_merge = 0,
98 .map = generic_pipe_buf_map, 98 .map = generic_pipe_buf_map,
99 .unmap = generic_pipe_buf_unmap, 99 .unmap = generic_pipe_buf_unmap,
@@ -493,6 +493,9 @@ int skb_recycle_check(struct sk_buff *skb, int skb_size)
493{ 493{
494 struct skb_shared_info *shinfo; 494 struct skb_shared_info *shinfo;
495 495
496 if (irqs_disabled())
497 return 0;
498
496 if (skb_is_nonlinear(skb) || skb->fclone != SKB_FCLONE_UNAVAILABLE) 499 if (skb_is_nonlinear(skb) || skb->fclone != SKB_FCLONE_UNAVAILABLE)
497 return 0; 500 return 0;
498 501
@@ -546,7 +549,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
546#endif 549#endif
547 new->protocol = old->protocol; 550 new->protocol = old->protocol;
548 new->mark = old->mark; 551 new->mark = old->mark;
549 new->iif = old->iif; 552 new->skb_iif = old->skb_iif;
550 __nf_copy(new, old); 553 __nf_copy(new, old);
551#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ 554#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
552 defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE) 555 defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
diff --git a/net/core/sock.c b/net/core/sock.c
index 7626b6aacd68..c5812bbc2cc9 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -274,25 +274,27 @@ static void sock_disable_timestamp(struct sock *sk, int flag)
274 274
275int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) 275int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
276{ 276{
277 int err = 0; 277 int err;
278 int skb_len; 278 int skb_len;
279 unsigned long flags;
280 struct sk_buff_head *list = &sk->sk_receive_queue;
279 281
280 /* Cast sk->rcvbuf to unsigned... It's pointless, but reduces 282 /* Cast sk->rcvbuf to unsigned... It's pointless, but reduces
281 number of warnings when compiling with -W --ANK 283 number of warnings when compiling with -W --ANK
282 */ 284 */
283 if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >= 285 if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
284 (unsigned)sk->sk_rcvbuf) { 286 (unsigned)sk->sk_rcvbuf) {
285 err = -ENOMEM; 287 atomic_inc(&sk->sk_drops);
286 goto out; 288 return -ENOMEM;
287 } 289 }
288 290
289 err = sk_filter(sk, skb); 291 err = sk_filter(sk, skb);
290 if (err) 292 if (err)
291 goto out; 293 return err;
292 294
293 if (!sk_rmem_schedule(sk, skb->truesize)) { 295 if (!sk_rmem_schedule(sk, skb->truesize)) {
294 err = -ENOBUFS; 296 atomic_inc(&sk->sk_drops);
295 goto out; 297 return -ENOBUFS;
296 } 298 }
297 299
298 skb->dev = NULL; 300 skb->dev = NULL;
@@ -305,12 +307,14 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
305 */ 307 */
306 skb_len = skb->len; 308 skb_len = skb->len;
307 309
308 skb_queue_tail(&sk->sk_receive_queue, skb); 310 spin_lock_irqsave(&list->lock, flags);
311 skb->dropcount = atomic_read(&sk->sk_drops);
312 __skb_queue_tail(list, skb);
313 spin_unlock_irqrestore(&list->lock, flags);
309 314
310 if (!sock_flag(sk, SOCK_DEAD)) 315 if (!sock_flag(sk, SOCK_DEAD))
311 sk->sk_data_ready(sk, skb_len); 316 sk->sk_data_ready(sk, skb_len);
312out: 317 return 0;
313 return err;
314} 318}
315EXPORT_SYMBOL(sock_queue_rcv_skb); 319EXPORT_SYMBOL(sock_queue_rcv_skb);
316 320
@@ -336,8 +340,12 @@ int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested)
336 rc = sk_backlog_rcv(sk, skb); 340 rc = sk_backlog_rcv(sk, skb);
337 341
338 mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_); 342 mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
339 } else 343 } else if (sk_add_backlog(sk, skb)) {
340 sk_add_backlog(sk, skb); 344 bh_unlock_sock(sk);
345 atomic_inc(&sk->sk_drops);
346 goto discard_and_relse;
347 }
348
341 bh_unlock_sock(sk); 349 bh_unlock_sock(sk);
342out: 350out:
343 sock_put(sk); 351 sock_put(sk);
@@ -348,11 +356,18 @@ discard_and_relse:
348} 356}
349EXPORT_SYMBOL(sk_receive_skb); 357EXPORT_SYMBOL(sk_receive_skb);
350 358
359void sk_reset_txq(struct sock *sk)
360{
361 sk_tx_queue_clear(sk);
362}
363EXPORT_SYMBOL(sk_reset_txq);
364
351struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie) 365struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie)
352{ 366{
353 struct dst_entry *dst = sk->sk_dst_cache; 367 struct dst_entry *dst = sk->sk_dst_cache;
354 368
355 if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) { 369 if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
370 sk_tx_queue_clear(sk);
356 sk->sk_dst_cache = NULL; 371 sk->sk_dst_cache = NULL;
357 dst_release(dst); 372 dst_release(dst);
358 return NULL; 373 return NULL;
@@ -406,17 +421,18 @@ static int sock_bindtodevice(struct sock *sk, char __user *optval, int optlen)
406 if (copy_from_user(devname, optval, optlen)) 421 if (copy_from_user(devname, optval, optlen))
407 goto out; 422 goto out;
408 423
409 if (devname[0] == '\0') { 424 index = 0;
410 index = 0; 425 if (devname[0] != '\0') {
411 } else { 426 struct net_device *dev;
412 struct net_device *dev = dev_get_by_name(net, devname);
413 427
428 rcu_read_lock();
429 dev = dev_get_by_name_rcu(net, devname);
430 if (dev)
431 index = dev->ifindex;
432 rcu_read_unlock();
414 ret = -ENODEV; 433 ret = -ENODEV;
415 if (!dev) 434 if (!dev)
416 goto out; 435 goto out;
417
418 index = dev->ifindex;
419 dev_put(dev);
420 } 436 }
421 437
422 lock_sock(sk); 438 lock_sock(sk);
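sock_bindtodevice now resolves the name under rcu_read_lock() with dev_get_by_name_rcu() instead of taking and dropping a full device reference; only the ifindex is carried out of the RCU section, which is safe because an ifindex stays meaningful even if the device later disappears. The idiom:

    /* Name -> ifindex without touching the refcount; 0 means not found. */
    static int name_to_ifindex(struct net *net, const char *name)
    {
            struct net_device *dev;
            int index = 0;

            rcu_read_lock();
            dev = dev_get_by_name_rcu(net, name);
            if (dev)
                    index = dev->ifindex;
            rcu_read_unlock();              /* dev must not be used past here */
            return index;
    }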
@@ -702,6 +718,12 @@ set_rcvbuf:
702 718
703 /* We implement the SO_SNDLOWAT etc to 719 /* We implement the SO_SNDLOWAT etc to
704 not be settable (1003.1g 5.3) */ 720 not be settable (1003.1g 5.3) */
721 case SO_RXQ_OVFL:
722 if (valbool)
723 sock_set_flag(sk, SOCK_RXQ_OVFL);
724 else
725 sock_reset_flag(sk, SOCK_RXQ_OVFL);
726 break;
705 default: 727 default:
706 ret = -ENOPROTOOPT; 728 ret = -ENOPROTOOPT;
707 break; 729 break;
@@ -723,7 +745,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
723 struct timeval tm; 745 struct timeval tm;
724 } v; 746 } v;
725 747
726 unsigned int lv = sizeof(int); 748 int lv = sizeof(int);
727 int len; 749 int len;
728 750
729 if (get_user(len, optlen)) 751 if (get_user(len, optlen))
@@ -901,6 +923,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
901 v.val = sk->sk_mark; 923 v.val = sk->sk_mark;
902 break; 924 break;
903 925
926 case SO_RXQ_OVFL:
927 v.val = !!sock_flag(sk, SOCK_RXQ_OVFL);
928 break;
929
904 default: 930 default:
905 return -ENOPROTOOPT; 931 return -ENOPROTOOPT;
906 } 932 }
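SO_RXQ_OVFL gives datagram receivers a per-packet drop counter: sock_queue_rcv_skb() stamps skb->dropcount from sk->sk_drops under the queue lock, and recvmsg() surfaces it as a SOL_SOCKET control message. A small consumer, assuming a kernel with this option (SO_RXQ_OVFL is 40 on most architectures and older libcs may not define it; bind() and error handling elided):

    #include <stdio.h>
    #include <stdint.h>
    #include <sys/socket.h>
    #include <netinet/in.h>

    #ifndef SO_RXQ_OVFL
    #define SO_RXQ_OVFL 40
    #endif

    int main(void)
    {
            int fd = socket(AF_INET, SOCK_DGRAM, 0), one = 1;
            char data[2048], cbuf[CMSG_SPACE(sizeof(uint32_t))];
            struct iovec iov = { .iov_base = data, .iov_len = sizeof(data) };
            struct msghdr msg = { .msg_iov = &iov, .msg_iovlen = 1,
                                  .msg_control = cbuf,
                                  .msg_controllen = sizeof(cbuf) };
            struct cmsghdr *cm;

            setsockopt(fd, SOL_SOCKET, SO_RXQ_OVFL, &one, sizeof(one));
            /* bind(fd, ...) elided; then, per datagram: */
            if (recvmsg(fd, &msg, 0) >= 0)
                    for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm))
                            if (cm->cmsg_level == SOL_SOCKET &&
                                cm->cmsg_type == SO_RXQ_OVFL)
                                    printf("drops: %u\n",
                                           *(uint32_t *)CMSG_DATA(cm));
            return 0;
    }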
@@ -939,7 +965,8 @@ static void sock_copy(struct sock *nsk, const struct sock *osk)
939 void *sptr = nsk->sk_security; 965 void *sptr = nsk->sk_security;
940#endif 966#endif
941 BUILD_BUG_ON(offsetof(struct sock, sk_copy_start) != 967 BUILD_BUG_ON(offsetof(struct sock, sk_copy_start) !=
942 sizeof(osk->sk_node) + sizeof(osk->sk_refcnt)); 968 sizeof(osk->sk_node) + sizeof(osk->sk_refcnt) +
969 sizeof(osk->sk_tx_queue_mapping));
943 memcpy(&nsk->sk_copy_start, &osk->sk_copy_start, 970 memcpy(&nsk->sk_copy_start, &osk->sk_copy_start,
944 osk->sk_prot->obj_size - offsetof(struct sock, sk_copy_start)); 971 osk->sk_prot->obj_size - offsetof(struct sock, sk_copy_start));
945#ifdef CONFIG_SECURITY_NETWORK 972#ifdef CONFIG_SECURITY_NETWORK
@@ -983,6 +1010,7 @@ static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
983 1010
984 if (!try_module_get(prot->owner)) 1011 if (!try_module_get(prot->owner))
985 goto out_free_sec; 1012 goto out_free_sec;
1013 sk_tx_queue_clear(sk);
986 } 1014 }
987 1015
988 return sk; 1016 return sk;
@@ -1049,7 +1077,8 @@ static void __sk_free(struct sock *sk)
1049 if (sk->sk_destruct) 1077 if (sk->sk_destruct)
1050 sk->sk_destruct(sk); 1078 sk->sk_destruct(sk);
1051 1079
1052 filter = rcu_dereference(sk->sk_filter); 1080 filter = rcu_dereference_check(sk->sk_filter,
1081 atomic_read(&sk->sk_wmem_alloc) == 0);
1053 if (filter) { 1082 if (filter) {
1054 sk_filter_uncharge(sk, filter); 1083 sk_filter_uncharge(sk, filter);
1055 rcu_assign_pointer(sk->sk_filter, NULL); 1084 rcu_assign_pointer(sk->sk_filter, NULL);
@@ -1114,6 +1143,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
1114 sock_lock_init(newsk); 1143 sock_lock_init(newsk);
1115 bh_lock_sock(newsk); 1144 bh_lock_sock(newsk);
1116 newsk->sk_backlog.head = newsk->sk_backlog.tail = NULL; 1145 newsk->sk_backlog.head = newsk->sk_backlog.tail = NULL;
1146 newsk->sk_backlog.len = 0;
1117 1147
1118 atomic_set(&newsk->sk_rmem_alloc, 0); 1148 atomic_set(&newsk->sk_rmem_alloc, 0);
1119 /* 1149 /*
@@ -1181,6 +1211,10 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
1181 1211
1182 if (newsk->sk_prot->sockets_allocated) 1212 if (newsk->sk_prot->sockets_allocated)
1183 percpu_counter_inc(newsk->sk_prot->sockets_allocated); 1213 percpu_counter_inc(newsk->sk_prot->sockets_allocated);
1214
1215 if (sock_flag(newsk, SOCK_TIMESTAMP) ||
1216 sock_flag(newsk, SOCK_TIMESTAMPING_RX_SOFTWARE))
1217 net_enable_timestamp();
1184 } 1218 }
1185out: 1219out:
1186 return newsk; 1220 return newsk;
@@ -1513,6 +1547,12 @@ static void __release_sock(struct sock *sk)
1513 1547
1514 bh_lock_sock(sk); 1548 bh_lock_sock(sk);
1515 } while ((skb = sk->sk_backlog.head) != NULL); 1549 } while ((skb = sk->sk_backlog.head) != NULL);
1550
1551 /*
 1552 * Doing the zeroing here guarantees we cannot loop forever
1553 * while a wild producer attempts to flood us.
1554 */
1555 sk->sk_backlog.len = 0;
1516} 1556}
1517 1557
1518/** 1558/**
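The backlog is now bounded end to end: sk_add_backlog() fails once sk_backlog.len crosses the limit set in sock_init_data() (twice sk_rcvbuf), producers count the refusal in sk_drops and free the skb, and the consumer zeroes len only after draining, which is what the comment above is guaranteeing. Schematic of the two sides (fragments, not complete functions):

    /* producer side (softirq, socket owned by a user context) */
    if (sk_add_backlog(sk, skb)) {          /* over sk->sk_backlog.limit */
            atomic_inc(&sk->sk_drops);
            kfree_skb(skb);
    }

    /* consumer side (__release_sock), after the drain loop */
    sk->sk_backlog.len = 0;                 /* reopen the backlog */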
@@ -1845,6 +1885,7 @@ void sock_init_data(struct socket *sock, struct sock *sk)
1845 sk->sk_allocation = GFP_KERNEL; 1885 sk->sk_allocation = GFP_KERNEL;
1846 sk->sk_rcvbuf = sysctl_rmem_default; 1886 sk->sk_rcvbuf = sysctl_rmem_default;
1847 sk->sk_sndbuf = sysctl_wmem_default; 1887 sk->sk_sndbuf = sysctl_wmem_default;
1888 sk->sk_backlog.limit = sk->sk_rcvbuf << 1;
1848 sk->sk_state = TCP_CLOSE; 1889 sk->sk_state = TCP_CLOSE;
1849 sk_set_socket(sk, sock); 1890 sk_set_socket(sk, sock);
1850 1891
@@ -2112,13 +2153,13 @@ int sock_prot_inuse_get(struct net *net, struct proto *prot)
2112} 2153}
2113EXPORT_SYMBOL_GPL(sock_prot_inuse_get); 2154EXPORT_SYMBOL_GPL(sock_prot_inuse_get);
2114 2155
2115static int sock_inuse_init_net(struct net *net) 2156static int __net_init sock_inuse_init_net(struct net *net)
2116{ 2157{
2117 net->core.inuse = alloc_percpu(struct prot_inuse); 2158 net->core.inuse = alloc_percpu(struct prot_inuse);
2118 return net->core.inuse ? 0 : -ENOMEM; 2159 return net->core.inuse ? 0 : -ENOMEM;
2119} 2160}
2120 2161
2121static void sock_inuse_exit_net(struct net *net) 2162static void __net_exit sock_inuse_exit_net(struct net *net)
2122{ 2163{
2123 free_percpu(net->core.inuse); 2164 free_percpu(net->core.inuse);
2124} 2165}
@@ -2200,13 +2241,10 @@ int proto_register(struct proto *prot, int alloc_slab)
2200 } 2241 }
2201 2242
2202 if (prot->rsk_prot != NULL) { 2243 if (prot->rsk_prot != NULL) {
2203 static const char mask[] = "request_sock_%s"; 2244 prot->rsk_prot->slab_name = kasprintf(GFP_KERNEL, "request_sock_%s", prot->name);
2204
2205 prot->rsk_prot->slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL);
2206 if (prot->rsk_prot->slab_name == NULL) 2245 if (prot->rsk_prot->slab_name == NULL)
2207 goto out_free_sock_slab; 2246 goto out_free_sock_slab;
2208 2247
2209 sprintf(prot->rsk_prot->slab_name, mask, prot->name);
2210 prot->rsk_prot->slab = kmem_cache_create(prot->rsk_prot->slab_name, 2248 prot->rsk_prot->slab = kmem_cache_create(prot->rsk_prot->slab_name,
2211 prot->rsk_prot->obj_size, 0, 2249 prot->rsk_prot->obj_size, 0,
2212 SLAB_HWCACHE_ALIGN, NULL); 2250 SLAB_HWCACHE_ALIGN, NULL);
@@ -2219,14 +2257,11 @@ int proto_register(struct proto *prot, int alloc_slab)
2219 } 2257 }
2220 2258
2221 if (prot->twsk_prot != NULL) { 2259 if (prot->twsk_prot != NULL) {
2222 static const char mask[] = "tw_sock_%s"; 2260 prot->twsk_prot->twsk_slab_name = kasprintf(GFP_KERNEL, "tw_sock_%s", prot->name);
2223
2224 prot->twsk_prot->twsk_slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL);
2225 2261
2226 if (prot->twsk_prot->twsk_slab_name == NULL) 2262 if (prot->twsk_prot->twsk_slab_name == NULL)
2227 goto out_free_request_sock_slab; 2263 goto out_free_request_sock_slab;
2228 2264
2229 sprintf(prot->twsk_prot->twsk_slab_name, mask, prot->name);
2230 prot->twsk_prot->twsk_slab = 2265 prot->twsk_prot->twsk_slab =
2231 kmem_cache_create(prot->twsk_prot->twsk_slab_name, 2266 kmem_cache_create(prot->twsk_prot->twsk_slab_name,
2232 prot->twsk_prot->twsk_obj_size, 2267 prot->twsk_prot->twsk_obj_size,
@@ -2253,7 +2288,8 @@ out_free_request_sock_slab:
2253 prot->rsk_prot->slab = NULL; 2288 prot->rsk_prot->slab = NULL;
2254 } 2289 }
2255out_free_request_sock_slab_name: 2290out_free_request_sock_slab_name:
2256 kfree(prot->rsk_prot->slab_name); 2291 if (prot->rsk_prot)
2292 kfree(prot->rsk_prot->slab_name);
2257out_free_sock_slab: 2293out_free_sock_slab:
2258 kmem_cache_destroy(prot->slab); 2294 kmem_cache_destroy(prot->slab);
2259 prot->slab = NULL; 2295 prot->slab = NULL;
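proto_register() now uses kasprintf(), which measures, allocates and formats in one call; the old kmalloc(strlen(...) + sizeof(mask) - 1) arithmetic had to be kept in sync with the format string by hand. The idiom, as a fragment:

    char *name = kasprintf(GFP_KERNEL, "tw_sock_%s", prot->name);

    if (!name)
            return -ENOMEM;
    /* ... use name; kfree(name) when finished ... */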
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 7db1de0497c6..b7b6b8208f75 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -10,14 +10,16 @@
10#include <linux/module.h> 10#include <linux/module.h>
11#include <linux/socket.h> 11#include <linux/socket.h>
12#include <linux/netdevice.h> 12#include <linux/netdevice.h>
13#include <linux/ratelimit.h>
13#include <linux/init.h> 14#include <linux/init.h>
15#include <linux/slab.h>
16
14#include <net/ip.h> 17#include <net/ip.h>
15#include <net/sock.h> 18#include <net/sock.h>
16 19
17static struct ctl_table net_core_table[] = { 20static struct ctl_table net_core_table[] = {
18#ifdef CONFIG_NET 21#ifdef CONFIG_NET
19 { 22 {
20 .ctl_name = NET_CORE_WMEM_MAX,
21 .procname = "wmem_max", 23 .procname = "wmem_max",
22 .data = &sysctl_wmem_max, 24 .data = &sysctl_wmem_max,
23 .maxlen = sizeof(int), 25 .maxlen = sizeof(int),
@@ -25,7 +27,6 @@ static struct ctl_table net_core_table[] = {
25 .proc_handler = proc_dointvec 27 .proc_handler = proc_dointvec
26 }, 28 },
27 { 29 {
28 .ctl_name = NET_CORE_RMEM_MAX,
29 .procname = "rmem_max", 30 .procname = "rmem_max",
30 .data = &sysctl_rmem_max, 31 .data = &sysctl_rmem_max,
31 .maxlen = sizeof(int), 32 .maxlen = sizeof(int),
@@ -33,7 +34,6 @@ static struct ctl_table net_core_table[] = {
33 .proc_handler = proc_dointvec 34 .proc_handler = proc_dointvec
34 }, 35 },
35 { 36 {
36 .ctl_name = NET_CORE_WMEM_DEFAULT,
37 .procname = "wmem_default", 37 .procname = "wmem_default",
38 .data = &sysctl_wmem_default, 38 .data = &sysctl_wmem_default,
39 .maxlen = sizeof(int), 39 .maxlen = sizeof(int),
@@ -41,7 +41,6 @@ static struct ctl_table net_core_table[] = {
41 .proc_handler = proc_dointvec 41 .proc_handler = proc_dointvec
42 }, 42 },
43 { 43 {
44 .ctl_name = NET_CORE_RMEM_DEFAULT,
45 .procname = "rmem_default", 44 .procname = "rmem_default",
46 .data = &sysctl_rmem_default, 45 .data = &sysctl_rmem_default,
47 .maxlen = sizeof(int), 46 .maxlen = sizeof(int),
@@ -49,7 +48,6 @@ static struct ctl_table net_core_table[] = {
49 .proc_handler = proc_dointvec 48 .proc_handler = proc_dointvec
50 }, 49 },
51 { 50 {
52 .ctl_name = NET_CORE_DEV_WEIGHT,
53 .procname = "dev_weight", 51 .procname = "dev_weight",
54 .data = &weight_p, 52 .data = &weight_p,
55 .maxlen = sizeof(int), 53 .maxlen = sizeof(int),
@@ -57,7 +55,6 @@ static struct ctl_table net_core_table[] = {
57 .proc_handler = proc_dointvec 55 .proc_handler = proc_dointvec
58 }, 56 },
59 { 57 {
60 .ctl_name = NET_CORE_MAX_BACKLOG,
61 .procname = "netdev_max_backlog", 58 .procname = "netdev_max_backlog",
62 .data = &netdev_max_backlog, 59 .data = &netdev_max_backlog,
63 .maxlen = sizeof(int), 60 .maxlen = sizeof(int),
@@ -65,16 +62,13 @@ static struct ctl_table net_core_table[] = {
65 .proc_handler = proc_dointvec 62 .proc_handler = proc_dointvec
66 }, 63 },
67 { 64 {
68 .ctl_name = NET_CORE_MSG_COST,
69 .procname = "message_cost", 65 .procname = "message_cost",
70 .data = &net_ratelimit_state.interval, 66 .data = &net_ratelimit_state.interval,
71 .maxlen = sizeof(int), 67 .maxlen = sizeof(int),
72 .mode = 0644, 68 .mode = 0644,
73 .proc_handler = proc_dointvec_jiffies, 69 .proc_handler = proc_dointvec_jiffies,
74 .strategy = sysctl_jiffies,
75 }, 70 },
76 { 71 {
77 .ctl_name = NET_CORE_MSG_BURST,
78 .procname = "message_burst", 72 .procname = "message_burst",
79 .data = &net_ratelimit_state.burst, 73 .data = &net_ratelimit_state.burst,
80 .maxlen = sizeof(int), 74 .maxlen = sizeof(int),
@@ -82,7 +76,6 @@ static struct ctl_table net_core_table[] = {
82 .proc_handler = proc_dointvec, 76 .proc_handler = proc_dointvec,
83 }, 77 },
84 { 78 {
85 .ctl_name = NET_CORE_OPTMEM_MAX,
86 .procname = "optmem_max", 79 .procname = "optmem_max",
87 .data = &sysctl_optmem_max, 80 .data = &sysctl_optmem_max,
88 .maxlen = sizeof(int), 81 .maxlen = sizeof(int),
@@ -91,7 +84,6 @@ static struct ctl_table net_core_table[] = {
91 }, 84 },
92#endif /* CONFIG_NET */ 85#endif /* CONFIG_NET */
93 { 86 {
94 .ctl_name = NET_CORE_BUDGET,
95 .procname = "netdev_budget", 87 .procname = "netdev_budget",
96 .data = &netdev_budget, 88 .data = &netdev_budget,
97 .maxlen = sizeof(int), 89 .maxlen = sizeof(int),
@@ -99,31 +91,29 @@ static struct ctl_table net_core_table[] = {
99 .proc_handler = proc_dointvec 91 .proc_handler = proc_dointvec
100 }, 92 },
101 { 93 {
102 .ctl_name = NET_CORE_WARNINGS,
103 .procname = "warnings", 94 .procname = "warnings",
104 .data = &net_msg_warn, 95 .data = &net_msg_warn,
105 .maxlen = sizeof(int), 96 .maxlen = sizeof(int),
106 .mode = 0644, 97 .mode = 0644,
107 .proc_handler = proc_dointvec 98 .proc_handler = proc_dointvec
108 }, 99 },
109 { .ctl_name = 0 } 100 { }
110}; 101};
111 102
112static struct ctl_table netns_core_table[] = { 103static struct ctl_table netns_core_table[] = {
113 { 104 {
114 .ctl_name = NET_CORE_SOMAXCONN,
115 .procname = "somaxconn", 105 .procname = "somaxconn",
116 .data = &init_net.core.sysctl_somaxconn, 106 .data = &init_net.core.sysctl_somaxconn,
117 .maxlen = sizeof(int), 107 .maxlen = sizeof(int),
118 .mode = 0644, 108 .mode = 0644,
119 .proc_handler = proc_dointvec 109 .proc_handler = proc_dointvec
120 }, 110 },
121 { .ctl_name = 0 } 111 { }
122}; 112};
123 113
124__net_initdata struct ctl_path net_core_path[] = { 114__net_initdata struct ctl_path net_core_path[] = {
125 { .procname = "net", .ctl_name = CTL_NET, }, 115 { .procname = "net", },
126 { .procname = "core", .ctl_name = NET_CORE, }, 116 { .procname = "core", },
127 { }, 117 { },
128}; 118};
129 119
@@ -134,7 +124,7 @@ static __net_init int sysctl_core_net_init(struct net *net)
134 net->core.sysctl_somaxconn = SOMAXCONN; 124 net->core.sysctl_somaxconn = SOMAXCONN;
135 125
136 tbl = netns_core_table; 126 tbl = netns_core_table;
137 if (net != &init_net) { 127 if (!net_eq(net, &init_net)) {
138 tbl = kmemdup(tbl, sizeof(netns_core_table), GFP_KERNEL); 128 tbl = kmemdup(tbl, sizeof(netns_core_table), GFP_KERNEL);
139 if (tbl == NULL) 129 if (tbl == NULL)
140 goto err_dup; 130 goto err_dup;
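The sysctl changes are part of the tree-wide binary-sysctl removal: .ctl_name and .strategy disappear, tables terminate with an empty { } sentinel instead of { .ctl_name = 0 }, and ctl_path entries are matched by procname alone. The shape of a post-cleanup entry (example_value is a hypothetical knob):

    static int example_value;

    static struct ctl_table example_table[] = {
            {
                    .procname       = "example_value",
                    .data           = &example_value,
                    .maxlen         = sizeof(int),
                    .mode           = 0644,
                    .proc_handler   = proc_dointvec,
            },
            { }
    };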
diff --git a/net/core/utils.c b/net/core/utils.c
index 83221aee7084..838250241d26 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -24,6 +24,8 @@
24#include <linux/types.h> 24#include <linux/types.h>
25#include <linux/percpu.h> 25#include <linux/percpu.h>
26#include <linux/init.h> 26#include <linux/init.h>
27#include <linux/ratelimit.h>
28
27#include <net/sock.h> 29#include <net/sock.h>
28 30
29#include <asm/byteorder.h> 31#include <asm/byteorder.h>