author	Linus Torvalds <torvalds@linux-foundation.org>	2009-12-08 10:55:01 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2009-12-08 10:55:01 -0500
commit	d7fc02c7bae7b1cf69269992cf880a43a350cdaa (patch)
tree	a43d56fa72913a1cc98a0bbebe054d08581b3a7c /net/core
parent	ee1262dbc65ce0b6234a915d8432171e8d77f518 (diff)
parent	28b4d5cc17c20786848cdc07b7ea237a309776bb (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next-2.6
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next-2.6: (1815 commits)
  mac80211: fix reorder buffer release
  iwmc3200wifi: Enable wimax core through module parameter
  iwmc3200wifi: Add wifi-wimax coexistence mode as a module parameter
  iwmc3200wifi: Coex table command does not expect a response
  iwmc3200wifi: Update wiwi priority table
  iwlwifi: driver version track kernel version
  iwlwifi: indicate uCode type when fail dump error/event log
  iwl3945: remove duplicated event logging code
  b43: fix two warnings
  ipw2100: fix rebooting hang with driver loaded
  cfg80211: indent regulatory messages with spaces
  iwmc3200wifi: fix NULL pointer dereference in pmkid update
  mac80211: Fix TX status reporting for injected data frames
  ath9k: enable 2GHz band only if the device supports it
  airo: Fix integer overflow warning
  rt2x00: Fix padding bug on L2PAD devices.
  WE: Fix set events not propagated
  b43legacy: avoid PPC fault during resume
  b43: avoid PPC fault during resume
  tcp: fix a timewait refcnt race
  ...

Fix up conflicts due to sysctl cleanups (dead sysctl_check code and
CTL_UNNUMBERED removed) in
	kernel/sysctl_check.c
	net/ipv4/sysctl_net_ipv4.c
	net/ipv6/addrconf.c
	net/sctp/sysctl.c
Diffstat (limited to 'net/core')
-rw-r--r--	net/core/Makefile	1
-rw-r--r--	net/core/datagram.c	1
-rw-r--r--	net/core/dev.c	654
-rw-r--r--	net/core/drop_monitor.c	2
-rw-r--r--	net/core/ethtool.c	81
-rw-r--r--	net/core/fib_rules.c	107
-rw-r--r--	net/core/filter.c	6
-rw-r--r--	net/core/gen_stats.c	8
-rw-r--r--	net/core/link_watch.c	94
-rw-r--r--	net/core/neighbour.c	2
-rw-r--r--	net/core/net-sysfs.c	59
-rw-r--r--	net/core/net_namespace.c	272
-rw-r--r--	net/core/pktgen.c	30
-rw-r--r--	net/core/rtnetlink.c	179
-rw-r--r--	net/core/skb_dma_map.c	65
-rw-r--r--	net/core/skbuff.c	5
-rw-r--r--	net/core/sock.c	58
-rw-r--r--	net/core/sysctl_net_core.c	2
18 files changed, 1003 insertions, 623 deletions
diff --git a/net/core/Makefile b/net/core/Makefile
index 796f46eece5f..08791ac3e05a 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -6,7 +6,6 @@ obj-y := sock.o request_sock.o skbuff.o iovec.o datagram.o stream.o scm.o \
	 gen_stats.o gen_estimator.o net_namespace.o
 
 obj-$(CONFIG_SYSCTL) += sysctl_net_core.o
-obj-$(CONFIG_HAS_DMA) += skb_dma_map.o
 
 obj-y += dev.o ethtool.o dev_mcast.o dst.o netevent.o \
	neighbour.o rtnetlink.o utils.o link_watch.o filter.o
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 4ade3011bb3c..95c2e0840d0d 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -271,6 +271,7 @@ int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags)
	}
 
	kfree_skb(skb);
+	atomic_inc(&sk->sk_drops);
	sk_mem_reclaim_partial(sk);
 
	return err;
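The hunk above adds lockless per-socket drop accounting: the packet itself is freed, but the loss stays observable through the atomic sk_drops counter. A minimal sketch of the idiom (not from the patch; the helper name is illustrative):

#include <linux/skbuff.h>
#include <net/sock.h>

static void drop_and_count(struct sock *sk, struct sk_buff *skb)
{
	kfree_skb(skb);			/* the packet itself is gone */
	atomic_inc(&sk->sk_drops);	/* ...but the loss remains visible */
}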
diff --git a/net/core/dev.c b/net/core/dev.c
index fe10551d3671..c36a17aafcf3 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -79,6 +79,7 @@
 #include <linux/cpu.h>
 #include <linux/types.h>
 #include <linux/kernel.h>
+#include <linux/hash.h>
 #include <linux/sched.h>
 #include <linux/mutex.h>
 #include <linux/string.h>
@@ -104,6 +105,7 @@
 #include <net/dst.h>
 #include <net/pkt_sched.h>
 #include <net/checksum.h>
+#include <net/xfrm.h>
 #include <linux/highmem.h>
 #include <linux/init.h>
 #include <linux/kmod.h>
@@ -175,7 +177,7 @@ static struct list_head ptype_all __read_mostly; /* Taps */
  * The @dev_base_head list is protected by @dev_base_lock and the rtnl
  * semaphore.
  *
- * Pure readers hold dev_base_lock for reading.
+ * Pure readers hold dev_base_lock for reading, or rcu_read_lock()
  *
  * Writers must hold the rtnl semaphore while they loop through the
  * dev_base_head list, and hold dev_base_lock for writing when they do the
@@ -193,18 +195,15 @@ static struct list_head ptype_all __read_mostly; /* Taps */
 DEFINE_RWLOCK(dev_base_lock);
 EXPORT_SYMBOL(dev_base_lock);
 
-#define NETDEV_HASHBITS	8
-#define NETDEV_HASHENTRIES (1 << NETDEV_HASHBITS)
-
 static inline struct hlist_head *dev_name_hash(struct net *net, const char *name)
 {
	unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
-	return &net->dev_name_head[hash & ((1 << NETDEV_HASHBITS) - 1)];
+	return &net->dev_name_head[hash_32(hash, NETDEV_HASHBITS)];
 }
 
 static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
 {
-	return &net->dev_index_head[ifindex & ((1 << NETDEV_HASHBITS) - 1)];
+	return &net->dev_index_head[ifindex & (NETDEV_HASHENTRIES - 1)];
 }
 
 /* Device list insertion */
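The name-hash bucket is now chosen with hash_32(), which mixes all 32 bits of the string hash rather than keeping only the low bits, while the ifindex hash stays a plain mask since index values are already well distributed. A minimal sketch of the two styles, assuming the deleted NETDEV_* constants now live in a shared header (they are still used above):

#include <linux/hash.h>

#define NETDEV_HASHBITS    8
#define NETDEV_HASHENTRIES (1 << NETDEV_HASHBITS)

static unsigned int bucket_by_mask(unsigned int hash)
{
	return hash & (NETDEV_HASHENTRIES - 1);	/* keeps only the low bits */
}

static unsigned int bucket_by_hash32(unsigned int hash)
{
	return hash_32(hash, NETDEV_HASHBITS);	/* multiplicative mix, top bits */
}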
@@ -215,23 +214,26 @@ static int list_netdevice(struct net_device *dev)
	ASSERT_RTNL();
 
	write_lock_bh(&dev_base_lock);
-	list_add_tail(&dev->dev_list, &net->dev_base_head);
-	hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name));
-	hlist_add_head(&dev->index_hlist, dev_index_hash(net, dev->ifindex));
+	list_add_tail_rcu(&dev->dev_list, &net->dev_base_head);
+	hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name));
+	hlist_add_head_rcu(&dev->index_hlist,
+			   dev_index_hash(net, dev->ifindex));
	write_unlock_bh(&dev_base_lock);
	return 0;
 }
 
-/* Device list removal */
+/* Device list removal
+ * caller must respect a RCU grace period before freeing/reusing dev
+ */
 static void unlist_netdevice(struct net_device *dev)
 {
	ASSERT_RTNL();
 
	/* Unlink dev from the device chain */
	write_lock_bh(&dev_base_lock);
-	list_del(&dev->dev_list);
-	hlist_del(&dev->name_hlist);
-	hlist_del(&dev->index_hlist);
+	list_del_rcu(&dev->dev_list);
+	hlist_del_rcu(&dev->name_hlist);
+	hlist_del_rcu(&dev->index_hlist);
	write_unlock_bh(&dev_base_lock);
 }
 
@@ -587,18 +589,44 @@ __setup("netdev=", netdev_boot_setup);
 struct net_device *__dev_get_by_name(struct net *net, const char *name)
 {
	struct hlist_node *p;
+	struct net_device *dev;
+	struct hlist_head *head = dev_name_hash(net, name);
 
-	hlist_for_each(p, dev_name_hash(net, name)) {
-		struct net_device *dev
-			= hlist_entry(p, struct net_device, name_hlist);
+	hlist_for_each_entry(dev, p, head, name_hlist)
		if (!strncmp(dev->name, name, IFNAMSIZ))
			return dev;
-	}
+
	return NULL;
 }
 EXPORT_SYMBOL(__dev_get_by_name);
 
 /**
+ * dev_get_by_name_rcu - find a device by its name
+ * @net: the applicable net namespace
+ * @name: name to find
+ *
+ * Find an interface by name.
+ * If the name is found a pointer to the device is returned.
+ * If the name is not found then %NULL is returned.
+ * The reference counters are not incremented so the caller must be
+ * careful with locks. The caller must hold RCU lock.
+ */
+
+struct net_device *dev_get_by_name_rcu(struct net *net, const char *name)
+{
+	struct hlist_node *p;
+	struct net_device *dev;
+	struct hlist_head *head = dev_name_hash(net, name);
+
+	hlist_for_each_entry_rcu(dev, p, head, name_hlist)
+		if (!strncmp(dev->name, name, IFNAMSIZ))
+			return dev;
+
+	return NULL;
+}
+EXPORT_SYMBOL(dev_get_by_name_rcu);
+
+/**
  * dev_get_by_name - find a device by its name
  * @net: the applicable net namespace
  * @name: name to find
@@ -614,11 +642,11 @@ struct net_device *dev_get_by_name(struct net *net, const char *name)
 {
	struct net_device *dev;
 
-	read_lock(&dev_base_lock);
-	dev = __dev_get_by_name(net, name);
+	rcu_read_lock();
+	dev = dev_get_by_name_rcu(net, name);
	if (dev)
		dev_hold(dev);
-	read_unlock(&dev_base_lock);
+	rcu_read_unlock();
	return dev;
 }
 EXPORT_SYMBOL(dev_get_by_name);
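The lookup now comes in an RCU flavour, and dev_get_by_name() itself is just that lookup plus dev_hold(). A hedged usage sketch of the lockless variant (helper name illustrative) — without a reference, the device may only be touched inside the read-side section, exactly as the kernel-doc above warns:

static int mtu_of(struct net *net, const char *name)
{
	struct net_device *dev;
	int mtu = -ENODEV;

	rcu_read_lock();
	dev = dev_get_by_name_rcu(net, name);
	if (dev)
		mtu = dev->mtu;		/* valid only under rcu_read_lock() */
	rcu_read_unlock();

	return mtu;
}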
@@ -638,17 +666,42 @@ EXPORT_SYMBOL(dev_get_by_name);
 struct net_device *__dev_get_by_index(struct net *net, int ifindex)
 {
	struct hlist_node *p;
+	struct net_device *dev;
+	struct hlist_head *head = dev_index_hash(net, ifindex);
 
-	hlist_for_each(p, dev_index_hash(net, ifindex)) {
-		struct net_device *dev
-			= hlist_entry(p, struct net_device, index_hlist);
+	hlist_for_each_entry(dev, p, head, index_hlist)
		if (dev->ifindex == ifindex)
			return dev;
-	}
+
	return NULL;
 }
 EXPORT_SYMBOL(__dev_get_by_index);
 
+/**
+ * dev_get_by_index_rcu - find a device by its ifindex
+ * @net: the applicable net namespace
+ * @ifindex: index of device
+ *
+ * Search for an interface by index. Returns %NULL if the device
+ * is not found or a pointer to the device. The device has not
+ * had its reference counter increased so the caller must be careful
+ * about locking. The caller must hold RCU lock.
+ */
+
+struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex)
+{
+	struct hlist_node *p;
+	struct net_device *dev;
+	struct hlist_head *head = dev_index_hash(net, ifindex);
+
+	hlist_for_each_entry_rcu(dev, p, head, index_hlist)
+		if (dev->ifindex == ifindex)
+			return dev;
+
+	return NULL;
+}
+EXPORT_SYMBOL(dev_get_by_index_rcu);
+
 
 /**
  * dev_get_by_index - find a device by its ifindex
@@ -665,11 +718,11 @@ struct net_device *dev_get_by_index(struct net *net, int ifindex)
 {
	struct net_device *dev;
 
-	read_lock(&dev_base_lock);
-	dev = __dev_get_by_index(net, ifindex);
+	rcu_read_lock();
+	dev = dev_get_by_index_rcu(net, ifindex);
	if (dev)
		dev_hold(dev);
-	read_unlock(&dev_base_lock);
+	rcu_read_unlock();
	return dev;
 }
 EXPORT_SYMBOL(dev_get_by_index);
@@ -748,15 +801,15 @@ struct net_device *dev_get_by_flags(struct net *net, unsigned short if_flags,
	struct net_device *dev, *ret;
 
	ret = NULL;
-	read_lock(&dev_base_lock);
-	for_each_netdev(net, dev) {
+	rcu_read_lock();
+	for_each_netdev_rcu(net, dev) {
		if (((dev->flags ^ if_flags) & mask) == 0) {
			dev_hold(dev);
			ret = dev;
			break;
		}
	}
-	read_unlock(&dev_base_lock);
+	rcu_read_unlock();
	return ret;
 }
 EXPORT_SYMBOL(dev_get_by_flags);
@@ -841,7 +894,8 @@ static int __dev_alloc_name(struct net *net, const char *name, char *buf)
		free_page((unsigned long) inuse);
	}
 
-	snprintf(buf, IFNAMSIZ, name, i);
+	if (buf != name)
+		snprintf(buf, IFNAMSIZ, name, i);
	if (!__dev_get_by_name(net, buf))
		return i;
 
@@ -881,6 +935,21 @@ int dev_alloc_name(struct net_device *dev, const char *name)
 }
 EXPORT_SYMBOL(dev_alloc_name);
 
+static int dev_get_valid_name(struct net *net, const char *name, char *buf,
+			      bool fmt)
+{
+	if (!dev_valid_name(name))
+		return -EINVAL;
+
+	if (fmt && strchr(name, '%'))
+		return __dev_alloc_name(net, name, buf);
+	else if (__dev_get_by_name(net, name))
+		return -EEXIST;
+	else if (buf != name)
+		strlcpy(buf, name, IFNAMSIZ);
+
+	return 0;
+}
 
 /**
  * dev_change_name - change name of a device
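dev_get_valid_name() centralizes the naming convention its callers previously open-coded: a '%' format in the requested name means "allocate the first free unit number", anything else must be a valid, unused literal name. A small sketch using the exported wrapper (device and name are illustrative):

static int name_my_dev(struct net_device *dev)
{
	/* assigns "mydev0", "mydev1", ... whichever unit is free */
	return dev_alloc_name(dev, "mydev%d");
}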
@@ -904,28 +973,20 @@ int dev_change_name(struct net_device *dev, const char *newname)
	if (dev->flags & IFF_UP)
		return -EBUSY;
 
-	if (!dev_valid_name(newname))
-		return -EINVAL;
-
	if (strncmp(newname, dev->name, IFNAMSIZ) == 0)
		return 0;
 
	memcpy(oldname, dev->name, IFNAMSIZ);
 
-	if (strchr(newname, '%')) {
-		err = dev_alloc_name(dev, newname);
-		if (err < 0)
-			return err;
-	} else if (__dev_get_by_name(net, newname))
-		return -EEXIST;
-	else
-		strlcpy(dev->name, newname, IFNAMSIZ);
+	err = dev_get_valid_name(net, newname, dev->name, 1);
+	if (err < 0)
+		return err;
 
 rollback:
	/* For now only devices in the initial network namespace
	 * are in sysfs.
	 */
-	if (net == &init_net) {
+	if (net_eq(net, &init_net)) {
		ret = device_rename(&dev->dev, dev->name);
		if (ret) {
			memcpy(dev->name, oldname, IFNAMSIZ);
@@ -935,7 +996,12 @@ rollback:
 
	write_lock_bh(&dev_base_lock);
	hlist_del(&dev->name_hlist);
-	hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name));
+	write_unlock_bh(&dev_base_lock);
+
+	synchronize_rcu();
+
+	write_lock_bh(&dev_base_lock);
+	hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name));
	write_unlock_bh(&dev_base_lock);
 
	ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev);
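Because name lookups are now lockless, a rename cannot simply move the node between hash chains: an RCU reader walking the old chain might otherwise follow the node onto its new chain mid-traversal. Hence the unhash, grace period, rehash sequence above. A generic sketch of that pattern, assuming the entry is reachable by key only through the hash:

#include <linux/rculist.h>
#include <linux/rcupdate.h>

static void rehash_entry(struct hlist_node *node, struct hlist_head *new_head)
{
	hlist_del_rcu(node);		/* old-chain readers may still see it */
	synchronize_rcu();		/* wait for those readers to finish */
	hlist_add_head_rcu(node, new_head);
}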
@@ -1038,9 +1104,9 @@ void dev_load(struct net *net, const char *name)
 {
	struct net_device *dev;
 
-	read_lock(&dev_base_lock);
-	dev = __dev_get_by_name(net, name);
-	read_unlock(&dev_base_lock);
+	rcu_read_lock();
+	dev = dev_get_by_name_rcu(net, name);
+	rcu_read_unlock();
 
	if (!dev && capable(CAP_NET_ADMIN))
		request_module("%s", name);
@@ -1287,6 +1353,7 @@ rollback:
				nb->notifier_call(nb, NETDEV_DOWN, dev);
			}
			nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
+			nb->notifier_call(nb, NETDEV_UNREGISTER_BATCH, dev);
		}
	}
 
@@ -1353,6 +1420,45 @@ static inline void net_timestamp(struct sk_buff *skb)
		skb->tstamp.tv64 = 0;
 }
 
+/**
+ * dev_forward_skb - loopback an skb to another netif
+ *
+ * @dev: destination network device
+ * @skb: buffer to forward
+ *
+ * return values:
+ *	NET_RX_SUCCESS	(no congestion)
+ *	NET_RX_DROP	(packet was dropped)
+ *
+ * dev_forward_skb can be used for injecting an skb from the
+ * start_xmit function of one device into the receive queue
+ * of another device.
+ *
+ * The receiving device may be in another namespace, so
+ * we have to clear all information in the skb that could
+ * impact namespace isolation.
+ */
+int dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
+{
+	skb_orphan(skb);
+
+	if (!(dev->flags & IFF_UP))
+		return NET_RX_DROP;
+
+	if (skb->len > (dev->mtu + dev->hard_header_len))
+		return NET_RX_DROP;
+
+	skb_dst_drop(skb);
+	skb->tstamp.tv64 = 0;
+	skb->pkt_type = PACKET_HOST;
+	skb->protocol = eth_type_trans(skb, dev);
+	skb->mark = 0;
+	secpath_reset(skb);
+	nf_reset(skb);
+	return netif_rx(skb);
+}
+EXPORT_SYMBOL_GPL(dev_forward_skb);
+
 /*
  *	Support routine. Sends outgoing frames to any network
  *	taps currently in use.
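A hedged sketch of the use case the comment describes: a veth-style pair whose transmit routine injects frames straight into the peer's receive path. peer_of() is a hypothetical lookup, not part of the patch, and error-path cleanup is elided for brevity:

static netdev_tx_t pair_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct net_device *peer = peer_of(dev);	/* hypothetical peer lookup */
	unsigned int len = skb->len;		/* skb is consumed below */

	if (dev_forward_skb(peer, skb) == NET_RX_SUCCESS) {
		dev->stats.tx_packets++;
		dev->stats.tx_bytes += len;
	} else {
		dev->stats.tx_dropped++;
	}
	return NETDEV_TX_OK;
}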
@@ -1701,7 +1807,7 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
			struct netdev_queue *txq)
 {
	const struct net_device_ops *ops = dev->netdev_ops;
-	int rc;
+	int rc = NETDEV_TX_OK;
 
	if (likely(!skb->next)) {
		if (!list_empty(&ptype_all))
@@ -1749,6 +1855,8 @@ gso:
		nskb->next = NULL;
		rc = ops->ndo_start_xmit(nskb, dev);
		if (unlikely(rc != NETDEV_TX_OK)) {
+			if (rc & ~NETDEV_TX_MASK)
+				goto out_kfree_gso_skb;
			nskb->next = skb->next;
			skb->next = nskb;
			return rc;
@@ -1758,11 +1866,12 @@ gso:
			return NETDEV_TX_BUSY;
	} while (skb->next);
 
-	skb->destructor = DEV_GSO_CB(skb)->destructor;
-
+out_kfree_gso_skb:
+	if (likely(skb->next == NULL))
+		skb->destructor = DEV_GSO_CB(skb)->destructor;
 out_kfree_skb:
	kfree_skb(skb);
-	return NETDEV_TX_OK;
+	return rc;
 }
 
 static u32 skb_tx_hashrnd;
@@ -1789,16 +1898,43 @@ u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb)
 }
 EXPORT_SYMBOL(skb_tx_hash);
 
+static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index)
+{
+	if (unlikely(queue_index >= dev->real_num_tx_queues)) {
+		if (net_ratelimit()) {
+			WARN(1, "%s selects TX queue %d, but "
+			     "real number of TX queues is %d\n",
+			     dev->name, queue_index,
+			     dev->real_num_tx_queues);
+		}
+		return 0;
+	}
+	return queue_index;
+}
+
 static struct netdev_queue *dev_pick_tx(struct net_device *dev,
					struct sk_buff *skb)
 {
-	const struct net_device_ops *ops = dev->netdev_ops;
-	u16 queue_index = 0;
+	u16 queue_index;
+	struct sock *sk = skb->sk;
+
+	if (sk_tx_queue_recorded(sk)) {
+		queue_index = sk_tx_queue_get(sk);
+	} else {
+		const struct net_device_ops *ops = dev->netdev_ops;
 
-	if (ops->ndo_select_queue)
-		queue_index = ops->ndo_select_queue(dev, skb);
-	else if (dev->real_num_tx_queues > 1)
-		queue_index = skb_tx_hash(dev, skb);
+		if (ops->ndo_select_queue) {
+			queue_index = ops->ndo_select_queue(dev, skb);
+			queue_index = dev_cap_txqueue(dev, queue_index);
+		} else {
+			queue_index = 0;
+			if (dev->real_num_tx_queues > 1)
+				queue_index = skb_tx_hash(dev, skb);
+
+			if (sk && sk->sk_dst_cache)
+				sk_tx_queue_set(sk, queue_index);
+		}
+	}
 
	skb_set_queue_mapping(skb, queue_index);
	return netdev_get_tx_queue(dev, queue_index);
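The interesting move here is memoization: once a socket has a queue recorded, dev_pick_tx() skips both ndo_select_queue() and the hash entirely. A generic sketch of the pattern with illustrative names, not kernel API:

struct flow {
	int cached_queue;	/* -1 while nothing is recorded yet */
};

static int pick_queue(struct flow *f, unsigned int hash, int nqueues)
{
	if (f->cached_queue >= 0)
		return f->cached_queue;		/* fast path: reuse decision */

	f->cached_queue = hash % nqueues;	/* slow path: compute once */
	return f->cached_queue;
}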
@@ -1935,8 +2071,8 @@ gso:
			HARD_TX_LOCK(dev, txq, cpu);
 
			if (!netif_tx_queue_stopped(txq)) {
-				rc = NET_XMIT_SUCCESS;
-				if (!dev_hard_start_xmit(skb, dev, txq)) {
+				rc = dev_hard_start_xmit(skb, dev, txq);
+				if (dev_xmit_complete(rc)) {
					HARD_TX_UNLOCK(dev, txq);
					goto out;
				}
@@ -2191,7 +2327,7 @@ static int ing_filter(struct sk_buff *skb)
	if (MAX_RED_LOOP < ttl++) {
		printk(KERN_WARNING
		       "Redir loop detected Dropping packet (%d->%d)\n",
-		       skb->iif, dev->ifindex);
+		       skb->skb_iif, dev->ifindex);
		return TC_ACT_SHOT;
	}
 
@@ -2292,15 +2428,15 @@ int netif_receive_skb(struct sk_buff *skb)
	if (!skb->tstamp.tv64)
		net_timestamp(skb);
 
-	if (skb->vlan_tci && vlan_hwaccel_do_receive(skb))
+	if (vlan_tx_tag_present(skb) && vlan_hwaccel_do_receive(skb))
		return NET_RX_SUCCESS;
 
	/* if we've gotten here through NAPI, check netpoll */
	if (netpoll_receive_skb(skb))
		return NET_RX_DROP;
 
-	if (!skb->iif)
-		skb->iif = skb->dev->ifindex;
+	if (!skb->skb_iif)
+		skb->skb_iif = skb->dev->ifindex;
 
	null_or_orig = NULL;
	orig_dev = skb->dev;
@@ -2440,7 +2576,7 @@ void napi_gro_flush(struct napi_struct *napi)
 }
 EXPORT_SYMBOL(napi_gro_flush);
 
-int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
+enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 {
	struct sk_buff **pp = NULL;
	struct packet_type *ptype;
@@ -2448,7 +2584,7 @@ int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
	struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
	int same_flow;
	int mac_len;
-	int ret;
+	enum gro_result ret;
 
	if (!(skb->dev->features & NETIF_F_GRO))
		goto normal;
@@ -2532,7 +2668,8 @@ normal:
 }
 EXPORT_SYMBOL(dev_gro_receive);
 
-static int __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
+static gro_result_t
+__napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 {
	struct sk_buff *p;
 
@@ -2540,33 +2677,35 @@ static int __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
		return GRO_NORMAL;
 
	for (p = napi->gro_list; p; p = p->next) {
-		NAPI_GRO_CB(p)->same_flow = (p->dev == skb->dev)
-			&& !compare_ether_header(skb_mac_header(p),
-						 skb_gro_mac_header(skb));
+		NAPI_GRO_CB(p)->same_flow =
+			(p->dev == skb->dev) &&
+			!compare_ether_header(skb_mac_header(p),
+					      skb_gro_mac_header(skb));
		NAPI_GRO_CB(p)->flush = 0;
	}
 
	return dev_gro_receive(napi, skb);
 }
 
-int napi_skb_finish(int ret, struct sk_buff *skb)
+gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
 {
-	int err = NET_RX_SUCCESS;
-
	switch (ret) {
	case GRO_NORMAL:
-		return netif_receive_skb(skb);
+		if (netif_receive_skb(skb))
+			ret = GRO_DROP;
+		break;
 
	case GRO_DROP:
-		err = NET_RX_DROP;
-		/* fall through */
-
	case GRO_MERGED_FREE:
		kfree_skb(skb);
		break;
+
+	case GRO_HELD:
+	case GRO_MERGED:
+		break;
	}
 
-	return err;
+	return ret;
 }
 EXPORT_SYMBOL(napi_skb_finish);
 
@@ -2586,7 +2725,7 @@ void skb_gro_reset_offset(struct sk_buff *skb)
 }
 EXPORT_SYMBOL(skb_gro_reset_offset);
 
-int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
+gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 {
	skb_gro_reset_offset(skb);
 
@@ -2605,49 +2744,41 @@ EXPORT_SYMBOL(napi_reuse_skb);
 
 struct sk_buff *napi_get_frags(struct napi_struct *napi)
 {
-	struct net_device *dev = napi->dev;
	struct sk_buff *skb = napi->skb;
 
	if (!skb) {
-		skb = netdev_alloc_skb(dev, GRO_MAX_HEAD + NET_IP_ALIGN);
-		if (!skb)
-			goto out;
-
-		skb_reserve(skb, NET_IP_ALIGN);
-
-		napi->skb = skb;
+		skb = netdev_alloc_skb_ip_align(napi->dev, GRO_MAX_HEAD);
+		if (skb)
+			napi->skb = skb;
	}
-
-out:
	return skb;
 }
 EXPORT_SYMBOL(napi_get_frags);
 
-int napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, int ret)
+gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb,
+			       gro_result_t ret)
 {
-	int err = NET_RX_SUCCESS;
-
	switch (ret) {
	case GRO_NORMAL:
	case GRO_HELD:
		skb->protocol = eth_type_trans(skb, napi->dev);
 
-		if (ret == GRO_NORMAL)
-			return netif_receive_skb(skb);
-
-		skb_gro_pull(skb, -ETH_HLEN);
+		if (ret == GRO_HELD)
+			skb_gro_pull(skb, -ETH_HLEN);
+		else if (netif_receive_skb(skb))
+			ret = GRO_DROP;
		break;
 
	case GRO_DROP:
-		err = NET_RX_DROP;
-		/* fall through */
-
	case GRO_MERGED_FREE:
		napi_reuse_skb(napi, skb);
		break;
+
+	case GRO_MERGED:
+		break;
	}
 
-	return err;
+	return ret;
 }
 EXPORT_SYMBOL(napi_frags_finish);
 
@@ -2688,12 +2819,12 @@ out:
 }
 EXPORT_SYMBOL(napi_frags_skb);
 
-int napi_gro_frags(struct napi_struct *napi)
+gro_result_t napi_gro_frags(struct napi_struct *napi)
 {
	struct sk_buff *skb = napi_frags_skb(napi);
 
	if (!skb)
-		return NET_RX_DROP;
+		return GRO_DROP;
 
	return napi_frags_finish(napi, skb, __napi_gro_receive(napi, skb));
 }
@@ -2938,15 +3069,15 @@ static int dev_ifname(struct net *net, struct ifreq __user *arg)
	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
		return -EFAULT;
 
-	read_lock(&dev_base_lock);
-	dev = __dev_get_by_index(net, ifr.ifr_ifindex);
+	rcu_read_lock();
+	dev = dev_get_by_index_rcu(net, ifr.ifr_ifindex);
	if (!dev) {
-		read_unlock(&dev_base_lock);
+		rcu_read_unlock();
		return -ENODEV;
	}
 
	strcpy(ifr.ifr_name, dev->name);
-	read_unlock(&dev_base_lock);
+	rcu_read_unlock();
 
	if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
		return -EFAULT;
@@ -3016,18 +3147,18 @@ static int dev_ifconf(struct net *net, char __user *arg)
  *	in detail.
  */
 void *dev_seq_start(struct seq_file *seq, loff_t *pos)
-	__acquires(dev_base_lock)
+	__acquires(RCU)
 {
	struct net *net = seq_file_net(seq);
	loff_t off;
	struct net_device *dev;
 
-	read_lock(&dev_base_lock);
+	rcu_read_lock();
	if (!*pos)
		return SEQ_START_TOKEN;
 
	off = 1;
-	for_each_netdev(net, dev)
+	for_each_netdev_rcu(net, dev)
		if (off++ == *pos)
			return dev;
 
@@ -3036,16 +3167,18 @@ void *dev_seq_start(struct seq_file *seq, loff_t *pos)
 
 void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
-	struct net *net = seq_file_net(seq);
+	struct net_device *dev = (v == SEQ_START_TOKEN) ?
+				  first_net_device(seq_file_net(seq)) :
+				  next_net_device((struct net_device *)v);
+
	++*pos;
-	return v == SEQ_START_TOKEN ?
-		first_net_device(net) : next_net_device((struct net_device *)v);
+	return rcu_dereference(dev);
 }
 
 void dev_seq_stop(struct seq_file *seq, void *v)
-	__releases(dev_base_lock)
+	__releases(RCU)
 {
-	read_unlock(&dev_base_lock);
+	rcu_read_unlock();
 }
 
 static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
@@ -4254,12 +4387,12 @@ int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
 EXPORT_SYMBOL(dev_set_mac_address);
 
 /*
- *	Perform the SIOCxIFxxx calls, inside read_lock(dev_base_lock)
+ *	Perform the SIOCxIFxxx calls, inside rcu_read_lock()
 */
 static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cmd)
 {
	int err;
-	struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
+	struct net_device *dev = dev_get_by_name_rcu(net, ifr->ifr_name);
 
	if (!dev)
		return -ENODEV;
@@ -4491,9 +4624,9 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
	case SIOCGIFINDEX:
	case SIOCGIFTXQLEN:
		dev_load(net, ifr.ifr_name);
-		read_lock(&dev_base_lock);
+		rcu_read_lock();
		ret = dev_ifsioc_locked(net, &ifr, cmd);
-		read_unlock(&dev_base_lock);
+		rcu_read_unlock();
		if (!ret) {
			if (colon)
				*colon = ':';
@@ -4636,59 +4769,80 @@ static void net_set_todo(struct net_device *dev)
	list_add_tail(&dev->todo_list, &net_todo_list);
 }
 
-static void rollback_registered(struct net_device *dev)
+static void rollback_registered_many(struct list_head *head)
 {
+	struct net_device *dev;
+
	BUG_ON(dev_boot_phase);
	ASSERT_RTNL();
 
-	/* Some devices call without registering for initialization unwind. */
-	if (dev->reg_state == NETREG_UNINITIALIZED) {
-		printk(KERN_DEBUG "unregister_netdevice: device %s/%p never "
-				  "was registered\n", dev->name, dev);
+	list_for_each_entry(dev, head, unreg_list) {
+		/* Some devices call without registering
+		 * for initialization unwind.
+		 */
+		if (dev->reg_state == NETREG_UNINITIALIZED) {
+			pr_debug("unregister_netdevice: device %s/%p never "
+				 "was registered\n", dev->name, dev);
 
-		WARN_ON(1);
-		return;
-	}
+			WARN_ON(1);
+			return;
+		}
 
-	BUG_ON(dev->reg_state != NETREG_REGISTERED);
+		BUG_ON(dev->reg_state != NETREG_REGISTERED);
 
-	/* If device is running, close it first. */
-	dev_close(dev);
+		/* If device is running, close it first. */
+		dev_close(dev);
 
-	/* And unlink it from device chain. */
-	unlist_netdevice(dev);
+		/* And unlink it from device chain. */
+		unlist_netdevice(dev);
 
-	dev->reg_state = NETREG_UNREGISTERING;
+		dev->reg_state = NETREG_UNREGISTERING;
+	}
 
	synchronize_net();
 
-	/* Shutdown queueing discipline. */
-	dev_shutdown(dev);
+	list_for_each_entry(dev, head, unreg_list) {
+		/* Shutdown queueing discipline. */
+		dev_shutdown(dev);
 
 
-	/* Notify protocols, that we are about to destroy
-	   this device. They should clean all the things.
-	*/
-	call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
+		/* Notify protocols, that we are about to destroy
+		   this device. They should clean all the things.
+		*/
+		call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
 
-	/*
-	 *	Flush the unicast and multicast chains
-	 */
-	dev_unicast_flush(dev);
-	dev_addr_discard(dev);
+		/*
+		 *	Flush the unicast and multicast chains
+		 */
+		dev_unicast_flush(dev);
+		dev_addr_discard(dev);
 
-	if (dev->netdev_ops->ndo_uninit)
-		dev->netdev_ops->ndo_uninit(dev);
+		if (dev->netdev_ops->ndo_uninit)
+			dev->netdev_ops->ndo_uninit(dev);
 
-	/* Notifier chain MUST detach us from master device. */
-	WARN_ON(dev->master);
+		/* Notifier chain MUST detach us from master device. */
+		WARN_ON(dev->master);
 
-	/* Remove entries from kobject tree */
-	netdev_unregister_kobject(dev);
+		/* Remove entries from kobject tree */
+		netdev_unregister_kobject(dev);
+	}
+
+	/* Process any work delayed until the end of the batch */
+	dev = list_entry(head->next, struct net_device, unreg_list);
+	call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev);
 
	synchronize_net();
 
-	dev_put(dev);
+	list_for_each_entry(dev, head, unreg_list)
+		dev_put(dev);
+}
+
+static void rollback_registered(struct net_device *dev)
+{
+	LIST_HEAD(single);
+
+	list_add(&dev->unreg_list, &single);
+	rollback_registered_many(&single);
 }
 
 static void __netdev_init_queue_locks_one(struct net_device *dev,
@@ -4747,6 +4901,33 @@ unsigned long netdev_fix_features(unsigned long features, const char *name)
 EXPORT_SYMBOL(netdev_fix_features);
 
 /**
+ * netif_stacked_transfer_operstate - transfer operstate
+ * @rootdev: the root or lower level device to transfer state from
+ * @dev: the device to transfer operstate to
+ *
+ * Transfer operational state from root to device. This is normally
+ * called when a stacking relationship exists between the root
+ * device and the device(a leaf device).
+ */
+void netif_stacked_transfer_operstate(const struct net_device *rootdev,
+				      struct net_device *dev)
+{
+	if (rootdev->operstate == IF_OPER_DORMANT)
+		netif_dormant_on(dev);
+	else
+		netif_dormant_off(dev);
+
+	if (netif_carrier_ok(rootdev)) {
+		if (!netif_carrier_ok(dev))
+			netif_carrier_on(dev);
+	} else {
+		if (netif_carrier_ok(dev))
+			netif_carrier_off(dev);
+	}
+}
+EXPORT_SYMBOL(netif_stacked_transfer_operstate);
+
+/**
  * register_netdevice - register a network device
  * @dev: device to register
@@ -4765,8 +4946,6 @@ EXPORT_SYMBOL(netdev_fix_features);
 
 int register_netdevice(struct net_device *dev)
 {
-	struct hlist_head *head;
-	struct hlist_node *p;
	int ret;
	struct net *net = dev_net(dev);
 
@@ -4795,26 +4974,14 @@ int register_netdevice(struct net_device *dev)
		}
	}
 
-	if (!dev_valid_name(dev->name)) {
-		ret = -EINVAL;
-		goto err_uninit;
-	}
+	ret = dev_get_valid_name(net, dev->name, dev->name, 0);
+	if (ret)
+		goto err_uninit;
 
	dev->ifindex = dev_new_index(net);
	if (dev->iflink == -1)
		dev->iflink = dev->ifindex;
 
-	/* Check for existence of name */
-	head = dev_name_hash(net, dev->name);
-	hlist_for_each(p, head) {
-		struct net_device *d
-			= hlist_entry(p, struct net_device, name_hlist);
-		if (!strncmp(d->name, dev->name, IFNAMSIZ)) {
-			ret = -EEXIST;
-			goto err_uninit;
-		}
-	}
-
	/* Fix illegal checksum combinations */
	if ((dev->features & NETIF_F_HW_CSUM) &&
	    (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
@@ -4837,6 +5004,12 @@ int register_netdevice(struct net_device *dev)
		dev->features |= NETIF_F_GSO;
 
	netdev_initialize_kobject(dev);
+
+	ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev);
+	ret = notifier_to_errno(ret);
+	if (ret)
+		goto err_uninit;
+
	ret = netdev_register_kobject(dev);
	if (ret)
		goto err_uninit;
@@ -4961,6 +5134,8 @@ static void netdev_wait_allrefs(struct net_device *dev)
 {
	unsigned long rebroadcast_time, warning_time;
 
+	linkwatch_forget_dev(dev);
+
	rebroadcast_time = warning_time = jiffies;
	while (atomic_read(&dev->refcnt) != 0) {
		if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
@@ -4968,6 +5143,8 @@ static void netdev_wait_allrefs(struct net_device *dev)
 
			/* Rebroadcast unregister notification */
			call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
+			/* don't resend NETDEV_UNREGISTER_BATCH, _BATCH users
+			 * should have already handle it the first time */
 
			if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
				     &dev->state)) {
@@ -5063,6 +5240,32 @@ void netdev_run_todo(void)
 }
 
 /**
+ * dev_txq_stats_fold - fold tx_queues stats
+ * @dev: device to get statistics from
+ * @stats: struct net_device_stats to hold results
+ */
+void dev_txq_stats_fold(const struct net_device *dev,
+			struct net_device_stats *stats)
+{
+	unsigned long tx_bytes = 0, tx_packets = 0, tx_dropped = 0;
+	unsigned int i;
+	struct netdev_queue *txq;
+
+	for (i = 0; i < dev->num_tx_queues; i++) {
+		txq = netdev_get_tx_queue(dev, i);
+		tx_bytes += txq->tx_bytes;
+		tx_packets += txq->tx_packets;
+		tx_dropped += txq->tx_dropped;
+	}
+	if (tx_bytes || tx_packets || tx_dropped) {
+		stats->tx_bytes = tx_bytes;
+		stats->tx_packets = tx_packets;
+		stats->tx_dropped = tx_dropped;
+	}
+}
+EXPORT_SYMBOL(dev_txq_stats_fold);
+
+/**
  * dev_get_stats - get network device statistics
  * @dev: device to get statistics from
  *
@@ -5076,25 +5279,9 @@ const struct net_device_stats *dev_get_stats(struct net_device *dev)
 
	if (ops->ndo_get_stats)
		return ops->ndo_get_stats(dev);
-	else {
-		unsigned long tx_bytes = 0, tx_packets = 0, tx_dropped = 0;
-		struct net_device_stats *stats = &dev->stats;
-		unsigned int i;
-		struct netdev_queue *txq;
-
-		for (i = 0; i < dev->num_tx_queues; i++) {
-			txq = netdev_get_tx_queue(dev, i);
-			tx_bytes += txq->tx_bytes;
-			tx_packets += txq->tx_packets;
-			tx_dropped += txq->tx_dropped;
-		}
-		if (tx_bytes || tx_packets || tx_dropped) {
-			stats->tx_bytes = tx_bytes;
-			stats->tx_packets = tx_packets;
-			stats->tx_dropped = tx_dropped;
-		}
-		return stats;
-	}
+
+	dev_txq_stats_fold(dev, &dev->stats);
+	return &dev->stats;
 }
 EXPORT_SYMBOL(dev_get_stats);
 
@@ -5174,6 +5361,8 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
	netdev_init_queues(dev);
 
	INIT_LIST_HEAD(&dev->napi_list);
+	INIT_LIST_HEAD(&dev->unreg_list);
+	INIT_LIST_HEAD(&dev->link_watch_list);
	dev->priv_flags = IFF_XMIT_DST_RELEASE;
	setup(dev);
	strcpy(dev->name, name);
@@ -5238,25 +5427,47 @@ void synchronize_net(void)
 EXPORT_SYMBOL(synchronize_net);
 
 /**
- *	unregister_netdevice - remove device from the kernel
+ *	unregister_netdevice_queue - remove device from the kernel
  *	@dev: device
+ *	@head: list
  *
  *	This function shuts down a device interface and removes it
  *	from the kernel tables.
+ *	If head not NULL, device is queued to be unregistered later.
  *
  *	Callers must hold the rtnl semaphore.  You may want
  *	unregister_netdev() instead of this.
  */
 
-void unregister_netdevice(struct net_device *dev)
+void unregister_netdevice_queue(struct net_device *dev, struct list_head *head)
 {
	ASSERT_RTNL();
 
-	rollback_registered(dev);
-	/* Finish processing unregister after unlock */
-	net_set_todo(dev);
+	if (head) {
+		list_move_tail(&dev->unreg_list, head);
+	} else {
+		rollback_registered(dev);
+		/* Finish processing unregister after unlock */
+		net_set_todo(dev);
+	}
 }
-EXPORT_SYMBOL(unregister_netdevice);
+EXPORT_SYMBOL(unregister_netdevice_queue);
+
+/**
+ *	unregister_netdevice_many - unregister many devices
+ *	@head: list of devices
+ */
+void unregister_netdevice_many(struct list_head *head)
+{
+	struct net_device *dev;
+
+	if (!list_empty(head)) {
+		rollback_registered_many(head);
+		list_for_each_entry(dev, head, unreg_list)
+			net_set_todo(dev);
+	}
+}
+EXPORT_SYMBOL(unregister_netdevice_many);
 
 /**
  *	unregister_netdev - remove device from the kernel
5293 5504
5294int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat) 5505int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat)
5295{ 5506{
5296 char buf[IFNAMSIZ];
5297 const char *destname;
5298 int err; 5507 int err;
5299 5508
5300 ASSERT_RTNL(); 5509 ASSERT_RTNL();
@@ -5327,20 +5536,11 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
5327 * we can use it in the destination network namespace. 5536 * we can use it in the destination network namespace.
5328 */ 5537 */
5329 err = -EEXIST; 5538 err = -EEXIST;
5330 destname = dev->name; 5539 if (__dev_get_by_name(net, dev->name)) {
5331 if (__dev_get_by_name(net, destname)) {
5332 /* We get here if we can't use the current device name */ 5540 /* We get here if we can't use the current device name */
5333 if (!pat) 5541 if (!pat)
5334 goto out; 5542 goto out;
5335 if (!dev_valid_name(pat)) 5543 if (dev_get_valid_name(net, pat, dev->name, 1))
5336 goto out;
5337 if (strchr(pat, '%')) {
5338 if (__dev_alloc_name(net, pat, buf) < 0)
5339 goto out;
5340 destname = buf;
5341 } else
5342 destname = pat;
5343 if (__dev_get_by_name(net, destname))
5344 goto out; 5544 goto out;
5345 } 5545 }
5346 5546
@@ -5364,6 +5564,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
5364 this device. They should clean all the things. 5564 this device. They should clean all the things.
5365 */ 5565 */
5366 call_netdevice_notifiers(NETDEV_UNREGISTER, dev); 5566 call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
5567 call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev);
5367 5568
5368 /* 5569 /*
5369 * Flush the unicast and multicast chains 5570 * Flush the unicast and multicast chains
@@ -5376,10 +5577,6 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
5376 /* Actually switch the network namespace */ 5577 /* Actually switch the network namespace */
5377 dev_net_set(dev, net); 5578 dev_net_set(dev, net);
5378 5579
5379 /* Assign the new device name */
5380 if (destname != dev->name)
5381 strcpy(dev->name, destname);
5382
5383 /* If there is an ifindex conflict assign a new one */ 5580 /* If there is an ifindex conflict assign a new one */
5384 if (__dev_get_by_index(net, dev->ifindex)) { 5581 if (__dev_get_by_index(net, dev->ifindex)) {
5385 int iflink = (dev->iflink == dev->ifindex); 5582 int iflink = (dev->iflink == dev->ifindex);
@@ -5484,7 +5681,7 @@ unsigned long netdev_increment_features(unsigned long all, unsigned long one,
5484 one |= NETIF_F_ALL_CSUM; 5681 one |= NETIF_F_ALL_CSUM;
5485 5682
5486 one |= all & NETIF_F_ONE_FOR_ALL; 5683 one |= all & NETIF_F_ONE_FOR_ALL;
5487 all &= one | NETIF_F_LLTX | NETIF_F_GSO; 5684 all &= one | NETIF_F_LLTX | NETIF_F_GSO | NETIF_F_UFO;
5488 all |= one & mask & NETIF_F_ONE_FOR_ALL; 5685 all |= one & mask & NETIF_F_ONE_FOR_ALL;
5489 5686
5490 return all; 5687 return all;
@@ -5566,14 +5763,13 @@ static struct pernet_operations __net_initdata netdev_net_ops = {
 
 static void __net_exit default_device_exit(struct net *net)
 {
-	struct net_device *dev;
+	struct net_device *dev, *aux;
	/*
-	 * Push all migratable of the network devices back to the
+	 * Push all migratable network devices back to the
	 * initial network namespace
	 */
	rtnl_lock();
-restart:
-	for_each_netdev(net, dev) {
+	for_each_netdev_safe(net, dev, aux) {
		int err;
		char fb_name[IFNAMSIZ];
 
@@ -5581,11 +5777,9 @@ restart:
		if (dev->features & NETIF_F_NETNS_LOCAL)
			continue;
 
-		/* Delete virtual devices */
-		if (dev->rtnl_link_ops && dev->rtnl_link_ops->dellink) {
-			dev->rtnl_link_ops->dellink(dev);
-			goto restart;
-		}
+		/* Leave virtual devices for the generic cleanup */
+		if (dev->rtnl_link_ops)
+			continue;
 
		/* Push remaing network devices to init_net */
		snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex);
@@ -5595,13 +5789,37 @@ restart:
				__func__, dev->name, err);
			BUG();
		}
-		goto restart;
	}
	rtnl_unlock();
 }
 
+static void __net_exit default_device_exit_batch(struct list_head *net_list)
+{
+	/* At exit all network devices most be removed from a network
+	 * namespace.  Do this in the reverse order of registeration.
+	 * Do this across as many network namespaces as possible to
+	 * improve batching efficiency.
+	 */
+	struct net_device *dev;
+	struct net *net;
+	LIST_HEAD(dev_kill_list);
+
+	rtnl_lock();
+	list_for_each_entry(net, net_list, exit_list) {
+		for_each_netdev_reverse(net, dev) {
+			if (dev->rtnl_link_ops)
+				dev->rtnl_link_ops->dellink(dev, &dev_kill_list);
+			else
+				unregister_netdevice_queue(dev, &dev_kill_list);
+		}
+	}
+	unregister_netdevice_many(&dev_kill_list);
+	rtnl_unlock();
+}
+
 static struct pernet_operations __net_initdata default_device_ops = {
	.exit = default_device_exit,
+	.exit_batch = default_device_exit_batch,
 };
 
 /*
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index 0a113f26bc9f..b8e9d3a86887 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -41,7 +41,7 @@ static void send_dm_alert(struct work_struct *unused);
  * netlink alerts
  */
 static int trace_state = TRACE_OFF;
-static spinlock_t trace_state_lock = SPIN_LOCK_UNLOCKED;
+static DEFINE_SPINLOCK(trace_state_lock);
 
 struct per_cpu_dm_data {
	struct work_struct dm_alert_work;
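SPIN_LOCK_UNLOCKED had been deprecated for static initialization because every lock initialized from it shares a single lockdep class; DEFINE_SPINLOCK() gives each lock its own. A minimal sketch of the supported idiom:

#include <linux/spinlock.h>

static DEFINE_SPINLOCK(my_state_lock);	/* static init, lockdep-friendly */

static void set_state(int *state, int v)
{
	unsigned long flags;

	spin_lock_irqsave(&my_state_lock, flags);
	*state = v;
	spin_unlock_irqrestore(&my_state_lock, flags);
}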
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 4c12ddb5f5ee..d8aee584e8d1 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -198,13 +198,6 @@ static int ethtool_get_drvinfo(struct net_device *dev, void __user *useraddr)
198 rc = ops->get_sset_count(dev, ETH_SS_PRIV_FLAGS); 198 rc = ops->get_sset_count(dev, ETH_SS_PRIV_FLAGS);
199 if (rc >= 0) 199 if (rc >= 0)
200 info.n_priv_flags = rc; 200 info.n_priv_flags = rc;
201 } else {
202 /* code path for obsolete hooks */
203
204 if (ops->self_test_count)
205 info.testinfo_len = ops->self_test_count(dev);
206 if (ops->get_stats_count)
207 info.n_stats = ops->get_stats_count(dev);
208 } 201 }
209 if (ops->get_regs_len) 202 if (ops->get_regs_len)
210 info.regdump_len = ops->get_regs_len(dev); 203 info.regdump_len = ops->get_regs_len(dev);
@@ -309,6 +302,26 @@ static int ethtool_get_regs(struct net_device *dev, char __user *useraddr)
309 return ret; 302 return ret;
310} 303}
311 304
305static int ethtool_reset(struct net_device *dev, char __user *useraddr)
306{
307 struct ethtool_value reset;
308 int ret;
309
310 if (!dev->ethtool_ops->reset)
311 return -EOPNOTSUPP;
312
313 if (copy_from_user(&reset, useraddr, sizeof(reset)))
314 return -EFAULT;
315
316 ret = dev->ethtool_ops->reset(dev, &reset.data);
317 if (ret)
318 return ret;
319
320 if (copy_to_user(useraddr, &reset, sizeof(reset)))
321 return -EFAULT;
322 return 0;
323}
324
312static int ethtool_get_wol(struct net_device *dev, char __user *useraddr) 325static int ethtool_get_wol(struct net_device *dev, char __user *useraddr)
313{ 326{
314 struct ethtool_wolinfo wol = { ETHTOOL_GWOL }; 327 struct ethtool_wolinfo wol = { ETHTOOL_GWOL };
@@ -684,16 +697,10 @@ static int ethtool_self_test(struct net_device *dev, char __user *useraddr)
684 u64 *data; 697 u64 *data;
685 int ret, test_len; 698 int ret, test_len;
686 699
687 if (!ops->self_test) 700 if (!ops->self_test || !ops->get_sset_count)
688 return -EOPNOTSUPP;
689 if (!ops->get_sset_count && !ops->self_test_count)
690 return -EOPNOTSUPP; 701 return -EOPNOTSUPP;
691 702
692 if (ops->get_sset_count) 703 test_len = ops->get_sset_count(dev, ETH_SS_TEST);
693 test_len = ops->get_sset_count(dev, ETH_SS_TEST);
694 else
695 /* code path for obsolete hook */
696 test_len = ops->self_test_count(dev);
697 if (test_len < 0) 704 if (test_len < 0)
698 return test_len; 705 return test_len;
699 WARN_ON(test_len == 0); 706 WARN_ON(test_len == 0);
@@ -728,36 +735,17 @@ static int ethtool_get_strings(struct net_device *dev, void __user *useraddr)
728 u8 *data; 735 u8 *data;
729 int ret; 736 int ret;
730 737
731 if (!ops->get_strings) 738 if (!ops->get_strings || !ops->get_sset_count)
732 return -EOPNOTSUPP; 739 return -EOPNOTSUPP;
733 740
734 if (copy_from_user(&gstrings, useraddr, sizeof(gstrings))) 741 if (copy_from_user(&gstrings, useraddr, sizeof(gstrings)))
735 return -EFAULT; 742 return -EFAULT;
736 743
737 if (ops->get_sset_count) { 744 ret = ops->get_sset_count(dev, gstrings.string_set);
738 ret = ops->get_sset_count(dev, gstrings.string_set); 745 if (ret < 0)
739 if (ret < 0) 746 return ret;
740 return ret; 747
741 748 gstrings.len = ret;
742 gstrings.len = ret;
743 } else {
744 /* code path for obsolete hooks */
745
746 switch (gstrings.string_set) {
747 case ETH_SS_TEST:
748 if (!ops->self_test_count)
749 return -EOPNOTSUPP;
750 gstrings.len = ops->self_test_count(dev);
751 break;
752 case ETH_SS_STATS:
753 if (!ops->get_stats_count)
754 return -EOPNOTSUPP;
755 gstrings.len = ops->get_stats_count(dev);
756 break;
757 default:
758 return -EINVAL;
759 }
760 }
761 749
762 data = kmalloc(gstrings.len * ETH_GSTRING_LEN, GFP_USER); 750 data = kmalloc(gstrings.len * ETH_GSTRING_LEN, GFP_USER);
763 if (!data) 751 if (!data)
@@ -798,16 +786,10 @@ static int ethtool_get_stats(struct net_device *dev, void __user *useraddr)
798 u64 *data; 786 u64 *data;
799 int ret, n_stats; 787 int ret, n_stats;
800 788
801 if (!ops->get_ethtool_stats) 789 if (!ops->get_ethtool_stats || !ops->get_sset_count)
802 return -EOPNOTSUPP;
803 if (!ops->get_sset_count && !ops->get_stats_count)
804 return -EOPNOTSUPP; 790 return -EOPNOTSUPP;
805 791
806 if (ops->get_sset_count) 792 n_stats = ops->get_sset_count(dev, ETH_SS_STATS);
807 n_stats = ops->get_sset_count(dev, ETH_SS_STATS);
808 else
809 /* code path for obsolete hook */
810 n_stats = ops->get_stats_count(dev);
811 if (n_stats < 0) 793 if (n_stats < 0)
812 return n_stats; 794 return n_stats;
813 WARN_ON(n_stats == 0); 795 WARN_ON(n_stats == 0);
@@ -1127,6 +1109,9 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
1127 case ETHTOOL_FLASHDEV: 1109 case ETHTOOL_FLASHDEV:
1128 rc = ethtool_flash_device(dev, useraddr); 1110 rc = ethtool_flash_device(dev, useraddr);
1129 break; 1111 break;
1112 case ETHTOOL_RESET:
1113 rc = ethtool_reset(dev, useraddr);
1114 break;
1130 default: 1115 default:
1131 rc = -EOPNOTSUPP; 1116 rc = -EOPNOTSUPP;
1132 } 1117 }
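
ETHTOOL_RESET is driven through SIOCETHTOOL like every other ethtool command: userspace fills a struct ethtool_value whose data field carries ETH_RESET_* flags, and the driver clears the flags it actually handled before the struct is copied back, so leftover bits mean "could not reset". A minimal userspace sketch, assuming 2.6.33-era <linux/ethtool.h> that defines ETHTOOL_RESET and ETH_RESET_ALL:

/* reset-nic.c: request a full reset of one interface. */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <linux/ethtool.h>
#include <linux/sockios.h>

int main(int argc, char **argv)
{
	struct ethtool_value eval = { .cmd = ETHTOOL_RESET,
				      .data = ETH_RESET_ALL };
	struct ifreq ifr;
	int fd;

	if (argc != 2) {
		fprintf(stderr, "usage: %s <ifname>\n", argv[0]);
		return 1;
	}
	fd = socket(AF_INET, SOCK_DGRAM, 0);
	if (fd < 0) {
		perror("socket");
		return 1;
	}
	memset(&ifr, 0, sizeof(ifr));
	strncpy(ifr.ifr_name, argv[1], IFNAMSIZ - 1);
	ifr.ifr_data = (void *)&eval;

	if (ioctl(fd, SIOCETHTOOL, &ifr) < 0) {
		perror("ETHTOOL_RESET");
		close(fd);
		return 1;
	}
	/* The driver writes back the flags it did NOT act on. */
	printf("unhandled reset flags: 0x%x\n", eval.data);
	close(fd);
	return 0;
}
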
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index bd309384f8b8..02a3b2c69c1e 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -72,7 +72,7 @@ static void flush_route_cache(struct fib_rules_ops *ops)
72 ops->flush_cache(ops); 72 ops->flush_cache(ops);
73} 73}
74 74
75int fib_rules_register(struct fib_rules_ops *ops) 75static int __fib_rules_register(struct fib_rules_ops *ops)
76{ 76{
77 int err = -EEXIST; 77 int err = -EEXIST;
78 struct fib_rules_ops *o; 78 struct fib_rules_ops *o;
@@ -102,6 +102,28 @@ errout:
102 return err; 102 return err;
103} 103}
104 104
105struct fib_rules_ops *
106fib_rules_register(struct fib_rules_ops *tmpl, struct net *net)
107{
108 struct fib_rules_ops *ops;
109 int err;
110
111 ops = kmemdup(tmpl, sizeof (*ops), GFP_KERNEL);
112 if (ops == NULL)
113 return ERR_PTR(-ENOMEM);
114
115 INIT_LIST_HEAD(&ops->rules_list);
116 ops->fro_net = net;
117
118 err = __fib_rules_register(ops);
119 if (err) {
120 kfree(ops);
121 ops = ERR_PTR(err);
122 }
123
124 return ops;
125}
126
105EXPORT_SYMBOL_GPL(fib_rules_register); 127EXPORT_SYMBOL_GPL(fib_rules_register);
106 128
107void fib_rules_cleanup_ops(struct fib_rules_ops *ops) 129void fib_rules_cleanup_ops(struct fib_rules_ops *ops)
@@ -115,6 +137,15 @@ void fib_rules_cleanup_ops(struct fib_rules_ops *ops)
115} 137}
116EXPORT_SYMBOL_GPL(fib_rules_cleanup_ops); 138EXPORT_SYMBOL_GPL(fib_rules_cleanup_ops);
117 139
140static void fib_rules_put_rcu(struct rcu_head *head)
141{
142 struct fib_rules_ops *ops = container_of(head, struct fib_rules_ops, rcu);
143 struct net *net = ops->fro_net;
144
145 release_net(net);
146 kfree(ops);
147}
148
118void fib_rules_unregister(struct fib_rules_ops *ops) 149void fib_rules_unregister(struct fib_rules_ops *ops)
119{ 150{
120 struct net *net = ops->fro_net; 151 struct net *net = ops->fro_net;
@@ -124,8 +155,7 @@ void fib_rules_unregister(struct fib_rules_ops *ops)
124 fib_rules_cleanup_ops(ops); 155 fib_rules_cleanup_ops(ops);
125 spin_unlock(&net->rules_mod_lock); 156 spin_unlock(&net->rules_mod_lock);
126 157
127 synchronize_rcu(); 158 call_rcu(&ops->rcu, fib_rules_put_rcu);
128 release_net(net);
129} 159}
130 160
131EXPORT_SYMBOL_GPL(fib_rules_unregister); 161EXPORT_SYMBOL_GPL(fib_rules_unregister);
@@ -135,7 +165,10 @@ static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops,
135{ 165{
136 int ret = 0; 166 int ret = 0;
137 167
138 if (rule->ifindex && (rule->ifindex != fl->iif)) 168 if (rule->iifindex && (rule->iifindex != fl->iif))
169 goto out;
170
171 if (rule->oifindex && (rule->oifindex != fl->oif))
139 goto out; 172 goto out;
140 173
141 if ((rule->mark ^ fl->mark) & rule->mark_mask) 174 if ((rule->mark ^ fl->mark) & rule->mark_mask)
@@ -248,14 +281,24 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
248 if (tb[FRA_PRIORITY]) 281 if (tb[FRA_PRIORITY])
249 rule->pref = nla_get_u32(tb[FRA_PRIORITY]); 282 rule->pref = nla_get_u32(tb[FRA_PRIORITY]);
250 283
251 if (tb[FRA_IFNAME]) { 284 if (tb[FRA_IIFNAME]) {
285 struct net_device *dev;
286
287 rule->iifindex = -1;
288 nla_strlcpy(rule->iifname, tb[FRA_IIFNAME], IFNAMSIZ);
289 dev = __dev_get_by_name(net, rule->iifname);
290 if (dev)
291 rule->iifindex = dev->ifindex;
292 }
293
294 if (tb[FRA_OIFNAME]) {
252 struct net_device *dev; 295 struct net_device *dev;
253 296
254 rule->ifindex = -1; 297 rule->oifindex = -1;
255 nla_strlcpy(rule->ifname, tb[FRA_IFNAME], IFNAMSIZ); 298 nla_strlcpy(rule->oifname, tb[FRA_OIFNAME], IFNAMSIZ);
256 dev = __dev_get_by_name(net, rule->ifname); 299 dev = __dev_get_by_name(net, rule->oifname);
257 if (dev) 300 if (dev)
258 rule->ifindex = dev->ifindex; 301 rule->oifindex = dev->ifindex;
259 } 302 }
260 303
261 if (tb[FRA_FWMARK]) { 304 if (tb[FRA_FWMARK]) {
@@ -274,7 +317,7 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
274 rule->flags = frh->flags; 317 rule->flags = frh->flags;
275 rule->table = frh_get_table(frh, tb); 318 rule->table = frh_get_table(frh, tb);
276 319
277 if (!rule->pref && ops->default_pref) 320 if (!tb[FRA_PRIORITY] && ops->default_pref)
278 rule->pref = ops->default_pref(ops); 321 rule->pref = ops->default_pref(ops);
279 322
280 err = -EINVAL; 323 err = -EINVAL;
@@ -388,8 +431,12 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
388 (rule->pref != nla_get_u32(tb[FRA_PRIORITY]))) 431 (rule->pref != nla_get_u32(tb[FRA_PRIORITY])))
389 continue; 432 continue;
390 433
391 if (tb[FRA_IFNAME] && 434 if (tb[FRA_IIFNAME] &&
392 nla_strcmp(tb[FRA_IFNAME], rule->ifname)) 435 nla_strcmp(tb[FRA_IIFNAME], rule->iifname))
436 continue;
437
438 if (tb[FRA_OIFNAME] &&
439 nla_strcmp(tb[FRA_OIFNAME], rule->oifname))
393 continue; 440 continue;
394 441
395 if (tb[FRA_FWMARK] && 442 if (tb[FRA_FWMARK] &&
@@ -447,7 +494,8 @@ static inline size_t fib_rule_nlmsg_size(struct fib_rules_ops *ops,
447 struct fib_rule *rule) 494 struct fib_rule *rule)
448{ 495{
449 size_t payload = NLMSG_ALIGN(sizeof(struct fib_rule_hdr)) 496 size_t payload = NLMSG_ALIGN(sizeof(struct fib_rule_hdr))
450 + nla_total_size(IFNAMSIZ) /* FRA_IFNAME */ 497 + nla_total_size(IFNAMSIZ) /* FRA_IIFNAME */
498 + nla_total_size(IFNAMSIZ) /* FRA_OIFNAME */
451 + nla_total_size(4) /* FRA_PRIORITY */ 499 + nla_total_size(4) /* FRA_PRIORITY */
452 + nla_total_size(4) /* FRA_TABLE */ 500 + nla_total_size(4) /* FRA_TABLE */
453 + nla_total_size(4) /* FRA_FWMARK */ 501 + nla_total_size(4) /* FRA_FWMARK */
@@ -481,11 +529,18 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
481 if (rule->action == FR_ACT_GOTO && rule->ctarget == NULL) 529 if (rule->action == FR_ACT_GOTO && rule->ctarget == NULL)
482 frh->flags |= FIB_RULE_UNRESOLVED; 530 frh->flags |= FIB_RULE_UNRESOLVED;
483 531
484 if (rule->ifname[0]) { 532 if (rule->iifname[0]) {
485 NLA_PUT_STRING(skb, FRA_IFNAME, rule->ifname); 533 NLA_PUT_STRING(skb, FRA_IIFNAME, rule->iifname);
486 534
487 if (rule->ifindex == -1) 535 if (rule->iifindex == -1)
488 frh->flags |= FIB_RULE_DEV_DETACHED; 536 frh->flags |= FIB_RULE_IIF_DETACHED;
537 }
538
539 if (rule->oifname[0]) {
540 NLA_PUT_STRING(skb, FRA_OIFNAME, rule->oifname);
541
542 if (rule->oifindex == -1)
543 frh->flags |= FIB_RULE_OIF_DETACHED;
489 } 544 }
490 545
491 if (rule->pref) 546 if (rule->pref)
@@ -600,9 +655,12 @@ static void attach_rules(struct list_head *rules, struct net_device *dev)
600 struct fib_rule *rule; 655 struct fib_rule *rule;
601 656
602 list_for_each_entry(rule, rules, list) { 657 list_for_each_entry(rule, rules, list) {
603 if (rule->ifindex == -1 && 658 if (rule->iifindex == -1 &&
604 strcmp(dev->name, rule->ifname) == 0) 659 strcmp(dev->name, rule->iifname) == 0)
605 rule->ifindex = dev->ifindex; 660 rule->iifindex = dev->ifindex;
661 if (rule->oifindex == -1 &&
662 strcmp(dev->name, rule->oifname) == 0)
663 rule->oifindex = dev->ifindex;
606 } 664 }
607} 665}
608 666
@@ -610,9 +668,12 @@ static void detach_rules(struct list_head *rules, struct net_device *dev)
610{ 668{
611 struct fib_rule *rule; 669 struct fib_rule *rule;
612 670
613 list_for_each_entry(rule, rules, list) 671 list_for_each_entry(rule, rules, list) {
614 if (rule->ifindex == dev->ifindex) 672 if (rule->iifindex == dev->ifindex)
615 rule->ifindex = -1; 673 rule->iifindex = -1;
674 if (rule->oifindex == dev->ifindex)
675 rule->oifindex = -1;
676 }
616} 677}
617 678
618 679
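
The ops structure becomes a template here: fib_rules_register() kmemdup()s it per namespace and hands back the live copy or an ERR_PTR, and unregistration defers the final kfree() to an RCU callback instead of a blocking synchronize_rcu(). A hedged sketch of a per-family caller after this change; the fib4_* names and the storage location are illustrative, not lifted from the actual IPv4 conversion:

static struct fib_rules_ops fib4_rules_ops_template = {
	.family	= AF_INET,
	/* ... rule_size, match/action/default_pref hooks ... */
};

static int __net_init fib4_rules_net_init(struct net *net)
{
	struct fib_rules_ops *ops;

	ops = fib_rules_register(&fib4_rules_ops_template, net);
	if (IS_ERR(ops))
		return PTR_ERR(ops);
	net->ipv4.rules_ops = ops;	/* remember the per-net copy */
	return 0;
}

static void __net_exit fib4_rules_net_exit(struct net *net)
{
	fib_rules_unregister(net->ipv4.rules_ops);
}
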
diff --git a/net/core/filter.c b/net/core/filter.c
index d1d779ca096d..08db7b9143a3 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -303,6 +303,12 @@ load_b:
303 case SKF_AD_IFINDEX: 303 case SKF_AD_IFINDEX:
304 A = skb->dev->ifindex; 304 A = skb->dev->ifindex;
305 continue; 305 continue;
306 case SKF_AD_MARK:
307 A = skb->mark;
308 continue;
309 case SKF_AD_QUEUE:
310 A = skb->queue_mapping;
311 continue;
306 case SKF_AD_NLATTR: { 312 case SKF_AD_NLATTR: {
307 struct nlattr *nla; 313 struct nlattr *nla;
308 314
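
The two new ancillary loads expose skb->mark and skb->queue_mapping to classic BPF through the negative SKF_AD_OFF window. A minimal userspace sketch attaching such a filter, assuming headers that already define SKF_AD_MARK; the mark value 42 is arbitrary:

/* mark-filter.c: deliver only packets whose skb->mark == 42. */
#include <stdio.h>
#include <unistd.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <linux/filter.h>

int main(void)
{
	struct sock_filter code[] = {
		/* A = skb->mark (ancillary load, not packet data) */
		BPF_STMT(BPF_LD | BPF_W | BPF_ABS, SKF_AD_OFF + SKF_AD_MARK),
		BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 42, 0, 1),
		BPF_STMT(BPF_RET | BPF_K, 0xffff),	/* accept */
		BPF_STMT(BPF_RET | BPF_K, 0),		/* drop */
	};
	struct sock_fprog prog = {
		.len	= sizeof(code) / sizeof(code[0]),
		.filter	= code,
	};
	int fd = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);

	if (fd < 0 || setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER,
				 &prog, sizeof(prog)) < 0) {
		perror("SO_ATTACH_FILTER");
		return 1;
	}
	printf("filter attached: only mark==42 traffic is delivered\n");
	close(fd);
	return 0;
}
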
diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c
index 8569310268ab..393b1d8618e2 100644
--- a/net/core/gen_stats.c
+++ b/net/core/gen_stats.c
@@ -127,6 +127,7 @@ gnet_stats_copy_basic(struct gnet_dump *d, struct gnet_stats_basic_packed *b)
127/** 127/**
128 * gnet_stats_copy_rate_est - copy rate estimator statistics into statistics TLV 128 * gnet_stats_copy_rate_est - copy rate estimator statistics into statistics TLV
129 * @d: dumping handle 129 * @d: dumping handle
130 * @b: basic statistics
130 * @r: rate estimator statistics 131 * @r: rate estimator statistics
131 * 132 *
132 * Appends the rate estimator statistics to the top level TLV created by 133 * Appends the rate estimator statistics to the top level TLV created by
@@ -136,8 +137,13 @@ gnet_stats_copy_basic(struct gnet_dump *d, struct gnet_stats_basic_packed *b)
136 * if the room in the socket buffer was not sufficient. 137 * if the room in the socket buffer was not sufficient.
137 */ 138 */
138int 139int
139gnet_stats_copy_rate_est(struct gnet_dump *d, struct gnet_stats_rate_est *r) 140gnet_stats_copy_rate_est(struct gnet_dump *d,
141 const struct gnet_stats_basic_packed *b,
142 struct gnet_stats_rate_est *r)
140{ 143{
144 if (b && !gen_estimator_active(b, r))
145 return 0;
146
141 if (d->compat_tc_stats) { 147 if (d->compat_tc_stats) {
142 d->tc_stats.bps = r->bps; 148 d->tc_stats.bps = r->bps;
143 d->tc_stats.pps = r->pps; 149 d->tc_stats.pps = r->pps;
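
Every caller of gnet_stats_copy_rate_est() has to grow the extra argument; in exchange, dumps stop emitting all-zero rate TLVs for qdiscs that never had an estimator, because the helper can now ask gen_estimator_active() first. A hedged fragment of an adapted qdisc dump path (names illustrative):

	if (gnet_stats_copy_basic(&d, &q->bstats) < 0 ||
	    gnet_stats_copy_rate_est(&d, &q->bstats, &q->rate_est) < 0 ||
	    gnet_stats_copy_queue(&d, &q->qstats) < 0)
		goto nla_put_failure;
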
diff --git a/net/core/link_watch.c b/net/core/link_watch.c
index bf8f7af699d7..5910b555a54a 100644
--- a/net/core/link_watch.c
+++ b/net/core/link_watch.c
@@ -35,7 +35,7 @@ static unsigned long linkwatch_nextevent;
35static void linkwatch_event(struct work_struct *dummy); 35static void linkwatch_event(struct work_struct *dummy);
36static DECLARE_DELAYED_WORK(linkwatch_work, linkwatch_event); 36static DECLARE_DELAYED_WORK(linkwatch_work, linkwatch_event);
37 37
38static struct net_device *lweventlist; 38static LIST_HEAD(lweventlist);
39static DEFINE_SPINLOCK(lweventlist_lock); 39static DEFINE_SPINLOCK(lweventlist_lock);
40 40
41static unsigned char default_operstate(const struct net_device *dev) 41static unsigned char default_operstate(const struct net_device *dev)
@@ -89,8 +89,10 @@ static void linkwatch_add_event(struct net_device *dev)
89 unsigned long flags; 89 unsigned long flags;
90 90
91 spin_lock_irqsave(&lweventlist_lock, flags); 91 spin_lock_irqsave(&lweventlist_lock, flags);
92 dev->link_watch_next = lweventlist; 92 if (list_empty(&dev->link_watch_list)) {
93 lweventlist = dev; 93 list_add_tail(&dev->link_watch_list, &lweventlist);
94 dev_hold(dev);
95 }
94 spin_unlock_irqrestore(&lweventlist_lock, flags); 96 spin_unlock_irqrestore(&lweventlist_lock, flags);
95} 97}
96 98
@@ -133,9 +135,35 @@ static void linkwatch_schedule_work(int urgent)
133} 135}
134 136
135 137
138static void linkwatch_do_dev(struct net_device *dev)
139{
140 /*
141 * Make sure the above read is complete since it can be
142 * rewritten as soon as we clear the bit below.
143 */
144 smp_mb__before_clear_bit();
145
146 /* We are about to handle this device,
147 * so new events can be accepted
148 */
149 clear_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state);
150
151 rfc2863_policy(dev);
152 if (dev->flags & IFF_UP) {
153 if (netif_carrier_ok(dev))
154 dev_activate(dev);
155 else
156 dev_deactivate(dev);
157
158 netdev_state_change(dev);
159 }
160 dev_put(dev);
161}
162
136static void __linkwatch_run_queue(int urgent_only) 163static void __linkwatch_run_queue(int urgent_only)
137{ 164{
138 struct net_device *next; 165 struct net_device *dev;
166 LIST_HEAD(wrk);
139 167
140 /* 168 /*
141 * Limit the number of linkwatch events to one 169 * Limit the number of linkwatch events to one
@@ -153,46 +181,40 @@ static void __linkwatch_run_queue(int urgent_only)
153 clear_bit(LW_URGENT, &linkwatch_flags); 181 clear_bit(LW_URGENT, &linkwatch_flags);
154 182
155 spin_lock_irq(&lweventlist_lock); 183 spin_lock_irq(&lweventlist_lock);
156 next = lweventlist; 184 list_splice_init(&lweventlist, &wrk);
157 lweventlist = NULL;
158 spin_unlock_irq(&lweventlist_lock);
159 185
160 while (next) { 186 while (!list_empty(&wrk)) {
161 struct net_device *dev = next;
162 187
163 next = dev->link_watch_next; 188 dev = list_first_entry(&wrk, struct net_device, link_watch_list);
189 list_del_init(&dev->link_watch_list);
164 190
165 if (urgent_only && !linkwatch_urgent_event(dev)) { 191 if (urgent_only && !linkwatch_urgent_event(dev)) {
166 linkwatch_add_event(dev); 192 list_add_tail(&dev->link_watch_list, &lweventlist);
167 continue; 193 continue;
168 } 194 }
169 195 spin_unlock_irq(&lweventlist_lock);
170 /* 196 linkwatch_do_dev(dev);
171 * Make sure the above read is complete since it can be 197 spin_lock_irq(&lweventlist_lock);
172 * rewritten as soon as we clear the bit below.
173 */
174 smp_mb__before_clear_bit();
175
176 /* We are about to handle this device,
177 * so new events can be accepted
178 */
179 clear_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state);
180
181 rfc2863_policy(dev);
182 if (dev->flags & IFF_UP) {
183 if (netif_carrier_ok(dev))
184 dev_activate(dev);
185 else
186 dev_deactivate(dev);
187
188 netdev_state_change(dev);
189 }
190
191 dev_put(dev);
192 } 198 }
193 199
194 if (lweventlist) 200 if (!list_empty(&lweventlist))
195 linkwatch_schedule_work(0); 201 linkwatch_schedule_work(0);
202 spin_unlock_irq(&lweventlist_lock);
203}
204
205void linkwatch_forget_dev(struct net_device *dev)
206{
207 unsigned long flags;
208 int clean = 0;
209
210 spin_lock_irqsave(&lweventlist_lock, flags);
211 if (!list_empty(&dev->link_watch_list)) {
212 list_del_init(&dev->link_watch_list);
213 clean = 1;
214 }
215 spin_unlock_irqrestore(&lweventlist_lock, flags);
216 if (clean)
217 linkwatch_do_dev(dev);
196} 218}
197 219
198 220
@@ -216,8 +238,6 @@ void linkwatch_fire_event(struct net_device *dev)
216 bool urgent = linkwatch_urgent_event(dev); 238 bool urgent = linkwatch_urgent_event(dev);
217 239
218 if (!test_and_set_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state)) { 240 if (!test_and_set_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state)) {
219 dev_hold(dev);
220
221 linkwatch_add_event(dev); 241 linkwatch_add_event(dev);
222 } else if (!urgent) 242 } else if (!urgent)
223 return; 243 return;
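
The rewrite swaps the open-coded pointer chain for a list_head plus a splice-and-drain loop that releases lweventlist_lock around each device, and the new linkwatch_forget_dev() lets unregistration flush a pending event synchronously instead of leaking the reference. A runnable userspace model of the splice-and-drain locking pattern, with the urgent-only requeue path omitted for brevity:

/* drain.c: build with -pthread.  Models __linkwatch_run_queue(). */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct event { struct event *next; int dev_id; };

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
static struct event *pending;		/* lweventlist analogue */

static void add_event(int dev_id)	/* linkwatch_add_event analogue */
{
	struct event *e = malloc(sizeof(*e));

	if (!e)
		return;
	e->dev_id = dev_id;
	pthread_mutex_lock(&list_lock);
	e->next = pending;
	pending = e;
	pthread_mutex_unlock(&list_lock);
}

static void handle_event(struct event *e)	/* linkwatch_do_dev analogue */
{
	printf("link change on dev %d\n", e->dev_id);
	free(e);
}

static void run_queue(void)
{
	struct event *work;

	pthread_mutex_lock(&list_lock);
	work = pending;			/* list_splice_init() analogue */
	pending = NULL;
	while (work) {
		struct event *e = work;

		work = e->next;
		/* Drop the lock while handling, as the kernel loop now
		 * does; events queued meanwhile land on 'pending' and a
		 * rescheduled run picks them up. */
		pthread_mutex_unlock(&list_lock);
		handle_event(e);
		pthread_mutex_lock(&list_lock);
	}
	pthread_mutex_unlock(&list_lock);
}

int main(void)
{
	add_event(1);
	add_event(2);
	run_queue();
	return 0;
}
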
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 2b54e6c6a7c8..f35377b643e4 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -2092,7 +2092,7 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2092 if (h > s_h) 2092 if (h > s_h)
2093 s_idx = 0; 2093 s_idx = 0;
2094 for (n = tbl->hash_buckets[h], idx = 0; n; n = n->next) { 2094 for (n = tbl->hash_buckets[h], idx = 0; n; n = n->next) {
2095 if (dev_net(n->dev) != net) 2095 if (!net_eq(dev_net(n->dev), net))
2096 continue; 2096 continue;
2097 if (idx < s_idx) 2097 if (idx < s_idx)
2098 goto next; 2098 goto next;
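
The !net_eq() conversion here (and in net-sysfs below) is about code size as much as style: with namespaces compiled out the helper is constant-true, so the whole comparison folds away. Roughly, from the include/net/net_namespace.h of this era:

#ifdef CONFIG_NET_NS
static inline int net_eq(const struct net *net1, const struct net *net2)
{
	return net1 == net2;
}
#else
static inline int net_eq(const struct net *net1, const struct net *net2)
{
	return 1;	/* single namespace: trivially equal */
}
#endif
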
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 427ded841224..fbc1c7472c5e 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -130,6 +130,48 @@ static ssize_t show_carrier(struct device *dev,
130 return -EINVAL; 130 return -EINVAL;
131} 131}
132 132
133static ssize_t show_speed(struct device *dev,
134 struct device_attribute *attr, char *buf)
135{
136 struct net_device *netdev = to_net_dev(dev);
137 int ret = -EINVAL;
138
139 if (!rtnl_trylock())
140 return restart_syscall();
141
142 if (netif_running(netdev) &&
143 netdev->ethtool_ops &&
144 netdev->ethtool_ops->get_settings) {
145 struct ethtool_cmd cmd = { ETHTOOL_GSET };
146
147 if (!netdev->ethtool_ops->get_settings(netdev, &cmd))
148 ret = sprintf(buf, fmt_dec, ethtool_cmd_speed(&cmd));
149 }
150 rtnl_unlock();
151 return ret;
152}
153
154static ssize_t show_duplex(struct device *dev,
155 struct device_attribute *attr, char *buf)
156{
157 struct net_device *netdev = to_net_dev(dev);
158 int ret = -EINVAL;
159
160 if (!rtnl_trylock())
161 return restart_syscall();
162
163 if (netif_running(netdev) &&
164 netdev->ethtool_ops &&
165 netdev->ethtool_ops->get_settings) {
166 struct ethtool_cmd cmd = { ETHTOOL_GSET };
167
168 if (!netdev->ethtool_ops->get_settings(netdev, &cmd))
169 ret = sprintf(buf, "%s\n", cmd.duplex ? "full" : "half");
170 }
171 rtnl_unlock();
172 return ret;
173}
174
133static ssize_t show_dormant(struct device *dev, 175static ssize_t show_dormant(struct device *dev,
134 struct device_attribute *attr, char *buf) 176 struct device_attribute *attr, char *buf)
135{ 177{
@@ -259,6 +301,8 @@ static struct device_attribute net_class_attributes[] = {
259 __ATTR(address, S_IRUGO, show_address, NULL), 301 __ATTR(address, S_IRUGO, show_address, NULL),
260 __ATTR(broadcast, S_IRUGO, show_broadcast, NULL), 302 __ATTR(broadcast, S_IRUGO, show_broadcast, NULL),
261 __ATTR(carrier, S_IRUGO, show_carrier, NULL), 303 __ATTR(carrier, S_IRUGO, show_carrier, NULL),
304 __ATTR(speed, S_IRUGO, show_speed, NULL),
305 __ATTR(duplex, S_IRUGO, show_duplex, NULL),
262 __ATTR(dormant, S_IRUGO, show_dormant, NULL), 306 __ATTR(dormant, S_IRUGO, show_dormant, NULL),
263 __ATTR(operstate, S_IRUGO, show_operstate, NULL), 307 __ATTR(operstate, S_IRUGO, show_operstate, NULL),
264 __ATTR(mtu, S_IRUGO | S_IWUSR, show_mtu, store_mtu), 308 __ATTR(mtu, S_IRUGO | S_IWUSR, show_mtu, store_mtu),
@@ -481,7 +525,7 @@ void netdev_unregister_kobject(struct net_device * net)
481 525
482 kobject_get(&dev->kobj); 526 kobject_get(&dev->kobj);
483 527
484 if (dev_net(net) != &init_net) 528 if (!net_eq(dev_net(net), &init_net))
485 return; 529 return;
486 530
487 device_del(dev); 531 device_del(dev);
@@ -500,15 +544,22 @@ int netdev_register_kobject(struct net_device *net)
500 dev_set_name(dev, "%s", net->name); 544 dev_set_name(dev, "%s", net->name);
501 545
502#ifdef CONFIG_SYSFS 546#ifdef CONFIG_SYSFS
503 *groups++ = &netstat_group; 547 /* Allow for a device specific group */
548 if (*groups)
549 groups++;
504 550
551 *groups++ = &netstat_group;
505#ifdef CONFIG_WIRELESS_EXT_SYSFS 552#ifdef CONFIG_WIRELESS_EXT_SYSFS
506 if (net->wireless_handlers || net->ieee80211_ptr) 553 if (net->ieee80211_ptr)
554 *groups++ = &wireless_group;
555#ifdef CONFIG_WIRELESS_EXT
556 else if (net->wireless_handlers)
507 *groups++ = &wireless_group; 557 *groups++ = &wireless_group;
508#endif 558#endif
559#endif
509#endif /* CONFIG_SYSFS */ 560#endif /* CONFIG_SYSFS */
510 561
511 if (dev_net(net) != &init_net) 562 if (!net_eq(dev_net(net), &init_net))
512 return 0; 563 return 0;
513 564
514 return device_add(dev); 565 return device_add(dev);
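
The new attributes surface link speed and duplex without invoking ethtool. A minimal reader sketch; both files answer -EINVAL (seen as a failed read) when the interface is down or the driver lacks a get_settings hook:

/* linkinfo.c: print speed (Mb/s) and duplex for one interface. */
#include <stdio.h>

static void show_attr(const char *ifname, const char *attr)
{
	char path[128], buf[32];
	FILE *f;

	snprintf(path, sizeof(path), "/sys/class/net/%s/%s", ifname, attr);
	f = fopen(path, "r");
	if (!f || !fgets(buf, sizeof(buf), f)) {
		fprintf(stderr, "%s: unreadable (link down?)\n", path);
		if (f)
			fclose(f);
		return;
	}
	printf("%s: %s", attr, buf);	/* buf keeps its newline */
	fclose(f);
}

int main(int argc, char **argv)
{
	if (argc != 2) {
		fprintf(stderr, "usage: %s <ifname>\n", argv[0]);
		return 1;
	}
	show_attr(argv[1], "speed");	/* e.g. "1000" */
	show_attr(argv[1], "duplex");	/* "full" or "half" */
	return 0;
}
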
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 1c1af2756f38..bd8c4712ea24 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -27,14 +27,64 @@ EXPORT_SYMBOL(init_net);
27 27
28#define INITIAL_NET_GEN_PTRS 13 /* +1 for len +2 for rcu_head */ 28#define INITIAL_NET_GEN_PTRS 13 /* +1 for len +2 for rcu_head */
29 29
30static int ops_init(const struct pernet_operations *ops, struct net *net)
31{
32 int err;
33 if (ops->id && ops->size) {
34 void *data = kzalloc(ops->size, GFP_KERNEL);
35 if (!data)
36 return -ENOMEM;
37
38 err = net_assign_generic(net, *ops->id, data);
39 if (err) {
40 kfree(data);
41 return err;
42 }
43 }
44 if (ops->init)
45 return ops->init(net);
46 return 0;
47}
48
49static void ops_free(const struct pernet_operations *ops, struct net *net)
50{
51 if (ops->id && ops->size) {
52 int id = *ops->id;
53 kfree(net_generic(net, id));
54 }
55}
56
57static void ops_exit_list(const struct pernet_operations *ops,
58 struct list_head *net_exit_list)
59{
60 struct net *net;
61 if (ops->exit) {
62 list_for_each_entry(net, net_exit_list, exit_list)
63 ops->exit(net);
64 }
65 if (ops->exit_batch)
66 ops->exit_batch(net_exit_list);
67}
68
69static void ops_free_list(const struct pernet_operations *ops,
70 struct list_head *net_exit_list)
71{
72 struct net *net;
73 if (ops->size && ops->id) {
74 list_for_each_entry(net, net_exit_list, exit_list)
75 ops_free(ops, net);
76 }
77}
78
30/* 79/*
31 * setup_net runs the initializers for the network namespace object. 80 * setup_net runs the initializers for the network namespace object.
32 */ 81 */
33static __net_init int setup_net(struct net *net) 82static __net_init int setup_net(struct net *net)
34{ 83{
35 /* Must be called with net_mutex held */ 84 /* Must be called with net_mutex held */
36 struct pernet_operations *ops; 85 const struct pernet_operations *ops, *saved_ops;
37 int error = 0; 86 int error = 0;
87 LIST_HEAD(net_exit_list);
38 88
39 atomic_set(&net->count, 1); 89 atomic_set(&net->count, 1);
40 90
@@ -43,11 +93,9 @@ static __net_init int setup_net(struct net *net)
43#endif 93#endif
44 94
45 list_for_each_entry(ops, &pernet_list, list) { 95 list_for_each_entry(ops, &pernet_list, list) {
46 if (ops->init) { 96 error = ops_init(ops, net);
47 error = ops->init(net); 97 if (error < 0)
48 if (error < 0) 98 goto out_undo;
49 goto out_undo;
50 }
51 } 99 }
52out: 100out:
53 return error; 101 return error;
@@ -56,10 +104,14 @@ out_undo:
56 /* Walk through the list backwards calling the exit functions 104 /* Walk through the list backwards calling the exit functions
57 * for the pernet modules whose init functions did not fail. 105 * for the pernet modules whose init functions did not fail.
58 */ 106 */
59 list_for_each_entry_continue_reverse(ops, &pernet_list, list) { 107 list_add(&net->exit_list, &net_exit_list);
60 if (ops->exit) 108 saved_ops = ops;
61 ops->exit(net); 109 list_for_each_entry_continue_reverse(ops, &pernet_list, list)
62 } 110 ops_exit_list(ops, &net_exit_list);
111
112 ops = saved_ops;
113 list_for_each_entry_continue_reverse(ops, &pernet_list, list)
114 ops_free_list(ops, &net_exit_list);
63 115
64 rcu_barrier(); 116 rcu_barrier();
65 goto out; 117 goto out;
@@ -147,18 +199,29 @@ struct net *copy_net_ns(unsigned long flags, struct net *old_net)
147 return net_create(); 199 return net_create();
148} 200}
149 201
202static DEFINE_SPINLOCK(cleanup_list_lock);
203static LIST_HEAD(cleanup_list); /* Must hold cleanup_list_lock to touch */
204
150static void cleanup_net(struct work_struct *work) 205static void cleanup_net(struct work_struct *work)
151{ 206{
152 struct pernet_operations *ops; 207 const struct pernet_operations *ops;
153 struct net *net; 208 struct net *net, *tmp;
209 LIST_HEAD(net_kill_list);
210 LIST_HEAD(net_exit_list);
154 211
155 net = container_of(work, struct net, work); 212 /* Atomically snapshot the list of namespaces to cleanup */
213 spin_lock_irq(&cleanup_list_lock);
214 list_replace_init(&cleanup_list, &net_kill_list);
215 spin_unlock_irq(&cleanup_list_lock);
156 216
157 mutex_lock(&net_mutex); 217 mutex_lock(&net_mutex);
158 218
159 /* Don't let anyone else find us. */ 219 /* Don't let anyone else find us. */
160 rtnl_lock(); 220 rtnl_lock();
161 list_del_rcu(&net->list); 221 list_for_each_entry(net, &net_kill_list, cleanup_list) {
222 list_del_rcu(&net->list);
223 list_add_tail(&net->exit_list, &net_exit_list);
224 }
162 rtnl_unlock(); 225 rtnl_unlock();
163 226
164 /* 227 /*
@@ -169,10 +232,12 @@ static void cleanup_net(struct work_struct *work)
169 synchronize_rcu(); 232 synchronize_rcu();
170 233
171 /* Run all of the network namespace exit methods */ 234 /* Run all of the network namespace exit methods */
172 list_for_each_entry_reverse(ops, &pernet_list, list) { 235 list_for_each_entry_reverse(ops, &pernet_list, list)
173 if (ops->exit) 236 ops_exit_list(ops, &net_exit_list);
174 ops->exit(net); 237
175 } 238 /* Free the net generic variables */
239 list_for_each_entry_reverse(ops, &pernet_list, list)
240 ops_free_list(ops, &net_exit_list);
176 241
177 mutex_unlock(&net_mutex); 242 mutex_unlock(&net_mutex);
178 243
@@ -182,14 +247,23 @@ static void cleanup_net(struct work_struct *work)
182 rcu_barrier(); 247 rcu_barrier();
183 248
184 /* Finally it is safe to free my network namespace structure */ 249 /* Finally it is safe to free my network namespace structure */
185 net_free(net); 250 list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) {
251 list_del_init(&net->exit_list);
252 net_free(net);
253 }
186} 254}
255static DECLARE_WORK(net_cleanup_work, cleanup_net);
187 256
188void __put_net(struct net *net) 257void __put_net(struct net *net)
189{ 258{
190 /* Cleanup the network namespace in process context */ 259 /* Cleanup the network namespace in process context */
191 INIT_WORK(&net->work, cleanup_net); 260 unsigned long flags;
192 queue_work(netns_wq, &net->work); 261
262 spin_lock_irqsave(&cleanup_list_lock, flags);
263 list_add(&net->cleanup_list, &cleanup_list);
264 spin_unlock_irqrestore(&cleanup_list_lock, flags);
265
266 queue_work(netns_wq, &net_cleanup_work);
193} 267}
194EXPORT_SYMBOL_GPL(__put_net); 268EXPORT_SYMBOL_GPL(__put_net);
195 269
@@ -259,18 +333,20 @@ static int __init net_ns_init(void)
259pure_initcall(net_ns_init); 333pure_initcall(net_ns_init);
260 334
261#ifdef CONFIG_NET_NS 335#ifdef CONFIG_NET_NS
262static int register_pernet_operations(struct list_head *list, 336static int __register_pernet_operations(struct list_head *list,
263 struct pernet_operations *ops) 337 struct pernet_operations *ops)
264{ 338{
265 struct net *net, *undo_net; 339 struct net *net;
266 int error; 340 int error;
341 LIST_HEAD(net_exit_list);
267 342
268 list_add_tail(&ops->list, list); 343 list_add_tail(&ops->list, list);
269 if (ops->init) { 344 if (ops->init || (ops->id && ops->size)) {
270 for_each_net(net) { 345 for_each_net(net) {
271 error = ops->init(net); 346 error = ops_init(ops, net);
272 if (error) 347 if (error)
273 goto out_undo; 348 goto out_undo;
349 list_add_tail(&net->exit_list, &net_exit_list);
274 } 350 }
275 } 351 }
276 return 0; 352 return 0;
@@ -278,45 +354,82 @@ static int register_pernet_operations(struct list_head *list,
278out_undo: 354out_undo:
279 /* If I have an error cleanup all namespaces I initialized */ 355 /* If I have an error cleanup all namespaces I initialized */
280 list_del(&ops->list); 356 list_del(&ops->list);
281 if (ops->exit) { 357 ops_exit_list(ops, &net_exit_list);
282 for_each_net(undo_net) { 358 ops_free_list(ops, &net_exit_list);
283 if (undo_net == net)
284 goto undone;
285 ops->exit(undo_net);
286 }
287 }
288undone:
289 return error; 359 return error;
290} 360}
291 361
292static void unregister_pernet_operations(struct pernet_operations *ops) 362static void __unregister_pernet_operations(struct pernet_operations *ops)
293{ 363{
294 struct net *net; 364 struct net *net;
365 LIST_HEAD(net_exit_list);
295 366
296 list_del(&ops->list); 367 list_del(&ops->list);
297 if (ops->exit) 368 for_each_net(net)
298 for_each_net(net) 369 list_add_tail(&net->exit_list, &net_exit_list);
299 ops->exit(net); 370 ops_exit_list(ops, &net_exit_list);
371 ops_free_list(ops, &net_exit_list);
300} 372}
301 373
302#else 374#else
303 375
376static int __register_pernet_operations(struct list_head *list,
377 struct pernet_operations *ops)
378{
379 int err = 0;
380 err = ops_init(ops, &init_net);
381 if (err)
382 ops_free(ops, &init_net);
383 return err;
384
385}
386
387static void __unregister_pernet_operations(struct pernet_operations *ops)
388{
389 LIST_HEAD(net_exit_list);
390 list_add(&init_net.exit_list, &net_exit_list);
391 ops_exit_list(ops, &net_exit_list);
392 ops_free_list(ops, &net_exit_list);
393}
394
395#endif /* CONFIG_NET_NS */
396
397static DEFINE_IDA(net_generic_ids);
398
304static int register_pernet_operations(struct list_head *list, 399static int register_pernet_operations(struct list_head *list,
305 struct pernet_operations *ops) 400 struct pernet_operations *ops)
306{ 401{
307 if (ops->init == NULL) 402 int error;
308 return 0; 403
309 return ops->init(&init_net); 404 if (ops->id) {
405again:
406 error = ida_get_new_above(&net_generic_ids, 1, ops->id);
407 if (error < 0) {
408 if (error == -EAGAIN) {
409 ida_pre_get(&net_generic_ids, GFP_KERNEL);
410 goto again;
411 }
412 return error;
413 }
414 }
415 error = __register_pernet_operations(list, ops);
416 if (error) {
417 rcu_barrier();
418 if (ops->id)
419 ida_remove(&net_generic_ids, *ops->id);
420 }
421
422 return error;
310} 423}
311 424
312static void unregister_pernet_operations(struct pernet_operations *ops) 425static void unregister_pernet_operations(struct pernet_operations *ops)
313{ 426{
314 if (ops->exit) 427
315 ops->exit(&init_net); 428 __unregister_pernet_operations(ops);
429 rcu_barrier();
430 if (ops->id)
431 ida_remove(&net_generic_ids, *ops->id);
316} 432}
317#endif
318
319static DEFINE_IDA(net_generic_ids);
320 433
321/** 434/**
322 * register_pernet_subsys - register a network namespace subsystem 435 * register_pernet_subsys - register a network namespace subsystem
@@ -364,38 +477,6 @@ void unregister_pernet_subsys(struct pernet_operations *module)
364} 477}
365EXPORT_SYMBOL_GPL(unregister_pernet_subsys); 478EXPORT_SYMBOL_GPL(unregister_pernet_subsys);
366 479
367int register_pernet_gen_subsys(int *id, struct pernet_operations *ops)
368{
369 int rv;
370
371 mutex_lock(&net_mutex);
372again:
373 rv = ida_get_new_above(&net_generic_ids, 1, id);
374 if (rv < 0) {
375 if (rv == -EAGAIN) {
376 ida_pre_get(&net_generic_ids, GFP_KERNEL);
377 goto again;
378 }
379 goto out;
380 }
381 rv = register_pernet_operations(first_device, ops);
382 if (rv < 0)
383 ida_remove(&net_generic_ids, *id);
384out:
385 mutex_unlock(&net_mutex);
386 return rv;
387}
388EXPORT_SYMBOL_GPL(register_pernet_gen_subsys);
389
390void unregister_pernet_gen_subsys(int id, struct pernet_operations *ops)
391{
392 mutex_lock(&net_mutex);
393 unregister_pernet_operations(ops);
394 ida_remove(&net_generic_ids, id);
395 mutex_unlock(&net_mutex);
396}
397EXPORT_SYMBOL_GPL(unregister_pernet_gen_subsys);
398
399/** 480/**
400 * register_pernet_device - register a network namespace device 481 * register_pernet_device - register a network namespace device
401 * @ops: pernet operations structure for the subsystem 482 * @ops: pernet operations structure for the subsystem
@@ -427,30 +508,6 @@ int register_pernet_device(struct pernet_operations *ops)
427} 508}
428EXPORT_SYMBOL_GPL(register_pernet_device); 509EXPORT_SYMBOL_GPL(register_pernet_device);
429 510
430int register_pernet_gen_device(int *id, struct pernet_operations *ops)
431{
432 int error;
433 mutex_lock(&net_mutex);
434again:
435 error = ida_get_new_above(&net_generic_ids, 1, id);
436 if (error) {
437 if (error == -EAGAIN) {
438 ida_pre_get(&net_generic_ids, GFP_KERNEL);
439 goto again;
440 }
441 goto out;
442 }
443 error = register_pernet_operations(&pernet_list, ops);
444 if (error)
445 ida_remove(&net_generic_ids, *id);
446 else if (first_device == &pernet_list)
447 first_device = &ops->list;
448out:
449 mutex_unlock(&net_mutex);
450 return error;
451}
452EXPORT_SYMBOL_GPL(register_pernet_gen_device);
453
454/** 511/**
455 * unregister_pernet_device - unregister a network namespace netdevice 512 * unregister_pernet_device - unregister a network namespace netdevice
456 * @ops: pernet operations structure to manipulate 513 * @ops: pernet operations structure to manipulate
@@ -470,17 +527,6 @@ void unregister_pernet_device(struct pernet_operations *ops)
470} 527}
471EXPORT_SYMBOL_GPL(unregister_pernet_device); 528EXPORT_SYMBOL_GPL(unregister_pernet_device);
472 529
473void unregister_pernet_gen_device(int id, struct pernet_operations *ops)
474{
475 mutex_lock(&net_mutex);
476 if (&ops->list == first_device)
477 first_device = first_device->next;
478 unregister_pernet_operations(ops);
479 ida_remove(&net_generic_ids, id);
480 mutex_unlock(&net_mutex);
481}
482EXPORT_SYMBOL_GPL(unregister_pernet_gen_device);
483
484static void net_generic_release(struct rcu_head *rcu) 530static void net_generic_release(struct rcu_head *rcu)
485{ 531{
486 struct net_generic *ng; 532 struct net_generic *ng;
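
Taken together, ops_init()/ops_free() and the id allocation folded into register_pernet_operations() replace the register_pernet_gen_*() helpers deleted at the end of this file: any pernet_operations that sets .id and .size now has its zeroed per-namespace struct allocated, published through net_assign_generic() and freed automatically, and exits run batched over whole lists of namespaces. A hedged sketch of a module using the new form; the foo_* names are illustrative:

struct foo_net {
	int sessions;			/* per-namespace state */
};

static int foo_net_id;

static int __net_init foo_init_net(struct net *net)
{
	struct foo_net *fn = net_generic(net, foo_net_id);

	fn->sessions = 0;		/* slot arrives zeroed from ops_init() */
	return 0;
}

static void __net_exit foo_exit_net(struct net *net)
{
	/* teardown only: the core kfree()s the net_generic() slot later */
}

static struct pernet_operations foo_net_ops = {
	.init = foo_init_net,
	.exit = foo_exit_net,
	.id   = &foo_net_id,
	.size = sizeof(struct foo_net),
};

/* module init would simply call register_pernet_subsys(&foo_net_ops). */
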
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 6e79e96cb4f2..a23b45f08ec9 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -340,6 +340,7 @@ struct pktgen_dev {
340 __u16 cur_udp_src; 340 __u16 cur_udp_src;
341 __u16 cur_queue_map; 341 __u16 cur_queue_map;
342 __u32 cur_pkt_size; 342 __u32 cur_pkt_size;
343 __u32 last_pkt_size;
343 344
344 __u8 hh[14]; 345 __u8 hh[14];
345 /* = { 346 /* = {
@@ -2051,9 +2052,8 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev)
2051 read_lock_bh(&idev->lock); 2052 read_lock_bh(&idev->lock);
2052 for (ifp = idev->addr_list; ifp; 2053 for (ifp = idev->addr_list; ifp;
2053 ifp = ifp->if_next) { 2054 ifp = ifp->if_next) {
2054 if (ifp->scope == IFA_LINK 2055 if (ifp->scope == IFA_LINK &&
2055 && !(ifp-> 2056 !(ifp->flags & IFA_F_TENTATIVE)) {
2056 flags & IFA_F_TENTATIVE)) {
2057 ipv6_addr_copy(&pkt_dev-> 2057 ipv6_addr_copy(&pkt_dev->
2058 cur_in6_saddr, 2058 cur_in6_saddr,
2059 &ifp->addr); 2059 &ifp->addr);
@@ -3436,7 +3436,7 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev)
3436 pkt_dev->clone_count--; /* back out increment, OOM */ 3436 pkt_dev->clone_count--; /* back out increment, OOM */
3437 return; 3437 return;
3438 } 3438 }
3439 3439 pkt_dev->last_pkt_size = pkt_dev->skb->len;
3440 pkt_dev->allocated_skbs++; 3440 pkt_dev->allocated_skbs++;
3441 pkt_dev->clone_count = 0; /* reset counter */ 3441 pkt_dev->clone_count = 0; /* reset counter */
3442 } 3442 }
@@ -3448,12 +3448,14 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev)
3448 txq = netdev_get_tx_queue(odev, queue_map); 3448 txq = netdev_get_tx_queue(odev, queue_map);
3449 3449
3450 __netif_tx_lock_bh(txq); 3450 __netif_tx_lock_bh(txq);
3451 atomic_inc(&(pkt_dev->skb->users));
3452 3451
3453 if (unlikely(netif_tx_queue_stopped(txq) || netif_tx_queue_frozen(txq))) 3452 if (unlikely(netif_tx_queue_stopped(txq) || netif_tx_queue_frozen(txq))) {
3454 ret = NETDEV_TX_BUSY; 3453 ret = NETDEV_TX_BUSY;
3455 else 3454 pkt_dev->last_ok = 0;
3456 ret = (*xmit)(pkt_dev->skb, odev); 3455 goto unlock;
3456 }
3457 atomic_inc(&(pkt_dev->skb->users));
3458 ret = (*xmit)(pkt_dev->skb, odev);
3457 3459
3458 switch (ret) { 3460 switch (ret) {
3459 case NETDEV_TX_OK: 3461 case NETDEV_TX_OK:
@@ -3461,7 +3463,7 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev)
3461 pkt_dev->last_ok = 1; 3463 pkt_dev->last_ok = 1;
3462 pkt_dev->sofar++; 3464 pkt_dev->sofar++;
3463 pkt_dev->seq_num++; 3465 pkt_dev->seq_num++;
3464 pkt_dev->tx_bytes += pkt_dev->cur_pkt_size; 3466 pkt_dev->tx_bytes += pkt_dev->last_pkt_size;
3465 break; 3467 break;
3466 default: /* Drivers are not supposed to return other values! */ 3468 default: /* Drivers are not supposed to return other values! */
3467 if (net_ratelimit()) 3469 if (net_ratelimit())
@@ -3475,6 +3477,7 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev)
3475 atomic_dec(&(pkt_dev->skb->users)); 3477 atomic_dec(&(pkt_dev->skb->users));
3476 pkt_dev->last_ok = 0; 3478 pkt_dev->last_ok = 0;
3477 } 3479 }
3480unlock:
3478 __netif_tx_unlock_bh(txq); 3481 __netif_tx_unlock_bh(txq);
3479 3482
3480 /* If pkt_dev->count is zero, then run forever */ 3483 /* If pkt_dev->count is zero, then run forever */
@@ -3622,6 +3625,7 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname)
3622{ 3625{
3623 struct pktgen_dev *pkt_dev; 3626 struct pktgen_dev *pkt_dev;
3624 int err; 3627 int err;
3628 int node = cpu_to_node(t->cpu);
3625 3629
3626 /* We don't allow a device to be on several threads */ 3630 /* We don't allow a device to be on several threads */
3627 3631
@@ -3631,12 +3635,13 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname)
3631 return -EBUSY; 3635 return -EBUSY;
3632 } 3636 }
3633 3637
3634 pkt_dev = kzalloc(sizeof(struct pktgen_dev), GFP_KERNEL); 3638 pkt_dev = kzalloc_node(sizeof(struct pktgen_dev), GFP_KERNEL, node);
3635 if (!pkt_dev) 3639 if (!pkt_dev)
3636 return -ENOMEM; 3640 return -ENOMEM;
3637 3641
3638 strcpy(pkt_dev->odevname, ifname); 3642 strcpy(pkt_dev->odevname, ifname);
3639 pkt_dev->flows = vmalloc(MAX_CFLOWS * sizeof(struct flow_state)); 3643 pkt_dev->flows = vmalloc_node(MAX_CFLOWS * sizeof(struct flow_state),
3644 node);
3640 if (pkt_dev->flows == NULL) { 3645 if (pkt_dev->flows == NULL) {
3641 kfree(pkt_dev); 3646 kfree(pkt_dev);
3642 return -ENOMEM; 3647 return -ENOMEM;
@@ -3698,7 +3703,8 @@ static int __init pktgen_create_thread(int cpu)
3698 struct proc_dir_entry *pe; 3703 struct proc_dir_entry *pe;
3699 struct task_struct *p; 3704 struct task_struct *p;
3700 3705
3701 t = kzalloc(sizeof(struct pktgen_thread), GFP_KERNEL); 3706 t = kzalloc_node(sizeof(struct pktgen_thread), GFP_KERNEL,
3707 cpu_to_node(cpu));
3702 if (!t) { 3708 if (!t) {
3703 printk(KERN_ERR "pktgen: ERROR: out of memory, can't " 3709 printk(KERN_ERR "pktgen: ERROR: out of memory, can't "
3704 "create new thread.\n"); 3710 "create new thread.\n");
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index eb42873f2a3a..33148a568199 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -38,7 +38,6 @@
38 38
39#include <asm/uaccess.h> 39#include <asm/uaccess.h>
40#include <asm/system.h> 40#include <asm/system.h>
41#include <asm/string.h>
42 41
43#include <linux/inet.h> 42#include <linux/inet.h>
44#include <linux/netdevice.h> 43#include <linux/netdevice.h>
@@ -53,8 +52,7 @@
53#include <net/rtnetlink.h> 52#include <net/rtnetlink.h>
54#include <net/net_namespace.h> 53#include <net/net_namespace.h>
55 54
56struct rtnl_link 55struct rtnl_link {
57{
58 rtnl_doit_func doit; 56 rtnl_doit_func doit;
59 rtnl_dumpit_func dumpit; 57 rtnl_dumpit_func dumpit;
60}; 58};
@@ -65,6 +63,7 @@ void rtnl_lock(void)
65{ 63{
66 mutex_lock(&rtnl_mutex); 64 mutex_lock(&rtnl_mutex);
67} 65}
66EXPORT_SYMBOL(rtnl_lock);
68 67
69void __rtnl_unlock(void) 68void __rtnl_unlock(void)
70{ 69{
@@ -76,16 +75,19 @@ void rtnl_unlock(void)
76 /* This fellow will unlock it for us. */ 75 /* This fellow will unlock it for us. */
77 netdev_run_todo(); 76 netdev_run_todo();
78} 77}
78EXPORT_SYMBOL(rtnl_unlock);
79 79
80int rtnl_trylock(void) 80int rtnl_trylock(void)
81{ 81{
82 return mutex_trylock(&rtnl_mutex); 82 return mutex_trylock(&rtnl_mutex);
83} 83}
84EXPORT_SYMBOL(rtnl_trylock);
84 85
85int rtnl_is_locked(void) 86int rtnl_is_locked(void)
86{ 87{
87 return mutex_is_locked(&rtnl_mutex); 88 return mutex_is_locked(&rtnl_mutex);
88} 89}
90EXPORT_SYMBOL(rtnl_is_locked);
89 91
90static struct rtnl_link *rtnl_msg_handlers[NPROTO]; 92static struct rtnl_link *rtnl_msg_handlers[NPROTO];
91 93
@@ -168,7 +170,6 @@ int __rtnl_register(int protocol, int msgtype,
168 170
169 return 0; 171 return 0;
170} 172}
171
172EXPORT_SYMBOL_GPL(__rtnl_register); 173EXPORT_SYMBOL_GPL(__rtnl_register);
173 174
174/** 175/**
@@ -188,7 +189,6 @@ void rtnl_register(int protocol, int msgtype,
188 "protocol = %d, message type = %d\n", 189 "protocol = %d, message type = %d\n",
189 protocol, msgtype); 190 protocol, msgtype);
190} 191}
191
192EXPORT_SYMBOL_GPL(rtnl_register); 192EXPORT_SYMBOL_GPL(rtnl_register);
193 193
194/** 194/**
@@ -213,7 +213,6 @@ int rtnl_unregister(int protocol, int msgtype)
213 213
214 return 0; 214 return 0;
215} 215}
216
217EXPORT_SYMBOL_GPL(rtnl_unregister); 216EXPORT_SYMBOL_GPL(rtnl_unregister);
218 217
219/** 218/**
@@ -230,7 +229,6 @@ void rtnl_unregister_all(int protocol)
230 kfree(rtnl_msg_handlers[protocol]); 229 kfree(rtnl_msg_handlers[protocol]);
231 rtnl_msg_handlers[protocol] = NULL; 230 rtnl_msg_handlers[protocol] = NULL;
232} 231}
233
234EXPORT_SYMBOL_GPL(rtnl_unregister_all); 232EXPORT_SYMBOL_GPL(rtnl_unregister_all);
235 233
236static LIST_HEAD(link_ops); 234static LIST_HEAD(link_ops);
@@ -248,12 +246,11 @@ static LIST_HEAD(link_ops);
248int __rtnl_link_register(struct rtnl_link_ops *ops) 246int __rtnl_link_register(struct rtnl_link_ops *ops)
249{ 247{
250 if (!ops->dellink) 248 if (!ops->dellink)
251 ops->dellink = unregister_netdevice; 249 ops->dellink = unregister_netdevice_queue;
252 250
253 list_add_tail(&ops->list, &link_ops); 251 list_add_tail(&ops->list, &link_ops);
254 return 0; 252 return 0;
255} 253}
256
257EXPORT_SYMBOL_GPL(__rtnl_link_register); 254EXPORT_SYMBOL_GPL(__rtnl_link_register);
258 255
259/** 256/**
@@ -271,19 +268,18 @@ int rtnl_link_register(struct rtnl_link_ops *ops)
271 rtnl_unlock(); 268 rtnl_unlock();
272 return err; 269 return err;
273} 270}
274
275EXPORT_SYMBOL_GPL(rtnl_link_register); 271EXPORT_SYMBOL_GPL(rtnl_link_register);
276 272
277static void __rtnl_kill_links(struct net *net, struct rtnl_link_ops *ops) 273static void __rtnl_kill_links(struct net *net, struct rtnl_link_ops *ops)
278{ 274{
279 struct net_device *dev; 275 struct net_device *dev;
280restart: 276 LIST_HEAD(list_kill);
277
281 for_each_netdev(net, dev) { 278 for_each_netdev(net, dev) {
282 if (dev->rtnl_link_ops == ops) { 279 if (dev->rtnl_link_ops == ops)
283 ops->dellink(dev); 280 ops->dellink(dev, &list_kill);
284 goto restart;
285 }
286 } 281 }
282 unregister_netdevice_many(&list_kill);
287} 283}
288 284
289void rtnl_kill_links(struct net *net, struct rtnl_link_ops *ops) 285void rtnl_kill_links(struct net *net, struct rtnl_link_ops *ops)
@@ -309,7 +305,6 @@ void __rtnl_link_unregister(struct rtnl_link_ops *ops)
309 } 305 }
310 list_del(&ops->list); 306 list_del(&ops->list);
311} 307}
312
313EXPORT_SYMBOL_GPL(__rtnl_link_unregister); 308EXPORT_SYMBOL_GPL(__rtnl_link_unregister);
314 309
315/** 310/**
@@ -322,7 +317,6 @@ void rtnl_link_unregister(struct rtnl_link_ops *ops)
322 __rtnl_link_unregister(ops); 317 __rtnl_link_unregister(ops);
323 rtnl_unlock(); 318 rtnl_unlock();
324} 319}
325
326EXPORT_SYMBOL_GPL(rtnl_link_unregister); 320EXPORT_SYMBOL_GPL(rtnl_link_unregister);
327 321
328static const struct rtnl_link_ops *rtnl_link_ops_get(const char *kind) 322static const struct rtnl_link_ops *rtnl_link_ops_get(const char *kind)
@@ -427,12 +421,13 @@ void __rta_fill(struct sk_buff *skb, int attrtype, int attrlen, const void *data
427 struct rtattr *rta; 421 struct rtattr *rta;
428 int size = RTA_LENGTH(attrlen); 422 int size = RTA_LENGTH(attrlen);
429 423
430 rta = (struct rtattr*)skb_put(skb, RTA_ALIGN(size)); 424 rta = (struct rtattr *)skb_put(skb, RTA_ALIGN(size));
431 rta->rta_type = attrtype; 425 rta->rta_type = attrtype;
432 rta->rta_len = size; 426 rta->rta_len = size;
433 memcpy(RTA_DATA(rta), data, attrlen); 427 memcpy(RTA_DATA(rta), data, attrlen);
434 memset(RTA_DATA(rta) + attrlen, 0, RTA_ALIGN(size) - size); 428 memset(RTA_DATA(rta) + attrlen, 0, RTA_ALIGN(size) - size);
435} 429}
430EXPORT_SYMBOL(__rta_fill);
436 431
437int rtnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, unsigned group, int echo) 432int rtnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, unsigned group, int echo)
438{ 433{
@@ -454,6 +449,7 @@ int rtnl_unicast(struct sk_buff *skb, struct net *net, u32 pid)
454 449
455 return nlmsg_unicast(rtnl, skb, pid); 450 return nlmsg_unicast(rtnl, skb, pid);
456} 451}
452EXPORT_SYMBOL(rtnl_unicast);
457 453
458void rtnl_notify(struct sk_buff *skb, struct net *net, u32 pid, u32 group, 454void rtnl_notify(struct sk_buff *skb, struct net *net, u32 pid, u32 group,
459 struct nlmsghdr *nlh, gfp_t flags) 455 struct nlmsghdr *nlh, gfp_t flags)
@@ -466,6 +462,7 @@ void rtnl_notify(struct sk_buff *skb, struct net *net, u32 pid, u32 group,
466 462
467 nlmsg_notify(rtnl, skb, pid, group, report, flags); 463 nlmsg_notify(rtnl, skb, pid, group, report, flags);
468} 464}
465EXPORT_SYMBOL(rtnl_notify);
469 466
470void rtnl_set_sk_err(struct net *net, u32 group, int error) 467void rtnl_set_sk_err(struct net *net, u32 group, int error)
471{ 468{
@@ -473,6 +470,7 @@ void rtnl_set_sk_err(struct net *net, u32 group, int error)
473 470
474 netlink_set_err(rtnl, 0, group, error); 471 netlink_set_err(rtnl, 0, group, error);
475} 472}
473EXPORT_SYMBOL(rtnl_set_sk_err);
476 474
477int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics) 475int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics)
478{ 476{
@@ -501,6 +499,7 @@ nla_put_failure:
501 nla_nest_cancel(skb, mx); 499 nla_nest_cancel(skb, mx);
502 return -EMSGSIZE; 500 return -EMSGSIZE;
503} 501}
502EXPORT_SYMBOL(rtnetlink_put_metrics);
504 503
505int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, u32 id, 504int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, u32 id,
506 u32 ts, u32 tsage, long expires, u32 error) 505 u32 ts, u32 tsage, long expires, u32 error)
@@ -520,14 +519,13 @@ int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, u32 id,
520 519
521 return nla_put(skb, RTA_CACHEINFO, sizeof(ci), &ci); 520 return nla_put(skb, RTA_CACHEINFO, sizeof(ci), &ci);
522} 521}
523
524EXPORT_SYMBOL_GPL(rtnl_put_cacheinfo); 522EXPORT_SYMBOL_GPL(rtnl_put_cacheinfo);
525 523
526static void set_operstate(struct net_device *dev, unsigned char transition) 524static void set_operstate(struct net_device *dev, unsigned char transition)
527{ 525{
528 unsigned char operstate = dev->operstate; 526 unsigned char operstate = dev->operstate;
529 527
530 switch(transition) { 528 switch (transition) {
531 case IF_OPER_UP: 529 case IF_OPER_UP:
532 if ((operstate == IF_OPER_DORMANT || 530 if ((operstate == IF_OPER_DORMANT ||
533 operstate == IF_OPER_UNKNOWN) && 531 operstate == IF_OPER_UNKNOWN) &&
@@ -682,22 +680,33 @@ nla_put_failure:
682static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) 680static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
683{ 681{
684 struct net *net = sock_net(skb->sk); 682 struct net *net = sock_net(skb->sk);
685 int idx; 683 int h, s_h;
686 int s_idx = cb->args[0]; 684 int idx = 0, s_idx;
687 struct net_device *dev; 685 struct net_device *dev;
688 686 struct hlist_head *head;
689 idx = 0; 687 struct hlist_node *node;
690 for_each_netdev(net, dev) { 688
691 if (idx < s_idx) 689 s_h = cb->args[0];
692 goto cont; 690 s_idx = cb->args[1];
693 if (rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK, 691
694 NETLINK_CB(cb->skb).pid, 692 for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
695 cb->nlh->nlmsg_seq, 0, NLM_F_MULTI) <= 0) 693 idx = 0;
696 break; 694 head = &net->dev_index_head[h];
695 hlist_for_each_entry(dev, node, head, index_hlist) {
696 if (idx < s_idx)
697 goto cont;
698 if (rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK,
699 NETLINK_CB(cb->skb).pid,
700 cb->nlh->nlmsg_seq, 0,
701 NLM_F_MULTI) <= 0)
702 goto out;
697cont: 703cont:
698 idx++; 704 idx++;
705 }
699 } 706 }
700 cb->args[0] = idx; 707out:
708 cb->args[1] = idx;
709 cb->args[0] = h;
701 710
702 return skb->len; 711 return skb->len;
703} 712}
@@ -717,12 +726,27 @@ const struct nla_policy ifla_policy[IFLA_MAX+1] = {
717 [IFLA_NET_NS_PID] = { .type = NLA_U32 }, 726 [IFLA_NET_NS_PID] = { .type = NLA_U32 },
718 [IFLA_IFALIAS] = { .type = NLA_STRING, .len = IFALIASZ-1 }, 727 [IFLA_IFALIAS] = { .type = NLA_STRING, .len = IFALIASZ-1 },
719}; 728};
729EXPORT_SYMBOL(ifla_policy);
720 730
721static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = { 731static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = {
722 [IFLA_INFO_KIND] = { .type = NLA_STRING }, 732 [IFLA_INFO_KIND] = { .type = NLA_STRING },
723 [IFLA_INFO_DATA] = { .type = NLA_NESTED }, 733 [IFLA_INFO_DATA] = { .type = NLA_NESTED },
724}; 734};
725 735
736struct net *rtnl_link_get_net(struct net *src_net, struct nlattr *tb[])
737{
738 struct net *net;
739 /* Examine the link attributes and figure out which
740 * network namespace we are talking about.
741 */
742 if (tb[IFLA_NET_NS_PID])
743 net = get_net_ns_by_pid(nla_get_u32(tb[IFLA_NET_NS_PID]));
744 else
745 net = get_net(src_net);
746 return net;
747}
748EXPORT_SYMBOL(rtnl_link_get_net);
749
726static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[]) 750static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[])
727{ 751{
728 if (dev) { 752 if (dev) {
@@ -746,8 +770,7 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
746 int err; 770 int err;
747 771
748 if (tb[IFLA_NET_NS_PID]) { 772 if (tb[IFLA_NET_NS_PID]) {
749 struct net *net; 773 struct net *net = rtnl_link_get_net(dev_net(dev), tb);
750 net = get_net_ns_by_pid(nla_get_u32(tb[IFLA_NET_NS_PID]));
751 if (IS_ERR(net)) { 774 if (IS_ERR(net)) {
752 err = PTR_ERR(net); 775 err = PTR_ERR(net);
753 goto errout; 776 goto errout;
@@ -910,9 +933,9 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
910 err = -EINVAL; 933 err = -EINVAL;
911 ifm = nlmsg_data(nlh); 934 ifm = nlmsg_data(nlh);
912 if (ifm->ifi_index > 0) 935 if (ifm->ifi_index > 0)
913 dev = dev_get_by_index(net, ifm->ifi_index); 936 dev = __dev_get_by_index(net, ifm->ifi_index);
914 else if (tb[IFLA_IFNAME]) 937 else if (tb[IFLA_IFNAME])
915 dev = dev_get_by_name(net, ifname); 938 dev = __dev_get_by_name(net, ifname);
916 else 939 else
917 goto errout; 940 goto errout;
918 941
@@ -921,12 +944,11 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
921 goto errout; 944 goto errout;
922 } 945 }
923 946
924 if ((err = validate_linkmsg(dev, tb)) < 0) 947 err = validate_linkmsg(dev, tb);
925 goto errout_dev; 948 if (err < 0)
949 goto errout;
926 950
927 err = do_setlink(dev, ifm, tb, ifname, 0); 951 err = do_setlink(dev, ifm, tb, ifname, 0);
928errout_dev:
929 dev_put(dev);
930errout: 952errout:
931 return err; 953 return err;
932} 954}
@@ -963,12 +985,12 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
963 if (!ops) 985 if (!ops)
964 return -EOPNOTSUPP; 986 return -EOPNOTSUPP;
965 987
966 ops->dellink(dev); 988 ops->dellink(dev, NULL);
967 return 0; 989 return 0;
968} 990}
969 991
970struct net_device *rtnl_create_link(struct net *net, char *ifname, 992struct net_device *rtnl_create_link(struct net *src_net, struct net *net,
971 const struct rtnl_link_ops *ops, struct nlattr *tb[]) 993 char *ifname, const struct rtnl_link_ops *ops, struct nlattr *tb[])
972{ 994{
973 int err; 995 int err;
974 struct net_device *dev; 996 struct net_device *dev;
@@ -976,7 +998,8 @@ struct net_device *rtnl_create_link(struct net *net, char *ifname,
976 unsigned int real_num_queues = 1; 998 unsigned int real_num_queues = 1;
977 999
978 if (ops->get_tx_queues) { 1000 if (ops->get_tx_queues) {
979 err = ops->get_tx_queues(net, tb, &num_queues, &real_num_queues); 1001 err = ops->get_tx_queues(src_net, tb, &num_queues,
1002 &real_num_queues);
980 if (err) 1003 if (err)
981 goto err; 1004 goto err;
982 } 1005 }
@@ -985,16 +1008,16 @@ struct net_device *rtnl_create_link(struct net *net, char *ifname,
985 if (!dev) 1008 if (!dev)
986 goto err; 1009 goto err;
987 1010
1011 dev_net_set(dev, net);
1012 dev->rtnl_link_ops = ops;
988 dev->real_num_tx_queues = real_num_queues; 1013 dev->real_num_tx_queues = real_num_queues;
1014
989 if (strchr(dev->name, '%')) { 1015 if (strchr(dev->name, '%')) {
990 err = dev_alloc_name(dev, dev->name); 1016 err = dev_alloc_name(dev, dev->name);
991 if (err < 0) 1017 if (err < 0)
992 goto err_free; 1018 goto err_free;
993 } 1019 }
994 1020
995 dev_net_set(dev, net);
996 dev->rtnl_link_ops = ops;
997
998 if (tb[IFLA_MTU]) 1021 if (tb[IFLA_MTU])
999 dev->mtu = nla_get_u32(tb[IFLA_MTU]); 1022 dev->mtu = nla_get_u32(tb[IFLA_MTU]);
1000 if (tb[IFLA_ADDRESS]) 1023 if (tb[IFLA_ADDRESS])
@@ -1017,6 +1040,7 @@ err_free:
1017err: 1040err:
1018 return ERR_PTR(err); 1041 return ERR_PTR(err);
1019} 1042}
1043EXPORT_SYMBOL(rtnl_create_link);
1020 1044
1021static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) 1045static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1022{ 1046{
@@ -1050,7 +1074,8 @@ replay:
1050 else 1074 else
1051 dev = NULL; 1075 dev = NULL;
1052 1076
1053 if ((err = validate_linkmsg(dev, tb)) < 0) 1077 err = validate_linkmsg(dev, tb);
1078 if (err < 0)
1054 return err; 1079 return err;
1055 1080
1056 if (tb[IFLA_LINKINFO]) { 1081 if (tb[IFLA_LINKINFO]) {
@@ -1071,6 +1096,7 @@ replay:
1071 1096
1072 if (1) { 1097 if (1) {
1073 struct nlattr *attr[ops ? ops->maxtype + 1 : 0], **data = NULL; 1098 struct nlattr *attr[ops ? ops->maxtype + 1 : 0], **data = NULL;
1099 struct net *dest_net;
1074 1100
1075 if (ops) { 1101 if (ops) {
1076 if (ops->maxtype && linkinfo[IFLA_INFO_DATA]) { 1102 if (ops->maxtype && linkinfo[IFLA_INFO_DATA]) {
@@ -1135,17 +1161,19 @@ replay:
1135 if (!ifname[0]) 1161 if (!ifname[0])
1136 snprintf(ifname, IFNAMSIZ, "%s%%d", ops->kind); 1162 snprintf(ifname, IFNAMSIZ, "%s%%d", ops->kind);
1137 1163
1138 dev = rtnl_create_link(net, ifname, ops, tb); 1164 dest_net = rtnl_link_get_net(net, tb);
1165 dev = rtnl_create_link(net, dest_net, ifname, ops, tb);
1139 1166
1140 if (IS_ERR(dev)) 1167 if (IS_ERR(dev))
1141 err = PTR_ERR(dev); 1168 err = PTR_ERR(dev);
1142 else if (ops->newlink) 1169 else if (ops->newlink)
1143 err = ops->newlink(dev, tb, data); 1170 err = ops->newlink(net, dev, tb, data);
1144 else 1171 else
1145 err = register_netdevice(dev); 1172 err = register_netdevice(dev);
1146
1147 if (err < 0 && !IS_ERR(dev)) 1173 if (err < 0 && !IS_ERR(dev))
1148 free_netdev(dev); 1174 free_netdev(dev);
1175
1176 put_net(dest_net);
1149 return err; 1177 return err;
1150 } 1178 }
1151} 1179}
@@ -1154,6 +1182,7 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1154{ 1182{
1155 struct net *net = sock_net(skb->sk); 1183 struct net *net = sock_net(skb->sk);
1156 struct ifinfomsg *ifm; 1184 struct ifinfomsg *ifm;
1185 char ifname[IFNAMSIZ];
1157 struct nlattr *tb[IFLA_MAX+1]; 1186 struct nlattr *tb[IFLA_MAX+1];
1158 struct net_device *dev = NULL; 1187 struct net_device *dev = NULL;
1159 struct sk_buff *nskb; 1188 struct sk_buff *nskb;
@@ -1163,19 +1192,23 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1163 if (err < 0) 1192 if (err < 0)
1164 return err; 1193 return err;
1165 1194
1195 if (tb[IFLA_IFNAME])
1196 nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ);
1197
1166 ifm = nlmsg_data(nlh); 1198 ifm = nlmsg_data(nlh);
1167 if (ifm->ifi_index > 0) { 1199 if (ifm->ifi_index > 0)
1168 dev = dev_get_by_index(net, ifm->ifi_index); 1200 dev = __dev_get_by_index(net, ifm->ifi_index);
1169 if (dev == NULL) 1201 else if (tb[IFLA_IFNAME])
1170 return -ENODEV; 1202 dev = __dev_get_by_name(net, ifname);
1171 } else 1203 else
1172 return -EINVAL; 1204 return -EINVAL;
1173 1205
1206 if (dev == NULL)
1207 return -ENODEV;
1208
1174 nskb = nlmsg_new(if_nlmsg_size(dev), GFP_KERNEL); 1209 nskb = nlmsg_new(if_nlmsg_size(dev), GFP_KERNEL);
1175 if (nskb == NULL) { 1210 if (nskb == NULL)
1176 err = -ENOBUFS; 1211 return -ENOBUFS;
1177 goto errout;
1178 }
1179 1212
1180 err = rtnl_fill_ifinfo(nskb, dev, RTM_NEWLINK, NETLINK_CB(skb).pid, 1213 err = rtnl_fill_ifinfo(nskb, dev, RTM_NEWLINK, NETLINK_CB(skb).pid,
1181 nlh->nlmsg_seq, 0, 0); 1214 nlh->nlmsg_seq, 0, 0);
@@ -1183,11 +1216,8 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1183 /* -EMSGSIZE implies BUG in if_nlmsg_size */ 1216 /* -EMSGSIZE implies BUG in if_nlmsg_size */
1184 WARN_ON(err == -EMSGSIZE); 1217 WARN_ON(err == -EMSGSIZE);
1185 kfree_skb(nskb); 1218 kfree_skb(nskb);
1186 goto errout; 1219 } else
1187 } 1220 err = rtnl_unicast(nskb, net, NETLINK_CB(skb).pid);
1188 err = rtnl_unicast(nskb, net, NETLINK_CB(skb).pid);
1189errout:
1190 dev_put(dev);
1191 1221
1192 return err; 1222 return err;
1193} 1223}
@@ -1199,7 +1229,7 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb)
1199 1229
1200 if (s_idx == 0) 1230 if (s_idx == 0)
1201 s_idx = 1; 1231 s_idx = 1;
1202 for (idx=1; idx<NPROTO; idx++) { 1232 for (idx = 1; idx < NPROTO; idx++) {
1203 int type = cb->nlh->nlmsg_type-RTM_BASE; 1233 int type = cb->nlh->nlmsg_type-RTM_BASE;
1204 if (idx < s_idx || idx == PF_PACKET) 1234 if (idx < s_idx || idx == PF_PACKET)
1205 continue; 1235 continue;
@@ -1266,7 +1296,7 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
1266 if (nlh->nlmsg_len < NLMSG_LENGTH(sizeof(struct rtgenmsg))) 1296 if (nlh->nlmsg_len < NLMSG_LENGTH(sizeof(struct rtgenmsg)))
1267 return 0; 1297 return 0;
1268 1298
1269 family = ((struct rtgenmsg*)NLMSG_DATA(nlh))->rtgen_family; 1299 family = ((struct rtgenmsg *)NLMSG_DATA(nlh))->rtgen_family;
1270 if (family >= NPROTO) 1300 if (family >= NPROTO)
1271 return -EAFNOSUPPORT; 1301 return -EAFNOSUPPORT;
1272 1302
@@ -1299,7 +1329,7 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
1299 1329
1300 if (nlh->nlmsg_len > min_len) { 1330 if (nlh->nlmsg_len > min_len) {
1301 int attrlen = nlh->nlmsg_len - NLMSG_ALIGN(min_len); 1331 int attrlen = nlh->nlmsg_len - NLMSG_ALIGN(min_len);
1302 struct rtattr *attr = (void*)nlh + NLMSG_ALIGN(min_len); 1332 struct rtattr *attr = (void *)nlh + NLMSG_ALIGN(min_len);
1303 1333
1304 while (RTA_OK(attr, attrlen)) { 1334 while (RTA_OK(attr, attrlen)) {
1305 unsigned flavor = attr->rta_type; 1335 unsigned flavor = attr->rta_type;
@@ -1405,14 +1435,3 @@ void __init rtnetlink_init(void)
1405 rtnl_register(PF_UNSPEC, RTM_GETROUTE, NULL, rtnl_dump_all); 1435 rtnl_register(PF_UNSPEC, RTM_GETROUTE, NULL, rtnl_dump_all);
1406} 1436}
1407 1437
1408EXPORT_SYMBOL(__rta_fill);
1409EXPORT_SYMBOL(rtnetlink_put_metrics);
1410EXPORT_SYMBOL(rtnl_lock);
1411EXPORT_SYMBOL(rtnl_trylock);
1412EXPORT_SYMBOL(rtnl_unlock);
1413EXPORT_SYMBOL(rtnl_is_locked);
1414EXPORT_SYMBOL(rtnl_unicast);
1415EXPORT_SYMBOL(rtnl_notify);
1416EXPORT_SYMBOL(rtnl_set_sk_err);
1417EXPORT_SYMBOL(rtnl_create_link);
1418EXPORT_SYMBOL(ifla_policy);
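
The block of EXPORT_SYMBOL() lines deleted here is not an un-export; these symbols stay available to modules. The cleanup moves each export to sit directly after the function it belongs to, which is the kernel's preferred placement. Illustrative only, showing how rtnl_lock() reads after the move:

	void rtnl_lock(void)
	{
		mutex_lock(&rtnl_mutex);
	}
	EXPORT_SYMBOL(rtnl_lock);	/* export adjacent to the definition */
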
diff --git a/net/core/skb_dma_map.c b/net/core/skb_dma_map.c
deleted file mode 100644
index 79687dfd6957..000000000000
--- a/net/core/skb_dma_map.c
+++ /dev/null
@@ -1,65 +0,0 @@
1/* skb_dma_map.c: DMA mapping helpers for socket buffers.
2 *
3 * Copyright (C) David S. Miller <davem@davemloft.net>
4 */
5
6#include <linux/kernel.h>
7#include <linux/module.h>
8#include <linux/dma-mapping.h>
9#include <linux/skbuff.h>
10
11int skb_dma_map(struct device *dev, struct sk_buff *skb,
12 enum dma_data_direction dir)
13{
14 struct skb_shared_info *sp = skb_shinfo(skb);
15 dma_addr_t map;
16 int i;
17
18 map = dma_map_single(dev, skb->data,
19 skb_headlen(skb), dir);
20 if (dma_mapping_error(dev, map))
21 goto out_err;
22
23 sp->dma_head = map;
24 for (i = 0; i < sp->nr_frags; i++) {
25 skb_frag_t *fp = &sp->frags[i];
26
27 map = dma_map_page(dev, fp->page, fp->page_offset,
28 fp->size, dir);
29 if (dma_mapping_error(dev, map))
30 goto unwind;
31 sp->dma_maps[i] = map;
32 }
33
34 return 0;
35
36unwind:
37 while (--i >= 0) {
38 skb_frag_t *fp = &sp->frags[i];
39
40 dma_unmap_page(dev, sp->dma_maps[i],
41 fp->size, dir);
42 }
43 dma_unmap_single(dev, sp->dma_head,
44 skb_headlen(skb), dir);
45out_err:
46 return -ENOMEM;
47}
48EXPORT_SYMBOL(skb_dma_map);
49
50void skb_dma_unmap(struct device *dev, struct sk_buff *skb,
51 enum dma_data_direction dir)
52{
53 struct skb_shared_info *sp = skb_shinfo(skb);
54 int i;
55
56 dma_unmap_single(dev, sp->dma_head,
57 skb_headlen(skb), dir);
58 for (i = 0; i < sp->nr_frags; i++) {
59 skb_frag_t *fp = &sp->frags[i];
60
61 dma_unmap_page(dev, sp->dma_maps[i],
62 fp->size, dir);
63 }
64}
65EXPORT_SYMBOL(skb_dma_unmap);
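
With skb_dma_map()/skb_dma_unmap() removed, the dma_head/dma_maps[] bookkeeping no longer lives in skb_shared_info; drivers that used the helpers now map the linear head and each page fragment themselves and keep the DMA handles in their own descriptor rings. A hedged sketch of the per-driver replacement, closely mirroring the deleted helper (my_tx_ring and my_map_skb are made-up names):

	struct my_tx_ring {			/* hypothetical driver state */
		dma_addr_t	dma_head;
		dma_addr_t	dma[MAX_SKB_FRAGS];
	};

	static int my_map_skb(struct device *dev, struct my_tx_ring *tx,
			      struct sk_buff *skb)
	{
		int i;

		/* map the linear head first */
		tx->dma_head = dma_map_single(dev, skb->data, skb_headlen(skb),
					      DMA_TO_DEVICE);
		if (dma_mapping_error(dev, tx->dma_head))
			return -ENOMEM;

		/* then each paged fragment */
		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
			skb_frag_t *fp = &skb_shinfo(skb)->frags[i];

			tx->dma[i] = dma_map_page(dev, fp->page, fp->page_offset,
						  fp->size, DMA_TO_DEVICE);
			if (dma_mapping_error(dev, tx->dma[i]))
				goto unwind;
		}
		return 0;

	unwind:	/* unmap fragments i-1..0, then the head */
		while (--i >= 0)
			dma_unmap_page(dev, tx->dma[i],
				       skb_shinfo(skb)->frags[i].size,
				       DMA_TO_DEVICE);
		dma_unmap_single(dev, tx->dma_head, skb_headlen(skb),
				 DMA_TO_DEVICE);
		return -ENOMEM;
	}

Storing the handles in driver-private state rather than in skb_shared_info is what allows the shared-info fields (and this file) to go away.
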
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index ec85681a7dd8..bfa3e7865a8c 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -493,6 +493,9 @@ int skb_recycle_check(struct sk_buff *skb, int skb_size)
493{ 493{
494 struct skb_shared_info *shinfo; 494 struct skb_shared_info *shinfo;
495 495
496 if (irqs_disabled())
497 return 0;
498
496 if (skb_is_nonlinear(skb) || skb->fclone != SKB_FCLONE_UNAVAILABLE) 499 if (skb_is_nonlinear(skb) || skb->fclone != SKB_FCLONE_UNAVAILABLE)
497 return 0; 500 return 0;
498 501
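
The new irqs_disabled() bail-out makes skb_recycle_check() decline recycling when called with interrupts off; the skb then simply takes the ordinary free path. For context, drivers use the helper in their TX-completion cleanup roughly like this (modelled on the gianfar-style pattern; priv and its fields are schematic):

	/* schematic TX-completion cleanup in a driver's NAPI poll */
	if (!skb_shared(skb) &&
	    skb_recycle_check(skb, priv->rx_buffer_size + RXBUF_ALIGNMENT))
		__skb_queue_head(&priv->rx_recycle, skb);  /* reuse as RX buffer */
	else
		dev_kfree_skb_any(skb);			   /* normal free */
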
@@ -546,7 +549,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
546#endif 549#endif
547 new->protocol = old->protocol; 550 new->protocol = old->protocol;
548 new->mark = old->mark; 551 new->mark = old->mark;
549 new->iif = old->iif; 552 new->skb_iif = old->skb_iif;
550 __nf_copy(new, old); 553 __nf_copy(new, old);
551#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ 554#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
552 defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE) 555 defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
diff --git a/net/core/sock.c b/net/core/sock.c
index 7626b6aacd68..76ff58d43e26 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -274,25 +274,27 @@ static void sock_disable_timestamp(struct sock *sk, int flag)
274 274
275int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) 275int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
276{ 276{
277 int err = 0; 277 int err;
278 int skb_len; 278 int skb_len;
279 unsigned long flags;
280 struct sk_buff_head *list = &sk->sk_receive_queue;
279 281
280 /* Cast sk->rcvbuf to unsigned... It's pointless, but reduces 282 /* Cast sk->rcvbuf to unsigned... It's pointless, but reduces
281 number of warnings when compiling with -W --ANK 283 number of warnings when compiling with -W --ANK
282 */ 284 */
283 if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >= 285 if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
284 (unsigned)sk->sk_rcvbuf) { 286 (unsigned)sk->sk_rcvbuf) {
285 err = -ENOMEM; 287 atomic_inc(&sk->sk_drops);
286 goto out; 288 return -ENOMEM;
287 } 289 }
288 290
289 err = sk_filter(sk, skb); 291 err = sk_filter(sk, skb);
290 if (err) 292 if (err)
291 goto out; 293 return err;
292 294
293 if (!sk_rmem_schedule(sk, skb->truesize)) { 295 if (!sk_rmem_schedule(sk, skb->truesize)) {
294 err = -ENOBUFS; 296 atomic_inc(&sk->sk_drops);
295 goto out; 297 return -ENOBUFS;
296 } 298 }
297 299
298 skb->dev = NULL; 300 skb->dev = NULL;
@@ -305,12 +307,14 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
305 */ 307 */
306 skb_len = skb->len; 308 skb_len = skb->len;
307 309
308 skb_queue_tail(&sk->sk_receive_queue, skb); 310 spin_lock_irqsave(&list->lock, flags);
311 skb->dropcount = atomic_read(&sk->sk_drops);
312 __skb_queue_tail(list, skb);
313 spin_unlock_irqrestore(&list->lock, flags);
309 314
310 if (!sock_flag(sk, SOCK_DEAD)) 315 if (!sock_flag(sk, SOCK_DEAD))
311 sk->sk_data_ready(sk, skb_len); 316 sk->sk_data_ready(sk, skb_len);
312out: 317 return 0;
313 return err;
314} 318}
315EXPORT_SYMBOL(sock_queue_rcv_skb); 319EXPORT_SYMBOL(sock_queue_rcv_skb);
316 320
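
The reason sock_queue_rcv_skb() now open-codes the enqueue is that skb->dropcount must be snapshotted from sk->sk_drops under the same receive-queue lock as the __skb_queue_tail(), so every queued skb carries a drop counter consistent with its position in the queue (note the atomic_inc() added to each early-return drop path above). On dequeue a protocol can report that counter to userspace; the matching helper added by this series looks roughly like the following (treat the exact placement in sock.h as an assumption):

	static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk,
					   struct sk_buff *skb)
	{
		/* only if the user opted in via SO_RXQ_OVFL; see the
		 * setsockopt/getsockopt hunks further down */
		if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && skb->dropcount)
			put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL,
				 sizeof(__u32), &skb->dropcount);
	}
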
@@ -348,11 +352,18 @@ discard_and_relse:
348} 352}
349EXPORT_SYMBOL(sk_receive_skb); 353EXPORT_SYMBOL(sk_receive_skb);
350 354
355void sk_reset_txq(struct sock *sk)
356{
357 sk_tx_queue_clear(sk);
358}
359EXPORT_SYMBOL(sk_reset_txq);
360
351struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie) 361struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie)
352{ 362{
353 struct dst_entry *dst = sk->sk_dst_cache; 363 struct dst_entry *dst = sk->sk_dst_cache;
354 364
355 if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) { 365 if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
366 sk_tx_queue_clear(sk);
356 sk->sk_dst_cache = NULL; 367 sk->sk_dst_cache = NULL;
357 dst_release(dst); 368 dst_release(dst);
358 return NULL; 369 return NULL;
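
sk_tx_queue_clear() appears here because another patch in this series caches the chosen TX queue index in the socket: the cached index is only meaningful for the cached route, so it must be invalidated at the same moment sk_dst_cache is dropped. The accessors are trivial; roughly (per the same series, in include/net/sock.h):

	static inline void sk_tx_queue_set(struct sock *sk, int tx_queue)
	{
		sk->sk_tx_queue_mapping = tx_queue;
	}

	static inline void sk_tx_queue_clear(struct sock *sk)
	{
		sk->sk_tx_queue_mapping = -1;	/* -1: no cached queue */
	}

	static inline int sk_tx_queue_get(const struct sock *sk)
	{
		return sk ? sk->sk_tx_queue_mapping : -1;
	}
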
@@ -406,17 +417,18 @@ static int sock_bindtodevice(struct sock *sk, char __user *optval, int optlen)
406 if (copy_from_user(devname, optval, optlen)) 417 if (copy_from_user(devname, optval, optlen))
407 goto out; 418 goto out;
408 419
409 if (devname[0] == '\0') { 420 index = 0;
410 index = 0; 421 if (devname[0] != '\0') {
411 } else { 422 struct net_device *dev;
412 struct net_device *dev = dev_get_by_name(net, devname);
413 423
424 rcu_read_lock();
425 dev = dev_get_by_name_rcu(net, devname);
426 if (dev)
427 index = dev->ifindex;
428 rcu_read_unlock();
414 ret = -ENODEV; 429 ret = -ENODEV;
415 if (!dev) 430 if (!dev)
416 goto out; 431 goto out;
417
418 index = dev->ifindex;
419 dev_put(dev);
420 } 432 }
421 433
422 lock_sock(sk); 434 lock_sock(sk);
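
The sock_bindtodevice() change is a standard RCU conversion: only the scalar ifindex is needed from the device, so there is no reason to take and drop a reference. The general pattern:

	int index = 0;

	rcu_read_lock();
	dev = dev_get_by_name_rcu(net, devname);	/* no dev_hold() */
	if (dev)
		index = dev->ifindex;	/* copy the scalar while protected */
	rcu_read_unlock();
	/* dev must not be dereferenced past this point */

Note that the NULL test after rcu_read_unlock() in the hunk above is still safe: it inspects only the pointer value, never the pointee.
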
@@ -702,6 +714,12 @@ set_rcvbuf:
702 714
703 /* We implement the SO_SNDLOWAT etc to 715 /* We implement the SO_SNDLOWAT etc to
704 not be settable (1003.1g 5.3) */ 716 not be settable (1003.1g 5.3) */
717 case SO_RXQ_OVFL:
718 if (valbool)
719 sock_set_flag(sk, SOCK_RXQ_OVFL);
720 else
721 sock_reset_flag(sk, SOCK_RXQ_OVFL);
722 break;
705 default: 723 default:
706 ret = -ENOPROTOOPT; 724 ret = -ENOPROTOOPT;
707 break; 725 break;
@@ -901,6 +919,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
901 v.val = sk->sk_mark; 919 v.val = sk->sk_mark;
902 break; 920 break;
903 921
922 case SO_RXQ_OVFL:
923 v.val = !!sock_flag(sk, SOCK_RXQ_OVFL);
924 break;
925
904 default: 926 default:
905 return -ENOPROTOOPT; 927 return -ENOPROTOOPT;
906 } 928 }
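
Taken together, the two SO_RXQ_OVFL hunks give userspace an opt-in drop counter: each received datagram can carry the running total of sk_drops at the time it was enqueued. A hedged userspace sketch of the consuming side (socket setup and error handling omitted; fd is an open datagram socket, and the option value 40 is taken from asm-generic/socket.h, so check your headers):

	#include <stdint.h>
	#include <string.h>
	#include <sys/socket.h>

	#ifndef SO_RXQ_OVFL
	#define SO_RXQ_OVFL 40
	#endif

	uint32_t drops = 0;
	int on = 1;

	setsockopt(fd, SOL_SOCKET, SO_RXQ_OVFL, &on, sizeof(on));

	/* each recvmsg() may now carry a SOL_SOCKET/SO_RXQ_OVFL cmsg */
	char buf[2048], cbuf[CMSG_SPACE(sizeof(uint32_t))];
	struct iovec iov = { .iov_base = buf, .iov_len = sizeof(buf) };
	struct msghdr msg = {
		.msg_iov = &iov,	.msg_iovlen = 1,
		.msg_control = cbuf,	.msg_controllen = sizeof(cbuf),
	};
	recvmsg(fd, &msg, 0);

	for (struct cmsghdr *c = CMSG_FIRSTHDR(&msg); c;
	     c = CMSG_NXTHDR(&msg, c))
		if (c->cmsg_level == SOL_SOCKET && c->cmsg_type == SO_RXQ_OVFL)
			memcpy(&drops, CMSG_DATA(c), sizeof(drops));
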
@@ -939,7 +961,8 @@ static void sock_copy(struct sock *nsk, const struct sock *osk)
939 void *sptr = nsk->sk_security; 961 void *sptr = nsk->sk_security;
940#endif 962#endif
941 BUILD_BUG_ON(offsetof(struct sock, sk_copy_start) != 963 BUILD_BUG_ON(offsetof(struct sock, sk_copy_start) !=
942 sizeof(osk->sk_node) + sizeof(osk->sk_refcnt)); 964 sizeof(osk->sk_node) + sizeof(osk->sk_refcnt) +
965 sizeof(osk->sk_tx_queue_mapping));
943 memcpy(&nsk->sk_copy_start, &osk->sk_copy_start, 966 memcpy(&nsk->sk_copy_start, &osk->sk_copy_start,
944 osk->sk_prot->obj_size - offsetof(struct sock, sk_copy_start)); 967 osk->sk_prot->obj_size - offsetof(struct sock, sk_copy_start));
945#ifdef CONFIG_SECURITY_NETWORK 968#ifdef CONFIG_SECURITY_NETWORK
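
The widened BUILD_BUG_ON records the layout contract behind sock_copy(): everything from sk_copy_start onward is bulk-copied into the new socket, while the leading fields (node linkage, refcount, and now the TX-queue mapping) are managed separately. BUILD_BUG_ON is a compile-time assertion, so moving sk_tx_queue_mapping out of that leading group without updating this arithmetic fails the build instead of silently copying stale state. The era's definition, abridged:

	/* the array size goes negative when condition is true: build error */
	#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)]))

	BUILD_BUG_ON(sizeof(long) < 4);		/* compiles */
	BUILD_BUG_ON(sizeof(long) < 999);	/* rejected at compile time */
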
@@ -983,6 +1006,7 @@ static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
983 1006
984 if (!try_module_get(prot->owner)) 1007 if (!try_module_get(prot->owner))
985 goto out_free_sec; 1008 goto out_free_sec;
1009 sk_tx_queue_clear(sk);
986 } 1010 }
987 1011
988 return sk; 1012 return sk;
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 267314664813..06124872af5b 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -123,7 +123,7 @@ static __net_init int sysctl_core_net_init(struct net *net)
123 net->core.sysctl_somaxconn = SOMAXCONN; 123 net->core.sysctl_somaxconn = SOMAXCONN;
124 124
125 tbl = netns_core_table; 125 tbl = netns_core_table;
126 if (net != &init_net) { 126 if (!net_eq(net, &init_net)) {
127 tbl = kmemdup(tbl, sizeof(netns_core_table), GFP_KERNEL); 127 tbl = kmemdup(tbl, sizeof(netns_core_table), GFP_KERNEL);
128 if (tbl == NULL) 128 if (tbl == NULL)
129 goto err_dup; 129 goto err_dup;
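
The net != &init_net to net_eq() change is more than style: when CONFIG_NET_NS is disabled there is only one namespace, and net_eq() collapses to a compile-time constant, letting the compiler discard the kmemdup() branch entirely. The helper as defined in include/net/net_namespace.h of this era (abridged):

	#ifdef CONFIG_NET_NS
	static inline int net_eq(const struct net *net1, const struct net *net2)
	{
		return net1 == net2;
	}
	#else
	static inline int net_eq(const struct net *net1, const struct net *net2)
	{
		return 1;	/* a single namespace: all nets compare equal */
	}
	#endif
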