aboutsummaryrefslogtreecommitdiffstats
path: root/net/core/dev.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/core/dev.c')
-rw-r--r--net/core/dev.c678
1 files changed, 455 insertions, 223 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index b8f74cfb1bfd..be9924f60ec3 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -79,6 +79,7 @@
79#include <linux/cpu.h> 79#include <linux/cpu.h>
80#include <linux/types.h> 80#include <linux/types.h>
81#include <linux/kernel.h> 81#include <linux/kernel.h>
82#include <linux/hash.h>
82#include <linux/sched.h> 83#include <linux/sched.h>
83#include <linux/mutex.h> 84#include <linux/mutex.h>
84#include <linux/string.h> 85#include <linux/string.h>
@@ -104,6 +105,7 @@
104#include <net/dst.h> 105#include <net/dst.h>
105#include <net/pkt_sched.h> 106#include <net/pkt_sched.h>
106#include <net/checksum.h> 107#include <net/checksum.h>
108#include <net/xfrm.h>
107#include <linux/highmem.h> 109#include <linux/highmem.h>
108#include <linux/init.h> 110#include <linux/init.h>
109#include <linux/kmod.h> 111#include <linux/kmod.h>
@@ -175,7 +177,7 @@ static struct list_head ptype_all __read_mostly; /* Taps */
175 * The @dev_base_head list is protected by @dev_base_lock and the rtnl 177 * The @dev_base_head list is protected by @dev_base_lock and the rtnl
176 * semaphore. 178 * semaphore.
177 * 179 *
178 * Pure readers hold dev_base_lock for reading. 180 * Pure readers hold dev_base_lock for reading, or rcu_read_lock()
179 * 181 *
180 * Writers must hold the rtnl semaphore while they loop through the 182 * Writers must hold the rtnl semaphore while they loop through the
181 * dev_base_head list, and hold dev_base_lock for writing when they do the 183 * dev_base_head list, and hold dev_base_lock for writing when they do the
@@ -193,18 +195,15 @@ static struct list_head ptype_all __read_mostly; /* Taps */
193DEFINE_RWLOCK(dev_base_lock); 195DEFINE_RWLOCK(dev_base_lock);
194EXPORT_SYMBOL(dev_base_lock); 196EXPORT_SYMBOL(dev_base_lock);
195 197
196#define NETDEV_HASHBITS 8
197#define NETDEV_HASHENTRIES (1 << NETDEV_HASHBITS)
198
199static inline struct hlist_head *dev_name_hash(struct net *net, const char *name) 198static inline struct hlist_head *dev_name_hash(struct net *net, const char *name)
200{ 199{
201 unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ)); 200 unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
202 return &net->dev_name_head[hash & ((1 << NETDEV_HASHBITS) - 1)]; 201 return &net->dev_name_head[hash_32(hash, NETDEV_HASHBITS)];
203} 202}
204 203
205static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex) 204static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
206{ 205{
207 return &net->dev_index_head[ifindex & ((1 << NETDEV_HASHBITS) - 1)]; 206 return &net->dev_index_head[ifindex & (NETDEV_HASHENTRIES - 1)];
208} 207}
209 208
210/* Device list insertion */ 209/* Device list insertion */
@@ -215,23 +214,26 @@ static int list_netdevice(struct net_device *dev)
215 ASSERT_RTNL(); 214 ASSERT_RTNL();
216 215
217 write_lock_bh(&dev_base_lock); 216 write_lock_bh(&dev_base_lock);
218 list_add_tail(&dev->dev_list, &net->dev_base_head); 217 list_add_tail_rcu(&dev->dev_list, &net->dev_base_head);
219 hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name)); 218 hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name));
220 hlist_add_head(&dev->index_hlist, dev_index_hash(net, dev->ifindex)); 219 hlist_add_head_rcu(&dev->index_hlist,
220 dev_index_hash(net, dev->ifindex));
221 write_unlock_bh(&dev_base_lock); 221 write_unlock_bh(&dev_base_lock);
222 return 0; 222 return 0;
223} 223}
224 224
225/* Device list removal */ 225/* Device list removal
226 * caller must respect a RCU grace period before freeing/reusing dev
227 */
226static void unlist_netdevice(struct net_device *dev) 228static void unlist_netdevice(struct net_device *dev)
227{ 229{
228 ASSERT_RTNL(); 230 ASSERT_RTNL();
229 231
230 /* Unlink dev from the device chain */ 232 /* Unlink dev from the device chain */
231 write_lock_bh(&dev_base_lock); 233 write_lock_bh(&dev_base_lock);
232 list_del(&dev->dev_list); 234 list_del_rcu(&dev->dev_list);
233 hlist_del(&dev->name_hlist); 235 hlist_del_rcu(&dev->name_hlist);
234 hlist_del(&dev->index_hlist); 236 hlist_del_rcu(&dev->index_hlist);
235 write_unlock_bh(&dev_base_lock); 237 write_unlock_bh(&dev_base_lock);
236} 238}
237 239
@@ -587,18 +589,44 @@ __setup("netdev=", netdev_boot_setup);
587struct net_device *__dev_get_by_name(struct net *net, const char *name) 589struct net_device *__dev_get_by_name(struct net *net, const char *name)
588{ 590{
589 struct hlist_node *p; 591 struct hlist_node *p;
592 struct net_device *dev;
593 struct hlist_head *head = dev_name_hash(net, name);
590 594
591 hlist_for_each(p, dev_name_hash(net, name)) { 595 hlist_for_each_entry(dev, p, head, name_hlist)
592 struct net_device *dev
593 = hlist_entry(p, struct net_device, name_hlist);
594 if (!strncmp(dev->name, name, IFNAMSIZ)) 596 if (!strncmp(dev->name, name, IFNAMSIZ))
595 return dev; 597 return dev;
596 } 598
597 return NULL; 599 return NULL;
598} 600}
599EXPORT_SYMBOL(__dev_get_by_name); 601EXPORT_SYMBOL(__dev_get_by_name);
600 602
601/** 603/**
604 * dev_get_by_name_rcu - find a device by its name
605 * @net: the applicable net namespace
606 * @name: name to find
607 *
608 * Find an interface by name.
609 * If the name is found a pointer to the device is returned.
610 * If the name is not found then %NULL is returned.
611 * The reference counters are not incremented so the caller must be
612 * careful with locks. The caller must hold RCU lock.
613 */
614
615struct net_device *dev_get_by_name_rcu(struct net *net, const char *name)
616{
617 struct hlist_node *p;
618 struct net_device *dev;
619 struct hlist_head *head = dev_name_hash(net, name);
620
621 hlist_for_each_entry_rcu(dev, p, head, name_hlist)
622 if (!strncmp(dev->name, name, IFNAMSIZ))
623 return dev;
624
625 return NULL;
626}
627EXPORT_SYMBOL(dev_get_by_name_rcu);
628
629/**
602 * dev_get_by_name - find a device by its name 630 * dev_get_by_name - find a device by its name
603 * @net: the applicable net namespace 631 * @net: the applicable net namespace
604 * @name: name to find 632 * @name: name to find
@@ -614,11 +642,11 @@ struct net_device *dev_get_by_name(struct net *net, const char *name)
614{ 642{
615 struct net_device *dev; 643 struct net_device *dev;
616 644
617 read_lock(&dev_base_lock); 645 rcu_read_lock();
618 dev = __dev_get_by_name(net, name); 646 dev = dev_get_by_name_rcu(net, name);
619 if (dev) 647 if (dev)
620 dev_hold(dev); 648 dev_hold(dev);
621 read_unlock(&dev_base_lock); 649 rcu_read_unlock();
622 return dev; 650 return dev;
623} 651}
624EXPORT_SYMBOL(dev_get_by_name); 652EXPORT_SYMBOL(dev_get_by_name);
@@ -638,17 +666,42 @@ EXPORT_SYMBOL(dev_get_by_name);
638struct net_device *__dev_get_by_index(struct net *net, int ifindex) 666struct net_device *__dev_get_by_index(struct net *net, int ifindex)
639{ 667{
640 struct hlist_node *p; 668 struct hlist_node *p;
669 struct net_device *dev;
670 struct hlist_head *head = dev_index_hash(net, ifindex);
641 671
642 hlist_for_each(p, dev_index_hash(net, ifindex)) { 672 hlist_for_each_entry(dev, p, head, index_hlist)
643 struct net_device *dev
644 = hlist_entry(p, struct net_device, index_hlist);
645 if (dev->ifindex == ifindex) 673 if (dev->ifindex == ifindex)
646 return dev; 674 return dev;
647 } 675
648 return NULL; 676 return NULL;
649} 677}
650EXPORT_SYMBOL(__dev_get_by_index); 678EXPORT_SYMBOL(__dev_get_by_index);
651 679
680/**
681 * dev_get_by_index_rcu - find a device by its ifindex
682 * @net: the applicable net namespace
683 * @ifindex: index of device
684 *
685 * Search for an interface by index. Returns %NULL if the device
686 * is not found or a pointer to the device. The device has not
687 * had its reference counter increased so the caller must be careful
688 * about locking. The caller must hold RCU lock.
689 */
690
691struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex)
692{
693 struct hlist_node *p;
694 struct net_device *dev;
695 struct hlist_head *head = dev_index_hash(net, ifindex);
696
697 hlist_for_each_entry_rcu(dev, p, head, index_hlist)
698 if (dev->ifindex == ifindex)
699 return dev;
700
701 return NULL;
702}
703EXPORT_SYMBOL(dev_get_by_index_rcu);
704
652 705
653/** 706/**
654 * dev_get_by_index - find a device by its ifindex 707 * dev_get_by_index - find a device by its ifindex
@@ -665,11 +718,11 @@ struct net_device *dev_get_by_index(struct net *net, int ifindex)
665{ 718{
666 struct net_device *dev; 719 struct net_device *dev;
667 720
668 read_lock(&dev_base_lock); 721 rcu_read_lock();
669 dev = __dev_get_by_index(net, ifindex); 722 dev = dev_get_by_index_rcu(net, ifindex);
670 if (dev) 723 if (dev)
671 dev_hold(dev); 724 dev_hold(dev);
672 read_unlock(&dev_base_lock); 725 rcu_read_unlock();
673 return dev; 726 return dev;
674} 727}
675EXPORT_SYMBOL(dev_get_by_index); 728EXPORT_SYMBOL(dev_get_by_index);
@@ -748,15 +801,15 @@ struct net_device *dev_get_by_flags(struct net *net, unsigned short if_flags,
748 struct net_device *dev, *ret; 801 struct net_device *dev, *ret;
749 802
750 ret = NULL; 803 ret = NULL;
751 read_lock(&dev_base_lock); 804 rcu_read_lock();
752 for_each_netdev(net, dev) { 805 for_each_netdev_rcu(net, dev) {
753 if (((dev->flags ^ if_flags) & mask) == 0) { 806 if (((dev->flags ^ if_flags) & mask) == 0) {
754 dev_hold(dev); 807 dev_hold(dev);
755 ret = dev; 808 ret = dev;
756 break; 809 break;
757 } 810 }
758 } 811 }
759 read_unlock(&dev_base_lock); 812 rcu_read_unlock();
760 return ret; 813 return ret;
761} 814}
762EXPORT_SYMBOL(dev_get_by_flags); 815EXPORT_SYMBOL(dev_get_by_flags);
@@ -841,7 +894,8 @@ static int __dev_alloc_name(struct net *net, const char *name, char *buf)
841 free_page((unsigned long) inuse); 894 free_page((unsigned long) inuse);
842 } 895 }
843 896
844 snprintf(buf, IFNAMSIZ, name, i); 897 if (buf != name)
898 snprintf(buf, IFNAMSIZ, name, i);
845 if (!__dev_get_by_name(net, buf)) 899 if (!__dev_get_by_name(net, buf))
846 return i; 900 return i;
847 901
@@ -881,6 +935,21 @@ int dev_alloc_name(struct net_device *dev, const char *name)
881} 935}
882EXPORT_SYMBOL(dev_alloc_name); 936EXPORT_SYMBOL(dev_alloc_name);
883 937
938static int dev_get_valid_name(struct net *net, const char *name, char *buf,
939 bool fmt)
940{
941 if (!dev_valid_name(name))
942 return -EINVAL;
943
944 if (fmt && strchr(name, '%'))
945 return __dev_alloc_name(net, name, buf);
946 else if (__dev_get_by_name(net, name))
947 return -EEXIST;
948 else if (buf != name)
949 strlcpy(buf, name, IFNAMSIZ);
950
951 return 0;
952}
884 953
885/** 954/**
886 * dev_change_name - change name of a device 955 * dev_change_name - change name of a device
@@ -904,28 +973,20 @@ int dev_change_name(struct net_device *dev, const char *newname)
904 if (dev->flags & IFF_UP) 973 if (dev->flags & IFF_UP)
905 return -EBUSY; 974 return -EBUSY;
906 975
907 if (!dev_valid_name(newname))
908 return -EINVAL;
909
910 if (strncmp(newname, dev->name, IFNAMSIZ) == 0) 976 if (strncmp(newname, dev->name, IFNAMSIZ) == 0)
911 return 0; 977 return 0;
912 978
913 memcpy(oldname, dev->name, IFNAMSIZ); 979 memcpy(oldname, dev->name, IFNAMSIZ);
914 980
915 if (strchr(newname, '%')) { 981 err = dev_get_valid_name(net, newname, dev->name, 1);
916 err = dev_alloc_name(dev, newname); 982 if (err < 0)
917 if (err < 0) 983 return err;
918 return err;
919 } else if (__dev_get_by_name(net, newname))
920 return -EEXIST;
921 else
922 strlcpy(dev->name, newname, IFNAMSIZ);
923 984
924rollback: 985rollback:
925 /* For now only devices in the initial network namespace 986 /* For now only devices in the initial network namespace
926 * are in sysfs. 987 * are in sysfs.
927 */ 988 */
928 if (net == &init_net) { 989 if (net_eq(net, &init_net)) {
929 ret = device_rename(&dev->dev, dev->name); 990 ret = device_rename(&dev->dev, dev->name);
930 if (ret) { 991 if (ret) {
931 memcpy(dev->name, oldname, IFNAMSIZ); 992 memcpy(dev->name, oldname, IFNAMSIZ);
@@ -935,21 +996,27 @@ rollback:
935 996
936 write_lock_bh(&dev_base_lock); 997 write_lock_bh(&dev_base_lock);
937 hlist_del(&dev->name_hlist); 998 hlist_del(&dev->name_hlist);
938 hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name)); 999 write_unlock_bh(&dev_base_lock);
1000
1001 synchronize_rcu();
1002
1003 write_lock_bh(&dev_base_lock);
1004 hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name));
939 write_unlock_bh(&dev_base_lock); 1005 write_unlock_bh(&dev_base_lock);
940 1006
941 ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev); 1007 ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev);
942 ret = notifier_to_errno(ret); 1008 ret = notifier_to_errno(ret);
943 1009
944 if (ret) { 1010 if (ret) {
945 if (err) { 1011 /* err >= 0 after dev_alloc_name() or stores the first errno */
946 printk(KERN_ERR 1012 if (err >= 0) {
947 "%s: name change rollback failed: %d.\n",
948 dev->name, ret);
949 } else {
950 err = ret; 1013 err = ret;
951 memcpy(dev->name, oldname, IFNAMSIZ); 1014 memcpy(dev->name, oldname, IFNAMSIZ);
952 goto rollback; 1015 goto rollback;
1016 } else {
1017 printk(KERN_ERR
1018 "%s: name change rollback failed: %d.\n",
1019 dev->name, ret);
953 } 1020 }
954 } 1021 }
955 1022
@@ -1037,9 +1104,9 @@ void dev_load(struct net *net, const char *name)
1037{ 1104{
1038 struct net_device *dev; 1105 struct net_device *dev;
1039 1106
1040 read_lock(&dev_base_lock); 1107 rcu_read_lock();
1041 dev = __dev_get_by_name(net, name); 1108 dev = dev_get_by_name_rcu(net, name);
1042 read_unlock(&dev_base_lock); 1109 rcu_read_unlock();
1043 1110
1044 if (!dev && capable(CAP_NET_ADMIN)) 1111 if (!dev && capable(CAP_NET_ADMIN))
1045 request_module("%s", name); 1112 request_module("%s", name);
@@ -1286,6 +1353,7 @@ rollback:
1286 nb->notifier_call(nb, NETDEV_DOWN, dev); 1353 nb->notifier_call(nb, NETDEV_DOWN, dev);
1287 } 1354 }
1288 nb->notifier_call(nb, NETDEV_UNREGISTER, dev); 1355 nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
1356 nb->notifier_call(nb, NETDEV_UNREGISTER_BATCH, dev);
1289 } 1357 }
1290 } 1358 }
1291 1359
@@ -1352,6 +1420,45 @@ static inline void net_timestamp(struct sk_buff *skb)
1352 skb->tstamp.tv64 = 0; 1420 skb->tstamp.tv64 = 0;
1353} 1421}
1354 1422
1423/**
1424 * dev_forward_skb - loopback an skb to another netif
1425 *
1426 * @dev: destination network device
1427 * @skb: buffer to forward
1428 *
1429 * return values:
1430 * NET_RX_SUCCESS (no congestion)
1431 * NET_RX_DROP (packet was dropped)
1432 *
1433 * dev_forward_skb can be used for injecting an skb from the
1434 * start_xmit function of one device into the receive queue
1435 * of another device.
1436 *
1437 * The receiving device may be in another namespace, so
1438 * we have to clear all information in the skb that could
1439 * impact namespace isolation.
1440 */
1441int dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
1442{
1443 skb_orphan(skb);
1444
1445 if (!(dev->flags & IFF_UP))
1446 return NET_RX_DROP;
1447
1448 if (skb->len > (dev->mtu + dev->hard_header_len))
1449 return NET_RX_DROP;
1450
1451 skb_dst_drop(skb);
1452 skb->tstamp.tv64 = 0;
1453 skb->pkt_type = PACKET_HOST;
1454 skb->protocol = eth_type_trans(skb, dev);
1455 skb->mark = 0;
1456 secpath_reset(skb);
1457 nf_reset(skb);
1458 return netif_rx(skb);
1459}
1460EXPORT_SYMBOL_GPL(dev_forward_skb);
1461
1355/* 1462/*
1356 * Support routine. Sends outgoing frames to any network 1463 * Support routine. Sends outgoing frames to any network
1357 * taps currently in use. 1464 * taps currently in use.
@@ -1700,7 +1807,7 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
1700 struct netdev_queue *txq) 1807 struct netdev_queue *txq)
1701{ 1808{
1702 const struct net_device_ops *ops = dev->netdev_ops; 1809 const struct net_device_ops *ops = dev->netdev_ops;
1703 int rc; 1810 int rc = NETDEV_TX_OK;
1704 1811
1705 if (likely(!skb->next)) { 1812 if (likely(!skb->next)) {
1706 if (!list_empty(&ptype_all)) 1813 if (!list_empty(&ptype_all))
@@ -1748,6 +1855,8 @@ gso:
1748 nskb->next = NULL; 1855 nskb->next = NULL;
1749 rc = ops->ndo_start_xmit(nskb, dev); 1856 rc = ops->ndo_start_xmit(nskb, dev);
1750 if (unlikely(rc != NETDEV_TX_OK)) { 1857 if (unlikely(rc != NETDEV_TX_OK)) {
1858 if (rc & ~NETDEV_TX_MASK)
1859 goto out_kfree_gso_skb;
1751 nskb->next = skb->next; 1860 nskb->next = skb->next;
1752 skb->next = nskb; 1861 skb->next = nskb;
1753 return rc; 1862 return rc;
@@ -1757,11 +1866,12 @@ gso:
1757 return NETDEV_TX_BUSY; 1866 return NETDEV_TX_BUSY;
1758 } while (skb->next); 1867 } while (skb->next);
1759 1868
1760 skb->destructor = DEV_GSO_CB(skb)->destructor; 1869out_kfree_gso_skb:
1761 1870 if (likely(skb->next == NULL))
1871 skb->destructor = DEV_GSO_CB(skb)->destructor;
1762out_kfree_skb: 1872out_kfree_skb:
1763 kfree_skb(skb); 1873 kfree_skb(skb);
1764 return NETDEV_TX_OK; 1874 return rc;
1765} 1875}
1766 1876
1767static u32 skb_tx_hashrnd; 1877static u32 skb_tx_hashrnd;
@@ -1788,16 +1898,43 @@ u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb)
1788} 1898}
1789EXPORT_SYMBOL(skb_tx_hash); 1899EXPORT_SYMBOL(skb_tx_hash);
1790 1900
1901static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index)
1902{
1903 if (unlikely(queue_index >= dev->real_num_tx_queues)) {
1904 if (net_ratelimit()) {
1905 WARN(1, "%s selects TX queue %d, but "
1906 "real number of TX queues is %d\n",
1907 dev->name, queue_index,
1908 dev->real_num_tx_queues);
1909 }
1910 return 0;
1911 }
1912 return queue_index;
1913}
1914
1791static struct netdev_queue *dev_pick_tx(struct net_device *dev, 1915static struct netdev_queue *dev_pick_tx(struct net_device *dev,
1792 struct sk_buff *skb) 1916 struct sk_buff *skb)
1793{ 1917{
1794 const struct net_device_ops *ops = dev->netdev_ops; 1918 u16 queue_index;
1795 u16 queue_index = 0; 1919 struct sock *sk = skb->sk;
1920
1921 if (sk_tx_queue_recorded(sk)) {
1922 queue_index = sk_tx_queue_get(sk);
1923 } else {
1924 const struct net_device_ops *ops = dev->netdev_ops;
1925
1926 if (ops->ndo_select_queue) {
1927 queue_index = ops->ndo_select_queue(dev, skb);
1928 queue_index = dev_cap_txqueue(dev, queue_index);
1929 } else {
1930 queue_index = 0;
1931 if (dev->real_num_tx_queues > 1)
1932 queue_index = skb_tx_hash(dev, skb);
1796 1933
1797 if (ops->ndo_select_queue) 1934 if (sk && sk->sk_dst_cache)
1798 queue_index = ops->ndo_select_queue(dev, skb); 1935 sk_tx_queue_set(sk, queue_index);
1799 else if (dev->real_num_tx_queues > 1) 1936 }
1800 queue_index = skb_tx_hash(dev, skb); 1937 }
1801 1938
1802 skb_set_queue_mapping(skb, queue_index); 1939 skb_set_queue_mapping(skb, queue_index);
1803 return netdev_get_tx_queue(dev, queue_index); 1940 return netdev_get_tx_queue(dev, queue_index);
@@ -1934,8 +2071,8 @@ gso:
1934 HARD_TX_LOCK(dev, txq, cpu); 2071 HARD_TX_LOCK(dev, txq, cpu);
1935 2072
1936 if (!netif_tx_queue_stopped(txq)) { 2073 if (!netif_tx_queue_stopped(txq)) {
1937 rc = NET_XMIT_SUCCESS; 2074 rc = dev_hard_start_xmit(skb, dev, txq);
1938 if (!dev_hard_start_xmit(skb, dev, txq)) { 2075 if (dev_xmit_complete(rc)) {
1939 HARD_TX_UNLOCK(dev, txq); 2076 HARD_TX_UNLOCK(dev, txq);
1940 goto out; 2077 goto out;
1941 } 2078 }
@@ -2190,7 +2327,7 @@ static int ing_filter(struct sk_buff *skb)
2190 if (MAX_RED_LOOP < ttl++) { 2327 if (MAX_RED_LOOP < ttl++) {
2191 printk(KERN_WARNING 2328 printk(KERN_WARNING
2192 "Redir loop detected Dropping packet (%d->%d)\n", 2329 "Redir loop detected Dropping packet (%d->%d)\n",
2193 skb->iif, dev->ifindex); 2330 skb->skb_iif, dev->ifindex);
2194 return TC_ACT_SHOT; 2331 return TC_ACT_SHOT;
2195 } 2332 }
2196 2333
@@ -2291,15 +2428,15 @@ int netif_receive_skb(struct sk_buff *skb)
2291 if (!skb->tstamp.tv64) 2428 if (!skb->tstamp.tv64)
2292 net_timestamp(skb); 2429 net_timestamp(skb);
2293 2430
2294 if (skb->vlan_tci && vlan_hwaccel_do_receive(skb)) 2431 if (vlan_tx_tag_present(skb) && vlan_hwaccel_do_receive(skb))
2295 return NET_RX_SUCCESS; 2432 return NET_RX_SUCCESS;
2296 2433
2297 /* if we've gotten here through NAPI, check netpoll */ 2434 /* if we've gotten here through NAPI, check netpoll */
2298 if (netpoll_receive_skb(skb)) 2435 if (netpoll_receive_skb(skb))
2299 return NET_RX_DROP; 2436 return NET_RX_DROP;
2300 2437
2301 if (!skb->iif) 2438 if (!skb->skb_iif)
2302 skb->iif = skb->dev->ifindex; 2439 skb->skb_iif = skb->dev->ifindex;
2303 2440
2304 null_or_orig = NULL; 2441 null_or_orig = NULL;
2305 orig_dev = skb->dev; 2442 orig_dev = skb->dev;
@@ -2439,7 +2576,7 @@ void napi_gro_flush(struct napi_struct *napi)
2439} 2576}
2440EXPORT_SYMBOL(napi_gro_flush); 2577EXPORT_SYMBOL(napi_gro_flush);
2441 2578
2442int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) 2579enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
2443{ 2580{
2444 struct sk_buff **pp = NULL; 2581 struct sk_buff **pp = NULL;
2445 struct packet_type *ptype; 2582 struct packet_type *ptype;
@@ -2447,7 +2584,7 @@ int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
2447 struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK]; 2584 struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
2448 int same_flow; 2585 int same_flow;
2449 int mac_len; 2586 int mac_len;
2450 int ret; 2587 enum gro_result ret;
2451 2588
2452 if (!(skb->dev->features & NETIF_F_GRO)) 2589 if (!(skb->dev->features & NETIF_F_GRO))
2453 goto normal; 2590 goto normal;
@@ -2531,7 +2668,8 @@ normal:
2531} 2668}
2532EXPORT_SYMBOL(dev_gro_receive); 2669EXPORT_SYMBOL(dev_gro_receive);
2533 2670
2534static int __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) 2671static gro_result_t
2672__napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
2535{ 2673{
2536 struct sk_buff *p; 2674 struct sk_buff *p;
2537 2675
@@ -2539,33 +2677,35 @@ static int __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
2539 return GRO_NORMAL; 2677 return GRO_NORMAL;
2540 2678
2541 for (p = napi->gro_list; p; p = p->next) { 2679 for (p = napi->gro_list; p; p = p->next) {
2542 NAPI_GRO_CB(p)->same_flow = (p->dev == skb->dev) 2680 NAPI_GRO_CB(p)->same_flow =
2543 && !compare_ether_header(skb_mac_header(p), 2681 (p->dev == skb->dev) &&
2544 skb_gro_mac_header(skb)); 2682 !compare_ether_header(skb_mac_header(p),
2683 skb_gro_mac_header(skb));
2545 NAPI_GRO_CB(p)->flush = 0; 2684 NAPI_GRO_CB(p)->flush = 0;
2546 } 2685 }
2547 2686
2548 return dev_gro_receive(napi, skb); 2687 return dev_gro_receive(napi, skb);
2549} 2688}
2550 2689
2551int napi_skb_finish(int ret, struct sk_buff *skb) 2690gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
2552{ 2691{
2553 int err = NET_RX_SUCCESS;
2554
2555 switch (ret) { 2692 switch (ret) {
2556 case GRO_NORMAL: 2693 case GRO_NORMAL:
2557 return netif_receive_skb(skb); 2694 if (netif_receive_skb(skb))
2695 ret = GRO_DROP;
2696 break;
2558 2697
2559 case GRO_DROP: 2698 case GRO_DROP:
2560 err = NET_RX_DROP;
2561 /* fall through */
2562
2563 case GRO_MERGED_FREE: 2699 case GRO_MERGED_FREE:
2564 kfree_skb(skb); 2700 kfree_skb(skb);
2565 break; 2701 break;
2702
2703 case GRO_HELD:
2704 case GRO_MERGED:
2705 break;
2566 } 2706 }
2567 2707
2568 return err; 2708 return ret;
2569} 2709}
2570EXPORT_SYMBOL(napi_skb_finish); 2710EXPORT_SYMBOL(napi_skb_finish);
2571 2711
@@ -2585,7 +2725,7 @@ void skb_gro_reset_offset(struct sk_buff *skb)
2585} 2725}
2586EXPORT_SYMBOL(skb_gro_reset_offset); 2726EXPORT_SYMBOL(skb_gro_reset_offset);
2587 2727
2588int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) 2728gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
2589{ 2729{
2590 skb_gro_reset_offset(skb); 2730 skb_gro_reset_offset(skb);
2591 2731
@@ -2604,49 +2744,41 @@ EXPORT_SYMBOL(napi_reuse_skb);
2604 2744
2605struct sk_buff *napi_get_frags(struct napi_struct *napi) 2745struct sk_buff *napi_get_frags(struct napi_struct *napi)
2606{ 2746{
2607 struct net_device *dev = napi->dev;
2608 struct sk_buff *skb = napi->skb; 2747 struct sk_buff *skb = napi->skb;
2609 2748
2610 if (!skb) { 2749 if (!skb) {
2611 skb = netdev_alloc_skb(dev, GRO_MAX_HEAD + NET_IP_ALIGN); 2750 skb = netdev_alloc_skb_ip_align(napi->dev, GRO_MAX_HEAD);
2612 if (!skb) 2751 if (skb)
2613 goto out; 2752 napi->skb = skb;
2614
2615 skb_reserve(skb, NET_IP_ALIGN);
2616
2617 napi->skb = skb;
2618 } 2753 }
2619
2620out:
2621 return skb; 2754 return skb;
2622} 2755}
2623EXPORT_SYMBOL(napi_get_frags); 2756EXPORT_SYMBOL(napi_get_frags);
2624 2757
2625int napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, int ret) 2758gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb,
2759 gro_result_t ret)
2626{ 2760{
2627 int err = NET_RX_SUCCESS;
2628
2629 switch (ret) { 2761 switch (ret) {
2630 case GRO_NORMAL: 2762 case GRO_NORMAL:
2631 case GRO_HELD: 2763 case GRO_HELD:
2632 skb->protocol = eth_type_trans(skb, napi->dev); 2764 skb->protocol = eth_type_trans(skb, napi->dev);
2633 2765
2634 if (ret == GRO_NORMAL) 2766 if (ret == GRO_HELD)
2635 return netif_receive_skb(skb); 2767 skb_gro_pull(skb, -ETH_HLEN);
2636 2768 else if (netif_receive_skb(skb))
2637 skb_gro_pull(skb, -ETH_HLEN); 2769 ret = GRO_DROP;
2638 break; 2770 break;
2639 2771
2640 case GRO_DROP: 2772 case GRO_DROP:
2641 err = NET_RX_DROP;
2642 /* fall through */
2643
2644 case GRO_MERGED_FREE: 2773 case GRO_MERGED_FREE:
2645 napi_reuse_skb(napi, skb); 2774 napi_reuse_skb(napi, skb);
2646 break; 2775 break;
2776
2777 case GRO_MERGED:
2778 break;
2647 } 2779 }
2648 2780
2649 return err; 2781 return ret;
2650} 2782}
2651EXPORT_SYMBOL(napi_frags_finish); 2783EXPORT_SYMBOL(napi_frags_finish);
2652 2784
@@ -2687,12 +2819,12 @@ out:
2687} 2819}
2688EXPORT_SYMBOL(napi_frags_skb); 2820EXPORT_SYMBOL(napi_frags_skb);
2689 2821
2690int napi_gro_frags(struct napi_struct *napi) 2822gro_result_t napi_gro_frags(struct napi_struct *napi)
2691{ 2823{
2692 struct sk_buff *skb = napi_frags_skb(napi); 2824 struct sk_buff *skb = napi_frags_skb(napi);
2693 2825
2694 if (!skb) 2826 if (!skb)
2695 return NET_RX_DROP; 2827 return GRO_DROP;
2696 2828
2697 return napi_frags_finish(napi, skb, __napi_gro_receive(napi, skb)); 2829 return napi_frags_finish(napi, skb, __napi_gro_receive(napi, skb));
2698} 2830}
@@ -2937,15 +3069,15 @@ static int dev_ifname(struct net *net, struct ifreq __user *arg)
2937 if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) 3069 if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
2938 return -EFAULT; 3070 return -EFAULT;
2939 3071
2940 read_lock(&dev_base_lock); 3072 rcu_read_lock();
2941 dev = __dev_get_by_index(net, ifr.ifr_ifindex); 3073 dev = dev_get_by_index_rcu(net, ifr.ifr_ifindex);
2942 if (!dev) { 3074 if (!dev) {
2943 read_unlock(&dev_base_lock); 3075 rcu_read_unlock();
2944 return -ENODEV; 3076 return -ENODEV;
2945 } 3077 }
2946 3078
2947 strcpy(ifr.ifr_name, dev->name); 3079 strcpy(ifr.ifr_name, dev->name);
2948 read_unlock(&dev_base_lock); 3080 rcu_read_unlock();
2949 3081
2950 if (copy_to_user(arg, &ifr, sizeof(struct ifreq))) 3082 if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
2951 return -EFAULT; 3083 return -EFAULT;
@@ -3015,18 +3147,18 @@ static int dev_ifconf(struct net *net, char __user *arg)
3015 * in detail. 3147 * in detail.
3016 */ 3148 */
3017void *dev_seq_start(struct seq_file *seq, loff_t *pos) 3149void *dev_seq_start(struct seq_file *seq, loff_t *pos)
3018 __acquires(dev_base_lock) 3150 __acquires(RCU)
3019{ 3151{
3020 struct net *net = seq_file_net(seq); 3152 struct net *net = seq_file_net(seq);
3021 loff_t off; 3153 loff_t off;
3022 struct net_device *dev; 3154 struct net_device *dev;
3023 3155
3024 read_lock(&dev_base_lock); 3156 rcu_read_lock();
3025 if (!*pos) 3157 if (!*pos)
3026 return SEQ_START_TOKEN; 3158 return SEQ_START_TOKEN;
3027 3159
3028 off = 1; 3160 off = 1;
3029 for_each_netdev(net, dev) 3161 for_each_netdev_rcu(net, dev)
3030 if (off++ == *pos) 3162 if (off++ == *pos)
3031 return dev; 3163 return dev;
3032 3164
@@ -3035,16 +3167,18 @@ void *dev_seq_start(struct seq_file *seq, loff_t *pos)
3035 3167
3036void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos) 3168void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3037{ 3169{
3038 struct net *net = seq_file_net(seq); 3170 struct net_device *dev = (v == SEQ_START_TOKEN) ?
3171 first_net_device(seq_file_net(seq)) :
3172 next_net_device((struct net_device *)v);
3173
3039 ++*pos; 3174 ++*pos;
3040 return v == SEQ_START_TOKEN ? 3175 return rcu_dereference(dev);
3041 first_net_device(net) : next_net_device((struct net_device *)v);
3042} 3176}
3043 3177
3044void dev_seq_stop(struct seq_file *seq, void *v) 3178void dev_seq_stop(struct seq_file *seq, void *v)
3045 __releases(dev_base_lock) 3179 __releases(RCU)
3046{ 3180{
3047 read_unlock(&dev_base_lock); 3181 rcu_read_unlock();
3048} 3182}
3049 3183
3050static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev) 3184static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
@@ -4253,12 +4387,12 @@ int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
4253EXPORT_SYMBOL(dev_set_mac_address); 4387EXPORT_SYMBOL(dev_set_mac_address);
4254 4388
4255/* 4389/*
4256 * Perform the SIOCxIFxxx calls, inside read_lock(dev_base_lock) 4390 * Perform the SIOCxIFxxx calls, inside rcu_read_lock()
4257 */ 4391 */
4258static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cmd) 4392static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cmd)
4259{ 4393{
4260 int err; 4394 int err;
4261 struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name); 4395 struct net_device *dev = dev_get_by_name_rcu(net, ifr->ifr_name);
4262 4396
4263 if (!dev) 4397 if (!dev)
4264 return -ENODEV; 4398 return -ENODEV;
@@ -4490,9 +4624,9 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
4490 case SIOCGIFINDEX: 4624 case SIOCGIFINDEX:
4491 case SIOCGIFTXQLEN: 4625 case SIOCGIFTXQLEN:
4492 dev_load(net, ifr.ifr_name); 4626 dev_load(net, ifr.ifr_name);
4493 read_lock(&dev_base_lock); 4627 rcu_read_lock();
4494 ret = dev_ifsioc_locked(net, &ifr, cmd); 4628 ret = dev_ifsioc_locked(net, &ifr, cmd);
4495 read_unlock(&dev_base_lock); 4629 rcu_read_unlock();
4496 if (!ret) { 4630 if (!ret) {
4497 if (colon) 4631 if (colon)
4498 *colon = ':'; 4632 *colon = ':';
@@ -4635,59 +4769,82 @@ static void net_set_todo(struct net_device *dev)
4635 list_add_tail(&dev->todo_list, &net_todo_list); 4769 list_add_tail(&dev->todo_list, &net_todo_list);
4636} 4770}
4637 4771
4638static void rollback_registered(struct net_device *dev) 4772static void rollback_registered_many(struct list_head *head)
4639{ 4773{
4774 struct net_device *dev, *tmp;
4775
4640 BUG_ON(dev_boot_phase); 4776 BUG_ON(dev_boot_phase);
4641 ASSERT_RTNL(); 4777 ASSERT_RTNL();
4642 4778
4643 /* Some devices call without registering for initialization unwind. */ 4779 list_for_each_entry_safe(dev, tmp, head, unreg_list) {
4644 if (dev->reg_state == NETREG_UNINITIALIZED) { 4780 /* Some devices call without registering
4645 printk(KERN_DEBUG "unregister_netdevice: device %s/%p never " 4781 * for initialization unwind. Remove those
4646 "was registered\n", dev->name, dev); 4782 * devices and proceed with the remaining.
4783 */
4784 if (dev->reg_state == NETREG_UNINITIALIZED) {
4785 pr_debug("unregister_netdevice: device %s/%p never "
4786 "was registered\n", dev->name, dev);
4647 4787
4648 WARN_ON(1); 4788 WARN_ON(1);
4649 return; 4789 list_del(&dev->unreg_list);
4650 } 4790 continue;
4791 }
4651 4792
4652 BUG_ON(dev->reg_state != NETREG_REGISTERED); 4793 BUG_ON(dev->reg_state != NETREG_REGISTERED);
4653 4794
4654 /* If device is running, close it first. */ 4795 /* If device is running, close it first. */
4655 dev_close(dev); 4796 dev_close(dev);
4656 4797
4657 /* And unlink it from device chain. */ 4798 /* And unlink it from device chain. */
4658 unlist_netdevice(dev); 4799 unlist_netdevice(dev);
4659 4800
4660 dev->reg_state = NETREG_UNREGISTERING; 4801 dev->reg_state = NETREG_UNREGISTERING;
4802 }
4661 4803
4662 synchronize_net(); 4804 synchronize_net();
4663 4805
4664 /* Shutdown queueing discipline. */ 4806 list_for_each_entry(dev, head, unreg_list) {
4665 dev_shutdown(dev); 4807 /* Shutdown queueing discipline. */
4808 dev_shutdown(dev);
4666 4809
4667 4810
4668 /* Notify protocols, that we are about to destroy 4811 /* Notify protocols, that we are about to destroy
4669 this device. They should clean all the things. 4812 this device. They should clean all the things.
4670 */ 4813 */
4671 call_netdevice_notifiers(NETDEV_UNREGISTER, dev); 4814 call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
4672 4815
4673 /* 4816 /*
4674 * Flush the unicast and multicast chains 4817 * Flush the unicast and multicast chains
4675 */ 4818 */
4676 dev_unicast_flush(dev); 4819 dev_unicast_flush(dev);
4677 dev_addr_discard(dev); 4820 dev_addr_discard(dev);
4678 4821
4679 if (dev->netdev_ops->ndo_uninit) 4822 if (dev->netdev_ops->ndo_uninit)
4680 dev->netdev_ops->ndo_uninit(dev); 4823 dev->netdev_ops->ndo_uninit(dev);
4681 4824
4682 /* Notifier chain MUST detach us from master device. */ 4825 /* Notifier chain MUST detach us from master device. */
4683 WARN_ON(dev->master); 4826 WARN_ON(dev->master);
4684 4827
4685 /* Remove entries from kobject tree */ 4828 /* Remove entries from kobject tree */
4686 netdev_unregister_kobject(dev); 4829 netdev_unregister_kobject(dev);
4830 }
4831
4832 /* Process any work delayed until the end of the batch */
4833 dev = list_entry(head->next, struct net_device, unreg_list);
4834 call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev);
4687 4835
4688 synchronize_net(); 4836 synchronize_net();
4689 4837
4690 dev_put(dev); 4838 list_for_each_entry(dev, head, unreg_list)
4839 dev_put(dev);
4840}
4841
4842static void rollback_registered(struct net_device *dev)
4843{
4844 LIST_HEAD(single);
4845
4846 list_add(&dev->unreg_list, &single);
4847 rollback_registered_many(&single);
4691} 4848}
4692 4849
4693static void __netdev_init_queue_locks_one(struct net_device *dev, 4850static void __netdev_init_queue_locks_one(struct net_device *dev,
@@ -4746,6 +4903,33 @@ unsigned long netdev_fix_features(unsigned long features, const char *name)
4746EXPORT_SYMBOL(netdev_fix_features); 4903EXPORT_SYMBOL(netdev_fix_features);
4747 4904
4748/** 4905/**
4906 * netif_stacked_transfer_operstate - transfer operstate
4907 * @rootdev: the root or lower level device to transfer state from
4908 * @dev: the device to transfer operstate to
4909 *
4910 * Transfer operational state from root to device. This is normally
4911 * called when a stacking relationship exists between the root
4912 * device and the device (a leaf device).
4913 */
4914void netif_stacked_transfer_operstate(const struct net_device *rootdev,
4915 struct net_device *dev)
4916{
4917 if (rootdev->operstate == IF_OPER_DORMANT)
4918 netif_dormant_on(dev);
4919 else
4920 netif_dormant_off(dev);
4921
4922 if (netif_carrier_ok(rootdev)) {
4923 if (!netif_carrier_ok(dev))
4924 netif_carrier_on(dev);
4925 } else {
4926 if (netif_carrier_ok(dev))
4927 netif_carrier_off(dev);
4928 }
4929}
4930EXPORT_SYMBOL(netif_stacked_transfer_operstate);
4931
4932/**
4749 * register_netdevice - register a network device 4933 * register_netdevice - register a network device
4750 * @dev: device to register 4934 * @dev: device to register
4751 * 4935 *
@@ -4764,8 +4948,6 @@ EXPORT_SYMBOL(netdev_fix_features);
4764 4948
4765int register_netdevice(struct net_device *dev) 4949int register_netdevice(struct net_device *dev)
4766{ 4950{
4767 struct hlist_head *head;
4768 struct hlist_node *p;
4769 int ret; 4951 int ret;
4770 struct net *net = dev_net(dev); 4952 struct net *net = dev_net(dev);
4771 4953
@@ -4794,26 +4976,14 @@ int register_netdevice(struct net_device *dev)
4794 } 4976 }
4795 } 4977 }
4796 4978
4797 if (!dev_valid_name(dev->name)) { 4979 ret = dev_get_valid_name(net, dev->name, dev->name, 0);
4798 ret = -EINVAL; 4980 if (ret)
4799 goto err_uninit; 4981 goto err_uninit;
4800 }
4801 4982
4802 dev->ifindex = dev_new_index(net); 4983 dev->ifindex = dev_new_index(net);
4803 if (dev->iflink == -1) 4984 if (dev->iflink == -1)
4804 dev->iflink = dev->ifindex; 4985 dev->iflink = dev->ifindex;
4805 4986
4806 /* Check for existence of name */
4807 head = dev_name_hash(net, dev->name);
4808 hlist_for_each(p, head) {
4809 struct net_device *d
4810 = hlist_entry(p, struct net_device, name_hlist);
4811 if (!strncmp(d->name, dev->name, IFNAMSIZ)) {
4812 ret = -EEXIST;
4813 goto err_uninit;
4814 }
4815 }
4816
4817 /* Fix illegal checksum combinations */ 4987 /* Fix illegal checksum combinations */
4818 if ((dev->features & NETIF_F_HW_CSUM) && 4988 if ((dev->features & NETIF_F_HW_CSUM) &&
4819 (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { 4989 (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
@@ -4836,6 +5006,12 @@ int register_netdevice(struct net_device *dev)
4836 dev->features |= NETIF_F_GSO; 5006 dev->features |= NETIF_F_GSO;
4837 5007
4838 netdev_initialize_kobject(dev); 5008 netdev_initialize_kobject(dev);
5009
5010 ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev);
5011 ret = notifier_to_errno(ret);
5012 if (ret)
5013 goto err_uninit;
5014
4839 ret = netdev_register_kobject(dev); 5015 ret = netdev_register_kobject(dev);
4840 if (ret) 5016 if (ret)
4841 goto err_uninit; 5017 goto err_uninit;
@@ -4859,6 +5035,11 @@ int register_netdevice(struct net_device *dev)
4859 rollback_registered(dev); 5035 rollback_registered(dev);
4860 dev->reg_state = NETREG_UNREGISTERED; 5036 dev->reg_state = NETREG_UNREGISTERED;
4861 } 5037 }
5038 /*
5039 * Prevent userspace races by waiting until the network
5040 * device is fully setup before sending notifications.
5041 */
5042 rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U);
4862 5043
4863out: 5044out:
4864 return ret; 5045 return ret;
@@ -4960,6 +5141,8 @@ static void netdev_wait_allrefs(struct net_device *dev)
4960{ 5141{
4961 unsigned long rebroadcast_time, warning_time; 5142 unsigned long rebroadcast_time, warning_time;
4962 5143
5144 linkwatch_forget_dev(dev);
5145
4963 rebroadcast_time = warning_time = jiffies; 5146 rebroadcast_time = warning_time = jiffies;
4964 while (atomic_read(&dev->refcnt) != 0) { 5147 while (atomic_read(&dev->refcnt) != 0) {
4965 if (time_after(jiffies, rebroadcast_time + 1 * HZ)) { 5148 if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
@@ -4967,6 +5150,8 @@ static void netdev_wait_allrefs(struct net_device *dev)
4967 5150
4968 /* Rebroadcast unregister notification */ 5151 /* Rebroadcast unregister notification */
4969 call_netdevice_notifiers(NETDEV_UNREGISTER, dev); 5152 call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
5153 /* don't resend NETDEV_UNREGISTER_BATCH, _BATCH users
5154 * should have already handled it the first time */
4970 5155
4971 if (test_bit(__LINK_STATE_LINKWATCH_PENDING, 5156 if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
4972 &dev->state)) { 5157 &dev->state)) {
@@ -5062,6 +5247,32 @@ void netdev_run_todo(void)
5062} 5247}
5063 5248
5064/** 5249/**
5250 * dev_txq_stats_fold - fold tx_queues stats
5251 * @dev: device to get statistics from
5252 * @stats: struct net_device_stats to hold results
5253 */
5254void dev_txq_stats_fold(const struct net_device *dev,
5255 struct net_device_stats *stats)
5256{
5257 unsigned long tx_bytes = 0, tx_packets = 0, tx_dropped = 0;
5258 unsigned int i;
5259 struct netdev_queue *txq;
5260
5261 for (i = 0; i < dev->num_tx_queues; i++) {
5262 txq = netdev_get_tx_queue(dev, i);
5263 tx_bytes += txq->tx_bytes;
5264 tx_packets += txq->tx_packets;
5265 tx_dropped += txq->tx_dropped;
5266 }
5267 if (tx_bytes || tx_packets || tx_dropped) {
5268 stats->tx_bytes = tx_bytes;
5269 stats->tx_packets = tx_packets;
5270 stats->tx_dropped = tx_dropped;
5271 }
5272}
5273EXPORT_SYMBOL(dev_txq_stats_fold);
5274
5275/**
5065 * dev_get_stats - get network device statistics 5276 * dev_get_stats - get network device statistics
5066 * @dev: device to get statistics from 5277 * @dev: device to get statistics from
5067 * 5278 *
@@ -5075,25 +5286,9 @@ const struct net_device_stats *dev_get_stats(struct net_device *dev)
5075 5286
5076 if (ops->ndo_get_stats) 5287 if (ops->ndo_get_stats)
5077 return ops->ndo_get_stats(dev); 5288 return ops->ndo_get_stats(dev);
5078 else { 5289
5079 unsigned long tx_bytes = 0, tx_packets = 0, tx_dropped = 0; 5290 dev_txq_stats_fold(dev, &dev->stats);
5080 struct net_device_stats *stats = &dev->stats; 5291 return &dev->stats;
5081 unsigned int i;
5082 struct netdev_queue *txq;
5083
5084 for (i = 0; i < dev->num_tx_queues; i++) {
5085 txq = netdev_get_tx_queue(dev, i);
5086 tx_bytes += txq->tx_bytes;
5087 tx_packets += txq->tx_packets;
5088 tx_dropped += txq->tx_dropped;
5089 }
5090 if (tx_bytes || tx_packets || tx_dropped) {
5091 stats->tx_bytes = tx_bytes;
5092 stats->tx_packets = tx_packets;
5093 stats->tx_dropped = tx_dropped;
5094 }
5095 return stats;
5096 }
5097} 5292}
5098EXPORT_SYMBOL(dev_get_stats); 5293EXPORT_SYMBOL(dev_get_stats);
5099 5294
@@ -5173,6 +5368,8 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
5173 netdev_init_queues(dev); 5368 netdev_init_queues(dev);
5174 5369
5175 INIT_LIST_HEAD(&dev->napi_list); 5370 INIT_LIST_HEAD(&dev->napi_list);
5371 INIT_LIST_HEAD(&dev->unreg_list);
5372 INIT_LIST_HEAD(&dev->link_watch_list);
5176 dev->priv_flags = IFF_XMIT_DST_RELEASE; 5373 dev->priv_flags = IFF_XMIT_DST_RELEASE;
5177 setup(dev); 5374 setup(dev);
5178 strcpy(dev->name, name); 5375 strcpy(dev->name, name);
@@ -5237,25 +5434,47 @@ void synchronize_net(void)
5237EXPORT_SYMBOL(synchronize_net); 5434EXPORT_SYMBOL(synchronize_net);
5238 5435
5239/** 5436/**
5240 * unregister_netdevice - remove device from the kernel 5437 * unregister_netdevice_queue - remove device from the kernel
5241 * @dev: device 5438 * @dev: device
5439 * @head: list
5242 * 5440 *
5243 * This function shuts down a device interface and removes it 5441 * This function shuts down a device interface and removes it
5244 * from the kernel tables. 5442 * from the kernel tables.
5443 * If head is not NULL, device is queued to be unregistered later.
5245 * 5444 *
5246 * Callers must hold the rtnl semaphore. You may want 5445 * Callers must hold the rtnl semaphore. You may want
5247 * unregister_netdev() instead of this. 5446 * unregister_netdev() instead of this.
5248 */ 5447 */
5249 5448
5250void unregister_netdevice(struct net_device *dev) 5449void unregister_netdevice_queue(struct net_device *dev, struct list_head *head)
5251{ 5450{
5252 ASSERT_RTNL(); 5451 ASSERT_RTNL();
5253 5452
5254 rollback_registered(dev); 5453 if (head) {
5255 /* Finish processing unregister after unlock */ 5454 list_move_tail(&dev->unreg_list, head);
5256 net_set_todo(dev); 5455 } else {
5456 rollback_registered(dev);
5457 /* Finish processing unregister after unlock */
5458 net_set_todo(dev);
5459 }
5460}
5461EXPORT_SYMBOL(unregister_netdevice_queue);
5462
5463/**
5464 * unregister_netdevice_many - unregister many devices
5465 * @head: list of devices
5466 */
5467void unregister_netdevice_many(struct list_head *head)
5468{
5469 struct net_device *dev;
5470
5471 if (!list_empty(head)) {
5472 rollback_registered_many(head);
5473 list_for_each_entry(dev, head, unreg_list)
5474 net_set_todo(dev);
5475 }
5257} 5476}
5258EXPORT_SYMBOL(unregister_netdevice); 5477EXPORT_SYMBOL(unregister_netdevice_many);
5259 5478
5260/** 5479/**
5261 * unregister_netdev - remove device from the kernel 5480 * unregister_netdev - remove device from the kernel
@@ -5292,8 +5511,6 @@ EXPORT_SYMBOL(unregister_netdev);
5292 5511
5293int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat) 5512int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat)
5294{ 5513{
5295 char buf[IFNAMSIZ];
5296 const char *destname;
5297 int err; 5514 int err;
5298 5515
5299 ASSERT_RTNL(); 5516 ASSERT_RTNL();
@@ -5326,20 +5543,11 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
5326 * we can use it in the destination network namespace. 5543 * we can use it in the destination network namespace.
5327 */ 5544 */
5328 err = -EEXIST; 5545 err = -EEXIST;
5329 destname = dev->name; 5546 if (__dev_get_by_name(net, dev->name)) {
5330 if (__dev_get_by_name(net, destname)) {
5331 /* We get here if we can't use the current device name */ 5547 /* We get here if we can't use the current device name */
5332 if (!pat) 5548 if (!pat)
5333 goto out; 5549 goto out;
5334 if (!dev_valid_name(pat)) 5550 if (dev_get_valid_name(net, pat, dev->name, 1))
5335 goto out;
5336 if (strchr(pat, '%')) {
5337 if (__dev_alloc_name(net, pat, buf) < 0)
5338 goto out;
5339 destname = buf;
5340 } else
5341 destname = pat;
5342 if (__dev_get_by_name(net, destname))
5343 goto out; 5551 goto out;
5344 } 5552 }
5345 5553
@@ -5363,6 +5571,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
5363 this device. They should clean all the things. 5571 this device. They should clean all the things.
5364 */ 5572 */
5365 call_netdevice_notifiers(NETDEV_UNREGISTER, dev); 5573 call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
5574 call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev);
5366 5575
5367 /* 5576 /*
5368 * Flush the unicast and multicast chains 5577 * Flush the unicast and multicast chains
@@ -5375,10 +5584,6 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
5375 /* Actually switch the network namespace */ 5584 /* Actually switch the network namespace */
5376 dev_net_set(dev, net); 5585 dev_net_set(dev, net);
5377 5586
5378 /* Assign the new device name */
5379 if (destname != dev->name)
5380 strcpy(dev->name, destname);
5381
5382 /* If there is an ifindex conflict assign a new one */ 5587 /* If there is an ifindex conflict assign a new one */
5383 if (__dev_get_by_index(net, dev->ifindex)) { 5588 if (__dev_get_by_index(net, dev->ifindex)) {
5384 int iflink = (dev->iflink == dev->ifindex); 5589 int iflink = (dev->iflink == dev->ifindex);
@@ -5397,6 +5602,12 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
5397 /* Notify protocols, that a new device appeared. */ 5602 /* Notify protocols, that a new device appeared. */
5398 call_netdevice_notifiers(NETDEV_REGISTER, dev); 5603 call_netdevice_notifiers(NETDEV_REGISTER, dev);
5399 5604
5605 /*
5606 * Prevent userspace races by waiting until the network
5607 * device is fully setup before sending notifications.
5608 */
5609 rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U);
5610
5400 synchronize_net(); 5611 synchronize_net();
5401 err = 0; 5612 err = 0;
5402out: 5613out:
@@ -5483,7 +5694,7 @@ unsigned long netdev_increment_features(unsigned long all, unsigned long one,
5483 one |= NETIF_F_ALL_CSUM; 5694 one |= NETIF_F_ALL_CSUM;
5484 5695
5485 one |= all & NETIF_F_ONE_FOR_ALL; 5696 one |= all & NETIF_F_ONE_FOR_ALL;
5486 all &= one | NETIF_F_LLTX | NETIF_F_GSO; 5697 all &= one | NETIF_F_LLTX | NETIF_F_GSO | NETIF_F_UFO;
5487 all |= one & mask & NETIF_F_ONE_FOR_ALL; 5698 all |= one & mask & NETIF_F_ONE_FOR_ALL;
5488 5699
5489 return all; 5700 return all;
@@ -5565,14 +5776,13 @@ static struct pernet_operations __net_initdata netdev_net_ops = {
5565 5776
5566static void __net_exit default_device_exit(struct net *net) 5777static void __net_exit default_device_exit(struct net *net)
5567{ 5778{
5568 struct net_device *dev; 5779 struct net_device *dev, *aux;
5569 /* 5780 /*
5570 * Push all migratable of the network devices back to the 5781 * Push all migratable network devices back to the
5571 * initial network namespace 5782 * initial network namespace
5572 */ 5783 */
5573 rtnl_lock(); 5784 rtnl_lock();
5574restart: 5785 for_each_netdev_safe(net, dev, aux) {
5575 for_each_netdev(net, dev) {
5576 int err; 5786 int err;
5577 char fb_name[IFNAMSIZ]; 5787 char fb_name[IFNAMSIZ];
5578 5788
@@ -5580,11 +5790,9 @@ restart:
5580 if (dev->features & NETIF_F_NETNS_LOCAL) 5790 if (dev->features & NETIF_F_NETNS_LOCAL)
5581 continue; 5791 continue;
5582 5792
5583 /* Delete virtual devices */ 5793 /* Leave virtual devices for the generic cleanup */
5584 if (dev->rtnl_link_ops && dev->rtnl_link_ops->dellink) { 5794 if (dev->rtnl_link_ops)
5585 dev->rtnl_link_ops->dellink(dev); 5795 continue;
5586 goto restart;
5587 }
5588 5796
5589 /* Push remaining network devices to init_net */ 5797 /* Push remaining network devices to init_net */
5590 snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex); 5798 snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex);
@@ -5594,13 +5802,37 @@ restart:
5594 __func__, dev->name, err); 5802 __func__, dev->name, err);
5595 BUG(); 5803 BUG();
5596 } 5804 }
5597 goto restart;
5598 } 5805 }
5599 rtnl_unlock(); 5806 rtnl_unlock();
5600} 5807}
5601 5808
5809static void __net_exit default_device_exit_batch(struct list_head *net_list)
5810{
5811 /* At exit all network devices must be removed from a network
5812 * namespace. Do this in the reverse order of registration.
5813 * Do this across as many network namespaces as possible to
5814 * improve batching efficiency.
5815 */
5816 struct net_device *dev;
5817 struct net *net;
5818 LIST_HEAD(dev_kill_list);
5819
5820 rtnl_lock();
5821 list_for_each_entry(net, net_list, exit_list) {
5822 for_each_netdev_reverse(net, dev) {
5823 if (dev->rtnl_link_ops)
5824 dev->rtnl_link_ops->dellink(dev, &dev_kill_list);
5825 else
5826 unregister_netdevice_queue(dev, &dev_kill_list);
5827 }
5828 }
5829 unregister_netdevice_many(&dev_kill_list);
5830 rtnl_unlock();
5831}
5832
5602static struct pernet_operations __net_initdata default_device_ops = { 5833static struct pernet_operations __net_initdata default_device_ops = {
5603 .exit = default_device_exit, 5834 .exit = default_device_exit,
5835 .exit_batch = default_device_exit_batch,
5604}; 5836};
5605 5837
5606/* 5838/*