Diffstat (limited to 'net/core/dev.c')
 -rw-r--r-- net/core/dev.c | 654 ++++++++++++++++++++++++++++++++-----------------
 1 file changed, 436 insertions(+), 218 deletions(-)
diff --git a/net/core/dev.c b/net/core/dev.c
index fe10551d3671..c36a17aafcf3 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -79,6 +79,7 @@
 #include <linux/cpu.h>
 #include <linux/types.h>
 #include <linux/kernel.h>
+#include <linux/hash.h>
 #include <linux/sched.h>
 #include <linux/mutex.h>
 #include <linux/string.h>
@@ -104,6 +105,7 @@
 #include <net/dst.h>
 #include <net/pkt_sched.h>
 #include <net/checksum.h>
+#include <net/xfrm.h>
 #include <linux/highmem.h>
 #include <linux/init.h>
 #include <linux/kmod.h>
@@ -175,7 +177,7 @@ static struct list_head ptype_all __read_mostly; /* Taps */
  * The @dev_base_head list is protected by @dev_base_lock and the rtnl
  * semaphore.
  *
- * Pure readers hold dev_base_lock for reading.
+ * Pure readers hold dev_base_lock for reading, or rcu_read_lock()
  *
  * Writers must hold the rtnl semaphore while they loop through the
  * dev_base_head list, and hold dev_base_lock for writing when they do the
@@ -193,18 +195,15 @@ static struct list_head ptype_all __read_mostly; /* Taps */
 DEFINE_RWLOCK(dev_base_lock);
 EXPORT_SYMBOL(dev_base_lock);
 
-#define NETDEV_HASHBITS 8
-#define NETDEV_HASHENTRIES (1 << NETDEV_HASHBITS)
-
 static inline struct hlist_head *dev_name_hash(struct net *net, const char *name)
 {
         unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
-        return &net->dev_name_head[hash & ((1 << NETDEV_HASHBITS) - 1)];
+        return &net->dev_name_head[hash_32(hash, NETDEV_HASHBITS)];
 }
 
 static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
 {
-        return &net->dev_index_head[ifindex & (NETDEV_HASHENTRIES - 1)];
+        return &net->dev_index_head[ifindex & (NETDEV_HASHENTRIES - 1)];
 }
 
 /* Device list insertion */
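
The name-hash hunk above also changes the bucket math: the output of full_name_hash() used to be masked down to its low bits, while hash_32() multiplies by a golden-ratio constant and keeps the top NETDEV_HASHBITS bits, so every input bit influences the bucket. dev_index_hash() keeps a plain mask because ifindexes are small sequential integers whose low bits are already well spread. A minimal sketch of hash_32() as it looked in include/linux/hash.h around this time; treat the exact constant as an assumption from memory:

    /* sketch of hash_32(), after include/linux/hash.h (constant recalled, not quoted) */
    #define GOLDEN_RATIO_PRIME_32 0x9e370001UL

    static inline u32 hash_32(u32 val, unsigned int bits)
    {
            u32 hash = val * GOLDEN_RATIO_PRIME_32;
            /* the multiply mixes entropy upward; keep the best-mixed top bits */
            return hash >> (32 - bits);
    }
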
@@ -215,23 +214,26 @@ static int list_netdevice(struct net_device *dev)
         ASSERT_RTNL();
 
         write_lock_bh(&dev_base_lock);
-        list_add_tail(&dev->dev_list, &net->dev_base_head);
-        hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name));
-        hlist_add_head(&dev->index_hlist, dev_index_hash(net, dev->ifindex));
+        list_add_tail_rcu(&dev->dev_list, &net->dev_base_head);
+        hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name));
+        hlist_add_head_rcu(&dev->index_hlist,
+                           dev_index_hash(net, dev->ifindex));
         write_unlock_bh(&dev_base_lock);
         return 0;
 }
 
-/* Device list removal */
+/* Device list removal
+ * caller must respect a RCU grace period before freeing/reusing dev
+ */
 static void unlist_netdevice(struct net_device *dev)
 {
         ASSERT_RTNL();
 
         /* Unlink dev from the device chain */
         write_lock_bh(&dev_base_lock);
-        list_del(&dev->dev_list);
-        hlist_del(&dev->name_hlist);
-        hlist_del(&dev->index_hlist);
+        list_del_rcu(&dev->dev_list);
+        hlist_del_rcu(&dev->name_hlist);
+        hlist_del_rcu(&dev->index_hlist);
         write_unlock_bh(&dev_base_lock);
 }
 
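
Note the asymmetry the new comment creates: unlist_netdevice() unlinks with the _rcu list primitives but does not wait, so the caller owns the grace period before the struct net_device can be freed or reused. A hedged sketch of the generic pattern (the free call here is illustrative; the real teardown in this file goes through netdev_run_todo()):

    unlist_netdevice(dev);  /* lockless readers may still be walking to dev */
    synchronize_rcu();      /* wait for all pre-existing RCU read-side sections */
    free_netdev(dev);       /* only now may dev's memory be freed or reused */
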
@@ -587,18 +589,44 @@ __setup("netdev=", netdev_boot_setup);
 struct net_device *__dev_get_by_name(struct net *net, const char *name)
 {
         struct hlist_node *p;
+        struct net_device *dev;
+        struct hlist_head *head = dev_name_hash(net, name);
 
-        hlist_for_each(p, dev_name_hash(net, name)) {
-                struct net_device *dev
-                        = hlist_entry(p, struct net_device, name_hlist);
+        hlist_for_each_entry(dev, p, head, name_hlist)
                 if (!strncmp(dev->name, name, IFNAMSIZ))
                         return dev;
-        }
+
         return NULL;
 }
 EXPORT_SYMBOL(__dev_get_by_name);
 
 /**
+ * dev_get_by_name_rcu - find a device by its name
+ * @net: the applicable net namespace
+ * @name: name to find
+ *
+ * Find an interface by name.
+ * If the name is found a pointer to the device is returned.
+ * If the name is not found then %NULL is returned.
+ * The reference counters are not incremented so the caller must be
+ * careful with locks. The caller must hold RCU lock.
+ */
+
+struct net_device *dev_get_by_name_rcu(struct net *net, const char *name)
+{
+        struct hlist_node *p;
+        struct net_device *dev;
+        struct hlist_head *head = dev_name_hash(net, name);
+
+        hlist_for_each_entry_rcu(dev, p, head, name_hlist)
+                if (!strncmp(dev->name, name, IFNAMSIZ))
+                        return dev;
+
+        return NULL;
+}
+EXPORT_SYMBOL(dev_get_by_name_rcu);
+
+/**
  * dev_get_by_name - find a device by its name
  * @net: the applicable net namespace
  * @name: name to find
@@ -614,11 +642,11 @@ struct net_device *dev_get_by_name(struct net *net, const char *name)
 {
         struct net_device *dev;
 
-        read_lock(&dev_base_lock);
-        dev = __dev_get_by_name(net, name);
+        rcu_read_lock();
+        dev = dev_get_by_name_rcu(net, name);
         if (dev)
                 dev_hold(dev);
-        read_unlock(&dev_base_lock);
+        rcu_read_unlock();
         return dev;
 }
 EXPORT_SYMBOL(dev_get_by_name);
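
dev_get_by_name() above doubles as the model caller for the new RCU lookup. A sketch of the lockless form for a caller that only touches the device inside the read-side section and keeps no reference:

    struct net_device *dev;
    unsigned int mtu = 0;

    rcu_read_lock();
    dev = dev_get_by_name_rcu(net, "eth0");
    if (dev)
            mtu = dev->mtu; /* no dev_hold(), so dev must not be used after unlock */
    rcu_read_unlock();
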
@@ -638,17 +666,42 @@ EXPORT_SYMBOL(dev_get_by_name);
 struct net_device *__dev_get_by_index(struct net *net, int ifindex)
 {
         struct hlist_node *p;
+        struct net_device *dev;
+        struct hlist_head *head = dev_index_hash(net, ifindex);
 
-        hlist_for_each(p, dev_index_hash(net, ifindex)) {
-                struct net_device *dev
-                        = hlist_entry(p, struct net_device, index_hlist);
+        hlist_for_each_entry(dev, p, head, index_hlist)
                 if (dev->ifindex == ifindex)
                         return dev;
-        }
+
         return NULL;
 }
 EXPORT_SYMBOL(__dev_get_by_index);
 
+/**
+ * dev_get_by_index_rcu - find a device by its ifindex
+ * @net: the applicable net namespace
+ * @ifindex: index of device
+ *
+ * Search for an interface by index. Returns %NULL if the device
+ * is not found or a pointer to the device. The device has not
+ * had its reference counter increased so the caller must be careful
+ * about locking. The caller must hold RCU lock.
+ */
+
+struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex)
+{
+        struct hlist_node *p;
+        struct net_device *dev;
+        struct hlist_head *head = dev_index_hash(net, ifindex);
+
+        hlist_for_each_entry_rcu(dev, p, head, index_hlist)
+                if (dev->ifindex == ifindex)
+                        return dev;
+
+        return NULL;
+}
+EXPORT_SYMBOL(dev_get_by_index_rcu);
+
 
 /**
  * dev_get_by_index - find a device by its ifindex
@@ -665,11 +718,11 @@ struct net_device *dev_get_by_index(struct net *net, int ifindex)
 {
         struct net_device *dev;
 
-        read_lock(&dev_base_lock);
-        dev = __dev_get_by_index(net, ifindex);
+        rcu_read_lock();
+        dev = dev_get_by_index_rcu(net, ifindex);
         if (dev)
                 dev_hold(dev);
-        read_unlock(&dev_base_lock);
+        rcu_read_unlock();
         return dev;
 }
 EXPORT_SYMBOL(dev_get_by_index);
@@ -748,15 +801,15 @@ struct net_device *dev_get_by_flags(struct net *net, unsigned short if_flags,
         struct net_device *dev, *ret;
 
         ret = NULL;
-        read_lock(&dev_base_lock);
-        for_each_netdev(net, dev) {
+        rcu_read_lock();
+        for_each_netdev_rcu(net, dev) {
                 if (((dev->flags ^ if_flags) & mask) == 0) {
                         dev_hold(dev);
                         ret = dev;
                         break;
                 }
         }
-        read_unlock(&dev_base_lock);
+        rcu_read_unlock();
         return ret;
 }
 EXPORT_SYMBOL(dev_get_by_flags);
@@ -841,7 +894,8 @@ static int __dev_alloc_name(struct net *net, const char *name, char *buf)
                 free_page((unsigned long) inuse);
         }
 
-        snprintf(buf, IFNAMSIZ, name, i);
+        if (buf != name)
+                snprintf(buf, IFNAMSIZ, name, i);
         if (!__dev_get_by_name(net, buf))
                 return i;
 
@@ -881,6 +935,21 @@ int dev_alloc_name(struct net_device *dev, const char *name)
 }
 EXPORT_SYMBOL(dev_alloc_name);
 
+static int dev_get_valid_name(struct net *net, const char *name, char *buf,
+                              bool fmt)
+{
+        if (!dev_valid_name(name))
+                return -EINVAL;
+
+        if (fmt && strchr(name, '%'))
+                return __dev_alloc_name(net, name, buf);
+        else if (__dev_get_by_name(net, name))
+                return -EEXIST;
+        else if (buf != name)
+                strlcpy(buf, name, IFNAMSIZ);
+
+        return 0;
+}
 
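
dev_get_valid_name() folds the naming rules into one helper: reject invalid names, expand a printf-style template when fmt is set, and otherwise require the literal name to be free (copying it only when buf and name differ). A usage sketch, with the behavior read straight from the body above:

    char buf[IFNAMSIZ];
    int err;

    /* fmt = 1 and a '%' in the name: allocate the first free expansion */
    err = dev_get_valid_name(net, "veth%d", buf, 1);   /* buf <- e.g. "veth0" */

    /* literal name: -EEXIST if taken, otherwise copied into buf */
    err = dev_get_valid_name(net, "dummy0", buf, 1);
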
 /**
  * dev_change_name - change name of a device
@@ -904,28 +973,20 @@ int dev_change_name(struct net_device *dev, const char *newname)
         if (dev->flags & IFF_UP)
                 return -EBUSY;
 
-        if (!dev_valid_name(newname))
-                return -EINVAL;
-
         if (strncmp(newname, dev->name, IFNAMSIZ) == 0)
                 return 0;
 
         memcpy(oldname, dev->name, IFNAMSIZ);
 
-        if (strchr(newname, '%')) {
-                err = dev_alloc_name(dev, newname);
-                if (err < 0)
-                        return err;
-        } else if (__dev_get_by_name(net, newname))
-                return -EEXIST;
-        else
-                strlcpy(dev->name, newname, IFNAMSIZ);
+        err = dev_get_valid_name(net, newname, dev->name, 1);
+        if (err < 0)
+                return err;
 
 rollback:
         /* For now only devices in the initial network namespace
          * are in sysfs.
          */
-        if (net == &init_net) {
+        if (net_eq(net, &init_net)) {
                 ret = device_rename(&dev->dev, dev->name);
                 if (ret) {
                         memcpy(dev->name, oldname, IFNAMSIZ);
@@ -935,7 +996,12 @@ rollback:
 
         write_lock_bh(&dev_base_lock);
         hlist_del(&dev->name_hlist);
-        hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name));
+        write_unlock_bh(&dev_base_lock);
+
+        synchronize_rcu();
+
+        write_lock_bh(&dev_base_lock);
+        hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name));
         write_unlock_bh(&dev_base_lock);
 
         ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev);
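
The rename path above deliberately unhashes the old name, waits a full grace period, and only then rehashes. The hazard this avoids (described here, not quoted from the file): if the node were moved to its new bucket in one step, a lockless reader still positioned on dev in the old chain would follow the rewritten ->next pointer into the new chain and silently skip the rest of the old bucket. The shape of the idiom:

    hlist_del(&dev->name_hlist);    /* old-chain readers may still sit on dev */
    synchronize_rcu();              /* drain them before ->next is reused */
    hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name));
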
@@ -1038,9 +1104,9 @@ void dev_load(struct net *net, const char *name)
 {
         struct net_device *dev;
 
-        read_lock(&dev_base_lock);
-        dev = __dev_get_by_name(net, name);
-        read_unlock(&dev_base_lock);
+        rcu_read_lock();
+        dev = dev_get_by_name_rcu(net, name);
+        rcu_read_unlock();
 
         if (!dev && capable(CAP_NET_ADMIN))
                 request_module("%s", name);
@@ -1287,6 +1353,7 @@ rollback:
                                 nb->notifier_call(nb, NETDEV_DOWN, dev);
                         }
                         nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
+                        nb->notifier_call(nb, NETDEV_UNREGISTER_BATCH, dev);
                 }
         }
 
@@ -1353,6 +1420,45 @@ static inline void net_timestamp(struct sk_buff *skb)
                 skb->tstamp.tv64 = 0;
 }
 
+/**
+ * dev_forward_skb - loopback an skb to another netif
+ *
+ * @dev: destination network device
+ * @skb: buffer to forward
+ *
+ * return values:
+ *      NET_RX_SUCCESS  (no congestion)
+ *      NET_RX_DROP     (packet was dropped)
+ *
+ * dev_forward_skb can be used for injecting an skb from the
+ * start_xmit function of one device into the receive queue
+ * of another device.
+ *
+ * The receiving device may be in another namespace, so
+ * we have to clear all information in the skb that could
+ * impact namespace isolation.
+ */
+int dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
+{
+        skb_orphan(skb);
+
+        if (!(dev->flags & IFF_UP))
+                return NET_RX_DROP;
+
+        if (skb->len > (dev->mtu + dev->hard_header_len))
+                return NET_RX_DROP;
+
+        skb_dst_drop(skb);
+        skb->tstamp.tv64 = 0;
+        skb->pkt_type = PACKET_HOST;
+        skb->protocol = eth_type_trans(skb, dev);
+        skb->mark = 0;
+        secpath_reset(skb);
+        nf_reset(skb);
+        return netif_rx(skb);
+}
+EXPORT_SYMBOL_GPL(dev_forward_skb);
+
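
dev_forward_skb() targets paired software devices (veth is the obvious consumer). A hedged sketch of a transmit hook built on it; the peer lookup is a hypothetical helper, and note that as written above the early drop paths return without freeing the skb, so a real driver must settle skb ownership on failure:

    static netdev_tx_t my_pair_xmit(struct sk_buff *skb, struct net_device *dev)
    {
            struct net_device *peer = my_get_peer(dev);     /* hypothetical */

            /* dev_forward_skb() scrubs namespace-sensitive state and
             * feeds the skb to netif_rx() on the peer device */
            if (dev_forward_skb(peer, skb) == NET_RX_SUCCESS)
                    dev->stats.tx_packets++;
            else
                    dev->stats.tx_dropped++;
            return NETDEV_TX_OK;
    }
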
 /*
  * Support routine. Sends outgoing frames to any network
  * taps currently in use.
@@ -1701,7 +1807,7 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
                         struct netdev_queue *txq)
 {
         const struct net_device_ops *ops = dev->netdev_ops;
-        int rc;
+        int rc = NETDEV_TX_OK;
 
         if (likely(!skb->next)) {
                 if (!list_empty(&ptype_all))
@@ -1749,6 +1855,8 @@ gso:
                 nskb->next = NULL;
                 rc = ops->ndo_start_xmit(nskb, dev);
                 if (unlikely(rc != NETDEV_TX_OK)) {
+                        if (rc & ~NETDEV_TX_MASK)
+                                goto out_kfree_gso_skb;
                         nskb->next = skb->next;
                         skb->next = nskb;
                         return rc;
@@ -1758,11 +1866,12 @@ gso:
                 return NETDEV_TX_BUSY;
         } while (skb->next);
 
-        skb->destructor = DEV_GSO_CB(skb)->destructor;
-
+out_kfree_gso_skb:
+        if (likely(skb->next == NULL))
+                skb->destructor = DEV_GSO_CB(skb)->destructor;
 out_kfree_skb:
         kfree_skb(skb);
-        return NETDEV_TX_OK;
+        return rc;
 }
 
 static u32 skb_tx_hashrnd;
@@ -1789,16 +1898,43 @@ u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb)
 }
 EXPORT_SYMBOL(skb_tx_hash);
 
+static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index)
+{
+        if (unlikely(queue_index >= dev->real_num_tx_queues)) {
+                if (net_ratelimit()) {
+                        WARN(1, "%s selects TX queue %d, but "
+                             "real number of TX queues is %d\n",
+                             dev->name, queue_index,
+                             dev->real_num_tx_queues);
+                }
+                return 0;
+        }
+        return queue_index;
+}
+
 static struct netdev_queue *dev_pick_tx(struct net_device *dev,
                                         struct sk_buff *skb)
 {
-        const struct net_device_ops *ops = dev->netdev_ops;
-        u16 queue_index = 0;
+        u16 queue_index;
+        struct sock *sk = skb->sk;
+
+        if (sk_tx_queue_recorded(sk)) {
+                queue_index = sk_tx_queue_get(sk);
+        } else {
+                const struct net_device_ops *ops = dev->netdev_ops;
 
-        if (ops->ndo_select_queue)
-                queue_index = ops->ndo_select_queue(dev, skb);
-        else if (dev->real_num_tx_queues > 1)
-                queue_index = skb_tx_hash(dev, skb);
+                if (ops->ndo_select_queue) {
+                        queue_index = ops->ndo_select_queue(dev, skb);
+                        queue_index = dev_cap_txqueue(dev, queue_index);
+                } else {
+                        queue_index = 0;
+                        if (dev->real_num_tx_queues > 1)
+                                queue_index = skb_tx_hash(dev, skb);
+
+                        if (sk && sk->sk_dst_cache)
+                                sk_tx_queue_set(sk, queue_index);
+                }
+        }
 
         skb_set_queue_mapping(skb, queue_index);
         return netdev_get_tx_queue(dev, queue_index);
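
dev_cap_txqueue() exists because ndo_select_queue() is driver-supplied and may return an out-of-range index; rather than indexing past the queue array, the core now logs one rate-limited WARN and falls back to queue 0. A sketch of a well-behaved hook whose result the clamp passes through untouched (the policy shown is illustrative):

    static u16 my_select_queue(struct net_device *dev, struct sk_buff *skb)
    {
            /* skb_tx_hash() already bounds its result by real_num_tx_queues */
            return skb_tx_hash(dev, skb);
    }
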
@@ -1935,8 +2071,8 @@ gso:
                 HARD_TX_LOCK(dev, txq, cpu);
 
                 if (!netif_tx_queue_stopped(txq)) {
-                        rc = NET_XMIT_SUCCESS;
-                        if (!dev_hard_start_xmit(skb, dev, txq)) {
+                        rc = dev_hard_start_xmit(skb, dev, txq);
+                        if (dev_xmit_complete(rc)) {
                                 HARD_TX_UNLOCK(dev, txq);
                                 goto out;
                         }
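
dev_queue_xmit() now propagates the driver's return code and asks dev_xmit_complete() whether the skb was consumed. My recollection of that helper, added to netdevice.h alongside NETDEV_TX_MASK in this same series, so treat the exact body as an assumption:

    static inline bool dev_xmit_complete(int rc)
    {
            /* skb consumed: NETDEV_TX_OK, a negative driver error,
             * or a NET_XMIT_* congestion code */
            if (likely(rc < NET_XMIT_MASK))
                    return true;
            return false;
    }
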
@@ -2191,7 +2327,7 @@ static int ing_filter(struct sk_buff *skb)
         if (MAX_RED_LOOP < ttl++) {
                 printk(KERN_WARNING
                        "Redir loop detected Dropping packet (%d->%d)\n",
-                       skb->iif, dev->ifindex);
+                       skb->skb_iif, dev->ifindex);
                 return TC_ACT_SHOT;
         }
 
@@ -2292,15 +2428,15 @@ int netif_receive_skb(struct sk_buff *skb)
         if (!skb->tstamp.tv64)
                 net_timestamp(skb);
 
-        if (skb->vlan_tci && vlan_hwaccel_do_receive(skb))
+        if (vlan_tx_tag_present(skb) && vlan_hwaccel_do_receive(skb))
                 return NET_RX_SUCCESS;
 
         /* if we've gotten here through NAPI, check netpoll */
         if (netpoll_receive_skb(skb))
                 return NET_RX_DROP;
 
-        if (!skb->iif)
-                skb->iif = skb->dev->ifindex;
+        if (!skb->skb_iif)
+                skb->skb_iif = skb->dev->ifindex;
 
         null_or_orig = NULL;
         orig_dev = skb->dev;
@@ -2440,7 +2576,7 @@ void napi_gro_flush(struct napi_struct *napi)
 }
 EXPORT_SYMBOL(napi_gro_flush);
 
-int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
+enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 {
         struct sk_buff **pp = NULL;
         struct packet_type *ptype;
@@ -2448,7 +2584,7 @@ int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
         struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
         int same_flow;
         int mac_len;
-        int ret;
+        enum gro_result ret;
 
         if (!(skb->dev->features & NETIF_F_GRO))
                 goto normal;
@@ -2532,7 +2668,8 @@ normal:
 }
 EXPORT_SYMBOL(dev_gro_receive);
 
-static int __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
+static gro_result_t
+__napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 {
         struct sk_buff *p;
 
@@ -2540,33 +2677,35 @@ static int __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
                 return GRO_NORMAL;
 
         for (p = napi->gro_list; p; p = p->next) {
-                NAPI_GRO_CB(p)->same_flow = (p->dev == skb->dev)
-                        && !compare_ether_header(skb_mac_header(p),
-                                                 skb_gro_mac_header(skb));
+                NAPI_GRO_CB(p)->same_flow =
+                        (p->dev == skb->dev) &&
+                        !compare_ether_header(skb_mac_header(p),
+                                              skb_gro_mac_header(skb));
                 NAPI_GRO_CB(p)->flush = 0;
         }
 
         return dev_gro_receive(napi, skb);
 }
 
-int napi_skb_finish(int ret, struct sk_buff *skb)
+gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
 {
-        int err = NET_RX_SUCCESS;
-
         switch (ret) {
         case GRO_NORMAL:
-                return netif_receive_skb(skb);
+                if (netif_receive_skb(skb))
+                        ret = GRO_DROP;
+                break;
 
         case GRO_DROP:
-                err = NET_RX_DROP;
-                /* fall through */
-
         case GRO_MERGED_FREE:
                 kfree_skb(skb);
                 break;
+
+        case GRO_HELD:
+        case GRO_MERGED:
+                break;
         }
 
-        return err;
+        return ret;
 }
 EXPORT_SYMBOL(napi_skb_finish);
 
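
With napi_skb_finish() speaking gro_result_t, napi_gro_receive() (converted just below) reports GRO codes instead of NET_RX_* values. A hedged sketch of a NAPI poll loop consuming it; the dequeue helper is hypothetical, and many drivers simply ignore the return value:

    struct sk_buff *skb;

    while ((skb = my_next_rx_skb(adapter)) != NULL) {
            skb->protocol = eth_type_trans(skb, netdev);
            if (napi_gro_receive(&adapter->napi, skb) == GRO_DROP)
                    netdev->stats.rx_dropped++;
    }
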
@@ -2586,7 +2725,7 @@ void skb_gro_reset_offset(struct sk_buff *skb)
 }
 EXPORT_SYMBOL(skb_gro_reset_offset);
 
-int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
+gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 {
         skb_gro_reset_offset(skb);
 
@@ -2605,49 +2744,41 @@ EXPORT_SYMBOL(napi_reuse_skb);
 
 struct sk_buff *napi_get_frags(struct napi_struct *napi)
 {
-        struct net_device *dev = napi->dev;
         struct sk_buff *skb = napi->skb;
 
         if (!skb) {
-                skb = netdev_alloc_skb(dev, GRO_MAX_HEAD + NET_IP_ALIGN);
-                if (!skb)
-                        goto out;
-
-                skb_reserve(skb, NET_IP_ALIGN);
-
-                napi->skb = skb;
+                skb = netdev_alloc_skb_ip_align(napi->dev, GRO_MAX_HEAD);
+                if (skb)
+                        napi->skb = skb;
         }
-
-out:
         return skb;
 }
 EXPORT_SYMBOL(napi_get_frags);
 
-int napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, int ret)
+gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb,
+                               gro_result_t ret)
 {
-        int err = NET_RX_SUCCESS;
-
         switch (ret) {
         case GRO_NORMAL:
         case GRO_HELD:
                 skb->protocol = eth_type_trans(skb, napi->dev);
 
-                if (ret == GRO_NORMAL)
-                        return netif_receive_skb(skb);
-
-                skb_gro_pull(skb, -ETH_HLEN);
+                if (ret == GRO_HELD)
+                        skb_gro_pull(skb, -ETH_HLEN);
+                else if (netif_receive_skb(skb))
+                        ret = GRO_DROP;
                 break;
 
         case GRO_DROP:
-                err = NET_RX_DROP;
-                /* fall through */
-
         case GRO_MERGED_FREE:
                 napi_reuse_skb(napi, skb);
                 break;
+
+        case GRO_MERGED:
+                break;
         }
 
-        return err;
+        return ret;
 }
 EXPORT_SYMBOL(napi_frags_finish);
 
@@ -2688,12 +2819,12 @@ out:
 }
 EXPORT_SYMBOL(napi_frags_skb);
 
-int napi_gro_frags(struct napi_struct *napi)
+gro_result_t napi_gro_frags(struct napi_struct *napi)
 {
         struct sk_buff *skb = napi_frags_skb(napi);
 
         if (!skb)
-                return NET_RX_DROP;
+                return GRO_DROP;
 
         return napi_frags_finish(napi, skb, __napi_gro_receive(napi, skb));
 }
@@ -2938,15 +3069,15 @@ static int dev_ifname(struct net *net, struct ifreq __user *arg)
         if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
                 return -EFAULT;
 
-        read_lock(&dev_base_lock);
-        dev = __dev_get_by_index(net, ifr.ifr_ifindex);
+        rcu_read_lock();
+        dev = dev_get_by_index_rcu(net, ifr.ifr_ifindex);
         if (!dev) {
-                read_unlock(&dev_base_lock);
+                rcu_read_unlock();
                 return -ENODEV;
         }
 
         strcpy(ifr.ifr_name, dev->name);
-        read_unlock(&dev_base_lock);
+        rcu_read_unlock();
 
         if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
                 return -EFAULT;
@@ -3016,18 +3147,18 @@ static int dev_ifconf(struct net *net, char __user *arg)
  * in detail.
  */
 void *dev_seq_start(struct seq_file *seq, loff_t *pos)
-        __acquires(dev_base_lock)
+        __acquires(RCU)
 {
         struct net *net = seq_file_net(seq);
         loff_t off;
         struct net_device *dev;
 
-        read_lock(&dev_base_lock);
+        rcu_read_lock();
         if (!*pos)
                 return SEQ_START_TOKEN;
 
         off = 1;
-        for_each_netdev(net, dev)
+        for_each_netdev_rcu(net, dev)
                 if (off++ == *pos)
                         return dev;
 
@@ -3036,16 +3167,18 @@ void *dev_seq_start(struct seq_file *seq, loff_t *pos)
 
 void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
-        struct net *net = seq_file_net(seq);
+        struct net_device *dev = (v == SEQ_START_TOKEN) ?
+                                  first_net_device(seq_file_net(seq)) :
+                                  next_net_device((struct net_device *)v);
+
         ++*pos;
-        return v == SEQ_START_TOKEN ?
-                first_net_device(net) : next_net_device((struct net_device *)v);
+        return rcu_dereference(dev);
 }
 
 void dev_seq_stop(struct seq_file *seq, void *v)
-        __releases(dev_base_lock)
+        __releases(RCU)
 {
-        read_unlock(&dev_base_lock);
+        rcu_read_unlock();
 }
 
 static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
@@ -4254,12 +4387,12 @@ int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
 EXPORT_SYMBOL(dev_set_mac_address);
 
 /*
- * Perform the SIOCxIFxxx calls, inside read_lock(dev_base_lock)
+ * Perform the SIOCxIFxxx calls, inside rcu_read_lock()
  */
 static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cmd)
 {
         int err;
-        struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
+        struct net_device *dev = dev_get_by_name_rcu(net, ifr->ifr_name);
 
         if (!dev)
                 return -ENODEV;
@@ -4491,9 +4624,9 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
         case SIOCGIFINDEX:
         case SIOCGIFTXQLEN:
                 dev_load(net, ifr.ifr_name);
-                read_lock(&dev_base_lock);
+                rcu_read_lock();
                 ret = dev_ifsioc_locked(net, &ifr, cmd);
-                read_unlock(&dev_base_lock);
+                rcu_read_unlock();
                 if (!ret) {
                         if (colon)
                                 *colon = ':';
@@ -4636,59 +4769,80 @@ static void net_set_todo(struct net_device *dev)
         list_add_tail(&dev->todo_list, &net_todo_list);
 }
 
-static void rollback_registered(struct net_device *dev)
+static void rollback_registered_many(struct list_head *head)
 {
+        struct net_device *dev;
+
         BUG_ON(dev_boot_phase);
         ASSERT_RTNL();
 
-        /* Some devices call without registering for initialization unwind. */
-        if (dev->reg_state == NETREG_UNINITIALIZED) {
-                printk(KERN_DEBUG "unregister_netdevice: device %s/%p never "
-                       "was registered\n", dev->name, dev);
+        list_for_each_entry(dev, head, unreg_list) {
+                /* Some devices call without registering
+                 * for initialization unwind.
+                 */
+                if (dev->reg_state == NETREG_UNINITIALIZED) {
+                        pr_debug("unregister_netdevice: device %s/%p never "
+                                 "was registered\n", dev->name, dev);
 
                         WARN_ON(1);
                         return;
                 }
 
                 BUG_ON(dev->reg_state != NETREG_REGISTERED);
 
                 /* If device is running, close it first. */
                 dev_close(dev);
 
                 /* And unlink it from device chain. */
                 unlist_netdevice(dev);
 
                 dev->reg_state = NETREG_UNREGISTERING;
+        }
 
         synchronize_net();
 
-        /* Shutdown queueing discipline. */
-        dev_shutdown(dev);
+        list_for_each_entry(dev, head, unreg_list) {
+                /* Shutdown queueing discipline. */
+                dev_shutdown(dev);
 
 
                 /* Notify protocols, that we are about to destroy
                    this device. They should clean all the things.
                 */
                 call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
 
                 /*
                  * Flush the unicast and multicast chains
                  */
                 dev_unicast_flush(dev);
                 dev_addr_discard(dev);
 
                 if (dev->netdev_ops->ndo_uninit)
                         dev->netdev_ops->ndo_uninit(dev);
 
                 /* Notifier chain MUST detach us from master device. */
                 WARN_ON(dev->master);
 
                 /* Remove entries from kobject tree */
                 netdev_unregister_kobject(dev);
+        }
+
+        /* Process any work delayed until the end of the batch */
+        dev = list_entry(head->next, struct net_device, unreg_list);
+        call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev);
 
         synchronize_net();
 
-        dev_put(dev);
+        list_for_each_entry(dev, head, unreg_list)
+                dev_put(dev);
+}
+
+static void rollback_registered(struct net_device *dev)
+{
+        LIST_HEAD(single);
+
+        list_add(&dev->unreg_list, &single);
+        rollback_registered_many(&single);
 }
 
 static void __netdev_init_queue_locks_one(struct net_device *dev,
@@ -4747,6 +4901,33 @@ unsigned long netdev_fix_features(unsigned long features, const char *name)
 EXPORT_SYMBOL(netdev_fix_features);
 
 /**
+ * netif_stacked_transfer_operstate - transfer operstate
+ * @rootdev: the root or lower level device to transfer state from
+ * @dev: the device to transfer operstate to
+ *
+ * Transfer operational state from root to device. This is normally
+ * called when a stacking relationship exists between the root
+ * device and the device(a leaf device).
+ */
+void netif_stacked_transfer_operstate(const struct net_device *rootdev,
+                                      struct net_device *dev)
+{
+        if (rootdev->operstate == IF_OPER_DORMANT)
+                netif_dormant_on(dev);
+        else
+                netif_dormant_off(dev);
+
+        if (netif_carrier_ok(rootdev)) {
+                if (!netif_carrier_ok(dev))
+                        netif_carrier_on(dev);
+        } else {
+                if (netif_carrier_ok(dev))
+                        netif_carrier_off(dev);
+        }
+}
+EXPORT_SYMBOL(netif_stacked_transfer_operstate);
+
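
netif_stacked_transfer_operstate() serves stacked devices (VLANs and similar), letting an upper device mirror the carrier and dormant state of the lower device it rides on. A hedged sketch of the call-site shape, with the notifier plumbing left out as an assumption:

    /* e.g. from a NETDEV_CHANGE notifier on the lower device */
    static void my_lower_changed(struct net_device *lower, struct net_device *upper)
    {
            netif_stacked_transfer_operstate(lower, upper);
    }
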
+/**
  * register_netdevice - register a network device
  * @dev: device to register
  *
@@ -4765,8 +4946,6 @@ EXPORT_SYMBOL(netdev_fix_features);
 
 int register_netdevice(struct net_device *dev)
 {
-        struct hlist_head *head;
-        struct hlist_node *p;
         int ret;
         struct net *net = dev_net(dev);
 
@@ -4795,26 +4974,14 @@ int register_netdevice(struct net_device *dev)
                 }
         }
 
-        if (!dev_valid_name(dev->name)) {
-                ret = -EINVAL;
-                goto err_uninit;
-        }
+        ret = dev_get_valid_name(net, dev->name, dev->name, 0);
+        if (ret)
+                goto err_uninit;
 
         dev->ifindex = dev_new_index(net);
         if (dev->iflink == -1)
                 dev->iflink = dev->ifindex;
 
-        /* Check for existence of name */
-        head = dev_name_hash(net, dev->name);
-        hlist_for_each(p, head) {
-                struct net_device *d
-                        = hlist_entry(p, struct net_device, name_hlist);
-                if (!strncmp(d->name, dev->name, IFNAMSIZ)) {
-                        ret = -EEXIST;
-                        goto err_uninit;
-                }
-        }
-
         /* Fix illegal checksum combinations */
         if ((dev->features & NETIF_F_HW_CSUM) &&
             (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
@@ -4837,6 +5004,12 @@ int register_netdevice(struct net_device *dev)
         dev->features |= NETIF_F_GSO;
 
         netdev_initialize_kobject(dev);
+
+        ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev);
+        ret = notifier_to_errno(ret);
+        if (ret)
+                goto err_uninit;
+
         ret = netdev_register_kobject(dev);
         if (ret)
                 goto err_uninit;
@@ -4961,6 +5134,8 @@ static void netdev_wait_allrefs(struct net_device *dev)
 {
         unsigned long rebroadcast_time, warning_time;
 
+        linkwatch_forget_dev(dev);
+
         rebroadcast_time = warning_time = jiffies;
         while (atomic_read(&dev->refcnt) != 0) {
                 if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
@@ -4968,6 +5143,8 @@ static void netdev_wait_allrefs(struct net_device *dev)
 
                         /* Rebroadcast unregister notification */
                         call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
+                        /* don't resend NETDEV_UNREGISTER_BATCH, _BATCH users
+                         * should have already handle it the first time */
 
                         if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
                                      &dev->state)) {
@@ -5063,6 +5240,32 @@ void netdev_run_todo(void)
 }
 
 /**
+ * dev_txq_stats_fold - fold tx_queues stats
+ * @dev: device to get statistics from
+ * @stats: struct net_device_stats to hold results
+ */
+void dev_txq_stats_fold(const struct net_device *dev,
+                        struct net_device_stats *stats)
+{
+        unsigned long tx_bytes = 0, tx_packets = 0, tx_dropped = 0;
+        unsigned int i;
+        struct netdev_queue *txq;
+
+        for (i = 0; i < dev->num_tx_queues; i++) {
+                txq = netdev_get_tx_queue(dev, i);
+                tx_bytes += txq->tx_bytes;
+                tx_packets += txq->tx_packets;
+                tx_dropped += txq->tx_dropped;
+        }
+        if (tx_bytes || tx_packets || tx_dropped) {
+                stats->tx_bytes = tx_bytes;
+                stats->tx_packets = tx_packets;
+                stats->tx_dropped = tx_dropped;
+        }
+}
+EXPORT_SYMBOL(dev_txq_stats_fold);
+
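
dev_txq_stats_fold() lets a driver that implements its own ndo_get_stats keep the default per-queue tx accounting while layering its own fields on top. A hedged sketch:

    static struct net_device_stats *my_get_stats(struct net_device *dev)
    {
            /* fold per-queue tx_bytes/tx_packets/tx_dropped into dev->stats */
            dev_txq_stats_fold(dev, &dev->stats);
            return &dev->stats;
    }
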
+/**
  * dev_get_stats - get network device statistics
  * @dev: device to get statistics from
  *
@@ -5076,25 +5279,9 @@ const struct net_device_stats *dev_get_stats(struct net_device *dev)
 
         if (ops->ndo_get_stats)
                 return ops->ndo_get_stats(dev);
-        else {
-                unsigned long tx_bytes = 0, tx_packets = 0, tx_dropped = 0;
-                struct net_device_stats *stats = &dev->stats;
-                unsigned int i;
-                struct netdev_queue *txq;
-
-                for (i = 0; i < dev->num_tx_queues; i++) {
-                        txq = netdev_get_tx_queue(dev, i);
-                        tx_bytes += txq->tx_bytes;
-                        tx_packets += txq->tx_packets;
-                        tx_dropped += txq->tx_dropped;
-                }
-                if (tx_bytes || tx_packets || tx_dropped) {
-                        stats->tx_bytes = tx_bytes;
-                        stats->tx_packets = tx_packets;
-                        stats->tx_dropped = tx_dropped;
-                }
-                return stats;
-        }
+
+        dev_txq_stats_fold(dev, &dev->stats);
+        return &dev->stats;
 }
 EXPORT_SYMBOL(dev_get_stats);
 
@@ -5174,6 +5361,8 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
         netdev_init_queues(dev);
 
         INIT_LIST_HEAD(&dev->napi_list);
+        INIT_LIST_HEAD(&dev->unreg_list);
+        INIT_LIST_HEAD(&dev->link_watch_list);
         dev->priv_flags = IFF_XMIT_DST_RELEASE;
         setup(dev);
         strcpy(dev->name, name);
@@ -5238,25 +5427,47 @@ void synchronize_net(void)
 EXPORT_SYMBOL(synchronize_net);
 
 /**
- * unregister_netdevice - remove device from the kernel
+ * unregister_netdevice_queue - remove device from the kernel
  * @dev: device
+ * @head: list
  *
  * This function shuts down a device interface and removes it
  * from the kernel tables.
+ * If head not NULL, device is queued to be unregistered later.
  *
  * Callers must hold the rtnl semaphore. You may want
  * unregister_netdev() instead of this.
  */
 
-void unregister_netdevice(struct net_device *dev)
+void unregister_netdevice_queue(struct net_device *dev, struct list_head *head)
 {
         ASSERT_RTNL();
 
-        rollback_registered(dev);
-        /* Finish processing unregister after unlock */
-        net_set_todo(dev);
+        if (head) {
+                list_move_tail(&dev->unreg_list, head);
+        } else {
+                rollback_registered(dev);
+                /* Finish processing unregister after unlock */
+                net_set_todo(dev);
+        }
 }
-EXPORT_SYMBOL(unregister_netdevice);
+EXPORT_SYMBOL(unregister_netdevice_queue);
+
+/**
+ * unregister_netdevice_many - unregister many devices
+ * @head: list of devices
+ */
+void unregister_netdevice_many(struct list_head *head)
+{
+        struct net_device *dev;
+
+        if (!list_empty(head)) {
+                rollback_registered_many(head);
+                list_for_each_entry(dev, head, unreg_list)
+                        net_set_todo(dev);
+        }
+}
+EXPORT_SYMBOL(unregister_netdevice_many);
 
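
unregister_netdevice_queue() and unregister_netdevice_many() form a batching idiom: collect devices on a private list under RTNL, then tear them all down in one pass so the synchronize_net() waits in rollback_registered_many() are paid once per batch instead of once per device. A hedged usage sketch (the ownership test is hypothetical):

    LIST_HEAD(kill_list);
    struct net_device *dev;

    rtnl_lock();
    for_each_netdev(net, dev)
            if (my_owns_device(dev))
                    unregister_netdevice_queue(dev, &kill_list);
    unregister_netdevice_many(&kill_list);
    rtnl_unlock();
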
 /**
  * unregister_netdev - remove device from the kernel
@@ -5293,8 +5504,6 @@ EXPORT_SYMBOL(unregister_netdev);
 
 int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat)
 {
-        char buf[IFNAMSIZ];
-        const char *destname;
         int err;
 
         ASSERT_RTNL();
@@ -5327,20 +5536,11 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
          * we can use it in the destination network namespace.
          */
         err = -EEXIST;
-        destname = dev->name;
-        if (__dev_get_by_name(net, destname)) {
+        if (__dev_get_by_name(net, dev->name)) {
                 /* We get here if we can't use the current device name */
                 if (!pat)
                         goto out;
-                if (!dev_valid_name(pat))
-                        goto out;
-                if (strchr(pat, '%')) {
-                        if (__dev_alloc_name(net, pat, buf) < 0)
-                                goto out;
-                        destname = buf;
-                } else
-                        destname = pat;
-                if (__dev_get_by_name(net, destname))
+                if (dev_get_valid_name(net, pat, dev->name, 1))
                         goto out;
         }
 
@@ -5364,6 +5564,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
            this device. They should clean all the things.
         */
         call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
+        call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev);
 
         /*
          * Flush the unicast and multicast chains
@@ -5376,10 +5577,6 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
         /* Actually switch the network namespace */
         dev_net_set(dev, net);
 
-        /* Assign the new device name */
-        if (destname != dev->name)
-                strcpy(dev->name, destname);
-
         /* If there is an ifindex conflict assign a new one */
         if (__dev_get_by_index(net, dev->ifindex)) {
                 int iflink = (dev->iflink == dev->ifindex);
@@ -5484,7 +5681,7 @@ unsigned long netdev_increment_features(unsigned long all, unsigned long one,
                 one |= NETIF_F_ALL_CSUM;
 
         one |= all & NETIF_F_ONE_FOR_ALL;
-        all &= one | NETIF_F_LLTX | NETIF_F_GSO;
+        all &= one | NETIF_F_LLTX | NETIF_F_GSO | NETIF_F_UFO;
         all |= one & mask & NETIF_F_ONE_FOR_ALL;
 
         return all;
@@ -5566,14 +5763,13 @@ static struct pernet_operations __net_initdata netdev_net_ops = {
 
 static void __net_exit default_device_exit(struct net *net)
 {
-        struct net_device *dev;
+        struct net_device *dev, *aux;
         /*
-         * Push all migratable of the network devices back to the
+         * Push all migratable network devices back to the
          * initial network namespace
          */
         rtnl_lock();
-restart:
-        for_each_netdev(net, dev) {
+        for_each_netdev_safe(net, dev, aux) {
                 int err;
                 char fb_name[IFNAMSIZ];
 
@@ -5581,11 +5777,9 @@ restart:
                 if (dev->features & NETIF_F_NETNS_LOCAL)
                         continue;
 
-                /* Delete virtual devices */
-                if (dev->rtnl_link_ops && dev->rtnl_link_ops->dellink) {
-                        dev->rtnl_link_ops->dellink(dev);
-                        goto restart;
-                }
+                /* Leave virtual devices for the generic cleanup */
+                if (dev->rtnl_link_ops)
+                        continue;
 
                 /* Push remaing network devices to init_net */
                 snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex);
@@ -5595,13 +5789,37 @@ restart:
                                 __func__, dev->name, err);
                         BUG();
                 }
-                goto restart;
         }
         rtnl_unlock();
 }
 
+static void __net_exit default_device_exit_batch(struct list_head *net_list)
+{
+        /* At exit all network devices most be removed from a network
+         * namespace. Do this in the reverse order of registeration.
+         * Do this across as many network namespaces as possible to
+         * improve batching efficiency.
+         */
+        struct net_device *dev;
+        struct net *net;
+        LIST_HEAD(dev_kill_list);
+
+        rtnl_lock();
+        list_for_each_entry(net, net_list, exit_list) {
+                for_each_netdev_reverse(net, dev) {
+                        if (dev->rtnl_link_ops)
+                                dev->rtnl_link_ops->dellink(dev, &dev_kill_list);
+                        else
+                                unregister_netdevice_queue(dev, &dev_kill_list);
+                }
+        }
+        unregister_netdevice_many(&dev_kill_list);
+        rtnl_unlock();
+}
+
 static struct pernet_operations __net_initdata default_device_ops = {
         .exit = default_device_exit,
+        .exit_batch = default_device_exit_batch,
 };
 
 /*