Diffstat (limited to 'net/core/dev.c')
 net/core/dev.c | 955 ++++++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 650 insertions(+), 305 deletions(-)
diff --git a/net/core/dev.c b/net/core/dev.c
index fe10551d3671..264137fce3a2 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -79,6 +79,8 @@
 #include <linux/cpu.h>
 #include <linux/types.h>
 #include <linux/kernel.h>
+#include <linux/hash.h>
+#include <linux/slab.h>
 #include <linux/sched.h>
 #include <linux/mutex.h>
 #include <linux/string.h>
@@ -104,6 +106,7 @@
 #include <net/dst.h>
 #include <net/pkt_sched.h>
 #include <net/checksum.h>
+#include <net/xfrm.h>
 #include <linux/highmem.h>
 #include <linux/init.h>
 #include <linux/kmod.h>
@@ -175,7 +178,7 @@ static struct list_head ptype_all __read_mostly; /* Taps */
  * The @dev_base_head list is protected by @dev_base_lock and the rtnl
  * semaphore.
  *
- * Pure readers hold dev_base_lock for reading.
+ * Pure readers hold dev_base_lock for reading, or rcu_read_lock()
  *
  * Writers must hold the rtnl semaphore while they loop through the
  * dev_base_head list, and hold dev_base_lock for writing when they do the
@@ -193,18 +196,15 @@ static struct list_head ptype_all __read_mostly; /* Taps */
 DEFINE_RWLOCK(dev_base_lock);
 EXPORT_SYMBOL(dev_base_lock);
 
-#define NETDEV_HASHBITS    8
-#define NETDEV_HASHENTRIES (1 << NETDEV_HASHBITS)
-
 static inline struct hlist_head *dev_name_hash(struct net *net, const char *name)
 {
     unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
-    return &net->dev_name_head[hash & ((1 << NETDEV_HASHBITS) - 1)];
+    return &net->dev_name_head[hash_32(hash, NETDEV_HASHBITS)];
 }
 
 static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
 {
-    return &net->dev_index_head[ifindex & ((1 << NETDEV_HASHBITS) - 1)];
+    return &net->dev_index_head[ifindex & (NETDEV_HASHENTRIES - 1)];
 }
 
 /* Device list insertion */
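
Note on the hunk above: full_name_hash() concentrates much of its entropy in the upper bits, so masking with the low bits wasted it; hash_32() folds every bit into the bucket index via a multiplicative hash. A minimal user-space sketch of the same bucketing idea (the constant and shift mirror the kernel's hash_32() of this era; the names here are illustrative, not from the patch):

    #include <stdint.h>

    #define GOLDEN_RATIO_PRIME_32 0x9e370001UL
    #define HASHBITS 8    /* 256 buckets, like NETDEV_HASHBITS */

    /* Take the HIGH bits of a multiplicative hash, as hash_32() does,
     * so every input bit influences the chosen bucket. */
    static inline uint32_t bucket_of(uint32_t hash, unsigned int bits)
    {
        return (uint32_t)(hash * GOLDEN_RATIO_PRIME_32) >> (32 - bits);
    }
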
@@ -215,23 +215,26 @@ static int list_netdevice(struct net_device *dev)
     ASSERT_RTNL();
 
     write_lock_bh(&dev_base_lock);
-    list_add_tail(&dev->dev_list, &net->dev_base_head);
-    hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name));
-    hlist_add_head(&dev->index_hlist, dev_index_hash(net, dev->ifindex));
+    list_add_tail_rcu(&dev->dev_list, &net->dev_base_head);
+    hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name));
+    hlist_add_head_rcu(&dev->index_hlist,
+               dev_index_hash(net, dev->ifindex));
     write_unlock_bh(&dev_base_lock);
     return 0;
 }
 
-/* Device list removal */
+/* Device list removal
+ * caller must respect a RCU grace period before freeing/reusing dev
+ */
 static void unlist_netdevice(struct net_device *dev)
 {
     ASSERT_RTNL();
 
     /* Unlink dev from the device chain */
     write_lock_bh(&dev_base_lock);
-    list_del(&dev->dev_list);
-    hlist_del(&dev->name_hlist);
-    hlist_del(&dev->index_hlist);
+    list_del_rcu(&dev->dev_list);
+    hlist_del_rcu(&dev->name_hlist);
+    hlist_del_rcu(&dev->index_hlist);
     write_unlock_bh(&dev_base_lock);
 }
 
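
The new comment on unlist_netdevice() is the contract the rest of this series leans on: list_del_rcu()/hlist_del_rcu() unlink the entries, but readers traversing under rcu_read_lock() may still hold pointers to dev. A hedged sketch of the writer-side obligation (illustrative, not code from this patch):

    /* writer side, under RTNL */
    unlist_netdevice(dev);
    synchronize_net();    /* wait out all rcu_read_lock() readers */
    /* only now may dev be freed or reused */
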
@@ -587,18 +590,44 @@ __setup("netdev=", netdev_boot_setup);
 struct net_device *__dev_get_by_name(struct net *net, const char *name)
 {
     struct hlist_node *p;
+    struct net_device *dev;
+    struct hlist_head *head = dev_name_hash(net, name);
 
-    hlist_for_each(p, dev_name_hash(net, name)) {
-        struct net_device *dev
-            = hlist_entry(p, struct net_device, name_hlist);
+    hlist_for_each_entry(dev, p, head, name_hlist)
         if (!strncmp(dev->name, name, IFNAMSIZ))
             return dev;
-    }
+
     return NULL;
 }
 EXPORT_SYMBOL(__dev_get_by_name);
 
 /**
+ *    dev_get_by_name_rcu - find a device by its name
+ *    @net: the applicable net namespace
+ *    @name: name to find
+ *
+ *    Find an interface by name.
+ *    If the name is found a pointer to the device is returned.
+ *    If the name is not found then %NULL is returned.
+ *    The reference counters are not incremented so the caller must be
+ *    careful with locks. The caller must hold RCU lock.
+ */
+
+struct net_device *dev_get_by_name_rcu(struct net *net, const char *name)
+{
+    struct hlist_node *p;
+    struct net_device *dev;
+    struct hlist_head *head = dev_name_hash(net, name);
+
+    hlist_for_each_entry_rcu(dev, p, head, name_hlist)
+        if (!strncmp(dev->name, name, IFNAMSIZ))
+            return dev;
+
+    return NULL;
+}
+EXPORT_SYMBOL(dev_get_by_name_rcu);
+
+/**
  *    dev_get_by_name - find a device by its name
  *    @net: the applicable net namespace
  *    @name: name to find
@@ -614,11 +643,11 @@ struct net_device *dev_get_by_name(struct net *net, const char *name)
 {
     struct net_device *dev;
 
-    read_lock(&dev_base_lock);
-    dev = __dev_get_by_name(net, name);
+    rcu_read_lock();
+    dev = dev_get_by_name_rcu(net, name);
     if (dev)
         dev_hold(dev);
-    read_unlock(&dev_base_lock);
+    rcu_read_unlock();
     return dev;
 }
 EXPORT_SYMBOL(dev_get_by_name);
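
dev_get_by_name() above doubles as the reference caller for the new lookup: enter an RCU read-side section, look the device up, and either finish with it inside the section or pin it with dev_hold() before leaving. A minimal sketch of a caller that only needs the ifindex (hypothetical, for illustration):

    struct net_device *dev;
    int ifindex = -1;

    rcu_read_lock();
    dev = dev_get_by_name_rcu(net, "eth0");
    if (dev)
        ifindex = dev->ifindex;    /* safe only inside the RCU section */
    rcu_read_unlock();
    /* dev must not be dereferenced here unless dev_hold() was taken */
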
@@ -638,17 +667,42 @@ EXPORT_SYMBOL(dev_get_by_name);
 struct net_device *__dev_get_by_index(struct net *net, int ifindex)
 {
     struct hlist_node *p;
+    struct net_device *dev;
+    struct hlist_head *head = dev_index_hash(net, ifindex);
 
-    hlist_for_each(p, dev_index_hash(net, ifindex)) {
-        struct net_device *dev
-            = hlist_entry(p, struct net_device, index_hlist);
+    hlist_for_each_entry(dev, p, head, index_hlist)
         if (dev->ifindex == ifindex)
             return dev;
-    }
+
     return NULL;
 }
 EXPORT_SYMBOL(__dev_get_by_index);
 
+/**
+ *    dev_get_by_index_rcu - find a device by its ifindex
+ *    @net: the applicable net namespace
+ *    @ifindex: index of device
+ *
+ *    Search for an interface by index. Returns %NULL if the device
+ *    is not found or a pointer to the device. The device has not
+ *    had its reference counter increased so the caller must be careful
+ *    about locking. The caller must hold RCU lock.
+ */
+
+struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex)
+{
+    struct hlist_node *p;
+    struct net_device *dev;
+    struct hlist_head *head = dev_index_hash(net, ifindex);
+
+    hlist_for_each_entry_rcu(dev, p, head, index_hlist)
+        if (dev->ifindex == ifindex)
+            return dev;
+
+    return NULL;
+}
+EXPORT_SYMBOL(dev_get_by_index_rcu);
+
 
 /**
  *    dev_get_by_index - find a device by its ifindex
@@ -665,11 +719,11 @@ struct net_device *dev_get_by_index(struct net *net, int ifindex)
 {
     struct net_device *dev;
 
-    read_lock(&dev_base_lock);
-    dev = __dev_get_by_index(net, ifindex);
+    rcu_read_lock();
+    dev = dev_get_by_index_rcu(net, ifindex);
     if (dev)
         dev_hold(dev);
-    read_unlock(&dev_base_lock);
+    rcu_read_unlock();
     return dev;
 }
 EXPORT_SYMBOL(dev_get_by_index);
@@ -748,15 +802,15 @@ struct net_device *dev_get_by_flags(struct net *net, unsigned short if_flags,
     struct net_device *dev, *ret;
 
     ret = NULL;
-    read_lock(&dev_base_lock);
-    for_each_netdev(net, dev) {
+    rcu_read_lock();
+    for_each_netdev_rcu(net, dev) {
         if (((dev->flags ^ if_flags) & mask) == 0) {
             dev_hold(dev);
             ret = dev;
             break;
         }
     }
-    read_unlock(&dev_base_lock);
+    rcu_read_unlock();
     return ret;
 }
 EXPORT_SYMBOL(dev_get_by_flags);
@@ -841,7 +895,8 @@ static int __dev_alloc_name(struct net *net, const char *name, char *buf)
         free_page((unsigned long) inuse);
     }
 
-    snprintf(buf, IFNAMSIZ, name, i);
+    if (buf != name)
+        snprintf(buf, IFNAMSIZ, name, i);
     if (!__dev_get_by_name(net, buf))
         return i;
 
@@ -881,6 +936,21 @@ int dev_alloc_name(struct net_device *dev, const char *name)
 }
 EXPORT_SYMBOL(dev_alloc_name);
 
+static int dev_get_valid_name(struct net *net, const char *name, char *buf,
+                  bool fmt)
+{
+    if (!dev_valid_name(name))
+        return -EINVAL;
+
+    if (fmt && strchr(name, '%'))
+        return __dev_alloc_name(net, name, buf);
+    else if (__dev_get_by_name(net, name))
+        return -EEXIST;
+    else if (buf != name)
+        strlcpy(buf, name, IFNAMSIZ);
+
+    return 0;
+}
 
 /**
  *    dev_change_name - change name of a device
@@ -904,28 +974,20 @@ int dev_change_name(struct net_device *dev, const char *newname)
     if (dev->flags & IFF_UP)
         return -EBUSY;
 
-    if (!dev_valid_name(newname))
-        return -EINVAL;
-
     if (strncmp(newname, dev->name, IFNAMSIZ) == 0)
         return 0;
 
     memcpy(oldname, dev->name, IFNAMSIZ);
 
-    if (strchr(newname, '%')) {
-        err = dev_alloc_name(dev, newname);
-        if (err < 0)
-            return err;
-    } else if (__dev_get_by_name(net, newname))
-        return -EEXIST;
-    else
-        strlcpy(dev->name, newname, IFNAMSIZ);
+    err = dev_get_valid_name(net, newname, dev->name, 1);
+    if (err < 0)
+        return err;
 
 rollback:
     /* For now only devices in the initial network namespace
      * are in sysfs.
      */
-    if (net == &init_net) {
+    if (net_eq(net, &init_net)) {
         ret = device_rename(&dev->dev, dev->name);
         if (ret) {
             memcpy(dev->name, oldname, IFNAMSIZ);
@@ -935,7 +997,12 @@ rollback:
 
     write_lock_bh(&dev_base_lock);
     hlist_del(&dev->name_hlist);
-    hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name));
+    write_unlock_bh(&dev_base_lock);
+
+    synchronize_rcu();
+
+    write_lock_bh(&dev_base_lock);
+    hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name));
     write_unlock_bh(&dev_base_lock);
 
     ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev);
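
dev_get_valid_name() folds the three naming cases that dev_change_name() used to open-code (and that register_netdevice() duplicated further down): reject invalid names, expand printf-style templates through __dev_alloc_name(), or fail a taken literal name with -EEXIST. Roughly, for a hypothetical caller inside net/core/dev.c:

    char buf[IFNAMSIZ];
    int err;

    /* template with fmt=1: picks the first free unit, e.g. "eth2" */
    err = dev_get_valid_name(net, "eth%d", buf, 1);

    /* literal with fmt=0: -EEXIST if "eth0" is already registered */
    err = dev_get_valid_name(net, "eth0", buf, 0);
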
@@ -1038,28 +1105,16 @@ void dev_load(struct net *net, const char *name)
 {
     struct net_device *dev;
 
-    read_lock(&dev_base_lock);
-    dev = __dev_get_by_name(net, name);
-    read_unlock(&dev_base_lock);
+    rcu_read_lock();
+    dev = dev_get_by_name_rcu(net, name);
+    rcu_read_unlock();
 
     if (!dev && capable(CAP_NET_ADMIN))
         request_module("%s", name);
 }
 EXPORT_SYMBOL(dev_load);
 
-/**
- *    dev_open    - prepare an interface for use.
- *    @dev:    device to open
- *
- *    Takes a device from down to up state. The device's private open
- *    function is invoked and then the multicast lists are loaded. Finally
- *    the device is moved into the up state and a %NETDEV_UP message is
- *    sent to the netdev notifier chain.
- *
- *    Calling this function on an active interface is a nop. On a failure
- *    a negative errno code is returned.
- */
-int dev_open(struct net_device *dev)
+static int __dev_open(struct net_device *dev)
 {
     const struct net_device_ops *ops = dev->netdev_ops;
     int ret;
@@ -1067,13 +1122,6 @@ int dev_open(struct net_device *dev)
     ASSERT_RTNL();
 
     /*
-     *    Is it already up?
-     */
-
-    if (dev->flags & IFF_UP)
-        return 0;
-
-    /*
      *    Is it even present?
      */
     if (!netif_device_present(dev))
@@ -1121,36 +1169,57 @@ int dev_open(struct net_device *dev)
      *    Wakeup transmit queue engine
      */
     dev_activate(dev);
-
-    /*
-     *    ... and announce new interface.
-     */
-    call_netdevice_notifiers(NETDEV_UP, dev);
     }
 
     return ret;
 }
-EXPORT_SYMBOL(dev_open);
 
 /**
- *    dev_close - shutdown an interface.
- *    @dev: device to shutdown
+ *    dev_open    - prepare an interface for use.
+ *    @dev:    device to open
  *
- *    This function moves an active device into down state. A
- *    %NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
- *    is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
- *    chain.
+ *    Takes a device from down to up state. The device's private open
+ *    function is invoked and then the multicast lists are loaded. Finally
+ *    the device is moved into the up state and a %NETDEV_UP message is
+ *    sent to the netdev notifier chain.
+ *
+ *    Calling this function on an active interface is a nop. On a failure
+ *    a negative errno code is returned.
  */
-int dev_close(struct net_device *dev)
+int dev_open(struct net_device *dev)
+{
+    int ret;
+
+    /*
+     *    Is it already up?
+     */
+    if (dev->flags & IFF_UP)
+        return 0;
+
+    /*
+     *    Open device
+     */
+    ret = __dev_open(dev);
+    if (ret < 0)
+        return ret;
+
+    /*
+     *    ... and announce new interface.
+     */
+    rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING);
+    call_netdevice_notifiers(NETDEV_UP, dev);
+
+    return ret;
+}
+EXPORT_SYMBOL(dev_open);
+
+static int __dev_close(struct net_device *dev)
 {
     const struct net_device_ops *ops = dev->netdev_ops;
-    ASSERT_RTNL();
 
+    ASSERT_RTNL();
     might_sleep();
 
-    if (!(dev->flags & IFF_UP))
-        return 0;
-
     /*
      *    Tell people we are going down, so that they can
      *    prepare to death, when device is still operating.
@@ -1186,14 +1255,34 @@ int dev_close(struct net_device *dev)
     dev->flags &= ~IFF_UP;
 
     /*
-     *    Tell people we are down
+     *    Shutdown NET_DMA
      */
-    call_netdevice_notifiers(NETDEV_DOWN, dev);
+    net_dmaengine_put();
+
+    return 0;
+}
+
+/**
+ *    dev_close - shutdown an interface.
+ *    @dev: device to shutdown
+ *
+ *    This function moves an active device into down state. A
+ *    %NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
+ *    is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
+ *    chain.
+ */
+int dev_close(struct net_device *dev)
+{
+    if (!(dev->flags & IFF_UP))
+        return 0;
+
+    __dev_close(dev);
 
     /*
-     *    Shutdown NET_DMA
+     *    Tell people we are down
      */
-    net_dmaengine_put();
+    rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING);
+    call_netdevice_notifiers(NETDEV_DOWN, dev);
 
     return 0;
 }
@@ -1287,6 +1376,7 @@ rollback:
             nb->notifier_call(nb, NETDEV_DOWN, dev);
         }
         nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
+        nb->notifier_call(nb, NETDEV_UNREGISTER_BATCH, dev);
     }
 }
 
@@ -1353,6 +1443,41 @@ static inline void net_timestamp(struct sk_buff *skb)
     skb->tstamp.tv64 = 0;
 }
 
+/**
+ * dev_forward_skb - loopback an skb to another netif
+ *
+ * @dev: destination network device
+ * @skb: buffer to forward
+ *
+ * return values:
+ *    NET_RX_SUCCESS    (no congestion)
+ *    NET_RX_DROP    (packet was dropped, but freed)
+ *
+ * dev_forward_skb can be used for injecting an skb from the
+ * start_xmit function of one device into the receive queue
+ * of another device.
+ *
+ * The receiving device may be in another namespace, so
+ * we have to clear all information in the skb that could
+ * impact namespace isolation.
+ */
+int dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
+{
+    skb_orphan(skb);
+
+    if (!(dev->flags & IFF_UP) ||
+        (skb->len > (dev->mtu + dev->hard_header_len))) {
+        kfree_skb(skb);
+        return NET_RX_DROP;
+    }
+    skb_set_dev(skb, dev);
+    skb->tstamp.tv64 = 0;
+    skb->pkt_type = PACKET_HOST;
+    skb->protocol = eth_type_trans(skb, dev);
+    return netif_rx(skb);
+}
+EXPORT_SYMBOL_GPL(dev_forward_skb);
+
 /*
  *    Support routine. Sends outgoing frames to any network
  *    taps currently in use.
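
dev_forward_skb() packages the receive half of a software pair device. A hedged sketch of how a veth-style driver might call it from its transmit hook (peer lookup and locking elided; get_peer() is a hypothetical helper):

    static netdev_tx_t pair_xmit(struct sk_buff *skb, struct net_device *dev)
    {
        struct net_device *peer = get_peer(dev);    /* hypothetical */

        /* dev_forward_skb() orphans the skb, scrubs namespace-private
         * state via skb_set_dev() and injects it with netif_rx();
         * the skb is consumed on both success and drop. */
        if (dev_forward_skb(peer, skb) != NET_RX_SUCCESS)
            dev->stats.tx_dropped++;

        return NETDEV_TX_OK;
    }
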
@@ -1508,6 +1633,36 @@ static bool dev_can_checksum(struct net_device *dev, struct sk_buff *skb)
     return false;
 }
 
+/**
+ * skb_dev_set -- assign a new device to a buffer
+ * @skb: buffer for the new device
+ * @dev: network device
+ *
+ * If an skb is owned by a device already, we have to reset
+ * all data private to the namespace a device belongs to
+ * before assigning it a new device.
+ */
+#ifdef CONFIG_NET_NS
+void skb_set_dev(struct sk_buff *skb, struct net_device *dev)
+{
+    skb_dst_drop(skb);
+    if (skb->dev && !net_eq(dev_net(skb->dev), dev_net(dev))) {
+        secpath_reset(skb);
+        nf_reset(skb);
+        skb_init_secmark(skb);
+        skb->mark = 0;
+        skb->priority = 0;
+        skb->nf_trace = 0;
+        skb->ipvs_property = 0;
+#ifdef CONFIG_NET_SCHED
+        skb->tc_index = 0;
+#endif
+    }
+    skb->dev = dev;
+}
+EXPORT_SYMBOL(skb_set_dev);
+#endif /* CONFIG_NET_NS */
+
 /*
  * Invalidate hardware checksum when packet is to be mangled, and
  * complete checksum manually on outgoing path.
@@ -1701,7 +1856,7 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
               struct netdev_queue *txq)
 {
     const struct net_device_ops *ops = dev->netdev_ops;
-    int rc;
+    int rc = NETDEV_TX_OK;
 
     if (likely(!skb->next)) {
         if (!list_empty(&ptype_all))
@@ -1747,8 +1902,18 @@ gso:
 
         skb->next = nskb->next;
         nskb->next = NULL;
+
+        /*
+         * If device doesnt need nskb->dst, release it right now while
+         * its hot in this cpu cache
+         */
+        if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
+            skb_dst_drop(nskb);
+
         rc = ops->ndo_start_xmit(nskb, dev);
         if (unlikely(rc != NETDEV_TX_OK)) {
+            if (rc & ~NETDEV_TX_MASK)
+                goto out_kfree_gso_skb;
             nskb->next = skb->next;
             skb->next = nskb;
             return rc;
@@ -1758,11 +1923,12 @@ gso:
         return NETDEV_TX_BUSY;
     } while (skb->next);
 
-    skb->destructor = DEV_GSO_CB(skb)->destructor;
-
+out_kfree_gso_skb:
+    if (likely(skb->next == NULL))
+        skb->destructor = DEV_GSO_CB(skb)->destructor;
 out_kfree_skb:
     kfree_skb(skb);
-    return NETDEV_TX_OK;
+    return rc;
 }
 
 static u32 skb_tx_hashrnd;
@@ -1789,16 +1955,47 @@ u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb)
 }
 EXPORT_SYMBOL(skb_tx_hash);
 
+static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index)
+{
+    if (unlikely(queue_index >= dev->real_num_tx_queues)) {
+        if (net_ratelimit()) {
+            WARN(1, "%s selects TX queue %d, but "
+                 "real number of TX queues is %d\n",
+                 dev->name, queue_index,
+                 dev->real_num_tx_queues);
+        }
+        return 0;
+    }
+    return queue_index;
+}
+
 static struct netdev_queue *dev_pick_tx(struct net_device *dev,
                     struct sk_buff *skb)
 {
-    const struct net_device_ops *ops = dev->netdev_ops;
-    u16 queue_index = 0;
+    u16 queue_index;
+    struct sock *sk = skb->sk;
+
+    if (sk_tx_queue_recorded(sk)) {
+        queue_index = sk_tx_queue_get(sk);
+    } else {
+        const struct net_device_ops *ops = dev->netdev_ops;
+
+        if (ops->ndo_select_queue) {
+            queue_index = ops->ndo_select_queue(dev, skb);
+            queue_index = dev_cap_txqueue(dev, queue_index);
+        } else {
+            queue_index = 0;
+            if (dev->real_num_tx_queues > 1)
+                queue_index = skb_tx_hash(dev, skb);
+
+            if (sk) {
+                struct dst_entry *dst = rcu_dereference_bh(sk->sk_dst_cache);
 
-    if (ops->ndo_select_queue)
-        queue_index = ops->ndo_select_queue(dev, skb);
-    else if (dev->real_num_tx_queues > 1)
-        queue_index = skb_tx_hash(dev, skb);
+                if (dst && skb_dst(skb) == dst)
+                    sk_tx_queue_set(sk, queue_index);
+            }
+        }
+    }
 
     skb_set_queue_mapping(skb, queue_index);
     return netdev_get_tx_queue(dev, queue_index);
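
dev_cap_txqueue() exists because ndo_select_queue() is driver-supplied and its result is no longer trusted blindly: anything at or beyond real_num_tx_queues is clamped to queue 0 after a ratelimited WARN. A driver hook therefore only has to be plausible, e.g. (illustrative sketch, not from this patch):

    static u16 my_select_queue(struct net_device *dev, struct sk_buff *skb)
    {
        /* steer control traffic to the last queue; the core clamps
         * any out-of-range index for us */
        if (skb->priority == TC_PRIO_CONTROL)
            return dev->real_num_tx_queues - 1;
        return skb_tx_hash(dev, skb);
    }

The sk_tx_queue_get()/sk_tx_queue_set() pair in the hunk above caches the chosen queue on the socket, so later packets of the same flow skip the hash; the cache is only primed while the socket's cached route still matches the skb's.
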
@@ -1838,6 +2035,21 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
     return rc;
 }
 
+/*
+ * Returns true if either:
+ *    1. skb has frag_list and the device doesn't support FRAGLIST, or
+ *    2. skb is fragmented and the device does not support SG, or if
+ *       at least one of fragments is in highmem and device does not
+ *       support DMA from it.
+ */
+static inline int skb_needs_linearize(struct sk_buff *skb,
+                      struct net_device *dev)
+{
+    return (skb_has_frags(skb) && !(dev->features & NETIF_F_FRAGLIST)) ||
+           (skb_shinfo(skb)->nr_frags && (!(dev->features & NETIF_F_SG) ||
+                          illegal_highdma(dev, skb)));
+}
+
 /**
  *    dev_queue_xmit - transmit a buffer
  *    @skb: buffer to transmit
@@ -1874,18 +2086,8 @@ int dev_queue_xmit(struct sk_buff *skb)
     if (netif_needs_gso(dev, skb))
         goto gso;
 
-    if (skb_has_frags(skb) &&
-        !(dev->features & NETIF_F_FRAGLIST) &&
-        __skb_linearize(skb))
-        goto out_kfree_skb;
-
-    /* Fragmented skb is linearized if device does not support SG,
-     * or if at least one of fragments is in highmem and device
-     * does not support DMA from it.
-     */
-    if (skb_shinfo(skb)->nr_frags &&
-        (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) &&
-        __skb_linearize(skb))
+    /* Convert a paged skb to linear, if required */
+    if (skb_needs_linearize(skb, dev) && __skb_linearize(skb))
         goto out_kfree_skb;
 
     /* If packet is not checksummed and device does not support
@@ -1905,7 +2107,7 @@ gso:
     rcu_read_lock_bh();
 
     txq = dev_pick_tx(dev, skb);
-    q = rcu_dereference(txq->qdisc);
+    q = rcu_dereference_bh(txq->qdisc);
 
 #ifdef CONFIG_NET_CLS_ACT
     skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS);
@@ -1935,8 +2137,8 @@ gso:
             HARD_TX_LOCK(dev, txq, cpu);
 
             if (!netif_tx_queue_stopped(txq)) {
-                rc = NET_XMIT_SUCCESS;
-                if (!dev_hard_start_xmit(skb, dev, txq)) {
+                rc = dev_hard_start_xmit(skb, dev, txq);
+                if (dev_xmit_complete(rc)) {
                     HARD_TX_UNLOCK(dev, txq);
                     goto out;
                 }
@@ -2191,7 +2393,7 @@ static int ing_filter(struct sk_buff *skb)
     if (MAX_RED_LOOP < ttl++) {
         printk(KERN_WARNING
                "Redir loop detected Dropping packet (%d->%d)\n",
-               skb->iif, dev->ifindex);
+               skb->skb_iif, dev->ifindex);
         return TC_ACT_SHOT;
     }
 
@@ -2285,30 +2487,33 @@ int netif_receive_skb(struct sk_buff *skb)
 {
     struct packet_type *ptype, *pt_prev;
     struct net_device *orig_dev;
+    struct net_device *master;
     struct net_device *null_or_orig;
+    struct net_device *null_or_bond;
     int ret = NET_RX_DROP;
     __be16 type;
 
     if (!skb->tstamp.tv64)
         net_timestamp(skb);
 
-    if (skb->vlan_tci && vlan_hwaccel_do_receive(skb))
+    if (vlan_tx_tag_present(skb) && vlan_hwaccel_do_receive(skb))
         return NET_RX_SUCCESS;
 
     /* if we've gotten here through NAPI, check netpoll */
     if (netpoll_receive_skb(skb))
         return NET_RX_DROP;
 
-    if (!skb->iif)
-        skb->iif = skb->dev->ifindex;
+    if (!skb->skb_iif)
+        skb->skb_iif = skb->dev->ifindex;
 
     null_or_orig = NULL;
     orig_dev = skb->dev;
-    if (orig_dev->master) {
-        if (skb_bond_should_drop(skb))
+    master = ACCESS_ONCE(orig_dev->master);
+    if (master) {
+        if (skb_bond_should_drop(skb, master))
             null_or_orig = orig_dev; /* deliver only exact match */
         else
-            skb->dev = orig_dev->master;
+            skb->dev = master;
     }
 
     __get_cpu_var(netdev_rx_stat).total++;
@@ -2351,12 +2556,24 @@ ncls:
     if (!skb)
         goto out;
 
+    /*
+     * Make sure frames received on VLAN interfaces stacked on
+     * bonding interfaces still make their way to any base bonding
+     * device that may have registered for a specific ptype.  The
+     * handler may have to adjust skb->dev and orig_dev.
+     */
+    null_or_bond = NULL;
+    if ((skb->dev->priv_flags & IFF_802_1Q_VLAN) &&
+        (vlan_dev_real_dev(skb->dev)->priv_flags & IFF_BONDING)) {
+        null_or_bond = vlan_dev_real_dev(skb->dev);
+    }
+
     type = skb->protocol;
     list_for_each_entry_rcu(ptype,
             &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
-        if (ptype->type == type &&
-            (ptype->dev == null_or_orig || ptype->dev == skb->dev ||
-             ptype->dev == orig_dev)) {
+        if (ptype->type == type && (ptype->dev == null_or_orig ||
+            ptype->dev == skb->dev || ptype->dev == orig_dev ||
+            ptype->dev == null_or_bond)) {
             if (pt_prev)
                 ret = deliver_skb(skb, pt_prev, orig_dev);
             pt_prev = ptype;
@@ -2425,7 +2642,7 @@ out:
     return netif_receive_skb(skb);
 }
 
-void napi_gro_flush(struct napi_struct *napi)
+static void napi_gro_flush(struct napi_struct *napi)
 {
     struct sk_buff *skb, *next;
 
@@ -2438,9 +2655,8 @@ void napi_gro_flush(struct napi_struct *napi)
     napi->gro_count = 0;
     napi->gro_list = NULL;
 }
-EXPORT_SYMBOL(napi_gro_flush);
 
-int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
+enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 {
     struct sk_buff **pp = NULL;
     struct packet_type *ptype;
@@ -2448,7 +2664,7 @@ int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
     struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
     int same_flow;
     int mac_len;
-    int ret;
+    enum gro_result ret;
 
     if (!(skb->dev->features & NETIF_F_GRO))
         goto normal;
@@ -2532,7 +2748,8 @@ normal:
 }
 EXPORT_SYMBOL(dev_gro_receive);
 
-static int __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
+static gro_result_t
+__napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 {
     struct sk_buff *p;
 
@@ -2540,33 +2757,35 @@ static int __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
         return GRO_NORMAL;
 
     for (p = napi->gro_list; p; p = p->next) {
-        NAPI_GRO_CB(p)->same_flow = (p->dev == skb->dev)
-            && !compare_ether_header(skb_mac_header(p),
-                         skb_gro_mac_header(skb));
+        NAPI_GRO_CB(p)->same_flow =
+            (p->dev == skb->dev) &&
+            !compare_ether_header(skb_mac_header(p),
+                          skb_gro_mac_header(skb));
         NAPI_GRO_CB(p)->flush = 0;
     }
 
     return dev_gro_receive(napi, skb);
 }
 
-int napi_skb_finish(int ret, struct sk_buff *skb)
+gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
 {
-    int err = NET_RX_SUCCESS;
-
     switch (ret) {
     case GRO_NORMAL:
-        return netif_receive_skb(skb);
+        if (netif_receive_skb(skb))
+            ret = GRO_DROP;
+        break;
 
     case GRO_DROP:
-        err = NET_RX_DROP;
-        /* fall through */
-
     case GRO_MERGED_FREE:
         kfree_skb(skb);
         break;
+
+    case GRO_HELD:
+    case GRO_MERGED:
+        break;
     }
 
-    return err;
+    return ret;
 }
 EXPORT_SYMBOL(napi_skb_finish);
 
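
With the switch from int/NET_RX_* to gro_result_t, napi_gro_receive() now reports what GRO actually did (merged, held, delivered, dropped) instead of a receive verdict. Most drivers can ignore the value; a poll loop keeps its usual shape (sketch; my_rx_next() is a hypothetical per-driver dequeue helper):

    static int my_poll(struct napi_struct *napi, int budget)
    {
        struct sk_buff *skb;
        int work = 0;

        while (work < budget && (skb = my_rx_next(napi)) != NULL) {
            napi_gro_receive(napi, skb);    /* gro_result_t usually ignored */
            work++;
        }
        if (work < budget)
            napi_complete(napi);
        return work;
    }
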
@@ -2586,7 +2805,7 @@ void skb_gro_reset_offset(struct sk_buff *skb)
 }
 EXPORT_SYMBOL(skb_gro_reset_offset);
 
-int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
+gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 {
     skb_gro_reset_offset(skb);
 
@@ -2605,49 +2824,41 @@ EXPORT_SYMBOL(napi_reuse_skb);
 
 struct sk_buff *napi_get_frags(struct napi_struct *napi)
 {
-    struct net_device *dev = napi->dev;
     struct sk_buff *skb = napi->skb;
 
     if (!skb) {
-        skb = netdev_alloc_skb(dev, GRO_MAX_HEAD + NET_IP_ALIGN);
-        if (!skb)
-            goto out;
-
-        skb_reserve(skb, NET_IP_ALIGN);
-
-        napi->skb = skb;
+        skb = netdev_alloc_skb_ip_align(napi->dev, GRO_MAX_HEAD);
+        if (skb)
+            napi->skb = skb;
     }
-
-out:
     return skb;
 }
 EXPORT_SYMBOL(napi_get_frags);
 
-int napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, int ret)
+gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb,
+                   gro_result_t ret)
 {
-    int err = NET_RX_SUCCESS;
-
     switch (ret) {
     case GRO_NORMAL:
     case GRO_HELD:
-        skb->protocol = eth_type_trans(skb, napi->dev);
+        skb->protocol = eth_type_trans(skb, skb->dev);
 
-        if (ret == GRO_NORMAL)
-            return netif_receive_skb(skb);
-
-        skb_gro_pull(skb, -ETH_HLEN);
+        if (ret == GRO_HELD)
+            skb_gro_pull(skb, -ETH_HLEN);
+        else if (netif_receive_skb(skb))
+            ret = GRO_DROP;
         break;
 
     case GRO_DROP:
-        err = NET_RX_DROP;
-        /* fall through */
-
     case GRO_MERGED_FREE:
         napi_reuse_skb(napi, skb);
         break;
+
+    case GRO_MERGED:
+        break;
     }
 
-    return err;
+    return ret;
 }
 EXPORT_SYMBOL(napi_frags_finish);
 
@@ -2688,12 +2899,12 @@ out:
 }
 EXPORT_SYMBOL(napi_frags_skb);
 
-int napi_gro_frags(struct napi_struct *napi)
+gro_result_t napi_gro_frags(struct napi_struct *napi)
 {
     struct sk_buff *skb = napi_frags_skb(napi);
 
     if (!skb)
-        return NET_RX_DROP;
+        return GRO_DROP;
 
     return napi_frags_finish(napi, skb, __napi_gro_receive(napi, skb));
 }
@@ -2835,7 +3046,7 @@ static void net_rx_action(struct softirq_action *h)
          * entries to the tail of this list, and only ->poll()
          * calls can remove this head entry from the list.
          */
-        n = list_entry(list->next, struct napi_struct, poll_list);
+        n = list_first_entry(list, struct napi_struct, poll_list);
 
         have = netpoll_poll_lock(n);
 
@@ -2938,15 +3149,15 @@ static int dev_ifname(struct net *net, struct ifreq __user *arg)
     if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
         return -EFAULT;
 
-    read_lock(&dev_base_lock);
-    dev = __dev_get_by_index(net, ifr.ifr_ifindex);
+    rcu_read_lock();
+    dev = dev_get_by_index_rcu(net, ifr.ifr_ifindex);
     if (!dev) {
-        read_unlock(&dev_base_lock);
+        rcu_read_unlock();
         return -ENODEV;
     }
 
     strcpy(ifr.ifr_name, dev->name);
-    read_unlock(&dev_base_lock);
+    rcu_read_unlock();
 
     if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
         return -EFAULT;
@@ -3016,18 +3227,18 @@ static int dev_ifconf(struct net *net, char __user *arg)
  * in detail.
  */
 void *dev_seq_start(struct seq_file *seq, loff_t *pos)
-    __acquires(dev_base_lock)
+    __acquires(RCU)
 {
     struct net *net = seq_file_net(seq);
     loff_t off;
     struct net_device *dev;
 
-    read_lock(&dev_base_lock);
+    rcu_read_lock();
     if (!*pos)
         return SEQ_START_TOKEN;
 
     off = 1;
-    for_each_netdev(net, dev)
+    for_each_netdev_rcu(net, dev)
         if (off++ == *pos)
             return dev;
 
@@ -3036,23 +3247,25 @@ void *dev_seq_start(struct seq_file *seq, loff_t *pos)
 
 void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
-    struct net *net = seq_file_net(seq);
+    struct net_device *dev = (v == SEQ_START_TOKEN) ?
+                  first_net_device(seq_file_net(seq)) :
+                  next_net_device((struct net_device *)v);
+
     ++*pos;
-    return v == SEQ_START_TOKEN ?
-        first_net_device(net) : next_net_device((struct net_device *)v);
+    return rcu_dereference(dev);
 }
 
 void dev_seq_stop(struct seq_file *seq, void *v)
-    __releases(dev_base_lock)
+    __releases(RCU)
 {
-    read_unlock(&dev_base_lock);
+    rcu_read_unlock();
 }
 
 static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
 {
     const struct net_device_stats *stats = dev_get_stats(dev);
 
-    seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu "
+    seq_printf(seq, "%6s: %7lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu "
            "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
            dev->name, stats->rx_bytes, stats->rx_packets,
            stats->rx_errors,
@@ -3507,10 +3720,10 @@ void __dev_set_rx_mode(struct net_device *dev)
     /* Unicast addresses changes may only happen under the rtnl,
      * therefore calling __dev_set_promiscuity here is safe.
      */
-    if (dev->uc.count > 0 && !dev->uc_promisc) {
+    if (!netdev_uc_empty(dev) && !dev->uc_promisc) {
         __dev_set_promiscuity(dev, 1);
         dev->uc_promisc = 1;
-    } else if (dev->uc.count == 0 && dev->uc_promisc) {
+    } else if (netdev_uc_empty(dev) && dev->uc_promisc) {
         __dev_set_promiscuity(dev, -1);
         dev->uc_promisc = 0;
     }
@@ -4078,7 +4291,7 @@ static void dev_addr_discard(struct net_device *dev)
     netif_addr_lock_bh(dev);
 
     __dev_addr_discard(&dev->mc_list);
-    dev->mc_count = 0;
+    netdev_mc_count(dev) = 0;
 
     netif_addr_unlock_bh(dev);
 }
@@ -4114,18 +4327,10 @@ unsigned dev_get_flags(const struct net_device *dev)
 }
 EXPORT_SYMBOL(dev_get_flags);
 
-/**
- *    dev_change_flags - change device settings
- *    @dev: device
- *    @flags: device state flags
- *
- *    Change settings on device based state flags. The flags are
- *    in the userspace exported format.
- */
-int dev_change_flags(struct net_device *dev, unsigned flags)
+int __dev_change_flags(struct net_device *dev, unsigned int flags)
 {
-    int ret, changes;
     int old_flags = dev->flags;
+    int ret;
 
     ASSERT_RTNL();
 
@@ -4156,17 +4361,12 @@ int dev_change_flags(struct net_device *dev, unsigned flags)
 
     ret = 0;
     if ((old_flags ^ flags) & IFF_UP) {    /* Bit is different  ? */
-        ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev);
+        ret = ((old_flags & IFF_UP) ? __dev_close : __dev_open)(dev);
 
         if (!ret)
             dev_set_rx_mode(dev);
     }
 
-    if (dev->flags & IFF_UP &&
-        ((old_flags ^ dev->flags) & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI |
-                      IFF_VOLATILE)))
-        call_netdevice_notifiers(NETDEV_CHANGE, dev);
-
     if ((flags ^ dev->gflags) & IFF_PROMISC) {
         int inc = (flags & IFF_PROMISC) ? 1 : -1;
 
@@ -4185,11 +4385,47 @@ int dev_change_flags(struct net_device *dev, unsigned flags)
         dev_set_allmulti(dev, inc);
     }
 
-    /* Exclude state transition flags, already notified */
-    changes = (old_flags ^ dev->flags) & ~(IFF_UP | IFF_RUNNING);
+    return ret;
+}
+
+void __dev_notify_flags(struct net_device *dev, unsigned int old_flags)
+{
+    unsigned int changes = dev->flags ^ old_flags;
+
+    if (changes & IFF_UP) {
+        if (dev->flags & IFF_UP)
+            call_netdevice_notifiers(NETDEV_UP, dev);
+        else
+            call_netdevice_notifiers(NETDEV_DOWN, dev);
+    }
+
+    if (dev->flags & IFF_UP &&
+        (changes & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI | IFF_VOLATILE)))
+        call_netdevice_notifiers(NETDEV_CHANGE, dev);
+}
+
+/**
+ *    dev_change_flags - change device settings
+ *    @dev: device
+ *    @flags: device state flags
+ *
+ *    Change settings on device based state flags. The flags are
+ *    in the userspace exported format.
+ */
+int dev_change_flags(struct net_device *dev, unsigned flags)
+{
+    int ret, changes;
+    int old_flags = dev->flags;
+
+    ret = __dev_change_flags(dev, flags);
+    if (ret < 0)
+        return ret;
+
+    changes = old_flags ^ dev->flags;
     if (changes)
         rtmsg_ifinfo(RTM_NEWLINK, dev, changes);
 
+    __dev_notify_flags(dev, old_flags);
     return ret;
 }
 EXPORT_SYMBOL(dev_change_flags);
@@ -4254,12 +4490,12 @@ int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
 EXPORT_SYMBOL(dev_set_mac_address);
 
 /*
- *    Perform the SIOCxIFxxx calls, inside read_lock(dev_base_lock)
+ *    Perform the SIOCxIFxxx calls, inside rcu_read_lock()
  */
 static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cmd)
 {
     int err;
-    struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
+    struct net_device *dev = dev_get_by_name_rcu(net, ifr->ifr_name);
 
     if (!dev)
         return -ENODEV;
@@ -4491,9 +4727,9 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
     case SIOCGIFINDEX:
     case SIOCGIFTXQLEN:
         dev_load(net, ifr.ifr_name);
-        read_lock(&dev_base_lock);
+        rcu_read_lock();
         ret = dev_ifsioc_locked(net, &ifr, cmd);
-        read_unlock(&dev_base_lock);
+        rcu_read_unlock();
         if (!ret) {
             if (colon)
                 *colon = ':';
@@ -4636,59 +4872,86 @@ static void net_set_todo(struct net_device *dev)
     list_add_tail(&dev->todo_list, &net_todo_list);
 }
 
-static void rollback_registered(struct net_device *dev)
+static void rollback_registered_many(struct list_head *head)
 {
+    struct net_device *dev, *tmp;
+
     BUG_ON(dev_boot_phase);
     ASSERT_RTNL();
 
-    /* Some devices call without registering for initialization unwind. */
-    if (dev->reg_state == NETREG_UNINITIALIZED) {
-        printk(KERN_DEBUG "unregister_netdevice: device %s/%p never "
-                  "was registered\n", dev->name, dev);
+    list_for_each_entry_safe(dev, tmp, head, unreg_list) {
+        /* Some devices call without registering
+         * for initialization unwind. Remove those
+         * devices and proceed with the remaining.
+         */
+        if (dev->reg_state == NETREG_UNINITIALIZED) {
+            pr_debug("unregister_netdevice: device %s/%p never "
+                 "was registered\n", dev->name, dev);
 
-        WARN_ON(1);
-        return;
-    }
+            WARN_ON(1);
+            list_del(&dev->unreg_list);
+            continue;
+        }
 
-    BUG_ON(dev->reg_state != NETREG_REGISTERED);
+        BUG_ON(dev->reg_state != NETREG_REGISTERED);
 
-    /* If device is running, close it first. */
-    dev_close(dev);
+        /* If device is running, close it first. */
+        dev_close(dev);
 
-    /* And unlink it from device chain. */
-    unlist_netdevice(dev);
+        /* And unlink it from device chain. */
+        unlist_netdevice(dev);
 
-    dev->reg_state = NETREG_UNREGISTERING;
+        dev->reg_state = NETREG_UNREGISTERING;
+    }
 
     synchronize_net();
 
-    /* Shutdown queueing discipline. */
-    dev_shutdown(dev);
+    list_for_each_entry(dev, head, unreg_list) {
+        /* Shutdown queueing discipline. */
+        dev_shutdown(dev);
 
 
-    /* Notify protocols, that we are about to destroy
-       this device. They should clean all the things.
-    */
-    call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
+        /* Notify protocols, that we are about to destroy
+           this device. They should clean all the things.
+        */
+        call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
 
-    /*
-     *    Flush the unicast and multicast chains
-     */
-    dev_unicast_flush(dev);
-    dev_addr_discard(dev);
+        if (!dev->rtnl_link_ops ||
+            dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
+            rtmsg_ifinfo(RTM_DELLINK, dev, ~0U);
 
-    if (dev->netdev_ops->ndo_uninit)
-        dev->netdev_ops->ndo_uninit(dev);
+        /*
+         *    Flush the unicast and multicast chains
+         */
+        dev_unicast_flush(dev);
+        dev_addr_discard(dev);
 
-    /* Notifier chain MUST detach us from master device. */
-    WARN_ON(dev->master);
+        if (dev->netdev_ops->ndo_uninit)
+            dev->netdev_ops->ndo_uninit(dev);
 
-    /* Remove entries from kobject tree */
-    netdev_unregister_kobject(dev);
+        /* Notifier chain MUST detach us from master device. */
+        WARN_ON(dev->master);
+
+        /* Remove entries from kobject tree */
+        netdev_unregister_kobject(dev);
+    }
+
+    /* Process any work delayed until the end of the batch */
+    dev = list_first_entry(head, struct net_device, unreg_list);
+    call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev);
 
     synchronize_net();
 
-    dev_put(dev);
+    list_for_each_entry(dev, head, unreg_list)
+        dev_put(dev);
+}
+
+static void rollback_registered(struct net_device *dev)
+{
+    LIST_HEAD(single);
+
+    list_add(&dev->unreg_list, &single);
+    rollback_registered_many(&single);
 }
 
 static void __netdev_init_queue_locks_one(struct net_device *dev,
@@ -4747,6 +5010,33 @@ unsigned long netdev_fix_features(unsigned long features, const char *name)
 EXPORT_SYMBOL(netdev_fix_features);
 
 /**
+ *    netif_stacked_transfer_operstate -    transfer operstate
+ *    @rootdev: the root or lower level device to transfer state from
+ *    @dev: the device to transfer operstate to
+ *
+ *    Transfer operational state from root to device. This is normally
+ *    called when a stacking relationship exists between the root
+ *    device and the device(a leaf device).
+ */
+void netif_stacked_transfer_operstate(const struct net_device *rootdev,
+                      struct net_device *dev)
+{
+    if (rootdev->operstate == IF_OPER_DORMANT)
+        netif_dormant_on(dev);
+    else
+        netif_dormant_off(dev);
+
+    if (netif_carrier_ok(rootdev)) {
+        if (!netif_carrier_ok(dev))
+            netif_carrier_on(dev);
+    } else {
+        if (netif_carrier_ok(dev))
+            netif_carrier_off(dev);
+    }
+}
+EXPORT_SYMBOL(netif_stacked_transfer_operstate);
+
+/**
  *    register_netdevice    - register a network device
  *    @dev: device to register
  *
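
netif_stacked_transfer_operstate() gives VLAN/macvlan-style drivers one helper for mirroring carrier and dormant state from the lower device. A hedged sketch of a notifier keeping an upper device in sync (my_upper_dev() is hypothetical; in this kernel the notifier's ptr is the net_device itself):

    static int stacked_device_event(struct notifier_block *nb,
                                    unsigned long event, void *ptr)
    {
        struct net_device *lower = ptr;
        struct net_device *upper = my_upper_dev(lower);    /* hypothetical */

        if (upper && event == NETDEV_CHANGE)
            /* mirror carrier + dormant state from lower to upper */
            netif_stacked_transfer_operstate(lower, upper);

        return NOTIFY_DONE;
    }
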
@@ -4765,8 +5055,6 @@ EXPORT_SYMBOL(netdev_fix_features);
 
 int register_netdevice(struct net_device *dev)
 {
-    struct hlist_head *head;
-    struct hlist_node *p;
     int ret;
     struct net *net = dev_net(dev);
 
@@ -4795,26 +5083,14 @@ int register_netdevice(struct net_device *dev)
         }
     }
 
-    if (!dev_valid_name(dev->name)) {
-        ret = -EINVAL;
+    ret = dev_get_valid_name(net, dev->name, dev->name, 0);
+    if (ret)
         goto err_uninit;
-    }
 
     dev->ifindex = dev_new_index(net);
     if (dev->iflink == -1)
         dev->iflink = dev->ifindex;
 
-    /* Check for existence of name */
-    head = dev_name_hash(net, dev->name);
-    hlist_for_each(p, head) {
-        struct net_device *d
-            = hlist_entry(p, struct net_device, name_hlist);
-        if (!strncmp(d->name, dev->name, IFNAMSIZ)) {
-            ret = -EEXIST;
-            goto err_uninit;
-        }
-    }
-
     /* Fix illegal checksum combinations */
     if ((dev->features & NETIF_F_HW_CSUM) &&
         (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
@@ -4837,6 +5113,12 @@ int register_netdevice(struct net_device *dev)
     dev->features |= NETIF_F_GSO;
 
     netdev_initialize_kobject(dev);
+
+    ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev);
+    ret = notifier_to_errno(ret);
+    if (ret)
+        goto err_uninit;
+
     ret = netdev_register_kobject(dev);
     if (ret)
         goto err_uninit;
@@ -4860,6 +5142,13 @@ int register_netdevice(struct net_device *dev)
         rollback_registered(dev);
         dev->reg_state = NETREG_UNREGISTERED;
     }
+    /*
+     *    Prevent userspace races by waiting until the network
+     *    device is fully setup before sending notifications.
+     */
+    if (!dev->rtnl_link_ops ||
+        dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
+        rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U);
 
 out:
     return ret;
@@ -4961,6 +5250,8 @@ static void netdev_wait_allrefs(struct net_device *dev)
 {
     unsigned long rebroadcast_time, warning_time;
 
+    linkwatch_forget_dev(dev);
+
     rebroadcast_time = warning_time = jiffies;
     while (atomic_read(&dev->refcnt) != 0) {
         if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
@@ -4968,6 +5259,8 @@ static void netdev_wait_allrefs(struct net_device *dev)
 
             /* Rebroadcast unregister notification */
             call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
+            /* don't resend NETDEV_UNREGISTER_BATCH, _BATCH users
+             * should have already handle it the first time */
 
             if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
                      &dev->state)) {
@@ -5032,7 +5325,7 @@ void netdev_run_todo(void)
 
     while (!list_empty(&list)) {
         struct net_device *dev
-            = list_entry(list.next, struct net_device, todo_list);
+            = list_first_entry(&list, struct net_device, todo_list);
         list_del(&dev->todo_list);
 
         if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
@@ -5063,6 +5356,32 @@ void netdev_run_todo(void)
 }
 
 /**
+ *    dev_txq_stats_fold - fold tx_queues stats
+ *    @dev: device to get statistics from
+ *    @stats: struct net_device_stats to hold results
+ */
+void dev_txq_stats_fold(const struct net_device *dev,
+            struct net_device_stats *stats)
+{
+    unsigned long tx_bytes = 0, tx_packets = 0, tx_dropped = 0;
+    unsigned int i;
+    struct netdev_queue *txq;
+
+    for (i = 0; i < dev->num_tx_queues; i++) {
+        txq = netdev_get_tx_queue(dev, i);
+        tx_bytes += txq->tx_bytes;
+        tx_packets += txq->tx_packets;
+        tx_dropped += txq->tx_dropped;
+    }
+    if (tx_bytes || tx_packets || tx_dropped) {
+        stats->tx_bytes = tx_bytes;
+        stats->tx_packets = tx_packets;
+        stats->tx_dropped = tx_dropped;
+    }
+}
+EXPORT_SYMBOL(dev_txq_stats_fold);
+
+/**
  *    dev_get_stats    - get network device statistics
  *    @dev: device to get statistics from
  *
@@ -5076,25 +5395,9 @@ const struct net_device_stats *dev_get_stats(struct net_device *dev)
 
 	if (ops->ndo_get_stats)
 		return ops->ndo_get_stats(dev);
-	else {
-		unsigned long tx_bytes = 0, tx_packets = 0, tx_dropped = 0;
-		struct net_device_stats *stats = &dev->stats;
-		unsigned int i;
-		struct netdev_queue *txq;
-
-		for (i = 0; i < dev->num_tx_queues; i++) {
-			txq = netdev_get_tx_queue(dev, i);
-			tx_bytes += txq->tx_bytes;
-			tx_packets += txq->tx_packets;
-			tx_dropped += txq->tx_dropped;
-		}
-		if (tx_bytes || tx_packets || tx_dropped) {
-			stats->tx_bytes = tx_bytes;
-			stats->tx_packets = tx_packets;
-			stats->tx_dropped = tx_dropped;
-		}
-		return stats;
-	}
+
+	dev_txq_stats_fold(dev, &dev->stats);
+	return &dev->stats;
 }
 EXPORT_SYMBOL(dev_get_stats);
 
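With the fold factored out, dev_get_stats() keeps its previous default behavior, and drivers whose queues update tx_bytes/tx_packets/tx_dropped can reuse the helper from their own ndo_get_stats. A minimal sketch, assuming a hypothetical driver function foo_get_stats():

	/* Fold the per-queue TX counters into dev->stats before
	 * handing the structure back to the stack. */
	static struct net_device_stats *foo_get_stats(struct net_device *dev)
	{
		dev_txq_stats_fold(dev, &dev->stats);
		return &dev->stats;
	}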
@@ -5173,7 +5476,11 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
 
 	netdev_init_queues(dev);
 
+	INIT_LIST_HEAD(&dev->ethtool_ntuple_list.list);
+	dev->ethtool_ntuple_list.count = 0;
 	INIT_LIST_HEAD(&dev->napi_list);
+	INIT_LIST_HEAD(&dev->unreg_list);
+	INIT_LIST_HEAD(&dev->link_watch_list);
 	dev->priv_flags = IFF_XMIT_DST_RELEASE;
 	setup(dev);
 	strcpy(dev->name, name);
@@ -5207,6 +5514,9 @@ void free_netdev(struct net_device *dev)
 	/* Flush device addresses */
 	dev_addr_flush(dev);
 
+	/* Clear ethtool n-tuple list */
+	ethtool_ntuple_flush(dev);
+
 	list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)
 		netif_napi_del(p);
 
@@ -5238,25 +5548,47 @@ void synchronize_net(void)
 EXPORT_SYMBOL(synchronize_net);
 
 /**
- * unregister_netdevice - remove device from the kernel
+ * unregister_netdevice_queue - remove device from the kernel
 * @dev: device
+ * @head: list
 *
 * This function shuts down a device interface and removes it
 * from the kernel tables.
+ * If head is not NULL, the device is queued to be unregistered later.
 *
 * Callers must hold the rtnl semaphore. You may want
 * unregister_netdev() instead of this.
 */
 
-void unregister_netdevice(struct net_device *dev)
+void unregister_netdevice_queue(struct net_device *dev, struct list_head *head)
 {
 	ASSERT_RTNL();
 
-	rollback_registered(dev);
-	/* Finish processing unregister after unlock */
-	net_set_todo(dev);
+	if (head) {
+		list_move_tail(&dev->unreg_list, head);
+	} else {
+		rollback_registered(dev);
+		/* Finish processing unregister after unlock */
+		net_set_todo(dev);
+	}
 }
-EXPORT_SYMBOL(unregister_netdevice);
+EXPORT_SYMBOL(unregister_netdevice_queue);
+
+/**
+ * unregister_netdevice_many - unregister many devices
+ * @head: list of devices
+ */
+void unregister_netdevice_many(struct list_head *head)
+{
+	struct net_device *dev;
+
+	if (!list_empty(head)) {
+		rollback_registered_many(head);
+		list_for_each_entry(dev, head, unreg_list)
+			net_set_todo(dev);
+	}
+}
+EXPORT_SYMBOL(unregister_netdevice_many);
 
 /**
 * unregister_netdev - remove device from the kernel
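unregister_netdevice() itself becomes a thin wrapper that calls unregister_netdevice_queue(dev, NULL), so single-device callers are unchanged. The split exists for batching: queueing several devices lets rollback_registered_many() amortize the notifier round-trips and synchronization barriers across the whole batch instead of paying them per device. A hedged sketch (dev1 and dev2 are hypothetical; caller holds RTNL):

	LIST_HEAD(kill_list);

	unregister_netdevice_queue(dev1, &kill_list);	/* queued on dev->unreg_list */
	unregister_netdevice_queue(dev2, &kill_list);
	unregister_netdevice_many(&kill_list);		/* one rollback for the whole list */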
@@ -5293,8 +5625,6 @@ EXPORT_SYMBOL(unregister_netdev);
 
 int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat)
 {
-	char buf[IFNAMSIZ];
-	const char *destname;
 	int err;
 
 	ASSERT_RTNL();
@@ -5327,20 +5657,11 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
	 * we can use it in the destination network namespace.
	 */
 	err = -EEXIST;
-	destname = dev->name;
-	if (__dev_get_by_name(net, destname)) {
+	if (__dev_get_by_name(net, dev->name)) {
 		/* We get here if we can't use the current device name */
 		if (!pat)
 			goto out;
-		if (!dev_valid_name(pat))
-			goto out;
-		if (strchr(pat, '%')) {
-			if (__dev_alloc_name(net, pat, buf) < 0)
-				goto out;
-			destname = buf;
-		} else
-			destname = pat;
-		if (__dev_get_by_name(net, destname))
+		if (dev_get_valid_name(net, pat, dev->name, 1))
 			goto out;
 	}
 
@@ -5364,6 +5685,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
	   this device. They should clean all the things.
	*/
 	call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
+	call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev);
 
 	/*
	 * Flush the unicast and multicast chains
@@ -5376,10 +5698,6 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
 	/* Actually switch the network namespace */
 	dev_net_set(dev, net);
 
-	/* Assign the new device name */
-	if (destname != dev->name)
-		strcpy(dev->name, destname);
-
 	/* If there is an ifindex conflict assign a new one */
 	if (__dev_get_by_index(net, dev->ifindex)) {
 		int iflink = (dev->iflink == dev->ifindex);
@@ -5398,6 +5716,12 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
 	/* Notify protocols, that a new device appeared. */
 	call_netdevice_notifiers(NETDEV_REGISTER, dev);
 
+	/*
+	 *	Prevent userspace races by waiting until the network
+	 *	device is fully set up before sending notifications.
+	 */
+	rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U);
+
 	synchronize_net();
 	err = 0;
 out:
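For reference, a hedged sketch of a typical caller: moving a device into another namespace under RTNL, with a templated fallback name in case the current one clashes in the target (dev and net are assumed valid and held by the caller):

	/* Keep the device's name when possible; otherwise let the
	 * core pick the next free eth<n> in the destination. */
	err = dev_change_net_namespace(dev, net, "eth%d");
	if (err)
		return err;	/* device remains in its old namespace */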
@@ -5484,7 +5808,7 @@ unsigned long netdev_increment_features(unsigned long all, unsigned long one,
 		one |= NETIF_F_ALL_CSUM;
 
 	one |= all & NETIF_F_ONE_FOR_ALL;
-	all &= one | NETIF_F_LLTX | NETIF_F_GSO;
+	all &= one | NETIF_F_LLTX | NETIF_F_GSO | NETIF_F_UFO;
 	all |= one & mask & NETIF_F_ONE_FOR_ALL;
 
 	return all;
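netdev_increment_features() is the fold step master devices use when recomputing features across their slaves; adding NETIF_F_UFO here lets UFO survive the fold the same way LLTX and GSO already did. A hedged sketch of the caller side, loosely modeled on the bridge's br_features_recompute() of this era (names simplified, treat as illustration):

	unsigned long features = br->feature_mask & ~NETIF_F_ONE_FOR_ALL;

	list_for_each_entry(p, &br->port_list, list)
		features = netdev_increment_features(features,
						     p->dev->features,
						     br->feature_mask);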
@@ -5566,14 +5890,13 @@ static struct pernet_operations __net_initdata netdev_net_ops = {
 
 static void __net_exit default_device_exit(struct net *net)
 {
-	struct net_device *dev;
+	struct net_device *dev, *aux;
 	/*
-	 * Push all migratable of the network devices back to the
+	 * Push all migratable network devices back to the
	 * initial network namespace
	 */
 	rtnl_lock();
-restart:
-	for_each_netdev(net, dev) {
+	for_each_netdev_safe(net, dev, aux) {
 		int err;
 		char fb_name[IFNAMSIZ];
 
@@ -5581,11 +5904,9 @@ restart:
 		if (dev->features & NETIF_F_NETNS_LOCAL)
 			continue;
 
-		/* Delete virtual devices */
-		if (dev->rtnl_link_ops && dev->rtnl_link_ops->dellink) {
-			dev->rtnl_link_ops->dellink(dev);
-			goto restart;
-		}
+		/* Leave virtual devices for the generic cleanup */
+		if (dev->rtnl_link_ops)
+			continue;
 
 		/* Push remaining network devices to init_net */
 		snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex);
@@ -5595,13 +5916,37 @@ restart:
 				__func__, dev->name, err);
 			BUG();
 		}
-		goto restart;
 	}
 	rtnl_unlock();
 }
 
+static void __net_exit default_device_exit_batch(struct list_head *net_list)
+{
+	/* At exit all network devices must be removed from a network
+	 * namespace. Do this in the reverse order of registration.
+	 * Do this across as many network namespaces as possible to
+	 * improve batching efficiency.
+	 */
+	struct net_device *dev;
+	struct net *net;
+	LIST_HEAD(dev_kill_list);
+
+	rtnl_lock();
+	list_for_each_entry(net, net_list, exit_list) {
+		for_each_netdev_reverse(net, dev) {
+			if (dev->rtnl_link_ops)
+				dev->rtnl_link_ops->dellink(dev, &dev_kill_list);
+			else
+				unregister_netdevice_queue(dev, &dev_kill_list);
+		}
+	}
+	unregister_netdevice_many(&dev_kill_list);
+	rtnl_unlock();
+}
+
 static struct pernet_operations __net_initdata default_device_ops = {
 	.exit = default_device_exit,
+	.exit_batch = default_device_exit_batch,
 };
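With .exit_batch set, the namespace core runs .exit for each dying namespace and then calls .exit_batch once with the whole exit list, which is what lets default_device_exit_batch() build a single kill list spanning namespaces. A hedged sketch of the wiring; net_dev_init() does the equivalent of:

	/* Same registration as any pernet ops; the batch hook is
	 * picked up automatically when namespaces are cleaned up. */
	if (register_pernet_device(&default_device_ops))
		BUG();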
 
 /*