diff options
Diffstat (limited to 'net/core/dev.c')
-rw-r--r-- | net/core/dev.c | 955 |
1 files changed, 650 insertions, 305 deletions
diff --git a/net/core/dev.c b/net/core/dev.c index fe10551d3671..264137fce3a2 100644 --- a/net/core/dev.c +++ b/net/core/dev.c | |||
@@ -79,6 +79,8 @@ | |||
79 | #include <linux/cpu.h> | 79 | #include <linux/cpu.h> |
80 | #include <linux/types.h> | 80 | #include <linux/types.h> |
81 | #include <linux/kernel.h> | 81 | #include <linux/kernel.h> |
82 | #include <linux/hash.h> | ||
83 | #include <linux/slab.h> | ||
82 | #include <linux/sched.h> | 84 | #include <linux/sched.h> |
83 | #include <linux/mutex.h> | 85 | #include <linux/mutex.h> |
84 | #include <linux/string.h> | 86 | #include <linux/string.h> |
@@ -104,6 +106,7 @@ | |||
104 | #include <net/dst.h> | 106 | #include <net/dst.h> |
105 | #include <net/pkt_sched.h> | 107 | #include <net/pkt_sched.h> |
106 | #include <net/checksum.h> | 108 | #include <net/checksum.h> |
109 | #include <net/xfrm.h> | ||
107 | #include <linux/highmem.h> | 110 | #include <linux/highmem.h> |
108 | #include <linux/init.h> | 111 | #include <linux/init.h> |
109 | #include <linux/kmod.h> | 112 | #include <linux/kmod.h> |
@@ -175,7 +178,7 @@ static struct list_head ptype_all __read_mostly; /* Taps */ | |||
175 | * The @dev_base_head list is protected by @dev_base_lock and the rtnl | 178 | * The @dev_base_head list is protected by @dev_base_lock and the rtnl |
176 | * semaphore. | 179 | * semaphore. |
177 | * | 180 | * |
178 | * Pure readers hold dev_base_lock for reading. | 181 | * Pure readers hold dev_base_lock for reading, or rcu_read_lock(). |
179 | * | 182 | * |
180 | * Writers must hold the rtnl semaphore while they loop through the | 183 | * Writers must hold the rtnl semaphore while they loop through the |
181 | * dev_base_head list, and hold dev_base_lock for writing when they do the | 184 | * dev_base_head list, and hold dev_base_lock for writing when they do the |
@@ -193,18 +196,15 @@ static struct list_head ptype_all __read_mostly; /* Taps */ | |||
193 | DEFINE_RWLOCK(dev_base_lock); | 196 | DEFINE_RWLOCK(dev_base_lock); |
194 | EXPORT_SYMBOL(dev_base_lock); | 197 | EXPORT_SYMBOL(dev_base_lock); |
195 | 198 | ||
196 | #define NETDEV_HASHBITS 8 | ||
197 | #define NETDEV_HASHENTRIES (1 << NETDEV_HASHBITS) | ||
198 | |||
199 | static inline struct hlist_head *dev_name_hash(struct net *net, const char *name) | 199 | static inline struct hlist_head *dev_name_hash(struct net *net, const char *name) |
200 | { | 200 | { |
201 | unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ)); | 201 | unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ)); |
202 | return &net->dev_name_head[hash & ((1 << NETDEV_HASHBITS) - 1)]; | 202 | return &net->dev_name_head[hash_32(hash, NETDEV_HASHBITS)]; |
203 | } | 203 | } |
204 | 204 | ||
205 | static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex) | 205 | static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex) |
206 | { | 206 | { |
207 | return &net->dev_index_head[ifindex & ((1 << NETDEV_HASHBITS) - 1)]; | 207 | return &net->dev_index_head[ifindex & (NETDEV_HASHENTRIES - 1)]; |
208 | } | 208 | } |
209 | 209 | ||
210 | /* Device list insertion */ | 210 | /* Device list insertion */ |
@@ -215,23 +215,26 @@ static int list_netdevice(struct net_device *dev) | |||
215 | ASSERT_RTNL(); | 215 | ASSERT_RTNL(); |
216 | 216 | ||
217 | write_lock_bh(&dev_base_lock); | 217 | write_lock_bh(&dev_base_lock); |
218 | list_add_tail(&dev->dev_list, &net->dev_base_head); | 218 | list_add_tail_rcu(&dev->dev_list, &net->dev_base_head); |
219 | hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name)); | 219 | hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name)); |
220 | hlist_add_head(&dev->index_hlist, dev_index_hash(net, dev->ifindex)); | 220 | hlist_add_head_rcu(&dev->index_hlist, |
221 | dev_index_hash(net, dev->ifindex)); | ||
221 | write_unlock_bh(&dev_base_lock); | 222 | write_unlock_bh(&dev_base_lock); |
222 | return 0; | 223 | return 0; |
223 | } | 224 | } |
224 | 225 | ||
225 | /* Device list removal */ | 226 | /* Device list removal |
227 | * caller must respect an RCU grace period before freeing/reusing dev | ||
228 | */ | ||
226 | static void unlist_netdevice(struct net_device *dev) | 229 | static void unlist_netdevice(struct net_device *dev) |
227 | { | 230 | { |
228 | ASSERT_RTNL(); | 231 | ASSERT_RTNL(); |
229 | 232 | ||
230 | /* Unlink dev from the device chain */ | 233 | /* Unlink dev from the device chain */ |
231 | write_lock_bh(&dev_base_lock); | 234 | write_lock_bh(&dev_base_lock); |
232 | list_del(&dev->dev_list); | 235 | list_del_rcu(&dev->dev_list); |
233 | hlist_del(&dev->name_hlist); | 236 | hlist_del_rcu(&dev->name_hlist); |
234 | hlist_del(&dev->index_hlist); | 237 | hlist_del_rcu(&dev->index_hlist); |
235 | write_unlock_bh(&dev_base_lock); | 238 | write_unlock_bh(&dev_base_lock); |
236 | } | 239 | } |
237 | 240 | ||
@@ -587,18 +590,44 @@ __setup("netdev=", netdev_boot_setup); | |||
587 | struct net_device *__dev_get_by_name(struct net *net, const char *name) | 590 | struct net_device *__dev_get_by_name(struct net *net, const char *name) |
588 | { | 591 | { |
589 | struct hlist_node *p; | 592 | struct hlist_node *p; |
593 | struct net_device *dev; | ||
594 | struct hlist_head *head = dev_name_hash(net, name); | ||
590 | 595 | ||
591 | hlist_for_each(p, dev_name_hash(net, name)) { | 596 | hlist_for_each_entry(dev, p, head, name_hlist) |
592 | struct net_device *dev | ||
593 | = hlist_entry(p, struct net_device, name_hlist); | ||
594 | if (!strncmp(dev->name, name, IFNAMSIZ)) | 597 | if (!strncmp(dev->name, name, IFNAMSIZ)) |
595 | return dev; | 598 | return dev; |
596 | } | 599 | |
597 | return NULL; | 600 | return NULL; |
598 | } | 601 | } |
599 | EXPORT_SYMBOL(__dev_get_by_name); | 602 | EXPORT_SYMBOL(__dev_get_by_name); |
600 | 603 | ||
601 | /** | 604 | /** |
605 | * dev_get_by_name_rcu - find a device by its name | ||
606 | * @net: the applicable net namespace | ||
607 | * @name: name to find | ||
608 | * | ||
609 | * Find an interface by name. | ||
610 | * If the name is found a pointer to the device is returned. | ||
611 | * If the name is not found then %NULL is returned. | ||
612 | * The reference counters are not incremented so the caller must be | ||
613 | * careful with locks. The caller must hold RCU lock. | ||
614 | */ | ||
615 | |||
616 | struct net_device *dev_get_by_name_rcu(struct net *net, const char *name) | ||
617 | { | ||
618 | struct hlist_node *p; | ||
619 | struct net_device *dev; | ||
620 | struct hlist_head *head = dev_name_hash(net, name); | ||
621 | |||
622 | hlist_for_each_entry_rcu(dev, p, head, name_hlist) | ||
623 | if (!strncmp(dev->name, name, IFNAMSIZ)) | ||
624 | return dev; | ||
625 | |||
626 | return NULL; | ||
627 | } | ||
628 | EXPORT_SYMBOL(dev_get_by_name_rcu); | ||
629 | |||
630 | /** | ||
602 | * dev_get_by_name - find a device by its name | 631 | * dev_get_by_name - find a device by its name |
603 | * @net: the applicable net namespace | 632 | * @net: the applicable net namespace |
604 | * @name: name to find | 633 | * @name: name to find |
@@ -614,11 +643,11 @@ struct net_device *dev_get_by_name(struct net *net, const char *name) | |||
614 | { | 643 | { |
615 | struct net_device *dev; | 644 | struct net_device *dev; |
616 | 645 | ||
617 | read_lock(&dev_base_lock); | 646 | rcu_read_lock(); |
618 | dev = __dev_get_by_name(net, name); | 647 | dev = dev_get_by_name_rcu(net, name); |
619 | if (dev) | 648 | if (dev) |
620 | dev_hold(dev); | 649 | dev_hold(dev); |
621 | read_unlock(&dev_base_lock); | 650 | rcu_read_unlock(); |
622 | return dev; | 651 | return dev; |
623 | } | 652 | } |
624 | EXPORT_SYMBOL(dev_get_by_name); | 653 | EXPORT_SYMBOL(dev_get_by_name); |
@@ -638,17 +667,42 @@ EXPORT_SYMBOL(dev_get_by_name); | |||
638 | struct net_device *__dev_get_by_index(struct net *net, int ifindex) | 667 | struct net_device *__dev_get_by_index(struct net *net, int ifindex) |
639 | { | 668 | { |
640 | struct hlist_node *p; | 669 | struct hlist_node *p; |
670 | struct net_device *dev; | ||
671 | struct hlist_head *head = dev_index_hash(net, ifindex); | ||
641 | 672 | ||
642 | hlist_for_each(p, dev_index_hash(net, ifindex)) { | 673 | hlist_for_each_entry(dev, p, head, index_hlist) |
643 | struct net_device *dev | ||
644 | = hlist_entry(p, struct net_device, index_hlist); | ||
645 | if (dev->ifindex == ifindex) | 674 | if (dev->ifindex == ifindex) |
646 | return dev; | 675 | return dev; |
647 | } | 676 | |
648 | return NULL; | 677 | return NULL; |
649 | } | 678 | } |
650 | EXPORT_SYMBOL(__dev_get_by_index); | 679 | EXPORT_SYMBOL(__dev_get_by_index); |
651 | 680 | ||
681 | /** | ||
682 | * dev_get_by_index_rcu - find a device by its ifindex | ||
683 | * @net: the applicable net namespace | ||
684 | * @ifindex: index of device | ||
685 | * | ||
686 | * Search for an interface by index. Returns %NULL if the device | ||
687 | * is not found or a pointer to the device. The device has not | ||
688 | * had its reference counter increased so the caller must be careful | ||
689 | * about locking. The caller must hold RCU lock. | ||
690 | */ | ||
691 | |||
692 | struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex) | ||
693 | { | ||
694 | struct hlist_node *p; | ||
695 | struct net_device *dev; | ||
696 | struct hlist_head *head = dev_index_hash(net, ifindex); | ||
697 | |||
698 | hlist_for_each_entry_rcu(dev, p, head, index_hlist) | ||
699 | if (dev->ifindex == ifindex) | ||
700 | return dev; | ||
701 | |||
702 | return NULL; | ||
703 | } | ||
704 | EXPORT_SYMBOL(dev_get_by_index_rcu); | ||
705 | |||
652 | 706 | ||
653 | /** | 707 | /** |
654 | * dev_get_by_index - find a device by its ifindex | 708 | * dev_get_by_index - find a device by its ifindex |
@@ -665,11 +719,11 @@ struct net_device *dev_get_by_index(struct net *net, int ifindex) | |||
665 | { | 719 | { |
666 | struct net_device *dev; | 720 | struct net_device *dev; |
667 | 721 | ||
668 | read_lock(&dev_base_lock); | 722 | rcu_read_lock(); |
669 | dev = __dev_get_by_index(net, ifindex); | 723 | dev = dev_get_by_index_rcu(net, ifindex); |
670 | if (dev) | 724 | if (dev) |
671 | dev_hold(dev); | 725 | dev_hold(dev); |
672 | read_unlock(&dev_base_lock); | 726 | rcu_read_unlock(); |
673 | return dev; | 727 | return dev; |
674 | } | 728 | } |
675 | EXPORT_SYMBOL(dev_get_by_index); | 729 | EXPORT_SYMBOL(dev_get_by_index); |
@@ -748,15 +802,15 @@ struct net_device *dev_get_by_flags(struct net *net, unsigned short if_flags, | |||
748 | struct net_device *dev, *ret; | 802 | struct net_device *dev, *ret; |
749 | 803 | ||
750 | ret = NULL; | 804 | ret = NULL; |
751 | read_lock(&dev_base_lock); | 805 | rcu_read_lock(); |
752 | for_each_netdev(net, dev) { | 806 | for_each_netdev_rcu(net, dev) { |
753 | if (((dev->flags ^ if_flags) & mask) == 0) { | 807 | if (((dev->flags ^ if_flags) & mask) == 0) { |
754 | dev_hold(dev); | 808 | dev_hold(dev); |
755 | ret = dev; | 809 | ret = dev; |
756 | break; | 810 | break; |
757 | } | 811 | } |
758 | } | 812 | } |
759 | read_unlock(&dev_base_lock); | 813 | rcu_read_unlock(); |
760 | return ret; | 814 | return ret; |
761 | } | 815 | } |
762 | EXPORT_SYMBOL(dev_get_by_flags); | 816 | EXPORT_SYMBOL(dev_get_by_flags); |
@@ -841,7 +895,8 @@ static int __dev_alloc_name(struct net *net, const char *name, char *buf) | |||
841 | free_page((unsigned long) inuse); | 895 | free_page((unsigned long) inuse); |
842 | } | 896 | } |
843 | 897 | ||
844 | snprintf(buf, IFNAMSIZ, name, i); | 898 | if (buf != name) |
899 | snprintf(buf, IFNAMSIZ, name, i); | ||
845 | if (!__dev_get_by_name(net, buf)) | 900 | if (!__dev_get_by_name(net, buf)) |
846 | return i; | 901 | return i; |
847 | 902 | ||
@@ -881,6 +936,21 @@ int dev_alloc_name(struct net_device *dev, const char *name) | |||
881 | } | 936 | } |
882 | EXPORT_SYMBOL(dev_alloc_name); | 937 | EXPORT_SYMBOL(dev_alloc_name); |
883 | 938 | ||
939 | static int dev_get_valid_name(struct net *net, const char *name, char *buf, | ||
940 | bool fmt) | ||
941 | { | ||
942 | if (!dev_valid_name(name)) | ||
943 | return -EINVAL; | ||
944 | |||
945 | if (fmt && strchr(name, '%')) | ||
946 | return __dev_alloc_name(net, name, buf); | ||
947 | else if (__dev_get_by_name(net, name)) | ||
948 | return -EEXIST; | ||
949 | else if (buf != name) | ||
950 | strlcpy(buf, name, IFNAMSIZ); | ||
951 | |||
952 | return 0; | ||
953 | } | ||
884 | 954 | ||
885 | /** | 955 | /** |
886 | * dev_change_name - change name of a device | 956 | * dev_change_name - change name of a device |
@@ -904,28 +974,20 @@ int dev_change_name(struct net_device *dev, const char *newname) | |||
904 | if (dev->flags & IFF_UP) | 974 | if (dev->flags & IFF_UP) |
905 | return -EBUSY; | 975 | return -EBUSY; |
906 | 976 | ||
907 | if (!dev_valid_name(newname)) | ||
908 | return -EINVAL; | ||
909 | |||
910 | if (strncmp(newname, dev->name, IFNAMSIZ) == 0) | 977 | if (strncmp(newname, dev->name, IFNAMSIZ) == 0) |
911 | return 0; | 978 | return 0; |
912 | 979 | ||
913 | memcpy(oldname, dev->name, IFNAMSIZ); | 980 | memcpy(oldname, dev->name, IFNAMSIZ); |
914 | 981 | ||
915 | if (strchr(newname, '%')) { | 982 | err = dev_get_valid_name(net, newname, dev->name, 1); |
916 | err = dev_alloc_name(dev, newname); | 983 | if (err < 0) |
917 | if (err < 0) | 984 | return err; |
918 | return err; | ||
919 | } else if (__dev_get_by_name(net, newname)) | ||
920 | return -EEXIST; | ||
921 | else | ||
922 | strlcpy(dev->name, newname, IFNAMSIZ); | ||
923 | 985 | ||
924 | rollback: | 986 | rollback: |
925 | /* For now only devices in the initial network namespace | 987 | /* For now only devices in the initial network namespace |
926 | * are in sysfs. | 988 | * are in sysfs. |
927 | */ | 989 | */ |
928 | if (net == &init_net) { | 990 | if (net_eq(net, &init_net)) { |
929 | ret = device_rename(&dev->dev, dev->name); | 991 | ret = device_rename(&dev->dev, dev->name); |
930 | if (ret) { | 992 | if (ret) { |
931 | memcpy(dev->name, oldname, IFNAMSIZ); | 993 | memcpy(dev->name, oldname, IFNAMSIZ); |
@@ -935,7 +997,12 @@ rollback: | |||
935 | 997 | ||
936 | write_lock_bh(&dev_base_lock); | 998 | write_lock_bh(&dev_base_lock); |
937 | hlist_del(&dev->name_hlist); | 999 | hlist_del(&dev->name_hlist); |
938 | hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name)); | 1000 | write_unlock_bh(&dev_base_lock); |
1001 | |||
1002 | synchronize_rcu(); | ||
1003 | |||
1004 | write_lock_bh(&dev_base_lock); | ||
1005 | hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name)); | ||
939 | write_unlock_bh(&dev_base_lock); | 1006 | write_unlock_bh(&dev_base_lock); |
940 | 1007 | ||
941 | ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev); | 1008 | ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev); |
@@ -1038,28 +1105,16 @@ void dev_load(struct net *net, const char *name) | |||
1038 | { | 1105 | { |
1039 | struct net_device *dev; | 1106 | struct net_device *dev; |
1040 | 1107 | ||
1041 | read_lock(&dev_base_lock); | 1108 | rcu_read_lock(); |
1042 | dev = __dev_get_by_name(net, name); | 1109 | dev = dev_get_by_name_rcu(net, name); |
1043 | read_unlock(&dev_base_lock); | 1110 | rcu_read_unlock(); |
1044 | 1111 | ||
1045 | if (!dev && capable(CAP_NET_ADMIN)) | 1112 | if (!dev && capable(CAP_NET_ADMIN)) |
1046 | request_module("%s", name); | 1113 | request_module("%s", name); |
1047 | } | 1114 | } |
1048 | EXPORT_SYMBOL(dev_load); | 1115 | EXPORT_SYMBOL(dev_load); |
1049 | 1116 | ||
1050 | /** | 1117 | static int __dev_open(struct net_device *dev) |
1051 | * dev_open - prepare an interface for use. | ||
1052 | * @dev: device to open | ||
1053 | * | ||
1054 | * Takes a device from down to up state. The device's private open | ||
1055 | * function is invoked and then the multicast lists are loaded. Finally | ||
1056 | * the device is moved into the up state and a %NETDEV_UP message is | ||
1057 | * sent to the netdev notifier chain. | ||
1058 | * | ||
1059 | * Calling this function on an active interface is a nop. On a failure | ||
1060 | * a negative errno code is returned. | ||
1061 | */ | ||
1062 | int dev_open(struct net_device *dev) | ||
1063 | { | 1118 | { |
1064 | const struct net_device_ops *ops = dev->netdev_ops; | 1119 | const struct net_device_ops *ops = dev->netdev_ops; |
1065 | int ret; | 1120 | int ret; |
@@ -1067,13 +1122,6 @@ int dev_open(struct net_device *dev) | |||
1067 | ASSERT_RTNL(); | 1122 | ASSERT_RTNL(); |
1068 | 1123 | ||
1069 | /* | 1124 | /* |
1070 | * Is it already up? | ||
1071 | */ | ||
1072 | |||
1073 | if (dev->flags & IFF_UP) | ||
1074 | return 0; | ||
1075 | |||
1076 | /* | ||
1077 | * Is it even present? | 1125 | * Is it even present? |
1078 | */ | 1126 | */ |
1079 | if (!netif_device_present(dev)) | 1127 | if (!netif_device_present(dev)) |
@@ -1121,36 +1169,57 @@ int dev_open(struct net_device *dev) | |||
1121 | * Wakeup transmit queue engine | 1169 | * Wakeup transmit queue engine |
1122 | */ | 1170 | */ |
1123 | dev_activate(dev); | 1171 | dev_activate(dev); |
1124 | |||
1125 | /* | ||
1126 | * ... and announce new interface. | ||
1127 | */ | ||
1128 | call_netdevice_notifiers(NETDEV_UP, dev); | ||
1129 | } | 1172 | } |
1130 | 1173 | ||
1131 | return ret; | 1174 | return ret; |
1132 | } | 1175 | } |
1133 | EXPORT_SYMBOL(dev_open); | ||
1134 | 1176 | ||
1135 | /** | 1177 | /** |
1136 | * dev_close - shutdown an interface. | 1178 | * dev_open - prepare an interface for use. |
1137 | * @dev: device to shutdown | 1179 | * @dev: device to open |
1138 | * | 1180 | * |
1139 | * This function moves an active device into down state. A | 1181 | * Takes a device from down to up state. The device's private open |
1140 | * %NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device | 1182 | * function is invoked and then the multicast lists are loaded. Finally |
1141 | * is then deactivated and finally a %NETDEV_DOWN is sent to the notifier | 1183 | * the device is moved into the up state and a %NETDEV_UP message is |
1142 | * chain. | 1184 | * sent to the netdev notifier chain. |
1185 | * | ||
1186 | * Calling this function on an active interface is a nop. On a failure | ||
1187 | * a negative errno code is returned. | ||
1143 | */ | 1188 | */ |
1144 | int dev_close(struct net_device *dev) | 1189 | int dev_open(struct net_device *dev) |
1190 | { | ||
1191 | int ret; | ||
1192 | |||
1193 | /* | ||
1194 | * Is it already up? | ||
1195 | */ | ||
1196 | if (dev->flags & IFF_UP) | ||
1197 | return 0; | ||
1198 | |||
1199 | /* | ||
1200 | * Open device | ||
1201 | */ | ||
1202 | ret = __dev_open(dev); | ||
1203 | if (ret < 0) | ||
1204 | return ret; | ||
1205 | |||
1206 | /* | ||
1207 | * ... and announce new interface. | ||
1208 | */ | ||
1209 | rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING); | ||
1210 | call_netdevice_notifiers(NETDEV_UP, dev); | ||
1211 | |||
1212 | return ret; | ||
1213 | } | ||
1214 | EXPORT_SYMBOL(dev_open); | ||
1215 | |||
1216 | static int __dev_close(struct net_device *dev) | ||
1145 | { | 1217 | { |
1146 | const struct net_device_ops *ops = dev->netdev_ops; | 1218 | const struct net_device_ops *ops = dev->netdev_ops; |
1147 | ASSERT_RTNL(); | ||
1148 | 1219 | ||
1220 | ASSERT_RTNL(); | ||
1149 | might_sleep(); | 1221 | might_sleep(); |
1150 | 1222 | ||
1151 | if (!(dev->flags & IFF_UP)) | ||
1152 | return 0; | ||
1153 | |||
1154 | /* | 1223 | /* |
1155 | * Tell people we are going down, so that they can | 1224 | * Tell people we are going down, so that they can |
1156 | * prepare to death, when device is still operating. | 1225 | * prepare to death, when device is still operating. |
@@ -1186,14 +1255,34 @@ int dev_close(struct net_device *dev) | |||
1186 | dev->flags &= ~IFF_UP; | 1255 | dev->flags &= ~IFF_UP; |
1187 | 1256 | ||
1188 | /* | 1257 | /* |
1189 | * Tell people we are down | 1258 | * Shutdown NET_DMA |
1190 | */ | 1259 | */ |
1191 | call_netdevice_notifiers(NETDEV_DOWN, dev); | 1260 | net_dmaengine_put(); |
1261 | |||
1262 | return 0; | ||
1263 | } | ||
1264 | |||
1265 | /** | ||
1266 | * dev_close - shutdown an interface. | ||
1267 | * @dev: device to shutdown | ||
1268 | * | ||
1269 | * This function moves an active device into down state. A | ||
1270 | * %NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device | ||
1271 | * is then deactivated and finally a %NETDEV_DOWN is sent to the notifier | ||
1272 | * chain. | ||
1273 | */ | ||
1274 | int dev_close(struct net_device *dev) | ||
1275 | { | ||
1276 | if (!(dev->flags & IFF_UP)) | ||
1277 | return 0; | ||
1278 | |||
1279 | __dev_close(dev); | ||
1192 | 1280 | ||
1193 | /* | 1281 | /* |
1194 | * Shutdown NET_DMA | 1282 | * Tell people we are down |
1195 | */ | 1283 | */ |
1196 | net_dmaengine_put(); | 1284 | rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING); |
1285 | call_netdevice_notifiers(NETDEV_DOWN, dev); | ||
1197 | 1286 | ||
1198 | return 0; | 1287 | return 0; |
1199 | } | 1288 | } |
@@ -1287,6 +1376,7 @@ rollback: | |||
1287 | nb->notifier_call(nb, NETDEV_DOWN, dev); | 1376 | nb->notifier_call(nb, NETDEV_DOWN, dev); |
1288 | } | 1377 | } |
1289 | nb->notifier_call(nb, NETDEV_UNREGISTER, dev); | 1378 | nb->notifier_call(nb, NETDEV_UNREGISTER, dev); |
1379 | nb->notifier_call(nb, NETDEV_UNREGISTER_BATCH, dev); | ||
1290 | } | 1380 | } |
1291 | } | 1381 | } |
1292 | 1382 | ||
@@ -1353,6 +1443,41 @@ static inline void net_timestamp(struct sk_buff *skb) | |||
1353 | skb->tstamp.tv64 = 0; | 1443 | skb->tstamp.tv64 = 0; |
1354 | } | 1444 | } |
1355 | 1445 | ||
1446 | /** | ||
1447 | * dev_forward_skb - loopback an skb to another netif | ||
1448 | * | ||
1449 | * @dev: destination network device | ||
1450 | * @skb: buffer to forward | ||
1451 | * | ||
1452 | * return values: | ||
1453 | * NET_RX_SUCCESS (no congestion) | ||
1454 | * NET_RX_DROP (packet was dropped, but freed) | ||
1455 | * | ||
1456 | * dev_forward_skb can be used for injecting an skb from the | ||
1457 | * start_xmit function of one device into the receive queue | ||
1458 | * of another device. | ||
1459 | * | ||
1460 | * The receiving device may be in another namespace, so | ||
1461 | * we have to clear all information in the skb that could | ||
1462 | * impact namespace isolation. | ||
1463 | */ | ||
1464 | int dev_forward_skb(struct net_device *dev, struct sk_buff *skb) | ||
1465 | { | ||
1466 | skb_orphan(skb); | ||
1467 | |||
1468 | if (!(dev->flags & IFF_UP) || | ||
1469 | (skb->len > (dev->mtu + dev->hard_header_len))) { | ||
1470 | kfree_skb(skb); | ||
1471 | return NET_RX_DROP; | ||
1472 | } | ||
1473 | skb_set_dev(skb, dev); | ||
1474 | skb->tstamp.tv64 = 0; | ||
1475 | skb->pkt_type = PACKET_HOST; | ||
1476 | skb->protocol = eth_type_trans(skb, dev); | ||
1477 | return netif_rx(skb); | ||
1478 | } | ||
1479 | EXPORT_SYMBOL_GPL(dev_forward_skb); | ||
1480 | |||
1356 | /* | 1481 | /* |
1357 | * Support routine. Sends outgoing frames to any network | 1482 | * Support routine. Sends outgoing frames to any network |
1358 | * taps currently in use. | 1483 | * taps currently in use. |
@@ -1508,6 +1633,36 @@ static bool dev_can_checksum(struct net_device *dev, struct sk_buff *skb) | |||
1508 | return false; | 1633 | return false; |
1509 | } | 1634 | } |
1510 | 1635 | ||
1636 | /** | ||
1637 | * skb_set_dev -- assign a new device to a buffer | ||
1638 | * @skb: buffer for the new device | ||
1639 | * @dev: network device | ||
1640 | * | ||
1641 | * If an skb is owned by a device already, we have to reset | ||
1642 | * all data private to the namespace a device belongs to | ||
1643 | * before assigning it a new device. | ||
1644 | */ | ||
1645 | #ifdef CONFIG_NET_NS | ||
1646 | void skb_set_dev(struct sk_buff *skb, struct net_device *dev) | ||
1647 | { | ||
1648 | skb_dst_drop(skb); | ||
1649 | if (skb->dev && !net_eq(dev_net(skb->dev), dev_net(dev))) { | ||
1650 | secpath_reset(skb); | ||
1651 | nf_reset(skb); | ||
1652 | skb_init_secmark(skb); | ||
1653 | skb->mark = 0; | ||
1654 | skb->priority = 0; | ||
1655 | skb->nf_trace = 0; | ||
1656 | skb->ipvs_property = 0; | ||
1657 | #ifdef CONFIG_NET_SCHED | ||
1658 | skb->tc_index = 0; | ||
1659 | #endif | ||
1660 | } | ||
1661 | skb->dev = dev; | ||
1662 | } | ||
1663 | EXPORT_SYMBOL(skb_set_dev); | ||
1664 | #endif /* CONFIG_NET_NS */ | ||
1665 | |||
1511 | /* | 1666 | /* |
1512 | * Invalidate hardware checksum when packet is to be mangled, and | 1667 | * Invalidate hardware checksum when packet is to be mangled, and |
1513 | * complete checksum manually on outgoing path. | 1668 | * complete checksum manually on outgoing path. |
@@ -1701,7 +1856,7 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, | |||
1701 | struct netdev_queue *txq) | 1856 | struct netdev_queue *txq) |
1702 | { | 1857 | { |
1703 | const struct net_device_ops *ops = dev->netdev_ops; | 1858 | const struct net_device_ops *ops = dev->netdev_ops; |
1704 | int rc; | 1859 | int rc = NETDEV_TX_OK; |
1705 | 1860 | ||
1706 | if (likely(!skb->next)) { | 1861 | if (likely(!skb->next)) { |
1707 | if (!list_empty(&ptype_all)) | 1862 | if (!list_empty(&ptype_all)) |
@@ -1747,8 +1902,18 @@ gso: | |||
1747 | 1902 | ||
1748 | skb->next = nskb->next; | 1903 | skb->next = nskb->next; |
1749 | nskb->next = NULL; | 1904 | nskb->next = NULL; |
1905 | |||
1906 | /* | ||
1907 | * If device doesn't need nskb->dst, release it right now while | ||
1908 | * it's hot in this cpu cache | ||
1909 | */ | ||
1910 | if (dev->priv_flags & IFF_XMIT_DST_RELEASE) | ||
1911 | skb_dst_drop(nskb); | ||
1912 | |||
1750 | rc = ops->ndo_start_xmit(nskb, dev); | 1913 | rc = ops->ndo_start_xmit(nskb, dev); |
1751 | if (unlikely(rc != NETDEV_TX_OK)) { | 1914 | if (unlikely(rc != NETDEV_TX_OK)) { |
1915 | if (rc & ~NETDEV_TX_MASK) | ||
1916 | goto out_kfree_gso_skb; | ||
1752 | nskb->next = skb->next; | 1917 | nskb->next = skb->next; |
1753 | skb->next = nskb; | 1918 | skb->next = nskb; |
1754 | return rc; | 1919 | return rc; |
@@ -1758,11 +1923,12 @@ gso: | |||
1758 | return NETDEV_TX_BUSY; | 1923 | return NETDEV_TX_BUSY; |
1759 | } while (skb->next); | 1924 | } while (skb->next); |
1760 | 1925 | ||
1761 | skb->destructor = DEV_GSO_CB(skb)->destructor; | 1926 | out_kfree_gso_skb: |
1762 | 1927 | if (likely(skb->next == NULL)) | |
1928 | skb->destructor = DEV_GSO_CB(skb)->destructor; | ||
1763 | out_kfree_skb: | 1929 | out_kfree_skb: |
1764 | kfree_skb(skb); | 1930 | kfree_skb(skb); |
1765 | return NETDEV_TX_OK; | 1931 | return rc; |
1766 | } | 1932 | } |
1767 | 1933 | ||
1768 | static u32 skb_tx_hashrnd; | 1934 | static u32 skb_tx_hashrnd; |
@@ -1789,16 +1955,47 @@ u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb) | |||
1789 | } | 1955 | } |
1790 | EXPORT_SYMBOL(skb_tx_hash); | 1956 | EXPORT_SYMBOL(skb_tx_hash); |
1791 | 1957 | ||
1958 | static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index) | ||
1959 | { | ||
1960 | if (unlikely(queue_index >= dev->real_num_tx_queues)) { | ||
1961 | if (net_ratelimit()) { | ||
1962 | WARN(1, "%s selects TX queue %d, but " | ||
1963 | "real number of TX queues is %d\n", | ||
1964 | dev->name, queue_index, | ||
1965 | dev->real_num_tx_queues); | ||
1966 | } | ||
1967 | return 0; | ||
1968 | } | ||
1969 | return queue_index; | ||
1970 | } | ||
1971 | |||
1792 | static struct netdev_queue *dev_pick_tx(struct net_device *dev, | 1972 | static struct netdev_queue *dev_pick_tx(struct net_device *dev, |
1793 | struct sk_buff *skb) | 1973 | struct sk_buff *skb) |
1794 | { | 1974 | { |
1795 | const struct net_device_ops *ops = dev->netdev_ops; | 1975 | u16 queue_index; |
1796 | u16 queue_index = 0; | 1976 | struct sock *sk = skb->sk; |
1977 | |||
1978 | if (sk_tx_queue_recorded(sk)) { | ||
1979 | queue_index = sk_tx_queue_get(sk); | ||
1980 | } else { | ||
1981 | const struct net_device_ops *ops = dev->netdev_ops; | ||
1982 | |||
1983 | if (ops->ndo_select_queue) { | ||
1984 | queue_index = ops->ndo_select_queue(dev, skb); | ||
1985 | queue_index = dev_cap_txqueue(dev, queue_index); | ||
1986 | } else { | ||
1987 | queue_index = 0; | ||
1988 | if (dev->real_num_tx_queues > 1) | ||
1989 | queue_index = skb_tx_hash(dev, skb); | ||
1990 | |||
1991 | if (sk) { | ||
1992 | struct dst_entry *dst = rcu_dereference_bh(sk->sk_dst_cache); | ||
1797 | 1993 | ||
1798 | if (ops->ndo_select_queue) | 1994 | if (dst && skb_dst(skb) == dst) |
1799 | queue_index = ops->ndo_select_queue(dev, skb); | 1995 | sk_tx_queue_set(sk, queue_index); |
1800 | else if (dev->real_num_tx_queues > 1) | 1996 | } |
1801 | queue_index = skb_tx_hash(dev, skb); | 1997 | } |
1998 | } | ||
1802 | 1999 | ||
1803 | skb_set_queue_mapping(skb, queue_index); | 2000 | skb_set_queue_mapping(skb, queue_index); |
1804 | return netdev_get_tx_queue(dev, queue_index); | 2001 | return netdev_get_tx_queue(dev, queue_index); |
@@ -1838,6 +2035,21 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, | |||
1838 | return rc; | 2035 | return rc; |
1839 | } | 2036 | } |
1840 | 2037 | ||
2038 | /* | ||
2039 | * Returns true if either: | ||
2040 | * 1. skb has frag_list and the device doesn't support FRAGLIST, or | ||
2041 | * 2. skb is fragmented and the device does not support SG, or if | ||
2042 | * at least one of fragments is in highmem and device does not | ||
2043 | * support DMA from it. | ||
2044 | */ | ||
2045 | static inline int skb_needs_linearize(struct sk_buff *skb, | ||
2046 | struct net_device *dev) | ||
2047 | { | ||
2048 | return (skb_has_frags(skb) && !(dev->features & NETIF_F_FRAGLIST)) || | ||
2049 | (skb_shinfo(skb)->nr_frags && (!(dev->features & NETIF_F_SG) || | ||
2050 | illegal_highdma(dev, skb))); | ||
2051 | } | ||
2052 | |||
1841 | /** | 2053 | /** |
1842 | * dev_queue_xmit - transmit a buffer | 2054 | * dev_queue_xmit - transmit a buffer |
1843 | * @skb: buffer to transmit | 2055 | * @skb: buffer to transmit |
@@ -1874,18 +2086,8 @@ int dev_queue_xmit(struct sk_buff *skb) | |||
1874 | if (netif_needs_gso(dev, skb)) | 2086 | if (netif_needs_gso(dev, skb)) |
1875 | goto gso; | 2087 | goto gso; |
1876 | 2088 | ||
1877 | if (skb_has_frags(skb) && | 2089 | /* Convert a paged skb to linear, if required */ |
1878 | !(dev->features & NETIF_F_FRAGLIST) && | 2090 | if (skb_needs_linearize(skb, dev) && __skb_linearize(skb)) |
1879 | __skb_linearize(skb)) | ||
1880 | goto out_kfree_skb; | ||
1881 | |||
1882 | /* Fragmented skb is linearized if device does not support SG, | ||
1883 | * or if at least one of fragments is in highmem and device | ||
1884 | * does not support DMA from it. | ||
1885 | */ | ||
1886 | if (skb_shinfo(skb)->nr_frags && | ||
1887 | (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) && | ||
1888 | __skb_linearize(skb)) | ||
1889 | goto out_kfree_skb; | 2091 | goto out_kfree_skb; |
1890 | 2092 | ||
1891 | /* If packet is not checksummed and device does not support | 2093 | /* If packet is not checksummed and device does not support |
@@ -1905,7 +2107,7 @@ gso: | |||
1905 | rcu_read_lock_bh(); | 2107 | rcu_read_lock_bh(); |
1906 | 2108 | ||
1907 | txq = dev_pick_tx(dev, skb); | 2109 | txq = dev_pick_tx(dev, skb); |
1908 | q = rcu_dereference(txq->qdisc); | 2110 | q = rcu_dereference_bh(txq->qdisc); |
1909 | 2111 | ||
1910 | #ifdef CONFIG_NET_CLS_ACT | 2112 | #ifdef CONFIG_NET_CLS_ACT |
1911 | skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS); | 2113 | skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS); |
@@ -1935,8 +2137,8 @@ gso: | |||
1935 | HARD_TX_LOCK(dev, txq, cpu); | 2137 | HARD_TX_LOCK(dev, txq, cpu); |
1936 | 2138 | ||
1937 | if (!netif_tx_queue_stopped(txq)) { | 2139 | if (!netif_tx_queue_stopped(txq)) { |
1938 | rc = NET_XMIT_SUCCESS; | 2140 | rc = dev_hard_start_xmit(skb, dev, txq); |
1939 | if (!dev_hard_start_xmit(skb, dev, txq)) { | 2141 | if (dev_xmit_complete(rc)) { |
1940 | HARD_TX_UNLOCK(dev, txq); | 2142 | HARD_TX_UNLOCK(dev, txq); |
1941 | goto out; | 2143 | goto out; |
1942 | } | 2144 | } |
@@ -2191,7 +2393,7 @@ static int ing_filter(struct sk_buff *skb) | |||
2191 | if (MAX_RED_LOOP < ttl++) { | 2393 | if (MAX_RED_LOOP < ttl++) { |
2192 | printk(KERN_WARNING | 2394 | printk(KERN_WARNING |
2193 | "Redir loop detected Dropping packet (%d->%d)\n", | 2395 | "Redir loop detected Dropping packet (%d->%d)\n", |
2194 | skb->iif, dev->ifindex); | 2396 | skb->skb_iif, dev->ifindex); |
2195 | return TC_ACT_SHOT; | 2397 | return TC_ACT_SHOT; |
2196 | } | 2398 | } |
2197 | 2399 | ||
@@ -2285,30 +2487,33 @@ int netif_receive_skb(struct sk_buff *skb) | |||
2285 | { | 2487 | { |
2286 | struct packet_type *ptype, *pt_prev; | 2488 | struct packet_type *ptype, *pt_prev; |
2287 | struct net_device *orig_dev; | 2489 | struct net_device *orig_dev; |
2490 | struct net_device *master; | ||
2288 | struct net_device *null_or_orig; | 2491 | struct net_device *null_or_orig; |
2492 | struct net_device *null_or_bond; | ||
2289 | int ret = NET_RX_DROP; | 2493 | int ret = NET_RX_DROP; |
2290 | __be16 type; | 2494 | __be16 type; |
2291 | 2495 | ||
2292 | if (!skb->tstamp.tv64) | 2496 | if (!skb->tstamp.tv64) |
2293 | net_timestamp(skb); | 2497 | net_timestamp(skb); |
2294 | 2498 | ||
2295 | if (skb->vlan_tci && vlan_hwaccel_do_receive(skb)) | 2499 | if (vlan_tx_tag_present(skb) && vlan_hwaccel_do_receive(skb)) |
2296 | return NET_RX_SUCCESS; | 2500 | return NET_RX_SUCCESS; |
2297 | 2501 | ||
2298 | /* if we've gotten here through NAPI, check netpoll */ | 2502 | /* if we've gotten here through NAPI, check netpoll */ |
2299 | if (netpoll_receive_skb(skb)) | 2503 | if (netpoll_receive_skb(skb)) |
2300 | return NET_RX_DROP; | 2504 | return NET_RX_DROP; |
2301 | 2505 | ||
2302 | if (!skb->iif) | 2506 | if (!skb->skb_iif) |
2303 | skb->iif = skb->dev->ifindex; | 2507 | skb->skb_iif = skb->dev->ifindex; |
2304 | 2508 | ||
2305 | null_or_orig = NULL; | 2509 | null_or_orig = NULL; |
2306 | orig_dev = skb->dev; | 2510 | orig_dev = skb->dev; |
2307 | if (orig_dev->master) { | 2511 | master = ACCESS_ONCE(orig_dev->master); |
2308 | if (skb_bond_should_drop(skb)) | 2512 | if (master) { |
2513 | if (skb_bond_should_drop(skb, master)) | ||
2309 | null_or_orig = orig_dev; /* deliver only exact match */ | 2514 | null_or_orig = orig_dev; /* deliver only exact match */ |
2310 | else | 2515 | else |
2311 | skb->dev = orig_dev->master; | 2516 | skb->dev = master; |
2312 | } | 2517 | } |
2313 | 2518 | ||
2314 | __get_cpu_var(netdev_rx_stat).total++; | 2519 | __get_cpu_var(netdev_rx_stat).total++; |
@@ -2351,12 +2556,24 @@ ncls: | |||
2351 | if (!skb) | 2556 | if (!skb) |
2352 | goto out; | 2557 | goto out; |
2353 | 2558 | ||
2559 | /* | ||
2560 | * Make sure frames received on VLAN interfaces stacked on | ||
2561 | * bonding interfaces still make their way to any base bonding | ||
2562 | * device that may have registered for a specific ptype. The | ||
2563 | * handler may have to adjust skb->dev and orig_dev. | ||
2564 | */ | ||
2565 | null_or_bond = NULL; | ||
2566 | if ((skb->dev->priv_flags & IFF_802_1Q_VLAN) && | ||
2567 | (vlan_dev_real_dev(skb->dev)->priv_flags & IFF_BONDING)) { | ||
2568 | null_or_bond = vlan_dev_real_dev(skb->dev); | ||
2569 | } | ||
2570 | |||
2354 | type = skb->protocol; | 2571 | type = skb->protocol; |
2355 | list_for_each_entry_rcu(ptype, | 2572 | list_for_each_entry_rcu(ptype, |
2356 | &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) { | 2573 | &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) { |
2357 | if (ptype->type == type && | 2574 | if (ptype->type == type && (ptype->dev == null_or_orig || |
2358 | (ptype->dev == null_or_orig || ptype->dev == skb->dev || | 2575 | ptype->dev == skb->dev || ptype->dev == orig_dev || |
2359 | ptype->dev == orig_dev)) { | 2576 | ptype->dev == null_or_bond)) { |
2360 | if (pt_prev) | 2577 | if (pt_prev) |
2361 | ret = deliver_skb(skb, pt_prev, orig_dev); | 2578 | ret = deliver_skb(skb, pt_prev, orig_dev); |
2362 | pt_prev = ptype; | 2579 | pt_prev = ptype; |
@@ -2425,7 +2642,7 @@ out: | |||
2425 | return netif_receive_skb(skb); | 2642 | return netif_receive_skb(skb); |
2426 | } | 2643 | } |
2427 | 2644 | ||
2428 | void napi_gro_flush(struct napi_struct *napi) | 2645 | static void napi_gro_flush(struct napi_struct *napi) |
2429 | { | 2646 | { |
2430 | struct sk_buff *skb, *next; | 2647 | struct sk_buff *skb, *next; |
2431 | 2648 | ||
@@ -2438,9 +2655,8 @@ void napi_gro_flush(struct napi_struct *napi) | |||
2438 | napi->gro_count = 0; | 2655 | napi->gro_count = 0; |
2439 | napi->gro_list = NULL; | 2656 | napi->gro_list = NULL; |
2440 | } | 2657 | } |
2441 | EXPORT_SYMBOL(napi_gro_flush); | ||
2442 | 2658 | ||
2443 | int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) | 2659 | enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) |
2444 | { | 2660 | { |
2445 | struct sk_buff **pp = NULL; | 2661 | struct sk_buff **pp = NULL; |
2446 | struct packet_type *ptype; | 2662 | struct packet_type *ptype; |
@@ -2448,7 +2664,7 @@ int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) | |||
2448 | struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK]; | 2664 | struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK]; |
2449 | int same_flow; | 2665 | int same_flow; |
2450 | int mac_len; | 2666 | int mac_len; |
2451 | int ret; | 2667 | enum gro_result ret; |
2452 | 2668 | ||
2453 | if (!(skb->dev->features & NETIF_F_GRO)) | 2669 | if (!(skb->dev->features & NETIF_F_GRO)) |
2454 | goto normal; | 2670 | goto normal; |
@@ -2532,7 +2748,8 @@ normal: | |||
2532 | } | 2748 | } |
2533 | EXPORT_SYMBOL(dev_gro_receive); | 2749 | EXPORT_SYMBOL(dev_gro_receive); |
2534 | 2750 | ||
2535 | static int __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) | 2751 | static gro_result_t |
2752 | __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) | ||
2536 | { | 2753 | { |
2537 | struct sk_buff *p; | 2754 | struct sk_buff *p; |
2538 | 2755 | ||
@@ -2540,33 +2757,35 @@ static int __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) | |||
2540 | return GRO_NORMAL; | 2757 | return GRO_NORMAL; |
2541 | 2758 | ||
2542 | for (p = napi->gro_list; p; p = p->next) { | 2759 | for (p = napi->gro_list; p; p = p->next) { |
2543 | NAPI_GRO_CB(p)->same_flow = (p->dev == skb->dev) | 2760 | NAPI_GRO_CB(p)->same_flow = |
2544 | && !compare_ether_header(skb_mac_header(p), | 2761 | (p->dev == skb->dev) && |
2545 | skb_gro_mac_header(skb)); | 2762 | !compare_ether_header(skb_mac_header(p), |
2763 | skb_gro_mac_header(skb)); | ||
2546 | NAPI_GRO_CB(p)->flush = 0; | 2764 | NAPI_GRO_CB(p)->flush = 0; |
2547 | } | 2765 | } |
2548 | 2766 | ||
2549 | return dev_gro_receive(napi, skb); | 2767 | return dev_gro_receive(napi, skb); |
2550 | } | 2768 | } |
2551 | 2769 | ||
2552 | int napi_skb_finish(int ret, struct sk_buff *skb) | 2770 | gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb) |
2553 | { | 2771 | { |
2554 | int err = NET_RX_SUCCESS; | ||
2555 | |||
2556 | switch (ret) { | 2772 | switch (ret) { |
2557 | case GRO_NORMAL: | 2773 | case GRO_NORMAL: |
2558 | return netif_receive_skb(skb); | 2774 | if (netif_receive_skb(skb)) |
2775 | ret = GRO_DROP; | ||
2776 | break; | ||
2559 | 2777 | ||
2560 | case GRO_DROP: | 2778 | case GRO_DROP: |
2561 | err = NET_RX_DROP; | ||
2562 | /* fall through */ | ||
2563 | |||
2564 | case GRO_MERGED_FREE: | 2779 | case GRO_MERGED_FREE: |
2565 | kfree_skb(skb); | 2780 | kfree_skb(skb); |
2566 | break; | 2781 | break; |
2782 | |||
2783 | case GRO_HELD: | ||
2784 | case GRO_MERGED: | ||
2785 | break; | ||
2567 | } | 2786 | } |
2568 | 2787 | ||
2569 | return err; | 2788 | return ret; |
2570 | } | 2789 | } |
2571 | EXPORT_SYMBOL(napi_skb_finish); | 2790 | EXPORT_SYMBOL(napi_skb_finish); |
2572 | 2791 | ||
@@ -2586,7 +2805,7 @@ void skb_gro_reset_offset(struct sk_buff *skb) | |||
2586 | } | 2805 | } |
2587 | EXPORT_SYMBOL(skb_gro_reset_offset); | 2806 | EXPORT_SYMBOL(skb_gro_reset_offset); |
2588 | 2807 | ||
2589 | int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) | 2808 | gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) |
2590 | { | 2809 | { |
2591 | skb_gro_reset_offset(skb); | 2810 | skb_gro_reset_offset(skb); |
2592 | 2811 | ||
@@ -2605,49 +2824,41 @@ EXPORT_SYMBOL(napi_reuse_skb); | |||
2605 | 2824 | ||
2606 | struct sk_buff *napi_get_frags(struct napi_struct *napi) | 2825 | struct sk_buff *napi_get_frags(struct napi_struct *napi) |
2607 | { | 2826 | { |
2608 | struct net_device *dev = napi->dev; | ||
2609 | struct sk_buff *skb = napi->skb; | 2827 | struct sk_buff *skb = napi->skb; |
2610 | 2828 | ||
2611 | if (!skb) { | 2829 | if (!skb) { |
2612 | skb = netdev_alloc_skb(dev, GRO_MAX_HEAD + NET_IP_ALIGN); | 2830 | skb = netdev_alloc_skb_ip_align(napi->dev, GRO_MAX_HEAD); |
2613 | if (!skb) | 2831 | if (skb) |
2614 | goto out; | 2832 | napi->skb = skb; |
2615 | |||
2616 | skb_reserve(skb, NET_IP_ALIGN); | ||
2617 | |||
2618 | napi->skb = skb; | ||
2619 | } | 2833 | } |
2620 | |||
2621 | out: | ||
2622 | return skb; | 2834 | return skb; |
2623 | } | 2835 | } |
2624 | EXPORT_SYMBOL(napi_get_frags); | 2836 | EXPORT_SYMBOL(napi_get_frags); |
2625 | 2837 | ||
2626 | int napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, int ret) | 2838 | gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, |
2839 | gro_result_t ret) | ||
2627 | { | 2840 | { |
2628 | int err = NET_RX_SUCCESS; | ||
2629 | |||
2630 | switch (ret) { | 2841 | switch (ret) { |
2631 | case GRO_NORMAL: | 2842 | case GRO_NORMAL: |
2632 | case GRO_HELD: | 2843 | case GRO_HELD: |
2633 | skb->protocol = eth_type_trans(skb, napi->dev); | 2844 | skb->protocol = eth_type_trans(skb, skb->dev); |
2634 | 2845 | ||
2635 | if (ret == GRO_NORMAL) | 2846 | if (ret == GRO_HELD) |
2636 | return netif_receive_skb(skb); | 2847 | skb_gro_pull(skb, -ETH_HLEN); |
2637 | 2848 | else if (netif_receive_skb(skb)) | |
2638 | skb_gro_pull(skb, -ETH_HLEN); | 2849 | ret = GRO_DROP; |
2639 | break; | 2850 | break; |
2640 | 2851 | ||
2641 | case GRO_DROP: | 2852 | case GRO_DROP: |
2642 | err = NET_RX_DROP; | ||
2643 | /* fall through */ | ||
2644 | |||
2645 | case GRO_MERGED_FREE: | 2853 | case GRO_MERGED_FREE: |
2646 | napi_reuse_skb(napi, skb); | 2854 | napi_reuse_skb(napi, skb); |
2647 | break; | 2855 | break; |
2856 | |||
2857 | case GRO_MERGED: | ||
2858 | break; | ||
2648 | } | 2859 | } |
2649 | 2860 | ||
2650 | return err; | 2861 | return ret; |
2651 | } | 2862 | } |
2652 | EXPORT_SYMBOL(napi_frags_finish); | 2863 | EXPORT_SYMBOL(napi_frags_finish); |
2653 | 2864 | ||
@@ -2688,12 +2899,12 @@ out: | |||
2688 | } | 2899 | } |
2689 | EXPORT_SYMBOL(napi_frags_skb); | 2900 | EXPORT_SYMBOL(napi_frags_skb); |
2690 | 2901 | ||
2691 | int napi_gro_frags(struct napi_struct *napi) | 2902 | gro_result_t napi_gro_frags(struct napi_struct *napi) |
2692 | { | 2903 | { |
2693 | struct sk_buff *skb = napi_frags_skb(napi); | 2904 | struct sk_buff *skb = napi_frags_skb(napi); |
2694 | 2905 | ||
2695 | if (!skb) | 2906 | if (!skb) |
2696 | return NET_RX_DROP; | 2907 | return GRO_DROP; |
2697 | 2908 | ||
2698 | return napi_frags_finish(napi, skb, __napi_gro_receive(napi, skb)); | 2909 | return napi_frags_finish(napi, skb, __napi_gro_receive(napi, skb)); |
2699 | } | 2910 | } |
@@ -2835,7 +3046,7 @@ static void net_rx_action(struct softirq_action *h) | |||
2835 | * entries to the tail of this list, and only ->poll() | 3046 | * entries to the tail of this list, and only ->poll() |
2836 | * calls can remove this head entry from the list. | 3047 | * calls can remove this head entry from the list. |
2837 | */ | 3048 | */ |
2838 | n = list_entry(list->next, struct napi_struct, poll_list); | 3049 | n = list_first_entry(list, struct napi_struct, poll_list); |
2839 | 3050 | ||
2840 | have = netpoll_poll_lock(n); | 3051 | have = netpoll_poll_lock(n); |
2841 | 3052 | ||
@@ -2938,15 +3149,15 @@ static int dev_ifname(struct net *net, struct ifreq __user *arg) | |||
2938 | if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) | 3149 | if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) |
2939 | return -EFAULT; | 3150 | return -EFAULT; |
2940 | 3151 | ||
2941 | read_lock(&dev_base_lock); | 3152 | rcu_read_lock(); |
2942 | dev = __dev_get_by_index(net, ifr.ifr_ifindex); | 3153 | dev = dev_get_by_index_rcu(net, ifr.ifr_ifindex); |
2943 | if (!dev) { | 3154 | if (!dev) { |
2944 | read_unlock(&dev_base_lock); | 3155 | rcu_read_unlock(); |
2945 | return -ENODEV; | 3156 | return -ENODEV; |
2946 | } | 3157 | } |
2947 | 3158 | ||
2948 | strcpy(ifr.ifr_name, dev->name); | 3159 | strcpy(ifr.ifr_name, dev->name); |
2949 | read_unlock(&dev_base_lock); | 3160 | rcu_read_unlock(); |
2950 | 3161 | ||
2951 | if (copy_to_user(arg, &ifr, sizeof(struct ifreq))) | 3162 | if (copy_to_user(arg, &ifr, sizeof(struct ifreq))) |
2952 | return -EFAULT; | 3163 | return -EFAULT; |
@@ -3016,18 +3227,18 @@ static int dev_ifconf(struct net *net, char __user *arg) | |||
3016 | * in detail. | 3227 | * in detail. |
3017 | */ | 3228 | */ |
3018 | void *dev_seq_start(struct seq_file *seq, loff_t *pos) | 3229 | void *dev_seq_start(struct seq_file *seq, loff_t *pos) |
3019 | __acquires(dev_base_lock) | 3230 | __acquires(RCU) |
3020 | { | 3231 | { |
3021 | struct net *net = seq_file_net(seq); | 3232 | struct net *net = seq_file_net(seq); |
3022 | loff_t off; | 3233 | loff_t off; |
3023 | struct net_device *dev; | 3234 | struct net_device *dev; |
3024 | 3235 | ||
3025 | read_lock(&dev_base_lock); | 3236 | rcu_read_lock(); |
3026 | if (!*pos) | 3237 | if (!*pos) |
3027 | return SEQ_START_TOKEN; | 3238 | return SEQ_START_TOKEN; |
3028 | 3239 | ||
3029 | off = 1; | 3240 | off = 1; |
3030 | for_each_netdev(net, dev) | 3241 | for_each_netdev_rcu(net, dev) |
3031 | if (off++ == *pos) | 3242 | if (off++ == *pos) |
3032 | return dev; | 3243 | return dev; |
3033 | 3244 | ||
@@ -3036,23 +3247,25 @@ void *dev_seq_start(struct seq_file *seq, loff_t *pos) | |||
3036 | 3247 | ||
3037 | void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos) | 3248 | void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos) |
3038 | { | 3249 | { |
3039 | struct net *net = seq_file_net(seq); | 3250 | struct net_device *dev = (v == SEQ_START_TOKEN) ? |
3251 | first_net_device(seq_file_net(seq)) : | ||
3252 | next_net_device((struct net_device *)v); | ||
3253 | |||
3040 | ++*pos; | 3254 | ++*pos; |
3041 | return v == SEQ_START_TOKEN ? | 3255 | return rcu_dereference(dev); |
3042 | first_net_device(net) : next_net_device((struct net_device *)v); | ||
3043 | } | 3256 | } |
3044 | 3257 | ||
3045 | void dev_seq_stop(struct seq_file *seq, void *v) | 3258 | void dev_seq_stop(struct seq_file *seq, void *v) |
3046 | __releases(dev_base_lock) | 3259 | __releases(RCU) |
3047 | { | 3260 | { |
3048 | read_unlock(&dev_base_lock); | 3261 | rcu_read_unlock(); |
3049 | } | 3262 | } |
3050 | 3263 | ||
3051 | static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev) | 3264 | static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev) |
3052 | { | 3265 | { |
3053 | const struct net_device_stats *stats = dev_get_stats(dev); | 3266 | const struct net_device_stats *stats = dev_get_stats(dev); |
3054 | 3267 | ||
3055 | seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu " | 3268 | seq_printf(seq, "%6s: %7lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu " |
3056 | "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n", | 3269 | "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n", |
3057 | dev->name, stats->rx_bytes, stats->rx_packets, | 3270 | dev->name, stats->rx_bytes, stats->rx_packets, |
3058 | stats->rx_errors, | 3271 | stats->rx_errors, |
@@ -3507,10 +3720,10 @@ void __dev_set_rx_mode(struct net_device *dev) | |||
3507 | /* Unicast addresses changes may only happen under the rtnl, | 3720 | /* Unicast addresses changes may only happen under the rtnl, |
3508 | * therefore calling __dev_set_promiscuity here is safe. | 3721 | * therefore calling __dev_set_promiscuity here is safe. |
3509 | */ | 3722 | */ |
3510 | if (dev->uc.count > 0 && !dev->uc_promisc) { | 3723 | if (!netdev_uc_empty(dev) && !dev->uc_promisc) { |
3511 | __dev_set_promiscuity(dev, 1); | 3724 | __dev_set_promiscuity(dev, 1); |
3512 | dev->uc_promisc = 1; | 3725 | dev->uc_promisc = 1; |
3513 | } else if (dev->uc.count == 0 && dev->uc_promisc) { | 3726 | } else if (netdev_uc_empty(dev) && dev->uc_promisc) { |
3514 | __dev_set_promiscuity(dev, -1); | 3727 | __dev_set_promiscuity(dev, -1); |
3515 | dev->uc_promisc = 0; | 3728 | dev->uc_promisc = 0; |
3516 | } | 3729 | } |
@@ -4078,7 +4291,7 @@ static void dev_addr_discard(struct net_device *dev) | |||
4078 | netif_addr_lock_bh(dev); | 4291 | netif_addr_lock_bh(dev); |
4079 | 4292 | ||
4080 | __dev_addr_discard(&dev->mc_list); | 4293 | __dev_addr_discard(&dev->mc_list); |
4081 | dev->mc_count = 0; | 4294 | netdev_mc_count(dev) = 0; |
4082 | 4295 | ||
4083 | netif_addr_unlock_bh(dev); | 4296 | netif_addr_unlock_bh(dev); |
4084 | } | 4297 | } |
@@ -4114,18 +4327,10 @@ unsigned dev_get_flags(const struct net_device *dev) | |||
4114 | } | 4327 | } |
4115 | EXPORT_SYMBOL(dev_get_flags); | 4328 | EXPORT_SYMBOL(dev_get_flags); |
4116 | 4329 | ||
4117 | /** | 4330 | int __dev_change_flags(struct net_device *dev, unsigned int flags) |
4118 | * dev_change_flags - change device settings | ||
4119 | * @dev: device | ||
4120 | * @flags: device state flags | ||
4121 | * | ||
4122 | * Change settings on device based state flags. The flags are | ||
4123 | * in the userspace exported format. | ||
4124 | */ | ||
4125 | int dev_change_flags(struct net_device *dev, unsigned flags) | ||
4126 | { | 4331 | { |
4127 | int ret, changes; | ||
4128 | int old_flags = dev->flags; | 4332 | int old_flags = dev->flags; |
4333 | int ret; | ||
4129 | 4334 | ||
4130 | ASSERT_RTNL(); | 4335 | ASSERT_RTNL(); |
4131 | 4336 | ||
@@ -4156,17 +4361,12 @@ int dev_change_flags(struct net_device *dev, unsigned flags) | |||
4156 | 4361 | ||
4157 | ret = 0; | 4362 | ret = 0; |
4158 | if ((old_flags ^ flags) & IFF_UP) { /* Bit is different ? */ | 4363 | if ((old_flags ^ flags) & IFF_UP) { /* Bit is different ? */ |
4159 | ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev); | 4364 | ret = ((old_flags & IFF_UP) ? __dev_close : __dev_open)(dev); |
4160 | 4365 | ||
4161 | if (!ret) | 4366 | if (!ret) |
4162 | dev_set_rx_mode(dev); | 4367 | dev_set_rx_mode(dev); |
4163 | } | 4368 | } |
4164 | 4369 | ||
4165 | if (dev->flags & IFF_UP && | ||
4166 | ((old_flags ^ dev->flags) & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI | | ||
4167 | IFF_VOLATILE))) | ||
4168 | call_netdevice_notifiers(NETDEV_CHANGE, dev); | ||
4169 | |||
4170 | if ((flags ^ dev->gflags) & IFF_PROMISC) { | 4370 | if ((flags ^ dev->gflags) & IFF_PROMISC) { |
4171 | int inc = (flags & IFF_PROMISC) ? 1 : -1; | 4371 | int inc = (flags & IFF_PROMISC) ? 1 : -1; |
4172 | 4372 | ||
@@ -4185,11 +4385,47 @@ int dev_change_flags(struct net_device *dev, unsigned flags) | |||
4185 | dev_set_allmulti(dev, inc); | 4385 | dev_set_allmulti(dev, inc); |
4186 | } | 4386 | } |
4187 | 4387 | ||
4188 | /* Exclude state transition flags, already notified */ | 4388 | return ret; |
4189 | changes = (old_flags ^ dev->flags) & ~(IFF_UP | IFF_RUNNING); | 4389 | } |
4390 | |||
4391 | void __dev_notify_flags(struct net_device *dev, unsigned int old_flags) | ||
4392 | { | ||
4393 | unsigned int changes = dev->flags ^ old_flags; | ||
4394 | |||
4395 | if (changes & IFF_UP) { | ||
4396 | if (dev->flags & IFF_UP) | ||
4397 | call_netdevice_notifiers(NETDEV_UP, dev); | ||
4398 | else | ||
4399 | call_netdevice_notifiers(NETDEV_DOWN, dev); | ||
4400 | } | ||
4401 | |||
4402 | if (dev->flags & IFF_UP && | ||
4403 | (changes & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI | IFF_VOLATILE))) | ||
4404 | call_netdevice_notifiers(NETDEV_CHANGE, dev); | ||
4405 | } | ||
4406 | |||
4407 | /** | ||
4408 | * dev_change_flags - change device settings | ||
4409 | * @dev: device | ||
4410 | * @flags: device state flags | ||
4411 | * | ||
4412 | * Change settings on device based state flags. The flags are | ||
4413 | * in the userspace exported format. | ||
4414 | */ | ||
4415 | int dev_change_flags(struct net_device *dev, unsigned flags) | ||
4416 | { | ||
4417 | int ret, changes; | ||
4418 | int old_flags = dev->flags; | ||
4419 | |||
4420 | ret = __dev_change_flags(dev, flags); | ||
4421 | if (ret < 0) | ||
4422 | return ret; | ||
4423 | |||
4424 | changes = old_flags ^ dev->flags; | ||
4190 | if (changes) | 4425 | if (changes) |
4191 | rtmsg_ifinfo(RTM_NEWLINK, dev, changes); | 4426 | rtmsg_ifinfo(RTM_NEWLINK, dev, changes); |
4192 | 4427 | ||
4428 | __dev_notify_flags(dev, old_flags); | ||
4193 | return ret; | 4429 | return ret; |
4194 | } | 4430 | } |
4195 | EXPORT_SYMBOL(dev_change_flags); | 4431 | EXPORT_SYMBOL(dev_change_flags); |
@@ -4254,12 +4490,12 @@ int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa) | |||
4254 | EXPORT_SYMBOL(dev_set_mac_address); | 4490 | EXPORT_SYMBOL(dev_set_mac_address); |
4255 | 4491 | ||
4256 | /* | 4492 | /* |
4257 | * Perform the SIOCxIFxxx calls, inside read_lock(dev_base_lock) | 4493 | * Perform the SIOCxIFxxx calls, inside rcu_read_lock() |
4258 | */ | 4494 | */ |
4259 | static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cmd) | 4495 | static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cmd) |
4260 | { | 4496 | { |
4261 | int err; | 4497 | int err; |
4262 | struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name); | 4498 | struct net_device *dev = dev_get_by_name_rcu(net, ifr->ifr_name); |
4263 | 4499 | ||
4264 | if (!dev) | 4500 | if (!dev) |
4265 | return -ENODEV; | 4501 | return -ENODEV; |
@@ -4491,9 +4727,9 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg) | |||
4491 | case SIOCGIFINDEX: | 4727 | case SIOCGIFINDEX: |
4492 | case SIOCGIFTXQLEN: | 4728 | case SIOCGIFTXQLEN: |
4493 | dev_load(net, ifr.ifr_name); | 4729 | dev_load(net, ifr.ifr_name); |
4494 | read_lock(&dev_base_lock); | 4730 | rcu_read_lock(); |
4495 | ret = dev_ifsioc_locked(net, &ifr, cmd); | 4731 | ret = dev_ifsioc_locked(net, &ifr, cmd); |
4496 | read_unlock(&dev_base_lock); | 4732 | rcu_read_unlock(); |
4497 | if (!ret) { | 4733 | if (!ret) { |
4498 | if (colon) | 4734 | if (colon) |
4499 | *colon = ':'; | 4735 | *colon = ':'; |
@@ -4636,59 +4872,86 @@ static void net_set_todo(struct net_device *dev) | |||
4636 | list_add_tail(&dev->todo_list, &net_todo_list); | 4872 | list_add_tail(&dev->todo_list, &net_todo_list); |
4637 | } | 4873 | } |
4638 | 4874 | ||
4639 | static void rollback_registered(struct net_device *dev) | 4875 | static void rollback_registered_many(struct list_head *head) |
4640 | { | 4876 | { |
4877 | struct net_device *dev, *tmp; | ||
4878 | |||
4641 | BUG_ON(dev_boot_phase); | 4879 | BUG_ON(dev_boot_phase); |
4642 | ASSERT_RTNL(); | 4880 | ASSERT_RTNL(); |
4643 | 4881 | ||
4644 | /* Some devices call without registering for initialization unwind. */ | 4882 | list_for_each_entry_safe(dev, tmp, head, unreg_list) { |
4645 | if (dev->reg_state == NETREG_UNINITIALIZED) { | 4883 | /* Some devices call without registering |
4646 | printk(KERN_DEBUG "unregister_netdevice: device %s/%p never " | 4884 | * for initialization unwind. Remove those |
4647 | "was registered\n", dev->name, dev); | 4885 | * devices and proceed with the remaining. |
4886 | */ | ||
4887 | if (dev->reg_state == NETREG_UNINITIALIZED) { | ||
4888 | pr_debug("unregister_netdevice: device %s/%p never " | ||
4889 | "was registered\n", dev->name, dev); | ||
4648 | 4890 | ||
4649 | WARN_ON(1); | 4891 | WARN_ON(1); |
4650 | return; | 4892 | list_del(&dev->unreg_list); |
4651 | } | 4893 | continue; |
4894 | } | ||
4652 | 4895 | ||
4653 | BUG_ON(dev->reg_state != NETREG_REGISTERED); | 4896 | BUG_ON(dev->reg_state != NETREG_REGISTERED); |
4654 | 4897 | ||
4655 | /* If device is running, close it first. */ | 4898 | /* If device is running, close it first. */ |
4656 | dev_close(dev); | 4899 | dev_close(dev); |
4657 | 4900 | ||
4658 | /* And unlink it from device chain. */ | 4901 | /* And unlink it from device chain. */ |
4659 | unlist_netdevice(dev); | 4902 | unlist_netdevice(dev); |
4660 | 4903 | ||
4661 | dev->reg_state = NETREG_UNREGISTERING; | 4904 | dev->reg_state = NETREG_UNREGISTERING; |
4905 | } | ||
4662 | 4906 | ||
4663 | synchronize_net(); | 4907 | synchronize_net(); |
4664 | 4908 | ||
4665 | /* Shutdown queueing discipline. */ | 4909 | list_for_each_entry(dev, head, unreg_list) { |
4666 | dev_shutdown(dev); | 4910 | /* Shutdown queueing discipline. */ |
4911 | dev_shutdown(dev); | ||
4667 | 4912 | ||
4668 | 4913 | ||
4669 | /* Notify protocols, that we are about to destroy | 4914 | /* Notify protocols, that we are about to destroy |
4670 | this device. They should clean all the things. | 4915 | this device. They should clean all the things. |
4671 | */ | 4916 | */ |
4672 | call_netdevice_notifiers(NETDEV_UNREGISTER, dev); | 4917 | call_netdevice_notifiers(NETDEV_UNREGISTER, dev); |
4673 | 4918 | ||
4674 | /* | 4919 | if (!dev->rtnl_link_ops || |
4675 | * Flush the unicast and multicast chains | 4920 | dev->rtnl_link_state == RTNL_LINK_INITIALIZED) |
4676 | */ | 4921 | rtmsg_ifinfo(RTM_DELLINK, dev, ~0U); |
4677 | dev_unicast_flush(dev); | ||
4678 | dev_addr_discard(dev); | ||
4679 | 4922 | ||
4680 | if (dev->netdev_ops->ndo_uninit) | 4923 | /* |
4681 | dev->netdev_ops->ndo_uninit(dev); | 4924 | * Flush the unicast and multicast chains |
4925 | */ | ||
4926 | dev_unicast_flush(dev); | ||
4927 | dev_addr_discard(dev); | ||
4682 | 4928 | ||
4683 | /* Notifier chain MUST detach us from master device. */ | 4929 | if (dev->netdev_ops->ndo_uninit) |
4684 | WARN_ON(dev->master); | 4930 | dev->netdev_ops->ndo_uninit(dev); |
4685 | 4931 | ||
4686 | /* Remove entries from kobject tree */ | 4932 | /* Notifier chain MUST detach us from master device. */ |
4687 | netdev_unregister_kobject(dev); | 4933 | WARN_ON(dev->master); |
4934 | |||
4935 | /* Remove entries from kobject tree */ | ||
4936 | netdev_unregister_kobject(dev); | ||
4937 | } | ||
4938 | |||
4939 | /* Process any work delayed until the end of the batch */ | ||
4940 | dev = list_first_entry(head, struct net_device, unreg_list); | ||
4941 | call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev); | ||
4688 | 4942 | ||
4689 | synchronize_net(); | 4943 | synchronize_net(); |
4690 | 4944 | ||
4691 | dev_put(dev); | 4945 | list_for_each_entry(dev, head, unreg_list) |
4946 | dev_put(dev); | ||
4947 | } | ||
4948 | |||
4949 | static void rollback_registered(struct net_device *dev) | ||
4950 | { | ||
4951 | LIST_HEAD(single); | ||
4952 | |||
4953 | list_add(&dev->unreg_list, &single); | ||
4954 | rollback_registered_many(&single); | ||
4692 | } | 4955 | } |
4693 | 4956 | ||
4694 | static void __netdev_init_queue_locks_one(struct net_device *dev, | 4957 | static void __netdev_init_queue_locks_one(struct net_device *dev, |
@@ -4747,6 +5010,33 @@ unsigned long netdev_fix_features(unsigned long features, const char *name) | |||
4747 | EXPORT_SYMBOL(netdev_fix_features); | 5010 | EXPORT_SYMBOL(netdev_fix_features); |
4748 | 5011 | ||
4749 | /** | 5012 | /** |
5013 | * netif_stacked_transfer_operstate - transfer operstate | ||
5014 | * @rootdev: the root or lower level device to transfer state from | ||
5015 | * @dev: the device to transfer operstate to | ||
5016 | * | ||
5017 | * Transfer operational state from root to device. This is normally | ||
5018 | * called when a stacking relationship exists between the root | ||
5019 | * device and the device(a leaf device). | ||
5020 | */ | ||
5021 | void netif_stacked_transfer_operstate(const struct net_device *rootdev, | ||
5022 | struct net_device *dev) | ||
5023 | { | ||
5024 | if (rootdev->operstate == IF_OPER_DORMANT) | ||
5025 | netif_dormant_on(dev); | ||
5026 | else | ||
5027 | netif_dormant_off(dev); | ||
5028 | |||
5029 | if (netif_carrier_ok(rootdev)) { | ||
5030 | if (!netif_carrier_ok(dev)) | ||
5031 | netif_carrier_on(dev); | ||
5032 | } else { | ||
5033 | if (netif_carrier_ok(dev)) | ||
5034 | netif_carrier_off(dev); | ||
5035 | } | ||
5036 | } | ||
5037 | EXPORT_SYMBOL(netif_stacked_transfer_operstate); | ||
5038 | |||
5039 | /** | ||
4750 | * register_netdevice - register a network device | 5040 | * register_netdevice - register a network device |
4751 | * @dev: device to register | 5041 | * @dev: device to register |
4752 | * | 5042 | * |
@@ -4765,8 +5055,6 @@ EXPORT_SYMBOL(netdev_fix_features); | |||
4765 | 5055 | ||
4766 | int register_netdevice(struct net_device *dev) | 5056 | int register_netdevice(struct net_device *dev) |
4767 | { | 5057 | { |
4768 | struct hlist_head *head; | ||
4769 | struct hlist_node *p; | ||
4770 | int ret; | 5058 | int ret; |
4771 | struct net *net = dev_net(dev); | 5059 | struct net *net = dev_net(dev); |
4772 | 5060 | ||
@@ -4795,26 +5083,14 @@ int register_netdevice(struct net_device *dev) | |||
4795 | } | 5083 | } |
4796 | } | 5084 | } |
4797 | 5085 | ||
4798 | if (!dev_valid_name(dev->name)) { | 5086 | ret = dev_get_valid_name(net, dev->name, dev->name, 0); |
4799 | ret = -EINVAL; | 5087 | if (ret) |
4800 | goto err_uninit; | 5088 | goto err_uninit; |
4801 | } | ||
4802 | 5089 | ||
4803 | dev->ifindex = dev_new_index(net); | 5090 | dev->ifindex = dev_new_index(net); |
4804 | if (dev->iflink == -1) | 5091 | if (dev->iflink == -1) |
4805 | dev->iflink = dev->ifindex; | 5092 | dev->iflink = dev->ifindex; |
4806 | 5093 | ||
4807 | /* Check for existence of name */ | ||
4808 | head = dev_name_hash(net, dev->name); | ||
4809 | hlist_for_each(p, head) { | ||
4810 | struct net_device *d | ||
4811 | = hlist_entry(p, struct net_device, name_hlist); | ||
4812 | if (!strncmp(d->name, dev->name, IFNAMSIZ)) { | ||
4813 | ret = -EEXIST; | ||
4814 | goto err_uninit; | ||
4815 | } | ||
4816 | } | ||
4817 | |||
4818 | /* Fix illegal checksum combinations */ | 5094 | /* Fix illegal checksum combinations */ |
4819 | if ((dev->features & NETIF_F_HW_CSUM) && | 5095 | if ((dev->features & NETIF_F_HW_CSUM) && |
4820 | (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { | 5096 | (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { |
@@ -4837,6 +5113,12 @@ int register_netdevice(struct net_device *dev) | |||
4837 | dev->features |= NETIF_F_GSO; | 5113 | dev->features |= NETIF_F_GSO; |
4838 | 5114 | ||
4839 | netdev_initialize_kobject(dev); | 5115 | netdev_initialize_kobject(dev); |
5116 | |||
5117 | ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev); | ||
5118 | ret = notifier_to_errno(ret); | ||
5119 | if (ret) | ||
5120 | goto err_uninit; | ||
5121 | |||
4840 | ret = netdev_register_kobject(dev); | 5122 | ret = netdev_register_kobject(dev); |
4841 | if (ret) | 5123 | if (ret) |
4842 | goto err_uninit; | 5124 | goto err_uninit; |
@@ -4860,6 +5142,13 @@ int register_netdevice(struct net_device *dev) | |||
4860 | rollback_registered(dev); | 5142 | rollback_registered(dev); |
4861 | dev->reg_state = NETREG_UNREGISTERED; | 5143 | dev->reg_state = NETREG_UNREGISTERED; |
4862 | } | 5144 | } |
5145 | /* | ||
5146 | * Prevent userspace races by waiting until the network | ||
5147 | * device is fully setup before sending notifications. | ||
5148 | */ | ||
5149 | if (!dev->rtnl_link_ops || | ||
5150 | dev->rtnl_link_state == RTNL_LINK_INITIALIZED) | ||
5151 | rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U); | ||
4863 | 5152 | ||
4864 | out: | 5153 | out: |
4865 | return ret; | 5154 | return ret; |
@@ -4961,6 +5250,8 @@ static void netdev_wait_allrefs(struct net_device *dev) | |||
4961 | { | 5250 | { |
4962 | unsigned long rebroadcast_time, warning_time; | 5251 | unsigned long rebroadcast_time, warning_time; |
4963 | 5252 | ||
5253 | linkwatch_forget_dev(dev); | ||
5254 | |||
4964 | rebroadcast_time = warning_time = jiffies; | 5255 | rebroadcast_time = warning_time = jiffies; |
4965 | while (atomic_read(&dev->refcnt) != 0) { | 5256 | while (atomic_read(&dev->refcnt) != 0) { |
4966 | if (time_after(jiffies, rebroadcast_time + 1 * HZ)) { | 5257 | if (time_after(jiffies, rebroadcast_time + 1 * HZ)) { |
@@ -4968,6 +5259,8 @@ static void netdev_wait_allrefs(struct net_device *dev) | |||
4968 | 5259 | ||
4969 | /* Rebroadcast unregister notification */ | 5260 | /* Rebroadcast unregister notification */ |
4970 | call_netdevice_notifiers(NETDEV_UNREGISTER, dev); | 5261 | call_netdevice_notifiers(NETDEV_UNREGISTER, dev); |
5262 | /* don't resend NETDEV_UNREGISTER_BATCH, _BATCH users | ||
5263 | * should have already handled it the first time */ | ||
4971 | 5264 | ||
4972 | if (test_bit(__LINK_STATE_LINKWATCH_PENDING, | 5265 | if (test_bit(__LINK_STATE_LINKWATCH_PENDING, |
4973 | &dev->state)) { | 5266 | &dev->state)) { |
@@ -5032,7 +5325,7 @@ void netdev_run_todo(void) | |||
5032 | 5325 | ||
5033 | while (!list_empty(&list)) { | 5326 | while (!list_empty(&list)) { |
5034 | struct net_device *dev | 5327 | struct net_device *dev |
5035 | = list_entry(list.next, struct net_device, todo_list); | 5328 | = list_first_entry(&list, struct net_device, todo_list); |
5036 | list_del(&dev->todo_list); | 5329 | list_del(&dev->todo_list); |
5037 | 5330 | ||
5038 | if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) { | 5331 | if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) { |
@@ -5063,6 +5356,32 @@ void netdev_run_todo(void) | |||
5063 | } | 5356 | } |
5064 | 5357 | ||
5065 | /** | 5358 | /** |
5359 | * dev_txq_stats_fold - fold tx_queues stats | ||
5360 | * @dev: device to get statistics from | ||
5361 | * @stats: struct net_device_stats to hold results | ||
5362 | */ | ||
5363 | void dev_txq_stats_fold(const struct net_device *dev, | ||
5364 | struct net_device_stats *stats) | ||
5365 | { | ||
5366 | unsigned long tx_bytes = 0, tx_packets = 0, tx_dropped = 0; | ||
5367 | unsigned int i; | ||
5368 | struct netdev_queue *txq; | ||
5369 | |||
5370 | for (i = 0; i < dev->num_tx_queues; i++) { | ||
5371 | txq = netdev_get_tx_queue(dev, i); | ||
5372 | tx_bytes += txq->tx_bytes; | ||
5373 | tx_packets += txq->tx_packets; | ||
5374 | tx_dropped += txq->tx_dropped; | ||
5375 | } | ||
5376 | if (tx_bytes || tx_packets || tx_dropped) { | ||
5377 | stats->tx_bytes = tx_bytes; | ||
5378 | stats->tx_packets = tx_packets; | ||
5379 | stats->tx_dropped = tx_dropped; | ||
5380 | } | ||
5381 | } | ||
5382 | EXPORT_SYMBOL(dev_txq_stats_fold); | ||
5383 | |||
5384 | /** | ||
5066 | * dev_get_stats - get network device statistics | 5385 | * dev_get_stats - get network device statistics |
5067 | * @dev: device to get statistics from | 5386 | * @dev: device to get statistics from |
5068 | * | 5387 | * |
@@ -5076,25 +5395,9 @@ const struct net_device_stats *dev_get_stats(struct net_device *dev) | |||
5076 | 5395 | ||
5077 | if (ops->ndo_get_stats) | 5396 | if (ops->ndo_get_stats) |
5078 | return ops->ndo_get_stats(dev); | 5397 | return ops->ndo_get_stats(dev); |
5079 | else { | 5398 | |
5080 | unsigned long tx_bytes = 0, tx_packets = 0, tx_dropped = 0; | 5399 | dev_txq_stats_fold(dev, &dev->stats); |
5081 | struct net_device_stats *stats = &dev->stats; | 5400 | return &dev->stats; |
5082 | unsigned int i; | ||
5083 | struct netdev_queue *txq; | ||
5084 | |||
5085 | for (i = 0; i < dev->num_tx_queues; i++) { | ||
5086 | txq = netdev_get_tx_queue(dev, i); | ||
5087 | tx_bytes += txq->tx_bytes; | ||
5088 | tx_packets += txq->tx_packets; | ||
5089 | tx_dropped += txq->tx_dropped; | ||
5090 | } | ||
5091 | if (tx_bytes || tx_packets || tx_dropped) { | ||
5092 | stats->tx_bytes = tx_bytes; | ||
5093 | stats->tx_packets = tx_packets; | ||
5094 | stats->tx_dropped = tx_dropped; | ||
5095 | } | ||
5096 | return stats; | ||
5097 | } | ||
5098 | } | 5401 | } |
5099 | EXPORT_SYMBOL(dev_get_stats); | 5402 | EXPORT_SYMBOL(dev_get_stats); |
5100 | 5403 | ||
@@ -5173,7 +5476,11 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, | |||
5173 | 5476 | ||
5174 | netdev_init_queues(dev); | 5477 | netdev_init_queues(dev); |
5175 | 5478 | ||
5479 | INIT_LIST_HEAD(&dev->ethtool_ntuple_list.list); | ||
5480 | dev->ethtool_ntuple_list.count = 0; | ||
5176 | INIT_LIST_HEAD(&dev->napi_list); | 5481 | INIT_LIST_HEAD(&dev->napi_list); |
5482 | INIT_LIST_HEAD(&dev->unreg_list); | ||
5483 | INIT_LIST_HEAD(&dev->link_watch_list); | ||
5177 | dev->priv_flags = IFF_XMIT_DST_RELEASE; | 5484 | dev->priv_flags = IFF_XMIT_DST_RELEASE; |
5178 | setup(dev); | 5485 | setup(dev); |
5179 | strcpy(dev->name, name); | 5486 | strcpy(dev->name, name); |
@@ -5207,6 +5514,9 @@ void free_netdev(struct net_device *dev) | |||
5207 | /* Flush device addresses */ | 5514 | /* Flush device addresses */ |
5208 | dev_addr_flush(dev); | 5515 | dev_addr_flush(dev); |
5209 | 5516 | ||
5517 | /* Clear ethtool n-tuple list */ | ||
5518 | ethtool_ntuple_flush(dev); | ||
5519 | |||
5210 | list_for_each_entry_safe(p, n, &dev->napi_list, dev_list) | 5520 | list_for_each_entry_safe(p, n, &dev->napi_list, dev_list) |
5211 | netif_napi_del(p); | 5521 | netif_napi_del(p); |
5212 | 5522 | ||
@@ -5238,25 +5548,47 @@ void synchronize_net(void) | |||
5238 | EXPORT_SYMBOL(synchronize_net); | 5548 | EXPORT_SYMBOL(synchronize_net); |
5239 | 5549 | ||
5240 | /** | 5550 | /** |
5241 | * unregister_netdevice - remove device from the kernel | 5551 | * unregister_netdevice_queue - remove device from the kernel |
5242 | * @dev: device | 5552 | * @dev: device |
5553 | * @head: list | ||
5243 | * | 5554 | * |
5244 | * This function shuts down a device interface and removes it | 5555 | * This function shuts down a device interface and removes it |
5245 | * from the kernel tables. | 5556 | * from the kernel tables. |
5557 | * If head is not NULL, the device is queued to be unregistered later. | ||
5246 | * | 5558 | * |
5247 | * Callers must hold the rtnl semaphore. You may want | 5559 | * Callers must hold the rtnl semaphore. You may want |
5248 | * unregister_netdev() instead of this. | 5560 | * unregister_netdev() instead of this. |
5249 | */ | 5561 | */ |
5250 | 5562 | ||
5251 | void unregister_netdevice(struct net_device *dev) | 5563 | void unregister_netdevice_queue(struct net_device *dev, struct list_head *head) |
5252 | { | 5564 | { |
5253 | ASSERT_RTNL(); | 5565 | ASSERT_RTNL(); |
5254 | 5566 | ||
5255 | rollback_registered(dev); | 5567 | if (head) { |
5256 | /* Finish processing unregister after unlock */ | 5568 | list_move_tail(&dev->unreg_list, head); |
5257 | net_set_todo(dev); | 5569 | } else { |
5570 | rollback_registered(dev); | ||
5571 | /* Finish processing unregister after unlock */ | ||
5572 | net_set_todo(dev); | ||
5573 | } | ||
5258 | } | 5574 | } |
5259 | EXPORT_SYMBOL(unregister_netdevice); | 5575 | EXPORT_SYMBOL(unregister_netdevice_queue); |
5576 | |||
5577 | /** | ||
5578 | * unregister_netdevice_many - unregister many devices | ||
5579 | * @head: list of devices | ||
5580 | */ | ||
5581 | void unregister_netdevice_many(struct list_head *head) | ||
5582 | { | ||
5583 | struct net_device *dev; | ||
5584 | |||
5585 | if (!list_empty(head)) { | ||
5586 | rollback_registered_many(head); | ||
5587 | list_for_each_entry(dev, head, unreg_list) | ||
5588 | net_set_todo(dev); | ||
5589 | } | ||
5590 | } | ||
5591 | EXPORT_SYMBOL(unregister_netdevice_many); | ||
5260 | 5592 | ||
5261 | /** | 5593 | /** |
5262 | * unregister_netdev - remove device from the kernel | 5594 | * unregister_netdev - remove device from the kernel |
@@ -5293,8 +5625,6 @@ EXPORT_SYMBOL(unregister_netdev); | |||
5293 | 5625 | ||
5294 | int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat) | 5626 | int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat) |
5295 | { | 5627 | { |
5296 | char buf[IFNAMSIZ]; | ||
5297 | const char *destname; | ||
5298 | int err; | 5628 | int err; |
5299 | 5629 | ||
5300 | ASSERT_RTNL(); | 5630 | ASSERT_RTNL(); |
@@ -5327,20 +5657,11 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char | |||
5327 | * we can use it in the destination network namespace. | 5657 | * we can use it in the destination network namespace. |
5328 | */ | 5658 | */ |
5329 | err = -EEXIST; | 5659 | err = -EEXIST; |
5330 | destname = dev->name; | 5660 | if (__dev_get_by_name(net, dev->name)) { |
5331 | if (__dev_get_by_name(net, destname)) { | ||
5332 | /* We get here if we can't use the current device name */ | 5661 | /* We get here if we can't use the current device name */ |
5333 | if (!pat) | 5662 | if (!pat) |
5334 | goto out; | 5663 | goto out; |
5335 | if (!dev_valid_name(pat)) | 5664 | if (dev_get_valid_name(net, pat, dev->name, 1)) |
5336 | goto out; | ||
5337 | if (strchr(pat, '%')) { | ||
5338 | if (__dev_alloc_name(net, pat, buf) < 0) | ||
5339 | goto out; | ||
5340 | destname = buf; | ||
5341 | } else | ||
5342 | destname = pat; | ||
5343 | if (__dev_get_by_name(net, destname)) | ||
5344 | goto out; | 5665 | goto out; |
5345 | } | 5666 | } |
5346 | 5667 | ||
@@ -5364,6 +5685,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char | |||
5364 | this device. They should clean all the things. | 5685 | this device. They should clean all the things. |
5365 | */ | 5686 | */ |
5366 | call_netdevice_notifiers(NETDEV_UNREGISTER, dev); | 5687 | call_netdevice_notifiers(NETDEV_UNREGISTER, dev); |
5688 | call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev); | ||
5367 | 5689 | ||
5368 | /* | 5690 | /* |
5369 | * Flush the unicast and multicast chains | 5691 | * Flush the unicast and multicast chains |
@@ -5376,10 +5698,6 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char | |||
5376 | /* Actually switch the network namespace */ | 5698 | /* Actually switch the network namespace */ |
5377 | dev_net_set(dev, net); | 5699 | dev_net_set(dev, net); |
5378 | 5700 | ||
5379 | /* Assign the new device name */ | ||
5380 | if (destname != dev->name) | ||
5381 | strcpy(dev->name, destname); | ||
5382 | |||
5383 | /* If there is an ifindex conflict assign a new one */ | 5701 | /* If there is an ifindex conflict assign a new one */ |
5384 | if (__dev_get_by_index(net, dev->ifindex)) { | 5702 | if (__dev_get_by_index(net, dev->ifindex)) { |
5385 | int iflink = (dev->iflink == dev->ifindex); | 5703 | int iflink = (dev->iflink == dev->ifindex); |
@@ -5398,6 +5716,12 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char | |||
5398 | /* Notify protocols, that a new device appeared. */ | 5716 | /* Notify protocols, that a new device appeared. */ |
5399 | call_netdevice_notifiers(NETDEV_REGISTER, dev); | 5717 | call_netdevice_notifiers(NETDEV_REGISTER, dev); |
5400 | 5718 | ||
5719 | /* | ||
5720 | * Prevent userspace races by waiting until the network | ||
5721 | * device is fully setup before sending notifications. | ||
5722 | */ | ||
5723 | rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U); | ||
5724 | |||
5401 | synchronize_net(); | 5725 | synchronize_net(); |
5402 | err = 0; | 5726 | err = 0; |
5403 | out: | 5727 | out: |
@@ -5484,7 +5808,7 @@ unsigned long netdev_increment_features(unsigned long all, unsigned long one, | |||
5484 | one |= NETIF_F_ALL_CSUM; | 5808 | one |= NETIF_F_ALL_CSUM; |
5485 | 5809 | ||
5486 | one |= all & NETIF_F_ONE_FOR_ALL; | 5810 | one |= all & NETIF_F_ONE_FOR_ALL; |
5487 | all &= one | NETIF_F_LLTX | NETIF_F_GSO; | 5811 | all &= one | NETIF_F_LLTX | NETIF_F_GSO | NETIF_F_UFO; |
5488 | all |= one & mask & NETIF_F_ONE_FOR_ALL; | 5812 | all |= one & mask & NETIF_F_ONE_FOR_ALL; |
5489 | 5813 | ||
5490 | return all; | 5814 | return all; |
@@ -5566,14 +5890,13 @@ static struct pernet_operations __net_initdata netdev_net_ops = { | |||
5566 | 5890 | ||
5567 | static void __net_exit default_device_exit(struct net *net) | 5891 | static void __net_exit default_device_exit(struct net *net) |
5568 | { | 5892 | { |
5569 | struct net_device *dev; | 5893 | struct net_device *dev, *aux; |
5570 | /* | 5894 | /* |
5571 | * Push all migratable of the network devices back to the | 5895 | * Push all migratable network devices back to the |
5572 | * initial network namespace | 5896 | * initial network namespace |
5573 | */ | 5897 | */ |
5574 | rtnl_lock(); | 5898 | rtnl_lock(); |
5575 | restart: | 5899 | for_each_netdev_safe(net, dev, aux) { |
5576 | for_each_netdev(net, dev) { | ||
5577 | int err; | 5900 | int err; |
5578 | char fb_name[IFNAMSIZ]; | 5901 | char fb_name[IFNAMSIZ]; |
5579 | 5902 | ||
@@ -5581,11 +5904,9 @@ restart: | |||
5581 | if (dev->features & NETIF_F_NETNS_LOCAL) | 5904 | if (dev->features & NETIF_F_NETNS_LOCAL) |
5582 | continue; | 5905 | continue; |
5583 | 5906 | ||
5584 | /* Delete virtual devices */ | 5907 | /* Leave virtual devices for the generic cleanup */ |
5585 | if (dev->rtnl_link_ops && dev->rtnl_link_ops->dellink) { | 5908 | if (dev->rtnl_link_ops) |
5586 | dev->rtnl_link_ops->dellink(dev); | 5909 | continue; |
5587 | goto restart; | ||
5588 | } | ||
5589 | 5910 | ||
5590 | /* Push remaining network devices to init_net */ | 5912 | /* Push remaining network devices to init_net */ |
5591 | snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex); | 5912 | snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex); |
@@ -5595,13 +5916,37 @@ restart: | |||
5595 | __func__, dev->name, err); | 5916 | __func__, dev->name, err); |
5596 | BUG(); | 5917 | BUG(); |
5597 | } | 5918 | } |
5598 | goto restart; | ||
5599 | } | 5919 | } |
5600 | rtnl_unlock(); | 5920 | rtnl_unlock(); |
5601 | } | 5921 | } |
5602 | 5922 | ||
5923 | static void __net_exit default_device_exit_batch(struct list_head *net_list) | ||
5924 | { | ||
5925 | /* At exit all network devices must be removed from a network | ||
5926 | * namespace. Do this in the reverse order of registration. | ||
5927 | * Do this across as many network namespaces as possible to | ||
5928 | * improve batching efficiency. | ||
5929 | */ | ||
5930 | struct net_device *dev; | ||
5931 | struct net *net; | ||
5932 | LIST_HEAD(dev_kill_list); | ||
5933 | |||
5934 | rtnl_lock(); | ||
5935 | list_for_each_entry(net, net_list, exit_list) { | ||
5936 | for_each_netdev_reverse(net, dev) { | ||
5937 | if (dev->rtnl_link_ops) | ||
5938 | dev->rtnl_link_ops->dellink(dev, &dev_kill_list); | ||
5939 | else | ||
5940 | unregister_netdevice_queue(dev, &dev_kill_list); | ||
5941 | } | ||
5942 | } | ||
5943 | unregister_netdevice_many(&dev_kill_list); | ||
5944 | rtnl_unlock(); | ||
5945 | } | ||
5946 | |||
5603 | static struct pernet_operations __net_initdata default_device_ops = { | 5947 | static struct pernet_operations __net_initdata default_device_ops = { |
5604 | .exit = default_device_exit, | 5948 | .exit = default_device_exit, |
5949 | .exit_batch = default_device_exit_batch, | ||
5605 | }; | 5950 | }; |
5606 | 5951 | ||
5607 | /* | 5952 | /* |