Diffstat (limited to 'net/core/dev.c')
-rw-r--r-- | net/core/dev.c | 1758 |
1 files changed, 1096 insertions, 662 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index 660dd41aaaa6..9c58c1ec41a9 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -128,7 +128,11 @@ | |||
128 | #include <linux/jhash.h> | 128 | #include <linux/jhash.h> |
129 | #include <linux/random.h> | 129 | #include <linux/random.h> |
130 | #include <trace/events/napi.h> | 130 | #include <trace/events/napi.h> |
131 | #include <trace/events/net.h> | ||
132 | #include <trace/events/skb.h> | ||
131 | #include <linux/pci.h> | 133 | #include <linux/pci.h> |
134 | #include <linux/inetdevice.h> | ||
135 | #include <linux/cpu_rmap.h> | ||
132 | 136 | ||
133 | #include "net-sysfs.h" | 137 | #include "net-sysfs.h" |
134 | 138 | ||
@@ -371,6 +375,14 @@ static inline void netdev_set_addr_lockdep_class(struct net_device *dev) | |||
371 | * --ANK (980803) | 375 | * --ANK (980803) |
372 | */ | 376 | */ |
373 | 377 | ||
378 | static inline struct list_head *ptype_head(const struct packet_type *pt) | ||
379 | { | ||
380 | if (pt->type == htons(ETH_P_ALL)) | ||
381 | return &ptype_all; | ||
382 | else | ||
383 | return &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK]; | ||
384 | } | ||
385 | |||
374 | /** | 386 | /** |
375 | * dev_add_pack - add packet handler | 387 | * dev_add_pack - add packet handler |
376 | * @pt: packet type declaration | 388 | * @pt: packet type declaration |
@@ -386,16 +398,11 @@ static inline void netdev_set_addr_lockdep_class(struct net_device *dev) | |||
386 | 398 | ||
387 | void dev_add_pack(struct packet_type *pt) | 399 | void dev_add_pack(struct packet_type *pt) |
388 | { | 400 | { |
389 | int hash; | 401 | struct list_head *head = ptype_head(pt); |
390 | 402 | ||
391 | spin_lock_bh(&ptype_lock); | 403 | spin_lock(&ptype_lock); |
392 | if (pt->type == htons(ETH_P_ALL)) | 404 | list_add_rcu(&pt->list, head); |
393 | list_add_rcu(&pt->list, &ptype_all); | 405 | spin_unlock(&ptype_lock); |
394 | else { | ||
395 | hash = ntohs(pt->type) & PTYPE_HASH_MASK; | ||
396 | list_add_rcu(&pt->list, &ptype_base[hash]); | ||
397 | } | ||
398 | spin_unlock_bh(&ptype_lock); | ||
399 | } | 406 | } |
400 | EXPORT_SYMBOL(dev_add_pack); | 407 | EXPORT_SYMBOL(dev_add_pack); |
401 | 408 | ||
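The hunk above folds the duplicated bucket selection in dev_add_pack() and __dev_remove_pack() into a single ptype_head() helper and switches from the _bh lock variants to plain spin_lock(). A minimal userspace sketch of the bucket rule, assuming the kernel's PTYPE_HASH_SIZE of 16 and using host-order protocol numbers (the kernel keys off ntohs(pt->type)):

#include <stdio.h>
#include <stdint.h>

#define ETH_P_ALL       0x0003
#define PTYPE_HASH_SIZE 16
#define PTYPE_HASH_MASK (PTYPE_HASH_SIZE - 1)

/* -1 stands for "the dedicated ptype_all list", anything else is an
 * index into the ptype_base[] hash buckets. */
static int ptype_bucket(uint16_t type)
{
        if (type == ETH_P_ALL)
                return -1;
        return type & PTYPE_HASH_MASK;
}

int main(void)
{
        uint16_t types[] = { 0x0003, 0x0800 /* IPv4 */, 0x0806 /* ARP */, 0x86DD /* IPv6 */ };

        for (unsigned int i = 0; i < sizeof(types) / sizeof(types[0]); i++)
                printf("0x%04x -> bucket %d\n", (unsigned)types[i], ptype_bucket(types[i]));
        return 0;
}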
@@ -414,15 +421,10 @@ EXPORT_SYMBOL(dev_add_pack); | |||
414 | */ | 421 | */ |
415 | void __dev_remove_pack(struct packet_type *pt) | 422 | void __dev_remove_pack(struct packet_type *pt) |
416 | { | 423 | { |
417 | struct list_head *head; | 424 | struct list_head *head = ptype_head(pt); |
418 | struct packet_type *pt1; | 425 | struct packet_type *pt1; |
419 | 426 | ||
420 | spin_lock_bh(&ptype_lock); | 427 | spin_lock(&ptype_lock); |
421 | |||
422 | if (pt->type == htons(ETH_P_ALL)) | ||
423 | head = &ptype_all; | ||
424 | else | ||
425 | head = &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK]; | ||
426 | 428 | ||
427 | list_for_each_entry(pt1, head, list) { | 429 | list_for_each_entry(pt1, head, list) { |
428 | if (pt == pt1) { | 430 | if (pt == pt1) { |
@@ -433,7 +435,7 @@ void __dev_remove_pack(struct packet_type *pt) | |||
433 | 435 | ||
434 | printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt); | 436 | printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt); |
435 | out: | 437 | out: |
436 | spin_unlock_bh(&ptype_lock); | 438 | spin_unlock(&ptype_lock); |
437 | } | 439 | } |
438 | EXPORT_SYMBOL(__dev_remove_pack); | 440 | EXPORT_SYMBOL(__dev_remove_pack); |
439 | 441 | ||
@@ -742,34 +744,32 @@ struct net_device *dev_get_by_index(struct net *net, int ifindex) | |||
742 | EXPORT_SYMBOL(dev_get_by_index); | 744 | EXPORT_SYMBOL(dev_get_by_index); |
743 | 745 | ||
744 | /** | 746 | /** |
745 | * dev_getbyhwaddr - find a device by its hardware address | 747 | * dev_getbyhwaddr_rcu - find a device by its hardware address |
746 | * @net: the applicable net namespace | 748 | * @net: the applicable net namespace |
747 | * @type: media type of device | 749 | * @type: media type of device |
748 | * @ha: hardware address | 750 | * @ha: hardware address |
749 | * | 751 | * |
750 | * Search for an interface by MAC address. Returns NULL if the device | 752 | * Search for an interface by MAC address. Returns NULL if the device |
751 | * is not found or a pointer to the device. The caller must hold the | 753 | * is not found or a pointer to the device. |
752 | * rtnl semaphore. The returned device has not had its ref count increased | 754 | * The caller must hold RCU or RTNL. |
755 | * The returned device has not had its ref count increased | ||
753 | * and the caller must therefore be careful about locking | 756 | * and the caller must therefore be careful about locking |
754 | * | 757 | * |
755 | * BUGS: | ||
756 | * If the API was consistent this would be __dev_get_by_hwaddr | ||
757 | */ | 758 | */ |
758 | 759 | ||
759 | struct net_device *dev_getbyhwaddr(struct net *net, unsigned short type, char *ha) | 760 | struct net_device *dev_getbyhwaddr_rcu(struct net *net, unsigned short type, |
761 | const char *ha) | ||
760 | { | 762 | { |
761 | struct net_device *dev; | 763 | struct net_device *dev; |
762 | 764 | ||
763 | ASSERT_RTNL(); | 765 | for_each_netdev_rcu(net, dev) |
764 | |||
765 | for_each_netdev(net, dev) | ||
766 | if (dev->type == type && | 766 | if (dev->type == type && |
767 | !memcmp(dev->dev_addr, ha, dev->addr_len)) | 767 | !memcmp(dev->dev_addr, ha, dev->addr_len)) |
768 | return dev; | 768 | return dev; |
769 | 769 | ||
770 | return NULL; | 770 | return NULL; |
771 | } | 771 | } |
772 | EXPORT_SYMBOL(dev_getbyhwaddr); | 772 | EXPORT_SYMBOL(dev_getbyhwaddr_rcu); |
773 | 773 | ||
774 | struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type) | 774 | struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type) |
775 | { | 775 | { |
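dev_getbyhwaddr() becomes dev_getbyhwaddr_rcu(): callers may now hold either RCU or RTNL, and the walk uses for_each_netdev_rcu(). The match rule itself is unchanged; a userspace sketch of it, with a stand-in struct rather than the kernel's struct net_device:

#include <stdio.h>
#include <string.h>

struct fake_dev {
        unsigned short type;          /* ARPHRD_ETHER is 1 */
        unsigned char  addr[6];
        unsigned char  addr_len;
        const char    *name;
};

/* Match on media type first, then compare exactly addr_len bytes. */
static const struct fake_dev *find_by_hwaddr(const struct fake_dev *devs, int n,
                                             unsigned short type, const unsigned char *ha)
{
        for (int i = 0; i < n; i++)
                if (devs[i].type == type &&
                    !memcmp(devs[i].addr, ha, devs[i].addr_len))
                        return &devs[i];
        return NULL;
}

int main(void)
{
        const struct fake_dev devs[] = {
                { 1, { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55 }, 6, "eth0" },
                { 1, { 0x00, 0x11, 0x22, 0x33, 0x44, 0x66 }, 6, "eth1" },
        };
        const unsigned char want[6] = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x66 };
        const struct fake_dev *d = find_by_hwaddr(devs, 2, 1, want);

        printf("%s\n", d ? d->name : "not found");
        return 0;
}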
@@ -948,7 +948,7 @@ int dev_alloc_name(struct net_device *dev, const char *name) | |||
948 | } | 948 | } |
949 | EXPORT_SYMBOL(dev_alloc_name); | 949 | EXPORT_SYMBOL(dev_alloc_name); |
950 | 950 | ||
951 | static int dev_get_valid_name(struct net_device *dev, const char *name, bool fmt) | 951 | static int dev_get_valid_name(struct net_device *dev, const char *name) |
952 | { | 952 | { |
953 | struct net *net; | 953 | struct net *net; |
954 | 954 | ||
@@ -958,7 +958,7 @@ static int dev_get_valid_name(struct net_device *dev, const char *name, bool fmt | |||
958 | if (!dev_valid_name(name)) | 958 | if (!dev_valid_name(name)) |
959 | return -EINVAL; | 959 | return -EINVAL; |
960 | 960 | ||
961 | if (fmt && strchr(name, '%')) | 961 | if (strchr(name, '%')) |
962 | return dev_alloc_name(dev, name); | 962 | return dev_alloc_name(dev, name); |
963 | else if (__dev_get_by_name(net, name)) | 963 | else if (__dev_get_by_name(net, name)) |
964 | return -EEXIST; | 964 | return -EEXIST; |
@@ -995,7 +995,7 @@ int dev_change_name(struct net_device *dev, const char *newname) | |||
995 | 995 | ||
996 | memcpy(oldname, dev->name, IFNAMSIZ); | 996 | memcpy(oldname, dev->name, IFNAMSIZ); |
997 | 997 | ||
998 | err = dev_get_valid_name(dev, newname, 1); | 998 | err = dev_get_valid_name(dev, newname); |
999 | if (err < 0) | 999 | if (err < 0) |
1000 | return err; | 1000 | return err; |
1001 | 1001 | ||
@@ -1007,7 +1007,7 @@ rollback: | |||
1007 | } | 1007 | } |
1008 | 1008 | ||
1009 | write_lock_bh(&dev_base_lock); | 1009 | write_lock_bh(&dev_base_lock); |
1010 | hlist_del(&dev->name_hlist); | 1010 | hlist_del_rcu(&dev->name_hlist); |
1011 | write_unlock_bh(&dev_base_lock); | 1011 | write_unlock_bh(&dev_base_lock); |
1012 | 1012 | ||
1013 | synchronize_rcu(); | 1013 | synchronize_rcu(); |
@@ -1115,13 +1115,21 @@ EXPORT_SYMBOL(netdev_bonding_change); | |||
1115 | void dev_load(struct net *net, const char *name) | 1115 | void dev_load(struct net *net, const char *name) |
1116 | { | 1116 | { |
1117 | struct net_device *dev; | 1117 | struct net_device *dev; |
1118 | int no_module; | ||
1118 | 1119 | ||
1119 | rcu_read_lock(); | 1120 | rcu_read_lock(); |
1120 | dev = dev_get_by_name_rcu(net, name); | 1121 | dev = dev_get_by_name_rcu(net, name); |
1121 | rcu_read_unlock(); | 1122 | rcu_read_unlock(); |
1122 | 1123 | ||
1123 | if (!dev && capable(CAP_NET_ADMIN)) | 1124 | no_module = !dev; |
1124 | request_module("%s", name); | 1125 | if (no_module && capable(CAP_NET_ADMIN)) |
1126 | no_module = request_module("netdev-%s", name); | ||
1127 | if (no_module && capable(CAP_SYS_MODULE)) { | ||
1128 | if (!request_module("%s", name)) | ||
1129 | pr_err("Loading kernel module for a network device " | ||
1130 | "with CAP_SYS_MODULE (deprecated). Use CAP_NET_ADMIN and alias netdev-%s " | ||
1131 | "instead\n", name); | ||
1132 | } | ||
1125 | } | 1133 | } |
1126 | EXPORT_SYMBOL(dev_load); | 1134 | EXPORT_SYMBOL(dev_load); |
1127 | 1135 | ||
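dev_load() now asks for a "netdev-%s" module alias, which only requires CAP_NET_ADMIN, and falls back to the raw interface name only for callers that also have CAP_SYS_MODULE, logging that the old path is deprecated. A toy sketch of just that decision order; the capability flags and the module loader below are stand-ins, not the kernel API:

#include <stdio.h>
#include <stdbool.h>

/* Stand-in for request_module(): print what would be requested and
 * pretend the load succeeded (0 means success, like the real call). */
static int fake_request_module(const char *name)
{
        printf("request_module(\"%s\")\n", name);
        return 0;
}

static void load_for(const char *ifname, bool cap_net_admin, bool cap_sys_module)
{
        char alias[64];
        int no_module = 1;                       /* no device found yet */

        snprintf(alias, sizeof(alias), "netdev-%s", ifname);
        if (no_module && cap_net_admin)
                no_module = fake_request_module(alias);
        if (no_module && cap_sys_module) {
                if (!fake_request_module(ifname))
                        printf("deprecated: loaded \"%s\" by raw name\n", ifname);
        }
}

int main(void)
{
        load_for("tun0", true, false);   /* preferred: netdev-tun0 alias */
        load_for("tun0", false, true);   /* legacy path, warns */
        return 0;
}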
@@ -1132,9 +1140,6 @@ static int __dev_open(struct net_device *dev) | |||
1132 | 1140 | ||
1133 | ASSERT_RTNL(); | 1141 | ASSERT_RTNL(); |
1134 | 1142 | ||
1135 | /* | ||
1136 | * Is it even present? | ||
1137 | */ | ||
1138 | if (!netif_device_present(dev)) | 1143 | if (!netif_device_present(dev)) |
1139 | return -ENODEV; | 1144 | return -ENODEV; |
1140 | 1145 | ||
@@ -1143,9 +1148,6 @@ static int __dev_open(struct net_device *dev) | |||
1143 | if (ret) | 1148 | if (ret) |
1144 | return ret; | 1149 | return ret; |
1145 | 1150 | ||
1146 | /* | ||
1147 | * Call device private open method | ||
1148 | */ | ||
1149 | set_bit(__LINK_STATE_START, &dev->state); | 1151 | set_bit(__LINK_STATE_START, &dev->state); |
1150 | 1152 | ||
1151 | if (ops->ndo_validate_addr) | 1153 | if (ops->ndo_validate_addr) |
@@ -1154,31 +1156,12 @@ static int __dev_open(struct net_device *dev) | |||
1154 | if (!ret && ops->ndo_open) | 1156 | if (!ret && ops->ndo_open) |
1155 | ret = ops->ndo_open(dev); | 1157 | ret = ops->ndo_open(dev); |
1156 | 1158 | ||
1157 | /* | ||
1158 | * If it went open OK then: | ||
1159 | */ | ||
1160 | |||
1161 | if (ret) | 1159 | if (ret) |
1162 | clear_bit(__LINK_STATE_START, &dev->state); | 1160 | clear_bit(__LINK_STATE_START, &dev->state); |
1163 | else { | 1161 | else { |
1164 | /* | ||
1165 | * Set the flags. | ||
1166 | */ | ||
1167 | dev->flags |= IFF_UP; | 1162 | dev->flags |= IFF_UP; |
1168 | |||
1169 | /* | ||
1170 | * Enable NET_DMA | ||
1171 | */ | ||
1172 | net_dmaengine_get(); | 1163 | net_dmaengine_get(); |
1173 | |||
1174 | /* | ||
1175 | * Initialize multicasting status | ||
1176 | */ | ||
1177 | dev_set_rx_mode(dev); | 1164 | dev_set_rx_mode(dev); |
1178 | |||
1179 | /* | ||
1180 | * Wakeup transmit queue engine | ||
1181 | */ | ||
1182 | dev_activate(dev); | 1165 | dev_activate(dev); |
1183 | } | 1166 | } |
1184 | 1167 | ||
@@ -1201,22 +1184,13 @@ int dev_open(struct net_device *dev) | |||
1201 | { | 1184 | { |
1202 | int ret; | 1185 | int ret; |
1203 | 1186 | ||
1204 | /* | ||
1205 | * Is it already up? | ||
1206 | */ | ||
1207 | if (dev->flags & IFF_UP) | 1187 | if (dev->flags & IFF_UP) |
1208 | return 0; | 1188 | return 0; |
1209 | 1189 | ||
1210 | /* | ||
1211 | * Open device | ||
1212 | */ | ||
1213 | ret = __dev_open(dev); | 1190 | ret = __dev_open(dev); |
1214 | if (ret < 0) | 1191 | if (ret < 0) |
1215 | return ret; | 1192 | return ret; |
1216 | 1193 | ||
1217 | /* | ||
1218 | * ... and announce new interface. | ||
1219 | */ | ||
1220 | rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING); | 1194 | rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING); |
1221 | call_netdevice_notifiers(NETDEV_UP, dev); | 1195 | call_netdevice_notifiers(NETDEV_UP, dev); |
1222 | 1196 | ||
@@ -1224,52 +1198,78 @@ int dev_open(struct net_device *dev) | |||
1224 | } | 1198 | } |
1225 | EXPORT_SYMBOL(dev_open); | 1199 | EXPORT_SYMBOL(dev_open); |
1226 | 1200 | ||
1227 | static int __dev_close(struct net_device *dev) | 1201 | static int __dev_close_many(struct list_head *head) |
1228 | { | 1202 | { |
1229 | const struct net_device_ops *ops = dev->netdev_ops; | 1203 | struct net_device *dev; |
1230 | 1204 | ||
1231 | ASSERT_RTNL(); | 1205 | ASSERT_RTNL(); |
1232 | might_sleep(); | 1206 | might_sleep(); |
1233 | 1207 | ||
1234 | /* | 1208 | list_for_each_entry(dev, head, unreg_list) { |
1235 | * Tell people we are going down, so that they can | 1209 | call_netdevice_notifiers(NETDEV_GOING_DOWN, dev); |
1236 | * prepare to death, when device is still operating. | ||
1237 | */ | ||
1238 | call_netdevice_notifiers(NETDEV_GOING_DOWN, dev); | ||
1239 | 1210 | ||
1240 | clear_bit(__LINK_STATE_START, &dev->state); | 1211 | clear_bit(__LINK_STATE_START, &dev->state); |
1241 | 1212 | ||
1242 | /* Synchronize to scheduled poll. We cannot touch poll list, | 1213 | /* Synchronize to scheduled poll. We cannot touch poll list, it |
1243 | * it can be even on different cpu. So just clear netif_running(). | 1214 | * can be even on different cpu. So just clear netif_running(). |
1244 | * | 1215 | * |
1245 | * dev->stop() will invoke napi_disable() on all of it's | 1216 | * dev->stop() will invoke napi_disable() on all of it's |
1246 | * napi_struct instances on this device. | 1217 | * napi_struct instances on this device. |
1247 | */ | 1218 | */ |
1248 | smp_mb__after_clear_bit(); /* Commit netif_running(). */ | 1219 | smp_mb__after_clear_bit(); /* Commit netif_running(). */ |
1220 | } | ||
1249 | 1221 | ||
1250 | dev_deactivate(dev); | 1222 | dev_deactivate_many(head); |
1251 | 1223 | ||
1252 | /* | 1224 | list_for_each_entry(dev, head, unreg_list) { |
1253 | * Call the device specific close. This cannot fail. | 1225 | const struct net_device_ops *ops = dev->netdev_ops; |
1254 | * Only if device is UP | ||
1255 | * | ||
1256 | * We allow it to be called even after a DETACH hot-plug | ||
1257 | * event. | ||
1258 | */ | ||
1259 | if (ops->ndo_stop) | ||
1260 | ops->ndo_stop(dev); | ||
1261 | 1226 | ||
1262 | /* | 1227 | /* |
1263 | * Device is now down. | 1228 | * Call the device specific close. This cannot fail. |
1264 | */ | 1229 | * Only if device is UP |
1230 | * | ||
1231 | * We allow it to be called even after a DETACH hot-plug | ||
1232 | * event. | ||
1233 | */ | ||
1234 | if (ops->ndo_stop) | ||
1235 | ops->ndo_stop(dev); | ||
1265 | 1236 | ||
1266 | dev->flags &= ~IFF_UP; | 1237 | dev->flags &= ~IFF_UP; |
1238 | net_dmaengine_put(); | ||
1239 | } | ||
1267 | 1240 | ||
1268 | /* | 1241 | return 0; |
1269 | * Shutdown NET_DMA | 1242 | } |
1270 | */ | 1243 | |
1271 | net_dmaengine_put(); | 1244 | static int __dev_close(struct net_device *dev) |
1245 | { | ||
1246 | int retval; | ||
1247 | LIST_HEAD(single); | ||
1272 | 1248 | ||
1249 | list_add(&dev->unreg_list, &single); | ||
1250 | retval = __dev_close_many(&single); | ||
1251 | list_del(&single); | ||
1252 | return retval; | ||
1253 | } | ||
1254 | |||
1255 | static int dev_close_many(struct list_head *head) | ||
1256 | { | ||
1257 | struct net_device *dev, *tmp; | ||
1258 | LIST_HEAD(tmp_list); | ||
1259 | |||
1260 | list_for_each_entry_safe(dev, tmp, head, unreg_list) | ||
1261 | if (!(dev->flags & IFF_UP)) | ||
1262 | list_move(&dev->unreg_list, &tmp_list); | ||
1263 | |||
1264 | __dev_close_many(head); | ||
1265 | |||
1266 | list_for_each_entry(dev, head, unreg_list) { | ||
1267 | rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING); | ||
1268 | call_netdevice_notifiers(NETDEV_DOWN, dev); | ||
1269 | } | ||
1270 | |||
1271 | /* rollback_registered_many needs the complete original list */ | ||
1272 | list_splice(&tmp_list, head); | ||
1273 | return 0; | 1273 | return 0; |
1274 | } | 1274 | } |
1275 | 1275 | ||
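__dev_close_many()/dev_close_many() let dev_close() and the unregister path bring a whole list of devices down in one pass: devices that are already down are parked on a temporary list, the remaining ones are closed together, and the temporary list is spliced back because rollback_registered_many() needs the complete original list. A userspace sketch of that partition, process, splice pattern, using plain singly linked lists instead of the kernel's list_head:

#include <stdio.h>
#include <stdbool.h>

struct dev {
        const char *name;
        bool        up;
        struct dev *next;
};

static struct dev *close_many(struct dev *head)
{
        struct dev *tmp = NULL, *keep = NULL, *next;

        for (struct dev *d = head; d; d = next) {
                next = d->next;
                if (!d->up) {                 /* already down: set aside */
                        d->next = tmp;
                        tmp = d;
                } else {
                        d->next = keep;
                        keep = d;
                }
        }

        for (struct dev *d = keep; d; d = d->next) {
                printf("closing %s\n", d->name);
                d->up = false;
        }

        /* splice the set-aside devices back so the caller sees everything */
        for (struct dev *d = tmp; d; d = next) {
                next = d->next;
                d->next = keep;
                keep = d;
        }
        return keep;
}

int main(void)
{
        struct dev c = { "eth2", false, NULL };
        struct dev b = { "eth1", true,  &c };
        struct dev a = { "eth0", true,  &b };

        for (struct dev *d = close_many(&a); d; d = d->next)
                printf("%s is %s\n", d->name, d->up ? "up" : "down");
        return 0;
}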
@@ -1284,17 +1284,13 @@ static int __dev_close(struct net_device *dev) | |||
1284 | */ | 1284 | */ |
1285 | int dev_close(struct net_device *dev) | 1285 | int dev_close(struct net_device *dev) |
1286 | { | 1286 | { |
1287 | if (!(dev->flags & IFF_UP)) | 1287 | if (dev->flags & IFF_UP) { |
1288 | return 0; | 1288 | LIST_HEAD(single); |
1289 | |||
1290 | __dev_close(dev); | ||
1291 | |||
1292 | /* | ||
1293 | * Tell people we are down | ||
1294 | */ | ||
1295 | rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING); | ||
1296 | call_netdevice_notifiers(NETDEV_DOWN, dev); | ||
1297 | 1289 | ||
1290 | list_add(&dev->unreg_list, &single); | ||
1291 | dev_close_many(&single); | ||
1292 | list_del(&single); | ||
1293 | } | ||
1298 | return 0; | 1294 | return 0; |
1299 | } | 1295 | } |
1300 | EXPORT_SYMBOL(dev_close); | 1296 | EXPORT_SYMBOL(dev_close); |
@@ -1310,26 +1306,32 @@ EXPORT_SYMBOL(dev_close); | |||
1310 | */ | 1306 | */ |
1311 | void dev_disable_lro(struct net_device *dev) | 1307 | void dev_disable_lro(struct net_device *dev) |
1312 | { | 1308 | { |
1313 | if (dev->ethtool_ops && dev->ethtool_ops->get_flags && | 1309 | u32 flags; |
1314 | dev->ethtool_ops->set_flags) { | 1310 | |
1315 | u32 flags = dev->ethtool_ops->get_flags(dev); | 1311 | /* |
1316 | if (flags & ETH_FLAG_LRO) { | 1312 | * If we're trying to disable lro on a vlan device |
1317 | flags &= ~ETH_FLAG_LRO; | 1313 | * use the underlying physical device instead |
1318 | dev->ethtool_ops->set_flags(dev, flags); | 1314 | */ |
1319 | } | 1315 | if (is_vlan_dev(dev)) |
1320 | } | 1316 | dev = vlan_dev_real_dev(dev); |
1321 | WARN_ON(dev->features & NETIF_F_LRO); | 1317 | |
1318 | if (dev->ethtool_ops && dev->ethtool_ops->get_flags) | ||
1319 | flags = dev->ethtool_ops->get_flags(dev); | ||
1320 | else | ||
1321 | flags = ethtool_op_get_flags(dev); | ||
1322 | |||
1323 | if (!(flags & ETH_FLAG_LRO)) | ||
1324 | return; | ||
1325 | |||
1326 | __ethtool_set_flags(dev, flags & ~ETH_FLAG_LRO); | ||
1327 | if (unlikely(dev->features & NETIF_F_LRO)) | ||
1328 | netdev_WARN(dev, "failed to disable LRO!\n"); | ||
1322 | } | 1329 | } |
1323 | EXPORT_SYMBOL(dev_disable_lro); | 1330 | EXPORT_SYMBOL(dev_disable_lro); |
1324 | 1331 | ||
1325 | 1332 | ||
1326 | static int dev_boot_phase = 1; | 1333 | static int dev_boot_phase = 1; |
1327 | 1334 | ||
1328 | /* | ||
1329 | * Device change register/unregister. These are not inline or static | ||
1330 | * as we export them to the world. | ||
1331 | */ | ||
1332 | |||
1333 | /** | 1335 | /** |
1334 | * register_netdevice_notifier - register a network notifier block | 1336 | * register_netdevice_notifier - register a network notifier block |
1335 | * @nb: notifier | 1337 | * @nb: notifier |
@@ -1431,6 +1433,7 @@ int call_netdevice_notifiers(unsigned long val, struct net_device *dev) | |||
1431 | ASSERT_RTNL(); | 1433 | ASSERT_RTNL(); |
1432 | return raw_notifier_call_chain(&netdev_chain, val, dev); | 1434 | return raw_notifier_call_chain(&netdev_chain, val, dev); |
1433 | } | 1435 | } |
1436 | EXPORT_SYMBOL(call_netdevice_notifiers); | ||
1434 | 1437 | ||
1435 | /* When > 0 there are consumers of rx skb time stamps */ | 1438 | /* When > 0 there are consumers of rx skb time stamps */ |
1436 | static atomic_t netstamp_needed = ATOMIC_INIT(0); | 1439 | static atomic_t netstamp_needed = ATOMIC_INIT(0); |
@@ -1461,6 +1464,27 @@ static inline void net_timestamp_check(struct sk_buff *skb) | |||
1461 | __net_timestamp(skb); | 1464 | __net_timestamp(skb); |
1462 | } | 1465 | } |
1463 | 1466 | ||
1467 | static inline bool is_skb_forwardable(struct net_device *dev, | ||
1468 | struct sk_buff *skb) | ||
1469 | { | ||
1470 | unsigned int len; | ||
1471 | |||
1472 | if (!(dev->flags & IFF_UP)) | ||
1473 | return false; | ||
1474 | |||
1475 | len = dev->mtu + dev->hard_header_len + VLAN_HLEN; | ||
1476 | if (skb->len <= len) | ||
1477 | return true; | ||
1478 | |||
1479 | /* if TSO is enabled, we don't care about the length as the packet | ||
1480 | * could be forwarded without being segmented before | ||
1481 | */ | ||
1482 | if (skb_is_gso(skb)) | ||
1483 | return true; | ||
1484 | |||
1485 | return false; | ||
1486 | } | ||
1487 | |||
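The new is_skb_forwardable() lets oversized packets through when they are GSO, since they would be segmented before transmission anyway, and dev_forward_skb() below now accounts rejected packets in rx_dropped. A userspace sketch of the same size check, assuming a 14-byte Ethernet header and the 4-byte VLAN_HLEN allowance added above:

#include <stdio.h>
#include <stdbool.h>

struct fake_pkt {
        unsigned int len;
        bool         is_gso;
};

/* mtu, hard_header_len and up mimic the fields the kernel helper consults */
static bool forwardable(unsigned int mtu, unsigned int hard_header_len,
                        bool up, const struct fake_pkt *pkt)
{
        if (!up)
                return false;
        if (pkt->len <= mtu + hard_header_len + 4 /* VLAN_HLEN */)
                return true;
        return pkt->is_gso;     /* oversized, but will be segmented later */
}

int main(void)
{
        struct fake_pkt small = { 1500,  false };
        struct fake_pkt big   = { 9000,  false };
        struct fake_pkt gso   = { 64000, true  };

        printf("%d %d %d\n",
               forwardable(1500, 14, true, &small),
               forwardable(1500, 14, true, &big),
               forwardable(1500, 14, true, &gso));
        return 0;
}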
1464 | /** | 1488 | /** |
1465 | * dev_forward_skb - loopback an skb to another netif | 1489 | * dev_forward_skb - loopback an skb to another netif |
1466 | * | 1490 | * |
@@ -1484,8 +1508,8 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb) | |||
1484 | skb_orphan(skb); | 1508 | skb_orphan(skb); |
1485 | nf_reset(skb); | 1509 | nf_reset(skb); |
1486 | 1510 | ||
1487 | if (!(dev->flags & IFF_UP) || | 1511 | if (unlikely(!is_skb_forwardable(dev, skb))) { |
1488 | (skb->len > (dev->mtu + dev->hard_header_len))) { | 1512 | atomic_long_inc(&dev->rx_dropped); |
1489 | kfree_skb(skb); | 1513 | kfree_skb(skb); |
1490 | return NET_RX_DROP; | 1514 | return NET_RX_DROP; |
1491 | } | 1515 | } |
@@ -1497,6 +1521,14 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb) | |||
1497 | } | 1521 | } |
1498 | EXPORT_SYMBOL_GPL(dev_forward_skb); | 1522 | EXPORT_SYMBOL_GPL(dev_forward_skb); |
1499 | 1523 | ||
1524 | static inline int deliver_skb(struct sk_buff *skb, | ||
1525 | struct packet_type *pt_prev, | ||
1526 | struct net_device *orig_dev) | ||
1527 | { | ||
1528 | atomic_inc(&skb->users); | ||
1529 | return pt_prev->func(skb, skb->dev, pt_prev, orig_dev); | ||
1530 | } | ||
1531 | |||
1500 | /* | 1532 | /* |
1501 | * Support routine. Sends outgoing frames to any network | 1533 | * Support routine. Sends outgoing frames to any network |
1502 | * taps currently in use. | 1534 | * taps currently in use. |
@@ -1505,13 +1537,8 @@ EXPORT_SYMBOL_GPL(dev_forward_skb); | |||
1505 | static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) | 1537 | static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) |
1506 | { | 1538 | { |
1507 | struct packet_type *ptype; | 1539 | struct packet_type *ptype; |
1508 | 1540 | struct sk_buff *skb2 = NULL; | |
1509 | #ifdef CONFIG_NET_CLS_ACT | 1541 | struct packet_type *pt_prev = NULL; |
1510 | if (!(skb->tstamp.tv64 && (G_TC_FROM(skb->tc_verd) & AT_INGRESS))) | ||
1511 | net_timestamp_set(skb); | ||
1512 | #else | ||
1513 | net_timestamp_set(skb); | ||
1514 | #endif | ||
1515 | 1542 | ||
1516 | rcu_read_lock(); | 1543 | rcu_read_lock(); |
1517 | list_for_each_entry_rcu(ptype, &ptype_all, list) { | 1544 | list_for_each_entry_rcu(ptype, &ptype_all, list) { |
@@ -1521,10 +1548,18 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) | |||
1521 | if ((ptype->dev == dev || !ptype->dev) && | 1548 | if ((ptype->dev == dev || !ptype->dev) && |
1522 | (ptype->af_packet_priv == NULL || | 1549 | (ptype->af_packet_priv == NULL || |
1523 | (struct sock *)ptype->af_packet_priv != skb->sk)) { | 1550 | (struct sock *)ptype->af_packet_priv != skb->sk)) { |
1524 | struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); | 1551 | if (pt_prev) { |
1552 | deliver_skb(skb2, pt_prev, skb->dev); | ||
1553 | pt_prev = ptype; | ||
1554 | continue; | ||
1555 | } | ||
1556 | |||
1557 | skb2 = skb_clone(skb, GFP_ATOMIC); | ||
1525 | if (!skb2) | 1558 | if (!skb2) |
1526 | break; | 1559 | break; |
1527 | 1560 | ||
1561 | net_timestamp_set(skb2); | ||
1562 | |||
1528 | /* skb->nh should be correctly | 1563 | /* skb->nh should be correctly |
1529 | set by sender, so that the second statement is | 1564 | set by sender, so that the second statement is |
1530 | just protection against buggy protocols. | 1565 | just protection against buggy protocols. |
@@ -1543,31 +1578,121 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) | |||
1543 | 1578 | ||
1544 | skb2->transport_header = skb2->network_header; | 1579 | skb2->transport_header = skb2->network_header; |
1545 | skb2->pkt_type = PACKET_OUTGOING; | 1580 | skb2->pkt_type = PACKET_OUTGOING; |
1546 | ptype->func(skb2, skb->dev, ptype, skb->dev); | 1581 | pt_prev = ptype; |
1547 | } | 1582 | } |
1548 | } | 1583 | } |
1584 | if (pt_prev) | ||
1585 | pt_prev->func(skb2, skb->dev, pt_prev, skb->dev); | ||
1549 | rcu_read_unlock(); | 1586 | rcu_read_unlock(); |
1550 | } | 1587 | } |
1551 | 1588 | ||
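The rewritten dev_queue_xmit_nit() adopts the pt_prev pattern already used on receive: the clone and its timestamp are taken once when the first tap matches, every further match delivers to the previously remembered tap via deliver_skb(), and the last tap consumes the clone directly, saving a clone per extra tap. A userspace sketch of the deferred-delivery loop; taps are plain callbacks and the clone is made up front for brevity:

#include <stdio.h>

typedef void (*tap_fn)(const char *pkt);

static void tap_a(const char *pkt) { printf("tap_a: %s\n", pkt); }
static void tap_b(const char *pkt) { printf("tap_b: %s\n", pkt); }
static void tap_c(const char *pkt) { printf("tap_c: %s\n", pkt); }

int main(void)
{
        tap_fn taps[] = { tap_a, tap_b, tap_c };
        tap_fn pt_prev = NULL;
        const char *skb2 = "clone-of-skb";   /* cloned once, not per tap */

        for (unsigned int i = 0; i < sizeof(taps) / sizeof(taps[0]); i++) {
                if (pt_prev)
                        pt_prev(skb2);       /* deliver to the previous match */
                pt_prev = taps[i];           /* remember the current one */
        }
        if (pt_prev)
                pt_prev(skb2);               /* final match consumes the clone */
        return 0;
}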
1589 | /* netif_setup_tc - Handle tc mappings on real_num_tx_queues change | ||
1590 | * @dev: Network device | ||
1591 | * @txq: number of queues available | ||
1592 | * | ||
1593 | * If real_num_tx_queues is changed the tc mappings may no longer be | ||
1594 | * valid. To resolve this verify the tc mapping remains valid and if | ||
1595 | * not NULL the mapping. With no priorities mapping to this | ||
1596 | * offset/count pair it will no longer be used. In the worst case TC0 | ||
1597 | * is invalid nothing can be done so disable priority mappings. If is | ||
1598 | * expected that drivers will fix this mapping if they can before | ||
1599 | * calling netif_set_real_num_tx_queues. | ||
1600 | */ | ||
1601 | static void netif_setup_tc(struct net_device *dev, unsigned int txq) | ||
1602 | { | ||
1603 | int i; | ||
1604 | struct netdev_tc_txq *tc = &dev->tc_to_txq[0]; | ||
1605 | |||
1606 | /* If TC0 is invalidated disable TC mapping */ | ||
1607 | if (tc->offset + tc->count > txq) { | ||
1608 | pr_warning("Number of in use tx queues changed " | ||
1609 | "invalidating tc mappings. Priority " | ||
1610 | "traffic classification disabled!\n"); | ||
1611 | dev->num_tc = 0; | ||
1612 | return; | ||
1613 | } | ||
1614 | |||
1615 | /* Invalidated prio to tc mappings set to TC0 */ | ||
1616 | for (i = 1; i < TC_BITMASK + 1; i++) { | ||
1617 | int q = netdev_get_prio_tc_map(dev, i); | ||
1618 | |||
1619 | tc = &dev->tc_to_txq[q]; | ||
1620 | if (tc->offset + tc->count > txq) { | ||
1621 | pr_warning("Number of in use tx queues " | ||
1622 | "changed. Priority %i to tc " | ||
1623 | "mapping %i is no longer valid " | ||
1624 | "setting map to 0\n", | ||
1625 | i, q); | ||
1626 | netdev_set_prio_tc_map(dev, i, 0); | ||
1627 | } | ||
1628 | } | ||
1629 | } | ||
1630 | |||
1552 | /* | 1631 | /* |
1553 | * Routine to help set real_num_tx_queues. To avoid skbs mapped to queues | 1632 | * Routine to help set real_num_tx_queues. To avoid skbs mapped to queues |
1554 | * greater then real_num_tx_queues stale skbs on the qdisc must be flushed. | 1633 | * greater then real_num_tx_queues stale skbs on the qdisc must be flushed. |
1555 | */ | 1634 | */ |
1556 | void netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq) | 1635 | int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq) |
1557 | { | 1636 | { |
1558 | unsigned int real_num = dev->real_num_tx_queues; | 1637 | int rc; |
1638 | |||
1639 | if (txq < 1 || txq > dev->num_tx_queues) | ||
1640 | return -EINVAL; | ||
1641 | |||
1642 | if (dev->reg_state == NETREG_REGISTERED || | ||
1643 | dev->reg_state == NETREG_UNREGISTERING) { | ||
1644 | ASSERT_RTNL(); | ||
1559 | 1645 | ||
1560 | if (unlikely(txq > dev->num_tx_queues)) | 1646 | rc = netdev_queue_update_kobjects(dev, dev->real_num_tx_queues, |
1561 | ; | 1647 | txq); |
1562 | else if (txq > real_num) | 1648 | if (rc) |
1563 | dev->real_num_tx_queues = txq; | 1649 | return rc; |
1564 | else if (txq < real_num) { | 1650 | |
1565 | dev->real_num_tx_queues = txq; | 1651 | if (dev->num_tc) |
1566 | qdisc_reset_all_tx_gt(dev, txq); | 1652 | netif_setup_tc(dev, txq); |
1653 | |||
1654 | if (txq < dev->real_num_tx_queues) | ||
1655 | qdisc_reset_all_tx_gt(dev, txq); | ||
1567 | } | 1656 | } |
1657 | |||
1658 | dev->real_num_tx_queues = txq; | ||
1659 | return 0; | ||
1568 | } | 1660 | } |
1569 | EXPORT_SYMBOL(netif_set_real_num_tx_queues); | 1661 | EXPORT_SYMBOL(netif_set_real_num_tx_queues); |
1570 | 1662 | ||
1663 | #ifdef CONFIG_RPS | ||
1664 | /** | ||
1665 | * netif_set_real_num_rx_queues - set actual number of RX queues used | ||
1666 | * @dev: Network device | ||
1667 | * @rxq: Actual number of RX queues | ||
1668 | * | ||
1669 | * This must be called either with the rtnl_lock held or before | ||
1670 | * registration of the net device. Returns 0 on success, or a | ||
1671 | * negative error code. If called before registration, it always | ||
1672 | * succeeds. | ||
1673 | */ | ||
1674 | int netif_set_real_num_rx_queues(struct net_device *dev, unsigned int rxq) | ||
1675 | { | ||
1676 | int rc; | ||
1677 | |||
1678 | if (rxq < 1 || rxq > dev->num_rx_queues) | ||
1679 | return -EINVAL; | ||
1680 | |||
1681 | if (dev->reg_state == NETREG_REGISTERED) { | ||
1682 | ASSERT_RTNL(); | ||
1683 | |||
1684 | rc = net_rx_queue_update_kobjects(dev, dev->real_num_rx_queues, | ||
1685 | rxq); | ||
1686 | if (rc) | ||
1687 | return rc; | ||
1688 | } | ||
1689 | |||
1690 | dev->real_num_rx_queues = rxq; | ||
1691 | return 0; | ||
1692 | } | ||
1693 | EXPORT_SYMBOL(netif_set_real_num_rx_queues); | ||
1694 | #endif | ||
1695 | |||
1571 | static inline void __netif_reschedule(struct Qdisc *q) | 1696 | static inline void __netif_reschedule(struct Qdisc *q) |
1572 | { | 1697 | { |
1573 | struct softnet_data *sd; | 1698 | struct softnet_data *sd; |
@@ -1646,32 +1771,6 @@ void netif_device_attach(struct net_device *dev) | |||
1646 | } | 1771 | } |
1647 | EXPORT_SYMBOL(netif_device_attach); | 1772 | EXPORT_SYMBOL(netif_device_attach); |
1648 | 1773 | ||
1649 | static bool can_checksum_protocol(unsigned long features, __be16 protocol) | ||
1650 | { | ||
1651 | return ((features & NETIF_F_GEN_CSUM) || | ||
1652 | ((features & NETIF_F_IP_CSUM) && | ||
1653 | protocol == htons(ETH_P_IP)) || | ||
1654 | ((features & NETIF_F_IPV6_CSUM) && | ||
1655 | protocol == htons(ETH_P_IPV6)) || | ||
1656 | ((features & NETIF_F_FCOE_CRC) && | ||
1657 | protocol == htons(ETH_P_FCOE))); | ||
1658 | } | ||
1659 | |||
1660 | static bool dev_can_checksum(struct net_device *dev, struct sk_buff *skb) | ||
1661 | { | ||
1662 | if (can_checksum_protocol(dev->features, skb->protocol)) | ||
1663 | return true; | ||
1664 | |||
1665 | if (skb->protocol == htons(ETH_P_8021Q)) { | ||
1666 | struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data; | ||
1667 | if (can_checksum_protocol(dev->features & dev->vlan_features, | ||
1668 | veh->h_vlan_encapsulated_proto)) | ||
1669 | return true; | ||
1670 | } | ||
1671 | |||
1672 | return false; | ||
1673 | } | ||
1674 | |||
1675 | /** | 1774 | /** |
1676 | * skb_dev_set -- assign a new device to a buffer | 1775 | * skb_dev_set -- assign a new device to a buffer |
1677 | * @skb: buffer for the new device | 1776 | * @skb: buffer for the new device |
@@ -1719,7 +1818,7 @@ int skb_checksum_help(struct sk_buff *skb) | |||
1719 | goto out_set_summed; | 1818 | goto out_set_summed; |
1720 | } | 1819 | } |
1721 | 1820 | ||
1722 | offset = skb->csum_start - skb_headroom(skb); | 1821 | offset = skb_checksum_start_offset(skb); |
1723 | BUG_ON(offset >= skb_headlen(skb)); | 1822 | BUG_ON(offset >= skb_headlen(skb)); |
1724 | csum = skb_checksum(skb, offset, skb->len - offset, 0); | 1823 | csum = skb_checksum(skb, offset, skb->len - offset, 0); |
1725 | 1824 | ||
@@ -1751,13 +1850,25 @@ EXPORT_SYMBOL(skb_checksum_help); | |||
1751 | * It may return NULL if the skb requires no segmentation. This is | 1850 | * It may return NULL if the skb requires no segmentation. This is |
1752 | * only possible when GSO is used for verifying header integrity. | 1851 | * only possible when GSO is used for verifying header integrity. |
1753 | */ | 1852 | */ |
1754 | struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features) | 1853 | struct sk_buff *skb_gso_segment(struct sk_buff *skb, u32 features) |
1755 | { | 1854 | { |
1756 | struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT); | 1855 | struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT); |
1757 | struct packet_type *ptype; | 1856 | struct packet_type *ptype; |
1758 | __be16 type = skb->protocol; | 1857 | __be16 type = skb->protocol; |
1858 | int vlan_depth = ETH_HLEN; | ||
1759 | int err; | 1859 | int err; |
1760 | 1860 | ||
1861 | while (type == htons(ETH_P_8021Q)) { | ||
1862 | struct vlan_hdr *vh; | ||
1863 | |||
1864 | if (unlikely(!pskb_may_pull(skb, vlan_depth + VLAN_HLEN))) | ||
1865 | return ERR_PTR(-EINVAL); | ||
1866 | |||
1867 | vh = (struct vlan_hdr *)(skb->data + vlan_depth); | ||
1868 | type = vh->h_vlan_encapsulated_proto; | ||
1869 | vlan_depth += VLAN_HLEN; | ||
1870 | } | ||
1871 | |||
1761 | skb_reset_mac_header(skb); | 1872 | skb_reset_mac_header(skb); |
1762 | skb->mac_len = skb->network_header - skb->mac_header; | 1873 | skb->mac_len = skb->network_header - skb->mac_header; |
1763 | __skb_pull(skb, skb->mac_len); | 1874 | __skb_pull(skb, skb->mac_len); |
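skb_gso_segment() now walks through any number of stacked 802.1Q headers to find the real encapsulated protocol before choosing a GSO handler. A userspace sketch of the same walk over a raw Ethernet frame, assuming a 14-byte header and reading the big-endian fields by hand:

#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

#define ETH_HLEN     14
#define VLAN_HLEN     4
#define ETH_P_8021Q  0x8100

static uint16_t rd16be(const uint8_t *p)
{
        return (uint16_t)(p[0] << 8 | p[1]);
}

/* Returns the innermost ethertype, or 0 if the frame is truncated. */
static uint16_t inner_ethertype(const uint8_t *frame, size_t len)
{
        size_t depth = ETH_HLEN;
        uint16_t type;

        if (len < ETH_HLEN)
                return 0;
        type = rd16be(frame + 12);                /* outer ethertype field */
        while (type == ETH_P_8021Q) {
                if (len < depth + VLAN_HLEN)      /* mirrors pskb_may_pull() */
                        return 0;
                type = rd16be(frame + depth + 2); /* encapsulated protocol */
                depth += VLAN_HLEN;
        }
        return type;
}

int main(void)
{
        /* dst MAC, src MAC, 802.1Q tag (TPID 0x8100, TCI 0x0005), then IPv4 */
        uint8_t frame[] = {
                0, 1, 2, 3, 4, 5,   6, 7, 8, 9, 10, 11,
                0x81, 0x00, 0x00, 0x05,
                0x08, 0x00,
        };

        printf("inner ethertype: 0x%04x\n",
               (unsigned)inner_ethertype(frame, sizeof(frame)));
        return 0;
}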
@@ -1769,8 +1880,7 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features) | |||
1769 | if (dev && dev->ethtool_ops && dev->ethtool_ops->get_drvinfo) | 1880 | if (dev && dev->ethtool_ops && dev->ethtool_ops->get_drvinfo) |
1770 | dev->ethtool_ops->get_drvinfo(dev, &info); | 1881 | dev->ethtool_ops->get_drvinfo(dev, &info); |
1771 | 1882 | ||
1772 | WARN(1, "%s: caps=(0x%lx, 0x%lx) len=%d data_len=%d " | 1883 | WARN(1, "%s: caps=(0x%lx, 0x%lx) len=%d data_len=%d ip_summed=%d\n", |
1773 | "ip_summed=%d", | ||
1774 | info.driver, dev ? dev->features : 0L, | 1884 | info.driver, dev ? dev->features : 0L, |
1775 | skb->sk ? skb->sk->sk_route_caps : 0L, | 1885 | skb->sk ? skb->sk->sk_route_caps : 0L, |
1776 | skb->len, skb->data_len, skb->ip_summed); | 1886 | skb->len, skb->data_len, skb->ip_summed); |
@@ -1873,16 +1983,14 @@ static void dev_gso_skb_destructor(struct sk_buff *skb) | |||
1873 | /** | 1983 | /** |
1874 | * dev_gso_segment - Perform emulated hardware segmentation on skb. | 1984 | * dev_gso_segment - Perform emulated hardware segmentation on skb. |
1875 | * @skb: buffer to segment | 1985 | * @skb: buffer to segment |
1986 | * @features: device features as applicable to this skb | ||
1876 | * | 1987 | * |
1877 | * This function segments the given skb and stores the list of segments | 1988 | * This function segments the given skb and stores the list of segments |
1878 | * in skb->next. | 1989 | * in skb->next. |
1879 | */ | 1990 | */ |
1880 | static int dev_gso_segment(struct sk_buff *skb) | 1991 | static int dev_gso_segment(struct sk_buff *skb, int features) |
1881 | { | 1992 | { |
1882 | struct net_device *dev = skb->dev; | ||
1883 | struct sk_buff *segs; | 1993 | struct sk_buff *segs; |
1884 | int features = dev->features & ~(illegal_highdma(dev, skb) ? | ||
1885 | NETIF_F_SG : 0); | ||
1886 | 1994 | ||
1887 | segs = skb_gso_segment(skb, features); | 1995 | segs = skb_gso_segment(skb, features); |
1888 | 1996 | ||
@@ -1902,14 +2010,14 @@ static int dev_gso_segment(struct sk_buff *skb) | |||
1902 | 2010 | ||
1903 | /* | 2011 | /* |
1904 | * Try to orphan skb early, right before transmission by the device. | 2012 | * Try to orphan skb early, right before transmission by the device. |
1905 | * We cannot orphan skb if tx timestamp is requested, since | 2013 | * We cannot orphan skb if tx timestamp is requested or the sk-reference |
1906 | * drivers need to call skb_tstamp_tx() to send the timestamp. | 2014 | * is needed on driver level for other reasons, e.g. see net/can/raw.c |
1907 | */ | 2015 | */ |
1908 | static inline void skb_orphan_try(struct sk_buff *skb) | 2016 | static inline void skb_orphan_try(struct sk_buff *skb) |
1909 | { | 2017 | { |
1910 | struct sock *sk = skb->sk; | 2018 | struct sock *sk = skb->sk; |
1911 | 2019 | ||
1912 | if (sk && !skb_tx(skb)->flags) { | 2020 | if (sk && !skb_shinfo(skb)->tx_flags) { |
1913 | /* skb_tx_hash() wont be able to get sk. | 2021 | /* skb_tx_hash() wont be able to get sk. |
1914 | * We copy sk_hash into skb->rxhash | 2022 | * We copy sk_hash into skb->rxhash |
1915 | */ | 2023 | */ |
@@ -1919,6 +2027,53 @@ static inline void skb_orphan_try(struct sk_buff *skb) | |||
1919 | } | 2027 | } |
1920 | } | 2028 | } |
1921 | 2029 | ||
2030 | static bool can_checksum_protocol(unsigned long features, __be16 protocol) | ||
2031 | { | ||
2032 | return ((features & NETIF_F_GEN_CSUM) || | ||
2033 | ((features & NETIF_F_V4_CSUM) && | ||
2034 | protocol == htons(ETH_P_IP)) || | ||
2035 | ((features & NETIF_F_V6_CSUM) && | ||
2036 | protocol == htons(ETH_P_IPV6)) || | ||
2037 | ((features & NETIF_F_FCOE_CRC) && | ||
2038 | protocol == htons(ETH_P_FCOE))); | ||
2039 | } | ||
2040 | |||
2041 | static u32 harmonize_features(struct sk_buff *skb, __be16 protocol, u32 features) | ||
2042 | { | ||
2043 | if (!can_checksum_protocol(features, protocol)) { | ||
2044 | features &= ~NETIF_F_ALL_CSUM; | ||
2045 | features &= ~NETIF_F_SG; | ||
2046 | } else if (illegal_highdma(skb->dev, skb)) { | ||
2047 | features &= ~NETIF_F_SG; | ||
2048 | } | ||
2049 | |||
2050 | return features; | ||
2051 | } | ||
2052 | |||
2053 | u32 netif_skb_features(struct sk_buff *skb) | ||
2054 | { | ||
2055 | __be16 protocol = skb->protocol; | ||
2056 | u32 features = skb->dev->features; | ||
2057 | |||
2058 | if (protocol == htons(ETH_P_8021Q)) { | ||
2059 | struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data; | ||
2060 | protocol = veh->h_vlan_encapsulated_proto; | ||
2061 | } else if (!vlan_tx_tag_present(skb)) { | ||
2062 | return harmonize_features(skb, protocol, features); | ||
2063 | } | ||
2064 | |||
2065 | features &= (skb->dev->vlan_features | NETIF_F_HW_VLAN_TX); | ||
2066 | |||
2067 | if (protocol != htons(ETH_P_8021Q)) { | ||
2068 | return harmonize_features(skb, protocol, features); | ||
2069 | } else { | ||
2070 | features &= NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST | | ||
2071 | NETIF_F_GEN_CSUM | NETIF_F_HW_VLAN_TX; | ||
2072 | return harmonize_features(skb, protocol, features); | ||
2073 | } | ||
2074 | } | ||
2075 | EXPORT_SYMBOL(netif_skb_features); | ||
2076 | |||
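netif_skb_features() replaces the old dev_can_checksum() pair: it starts from the device feature mask, narrows it through vlan_features when a VLAN tag is in play, and harmonize_features() then drops checksum offload plus scatter-gather when the protocol cannot be checksummed, or just scatter-gather when the data sits in high memory the device cannot DMA from. A small sketch of that narrowing with stand-in feature bits (not the real NETIF_F_* values):

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

#define F_SG        0x01
#define F_IP_CSUM   0x02
#define F_IPV6_CSUM 0x04
#define F_ALL_CSUM  (F_IP_CSUM | F_IPV6_CSUM)

#define P_IP   0x0800
#define P_IPV6 0x86DD

static bool can_checksum(uint32_t features, uint16_t proto)
{
        return (proto == P_IP   && (features & F_IP_CSUM)) ||
               (proto == P_IPV6 && (features & F_IPV6_CSUM));
}

/* Mirrors the shape of harmonize_features(): drop offloads the device
 * cannot apply to this particular packet. */
static uint32_t harmonize(uint32_t features, uint16_t proto, bool high_dma_problem)
{
        if (!can_checksum(features, proto))
                features &= ~(F_ALL_CSUM | F_SG);
        else if (high_dma_problem)
                features &= ~F_SG;
        return features;
}

int main(void)
{
        uint32_t dev_features = F_SG | F_IP_CSUM;   /* no IPv6 csum offload */

        printf("IPv4: 0x%x\n", (unsigned)harmonize(dev_features, P_IP,   false));
        printf("IPv6: 0x%x\n", (unsigned)harmonize(dev_features, P_IPV6, false));
        return 0;
}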
1922 | /* | 2077 | /* |
1923 | * Returns true if either: | 2078 | * Returns true if either: |
1924 | * 1. skb has frag_list and the device doesn't support FRAGLIST, or | 2079 | * 1. skb has frag_list and the device doesn't support FRAGLIST, or |
@@ -1927,12 +2082,13 @@ static inline void skb_orphan_try(struct sk_buff *skb) | |||
1927 | * support DMA from it. | 2082 | * support DMA from it. |
1928 | */ | 2083 | */ |
1929 | static inline int skb_needs_linearize(struct sk_buff *skb, | 2084 | static inline int skb_needs_linearize(struct sk_buff *skb, |
1930 | struct net_device *dev) | 2085 | int features) |
1931 | { | 2086 | { |
1932 | return skb_is_nonlinear(skb) && | 2087 | return skb_is_nonlinear(skb) && |
1933 | ((skb_has_frags(skb) && !(dev->features & NETIF_F_FRAGLIST)) || | 2088 | ((skb_has_frag_list(skb) && |
1934 | (skb_shinfo(skb)->nr_frags && (!(dev->features & NETIF_F_SG) || | 2089 | !(features & NETIF_F_FRAGLIST)) || |
1935 | illegal_highdma(dev, skb)))); | 2090 | (skb_shinfo(skb)->nr_frags && |
2091 | !(features & NETIF_F_SG))); | ||
1936 | } | 2092 | } |
1937 | 2093 | ||
1938 | int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, | 2094 | int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, |
@@ -1940,27 +2096,41 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, | |||
1940 | { | 2096 | { |
1941 | const struct net_device_ops *ops = dev->netdev_ops; | 2097 | const struct net_device_ops *ops = dev->netdev_ops; |
1942 | int rc = NETDEV_TX_OK; | 2098 | int rc = NETDEV_TX_OK; |
2099 | unsigned int skb_len; | ||
1943 | 2100 | ||
1944 | if (likely(!skb->next)) { | 2101 | if (likely(!skb->next)) { |
1945 | if (!list_empty(&ptype_all)) | 2102 | u32 features; |
1946 | dev_queue_xmit_nit(skb, dev); | ||
1947 | 2103 | ||
1948 | /* | 2104 | /* |
1949 | * If device doesnt need skb->dst, release it right now while | 2105 | * If device doesn't need skb->dst, release it right now while |
1950 | * its hot in this cpu cache | 2106 | * its hot in this cpu cache |
1951 | */ | 2107 | */ |
1952 | if (dev->priv_flags & IFF_XMIT_DST_RELEASE) | 2108 | if (dev->priv_flags & IFF_XMIT_DST_RELEASE) |
1953 | skb_dst_drop(skb); | 2109 | skb_dst_drop(skb); |
1954 | 2110 | ||
2111 | if (!list_empty(&ptype_all)) | ||
2112 | dev_queue_xmit_nit(skb, dev); | ||
2113 | |||
1955 | skb_orphan_try(skb); | 2114 | skb_orphan_try(skb); |
1956 | 2115 | ||
1957 | if (netif_needs_gso(dev, skb)) { | 2116 | features = netif_skb_features(skb); |
1958 | if (unlikely(dev_gso_segment(skb))) | 2117 | |
2118 | if (vlan_tx_tag_present(skb) && | ||
2119 | !(features & NETIF_F_HW_VLAN_TX)) { | ||
2120 | skb = __vlan_put_tag(skb, vlan_tx_tag_get(skb)); | ||
2121 | if (unlikely(!skb)) | ||
2122 | goto out; | ||
2123 | |||
2124 | skb->vlan_tci = 0; | ||
2125 | } | ||
2126 | |||
2127 | if (netif_needs_gso(skb, features)) { | ||
2128 | if (unlikely(dev_gso_segment(skb, features))) | ||
1959 | goto out_kfree_skb; | 2129 | goto out_kfree_skb; |
1960 | if (skb->next) | 2130 | if (skb->next) |
1961 | goto gso; | 2131 | goto gso; |
1962 | } else { | 2132 | } else { |
1963 | if (skb_needs_linearize(skb, dev) && | 2133 | if (skb_needs_linearize(skb, features) && |
1964 | __skb_linearize(skb)) | 2134 | __skb_linearize(skb)) |
1965 | goto out_kfree_skb; | 2135 | goto out_kfree_skb; |
1966 | 2136 | ||
@@ -1969,15 +2139,17 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, | |||
1969 | * checksumming here. | 2139 | * checksumming here. |
1970 | */ | 2140 | */ |
1971 | if (skb->ip_summed == CHECKSUM_PARTIAL) { | 2141 | if (skb->ip_summed == CHECKSUM_PARTIAL) { |
1972 | skb_set_transport_header(skb, skb->csum_start - | 2142 | skb_set_transport_header(skb, |
1973 | skb_headroom(skb)); | 2143 | skb_checksum_start_offset(skb)); |
1974 | if (!dev_can_checksum(dev, skb) && | 2144 | if (!(features & NETIF_F_ALL_CSUM) && |
1975 | skb_checksum_help(skb)) | 2145 | skb_checksum_help(skb)) |
1976 | goto out_kfree_skb; | 2146 | goto out_kfree_skb; |
1977 | } | 2147 | } |
1978 | } | 2148 | } |
1979 | 2149 | ||
2150 | skb_len = skb->len; | ||
1980 | rc = ops->ndo_start_xmit(skb, dev); | 2151 | rc = ops->ndo_start_xmit(skb, dev); |
2152 | trace_net_dev_xmit(skb, rc, dev, skb_len); | ||
1981 | if (rc == NETDEV_TX_OK) | 2153 | if (rc == NETDEV_TX_OK) |
1982 | txq_trans_update(txq); | 2154 | txq_trans_update(txq); |
1983 | return rc; | 2155 | return rc; |
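dev_hard_start_xmit() now also covers VLAN acceleration in software: when the device lacks NETIF_F_HW_VLAN_TX, the out-of-band tag is written into the frame via __vlan_put_tag() and skb->vlan_tci is cleared. A userspace sketch of what that insertion amounts to on a raw Ethernet frame; the kernel helper additionally fixes up skb metadata, which is omitted here:

#include <stdio.h>
#include <stdint.h>
#include <string.h>

#define ETH_ALEN     6
#define VLAN_HLEN    4
#define ETH_P_8021Q  0x8100

/* Insert a 4-byte 802.1Q tag after the two MAC addresses.  'frame' must
 * have room for len + VLAN_HLEN bytes.  Returns the new length. */
static size_t put_vlan_tag(uint8_t *frame, size_t len, uint16_t tci)
{
        /* shift the original ethertype + payload 4 bytes to the right */
        memmove(frame + 2 * ETH_ALEN + VLAN_HLEN,
                frame + 2 * ETH_ALEN, len - 2 * ETH_ALEN);
        frame[12] = ETH_P_8021Q >> 8;
        frame[13] = ETH_P_8021Q & 0xff;
        frame[14] = tci >> 8;
        frame[15] = tci & 0xff;
        return len + VLAN_HLEN;
}

int main(void)
{
        uint8_t frame[64] = {
                0, 1, 2, 3, 4, 5,   6, 7, 8, 9, 10, 11,   /* dst + src MAC */
                0x08, 0x00,                               /* IPv4 ethertype */
                0x45,                                     /* first payload byte */
        };
        size_t len = put_vlan_tag(frame, 15, 5);

        for (size_t i = 0; i < len; i++)
                printf("%02x%c", frame[i], i + 1 == len ? '\n' : ' ');
        return 0;
}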
@@ -1991,13 +2163,15 @@ gso: | |||
1991 | nskb->next = NULL; | 2163 | nskb->next = NULL; |
1992 | 2164 | ||
1993 | /* | 2165 | /* |
1994 | * If device doesnt need nskb->dst, release it right now while | 2166 | * If device doesn't need nskb->dst, release it right now while |
1995 | * its hot in this cpu cache | 2167 | * its hot in this cpu cache |
1996 | */ | 2168 | */ |
1997 | if (dev->priv_flags & IFF_XMIT_DST_RELEASE) | 2169 | if (dev->priv_flags & IFF_XMIT_DST_RELEASE) |
1998 | skb_dst_drop(nskb); | 2170 | skb_dst_drop(nskb); |
1999 | 2171 | ||
2172 | skb_len = nskb->len; | ||
2000 | rc = ops->ndo_start_xmit(nskb, dev); | 2173 | rc = ops->ndo_start_xmit(nskb, dev); |
2174 | trace_net_dev_xmit(nskb, rc, dev, skb_len); | ||
2001 | if (unlikely(rc != NETDEV_TX_OK)) { | 2175 | if (unlikely(rc != NETDEV_TX_OK)) { |
2002 | if (rc & ~NETDEV_TX_MASK) | 2176 | if (rc & ~NETDEV_TX_MASK) |
2003 | goto out_kfree_gso_skb; | 2177 | goto out_kfree_gso_skb; |
@@ -2015,31 +2189,45 @@ out_kfree_gso_skb: | |||
2015 | skb->destructor = DEV_GSO_CB(skb)->destructor; | 2189 | skb->destructor = DEV_GSO_CB(skb)->destructor; |
2016 | out_kfree_skb: | 2190 | out_kfree_skb: |
2017 | kfree_skb(skb); | 2191 | kfree_skb(skb); |
2192 | out: | ||
2018 | return rc; | 2193 | return rc; |
2019 | } | 2194 | } |
2020 | 2195 | ||
2021 | static u32 hashrnd __read_mostly; | 2196 | static u32 hashrnd __read_mostly; |
2022 | 2197 | ||
2023 | u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb) | 2198 | /* |
2199 | * Returns a Tx hash based on the given packet descriptor a Tx queues' number | ||
2200 | * to be used as a distribution range. | ||
2201 | */ | ||
2202 | u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb, | ||
2203 | unsigned int num_tx_queues) | ||
2024 | { | 2204 | { |
2025 | u32 hash; | 2205 | u32 hash; |
2206 | u16 qoffset = 0; | ||
2207 | u16 qcount = num_tx_queues; | ||
2026 | 2208 | ||
2027 | if (skb_rx_queue_recorded(skb)) { | 2209 | if (skb_rx_queue_recorded(skb)) { |
2028 | hash = skb_get_rx_queue(skb); | 2210 | hash = skb_get_rx_queue(skb); |
2029 | while (unlikely(hash >= dev->real_num_tx_queues)) | 2211 | while (unlikely(hash >= num_tx_queues)) |
2030 | hash -= dev->real_num_tx_queues; | 2212 | hash -= num_tx_queues; |
2031 | return hash; | 2213 | return hash; |
2032 | } | 2214 | } |
2033 | 2215 | ||
2216 | if (dev->num_tc) { | ||
2217 | u8 tc = netdev_get_prio_tc_map(dev, skb->priority); | ||
2218 | qoffset = dev->tc_to_txq[tc].offset; | ||
2219 | qcount = dev->tc_to_txq[tc].count; | ||
2220 | } | ||
2221 | |||
2034 | if (skb->sk && skb->sk->sk_hash) | 2222 | if (skb->sk && skb->sk->sk_hash) |
2035 | hash = skb->sk->sk_hash; | 2223 | hash = skb->sk->sk_hash; |
2036 | else | 2224 | else |
2037 | hash = (__force u16) skb->protocol ^ skb->rxhash; | 2225 | hash = (__force u16) skb->protocol ^ skb->rxhash; |
2038 | hash = jhash_1word(hash, hashrnd); | 2226 | hash = jhash_1word(hash, hashrnd); |
2039 | 2227 | ||
2040 | return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32); | 2228 | return (u16) (((u64) hash * qcount) >> 32) + qoffset; |
2041 | } | 2229 | } |
2042 | EXPORT_SYMBOL(skb_tx_hash); | 2230 | EXPORT_SYMBOL(__skb_tx_hash); |
2043 | 2231 | ||
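__skb_tx_hash() now honors traffic classes: the flow hash is scaled into the [qoffset, qoffset + qcount) range owned by the skb's priority class instead of across all real queues. The ((u64)hash * qcount) >> 32 step maps a 32-bit hash uniformly onto qcount queues without a modulo. A quick userspace check of that arithmetic:

#include <stdio.h>
#include <stdint.h>

/* Map a 32-bit hash onto [qoffset, qoffset + qcount) the same way
 * __skb_tx_hash() does, via multiply-and-shift instead of '%'. */
static uint16_t pick_queue(uint32_t hash, uint16_t qcount, uint16_t qoffset)
{
        return (uint16_t)(((uint64_t)hash * qcount) >> 32) + qoffset;
}

int main(void)
{
        /* e.g. a traffic class owning queues 4..7: offset 4, count 4 */
        uint32_t hashes[] = { 0x00000000u, 0x40000000u, 0x80000000u, 0xffffffffu };

        for (unsigned int i = 0; i < 4; i++)
                printf("hash %08x -> queue %u\n",
                       (unsigned)hashes[i], (unsigned)pick_queue(hashes[i], 4, 4));
        return 0;
}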
2044 | static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index) | 2232 | static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index) |
2045 | { | 2233 | { |
@@ -2054,26 +2242,70 @@ static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index) | |||
2054 | return queue_index; | 2242 | return queue_index; |
2055 | } | 2243 | } |
2056 | 2244 | ||
2245 | static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb) | ||
2246 | { | ||
2247 | #ifdef CONFIG_XPS | ||
2248 | struct xps_dev_maps *dev_maps; | ||
2249 | struct xps_map *map; | ||
2250 | int queue_index = -1; | ||
2251 | |||
2252 | rcu_read_lock(); | ||
2253 | dev_maps = rcu_dereference(dev->xps_maps); | ||
2254 | if (dev_maps) { | ||
2255 | map = rcu_dereference( | ||
2256 | dev_maps->cpu_map[raw_smp_processor_id()]); | ||
2257 | if (map) { | ||
2258 | if (map->len == 1) | ||
2259 | queue_index = map->queues[0]; | ||
2260 | else { | ||
2261 | u32 hash; | ||
2262 | if (skb->sk && skb->sk->sk_hash) | ||
2263 | hash = skb->sk->sk_hash; | ||
2264 | else | ||
2265 | hash = (__force u16) skb->protocol ^ | ||
2266 | skb->rxhash; | ||
2267 | hash = jhash_1word(hash, hashrnd); | ||
2268 | queue_index = map->queues[ | ||
2269 | ((u64)hash * map->len) >> 32]; | ||
2270 | } | ||
2271 | if (unlikely(queue_index >= dev->real_num_tx_queues)) | ||
2272 | queue_index = -1; | ||
2273 | } | ||
2274 | } | ||
2275 | rcu_read_unlock(); | ||
2276 | |||
2277 | return queue_index; | ||
2278 | #else | ||
2279 | return -1; | ||
2280 | #endif | ||
2281 | } | ||
2282 | |||
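get_xps_queue() consults the transmit packet steering map installed for the current CPU: a single-entry map short-circuits to its queue, a larger map is indexed with the same multiply-and-shift scaling as above, and an out-of-range result falls back to -1 so the caller uses skb_tx_hash() instead. A sketch of the lookup with a plain array standing in for the RCU-protected xps_map:

#include <stdio.h>
#include <stdint.h>

struct fake_xps_map {
        int      len;
        uint16_t queues[4];
};

static int xps_pick(const struct fake_xps_map *map, uint32_t hash,
                    unsigned int real_num_tx_queues)
{
        int queue_index;

        if (!map || map->len == 0)
                return -1;                        /* no mapping for this CPU */
        if (map->len == 1)
                queue_index = map->queues[0];     /* fast path */
        else
                queue_index = map->queues[((uint64_t)hash * map->len) >> 32];

        return queue_index < (int)real_num_tx_queues ? queue_index : -1;
}

int main(void)
{
        struct fake_xps_map cpu0 = { 2, { 1, 3 } };   /* this CPU may use queues 1 and 3 */

        printf("%d %d\n",
               xps_pick(&cpu0, 0x10000000u, 8),
               xps_pick(&cpu0, 0xf0000000u, 8));
        return 0;
}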
2057 | static struct netdev_queue *dev_pick_tx(struct net_device *dev, | 2283 | static struct netdev_queue *dev_pick_tx(struct net_device *dev, |
2058 | struct sk_buff *skb) | 2284 | struct sk_buff *skb) |
2059 | { | 2285 | { |
2060 | int queue_index; | 2286 | int queue_index; |
2061 | const struct net_device_ops *ops = dev->netdev_ops; | 2287 | const struct net_device_ops *ops = dev->netdev_ops; |
2062 | 2288 | ||
2063 | if (ops->ndo_select_queue) { | 2289 | if (dev->real_num_tx_queues == 1) |
2290 | queue_index = 0; | ||
2291 | else if (ops->ndo_select_queue) { | ||
2064 | queue_index = ops->ndo_select_queue(dev, skb); | 2292 | queue_index = ops->ndo_select_queue(dev, skb); |
2065 | queue_index = dev_cap_txqueue(dev, queue_index); | 2293 | queue_index = dev_cap_txqueue(dev, queue_index); |
2066 | } else { | 2294 | } else { |
2067 | struct sock *sk = skb->sk; | 2295 | struct sock *sk = skb->sk; |
2068 | queue_index = sk_tx_queue_get(sk); | 2296 | queue_index = sk_tx_queue_get(sk); |
2069 | if (queue_index < 0) { | ||
2070 | 2297 | ||
2071 | queue_index = 0; | 2298 | if (queue_index < 0 || skb->ooo_okay || |
2072 | if (dev->real_num_tx_queues > 1) | 2299 | queue_index >= dev->real_num_tx_queues) { |
2300 | int old_index = queue_index; | ||
2301 | |||
2302 | queue_index = get_xps_queue(dev, skb); | ||
2303 | if (queue_index < 0) | ||
2073 | queue_index = skb_tx_hash(dev, skb); | 2304 | queue_index = skb_tx_hash(dev, skb); |
2074 | 2305 | ||
2075 | if (sk) { | 2306 | if (queue_index != old_index && sk) { |
2076 | struct dst_entry *dst = rcu_dereference_check(sk->sk_dst_cache, 1); | 2307 | struct dst_entry *dst = |
2308 | rcu_dereference_check(sk->sk_dst_cache, 1); | ||
2077 | 2309 | ||
2078 | if (dst && skb_dst(skb) == dst) | 2310 | if (dst && skb_dst(skb) == dst) |
2079 | sk_tx_queue_set(sk, queue_index); | 2311 | sk_tx_queue_set(sk, queue_index); |
@@ -2090,15 +2322,18 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, | |||
2090 | struct netdev_queue *txq) | 2322 | struct netdev_queue *txq) |
2091 | { | 2323 | { |
2092 | spinlock_t *root_lock = qdisc_lock(q); | 2324 | spinlock_t *root_lock = qdisc_lock(q); |
2093 | bool contended = qdisc_is_running(q); | 2325 | bool contended; |
2094 | int rc; | 2326 | int rc; |
2095 | 2327 | ||
2328 | qdisc_skb_cb(skb)->pkt_len = skb->len; | ||
2329 | qdisc_calculate_pkt_len(skb, q); | ||
2096 | /* | 2330 | /* |
2097 | * Heuristic to force contended enqueues to serialize on a | 2331 | * Heuristic to force contended enqueues to serialize on a |
2098 | * separate lock before trying to get qdisc main lock. | 2332 | * separate lock before trying to get qdisc main lock. |
2099 | * This permits __QDISC_STATE_RUNNING owner to get the lock more often | 2333 | * This permits __QDISC_STATE_RUNNING owner to get the lock more often |
2100 | * and dequeue packets faster. | 2334 | * and dequeue packets faster. |
2101 | */ | 2335 | */ |
2336 | contended = qdisc_is_running(q); | ||
2102 | if (unlikely(contended)) | 2337 | if (unlikely(contended)) |
2103 | spin_lock(&q->busylock); | 2338 | spin_lock(&q->busylock); |
2104 | 2339 | ||
@@ -2115,7 +2350,9 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, | |||
2115 | */ | 2350 | */ |
2116 | if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE)) | 2351 | if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE)) |
2117 | skb_dst_force(skb); | 2352 | skb_dst_force(skb); |
2118 | __qdisc_update_bstats(q, skb->len); | 2353 | |
2354 | qdisc_bstats_update(q, skb); | ||
2355 | |||
2119 | if (sch_direct_xmit(skb, q, dev, txq, root_lock)) { | 2356 | if (sch_direct_xmit(skb, q, dev, txq, root_lock)) { |
2120 | if (unlikely(contended)) { | 2357 | if (unlikely(contended)) { |
2121 | spin_unlock(&q->busylock); | 2358 | spin_unlock(&q->busylock); |
@@ -2128,7 +2365,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, | |||
2128 | rc = NET_XMIT_SUCCESS; | 2365 | rc = NET_XMIT_SUCCESS; |
2129 | } else { | 2366 | } else { |
2130 | skb_dst_force(skb); | 2367 | skb_dst_force(skb); |
2131 | rc = qdisc_enqueue_root(skb, q); | 2368 | rc = q->enqueue(skb, q) & NET_XMIT_MASK; |
2132 | if (qdisc_run_begin(q)) { | 2369 | if (qdisc_run_begin(q)) { |
2133 | if (unlikely(contended)) { | 2370 | if (unlikely(contended)) { |
2134 | spin_unlock(&q->busylock); | 2371 | spin_unlock(&q->busylock); |
@@ -2143,6 +2380,9 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, | |||
2143 | return rc; | 2380 | return rc; |
2144 | } | 2381 | } |
2145 | 2382 | ||
2383 | static DEFINE_PER_CPU(int, xmit_recursion); | ||
2384 | #define RECURSION_LIMIT 10 | ||
2385 | |||
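The per-cpu xmit_recursion counter bounds how deeply dev_queue_xmit() may re-enter itself through stacked virtual devices on one CPU before jumping to the existing "Dead loop on virtual device" error path (the recursion_alert label below). A toy single-threaded sketch of the guard; the kernel uses a per-CPU variable where this uses a plain static:

#include <stdio.h>

#define RECURSION_LIMIT 10

static int xmit_recursion;

/* depth simulates how many virtual devices re-submit the packet */
static int fake_xmit(int depth)
{
        if (xmit_recursion > RECURSION_LIMIT) {
                printf("dead loop detected, dropping\n");
                return -1;
        }

        xmit_recursion++;
        if (depth > 0)
                fake_xmit(depth - 1);   /* a stacked device transmits again */
        xmit_recursion--;
        return 0;
}

int main(void)
{
        fake_xmit(3);    /* fine: bounded stacking */
        fake_xmit(50);   /* too deep: guard fires */
        return 0;
}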
2146 | /** | 2386 | /** |
2147 | * dev_queue_xmit - transmit a buffer | 2387 | * dev_queue_xmit - transmit a buffer |
2148 | * @skb: buffer to transmit | 2388 | * @skb: buffer to transmit |
@@ -2186,6 +2426,7 @@ int dev_queue_xmit(struct sk_buff *skb) | |||
2186 | #ifdef CONFIG_NET_CLS_ACT | 2426 | #ifdef CONFIG_NET_CLS_ACT |
2187 | skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS); | 2427 | skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS); |
2188 | #endif | 2428 | #endif |
2429 | trace_net_dev_queue(skb); | ||
2189 | if (q->enqueue) { | 2430 | if (q->enqueue) { |
2190 | rc = __dev_xmit_skb(skb, q, dev, txq); | 2431 | rc = __dev_xmit_skb(skb, q, dev, txq); |
2191 | goto out; | 2432 | goto out; |
@@ -2208,10 +2449,15 @@ int dev_queue_xmit(struct sk_buff *skb) | |||
2208 | 2449 | ||
2209 | if (txq->xmit_lock_owner != cpu) { | 2450 | if (txq->xmit_lock_owner != cpu) { |
2210 | 2451 | ||
2452 | if (__this_cpu_read(xmit_recursion) > RECURSION_LIMIT) | ||
2453 | goto recursion_alert; | ||
2454 | |||
2211 | HARD_TX_LOCK(dev, txq, cpu); | 2455 | HARD_TX_LOCK(dev, txq, cpu); |
2212 | 2456 | ||
2213 | if (!netif_tx_queue_stopped(txq)) { | 2457 | if (!netif_tx_queue_stopped(txq)) { |
2458 | __this_cpu_inc(xmit_recursion); | ||
2214 | rc = dev_hard_start_xmit(skb, dev, txq); | 2459 | rc = dev_hard_start_xmit(skb, dev, txq); |
2460 | __this_cpu_dec(xmit_recursion); | ||
2215 | if (dev_xmit_complete(rc)) { | 2461 | if (dev_xmit_complete(rc)) { |
2216 | HARD_TX_UNLOCK(dev, txq); | 2462 | HARD_TX_UNLOCK(dev, txq); |
2217 | goto out; | 2463 | goto out; |
@@ -2223,7 +2469,9 @@ int dev_queue_xmit(struct sk_buff *skb) | |||
2223 | "queue packet!\n", dev->name); | 2469 | "queue packet!\n", dev->name); |
2224 | } else { | 2470 | } else { |
2225 | /* Recursion is detected! It is possible, | 2471 | /* Recursion is detected! It is possible, |
2226 | * unfortunately */ | 2472 | * unfortunately |
2473 | */ | ||
2474 | recursion_alert: | ||
2227 | if (net_ratelimit()) | 2475 | if (net_ratelimit()) |
2228 | printk(KERN_CRIT "Dead loop on virtual device " | 2476 | printk(KERN_CRIT "Dead loop on virtual device " |
2229 | "%s, fix it urgently!\n", dev->name); | 2477 | "%s, fix it urgently!\n", dev->name); |
@@ -2259,69 +2507,44 @@ static inline void ____napi_schedule(struct softnet_data *sd, | |||
2259 | __raise_softirq_irqoff(NET_RX_SOFTIRQ); | 2507 | __raise_softirq_irqoff(NET_RX_SOFTIRQ); |
2260 | } | 2508 | } |
2261 | 2509 | ||
2262 | #ifdef CONFIG_RPS | ||
2263 | |||
2264 | /* One global table that all flow-based protocols share. */ | ||
2265 | struct rps_sock_flow_table *rps_sock_flow_table __read_mostly; | ||
2266 | EXPORT_SYMBOL(rps_sock_flow_table); | ||
2267 | |||
2268 | /* | 2510 | /* |
2269 | * get_rps_cpu is called from netif_receive_skb and returns the target | 2511 | * __skb_get_rxhash: calculate a flow hash based on src/dst addresses |
2270 | * CPU from the RPS map of the receiving queue for a given skb. | 2512 | * and src/dst port numbers. Returns a non-zero hash number on success |
2271 | * rcu_read_lock must be held on entry. | 2513 | * and 0 on failure. |
2272 | */ | 2514 | */ |
2273 | static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, | 2515 | __u32 __skb_get_rxhash(struct sk_buff *skb) |
2274 | struct rps_dev_flow **rflowp) | ||
2275 | { | 2516 | { |
2276 | struct ipv6hdr *ip6; | 2517 | int nhoff, hash = 0, poff; |
2277 | struct iphdr *ip; | 2518 | const struct ipv6hdr *ip6; |
2278 | struct netdev_rx_queue *rxqueue; | 2519 | const struct iphdr *ip; |
2279 | struct rps_map *map; | ||
2280 | struct rps_dev_flow_table *flow_table; | ||
2281 | struct rps_sock_flow_table *sock_flow_table; | ||
2282 | int cpu = -1; | ||
2283 | u8 ip_proto; | 2520 | u8 ip_proto; |
2284 | u16 tcpu; | ||
2285 | u32 addr1, addr2, ihl; | 2521 | u32 addr1, addr2, ihl; |
2286 | union { | 2522 | union { |
2287 | u32 v32; | 2523 | u32 v32; |
2288 | u16 v16[2]; | 2524 | u16 v16[2]; |
2289 | } ports; | 2525 | } ports; |
2290 | 2526 | ||
2291 | if (skb_rx_queue_recorded(skb)) { | 2527 | nhoff = skb_network_offset(skb); |
2292 | u16 index = skb_get_rx_queue(skb); | ||
2293 | if (unlikely(index >= dev->num_rx_queues)) { | ||
2294 | WARN_ONCE(dev->num_rx_queues > 1, "%s received packet " | ||
2295 | "on queue %u, but number of RX queues is %u\n", | ||
2296 | dev->name, index, dev->num_rx_queues); | ||
2297 | goto done; | ||
2298 | } | ||
2299 | rxqueue = dev->_rx + index; | ||
2300 | } else | ||
2301 | rxqueue = dev->_rx; | ||
2302 | |||
2303 | if (!rxqueue->rps_map && !rxqueue->rps_flow_table) | ||
2304 | goto done; | ||
2305 | |||
2306 | if (skb->rxhash) | ||
2307 | goto got_hash; /* Skip hash computation on packet header */ | ||
2308 | 2528 | ||
2309 | switch (skb->protocol) { | 2529 | switch (skb->protocol) { |
2310 | case __constant_htons(ETH_P_IP): | 2530 | case __constant_htons(ETH_P_IP): |
2311 | if (!pskb_may_pull(skb, sizeof(*ip))) | 2531 | if (!pskb_may_pull(skb, sizeof(*ip) + nhoff)) |
2312 | goto done; | 2532 | goto done; |
2313 | 2533 | ||
2314 | ip = (struct iphdr *) skb->data; | 2534 | ip = (const struct iphdr *) (skb->data + nhoff); |
2315 | ip_proto = ip->protocol; | 2535 | if (ip->frag_off & htons(IP_MF | IP_OFFSET)) |
2536 | ip_proto = 0; | ||
2537 | else | ||
2538 | ip_proto = ip->protocol; | ||
2316 | addr1 = (__force u32) ip->saddr; | 2539 | addr1 = (__force u32) ip->saddr; |
2317 | addr2 = (__force u32) ip->daddr; | 2540 | addr2 = (__force u32) ip->daddr; |
2318 | ihl = ip->ihl; | 2541 | ihl = ip->ihl; |
2319 | break; | 2542 | break; |
2320 | case __constant_htons(ETH_P_IPV6): | 2543 | case __constant_htons(ETH_P_IPV6): |
2321 | if (!pskb_may_pull(skb, sizeof(*ip6))) | 2544 | if (!pskb_may_pull(skb, sizeof(*ip6) + nhoff)) |
2322 | goto done; | 2545 | goto done; |
2323 | 2546 | ||
2324 | ip6 = (struct ipv6hdr *) skb->data; | 2547 | ip6 = (const struct ipv6hdr *) (skb->data + nhoff); |
2325 | ip_proto = ip6->nexthdr; | 2548 | ip_proto = ip6->nexthdr; |
2326 | addr1 = (__force u32) ip6->saddr.s6_addr32[3]; | 2549 | addr1 = (__force u32) ip6->saddr.s6_addr32[3]; |
2327 | addr2 = (__force u32) ip6->daddr.s6_addr32[3]; | 2550 | addr2 = (__force u32) ip6->daddr.s6_addr32[3]; |
@@ -2330,33 +2553,130 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, | |||
2330 | default: | 2553 | default: |
2331 | goto done; | 2554 | goto done; |
2332 | } | 2555 | } |
2333 | switch (ip_proto) { | 2556 | |
2334 | case IPPROTO_TCP: | 2557 | ports.v32 = 0; |
2335 | case IPPROTO_UDP: | 2558 | poff = proto_ports_offset(ip_proto); |
2336 | case IPPROTO_DCCP: | 2559 | if (poff >= 0) { |
2337 | case IPPROTO_ESP: | 2560 | nhoff += ihl * 4 + poff; |
2338 | case IPPROTO_AH: | 2561 | if (pskb_may_pull(skb, nhoff + 4)) { |
2339 | case IPPROTO_SCTP: | 2562 | ports.v32 = * (__force u32 *) (skb->data + nhoff); |
2340 | case IPPROTO_UDPLITE: | ||
2341 | if (pskb_may_pull(skb, (ihl * 4) + 4)) { | ||
2342 | ports.v32 = * (__force u32 *) (skb->data + (ihl * 4)); | ||
2343 | if (ports.v16[1] < ports.v16[0]) | 2563 | if (ports.v16[1] < ports.v16[0]) |
2344 | swap(ports.v16[0], ports.v16[1]); | 2564 | swap(ports.v16[0], ports.v16[1]); |
2345 | break; | ||
2346 | } | 2565 | } |
2347 | default: | ||
2348 | ports.v32 = 0; | ||
2349 | break; | ||
2350 | } | 2566 | } |
2351 | 2567 | ||
2352 | /* get a consistent hash (same value on both flow directions) */ | 2568 | /* get a consistent hash (same value on both flow directions) */ |
2353 | if (addr2 < addr1) | 2569 | if (addr2 < addr1) |
2354 | swap(addr1, addr2); | 2570 | swap(addr1, addr2); |
2355 | skb->rxhash = jhash_3words(addr1, addr2, ports.v32, hashrnd); | ||
2356 | if (!skb->rxhash) | ||
2357 | skb->rxhash = 1; | ||
2358 | 2571 | ||
2359 | got_hash: | 2572 | hash = jhash_3words(addr1, addr2, ports.v32, hashrnd); |
2573 | if (!hash) | ||
2574 | hash = 1; | ||
2575 | |||
2576 | done: | ||
2577 | return hash; | ||
2578 | } | ||
2579 | EXPORT_SYMBOL(__skb_get_rxhash); | ||
2580 | |||
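[Editor's note] The hunk above factors the flow-hash computation out into __skb_get_rxhash(): the two addresses are swapped into a canonical order, the two 16-bit ports are packed into one 32-bit word and likewise ordered, and only then is everything fed to jhash_3words(), so both directions of a flow hash to the same value. A minimal userspace sketch of that canonicalization, using an arbitrary stand-in mixer (mix3) instead of the kernel's jhash_3words()/hashrnd; flow_hash() and the sample values are invented for the demo:

    #include <stdint.h>
    #include <stdio.h>

    /* Stand-in for jhash_3words(); any mixing function works for the demo. */
    static uint32_t mix3(uint32_t a, uint32_t b, uint32_t c)
    {
            return (a * 2654435761u) ^ (b * 2246822519u) ^ (c * 3266489917u);
    }

    static uint32_t flow_hash(uint32_t saddr, uint32_t daddr,
                              uint16_t sport, uint16_t dport)
    {
            uint32_t addr1 = saddr, addr2 = daddr;
            uint16_t lo = sport, hi = dport;

            /* Canonical ordering, as in __skb_get_rxhash() */
            if (addr2 < addr1) {
                    uint32_t tmp = addr1;
                    addr1 = addr2;
                    addr2 = tmp;
            }
            if (hi < lo) {
                    uint16_t tmp = lo;
                    lo = hi;
                    hi = tmp;
            }
            return mix3(addr1, addr2, ((uint32_t)hi << 16) | lo);
    }

    int main(void)
    {
            /* Both directions of the same flow produce the same hash. */
            printf("%u\n", (unsigned)flow_hash(0x0a000001, 0x0a000002, 12345, 80));
            printf("%u\n", (unsigned)flow_hash(0x0a000002, 0x0a000001, 80, 12345));
            return 0;
    }

The forced "hash = 1" at the end of the kernel function is a design choice that lets callers treat 0 as "no hash computed".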
2581 | #ifdef CONFIG_RPS | ||
2582 | |||
2583 | /* One global table that all flow-based protocols share. */ | ||
2584 | struct rps_sock_flow_table __rcu *rps_sock_flow_table __read_mostly; | ||
2585 | EXPORT_SYMBOL(rps_sock_flow_table); | ||
2586 | |||
2587 | static struct rps_dev_flow * | ||
2588 | set_rps_cpu(struct net_device *dev, struct sk_buff *skb, | ||
2589 | struct rps_dev_flow *rflow, u16 next_cpu) | ||
2590 | { | ||
2591 | u16 tcpu; | ||
2592 | |||
2593 | tcpu = rflow->cpu = next_cpu; | ||
2594 | if (tcpu != RPS_NO_CPU) { | ||
2595 | #ifdef CONFIG_RFS_ACCEL | ||
2596 | struct netdev_rx_queue *rxqueue; | ||
2597 | struct rps_dev_flow_table *flow_table; | ||
2598 | struct rps_dev_flow *old_rflow; | ||
2599 | u32 flow_id; | ||
2600 | u16 rxq_index; | ||
2601 | int rc; | ||
2602 | |||
2603 | /* Should we steer this flow to a different hardware queue? */ | ||
2604 | if (!skb_rx_queue_recorded(skb) || !dev->rx_cpu_rmap || | ||
2605 | !(dev->features & NETIF_F_NTUPLE)) | ||
2606 | goto out; | ||
2607 | rxq_index = cpu_rmap_lookup_index(dev->rx_cpu_rmap, next_cpu); | ||
2608 | if (rxq_index == skb_get_rx_queue(skb)) | ||
2609 | goto out; | ||
2610 | |||
2611 | rxqueue = dev->_rx + rxq_index; | ||
2612 | flow_table = rcu_dereference(rxqueue->rps_flow_table); | ||
2613 | if (!flow_table) | ||
2614 | goto out; | ||
2615 | flow_id = skb->rxhash & flow_table->mask; | ||
2616 | rc = dev->netdev_ops->ndo_rx_flow_steer(dev, skb, | ||
2617 | rxq_index, flow_id); | ||
2618 | if (rc < 0) | ||
2619 | goto out; | ||
2620 | old_rflow = rflow; | ||
2621 | rflow = &flow_table->flows[flow_id]; | ||
2622 | rflow->cpu = next_cpu; | ||
2623 | rflow->filter = rc; | ||
2624 | if (old_rflow->filter == rflow->filter) | ||
2625 | old_rflow->filter = RPS_NO_FILTER; | ||
2626 | out: | ||
2627 | #endif | ||
2628 | rflow->last_qtail = | ||
2629 | per_cpu(softnet_data, tcpu).input_queue_head; | ||
2630 | } | ||
2631 | |||
2632 | return rflow; | ||
2633 | } | ||
2634 | |||
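[Editor's note] set_rps_cpu() above only attempts hardware steering when the driver has advertised NETIF_F_NTUPLE and populated dev->rx_cpu_rmap, the CPU-to-RX-queue reverse map that cpu_rmap_lookup_index() consults. A rough sketch of the driver-side setup using the <linux/cpu_rmap.h> helpers included at the top of this file; the example_* name, the irq array and the queue count are hypothetical driver details, and the helper signatures are assumed rather than quoted from this patch:

    #include <linux/cpu_rmap.h>
    #include <linux/netdevice.h>

    /* Hypothetical probe-time init: build a CPU -> RX queue reverse map so
     * that cpu_rmap_lookup_index() in set_rps_cpu() can find the queue
     * whose IRQ is affine to the target CPU.  dev->rx_cpu_rmap only exists
     * when CONFIG_RFS_ACCEL is enabled. */
    static int example_setup_rfs_accel(struct net_device *dev,
                                       int *rxq_irqs, unsigned int n_rxq)
    {
            unsigned int i;
            int err;

            dev->rx_cpu_rmap = alloc_irq_cpu_rmap(n_rxq);
            if (!dev->rx_cpu_rmap)
                    return -ENOMEM;

            for (i = 0; i < n_rxq; i++) {
                    /* Track each RX queue's IRQ affinity in the reverse map. */
                    err = irq_cpu_rmap_add(dev->rx_cpu_rmap, rxq_irqs[i]);
                    if (err) {
                            free_irq_cpu_rmap(dev->rx_cpu_rmap);
                            dev->rx_cpu_rmap = NULL;
                            return err;
                    }
            }
            return 0;
    }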
2635 | /* | ||
2636 | * get_rps_cpu is called from netif_receive_skb and returns the target | ||
2637 | * CPU from the RPS map of the receiving queue for a given skb. | ||
2638 | * rcu_read_lock must be held on entry. | ||
2639 | */ | ||
2640 | static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, | ||
2641 | struct rps_dev_flow **rflowp) | ||
2642 | { | ||
2643 | struct netdev_rx_queue *rxqueue; | ||
2644 | struct rps_map *map; | ||
2645 | struct rps_dev_flow_table *flow_table; | ||
2646 | struct rps_sock_flow_table *sock_flow_table; | ||
2647 | int cpu = -1; | ||
2648 | u16 tcpu; | ||
2649 | |||
2650 | if (skb_rx_queue_recorded(skb)) { | ||
2651 | u16 index = skb_get_rx_queue(skb); | ||
2652 | if (unlikely(index >= dev->real_num_rx_queues)) { | ||
2653 | WARN_ONCE(dev->real_num_rx_queues > 1, | ||
2654 | "%s received packet on queue %u, but number " | ||
2655 | "of RX queues is %u\n", | ||
2656 | dev->name, index, dev->real_num_rx_queues); | ||
2657 | goto done; | ||
2658 | } | ||
2659 | rxqueue = dev->_rx + index; | ||
2660 | } else | ||
2661 | rxqueue = dev->_rx; | ||
2662 | |||
2663 | map = rcu_dereference(rxqueue->rps_map); | ||
2664 | if (map) { | ||
2665 | if (map->len == 1 && | ||
2666 | !rcu_dereference_raw(rxqueue->rps_flow_table)) { | ||
2667 | tcpu = map->cpus[0]; | ||
2668 | if (cpu_online(tcpu)) | ||
2669 | cpu = tcpu; | ||
2670 | goto done; | ||
2671 | } | ||
2672 | } else if (!rcu_dereference_raw(rxqueue->rps_flow_table)) { | ||
2673 | goto done; | ||
2674 | } | ||
2675 | |||
2676 | skb_reset_network_header(skb); | ||
2677 | if (!skb_get_rxhash(skb)) | ||
2678 | goto done; | ||
2679 | |||
2360 | flow_table = rcu_dereference(rxqueue->rps_flow_table); | 2680 | flow_table = rcu_dereference(rxqueue->rps_flow_table); |
2361 | sock_flow_table = rcu_dereference(rps_sock_flow_table); | 2681 | sock_flow_table = rcu_dereference(rps_sock_flow_table); |
2362 | if (flow_table && sock_flow_table) { | 2682 | if (flow_table && sock_flow_table) { |
@@ -2383,12 +2703,9 @@ got_hash: | |||
2383 | if (unlikely(tcpu != next_cpu) && | 2703 | if (unlikely(tcpu != next_cpu) && |
2384 | (tcpu == RPS_NO_CPU || !cpu_online(tcpu) || | 2704 | (tcpu == RPS_NO_CPU || !cpu_online(tcpu) || |
2385 | ((int)(per_cpu(softnet_data, tcpu).input_queue_head - | 2705 | ((int)(per_cpu(softnet_data, tcpu).input_queue_head - |
2386 | rflow->last_qtail)) >= 0)) { | 2706 | rflow->last_qtail)) >= 0)) |
2387 | tcpu = rflow->cpu = next_cpu; | 2707 | rflow = set_rps_cpu(dev, skb, rflow, next_cpu); |
2388 | if (tcpu != RPS_NO_CPU) | 2708 | |
2389 | rflow->last_qtail = per_cpu(softnet_data, | ||
2390 | tcpu).input_queue_head; | ||
2391 | } | ||
2392 | if (tcpu != RPS_NO_CPU && cpu_online(tcpu)) { | 2709 | if (tcpu != RPS_NO_CPU && cpu_online(tcpu)) { |
2393 | *rflowp = rflow; | 2710 | *rflowp = rflow; |
2394 | cpu = tcpu; | 2711 | cpu = tcpu; |
@@ -2396,7 +2713,6 @@ got_hash: | |||
2396 | } | 2713 | } |
2397 | } | 2714 | } |
2398 | 2715 | ||
2399 | map = rcu_dereference(rxqueue->rps_map); | ||
2400 | if (map) { | 2716 | if (map) { |
2401 | tcpu = map->cpus[((u64) skb->rxhash * map->len) >> 32]; | 2717 | tcpu = map->cpus[((u64) skb->rxhash * map->len) >> 32]; |
2402 | 2718 | ||
@@ -2410,6 +2726,46 @@ done: | |||
2410 | return cpu; | 2726 | return cpu; |
2411 | } | 2727 | } |
2412 | 2728 | ||
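[Editor's note] When get_rps_cpu() finally indexes the CPU map it uses map->cpus[((u64)skb->rxhash * map->len) >> 32]: multiplying the 32-bit hash by the map length and keeping the upper 32 bits of the 64-bit product scales the hash uniformly onto [0, len) without a division. A tiny standalone illustration of the same trick (the sample values are arbitrary):

    #include <stdint.h>
    #include <stdio.h>

    /* Map a 32-bit hash onto [0, len) without a modulo, as in get_rps_cpu(). */
    static uint32_t scale_to_index(uint32_t hash, uint32_t len)
    {
            return (uint32_t)(((uint64_t)hash * len) >> 32);
    }

    int main(void)
    {
            /* With len == 4, hashes land in buckets 0..3 according to which
             * quarter of the 32-bit space they fall into. */
            printf("%u\n", (unsigned)scale_to_index(0x00000000u, 4)); /* 0 */
            printf("%u\n", (unsigned)scale_to_index(0x40000000u, 4)); /* 1 */
            printf("%u\n", (unsigned)scale_to_index(0xc0000000u, 4)); /* 3 */
            printf("%u\n", (unsigned)scale_to_index(0xffffffffu, 4)); /* 3 */
            return 0;
    }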
2729 | #ifdef CONFIG_RFS_ACCEL | ||
2730 | |||
2731 | /** | ||
2732 | * rps_may_expire_flow - check whether an RFS hardware filter may be removed | ||
2733 | * @dev: Device on which the filter was set | ||
2734 | * @rxq_index: RX queue index | ||
2735 | * @flow_id: Flow ID passed to ndo_rx_flow_steer() | ||
2736 | * @filter_id: Filter ID returned by ndo_rx_flow_steer() | ||
2737 | * | ||
2738 | * Drivers that implement ndo_rx_flow_steer() should periodically call | ||
2739 | * this function for each installed filter and remove the filters for | ||
2740 | * which it returns %true. | ||
2741 | */ | ||
2742 | bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index, | ||
2743 | u32 flow_id, u16 filter_id) | ||
2744 | { | ||
2745 | struct netdev_rx_queue *rxqueue = dev->_rx + rxq_index; | ||
2746 | struct rps_dev_flow_table *flow_table; | ||
2747 | struct rps_dev_flow *rflow; | ||
2748 | bool expire = true; | ||
2749 | int cpu; | ||
2750 | |||
2751 | rcu_read_lock(); | ||
2752 | flow_table = rcu_dereference(rxqueue->rps_flow_table); | ||
2753 | if (flow_table && flow_id <= flow_table->mask) { | ||
2754 | rflow = &flow_table->flows[flow_id]; | ||
2755 | cpu = ACCESS_ONCE(rflow->cpu); | ||
2756 | if (rflow->filter == filter_id && cpu != RPS_NO_CPU && | ||
2757 | ((int)(per_cpu(softnet_data, cpu).input_queue_head - | ||
2758 | rflow->last_qtail) < | ||
2759 | (int)(10 * flow_table->mask))) | ||
2760 | expire = false; | ||
2761 | } | ||
2762 | rcu_read_unlock(); | ||
2763 | return expire; | ||
2764 | } | ||
2765 | EXPORT_SYMBOL(rps_may_expire_flow); | ||
2766 | |||
2767 | #endif /* CONFIG_RFS_ACCEL */ | ||
2768 | |||
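[Editor's note] rps_may_expire_flow() is the core half of accelerated-RFS garbage collection; per the kernel-doc above, a driver that implements ndo_rx_flow_steer() should sweep its installed filters periodically and tear down the ones this helper flags. A hedged sketch of such a sweep; the filter table layout, the example_* names and the hardware removal call are driver-private inventions, not part of this patch:

    #include <linux/netdevice.h>

    /* Hypothetical driver-side filter record. */
    struct example_rfs_filter {
            bool    in_use;
            u16     rxq_index;      /* queue passed to ndo_rx_flow_steer() */
            u32     flow_id;        /* flow_id passed to ndo_rx_flow_steer() */
            u16     filter_id;      /* value returned by ndo_rx_flow_steer() */
    };

    /* Called periodically, e.g. from a delayed work item. */
    static void example_rfs_expire_scan(struct net_device *dev,
                                        struct example_rfs_filter *tbl,
                                        unsigned int n)
    {
            unsigned int i;

            for (i = 0; i < n; i++) {
                    if (!tbl[i].in_use)
                            continue;
                    if (rps_may_expire_flow(dev, tbl[i].rxq_index,
                                            tbl[i].flow_id, tbl[i].filter_id)) {
                            /* Hardware removal is driver specific, e.g.:
                             * example_hw_remove_filter(dev, tbl[i].filter_id); */
                            tbl[i].in_use = false;
                    }
            }
    }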
2413 | /* Called from hardirq (IPI) context */ | 2769 | /* Called from hardirq (IPI) context */ |
2414 | static void rps_trigger_softirq(void *data) | 2770 | static void rps_trigger_softirq(void *data) |
2415 | { | 2771 | { |
@@ -2482,6 +2838,7 @@ enqueue: | |||
2482 | 2838 | ||
2483 | local_irq_restore(flags); | 2839 | local_irq_restore(flags); |
2484 | 2840 | ||
2841 | atomic_long_inc(&skb->dev->rx_dropped); | ||
2485 | kfree_skb(skb); | 2842 | kfree_skb(skb); |
2486 | return NET_RX_DROP; | 2843 | return NET_RX_DROP; |
2487 | } | 2844 | } |
@@ -2512,6 +2869,7 @@ int netif_rx(struct sk_buff *skb) | |||
2512 | if (netdev_tstamp_prequeue) | 2869 | if (netdev_tstamp_prequeue) |
2513 | net_timestamp_check(skb); | 2870 | net_timestamp_check(skb); |
2514 | 2871 | ||
2872 | trace_netif_rx(skb); | ||
2515 | #ifdef CONFIG_RPS | 2873 | #ifdef CONFIG_RPS |
2516 | { | 2874 | { |
2517 | struct rps_dev_flow voidflow, *rflow = &voidflow; | 2875 | struct rps_dev_flow voidflow, *rflow = &voidflow; |
@@ -2571,6 +2929,7 @@ static void net_tx_action(struct softirq_action *h) | |||
2571 | clist = clist->next; | 2929 | clist = clist->next; |
2572 | 2930 | ||
2573 | WARN_ON(atomic_read(&skb->users)); | 2931 | WARN_ON(atomic_read(&skb->users)); |
2932 | trace_kfree_skb(skb, net_tx_action); | ||
2574 | __kfree_skb(skb); | 2933 | __kfree_skb(skb); |
2575 | } | 2934 | } |
2576 | } | 2935 | } |
@@ -2611,14 +2970,6 @@ static void net_tx_action(struct softirq_action *h) | |||
2611 | } | 2970 | } |
2612 | } | 2971 | } |
2613 | 2972 | ||
2614 | static inline int deliver_skb(struct sk_buff *skb, | ||
2615 | struct packet_type *pt_prev, | ||
2616 | struct net_device *orig_dev) | ||
2617 | { | ||
2618 | atomic_inc(&skb->users); | ||
2619 | return pt_prev->func(skb, skb->dev, pt_prev, orig_dev); | ||
2620 | } | ||
2621 | |||
2622 | #if (defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)) && \ | 2973 | #if (defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)) && \ |
2623 | (defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE)) | 2974 | (defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE)) |
2624 | /* This hook is defined here for ATM LANE */ | 2975 | /* This hook is defined here for ATM LANE */ |
@@ -2632,15 +2983,14 @@ EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook); | |||
2632 | * when CONFIG_NET_CLS_ACT is? otherwise some useless instructions | 2983 | * when CONFIG_NET_CLS_ACT is? otherwise some useless instructions |
2633 | * a compare and 2 stores extra right now if we dont have it on | 2984 | * a compare and 2 stores extra right now if we dont have it on |
2634 | * but have CONFIG_NET_CLS_ACT | 2985 | * but have CONFIG_NET_CLS_ACT |
2635 | * NOTE: This doesnt stop any functionality; if you dont have | 2986 | * NOTE: This doesn't stop any functionality; if you dont have |
2636 | * the ingress scheduler, you just cant add policies on ingress. | 2987 | * the ingress scheduler, you just can't add policies on ingress. |
2637 | * | 2988 | * |
2638 | */ | 2989 | */ |
2639 | static int ing_filter(struct sk_buff *skb) | 2990 | static int ing_filter(struct sk_buff *skb, struct netdev_queue *rxq) |
2640 | { | 2991 | { |
2641 | struct net_device *dev = skb->dev; | 2992 | struct net_device *dev = skb->dev; |
2642 | u32 ttl = G_TC_RTTL(skb->tc_verd); | 2993 | u32 ttl = G_TC_RTTL(skb->tc_verd); |
2643 | struct netdev_queue *rxq; | ||
2644 | int result = TC_ACT_OK; | 2994 | int result = TC_ACT_OK; |
2645 | struct Qdisc *q; | 2995 | struct Qdisc *q; |
2646 | 2996 | ||
@@ -2654,8 +3004,6 @@ static int ing_filter(struct sk_buff *skb) | |||
2654 | skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl); | 3004 | skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl); |
2655 | skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS); | 3005 | skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS); |
2656 | 3006 | ||
2657 | rxq = &dev->rx_queue; | ||
2658 | |||
2659 | q = rxq->qdisc; | 3007 | q = rxq->qdisc; |
2660 | if (q != &noop_qdisc) { | 3008 | if (q != &noop_qdisc) { |
2661 | spin_lock(qdisc_lock(q)); | 3009 | spin_lock(qdisc_lock(q)); |
@@ -2671,7 +3019,9 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb, | |||
2671 | struct packet_type **pt_prev, | 3019 | struct packet_type **pt_prev, |
2672 | int *ret, struct net_device *orig_dev) | 3020 | int *ret, struct net_device *orig_dev) |
2673 | { | 3021 | { |
2674 | if (skb->dev->rx_queue.qdisc == &noop_qdisc) | 3022 | struct netdev_queue *rxq = rcu_dereference(skb->dev->ingress_queue); |
3023 | |||
3024 | if (!rxq || rxq->qdisc == &noop_qdisc) | ||
2675 | goto out; | 3025 | goto out; |
2676 | 3026 | ||
2677 | if (*pt_prev) { | 3027 | if (*pt_prev) { |
@@ -2679,7 +3029,7 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb, | |||
2679 | *pt_prev = NULL; | 3029 | *pt_prev = NULL; |
2680 | } | 3030 | } |
2681 | 3031 | ||
2682 | switch (ing_filter(skb)) { | 3032 | switch (ing_filter(skb, rxq)) { |
2683 | case TC_ACT_SHOT: | 3033 | case TC_ACT_SHOT: |
2684 | case TC_ACT_STOLEN: | 3034 | case TC_ACT_STOLEN: |
2685 | kfree_skb(skb); | 3035 | kfree_skb(skb); |
@@ -2692,33 +3042,6 @@ out: | |||
2692 | } | 3042 | } |
2693 | #endif | 3043 | #endif |
2694 | 3044 | ||
2695 | /* | ||
2696 | * netif_nit_deliver - deliver received packets to network taps | ||
2697 | * @skb: buffer | ||
2698 | * | ||
2699 | * This function is used to deliver incoming packets to network | ||
2700 | * taps. It should be used when the normal netif_receive_skb path | ||
2701 | * is bypassed, for example because of VLAN acceleration. | ||
2702 | */ | ||
2703 | void netif_nit_deliver(struct sk_buff *skb) | ||
2704 | { | ||
2705 | struct packet_type *ptype; | ||
2706 | |||
2707 | if (list_empty(&ptype_all)) | ||
2708 | return; | ||
2709 | |||
2710 | skb_reset_network_header(skb); | ||
2711 | skb_reset_transport_header(skb); | ||
2712 | skb->mac_len = skb->network_header - skb->mac_header; | ||
2713 | |||
2714 | rcu_read_lock(); | ||
2715 | list_for_each_entry_rcu(ptype, &ptype_all, list) { | ||
2716 | if (!ptype->dev || ptype->dev == skb->dev) | ||
2717 | deliver_skb(skb, ptype, skb->dev); | ||
2718 | } | ||
2719 | rcu_read_unlock(); | ||
2720 | } | ||
2721 | |||
2722 | /** | 3045 | /** |
2723 | * netdev_rx_handler_register - register receive handler | 3046 | * netdev_rx_handler_register - register receive handler |
2724 | * @dev: device to register a handler for | 3047 | * @dev: device to register a handler for |
@@ -2730,6 +3053,8 @@ void netif_nit_deliver(struct sk_buff *skb) | |||
2730 | * on a failure. | 3053 | * on a failure. |
2731 | * | 3054 | * |
2732 | * The caller must hold the rtnl_mutex. | 3055 | * The caller must hold the rtnl_mutex. |
3056 | * | ||
3057 | * For a general description of rx_handler, see enum rx_handler_result. | ||
2733 | */ | 3058 | */ |
2734 | int netdev_rx_handler_register(struct net_device *dev, | 3059 | int netdev_rx_handler_register(struct net_device *dev, |
2735 | rx_handler_func_t *rx_handler, | 3060 | rx_handler_func_t *rx_handler, |
@@ -2764,72 +3089,20 @@ void netdev_rx_handler_unregister(struct net_device *dev) | |||
2764 | } | 3089 | } |
2765 | EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister); | 3090 | EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister); |
2766 | 3091 | ||
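[Editor's note] With this series an rx_handler no longer hands back a struct sk_buff pointer; it receives &skb and returns one of the enum rx_handler_result values that the rewritten __netif_receive_skb() below switches on (CONSUMED, ANOTHER, EXACT, PASS). A minimal sketch of registering such a handler in the spirit of bridge/macvlan; the example_* wrappers are hypothetical, the rx_handler_result_t typedef is assumed from the header, and the handler body simply passes every frame through:

    #include <linux/netdevice.h>
    #include <linux/rtnetlink.h>

    /* Trivial rx_handler: claim nothing, let normal delivery continue. */
    static rx_handler_result_t example_handle_frame(struct sk_buff **pskb)
    {
            struct sk_buff *skb = *pskb;

            /* A real handler (bridge, macvlan, bonding) would inspect skb
             * here and may consume it, retarget it and ask for another
             * round, or restrict delivery to exact-match ptypes. */
            (void)skb;
            return RX_HANDLER_PASS;
    }

    static int example_attach(struct net_device *port_dev, void *priv)
    {
            int err;

            rtnl_lock();            /* required by the kernel-doc above */
            err = netdev_rx_handler_register(port_dev, example_handle_frame, priv);
            rtnl_unlock();
            return err;
    }

    static void example_detach(struct net_device *port_dev)
    {
            rtnl_lock();
            netdev_rx_handler_unregister(port_dev);
            rtnl_unlock();
    }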
2767 | static inline void skb_bond_set_mac_by_master(struct sk_buff *skb, | ||
2768 | struct net_device *master) | ||
2769 | { | ||
2770 | if (skb->pkt_type == PACKET_HOST) { | ||
2771 | u16 *dest = (u16 *) eth_hdr(skb)->h_dest; | ||
2772 | |||
2773 | memcpy(dest, master->dev_addr, ETH_ALEN); | ||
2774 | } | ||
2775 | } | ||
2776 | |||
2777 | /* On bonding slaves other than the currently active slave, suppress | ||
2778 | * duplicates except for 802.3ad ETH_P_SLOW, alb non-mcast/bcast, and | ||
2779 | * ARP on active-backup slaves with arp_validate enabled. | ||
2780 | */ | ||
2781 | int __skb_bond_should_drop(struct sk_buff *skb, struct net_device *master) | ||
2782 | { | ||
2783 | struct net_device *dev = skb->dev; | ||
2784 | |||
2785 | if (master->priv_flags & IFF_MASTER_ARPMON) | ||
2786 | dev->last_rx = jiffies; | ||
2787 | |||
2788 | if ((master->priv_flags & IFF_MASTER_ALB) && | ||
2789 | (master->priv_flags & IFF_BRIDGE_PORT)) { | ||
2790 | /* Do address unmangle. The local destination address | ||
2791 | * will be always the one master has. Provides the right | ||
2792 | * functionality in a bridge. | ||
2793 | */ | ||
2794 | skb_bond_set_mac_by_master(skb, master); | ||
2795 | } | ||
2796 | |||
2797 | if (dev->priv_flags & IFF_SLAVE_INACTIVE) { | ||
2798 | if ((dev->priv_flags & IFF_SLAVE_NEEDARP) && | ||
2799 | skb->protocol == __cpu_to_be16(ETH_P_ARP)) | ||
2800 | return 0; | ||
2801 | |||
2802 | if (master->priv_flags & IFF_MASTER_ALB) { | ||
2803 | if (skb->pkt_type != PACKET_BROADCAST && | ||
2804 | skb->pkt_type != PACKET_MULTICAST) | ||
2805 | return 0; | ||
2806 | } | ||
2807 | if (master->priv_flags & IFF_MASTER_8023AD && | ||
2808 | skb->protocol == __cpu_to_be16(ETH_P_SLOW)) | ||
2809 | return 0; | ||
2810 | |||
2811 | return 1; | ||
2812 | } | ||
2813 | return 0; | ||
2814 | } | ||
2815 | EXPORT_SYMBOL(__skb_bond_should_drop); | ||
2816 | |||
2817 | static int __netif_receive_skb(struct sk_buff *skb) | 3092 | static int __netif_receive_skb(struct sk_buff *skb) |
2818 | { | 3093 | { |
2819 | struct packet_type *ptype, *pt_prev; | 3094 | struct packet_type *ptype, *pt_prev; |
2820 | rx_handler_func_t *rx_handler; | 3095 | rx_handler_func_t *rx_handler; |
2821 | struct net_device *orig_dev; | 3096 | struct net_device *orig_dev; |
2822 | struct net_device *master; | 3097 | struct net_device *null_or_dev; |
2823 | struct net_device *null_or_orig; | 3098 | bool deliver_exact = false; |
2824 | struct net_device *orig_or_bond; | ||
2825 | int ret = NET_RX_DROP; | 3099 | int ret = NET_RX_DROP; |
2826 | __be16 type; | 3100 | __be16 type; |
2827 | 3101 | ||
2828 | if (!netdev_tstamp_prequeue) | 3102 | if (!netdev_tstamp_prequeue) |
2829 | net_timestamp_check(skb); | 3103 | net_timestamp_check(skb); |
2830 | 3104 | ||
2831 | if (vlan_tx_tag_present(skb) && vlan_hwaccel_do_receive(skb)) | 3105 | trace_netif_receive_skb(skb); |
2832 | return NET_RX_SUCCESS; | ||
2833 | 3106 | ||
2834 | /* if we've gotten here through NAPI, check netpoll */ | 3107 | /* if we've gotten here through NAPI, check netpoll */ |
2835 | if (netpoll_receive_skb(skb)) | 3108 | if (netpoll_receive_skb(skb)) |
@@ -2837,37 +3110,26 @@ static int __netif_receive_skb(struct sk_buff *skb) | |||
2837 | 3110 | ||
2838 | if (!skb->skb_iif) | 3111 | if (!skb->skb_iif) |
2839 | skb->skb_iif = skb->dev->ifindex; | 3112 | skb->skb_iif = skb->dev->ifindex; |
2840 | |||
2841 | /* | ||
2842 | * bonding note: skbs received on inactive slaves should only | ||
2843 | * be delivered to pkt handlers that are exact matches. Also | ||
2844 | * the deliver_no_wcard flag will be set. If packet handlers | ||
2845 | * are sensitive to duplicate packets these skbs will need to | ||
2846 | * be dropped at the handler. The vlan accel path may have | ||
2847 | * already set the deliver_no_wcard flag. | ||
2848 | */ | ||
2849 | null_or_orig = NULL; | ||
2850 | orig_dev = skb->dev; | 3113 | orig_dev = skb->dev; |
2851 | master = ACCESS_ONCE(orig_dev->master); | ||
2852 | if (skb->deliver_no_wcard) | ||
2853 | null_or_orig = orig_dev; | ||
2854 | else if (master) { | ||
2855 | if (skb_bond_should_drop(skb, master)) { | ||
2856 | skb->deliver_no_wcard = 1; | ||
2857 | null_or_orig = orig_dev; /* deliver only exact match */ | ||
2858 | } else | ||
2859 | skb->dev = master; | ||
2860 | } | ||
2861 | 3114 | ||
2862 | __this_cpu_inc(softnet_data.processed); | ||
2863 | skb_reset_network_header(skb); | 3115 | skb_reset_network_header(skb); |
2864 | skb_reset_transport_header(skb); | 3116 | skb_reset_transport_header(skb); |
2865 | skb->mac_len = skb->network_header - skb->mac_header; | 3117 | skb_reset_mac_len(skb); |
2866 | 3118 | ||
2867 | pt_prev = NULL; | 3119 | pt_prev = NULL; |
2868 | 3120 | ||
2869 | rcu_read_lock(); | 3121 | rcu_read_lock(); |
2870 | 3122 | ||
3123 | another_round: | ||
3124 | |||
3125 | __this_cpu_inc(softnet_data.processed); | ||
3126 | |||
3127 | if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) { | ||
3128 | skb = vlan_untag(skb); | ||
3129 | if (unlikely(!skb)) | ||
3130 | goto out; | ||
3131 | } | ||
3132 | |||
2871 | #ifdef CONFIG_NET_CLS_ACT | 3133 | #ifdef CONFIG_NET_CLS_ACT |
2872 | if (skb->tc_verd & TC_NCLS) { | 3134 | if (skb->tc_verd & TC_NCLS) { |
2873 | skb->tc_verd = CLR_TC_NCLS(skb->tc_verd); | 3135 | skb->tc_verd = CLR_TC_NCLS(skb->tc_verd); |
@@ -2876,8 +3138,7 @@ static int __netif_receive_skb(struct sk_buff *skb) | |||
2876 | #endif | 3138 | #endif |
2877 | 3139 | ||
2878 | list_for_each_entry_rcu(ptype, &ptype_all, list) { | 3140 | list_for_each_entry_rcu(ptype, &ptype_all, list) { |
2879 | if (ptype->dev == null_or_orig || ptype->dev == skb->dev || | 3141 | if (!ptype->dev || ptype->dev == skb->dev) { |
2880 | ptype->dev == orig_dev) { | ||
2881 | if (pt_prev) | 3142 | if (pt_prev) |
2882 | ret = deliver_skb(skb, pt_prev, orig_dev); | 3143 | ret = deliver_skb(skb, pt_prev, orig_dev); |
2883 | pt_prev = ptype; | 3144 | pt_prev = ptype; |
@@ -2891,36 +3152,47 @@ static int __netif_receive_skb(struct sk_buff *skb) | |||
2891 | ncls: | 3152 | ncls: |
2892 | #endif | 3153 | #endif |
2893 | 3154 | ||
2894 | /* Handle special case of bridge or macvlan */ | ||
2895 | rx_handler = rcu_dereference(skb->dev->rx_handler); | 3155 | rx_handler = rcu_dereference(skb->dev->rx_handler); |
2896 | if (rx_handler) { | 3156 | if (rx_handler) { |
2897 | if (pt_prev) { | 3157 | if (pt_prev) { |
2898 | ret = deliver_skb(skb, pt_prev, orig_dev); | 3158 | ret = deliver_skb(skb, pt_prev, orig_dev); |
2899 | pt_prev = NULL; | 3159 | pt_prev = NULL; |
2900 | } | 3160 | } |
2901 | skb = rx_handler(skb); | 3161 | switch (rx_handler(&skb)) { |
2902 | if (!skb) | 3162 | case RX_HANDLER_CONSUMED: |
2903 | goto out; | 3163 | goto out; |
3164 | case RX_HANDLER_ANOTHER: | ||
3165 | goto another_round; | ||
3166 | case RX_HANDLER_EXACT: | ||
3167 | deliver_exact = true; | ||
3168 | case RX_HANDLER_PASS: | ||
3169 | break; | ||
3170 | default: | ||
3171 | BUG(); | ||
3172 | } | ||
2904 | } | 3173 | } |
2905 | 3174 | ||
2906 | /* | 3175 | if (vlan_tx_tag_present(skb)) { |
2907 | * Make sure frames received on VLAN interfaces stacked on | 3176 | if (pt_prev) { |
2908 | * bonding interfaces still make their way to any base bonding | 3177 | ret = deliver_skb(skb, pt_prev, orig_dev); |
2909 | * device that may have registered for a specific ptype. The | 3178 | pt_prev = NULL; |
2910 | * handler may have to adjust skb->dev and orig_dev. | 3179 | } |
2911 | */ | 3180 | if (vlan_do_receive(&skb)) { |
2912 | orig_or_bond = orig_dev; | 3181 | ret = __netif_receive_skb(skb); |
2913 | if ((skb->dev->priv_flags & IFF_802_1Q_VLAN) && | 3182 | goto out; |
2914 | (vlan_dev_real_dev(skb->dev)->priv_flags & IFF_BONDING)) { | 3183 | } else if (unlikely(!skb)) |
2915 | orig_or_bond = vlan_dev_real_dev(skb->dev); | 3184 | goto out; |
2916 | } | 3185 | } |
2917 | 3186 | ||
3187 | /* deliver only exact match when indicated */ | ||
3188 | null_or_dev = deliver_exact ? skb->dev : NULL; | ||
3189 | |||
2918 | type = skb->protocol; | 3190 | type = skb->protocol; |
2919 | list_for_each_entry_rcu(ptype, | 3191 | list_for_each_entry_rcu(ptype, |
2920 | &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) { | 3192 | &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) { |
2921 | if (ptype->type == type && (ptype->dev == null_or_orig || | 3193 | if (ptype->type == type && |
2922 | ptype->dev == skb->dev || ptype->dev == orig_dev || | 3194 | (ptype->dev == null_or_dev || ptype->dev == skb->dev || |
2923 | ptype->dev == orig_or_bond)) { | 3195 | ptype->dev == orig_dev)) { |
2924 | if (pt_prev) | 3196 | if (pt_prev) |
2925 | ret = deliver_skb(skb, pt_prev, orig_dev); | 3197 | ret = deliver_skb(skb, pt_prev, orig_dev); |
2926 | pt_prev = ptype; | 3198 | pt_prev = ptype; |
@@ -2930,6 +3202,7 @@ ncls: | |||
2930 | if (pt_prev) { | 3202 | if (pt_prev) { |
2931 | ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev); | 3203 | ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev); |
2932 | } else { | 3204 | } else { |
3205 | atomic_long_inc(&skb->dev->rx_dropped); | ||
2933 | kfree_skb(skb); | 3206 | kfree_skb(skb); |
2934 | /* Jamal, now you will not able to escape explaining | 3207 | /* Jamal, now you will not able to escape explaining |
2935 | * me how you were going to use this. :-) | 3208 | * me how you were going to use this. :-) |
@@ -3050,7 +3323,7 @@ out: | |||
3050 | return netif_receive_skb(skb); | 3323 | return netif_receive_skb(skb); |
3051 | } | 3324 | } |
3052 | 3325 | ||
3053 | static void napi_gro_flush(struct napi_struct *napi) | 3326 | inline void napi_gro_flush(struct napi_struct *napi) |
3054 | { | 3327 | { |
3055 | struct sk_buff *skb, *next; | 3328 | struct sk_buff *skb, *next; |
3056 | 3329 | ||
@@ -3063,6 +3336,7 @@ static void napi_gro_flush(struct napi_struct *napi) | |||
3063 | napi->gro_count = 0; | 3336 | napi->gro_count = 0; |
3064 | napi->gro_list = NULL; | 3337 | napi->gro_list = NULL; |
3065 | } | 3338 | } |
3339 | EXPORT_SYMBOL(napi_gro_flush); | ||
3066 | 3340 | ||
3067 | enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) | 3341 | enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) |
3068 | { | 3342 | { |
@@ -3077,7 +3351,7 @@ enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) | |||
3077 | if (!(skb->dev->features & NETIF_F_GRO) || netpoll_rx_on(skb)) | 3351 | if (!(skb->dev->features & NETIF_F_GRO) || netpoll_rx_on(skb)) |
3078 | goto normal; | 3352 | goto normal; |
3079 | 3353 | ||
3080 | if (skb_is_gso(skb) || skb_has_frags(skb)) | 3354 | if (skb_is_gso(skb) || skb_has_frag_list(skb)) |
3081 | goto normal; | 3355 | goto normal; |
3082 | 3356 | ||
3083 | rcu_read_lock(); | 3357 | rcu_read_lock(); |
@@ -3156,16 +3430,19 @@ normal: | |||
3156 | } | 3430 | } |
3157 | EXPORT_SYMBOL(dev_gro_receive); | 3431 | EXPORT_SYMBOL(dev_gro_receive); |
3158 | 3432 | ||
3159 | static gro_result_t | 3433 | static inline gro_result_t |
3160 | __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) | 3434 | __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) |
3161 | { | 3435 | { |
3162 | struct sk_buff *p; | 3436 | struct sk_buff *p; |
3163 | 3437 | ||
3164 | for (p = napi->gro_list; p; p = p->next) { | 3438 | for (p = napi->gro_list; p; p = p->next) { |
3165 | NAPI_GRO_CB(p)->same_flow = | 3439 | unsigned long diffs; |
3166 | (p->dev == skb->dev) && | 3440 | |
3167 | !compare_ether_header(skb_mac_header(p), | 3441 | diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev; |
3442 | diffs |= p->vlan_tci ^ skb->vlan_tci; | ||
3443 | diffs |= compare_ether_header(skb_mac_header(p), | ||
3168 | skb_gro_mac_header(skb)); | 3444 | skb_gro_mac_header(skb)); |
3445 | NAPI_GRO_CB(p)->same_flow = !diffs; | ||
3169 | NAPI_GRO_CB(p)->flush = 0; | 3446 | NAPI_GRO_CB(p)->flush = 0; |
3170 | } | 3447 | } |
3171 | 3448 | ||
@@ -3218,14 +3495,16 @@ gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) | |||
3218 | } | 3495 | } |
3219 | EXPORT_SYMBOL(napi_gro_receive); | 3496 | EXPORT_SYMBOL(napi_gro_receive); |
3220 | 3497 | ||
3221 | void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb) | 3498 | static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb) |
3222 | { | 3499 | { |
3223 | __skb_pull(skb, skb_headlen(skb)); | 3500 | __skb_pull(skb, skb_headlen(skb)); |
3224 | skb_reserve(skb, NET_IP_ALIGN - skb_headroom(skb)); | 3501 | skb_reserve(skb, NET_IP_ALIGN - skb_headroom(skb)); |
3502 | skb->vlan_tci = 0; | ||
3503 | skb->dev = napi->dev; | ||
3504 | skb->skb_iif = 0; | ||
3225 | 3505 | ||
3226 | napi->skb = skb; | 3506 | napi->skb = skb; |
3227 | } | 3507 | } |
3228 | EXPORT_SYMBOL(napi_reuse_skb); | ||
3229 | 3508 | ||
3230 | struct sk_buff *napi_get_frags(struct napi_struct *napi) | 3509 | struct sk_buff *napi_get_frags(struct napi_struct *napi) |
3231 | { | 3510 | { |
@@ -3519,7 +3798,7 @@ static void net_rx_action(struct softirq_action *h) | |||
3519 | * with netpoll's poll_napi(). Only the entity which | 3798 | * with netpoll's poll_napi(). Only the entity which |
3520 | * obtains the lock and sees NAPI_STATE_SCHED set will | 3799 | * obtains the lock and sees NAPI_STATE_SCHED set will |
3521 | * actually make the ->poll() call. Therefore we avoid | 3800 | * actually make the ->poll() call. Therefore we avoid |
3522 | * accidently calling ->poll() when NAPI is not scheduled. | 3801 | * accidentally calling ->poll() when NAPI is not scheduled. |
3523 | */ | 3802 | */ |
3524 | work = 0; | 3803 | work = 0; |
3525 | if (test_bit(NAPI_STATE_SCHED, &n->state)) { | 3804 | if (test_bit(NAPI_STATE_SCHED, &n->state)) { |
@@ -3710,12 +3989,15 @@ void *dev_seq_start(struct seq_file *seq, loff_t *pos) | |||
3710 | 3989 | ||
3711 | void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos) | 3990 | void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos) |
3712 | { | 3991 | { |
3713 | struct net_device *dev = (v == SEQ_START_TOKEN) ? | 3992 | struct net_device *dev = v; |
3714 | first_net_device(seq_file_net(seq)) : | 3993 | |
3715 | next_net_device((struct net_device *)v); | 3994 | if (v == SEQ_START_TOKEN) |
3995 | dev = first_net_device_rcu(seq_file_net(seq)); | ||
3996 | else | ||
3997 | dev = next_net_device_rcu(dev); | ||
3716 | 3998 | ||
3717 | ++*pos; | 3999 | ++*pos; |
3718 | return rcu_dereference(dev); | 4000 | return dev; |
3719 | } | 4001 | } |
3720 | 4002 | ||
3721 | void dev_seq_stop(struct seq_file *seq, void *v) | 4003 | void dev_seq_stop(struct seq_file *seq, void *v) |
@@ -3999,15 +4281,14 @@ static int __init dev_proc_init(void) | |||
3999 | 4281 | ||
4000 | 4282 | ||
4001 | /** | 4283 | /** |
4002 | * netdev_set_master - set up master/slave pair | 4284 | * netdev_set_master - set up master pointer |
4003 | * @slave: slave device | 4285 | * @slave: slave device |
4004 | * @master: new master device | 4286 | * @master: new master device |
4005 | * | 4287 | * |
4006 | * Changes the master device of the slave. Pass %NULL to break the | 4288 | * Changes the master device of the slave. Pass %NULL to break the |
4007 | * bonding. The caller must hold the RTNL semaphore. On a failure | 4289 | * bonding. The caller must hold the RTNL semaphore. On a failure |
4008 | * a negative errno code is returned. On success the reference counts | 4290 | * a negative errno code is returned. On success the reference counts |
4009 | * are adjusted, %RTM_NEWLINK is sent to the routing socket and the | 4291 | * are adjusted and the function returns zero. |
4010 | * function returns zero. | ||
4011 | */ | 4292 | */ |
4012 | int netdev_set_master(struct net_device *slave, struct net_device *master) | 4293 | int netdev_set_master(struct net_device *slave, struct net_device *master) |
4013 | { | 4294 | { |
@@ -4023,10 +4304,31 @@ int netdev_set_master(struct net_device *slave, struct net_device *master) | |||
4023 | 4304 | ||
4024 | slave->master = master; | 4305 | slave->master = master; |
4025 | 4306 | ||
4026 | if (old) { | 4307 | if (old) |
4027 | synchronize_net(); | ||
4028 | dev_put(old); | 4308 | dev_put(old); |
4029 | } | 4309 | return 0; |
4310 | } | ||
4311 | EXPORT_SYMBOL(netdev_set_master); | ||
4312 | |||
4313 | /** | ||
4314 | * netdev_set_bond_master - set up bonding master/slave pair | ||
4315 | * @slave: slave device | ||
4316 | * @master: new master device | ||
4317 | * | ||
4318 | * Changes the master device of the slave. Pass %NULL to break the | ||
4319 | * bonding. The caller must hold the RTNL semaphore. On a failure | ||
4320 | * a negative errno code is returned. On success %RTM_NEWLINK is sent | ||
4321 | * to the routing socket and the function returns zero. | ||
4322 | */ | ||
4323 | int netdev_set_bond_master(struct net_device *slave, struct net_device *master) | ||
4324 | { | ||
4325 | int err; | ||
4326 | |||
4327 | ASSERT_RTNL(); | ||
4328 | |||
4329 | err = netdev_set_master(slave, master); | ||
4330 | if (err) | ||
4331 | return err; | ||
4030 | if (master) | 4332 | if (master) |
4031 | slave->flags |= IFF_SLAVE; | 4333 | slave->flags |= IFF_SLAVE; |
4032 | else | 4334 | else |
@@ -4035,7 +4337,7 @@ int netdev_set_master(struct net_device *slave, struct net_device *master) | |||
4035 | rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE); | 4337 | rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE); |
4036 | return 0; | 4338 | return 0; |
4037 | } | 4339 | } |
4038 | EXPORT_SYMBOL(netdev_set_master); | 4340 | EXPORT_SYMBOL(netdev_set_bond_master); |
4039 | 4341 | ||
4040 | static void dev_change_rx_flags(struct net_device *dev, int flags) | 4342 | static void dev_change_rx_flags(struct net_device *dev, int flags) |
4041 | { | 4343 | { |
@@ -4204,6 +4506,30 @@ void dev_set_rx_mode(struct net_device *dev) | |||
4204 | } | 4506 | } |
4205 | 4507 | ||
4206 | /** | 4508 | /** |
4509 | * dev_ethtool_get_settings - call device's ethtool_ops::get_settings() | ||
4510 | * @dev: device | ||
4511 | * @cmd: memory area for ethtool_ops::get_settings() result | ||
4512 | * | ||
4513 | * The cmd arg is initialized properly (cleared and | ||
4514 | * ethtool_cmd::cmd field set to ETHTOOL_GSET). | ||
4515 | * | ||
4516 | * Return device's ethtool_ops::get_settings() result value or | ||
4517 | * -EOPNOTSUPP when device doesn't expose | ||
4518 | * ethtool_ops::get_settings() operation. | ||
4519 | */ | ||
4520 | int dev_ethtool_get_settings(struct net_device *dev, | ||
4521 | struct ethtool_cmd *cmd) | ||
4522 | { | ||
4523 | if (!dev->ethtool_ops || !dev->ethtool_ops->get_settings) | ||
4524 | return -EOPNOTSUPP; | ||
4525 | |||
4526 | memset(cmd, 0, sizeof(struct ethtool_cmd)); | ||
4527 | cmd->cmd = ETHTOOL_GSET; | ||
4528 | return dev->ethtool_ops->get_settings(dev, cmd); | ||
4529 | } | ||
4530 | EXPORT_SYMBOL(dev_ethtool_get_settings); | ||
4531 | |||
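[Editor's note] dev_ethtool_get_settings() wraps the NULL checks plus the memset/ETHTOOL_GSET setup that every in-kernel caller of get_settings() otherwise repeats. A short caller sketch, assuming RTNL is held as usual for ethtool_ops calls and that the ethtool_cmd_speed() accessor is available in this tree; the function name and the printed message are illustrative only:

    #include <linux/ethtool.h>
    #include <linux/netdevice.h>

    static void example_print_link_settings(struct net_device *dev)
    {
            struct ethtool_cmd cmd;

            if (dev_ethtool_get_settings(dev, &cmd))
                    return;         /* device exposes no get_settings() */

            netdev_info(dev, "speed %u Mb/s, duplex %u\n",
                        ethtool_cmd_speed(&cmd), cmd.duplex);
    }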
4532 | /** | ||
4207 | * dev_get_flags - get flags reported to userspace | 4533 | * dev_get_flags - get flags reported to userspace |
4208 | * @dev: device | 4534 | * @dev: device |
4209 | * | 4535 | * |
@@ -4372,6 +4698,17 @@ int dev_set_mtu(struct net_device *dev, int new_mtu) | |||
4372 | EXPORT_SYMBOL(dev_set_mtu); | 4698 | EXPORT_SYMBOL(dev_set_mtu); |
4373 | 4699 | ||
4374 | /** | 4700 | /** |
4701 | * dev_set_group - Change group this device belongs to | ||
4702 | * @dev: device | ||
4703 | * @new_group: group this device should belong to | ||
4704 | */ | ||
4705 | void dev_set_group(struct net_device *dev, int new_group) | ||
4706 | { | ||
4707 | dev->group = new_group; | ||
4708 | } | ||
4709 | EXPORT_SYMBOL(dev_set_group); | ||
4710 | |||
4711 | /** | ||
4375 | * dev_set_mac_address - Change Media Access Control Address | 4712 | * dev_set_mac_address - Change Media Access Control Address |
4376 | * @dev: device | 4713 | * @dev: device |
4377 | * @sa: new address | 4714 | * @sa: new address |
@@ -4456,7 +4793,7 @@ static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cm | |||
4456 | * is never reached | 4793 | * is never reached |
4457 | */ | 4794 | */ |
4458 | WARN_ON(1); | 4795 | WARN_ON(1); |
4459 | err = -EINVAL; | 4796 | err = -ENOTTY; |
4460 | break; | 4797 | break; |
4461 | 4798 | ||
4462 | } | 4799 | } |
@@ -4724,7 +5061,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg) | |||
4724 | /* Set the per device memory buffer space. | 5061 | /* Set the per device memory buffer space. |
4725 | * Not applicable in our case */ | 5062 | * Not applicable in our case */ |
4726 | case SIOCSIFLINK: | 5063 | case SIOCSIFLINK: |
4727 | return -EINVAL; | 5064 | return -ENOTTY; |
4728 | 5065 | ||
4729 | /* | 5066 | /* |
4730 | * Unknown or private ioctl. | 5067 | * Unknown or private ioctl. |
@@ -4745,7 +5082,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg) | |||
4745 | /* Take care of Wireless Extensions */ | 5082 | /* Take care of Wireless Extensions */ |
4746 | if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) | 5083 | if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) |
4747 | return wext_handle_ioctl(net, &ifr, cmd, arg); | 5084 | return wext_handle_ioctl(net, &ifr, cmd, arg); |
4748 | return -EINVAL; | 5085 | return -ENOTTY; |
4749 | } | 5086 | } |
4750 | } | 5087 | } |
4751 | 5088 | ||
@@ -4797,12 +5134,14 @@ static void rollback_registered_many(struct list_head *head) | |||
4797 | list_del(&dev->unreg_list); | 5134 | list_del(&dev->unreg_list); |
4798 | continue; | 5135 | continue; |
4799 | } | 5136 | } |
4800 | 5137 | dev->dismantle = true; | |
4801 | BUG_ON(dev->reg_state != NETREG_REGISTERED); | 5138 | BUG_ON(dev->reg_state != NETREG_REGISTERED); |
5139 | } | ||
4802 | 5140 | ||
4803 | /* If device is running, close it first. */ | 5141 | /* If device is running, close it first. */ |
4804 | dev_close(dev); | 5142 | dev_close_many(head); |
4805 | 5143 | ||
5144 | list_for_each_entry(dev, head, unreg_list) { | ||
4806 | /* And unlink it from device chain. */ | 5145 | /* And unlink it from device chain. */ |
4807 | unlist_netdevice(dev); | 5146 | unlist_netdevice(dev); |
4808 | 5147 | ||
@@ -4857,55 +5196,62 @@ static void rollback_registered(struct net_device *dev) | |||
4857 | 5196 | ||
4858 | list_add(&dev->unreg_list, &single); | 5197 | list_add(&dev->unreg_list, &single); |
4859 | rollback_registered_many(&single); | 5198 | rollback_registered_many(&single); |
5199 | list_del(&single); | ||
4860 | } | 5200 | } |
4861 | 5201 | ||
4862 | static void __netdev_init_queue_locks_one(struct net_device *dev, | 5202 | u32 netdev_fix_features(struct net_device *dev, u32 features) |
4863 | struct netdev_queue *dev_queue, | ||
4864 | void *_unused) | ||
4865 | { | 5203 | { |
4866 | spin_lock_init(&dev_queue->_xmit_lock); | 5204 | /* Fix illegal checksum combinations */ |
4867 | netdev_set_xmit_lockdep_class(&dev_queue->_xmit_lock, dev->type); | 5205 | if ((features & NETIF_F_HW_CSUM) && |
4868 | dev_queue->xmit_lock_owner = -1; | 5206 | (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { |
4869 | } | 5207 | netdev_warn(dev, "mixed HW and IP checksum settings.\n"); |
5208 | features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM); | ||
5209 | } | ||
4870 | 5210 | ||
4871 | static void netdev_init_queue_locks(struct net_device *dev) | 5211 | if ((features & NETIF_F_NO_CSUM) && |
4872 | { | 5212 | (features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { |
4873 | netdev_for_each_tx_queue(dev, __netdev_init_queue_locks_one, NULL); | 5213 | netdev_warn(dev, "mixed no checksumming and other settings.\n"); |
4874 | __netdev_init_queue_locks_one(dev, &dev->rx_queue, NULL); | 5214 | features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM); |
4875 | } | 5215 | } |
4876 | 5216 | ||
4877 | unsigned long netdev_fix_features(unsigned long features, const char *name) | ||
4878 | { | ||
4879 | /* Fix illegal SG+CSUM combinations. */ | 5217 | /* Fix illegal SG+CSUM combinations. */ |
4880 | if ((features & NETIF_F_SG) && | 5218 | if ((features & NETIF_F_SG) && |
4881 | !(features & NETIF_F_ALL_CSUM)) { | 5219 | !(features & NETIF_F_ALL_CSUM)) { |
4882 | if (name) | 5220 | netdev_dbg(dev, |
4883 | printk(KERN_NOTICE "%s: Dropping NETIF_F_SG since no " | 5221 | "Dropping NETIF_F_SG since no checksum feature.\n"); |
4884 | "checksum feature.\n", name); | ||
4885 | features &= ~NETIF_F_SG; | 5222 | features &= ~NETIF_F_SG; |
4886 | } | 5223 | } |
4887 | 5224 | ||
4888 | /* TSO requires that SG is present as well. */ | 5225 | /* TSO requires that SG is present as well. */ |
4889 | if ((features & NETIF_F_TSO) && !(features & NETIF_F_SG)) { | 5226 | if ((features & NETIF_F_ALL_TSO) && !(features & NETIF_F_SG)) { |
4890 | if (name) | 5227 | netdev_dbg(dev, "Dropping TSO features since no SG feature.\n"); |
4891 | printk(KERN_NOTICE "%s: Dropping NETIF_F_TSO since no " | 5228 | features &= ~NETIF_F_ALL_TSO; |
4892 | "SG feature.\n", name); | ||
4893 | features &= ~NETIF_F_TSO; | ||
4894 | } | 5229 | } |
4895 | 5230 | ||
5231 | /* TSO ECN requires that TSO is present as well. */ | ||
5232 | if ((features & NETIF_F_ALL_TSO) == NETIF_F_TSO_ECN) | ||
5233 | features &= ~NETIF_F_TSO_ECN; | ||
5234 | |||
5235 | /* Software GSO depends on SG. */ | ||
5236 | if ((features & NETIF_F_GSO) && !(features & NETIF_F_SG)) { | ||
5237 | netdev_dbg(dev, "Dropping NETIF_F_GSO since no SG feature.\n"); | ||
5238 | features &= ~NETIF_F_GSO; | ||
5239 | } | ||
5240 | |||
5241 | /* UFO needs SG and checksumming */ | ||
4896 | if (features & NETIF_F_UFO) { | 5242 | if (features & NETIF_F_UFO) { |
4897 | if (!(features & NETIF_F_GEN_CSUM)) { | 5243 | /* maybe split UFO into V4 and V6? */ |
4898 | if (name) | 5244 | if (!((features & NETIF_F_GEN_CSUM) || |
4899 | printk(KERN_ERR "%s: Dropping NETIF_F_UFO " | 5245 | (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM)) |
4900 | "since no NETIF_F_HW_CSUM feature.\n", | 5246 | == (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { |
4901 | name); | 5247 | netdev_dbg(dev, |
5248 | "Dropping NETIF_F_UFO since no checksum offload features.\n"); | ||
4902 | features &= ~NETIF_F_UFO; | 5249 | features &= ~NETIF_F_UFO; |
4903 | } | 5250 | } |
4904 | 5251 | ||
4905 | if (!(features & NETIF_F_SG)) { | 5252 | if (!(features & NETIF_F_SG)) { |
4906 | if (name) | 5253 | netdev_dbg(dev, |
4907 | printk(KERN_ERR "%s: Dropping NETIF_F_UFO " | 5254 | "Dropping NETIF_F_UFO since no NETIF_F_SG feature.\n"); |
4908 | "since no NETIF_F_SG feature.\n", name); | ||
4909 | features &= ~NETIF_F_UFO; | 5255 | features &= ~NETIF_F_UFO; |
4910 | } | 5256 | } |
4911 | } | 5257 | } |
@@ -4914,6 +5260,75 @@ unsigned long netdev_fix_features(unsigned long features, const char *name) | |||
4914 | } | 5260 | } |
4915 | EXPORT_SYMBOL(netdev_fix_features); | 5261 | EXPORT_SYMBOL(netdev_fix_features); |
4916 | 5262 | ||
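[Editor's note] netdev_fix_features() now takes the device and a candidate u32 mask and strips combinations that cannot work: SG without checksum offload, any TSO without SG, TSO_ECN without the rest of TSO, GSO without SG, UFO without SG or checksum offload. As __netdev_update_features() below shows, a driver gets to apply its own constraints first through ndo_fix_features. A hedged sketch of such a callback; the hardware limitation it encodes is invented for the example:

    #include <linux/netdevice.h>

    /* Hypothetical constraint: this hardware can only checksum over IPv4,
     * and its TSO engine depends on that checksum offload. */
    static u32 example_fix_features(struct net_device *dev, u32 features)
    {
            if (!(features & NETIF_F_IP_CSUM)) {
                    netdev_dbg(dev, "dropping TSO: no IPv4 checksum offload\n");
                    features &= ~NETIF_F_TSO;
            }
            /* The core still runs netdev_fix_features() on the result, so
             * generic dependencies (e.g. TSO needs SG) are enforced anyway. */
            return features;
    }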
5263 | int __netdev_update_features(struct net_device *dev) | ||
5264 | { | ||
5265 | u32 features; | ||
5266 | int err = 0; | ||
5267 | |||
5268 | ASSERT_RTNL(); | ||
5269 | |||
5270 | features = netdev_get_wanted_features(dev); | ||
5271 | |||
5272 | if (dev->netdev_ops->ndo_fix_features) | ||
5273 | features = dev->netdev_ops->ndo_fix_features(dev, features); | ||
5274 | |||
5275 | /* driver might be less strict about feature dependencies */ | ||
5276 | features = netdev_fix_features(dev, features); | ||
5277 | |||
5278 | if (dev->features == features) | ||
5279 | return 0; | ||
5280 | |||
5281 | netdev_dbg(dev, "Features changed: 0x%08x -> 0x%08x\n", | ||
5282 | dev->features, features); | ||
5283 | |||
5284 | if (dev->netdev_ops->ndo_set_features) | ||
5285 | err = dev->netdev_ops->ndo_set_features(dev, features); | ||
5286 | |||
5287 | if (unlikely(err < 0)) { | ||
5288 | netdev_err(dev, | ||
5289 | "set_features() failed (%d); wanted 0x%08x, left 0x%08x\n", | ||
5290 | err, features, dev->features); | ||
5291 | return -1; | ||
5292 | } | ||
5293 | |||
5294 | if (!err) | ||
5295 | dev->features = features; | ||
5296 | |||
5297 | return 1; | ||
5298 | } | ||
5299 | |||
5300 | /** | ||
5301 | * netdev_update_features - recalculate device features | ||
5302 | * @dev: the device to check | ||
5303 | * | ||
5304 | * Recalculate dev->features set and send notifications if it | ||
5305 | * has changed. Should be called after driver or hardware dependent | ||
5306 | * conditions might have changed that influence the features. | ||
5307 | */ | ||
5308 | void netdev_update_features(struct net_device *dev) | ||
5309 | { | ||
5310 | if (__netdev_update_features(dev)) | ||
5311 | netdev_features_change(dev); | ||
5312 | } | ||
5313 | EXPORT_SYMBOL(netdev_update_features); | ||
5314 | |||
5315 | /** | ||
5316 | * netdev_change_features - recalculate device features | ||
5317 | * @dev: the device to check | ||
5318 | * | ||
5319 | * Recalculate dev->features set and send notifications even | ||
5320 | * if they have not changed. Should be called instead of | ||
5321 | * netdev_update_features() if also dev->vlan_features might | ||
5322 | * have changed to allow the changes to be propagated to stacked | ||
5323 | * VLAN devices. | ||
5324 | */ | ||
5325 | void netdev_change_features(struct net_device *dev) | ||
5326 | { | ||
5327 | __netdev_update_features(dev); | ||
5328 | netdev_features_change(dev); | ||
5329 | } | ||
5330 | EXPORT_SYMBOL(netdev_change_features); | ||
5331 | |||
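[Editor's note] The two kernel-docs above describe the intended driver flow for the new feature model: advertise the user-toggleable bits in hw_features at probe time, program the hardware from ndo_set_features, and call netdev_update_features() under RTNL whenever a condition feeding ndo_fix_features changes. A rough sketch under those assumptions; both example_* functions and the chosen feature bits are illustrative:

    #include <linux/netdevice.h>
    #include <linux/rtnetlink.h>

    /* Probe-time setup (before register_netdev()): user-toggleable offloads
     * go into hw_features, the current defaults into features. */
    static void example_init_features(struct net_device *dev)
    {
            dev->hw_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO;
            dev->features |= dev->hw_features;
    }

    /* Hypothetical event handler: device state changed in a way that
     * example_fix_features() above cares about. */
    static void example_link_reconfigured(struct net_device *dev)
    {
            rtnl_lock();
            netdev_update_features(dev);    /* recompute, notify if changed */
            rtnl_unlock();
    }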
4917 | /** | 5332 | /** |
4918 | * netif_stacked_transfer_operstate - transfer operstate | 5333 | * netif_stacked_transfer_operstate - transfer operstate |
4919 | * @rootdev: the root or lower level device to transfer state from | 5334 | * @rootdev: the root or lower level device to transfer state from |
@@ -4941,6 +5356,59 @@ void netif_stacked_transfer_operstate(const struct net_device *rootdev, | |||
4941 | } | 5356 | } |
4942 | EXPORT_SYMBOL(netif_stacked_transfer_operstate); | 5357 | EXPORT_SYMBOL(netif_stacked_transfer_operstate); |
4943 | 5358 | ||
5359 | #ifdef CONFIG_RPS | ||
5360 | static int netif_alloc_rx_queues(struct net_device *dev) | ||
5361 | { | ||
5362 | unsigned int i, count = dev->num_rx_queues; | ||
5363 | struct netdev_rx_queue *rx; | ||
5364 | |||
5365 | BUG_ON(count < 1); | ||
5366 | |||
5367 | rx = kcalloc(count, sizeof(struct netdev_rx_queue), GFP_KERNEL); | ||
5368 | if (!rx) { | ||
5369 | pr_err("netdev: Unable to allocate %u rx queues.\n", count); | ||
5370 | return -ENOMEM; | ||
5371 | } | ||
5372 | dev->_rx = rx; | ||
5373 | |||
5374 | for (i = 0; i < count; i++) | ||
5375 | rx[i].dev = dev; | ||
5376 | return 0; | ||
5377 | } | ||
5378 | #endif | ||
5379 | |||
5380 | static void netdev_init_one_queue(struct net_device *dev, | ||
5381 | struct netdev_queue *queue, void *_unused) | ||
5382 | { | ||
5383 | /* Initialize queue lock */ | ||
5384 | spin_lock_init(&queue->_xmit_lock); | ||
5385 | netdev_set_xmit_lockdep_class(&queue->_xmit_lock, dev->type); | ||
5386 | queue->xmit_lock_owner = -1; | ||
5387 | netdev_queue_numa_node_write(queue, NUMA_NO_NODE); | ||
5388 | queue->dev = dev; | ||
5389 | } | ||
5390 | |||
5391 | static int netif_alloc_netdev_queues(struct net_device *dev) | ||
5392 | { | ||
5393 | unsigned int count = dev->num_tx_queues; | ||
5394 | struct netdev_queue *tx; | ||
5395 | |||
5396 | BUG_ON(count < 1); | ||
5397 | |||
5398 | tx = kcalloc(count, sizeof(struct netdev_queue), GFP_KERNEL); | ||
5399 | if (!tx) { | ||
5400 | pr_err("netdev: Unable to allocate %u tx queues.\n", | ||
5401 | count); | ||
5402 | return -ENOMEM; | ||
5403 | } | ||
5404 | dev->_tx = tx; | ||
5405 | |||
5406 | netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL); | ||
5407 | spin_lock_init(&dev->tx_global_lock); | ||
5408 | |||
5409 | return 0; | ||
5410 | } | ||
5411 | |||
4944 | /** | 5412 | /** |
4945 | * register_netdevice - register a network device | 5413 | * register_netdevice - register a network device |
4946 | * @dev: device to register | 5414 | * @dev: device to register |
@@ -4974,28 +5442,13 @@ int register_netdevice(struct net_device *dev) | |||
4974 | 5442 | ||
4975 | spin_lock_init(&dev->addr_list_lock); | 5443 | spin_lock_init(&dev->addr_list_lock); |
4976 | netdev_set_addr_lockdep_class(dev); | 5444 | netdev_set_addr_lockdep_class(dev); |
4977 | netdev_init_queue_locks(dev); | ||
4978 | 5445 | ||
4979 | dev->iflink = -1; | 5446 | dev->iflink = -1; |
4980 | 5447 | ||
4981 | #ifdef CONFIG_RPS | 5448 | ret = dev_get_valid_name(dev, dev->name); |
4982 | if (!dev->num_rx_queues) { | 5449 | if (ret < 0) |
4983 | /* | 5450 | goto out; |
4984 | * Allocate a single RX queue if driver never called | ||
4985 | * alloc_netdev_mq | ||
4986 | */ | ||
4987 | |||
4988 | dev->_rx = kzalloc(sizeof(struct netdev_rx_queue), GFP_KERNEL); | ||
4989 | if (!dev->_rx) { | ||
4990 | ret = -ENOMEM; | ||
4991 | goto out; | ||
4992 | } | ||
4993 | 5451 | ||
4994 | dev->_rx->first = dev->_rx; | ||
4995 | atomic_set(&dev->_rx->count, 1); | ||
4996 | dev->num_rx_queues = 1; | ||
4997 | } | ||
4998 | #endif | ||
4999 | /* Init, if this function is available */ | 5452 | /* Init, if this function is available */ |
5000 | if (dev->netdev_ops->ndo_init) { | 5453 | if (dev->netdev_ops->ndo_init) { |
5001 | ret = dev->netdev_ops->ndo_init(dev); | 5454 | ret = dev->netdev_ops->ndo_init(dev); |
@@ -5006,34 +5459,30 @@ int register_netdevice(struct net_device *dev) | |||
5006 | } | 5459 | } |
5007 | } | 5460 | } |
5008 | 5461 | ||
5009 | ret = dev_get_valid_name(dev, dev->name, 0); | ||
5010 | if (ret) | ||
5011 | goto err_uninit; | ||
5012 | |||
5013 | dev->ifindex = dev_new_index(net); | 5462 | dev->ifindex = dev_new_index(net); |
5014 | if (dev->iflink == -1) | 5463 | if (dev->iflink == -1) |
5015 | dev->iflink = dev->ifindex; | 5464 | dev->iflink = dev->ifindex; |
5016 | 5465 | ||
5017 | /* Fix illegal checksum combinations */ | 5466 | /* Transfer changeable features to wanted_features and enable |
5018 | if ((dev->features & NETIF_F_HW_CSUM) && | 5467 | * software offloads (GSO and GRO). |
5019 | (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { | 5468 | */ |
5020 | printk(KERN_NOTICE "%s: mixed HW and IP checksum settings.\n", | 5469 | dev->hw_features |= NETIF_F_SOFT_FEATURES; |
5021 | dev->name); | 5470 | dev->features |= NETIF_F_SOFT_FEATURES; |
5022 | dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM); | 5471 | dev->wanted_features = dev->features & dev->hw_features; |
5023 | } | ||
5024 | 5472 | ||
5025 | if ((dev->features & NETIF_F_NO_CSUM) && | 5473 | /* Turn on no cache copy if HW is doing checksum */ |
5026 | (dev->features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { | 5474 | dev->hw_features |= NETIF_F_NOCACHE_COPY; |
5027 | printk(KERN_NOTICE "%s: mixed no checksumming and other settings.\n", | 5475 | if ((dev->features & NETIF_F_ALL_CSUM) && |
5028 | dev->name); | 5476 | !(dev->features & NETIF_F_NO_CSUM)) { |
5029 | dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM); | 5477 | dev->wanted_features |= NETIF_F_NOCACHE_COPY; |
5478 | dev->features |= NETIF_F_NOCACHE_COPY; | ||
5030 | } | 5479 | } |
5031 | 5480 | ||
5032 | dev->features = netdev_fix_features(dev->features, dev->name); | 5481 | /* Enable GRO and NETIF_F_HIGHDMA for vlans by default, |
5033 | 5482 | * vlan_dev_init() will do the dev->features check, so these features | |
5034 | /* Enable software GSO if SG is supported. */ | 5483 | * are enabled only if supported by underlying device. |
5035 | if (dev->features & NETIF_F_SG) | 5484 | */ |
5036 | dev->features |= NETIF_F_GSO; | 5485 | dev->vlan_features |= (NETIF_F_GRO | NETIF_F_HIGHDMA); |
5037 | 5486 | ||
5038 | ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev); | 5487 | ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev); |
5039 | ret = notifier_to_errno(ret); | 5488 | ret = notifier_to_errno(ret); |
@@ -5045,6 +5494,8 @@ int register_netdevice(struct net_device *dev) | |||
5045 | goto err_uninit; | 5494 | goto err_uninit; |
5046 | dev->reg_state = NETREG_REGISTERED; | 5495 | dev->reg_state = NETREG_REGISTERED; |
5047 | 5496 | ||
5497 | __netdev_update_features(dev); | ||
5498 | |||
5048 | /* | 5499 | /* |
5049 | * Default initial state at registry is that the | 5500 | * Default initial state at registry is that the |
5050 | * device is present. | 5501 | * device is present. |
@@ -5105,9 +5556,6 @@ int init_dummy_netdev(struct net_device *dev) | |||
5105 | */ | 5556 | */ |
5106 | dev->reg_state = NETREG_DUMMY; | 5557 | dev->reg_state = NETREG_DUMMY; |
5107 | 5558 | ||
5108 | /* initialize the ref count */ | ||
5109 | atomic_set(&dev->refcnt, 1); | ||
5110 | |||
5111 | /* NAPI wants this */ | 5559 | /* NAPI wants this */ |
5112 | INIT_LIST_HEAD(&dev->napi_list); | 5560 | INIT_LIST_HEAD(&dev->napi_list); |
5113 | 5561 | ||
@@ -5115,6 +5563,11 @@ int init_dummy_netdev(struct net_device *dev) | |||
5115 | set_bit(__LINK_STATE_PRESENT, &dev->state); | 5563 | set_bit(__LINK_STATE_PRESENT, &dev->state); |
5116 | set_bit(__LINK_STATE_START, &dev->state); | 5564 | set_bit(__LINK_STATE_START, &dev->state); |
5117 | 5565 | ||
5566 | /* Note : We dont allocate pcpu_refcnt for dummy devices, | ||
5567 | * because users of this 'device' dont need to change | ||
5568 | * its refcount. | ||
5569 | */ | ||
5570 | |||
5118 | return 0; | 5571 | return 0; |
5119 | } | 5572 | } |
5120 | EXPORT_SYMBOL_GPL(init_dummy_netdev); | 5573 | EXPORT_SYMBOL_GPL(init_dummy_netdev); |
@@ -5138,24 +5591,22 @@ int register_netdev(struct net_device *dev) | |||
5138 | int err; | 5591 | int err; |
5139 | 5592 | ||
5140 | rtnl_lock(); | 5593 | rtnl_lock(); |
5141 | |||
5142 | /* | ||
5143 | * If the name is a format string the caller wants us to do a | ||
5144 | * name allocation. | ||
5145 | */ | ||
5146 | if (strchr(dev->name, '%')) { | ||
5147 | err = dev_alloc_name(dev, dev->name); | ||
5148 | if (err < 0) | ||
5149 | goto out; | ||
5150 | } | ||
5151 | |||
5152 | err = register_netdevice(dev); | 5594 | err = register_netdevice(dev); |
5153 | out: | ||
5154 | rtnl_unlock(); | 5595 | rtnl_unlock(); |
5155 | return err; | 5596 | return err; |
5156 | } | 5597 | } |
5157 | EXPORT_SYMBOL(register_netdev); | 5598 | EXPORT_SYMBOL(register_netdev); |
5158 | 5599 | ||
5600 | int netdev_refcnt_read(const struct net_device *dev) | ||
5601 | { | ||
5602 | int i, refcnt = 0; | ||
5603 | |||
5604 | for_each_possible_cpu(i) | ||
5605 | refcnt += *per_cpu_ptr(dev->pcpu_refcnt, i); | ||
5606 | return refcnt; | ||
5607 | } | ||
5608 | EXPORT_SYMBOL(netdev_refcnt_read); | ||
5609 | |||
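[Editor's note] netdev_refcnt_read() goes with the conversion of the device refcount from one shared atomic_t to a per-CPU counter: dev_hold()/dev_put() now only touch the local CPU's slot, and an exact total is needed only on the slow unregister path, where this helper sums every slot. A simplified paraphrase of what the fast-path helpers look like after the change (not the exact netdevice.h source):

    #include <linux/netdevice.h>

    /* Sketch of the per-CPU reference counting scheme (simplified). */
    static inline void example_dev_hold(struct net_device *dev)
    {
            this_cpu_inc(*dev->pcpu_refcnt);        /* no cross-CPU contention */
    }

    static inline void example_dev_put(struct net_device *dev)
    {
            this_cpu_dec(*dev->pcpu_refcnt);        /* may go locally negative */
    }

    /* The sum over all possible CPUs, netdev_refcnt_read() above, is the
     * only place an exact count is computed, and it is only meaningful once
     * new references can no longer be taken (during unregister). */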
5159 | /* | 5610 | /* |
5160 | * netdev_wait_allrefs - wait until all references are gone. | 5611 | * netdev_wait_allrefs - wait until all references are gone. |
5161 | * | 5612 | * |
@@ -5170,11 +5621,14 @@ EXPORT_SYMBOL(register_netdev); | |||
5170 | static void netdev_wait_allrefs(struct net_device *dev) | 5621 | static void netdev_wait_allrefs(struct net_device *dev) |
5171 | { | 5622 | { |
5172 | unsigned long rebroadcast_time, warning_time; | 5623 | unsigned long rebroadcast_time, warning_time; |
5624 | int refcnt; | ||
5173 | 5625 | ||
5174 | linkwatch_forget_dev(dev); | 5626 | linkwatch_forget_dev(dev); |
5175 | 5627 | ||
5176 | rebroadcast_time = warning_time = jiffies; | 5628 | rebroadcast_time = warning_time = jiffies; |
5177 | while (atomic_read(&dev->refcnt) != 0) { | 5629 | refcnt = netdev_refcnt_read(dev); |
5630 | |||
5631 | while (refcnt != 0) { | ||
5178 | if (time_after(jiffies, rebroadcast_time + 1 * HZ)) { | 5632 | if (time_after(jiffies, rebroadcast_time + 1 * HZ)) { |
5179 | rtnl_lock(); | 5633 | rtnl_lock(); |
5180 | 5634 | ||
@@ -5201,11 +5655,13 @@ static void netdev_wait_allrefs(struct net_device *dev) | |||
5201 | 5655 | ||
5202 | msleep(250); | 5656 | msleep(250); |
5203 | 5657 | ||
5658 | refcnt = netdev_refcnt_read(dev); | ||
5659 | |||
5204 | if (time_after(jiffies, warning_time + 10 * HZ)) { | 5660 | if (time_after(jiffies, warning_time + 10 * HZ)) { |
5205 | printk(KERN_EMERG "unregister_netdevice: " | 5661 | printk(KERN_EMERG "unregister_netdevice: " |
5206 | "waiting for %s to become free. Usage " | 5662 | "waiting for %s to become free. Usage " |
5207 | "count = %d\n", | 5663 | "count = %d\n", |
5208 | dev->name, atomic_read(&dev->refcnt)); | 5664 | dev->name, refcnt); |
5209 | warning_time = jiffies; | 5665 | warning_time = jiffies; |
5210 | } | 5666 | } |
5211 | } | 5667 | } |
@@ -5263,9 +5719,9 @@ void netdev_run_todo(void) | |||
5263 | netdev_wait_allrefs(dev); | 5719 | netdev_wait_allrefs(dev); |
5264 | 5720 | ||
5265 | /* paranoia */ | 5721 | /* paranoia */ |
5266 | BUG_ON(atomic_read(&dev->refcnt)); | 5722 | BUG_ON(netdev_refcnt_read(dev)); |
5267 | WARN_ON(dev->ip_ptr); | 5723 | WARN_ON(rcu_dereference_raw(dev->ip_ptr)); |
5268 | WARN_ON(dev->ip6_ptr); | 5724 | WARN_ON(rcu_dereference_raw(dev->ip6_ptr)); |
5269 | WARN_ON(dev->dn_ptr); | 5725 | WARN_ON(dev->dn_ptr); |
5270 | 5726 | ||
5271 | if (dev->destructor) | 5727 | if (dev->destructor) |
@@ -5276,34 +5732,6 @@ void netdev_run_todo(void) | |||
5276 | } | 5732 | } |
5277 | } | 5733 | } |
5278 | 5734 | ||
5279 | /** | ||
5280 | * dev_txq_stats_fold - fold tx_queues stats | ||
5281 | * @dev: device to get statistics from | ||
5282 | * @stats: struct rtnl_link_stats64 to hold results | ||
5283 | */ | ||
5284 | void dev_txq_stats_fold(const struct net_device *dev, | ||
5285 | struct rtnl_link_stats64 *stats) | ||
5286 | { | ||
5287 | u64 tx_bytes = 0, tx_packets = 0, tx_dropped = 0; | ||
5288 | unsigned int i; | ||
5289 | struct netdev_queue *txq; | ||
5290 | |||
5291 | for (i = 0; i < dev->num_tx_queues; i++) { | ||
5292 | txq = netdev_get_tx_queue(dev, i); | ||
5293 | spin_lock_bh(&txq->_xmit_lock); | ||
5294 | tx_bytes += txq->tx_bytes; | ||
5295 | tx_packets += txq->tx_packets; | ||
5296 | tx_dropped += txq->tx_dropped; | ||
5297 | spin_unlock_bh(&txq->_xmit_lock); | ||
5298 | } | ||
5299 | if (tx_bytes || tx_packets || tx_dropped) { | ||
5300 | stats->tx_bytes = tx_bytes; | ||
5301 | stats->tx_packets = tx_packets; | ||
5302 | stats->tx_dropped = tx_dropped; | ||
5303 | } | ||
5304 | } | ||
5305 | EXPORT_SYMBOL(dev_txq_stats_fold); | ||
5306 | |||
5307 | /* Convert net_device_stats to rtnl_link_stats64. They have the same | 5735 | /* Convert net_device_stats to rtnl_link_stats64. They have the same |
5308 | * fields in the same order, with only the type differing. | 5736 | * fields in the same order, with only the type differing. |
5309 | */ | 5737 | */ |
@@ -5342,57 +5770,71 @@ struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev, | |||
5342 | 5770 | ||
5343 | if (ops->ndo_get_stats64) { | 5771 | if (ops->ndo_get_stats64) { |
5344 | memset(storage, 0, sizeof(*storage)); | 5772 | memset(storage, 0, sizeof(*storage)); |
5345 | return ops->ndo_get_stats64(dev, storage); | 5773 | ops->ndo_get_stats64(dev, storage); |
5346 | } | 5774 | } else if (ops->ndo_get_stats) { |
5347 | if (ops->ndo_get_stats) { | ||
5348 | netdev_stats_to_stats64(storage, ops->ndo_get_stats(dev)); | 5775 | netdev_stats_to_stats64(storage, ops->ndo_get_stats(dev)); |
5349 | return storage; | 5776 | } else { |
5777 | netdev_stats_to_stats64(storage, &dev->stats); | ||
5350 | } | 5778 | } |
5351 | netdev_stats_to_stats64(storage, &dev->stats); | 5779 | storage->rx_dropped += atomic_long_read(&dev->rx_dropped); |
5352 | dev_txq_stats_fold(dev, storage); | ||
5353 | return storage; | 5780 | return storage; |
5354 | } | 5781 | } |
5355 | EXPORT_SYMBOL(dev_get_stats); | 5782 | EXPORT_SYMBOL(dev_get_stats); |
5356 | 5783 | ||
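With dev_txq_stats_fold() gone (removed above), the core no longer folds per-TX-queue byte/packet counters into the result; a multiqueue driver is expected to report totals itself via ndo_get_stats64. dev_get_stats() zeroes *storage before the callback and afterwards adds the core's software rx_dropped counter. A hedged driver-side sketch -- struct my_priv and its per-queue counters are hypothetical, not part of this patch:

	struct my_txq_stats {			/* hypothetical per-queue counters */
		u64 tx_packets;
		u64 tx_bytes;
	};

	struct my_priv {			/* hypothetical driver private data */
		struct my_txq_stats *txq;	/* one entry per TX queue */
	};

	static struct rtnl_link_stats64 *
	my_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
	{
		struct my_priv *priv = netdev_priv(dev);
		unsigned int i;

		/* *stats was zeroed by dev_get_stats() before this call */
		for (i = 0; i < dev->real_num_tx_queues; i++) {
			stats->tx_packets += priv->txq[i].tx_packets;
			stats->tx_bytes   += priv->txq[i].tx_bytes;
		}
		return stats;
	}

A real driver updating these counters from the datapath would additionally need a u64_stats_sync or per-CPU scheme for consistent 64-bit reads on 32-bit hosts; that detail is omitted here.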
5357 | static void netdev_init_one_queue(struct net_device *dev, | 5784 | struct netdev_queue *dev_ingress_queue_create(struct net_device *dev) |
5358 | struct netdev_queue *queue, | ||
5359 | void *_unused) | ||
5360 | { | 5785 | { |
5361 | queue->dev = dev; | 5786 | struct netdev_queue *queue = dev_ingress_queue(dev); |
5362 | } | ||
5363 | 5787 | ||
5364 | static void netdev_init_queues(struct net_device *dev) | 5788 | #ifdef CONFIG_NET_CLS_ACT |
5365 | { | 5789 | if (queue) |
5366 | netdev_init_one_queue(dev, &dev->rx_queue, NULL); | 5790 | return queue; |
5367 | netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL); | 5791 | queue = kzalloc(sizeof(*queue), GFP_KERNEL); |
5368 | spin_lock_init(&dev->tx_global_lock); | 5792 | if (!queue) |
5793 | return NULL; | ||
5794 | netdev_init_one_queue(dev, queue, NULL); | ||
5795 | queue->qdisc = &noop_qdisc; | ||
5796 | queue->qdisc_sleeping = &noop_qdisc; | ||
5797 | rcu_assign_pointer(dev->ingress_queue, queue); | ||
5798 | #endif | ||
5799 | return queue; | ||
5369 | } | 5800 | } |
5370 | 5801 | ||
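dev_ingress_queue_create() allocates the ingress netdev_queue lazily -- only when an ingress qdisc is actually attached under CONFIG_NET_CLS_ACT -- and publishes it with rcu_assign_pointer() so the receive fast path can look it up without holding RTNL. A hedged sketch of the reader side, roughly what an ingress-classification path would do (my_has_ingress_qdisc() is illustrative, not a function added by this patch):

	static bool my_has_ingress_qdisc(struct net_device *dev)
	{
		struct netdev_queue *rxq;
		bool ret;

		rcu_read_lock();
		rxq = rcu_dereference(dev->ingress_queue);
		ret = rxq && rxq->qdisc != &noop_qdisc;
		rcu_read_unlock();
		return ret;
	}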
5371 | /** | 5802 | /** |
5372 | * alloc_netdev_mq - allocate network device | 5803 | * alloc_netdev_mqs - allocate network device |
5373 | * @sizeof_priv: size of private data to allocate space for | 5804 | * @sizeof_priv: size of private data to allocate space for |
5374 | * @name: device name format string | 5805 | * @name: device name format string |
5375 | * @setup: callback to initialize device | 5806 | * @setup: callback to initialize device |
5376 | * @queue_count: the number of subqueues to allocate | 5807 | * @txqs: the number of TX subqueues to allocate |
5808 | * @rxqs: the number of RX subqueues to allocate | ||
5377 | * | 5809 | * |
5378 | * Allocates a struct net_device with private data area for driver use | 5810 | * Allocates a struct net_device with private data area for driver use |
5379 | * and performs basic initialization. Also allocates subqueue structs | 5811 | * and performs basic initialization. Also allocates subqueue structs |
5380 | * for each queue on the device at the end of the netdevice. | 5812 | * for each queue on the device. |
5381 | */ | 5813 | */ |
5382 | struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, | 5814 | struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, |
5383 | void (*setup)(struct net_device *), unsigned int queue_count) | 5815 | void (*setup)(struct net_device *), |
5816 | unsigned int txqs, unsigned int rxqs) | ||
5384 | { | 5817 | { |
5385 | struct netdev_queue *tx; | ||
5386 | struct net_device *dev; | 5818 | struct net_device *dev; |
5387 | size_t alloc_size; | 5819 | size_t alloc_size; |
5388 | struct net_device *p; | 5820 | struct net_device *p; |
5389 | #ifdef CONFIG_RPS | ||
5390 | struct netdev_rx_queue *rx; | ||
5391 | int i; | ||
5392 | #endif | ||
5393 | 5821 | ||
5394 | BUG_ON(strlen(name) >= sizeof(dev->name)); | 5822 | BUG_ON(strlen(name) >= sizeof(dev->name)); |
5395 | 5823 | ||
5824 | if (txqs < 1) { | ||
5825 | pr_err("alloc_netdev: Unable to allocate device " | ||
5826 | "with zero queues.\n"); | ||
5827 | return NULL; | ||
5828 | } | ||
5829 | |||
5830 | #ifdef CONFIG_RPS | ||
5831 | if (rxqs < 1) { | ||
5832 | pr_err("alloc_netdev: Unable to allocate device " | ||
5833 | "with zero RX queues.\n"); | ||
5834 | return NULL; | ||
5835 | } | ||
5836 | #endif | ||
5837 | |||
5396 | alloc_size = sizeof(struct net_device); | 5838 | alloc_size = sizeof(struct net_device); |
5397 | if (sizeof_priv) { | 5839 | if (sizeof_priv) { |
5398 | /* ensure 32-byte alignment of private area */ | 5840 | /* ensure 32-byte alignment of private area */ |
@@ -5408,55 +5850,23 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, | |||
5408 | return NULL; | 5850 | return NULL; |
5409 | } | 5851 | } |
5410 | 5852 | ||
5411 | tx = kcalloc(queue_count, sizeof(struct netdev_queue), GFP_KERNEL); | ||
5412 | if (!tx) { | ||
5413 | printk(KERN_ERR "alloc_netdev: Unable to allocate " | ||
5414 | "tx qdiscs.\n"); | ||
5415 | goto free_p; | ||
5416 | } | ||
5417 | |||
5418 | #ifdef CONFIG_RPS | ||
5419 | rx = kcalloc(queue_count, sizeof(struct netdev_rx_queue), GFP_KERNEL); | ||
5420 | if (!rx) { | ||
5421 | printk(KERN_ERR "alloc_netdev: Unable to allocate " | ||
5422 | "rx queues.\n"); | ||
5423 | goto free_tx; | ||
5424 | } | ||
5425 | |||
5426 | atomic_set(&rx->count, queue_count); | ||
5427 | |||
5428 | /* | ||
5429 | * Set a pointer to first element in the array which holds the | ||
5430 | * reference count. | ||
5431 | */ | ||
5432 | for (i = 0; i < queue_count; i++) | ||
5433 | rx[i].first = rx; | ||
5434 | #endif | ||
5435 | |||
5436 | dev = PTR_ALIGN(p, NETDEV_ALIGN); | 5853 | dev = PTR_ALIGN(p, NETDEV_ALIGN); |
5437 | dev->padded = (char *)dev - (char *)p; | 5854 | dev->padded = (char *)dev - (char *)p; |
5438 | 5855 | ||
5856 | dev->pcpu_refcnt = alloc_percpu(int); | ||
5857 | if (!dev->pcpu_refcnt) | ||
5858 | goto free_p; | ||
5859 | |||
5439 | if (dev_addr_init(dev)) | 5860 | if (dev_addr_init(dev)) |
5440 | goto free_rx; | 5861 | goto free_pcpu; |
5441 | 5862 | ||
5442 | dev_mc_init(dev); | 5863 | dev_mc_init(dev); |
5443 | dev_uc_init(dev); | 5864 | dev_uc_init(dev); |
5444 | 5865 | ||
5445 | dev_net_set(dev, &init_net); | 5866 | dev_net_set(dev, &init_net); |
5446 | 5867 | ||
5447 | dev->_tx = tx; | ||
5448 | dev->num_tx_queues = queue_count; | ||
5449 | dev->real_num_tx_queues = queue_count; | ||
5450 | |||
5451 | #ifdef CONFIG_RPS | ||
5452 | dev->_rx = rx; | ||
5453 | dev->num_rx_queues = queue_count; | ||
5454 | #endif | ||
5455 | |||
5456 | dev->gso_max_size = GSO_MAX_SIZE; | 5868 | dev->gso_max_size = GSO_MAX_SIZE; |
5457 | 5869 | ||
5458 | netdev_init_queues(dev); | ||
5459 | |||
5460 | INIT_LIST_HEAD(&dev->ethtool_ntuple_list.list); | 5870 | INIT_LIST_HEAD(&dev->ethtool_ntuple_list.list); |
5461 | dev->ethtool_ntuple_list.count = 0; | 5871 | dev->ethtool_ntuple_list.count = 0; |
5462 | INIT_LIST_HEAD(&dev->napi_list); | 5872 | INIT_LIST_HEAD(&dev->napi_list); |
@@ -5464,20 +5874,39 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, | |||
5464 | INIT_LIST_HEAD(&dev->link_watch_list); | 5874 | INIT_LIST_HEAD(&dev->link_watch_list); |
5465 | dev->priv_flags = IFF_XMIT_DST_RELEASE; | 5875 | dev->priv_flags = IFF_XMIT_DST_RELEASE; |
5466 | setup(dev); | 5876 | setup(dev); |
5877 | |||
5878 | dev->num_tx_queues = txqs; | ||
5879 | dev->real_num_tx_queues = txqs; | ||
5880 | if (netif_alloc_netdev_queues(dev)) | ||
5881 | goto free_all; | ||
5882 | |||
5883 | #ifdef CONFIG_RPS | ||
5884 | dev->num_rx_queues = rxqs; | ||
5885 | dev->real_num_rx_queues = rxqs; | ||
5886 | if (netif_alloc_rx_queues(dev)) | ||
5887 | goto free_all; | ||
5888 | #endif | ||
5889 | |||
5467 | strcpy(dev->name, name); | 5890 | strcpy(dev->name, name); |
5891 | dev->group = INIT_NETDEV_GROUP; | ||
5468 | return dev; | 5892 | return dev; |
5469 | 5893 | ||
5470 | free_rx: | 5894 | free_all: |
5895 | free_netdev(dev); | ||
5896 | return NULL; | ||
5897 | |||
5898 | free_pcpu: | ||
5899 | free_percpu(dev->pcpu_refcnt); | ||
5900 | kfree(dev->_tx); | ||
5471 | #ifdef CONFIG_RPS | 5901 | #ifdef CONFIG_RPS |
5472 | kfree(rx); | 5902 | kfree(dev->_rx); |
5473 | free_tx: | ||
5474 | #endif | 5903 | #endif |
5475 | kfree(tx); | 5904 | |
5476 | free_p: | 5905 | free_p: |
5477 | kfree(p); | 5906 | kfree(p); |
5478 | return NULL; | 5907 | return NULL; |
5479 | } | 5908 | } |
5480 | EXPORT_SYMBOL(alloc_netdev_mq); | 5909 | EXPORT_SYMBOL(alloc_netdev_mqs); |
5481 | 5910 | ||
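alloc_netdev_mqs() takes separate TX and RX queue counts where alloc_netdev_mq() took a single queue_count; the old single-count entry point is presumably kept as a thin wrapper that passes the same value for both (not shown in this hunk). A hedged usage sketch -- struct my_priv and my_setup() are hypothetical driver pieces:

	static int my_probe(void)
	{
		struct net_device *dev;
		int err;

		dev = alloc_netdev_mqs(sizeof(struct my_priv), "myeth%d",
				       my_setup, 8 /* TX queues */, 4 /* RX queues */);
		if (!dev)
			return -ENOMEM;

		err = register_netdev(dev);	/* "%d" in the name is resolved here */
		if (err) {
			free_netdev(dev);
			return err;
		}
		return 0;
	}

Note that register_netdev() no longer expands the '%' format itself (see the first hunk of this section); callers passing names like "myeth%d" still work because that handling presumably moved into register_netdevice().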
5482 | /** | 5911 | /** |
5483 | * free_netdev - free network device | 5912 | * free_netdev - free network device |
@@ -5494,6 +5923,11 @@ void free_netdev(struct net_device *dev) | |||
5494 | release_net(dev_net(dev)); | 5923 | release_net(dev_net(dev)); |
5495 | 5924 | ||
5496 | kfree(dev->_tx); | 5925 | kfree(dev->_tx); |
5926 | #ifdef CONFIG_RPS | ||
5927 | kfree(dev->_rx); | ||
5928 | #endif | ||
5929 | |||
5930 | kfree(rcu_dereference_raw(dev->ingress_queue)); | ||
5497 | 5931 | ||
5498 | /* Flush device addresses */ | 5932 | /* Flush device addresses */ |
5499 | dev_addr_flush(dev); | 5933 | dev_addr_flush(dev); |
@@ -5504,6 +5938,9 @@ void free_netdev(struct net_device *dev) | |||
5504 | list_for_each_entry_safe(p, n, &dev->napi_list, dev_list) | 5938 | list_for_each_entry_safe(p, n, &dev->napi_list, dev_list) |
5505 | netif_napi_del(p); | 5939 | netif_napi_del(p); |
5506 | 5940 | ||
5941 | free_percpu(dev->pcpu_refcnt); | ||
5942 | dev->pcpu_refcnt = NULL; | ||
5943 | |||
5507 | /* Compatibility with error handling in drivers */ | 5944 | /* Compatibility with error handling in drivers */ |
5508 | if (dev->reg_state == NETREG_UNINITIALIZED) { | 5945 | if (dev->reg_state == NETREG_UNINITIALIZED) { |
5509 | kfree((char *)dev - dev->padded); | 5946 | kfree((char *)dev - dev->padded); |
@@ -5527,7 +5964,10 @@ EXPORT_SYMBOL(free_netdev); | |||
5527 | void synchronize_net(void) | 5964 | void synchronize_net(void) |
5528 | { | 5965 | { |
5529 | might_sleep(); | 5966 | might_sleep(); |
5530 | synchronize_rcu(); | 5967 | if (rtnl_is_locked()) |
5968 | synchronize_rcu_expedited(); | ||
5969 | else | ||
5970 | synchronize_rcu(); | ||
5531 | } | 5971 | } |
5532 | EXPORT_SYMBOL(synchronize_net); | 5972 | EXPORT_SYMBOL(synchronize_net); |
5533 | 5973 | ||
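synchronize_net() now uses the expedited RCU grace period whenever the caller holds RTNL, shortening the stall in configuration paths that unpublish an RCU-visible pointer and then free it. A hedged sketch of the pattern it serves -- dev->some_ptr, struct my_cfg and my_clear_cfg() are illustrative stand-ins, not real fields or functions:

	static void my_clear_cfg(struct net_device *dev)
	{
		struct my_cfg *old;

		ASSERT_RTNL();
		old = rtnl_dereference(dev->some_ptr);	/* hypothetical field */
		rcu_assign_pointer(dev->some_ptr, NULL);
		synchronize_net();	/* expedited here, since RTNL is held */
		kfree(old);
	}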
@@ -5636,7 +6076,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char | |||
5636 | /* We get here if we can't use the current device name */ | 6076 | /* We get here if we can't use the current device name */ |
5637 | if (!pat) | 6077 | if (!pat) |
5638 | goto out; | 6078 | goto out; |
5639 | if (dev_get_valid_name(dev, pat, 1)) | 6079 | if (dev_get_valid_name(dev, pat) < 0) |
5640 | goto out; | 6080 | goto out; |
5641 | } | 6081 | } |
5642 | 6082 | ||
@@ -5658,6 +6098,10 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char | |||
5658 | 6098 | ||
5659 | /* Notify protocols, that we are about to destroy | 6099 | /* Notify protocols, that we are about to destroy |
5660 | this device. They should clean all the things. | 6100 | this device. They should clean all the things. |
6101 | |||
6102 | Note that dev->reg_state stays at NETREG_REGISTERED. | ||
6103 | This is wanted because this way 8021q and macvlan know | ||
6104 | the device is just moving and can keep their slaves up. | ||
5661 | */ | 6105 | */ |
5662 | call_netdevice_notifiers(NETDEV_UNREGISTER, dev); | 6106 | call_netdevice_notifiers(NETDEV_UNREGISTER, dev); |
5663 | call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev); | 6107 | call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev); |
@@ -5734,6 +6178,11 @@ static int dev_cpu_callback(struct notifier_block *nfb, | |||
5734 | oldsd->output_queue = NULL; | 6178 | oldsd->output_queue = NULL; |
5735 | oldsd->output_queue_tailp = &oldsd->output_queue; | 6179 | oldsd->output_queue_tailp = &oldsd->output_queue; |
5736 | } | 6180 | } |
6181 | /* Append NAPI poll list from offline CPU. */ | ||
6182 | if (!list_empty(&oldsd->poll_list)) { | ||
6183 | list_splice_init(&oldsd->poll_list, &sd->poll_list); | ||
6184 | raise_softirq_irqoff(NET_RX_SOFTIRQ); | ||
6185 | } | ||
5737 | 6186 | ||
5738 | raise_softirq_irqoff(NET_TX_SOFTIRQ); | 6187 | raise_softirq_irqoff(NET_TX_SOFTIRQ); |
5739 | local_irq_enable(); | 6188 | local_irq_enable(); |
@@ -5762,32 +6211,22 @@ static int dev_cpu_callback(struct notifier_block *nfb, | |||
5762 | * @one to the master device with current feature set @all. Will not | 6211 | * @one to the master device with current feature set @all. Will not |
5763 | * enable anything that is off in @mask. Returns the new feature set. | 6212 | * enable anything that is off in @mask. Returns the new feature set. |
5764 | */ | 6213 | */ |
5765 | unsigned long netdev_increment_features(unsigned long all, unsigned long one, | 6214 | u32 netdev_increment_features(u32 all, u32 one, u32 mask) |
5766 | unsigned long mask) | ||
5767 | { | 6215 | { |
5768 | /* If device needs checksumming, downgrade to it. */ | 6216 | if (mask & NETIF_F_GEN_CSUM) |
5769 | if (all & NETIF_F_NO_CSUM && !(one & NETIF_F_NO_CSUM)) | 6217 | mask |= NETIF_F_ALL_CSUM; |
5770 | all ^= NETIF_F_NO_CSUM | (one & NETIF_F_ALL_CSUM); | 6218 | mask |= NETIF_F_VLAN_CHALLENGED; |
5771 | else if (mask & NETIF_F_ALL_CSUM) { | ||
5772 | /* If one device supports v4/v6 checksumming, set for all. */ | ||
5773 | if (one & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM) && | ||
5774 | !(all & NETIF_F_GEN_CSUM)) { | ||
5775 | all &= ~NETIF_F_ALL_CSUM; | ||
5776 | all |= one & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM); | ||
5777 | } | ||
5778 | 6219 | ||
5779 | /* If one device supports hw checksumming, set for all. */ | 6220 | all |= one & (NETIF_F_ONE_FOR_ALL|NETIF_F_ALL_CSUM) & mask; |
5780 | if (one & NETIF_F_GEN_CSUM && !(all & NETIF_F_GEN_CSUM)) { | 6221 | all &= one | ~NETIF_F_ALL_FOR_ALL; |
5781 | all &= ~NETIF_F_ALL_CSUM; | ||
5782 | all |= NETIF_F_HW_CSUM; | ||
5783 | } | ||
5784 | } | ||
5785 | 6222 | ||
5786 | one |= NETIF_F_ALL_CSUM; | 6223 | /* If device needs checksumming, downgrade to it. */ |
6224 | if (all & (NETIF_F_ALL_CSUM & ~NETIF_F_NO_CSUM)) | ||
6225 | all &= ~NETIF_F_NO_CSUM; | ||
5787 | 6226 | ||
5788 | one |= all & NETIF_F_ONE_FOR_ALL; | 6227 | /* If one device supports hw checksumming, set for all. */ |
5789 | all &= one | NETIF_F_LLTX | NETIF_F_GSO | NETIF_F_UFO; | 6228 | if (all & NETIF_F_GEN_CSUM) |
5790 | all |= one & mask & NETIF_F_ONE_FOR_ALL; | 6229 | all &= ~(NETIF_F_ALL_CSUM & ~NETIF_F_GEN_CSUM); |
5791 | 6230 | ||
5792 | return all; | 6231 | return all; |
5793 | } | 6232 | } |
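netdev_increment_features() is rewritten around the u32 feature words and the NETIF_F_ONE_FOR_ALL / NETIF_F_ALL_FOR_ALL groupings: roughly, ONE_FOR_ALL bits are turned on if any slave (and the mask) allows them, ALL_FOR_ALL bits survive only if every slave has them, and the checksum bits are normalized so a usable common offload remains. A hedged sketch of how a bridge/bond-style master might fold its slaves with it -- struct my_slave, the slave list and the mask are purely illustrative:

	static void my_compute_features(struct net_device *master,
					struct list_head *slaves)
	{
		u32 mask = NETIF_F_SG | NETIF_F_ALL_CSUM | NETIF_F_HIGHDMA;
		u32 all = mask;			/* start from the master's full mask */
		struct my_slave *s;		/* hypothetical per-slave bookkeeping */

		list_for_each_entry(s, slaves, list)
			all = netdev_increment_features(all, s->dev->features, mask);

		master->features = all;
	}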
@@ -5830,29 +6269,23 @@ err_name: | |||
5830 | /** | 6269 | /** |
5831 | * netdev_drivername - network driver for the device | 6270 | * netdev_drivername - network driver for the device |
5832 | * @dev: network device | 6271 | * @dev: network device |
5833 | * @buffer: buffer for resulting name | ||
5834 | * @len: size of buffer | ||
5835 | * | 6272 | * |
5836 | * Determine network driver for device. | 6273 | * Determine network driver for device. |
5837 | */ | 6274 | */ |
5838 | char *netdev_drivername(const struct net_device *dev, char *buffer, int len) | 6275 | const char *netdev_drivername(const struct net_device *dev) |
5839 | { | 6276 | { |
5840 | const struct device_driver *driver; | 6277 | const struct device_driver *driver; |
5841 | const struct device *parent; | 6278 | const struct device *parent; |
5842 | 6279 | const char *empty = ""; | |
5843 | if (len <= 0 || !buffer) | ||
5844 | return buffer; | ||
5845 | buffer[0] = 0; | ||
5846 | 6280 | ||
5847 | parent = dev->dev.parent; | 6281 | parent = dev->dev.parent; |
5848 | |||
5849 | if (!parent) | 6282 | if (!parent) |
5850 | return buffer; | 6283 | return empty; |
5851 | 6284 | ||
5852 | driver = parent->driver; | 6285 | driver = parent->driver; |
5853 | if (driver && driver->name) | 6286 | if (driver && driver->name) |
5854 | strlcpy(buffer, driver->name, len); | 6287 | return driver->name; |
5855 | return buffer; | 6288 | return empty; |
5856 | } | 6289 | } |
5857 | 6290 | ||
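netdev_drivername() now returns a pointer to the bound driver's name (or "" when the device has no parent or driver) instead of filling a caller-supplied buffer; the returned string belongs to the driver core and must not be modified or freed. A hedged usage sketch, e.g. from a diagnostic path:

	pr_warn("%s: transmit stall (driver %s)\n",
		dev->name, netdev_drivername(dev));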
5858 | static int __netdev_printk(const char *level, const struct net_device *dev, | 6291 | static int __netdev_printk(const char *level, const struct net_device *dev, |
@@ -5948,7 +6381,7 @@ static void __net_exit default_device_exit(struct net *net) | |||
5948 | if (dev->rtnl_link_ops) | 6381 | if (dev->rtnl_link_ops) |
5949 | continue; | 6382 | continue; |
5950 | 6383 | ||
5951 | /* Push remaing network devices to init_net */ | 6384 | /* Push remaining network devices to init_net */ |
5952 | snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex); | 6385 | snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex); |
5953 | err = dev_change_net_namespace(dev, &init_net, fb_name); | 6386 | err = dev_change_net_namespace(dev, &init_net, fb_name); |
5954 | if (err) { | 6387 | if (err) { |
@@ -5963,7 +6396,7 @@ static void __net_exit default_device_exit(struct net *net) | |||
5963 | static void __net_exit default_device_exit_batch(struct list_head *net_list) | 6396 | static void __net_exit default_device_exit_batch(struct list_head *net_list) |
5964 | { | 6397 | { |
5965 | /* At exit all network devices must be removed from a network | 6398 | /* At exit all network devices must be removed from a network |
5966 | * namespace. Do this in the reverse order of registeration. | 6399 | * namespace. Do this in the reverse order of registration. |
5967 | * Do this across as many network namespaces as possible to | 6400 | * Do this across as many network namespaces as possible to |
5968 | * improve batching efficiency. | 6401 | * improve batching efficiency. |
5969 | */ | 6402 | */ |
@@ -5981,6 +6414,7 @@ static void __net_exit default_device_exit_batch(struct list_head *net_list) | |||
5981 | } | 6414 | } |
5982 | } | 6415 | } |
5983 | unregister_netdevice_many(&dev_kill_list); | 6416 | unregister_netdevice_many(&dev_kill_list); |
6417 | list_del(&dev_kill_list); | ||
5984 | rtnl_unlock(); | 6418 | rtnl_unlock(); |
5985 | } | 6419 | } |
5986 | 6420 | ||