Diffstat (limited to 'net/core/dev.c')
 -rw-r--r--  net/core/dev.c | 654
 1 file changed, 436 insertions(+), 218 deletions(-)

diff --git a/net/core/dev.c b/net/core/dev.c
index fe10551d3671..c36a17aafcf3 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -79,6 +79,7 @@
 #include <linux/cpu.h>
 #include <linux/types.h>
 #include <linux/kernel.h>
+#include <linux/hash.h>
 #include <linux/sched.h>
 #include <linux/mutex.h>
 #include <linux/string.h>
@@ -104,6 +105,7 @@
 #include <net/dst.h>
 #include <net/pkt_sched.h>
 #include <net/checksum.h>
+#include <net/xfrm.h>
 #include <linux/highmem.h>
 #include <linux/init.h>
 #include <linux/kmod.h>
@@ -175,7 +177,7 @@ static struct list_head ptype_all __read_mostly; /* Taps */
  * The @dev_base_head list is protected by @dev_base_lock and the rtnl
  * semaphore.
  *
- * Pure readers hold dev_base_lock for reading.
+ * Pure readers hold dev_base_lock for reading, or rcu_read_lock()
  *
  * Writers must hold the rtnl semaphore while they loop through the
  * dev_base_head list, and hold dev_base_lock for writing when they do the
@@ -193,18 +195,15 @@ static struct list_head ptype_all __read_mostly; /* Taps */
 DEFINE_RWLOCK(dev_base_lock);
 EXPORT_SYMBOL(dev_base_lock);
 
-#define NETDEV_HASHBITS	8
-#define NETDEV_HASHENTRIES (1 << NETDEV_HASHBITS)
-
 static inline struct hlist_head *dev_name_hash(struct net *net, const char *name)
 {
 	unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
-	return &net->dev_name_head[hash & ((1 << NETDEV_HASHBITS) - 1)];
+	return &net->dev_name_head[hash_32(hash, NETDEV_HASHBITS)];
 }
 
 static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
 {
-	return &net->dev_index_head[ifindex & ((1 << NETDEV_HASHBITS) - 1)];
+	return &net->dev_index_head[ifindex & (NETDEV_HASHENTRIES - 1)];
 }
 
 /* Device list insertion */
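Replacing the open-coded mask with hash_32() matters because full_name_hash() leaves much of its entropy in the high bits; the multiplicative fold mixes all 32 bits before taking the top NETDEV_HASHBITS. A user-space sketch of the two operations (the constant is GOLDEN_RATIO_PRIME_32 from include/linux/hash.h of this era; the rest is illustrative):

	#include <stdint.h>

	/* same arithmetic as the kernel's hash_32() */
	static inline uint32_t hash_32(uint32_t val, unsigned int bits)
	{
		return (val * 0x9e370001U) >> (32 - bits);	/* GOLDEN_RATIO_PRIME_32 */
	}

	/* old scheme: keeps only the low bits of val */
	static inline uint32_t mask_hash(uint32_t val, unsigned int bits)
	{
		return val & ((1U << bits) - 1);
	}

dev_index_hash() keeps the plain mask: ifindex values are small sequential integers, so their low bits are already well distributed.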
@@ -215,23 +214,26 @@ static int list_netdevice(struct net_device *dev)
 	ASSERT_RTNL();
 
 	write_lock_bh(&dev_base_lock);
-	list_add_tail(&dev->dev_list, &net->dev_base_head);
-	hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name));
-	hlist_add_head(&dev->index_hlist, dev_index_hash(net, dev->ifindex));
+	list_add_tail_rcu(&dev->dev_list, &net->dev_base_head);
+	hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name));
+	hlist_add_head_rcu(&dev->index_hlist,
+			   dev_index_hash(net, dev->ifindex));
 	write_unlock_bh(&dev_base_lock);
 	return 0;
 }
 
-/* Device list removal */
+/* Device list removal
+ * caller must respect an RCU grace period before freeing/reusing dev
+ */
 static void unlist_netdevice(struct net_device *dev)
 {
 	ASSERT_RTNL();
 
 	/* Unlink dev from the device chain */
 	write_lock_bh(&dev_base_lock);
-	list_del(&dev->dev_list);
-	hlist_del(&dev->name_hlist);
-	hlist_del(&dev->index_hlist);
+	list_del_rcu(&dev->dev_list);
+	hlist_del_rcu(&dev->name_hlist);
+	hlist_del_rcu(&dev->index_hlist);
 	write_unlock_bh(&dev_base_lock);
 }
 
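That new comment is the contract the rest of this patch relies on: once unlist_netdevice() returns, a concurrent rcu_read_lock() reader may still be walking a chain that contains dev, so the structure must survive a grace period. A minimal sketch of the writer-side obligation (the core enforces this via synchronize_net() and netdev_wait_allrefs(); the free step here is illustrative):

	rtnl_lock();
	unlist_netdevice(dev);	/* new readers can no longer find dev */
	rtnl_unlock();

	synchronize_net();	/* wait for pre-existing RCU readers to finish */
	/* only now may dev be freed or reused */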
@@ -587,18 +589,44 @@ __setup("netdev=", netdev_boot_setup);
 struct net_device *__dev_get_by_name(struct net *net, const char *name)
 {
 	struct hlist_node *p;
+	struct net_device *dev;
+	struct hlist_head *head = dev_name_hash(net, name);
 
-	hlist_for_each(p, dev_name_hash(net, name)) {
-		struct net_device *dev
-			= hlist_entry(p, struct net_device, name_hlist);
+	hlist_for_each_entry(dev, p, head, name_hlist)
 		if (!strncmp(dev->name, name, IFNAMSIZ))
 			return dev;
-	}
+
 	return NULL;
 }
 EXPORT_SYMBOL(__dev_get_by_name);
 
 /**
+ * dev_get_by_name_rcu - find a device by its name
+ * @net: the applicable net namespace
+ * @name: name to find
+ *
+ * Find an interface by name.
+ * If the name is found, a pointer to the device is returned.
+ * If the name is not found then %NULL is returned.
+ * The reference counters are not incremented so the caller must be
+ * careful with locks. The caller must hold the RCU read lock.
+ */
+
+struct net_device *dev_get_by_name_rcu(struct net *net, const char *name)
+{
+	struct hlist_node *p;
+	struct net_device *dev;
+	struct hlist_head *head = dev_name_hash(net, name);
+
+	hlist_for_each_entry_rcu(dev, p, head, name_hlist)
+		if (!strncmp(dev->name, name, IFNAMSIZ))
+			return dev;
+
+	return NULL;
+}
+EXPORT_SYMBOL(dev_get_by_name_rcu);
+
+/**
  * dev_get_by_name - find a device by its name
  * @net: the applicable net namespace
  * @name: name to find
@@ -614,11 +642,11 @@ struct net_device *dev_get_by_name(struct net *net, const char *name)
 {
 	struct net_device *dev;
 
-	read_lock(&dev_base_lock);
-	dev = __dev_get_by_name(net, name);
+	rcu_read_lock();
+	dev = dev_get_by_name_rcu(net, name);
 	if (dev)
 		dev_hold(dev);
-	read_unlock(&dev_base_lock);
+	rcu_read_unlock();
 	return dev;
 }
 EXPORT_SYMBOL(dev_get_by_name);
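Taken together, the two functions above show the intended division of labour: dev_get_by_name_rcu() is the lockless lookup, and dev_get_by_name() is the reference-taking wrapper around it. A sketch of a pure RCU reader (the function and its purpose are illustrative):

	static int name_to_ifindex(struct net *net, const char *name)
	{
		struct net_device *dev;
		int ifindex = -1;

		rcu_read_lock();
		dev = dev_get_by_name_rcu(net, name);
		if (dev)
			ifindex = dev->ifindex;	/* dev is only valid inside the read section */
		rcu_read_unlock();
		return ifindex;
	}

Anything that must use dev after rcu_read_unlock() has to dev_hold() it first, which is exactly what dev_get_by_name() now does.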
@@ -638,17 +666,42 @@ EXPORT_SYMBOL(dev_get_by_name);
 struct net_device *__dev_get_by_index(struct net *net, int ifindex)
 {
 	struct hlist_node *p;
+	struct net_device *dev;
+	struct hlist_head *head = dev_index_hash(net, ifindex);
 
-	hlist_for_each(p, dev_index_hash(net, ifindex)) {
-		struct net_device *dev
-			= hlist_entry(p, struct net_device, index_hlist);
+	hlist_for_each_entry(dev, p, head, index_hlist)
 		if (dev->ifindex == ifindex)
 			return dev;
-	}
+
 	return NULL;
 }
 EXPORT_SYMBOL(__dev_get_by_index);
 
+/**
+ * dev_get_by_index_rcu - find a device by its ifindex
+ * @net: the applicable net namespace
+ * @ifindex: index of device
+ *
+ * Search for an interface by index. Returns a pointer to the device,
+ * or %NULL if the device is not found. The device has not
+ * had its reference counter increased so the caller must be careful
+ * about locking. The caller must hold the RCU read lock.
+ */
+
+struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex)
+{
+	struct hlist_node *p;
+	struct net_device *dev;
+	struct hlist_head *head = dev_index_hash(net, ifindex);
+
+	hlist_for_each_entry_rcu(dev, p, head, index_hlist)
+		if (dev->ifindex == ifindex)
+			return dev;
+
+	return NULL;
+}
+EXPORT_SYMBOL(dev_get_by_index_rcu);
+
 
 /**
  * dev_get_by_index - find a device by its ifindex
@@ -665,11 +718,11 @@ struct net_device *dev_get_by_index(struct net *net, int ifindex)
 {
 	struct net_device *dev;
 
-	read_lock(&dev_base_lock);
-	dev = __dev_get_by_index(net, ifindex);
+	rcu_read_lock();
+	dev = dev_get_by_index_rcu(net, ifindex);
 	if (dev)
 		dev_hold(dev);
-	read_unlock(&dev_base_lock);
+	rcu_read_unlock();
 	return dev;
 }
 EXPORT_SYMBOL(dev_get_by_index);
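The ifindex variant follows the identical pattern; the rewritten dev_get_by_index() above is itself the canonical caller:

	rcu_read_lock();
	dev = dev_get_by_index_rcu(net, ifindex);
	if (dev)
		dev_hold(dev);	/* only needed if dev outlives the read section */
	rcu_read_unlock();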
@@ -748,15 +801,15 @@ struct net_device *dev_get_by_flags(struct net *net, unsigned short if_flags,
 	struct net_device *dev, *ret;
 
 	ret = NULL;
-	read_lock(&dev_base_lock);
-	for_each_netdev(net, dev) {
+	rcu_read_lock();
+	for_each_netdev_rcu(net, dev) {
 		if (((dev->flags ^ if_flags) & mask) == 0) {
 			dev_hold(dev);
 			ret = dev;
 			break;
 		}
 	}
-	read_unlock(&dev_base_lock);
+	rcu_read_unlock();
 	return ret;
 }
 EXPORT_SYMBOL(dev_get_by_flags);
@@ -841,7 +894,8 @@ static int __dev_alloc_name(struct net *net, const char *name, char *buf)
 		free_page((unsigned long) inuse);
 	}
 
-	snprintf(buf, IFNAMSIZ, name, i);
+	if (buf != name)
+		snprintf(buf, IFNAMSIZ, name, i);
 	if (!__dev_get_by_name(net, buf))
 		return i;
 
@@ -881,6 +935,21 @@ int dev_alloc_name(struct net_device *dev, const char *name)
 }
 EXPORT_SYMBOL(dev_alloc_name);
 
+static int dev_get_valid_name(struct net *net, const char *name, char *buf,
+			      bool fmt)
+{
+	if (!dev_valid_name(name))
+		return -EINVAL;
+
+	if (fmt && strchr(name, '%'))
+		return __dev_alloc_name(net, name, buf);
+	else if (__dev_get_by_name(net, name))
+		return -EEXIST;
+	else if (buf != name)
+		strlcpy(buf, name, IFNAMSIZ);
+
+	return 0;
+}
 
 /**
  * dev_change_name - change name of a device
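dev_get_valid_name() folds together the three naming cases that were previously open-coded in dev_change_name(), register_netdevice() and dev_change_net_namespace(). Assuming an eth0 already exists in net, the outcomes are:

	dev_get_valid_name(net, "eth%d", buf, 1);	/* formats the first free name, e.g. "eth1", into buf */
	dev_get_valid_name(net, "eth0", buf, 1);	/* -EEXIST: name taken, no '%' to expand */
	dev_get_valid_name(net, "a/b", buf, 1);		/* -EINVAL from dev_valid_name() */

register_netdevice() passes dev->name as both name and buf, which is why this helper and __dev_alloc_name() above guard the copy with buf != name.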
@@ -904,28 +973,20 @@ int dev_change_name(struct net_device *dev, const char *newname)
 	if (dev->flags & IFF_UP)
 		return -EBUSY;
 
-	if (!dev_valid_name(newname))
-		return -EINVAL;
-
 	if (strncmp(newname, dev->name, IFNAMSIZ) == 0)
 		return 0;
 
 	memcpy(oldname, dev->name, IFNAMSIZ);
 
-	if (strchr(newname, '%')) {
-		err = dev_alloc_name(dev, newname);
-		if (err < 0)
-			return err;
-	} else if (__dev_get_by_name(net, newname))
-		return -EEXIST;
-	else
-		strlcpy(dev->name, newname, IFNAMSIZ);
+	err = dev_get_valid_name(net, newname, dev->name, 1);
+	if (err < 0)
+		return err;
 
 rollback:
 	/* For now only devices in the initial network namespace
 	 * are in sysfs.
 	 */
-	if (net == &init_net) {
+	if (net_eq(net, &init_net)) {
 		ret = device_rename(&dev->dev, dev->name);
 		if (ret) {
 			memcpy(dev->name, oldname, IFNAMSIZ);
@@ -935,7 +996,12 @@ rollback:
 
 	write_lock_bh(&dev_base_lock);
 	hlist_del(&dev->name_hlist);
-	hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name));
+	write_unlock_bh(&dev_base_lock);
+
+	synchronize_rcu();
+
+	write_lock_bh(&dev_base_lock);
+	hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name));
 	write_unlock_bh(&dev_base_lock);
 
 	ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev);
@@ -1038,9 +1104,9 @@ void dev_load(struct net *net, const char *name)
 {
 	struct net_device *dev;
 
-	read_lock(&dev_base_lock);
-	dev = __dev_get_by_name(net, name);
-	read_unlock(&dev_base_lock);
+	rcu_read_lock();
+	dev = dev_get_by_name_rcu(net, name);
+	rcu_read_unlock();
 
 	if (!dev && capable(CAP_NET_ADMIN))
 		request_module("%s", name);
@@ -1287,6 +1353,7 @@ rollback:
 			nb->notifier_call(nb, NETDEV_DOWN, dev);
 		}
 		nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
+		nb->notifier_call(nb, NETDEV_UNREGISTER_BATCH, dev);
 	}
 }
 
@@ -1353,6 +1420,45 @@ static inline void net_timestamp(struct sk_buff *skb)
 	skb->tstamp.tv64 = 0;
 }
 
+/**
+ * dev_forward_skb - loopback an skb to another netif
+ *
+ * @dev: destination network device
+ * @skb: buffer to forward
+ *
+ * return values:
+ *	NET_RX_SUCCESS	(no congestion)
+ *	NET_RX_DROP	(packet was dropped)
+ *
+ * dev_forward_skb can be used for injecting an skb from the
+ * start_xmit function of one device into the receive queue
+ * of another device.
+ *
+ * The receiving device may be in another namespace, so
+ * we have to clear all information in the skb that could
+ * impact namespace isolation.
+ */
+int dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
+{
+	skb_orphan(skb);
+
+	if (!(dev->flags & IFF_UP))
+		return NET_RX_DROP;
+
+	if (skb->len > (dev->mtu + dev->hard_header_len))
+		return NET_RX_DROP;
+
+	skb_dst_drop(skb);
+	skb->tstamp.tv64 = 0;
+	skb->pkt_type = PACKET_HOST;
+	skb->protocol = eth_type_trans(skb, dev);
+	skb->mark = 0;
+	secpath_reset(skb);
+	nf_reset(skb);
+	return netif_rx(skb);
+}
+EXPORT_SYMBOL_GPL(dev_forward_skb);
+
 /*
  *	Support routine. Sends outgoing frames to any network
  *	taps currently in use.
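A sketch of the expected caller: the ndo_start_xmit handler of a paired virtual device (veth is the in-tree user this helper was extracted for; the peer lookup is illustrative). Note that in the version above the two early NET_RX_DROP returns hand the skb back without freeing it, so a caller cannot assume the buffer was consumed on those paths:

	static netdev_tx_t pair_xmit(struct sk_buff *skb, struct net_device *dev)
	{
		struct net_device *peer = pair_get_peer(dev);	/* driver-specific, illustrative */

		if (dev_forward_skb(peer, skb) == NET_RX_SUCCESS)
			dev->stats.tx_packets++;
		else
			dev->stats.tx_dropped++;	/* NB: skb not freed on the early-drop paths */
		return NETDEV_TX_OK;
	}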
@@ -1701,7 +1807,7 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 			struct netdev_queue *txq)
 {
 	const struct net_device_ops *ops = dev->netdev_ops;
-	int rc;
+	int rc = NETDEV_TX_OK;
 
 	if (likely(!skb->next)) {
 		if (!list_empty(&ptype_all))
@@ -1749,6 +1855,8 @@ gso:
 		nskb->next = NULL;
 		rc = ops->ndo_start_xmit(nskb, dev);
 		if (unlikely(rc != NETDEV_TX_OK)) {
+			if (rc & ~NETDEV_TX_MASK)
+				goto out_kfree_gso_skb;
 			nskb->next = skb->next;
 			skb->next = nskb;
 			return rc;
@@ -1758,11 +1866,12 @@ gso:
 		return NETDEV_TX_BUSY;
 	} while (skb->next);
 
-	skb->destructor = DEV_GSO_CB(skb)->destructor;
-
+out_kfree_gso_skb:
+	if (likely(skb->next == NULL))
+		skb->destructor = DEV_GSO_CB(skb)->destructor;
 out_kfree_skb:
 	kfree_skb(skb);
-	return NETDEV_TX_OK;
+	return rc;
 }
 
 static u32 skb_tx_hashrnd;
@@ -1789,16 +1898,43 @@ u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb)
 }
 EXPORT_SYMBOL(skb_tx_hash);
 
+static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index)
+{
+	if (unlikely(queue_index >= dev->real_num_tx_queues)) {
+		if (net_ratelimit()) {
+			WARN(1, "%s selects TX queue %d, but "
+			     "real number of TX queues is %d\n",
+			     dev->name, queue_index,
+			     dev->real_num_tx_queues);
+		}
+		return 0;
+	}
+	return queue_index;
+}
+
 static struct netdev_queue *dev_pick_tx(struct net_device *dev,
 					struct sk_buff *skb)
 {
-	const struct net_device_ops *ops = dev->netdev_ops;
-	u16 queue_index = 0;
+	u16 queue_index;
+	struct sock *sk = skb->sk;
+
+	if (sk_tx_queue_recorded(sk)) {
+		queue_index = sk_tx_queue_get(sk);
+	} else {
+		const struct net_device_ops *ops = dev->netdev_ops;
 
-	if (ops->ndo_select_queue)
-		queue_index = ops->ndo_select_queue(dev, skb);
-	else if (dev->real_num_tx_queues > 1)
-		queue_index = skb_tx_hash(dev, skb);
+		if (ops->ndo_select_queue) {
+			queue_index = ops->ndo_select_queue(dev, skb);
+			queue_index = dev_cap_txqueue(dev, queue_index);
+		} else {
+			queue_index = 0;
+			if (dev->real_num_tx_queues > 1)
+				queue_index = skb_tx_hash(dev, skb);
+
+			if (sk && sk->sk_dst_cache)
+				sk_tx_queue_set(sk, queue_index);
+		}
+	}
 
 	skb_set_queue_mapping(skb, queue_index);
 	return netdev_get_tx_queue(dev, queue_index);
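The rewritten dev_pick_tx() adds two things: a clamp on whatever a driver's ndo_select_queue() returns, and a per-socket cache of the hashed queue so established flows skip skb_tx_hash() entirely. The resulting decision order, as pseudocode distilled from the hunk above:

	if (sk_tx_queue_recorded(sk))		/* 1. queue cached in the socket */
		queue_index = sk_tx_queue_get(sk);
	else if (ops->ndo_select_queue)		/* 2. driver hook, clamped to real_num_tx_queues */
		queue_index = dev_cap_txqueue(dev, ops->ndo_select_queue(dev, skb));
	else {					/* 3. flow hash, recorded only for connected sockets */
		queue_index = dev->real_num_tx_queues > 1 ? skb_tx_hash(dev, skb) : 0;
		if (sk && sk->sk_dst_cache)
			sk_tx_queue_set(sk, queue_index);
	}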
@@ -1935,8 +2071,8 @@ gso:
 			HARD_TX_LOCK(dev, txq, cpu);
 
 			if (!netif_tx_queue_stopped(txq)) {
-				rc = NET_XMIT_SUCCESS;
-				if (!dev_hard_start_xmit(skb, dev, txq)) {
+				rc = dev_hard_start_xmit(skb, dev, txq);
+				if (dev_xmit_complete(rc)) {
 					HARD_TX_UNLOCK(dev, txq);
 					goto out;
 				}
@@ -2191,7 +2327,7 @@ static int ing_filter(struct sk_buff *skb)
 	if (MAX_RED_LOOP < ttl++) {
 		printk(KERN_WARNING
 		       "Redir loop detected Dropping packet (%d->%d)\n",
-		       skb->iif, dev->ifindex);
+		       skb->skb_iif, dev->ifindex);
 		return TC_ACT_SHOT;
 	}
 
@@ -2292,15 +2428,15 @@ int netif_receive_skb(struct sk_buff *skb)
 	if (!skb->tstamp.tv64)
 		net_timestamp(skb);
 
-	if (skb->vlan_tci && vlan_hwaccel_do_receive(skb))
+	if (vlan_tx_tag_present(skb) && vlan_hwaccel_do_receive(skb))
 		return NET_RX_SUCCESS;
 
 	/* if we've gotten here through NAPI, check netpoll */
 	if (netpoll_receive_skb(skb))
 		return NET_RX_DROP;
 
-	if (!skb->iif)
-		skb->iif = skb->dev->ifindex;
+	if (!skb->skb_iif)
+		skb->skb_iif = skb->dev->ifindex;
 
 	null_or_orig = NULL;
 	orig_dev = skb->dev;
@@ -2440,7 +2576,7 @@ void napi_gro_flush(struct napi_struct *napi)
 }
 EXPORT_SYMBOL(napi_gro_flush);
 
-int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
+enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 {
 	struct sk_buff **pp = NULL;
 	struct packet_type *ptype;
@@ -2448,7 +2584,7 @@ int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 	struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
 	int same_flow;
 	int mac_len;
-	int ret;
+	enum gro_result ret;
 
 	if (!(skb->dev->features & NETIF_F_GRO))
 		goto normal;
@@ -2532,7 +2668,8 @@ normal:
 }
 EXPORT_SYMBOL(dev_gro_receive);
 
-static int __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
+static gro_result_t
+__napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 {
 	struct sk_buff *p;
 
@@ -2540,33 +2677,35 @@ static int __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 		return GRO_NORMAL;
 
 	for (p = napi->gro_list; p; p = p->next) {
-		NAPI_GRO_CB(p)->same_flow = (p->dev == skb->dev)
-			&& !compare_ether_header(skb_mac_header(p),
-						 skb_gro_mac_header(skb));
+		NAPI_GRO_CB(p)->same_flow =
+			(p->dev == skb->dev) &&
+			!compare_ether_header(skb_mac_header(p),
+					      skb_gro_mac_header(skb));
 		NAPI_GRO_CB(p)->flush = 0;
 	}
 
 	return dev_gro_receive(napi, skb);
 }
 
-int napi_skb_finish(int ret, struct sk_buff *skb)
+gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
 {
-	int err = NET_RX_SUCCESS;
-
 	switch (ret) {
 	case GRO_NORMAL:
-		return netif_receive_skb(skb);
+		if (netif_receive_skb(skb))
+			ret = GRO_DROP;
+		break;
 
 	case GRO_DROP:
-		err = NET_RX_DROP;
-		/* fall through */
-
 	case GRO_MERGED_FREE:
 		kfree_skb(skb);
 		break;
+
+	case GRO_HELD:
+	case GRO_MERGED:
+		break;
 	}
 
-	return err;
+	return ret;
 }
 EXPORT_SYMBOL(napi_skb_finish);
@@ -2586,7 +2725,7 @@ void skb_gro_reset_offset(struct sk_buff *skb)
 }
 EXPORT_SYMBOL(skb_gro_reset_offset);
 
-int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
+gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 {
 	skb_gro_reset_offset(skb);
 
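With the NET_RX_* returns replaced by gro_result_t, a NAPI driver's receive loop keeps its shape and only the drop test changes. A minimal poll handler sketch (the driver helpers are illustrative):

	static int mydrv_poll(struct napi_struct *napi, int budget)
	{
		struct sk_buff *skb;
		int work = 0;

		while (work < budget && (skb = mydrv_rx_skb(napi)) != NULL) {	/* illustrative */
			if (napi_gro_receive(napi, skb) == GRO_DROP)
				napi->dev->stats.rx_dropped++;	/* skb already freed by the stack */
			work++;
		}
		if (work < budget)
			napi_complete(napi);
		return work;
	}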
@@ -2605,49 +2744,41 @@ EXPORT_SYMBOL(napi_reuse_skb);
 
 struct sk_buff *napi_get_frags(struct napi_struct *napi)
 {
-	struct net_device *dev = napi->dev;
 	struct sk_buff *skb = napi->skb;
 
 	if (!skb) {
-		skb = netdev_alloc_skb(dev, GRO_MAX_HEAD + NET_IP_ALIGN);
-		if (!skb)
-			goto out;
-
-		skb_reserve(skb, NET_IP_ALIGN);
-
-		napi->skb = skb;
+		skb = netdev_alloc_skb_ip_align(napi->dev, GRO_MAX_HEAD);
+		if (skb)
+			napi->skb = skb;
 	}
-
-out:
 	return skb;
 }
 EXPORT_SYMBOL(napi_get_frags);
 
-int napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, int ret)
+gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb,
+			       gro_result_t ret)
 {
-	int err = NET_RX_SUCCESS;
-
 	switch (ret) {
 	case GRO_NORMAL:
 	case GRO_HELD:
 		skb->protocol = eth_type_trans(skb, napi->dev);
 
-		if (ret == GRO_NORMAL)
-			return netif_receive_skb(skb);
-
-		skb_gro_pull(skb, -ETH_HLEN);
+		if (ret == GRO_HELD)
+			skb_gro_pull(skb, -ETH_HLEN);
+		else if (netif_receive_skb(skb))
+			ret = GRO_DROP;
 		break;
 
 	case GRO_DROP:
-		err = NET_RX_DROP;
-		/* fall through */
-
 	case GRO_MERGED_FREE:
 		napi_reuse_skb(napi, skb);
 		break;
+
+	case GRO_MERGED:
+		break;
 	}
 
-	return err;
+	return ret;
 }
 EXPORT_SYMBOL(napi_frags_finish);
 
@@ -2688,12 +2819,12 @@ out:
 }
 EXPORT_SYMBOL(napi_frags_skb);
 
-int napi_gro_frags(struct napi_struct *napi)
+gro_result_t napi_gro_frags(struct napi_struct *napi)
 {
 	struct sk_buff *skb = napi_frags_skb(napi);
 
 	if (!skb)
-		return NET_RX_DROP;
+		return GRO_DROP;
 
 	return napi_frags_finish(napi, skb, __napi_gro_receive(napi, skb));
 }
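napi_gro_frags() follows the same convention for drivers that receive directly into page fragments. Sketch, with the frag setup elided:

	skb = napi_get_frags(napi);
	if (!skb)
		return;		/* allocation failed; drop the hardware buffer */
	/* ... attach page frags, set skb->len, data_len and truesize ... */
	if (napi_gro_frags(napi) == GRO_DROP)
		dev->stats.rx_dropped++;	/* skb was freed or recycled by the stack */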
@@ -2938,15 +3069,15 @@ static int dev_ifname(struct net *net, struct ifreq __user *arg)
 	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
 		return -EFAULT;
 
-	read_lock(&dev_base_lock);
-	dev = __dev_get_by_index(net, ifr.ifr_ifindex);
+	rcu_read_lock();
+	dev = dev_get_by_index_rcu(net, ifr.ifr_ifindex);
 	if (!dev) {
-		read_unlock(&dev_base_lock);
+		rcu_read_unlock();
 		return -ENODEV;
 	}
 
 	strcpy(ifr.ifr_name, dev->name);
-	read_unlock(&dev_base_lock);
+	rcu_read_unlock();
 
 	if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
 		return -EFAULT;
@@ -3016,18 +3147,18 @@ static int dev_ifconf(struct net *net, char __user *arg)
  *	in detail.
  */
 void *dev_seq_start(struct seq_file *seq, loff_t *pos)
-	__acquires(dev_base_lock)
+	__acquires(RCU)
 {
 	struct net *net = seq_file_net(seq);
 	loff_t off;
 	struct net_device *dev;
 
-	read_lock(&dev_base_lock);
+	rcu_read_lock();
 	if (!*pos)
 		return SEQ_START_TOKEN;
 
 	off = 1;
-	for_each_netdev(net, dev)
+	for_each_netdev_rcu(net, dev)
 		if (off++ == *pos)
 			return dev;
 
@@ -3036,16 +3167,18 @@ void *dev_seq_start(struct seq_file *seq, loff_t *pos)
 
 void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
-	struct net *net = seq_file_net(seq);
+	struct net_device *dev = (v == SEQ_START_TOKEN) ?
+				  first_net_device(seq_file_net(seq)) :
+				  next_net_device((struct net_device *)v);
+
 	++*pos;
-	return v == SEQ_START_TOKEN ?
-		first_net_device(net) : next_net_device((struct net_device *)v);
+	return rcu_dereference(dev);
 }
 
 void dev_seq_stop(struct seq_file *seq, void *v)
-	__releases(dev_base_lock)
+	__releases(RCU)
 {
-	read_unlock(&dev_base_lock);
+	rcu_read_unlock();
 }
 
 static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
@@ -4254,12 +4387,12 @@ int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
 EXPORT_SYMBOL(dev_set_mac_address);
 
 /*
- *	Perform the SIOCxIFxxx calls, inside read_lock(dev_base_lock)
+ *	Perform the SIOCxIFxxx calls, inside rcu_read_lock()
  */
 static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cmd)
 {
 	int err;
-	struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
+	struct net_device *dev = dev_get_by_name_rcu(net, ifr->ifr_name);
 
 	if (!dev)
 		return -ENODEV;
@@ -4491,9 +4624,9 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
 	case SIOCGIFINDEX:
 	case SIOCGIFTXQLEN:
 		dev_load(net, ifr.ifr_name);
-		read_lock(&dev_base_lock);
+		rcu_read_lock();
 		ret = dev_ifsioc_locked(net, &ifr, cmd);
-		read_unlock(&dev_base_lock);
+		rcu_read_unlock();
 		if (!ret) {
 			if (colon)
 				*colon = ':';
@@ -4636,59 +4769,80 @@ static void net_set_todo(struct net_device *dev)
 	list_add_tail(&dev->todo_list, &net_todo_list);
 }
 
-static void rollback_registered(struct net_device *dev)
+static void rollback_registered_many(struct list_head *head)
 {
+	struct net_device *dev;
+
 	BUG_ON(dev_boot_phase);
 	ASSERT_RTNL();
 
-	/* Some devices call without registering for initialization unwind. */
-	if (dev->reg_state == NETREG_UNINITIALIZED) {
-		printk(KERN_DEBUG "unregister_netdevice: device %s/%p never "
-				  "was registered\n", dev->name, dev);
+	list_for_each_entry(dev, head, unreg_list) {
+		/* Some devices call without registering
+		 * for initialization unwind.
+		 */
+		if (dev->reg_state == NETREG_UNINITIALIZED) {
+			pr_debug("unregister_netdevice: device %s/%p never "
+				 "was registered\n", dev->name, dev);
 
-		WARN_ON(1);
-		return;
-	}
+			WARN_ON(1);
+			return;
+		}
 
-	BUG_ON(dev->reg_state != NETREG_REGISTERED);
+		BUG_ON(dev->reg_state != NETREG_REGISTERED);
 
-	/* If device is running, close it first. */
-	dev_close(dev);
+		/* If device is running, close it first. */
+		dev_close(dev);
 
-	/* And unlink it from device chain. */
-	unlist_netdevice(dev);
+		/* And unlink it from device chain. */
+		unlist_netdevice(dev);
 
-	dev->reg_state = NETREG_UNREGISTERING;
+		dev->reg_state = NETREG_UNREGISTERING;
+	}
 
 	synchronize_net();
 
-	/* Shutdown queueing discipline. */
-	dev_shutdown(dev);
+	list_for_each_entry(dev, head, unreg_list) {
+		/* Shutdown queueing discipline. */
+		dev_shutdown(dev);
 
 
-	/* Notify protocols, that we are about to destroy
-	   this device. They should clean all the things.
-	*/
-	call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
+		/* Notify protocols, that we are about to destroy
+		   this device. They should clean all the things.
+		*/
+		call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
 
-	/*
-	 *	Flush the unicast and multicast chains
-	 */
-	dev_unicast_flush(dev);
-	dev_addr_discard(dev);
+		/*
+		 *	Flush the unicast and multicast chains
+		 */
+		dev_unicast_flush(dev);
+		dev_addr_discard(dev);
 
-	if (dev->netdev_ops->ndo_uninit)
-		dev->netdev_ops->ndo_uninit(dev);
+		if (dev->netdev_ops->ndo_uninit)
+			dev->netdev_ops->ndo_uninit(dev);
 
-	/* Notifier chain MUST detach us from master device. */
-	WARN_ON(dev->master);
+		/* Notifier chain MUST detach us from master device. */
+		WARN_ON(dev->master);
 
-	/* Remove entries from kobject tree */
-	netdev_unregister_kobject(dev);
+		/* Remove entries from kobject tree */
+		netdev_unregister_kobject(dev);
+	}
+
+	/* Process any work delayed until the end of the batch */
+	dev = list_entry(head->next, struct net_device, unreg_list);
+	call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev);
 
 	synchronize_net();
 
-	dev_put(dev);
+	list_for_each_entry(dev, head, unreg_list)
+		dev_put(dev);
+}
+
+static void rollback_registered(struct net_device *dev)
+{
+	LIST_HEAD(single);
+
+	list_add(&dev->unreg_list, &single);
+	rollback_registered_many(&single);
 }
 
 static void __netdev_init_queue_locks_one(struct net_device *dev,
@@ -4747,6 +4901,33 @@ unsigned long netdev_fix_features(unsigned long features, const char *name)
 EXPORT_SYMBOL(netdev_fix_features);
 
 /**
+ * netif_stacked_transfer_operstate - transfer operstate
+ * @rootdev: the root or lower level device to transfer state from
+ * @dev: the device to transfer operstate to
+ *
+ * Transfer operational state from root to device. This is normally
+ * called when a stacking relationship exists between the root
+ * device and the device (a leaf device).
+ */
+void netif_stacked_transfer_operstate(const struct net_device *rootdev,
+				      struct net_device *dev)
+{
+	if (rootdev->operstate == IF_OPER_DORMANT)
+		netif_dormant_on(dev);
+	else
+		netif_dormant_off(dev);
+
+	if (netif_carrier_ok(rootdev)) {
+		if (!netif_carrier_ok(dev))
+			netif_carrier_on(dev);
+	} else {
+		if (netif_carrier_ok(dev))
+			netif_carrier_off(dev);
+	}
+}
+EXPORT_SYMBOL(netif_stacked_transfer_operstate);
+
+/**
  * register_netdevice - register a network device
  * @dev: device to register
  *
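A sketch of how a stacking driver is expected to call the new helper from its netdevice notifier when the lower device changes state (the vlan and macvlan NETDEV_CHANGE handlers are the intended users; the upper-device lookup is illustrative):

	static int stacked_device_event(struct notifier_block *unused,
					unsigned long event, void *ptr)
	{
		struct net_device *lower = ptr;	/* notifiers pass the net_device directly here */
		struct net_device *upper = my_find_upper(lower);	/* driver-specific, illustrative */

		if (event == NETDEV_CHANGE && upper)
			netif_stacked_transfer_operstate(lower, upper);
		return NOTIFY_DONE;
	}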
@@ -4765,8 +4946,6 @@ EXPORT_SYMBOL(netdev_fix_features);
 
 int register_netdevice(struct net_device *dev)
 {
-	struct hlist_head *head;
-	struct hlist_node *p;
 	int ret;
 	struct net *net = dev_net(dev);
 
@@ -4795,26 +4974,14 @@ int register_netdevice(struct net_device *dev)
 		}
 	}
 
-	if (!dev_valid_name(dev->name)) {
-		ret = -EINVAL;
+	ret = dev_get_valid_name(net, dev->name, dev->name, 0);
+	if (ret)
 		goto err_uninit;
-	}
 
 	dev->ifindex = dev_new_index(net);
 	if (dev->iflink == -1)
 		dev->iflink = dev->ifindex;
 
-	/* Check for existence of name */
-	head = dev_name_hash(net, dev->name);
-	hlist_for_each(p, head) {
-		struct net_device *d
-			= hlist_entry(p, struct net_device, name_hlist);
-		if (!strncmp(d->name, dev->name, IFNAMSIZ)) {
-			ret = -EEXIST;
-			goto err_uninit;
-		}
-	}
-
 	/* Fix illegal checksum combinations */
 	if ((dev->features & NETIF_F_HW_CSUM) &&
 	    (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
@@ -4837,6 +5004,12 @@ int register_netdevice(struct net_device *dev)
 		dev->features |= NETIF_F_GSO;
 
 	netdev_initialize_kobject(dev);
+
+	ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev);
+	ret = notifier_to_errno(ret);
+	if (ret)
+		goto err_uninit;
+
 	ret = netdev_register_kobject(dev);
 	if (ret)
 		goto err_uninit;
@@ -4961,6 +5134,8 @@ static void netdev_wait_allrefs(struct net_device *dev)
 {
 	unsigned long rebroadcast_time, warning_time;
 
+	linkwatch_forget_dev(dev);
+
 	rebroadcast_time = warning_time = jiffies;
 	while (atomic_read(&dev->refcnt) != 0) {
 		if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
@@ -4968,6 +5143,8 @@ static void netdev_wait_allrefs(struct net_device *dev)
 
 			/* Rebroadcast unregister notification */
 			call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
+			/* don't resend NETDEV_UNREGISTER_BATCH, _BATCH users
+			 * should have already handled it the first time */
 
 			if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
 				     &dev->state)) {
@@ -5063,6 +5240,32 @@ void netdev_run_todo(void)
 }
 
 /**
+ * dev_txq_stats_fold - fold tx_queues stats
+ * @dev: device to get statistics from
+ * @stats: struct net_device_stats to hold results
+ */
+void dev_txq_stats_fold(const struct net_device *dev,
+			struct net_device_stats *stats)
+{
+	unsigned long tx_bytes = 0, tx_packets = 0, tx_dropped = 0;
+	unsigned int i;
+	struct netdev_queue *txq;
+
+	for (i = 0; i < dev->num_tx_queues; i++) {
+		txq = netdev_get_tx_queue(dev, i);
+		tx_bytes += txq->tx_bytes;
+		tx_packets += txq->tx_packets;
+		tx_dropped += txq->tx_dropped;
+	}
+	if (tx_bytes || tx_packets || tx_dropped) {
+		stats->tx_bytes = tx_bytes;
+		stats->tx_packets = tx_packets;
+		stats->tx_dropped = tx_dropped;
+	}
+}
+EXPORT_SYMBOL(dev_txq_stats_fold);
+
+/**
  * dev_get_stats - get network device statistics
  * @dev: device to get statistics from
  *
@@ -5076,25 +5279,9 @@ const struct net_device_stats *dev_get_stats(struct net_device *dev)
 
 	if (ops->ndo_get_stats)
 		return ops->ndo_get_stats(dev);
-	else {
-		unsigned long tx_bytes = 0, tx_packets = 0, tx_dropped = 0;
-		struct net_device_stats *stats = &dev->stats;
-		unsigned int i;
-		struct netdev_queue *txq;
-
-		for (i = 0; i < dev->num_tx_queues; i++) {
-			txq = netdev_get_tx_queue(dev, i);
-			tx_bytes += txq->tx_bytes;
-			tx_packets += txq->tx_packets;
-			tx_dropped += txq->tx_dropped;
-		}
-		if (tx_bytes || tx_packets || tx_dropped) {
-			stats->tx_bytes = tx_bytes;
-			stats->tx_packets = tx_packets;
-			stats->tx_dropped = tx_dropped;
-		}
-		return stats;
-	}
+
+	dev_txq_stats_fold(dev, &dev->stats);
+	return &dev->stats;
 }
 EXPORT_SYMBOL(dev_get_stats);
 
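Factoring the fold out of dev_get_stats() lets a driver that fills its own counters still account the software per-queue TX statistics. Illustrative ndo_get_stats implementation:

	static struct net_device_stats *mydrv_get_stats(struct net_device *dev)
	{
		struct net_device_stats *stats = &dev->stats;

		mydrv_read_hw_rx_counters(dev, stats);	/* illustrative RX side */
		dev_txq_stats_fold(dev, stats);		/* overlay per-queue TX counters */
		return stats;
	}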
@@ -5174,6 +5361,8 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
 	netdev_init_queues(dev);
 
 	INIT_LIST_HEAD(&dev->napi_list);
+	INIT_LIST_HEAD(&dev->unreg_list);
+	INIT_LIST_HEAD(&dev->link_watch_list);
 	dev->priv_flags = IFF_XMIT_DST_RELEASE;
 	setup(dev);
 	strcpy(dev->name, name);
@@ -5238,25 +5427,47 @@ void synchronize_net(void)
 EXPORT_SYMBOL(synchronize_net);
 
 /**
- * unregister_netdevice - remove device from the kernel
+ * unregister_netdevice_queue - remove device from the kernel
  * @dev: device
+ * @head: list
  *
  * This function shuts down a device interface and removes it
  * from the kernel tables.
+ * If head is not NULL, the device is queued to be unregistered later.
  *
  * Callers must hold the rtnl semaphore. You may want
  * unregister_netdev() instead of this.
 */
 
-void unregister_netdevice(struct net_device *dev)
+void unregister_netdevice_queue(struct net_device *dev, struct list_head *head)
 {
 	ASSERT_RTNL();
 
-	rollback_registered(dev);
-	/* Finish processing unregister after unlock */
-	net_set_todo(dev);
+	if (head) {
+		list_move_tail(&dev->unreg_list, head);
+	} else {
+		rollback_registered(dev);
+		/* Finish processing unregister after unlock */
+		net_set_todo(dev);
+	}
 }
-EXPORT_SYMBOL(unregister_netdevice);
+EXPORT_SYMBOL(unregister_netdevice_queue);
+
+/**
+ * unregister_netdevice_many - unregister many devices
+ * @head: list of devices
+ */
+void unregister_netdevice_many(struct list_head *head)
+{
+	struct net_device *dev;
+
+	if (!list_empty(head)) {
+		rollback_registered_many(head);
+		list_for_each_entry(dev, head, unreg_list)
+			net_set_todo(dev);
+	}
+}
+EXPORT_SYMBOL(unregister_netdevice_many);
 
 /**
  * unregister_netdev - remove device from the kernel
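The queue/many split is the payoff of rollback_registered_many(): N devices torn down in one batch share the synchronize_net() and NETDEV_UNREGISTER_BATCH cost instead of paying one RCU grace period each. Illustrative caller shape:

	LIST_HEAD(kill_list);
	struct net_device *dev, *tmp;

	rtnl_lock();
	list_for_each_entry_safe(dev, tmp, &my_dying_devs, unreg_list)	/* illustrative source list */
		unregister_netdevice_queue(dev, &kill_list);
	unregister_netdevice_many(&kill_list);	/* one grace period for the whole batch */
	rtnl_unlock();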
@@ -5293,8 +5504,6 @@ EXPORT_SYMBOL(unregister_netdev);
 
 int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat)
 {
-	char buf[IFNAMSIZ];
-	const char *destname;
 	int err;
 
 	ASSERT_RTNL();
@@ -5327,20 +5536,11 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
 	 * we can use it in the destination network namespace.
 	 */
 	err = -EEXIST;
-	destname = dev->name;
-	if (__dev_get_by_name(net, destname)) {
+	if (__dev_get_by_name(net, dev->name)) {
 		/* We get here if we can't use the current device name */
 		if (!pat)
 			goto out;
-		if (!dev_valid_name(pat))
-			goto out;
-		if (strchr(pat, '%')) {
-			if (__dev_alloc_name(net, pat, buf) < 0)
-				goto out;
-			destname = buf;
-		} else
-			destname = pat;
-		if (__dev_get_by_name(net, destname))
+		if (dev_get_valid_name(net, pat, dev->name, 1))
 			goto out;
 	}
 
@@ -5364,6 +5564,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
 	   this device. They should clean all the things.
 	*/
 	call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
+	call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev);
 
 	/*
 	 *	Flush the unicast and multicast chains
@@ -5376,10 +5577,6 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
 	/* Actually switch the network namespace */
 	dev_net_set(dev, net);
 
-	/* Assign the new device name */
-	if (destname != dev->name)
-		strcpy(dev->name, destname);
-
 	/* If there is an ifindex conflict assign a new one */
 	if (__dev_get_by_index(net, dev->ifindex)) {
 		int iflink = (dev->iflink == dev->ifindex);
@@ -5484,7 +5681,7 @@ unsigned long netdev_increment_features(unsigned long all, unsigned long one,
 		one |= NETIF_F_ALL_CSUM;
 
 	one |= all & NETIF_F_ONE_FOR_ALL;
-	all &= one | NETIF_F_LLTX | NETIF_F_GSO;
+	all &= one | NETIF_F_LLTX | NETIF_F_GSO | NETIF_F_UFO;
 	all |= one & mask & NETIF_F_ONE_FOR_ALL;
 
 	return all;
@@ -5566,14 +5763,13 @@ static struct pernet_operations __net_initdata netdev_net_ops = {
 
 static void __net_exit default_device_exit(struct net *net)
 {
-	struct net_device *dev;
+	struct net_device *dev, *aux;
 	/*
-	 * Push all migratable of the network devices back to the
+	 * Push all migratable network devices back to the
 	 * initial network namespace
 	 */
 	rtnl_lock();
-restart:
-	for_each_netdev(net, dev) {
+	for_each_netdev_safe(net, dev, aux) {
 		int err;
 		char fb_name[IFNAMSIZ];
 
@@ -5581,11 +5777,9 @@ restart:
 		if (dev->features & NETIF_F_NETNS_LOCAL)
 			continue;
 
-		/* Delete virtual devices */
-		if (dev->rtnl_link_ops && dev->rtnl_link_ops->dellink) {
-			dev->rtnl_link_ops->dellink(dev);
-			goto restart;
-		}
+		/* Leave virtual devices for the generic cleanup */
+		if (dev->rtnl_link_ops)
+			continue;
 
 		/* Push remaining network devices to init_net */
 		snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex);
@@ -5595,13 +5789,37 @@ restart:
 				   __func__, dev->name, err);
 			BUG();
 		}
-		goto restart;
 	}
 	rtnl_unlock();
 }
 
+static void __net_exit default_device_exit_batch(struct list_head *net_list)
+{
+	/* At exit all network devices must be removed from a network
+	 * namespace. Do this in the reverse order of registration.
+	 * Do this across as many network namespaces as possible to
+	 * improve batching efficiency.
+	 */
+	struct net_device *dev;
+	struct net *net;
+	LIST_HEAD(dev_kill_list);
+
+	rtnl_lock();
+	list_for_each_entry(net, net_list, exit_list) {
+		for_each_netdev_reverse(net, dev) {
+			if (dev->rtnl_link_ops)
+				dev->rtnl_link_ops->dellink(dev, &dev_kill_list);
+			else
+				unregister_netdevice_queue(dev, &dev_kill_list);
+		}
+	}
+	unregister_netdevice_many(&dev_kill_list);
+	rtnl_unlock();
+}
+
 static struct pernet_operations __net_initdata default_device_ops = {
 	.exit = default_device_exit,
+	.exit_batch = default_device_exit_batch,
 };
 
 /*
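default_device_exit_batch() relies on the new pernet exit_batch hook, which hands the subsystem a list of all namespaces dying together rather than one at a time; that is what lets it collect every device from every exiting namespace into a single dev_kill_list. Illustrative shape of a pernet_operations using both hooks:

	static struct pernet_operations my_net_ops = {	/* illustrative */
		.exit		= my_per_ns_exit,	/* runs once per namespace */
		.exit_batch	= my_batch_exit,	/* runs once per batch, walks exit_list */
	};

	/* registered as usual, e.g. with register_pernet_subsys(&my_net_ops) */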