Diffstat (limited to 'net/core/dev.c')
-rw-r--r--	net/core/dev.c	1756
1 file changed, 585 insertions, 1171 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index d0cbc93fcf32..a06a7a58dd11 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -97,8 +97,6 @@
 #include <net/net_namespace.h>
 #include <net/sock.h>
 #include <linux/rtnetlink.h>
-#include <linux/proc_fs.h>
-#include <linux/seq_file.h>
 #include <linux/stat.h>
 #include <net/dst.h>
 #include <net/pkt_sched.h>
@@ -106,12 +104,10 @@
 #include <net/xfrm.h>
 #include <linux/highmem.h>
 #include <linux/init.h>
-#include <linux/kmod.h>
 #include <linux/module.h>
 #include <linux/netpoll.h>
 #include <linux/rcupdate.h>
 #include <linux/delay.h>
-#include <net/wext.h>
 #include <net/iw_handler.h>
 #include <asm/current.h>
 #include <linux/audit.h>
@@ -132,9 +128,7 @@
 #include <linux/pci.h>
 #include <linux/inetdevice.h>
 #include <linux/cpu_rmap.h>
-#include <linux/net_tstamp.h>
 #include <linux/static_key.h>
-#include <net/flow_keys.h>
 
 #include "net-sysfs.h"
 
@@ -144,41 +138,10 @@
 /* This should be increased if a protocol with a bigger head is added. */
 #define GRO_MAX_HEAD (MAX_HEADER + 128)
 
-/*
- *	The list of packet types we will receive (as opposed to discard)
- *	and the routines to invoke.
- *
- *	Why 16. Because with 16 the only overlap we get on a hash of the
- *	low nibble of the protocol value is RARP/SNAP/X.25.
- *
- *      NOTE:  That is no longer true with the addition of VLAN tags.  Not
- *             sure which should go first, but I bet it won't make much
- *             difference if we are running VLANs.  The good news is that
- *             this protocol won't be in the list unless compiled in, so
- *             the average user (w/out VLANs) will not be adversely affected.
- *             --BLG
- *
- *		0800	IP
- *		8100	802.1Q VLAN
- *		0001	802.3
- *		0002	AX.25
- *		0004	802.2
- *		8035	RARP
- *		0005	SNAP
- *		0805	X.25
- *		0806	ARP
- *		8137	IPX
- *		0009	Localtalk
- *		86DD	IPv6
- */
-
-#define PTYPE_HASH_SIZE	(16)
-#define PTYPE_HASH_MASK	(PTYPE_HASH_SIZE - 1)
-
 static DEFINE_SPINLOCK(ptype_lock);
 static DEFINE_SPINLOCK(offload_lock);
-static struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
-static struct list_head ptype_all __read_mostly;	/* Taps */
+struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
+struct list_head ptype_all __read_mostly;	/* Taps */
 static struct list_head offload_base __read_mostly;
 
 /*
@@ -203,7 +166,7 @@ static struct list_head offload_base __read_mostly;
 DEFINE_RWLOCK(dev_base_lock);
 EXPORT_SYMBOL(dev_base_lock);
 
-DEFINE_SEQLOCK(devnet_rename_seq);
+seqcount_t devnet_rename_seq;
 
 static inline void dev_base_seq_inc(struct net *net)
 {
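The seqlock becomes a bare seqcount here because the writer side is already serialized (rename happens under RTNL), so readers only need to spin on the sequence counter. A minimal sketch of the matching read side, assuming the kernel's standard seqcount API; the helper name is illustrative and not part of this diff:

static void example_read_name(struct net_device *dev, char *buf)
{
	unsigned int seq;

	/* Retry the copy until no rename raced with it. */
	do {
		seq = read_seqcount_begin(&devnet_rename_seq);
		strncpy(buf, dev->name, IFNAMSIZ);
	} while (read_seqcount_retry(&devnet_rename_seq, seq));
}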
@@ -695,11 +658,10 @@ __setup("netdev=", netdev_boot_setup);
 
 struct net_device *__dev_get_by_name(struct net *net, const char *name)
 {
-	struct hlist_node *p;
 	struct net_device *dev;
 	struct hlist_head *head = dev_name_hash(net, name);
 
-	hlist_for_each_entry(dev, p, head, name_hlist)
+	hlist_for_each_entry(dev, head, name_hlist)
 		if (!strncmp(dev->name, name, IFNAMSIZ))
 			return dev;
 
@@ -721,11 +683,10 @@ EXPORT_SYMBOL(__dev_get_by_name);
 
 struct net_device *dev_get_by_name_rcu(struct net *net, const char *name)
 {
-	struct hlist_node *p;
 	struct net_device *dev;
 	struct hlist_head *head = dev_name_hash(net, name);
 
-	hlist_for_each_entry_rcu(dev, p, head, name_hlist)
+	hlist_for_each_entry_rcu(dev, head, name_hlist)
 		if (!strncmp(dev->name, name, IFNAMSIZ))
 			return dev;
 
@@ -772,11 +733,10 @@ EXPORT_SYMBOL(dev_get_by_name);
 
 struct net_device *__dev_get_by_index(struct net *net, int ifindex)
 {
-	struct hlist_node *p;
 	struct net_device *dev;
 	struct hlist_head *head = dev_index_hash(net, ifindex);
 
-	hlist_for_each_entry(dev, p, head, index_hlist)
+	hlist_for_each_entry(dev, head, index_hlist)
 		if (dev->ifindex == ifindex)
 			return dev;
 
@@ -797,11 +757,10 @@ EXPORT_SYMBOL(__dev_get_by_index);
 
 struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex)
 {
-	struct hlist_node *p;
 	struct net_device *dev;
 	struct hlist_head *head = dev_index_hash(net, ifindex);
 
-	hlist_for_each_entry_rcu(dev, p, head, index_hlist)
+	hlist_for_each_entry_rcu(dev, head, index_hlist)
 		if (dev->ifindex == ifindex)
 			return dev;
 
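The four hunks above all adapt to the same v3.9 API change: hlist_for_each_entry() and hlist_for_each_entry_rcu() dropped their separate struct hlist_node * cursor argument, so the entry pointer itself now drives the loop. A self-contained sketch of a lookup in the new style (the function name is illustrative):

static struct net_device *example_lookup(struct hlist_head *head,
					 const char *name)
{
	struct net_device *dev;

	/* No spare hlist_node cursor needed any more. */
	hlist_for_each_entry(dev, head, name_hlist)
		if (!strncmp(dev->name, name, IFNAMSIZ))
			return dev;
	return NULL;
}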
@@ -1093,10 +1052,10 @@ int dev_change_name(struct net_device *dev, const char *newname)
 	if (dev->flags & IFF_UP)
 		return -EBUSY;
 
-	write_seqlock(&devnet_rename_seq);
+	write_seqcount_begin(&devnet_rename_seq);
 
 	if (strncmp(newname, dev->name, IFNAMSIZ) == 0) {
-		write_sequnlock(&devnet_rename_seq);
+		write_seqcount_end(&devnet_rename_seq);
 		return 0;
 	}
 
@@ -1104,7 +1063,7 @@ int dev_change_name(struct net_device *dev, const char *newname)
 
 	err = dev_get_valid_name(net, dev, newname);
 	if (err < 0) {
-		write_sequnlock(&devnet_rename_seq);
+		write_seqcount_end(&devnet_rename_seq);
 		return err;
 	}
 
@@ -1112,11 +1071,11 @@ rollback:
 	ret = device_rename(&dev->dev, dev->name);
 	if (ret) {
 		memcpy(dev->name, oldname, IFNAMSIZ);
-		write_sequnlock(&devnet_rename_seq);
+		write_seqcount_end(&devnet_rename_seq);
 		return ret;
 	}
 
-	write_sequnlock(&devnet_rename_seq);
+	write_seqcount_end(&devnet_rename_seq);
 
 	write_lock_bh(&dev_base_lock);
 	hlist_del_rcu(&dev->name_hlist);
@@ -1135,7 +1094,7 @@ rollback:
 	/* err >= 0 after dev_alloc_name() or stores the first errno */
 	if (err >= 0) {
 		err = ret;
-		write_seqlock(&devnet_rename_seq);
+		write_seqcount_begin(&devnet_rename_seq);
 		memcpy(dev->name, oldname, IFNAMSIZ);
 		goto rollback;
 	} else {
@@ -1227,36 +1186,6 @@ void netdev_notify_peers(struct net_device *dev)
 }
 EXPORT_SYMBOL(netdev_notify_peers);
 
-/**
- *	dev_load 	- load a network module
- *	@net: the applicable net namespace
- *	@name: name of interface
- *
- *	If a network interface is not present and the process has suitable
- *	privileges this function loads the module. If module loading is not
- *	available in this kernel then it becomes a nop.
- */
-
-void dev_load(struct net *net, const char *name)
-{
-	struct net_device *dev;
-	int no_module;
-
-	rcu_read_lock();
-	dev = dev_get_by_name_rcu(net, name);
-	rcu_read_unlock();
-
-	no_module = !dev;
-	if (no_module && capable(CAP_NET_ADMIN))
-		no_module = request_module("netdev-%s", name);
-	if (no_module && capable(CAP_SYS_MODULE)) {
-		if (!request_module("%s", name))
-			pr_warn("Loading kernel module for a network device with CAP_SYS_MODULE (deprecated).  Use CAP_NET_ADMIN and alias netdev-%s instead.\n",
-				name);
-	}
-}
-EXPORT_SYMBOL(dev_load);
-
 static int __dev_open(struct net_device *dev)
 {
 	const struct net_device_ops *ops = dev->netdev_ops;
@@ -1267,6 +1196,14 @@ static int __dev_open(struct net_device *dev)
 	if (!netif_device_present(dev))
 		return -ENODEV;
 
+	/* Block netpoll from trying to do any rx path servicing.
+	 * If we don't do this there is a chance ndo_poll_controller
+	 * or ndo_poll may be running while we open the device
+	 */
+	ret = netpoll_rx_disable(dev);
+	if (ret)
+		return ret;
+
 	ret = call_netdevice_notifiers(NETDEV_PRE_UP, dev);
 	ret = notifier_to_errno(ret);
 	if (ret)
@@ -1280,6 +1217,8 @@ static int __dev_open(struct net_device *dev)
 	if (!ret && ops->ndo_open)
 		ret = ops->ndo_open(dev);
 
+	netpoll_rx_enable(dev);
+
 	if (ret)
 		clear_bit(__LINK_STATE_START, &dev->state);
 	else {
@@ -1371,9 +1310,16 @@ static int __dev_close(struct net_device *dev)
 	int retval;
 	LIST_HEAD(single);
 
+	/* Temporarily disable netpoll until the interface is down */
+	retval = netpoll_rx_disable(dev);
+	if (retval)
+		return retval;
+
 	list_add(&dev->unreg_list, &single);
 	retval = __dev_close_many(&single);
 	list_del(&single);
+
+	netpoll_rx_enable(dev);
 	return retval;
 }
 
@@ -1409,14 +1355,22 @@ static int dev_close_many(struct list_head *head)
  */
 int dev_close(struct net_device *dev)
 {
+	int ret = 0;
 	if (dev->flags & IFF_UP) {
 		LIST_HEAD(single);
 
+		/* Block netpoll rx while the interface is going down */
+		ret = netpoll_rx_disable(dev);
+		if (ret)
+			return ret;
+
 		list_add(&dev->unreg_list, &single);
 		dev_close_many(&single);
 		list_del(&single);
+
+		netpoll_rx_enable(dev);
 	}
-	return 0;
+	return ret;
 }
 EXPORT_SYMBOL(dev_close);
 
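The three hunks above apply the same bracketing each time: quiesce netpoll's rx servicing before changing device state, then re-arm it afterwards. A condensed sketch of that pattern, assuming the netpoll_rx_disable()/netpoll_rx_enable() pair this series introduces; the wrapper function itself is a hypothetical illustration:

static int example_guarded_transition(struct net_device *dev,
				      int (*transition)(struct net_device *))
{
	int ret;

	ret = netpoll_rx_disable(dev);	/* keep ndo_poll_controller away */
	if (ret)
		return ret;
	ret = transition(dev);		/* e.g. the open or close path */
	netpoll_rx_enable(dev);		/* re-enable in every case */
	return ret;
}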
@@ -1621,57 +1575,6 @@ static inline void net_timestamp_set(struct sk_buff *skb)
 			__net_timestamp(SKB);		\
 	}						\
 
-static int net_hwtstamp_validate(struct ifreq *ifr)
-{
-	struct hwtstamp_config cfg;
-	enum hwtstamp_tx_types tx_type;
-	enum hwtstamp_rx_filters rx_filter;
-	int tx_type_valid = 0;
-	int rx_filter_valid = 0;
-
-	if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg)))
-		return -EFAULT;
-
-	if (cfg.flags) /* reserved for future extensions */
-		return -EINVAL;
-
-	tx_type = cfg.tx_type;
-	rx_filter = cfg.rx_filter;
-
-	switch (tx_type) {
-	case HWTSTAMP_TX_OFF:
-	case HWTSTAMP_TX_ON:
-	case HWTSTAMP_TX_ONESTEP_SYNC:
-		tx_type_valid = 1;
-		break;
-	}
-
-	switch (rx_filter) {
-	case HWTSTAMP_FILTER_NONE:
-	case HWTSTAMP_FILTER_ALL:
-	case HWTSTAMP_FILTER_SOME:
-	case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
-	case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
-	case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
-	case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
-	case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
-	case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
-	case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
-	case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
-	case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
-	case HWTSTAMP_FILTER_PTP_V2_EVENT:
-	case HWTSTAMP_FILTER_PTP_V2_SYNC:
-	case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
-		rx_filter_valid = 1;
-		break;
-	}
-
-	if (!tx_type_valid || !rx_filter_valid)
-		return -ERANGE;
-
-	return 0;
-}
-
 static inline bool is_skb_forwardable(struct net_device *dev,
 				      struct sk_buff *skb)
 {
@@ -1857,6 +1760,230 @@ static void netif_setup_tc(struct net_device *dev, unsigned int txq)
 	}
 }
 
+#ifdef CONFIG_XPS
+static DEFINE_MUTEX(xps_map_mutex);
+#define xmap_dereference(P)		\
+	rcu_dereference_protected((P), lockdep_is_held(&xps_map_mutex))
+
+static struct xps_map *remove_xps_queue(struct xps_dev_maps *dev_maps,
+					int cpu, u16 index)
+{
+	struct xps_map *map = NULL;
+	int pos;
+
+	if (dev_maps)
+		map = xmap_dereference(dev_maps->cpu_map[cpu]);
+
+	for (pos = 0; map && pos < map->len; pos++) {
+		if (map->queues[pos] == index) {
+			if (map->len > 1) {
+				map->queues[pos] = map->queues[--map->len];
+			} else {
+				RCU_INIT_POINTER(dev_maps->cpu_map[cpu], NULL);
+				kfree_rcu(map, rcu);
+				map = NULL;
+			}
+			break;
+		}
+	}
+
+	return map;
+}
+
+static void netif_reset_xps_queues_gt(struct net_device *dev, u16 index)
+{
+	struct xps_dev_maps *dev_maps;
+	int cpu, i;
+	bool active = false;
+
+	mutex_lock(&xps_map_mutex);
+	dev_maps = xmap_dereference(dev->xps_maps);
+
+	if (!dev_maps)
+		goto out_no_maps;
+
+	for_each_possible_cpu(cpu) {
+		for (i = index; i < dev->num_tx_queues; i++) {
+			if (!remove_xps_queue(dev_maps, cpu, i))
+				break;
+		}
+		if (i == dev->num_tx_queues)
+			active = true;
+	}
+
+	if (!active) {
+		RCU_INIT_POINTER(dev->xps_maps, NULL);
+		kfree_rcu(dev_maps, rcu);
+	}
+
+	for (i = index; i < dev->num_tx_queues; i++)
+		netdev_queue_numa_node_write(netdev_get_tx_queue(dev, i),
+					     NUMA_NO_NODE);
+
+out_no_maps:
+	mutex_unlock(&xps_map_mutex);
+}
+
+static struct xps_map *expand_xps_map(struct xps_map *map,
+				      int cpu, u16 index)
+{
+	struct xps_map *new_map;
+	int alloc_len = XPS_MIN_MAP_ALLOC;
+	int i, pos;
+
+	for (pos = 0; map && pos < map->len; pos++) {
+		if (map->queues[pos] != index)
+			continue;
+		return map;
+	}
+
+	/* Need to add queue to this CPU's existing map */
+	if (map) {
+		if (pos < map->alloc_len)
+			return map;
+
+		alloc_len = map->alloc_len * 2;
+	}
+
+	/* Need to allocate new map to store queue on this CPU's map */
+	new_map = kzalloc_node(XPS_MAP_SIZE(alloc_len), GFP_KERNEL,
+			       cpu_to_node(cpu));
+	if (!new_map)
+		return NULL;
+
+	for (i = 0; i < pos; i++)
+		new_map->queues[i] = map->queues[i];
+	new_map->alloc_len = alloc_len;
+	new_map->len = pos;
+
+	return new_map;
+}
+
+int netif_set_xps_queue(struct net_device *dev, struct cpumask *mask, u16 index)
+{
+	struct xps_dev_maps *dev_maps, *new_dev_maps = NULL;
+	struct xps_map *map, *new_map;
+	int maps_sz = max_t(unsigned int, XPS_DEV_MAPS_SIZE, L1_CACHE_BYTES);
+	int cpu, numa_node_id = -2;
+	bool active = false;
+
+	mutex_lock(&xps_map_mutex);
+
+	dev_maps = xmap_dereference(dev->xps_maps);
+
+	/* allocate memory for queue storage */
+	for_each_online_cpu(cpu) {
+		if (!cpumask_test_cpu(cpu, mask))
+			continue;
+
+		if (!new_dev_maps)
+			new_dev_maps = kzalloc(maps_sz, GFP_KERNEL);
+		if (!new_dev_maps) {
+			mutex_unlock(&xps_map_mutex);
+			return -ENOMEM;
+		}
+
+		map = dev_maps ? xmap_dereference(dev_maps->cpu_map[cpu]) :
+				 NULL;
+
+		map = expand_xps_map(map, cpu, index);
+		if (!map)
+			goto error;
+
+		RCU_INIT_POINTER(new_dev_maps->cpu_map[cpu], map);
+	}
+
+	if (!new_dev_maps)
+		goto out_no_new_maps;
+
+	for_each_possible_cpu(cpu) {
+		if (cpumask_test_cpu(cpu, mask) && cpu_online(cpu)) {
+			/* add queue to CPU maps */
+			int pos = 0;
+
+			map = xmap_dereference(new_dev_maps->cpu_map[cpu]);
+			while ((pos < map->len) && (map->queues[pos] != index))
+				pos++;
+
+			if (pos == map->len)
+				map->queues[map->len++] = index;
+#ifdef CONFIG_NUMA
+			if (numa_node_id == -2)
+				numa_node_id = cpu_to_node(cpu);
+			else if (numa_node_id != cpu_to_node(cpu))
+				numa_node_id = -1;
+#endif
+		} else if (dev_maps) {
+			/* fill in the new device map from the old device map */
+			map = xmap_dereference(dev_maps->cpu_map[cpu]);
+			RCU_INIT_POINTER(new_dev_maps->cpu_map[cpu], map);
+		}
+
+	}
+
+	rcu_assign_pointer(dev->xps_maps, new_dev_maps);
+
+	/* Cleanup old maps */
+	if (dev_maps) {
+		for_each_possible_cpu(cpu) {
+			new_map = xmap_dereference(new_dev_maps->cpu_map[cpu]);
+			map = xmap_dereference(dev_maps->cpu_map[cpu]);
+			if (map && map != new_map)
+				kfree_rcu(map, rcu);
+		}
+
+		kfree_rcu(dev_maps, rcu);
+	}
+
+	dev_maps = new_dev_maps;
+	active = true;
+
+out_no_new_maps:
+	/* update Tx queue numa node */
+	netdev_queue_numa_node_write(netdev_get_tx_queue(dev, index),
+				     (numa_node_id >= 0) ? numa_node_id :
+				     NUMA_NO_NODE);
+
+	if (!dev_maps)
+		goto out_no_maps;
+
+	/* removes queue from unused CPUs */
+	for_each_possible_cpu(cpu) {
+		if (cpumask_test_cpu(cpu, mask) && cpu_online(cpu))
+			continue;
+
+		if (remove_xps_queue(dev_maps, cpu, index))
+			active = true;
+	}
+
+	/* free map if not active */
+	if (!active) {
+		RCU_INIT_POINTER(dev->xps_maps, NULL);
+		kfree_rcu(dev_maps, rcu);
+	}
+
+out_no_maps:
+	mutex_unlock(&xps_map_mutex);
+
+	return 0;
+error:
+	/* remove any maps that we added */
+	for_each_possible_cpu(cpu) {
+		new_map = xmap_dereference(new_dev_maps->cpu_map[cpu]);
+		map = dev_maps ? xmap_dereference(dev_maps->cpu_map[cpu]) :
+				 NULL;
+		if (new_map && new_map != map)
+			kfree(new_map);
+	}
+
+	mutex_unlock(&xps_map_mutex);
+
+	kfree(new_dev_maps);
+	return -ENOMEM;
+}
+EXPORT_SYMBOL(netif_set_xps_queue);
+
+#endif
 /*
  * Routine to help set real_num_tx_queues. To avoid skbs mapped to queues
  * greater then real_num_tx_queues stale skbs on the qdisc must be flushed.
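netif_set_xps_queue() gives drivers an in-kernel way to program transmit packet steering maps; userspace reaches the same maps through /sys/class/net/<dev>/queues/tx-<n>/xps_cpus. A hedged usage sketch built on the signature exported above — the surrounding driver function is hypothetical:

static int example_pin_tx_queue0(struct net_device *dev)
{
	cpumask_var_t mask;
	int cpu, err;

	if (!alloc_cpumask_var(&mask, GFP_KERNEL))
		return -ENOMEM;

	cpumask_clear(mask);
	for (cpu = 0; cpu < 4 && cpu < nr_cpu_ids; cpu++)
		cpumask_set_cpu(cpu, mask);	/* steer CPUs 0-3 to queue 0 */

	err = netif_set_xps_queue(dev, mask, 0);
	free_cpumask_var(mask);
	return err;
}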
@@ -1880,8 +2007,12 @@ int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
 		if (dev->num_tc)
 			netif_setup_tc(dev, txq);
 
-		if (txq < dev->real_num_tx_queues)
+		if (txq < dev->real_num_tx_queues) {
 			qdisc_reset_all_tx_gt(dev, txq);
+#ifdef CONFIG_XPS
+			netif_reset_xps_queues_gt(dev, txq);
+#endif
+		}
 	}
 
 	dev->real_num_tx_queues = txq;
@@ -2046,6 +2177,15 @@ int skb_checksum_help(struct sk_buff *skb)
 		return -EINVAL;
 	}
 
+	/* Before computing a checksum, we should make sure no frag could
+	 * be modified by an external entity : checksum could be wrong.
+	 */
+	if (skb_has_shared_frag(skb)) {
+		ret = __skb_linearize(skb);
+		if (ret)
+			goto out;
+	}
+
 	offset = skb_checksum_start_offset(skb);
 	BUG_ON(offset >= skb_headlen(skb));
 	csum = skb_checksum(skb, offset, skb->len - offset, 0);
@@ -2069,25 +2209,19 @@ out:
 EXPORT_SYMBOL(skb_checksum_help);
 
 /**
- *	skb_gso_segment - Perform segmentation on skb.
+ *	skb_mac_gso_segment - mac layer segmentation handler.
  *	@skb: buffer to segment
  *	@features: features for the output path (see dev->features)
- *
- *	This function segments the given skb and returns a list of segments.
- *
- *	It may return NULL if the skb requires no segmentation.  This is
- *	only possible when GSO is used for verifying header integrity.
  */
-struct sk_buff *skb_gso_segment(struct sk_buff *skb,
+struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb,
 	netdev_features_t features)
 {
 	struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
 	struct packet_offload *ptype;
 	__be16 type = skb->protocol;
-	int vlan_depth = ETH_HLEN;
-	int err;
 
 	while (type == htons(ETH_P_8021Q)) {
+		int vlan_depth = ETH_HLEN;
 		struct vlan_hdr *vh;
 
 		if (unlikely(!pskb_may_pull(skb, vlan_depth + VLAN_HLEN)))
@@ -2098,22 +2232,14 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb,
 		vlan_depth += VLAN_HLEN;
 	}
 
-	skb_reset_mac_header(skb);
-	skb->mac_len = skb->network_header - skb->mac_header;
 	__skb_pull(skb, skb->mac_len);
 
-	if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
-		skb_warn_bad_offload(skb);
-
-		if (skb_header_cloned(skb) &&
-		    (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
-			return ERR_PTR(err);
-	}
-
 	rcu_read_lock();
 	list_for_each_entry_rcu(ptype, &offload_base, list) {
 		if (ptype->type == type && ptype->callbacks.gso_segment) {
 			if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
+				int err;
+
 				err = ptype->callbacks.gso_send_check(skb);
 				segs = ERR_PTR(err);
 				if (err || skb_gso_ok(skb, features))
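The loop touched by the two hunks above peels nested 802.1Q headers to find the real ethertype before looking up a segmentation handler. A standalone sketch of the same walk, assuming the relevant headers are already linear in skb->data (the real code calls pskb_may_pull() first); the function name is illustrative:

static __be16 example_peel_vlan(const struct sk_buff *skb)
{
	__be16 type = skb->protocol;
	int vlan_depth = ETH_HLEN;

	/* Each 802.1Q tag adds VLAN_HLEN (4) bytes before the payload type. */
	while (type == htons(ETH_P_8021Q)) {
		const struct vlan_hdr *vh =
			(const struct vlan_hdr *)(skb->data + vlan_depth);

		type = vh->h_vlan_encapsulated_proto;
		vlan_depth += VLAN_HLEN;
	}
	return type;
}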
@@ -2131,7 +2257,50 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb,
 
 	return segs;
 }
-EXPORT_SYMBOL(skb_gso_segment);
+EXPORT_SYMBOL(skb_mac_gso_segment);
+
+
+/* openvswitch calls this on rx path, so we need a different check.
+ */
+static inline bool skb_needs_check(struct sk_buff *skb, bool tx_path)
+{
+	if (tx_path)
+		return skb->ip_summed != CHECKSUM_PARTIAL;
+	else
+		return skb->ip_summed == CHECKSUM_NONE;
+}
+
+/**
+ *	__skb_gso_segment - Perform segmentation on skb.
+ *	@skb: buffer to segment
+ *	@features: features for the output path (see dev->features)
+ *	@tx_path: whether it is called in TX path
+ *
+ *	This function segments the given skb and returns a list of segments.
+ *
+ *	It may return NULL if the skb requires no segmentation.  This is
+ *	only possible when GSO is used for verifying header integrity.
+ */
+struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
+				  netdev_features_t features, bool tx_path)
+{
+	if (unlikely(skb_needs_check(skb, tx_path))) {
+		int err;
+
+		skb_warn_bad_offload(skb);
+
+		if (skb_header_cloned(skb) &&
+		    (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
+			return ERR_PTR(err);
+	}
+
+	SKB_GSO_CB(skb)->mac_offset = skb_headroom(skb);
+	skb_reset_mac_header(skb);
+	skb_reset_mac_len(skb);
+
+	return skb_mac_gso_segment(skb, features);
+}
+EXPORT_SYMBOL(__skb_gso_segment);
 
 /* Take action when hardware reception checksum errors are detected. */
 #ifdef CONFIG_BUG
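With this split, callers choose the checksum sanity test by path: tx callers object to anything that is not CHECKSUM_PARTIAL, while openvswitch's rx-side use only objects to CHECKSUM_NONE. A sketch of a tx-path wrapper over the new entry point — the wrapper name here is illustrative; this series is expected to provide an equivalent inline elsewhere:

static inline struct sk_buff *
example_skb_gso_segment(struct sk_buff *skb, netdev_features_t features)
{
	/* tx_path = true: enforce the stricter CHECKSUM_PARTIAL check */
	return __skb_gso_segment(skb, features, true);
}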
@@ -2410,126 +2579,28 @@ out:
 	return rc;
 }
 
-static u32 hashrnd __read_mostly;
-
-/*
- * Returns a Tx hash based on the given packet descriptor a Tx queues' number
- * to be used as a distribution range.
- */
-u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb,
-		  unsigned int num_tx_queues)
-{
-	u32 hash;
-	u16 qoffset = 0;
-	u16 qcount = num_tx_queues;
-
-	if (skb_rx_queue_recorded(skb)) {
-		hash = skb_get_rx_queue(skb);
-		while (unlikely(hash >= num_tx_queues))
-			hash -= num_tx_queues;
-		return hash;
-	}
-
-	if (dev->num_tc) {
-		u8 tc = netdev_get_prio_tc_map(dev, skb->priority);
-		qoffset = dev->tc_to_txq[tc].offset;
-		qcount = dev->tc_to_txq[tc].count;
-	}
-
-	if (skb->sk && skb->sk->sk_hash)
-		hash = skb->sk->sk_hash;
-	else
-		hash = (__force u16) skb->protocol;
-	hash = jhash_1word(hash, hashrnd);
-
-	return (u16) (((u64) hash * qcount) >> 32) + qoffset;
-}
-EXPORT_SYMBOL(__skb_tx_hash);
-
-static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index)
-{
-	if (unlikely(queue_index >= dev->real_num_tx_queues)) {
-		net_warn_ratelimited("%s selects TX queue %d, but real number of TX queues is %d\n",
-				     dev->name, queue_index,
-				     dev->real_num_tx_queues);
-		return 0;
-	}
-	return queue_index;
-}
-
-static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
+static void qdisc_pkt_len_init(struct sk_buff *skb)
 {
-#ifdef CONFIG_XPS
-	struct xps_dev_maps *dev_maps;
-	struct xps_map *map;
-	int queue_index = -1;
+	const struct skb_shared_info *shinfo = skb_shinfo(skb);
 
-	rcu_read_lock();
-	dev_maps = rcu_dereference(dev->xps_maps);
-	if (dev_maps) {
-		map = rcu_dereference(
-		    dev_maps->cpu_map[raw_smp_processor_id()]);
-		if (map) {
-			if (map->len == 1)
-				queue_index = map->queues[0];
-			else {
-				u32 hash;
-				if (skb->sk && skb->sk->sk_hash)
-					hash = skb->sk->sk_hash;
-				else
-					hash = (__force u16) skb->protocol ^
-					    skb->rxhash;
-				hash = jhash_1word(hash, hashrnd);
-				queue_index = map->queues[
-				    ((u64)hash * map->len) >> 32];
-			}
-			if (unlikely(queue_index >= dev->real_num_tx_queues))
-				queue_index = -1;
-		}
-	}
-	rcu_read_unlock();
-
-	return queue_index;
-#else
-	return -1;
-#endif
-}
-
-struct netdev_queue *netdev_pick_tx(struct net_device *dev,
-				    struct sk_buff *skb)
-{
-	int queue_index;
-	const struct net_device_ops *ops = dev->netdev_ops;
-
-	if (dev->real_num_tx_queues == 1)
-		queue_index = 0;
-	else if (ops->ndo_select_queue) {
-		queue_index = ops->ndo_select_queue(dev, skb);
-		queue_index = dev_cap_txqueue(dev, queue_index);
-	} else {
-		struct sock *sk = skb->sk;
-		queue_index = sk_tx_queue_get(sk);
+	qdisc_skb_cb(skb)->pkt_len = skb->len;
 
-		if (queue_index < 0 || skb->ooo_okay ||
-		    queue_index >= dev->real_num_tx_queues) {
-			int old_index = queue_index;
-
-			queue_index = get_xps_queue(dev, skb);
-			if (queue_index < 0)
-				queue_index = skb_tx_hash(dev, skb);
+	/* To get more precise estimation of bytes sent on wire,
+	 * we add to pkt_len the headers size of all segments
+	 */
+	if (shinfo->gso_size) {
+		unsigned int hdr_len;
 
-			if (queue_index != old_index && sk) {
-				struct dst_entry *dst =
-				    rcu_dereference_check(sk->sk_dst_cache, 1);
+		/* mac layer + network layer */
+		hdr_len = skb_transport_header(skb) - skb_mac_header(skb);
 
-				if (dst && skb_dst(skb) == dst)
-					sk_tx_queue_set(sk, queue_index);
-			}
-		}
+		/* + transport layer */
+		if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))
+			hdr_len += tcp_hdrlen(skb);
+		else
+			hdr_len += sizeof(struct udphdr);
+		qdisc_skb_cb(skb)->pkt_len += (shinfo->gso_segs - 1) * hdr_len;
 	}
-
-	skb_set_queue_mapping(skb, queue_index);
-	return netdev_get_tx_queue(dev, queue_index);
 }
 
 static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
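The accounting above only adds header bytes for the segments beyond the first, since skb->len already covers one copy of the headers plus all payload. A worked example with assumed numbers:

/* Assumed example: a TCP GSO skb with gso_segs = 4 and 66 bytes of
 * mac + network + transport headers (14 + 20 + 32):
 *
 *	pkt_len = skb->len + (gso_segs - 1) * hdr_len
 *	        = skb->len + 3 * 66
 *	        = skb->len + 198 extra on-wire header bytes
 *
 * so a byte-based qdisc rate limiter sees what the wire will actually
 * carry, not just the length of the single super-packet.
 */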
@@ -2540,7 +2611,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
 	bool contended;
 	int rc;
 
-	qdisc_skb_cb(skb)->pkt_len = skb->len;
+	qdisc_pkt_len_init(skb);
 	qdisc_calculate_pkt_len(skb, q);
 	/*
 	 * Heuristic to force contended enqueues to serialize on a
@@ -2663,6 +2734,8 @@ int dev_queue_xmit(struct sk_buff *skb)
 	struct Qdisc *q;
 	int rc = -ENOMEM;
 
+	skb_reset_mac_header(skb);
+
 	/* Disable soft irqs for various locks below. Also
 	 * stops preemption for RCU.
 	 */
@@ -2757,41 +2830,6 @@ static inline void ____napi_schedule(struct softnet_data *sd,
 	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
 }
 
-/*
- * __skb_get_rxhash: calculate a flow hash based on src/dst addresses
- * and src/dst port numbers.  Sets rxhash in skb to non-zero hash value
- * on success, zero indicates no valid hash.  Also, sets l4_rxhash in skb
- * if hash is a canonical 4-tuple hash over transport ports.
- */
-void __skb_get_rxhash(struct sk_buff *skb)
-{
-	struct flow_keys keys;
-	u32 hash;
-
-	if (!skb_flow_dissect(skb, &keys))
-		return;
-
-	if (keys.ports)
-		skb->l4_rxhash = 1;
-
-	/* get a consistent hash (same value on both flow directions) */
-	if (((__force u32)keys.dst < (__force u32)keys.src) ||
-	    (((__force u32)keys.dst == (__force u32)keys.src) &&
-	     ((__force u16)keys.port16[1] < (__force u16)keys.port16[0]))) {
-		swap(keys.dst, keys.src);
-		swap(keys.port16[0], keys.port16[1]);
-	}
-
-	hash = jhash_3words((__force u32)keys.dst,
-			    (__force u32)keys.src,
-			    (__force u32)keys.ports, hashrnd);
-	if (!hash)
-		hash = 1;
-
-	skb->rxhash = hash;
-}
-EXPORT_SYMBOL(__skb_get_rxhash);
-
 #ifdef CONFIG_RPS
 
 /* One global table that all flow-based protocols share. */
@@ -3318,7 +3356,7 @@ static bool skb_pfmemalloc_protocol(struct sk_buff *skb)
 	}
 }
 
-static int __netif_receive_skb(struct sk_buff *skb)
+static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc)
 {
 	struct packet_type *ptype, *pt_prev;
 	rx_handler_func_t *rx_handler;
@@ -3327,24 +3365,11 @@
 	bool deliver_exact = false;
 	int ret = NET_RX_DROP;
 	__be16 type;
-	unsigned long pflags = current->flags;
 
 	net_timestamp_check(!netdev_tstamp_prequeue, skb);
 
 	trace_netif_receive_skb(skb);
 
-	/*
-	 * PFMEMALLOC skbs are special, they should
-	 * - be delivered to SOCK_MEMALLOC sockets only
-	 * - stay away from userspace
-	 * - have bounded memory usage
-	 *
-	 * Use PF_MEMALLOC as this saves us from propagating the allocation
-	 * context down to all allocation sites.
-	 */
-	if (sk_memalloc_socks() && skb_pfmemalloc(skb))
-		current->flags |= PF_MEMALLOC;
-
 	/* if we've gotten here through NAPI, check netpoll */
 	if (netpoll_receive_skb(skb))
 		goto out;
@@ -3352,7 +3377,8 @@
 	orig_dev = skb->dev;
 
 	skb_reset_network_header(skb);
-	skb_reset_transport_header(skb);
+	if (!skb_transport_header_was_set(skb))
+		skb_reset_transport_header(skb);
 	skb_reset_mac_len(skb);
 
 	pt_prev = NULL;
@@ -3377,7 +3403,7 @@ another_round:
 	}
 #endif
 
-	if (sk_memalloc_socks() && skb_pfmemalloc(skb))
+	if (pfmemalloc)
 		goto skip_taps;
 
 	list_for_each_entry_rcu(ptype, &ptype_all, list) {
@@ -3396,8 +3422,7 @@ skip_taps:
 ncls:
 #endif
 
-	if (sk_memalloc_socks() && skb_pfmemalloc(skb)
-				&& !skb_pfmemalloc_protocol(skb))
+	if (pfmemalloc && !skb_pfmemalloc_protocol(skb))
 		goto drop;
 
 	if (vlan_tx_tag_present(skb)) {
@@ -3467,7 +3492,31 @@ drop:
 unlock:
 	rcu_read_unlock();
 out:
-	tsk_restore_flags(current, pflags, PF_MEMALLOC);
+	return ret;
+}
+
+static int __netif_receive_skb(struct sk_buff *skb)
+{
+	int ret;
+
+	if (sk_memalloc_socks() && skb_pfmemalloc(skb)) {
+		unsigned long pflags = current->flags;
+
+		/*
+		 * PFMEMALLOC skbs are special, they should
+		 * - be delivered to SOCK_MEMALLOC sockets only
+		 * - stay away from userspace
+		 * - have bounded memory usage
+		 *
+		 * Use PF_MEMALLOC as this saves us from propagating the allocation
+		 * context down to all allocation sites.
+		 */
+		current->flags |= PF_MEMALLOC;
+		ret = __netif_receive_skb_core(skb, true);
+		tsk_restore_flags(current, pflags, PF_MEMALLOC);
+	} else
+		ret = __netif_receive_skb_core(skb, false);
+
 	return ret;
 }
 
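The split above moves the PF_MEMALLOC save/restore out of the per-packet fast path: only skbs that came from emergency reserves pay for it. The same scoping idiom in isolation, as a sketch (the wrapper and its callback are placeholders, not part of this diff):

static int example_with_memalloc(int (*fn)(struct sk_buff *, bool),
				 struct sk_buff *skb)
{
	unsigned long pflags = current->flags;
	int ret;

	current->flags |= PF_MEMALLOC;	/* allow dipping into reserves */
	ret = fn(skb, true);
	/* put current->flags back exactly as found */
	tsk_restore_flags(current, pflags, PF_MEMALLOC);
	return ret;
}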
@@ -3634,7 +3683,6 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
 	__be16 type = skb->protocol;
 	struct list_head *head = &offload_base;
 	int same_flow;
-	int mac_len;
 	enum gro_result ret;
 
 	if (!(skb->dev->features & NETIF_F_GRO) || netpoll_rx_on(skb))
@@ -3651,8 +3699,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
 			continue;
 
 		skb_set_network_header(skb, skb_gro_offset(skb));
-		mac_len = skb->network_header - skb->mac_header;
-		skb->mac_len = mac_len;
+		skb_reset_mac_len(skb);
 		NAPI_GRO_CB(skb)->same_flow = 0;
 		NAPI_GRO_CB(skb)->flush = 0;
 		NAPI_GRO_CB(skb)->free = 0;
@@ -4134,530 +4181,231 @@ softnet_break: | |||
4134 | goto out; | 4181 | goto out; |
4135 | } | 4182 | } |
4136 | 4183 | ||
4137 | static gifconf_func_t *gifconf_list[NPROTO]; | 4184 | struct netdev_upper { |
4138 | |||
4139 | /** | ||
4140 | * register_gifconf - register a SIOCGIF handler | ||
4141 | * @family: Address family | ||
4142 | * @gifconf: Function handler | ||
4143 | * | ||
4144 | * Register protocol dependent address dumping routines. The handler | ||
4145 | * that is passed must not be freed or reused until it has been replaced | ||
4146 | * by another handler. | ||
4147 | */ | ||
4148 | int register_gifconf(unsigned int family, gifconf_func_t *gifconf) | ||
4149 | { | ||
4150 | if (family >= NPROTO) | ||
4151 | return -EINVAL; | ||
4152 | gifconf_list[family] = gifconf; | ||
4153 | return 0; | ||
4154 | } | ||
4155 | EXPORT_SYMBOL(register_gifconf); | ||
4156 | |||
4157 | |||
4158 | /* | ||
4159 | * Map an interface index to its name (SIOCGIFNAME) | ||
4160 | */ | ||
4161 | |||
4162 | /* | ||
4163 | * We need this ioctl for efficient implementation of the | ||
4164 | * if_indextoname() function required by the IPv6 API. Without | ||
4165 | * it, we would have to search all the interfaces to find a | ||
4166 | * match. --pb | ||
4167 | */ | ||
4168 | |||
4169 | static int dev_ifname(struct net *net, struct ifreq __user *arg) | ||
4170 | { | ||
4171 | struct net_device *dev; | 4185 | struct net_device *dev; |
4172 | struct ifreq ifr; | 4186 | bool master; |
4173 | unsigned seq; | 4187 | struct list_head list; |
4174 | 4188 | struct rcu_head rcu; | |
4175 | /* | 4189 | struct list_head search_list; |
4176 | * Fetch the caller's info block. | 4190 | }; |
4177 | */ | ||
4178 | 4191 | ||
4179 | if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) | 4192 | static void __append_search_uppers(struct list_head *search_list, |
4180 | return -EFAULT; | 4193 | struct net_device *dev) |
4194 | { | ||
4195 | struct netdev_upper *upper; | ||
4181 | 4196 | ||
4182 | retry: | 4197 | list_for_each_entry(upper, &dev->upper_dev_list, list) { |
4183 | seq = read_seqbegin(&devnet_rename_seq); | 4198 | /* check if this upper is not already in search list */ |
4184 | rcu_read_lock(); | 4199 | if (list_empty(&upper->search_list)) |
4185 | dev = dev_get_by_index_rcu(net, ifr.ifr_ifindex); | 4200 | list_add_tail(&upper->search_list, search_list); |
4186 | if (!dev) { | ||
4187 | rcu_read_unlock(); | ||
4188 | return -ENODEV; | ||
4189 | } | 4201 | } |
4190 | |||
4191 | strcpy(ifr.ifr_name, dev->name); | ||
4192 | rcu_read_unlock(); | ||
4193 | if (read_seqretry(&devnet_rename_seq, seq)) | ||
4194 | goto retry; | ||
4195 | |||
4196 | if (copy_to_user(arg, &ifr, sizeof(struct ifreq))) | ||
4197 | return -EFAULT; | ||
4198 | return 0; | ||
4199 | } | 4202 | } |
4200 | 4203 | ||
4201 | /* | 4204 | static bool __netdev_search_upper_dev(struct net_device *dev, |
4202 | * Perform a SIOCGIFCONF call. This structure will change | 4205 | struct net_device *upper_dev) |
4203 | * size eventually, and there is nothing I can do about it. | ||
4204 | * Thus we will need a 'compatibility mode'. | ||
4205 | */ | ||
4206 | |||
4207 | static int dev_ifconf(struct net *net, char __user *arg) | ||
4208 | { | 4206 | { |
4209 | struct ifconf ifc; | 4207 | LIST_HEAD(search_list); |
4210 | struct net_device *dev; | 4208 | struct netdev_upper *upper; |
4211 | char __user *pos; | 4209 | struct netdev_upper *tmp; |
4212 | int len; | 4210 | bool ret = false; |
4213 | int total; | ||
4214 | int i; | ||
4215 | 4211 | ||
4216 | /* | 4212 | __append_search_uppers(&search_list, dev); |
4217 | * Fetch the caller's info block. | 4213 | list_for_each_entry(upper, &search_list, search_list) { |
4218 | */ | 4214 | if (upper->dev == upper_dev) { |
4219 | 4215 | ret = true; | |
4220 | if (copy_from_user(&ifc, arg, sizeof(struct ifconf))) | 4216 | break; |
4221 | return -EFAULT; | ||
4222 | |||
4223 | pos = ifc.ifc_buf; | ||
4224 | len = ifc.ifc_len; | ||
4225 | |||
4226 | /* | ||
4227 | * Loop over the interfaces, and write an info block for each. | ||
4228 | */ | ||
4229 | |||
4230 | total = 0; | ||
4231 | for_each_netdev(net, dev) { | ||
4232 | for (i = 0; i < NPROTO; i++) { | ||
4233 | if (gifconf_list[i]) { | ||
4234 | int done; | ||
4235 | if (!pos) | ||
4236 | done = gifconf_list[i](dev, NULL, 0); | ||
4237 | else | ||
4238 | done = gifconf_list[i](dev, pos + total, | ||
4239 | len - total); | ||
4240 | if (done < 0) | ||
4241 | return -EFAULT; | ||
4242 | total += done; | ||
4243 | } | ||
4244 | } | 4217 | } |
4218 | __append_search_uppers(&search_list, upper->dev); | ||
4245 | } | 4219 | } |
4246 | 4220 | list_for_each_entry_safe(upper, tmp, &search_list, search_list) | |
4247 | /* | 4221 | INIT_LIST_HEAD(&upper->search_list); |
4248 | * All done. Write the updated control block back to the caller. | 4222 | return ret; |
4249 | */ | ||
4250 | ifc.ifc_len = total; | ||
4251 | |||
4252 | /* | ||
4253 | * Both BSD and Solaris return 0 here, so we do too. | ||
4254 | */ | ||
4255 | return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0; | ||
4256 | } | 4223 | } |
4257 | 4224 | ||
4258 | #ifdef CONFIG_PROC_FS | 4225 | static struct netdev_upper *__netdev_find_upper(struct net_device *dev, |
4259 | 4226 | struct net_device *upper_dev) | |
4260 | #define BUCKET_SPACE (32 - NETDEV_HASHBITS - 1) | ||
4261 | |||
4262 | #define get_bucket(x) ((x) >> BUCKET_SPACE) | ||
4263 | #define get_offset(x) ((x) & ((1 << BUCKET_SPACE) - 1)) | ||
4264 | #define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o)) | ||
4265 | |||
4266 | static inline struct net_device *dev_from_same_bucket(struct seq_file *seq, loff_t *pos) | ||
4267 | { | 4227 | { |
4268 | struct net *net = seq_file_net(seq); | 4228 | struct netdev_upper *upper; |
4269 | struct net_device *dev; | ||
4270 | struct hlist_node *p; | ||
4271 | struct hlist_head *h; | ||
4272 | unsigned int count = 0, offset = get_offset(*pos); | ||
4273 | 4229 | ||
4274 | h = &net->dev_name_head[get_bucket(*pos)]; | 4230 | list_for_each_entry(upper, &dev->upper_dev_list, list) { |
4275 | hlist_for_each_entry_rcu(dev, p, h, name_hlist) { | 4231 | if (upper->dev == upper_dev) |
4276 | if (++count == offset) | 4232 | return upper; |
4277 | return dev; | ||
4278 | } | 4233 | } |
4279 | |||
4280 | return NULL; | 4234 | return NULL; |
4281 | } | 4235 | } |
4282 | 4236 | ||
4283 | static inline struct net_device *dev_from_bucket(struct seq_file *seq, loff_t *pos) | 4237 | /** |
4284 | { | 4238 | * netdev_has_upper_dev - Check if device is linked to an upper device |
4285 | struct net_device *dev; | 4239 | * @dev: device |
4286 | unsigned int bucket; | 4240 | * @upper_dev: upper device to check |
4287 | 4241 | * | |
4288 | do { | 4242 | * Find out if a device is linked to specified upper device and return true |
4289 | dev = dev_from_same_bucket(seq, pos); | 4243 | * in case it is. Note that this checks only immediate upper device, |
4290 | if (dev) | 4244 | * not through a complete stack of devices. The caller must hold the RTNL lock. |
4291 | return dev; | ||
4292 | |||
4293 | bucket = get_bucket(*pos) + 1; | ||
4294 | *pos = set_bucket_offset(bucket, 1); | ||
4295 | } while (bucket < NETDEV_HASHENTRIES); | ||
4296 | |||
4297 | return NULL; | ||
4298 | } | ||
4299 | |||
4300 | /* | ||
4301 | * This is invoked by the /proc filesystem handler to display a device | ||
4302 | * in detail. | ||
4303 | */ | 4245 | */ |
4304 | void *dev_seq_start(struct seq_file *seq, loff_t *pos) | 4246 | bool netdev_has_upper_dev(struct net_device *dev, |
4305 | __acquires(RCU) | 4247 | struct net_device *upper_dev) |
4306 | { | ||
4307 | rcu_read_lock(); | ||
4308 | if (!*pos) | ||
4309 | return SEQ_START_TOKEN; | ||
4310 | |||
4311 | if (get_bucket(*pos) >= NETDEV_HASHENTRIES) | ||
4312 | return NULL; | ||
4313 | |||
4314 | return dev_from_bucket(seq, pos); | ||
4315 | } | ||
4316 | |||
4317 | void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos) | ||
4318 | { | ||
4319 | ++*pos; | ||
4320 | return dev_from_bucket(seq, pos); | ||
4321 | } | ||
4322 | |||
4323 | void dev_seq_stop(struct seq_file *seq, void *v) | ||
4324 | __releases(RCU) | ||
4325 | { | ||
4326 | rcu_read_unlock(); | ||
4327 | } | ||
4328 | |||
4329 | static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev) | ||
4330 | { | 4248 | { |
4331 | struct rtnl_link_stats64 temp; | 4249 | ASSERT_RTNL(); |
4332 | const struct rtnl_link_stats64 *stats = dev_get_stats(dev, &temp); | ||
4333 | 4250 | ||
4334 | seq_printf(seq, "%6s: %7llu %7llu %4llu %4llu %4llu %5llu %10llu %9llu " | 4251 | return __netdev_find_upper(dev, upper_dev); |
4335 | "%8llu %7llu %4llu %4llu %4llu %5llu %7llu %10llu\n", | ||
4336 | dev->name, stats->rx_bytes, stats->rx_packets, | ||
4337 | stats->rx_errors, | ||
4338 | stats->rx_dropped + stats->rx_missed_errors, | ||
4339 | stats->rx_fifo_errors, | ||
4340 | stats->rx_length_errors + stats->rx_over_errors + | ||
4341 | stats->rx_crc_errors + stats->rx_frame_errors, | ||
4342 | stats->rx_compressed, stats->multicast, | ||
4343 | stats->tx_bytes, stats->tx_packets, | ||
4344 | stats->tx_errors, stats->tx_dropped, | ||
4345 | stats->tx_fifo_errors, stats->collisions, | ||
4346 | stats->tx_carrier_errors + | ||
4347 | stats->tx_aborted_errors + | ||
4348 | stats->tx_window_errors + | ||
4349 | stats->tx_heartbeat_errors, | ||
4350 | stats->tx_compressed); | ||
4351 | } | 4252 | } |
4253 | EXPORT_SYMBOL(netdev_has_upper_dev); | ||
4352 | 4254 | ||
4353 | /* | 4255 | /** |
4354 | * Called from the PROCfs module. This now uses the new arbitrary sized | 4256 | * netdev_has_any_upper_dev - Check if device is linked to some device |
4355 | * /proc/net interface to create /proc/net/dev | 4257 | * @dev: device |
4258 | * | ||
4259 | * Find out if a device is linked to an upper device and return true in case | ||
4260 | * it is. The caller must hold the RTNL lock. | ||
4356 | */ | 4261 | */ |
4357 | static int dev_seq_show(struct seq_file *seq, void *v) | 4262 | bool netdev_has_any_upper_dev(struct net_device *dev) |
4358 | { | ||
4359 | if (v == SEQ_START_TOKEN) | ||
4360 | seq_puts(seq, "Inter-| Receive " | ||
4361 | " | Transmit\n" | ||
4362 | " face |bytes packets errs drop fifo frame " | ||
4363 | "compressed multicast|bytes packets errs " | ||
4364 | "drop fifo colls carrier compressed\n"); | ||
4365 | else | ||
4366 | dev_seq_printf_stats(seq, v); | ||
4367 | return 0; | ||
4368 | } | ||
4369 | |||
4370 | static struct softnet_data *softnet_get_online(loff_t *pos) | ||
4371 | { | ||
4372 | struct softnet_data *sd = NULL; | ||
4373 | |||
4374 | while (*pos < nr_cpu_ids) | ||
4375 | if (cpu_online(*pos)) { | ||
4376 | sd = &per_cpu(softnet_data, *pos); | ||
4377 | break; | ||
4378 | } else | ||
4379 | ++*pos; | ||
4380 | return sd; | ||
4381 | } | ||
4382 | |||
4383 | static void *softnet_seq_start(struct seq_file *seq, loff_t *pos) | ||
4384 | { | 4263 | { |
4385 | return softnet_get_online(pos); | 4264 | ASSERT_RTNL(); |
4386 | } | ||
4387 | |||
4388 | static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos) | ||
4389 | { | ||
4390 | ++*pos; | ||
4391 | return softnet_get_online(pos); | ||
4392 | } | ||
4393 | 4265 | ||
4394 | static void softnet_seq_stop(struct seq_file *seq, void *v) | 4266 | return !list_empty(&dev->upper_dev_list); |
4395 | { | ||
4396 | } | 4267 | } |
4268 | EXPORT_SYMBOL(netdev_has_any_upper_dev); | ||
4397 | 4269 | ||
4398 | static int softnet_seq_show(struct seq_file *seq, void *v) | 4270 | /** |
4399 | { | 4271 | * netdev_master_upper_dev_get - Get master upper device |
4400 | struct softnet_data *sd = v; | 4272 | * @dev: device |
4401 | 4273 | * | |
4402 | seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x\n", | 4274 | * Find a master upper device and return pointer to it or NULL in case |
4403 | sd->processed, sd->dropped, sd->time_squeeze, 0, | 4275 | * it's not there. The caller must hold the RTNL lock. |
4404 | 0, 0, 0, 0, /* was fastroute */ | 4276 | */ |
4405 | sd->cpu_collision, sd->received_rps); | 4277 | struct net_device *netdev_master_upper_dev_get(struct net_device *dev) |
4406 | return 0; | ||
4407 | } | ||
4408 | |||
4409 | static const struct seq_operations dev_seq_ops = { | ||
4410 | .start = dev_seq_start, | ||
4411 | .next = dev_seq_next, | ||
4412 | .stop = dev_seq_stop, | ||
4413 | .show = dev_seq_show, | ||
4414 | }; | ||
4415 | |||
4416 | static int dev_seq_open(struct inode *inode, struct file *file) | ||
4417 | { | 4278 | { |
4418 | return seq_open_net(inode, file, &dev_seq_ops, | 4279 | struct netdev_upper *upper; |
4419 | sizeof(struct seq_net_private)); | ||
4420 | } | ||
4421 | 4280 | ||
4422 | static const struct file_operations dev_seq_fops = { | 4281 | ASSERT_RTNL(); |
4423 | .owner = THIS_MODULE, | ||
4424 | .open = dev_seq_open, | ||
4425 | .read = seq_read, | ||
4426 | .llseek = seq_lseek, | ||
4427 | .release = seq_release_net, | ||
4428 | }; | ||
4429 | 4282 | ||
4430 | static const struct seq_operations softnet_seq_ops = { | 4283 | if (list_empty(&dev->upper_dev_list)) |
4431 | .start = softnet_seq_start, | 4284 | return NULL; |
4432 | .next = softnet_seq_next, | ||
4433 | .stop = softnet_seq_stop, | ||
4434 | .show = softnet_seq_show, | ||
4435 | }; | ||
4436 | 4285 | ||
4437 | static int softnet_seq_open(struct inode *inode, struct file *file) | 4286 | upper = list_first_entry(&dev->upper_dev_list, |
4438 | { | 4287 | struct netdev_upper, list); |
4439 | return seq_open(file, &softnet_seq_ops); | 4288 | if (likely(upper->master)) |
4289 | return upper->dev; | ||
4290 | return NULL; | ||
4440 | } | 4291 | } |
4292 | EXPORT_SYMBOL(netdev_master_upper_dev_get); | ||
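A hedged caller sketch (slave_dev is an assumed variable); both the call and any use of the returned pointer must happen under RTNL:

	struct net_device *master;

	rtnl_lock();
	master = netdev_master_upper_dev_get(slave_dev);
	if (master)
		netdev_info(slave_dev, "master is %s\n", master->name);
	rtnl_unlock();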
4441 | 4293 | ||
4442 | static const struct file_operations softnet_seq_fops = { | 4294 | /** |
4443 | .owner = THIS_MODULE, | 4295 | * netdev_master_upper_dev_get_rcu - Get master upper device |
4444 | .open = softnet_seq_open, | 4296 | * @dev: device |
4445 | .read = seq_read, | 4297 | * |
4446 | .llseek = seq_lseek, | 4298 | * Find a master upper device and return a pointer to it, or NULL if |
4447 | .release = seq_release, | 4299 | * there is none. The caller must hold the RCU read lock. |
4448 | }; | 4300 | */ |
4449 | 4301 | struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev) | |
4450 | static void *ptype_get_idx(loff_t pos) | ||
4451 | { | 4302 | { |
4452 | struct packet_type *pt = NULL; | 4303 | struct netdev_upper *upper; |
4453 | loff_t i = 0; | ||
4454 | int t; | ||
4455 | |||
4456 | list_for_each_entry_rcu(pt, &ptype_all, list) { | ||
4457 | if (i == pos) | ||
4458 | return pt; | ||
4459 | ++i; | ||
4460 | } | ||
4461 | 4304 | ||
4462 | for (t = 0; t < PTYPE_HASH_SIZE; t++) { | 4305 | upper = list_first_or_null_rcu(&dev->upper_dev_list, |
4463 | list_for_each_entry_rcu(pt, &ptype_base[t], list) { | 4306 | struct netdev_upper, list); |
4464 | if (i == pos) | 4307 | if (upper && likely(upper->master)) |
4465 | return pt; | 4308 | return upper->dev; |
4466 | ++i; | ||
4467 | } | ||
4468 | } | ||
4469 | return NULL; | 4309 | return NULL; |
4470 | } | 4310 | } |
4311 | EXPORT_SYMBOL(netdev_master_upper_dev_get_rcu); | ||
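The RCU variant drops the RTNL requirement; the returned pointer is only safe to dereference inside the read-side critical section. A minimal sketch under that assumption:

	struct net_device *master;

	rcu_read_lock();
	master = netdev_master_upper_dev_get_rcu(dev);
	if (master)
		netdev_info(dev, "master is %s\n", master->name);
	rcu_read_unlock();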
4471 | 4312 | ||
4472 | static void *ptype_seq_start(struct seq_file *seq, loff_t *pos) | 4313 | static int __netdev_upper_dev_link(struct net_device *dev, |
4473 | __acquires(RCU) | 4314 | struct net_device *upper_dev, bool master) |
4474 | { | 4315 | { |
4475 | rcu_read_lock(); | 4316 | struct netdev_upper *upper; |
4476 | return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN; | ||
4477 | } | ||
4478 | 4317 | ||
4479 | static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos) | 4318 | ASSERT_RTNL(); |
4480 | { | ||
4481 | struct packet_type *pt; | ||
4482 | struct list_head *nxt; | ||
4483 | int hash; | ||
4484 | 4319 | ||
4485 | ++*pos; | 4320 | if (dev == upper_dev) |
4486 | if (v == SEQ_START_TOKEN) | 4321 | return -EBUSY; |
4487 | return ptype_get_idx(0); | ||
4488 | 4322 | ||
4489 | pt = v; | 4323 | /* To prevent loops, check that dev is not an upper device of upper_dev. */ |
4490 | nxt = pt->list.next; | 4324 | if (__netdev_search_upper_dev(upper_dev, dev)) |
4491 | if (pt->type == htons(ETH_P_ALL)) { | 4325 | return -EBUSY; |
4492 | if (nxt != &ptype_all) | ||
4493 | goto found; | ||
4494 | hash = 0; | ||
4495 | nxt = ptype_base[0].next; | ||
4496 | } else | ||
4497 | hash = ntohs(pt->type) & PTYPE_HASH_MASK; | ||
4498 | 4326 | ||
4499 | while (nxt == &ptype_base[hash]) { | 4327 | if (__netdev_find_upper(dev, upper_dev)) |
4500 | if (++hash >= PTYPE_HASH_SIZE) | 4328 | return -EEXIST; |
4501 | return NULL; | ||
4502 | nxt = ptype_base[hash].next; | ||
4503 | } | ||
4504 | found: | ||
4505 | return list_entry(nxt, struct packet_type, list); | ||
4506 | } | ||
4507 | 4329 | ||
4508 | static void ptype_seq_stop(struct seq_file *seq, void *v) | 4330 | if (master && netdev_master_upper_dev_get(dev)) |
4509 | __releases(RCU) | 4331 | return -EBUSY; |
4510 | { | ||
4511 | rcu_read_unlock(); | ||
4512 | } | ||
4513 | 4332 | ||
4514 | static int ptype_seq_show(struct seq_file *seq, void *v) | 4333 | upper = kmalloc(sizeof(*upper), GFP_KERNEL); |
4515 | { | 4334 | if (!upper) |
4516 | struct packet_type *pt = v; | 4335 | return -ENOMEM; |
4517 | 4336 | ||
4518 | if (v == SEQ_START_TOKEN) | 4337 | upper->dev = upper_dev; |
4519 | seq_puts(seq, "Type Device Function\n"); | 4338 | upper->master = master; |
4520 | else if (pt->dev == NULL || dev_net(pt->dev) == seq_file_net(seq)) { | 4339 | INIT_LIST_HEAD(&upper->search_list); |
4521 | if (pt->type == htons(ETH_P_ALL)) | ||
4522 | seq_puts(seq, "ALL "); | ||
4523 | else | ||
4524 | seq_printf(seq, "%04x", ntohs(pt->type)); | ||
4525 | 4340 | ||
4526 | seq_printf(seq, " %-8s %pF\n", | 4341 | /* Ensure that master upper link is always the first item in list. */ |
4527 | pt->dev ? pt->dev->name : "", pt->func); | 4342 | if (master) |
4528 | } | 4343 | list_add_rcu(&upper->list, &dev->upper_dev_list); |
4344 | else | ||
4345 | list_add_tail_rcu(&upper->list, &dev->upper_dev_list); | ||
4346 | dev_hold(upper_dev); | ||
4529 | 4347 | ||
4530 | return 0; | 4348 | return 0; |
4531 | } | 4349 | } |
4532 | 4350 | ||
4533 | static const struct seq_operations ptype_seq_ops = { | 4351 | /** |
4534 | .start = ptype_seq_start, | 4352 | * netdev_upper_dev_link - Add a link to the upper device |
4535 | .next = ptype_seq_next, | 4353 | * @dev: device |
4536 | .stop = ptype_seq_stop, | 4354 | * @upper_dev: new upper device |
4537 | .show = ptype_seq_show, | 4355 | * |
4538 | }; | 4356 | * Adds a link to a device which is upper to this one. The caller must hold |
4539 | 4357 | * the RTNL lock. On a failure a negative errno code is returned. | |
4540 | static int ptype_seq_open(struct inode *inode, struct file *file) | 4358 | * On success the reference counts are adjusted and the function |
4541 | { | 4359 | * returns zero. |
4542 | return seq_open_net(inode, file, &ptype_seq_ops, | 4360 | */ |
4543 | sizeof(struct seq_net_private)); | 4361 | int netdev_upper_dev_link(struct net_device *dev, |
4544 | } | 4362 | struct net_device *upper_dev) |
4545 | |||
4546 | static const struct file_operations ptype_seq_fops = { | ||
4547 | .owner = THIS_MODULE, | ||
4548 | .open = ptype_seq_open, | ||
4549 | .read = seq_read, | ||
4550 | .llseek = seq_lseek, | ||
4551 | .release = seq_release_net, | ||
4552 | }; | ||
4553 | |||
4554 | |||
4555 | static int __net_init dev_proc_net_init(struct net *net) | ||
4556 | { | ||
4557 | int rc = -ENOMEM; | ||
4558 | |||
4559 | if (!proc_net_fops_create(net, "dev", S_IRUGO, &dev_seq_fops)) | ||
4560 | goto out; | ||
4561 | if (!proc_net_fops_create(net, "softnet_stat", S_IRUGO, &softnet_seq_fops)) | ||
4562 | goto out_dev; | ||
4563 | if (!proc_net_fops_create(net, "ptype", S_IRUGO, &ptype_seq_fops)) | ||
4564 | goto out_softnet; | ||
4565 | |||
4566 | if (wext_proc_init(net)) | ||
4567 | goto out_ptype; | ||
4568 | rc = 0; | ||
4569 | out: | ||
4570 | return rc; | ||
4571 | out_ptype: | ||
4572 | proc_net_remove(net, "ptype"); | ||
4573 | out_softnet: | ||
4574 | proc_net_remove(net, "softnet_stat"); | ||
4575 | out_dev: | ||
4576 | proc_net_remove(net, "dev"); | ||
4577 | goto out; | ||
4578 | } | ||
4579 | |||
4580 | static void __net_exit dev_proc_net_exit(struct net *net) | ||
4581 | { | ||
4582 | wext_proc_exit(net); | ||
4583 | |||
4584 | proc_net_remove(net, "ptype"); | ||
4585 | proc_net_remove(net, "softnet_stat"); | ||
4586 | proc_net_remove(net, "dev"); | ||
4587 | } | ||
4588 | |||
4589 | static struct pernet_operations __net_initdata dev_proc_ops = { | ||
4590 | .init = dev_proc_net_init, | ||
4591 | .exit = dev_proc_net_exit, | ||
4592 | }; | ||
4593 | |||
4594 | static int __init dev_proc_init(void) | ||
4595 | { | 4363 | { |
4596 | return register_pernet_subsys(&dev_proc_ops); | 4364 | return __netdev_upper_dev_link(dev, upper_dev, false); |
4597 | } | 4365 | } |
4598 | #else | 4366 | EXPORT_SYMBOL(netdev_upper_dev_link); |
4599 | #define dev_proc_init() 0 | ||
4600 | #endif /* CONFIG_PROC_FS */ | ||
4601 | |||
4602 | 4367 | ||
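For the non-master case, a hypothetical stacking driver (lower_dev/upper_dev are illustrative names) would link under RTNL and propagate the errno values __netdev_upper_dev_link() can return:

	/* hypothetical: stack upper_dev (e.g. a vlan) on top of lower_dev */
	static int example_link_upper(struct net_device *lower_dev,
				      struct net_device *upper_dev)
	{
		ASSERT_RTNL();
		/* -EBUSY on a loop, -EEXIST if already linked, -ENOMEM */
		return netdev_upper_dev_link(lower_dev, upper_dev);
	}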
4603 | /** | 4368 | /** |
4604 | * netdev_set_master - set up master pointer | 4369 | * netdev_master_upper_dev_link - Add a master link to the upper device |
4605 | * @slave: slave device | 4370 | * @dev: device |
4606 | * @master: new master device | 4371 | * @upper_dev: new upper device |
4607 | * | 4372 | * |
4608 | * Changes the master device of the slave. Pass %NULL to break the | 4373 | * Adds a link to a device which is upper to this one. In this case, only |
4609 | * bonding. The caller must hold the RTNL semaphore. On a failure | 4374 | * one master upper device can be linked, although other non-master devices |
4610 | * a negative errno code is returned. On success the reference counts | 4375 | * might be linked as well. The caller must hold the RTNL lock. |
4611 | * are adjusted and the function returns zero. | 4376 | * On a failure a negative errno code is returned. On success the reference |
4377 | * counts are adjusted and the function returns zero. | ||
4612 | */ | 4378 | */ |
4613 | int netdev_set_master(struct net_device *slave, struct net_device *master) | 4379 | int netdev_master_upper_dev_link(struct net_device *dev, |
4380 | struct net_device *upper_dev) | ||
4614 | { | 4381 | { |
4615 | struct net_device *old = slave->master; | 4382 | return __netdev_upper_dev_link(dev, upper_dev, true); |
4616 | |||
4617 | ASSERT_RTNL(); | ||
4618 | |||
4619 | if (master) { | ||
4620 | if (old) | ||
4621 | return -EBUSY; | ||
4622 | dev_hold(master); | ||
4623 | } | ||
4624 | |||
4625 | slave->master = master; | ||
4626 | |||
4627 | if (old) | ||
4628 | dev_put(old); | ||
4629 | return 0; | ||
4630 | } | 4383 | } |
4631 | EXPORT_SYMBOL(netdev_set_master); | 4384 | EXPORT_SYMBOL(netdev_master_upper_dev_link); |
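A bonding-style enslave path would use the master variant; a second master link fails with -EBUSY, per the check in __netdev_upper_dev_link(). Sketch with assumed names:

	/* hypothetical enslave: make bond_dev the master of slave_dev */
	static int example_enslave(struct net_device *bond_dev,
				   struct net_device *slave_dev)
	{
		ASSERT_RTNL();
		return netdev_master_upper_dev_link(slave_dev, bond_dev);
	}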
4632 | 4385 | ||
4633 | /** | 4386 | /** |
4634 | * netdev_set_bond_master - set up bonding master/slave pair | 4387 | * netdev_upper_dev_unlink - Removes a link to upper device |
4635 | * @slave: slave device | 4388 | * @dev: device |
4636 | * @master: new master device | 4389 | * @upper_dev: upper device to unlink |
4637 | * | 4390 | * |
4638 | * Changes the master device of the slave. Pass %NULL to break the | 4391 | * Removes a link to a device which is upper to this one. The caller must hold |
4639 | * bonding. The caller must hold the RTNL semaphore. On a failure | 4392 | * the RTNL lock. |
4640 | * a negative errno code is returned. On success %RTM_NEWLINK is sent | ||
4641 | * to the routing socket and the function returns zero. | ||
4642 | */ | 4393 | */ |
4643 | int netdev_set_bond_master(struct net_device *slave, struct net_device *master) | 4394 | void netdev_upper_dev_unlink(struct net_device *dev, |
4395 | struct net_device *upper_dev) | ||
4644 | { | 4396 | { |
4645 | int err; | 4397 | struct netdev_upper *upper; |
4646 | 4398 | ||
4647 | ASSERT_RTNL(); | 4399 | ASSERT_RTNL(); |
4648 | 4400 | ||
4649 | err = netdev_set_master(slave, master); | 4401 | upper = __netdev_find_upper(dev, upper_dev); |
4650 | if (err) | 4402 | if (!upper) |
4651 | return err; | 4403 | return; |
4652 | if (master) | 4404 | list_del_rcu(&upper->list); |
4653 | slave->flags |= IFF_SLAVE; | 4405 | dev_put(upper_dev); |
4654 | else | 4406 | kfree_rcu(upper, rcu); |
4655 | slave->flags &= ~IFF_SLAVE; | ||
4656 | |||
4657 | rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE); | ||
4658 | return 0; | ||
4659 | } | 4407 | } |
4660 | EXPORT_SYMBOL(netdev_set_bond_master); | 4408 | EXPORT_SYMBOL(netdev_upper_dev_unlink); |
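Teardown mirrors the link call; the reference taken at link time is dropped and the entry is freed after a grace period via kfree_rcu(). Sketch (assumed devices):

	/* hypothetical release: undo example_enslave() above */
	static void example_release(struct net_device *bond_dev,
				    struct net_device *slave_dev)
	{
		ASSERT_RTNL();
		netdev_upper_dev_unlink(slave_dev, bond_dev);
	}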
4661 | 4409 | ||
4662 | static void dev_change_rx_flags(struct net_device *dev, int flags) | 4410 | static void dev_change_rx_flags(struct net_device *dev, int flags) |
4663 | { | 4411 | { |
@@ -5020,381 +4768,33 @@ int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa) | |||
5020 | if (!netif_device_present(dev)) | 4768 | if (!netif_device_present(dev)) |
5021 | return -ENODEV; | 4769 | return -ENODEV; |
5022 | err = ops->ndo_set_mac_address(dev, sa); | 4770 | err = ops->ndo_set_mac_address(dev, sa); |
5023 | if (!err) | 4771 | if (err) |
5024 | call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); | 4772 | return err; |
4773 | dev->addr_assign_type = NET_ADDR_SET; | ||
4774 | call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); | ||
5025 | add_device_randomness(dev->dev_addr, dev->addr_len); | 4775 | add_device_randomness(dev->dev_addr, dev->addr_len); |
5026 | return err; | 4776 | return 0; |
5027 | } | 4777 | } |
5028 | EXPORT_SYMBOL(dev_set_mac_address); | 4778 | EXPORT_SYMBOL(dev_set_mac_address); |
5029 | 4779 | ||
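With this change a successful dev_set_mac_address() also marks the address as administratively set before notifying. A hedged caller sketch (new_mac is an assumed buffer of dev->addr_len bytes; RTNL held):

	/* hypothetical: install a new MAC address on dev */
	static int example_set_mac(struct net_device *dev, const u8 *new_mac)
	{
		struct sockaddr sa;

		sa.sa_family = dev->type;
		memcpy(sa.sa_data, new_mac, dev->addr_len);
		/* on success dev->addr_assign_type becomes NET_ADDR_SET */
		return dev_set_mac_address(dev, &sa);
	}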
5030 | /* | ||
5031 | * Perform the SIOCxIFxxx calls, inside rcu_read_lock() | ||
5032 | */ | ||
5033 | static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cmd) | ||
5034 | { | ||
5035 | int err; | ||
5036 | struct net_device *dev = dev_get_by_name_rcu(net, ifr->ifr_name); | ||
5037 | |||
5038 | if (!dev) | ||
5039 | return -ENODEV; | ||
5040 | |||
5041 | switch (cmd) { | ||
5042 | case SIOCGIFFLAGS: /* Get interface flags */ | ||
5043 | ifr->ifr_flags = (short) dev_get_flags(dev); | ||
5044 | return 0; | ||
5045 | |||
5046 | case SIOCGIFMETRIC: /* Get the metric on the interface | ||
5047 | (currently unused) */ | ||
5048 | ifr->ifr_metric = 0; | ||
5049 | return 0; | ||
5050 | |||
5051 | case SIOCGIFMTU: /* Get the MTU of a device */ | ||
5052 | ifr->ifr_mtu = dev->mtu; | ||
5053 | return 0; | ||
5054 | |||
5055 | case SIOCGIFHWADDR: | ||
5056 | if (!dev->addr_len) | ||
5057 | memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data); | ||
5058 | else | ||
5059 | memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr, | ||
5060 | min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len)); | ||
5061 | ifr->ifr_hwaddr.sa_family = dev->type; | ||
5062 | return 0; | ||
5063 | |||
5064 | case SIOCGIFSLAVE: | ||
5065 | err = -EINVAL; | ||
5066 | break; | ||
5067 | |||
5068 | case SIOCGIFMAP: | ||
5069 | ifr->ifr_map.mem_start = dev->mem_start; | ||
5070 | ifr->ifr_map.mem_end = dev->mem_end; | ||
5071 | ifr->ifr_map.base_addr = dev->base_addr; | ||
5072 | ifr->ifr_map.irq = dev->irq; | ||
5073 | ifr->ifr_map.dma = dev->dma; | ||
5074 | ifr->ifr_map.port = dev->if_port; | ||
5075 | return 0; | ||
5076 | |||
5077 | case SIOCGIFINDEX: | ||
5078 | ifr->ifr_ifindex = dev->ifindex; | ||
5079 | return 0; | ||
5080 | |||
5081 | case SIOCGIFTXQLEN: | ||
5082 | ifr->ifr_qlen = dev->tx_queue_len; | ||
5083 | return 0; | ||
5084 | |||
5085 | default: | ||
5086 | /* dev_ioctl() should ensure this case | ||
5087 | * is never reached | ||
5088 | */ | ||
5089 | WARN_ON(1); | ||
5090 | err = -ENOTTY; | ||
5091 | break; | ||
5092 | |||
5093 | } | ||
5094 | return err; | ||
5095 | } | ||
5096 | |||
5097 | /* | ||
5098 | * Perform the SIOCxIFxxx calls, inside rtnl_lock() | ||
5099 | */ | ||
5100 | static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd) | ||
5101 | { | ||
5102 | int err; | ||
5103 | struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name); | ||
5104 | const struct net_device_ops *ops; | ||
5105 | |||
5106 | if (!dev) | ||
5107 | return -ENODEV; | ||
5108 | |||
5109 | ops = dev->netdev_ops; | ||
5110 | |||
5111 | switch (cmd) { | ||
5112 | case SIOCSIFFLAGS: /* Set interface flags */ | ||
5113 | return dev_change_flags(dev, ifr->ifr_flags); | ||
5114 | |||
5115 | case SIOCSIFMETRIC: /* Set the metric on the interface | ||
5116 | (currently unused) */ | ||
5117 | return -EOPNOTSUPP; | ||
5118 | |||
5119 | case SIOCSIFMTU: /* Set the MTU of a device */ | ||
5120 | return dev_set_mtu(dev, ifr->ifr_mtu); | ||
5121 | |||
5122 | case SIOCSIFHWADDR: | ||
5123 | return dev_set_mac_address(dev, &ifr->ifr_hwaddr); | ||
5124 | |||
5125 | case SIOCSIFHWBROADCAST: | ||
5126 | if (ifr->ifr_hwaddr.sa_family != dev->type) | ||
5127 | return -EINVAL; | ||
5128 | memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data, | ||
5129 | min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len)); | ||
5130 | call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); | ||
5131 | return 0; | ||
5132 | |||
5133 | case SIOCSIFMAP: | ||
5134 | if (ops->ndo_set_config) { | ||
5135 | if (!netif_device_present(dev)) | ||
5136 | return -ENODEV; | ||
5137 | return ops->ndo_set_config(dev, &ifr->ifr_map); | ||
5138 | } | ||
5139 | return -EOPNOTSUPP; | ||
5140 | |||
5141 | case SIOCADDMULTI: | ||
5142 | if (!ops->ndo_set_rx_mode || | ||
5143 | ifr->ifr_hwaddr.sa_family != AF_UNSPEC) | ||
5144 | return -EINVAL; | ||
5145 | if (!netif_device_present(dev)) | ||
5146 | return -ENODEV; | ||
5147 | return dev_mc_add_global(dev, ifr->ifr_hwaddr.sa_data); | ||
5148 | |||
5149 | case SIOCDELMULTI: | ||
5150 | if (!ops->ndo_set_rx_mode || | ||
5151 | ifr->ifr_hwaddr.sa_family != AF_UNSPEC) | ||
5152 | return -EINVAL; | ||
5153 | if (!netif_device_present(dev)) | ||
5154 | return -ENODEV; | ||
5155 | return dev_mc_del_global(dev, ifr->ifr_hwaddr.sa_data); | ||
5156 | |||
5157 | case SIOCSIFTXQLEN: | ||
5158 | if (ifr->ifr_qlen < 0) | ||
5159 | return -EINVAL; | ||
5160 | dev->tx_queue_len = ifr->ifr_qlen; | ||
5161 | return 0; | ||
5162 | |||
5163 | case SIOCSIFNAME: | ||
5164 | ifr->ifr_newname[IFNAMSIZ-1] = '\0'; | ||
5165 | return dev_change_name(dev, ifr->ifr_newname); | ||
5166 | |||
5167 | case SIOCSHWTSTAMP: | ||
5168 | err = net_hwtstamp_validate(ifr); | ||
5169 | if (err) | ||
5170 | return err; | ||
5171 | /* fall through */ | ||
5172 | |||
5173 | /* | ||
5174 | * Unknown or private ioctl | ||
5175 | */ | ||
5176 | default: | ||
5177 | if ((cmd >= SIOCDEVPRIVATE && | ||
5178 | cmd <= SIOCDEVPRIVATE + 15) || | ||
5179 | cmd == SIOCBONDENSLAVE || | ||
5180 | cmd == SIOCBONDRELEASE || | ||
5181 | cmd == SIOCBONDSETHWADDR || | ||
5182 | cmd == SIOCBONDSLAVEINFOQUERY || | ||
5183 | cmd == SIOCBONDINFOQUERY || | ||
5184 | cmd == SIOCBONDCHANGEACTIVE || | ||
5185 | cmd == SIOCGMIIPHY || | ||
5186 | cmd == SIOCGMIIREG || | ||
5187 | cmd == SIOCSMIIREG || | ||
5188 | cmd == SIOCBRADDIF || | ||
5189 | cmd == SIOCBRDELIF || | ||
5190 | cmd == SIOCSHWTSTAMP || | ||
5191 | cmd == SIOCWANDEV) { | ||
5192 | err = -EOPNOTSUPP; | ||
5193 | if (ops->ndo_do_ioctl) { | ||
5194 | if (netif_device_present(dev)) | ||
5195 | err = ops->ndo_do_ioctl(dev, ifr, cmd); | ||
5196 | else | ||
5197 | err = -ENODEV; | ||
5198 | } | ||
5199 | } else | ||
5200 | err = -EINVAL; | ||
5201 | |||
5202 | } | ||
5203 | return err; | ||
5204 | } | ||
5205 | |||
5206 | /* | ||
5207 | * This function handles all "interface"-type I/O control requests. The actual | ||
5208 | * 'doing' part of this is dev_ifsioc above. | ||
5209 | */ | ||
5210 | |||
5211 | /** | 4780 | /** |
5212 | * dev_ioctl - network device ioctl | 4781 | * dev_change_carrier - Change device carrier |
5213 | * @net: the applicable net namespace | 4782 | * @dev: device |
5214 | * @cmd: command to issue | 4783 | * @new_carrier: new value |
5215 | * @arg: pointer to a struct ifreq in user space | ||
5216 | * | 4784 | * |
5217 | * Issue ioctl functions to devices. This is normally called by the | 4785 | * Change the carrier state of the device. |
5218 | * user space syscall interfaces but can sometimes be useful for | ||
5219 | * other purposes. The return value is the return from the syscall if | ||
5220 | * positive or a negative errno code on error. | ||
5221 | */ | 4786 | */ |
5222 | 4787 | int dev_change_carrier(struct net_device *dev, bool new_carrier) | |
5223 | int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg) | ||
5224 | { | 4788 | { |
5225 | struct ifreq ifr; | 4789 | const struct net_device_ops *ops = dev->netdev_ops; |
5226 | int ret; | ||
5227 | char *colon; | ||
5228 | |||
5229 | /* One special case: SIOCGIFCONF takes ifconf argument | ||
5230 | and requires shared lock, because it sleeps writing | ||
5231 | to user space. | ||
5232 | */ | ||
5233 | |||
5234 | if (cmd == SIOCGIFCONF) { | ||
5235 | rtnl_lock(); | ||
5236 | ret = dev_ifconf(net, (char __user *) arg); | ||
5237 | rtnl_unlock(); | ||
5238 | return ret; | ||
5239 | } | ||
5240 | if (cmd == SIOCGIFNAME) | ||
5241 | return dev_ifname(net, (struct ifreq __user *)arg); | ||
5242 | |||
5243 | if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) | ||
5244 | return -EFAULT; | ||
5245 | |||
5246 | ifr.ifr_name[IFNAMSIZ-1] = 0; | ||
5247 | |||
5248 | colon = strchr(ifr.ifr_name, ':'); | ||
5249 | if (colon) | ||
5250 | *colon = 0; | ||
5251 | |||
5252 | /* | ||
5253 | * See which interface the caller is talking about. | ||
5254 | */ | ||
5255 | |||
5256 | switch (cmd) { | ||
5257 | /* | ||
5258 | * These ioctl calls: | ||
5259 | * - can be done by all. | ||
5260 | * - atomic and do not require locking. | ||
5261 | * - return a value | ||
5262 | */ | ||
5263 | case SIOCGIFFLAGS: | ||
5264 | case SIOCGIFMETRIC: | ||
5265 | case SIOCGIFMTU: | ||
5266 | case SIOCGIFHWADDR: | ||
5267 | case SIOCGIFSLAVE: | ||
5268 | case SIOCGIFMAP: | ||
5269 | case SIOCGIFINDEX: | ||
5270 | case SIOCGIFTXQLEN: | ||
5271 | dev_load(net, ifr.ifr_name); | ||
5272 | rcu_read_lock(); | ||
5273 | ret = dev_ifsioc_locked(net, &ifr, cmd); | ||
5274 | rcu_read_unlock(); | ||
5275 | if (!ret) { | ||
5276 | if (colon) | ||
5277 | *colon = ':'; | ||
5278 | if (copy_to_user(arg, &ifr, | ||
5279 | sizeof(struct ifreq))) | ||
5280 | ret = -EFAULT; | ||
5281 | } | ||
5282 | return ret; | ||
5283 | |||
5284 | case SIOCETHTOOL: | ||
5285 | dev_load(net, ifr.ifr_name); | ||
5286 | rtnl_lock(); | ||
5287 | ret = dev_ethtool(net, &ifr); | ||
5288 | rtnl_unlock(); | ||
5289 | if (!ret) { | ||
5290 | if (colon) | ||
5291 | *colon = ':'; | ||
5292 | if (copy_to_user(arg, &ifr, | ||
5293 | sizeof(struct ifreq))) | ||
5294 | ret = -EFAULT; | ||
5295 | } | ||
5296 | return ret; | ||
5297 | |||
5298 | /* | ||
5299 | * These ioctl calls: | ||
5300 | * - require superuser power. | ||
5301 | * - require strict serialization. | ||
5302 | * - return a value | ||
5303 | */ | ||
5304 | case SIOCGMIIPHY: | ||
5305 | case SIOCGMIIREG: | ||
5306 | case SIOCSIFNAME: | ||
5307 | if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) | ||
5308 | return -EPERM; | ||
5309 | dev_load(net, ifr.ifr_name); | ||
5310 | rtnl_lock(); | ||
5311 | ret = dev_ifsioc(net, &ifr, cmd); | ||
5312 | rtnl_unlock(); | ||
5313 | if (!ret) { | ||
5314 | if (colon) | ||
5315 | *colon = ':'; | ||
5316 | if (copy_to_user(arg, &ifr, | ||
5317 | sizeof(struct ifreq))) | ||
5318 | ret = -EFAULT; | ||
5319 | } | ||
5320 | return ret; | ||
5321 | |||
5322 | /* | ||
5323 | * These ioctl calls: | ||
5324 | * - require superuser power. | ||
5325 | * - require strict serialization. | ||
5326 | * - do not return a value | ||
5327 | */ | ||
5328 | case SIOCSIFMAP: | ||
5329 | case SIOCSIFTXQLEN: | ||
5330 | if (!capable(CAP_NET_ADMIN)) | ||
5331 | return -EPERM; | ||
5332 | /* fall through */ | ||
5333 | /* | ||
5334 | * These ioctl calls: | ||
5335 | * - require local superuser power. | ||
5336 | * - require strict serialization. | ||
5337 | * - do not return a value | ||
5338 | */ | ||
5339 | case SIOCSIFFLAGS: | ||
5340 | case SIOCSIFMETRIC: | ||
5341 | case SIOCSIFMTU: | ||
5342 | case SIOCSIFHWADDR: | ||
5343 | case SIOCSIFSLAVE: | ||
5344 | case SIOCADDMULTI: | ||
5345 | case SIOCDELMULTI: | ||
5346 | case SIOCSIFHWBROADCAST: | ||
5347 | case SIOCSMIIREG: | ||
5348 | case SIOCBONDENSLAVE: | ||
5349 | case SIOCBONDRELEASE: | ||
5350 | case SIOCBONDSETHWADDR: | ||
5351 | case SIOCBONDCHANGEACTIVE: | ||
5352 | case SIOCBRADDIF: | ||
5353 | case SIOCBRDELIF: | ||
5354 | case SIOCSHWTSTAMP: | ||
5355 | if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) | ||
5356 | return -EPERM; | ||
5357 | /* fall through */ | ||
5358 | case SIOCBONDSLAVEINFOQUERY: | ||
5359 | case SIOCBONDINFOQUERY: | ||
5360 | dev_load(net, ifr.ifr_name); | ||
5361 | rtnl_lock(); | ||
5362 | ret = dev_ifsioc(net, &ifr, cmd); | ||
5363 | rtnl_unlock(); | ||
5364 | return ret; | ||
5365 | |||
5366 | case SIOCGIFMEM: | ||
5367 | /* Get the per device memory space. We can add this but | ||
5368 | * currently do not support it */ | ||
5369 | case SIOCSIFMEM: | ||
5370 | /* Set the per device memory buffer space. | ||
5371 | * Not applicable in our case */ | ||
5372 | case SIOCSIFLINK: | ||
5373 | return -ENOTTY; | ||
5374 | 4790 | ||
5375 | /* | 4791 | if (!ops->ndo_change_carrier) |
5376 | * Unknown or private ioctl. | 4792 | return -EOPNOTSUPP; |
5377 | */ | 4793 | if (!netif_device_present(dev)) |
5378 | default: | 4794 | return -ENODEV; |
5379 | if (cmd == SIOCWANDEV || | 4795 | return ops->ndo_change_carrier(dev, new_carrier); |
5380 | (cmd >= SIOCDEVPRIVATE && | ||
5381 | cmd <= SIOCDEVPRIVATE + 15)) { | ||
5382 | dev_load(net, ifr.ifr_name); | ||
5383 | rtnl_lock(); | ||
5384 | ret = dev_ifsioc(net, &ifr, cmd); | ||
5385 | rtnl_unlock(); | ||
5386 | if (!ret && copy_to_user(arg, &ifr, | ||
5387 | sizeof(struct ifreq))) | ||
5388 | ret = -EFAULT; | ||
5389 | return ret; | ||
5390 | } | ||
5391 | /* Take care of Wireless Extensions */ | ||
5392 | if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) | ||
5393 | return wext_handle_ioctl(net, &ifr, cmd, arg); | ||
5394 | return -ENOTTY; | ||
5395 | } | ||
5396 | } | 4796 | } |
5397 | 4797 | EXPORT_SYMBOL(dev_change_carrier); | |
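dev_change_carrier() only validates and dispatches, so the substance lives in the driver's ndo_change_carrier. A minimal hypothetical implementation for a software device (not from this patch):

	static int example_change_carrier(struct net_device *dev,
					  bool new_carrier)
	{
		if (new_carrier)
			netif_carrier_on(dev);
		else
			netif_carrier_off(dev);
		return 0;
	}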
5398 | 4798 | ||
5399 | /** | 4799 | /** |
5400 | * dev_new_index - allocate an ifindex | 4800 | * dev_new_index - allocate an ifindex |
@@ -5482,11 +4882,15 @@ static void rollback_registered_many(struct list_head *head) | |||
5482 | if (dev->netdev_ops->ndo_uninit) | 4882 | if (dev->netdev_ops->ndo_uninit) |
5483 | dev->netdev_ops->ndo_uninit(dev); | 4883 | dev->netdev_ops->ndo_uninit(dev); |
5484 | 4884 | ||
5485 | /* Notifier chain MUST detach us from master device. */ | 4885 | /* Notifier chain MUST detach all upper devices from us. */ |
5486 | WARN_ON(dev->master); | 4886 | WARN_ON(netdev_has_any_upper_dev(dev)); |
5487 | 4887 | ||
5488 | /* Remove entries from kobject tree */ | 4888 | /* Remove entries from kobject tree */ |
5489 | netdev_unregister_kobject(dev); | 4889 | netdev_unregister_kobject(dev); |
4890 | #ifdef CONFIG_XPS | ||
4891 | /* Remove XPS queueing entries */ | ||
4892 | netif_reset_xps_queues_gt(dev, 0); | ||
4893 | #endif | ||
5490 | } | 4894 | } |
5491 | 4895 | ||
5492 | synchronize_net(); | 4896 | synchronize_net(); |
@@ -5664,10 +5068,9 @@ static int netif_alloc_rx_queues(struct net_device *dev) | |||
5664 | BUG_ON(count < 1); | 5068 | BUG_ON(count < 1); |
5665 | 5069 | ||
5666 | rx = kcalloc(count, sizeof(struct netdev_rx_queue), GFP_KERNEL); | 5070 | rx = kcalloc(count, sizeof(struct netdev_rx_queue), GFP_KERNEL); |
5667 | if (!rx) { | 5071 | if (!rx) |
5668 | pr_err("netdev: Unable to allocate %u rx queues\n", count); | ||
5669 | return -ENOMEM; | 5072 | return -ENOMEM; |
5670 | } | 5073 | |
5671 | dev->_rx = rx; | 5074 | dev->_rx = rx; |
5672 | 5075 | ||
5673 | for (i = 0; i < count; i++) | 5076 | for (i = 0; i < count; i++) |
@@ -5698,10 +5101,9 @@ static int netif_alloc_netdev_queues(struct net_device *dev) | |||
5698 | BUG_ON(count < 1); | 5101 | BUG_ON(count < 1); |
5699 | 5102 | ||
5700 | tx = kcalloc(count, sizeof(struct netdev_queue), GFP_KERNEL); | 5103 | tx = kcalloc(count, sizeof(struct netdev_queue), GFP_KERNEL); |
5701 | if (!tx) { | 5104 | if (!tx) |
5702 | pr_err("netdev: Unable to allocate %u tx queues\n", count); | ||
5703 | return -ENOMEM; | 5105 | return -ENOMEM; |
5704 | } | 5106 | |
5705 | dev->_tx = tx; | 5107 | dev->_tx = tx; |
5706 | 5108 | ||
5707 | netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL); | 5109 | netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL); |
@@ -5760,6 +5162,14 @@ int register_netdevice(struct net_device *dev) | |||
5760 | } | 5162 | } |
5761 | } | 5163 | } |
5762 | 5164 | ||
5165 | if (((dev->hw_features | dev->features) & NETIF_F_HW_VLAN_FILTER) && | ||
5166 | (!dev->netdev_ops->ndo_vlan_rx_add_vid || | ||
5167 | !dev->netdev_ops->ndo_vlan_rx_kill_vid)) { | ||
5168 | netdev_WARN(dev, "Buggy VLAN acceleration in driver!\n"); | ||
5169 | ret = -EINVAL; | ||
5170 | goto err_uninit; | ||
5171 | } | ||
5172 | |||
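This check makes registration fail with -EINVAL unless a driver advertising NETIF_F_HW_VLAN_FILTER wires up both VLAN callbacks. A sketch of the required shape (example_* names are hypothetical; the vid-only signatures are an assumption about this kernel generation):

	static int example_vlan_rx_add_vid(struct net_device *dev,
					   unsigned short vid)
	{
		return 0;	/* program the hardware VLAN filter here */
	}

	static int example_vlan_rx_kill_vid(struct net_device *dev,
					    unsigned short vid)
	{
		return 0;	/* and remove the filter entry here */
	}

	static const struct net_device_ops example_netdev_ops = {
		.ndo_vlan_rx_add_vid	= example_vlan_rx_add_vid,
		.ndo_vlan_rx_kill_vid	= example_vlan_rx_kill_vid,
	};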
5763 | ret = -EBUSY; | 5173 | ret = -EBUSY; |
5764 | if (!dev->ifindex) | 5174 | if (!dev->ifindex) |
5765 | dev->ifindex = dev_new_index(net); | 5175 | dev->ifindex = dev_new_index(net); |
@@ -5815,6 +5225,13 @@ int register_netdevice(struct net_device *dev) | |||
5815 | list_netdevice(dev); | 5225 | list_netdevice(dev); |
5816 | add_device_randomness(dev->dev_addr, dev->addr_len); | 5226 | add_device_randomness(dev->dev_addr, dev->addr_len); |
5817 | 5227 | ||
5228 | /* If the device has a permanent device address, the driver should | ||
5229 | * set dev_addr, and addr_assign_type should be left at | ||
5230 | * NET_ADDR_PERM (the default value). | ||
5231 | */ | ||
5232 | if (dev->addr_assign_type == NET_ADDR_PERM) | ||
5233 | memcpy(dev->perm_addr, dev->dev_addr, dev->addr_len); | ||
5234 | |||
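In practice, a driver with a hardware-backed address only fills dev_addr, while one that generates an address should flag it; eth_hw_addr_random() does so by setting NET_ADDR_RANDOM. Hedged sketch (eeprom_mac is an assumed buffer):

	/* permanent address: leave addr_assign_type at NET_ADDR_PERM and
	 * register_netdevice() will snapshot it into dev->perm_addr */
	memcpy(dev->dev_addr, eeprom_mac, ETH_ALEN);

	/* ...or a generated address, which must not look permanent */
	eth_hw_addr_random(dev);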
5818 | /* Notify protocols, that a new device appeared. */ | 5235 | /* Notify protocols, that a new device appeared. */ |
5819 | ret = call_netdevice_notifiers(NETDEV_REGISTER, dev); | 5236 | ret = call_netdevice_notifiers(NETDEV_REGISTER, dev); |
5820 | ret = notifier_to_errno(ret); | 5237 | ret = notifier_to_errno(ret); |
@@ -6121,6 +5538,14 @@ struct netdev_queue *dev_ingress_queue_create(struct net_device *dev) | |||
6121 | 5538 | ||
6122 | static const struct ethtool_ops default_ethtool_ops; | 5539 | static const struct ethtool_ops default_ethtool_ops; |
6123 | 5540 | ||
5541 | void netdev_set_default_ethtool_ops(struct net_device *dev, | ||
5542 | const struct ethtool_ops *ops) | ||
5543 | { | ||
5544 | if (dev->ethtool_ops == &default_ethtool_ops) | ||
5545 | dev->ethtool_ops = ops; | ||
5546 | } | ||
5547 | EXPORT_SYMBOL_GPL(netdev_set_default_ethtool_ops); | ||
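The helper only takes effect while the device still points at the core defaults, so a lower layer can install fallbacks without clobbering a driver's own ethtool_ops. Hypothetical sketch (ethtool_op_get_link is the stock helper):

	static const struct ethtool_ops example_default_ethtool_ops = {
		.get_link	= ethtool_op_get_link,
	};

	netdev_set_default_ethtool_ops(dev, &example_default_ethtool_ops);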
5548 | |||
6124 | /** | 5549 | /** |
6125 | * alloc_netdev_mqs - allocate network device | 5550 | * alloc_netdev_mqs - allocate network device |
6126 | * @sizeof_priv: size of private data to allocate space for | 5551 | * @sizeof_priv: size of private data to allocate space for |
@@ -6165,10 +5590,8 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, | |||
6165 | alloc_size += NETDEV_ALIGN - 1; | 5590 | alloc_size += NETDEV_ALIGN - 1; |
6166 | 5591 | ||
6167 | p = kzalloc(alloc_size, GFP_KERNEL); | 5592 | p = kzalloc(alloc_size, GFP_KERNEL); |
6168 | if (!p) { | 5593 | if (!p) |
6169 | pr_err("alloc_netdev: Unable to allocate device\n"); | ||
6170 | return NULL; | 5594 | return NULL; |
6171 | } | ||
6172 | 5595 | ||
6173 | dev = PTR_ALIGN(p, NETDEV_ALIGN); | 5596 | dev = PTR_ALIGN(p, NETDEV_ALIGN); |
6174 | dev->padded = (char *)dev - (char *)p; | 5597 | dev->padded = (char *)dev - (char *)p; |
@@ -6191,6 +5614,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, | |||
6191 | INIT_LIST_HEAD(&dev->napi_list); | 5614 | INIT_LIST_HEAD(&dev->napi_list); |
6192 | INIT_LIST_HEAD(&dev->unreg_list); | 5615 | INIT_LIST_HEAD(&dev->unreg_list); |
6193 | INIT_LIST_HEAD(&dev->link_watch_list); | 5616 | INIT_LIST_HEAD(&dev->link_watch_list); |
5617 | INIT_LIST_HEAD(&dev->upper_dev_list); | ||
6194 | dev->priv_flags = IFF_XMIT_DST_RELEASE; | 5618 | dev->priv_flags = IFF_XMIT_DST_RELEASE; |
6195 | setup(dev); | 5619 | setup(dev); |
6196 | 5620 | ||
@@ -6834,19 +6258,9 @@ static int __init net_dev_init(void) | |||
6834 | 6258 | ||
6835 | hotcpu_notifier(dev_cpu_callback, 0); | 6259 | hotcpu_notifier(dev_cpu_callback, 0); |
6836 | dst_init(); | 6260 | dst_init(); |
6837 | dev_mcast_init(); | ||
6838 | rc = 0; | 6261 | rc = 0; |
6839 | out: | 6262 | out: |
6840 | return rc; | 6263 | return rc; |
6841 | } | 6264 | } |
6842 | 6265 | ||
6843 | subsys_initcall(net_dev_init); | 6266 | subsys_initcall(net_dev_init); |
6844 | |||
6845 | static int __init initialize_hashrnd(void) | ||
6846 | { | ||
6847 | get_random_bytes(&hashrnd, sizeof(hashrnd)); | ||
6848 | return 0; | ||
6849 | } | ||
6850 | |||
6851 | late_initcall_sync(initialize_hashrnd); | ||
6852 | |||