diff options
author | Glenn Elliott <gelliott@cs.unc.edu> | 2012-03-04 19:47:13 -0500 |
---|---|---|
committer | Glenn Elliott <gelliott@cs.unc.edu> | 2012-03-04 19:47:13 -0500 |
commit | c71c03bda1e86c9d5198c5d83f712e695c4f2a1e (patch) | |
tree | ecb166cb3e2b7e2adb3b5e292245fefd23381ac8 /net/core | |
parent | ea53c912f8a86a8567697115b6a0d8152beee5c8 (diff) | |
parent | 6a00f206debf8a5c8899055726ad127dbeeed098 (diff) |
Merge branch 'mpi-master' into wip-k-fmlp
Conflicts:
litmus/sched_cedf.c
Diffstat (limited to 'net/core')
-rw-r--r-- | net/core/datagram.c | 8 | ||||
-rw-r--r-- | net/core/dev.c | 1758 | ||||
-rw-r--r-- | net/core/dev_addr_lists.c | 18 | ||||
-rw-r--r-- | net/core/drop_monitor.c | 14 | ||||
-rw-r--r-- | net/core/dst.c | 134 | ||||
-rw-r--r-- | net/core/ethtool.c | 927 | ||||
-rw-r--r-- | net/core/fib_rules.c | 44 | ||||
-rw-r--r-- | net/core/filter.c | 495 | ||||
-rw-r--r-- | net/core/flow.c | 96 | ||||
-rw-r--r-- | net/core/gen_estimator.c | 13 | ||||
-rw-r--r-- | net/core/iovec.c | 26 | ||||
-rw-r--r-- | net/core/link_watch.c | 2 | ||||
-rw-r--r-- | net/core/neighbour.c | 488 | ||||
-rw-r--r-- | net/core/net-sysfs.c | 533 | ||||
-rw-r--r-- | net/core/net-sysfs.h | 4 | ||||
-rw-r--r-- | net/core/net-traces.c | 1 | ||||
-rw-r--r-- | net/core/net_namespace.c | 101 | ||||
-rw-r--r-- | net/core/netpoll.c | 60 | ||||
-rw-r--r-- | net/core/pktgen.c | 507 | ||||
-rw-r--r-- | net/core/request_sock.c | 5 | ||||
-rw-r--r-- | net/core/rtnetlink.c | 331 | ||||
-rw-r--r-- | net/core/scm.c | 12 | ||||
-rw-r--r-- | net/core/skbuff.c | 147 | ||||
-rw-r--r-- | net/core/sock.c | 101 | ||||
-rw-r--r-- | net/core/sysctl_net_core.c | 13 | ||||
-rw-r--r-- | net/core/timestamping.c | 10 | ||||
-rw-r--r-- | net/core/utils.c | 40 |
27 files changed, 3797 insertions, 2091 deletions
diff --git a/net/core/datagram.c b/net/core/datagram.c index 251997a95483..18ac112ea7ae 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c | |||
@@ -177,7 +177,7 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned flags, | |||
177 | * interrupt level will suddenly eat the receive_queue. | 177 | * interrupt level will suddenly eat the receive_queue. |
178 | * | 178 | * |
179 | * Look at current nfs client by the way... | 179 | * Look at current nfs client by the way... |
180 | * However, this function was corrent in any case. 8) | 180 | * However, this function was correct in any case. 8) |
181 | */ | 181 | */ |
182 | unsigned long cpu_flags; | 182 | unsigned long cpu_flags; |
183 | 183 | ||
@@ -243,6 +243,7 @@ void skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb) | |||
243 | unlock_sock_fast(sk, slow); | 243 | unlock_sock_fast(sk, slow); |
244 | 244 | ||
245 | /* skb is now orphaned, can be freed outside of locked section */ | 245 | /* skb is now orphaned, can be freed outside of locked section */ |
246 | trace_kfree_skb(skb, skb_free_datagram_locked); | ||
246 | __kfree_skb(skb); | 247 | __kfree_skb(skb); |
247 | } | 248 | } |
248 | EXPORT_SYMBOL(skb_free_datagram_locked); | 249 | EXPORT_SYMBOL(skb_free_datagram_locked); |
@@ -746,13 +747,12 @@ unsigned int datagram_poll(struct file *file, struct socket *sock, | |||
746 | if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) | 747 | if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) |
747 | mask |= POLLERR; | 748 | mask |= POLLERR; |
748 | if (sk->sk_shutdown & RCV_SHUTDOWN) | 749 | if (sk->sk_shutdown & RCV_SHUTDOWN) |
749 | mask |= POLLRDHUP; | 750 | mask |= POLLRDHUP | POLLIN | POLLRDNORM; |
750 | if (sk->sk_shutdown == SHUTDOWN_MASK) | 751 | if (sk->sk_shutdown == SHUTDOWN_MASK) |
751 | mask |= POLLHUP; | 752 | mask |= POLLHUP; |
752 | 753 | ||
753 | /* readable? */ | 754 | /* readable? */ |
754 | if (!skb_queue_empty(&sk->sk_receive_queue) || | 755 | if (!skb_queue_empty(&sk->sk_receive_queue)) |
755 | (sk->sk_shutdown & RCV_SHUTDOWN)) | ||
756 | mask |= POLLIN | POLLRDNORM; | 756 | mask |= POLLIN | POLLRDNORM; |
757 | 757 | ||
758 | /* Connection-based need to check for termination and startup */ | 758 | /* Connection-based need to check for termination and startup */ |
diff --git a/net/core/dev.c b/net/core/dev.c index 660dd41aaaa6..9c58c1ec41a9 100644 --- a/net/core/dev.c +++ b/net/core/dev.c | |||
@@ -128,7 +128,11 @@ | |||
128 | #include <linux/jhash.h> | 128 | #include <linux/jhash.h> |
129 | #include <linux/random.h> | 129 | #include <linux/random.h> |
130 | #include <trace/events/napi.h> | 130 | #include <trace/events/napi.h> |
131 | #include <trace/events/net.h> | ||
132 | #include <trace/events/skb.h> | ||
131 | #include <linux/pci.h> | 133 | #include <linux/pci.h> |
134 | #include <linux/inetdevice.h> | ||
135 | #include <linux/cpu_rmap.h> | ||
132 | 136 | ||
133 | #include "net-sysfs.h" | 137 | #include "net-sysfs.h" |
134 | 138 | ||
@@ -371,6 +375,14 @@ static inline void netdev_set_addr_lockdep_class(struct net_device *dev) | |||
371 | * --ANK (980803) | 375 | * --ANK (980803) |
372 | */ | 376 | */ |
373 | 377 | ||
378 | static inline struct list_head *ptype_head(const struct packet_type *pt) | ||
379 | { | ||
380 | if (pt->type == htons(ETH_P_ALL)) | ||
381 | return &ptype_all; | ||
382 | else | ||
383 | return &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK]; | ||
384 | } | ||
385 | |||
374 | /** | 386 | /** |
375 | * dev_add_pack - add packet handler | 387 | * dev_add_pack - add packet handler |
376 | * @pt: packet type declaration | 388 | * @pt: packet type declaration |
@@ -386,16 +398,11 @@ static inline void netdev_set_addr_lockdep_class(struct net_device *dev) | |||
386 | 398 | ||
387 | void dev_add_pack(struct packet_type *pt) | 399 | void dev_add_pack(struct packet_type *pt) |
388 | { | 400 | { |
389 | int hash; | 401 | struct list_head *head = ptype_head(pt); |
390 | 402 | ||
391 | spin_lock_bh(&ptype_lock); | 403 | spin_lock(&ptype_lock); |
392 | if (pt->type == htons(ETH_P_ALL)) | 404 | list_add_rcu(&pt->list, head); |
393 | list_add_rcu(&pt->list, &ptype_all); | 405 | spin_unlock(&ptype_lock); |
394 | else { | ||
395 | hash = ntohs(pt->type) & PTYPE_HASH_MASK; | ||
396 | list_add_rcu(&pt->list, &ptype_base[hash]); | ||
397 | } | ||
398 | spin_unlock_bh(&ptype_lock); | ||
399 | } | 406 | } |
400 | EXPORT_SYMBOL(dev_add_pack); | 407 | EXPORT_SYMBOL(dev_add_pack); |
401 | 408 | ||
@@ -414,15 +421,10 @@ EXPORT_SYMBOL(dev_add_pack); | |||
414 | */ | 421 | */ |
415 | void __dev_remove_pack(struct packet_type *pt) | 422 | void __dev_remove_pack(struct packet_type *pt) |
416 | { | 423 | { |
417 | struct list_head *head; | 424 | struct list_head *head = ptype_head(pt); |
418 | struct packet_type *pt1; | 425 | struct packet_type *pt1; |
419 | 426 | ||
420 | spin_lock_bh(&ptype_lock); | 427 | spin_lock(&ptype_lock); |
421 | |||
422 | if (pt->type == htons(ETH_P_ALL)) | ||
423 | head = &ptype_all; | ||
424 | else | ||
425 | head = &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK]; | ||
426 | 428 | ||
427 | list_for_each_entry(pt1, head, list) { | 429 | list_for_each_entry(pt1, head, list) { |
428 | if (pt == pt1) { | 430 | if (pt == pt1) { |
@@ -433,7 +435,7 @@ void __dev_remove_pack(struct packet_type *pt) | |||
433 | 435 | ||
434 | printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt); | 436 | printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt); |
435 | out: | 437 | out: |
436 | spin_unlock_bh(&ptype_lock); | 438 | spin_unlock(&ptype_lock); |
437 | } | 439 | } |
438 | EXPORT_SYMBOL(__dev_remove_pack); | 440 | EXPORT_SYMBOL(__dev_remove_pack); |
439 | 441 | ||
@@ -742,34 +744,32 @@ struct net_device *dev_get_by_index(struct net *net, int ifindex) | |||
742 | EXPORT_SYMBOL(dev_get_by_index); | 744 | EXPORT_SYMBOL(dev_get_by_index); |
743 | 745 | ||
744 | /** | 746 | /** |
745 | * dev_getbyhwaddr - find a device by its hardware address | 747 | * dev_getbyhwaddr_rcu - find a device by its hardware address |
746 | * @net: the applicable net namespace | 748 | * @net: the applicable net namespace |
747 | * @type: media type of device | 749 | * @type: media type of device |
748 | * @ha: hardware address | 750 | * @ha: hardware address |
749 | * | 751 | * |
750 | * Search for an interface by MAC address. Returns NULL if the device | 752 | * Search for an interface by MAC address. Returns NULL if the device |
751 | * is not found or a pointer to the device. The caller must hold the | 753 | * is not found or a pointer to the device. |
752 | * rtnl semaphore. The returned device has not had its ref count increased | 754 | * The caller must hold RCU or RTNL. |
755 | * The returned device has not had its ref count increased | ||
753 | * and the caller must therefore be careful about locking | 756 | * and the caller must therefore be careful about locking |
754 | * | 757 | * |
755 | * BUGS: | ||
756 | * If the API was consistent this would be __dev_get_by_hwaddr | ||
757 | */ | 758 | */ |
758 | 759 | ||
759 | struct net_device *dev_getbyhwaddr(struct net *net, unsigned short type, char *ha) | 760 | struct net_device *dev_getbyhwaddr_rcu(struct net *net, unsigned short type, |
761 | const char *ha) | ||
760 | { | 762 | { |
761 | struct net_device *dev; | 763 | struct net_device *dev; |
762 | 764 | ||
763 | ASSERT_RTNL(); | 765 | for_each_netdev_rcu(net, dev) |
764 | |||
765 | for_each_netdev(net, dev) | ||
766 | if (dev->type == type && | 766 | if (dev->type == type && |
767 | !memcmp(dev->dev_addr, ha, dev->addr_len)) | 767 | !memcmp(dev->dev_addr, ha, dev->addr_len)) |
768 | return dev; | 768 | return dev; |
769 | 769 | ||
770 | return NULL; | 770 | return NULL; |
771 | } | 771 | } |
772 | EXPORT_SYMBOL(dev_getbyhwaddr); | 772 | EXPORT_SYMBOL(dev_getbyhwaddr_rcu); |
773 | 773 | ||
774 | struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type) | 774 | struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type) |
775 | { | 775 | { |
@@ -948,7 +948,7 @@ int dev_alloc_name(struct net_device *dev, const char *name) | |||
948 | } | 948 | } |
949 | EXPORT_SYMBOL(dev_alloc_name); | 949 | EXPORT_SYMBOL(dev_alloc_name); |
950 | 950 | ||
951 | static int dev_get_valid_name(struct net_device *dev, const char *name, bool fmt) | 951 | static int dev_get_valid_name(struct net_device *dev, const char *name) |
952 | { | 952 | { |
953 | struct net *net; | 953 | struct net *net; |
954 | 954 | ||
@@ -958,7 +958,7 @@ static int dev_get_valid_name(struct net_device *dev, const char *name, bool fmt | |||
958 | if (!dev_valid_name(name)) | 958 | if (!dev_valid_name(name)) |
959 | return -EINVAL; | 959 | return -EINVAL; |
960 | 960 | ||
961 | if (fmt && strchr(name, '%')) | 961 | if (strchr(name, '%')) |
962 | return dev_alloc_name(dev, name); | 962 | return dev_alloc_name(dev, name); |
963 | else if (__dev_get_by_name(net, name)) | 963 | else if (__dev_get_by_name(net, name)) |
964 | return -EEXIST; | 964 | return -EEXIST; |
@@ -995,7 +995,7 @@ int dev_change_name(struct net_device *dev, const char *newname) | |||
995 | 995 | ||
996 | memcpy(oldname, dev->name, IFNAMSIZ); | 996 | memcpy(oldname, dev->name, IFNAMSIZ); |
997 | 997 | ||
998 | err = dev_get_valid_name(dev, newname, 1); | 998 | err = dev_get_valid_name(dev, newname); |
999 | if (err < 0) | 999 | if (err < 0) |
1000 | return err; | 1000 | return err; |
1001 | 1001 | ||
@@ -1007,7 +1007,7 @@ rollback: | |||
1007 | } | 1007 | } |
1008 | 1008 | ||
1009 | write_lock_bh(&dev_base_lock); | 1009 | write_lock_bh(&dev_base_lock); |
1010 | hlist_del(&dev->name_hlist); | 1010 | hlist_del_rcu(&dev->name_hlist); |
1011 | write_unlock_bh(&dev_base_lock); | 1011 | write_unlock_bh(&dev_base_lock); |
1012 | 1012 | ||
1013 | synchronize_rcu(); | 1013 | synchronize_rcu(); |
@@ -1115,13 +1115,21 @@ EXPORT_SYMBOL(netdev_bonding_change); | |||
1115 | void dev_load(struct net *net, const char *name) | 1115 | void dev_load(struct net *net, const char *name) |
1116 | { | 1116 | { |
1117 | struct net_device *dev; | 1117 | struct net_device *dev; |
1118 | int no_module; | ||
1118 | 1119 | ||
1119 | rcu_read_lock(); | 1120 | rcu_read_lock(); |
1120 | dev = dev_get_by_name_rcu(net, name); | 1121 | dev = dev_get_by_name_rcu(net, name); |
1121 | rcu_read_unlock(); | 1122 | rcu_read_unlock(); |
1122 | 1123 | ||
1123 | if (!dev && capable(CAP_NET_ADMIN)) | 1124 | no_module = !dev; |
1124 | request_module("%s", name); | 1125 | if (no_module && capable(CAP_NET_ADMIN)) |
1126 | no_module = request_module("netdev-%s", name); | ||
1127 | if (no_module && capable(CAP_SYS_MODULE)) { | ||
1128 | if (!request_module("%s", name)) | ||
1129 | pr_err("Loading kernel module for a network device " | ||
1130 | "with CAP_SYS_MODULE (deprecated). Use CAP_NET_ADMIN and alias netdev-%s " | ||
1131 | "instead\n", name); | ||
1132 | } | ||
1125 | } | 1133 | } |
1126 | EXPORT_SYMBOL(dev_load); | 1134 | EXPORT_SYMBOL(dev_load); |
1127 | 1135 | ||
@@ -1132,9 +1140,6 @@ static int __dev_open(struct net_device *dev) | |||
1132 | 1140 | ||
1133 | ASSERT_RTNL(); | 1141 | ASSERT_RTNL(); |
1134 | 1142 | ||
1135 | /* | ||
1136 | * Is it even present? | ||
1137 | */ | ||
1138 | if (!netif_device_present(dev)) | 1143 | if (!netif_device_present(dev)) |
1139 | return -ENODEV; | 1144 | return -ENODEV; |
1140 | 1145 | ||
@@ -1143,9 +1148,6 @@ static int __dev_open(struct net_device *dev) | |||
1143 | if (ret) | 1148 | if (ret) |
1144 | return ret; | 1149 | return ret; |
1145 | 1150 | ||
1146 | /* | ||
1147 | * Call device private open method | ||
1148 | */ | ||
1149 | set_bit(__LINK_STATE_START, &dev->state); | 1151 | set_bit(__LINK_STATE_START, &dev->state); |
1150 | 1152 | ||
1151 | if (ops->ndo_validate_addr) | 1153 | if (ops->ndo_validate_addr) |
@@ -1154,31 +1156,12 @@ static int __dev_open(struct net_device *dev) | |||
1154 | if (!ret && ops->ndo_open) | 1156 | if (!ret && ops->ndo_open) |
1155 | ret = ops->ndo_open(dev); | 1157 | ret = ops->ndo_open(dev); |
1156 | 1158 | ||
1157 | /* | ||
1158 | * If it went open OK then: | ||
1159 | */ | ||
1160 | |||
1161 | if (ret) | 1159 | if (ret) |
1162 | clear_bit(__LINK_STATE_START, &dev->state); | 1160 | clear_bit(__LINK_STATE_START, &dev->state); |
1163 | else { | 1161 | else { |
1164 | /* | ||
1165 | * Set the flags. | ||
1166 | */ | ||
1167 | dev->flags |= IFF_UP; | 1162 | dev->flags |= IFF_UP; |
1168 | |||
1169 | /* | ||
1170 | * Enable NET_DMA | ||
1171 | */ | ||
1172 | net_dmaengine_get(); | 1163 | net_dmaengine_get(); |
1173 | |||
1174 | /* | ||
1175 | * Initialize multicasting status | ||
1176 | */ | ||
1177 | dev_set_rx_mode(dev); | 1164 | dev_set_rx_mode(dev); |
1178 | |||
1179 | /* | ||
1180 | * Wakeup transmit queue engine | ||
1181 | */ | ||
1182 | dev_activate(dev); | 1165 | dev_activate(dev); |
1183 | } | 1166 | } |
1184 | 1167 | ||
@@ -1201,22 +1184,13 @@ int dev_open(struct net_device *dev) | |||
1201 | { | 1184 | { |
1202 | int ret; | 1185 | int ret; |
1203 | 1186 | ||
1204 | /* | ||
1205 | * Is it already up? | ||
1206 | */ | ||
1207 | if (dev->flags & IFF_UP) | 1187 | if (dev->flags & IFF_UP) |
1208 | return 0; | 1188 | return 0; |
1209 | 1189 | ||
1210 | /* | ||
1211 | * Open device | ||
1212 | */ | ||
1213 | ret = __dev_open(dev); | 1190 | ret = __dev_open(dev); |
1214 | if (ret < 0) | 1191 | if (ret < 0) |
1215 | return ret; | 1192 | return ret; |
1216 | 1193 | ||
1217 | /* | ||
1218 | * ... and announce new interface. | ||
1219 | */ | ||
1220 | rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING); | 1194 | rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING); |
1221 | call_netdevice_notifiers(NETDEV_UP, dev); | 1195 | call_netdevice_notifiers(NETDEV_UP, dev); |
1222 | 1196 | ||
@@ -1224,52 +1198,78 @@ int dev_open(struct net_device *dev) | |||
1224 | } | 1198 | } |
1225 | EXPORT_SYMBOL(dev_open); | 1199 | EXPORT_SYMBOL(dev_open); |
1226 | 1200 | ||
1227 | static int __dev_close(struct net_device *dev) | 1201 | static int __dev_close_many(struct list_head *head) |
1228 | { | 1202 | { |
1229 | const struct net_device_ops *ops = dev->netdev_ops; | 1203 | struct net_device *dev; |
1230 | 1204 | ||
1231 | ASSERT_RTNL(); | 1205 | ASSERT_RTNL(); |
1232 | might_sleep(); | 1206 | might_sleep(); |
1233 | 1207 | ||
1234 | /* | 1208 | list_for_each_entry(dev, head, unreg_list) { |
1235 | * Tell people we are going down, so that they can | 1209 | call_netdevice_notifiers(NETDEV_GOING_DOWN, dev); |
1236 | * prepare to death, when device is still operating. | ||
1237 | */ | ||
1238 | call_netdevice_notifiers(NETDEV_GOING_DOWN, dev); | ||
1239 | 1210 | ||
1240 | clear_bit(__LINK_STATE_START, &dev->state); | 1211 | clear_bit(__LINK_STATE_START, &dev->state); |
1241 | 1212 | ||
1242 | /* Synchronize to scheduled poll. We cannot touch poll list, | 1213 | /* Synchronize to scheduled poll. We cannot touch poll list, it |
1243 | * it can be even on different cpu. So just clear netif_running(). | 1214 | * can be even on different cpu. So just clear netif_running(). |
1244 | * | 1215 | * |
1245 | * dev->stop() will invoke napi_disable() on all of it's | 1216 | * dev->stop() will invoke napi_disable() on all of it's |
1246 | * napi_struct instances on this device. | 1217 | * napi_struct instances on this device. |
1247 | */ | 1218 | */ |
1248 | smp_mb__after_clear_bit(); /* Commit netif_running(). */ | 1219 | smp_mb__after_clear_bit(); /* Commit netif_running(). */ |
1220 | } | ||
1249 | 1221 | ||
1250 | dev_deactivate(dev); | 1222 | dev_deactivate_many(head); |
1251 | 1223 | ||
1252 | /* | 1224 | list_for_each_entry(dev, head, unreg_list) { |
1253 | * Call the device specific close. This cannot fail. | 1225 | const struct net_device_ops *ops = dev->netdev_ops; |
1254 | * Only if device is UP | ||
1255 | * | ||
1256 | * We allow it to be called even after a DETACH hot-plug | ||
1257 | * event. | ||
1258 | */ | ||
1259 | if (ops->ndo_stop) | ||
1260 | ops->ndo_stop(dev); | ||
1261 | 1226 | ||
1262 | /* | 1227 | /* |
1263 | * Device is now down. | 1228 | * Call the device specific close. This cannot fail. |
1264 | */ | 1229 | * Only if device is UP |
1230 | * | ||
1231 | * We allow it to be called even after a DETACH hot-plug | ||
1232 | * event. | ||
1233 | */ | ||
1234 | if (ops->ndo_stop) | ||
1235 | ops->ndo_stop(dev); | ||
1265 | 1236 | ||
1266 | dev->flags &= ~IFF_UP; | 1237 | dev->flags &= ~IFF_UP; |
1238 | net_dmaengine_put(); | ||
1239 | } | ||
1267 | 1240 | ||
1268 | /* | 1241 | return 0; |
1269 | * Shutdown NET_DMA | 1242 | } |
1270 | */ | 1243 | |
1271 | net_dmaengine_put(); | 1244 | static int __dev_close(struct net_device *dev) |
1245 | { | ||
1246 | int retval; | ||
1247 | LIST_HEAD(single); | ||
1272 | 1248 | ||
1249 | list_add(&dev->unreg_list, &single); | ||
1250 | retval = __dev_close_many(&single); | ||
1251 | list_del(&single); | ||
1252 | return retval; | ||
1253 | } | ||
1254 | |||
1255 | static int dev_close_many(struct list_head *head) | ||
1256 | { | ||
1257 | struct net_device *dev, *tmp; | ||
1258 | LIST_HEAD(tmp_list); | ||
1259 | |||
1260 | list_for_each_entry_safe(dev, tmp, head, unreg_list) | ||
1261 | if (!(dev->flags & IFF_UP)) | ||
1262 | list_move(&dev->unreg_list, &tmp_list); | ||
1263 | |||
1264 | __dev_close_many(head); | ||
1265 | |||
1266 | list_for_each_entry(dev, head, unreg_list) { | ||
1267 | rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING); | ||
1268 | call_netdevice_notifiers(NETDEV_DOWN, dev); | ||
1269 | } | ||
1270 | |||
1271 | /* rollback_registered_many needs the complete original list */ | ||
1272 | list_splice(&tmp_list, head); | ||
1273 | return 0; | 1273 | return 0; |
1274 | } | 1274 | } |
1275 | 1275 | ||
@@ -1284,17 +1284,13 @@ static int __dev_close(struct net_device *dev) | |||
1284 | */ | 1284 | */ |
1285 | int dev_close(struct net_device *dev) | 1285 | int dev_close(struct net_device *dev) |
1286 | { | 1286 | { |
1287 | if (!(dev->flags & IFF_UP)) | 1287 | if (dev->flags & IFF_UP) { |
1288 | return 0; | 1288 | LIST_HEAD(single); |
1289 | |||
1290 | __dev_close(dev); | ||
1291 | |||
1292 | /* | ||
1293 | * Tell people we are down | ||
1294 | */ | ||
1295 | rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING); | ||
1296 | call_netdevice_notifiers(NETDEV_DOWN, dev); | ||
1297 | 1289 | ||
1290 | list_add(&dev->unreg_list, &single); | ||
1291 | dev_close_many(&single); | ||
1292 | list_del(&single); | ||
1293 | } | ||
1298 | return 0; | 1294 | return 0; |
1299 | } | 1295 | } |
1300 | EXPORT_SYMBOL(dev_close); | 1296 | EXPORT_SYMBOL(dev_close); |
@@ -1310,26 +1306,32 @@ EXPORT_SYMBOL(dev_close); | |||
1310 | */ | 1306 | */ |
1311 | void dev_disable_lro(struct net_device *dev) | 1307 | void dev_disable_lro(struct net_device *dev) |
1312 | { | 1308 | { |
1313 | if (dev->ethtool_ops && dev->ethtool_ops->get_flags && | 1309 | u32 flags; |
1314 | dev->ethtool_ops->set_flags) { | 1310 | |
1315 | u32 flags = dev->ethtool_ops->get_flags(dev); | 1311 | /* |
1316 | if (flags & ETH_FLAG_LRO) { | 1312 | * If we're trying to disable lro on a vlan device |
1317 | flags &= ~ETH_FLAG_LRO; | 1313 | * use the underlying physical device instead |
1318 | dev->ethtool_ops->set_flags(dev, flags); | 1314 | */ |
1319 | } | 1315 | if (is_vlan_dev(dev)) |
1320 | } | 1316 | dev = vlan_dev_real_dev(dev); |
1321 | WARN_ON(dev->features & NETIF_F_LRO); | 1317 | |
1318 | if (dev->ethtool_ops && dev->ethtool_ops->get_flags) | ||
1319 | flags = dev->ethtool_ops->get_flags(dev); | ||
1320 | else | ||
1321 | flags = ethtool_op_get_flags(dev); | ||
1322 | |||
1323 | if (!(flags & ETH_FLAG_LRO)) | ||
1324 | return; | ||
1325 | |||
1326 | __ethtool_set_flags(dev, flags & ~ETH_FLAG_LRO); | ||
1327 | if (unlikely(dev->features & NETIF_F_LRO)) | ||
1328 | netdev_WARN(dev, "failed to disable LRO!\n"); | ||
1322 | } | 1329 | } |
1323 | EXPORT_SYMBOL(dev_disable_lro); | 1330 | EXPORT_SYMBOL(dev_disable_lro); |
1324 | 1331 | ||
1325 | 1332 | ||
1326 | static int dev_boot_phase = 1; | 1333 | static int dev_boot_phase = 1; |
1327 | 1334 | ||
1328 | /* | ||
1329 | * Device change register/unregister. These are not inline or static | ||
1330 | * as we export them to the world. | ||
1331 | */ | ||
1332 | |||
1333 | /** | 1335 | /** |
1334 | * register_netdevice_notifier - register a network notifier block | 1336 | * register_netdevice_notifier - register a network notifier block |
1335 | * @nb: notifier | 1337 | * @nb: notifier |
@@ -1431,6 +1433,7 @@ int call_netdevice_notifiers(unsigned long val, struct net_device *dev) | |||
1431 | ASSERT_RTNL(); | 1433 | ASSERT_RTNL(); |
1432 | return raw_notifier_call_chain(&netdev_chain, val, dev); | 1434 | return raw_notifier_call_chain(&netdev_chain, val, dev); |
1433 | } | 1435 | } |
1436 | EXPORT_SYMBOL(call_netdevice_notifiers); | ||
1434 | 1437 | ||
1435 | /* When > 0 there are consumers of rx skb time stamps */ | 1438 | /* When > 0 there are consumers of rx skb time stamps */ |
1436 | static atomic_t netstamp_needed = ATOMIC_INIT(0); | 1439 | static atomic_t netstamp_needed = ATOMIC_INIT(0); |
@@ -1461,6 +1464,27 @@ static inline void net_timestamp_check(struct sk_buff *skb) | |||
1461 | __net_timestamp(skb); | 1464 | __net_timestamp(skb); |
1462 | } | 1465 | } |
1463 | 1466 | ||
1467 | static inline bool is_skb_forwardable(struct net_device *dev, | ||
1468 | struct sk_buff *skb) | ||
1469 | { | ||
1470 | unsigned int len; | ||
1471 | |||
1472 | if (!(dev->flags & IFF_UP)) | ||
1473 | return false; | ||
1474 | |||
1475 | len = dev->mtu + dev->hard_header_len + VLAN_HLEN; | ||
1476 | if (skb->len <= len) | ||
1477 | return true; | ||
1478 | |||
1479 | /* if TSO is enabled, we don't care about the length as the packet | ||
1480 | * could be forwarded without being segmented before | ||
1481 | */ | ||
1482 | if (skb_is_gso(skb)) | ||
1483 | return true; | ||
1484 | |||
1485 | return false; | ||
1486 | } | ||
1487 | |||
1464 | /** | 1488 | /** |
1465 | * dev_forward_skb - loopback an skb to another netif | 1489 | * dev_forward_skb - loopback an skb to another netif |
1466 | * | 1490 | * |
@@ -1484,8 +1508,8 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb) | |||
1484 | skb_orphan(skb); | 1508 | skb_orphan(skb); |
1485 | nf_reset(skb); | 1509 | nf_reset(skb); |
1486 | 1510 | ||
1487 | if (!(dev->flags & IFF_UP) || | 1511 | if (unlikely(!is_skb_forwardable(dev, skb))) { |
1488 | (skb->len > (dev->mtu + dev->hard_header_len))) { | 1512 | atomic_long_inc(&dev->rx_dropped); |
1489 | kfree_skb(skb); | 1513 | kfree_skb(skb); |
1490 | return NET_RX_DROP; | 1514 | return NET_RX_DROP; |
1491 | } | 1515 | } |
@@ -1497,6 +1521,14 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb) | |||
1497 | } | 1521 | } |
1498 | EXPORT_SYMBOL_GPL(dev_forward_skb); | 1522 | EXPORT_SYMBOL_GPL(dev_forward_skb); |
1499 | 1523 | ||
1524 | static inline int deliver_skb(struct sk_buff *skb, | ||
1525 | struct packet_type *pt_prev, | ||
1526 | struct net_device *orig_dev) | ||
1527 | { | ||
1528 | atomic_inc(&skb->users); | ||
1529 | return pt_prev->func(skb, skb->dev, pt_prev, orig_dev); | ||
1530 | } | ||
1531 | |||
1500 | /* | 1532 | /* |
1501 | * Support routine. Sends outgoing frames to any network | 1533 | * Support routine. Sends outgoing frames to any network |
1502 | * taps currently in use. | 1534 | * taps currently in use. |
@@ -1505,13 +1537,8 @@ EXPORT_SYMBOL_GPL(dev_forward_skb); | |||
1505 | static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) | 1537 | static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) |
1506 | { | 1538 | { |
1507 | struct packet_type *ptype; | 1539 | struct packet_type *ptype; |
1508 | 1540 | struct sk_buff *skb2 = NULL; | |
1509 | #ifdef CONFIG_NET_CLS_ACT | 1541 | struct packet_type *pt_prev = NULL; |
1510 | if (!(skb->tstamp.tv64 && (G_TC_FROM(skb->tc_verd) & AT_INGRESS))) | ||
1511 | net_timestamp_set(skb); | ||
1512 | #else | ||
1513 | net_timestamp_set(skb); | ||
1514 | #endif | ||
1515 | 1542 | ||
1516 | rcu_read_lock(); | 1543 | rcu_read_lock(); |
1517 | list_for_each_entry_rcu(ptype, &ptype_all, list) { | 1544 | list_for_each_entry_rcu(ptype, &ptype_all, list) { |
@@ -1521,10 +1548,18 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) | |||
1521 | if ((ptype->dev == dev || !ptype->dev) && | 1548 | if ((ptype->dev == dev || !ptype->dev) && |
1522 | (ptype->af_packet_priv == NULL || | 1549 | (ptype->af_packet_priv == NULL || |
1523 | (struct sock *)ptype->af_packet_priv != skb->sk)) { | 1550 | (struct sock *)ptype->af_packet_priv != skb->sk)) { |
1524 | struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); | 1551 | if (pt_prev) { |
1552 | deliver_skb(skb2, pt_prev, skb->dev); | ||
1553 | pt_prev = ptype; | ||
1554 | continue; | ||
1555 | } | ||
1556 | |||
1557 | skb2 = skb_clone(skb, GFP_ATOMIC); | ||
1525 | if (!skb2) | 1558 | if (!skb2) |
1526 | break; | 1559 | break; |
1527 | 1560 | ||
1561 | net_timestamp_set(skb2); | ||
1562 | |||
1528 | /* skb->nh should be correctly | 1563 | /* skb->nh should be correctly |
1529 | set by sender, so that the second statement is | 1564 | set by sender, so that the second statement is |
1530 | just protection against buggy protocols. | 1565 | just protection against buggy protocols. |
@@ -1543,31 +1578,121 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) | |||
1543 | 1578 | ||
1544 | skb2->transport_header = skb2->network_header; | 1579 | skb2->transport_header = skb2->network_header; |
1545 | skb2->pkt_type = PACKET_OUTGOING; | 1580 | skb2->pkt_type = PACKET_OUTGOING; |
1546 | ptype->func(skb2, skb->dev, ptype, skb->dev); | 1581 | pt_prev = ptype; |
1547 | } | 1582 | } |
1548 | } | 1583 | } |
1584 | if (pt_prev) | ||
1585 | pt_prev->func(skb2, skb->dev, pt_prev, skb->dev); | ||
1549 | rcu_read_unlock(); | 1586 | rcu_read_unlock(); |
1550 | } | 1587 | } |
1551 | 1588 | ||
1589 | /* netif_setup_tc - Handle tc mappings on real_num_tx_queues change | ||
1590 | * @dev: Network device | ||
1591 | * @txq: number of queues available | ||
1592 | * | ||
1593 | * If real_num_tx_queues is changed the tc mappings may no longer be | ||
1594 | * valid. To resolve this verify the tc mapping remains valid and if | ||
1595 | * not NULL the mapping. With no priorities mapping to this | ||
1596 | * offset/count pair it will no longer be used. In the worst case TC0 | ||
1597 | * is invalid nothing can be done so disable priority mappings. If is | ||
1598 | * expected that drivers will fix this mapping if they can before | ||
1599 | * calling netif_set_real_num_tx_queues. | ||
1600 | */ | ||
1601 | static void netif_setup_tc(struct net_device *dev, unsigned int txq) | ||
1602 | { | ||
1603 | int i; | ||
1604 | struct netdev_tc_txq *tc = &dev->tc_to_txq[0]; | ||
1605 | |||
1606 | /* If TC0 is invalidated disable TC mapping */ | ||
1607 | if (tc->offset + tc->count > txq) { | ||
1608 | pr_warning("Number of in use tx queues changed " | ||
1609 | "invalidating tc mappings. Priority " | ||
1610 | "traffic classification disabled!\n"); | ||
1611 | dev->num_tc = 0; | ||
1612 | return; | ||
1613 | } | ||
1614 | |||
1615 | /* Invalidated prio to tc mappings set to TC0 */ | ||
1616 | for (i = 1; i < TC_BITMASK + 1; i++) { | ||
1617 | int q = netdev_get_prio_tc_map(dev, i); | ||
1618 | |||
1619 | tc = &dev->tc_to_txq[q]; | ||
1620 | if (tc->offset + tc->count > txq) { | ||
1621 | pr_warning("Number of in use tx queues " | ||
1622 | "changed. Priority %i to tc " | ||
1623 | "mapping %i is no longer valid " | ||
1624 | "setting map to 0\n", | ||
1625 | i, q); | ||
1626 | netdev_set_prio_tc_map(dev, i, 0); | ||
1627 | } | ||
1628 | } | ||
1629 | } | ||
1630 | |||
1552 | /* | 1631 | /* |
1553 | * Routine to help set real_num_tx_queues. To avoid skbs mapped to queues | 1632 | * Routine to help set real_num_tx_queues. To avoid skbs mapped to queues |
1554 | * greater then real_num_tx_queues stale skbs on the qdisc must be flushed. | 1633 | * greater then real_num_tx_queues stale skbs on the qdisc must be flushed. |
1555 | */ | 1634 | */ |
1556 | void netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq) | 1635 | int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq) |
1557 | { | 1636 | { |
1558 | unsigned int real_num = dev->real_num_tx_queues; | 1637 | int rc; |
1638 | |||
1639 | if (txq < 1 || txq > dev->num_tx_queues) | ||
1640 | return -EINVAL; | ||
1641 | |||
1642 | if (dev->reg_state == NETREG_REGISTERED || | ||
1643 | dev->reg_state == NETREG_UNREGISTERING) { | ||
1644 | ASSERT_RTNL(); | ||
1559 | 1645 | ||
1560 | if (unlikely(txq > dev->num_tx_queues)) | 1646 | rc = netdev_queue_update_kobjects(dev, dev->real_num_tx_queues, |
1561 | ; | 1647 | txq); |
1562 | else if (txq > real_num) | 1648 | if (rc) |
1563 | dev->real_num_tx_queues = txq; | 1649 | return rc; |
1564 | else if (txq < real_num) { | 1650 | |
1565 | dev->real_num_tx_queues = txq; | 1651 | if (dev->num_tc) |
1566 | qdisc_reset_all_tx_gt(dev, txq); | 1652 | netif_setup_tc(dev, txq); |
1653 | |||
1654 | if (txq < dev->real_num_tx_queues) | ||
1655 | qdisc_reset_all_tx_gt(dev, txq); | ||
1567 | } | 1656 | } |
1657 | |||
1658 | dev->real_num_tx_queues = txq; | ||
1659 | return 0; | ||
1568 | } | 1660 | } |
1569 | EXPORT_SYMBOL(netif_set_real_num_tx_queues); | 1661 | EXPORT_SYMBOL(netif_set_real_num_tx_queues); |
1570 | 1662 | ||
1663 | #ifdef CONFIG_RPS | ||
1664 | /** | ||
1665 | * netif_set_real_num_rx_queues - set actual number of RX queues used | ||
1666 | * @dev: Network device | ||
1667 | * @rxq: Actual number of RX queues | ||
1668 | * | ||
1669 | * This must be called either with the rtnl_lock held or before | ||
1670 | * registration of the net device. Returns 0 on success, or a | ||
1671 | * negative error code. If called before registration, it always | ||
1672 | * succeeds. | ||
1673 | */ | ||
1674 | int netif_set_real_num_rx_queues(struct net_device *dev, unsigned int rxq) | ||
1675 | { | ||
1676 | int rc; | ||
1677 | |||
1678 | if (rxq < 1 || rxq > dev->num_rx_queues) | ||
1679 | return -EINVAL; | ||
1680 | |||
1681 | if (dev->reg_state == NETREG_REGISTERED) { | ||
1682 | ASSERT_RTNL(); | ||
1683 | |||
1684 | rc = net_rx_queue_update_kobjects(dev, dev->real_num_rx_queues, | ||
1685 | rxq); | ||
1686 | if (rc) | ||
1687 | return rc; | ||
1688 | } | ||
1689 | |||
1690 | dev->real_num_rx_queues = rxq; | ||
1691 | return 0; | ||
1692 | } | ||
1693 | EXPORT_SYMBOL(netif_set_real_num_rx_queues); | ||
1694 | #endif | ||
1695 | |||
1571 | static inline void __netif_reschedule(struct Qdisc *q) | 1696 | static inline void __netif_reschedule(struct Qdisc *q) |
1572 | { | 1697 | { |
1573 | struct softnet_data *sd; | 1698 | struct softnet_data *sd; |
@@ -1646,32 +1771,6 @@ void netif_device_attach(struct net_device *dev) | |||
1646 | } | 1771 | } |
1647 | EXPORT_SYMBOL(netif_device_attach); | 1772 | EXPORT_SYMBOL(netif_device_attach); |
1648 | 1773 | ||
1649 | static bool can_checksum_protocol(unsigned long features, __be16 protocol) | ||
1650 | { | ||
1651 | return ((features & NETIF_F_GEN_CSUM) || | ||
1652 | ((features & NETIF_F_IP_CSUM) && | ||
1653 | protocol == htons(ETH_P_IP)) || | ||
1654 | ((features & NETIF_F_IPV6_CSUM) && | ||
1655 | protocol == htons(ETH_P_IPV6)) || | ||
1656 | ((features & NETIF_F_FCOE_CRC) && | ||
1657 | protocol == htons(ETH_P_FCOE))); | ||
1658 | } | ||
1659 | |||
1660 | static bool dev_can_checksum(struct net_device *dev, struct sk_buff *skb) | ||
1661 | { | ||
1662 | if (can_checksum_protocol(dev->features, skb->protocol)) | ||
1663 | return true; | ||
1664 | |||
1665 | if (skb->protocol == htons(ETH_P_8021Q)) { | ||
1666 | struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data; | ||
1667 | if (can_checksum_protocol(dev->features & dev->vlan_features, | ||
1668 | veh->h_vlan_encapsulated_proto)) | ||
1669 | return true; | ||
1670 | } | ||
1671 | |||
1672 | return false; | ||
1673 | } | ||
1674 | |||
1675 | /** | 1774 | /** |
1676 | * skb_dev_set -- assign a new device to a buffer | 1775 | * skb_dev_set -- assign a new device to a buffer |
1677 | * @skb: buffer for the new device | 1776 | * @skb: buffer for the new device |
@@ -1719,7 +1818,7 @@ int skb_checksum_help(struct sk_buff *skb) | |||
1719 | goto out_set_summed; | 1818 | goto out_set_summed; |
1720 | } | 1819 | } |
1721 | 1820 | ||
1722 | offset = skb->csum_start - skb_headroom(skb); | 1821 | offset = skb_checksum_start_offset(skb); |
1723 | BUG_ON(offset >= skb_headlen(skb)); | 1822 | BUG_ON(offset >= skb_headlen(skb)); |
1724 | csum = skb_checksum(skb, offset, skb->len - offset, 0); | 1823 | csum = skb_checksum(skb, offset, skb->len - offset, 0); |
1725 | 1824 | ||
@@ -1751,13 +1850,25 @@ EXPORT_SYMBOL(skb_checksum_help); | |||
1751 | * It may return NULL if the skb requires no segmentation. This is | 1850 | * It may return NULL if the skb requires no segmentation. This is |
1752 | * only possible when GSO is used for verifying header integrity. | 1851 | * only possible when GSO is used for verifying header integrity. |
1753 | */ | 1852 | */ |
1754 | struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features) | 1853 | struct sk_buff *skb_gso_segment(struct sk_buff *skb, u32 features) |
1755 | { | 1854 | { |
1756 | struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT); | 1855 | struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT); |
1757 | struct packet_type *ptype; | 1856 | struct packet_type *ptype; |
1758 | __be16 type = skb->protocol; | 1857 | __be16 type = skb->protocol; |
1858 | int vlan_depth = ETH_HLEN; | ||
1759 | int err; | 1859 | int err; |
1760 | 1860 | ||
1861 | while (type == htons(ETH_P_8021Q)) { | ||
1862 | struct vlan_hdr *vh; | ||
1863 | |||
1864 | if (unlikely(!pskb_may_pull(skb, vlan_depth + VLAN_HLEN))) | ||
1865 | return ERR_PTR(-EINVAL); | ||
1866 | |||
1867 | vh = (struct vlan_hdr *)(skb->data + vlan_depth); | ||
1868 | type = vh->h_vlan_encapsulated_proto; | ||
1869 | vlan_depth += VLAN_HLEN; | ||
1870 | } | ||
1871 | |||
1761 | skb_reset_mac_header(skb); | 1872 | skb_reset_mac_header(skb); |
1762 | skb->mac_len = skb->network_header - skb->mac_header; | 1873 | skb->mac_len = skb->network_header - skb->mac_header; |
1763 | __skb_pull(skb, skb->mac_len); | 1874 | __skb_pull(skb, skb->mac_len); |
@@ -1769,8 +1880,7 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features) | |||
1769 | if (dev && dev->ethtool_ops && dev->ethtool_ops->get_drvinfo) | 1880 | if (dev && dev->ethtool_ops && dev->ethtool_ops->get_drvinfo) |
1770 | dev->ethtool_ops->get_drvinfo(dev, &info); | 1881 | dev->ethtool_ops->get_drvinfo(dev, &info); |
1771 | 1882 | ||
1772 | WARN(1, "%s: caps=(0x%lx, 0x%lx) len=%d data_len=%d " | 1883 | WARN(1, "%s: caps=(0x%lx, 0x%lx) len=%d data_len=%d ip_summed=%d\n", |
1773 | "ip_summed=%d", | ||
1774 | info.driver, dev ? dev->features : 0L, | 1884 | info.driver, dev ? dev->features : 0L, |
1775 | skb->sk ? skb->sk->sk_route_caps : 0L, | 1885 | skb->sk ? skb->sk->sk_route_caps : 0L, |
1776 | skb->len, skb->data_len, skb->ip_summed); | 1886 | skb->len, skb->data_len, skb->ip_summed); |
@@ -1873,16 +1983,14 @@ static void dev_gso_skb_destructor(struct sk_buff *skb) | |||
1873 | /** | 1983 | /** |
1874 | * dev_gso_segment - Perform emulated hardware segmentation on skb. | 1984 | * dev_gso_segment - Perform emulated hardware segmentation on skb. |
1875 | * @skb: buffer to segment | 1985 | * @skb: buffer to segment |
1986 | * @features: device features as applicable to this skb | ||
1876 | * | 1987 | * |
1877 | * This function segments the given skb and stores the list of segments | 1988 | * This function segments the given skb and stores the list of segments |
1878 | * in skb->next. | 1989 | * in skb->next. |
1879 | */ | 1990 | */ |
1880 | static int dev_gso_segment(struct sk_buff *skb) | 1991 | static int dev_gso_segment(struct sk_buff *skb, int features) |
1881 | { | 1992 | { |
1882 | struct net_device *dev = skb->dev; | ||
1883 | struct sk_buff *segs; | 1993 | struct sk_buff *segs; |
1884 | int features = dev->features & ~(illegal_highdma(dev, skb) ? | ||
1885 | NETIF_F_SG : 0); | ||
1886 | 1994 | ||
1887 | segs = skb_gso_segment(skb, features); | 1995 | segs = skb_gso_segment(skb, features); |
1888 | 1996 | ||
@@ -1902,14 +2010,14 @@ static int dev_gso_segment(struct sk_buff *skb) | |||
1902 | 2010 | ||
1903 | /* | 2011 | /* |
1904 | * Try to orphan skb early, right before transmission by the device. | 2012 | * Try to orphan skb early, right before transmission by the device. |
1905 | * We cannot orphan skb if tx timestamp is requested, since | 2013 | * We cannot orphan skb if tx timestamp is requested or the sk-reference |
1906 | * drivers need to call skb_tstamp_tx() to send the timestamp. | 2014 | * is needed on driver level for other reasons, e.g. see net/can/raw.c |
1907 | */ | 2015 | */ |
1908 | static inline void skb_orphan_try(struct sk_buff *skb) | 2016 | static inline void skb_orphan_try(struct sk_buff *skb) |
1909 | { | 2017 | { |
1910 | struct sock *sk = skb->sk; | 2018 | struct sock *sk = skb->sk; |
1911 | 2019 | ||
1912 | if (sk && !skb_tx(skb)->flags) { | 2020 | if (sk && !skb_shinfo(skb)->tx_flags) { |
1913 | /* skb_tx_hash() wont be able to get sk. | 2021 | /* skb_tx_hash() wont be able to get sk. |
1914 | * We copy sk_hash into skb->rxhash | 2022 | * We copy sk_hash into skb->rxhash |
1915 | */ | 2023 | */ |
@@ -1919,6 +2027,53 @@ static inline void skb_orphan_try(struct sk_buff *skb) | |||
1919 | } | 2027 | } |
1920 | } | 2028 | } |
1921 | 2029 | ||
2030 | static bool can_checksum_protocol(unsigned long features, __be16 protocol) | ||
2031 | { | ||
2032 | return ((features & NETIF_F_GEN_CSUM) || | ||
2033 | ((features & NETIF_F_V4_CSUM) && | ||
2034 | protocol == htons(ETH_P_IP)) || | ||
2035 | ((features & NETIF_F_V6_CSUM) && | ||
2036 | protocol == htons(ETH_P_IPV6)) || | ||
2037 | ((features & NETIF_F_FCOE_CRC) && | ||
2038 | protocol == htons(ETH_P_FCOE))); | ||
2039 | } | ||
2040 | |||
2041 | static u32 harmonize_features(struct sk_buff *skb, __be16 protocol, u32 features) | ||
2042 | { | ||
2043 | if (!can_checksum_protocol(features, protocol)) { | ||
2044 | features &= ~NETIF_F_ALL_CSUM; | ||
2045 | features &= ~NETIF_F_SG; | ||
2046 | } else if (illegal_highdma(skb->dev, skb)) { | ||
2047 | features &= ~NETIF_F_SG; | ||
2048 | } | ||
2049 | |||
2050 | return features; | ||
2051 | } | ||
2052 | |||
2053 | u32 netif_skb_features(struct sk_buff *skb) | ||
2054 | { | ||
2055 | __be16 protocol = skb->protocol; | ||
2056 | u32 features = skb->dev->features; | ||
2057 | |||
2058 | if (protocol == htons(ETH_P_8021Q)) { | ||
2059 | struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data; | ||
2060 | protocol = veh->h_vlan_encapsulated_proto; | ||
2061 | } else if (!vlan_tx_tag_present(skb)) { | ||
2062 | return harmonize_features(skb, protocol, features); | ||
2063 | } | ||
2064 | |||
2065 | features &= (skb->dev->vlan_features | NETIF_F_HW_VLAN_TX); | ||
2066 | |||
2067 | if (protocol != htons(ETH_P_8021Q)) { | ||
2068 | return harmonize_features(skb, protocol, features); | ||
2069 | } else { | ||
2070 | features &= NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST | | ||
2071 | NETIF_F_GEN_CSUM | NETIF_F_HW_VLAN_TX; | ||
2072 | return harmonize_features(skb, protocol, features); | ||
2073 | } | ||
2074 | } | ||
2075 | EXPORT_SYMBOL(netif_skb_features); | ||
2076 | |||
1922 | /* | 2077 | /* |
1923 | * Returns true if either: | 2078 | * Returns true if either: |
1924 | * 1. skb has frag_list and the device doesn't support FRAGLIST, or | 2079 | * 1. skb has frag_list and the device doesn't support FRAGLIST, or |
@@ -1927,12 +2082,13 @@ static inline void skb_orphan_try(struct sk_buff *skb) | |||
1927 | * support DMA from it. | 2082 | * support DMA from it. |
1928 | */ | 2083 | */ |
1929 | static inline int skb_needs_linearize(struct sk_buff *skb, | 2084 | static inline int skb_needs_linearize(struct sk_buff *skb, |
1930 | struct net_device *dev) | 2085 | int features) |
1931 | { | 2086 | { |
1932 | return skb_is_nonlinear(skb) && | 2087 | return skb_is_nonlinear(skb) && |
1933 | ((skb_has_frags(skb) && !(dev->features & NETIF_F_FRAGLIST)) || | 2088 | ((skb_has_frag_list(skb) && |
1934 | (skb_shinfo(skb)->nr_frags && (!(dev->features & NETIF_F_SG) || | 2089 | !(features & NETIF_F_FRAGLIST)) || |
1935 | illegal_highdma(dev, skb)))); | 2090 | (skb_shinfo(skb)->nr_frags && |
2091 | !(features & NETIF_F_SG))); | ||
1936 | } | 2092 | } |
1937 | 2093 | ||
1938 | int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, | 2094 | int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, |
@@ -1940,27 +2096,41 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, | |||
1940 | { | 2096 | { |
1941 | const struct net_device_ops *ops = dev->netdev_ops; | 2097 | const struct net_device_ops *ops = dev->netdev_ops; |
1942 | int rc = NETDEV_TX_OK; | 2098 | int rc = NETDEV_TX_OK; |
2099 | unsigned int skb_len; | ||
1943 | 2100 | ||
1944 | if (likely(!skb->next)) { | 2101 | if (likely(!skb->next)) { |
1945 | if (!list_empty(&ptype_all)) | 2102 | u32 features; |
1946 | dev_queue_xmit_nit(skb, dev); | ||
1947 | 2103 | ||
1948 | /* | 2104 | /* |
1949 | * If device doesnt need skb->dst, release it right now while | 2105 | * If device doesn't need skb->dst, release it right now while |
1950 | * its hot in this cpu cache | 2106 | * its hot in this cpu cache |
1951 | */ | 2107 | */ |
1952 | if (dev->priv_flags & IFF_XMIT_DST_RELEASE) | 2108 | if (dev->priv_flags & IFF_XMIT_DST_RELEASE) |
1953 | skb_dst_drop(skb); | 2109 | skb_dst_drop(skb); |
1954 | 2110 | ||
2111 | if (!list_empty(&ptype_all)) | ||
2112 | dev_queue_xmit_nit(skb, dev); | ||
2113 | |||
1955 | skb_orphan_try(skb); | 2114 | skb_orphan_try(skb); |
1956 | 2115 | ||
1957 | if (netif_needs_gso(dev, skb)) { | 2116 | features = netif_skb_features(skb); |
1958 | if (unlikely(dev_gso_segment(skb))) | 2117 | |
2118 | if (vlan_tx_tag_present(skb) && | ||
2119 | !(features & NETIF_F_HW_VLAN_TX)) { | ||
2120 | skb = __vlan_put_tag(skb, vlan_tx_tag_get(skb)); | ||
2121 | if (unlikely(!skb)) | ||
2122 | goto out; | ||
2123 | |||
2124 | skb->vlan_tci = 0; | ||
2125 | } | ||
2126 | |||
2127 | if (netif_needs_gso(skb, features)) { | ||
2128 | if (unlikely(dev_gso_segment(skb, features))) | ||
1959 | goto out_kfree_skb; | 2129 | goto out_kfree_skb; |
1960 | if (skb->next) | 2130 | if (skb->next) |
1961 | goto gso; | 2131 | goto gso; |
1962 | } else { | 2132 | } else { |
1963 | if (skb_needs_linearize(skb, dev) && | 2133 | if (skb_needs_linearize(skb, features) && |
1964 | __skb_linearize(skb)) | 2134 | __skb_linearize(skb)) |
1965 | goto out_kfree_skb; | 2135 | goto out_kfree_skb; |
1966 | 2136 | ||
@@ -1969,15 +2139,17 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, | |||
1969 | * checksumming here. | 2139 | * checksumming here. |
1970 | */ | 2140 | */ |
1971 | if (skb->ip_summed == CHECKSUM_PARTIAL) { | 2141 | if (skb->ip_summed == CHECKSUM_PARTIAL) { |
1972 | skb_set_transport_header(skb, skb->csum_start - | 2142 | skb_set_transport_header(skb, |
1973 | skb_headroom(skb)); | 2143 | skb_checksum_start_offset(skb)); |
1974 | if (!dev_can_checksum(dev, skb) && | 2144 | if (!(features & NETIF_F_ALL_CSUM) && |
1975 | skb_checksum_help(skb)) | 2145 | skb_checksum_help(skb)) |
1976 | goto out_kfree_skb; | 2146 | goto out_kfree_skb; |
1977 | } | 2147 | } |
1978 | } | 2148 | } |
1979 | 2149 | ||
2150 | skb_len = skb->len; | ||
1980 | rc = ops->ndo_start_xmit(skb, dev); | 2151 | rc = ops->ndo_start_xmit(skb, dev); |
2152 | trace_net_dev_xmit(skb, rc, dev, skb_len); | ||
1981 | if (rc == NETDEV_TX_OK) | 2153 | if (rc == NETDEV_TX_OK) |
1982 | txq_trans_update(txq); | 2154 | txq_trans_update(txq); |
1983 | return rc; | 2155 | return rc; |
@@ -1991,13 +2163,15 @@ gso: | |||
1991 | nskb->next = NULL; | 2163 | nskb->next = NULL; |
1992 | 2164 | ||
1993 | /* | 2165 | /* |
1994 | * If device doesnt need nskb->dst, release it right now while | 2166 | * If device doesn't need nskb->dst, release it right now while |
1995 | * its hot in this cpu cache | 2167 | * its hot in this cpu cache |
1996 | */ | 2168 | */ |
1997 | if (dev->priv_flags & IFF_XMIT_DST_RELEASE) | 2169 | if (dev->priv_flags & IFF_XMIT_DST_RELEASE) |
1998 | skb_dst_drop(nskb); | 2170 | skb_dst_drop(nskb); |
1999 | 2171 | ||
2172 | skb_len = nskb->len; | ||
2000 | rc = ops->ndo_start_xmit(nskb, dev); | 2173 | rc = ops->ndo_start_xmit(nskb, dev); |
2174 | trace_net_dev_xmit(nskb, rc, dev, skb_len); | ||
2001 | if (unlikely(rc != NETDEV_TX_OK)) { | 2175 | if (unlikely(rc != NETDEV_TX_OK)) { |
2002 | if (rc & ~NETDEV_TX_MASK) | 2176 | if (rc & ~NETDEV_TX_MASK) |
2003 | goto out_kfree_gso_skb; | 2177 | goto out_kfree_gso_skb; |
@@ -2015,31 +2189,45 @@ out_kfree_gso_skb: | |||
2015 | skb->destructor = DEV_GSO_CB(skb)->destructor; | 2189 | skb->destructor = DEV_GSO_CB(skb)->destructor; |
2016 | out_kfree_skb: | 2190 | out_kfree_skb: |
2017 | kfree_skb(skb); | 2191 | kfree_skb(skb); |
2192 | out: | ||
2018 | return rc; | 2193 | return rc; |
2019 | } | 2194 | } |
2020 | 2195 | ||
2021 | static u32 hashrnd __read_mostly; | 2196 | static u32 hashrnd __read_mostly; |
2022 | 2197 | ||
2023 | u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb) | 2198 | /* |
2199 | * Returns a Tx hash based on the given packet descriptor and the number | ||
2200 | * of Tx queues to be used as a distribution range. | ||
2201 | */ | ||
2202 | u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb, | ||
2203 | unsigned int num_tx_queues) | ||
2024 | { | 2204 | { |
2025 | u32 hash; | 2205 | u32 hash; |
2206 | u16 qoffset = 0; | ||
2207 | u16 qcount = num_tx_queues; | ||
2026 | 2208 | ||
2027 | if (skb_rx_queue_recorded(skb)) { | 2209 | if (skb_rx_queue_recorded(skb)) { |
2028 | hash = skb_get_rx_queue(skb); | 2210 | hash = skb_get_rx_queue(skb); |
2029 | while (unlikely(hash >= dev->real_num_tx_queues)) | 2211 | while (unlikely(hash >= num_tx_queues)) |
2030 | hash -= dev->real_num_tx_queues; | 2212 | hash -= num_tx_queues; |
2031 | return hash; | 2213 | return hash; |
2032 | } | 2214 | } |
2033 | 2215 | ||
2216 | if (dev->num_tc) { | ||
2217 | u8 tc = netdev_get_prio_tc_map(dev, skb->priority); | ||
2218 | qoffset = dev->tc_to_txq[tc].offset; | ||
2219 | qcount = dev->tc_to_txq[tc].count; | ||
2220 | } | ||
2221 | |||
2034 | if (skb->sk && skb->sk->sk_hash) | 2222 | if (skb->sk && skb->sk->sk_hash) |
2035 | hash = skb->sk->sk_hash; | 2223 | hash = skb->sk->sk_hash; |
2036 | else | 2224 | else |
2037 | hash = (__force u16) skb->protocol ^ skb->rxhash; | 2225 | hash = (__force u16) skb->protocol ^ skb->rxhash; |
2038 | hash = jhash_1word(hash, hashrnd); | 2226 | hash = jhash_1word(hash, hashrnd); |
2039 | 2227 | ||
2040 | return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32); | 2228 | return (u16) (((u64) hash * qcount) >> 32) + qoffset; |
2041 | } | 2229 | } |
2042 | EXPORT_SYMBOL(skb_tx_hash); | 2230 | EXPORT_SYMBOL(__skb_tx_hash); |
2043 | 2231 | ||
2044 | static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index) | 2232 | static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index) |
2045 | { | 2233 | { |
@@ -2054,26 +2242,70 @@ static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index) | |||
2054 | return queue_index; | 2242 | return queue_index; |
2055 | } | 2243 | } |
2056 | 2244 | ||
2245 | static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb) | ||
2246 | { | ||
2247 | #ifdef CONFIG_XPS | ||
2248 | struct xps_dev_maps *dev_maps; | ||
2249 | struct xps_map *map; | ||
2250 | int queue_index = -1; | ||
2251 | |||
2252 | rcu_read_lock(); | ||
2253 | dev_maps = rcu_dereference(dev->xps_maps); | ||
2254 | if (dev_maps) { | ||
2255 | map = rcu_dereference( | ||
2256 | dev_maps->cpu_map[raw_smp_processor_id()]); | ||
2257 | if (map) { | ||
2258 | if (map->len == 1) | ||
2259 | queue_index = map->queues[0]; | ||
2260 | else { | ||
2261 | u32 hash; | ||
2262 | if (skb->sk && skb->sk->sk_hash) | ||
2263 | hash = skb->sk->sk_hash; | ||
2264 | else | ||
2265 | hash = (__force u16) skb->protocol ^ | ||
2266 | skb->rxhash; | ||
2267 | hash = jhash_1word(hash, hashrnd); | ||
2268 | queue_index = map->queues[ | ||
2269 | ((u64)hash * map->len) >> 32]; | ||
2270 | } | ||
2271 | if (unlikely(queue_index >= dev->real_num_tx_queues)) | ||
2272 | queue_index = -1; | ||
2273 | } | ||
2274 | } | ||
2275 | rcu_read_unlock(); | ||
2276 | |||
2277 | return queue_index; | ||
2278 | #else | ||
2279 | return -1; | ||
2280 | #endif | ||
2281 | } | ||
2282 | |||
2057 | static struct netdev_queue *dev_pick_tx(struct net_device *dev, | 2283 | static struct netdev_queue *dev_pick_tx(struct net_device *dev, |
2058 | struct sk_buff *skb) | 2284 | struct sk_buff *skb) |
2059 | { | 2285 | { |
2060 | int queue_index; | 2286 | int queue_index; |
2061 | const struct net_device_ops *ops = dev->netdev_ops; | 2287 | const struct net_device_ops *ops = dev->netdev_ops; |
2062 | 2288 | ||
2063 | if (ops->ndo_select_queue) { | 2289 | if (dev->real_num_tx_queues == 1) |
2290 | queue_index = 0; | ||
2291 | else if (ops->ndo_select_queue) { | ||
2064 | queue_index = ops->ndo_select_queue(dev, skb); | 2292 | queue_index = ops->ndo_select_queue(dev, skb); |
2065 | queue_index = dev_cap_txqueue(dev, queue_index); | 2293 | queue_index = dev_cap_txqueue(dev, queue_index); |
2066 | } else { | 2294 | } else { |
2067 | struct sock *sk = skb->sk; | 2295 | struct sock *sk = skb->sk; |
2068 | queue_index = sk_tx_queue_get(sk); | 2296 | queue_index = sk_tx_queue_get(sk); |
2069 | if (queue_index < 0) { | ||
2070 | 2297 | ||
2071 | queue_index = 0; | 2298 | if (queue_index < 0 || skb->ooo_okay || |
2072 | if (dev->real_num_tx_queues > 1) | 2299 | queue_index >= dev->real_num_tx_queues) { |
2300 | int old_index = queue_index; | ||
2301 | |||
2302 | queue_index = get_xps_queue(dev, skb); | ||
2303 | if (queue_index < 0) | ||
2073 | queue_index = skb_tx_hash(dev, skb); | 2304 | queue_index = skb_tx_hash(dev, skb); |
2074 | 2305 | ||
2075 | if (sk) { | 2306 | if (queue_index != old_index && sk) { |
2076 | struct dst_entry *dst = rcu_dereference_check(sk->sk_dst_cache, 1); | 2307 | struct dst_entry *dst = |
2308 | rcu_dereference_check(sk->sk_dst_cache, 1); | ||
2077 | 2309 | ||
2078 | if (dst && skb_dst(skb) == dst) | 2310 | if (dst && skb_dst(skb) == dst) |
2079 | sk_tx_queue_set(sk, queue_index); | 2311 | sk_tx_queue_set(sk, queue_index); |
@@ -2090,15 +2322,18 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, | |||
2090 | struct netdev_queue *txq) | 2322 | struct netdev_queue *txq) |
2091 | { | 2323 | { |
2092 | spinlock_t *root_lock = qdisc_lock(q); | 2324 | spinlock_t *root_lock = qdisc_lock(q); |
2093 | bool contended = qdisc_is_running(q); | 2325 | bool contended; |
2094 | int rc; | 2326 | int rc; |
2095 | 2327 | ||
2328 | qdisc_skb_cb(skb)->pkt_len = skb->len; | ||
2329 | qdisc_calculate_pkt_len(skb, q); | ||
2096 | /* | 2330 | /* |
2097 | * Heuristic to force contended enqueues to serialize on a | 2331 | * Heuristic to force contended enqueues to serialize on a |
2098 | * separate lock before trying to get qdisc main lock. | 2332 | * separate lock before trying to get qdisc main lock. |
2099 | * This permits __QDISC_STATE_RUNNING owner to get the lock more often | 2333 | * This permits __QDISC_STATE_RUNNING owner to get the lock more often |
2100 | * and dequeue packets faster. | 2334 | * and dequeue packets faster. |
2101 | */ | 2335 | */ |
2336 | contended = qdisc_is_running(q); | ||
2102 | if (unlikely(contended)) | 2337 | if (unlikely(contended)) |
2103 | spin_lock(&q->busylock); | 2338 | spin_lock(&q->busylock); |
2104 | 2339 | ||
@@ -2115,7 +2350,9 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, | |||
2115 | */ | 2350 | */ |
2116 | if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE)) | 2351 | if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE)) |
2117 | skb_dst_force(skb); | 2352 | skb_dst_force(skb); |
2118 | __qdisc_update_bstats(q, skb->len); | 2353 | |
2354 | qdisc_bstats_update(q, skb); | ||
2355 | |||
2119 | if (sch_direct_xmit(skb, q, dev, txq, root_lock)) { | 2356 | if (sch_direct_xmit(skb, q, dev, txq, root_lock)) { |
2120 | if (unlikely(contended)) { | 2357 | if (unlikely(contended)) { |
2121 | spin_unlock(&q->busylock); | 2358 | spin_unlock(&q->busylock); |
@@ -2128,7 +2365,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, | |||
2128 | rc = NET_XMIT_SUCCESS; | 2365 | rc = NET_XMIT_SUCCESS; |
2129 | } else { | 2366 | } else { |
2130 | skb_dst_force(skb); | 2367 | skb_dst_force(skb); |
2131 | rc = qdisc_enqueue_root(skb, q); | 2368 | rc = q->enqueue(skb, q) & NET_XMIT_MASK; |
2132 | if (qdisc_run_begin(q)) { | 2369 | if (qdisc_run_begin(q)) { |
2133 | if (unlikely(contended)) { | 2370 | if (unlikely(contended)) { |
2134 | spin_unlock(&q->busylock); | 2371 | spin_unlock(&q->busylock); |
@@ -2143,6 +2380,9 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, | |||
2143 | return rc; | 2380 | return rc; |
2144 | } | 2381 | } |
2145 | 2382 | ||
2383 | static DEFINE_PER_CPU(int, xmit_recursion); | ||
2384 | #define RECURSION_LIMIT 10 | ||
2385 | |||
2146 | /** | 2386 | /** |
2147 | * dev_queue_xmit - transmit a buffer | 2387 | * dev_queue_xmit - transmit a buffer |
2148 | * @skb: buffer to transmit | 2388 | * @skb: buffer to transmit |
@@ -2186,6 +2426,7 @@ int dev_queue_xmit(struct sk_buff *skb) | |||
2186 | #ifdef CONFIG_NET_CLS_ACT | 2426 | #ifdef CONFIG_NET_CLS_ACT |
2187 | skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS); | 2427 | skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS); |
2188 | #endif | 2428 | #endif |
2429 | trace_net_dev_queue(skb); | ||
2189 | if (q->enqueue) { | 2430 | if (q->enqueue) { |
2190 | rc = __dev_xmit_skb(skb, q, dev, txq); | 2431 | rc = __dev_xmit_skb(skb, q, dev, txq); |
2191 | goto out; | 2432 | goto out; |
@@ -2208,10 +2449,15 @@ int dev_queue_xmit(struct sk_buff *skb) | |||
2208 | 2449 | ||
2209 | if (txq->xmit_lock_owner != cpu) { | 2450 | if (txq->xmit_lock_owner != cpu) { |
2210 | 2451 | ||
2452 | if (__this_cpu_read(xmit_recursion) > RECURSION_LIMIT) | ||
2453 | goto recursion_alert; | ||
2454 | |||
2211 | HARD_TX_LOCK(dev, txq, cpu); | 2455 | HARD_TX_LOCK(dev, txq, cpu); |
2212 | 2456 | ||
2213 | if (!netif_tx_queue_stopped(txq)) { | 2457 | if (!netif_tx_queue_stopped(txq)) { |
2458 | __this_cpu_inc(xmit_recursion); | ||
2214 | rc = dev_hard_start_xmit(skb, dev, txq); | 2459 | rc = dev_hard_start_xmit(skb, dev, txq); |
2460 | __this_cpu_dec(xmit_recursion); | ||
2215 | if (dev_xmit_complete(rc)) { | 2461 | if (dev_xmit_complete(rc)) { |
2216 | HARD_TX_UNLOCK(dev, txq); | 2462 | HARD_TX_UNLOCK(dev, txq); |
2217 | goto out; | 2463 | goto out; |
@@ -2223,7 +2469,9 @@ int dev_queue_xmit(struct sk_buff *skb) | |||
2223 | "queue packet!\n", dev->name); | 2469 | "queue packet!\n", dev->name); |
2224 | } else { | 2470 | } else { |
2225 | /* Recursion is detected! It is possible, | 2471 | /* Recursion is detected! It is possible, |
2226 | * unfortunately */ | 2472 | * unfortunately |
2473 | */ | ||
2474 | recursion_alert: | ||
2227 | if (net_ratelimit()) | 2475 | if (net_ratelimit()) |
2228 | printk(KERN_CRIT "Dead loop on virtual device " | 2476 | printk(KERN_CRIT "Dead loop on virtual device " |
2229 | "%s, fix it urgently!\n", dev->name); | 2477 | "%s, fix it urgently!\n", dev->name); |
@@ -2259,69 +2507,44 @@ static inline void ____napi_schedule(struct softnet_data *sd, | |||
2259 | __raise_softirq_irqoff(NET_RX_SOFTIRQ); | 2507 | __raise_softirq_irqoff(NET_RX_SOFTIRQ); |
2260 | } | 2508 | } |
2261 | 2509 | ||
2262 | #ifdef CONFIG_RPS | ||
2263 | |||
2264 | /* One global table that all flow-based protocols share. */ | ||
2265 | struct rps_sock_flow_table *rps_sock_flow_table __read_mostly; | ||
2266 | EXPORT_SYMBOL(rps_sock_flow_table); | ||
2267 | |||
2268 | /* | 2510 | /* |
2269 | * get_rps_cpu is called from netif_receive_skb and returns the target | 2511 | * __skb_get_rxhash: calculate a flow hash based on src/dst addresses |
2270 | * CPU from the RPS map of the receiving queue for a given skb. | 2512 | * and src/dst port numbers. Returns a non-zero hash number on success |
2271 | * rcu_read_lock must be held on entry. | 2513 | * and 0 on failure. |
2272 | */ | 2514 | */ |
2273 | static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, | 2515 | __u32 __skb_get_rxhash(struct sk_buff *skb) |
2274 | struct rps_dev_flow **rflowp) | ||
2275 | { | 2516 | { |
2276 | struct ipv6hdr *ip6; | 2517 | int nhoff, hash = 0, poff; |
2277 | struct iphdr *ip; | 2518 | const struct ipv6hdr *ip6; |
2278 | struct netdev_rx_queue *rxqueue; | 2519 | const struct iphdr *ip; |
2279 | struct rps_map *map; | ||
2280 | struct rps_dev_flow_table *flow_table; | ||
2281 | struct rps_sock_flow_table *sock_flow_table; | ||
2282 | int cpu = -1; | ||
2283 | u8 ip_proto; | 2520 | u8 ip_proto; |
2284 | u16 tcpu; | ||
2285 | u32 addr1, addr2, ihl; | 2521 | u32 addr1, addr2, ihl; |
2286 | union { | 2522 | union { |
2287 | u32 v32; | 2523 | u32 v32; |
2288 | u16 v16[2]; | 2524 | u16 v16[2]; |
2289 | } ports; | 2525 | } ports; |
2290 | 2526 | ||
2291 | if (skb_rx_queue_recorded(skb)) { | 2527 | nhoff = skb_network_offset(skb); |
2292 | u16 index = skb_get_rx_queue(skb); | ||
2293 | if (unlikely(index >= dev->num_rx_queues)) { | ||
2294 | WARN_ONCE(dev->num_rx_queues > 1, "%s received packet " | ||
2295 | "on queue %u, but number of RX queues is %u\n", | ||
2296 | dev->name, index, dev->num_rx_queues); | ||
2297 | goto done; | ||
2298 | } | ||
2299 | rxqueue = dev->_rx + index; | ||
2300 | } else | ||
2301 | rxqueue = dev->_rx; | ||
2302 | |||
2303 | if (!rxqueue->rps_map && !rxqueue->rps_flow_table) | ||
2304 | goto done; | ||
2305 | |||
2306 | if (skb->rxhash) | ||
2307 | goto got_hash; /* Skip hash computation on packet header */ | ||
2308 | 2528 | ||
2309 | switch (skb->protocol) { | 2529 | switch (skb->protocol) { |
2310 | case __constant_htons(ETH_P_IP): | 2530 | case __constant_htons(ETH_P_IP): |
2311 | if (!pskb_may_pull(skb, sizeof(*ip))) | 2531 | if (!pskb_may_pull(skb, sizeof(*ip) + nhoff)) |
2312 | goto done; | 2532 | goto done; |
2313 | 2533 | ||
2314 | ip = (struct iphdr *) skb->data; | 2534 | ip = (const struct iphdr *) (skb->data + nhoff); |
2315 | ip_proto = ip->protocol; | 2535 | if (ip->frag_off & htons(IP_MF | IP_OFFSET)) |
2536 | ip_proto = 0; | ||
2537 | else | ||
2538 | ip_proto = ip->protocol; | ||
2316 | addr1 = (__force u32) ip->saddr; | 2539 | addr1 = (__force u32) ip->saddr; |
2317 | addr2 = (__force u32) ip->daddr; | 2540 | addr2 = (__force u32) ip->daddr; |
2318 | ihl = ip->ihl; | 2541 | ihl = ip->ihl; |
2319 | break; | 2542 | break; |
2320 | case __constant_htons(ETH_P_IPV6): | 2543 | case __constant_htons(ETH_P_IPV6): |
2321 | if (!pskb_may_pull(skb, sizeof(*ip6))) | 2544 | if (!pskb_may_pull(skb, sizeof(*ip6) + nhoff)) |
2322 | goto done; | 2545 | goto done; |
2323 | 2546 | ||
2324 | ip6 = (struct ipv6hdr *) skb->data; | 2547 | ip6 = (const struct ipv6hdr *) (skb->data + nhoff); |
2325 | ip_proto = ip6->nexthdr; | 2548 | ip_proto = ip6->nexthdr; |
2326 | addr1 = (__force u32) ip6->saddr.s6_addr32[3]; | 2549 | addr1 = (__force u32) ip6->saddr.s6_addr32[3]; |
2327 | addr2 = (__force u32) ip6->daddr.s6_addr32[3]; | 2550 | addr2 = (__force u32) ip6->daddr.s6_addr32[3]; |
@@ -2330,33 +2553,130 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, | |||
2330 | default: | 2553 | default: |
2331 | goto done; | 2554 | goto done; |
2332 | } | 2555 | } |
2333 | switch (ip_proto) { | 2556 | |
2334 | case IPPROTO_TCP: | 2557 | ports.v32 = 0; |
2335 | case IPPROTO_UDP: | 2558 | poff = proto_ports_offset(ip_proto); |
2336 | case IPPROTO_DCCP: | 2559 | if (poff >= 0) { |
2337 | case IPPROTO_ESP: | 2560 | nhoff += ihl * 4 + poff; |
2338 | case IPPROTO_AH: | 2561 | if (pskb_may_pull(skb, nhoff + 4)) { |
2339 | case IPPROTO_SCTP: | 2562 | ports.v32 = * (__force u32 *) (skb->data + nhoff); |
2340 | case IPPROTO_UDPLITE: | ||
2341 | if (pskb_may_pull(skb, (ihl * 4) + 4)) { | ||
2342 | ports.v32 = * (__force u32 *) (skb->data + (ihl * 4)); | ||
2343 | if (ports.v16[1] < ports.v16[0]) | 2563 | if (ports.v16[1] < ports.v16[0]) |
2344 | swap(ports.v16[0], ports.v16[1]); | 2564 | swap(ports.v16[0], ports.v16[1]); |
2345 | break; | ||
2346 | } | 2565 | } |
2347 | default: | ||
2348 | ports.v32 = 0; | ||
2349 | break; | ||
2350 | } | 2566 | } |
2351 | 2567 | ||
2352 | /* get a consistent hash (same value on both flow directions) */ | 2568 | /* get a consistent hash (same value on both flow directions) */ |
2353 | if (addr2 < addr1) | 2569 | if (addr2 < addr1) |
2354 | swap(addr1, addr2); | 2570 | swap(addr1, addr2); |
2355 | skb->rxhash = jhash_3words(addr1, addr2, ports.v32, hashrnd); | ||
2356 | if (!skb->rxhash) | ||
2357 | skb->rxhash = 1; | ||
2358 | 2571 | ||
2359 | got_hash: | 2572 | hash = jhash_3words(addr1, addr2, ports.v32, hashrnd); |
2573 | if (!hash) | ||
2574 | hash = 1; | ||
2575 | |||
2576 | done: | ||
2577 | return hash; | ||
2578 | } | ||
2579 | EXPORT_SYMBOL(__skb_get_rxhash); | ||
2580 | |||
2581 | #ifdef CONFIG_RPS | ||
2582 | |||
2583 | /* One global table that all flow-based protocols share. */ | ||
2584 | struct rps_sock_flow_table __rcu *rps_sock_flow_table __read_mostly; | ||
2585 | EXPORT_SYMBOL(rps_sock_flow_table); | ||
2586 | |||
2587 | static struct rps_dev_flow * | ||
2588 | set_rps_cpu(struct net_device *dev, struct sk_buff *skb, | ||
2589 | struct rps_dev_flow *rflow, u16 next_cpu) | ||
2590 | { | ||
2591 | u16 tcpu; | ||
2592 | |||
2593 | tcpu = rflow->cpu = next_cpu; | ||
2594 | if (tcpu != RPS_NO_CPU) { | ||
2595 | #ifdef CONFIG_RFS_ACCEL | ||
2596 | struct netdev_rx_queue *rxqueue; | ||
2597 | struct rps_dev_flow_table *flow_table; | ||
2598 | struct rps_dev_flow *old_rflow; | ||
2599 | u32 flow_id; | ||
2600 | u16 rxq_index; | ||
2601 | int rc; | ||
2602 | |||
2603 | /* Should we steer this flow to a different hardware queue? */ | ||
2604 | if (!skb_rx_queue_recorded(skb) || !dev->rx_cpu_rmap || | ||
2605 | !(dev->features & NETIF_F_NTUPLE)) | ||
2606 | goto out; | ||
2607 | rxq_index = cpu_rmap_lookup_index(dev->rx_cpu_rmap, next_cpu); | ||
2608 | if (rxq_index == skb_get_rx_queue(skb)) | ||
2609 | goto out; | ||
2610 | |||
2611 | rxqueue = dev->_rx + rxq_index; | ||
2612 | flow_table = rcu_dereference(rxqueue->rps_flow_table); | ||
2613 | if (!flow_table) | ||
2614 | goto out; | ||
2615 | flow_id = skb->rxhash & flow_table->mask; | ||
2616 | rc = dev->netdev_ops->ndo_rx_flow_steer(dev, skb, | ||
2617 | rxq_index, flow_id); | ||
2618 | if (rc < 0) | ||
2619 | goto out; | ||
2620 | old_rflow = rflow; | ||
2621 | rflow = &flow_table->flows[flow_id]; | ||
2622 | rflow->cpu = next_cpu; | ||
2623 | rflow->filter = rc; | ||
2624 | if (old_rflow->filter == rflow->filter) | ||
2625 | old_rflow->filter = RPS_NO_FILTER; | ||
2626 | out: | ||
2627 | #endif | ||
2628 | rflow->last_qtail = | ||
2629 | per_cpu(softnet_data, tcpu).input_queue_head; | ||
2630 | } | ||
2631 | |||
2632 | return rflow; | ||
2633 | } | ||
2634 | |||
2635 | /* | ||
2636 | * get_rps_cpu is called from netif_receive_skb and returns the target | ||
2637 | * CPU from the RPS map of the receiving queue for a given skb. | ||
2638 | * rcu_read_lock must be held on entry. | ||
2639 | */ | ||
2640 | static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, | ||
2641 | struct rps_dev_flow **rflowp) | ||
2642 | { | ||
2643 | struct netdev_rx_queue *rxqueue; | ||
2644 | struct rps_map *map; | ||
2645 | struct rps_dev_flow_table *flow_table; | ||
2646 | struct rps_sock_flow_table *sock_flow_table; | ||
2647 | int cpu = -1; | ||
2648 | u16 tcpu; | ||
2649 | |||
2650 | if (skb_rx_queue_recorded(skb)) { | ||
2651 | u16 index = skb_get_rx_queue(skb); | ||
2652 | if (unlikely(index >= dev->real_num_rx_queues)) { | ||
2653 | WARN_ONCE(dev->real_num_rx_queues > 1, | ||
2654 | "%s received packet on queue %u, but number " | ||
2655 | "of RX queues is %u\n", | ||
2656 | dev->name, index, dev->real_num_rx_queues); | ||
2657 | goto done; | ||
2658 | } | ||
2659 | rxqueue = dev->_rx + index; | ||
2660 | } else | ||
2661 | rxqueue = dev->_rx; | ||
2662 | |||
2663 | map = rcu_dereference(rxqueue->rps_map); | ||
2664 | if (map) { | ||
2665 | if (map->len == 1 && | ||
2666 | !rcu_dereference_raw(rxqueue->rps_flow_table)) { | ||
2667 | tcpu = map->cpus[0]; | ||
2668 | if (cpu_online(tcpu)) | ||
2669 | cpu = tcpu; | ||
2670 | goto done; | ||
2671 | } | ||
2672 | } else if (!rcu_dereference_raw(rxqueue->rps_flow_table)) { | ||
2673 | goto done; | ||
2674 | } | ||
2675 | |||
2676 | skb_reset_network_header(skb); | ||
2677 | if (!skb_get_rxhash(skb)) | ||
2678 | goto done; | ||
2679 | |||
2360 | flow_table = rcu_dereference(rxqueue->rps_flow_table); | 2680 | flow_table = rcu_dereference(rxqueue->rps_flow_table); |
2361 | sock_flow_table = rcu_dereference(rps_sock_flow_table); | 2681 | sock_flow_table = rcu_dereference(rps_sock_flow_table); |
2362 | if (flow_table && sock_flow_table) { | 2682 | if (flow_table && sock_flow_table) { |
@@ -2383,12 +2703,9 @@ got_hash: | |||
2383 | if (unlikely(tcpu != next_cpu) && | 2703 | if (unlikely(tcpu != next_cpu) && |
2384 | (tcpu == RPS_NO_CPU || !cpu_online(tcpu) || | 2704 | (tcpu == RPS_NO_CPU || !cpu_online(tcpu) || |
2385 | ((int)(per_cpu(softnet_data, tcpu).input_queue_head - | 2705 | ((int)(per_cpu(softnet_data, tcpu).input_queue_head - |
2386 | rflow->last_qtail)) >= 0)) { | 2706 | rflow->last_qtail)) >= 0)) |
2387 | tcpu = rflow->cpu = next_cpu; | 2707 | rflow = set_rps_cpu(dev, skb, rflow, next_cpu); |
2388 | if (tcpu != RPS_NO_CPU) | 2708 | |
2389 | rflow->last_qtail = per_cpu(softnet_data, | ||
2390 | tcpu).input_queue_head; | ||
2391 | } | ||
2392 | if (tcpu != RPS_NO_CPU && cpu_online(tcpu)) { | 2709 | if (tcpu != RPS_NO_CPU && cpu_online(tcpu)) { |
2393 | *rflowp = rflow; | 2710 | *rflowp = rflow; |
2394 | cpu = tcpu; | 2711 | cpu = tcpu; |
@@ -2396,7 +2713,6 @@ got_hash: | |||
2396 | } | 2713 | } |
2397 | } | 2714 | } |
2398 | 2715 | ||
2399 | map = rcu_dereference(rxqueue->rps_map); | ||
2400 | if (map) { | 2716 | if (map) { |
2401 | tcpu = map->cpus[((u64) skb->rxhash * map->len) >> 32]; | 2717 | tcpu = map->cpus[((u64) skb->rxhash * map->len) >> 32]; |
2402 | 2718 | ||
@@ -2410,6 +2726,46 @@ done: | |||
2410 | return cpu; | 2726 | return cpu; |
2411 | } | 2727 | } |
2412 | 2728 | ||
2729 | #ifdef CONFIG_RFS_ACCEL | ||
2730 | |||
2731 | /** | ||
2732 | * rps_may_expire_flow - check whether an RFS hardware filter may be removed | ||
2733 | * @dev: Device on which the filter was set | ||
2734 | * @rxq_index: RX queue index | ||
2735 | * @flow_id: Flow ID passed to ndo_rx_flow_steer() | ||
2736 | * @filter_id: Filter ID returned by ndo_rx_flow_steer() | ||
2737 | * | ||
2738 | * Drivers that implement ndo_rx_flow_steer() should periodically call | ||
2739 | * this function for each installed filter and remove the filters for | ||
2740 | * which it returns %true. | ||
2741 | */ | ||
2742 | bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index, | ||
2743 | u32 flow_id, u16 filter_id) | ||
2744 | { | ||
2745 | struct netdev_rx_queue *rxqueue = dev->_rx + rxq_index; | ||
2746 | struct rps_dev_flow_table *flow_table; | ||
2747 | struct rps_dev_flow *rflow; | ||
2748 | bool expire = true; | ||
2749 | int cpu; | ||
2750 | |||
2751 | rcu_read_lock(); | ||
2752 | flow_table = rcu_dereference(rxqueue->rps_flow_table); | ||
2753 | if (flow_table && flow_id <= flow_table->mask) { | ||
2754 | rflow = &flow_table->flows[flow_id]; | ||
2755 | cpu = ACCESS_ONCE(rflow->cpu); | ||
2756 | if (rflow->filter == filter_id && cpu != RPS_NO_CPU && | ||
2757 | ((int)(per_cpu(softnet_data, cpu).input_queue_head - | ||
2758 | rflow->last_qtail) < | ||
2759 | (int)(10 * flow_table->mask))) | ||
2760 | expire = false; | ||
2761 | } | ||
2762 | rcu_read_unlock(); | ||
2763 | return expire; | ||
2764 | } | ||
2765 | EXPORT_SYMBOL(rps_may_expire_flow); | ||
2766 | |||
2767 | #endif /* CONFIG_RFS_ACCEL */ | ||
2768 | |||
2413 | /* Called from hardirq (IPI) context */ | 2769 | /* Called from hardirq (IPI) context */ |
2414 | static void rps_trigger_softirq(void *data) | 2770 | static void rps_trigger_softirq(void *data) |
2415 | { | 2771 | { |
@@ -2482,6 +2838,7 @@ enqueue: | |||
2482 | 2838 | ||
2483 | local_irq_restore(flags); | 2839 | local_irq_restore(flags); |
2484 | 2840 | ||
2841 | atomic_long_inc(&skb->dev->rx_dropped); | ||
2485 | kfree_skb(skb); | 2842 | kfree_skb(skb); |
2486 | return NET_RX_DROP; | 2843 | return NET_RX_DROP; |
2487 | } | 2844 | } |
@@ -2512,6 +2869,7 @@ int netif_rx(struct sk_buff *skb) | |||
2512 | if (netdev_tstamp_prequeue) | 2869 | if (netdev_tstamp_prequeue) |
2513 | net_timestamp_check(skb); | 2870 | net_timestamp_check(skb); |
2514 | 2871 | ||
2872 | trace_netif_rx(skb); | ||
2515 | #ifdef CONFIG_RPS | 2873 | #ifdef CONFIG_RPS |
2516 | { | 2874 | { |
2517 | struct rps_dev_flow voidflow, *rflow = &voidflow; | 2875 | struct rps_dev_flow voidflow, *rflow = &voidflow; |
@@ -2571,6 +2929,7 @@ static void net_tx_action(struct softirq_action *h) | |||
2571 | clist = clist->next; | 2929 | clist = clist->next; |
2572 | 2930 | ||
2573 | WARN_ON(atomic_read(&skb->users)); | 2931 | WARN_ON(atomic_read(&skb->users)); |
2932 | trace_kfree_skb(skb, net_tx_action); | ||
2574 | __kfree_skb(skb); | 2933 | __kfree_skb(skb); |
2575 | } | 2934 | } |
2576 | } | 2935 | } |
@@ -2611,14 +2970,6 @@ static void net_tx_action(struct softirq_action *h) | |||
2611 | } | 2970 | } |
2612 | } | 2971 | } |
2613 | 2972 | ||
2614 | static inline int deliver_skb(struct sk_buff *skb, | ||
2615 | struct packet_type *pt_prev, | ||
2616 | struct net_device *orig_dev) | ||
2617 | { | ||
2618 | atomic_inc(&skb->users); | ||
2619 | return pt_prev->func(skb, skb->dev, pt_prev, orig_dev); | ||
2620 | } | ||
2621 | |||
2622 | #if (defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)) && \ | 2973 | #if (defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)) && \ |
2623 | (defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE)) | 2974 | (defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE)) |
2624 | /* This hook is defined here for ATM LANE */ | 2975 | /* This hook is defined here for ATM LANE */ |
@@ -2632,15 +2983,14 @@ EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook); | |||
2632 | * when CONFIG_NET_CLS_ACT is? otherwise some useless instructions | 2983 | * when CONFIG_NET_CLS_ACT is? otherwise some useless instructions |
2633 | * a compare and 2 stores extra right now if we dont have it on | 2984 | * a compare and 2 stores extra right now if we dont have it on |
2634 | * but have CONFIG_NET_CLS_ACT | 2985 | * but have CONFIG_NET_CLS_ACT |
2635 | * NOTE: This doesnt stop any functionality; if you dont have | 2986 | * NOTE: This doesn't stop any functionality; if you dont have |
2636 | * the ingress scheduler, you just cant add policies on ingress. | 2987 | * the ingress scheduler, you just can't add policies on ingress. |
2637 | * | 2988 | * |
2638 | */ | 2989 | */ |
2639 | static int ing_filter(struct sk_buff *skb) | 2990 | static int ing_filter(struct sk_buff *skb, struct netdev_queue *rxq) |
2640 | { | 2991 | { |
2641 | struct net_device *dev = skb->dev; | 2992 | struct net_device *dev = skb->dev; |
2642 | u32 ttl = G_TC_RTTL(skb->tc_verd); | 2993 | u32 ttl = G_TC_RTTL(skb->tc_verd); |
2643 | struct netdev_queue *rxq; | ||
2644 | int result = TC_ACT_OK; | 2994 | int result = TC_ACT_OK; |
2645 | struct Qdisc *q; | 2995 | struct Qdisc *q; |
2646 | 2996 | ||
@@ -2654,8 +3004,6 @@ static int ing_filter(struct sk_buff *skb) | |||
2654 | skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl); | 3004 | skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl); |
2655 | skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS); | 3005 | skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS); |
2656 | 3006 | ||
2657 | rxq = &dev->rx_queue; | ||
2658 | |||
2659 | q = rxq->qdisc; | 3007 | q = rxq->qdisc; |
2660 | if (q != &noop_qdisc) { | 3008 | if (q != &noop_qdisc) { |
2661 | spin_lock(qdisc_lock(q)); | 3009 | spin_lock(qdisc_lock(q)); |
@@ -2671,7 +3019,9 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb, | |||
2671 | struct packet_type **pt_prev, | 3019 | struct packet_type **pt_prev, |
2672 | int *ret, struct net_device *orig_dev) | 3020 | int *ret, struct net_device *orig_dev) |
2673 | { | 3021 | { |
2674 | if (skb->dev->rx_queue.qdisc == &noop_qdisc) | 3022 | struct netdev_queue *rxq = rcu_dereference(skb->dev->ingress_queue); |
3023 | |||
3024 | if (!rxq || rxq->qdisc == &noop_qdisc) | ||
2675 | goto out; | 3025 | goto out; |
2676 | 3026 | ||
2677 | if (*pt_prev) { | 3027 | if (*pt_prev) { |
@@ -2679,7 +3029,7 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb, | |||
2679 | *pt_prev = NULL; | 3029 | *pt_prev = NULL; |
2680 | } | 3030 | } |
2681 | 3031 | ||
2682 | switch (ing_filter(skb)) { | 3032 | switch (ing_filter(skb, rxq)) { |
2683 | case TC_ACT_SHOT: | 3033 | case TC_ACT_SHOT: |
2684 | case TC_ACT_STOLEN: | 3034 | case TC_ACT_STOLEN: |
2685 | kfree_skb(skb); | 3035 | kfree_skb(skb); |
@@ -2692,33 +3042,6 @@ out: | |||
2692 | } | 3042 | } |
2693 | #endif | 3043 | #endif |
2694 | 3044 | ||
2695 | /* | ||
2696 | * netif_nit_deliver - deliver received packets to network taps | ||
2697 | * @skb: buffer | ||
2698 | * | ||
2699 | * This function is used to deliver incoming packets to network | ||
2700 | * taps. It should be used when the normal netif_receive_skb path | ||
2701 | * is bypassed, for example because of VLAN acceleration. | ||
2702 | */ | ||
2703 | void netif_nit_deliver(struct sk_buff *skb) | ||
2704 | { | ||
2705 | struct packet_type *ptype; | ||
2706 | |||
2707 | if (list_empty(&ptype_all)) | ||
2708 | return; | ||
2709 | |||
2710 | skb_reset_network_header(skb); | ||
2711 | skb_reset_transport_header(skb); | ||
2712 | skb->mac_len = skb->network_header - skb->mac_header; | ||
2713 | |||
2714 | rcu_read_lock(); | ||
2715 | list_for_each_entry_rcu(ptype, &ptype_all, list) { | ||
2716 | if (!ptype->dev || ptype->dev == skb->dev) | ||
2717 | deliver_skb(skb, ptype, skb->dev); | ||
2718 | } | ||
2719 | rcu_read_unlock(); | ||
2720 | } | ||
2721 | |||
2722 | /** | 3045 | /** |
2723 | * netdev_rx_handler_register - register receive handler | 3046 | * netdev_rx_handler_register - register receive handler |
2724 | * @dev: device to register a handler for | 3047 | * @dev: device to register a handler for |
@@ -2730,6 +3053,8 @@ void netif_nit_deliver(struct sk_buff *skb) | |||
2730 | * on a failure. | 3053 | * on a failure. |
2731 | * | 3054 | * |
2732 | * The caller must hold the rtnl_mutex. | 3055 | * The caller must hold the rtnl_mutex. |
3056 | * | ||
3057 | * For a general description of rx_handler, see enum rx_handler_result. | ||
2733 | */ | 3058 | */ |
2734 | int netdev_rx_handler_register(struct net_device *dev, | 3059 | int netdev_rx_handler_register(struct net_device *dev, |
2735 | rx_handler_func_t *rx_handler, | 3060 | rx_handler_func_t *rx_handler, |
@@ -2764,72 +3089,20 @@ void netdev_rx_handler_unregister(struct net_device *dev) | |||
2764 | } | 3089 | } |
2765 | EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister); | 3090 | EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister); |
2766 | 3091 | ||
2767 | static inline void skb_bond_set_mac_by_master(struct sk_buff *skb, | ||
2768 | struct net_device *master) | ||
2769 | { | ||
2770 | if (skb->pkt_type == PACKET_HOST) { | ||
2771 | u16 *dest = (u16 *) eth_hdr(skb)->h_dest; | ||
2772 | |||
2773 | memcpy(dest, master->dev_addr, ETH_ALEN); | ||
2774 | } | ||
2775 | } | ||
2776 | |||
2777 | /* On bonding slaves other than the currently active slave, suppress | ||
2778 | * duplicates except for 802.3ad ETH_P_SLOW, alb non-mcast/bcast, and | ||
2779 | * ARP on active-backup slaves with arp_validate enabled. | ||
2780 | */ | ||
2781 | int __skb_bond_should_drop(struct sk_buff *skb, struct net_device *master) | ||
2782 | { | ||
2783 | struct net_device *dev = skb->dev; | ||
2784 | |||
2785 | if (master->priv_flags & IFF_MASTER_ARPMON) | ||
2786 | dev->last_rx = jiffies; | ||
2787 | |||
2788 | if ((master->priv_flags & IFF_MASTER_ALB) && | ||
2789 | (master->priv_flags & IFF_BRIDGE_PORT)) { | ||
2790 | /* Do address unmangle. The local destination address | ||
2791 | * will be always the one master has. Provides the right | ||
2792 | * functionality in a bridge. | ||
2793 | */ | ||
2794 | skb_bond_set_mac_by_master(skb, master); | ||
2795 | } | ||
2796 | |||
2797 | if (dev->priv_flags & IFF_SLAVE_INACTIVE) { | ||
2798 | if ((dev->priv_flags & IFF_SLAVE_NEEDARP) && | ||
2799 | skb->protocol == __cpu_to_be16(ETH_P_ARP)) | ||
2800 | return 0; | ||
2801 | |||
2802 | if (master->priv_flags & IFF_MASTER_ALB) { | ||
2803 | if (skb->pkt_type != PACKET_BROADCAST && | ||
2804 | skb->pkt_type != PACKET_MULTICAST) | ||
2805 | return 0; | ||
2806 | } | ||
2807 | if (master->priv_flags & IFF_MASTER_8023AD && | ||
2808 | skb->protocol == __cpu_to_be16(ETH_P_SLOW)) | ||
2809 | return 0; | ||
2810 | |||
2811 | return 1; | ||
2812 | } | ||
2813 | return 0; | ||
2814 | } | ||
2815 | EXPORT_SYMBOL(__skb_bond_should_drop); | ||
2816 | |||
2817 | static int __netif_receive_skb(struct sk_buff *skb) | 3092 | static int __netif_receive_skb(struct sk_buff *skb) |
2818 | { | 3093 | { |
2819 | struct packet_type *ptype, *pt_prev; | 3094 | struct packet_type *ptype, *pt_prev; |
2820 | rx_handler_func_t *rx_handler; | 3095 | rx_handler_func_t *rx_handler; |
2821 | struct net_device *orig_dev; | 3096 | struct net_device *orig_dev; |
2822 | struct net_device *master; | 3097 | struct net_device *null_or_dev; |
2823 | struct net_device *null_or_orig; | 3098 | bool deliver_exact = false; |
2824 | struct net_device *orig_or_bond; | ||
2825 | int ret = NET_RX_DROP; | 3099 | int ret = NET_RX_DROP; |
2826 | __be16 type; | 3100 | __be16 type; |
2827 | 3101 | ||
2828 | if (!netdev_tstamp_prequeue) | 3102 | if (!netdev_tstamp_prequeue) |
2829 | net_timestamp_check(skb); | 3103 | net_timestamp_check(skb); |
2830 | 3104 | ||
2831 | if (vlan_tx_tag_present(skb) && vlan_hwaccel_do_receive(skb)) | 3105 | trace_netif_receive_skb(skb); |
2832 | return NET_RX_SUCCESS; | ||
2833 | 3106 | ||
2834 | /* if we've gotten here through NAPI, check netpoll */ | 3107 | /* if we've gotten here through NAPI, check netpoll */ |
2835 | if (netpoll_receive_skb(skb)) | 3108 | if (netpoll_receive_skb(skb)) |
@@ -2837,37 +3110,26 @@ static int __netif_receive_skb(struct sk_buff *skb) | |||
2837 | 3110 | ||
2838 | if (!skb->skb_iif) | 3111 | if (!skb->skb_iif) |
2839 | skb->skb_iif = skb->dev->ifindex; | 3112 | skb->skb_iif = skb->dev->ifindex; |
2840 | |||
2841 | /* | ||
2842 | * bonding note: skbs received on inactive slaves should only | ||
2843 | * be delivered to pkt handlers that are exact matches. Also | ||
2844 | * the deliver_no_wcard flag will be set. If packet handlers | ||
2845 | * are sensitive to duplicate packets these skbs will need to | ||
2846 | * be dropped at the handler. The vlan accel path may have | ||
2847 | * already set the deliver_no_wcard flag. | ||
2848 | */ | ||
2849 | null_or_orig = NULL; | ||
2850 | orig_dev = skb->dev; | 3113 | orig_dev = skb->dev; |
2851 | master = ACCESS_ONCE(orig_dev->master); | ||
2852 | if (skb->deliver_no_wcard) | ||
2853 | null_or_orig = orig_dev; | ||
2854 | else if (master) { | ||
2855 | if (skb_bond_should_drop(skb, master)) { | ||
2856 | skb->deliver_no_wcard = 1; | ||
2857 | null_or_orig = orig_dev; /* deliver only exact match */ | ||
2858 | } else | ||
2859 | skb->dev = master; | ||
2860 | } | ||
2861 | 3114 | ||
2862 | __this_cpu_inc(softnet_data.processed); | ||
2863 | skb_reset_network_header(skb); | 3115 | skb_reset_network_header(skb); |
2864 | skb_reset_transport_header(skb); | 3116 | skb_reset_transport_header(skb); |
2865 | skb->mac_len = skb->network_header - skb->mac_header; | 3117 | skb_reset_mac_len(skb); |
2866 | 3118 | ||
2867 | pt_prev = NULL; | 3119 | pt_prev = NULL; |
2868 | 3120 | ||
2869 | rcu_read_lock(); | 3121 | rcu_read_lock(); |
2870 | 3122 | ||
3123 | another_round: | ||
3124 | |||
3125 | __this_cpu_inc(softnet_data.processed); | ||
3126 | |||
3127 | if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) { | ||
3128 | skb = vlan_untag(skb); | ||
3129 | if (unlikely(!skb)) | ||
3130 | goto out; | ||
3131 | } | ||
3132 | |||
2871 | #ifdef CONFIG_NET_CLS_ACT | 3133 | #ifdef CONFIG_NET_CLS_ACT |
2872 | if (skb->tc_verd & TC_NCLS) { | 3134 | if (skb->tc_verd & TC_NCLS) { |
2873 | skb->tc_verd = CLR_TC_NCLS(skb->tc_verd); | 3135 | skb->tc_verd = CLR_TC_NCLS(skb->tc_verd); |
@@ -2876,8 +3138,7 @@ static int __netif_receive_skb(struct sk_buff *skb) | |||
2876 | #endif | 3138 | #endif |
2877 | 3139 | ||
2878 | list_for_each_entry_rcu(ptype, &ptype_all, list) { | 3140 | list_for_each_entry_rcu(ptype, &ptype_all, list) { |
2879 | if (ptype->dev == null_or_orig || ptype->dev == skb->dev || | 3141 | if (!ptype->dev || ptype->dev == skb->dev) { |
2880 | ptype->dev == orig_dev) { | ||
2881 | if (pt_prev) | 3142 | if (pt_prev) |
2882 | ret = deliver_skb(skb, pt_prev, orig_dev); | 3143 | ret = deliver_skb(skb, pt_prev, orig_dev); |
2883 | pt_prev = ptype; | 3144 | pt_prev = ptype; |
@@ -2891,36 +3152,47 @@ static int __netif_receive_skb(struct sk_buff *skb) | |||
2891 | ncls: | 3152 | ncls: |
2892 | #endif | 3153 | #endif |
2893 | 3154 | ||
2894 | /* Handle special case of bridge or macvlan */ | ||
2895 | rx_handler = rcu_dereference(skb->dev->rx_handler); | 3155 | rx_handler = rcu_dereference(skb->dev->rx_handler); |
2896 | if (rx_handler) { | 3156 | if (rx_handler) { |
2897 | if (pt_prev) { | 3157 | if (pt_prev) { |
2898 | ret = deliver_skb(skb, pt_prev, orig_dev); | 3158 | ret = deliver_skb(skb, pt_prev, orig_dev); |
2899 | pt_prev = NULL; | 3159 | pt_prev = NULL; |
2900 | } | 3160 | } |
2901 | skb = rx_handler(skb); | 3161 | switch (rx_handler(&skb)) { |
2902 | if (!skb) | 3162 | case RX_HANDLER_CONSUMED: |
2903 | goto out; | 3163 | goto out; |
3164 | case RX_HANDLER_ANOTHER: | ||
3165 | goto another_round; | ||
3166 | case RX_HANDLER_EXACT: | ||
3167 | deliver_exact = true; | ||
3168 | case RX_HANDLER_PASS: | ||
3169 | break; | ||
3170 | default: | ||
3171 | BUG(); | ||
3172 | } | ||
2904 | } | 3173 | } |
2905 | 3174 | ||
2906 | /* | 3175 | if (vlan_tx_tag_present(skb)) { |
2907 | * Make sure frames received on VLAN interfaces stacked on | 3176 | if (pt_prev) { |
2908 | * bonding interfaces still make their way to any base bonding | 3177 | ret = deliver_skb(skb, pt_prev, orig_dev); |
2909 | * device that may have registered for a specific ptype. The | 3178 | pt_prev = NULL; |
2910 | * handler may have to adjust skb->dev and orig_dev. | 3179 | } |
2911 | */ | 3180 | if (vlan_do_receive(&skb)) { |
2912 | orig_or_bond = orig_dev; | 3181 | ret = __netif_receive_skb(skb); |
2913 | if ((skb->dev->priv_flags & IFF_802_1Q_VLAN) && | 3182 | goto out; |
2914 | (vlan_dev_real_dev(skb->dev)->priv_flags & IFF_BONDING)) { | 3183 | } else if (unlikely(!skb)) |
2915 | orig_or_bond = vlan_dev_real_dev(skb->dev); | 3184 | goto out; |
2916 | } | 3185 | } |
2917 | 3186 | ||
3187 | /* deliver only exact match when indicated */ | ||
3188 | null_or_dev = deliver_exact ? skb->dev : NULL; | ||
3189 | |||
2918 | type = skb->protocol; | 3190 | type = skb->protocol; |
2919 | list_for_each_entry_rcu(ptype, | 3191 | list_for_each_entry_rcu(ptype, |
2920 | &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) { | 3192 | &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) { |
2921 | if (ptype->type == type && (ptype->dev == null_or_orig || | 3193 | if (ptype->type == type && |
2922 | ptype->dev == skb->dev || ptype->dev == orig_dev || | 3194 | (ptype->dev == null_or_dev || ptype->dev == skb->dev || |
2923 | ptype->dev == orig_or_bond)) { | 3195 | ptype->dev == orig_dev)) { |
2924 | if (pt_prev) | 3196 | if (pt_prev) |
2925 | ret = deliver_skb(skb, pt_prev, orig_dev); | 3197 | ret = deliver_skb(skb, pt_prev, orig_dev); |
2926 | pt_prev = ptype; | 3198 | pt_prev = ptype; |
@@ -2930,6 +3202,7 @@ ncls: | |||
2930 | if (pt_prev) { | 3202 | if (pt_prev) { |
2931 | ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev); | 3203 | ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev); |
2932 | } else { | 3204 | } else { |
3205 | atomic_long_inc(&skb->dev->rx_dropped); | ||
2933 | kfree_skb(skb); | 3206 | kfree_skb(skb); |
2934 | /* Jamal, now you will not able to escape explaining | 3207 | /* Jamal, now you will not able to escape explaining |
2935 | * me how you were going to use this. :-) | 3208 | * me how you were going to use this. :-) |
@@ -3050,7 +3323,7 @@ out: | |||
3050 | return netif_receive_skb(skb); | 3323 | return netif_receive_skb(skb); |
3051 | } | 3324 | } |
3052 | 3325 | ||
3053 | static void napi_gro_flush(struct napi_struct *napi) | 3326 | inline void napi_gro_flush(struct napi_struct *napi) |
3054 | { | 3327 | { |
3055 | struct sk_buff *skb, *next; | 3328 | struct sk_buff *skb, *next; |
3056 | 3329 | ||
@@ -3063,6 +3336,7 @@ static void napi_gro_flush(struct napi_struct *napi) | |||
3063 | napi->gro_count = 0; | 3336 | napi->gro_count = 0; |
3064 | napi->gro_list = NULL; | 3337 | napi->gro_list = NULL; |
3065 | } | 3338 | } |
3339 | EXPORT_SYMBOL(napi_gro_flush); | ||
3066 | 3340 | ||
3067 | enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) | 3341 | enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) |
3068 | { | 3342 | { |
@@ -3077,7 +3351,7 @@ enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) | |||
3077 | if (!(skb->dev->features & NETIF_F_GRO) || netpoll_rx_on(skb)) | 3351 | if (!(skb->dev->features & NETIF_F_GRO) || netpoll_rx_on(skb)) |
3078 | goto normal; | 3352 | goto normal; |
3079 | 3353 | ||
3080 | if (skb_is_gso(skb) || skb_has_frags(skb)) | 3354 | if (skb_is_gso(skb) || skb_has_frag_list(skb)) |
3081 | goto normal; | 3355 | goto normal; |
3082 | 3356 | ||
3083 | rcu_read_lock(); | 3357 | rcu_read_lock(); |
@@ -3156,16 +3430,19 @@ normal: | |||
3156 | } | 3430 | } |
3157 | EXPORT_SYMBOL(dev_gro_receive); | 3431 | EXPORT_SYMBOL(dev_gro_receive); |
3158 | 3432 | ||
3159 | static gro_result_t | 3433 | static inline gro_result_t |
3160 | __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) | 3434 | __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) |
3161 | { | 3435 | { |
3162 | struct sk_buff *p; | 3436 | struct sk_buff *p; |
3163 | 3437 | ||
3164 | for (p = napi->gro_list; p; p = p->next) { | 3438 | for (p = napi->gro_list; p; p = p->next) { |
3165 | NAPI_GRO_CB(p)->same_flow = | 3439 | unsigned long diffs; |
3166 | (p->dev == skb->dev) && | 3440 | |
3167 | !compare_ether_header(skb_mac_header(p), | 3441 | diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev; |
3442 | diffs |= p->vlan_tci ^ skb->vlan_tci; | ||
3443 | diffs |= compare_ether_header(skb_mac_header(p), | ||
3168 | skb_gro_mac_header(skb)); | 3444 | skb_gro_mac_header(skb)); |
3445 | NAPI_GRO_CB(p)->same_flow = !diffs; | ||
3169 | NAPI_GRO_CB(p)->flush = 0; | 3446 | NAPI_GRO_CB(p)->flush = 0; |
3170 | } | 3447 | } |
3171 | 3448 | ||
@@ -3218,14 +3495,16 @@ gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) | |||
3218 | } | 3495 | } |
3219 | EXPORT_SYMBOL(napi_gro_receive); | 3496 | EXPORT_SYMBOL(napi_gro_receive); |
3220 | 3497 | ||
3221 | void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb) | 3498 | static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb) |
3222 | { | 3499 | { |
3223 | __skb_pull(skb, skb_headlen(skb)); | 3500 | __skb_pull(skb, skb_headlen(skb)); |
3224 | skb_reserve(skb, NET_IP_ALIGN - skb_headroom(skb)); | 3501 | skb_reserve(skb, NET_IP_ALIGN - skb_headroom(skb)); |
3502 | skb->vlan_tci = 0; | ||
3503 | skb->dev = napi->dev; | ||
3504 | skb->skb_iif = 0; | ||
3225 | 3505 | ||
3226 | napi->skb = skb; | 3506 | napi->skb = skb; |
3227 | } | 3507 | } |
3228 | EXPORT_SYMBOL(napi_reuse_skb); | ||
3229 | 3508 | ||
3230 | struct sk_buff *napi_get_frags(struct napi_struct *napi) | 3509 | struct sk_buff *napi_get_frags(struct napi_struct *napi) |
3231 | { | 3510 | { |
@@ -3519,7 +3798,7 @@ static void net_rx_action(struct softirq_action *h) | |||
3519 | * with netpoll's poll_napi(). Only the entity which | 3798 | * with netpoll's poll_napi(). Only the entity which |
3520 | * obtains the lock and sees NAPI_STATE_SCHED set will | 3799 | * obtains the lock and sees NAPI_STATE_SCHED set will |
3521 | * actually make the ->poll() call. Therefore we avoid | 3800 | * actually make the ->poll() call. Therefore we avoid |
3522 | * accidently calling ->poll() when NAPI is not scheduled. | 3801 | * accidentally calling ->poll() when NAPI is not scheduled. |
3523 | */ | 3802 | */ |
3524 | work = 0; | 3803 | work = 0; |
3525 | if (test_bit(NAPI_STATE_SCHED, &n->state)) { | 3804 | if (test_bit(NAPI_STATE_SCHED, &n->state)) { |
@@ -3710,12 +3989,15 @@ void *dev_seq_start(struct seq_file *seq, loff_t *pos) | |||
3710 | 3989 | ||
3711 | void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos) | 3990 | void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos) |
3712 | { | 3991 | { |
3713 | struct net_device *dev = (v == SEQ_START_TOKEN) ? | 3992 | struct net_device *dev = v; |
3714 | first_net_device(seq_file_net(seq)) : | 3993 | |
3715 | next_net_device((struct net_device *)v); | 3994 | if (v == SEQ_START_TOKEN) |
3995 | dev = first_net_device_rcu(seq_file_net(seq)); | ||
3996 | else | ||
3997 | dev = next_net_device_rcu(dev); | ||
3716 | 3998 | ||
3717 | ++*pos; | 3999 | ++*pos; |
3718 | return rcu_dereference(dev); | 4000 | return dev; |
3719 | } | 4001 | } |
3720 | 4002 | ||
3721 | void dev_seq_stop(struct seq_file *seq, void *v) | 4003 | void dev_seq_stop(struct seq_file *seq, void *v) |
@@ -3999,15 +4281,14 @@ static int __init dev_proc_init(void) | |||
3999 | 4281 | ||
4000 | 4282 | ||
4001 | /** | 4283 | /** |
4002 | * netdev_set_master - set up master/slave pair | 4284 | * netdev_set_master - set up master pointer |
4003 | * @slave: slave device | 4285 | * @slave: slave device |
4004 | * @master: new master device | 4286 | * @master: new master device |
4005 | * | 4287 | * |
4006 | * Changes the master device of the slave. Pass %NULL to break the | 4288 | * Changes the master device of the slave. Pass %NULL to break the |
4007 | * bonding. The caller must hold the RTNL semaphore. On a failure | 4289 | * bonding. The caller must hold the RTNL semaphore. On a failure |
4008 | * a negative errno code is returned. On success the reference counts | 4290 | * a negative errno code is returned. On success the reference counts |
4009 | * are adjusted, %RTM_NEWLINK is sent to the routing socket and the | 4291 | * are adjusted and the function returns zero. |
4010 | * function returns zero. | ||
4011 | */ | 4292 | */ |
4012 | int netdev_set_master(struct net_device *slave, struct net_device *master) | 4293 | int netdev_set_master(struct net_device *slave, struct net_device *master) |
4013 | { | 4294 | { |
@@ -4023,10 +4304,31 @@ int netdev_set_master(struct net_device *slave, struct net_device *master) | |||
4023 | 4304 | ||
4024 | slave->master = master; | 4305 | slave->master = master; |
4025 | 4306 | ||
4026 | if (old) { | 4307 | if (old) |
4027 | synchronize_net(); | ||
4028 | dev_put(old); | 4308 | dev_put(old); |
4029 | } | 4309 | return 0; |
4310 | } | ||
4311 | EXPORT_SYMBOL(netdev_set_master); | ||
4312 | |||
4313 | /** | ||
4314 | * netdev_set_bond_master - set up bonding master/slave pair | ||
4315 | * @slave: slave device | ||
4316 | * @master: new master device | ||
4317 | * | ||
4318 | * Changes the master device of the slave. Pass %NULL to break the | ||
4319 | * bonding. The caller must hold the RTNL semaphore. On a failure | ||
4320 | * a negative errno code is returned. On success %RTM_NEWLINK is sent | ||
4321 | * to the routing socket and the function returns zero. | ||
4322 | */ | ||
4323 | int netdev_set_bond_master(struct net_device *slave, struct net_device *master) | ||
4324 | { | ||
4325 | int err; | ||
4326 | |||
4327 | ASSERT_RTNL(); | ||
4328 | |||
4329 | err = netdev_set_master(slave, master); | ||
4330 | if (err) | ||
4331 | return err; | ||
4030 | if (master) | 4332 | if (master) |
4031 | slave->flags |= IFF_SLAVE; | 4333 | slave->flags |= IFF_SLAVE; |
4032 | else | 4334 | else |
@@ -4035,7 +4337,7 @@ int netdev_set_master(struct net_device *slave, struct net_device *master) | |||
4035 | rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE); | 4337 | rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE); |
4036 | return 0; | 4338 | return 0; |
4037 | } | 4339 | } |
4038 | EXPORT_SYMBOL(netdev_set_master); | 4340 | EXPORT_SYMBOL(netdev_set_bond_master); |
4039 | 4341 | ||
4040 | static void dev_change_rx_flags(struct net_device *dev, int flags) | 4342 | static void dev_change_rx_flags(struct net_device *dev, int flags) |
4041 | { | 4343 | { |
@@ -4204,6 +4506,30 @@ void dev_set_rx_mode(struct net_device *dev) | |||
4204 | } | 4506 | } |
4205 | 4507 | ||
4206 | /** | 4508 | /** |
4509 | * dev_ethtool_get_settings - call device's ethtool_ops::get_settings() | ||
4510 | * @dev: device | ||
4511 | * @cmd: memory area for ethtool_ops::get_settings() result | ||
4512 | * | ||
4513 | * The cmd arg is initialized properly (cleared and | ||
4514 | * ethtool_cmd::cmd field set to ETHTOOL_GSET). | ||
4515 | * | ||
4516 | * Return device's ethtool_ops::get_settings() result value or | ||
4517 | * -EOPNOTSUPP when device doesn't expose | ||
4518 | * ethtool_ops::get_settings() operation. | ||
4519 | */ | ||
4520 | int dev_ethtool_get_settings(struct net_device *dev, | ||
4521 | struct ethtool_cmd *cmd) | ||
4522 | { | ||
4523 | if (!dev->ethtool_ops || !dev->ethtool_ops->get_settings) | ||
4524 | return -EOPNOTSUPP; | ||
4525 | |||
4526 | memset(cmd, 0, sizeof(struct ethtool_cmd)); | ||
4527 | cmd->cmd = ETHTOOL_GSET; | ||
4528 | return dev->ethtool_ops->get_settings(dev, cmd); | ||
4529 | } | ||
4530 | EXPORT_SYMBOL(dev_ethtool_get_settings); | ||
4531 | |||
4532 | /** | ||
4207 | * dev_get_flags - get flags reported to userspace | 4533 | * dev_get_flags - get flags reported to userspace |
4208 | * @dev: device | 4534 | * @dev: device |
4209 | * | 4535 | * |
@@ -4372,6 +4698,17 @@ int dev_set_mtu(struct net_device *dev, int new_mtu) | |||
4372 | EXPORT_SYMBOL(dev_set_mtu); | 4698 | EXPORT_SYMBOL(dev_set_mtu); |
4373 | 4699 | ||
4374 | /** | 4700 | /** |
4701 | * dev_set_group - Change group this device belongs to | ||
4702 | * @dev: device | ||
4703 | * @new_group: group this device should belong to | ||
4704 | */ | ||
4705 | void dev_set_group(struct net_device *dev, int new_group) | ||
4706 | { | ||
4707 | dev->group = new_group; | ||
4708 | } | ||
4709 | EXPORT_SYMBOL(dev_set_group); | ||
4710 | |||
4711 | /** | ||
4375 | * dev_set_mac_address - Change Media Access Control Address | 4712 | * dev_set_mac_address - Change Media Access Control Address |
4376 | * @dev: device | 4713 | * @dev: device |
4377 | * @sa: new address | 4714 | * @sa: new address |
@@ -4456,7 +4793,7 @@ static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cm | |||
4456 | * is never reached | 4793 | * is never reached |
4457 | */ | 4794 | */ |
4458 | WARN_ON(1); | 4795 | WARN_ON(1); |
4459 | err = -EINVAL; | 4796 | err = -ENOTTY; |
4460 | break; | 4797 | break; |
4461 | 4798 | ||
4462 | } | 4799 | } |
@@ -4724,7 +5061,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg) | |||
4724 | /* Set the per device memory buffer space. | 5061 | /* Set the per device memory buffer space. |
4725 | * Not applicable in our case */ | 5062 | * Not applicable in our case */ |
4726 | case SIOCSIFLINK: | 5063 | case SIOCSIFLINK: |
4727 | return -EINVAL; | 5064 | return -ENOTTY; |
4728 | 5065 | ||
4729 | /* | 5066 | /* |
4730 | * Unknown or private ioctl. | 5067 | * Unknown or private ioctl. |
@@ -4745,7 +5082,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg) | |||
4745 | /* Take care of Wireless Extensions */ | 5082 | /* Take care of Wireless Extensions */ |
4746 | if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) | 5083 | if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) |
4747 | return wext_handle_ioctl(net, &ifr, cmd, arg); | 5084 | return wext_handle_ioctl(net, &ifr, cmd, arg); |
4748 | return -EINVAL; | 5085 | return -ENOTTY; |
4749 | } | 5086 | } |
4750 | } | 5087 | } |
4751 | 5088 | ||
@@ -4797,12 +5134,14 @@ static void rollback_registered_many(struct list_head *head) | |||
4797 | list_del(&dev->unreg_list); | 5134 | list_del(&dev->unreg_list); |
4798 | continue; | 5135 | continue; |
4799 | } | 5136 | } |
4800 | 5137 | dev->dismantle = true; | |
4801 | BUG_ON(dev->reg_state != NETREG_REGISTERED); | 5138 | BUG_ON(dev->reg_state != NETREG_REGISTERED); |
5139 | } | ||
4802 | 5140 | ||
4803 | /* If device is running, close it first. */ | 5141 | /* If device is running, close it first. */ |
4804 | dev_close(dev); | 5142 | dev_close_many(head); |
4805 | 5143 | ||
5144 | list_for_each_entry(dev, head, unreg_list) { | ||
4806 | /* And unlink it from device chain. */ | 5145 | /* And unlink it from device chain. */ |
4807 | unlist_netdevice(dev); | 5146 | unlist_netdevice(dev); |
4808 | 5147 | ||
@@ -4857,55 +5196,62 @@ static void rollback_registered(struct net_device *dev) | |||
4857 | 5196 | ||
4858 | list_add(&dev->unreg_list, &single); | 5197 | list_add(&dev->unreg_list, &single); |
4859 | rollback_registered_many(&single); | 5198 | rollback_registered_many(&single); |
5199 | list_del(&single); | ||
4860 | } | 5200 | } |
4861 | 5201 | ||
4862 | static void __netdev_init_queue_locks_one(struct net_device *dev, | 5202 | u32 netdev_fix_features(struct net_device *dev, u32 features) |
4863 | struct netdev_queue *dev_queue, | ||
4864 | void *_unused) | ||
4865 | { | 5203 | { |
4866 | spin_lock_init(&dev_queue->_xmit_lock); | 5204 | /* Fix illegal checksum combinations */ |
4867 | netdev_set_xmit_lockdep_class(&dev_queue->_xmit_lock, dev->type); | 5205 | if ((features & NETIF_F_HW_CSUM) && |
4868 | dev_queue->xmit_lock_owner = -1; | 5206 | (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { |
4869 | } | 5207 | netdev_warn(dev, "mixed HW and IP checksum settings.\n"); |
5208 | features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM); | ||
5209 | } | ||
4870 | 5210 | ||
4871 | static void netdev_init_queue_locks(struct net_device *dev) | 5211 | if ((features & NETIF_F_NO_CSUM) && |
4872 | { | 5212 | (features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { |
4873 | netdev_for_each_tx_queue(dev, __netdev_init_queue_locks_one, NULL); | 5213 | netdev_warn(dev, "mixed no checksumming and other settings.\n"); |
4874 | __netdev_init_queue_locks_one(dev, &dev->rx_queue, NULL); | 5214 | features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM); |
4875 | } | 5215 | } |
4876 | 5216 | ||
4877 | unsigned long netdev_fix_features(unsigned long features, const char *name) | ||
4878 | { | ||
4879 | /* Fix illegal SG+CSUM combinations. */ | 5217 | /* Fix illegal SG+CSUM combinations. */ |
4880 | if ((features & NETIF_F_SG) && | 5218 | if ((features & NETIF_F_SG) && |
4881 | !(features & NETIF_F_ALL_CSUM)) { | 5219 | !(features & NETIF_F_ALL_CSUM)) { |
4882 | if (name) | 5220 | netdev_dbg(dev, |
4883 | printk(KERN_NOTICE "%s: Dropping NETIF_F_SG since no " | 5221 | "Dropping NETIF_F_SG since no checksum feature.\n"); |
4884 | "checksum feature.\n", name); | ||
4885 | features &= ~NETIF_F_SG; | 5222 | features &= ~NETIF_F_SG; |
4886 | } | 5223 | } |
4887 | 5224 | ||
4888 | /* TSO requires that SG is present as well. */ | 5225 | /* TSO requires that SG is present as well. */ |
4889 | if ((features & NETIF_F_TSO) && !(features & NETIF_F_SG)) { | 5226 | if ((features & NETIF_F_ALL_TSO) && !(features & NETIF_F_SG)) { |
4890 | if (name) | 5227 | netdev_dbg(dev, "Dropping TSO features since no SG feature.\n"); |
4891 | printk(KERN_NOTICE "%s: Dropping NETIF_F_TSO since no " | 5228 | features &= ~NETIF_F_ALL_TSO; |
4892 | "SG feature.\n", name); | ||
4893 | features &= ~NETIF_F_TSO; | ||
4894 | } | 5229 | } |
4895 | 5230 | ||
5231 | /* TSO ECN requires that TSO is present as well. */ | ||
5232 | if ((features & NETIF_F_ALL_TSO) == NETIF_F_TSO_ECN) | ||
5233 | features &= ~NETIF_F_TSO_ECN; | ||
5234 | |||
5235 | /* Software GSO depends on SG. */ | ||
5236 | if ((features & NETIF_F_GSO) && !(features & NETIF_F_SG)) { | ||
5237 | netdev_dbg(dev, "Dropping NETIF_F_GSO since no SG feature.\n"); | ||
5238 | features &= ~NETIF_F_GSO; | ||
5239 | } | ||
5240 | |||
5241 | /* UFO needs SG and checksumming */ | ||
4896 | if (features & NETIF_F_UFO) { | 5242 | if (features & NETIF_F_UFO) { |
4897 | if (!(features & NETIF_F_GEN_CSUM)) { | 5243 | /* maybe split UFO into V4 and V6? */ |
4898 | if (name) | 5244 | if (!((features & NETIF_F_GEN_CSUM) || |
4899 | printk(KERN_ERR "%s: Dropping NETIF_F_UFO " | 5245 | (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM)) |
4900 | "since no NETIF_F_HW_CSUM feature.\n", | 5246 | == (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { |
4901 | name); | 5247 | netdev_dbg(dev, |
5248 | "Dropping NETIF_F_UFO since no checksum offload features.\n"); | ||
4902 | features &= ~NETIF_F_UFO; | 5249 | features &= ~NETIF_F_UFO; |
4903 | } | 5250 | } |
4904 | 5251 | ||
4905 | if (!(features & NETIF_F_SG)) { | 5252 | if (!(features & NETIF_F_SG)) { |
4906 | if (name) | 5253 | netdev_dbg(dev, |
4907 | printk(KERN_ERR "%s: Dropping NETIF_F_UFO " | 5254 | "Dropping NETIF_F_UFO since no NETIF_F_SG feature.\n"); |
4908 | "since no NETIF_F_SG feature.\n", name); | ||
4909 | features &= ~NETIF_F_UFO; | 5255 | features &= ~NETIF_F_UFO; |
4910 | } | 5256 | } |
4911 | } | 5257 | } |
@@ -4914,6 +5260,75 @@ unsigned long netdev_fix_features(unsigned long features, const char *name) | |||
4914 | } | 5260 | } |
4915 | EXPORT_SYMBOL(netdev_fix_features); | 5261 | EXPORT_SYMBOL(netdev_fix_features); |
4916 | 5262 | ||
5263 | int __netdev_update_features(struct net_device *dev) | ||
5264 | { | ||
5265 | u32 features; | ||
5266 | int err = 0; | ||
5267 | |||
5268 | ASSERT_RTNL(); | ||
5269 | |||
5270 | features = netdev_get_wanted_features(dev); | ||
5271 | |||
5272 | if (dev->netdev_ops->ndo_fix_features) | ||
5273 | features = dev->netdev_ops->ndo_fix_features(dev, features); | ||
5274 | |||
5275 | /* driver might be less strict about feature dependencies */ | ||
5276 | features = netdev_fix_features(dev, features); | ||
5277 | |||
5278 | if (dev->features == features) | ||
5279 | return 0; | ||
5280 | |||
5281 | netdev_dbg(dev, "Features changed: 0x%08x -> 0x%08x\n", | ||
5282 | dev->features, features); | ||
5283 | |||
5284 | if (dev->netdev_ops->ndo_set_features) | ||
5285 | err = dev->netdev_ops->ndo_set_features(dev, features); | ||
5286 | |||
5287 | if (unlikely(err < 0)) { | ||
5288 | netdev_err(dev, | ||
5289 | "set_features() failed (%d); wanted 0x%08x, left 0x%08x\n", | ||
5290 | err, features, dev->features); | ||
5291 | return -1; | ||
5292 | } | ||
5293 | |||
5294 | if (!err) | ||
5295 | dev->features = features; | ||
5296 | |||
5297 | return 1; | ||
5298 | } | ||
5299 | |||
5300 | /** | ||
5301 | * netdev_update_features - recalculate device features | ||
5302 | * @dev: the device to check | ||
5303 | * | ||
5304 | * Recalculate dev->features set and send notifications if it | ||
5305 | * has changed. Should be called after driver or hardware dependent | ||
5306 | * conditions might have changed that influence the features. | ||
5307 | */ | ||
5308 | void netdev_update_features(struct net_device *dev) | ||
5309 | { | ||
5310 | if (__netdev_update_features(dev)) | ||
5311 | netdev_features_change(dev); | ||
5312 | } | ||
5313 | EXPORT_SYMBOL(netdev_update_features); | ||
5314 | |||
5315 | /** | ||
5316 | * netdev_change_features - recalculate device features | ||
5317 | * @dev: the device to check | ||
5318 | * | ||
5319 | * Recalculate dev->features set and send notifications even | ||
5320 | * if they have not changed. Should be called instead of | ||
5321 | * netdev_update_features() if also dev->vlan_features might | ||
5322 | * have changed to allow the changes to be propagated to stacked | ||
5323 | * VLAN devices. | ||
5324 | */ | ||
5325 | void netdev_change_features(struct net_device *dev) | ||
5326 | { | ||
5327 | __netdev_update_features(dev); | ||
5328 | netdev_features_change(dev); | ||
5329 | } | ||
5330 | EXPORT_SYMBOL(netdev_change_features); | ||
5331 | |||
4917 | /** | 5332 | /** |
4918 | * netif_stacked_transfer_operstate - transfer operstate | 5333 | * netif_stacked_transfer_operstate - transfer operstate |
4919 | * @rootdev: the root or lower level device to transfer state from | 5334 | * @rootdev: the root or lower level device to transfer state from |
@@ -4941,6 +5356,59 @@ void netif_stacked_transfer_operstate(const struct net_device *rootdev, | |||
4941 | } | 5356 | } |
4942 | EXPORT_SYMBOL(netif_stacked_transfer_operstate); | 5357 | EXPORT_SYMBOL(netif_stacked_transfer_operstate); |
4943 | 5358 | ||
5359 | #ifdef CONFIG_RPS | ||
5360 | static int netif_alloc_rx_queues(struct net_device *dev) | ||
5361 | { | ||
5362 | unsigned int i, count = dev->num_rx_queues; | ||
5363 | struct netdev_rx_queue *rx; | ||
5364 | |||
5365 | BUG_ON(count < 1); | ||
5366 | |||
5367 | rx = kcalloc(count, sizeof(struct netdev_rx_queue), GFP_KERNEL); | ||
5368 | if (!rx) { | ||
5369 | pr_err("netdev: Unable to allocate %u rx queues.\n", count); | ||
5370 | return -ENOMEM; | ||
5371 | } | ||
5372 | dev->_rx = rx; | ||
5373 | |||
5374 | for (i = 0; i < count; i++) | ||
5375 | rx[i].dev = dev; | ||
5376 | return 0; | ||
5377 | } | ||
5378 | #endif | ||
5379 | |||
5380 | static void netdev_init_one_queue(struct net_device *dev, | ||
5381 | struct netdev_queue *queue, void *_unused) | ||
5382 | { | ||
5383 | /* Initialize queue lock */ | ||
5384 | spin_lock_init(&queue->_xmit_lock); | ||
5385 | netdev_set_xmit_lockdep_class(&queue->_xmit_lock, dev->type); | ||
5386 | queue->xmit_lock_owner = -1; | ||
5387 | netdev_queue_numa_node_write(queue, NUMA_NO_NODE); | ||
5388 | queue->dev = dev; | ||
5389 | } | ||
5390 | |||
5391 | static int netif_alloc_netdev_queues(struct net_device *dev) | ||
5392 | { | ||
5393 | unsigned int count = dev->num_tx_queues; | ||
5394 | struct netdev_queue *tx; | ||
5395 | |||
5396 | BUG_ON(count < 1); | ||
5397 | |||
5398 | tx = kcalloc(count, sizeof(struct netdev_queue), GFP_KERNEL); | ||
5399 | if (!tx) { | ||
5400 | pr_err("netdev: Unable to allocate %u tx queues.\n", | ||
5401 | count); | ||
5402 | return -ENOMEM; | ||
5403 | } | ||
5404 | dev->_tx = tx; | ||
5405 | |||
5406 | netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL); | ||
5407 | spin_lock_init(&dev->tx_global_lock); | ||
5408 | |||
5409 | return 0; | ||
5410 | } | ||
5411 | |||
4944 | /** | 5412 | /** |
4945 | * register_netdevice - register a network device | 5413 | * register_netdevice - register a network device |
4946 | * @dev: device to register | 5414 | * @dev: device to register |
@@ -4974,28 +5442,13 @@ int register_netdevice(struct net_device *dev) | |||
4974 | 5442 | ||
4975 | spin_lock_init(&dev->addr_list_lock); | 5443 | spin_lock_init(&dev->addr_list_lock); |
4976 | netdev_set_addr_lockdep_class(dev); | 5444 | netdev_set_addr_lockdep_class(dev); |
4977 | netdev_init_queue_locks(dev); | ||
4978 | 5445 | ||
4979 | dev->iflink = -1; | 5446 | dev->iflink = -1; |
4980 | 5447 | ||
4981 | #ifdef CONFIG_RPS | 5448 | ret = dev_get_valid_name(dev, dev->name); |
4982 | if (!dev->num_rx_queues) { | 5449 | if (ret < 0) |
4983 | /* | 5450 | goto out; |
4984 | * Allocate a single RX queue if driver never called | ||
4985 | * alloc_netdev_mq | ||
4986 | */ | ||
4987 | |||
4988 | dev->_rx = kzalloc(sizeof(struct netdev_rx_queue), GFP_KERNEL); | ||
4989 | if (!dev->_rx) { | ||
4990 | ret = -ENOMEM; | ||
4991 | goto out; | ||
4992 | } | ||
4993 | 5451 | ||
4994 | dev->_rx->first = dev->_rx; | ||
4995 | atomic_set(&dev->_rx->count, 1); | ||
4996 | dev->num_rx_queues = 1; | ||
4997 | } | ||
4998 | #endif | ||
4999 | /* Init, if this function is available */ | 5452 | /* Init, if this function is available */ |
5000 | if (dev->netdev_ops->ndo_init) { | 5453 | if (dev->netdev_ops->ndo_init) { |
5001 | ret = dev->netdev_ops->ndo_init(dev); | 5454 | ret = dev->netdev_ops->ndo_init(dev); |
@@ -5006,34 +5459,30 @@ int register_netdevice(struct net_device *dev) | |||
5006 | } | 5459 | } |
5007 | } | 5460 | } |
5008 | 5461 | ||
5009 | ret = dev_get_valid_name(dev, dev->name, 0); | ||
5010 | if (ret) | ||
5011 | goto err_uninit; | ||
5012 | |||
5013 | dev->ifindex = dev_new_index(net); | 5462 | dev->ifindex = dev_new_index(net); |
5014 | if (dev->iflink == -1) | 5463 | if (dev->iflink == -1) |
5015 | dev->iflink = dev->ifindex; | 5464 | dev->iflink = dev->ifindex; |
5016 | 5465 | ||
5017 | /* Fix illegal checksum combinations */ | 5466 | /* Transfer changeable features to wanted_features and enable |
5018 | if ((dev->features & NETIF_F_HW_CSUM) && | 5467 | * software offloads (GSO and GRO). |
5019 | (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { | 5468 | */ |
5020 | printk(KERN_NOTICE "%s: mixed HW and IP checksum settings.\n", | 5469 | dev->hw_features |= NETIF_F_SOFT_FEATURES; |
5021 | dev->name); | 5470 | dev->features |= NETIF_F_SOFT_FEATURES; |
5022 | dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM); | 5471 | dev->wanted_features = dev->features & dev->hw_features; |
5023 | } | ||
5024 | 5472 | ||
5025 | if ((dev->features & NETIF_F_NO_CSUM) && | 5473 | /* Turn on no cache copy if HW is doing checksum */ |
5026 | (dev->features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { | 5474 | dev->hw_features |= NETIF_F_NOCACHE_COPY; |
5027 | printk(KERN_NOTICE "%s: mixed no checksumming and other settings.\n", | 5475 | if ((dev->features & NETIF_F_ALL_CSUM) && |
5028 | dev->name); | 5476 | !(dev->features & NETIF_F_NO_CSUM)) { |
5029 | dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM); | 5477 | dev->wanted_features |= NETIF_F_NOCACHE_COPY; |
5478 | dev->features |= NETIF_F_NOCACHE_COPY; | ||
5030 | } | 5479 | } |
5031 | 5480 | ||
5032 | dev->features = netdev_fix_features(dev->features, dev->name); | 5481 | /* Enable GRO and NETIF_F_HIGHDMA for vlans by default, |
5033 | 5482 | * vlan_dev_init() will do the dev->features check, so these features | |
5034 | /* Enable software GSO if SG is supported. */ | 5483 | * are enabled only if supported by underlying device. |
5035 | if (dev->features & NETIF_F_SG) | 5484 | */ |
5036 | dev->features |= NETIF_F_GSO; | 5485 | dev->vlan_features |= (NETIF_F_GRO | NETIF_F_HIGHDMA); |
5037 | 5486 | ||
5038 | ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev); | 5487 | ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev); |
5039 | ret = notifier_to_errno(ret); | 5488 | ret = notifier_to_errno(ret); |
@@ -5045,6 +5494,8 @@ int register_netdevice(struct net_device *dev) | |||
5045 | goto err_uninit; | 5494 | goto err_uninit; |
5046 | dev->reg_state = NETREG_REGISTERED; | 5495 | dev->reg_state = NETREG_REGISTERED; |
5047 | 5496 | ||
5497 | __netdev_update_features(dev); | ||
5498 | |||
5048 | /* | 5499 | /* |
5049 | * Default initial state at registry is that the | 5500 | * Default initial state at registry is that the |
5050 | * device is present. | 5501 | * device is present. |
@@ -5105,9 +5556,6 @@ int init_dummy_netdev(struct net_device *dev) | |||
5105 | */ | 5556 | */ |
5106 | dev->reg_state = NETREG_DUMMY; | 5557 | dev->reg_state = NETREG_DUMMY; |
5107 | 5558 | ||
5108 | /* initialize the ref count */ | ||
5109 | atomic_set(&dev->refcnt, 1); | ||
5110 | |||
5111 | /* NAPI wants this */ | 5559 | /* NAPI wants this */ |
5112 | INIT_LIST_HEAD(&dev->napi_list); | 5560 | INIT_LIST_HEAD(&dev->napi_list); |
5113 | 5561 | ||
@@ -5115,6 +5563,11 @@ int init_dummy_netdev(struct net_device *dev) | |||
5115 | set_bit(__LINK_STATE_PRESENT, &dev->state); | 5563 | set_bit(__LINK_STATE_PRESENT, &dev->state); |
5116 | set_bit(__LINK_STATE_START, &dev->state); | 5564 | set_bit(__LINK_STATE_START, &dev->state); |
5117 | 5565 | ||
5566 | /* Note : We dont allocate pcpu_refcnt for dummy devices, | ||
5567 | * because users of this 'device' dont need to change | ||
5568 | * its refcount. | ||
5569 | */ | ||
5570 | |||
5118 | return 0; | 5571 | return 0; |
5119 | } | 5572 | } |
5120 | EXPORT_SYMBOL_GPL(init_dummy_netdev); | 5573 | EXPORT_SYMBOL_GPL(init_dummy_netdev); |
@@ -5138,24 +5591,22 @@ int register_netdev(struct net_device *dev) | |||
5138 | int err; | 5591 | int err; |
5139 | 5592 | ||
5140 | rtnl_lock(); | 5593 | rtnl_lock(); |
5141 | |||
5142 | /* | ||
5143 | * If the name is a format string the caller wants us to do a | ||
5144 | * name allocation. | ||
5145 | */ | ||
5146 | if (strchr(dev->name, '%')) { | ||
5147 | err = dev_alloc_name(dev, dev->name); | ||
5148 | if (err < 0) | ||
5149 | goto out; | ||
5150 | } | ||
5151 | |||
5152 | err = register_netdevice(dev); | 5594 | err = register_netdevice(dev); |
5153 | out: | ||
5154 | rtnl_unlock(); | 5595 | rtnl_unlock(); |
5155 | return err; | 5596 | return err; |
5156 | } | 5597 | } |
5157 | EXPORT_SYMBOL(register_netdev); | 5598 | EXPORT_SYMBOL(register_netdev); |
5158 | 5599 | ||
5600 | int netdev_refcnt_read(const struct net_device *dev) | ||
5601 | { | ||
5602 | int i, refcnt = 0; | ||
5603 | |||
5604 | for_each_possible_cpu(i) | ||
5605 | refcnt += *per_cpu_ptr(dev->pcpu_refcnt, i); | ||
5606 | return refcnt; | ||
5607 | } | ||
5608 | EXPORT_SYMBOL(netdev_refcnt_read); | ||
5609 | |||
5159 | /* | 5610 | /* |
5160 | * netdev_wait_allrefs - wait until all references are gone. | 5611 | * netdev_wait_allrefs - wait until all references are gone. |
5161 | * | 5612 | * |
@@ -5170,11 +5621,14 @@ EXPORT_SYMBOL(register_netdev); | |||
5170 | static void netdev_wait_allrefs(struct net_device *dev) | 5621 | static void netdev_wait_allrefs(struct net_device *dev) |
5171 | { | 5622 | { |
5172 | unsigned long rebroadcast_time, warning_time; | 5623 | unsigned long rebroadcast_time, warning_time; |
5624 | int refcnt; | ||
5173 | 5625 | ||
5174 | linkwatch_forget_dev(dev); | 5626 | linkwatch_forget_dev(dev); |
5175 | 5627 | ||
5176 | rebroadcast_time = warning_time = jiffies; | 5628 | rebroadcast_time = warning_time = jiffies; |
5177 | while (atomic_read(&dev->refcnt) != 0) { | 5629 | refcnt = netdev_refcnt_read(dev); |
5630 | |||
5631 | while (refcnt != 0) { | ||
5178 | if (time_after(jiffies, rebroadcast_time + 1 * HZ)) { | 5632 | if (time_after(jiffies, rebroadcast_time + 1 * HZ)) { |
5179 | rtnl_lock(); | 5633 | rtnl_lock(); |
5180 | 5634 | ||
@@ -5201,11 +5655,13 @@ static void netdev_wait_allrefs(struct net_device *dev) | |||
5201 | 5655 | ||
5202 | msleep(250); | 5656 | msleep(250); |
5203 | 5657 | ||
5658 | refcnt = netdev_refcnt_read(dev); | ||
5659 | |||
5204 | if (time_after(jiffies, warning_time + 10 * HZ)) { | 5660 | if (time_after(jiffies, warning_time + 10 * HZ)) { |
5205 | printk(KERN_EMERG "unregister_netdevice: " | 5661 | printk(KERN_EMERG "unregister_netdevice: " |
5206 | "waiting for %s to become free. Usage " | 5662 | "waiting for %s to become free. Usage " |
5207 | "count = %d\n", | 5663 | "count = %d\n", |
5208 | dev->name, atomic_read(&dev->refcnt)); | 5664 | dev->name, refcnt); |
5209 | warning_time = jiffies; | 5665 | warning_time = jiffies; |
5210 | } | 5666 | } |
5211 | } | 5667 | } |
@@ -5263,9 +5719,9 @@ void netdev_run_todo(void) | |||
5263 | netdev_wait_allrefs(dev); | 5719 | netdev_wait_allrefs(dev); |
5264 | 5720 | ||
5265 | /* paranoia */ | 5721 | /* paranoia */ |
5266 | BUG_ON(atomic_read(&dev->refcnt)); | 5722 | BUG_ON(netdev_refcnt_read(dev)); |
5267 | WARN_ON(dev->ip_ptr); | 5723 | WARN_ON(rcu_dereference_raw(dev->ip_ptr)); |
5268 | WARN_ON(dev->ip6_ptr); | 5724 | WARN_ON(rcu_dereference_raw(dev->ip6_ptr)); |
5269 | WARN_ON(dev->dn_ptr); | 5725 | WARN_ON(dev->dn_ptr); |
5270 | 5726 | ||
5271 | if (dev->destructor) | 5727 | if (dev->destructor) |
@@ -5276,34 +5732,6 @@ void netdev_run_todo(void) | |||
5276 | } | 5732 | } |
5277 | } | 5733 | } |
5278 | 5734 | ||
5279 | /** | ||
5280 | * dev_txq_stats_fold - fold tx_queues stats | ||
5281 | * @dev: device to get statistics from | ||
5282 | * @stats: struct rtnl_link_stats64 to hold results | ||
5283 | */ | ||
5284 | void dev_txq_stats_fold(const struct net_device *dev, | ||
5285 | struct rtnl_link_stats64 *stats) | ||
5286 | { | ||
5287 | u64 tx_bytes = 0, tx_packets = 0, tx_dropped = 0; | ||
5288 | unsigned int i; | ||
5289 | struct netdev_queue *txq; | ||
5290 | |||
5291 | for (i = 0; i < dev->num_tx_queues; i++) { | ||
5292 | txq = netdev_get_tx_queue(dev, i); | ||
5293 | spin_lock_bh(&txq->_xmit_lock); | ||
5294 | tx_bytes += txq->tx_bytes; | ||
5295 | tx_packets += txq->tx_packets; | ||
5296 | tx_dropped += txq->tx_dropped; | ||
5297 | spin_unlock_bh(&txq->_xmit_lock); | ||
5298 | } | ||
5299 | if (tx_bytes || tx_packets || tx_dropped) { | ||
5300 | stats->tx_bytes = tx_bytes; | ||
5301 | stats->tx_packets = tx_packets; | ||
5302 | stats->tx_dropped = tx_dropped; | ||
5303 | } | ||
5304 | } | ||
5305 | EXPORT_SYMBOL(dev_txq_stats_fold); | ||
5306 | |||
5307 | /* Convert net_device_stats to rtnl_link_stats64. They have the same | 5735 | /* Convert net_device_stats to rtnl_link_stats64. They have the same |
5308 | * fields in the same order, with only the type differing. | 5736 | * fields in the same order, with only the type differing. |
5309 | */ | 5737 | */ |
@@ -5342,57 +5770,71 @@ struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev, | |||
5342 | 5770 | ||
5343 | if (ops->ndo_get_stats64) { | 5771 | if (ops->ndo_get_stats64) { |
5344 | memset(storage, 0, sizeof(*storage)); | 5772 | memset(storage, 0, sizeof(*storage)); |
5345 | return ops->ndo_get_stats64(dev, storage); | 5773 | ops->ndo_get_stats64(dev, storage); |
5346 | } | 5774 | } else if (ops->ndo_get_stats) { |
5347 | if (ops->ndo_get_stats) { | ||
5348 | netdev_stats_to_stats64(storage, ops->ndo_get_stats(dev)); | 5775 | netdev_stats_to_stats64(storage, ops->ndo_get_stats(dev)); |
5349 | return storage; | 5776 | } else { |
5777 | netdev_stats_to_stats64(storage, &dev->stats); | ||
5350 | } | 5778 | } |
5351 | netdev_stats_to_stats64(storage, &dev->stats); | 5779 | storage->rx_dropped += atomic_long_read(&dev->rx_dropped); |
5352 | dev_txq_stats_fold(dev, storage); | ||
5353 | return storage; | 5780 | return storage; |
5354 | } | 5781 | } |
5355 | EXPORT_SYMBOL(dev_get_stats); | 5782 | EXPORT_SYMBOL(dev_get_stats); |
5356 | 5783 | ||
5357 | static void netdev_init_one_queue(struct net_device *dev, | 5784 | struct netdev_queue *dev_ingress_queue_create(struct net_device *dev) |
5358 | struct netdev_queue *queue, | ||
5359 | void *_unused) | ||
5360 | { | 5785 | { |
5361 | queue->dev = dev; | 5786 | struct netdev_queue *queue = dev_ingress_queue(dev); |
5362 | } | ||
5363 | 5787 | ||
5364 | static void netdev_init_queues(struct net_device *dev) | 5788 | #ifdef CONFIG_NET_CLS_ACT |
5365 | { | 5789 | if (queue) |
5366 | netdev_init_one_queue(dev, &dev->rx_queue, NULL); | 5790 | return queue; |
5367 | netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL); | 5791 | queue = kzalloc(sizeof(*queue), GFP_KERNEL); |
5368 | spin_lock_init(&dev->tx_global_lock); | 5792 | if (!queue) |
5793 | return NULL; | ||
5794 | netdev_init_one_queue(dev, queue, NULL); | ||
5795 | queue->qdisc = &noop_qdisc; | ||
5796 | queue->qdisc_sleeping = &noop_qdisc; | ||
5797 | rcu_assign_pointer(dev->ingress_queue, queue); | ||
5798 | #endif | ||
5799 | return queue; | ||
5369 | } | 5800 | } |
5370 | 5801 | ||
5371 | /** | 5802 | /** |
5372 | * alloc_netdev_mq - allocate network device | 5803 | * alloc_netdev_mqs - allocate network device |
5373 | * @sizeof_priv: size of private data to allocate space for | 5804 | * @sizeof_priv: size of private data to allocate space for |
5374 | * @name: device name format string | 5805 | * @name: device name format string |
5375 | * @setup: callback to initialize device | 5806 | * @setup: callback to initialize device |
5376 | * @queue_count: the number of subqueues to allocate | 5807 | * @txqs: the number of TX subqueues to allocate |
5808 | * @rxqs: the number of RX subqueues to allocate | ||
5377 | * | 5809 | * |
5378 | * Allocates a struct net_device with private data area for driver use | 5810 | * Allocates a struct net_device with private data area for driver use |
5379 | * and performs basic initialization. Also allocates subquue structs | 5811 | * and performs basic initialization. Also allocates subquue structs |
5380 | * for each queue on the device at the end of the netdevice. | 5812 | * for each queue on the device. |
5381 | */ | 5813 | */ |
5382 | struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, | 5814 | struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, |
5383 | void (*setup)(struct net_device *), unsigned int queue_count) | 5815 | void (*setup)(struct net_device *), |
5816 | unsigned int txqs, unsigned int rxqs) | ||
5384 | { | 5817 | { |
5385 | struct netdev_queue *tx; | ||
5386 | struct net_device *dev; | 5818 | struct net_device *dev; |
5387 | size_t alloc_size; | 5819 | size_t alloc_size; |
5388 | struct net_device *p; | 5820 | struct net_device *p; |
5389 | #ifdef CONFIG_RPS | ||
5390 | struct netdev_rx_queue *rx; | ||
5391 | int i; | ||
5392 | #endif | ||
5393 | 5821 | ||
5394 | BUG_ON(strlen(name) >= sizeof(dev->name)); | 5822 | BUG_ON(strlen(name) >= sizeof(dev->name)); |
5395 | 5823 | ||
5824 | if (txqs < 1) { | ||
5825 | pr_err("alloc_netdev: Unable to allocate device " | ||
5826 | "with zero queues.\n"); | ||
5827 | return NULL; | ||
5828 | } | ||
5829 | |||
5830 | #ifdef CONFIG_RPS | ||
5831 | if (rxqs < 1) { | ||
5832 | pr_err("alloc_netdev: Unable to allocate device " | ||
5833 | "with zero RX queues.\n"); | ||
5834 | return NULL; | ||
5835 | } | ||
5836 | #endif | ||
5837 | |||
5396 | alloc_size = sizeof(struct net_device); | 5838 | alloc_size = sizeof(struct net_device); |
5397 | if (sizeof_priv) { | 5839 | if (sizeof_priv) { |
5398 | /* ensure 32-byte alignment of private area */ | 5840 | /* ensure 32-byte alignment of private area */ |
@@ -5408,55 +5850,23 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, | |||
5408 | return NULL; | 5850 | return NULL; |
5409 | } | 5851 | } |
5410 | 5852 | ||
5411 | tx = kcalloc(queue_count, sizeof(struct netdev_queue), GFP_KERNEL); | ||
5412 | if (!tx) { | ||
5413 | printk(KERN_ERR "alloc_netdev: Unable to allocate " | ||
5414 | "tx qdiscs.\n"); | ||
5415 | goto free_p; | ||
5416 | } | ||
5417 | |||
5418 | #ifdef CONFIG_RPS | ||
5419 | rx = kcalloc(queue_count, sizeof(struct netdev_rx_queue), GFP_KERNEL); | ||
5420 | if (!rx) { | ||
5421 | printk(KERN_ERR "alloc_netdev: Unable to allocate " | ||
5422 | "rx queues.\n"); | ||
5423 | goto free_tx; | ||
5424 | } | ||
5425 | |||
5426 | atomic_set(&rx->count, queue_count); | ||
5427 | |||
5428 | /* | ||
5429 | * Set a pointer to first element in the array which holds the | ||
5430 | * reference count. | ||
5431 | */ | ||
5432 | for (i = 0; i < queue_count; i++) | ||
5433 | rx[i].first = rx; | ||
5434 | #endif | ||
5435 | |||
5436 | dev = PTR_ALIGN(p, NETDEV_ALIGN); | 5853 | dev = PTR_ALIGN(p, NETDEV_ALIGN); |
5437 | dev->padded = (char *)dev - (char *)p; | 5854 | dev->padded = (char *)dev - (char *)p; |
5438 | 5855 | ||
5856 | dev->pcpu_refcnt = alloc_percpu(int); | ||
5857 | if (!dev->pcpu_refcnt) | ||
5858 | goto free_p; | ||
5859 | |||
5439 | if (dev_addr_init(dev)) | 5860 | if (dev_addr_init(dev)) |
5440 | goto free_rx; | 5861 | goto free_pcpu; |
5441 | 5862 | ||
5442 | dev_mc_init(dev); | 5863 | dev_mc_init(dev); |
5443 | dev_uc_init(dev); | 5864 | dev_uc_init(dev); |
5444 | 5865 | ||
5445 | dev_net_set(dev, &init_net); | 5866 | dev_net_set(dev, &init_net); |
5446 | 5867 | ||
5447 | dev->_tx = tx; | ||
5448 | dev->num_tx_queues = queue_count; | ||
5449 | dev->real_num_tx_queues = queue_count; | ||
5450 | |||
5451 | #ifdef CONFIG_RPS | ||
5452 | dev->_rx = rx; | ||
5453 | dev->num_rx_queues = queue_count; | ||
5454 | #endif | ||
5455 | |||
5456 | dev->gso_max_size = GSO_MAX_SIZE; | 5868 | dev->gso_max_size = GSO_MAX_SIZE; |
5457 | 5869 | ||
5458 | netdev_init_queues(dev); | ||
5459 | |||
5460 | INIT_LIST_HEAD(&dev->ethtool_ntuple_list.list); | 5870 | INIT_LIST_HEAD(&dev->ethtool_ntuple_list.list); |
5461 | dev->ethtool_ntuple_list.count = 0; | 5871 | dev->ethtool_ntuple_list.count = 0; |
5462 | INIT_LIST_HEAD(&dev->napi_list); | 5872 | INIT_LIST_HEAD(&dev->napi_list); |
@@ -5464,20 +5874,39 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, | |||
5464 | INIT_LIST_HEAD(&dev->link_watch_list); | 5874 | INIT_LIST_HEAD(&dev->link_watch_list); |
5465 | dev->priv_flags = IFF_XMIT_DST_RELEASE; | 5875 | dev->priv_flags = IFF_XMIT_DST_RELEASE; |
5466 | setup(dev); | 5876 | setup(dev); |
5877 | |||
5878 | dev->num_tx_queues = txqs; | ||
5879 | dev->real_num_tx_queues = txqs; | ||
5880 | if (netif_alloc_netdev_queues(dev)) | ||
5881 | goto free_all; | ||
5882 | |||
5883 | #ifdef CONFIG_RPS | ||
5884 | dev->num_rx_queues = rxqs; | ||
5885 | dev->real_num_rx_queues = rxqs; | ||
5886 | if (netif_alloc_rx_queues(dev)) | ||
5887 | goto free_all; | ||
5888 | #endif | ||
5889 | |||
5467 | strcpy(dev->name, name); | 5890 | strcpy(dev->name, name); |
5891 | dev->group = INIT_NETDEV_GROUP; | ||
5468 | return dev; | 5892 | return dev; |
5469 | 5893 | ||
5470 | free_rx: | 5894 | free_all: |
5895 | free_netdev(dev); | ||
5896 | return NULL; | ||
5897 | |||
5898 | free_pcpu: | ||
5899 | free_percpu(dev->pcpu_refcnt); | ||
5900 | kfree(dev->_tx); | ||
5471 | #ifdef CONFIG_RPS | 5901 | #ifdef CONFIG_RPS |
5472 | kfree(rx); | 5902 | kfree(dev->_rx); |
5473 | free_tx: | ||
5474 | #endif | 5903 | #endif |
5475 | kfree(tx); | 5904 | |
5476 | free_p: | 5905 | free_p: |
5477 | kfree(p); | 5906 | kfree(p); |
5478 | return NULL; | 5907 | return NULL; |
5479 | } | 5908 | } |
5480 | EXPORT_SYMBOL(alloc_netdev_mq); | 5909 | EXPORT_SYMBOL(alloc_netdev_mqs); |
5481 | 5910 | ||
5482 | /** | 5911 | /** |
5483 | * free_netdev - free network device | 5912 | * free_netdev - free network device |
@@ -5494,6 +5923,11 @@ void free_netdev(struct net_device *dev) | |||
5494 | release_net(dev_net(dev)); | 5923 | release_net(dev_net(dev)); |
5495 | 5924 | ||
5496 | kfree(dev->_tx); | 5925 | kfree(dev->_tx); |
5926 | #ifdef CONFIG_RPS | ||
5927 | kfree(dev->_rx); | ||
5928 | #endif | ||
5929 | |||
5930 | kfree(rcu_dereference_raw(dev->ingress_queue)); | ||
5497 | 5931 | ||
5498 | /* Flush device addresses */ | 5932 | /* Flush device addresses */ |
5499 | dev_addr_flush(dev); | 5933 | dev_addr_flush(dev); |
@@ -5504,6 +5938,9 @@ void free_netdev(struct net_device *dev) | |||
5504 | list_for_each_entry_safe(p, n, &dev->napi_list, dev_list) | 5938 | list_for_each_entry_safe(p, n, &dev->napi_list, dev_list) |
5505 | netif_napi_del(p); | 5939 | netif_napi_del(p); |
5506 | 5940 | ||
5941 | free_percpu(dev->pcpu_refcnt); | ||
5942 | dev->pcpu_refcnt = NULL; | ||
5943 | |||
5507 | /* Compatibility with error handling in drivers */ | 5944 | /* Compatibility with error handling in drivers */ |
5508 | if (dev->reg_state == NETREG_UNINITIALIZED) { | 5945 | if (dev->reg_state == NETREG_UNINITIALIZED) { |
5509 | kfree((char *)dev - dev->padded); | 5946 | kfree((char *)dev - dev->padded); |
@@ -5527,7 +5964,10 @@ EXPORT_SYMBOL(free_netdev); | |||
5527 | void synchronize_net(void) | 5964 | void synchronize_net(void) |
5528 | { | 5965 | { |
5529 | might_sleep(); | 5966 | might_sleep(); |
5530 | synchronize_rcu(); | 5967 | if (rtnl_is_locked()) |
5968 | synchronize_rcu_expedited(); | ||
5969 | else | ||
5970 | synchronize_rcu(); | ||
5531 | } | 5971 | } |
5532 | EXPORT_SYMBOL(synchronize_net); | 5972 | EXPORT_SYMBOL(synchronize_net); |
5533 | 5973 | ||
@@ -5636,7 +6076,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char | |||
5636 | /* We get here if we can't use the current device name */ | 6076 | /* We get here if we can't use the current device name */ |
5637 | if (!pat) | 6077 | if (!pat) |
5638 | goto out; | 6078 | goto out; |
5639 | if (dev_get_valid_name(dev, pat, 1)) | 6079 | if (dev_get_valid_name(dev, pat) < 0) |
5640 | goto out; | 6080 | goto out; |
5641 | } | 6081 | } |
5642 | 6082 | ||
@@ -5658,6 +6098,10 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char | |||
5658 | 6098 | ||
5659 | /* Notify protocols, that we are about to destroy | 6099 | /* Notify protocols, that we are about to destroy |
5660 | this device. They should clean all the things. | 6100 | this device. They should clean all the things. |
6101 | |||
6102 | Note that dev->reg_state stays at NETREG_REGISTERED. | ||
6103 | This is wanted because this way 8021q and macvlan know | ||
6104 | the device is just moving and can keep their slaves up. | ||
5661 | */ | 6105 | */ |
5662 | call_netdevice_notifiers(NETDEV_UNREGISTER, dev); | 6106 | call_netdevice_notifiers(NETDEV_UNREGISTER, dev); |
5663 | call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev); | 6107 | call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev); |
@@ -5734,6 +6178,11 @@ static int dev_cpu_callback(struct notifier_block *nfb, | |||
5734 | oldsd->output_queue = NULL; | 6178 | oldsd->output_queue = NULL; |
5735 | oldsd->output_queue_tailp = &oldsd->output_queue; | 6179 | oldsd->output_queue_tailp = &oldsd->output_queue; |
5736 | } | 6180 | } |
6181 | /* Append NAPI poll list from offline CPU. */ | ||
6182 | if (!list_empty(&oldsd->poll_list)) { | ||
6183 | list_splice_init(&oldsd->poll_list, &sd->poll_list); | ||
6184 | raise_softirq_irqoff(NET_RX_SOFTIRQ); | ||
6185 | } | ||
5737 | 6186 | ||
5738 | raise_softirq_irqoff(NET_TX_SOFTIRQ); | 6187 | raise_softirq_irqoff(NET_TX_SOFTIRQ); |
5739 | local_irq_enable(); | 6188 | local_irq_enable(); |
@@ -5762,32 +6211,22 @@ static int dev_cpu_callback(struct notifier_block *nfb, | |||
5762 | * @one to the master device with current feature set @all. Will not | 6211 | * @one to the master device with current feature set @all. Will not |
5763 | * enable anything that is off in @mask. Returns the new feature set. | 6212 | * enable anything that is off in @mask. Returns the new feature set. |
5764 | */ | 6213 | */ |
5765 | unsigned long netdev_increment_features(unsigned long all, unsigned long one, | 6214 | u32 netdev_increment_features(u32 all, u32 one, u32 mask) |
5766 | unsigned long mask) | ||
5767 | { | 6215 | { |
5768 | /* If device needs checksumming, downgrade to it. */ | 6216 | if (mask & NETIF_F_GEN_CSUM) |
5769 | if (all & NETIF_F_NO_CSUM && !(one & NETIF_F_NO_CSUM)) | 6217 | mask |= NETIF_F_ALL_CSUM; |
5770 | all ^= NETIF_F_NO_CSUM | (one & NETIF_F_ALL_CSUM); | 6218 | mask |= NETIF_F_VLAN_CHALLENGED; |
5771 | else if (mask & NETIF_F_ALL_CSUM) { | ||
5772 | /* If one device supports v4/v6 checksumming, set for all. */ | ||
5773 | if (one & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM) && | ||
5774 | !(all & NETIF_F_GEN_CSUM)) { | ||
5775 | all &= ~NETIF_F_ALL_CSUM; | ||
5776 | all |= one & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM); | ||
5777 | } | ||
5778 | 6219 | ||
5779 | /* If one device supports hw checksumming, set for all. */ | 6220 | all |= one & (NETIF_F_ONE_FOR_ALL|NETIF_F_ALL_CSUM) & mask; |
5780 | if (one & NETIF_F_GEN_CSUM && !(all & NETIF_F_GEN_CSUM)) { | 6221 | all &= one | ~NETIF_F_ALL_FOR_ALL; |
5781 | all &= ~NETIF_F_ALL_CSUM; | ||
5782 | all |= NETIF_F_HW_CSUM; | ||
5783 | } | ||
5784 | } | ||
5785 | 6222 | ||
5786 | one |= NETIF_F_ALL_CSUM; | 6223 | /* If device needs checksumming, downgrade to it. */ |
6224 | if (all & (NETIF_F_ALL_CSUM & ~NETIF_F_NO_CSUM)) | ||
6225 | all &= ~NETIF_F_NO_CSUM; | ||
5787 | 6226 | ||
5788 | one |= all & NETIF_F_ONE_FOR_ALL; | 6227 | /* If one device supports hw checksumming, set for all. */ |
5789 | all &= one | NETIF_F_LLTX | NETIF_F_GSO | NETIF_F_UFO; | 6228 | if (all & NETIF_F_GEN_CSUM) |
5790 | all |= one & mask & NETIF_F_ONE_FOR_ALL; | 6229 | all &= ~(NETIF_F_ALL_CSUM & ~NETIF_F_GEN_CSUM); |
5791 | 6230 | ||
5792 | return all; | 6231 | return all; |
5793 | } | 6232 | } |
@@ -5830,29 +6269,23 @@ err_name: | |||
5830 | /** | 6269 | /** |
5831 | * netdev_drivername - network driver for the device | 6270 | * netdev_drivername - network driver for the device |
5832 | * @dev: network device | 6271 | * @dev: network device |
5833 | * @buffer: buffer for resulting name | ||
5834 | * @len: size of buffer | ||
5835 | * | 6272 | * |
5836 | * Determine network driver for device. | 6273 | * Determine network driver for device. |
5837 | */ | 6274 | */ |
5838 | char *netdev_drivername(const struct net_device *dev, char *buffer, int len) | 6275 | const char *netdev_drivername(const struct net_device *dev) |
5839 | { | 6276 | { |
5840 | const struct device_driver *driver; | 6277 | const struct device_driver *driver; |
5841 | const struct device *parent; | 6278 | const struct device *parent; |
5842 | 6279 | const char *empty = ""; | |
5843 | if (len <= 0 || !buffer) | ||
5844 | return buffer; | ||
5845 | buffer[0] = 0; | ||
5846 | 6280 | ||
5847 | parent = dev->dev.parent; | 6281 | parent = dev->dev.parent; |
5848 | |||
5849 | if (!parent) | 6282 | if (!parent) |
5850 | return buffer; | 6283 | return empty; |
5851 | 6284 | ||
5852 | driver = parent->driver; | 6285 | driver = parent->driver; |
5853 | if (driver && driver->name) | 6286 | if (driver && driver->name) |
5854 | strlcpy(buffer, driver->name, len); | 6287 | return driver->name; |
5855 | return buffer; | 6288 | return empty; |
5856 | } | 6289 | } |
5857 | 6290 | ||
5858 | static int __netdev_printk(const char *level, const struct net_device *dev, | 6291 | static int __netdev_printk(const char *level, const struct net_device *dev, |
@@ -5948,7 +6381,7 @@ static void __net_exit default_device_exit(struct net *net) | |||
5948 | if (dev->rtnl_link_ops) | 6381 | if (dev->rtnl_link_ops) |
5949 | continue; | 6382 | continue; |
5950 | 6383 | ||
5951 | /* Push remaing network devices to init_net */ | 6384 | /* Push remaining network devices to init_net */ |
5952 | snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex); | 6385 | snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex); |
5953 | err = dev_change_net_namespace(dev, &init_net, fb_name); | 6386 | err = dev_change_net_namespace(dev, &init_net, fb_name); |
5954 | if (err) { | 6387 | if (err) { |
@@ -5963,7 +6396,7 @@ static void __net_exit default_device_exit(struct net *net) | |||
5963 | static void __net_exit default_device_exit_batch(struct list_head *net_list) | 6396 | static void __net_exit default_device_exit_batch(struct list_head *net_list) |
5964 | { | 6397 | { |
5965 | /* At exit all network devices most be removed from a network | 6398 | /* At exit all network devices most be removed from a network |
5966 | * namespace. Do this in the reverse order of registeration. | 6399 | * namespace. Do this in the reverse order of registration. |
5967 | * Do this across as many network namespaces as possible to | 6400 | * Do this across as many network namespaces as possible to |
5968 | * improve batching efficiency. | 6401 | * improve batching efficiency. |
5969 | */ | 6402 | */ |
@@ -5981,6 +6414,7 @@ static void __net_exit default_device_exit_batch(struct list_head *net_list) | |||
5981 | } | 6414 | } |
5982 | } | 6415 | } |
5983 | unregister_netdevice_many(&dev_kill_list); | 6416 | unregister_netdevice_many(&dev_kill_list); |
6417 | list_del(&dev_kill_list); | ||
5984 | rtnl_unlock(); | 6418 | rtnl_unlock(); |
5985 | } | 6419 | } |
5986 | 6420 | ||
diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c index 508f9c18992f..e2e66939ed00 100644 --- a/net/core/dev_addr_lists.c +++ b/net/core/dev_addr_lists.c | |||
@@ -68,14 +68,6 @@ static int __hw_addr_add(struct netdev_hw_addr_list *list, unsigned char *addr, | |||
68 | return __hw_addr_add_ex(list, addr, addr_len, addr_type, false); | 68 | return __hw_addr_add_ex(list, addr, addr_len, addr_type, false); |
69 | } | 69 | } |
70 | 70 | ||
71 | static void ha_rcu_free(struct rcu_head *head) | ||
72 | { | ||
73 | struct netdev_hw_addr *ha; | ||
74 | |||
75 | ha = container_of(head, struct netdev_hw_addr, rcu_head); | ||
76 | kfree(ha); | ||
77 | } | ||
78 | |||
79 | static int __hw_addr_del_ex(struct netdev_hw_addr_list *list, | 71 | static int __hw_addr_del_ex(struct netdev_hw_addr_list *list, |
80 | unsigned char *addr, int addr_len, | 72 | unsigned char *addr, int addr_len, |
81 | unsigned char addr_type, bool global) | 73 | unsigned char addr_type, bool global) |
@@ -94,7 +86,7 @@ static int __hw_addr_del_ex(struct netdev_hw_addr_list *list, | |||
94 | if (--ha->refcount) | 86 | if (--ha->refcount) |
95 | return 0; | 87 | return 0; |
96 | list_del_rcu(&ha->list); | 88 | list_del_rcu(&ha->list); |
97 | call_rcu(&ha->rcu_head, ha_rcu_free); | 89 | kfree_rcu(ha, rcu_head); |
98 | list->count--; | 90 | list->count--; |
99 | return 0; | 91 | return 0; |
100 | } | 92 | } |
@@ -144,7 +136,7 @@ void __hw_addr_del_multiple(struct netdev_hw_addr_list *to_list, | |||
144 | 136 | ||
145 | list_for_each_entry(ha, &from_list->list, list) { | 137 | list_for_each_entry(ha, &from_list->list, list) { |
146 | type = addr_type ? addr_type : ha->type; | 138 | type = addr_type ? addr_type : ha->type; |
147 | __hw_addr_del(to_list, ha->addr, addr_len, addr_type); | 139 | __hw_addr_del(to_list, ha->addr, addr_len, type); |
148 | } | 140 | } |
149 | } | 141 | } |
150 | EXPORT_SYMBOL(__hw_addr_del_multiple); | 142 | EXPORT_SYMBOL(__hw_addr_del_multiple); |
@@ -197,7 +189,7 @@ void __hw_addr_flush(struct netdev_hw_addr_list *list) | |||
197 | 189 | ||
198 | list_for_each_entry_safe(ha, tmp, &list->list, list) { | 190 | list_for_each_entry_safe(ha, tmp, &list->list, list) { |
199 | list_del_rcu(&ha->list); | 191 | list_del_rcu(&ha->list); |
200 | call_rcu(&ha->rcu_head, ha_rcu_free); | 192 | kfree_rcu(ha, rcu_head); |
201 | } | 193 | } |
202 | list->count = 0; | 194 | list->count = 0; |
203 | } | 195 | } |
@@ -357,8 +349,8 @@ EXPORT_SYMBOL(dev_addr_add_multiple); | |||
357 | /** | 349 | /** |
358 | * dev_addr_del_multiple - Delete device addresses by another device | 350 | * dev_addr_del_multiple - Delete device addresses by another device |
359 | * @to_dev: device where the addresses will be deleted | 351 | * @to_dev: device where the addresses will be deleted |
360 | * @from_dev: device by which addresses the addresses will be deleted | 352 | * @from_dev: device supplying the addresses to be deleted |
361 | * @addr_type: address type - 0 means type will used from from_dev | 353 | * @addr_type: address type - 0 means type will be used from from_dev |
362 | * | 354 | * |
363 | * Deletes addresses in to device by the list of addresses in from device. | 355 | * Deletes addresses in to device by the list of addresses in from device. |
364 | * | 356 | * |
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index 36e603c78ce9..7f36b38e060f 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c | |||
@@ -207,14 +207,6 @@ static void trace_napi_poll_hit(void *ignore, struct napi_struct *napi) | |||
207 | rcu_read_unlock(); | 207 | rcu_read_unlock(); |
208 | } | 208 | } |
209 | 209 | ||
210 | |||
211 | static void free_dm_hw_stat(struct rcu_head *head) | ||
212 | { | ||
213 | struct dm_hw_stat_delta *n; | ||
214 | n = container_of(head, struct dm_hw_stat_delta, rcu); | ||
215 | kfree(n); | ||
216 | } | ||
217 | |||
218 | static int set_all_monitor_traces(int state) | 210 | static int set_all_monitor_traces(int state) |
219 | { | 211 | { |
220 | int rc = 0; | 212 | int rc = 0; |
@@ -245,7 +237,7 @@ static int set_all_monitor_traces(int state) | |||
245 | list_for_each_entry_safe(new_stat, temp, &hw_stats_list, list) { | 237 | list_for_each_entry_safe(new_stat, temp, &hw_stats_list, list) { |
246 | if (new_stat->dev == NULL) { | 238 | if (new_stat->dev == NULL) { |
247 | list_del_rcu(&new_stat->list); | 239 | list_del_rcu(&new_stat->list); |
248 | call_rcu(&new_stat->rcu, free_dm_hw_stat); | 240 | kfree_rcu(new_stat, rcu); |
249 | } | 241 | } |
250 | } | 242 | } |
251 | break; | 243 | break; |
@@ -314,7 +306,7 @@ static int dropmon_net_event(struct notifier_block *ev_block, | |||
314 | new_stat->dev = NULL; | 306 | new_stat->dev = NULL; |
315 | if (trace_state == TRACE_OFF) { | 307 | if (trace_state == TRACE_OFF) { |
316 | list_del_rcu(&new_stat->list); | 308 | list_del_rcu(&new_stat->list); |
317 | call_rcu(&new_stat->rcu, free_dm_hw_stat); | 309 | kfree_rcu(new_stat, rcu); |
318 | break; | 310 | break; |
319 | } | 311 | } |
320 | } | 312 | } |
@@ -350,7 +342,7 @@ static int __init init_net_drop_monitor(void) | |||
350 | struct per_cpu_dm_data *data; | 342 | struct per_cpu_dm_data *data; |
351 | int cpu, rc; | 343 | int cpu, rc; |
352 | 344 | ||
353 | printk(KERN_INFO "Initalizing network drop monitor service\n"); | 345 | printk(KERN_INFO "Initializing network drop monitor service\n"); |
354 | 346 | ||
355 | if (sizeof(void *) > 8) { | 347 | if (sizeof(void *) > 8) { |
356 | printk(KERN_ERR "Unable to store program counters on this arch, Drop monitor failed\n"); | 348 | printk(KERN_ERR "Unable to store program counters on this arch, Drop monitor failed\n"); |
diff --git a/net/core/dst.c b/net/core/dst.c index 6c41b1fac3db..6135f3671692 100644 --- a/net/core/dst.c +++ b/net/core/dst.c | |||
@@ -19,6 +19,7 @@ | |||
19 | #include <linux/types.h> | 19 | #include <linux/types.h> |
20 | #include <net/net_namespace.h> | 20 | #include <net/net_namespace.h> |
21 | #include <linux/sched.h> | 21 | #include <linux/sched.h> |
22 | #include <linux/prefetch.h> | ||
22 | 23 | ||
23 | #include <net/dst.h> | 24 | #include <net/dst.h> |
24 | 25 | ||
@@ -33,9 +34,6 @@ | |||
33 | * 3) This list is guarded by a mutex, | 34 | * 3) This list is guarded by a mutex, |
34 | * so that the gc_task and dst_dev_event() can be synchronized. | 35 | * so that the gc_task and dst_dev_event() can be synchronized. |
35 | */ | 36 | */ |
36 | #if RT_CACHE_DEBUG >= 2 | ||
37 | static atomic_t dst_total = ATOMIC_INIT(0); | ||
38 | #endif | ||
39 | 37 | ||
40 | /* | 38 | /* |
41 | * We want to keep lock & list close together | 39 | * We want to keep lock & list close together |
@@ -69,10 +67,6 @@ static void dst_gc_task(struct work_struct *work) | |||
69 | unsigned long expires = ~0L; | 67 | unsigned long expires = ~0L; |
70 | struct dst_entry *dst, *next, head; | 68 | struct dst_entry *dst, *next, head; |
71 | struct dst_entry *last = &head; | 69 | struct dst_entry *last = &head; |
72 | #if RT_CACHE_DEBUG >= 2 | ||
73 | ktime_t time_start = ktime_get(); | ||
74 | struct timespec elapsed; | ||
75 | #endif | ||
76 | 70 | ||
77 | mutex_lock(&dst_gc_mutex); | 71 | mutex_lock(&dst_gc_mutex); |
78 | next = dst_busy_list; | 72 | next = dst_busy_list; |
@@ -146,15 +140,6 @@ loop: | |||
146 | 140 | ||
147 | spin_unlock_bh(&dst_garbage.lock); | 141 | spin_unlock_bh(&dst_garbage.lock); |
148 | mutex_unlock(&dst_gc_mutex); | 142 | mutex_unlock(&dst_gc_mutex); |
149 | #if RT_CACHE_DEBUG >= 2 | ||
150 | elapsed = ktime_to_timespec(ktime_sub(ktime_get(), time_start)); | ||
151 | printk(KERN_DEBUG "dst_total: %d delayed: %d work_perf: %d" | ||
152 | " expires: %lu elapsed: %lu us\n", | ||
153 | atomic_read(&dst_total), delayed, work_performed, | ||
154 | expires, | ||
155 | elapsed.tv_sec * USEC_PER_SEC + | ||
156 | elapsed.tv_nsec / NSEC_PER_USEC); | ||
157 | #endif | ||
158 | } | 143 | } |
159 | 144 | ||
160 | int dst_discard(struct sk_buff *skb) | 145 | int dst_discard(struct sk_buff *skb) |
@@ -164,26 +149,49 @@ int dst_discard(struct sk_buff *skb) | |||
164 | } | 149 | } |
165 | EXPORT_SYMBOL(dst_discard); | 150 | EXPORT_SYMBOL(dst_discard); |
166 | 151 | ||
167 | void *dst_alloc(struct dst_ops *ops) | 152 | const u32 dst_default_metrics[RTAX_MAX]; |
153 | |||
154 | void *dst_alloc(struct dst_ops *ops, struct net_device *dev, | ||
155 | int initial_ref, int initial_obsolete, int flags) | ||
168 | { | 156 | { |
169 | struct dst_entry *dst; | 157 | struct dst_entry *dst; |
170 | 158 | ||
171 | if (ops->gc && atomic_read(&ops->entries) > ops->gc_thresh) { | 159 | if (ops->gc && dst_entries_get_fast(ops) > ops->gc_thresh) { |
172 | if (ops->gc(ops)) | 160 | if (ops->gc(ops)) |
173 | return NULL; | 161 | return NULL; |
174 | } | 162 | } |
175 | dst = kmem_cache_zalloc(ops->kmem_cachep, GFP_ATOMIC); | 163 | dst = kmem_cache_alloc(ops->kmem_cachep, GFP_ATOMIC); |
176 | if (!dst) | 164 | if (!dst) |
177 | return NULL; | 165 | return NULL; |
178 | atomic_set(&dst->__refcnt, 0); | 166 | dst->child = NULL; |
167 | dst->dev = dev; | ||
168 | if (dev) | ||
169 | dev_hold(dev); | ||
179 | dst->ops = ops; | 170 | dst->ops = ops; |
180 | dst->lastuse = jiffies; | 171 | dst_init_metrics(dst, dst_default_metrics, true); |
172 | dst->expires = 0UL; | ||
181 | dst->path = dst; | 173 | dst->path = dst; |
182 | dst->input = dst->output = dst_discard; | 174 | dst->neighbour = NULL; |
183 | #if RT_CACHE_DEBUG >= 2 | 175 | dst->hh = NULL; |
184 | atomic_inc(&dst_total); | 176 | #ifdef CONFIG_XFRM |
177 | dst->xfrm = NULL; | ||
185 | #endif | 178 | #endif |
186 | atomic_inc(&ops->entries); | 179 | dst->input = dst_discard; |
180 | dst->output = dst_discard; | ||
181 | dst->error = 0; | ||
182 | dst->obsolete = initial_obsolete; | ||
183 | dst->header_len = 0; | ||
184 | dst->trailer_len = 0; | ||
185 | #ifdef CONFIG_IP_ROUTE_CLASSID | ||
186 | dst->tclassid = 0; | ||
187 | #endif | ||
188 | atomic_set(&dst->__refcnt, initial_ref); | ||
189 | dst->__use = 0; | ||
190 | dst->lastuse = jiffies; | ||
191 | dst->flags = flags; | ||
192 | dst->next = NULL; | ||
193 | if (!(flags & DST_NOCOUNT)) | ||
194 | dst_entries_add(ops, 1); | ||
187 | return dst; | 195 | return dst; |
188 | } | 196 | } |
189 | EXPORT_SYMBOL(dst_alloc); | 197 | EXPORT_SYMBOL(dst_alloc); |
@@ -228,23 +236,21 @@ again: | |||
228 | child = dst->child; | 236 | child = dst->child; |
229 | 237 | ||
230 | dst->hh = NULL; | 238 | dst->hh = NULL; |
231 | if (hh && atomic_dec_and_test(&hh->hh_refcnt)) | 239 | if (hh) |
232 | kfree(hh); | 240 | hh_cache_put(hh); |
233 | 241 | ||
234 | if (neigh) { | 242 | if (neigh) { |
235 | dst->neighbour = NULL; | 243 | dst->neighbour = NULL; |
236 | neigh_release(neigh); | 244 | neigh_release(neigh); |
237 | } | 245 | } |
238 | 246 | ||
239 | atomic_dec(&dst->ops->entries); | 247 | if (!(dst->flags & DST_NOCOUNT)) |
248 | dst_entries_add(dst->ops, -1); | ||
240 | 249 | ||
241 | if (dst->ops->destroy) | 250 | if (dst->ops->destroy) |
242 | dst->ops->destroy(dst); | 251 | dst->ops->destroy(dst); |
243 | if (dst->dev) | 252 | if (dst->dev) |
244 | dev_put(dst->dev); | 253 | dev_put(dst->dev); |
245 | #if RT_CACHE_DEBUG >= 2 | ||
246 | atomic_dec(&dst_total); | ||
247 | #endif | ||
248 | kmem_cache_free(dst->ops->kmem_cachep, dst); | 254 | kmem_cache_free(dst->ops->kmem_cachep, dst); |
249 | 255 | ||
250 | dst = child; | 256 | dst = child; |
@@ -271,13 +277,76 @@ void dst_release(struct dst_entry *dst) | |||
271 | if (dst) { | 277 | if (dst) { |
272 | int newrefcnt; | 278 | int newrefcnt; |
273 | 279 | ||
274 | smp_mb__before_atomic_dec(); | ||
275 | newrefcnt = atomic_dec_return(&dst->__refcnt); | 280 | newrefcnt = atomic_dec_return(&dst->__refcnt); |
276 | WARN_ON(newrefcnt < 0); | 281 | WARN_ON(newrefcnt < 0); |
282 | if (unlikely(dst->flags & DST_NOCACHE) && !newrefcnt) { | ||
283 | dst = dst_destroy(dst); | ||
284 | if (dst) | ||
285 | __dst_free(dst); | ||
286 | } | ||
277 | } | 287 | } |
278 | } | 288 | } |
279 | EXPORT_SYMBOL(dst_release); | 289 | EXPORT_SYMBOL(dst_release); |
280 | 290 | ||
291 | u32 *dst_cow_metrics_generic(struct dst_entry *dst, unsigned long old) | ||
292 | { | ||
293 | u32 *p = kmalloc(sizeof(u32) * RTAX_MAX, GFP_ATOMIC); | ||
294 | |||
295 | if (p) { | ||
296 | u32 *old_p = __DST_METRICS_PTR(old); | ||
297 | unsigned long prev, new; | ||
298 | |||
299 | memcpy(p, old_p, sizeof(u32) * RTAX_MAX); | ||
300 | |||
301 | new = (unsigned long) p; | ||
302 | prev = cmpxchg(&dst->_metrics, old, new); | ||
303 | |||
304 | if (prev != old) { | ||
305 | kfree(p); | ||
306 | p = __DST_METRICS_PTR(prev); | ||
307 | if (prev & DST_METRICS_READ_ONLY) | ||
308 | p = NULL; | ||
309 | } | ||
310 | } | ||
311 | return p; | ||
312 | } | ||
313 | EXPORT_SYMBOL(dst_cow_metrics_generic); | ||
314 | |||
315 | /* Caller asserts that dst_metrics_read_only(dst) is false. */ | ||
316 | void __dst_destroy_metrics_generic(struct dst_entry *dst, unsigned long old) | ||
317 | { | ||
318 | unsigned long prev, new; | ||
319 | |||
320 | new = ((unsigned long) dst_default_metrics) | DST_METRICS_READ_ONLY; | ||
321 | prev = cmpxchg(&dst->_metrics, old, new); | ||
322 | if (prev == old) | ||
323 | kfree(__DST_METRICS_PTR(old)); | ||
324 | } | ||
325 | EXPORT_SYMBOL(__dst_destroy_metrics_generic); | ||
326 | |||
327 | /** | ||
328 | * skb_dst_set_noref - sets skb dst, without a reference | ||
329 | * @skb: buffer | ||
330 | * @dst: dst entry | ||
331 | * | ||
332 | * Sets skb dst, assuming a reference was not taken on dst | ||
333 | * skb_dst_drop() should not dst_release() this dst | ||
334 | */ | ||
335 | void skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst) | ||
336 | { | ||
337 | WARN_ON(!rcu_read_lock_held() && !rcu_read_lock_bh_held()); | ||
338 | /* If dst not in cache, we must take a reference, because | ||
339 | * dst_release() will destroy dst as soon as its refcount becomes zero | ||
340 | */ | ||
341 | if (unlikely(dst->flags & DST_NOCACHE)) { | ||
342 | dst_hold(dst); | ||
343 | skb_dst_set(skb, dst); | ||
344 | } else { | ||
345 | skb->_skb_refdst = (unsigned long)dst | SKB_DST_NOREF; | ||
346 | } | ||
347 | } | ||
348 | EXPORT_SYMBOL(skb_dst_set_noref); | ||
349 | |||
281 | /* Dirty hack. We did it in 2.2 (in __dst_free), | 350 | /* Dirty hack. We did it in 2.2 (in __dst_free), |
282 | * we have _very_ good reasons not to repeat | 351 | * we have _very_ good reasons not to repeat |
283 | * this mistake in 2.3, but we have no choice | 352 | * this mistake in 2.3, but we have no choice |
@@ -343,6 +412,7 @@ static int dst_dev_event(struct notifier_block *this, unsigned long event, | |||
343 | 412 | ||
344 | static struct notifier_block dst_dev_notifier = { | 413 | static struct notifier_block dst_dev_notifier = { |
345 | .notifier_call = dst_dev_event, | 414 | .notifier_call = dst_dev_event, |
415 | .priority = -10, /* must be called after other network notifiers */ | ||
346 | }; | 416 | }; |
347 | 417 | ||
348 | void __init dst_init(void) | 418 | void __init dst_init(void) |
diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 8451ab481095..fd14116ad7f0 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c | |||
@@ -19,7 +19,10 @@ | |||
19 | #include <linux/netdevice.h> | 19 | #include <linux/netdevice.h> |
20 | #include <linux/bitops.h> | 20 | #include <linux/bitops.h> |
21 | #include <linux/uaccess.h> | 21 | #include <linux/uaccess.h> |
22 | #include <linux/vmalloc.h> | ||
22 | #include <linux/slab.h> | 23 | #include <linux/slab.h> |
24 | #include <linux/rtnetlink.h> | ||
25 | #include <linux/sched.h> | ||
23 | 26 | ||
24 | /* | 27 | /* |
25 | * Some useful ethtool_ops methods that're device independent. | 28 | * Some useful ethtool_ops methods that're device independent. |
@@ -33,12 +36,6 @@ u32 ethtool_op_get_link(struct net_device *dev) | |||
33 | } | 36 | } |
34 | EXPORT_SYMBOL(ethtool_op_get_link); | 37 | EXPORT_SYMBOL(ethtool_op_get_link); |
35 | 38 | ||
36 | u32 ethtool_op_get_rx_csum(struct net_device *dev) | ||
37 | { | ||
38 | return (dev->features & NETIF_F_ALL_CSUM) != 0; | ||
39 | } | ||
40 | EXPORT_SYMBOL(ethtool_op_get_rx_csum); | ||
41 | |||
42 | u32 ethtool_op_get_tx_csum(struct net_device *dev) | 39 | u32 ethtool_op_get_tx_csum(struct net_device *dev) |
43 | { | 40 | { |
44 | return (dev->features & NETIF_F_ALL_CSUM) != 0; | 41 | return (dev->features & NETIF_F_ALL_CSUM) != 0; |
@@ -54,6 +51,7 @@ int ethtool_op_set_tx_csum(struct net_device *dev, u32 data) | |||
54 | 51 | ||
55 | return 0; | 52 | return 0; |
56 | } | 53 | } |
54 | EXPORT_SYMBOL(ethtool_op_set_tx_csum); | ||
57 | 55 | ||
58 | int ethtool_op_set_tx_hw_csum(struct net_device *dev, u32 data) | 56 | int ethtool_op_set_tx_hw_csum(struct net_device *dev, u32 data) |
59 | { | 57 | { |
@@ -131,7 +129,8 @@ EXPORT_SYMBOL(ethtool_op_set_ufo); | |||
131 | * NETIF_F_xxx values in include/linux/netdevice.h | 129 | * NETIF_F_xxx values in include/linux/netdevice.h |
132 | */ | 130 | */ |
133 | static const u32 flags_dup_features = | 131 | static const u32 flags_dup_features = |
134 | (ETH_FLAG_LRO | ETH_FLAG_NTUPLE | ETH_FLAG_RXHASH); | 132 | (ETH_FLAG_LRO | ETH_FLAG_RXVLAN | ETH_FLAG_TXVLAN | ETH_FLAG_NTUPLE | |
133 | ETH_FLAG_RXHASH); | ||
135 | 134 | ||
136 | u32 ethtool_op_get_flags(struct net_device *dev) | 135 | u32 ethtool_op_get_flags(struct net_device *dev) |
137 | { | 136 | { |
@@ -144,9 +143,24 @@ u32 ethtool_op_get_flags(struct net_device *dev) | |||
144 | } | 143 | } |
145 | EXPORT_SYMBOL(ethtool_op_get_flags); | 144 | EXPORT_SYMBOL(ethtool_op_get_flags); |
146 | 145 | ||
146 | /* Check if device can enable (or disable) particular feature coded in "data" | ||
147 | * argument. Flags "supported" describe features that can be toggled by device. | ||
148 | * If feature can not be toggled, it state (enabled or disabled) must match | ||
149 | * hardcoded device features state, otherwise flags are marked as invalid. | ||
150 | */ | ||
151 | bool ethtool_invalid_flags(struct net_device *dev, u32 data, u32 supported) | ||
152 | { | ||
153 | u32 features = dev->features & flags_dup_features; | ||
154 | /* "data" can contain only flags_dup_features bits, | ||
155 | * see __ethtool_set_flags */ | ||
156 | |||
157 | return (features & ~supported) != (data & ~supported); | ||
158 | } | ||
159 | EXPORT_SYMBOL(ethtool_invalid_flags); | ||
160 | |||
147 | int ethtool_op_set_flags(struct net_device *dev, u32 data, u32 supported) | 161 | int ethtool_op_set_flags(struct net_device *dev, u32 data, u32 supported) |
148 | { | 162 | { |
149 | if (data & ~supported) | 163 | if (ethtool_invalid_flags(dev, data, supported)) |
150 | return -EINVAL; | 164 | return -EINVAL; |
151 | 165 | ||
152 | dev->features = ((dev->features & ~flags_dup_features) | | 166 | dev->features = ((dev->features & ~flags_dup_features) | |
@@ -169,6 +183,404 @@ EXPORT_SYMBOL(ethtool_ntuple_flush); | |||
169 | 183 | ||
170 | /* Handlers for each ethtool command */ | 184 | /* Handlers for each ethtool command */ |
171 | 185 | ||
186 | #define ETHTOOL_DEV_FEATURE_WORDS 1 | ||
187 | |||
188 | static void ethtool_get_features_compat(struct net_device *dev, | ||
189 | struct ethtool_get_features_block *features) | ||
190 | { | ||
191 | if (!dev->ethtool_ops) | ||
192 | return; | ||
193 | |||
194 | /* getting RX checksum */ | ||
195 | if (dev->ethtool_ops->get_rx_csum) | ||
196 | if (dev->ethtool_ops->get_rx_csum(dev)) | ||
197 | features[0].active |= NETIF_F_RXCSUM; | ||
198 | |||
199 | /* mark legacy-changeable features */ | ||
200 | if (dev->ethtool_ops->set_sg) | ||
201 | features[0].available |= NETIF_F_SG; | ||
202 | if (dev->ethtool_ops->set_tx_csum) | ||
203 | features[0].available |= NETIF_F_ALL_CSUM; | ||
204 | if (dev->ethtool_ops->set_tso) | ||
205 | features[0].available |= NETIF_F_ALL_TSO; | ||
206 | if (dev->ethtool_ops->set_rx_csum) | ||
207 | features[0].available |= NETIF_F_RXCSUM; | ||
208 | if (dev->ethtool_ops->set_flags) | ||
209 | features[0].available |= flags_dup_features; | ||
210 | } | ||
211 | |||
212 | static int ethtool_set_feature_compat(struct net_device *dev, | ||
213 | int (*legacy_set)(struct net_device *, u32), | ||
214 | struct ethtool_set_features_block *features, u32 mask) | ||
215 | { | ||
216 | u32 do_set; | ||
217 | |||
218 | if (!legacy_set) | ||
219 | return 0; | ||
220 | |||
221 | if (!(features[0].valid & mask)) | ||
222 | return 0; | ||
223 | |||
224 | features[0].valid &= ~mask; | ||
225 | |||
226 | do_set = !!(features[0].requested & mask); | ||
227 | |||
228 | if (legacy_set(dev, do_set) < 0) | ||
229 | netdev_info(dev, | ||
230 | "Legacy feature change (%s) failed for 0x%08x\n", | ||
231 | do_set ? "set" : "clear", mask); | ||
232 | |||
233 | return 1; | ||
234 | } | ||
235 | |||
236 | static int ethtool_set_flags_compat(struct net_device *dev, | ||
237 | int (*legacy_set)(struct net_device *, u32), | ||
238 | struct ethtool_set_features_block *features, u32 mask) | ||
239 | { | ||
240 | u32 value; | ||
241 | |||
242 | if (!legacy_set) | ||
243 | return 0; | ||
244 | |||
245 | if (!(features[0].valid & mask)) | ||
246 | return 0; | ||
247 | |||
248 | value = dev->features & ~features[0].valid; | ||
249 | value |= features[0].requested; | ||
250 | |||
251 | features[0].valid &= ~mask; | ||
252 | |||
253 | if (legacy_set(dev, value & mask) < 0) | ||
254 | netdev_info(dev, "Legacy flags change failed\n"); | ||
255 | |||
256 | return 1; | ||
257 | } | ||
258 | |||
259 | static int ethtool_set_features_compat(struct net_device *dev, | ||
260 | struct ethtool_set_features_block *features) | ||
261 | { | ||
262 | int compat; | ||
263 | |||
264 | if (!dev->ethtool_ops) | ||
265 | return 0; | ||
266 | |||
267 | compat = ethtool_set_feature_compat(dev, dev->ethtool_ops->set_sg, | ||
268 | features, NETIF_F_SG); | ||
269 | compat |= ethtool_set_feature_compat(dev, dev->ethtool_ops->set_tx_csum, | ||
270 | features, NETIF_F_ALL_CSUM); | ||
271 | compat |= ethtool_set_feature_compat(dev, dev->ethtool_ops->set_tso, | ||
272 | features, NETIF_F_ALL_TSO); | ||
273 | compat |= ethtool_set_feature_compat(dev, dev->ethtool_ops->set_rx_csum, | ||
274 | features, NETIF_F_RXCSUM); | ||
275 | compat |= ethtool_set_flags_compat(dev, dev->ethtool_ops->set_flags, | ||
276 | features, flags_dup_features); | ||
277 | |||
278 | return compat; | ||
279 | } | ||
280 | |||
281 | static int ethtool_get_features(struct net_device *dev, void __user *useraddr) | ||
282 | { | ||
283 | struct ethtool_gfeatures cmd = { | ||
284 | .cmd = ETHTOOL_GFEATURES, | ||
285 | .size = ETHTOOL_DEV_FEATURE_WORDS, | ||
286 | }; | ||
287 | struct ethtool_get_features_block features[ETHTOOL_DEV_FEATURE_WORDS] = { | ||
288 | { | ||
289 | .available = dev->hw_features, | ||
290 | .requested = dev->wanted_features, | ||
291 | .active = dev->features, | ||
292 | .never_changed = NETIF_F_NEVER_CHANGE, | ||
293 | }, | ||
294 | }; | ||
295 | u32 __user *sizeaddr; | ||
296 | u32 copy_size; | ||
297 | |||
298 | ethtool_get_features_compat(dev, features); | ||
299 | |||
300 | sizeaddr = useraddr + offsetof(struct ethtool_gfeatures, size); | ||
301 | if (get_user(copy_size, sizeaddr)) | ||
302 | return -EFAULT; | ||
303 | |||
304 | if (copy_size > ETHTOOL_DEV_FEATURE_WORDS) | ||
305 | copy_size = ETHTOOL_DEV_FEATURE_WORDS; | ||
306 | |||
307 | if (copy_to_user(useraddr, &cmd, sizeof(cmd))) | ||
308 | return -EFAULT; | ||
309 | useraddr += sizeof(cmd); | ||
310 | if (copy_to_user(useraddr, features, copy_size * sizeof(*features))) | ||
311 | return -EFAULT; | ||
312 | |||
313 | return 0; | ||
314 | } | ||
315 | |||
316 | static int ethtool_set_features(struct net_device *dev, void __user *useraddr) | ||
317 | { | ||
318 | struct ethtool_sfeatures cmd; | ||
319 | struct ethtool_set_features_block features[ETHTOOL_DEV_FEATURE_WORDS]; | ||
320 | int ret = 0; | ||
321 | |||
322 | if (copy_from_user(&cmd, useraddr, sizeof(cmd))) | ||
323 | return -EFAULT; | ||
324 | useraddr += sizeof(cmd); | ||
325 | |||
326 | if (cmd.size != ETHTOOL_DEV_FEATURE_WORDS) | ||
327 | return -EINVAL; | ||
328 | |||
329 | if (copy_from_user(features, useraddr, sizeof(features))) | ||
330 | return -EFAULT; | ||
331 | |||
332 | if (features[0].valid & ~NETIF_F_ETHTOOL_BITS) | ||
333 | return -EINVAL; | ||
334 | |||
335 | if (ethtool_set_features_compat(dev, features)) | ||
336 | ret |= ETHTOOL_F_COMPAT; | ||
337 | |||
338 | if (features[0].valid & ~dev->hw_features) { | ||
339 | features[0].valid &= dev->hw_features; | ||
340 | ret |= ETHTOOL_F_UNSUPPORTED; | ||
341 | } | ||
342 | |||
343 | dev->wanted_features &= ~features[0].valid; | ||
344 | dev->wanted_features |= features[0].valid & features[0].requested; | ||
345 | __netdev_update_features(dev); | ||
346 | |||
347 | if ((dev->wanted_features ^ dev->features) & features[0].valid) | ||
348 | ret |= ETHTOOL_F_WISH; | ||
349 | |||
350 | return ret; | ||
351 | } | ||
352 | |||
353 | static const char netdev_features_strings[ETHTOOL_DEV_FEATURE_WORDS * 32][ETH_GSTRING_LEN] = { | ||
354 | /* NETIF_F_SG */ "tx-scatter-gather", | ||
355 | /* NETIF_F_IP_CSUM */ "tx-checksum-ipv4", | ||
356 | /* NETIF_F_NO_CSUM */ "tx-checksum-unneeded", | ||
357 | /* NETIF_F_HW_CSUM */ "tx-checksum-ip-generic", | ||
358 | /* NETIF_F_IPV6_CSUM */ "tx-checksum-ipv6", | ||
359 | /* NETIF_F_HIGHDMA */ "highdma", | ||
360 | /* NETIF_F_FRAGLIST */ "tx-scatter-gather-fraglist", | ||
361 | /* NETIF_F_HW_VLAN_TX */ "tx-vlan-hw-insert", | ||
362 | |||
363 | /* NETIF_F_HW_VLAN_RX */ "rx-vlan-hw-parse", | ||
364 | /* NETIF_F_HW_VLAN_FILTER */ "rx-vlan-filter", | ||
365 | /* NETIF_F_VLAN_CHALLENGED */ "vlan-challenged", | ||
366 | /* NETIF_F_GSO */ "tx-generic-segmentation", | ||
367 | /* NETIF_F_LLTX */ "tx-lockless", | ||
368 | /* NETIF_F_NETNS_LOCAL */ "netns-local", | ||
369 | /* NETIF_F_GRO */ "rx-gro", | ||
370 | /* NETIF_F_LRO */ "rx-lro", | ||
371 | |||
372 | /* NETIF_F_TSO */ "tx-tcp-segmentation", | ||
373 | /* NETIF_F_UFO */ "tx-udp-fragmentation", | ||
374 | /* NETIF_F_GSO_ROBUST */ "tx-gso-robust", | ||
375 | /* NETIF_F_TSO_ECN */ "tx-tcp-ecn-segmentation", | ||
376 | /* NETIF_F_TSO6 */ "tx-tcp6-segmentation", | ||
377 | /* NETIF_F_FSO */ "tx-fcoe-segmentation", | ||
378 | "", | ||
379 | "", | ||
380 | |||
381 | /* NETIF_F_FCOE_CRC */ "tx-checksum-fcoe-crc", | ||
382 | /* NETIF_F_SCTP_CSUM */ "tx-checksum-sctp", | ||
383 | /* NETIF_F_FCOE_MTU */ "fcoe-mtu", | ||
384 | /* NETIF_F_NTUPLE */ "rx-ntuple-filter", | ||
385 | /* NETIF_F_RXHASH */ "rx-hashing", | ||
386 | /* NETIF_F_RXCSUM */ "rx-checksum", | ||
387 | /* NETIF_F_NOCACHE_COPY */ "tx-nocache-copy", | ||
388 | /* NETIF_F_LOOPBACK */ "loopback", | ||
389 | }; | ||
390 | |||
391 | static int __ethtool_get_sset_count(struct net_device *dev, int sset) | ||
392 | { | ||
393 | const struct ethtool_ops *ops = dev->ethtool_ops; | ||
394 | |||
395 | if (sset == ETH_SS_FEATURES) | ||
396 | return ARRAY_SIZE(netdev_features_strings); | ||
397 | |||
398 | if (ops && ops->get_sset_count && ops->get_strings) | ||
399 | return ops->get_sset_count(dev, sset); | ||
400 | else | ||
401 | return -EOPNOTSUPP; | ||
402 | } | ||
403 | |||
404 | static void __ethtool_get_strings(struct net_device *dev, | ||
405 | u32 stringset, u8 *data) | ||
406 | { | ||
407 | const struct ethtool_ops *ops = dev->ethtool_ops; | ||
408 | |||
409 | if (stringset == ETH_SS_FEATURES) | ||
410 | memcpy(data, netdev_features_strings, | ||
411 | sizeof(netdev_features_strings)); | ||
412 | else | ||
413 | /* ops->get_strings is valid because checked earlier */ | ||
414 | ops->get_strings(dev, stringset, data); | ||
415 | } | ||
416 | |||
417 | static u32 ethtool_get_feature_mask(u32 eth_cmd) | ||
418 | { | ||
419 | /* feature masks of legacy discrete ethtool ops */ | ||
420 | |||
421 | switch (eth_cmd) { | ||
422 | case ETHTOOL_GTXCSUM: | ||
423 | case ETHTOOL_STXCSUM: | ||
424 | return NETIF_F_ALL_CSUM | NETIF_F_SCTP_CSUM; | ||
425 | case ETHTOOL_GRXCSUM: | ||
426 | case ETHTOOL_SRXCSUM: | ||
427 | return NETIF_F_RXCSUM; | ||
428 | case ETHTOOL_GSG: | ||
429 | case ETHTOOL_SSG: | ||
430 | return NETIF_F_SG; | ||
431 | case ETHTOOL_GTSO: | ||
432 | case ETHTOOL_STSO: | ||
433 | return NETIF_F_ALL_TSO; | ||
434 | case ETHTOOL_GUFO: | ||
435 | case ETHTOOL_SUFO: | ||
436 | return NETIF_F_UFO; | ||
437 | case ETHTOOL_GGSO: | ||
438 | case ETHTOOL_SGSO: | ||
439 | return NETIF_F_GSO; | ||
440 | case ETHTOOL_GGRO: | ||
441 | case ETHTOOL_SGRO: | ||
442 | return NETIF_F_GRO; | ||
443 | default: | ||
444 | BUG(); | ||
445 | } | ||
446 | } | ||
447 | |||
448 | static void *__ethtool_get_one_feature_actor(struct net_device *dev, u32 ethcmd) | ||
449 | { | ||
450 | const struct ethtool_ops *ops = dev->ethtool_ops; | ||
451 | |||
452 | if (!ops) | ||
453 | return NULL; | ||
454 | |||
455 | switch (ethcmd) { | ||
456 | case ETHTOOL_GTXCSUM: | ||
457 | return ops->get_tx_csum; | ||
458 | case ETHTOOL_GRXCSUM: | ||
459 | return ops->get_rx_csum; | ||
460 | case ETHTOOL_SSG: | ||
461 | return ops->get_sg; | ||
462 | case ETHTOOL_STSO: | ||
463 | return ops->get_tso; | ||
464 | case ETHTOOL_SUFO: | ||
465 | return ops->get_ufo; | ||
466 | default: | ||
467 | return NULL; | ||
468 | } | ||
469 | } | ||
470 | |||
471 | static u32 __ethtool_get_rx_csum_oldbug(struct net_device *dev) | ||
472 | { | ||
473 | return !!(dev->features & NETIF_F_ALL_CSUM); | ||
474 | } | ||
475 | |||
476 | static int ethtool_get_one_feature(struct net_device *dev, | ||
477 | char __user *useraddr, u32 ethcmd) | ||
478 | { | ||
479 | u32 mask = ethtool_get_feature_mask(ethcmd); | ||
480 | struct ethtool_value edata = { | ||
481 | .cmd = ethcmd, | ||
482 | .data = !!(dev->features & mask), | ||
483 | }; | ||
484 | |||
485 | /* compatibility with discrete get_ ops */ | ||
486 | if (!(dev->hw_features & mask)) { | ||
487 | u32 (*actor)(struct net_device *); | ||
488 | |||
489 | actor = __ethtool_get_one_feature_actor(dev, ethcmd); | ||
490 | |||
491 | /* bug compatibility with old get_rx_csum */ | ||
492 | if (ethcmd == ETHTOOL_GRXCSUM && !actor) | ||
493 | actor = __ethtool_get_rx_csum_oldbug; | ||
494 | |||
495 | if (actor) | ||
496 | edata.data = actor(dev); | ||
497 | } | ||
498 | |||
499 | if (copy_to_user(useraddr, &edata, sizeof(edata))) | ||
500 | return -EFAULT; | ||
501 | return 0; | ||
502 | } | ||
503 | |||
504 | static int __ethtool_set_tx_csum(struct net_device *dev, u32 data); | ||
505 | static int __ethtool_set_rx_csum(struct net_device *dev, u32 data); | ||
506 | static int __ethtool_set_sg(struct net_device *dev, u32 data); | ||
507 | static int __ethtool_set_tso(struct net_device *dev, u32 data); | ||
508 | static int __ethtool_set_ufo(struct net_device *dev, u32 data); | ||
509 | |||
510 | static int ethtool_set_one_feature(struct net_device *dev, | ||
511 | void __user *useraddr, u32 ethcmd) | ||
512 | { | ||
513 | struct ethtool_value edata; | ||
514 | u32 mask; | ||
515 | |||
516 | if (copy_from_user(&edata, useraddr, sizeof(edata))) | ||
517 | return -EFAULT; | ||
518 | |||
519 | mask = ethtool_get_feature_mask(ethcmd); | ||
520 | mask &= dev->hw_features; | ||
521 | if (mask) { | ||
522 | if (edata.data) | ||
523 | dev->wanted_features |= mask; | ||
524 | else | ||
525 | dev->wanted_features &= ~mask; | ||
526 | |||
527 | __netdev_update_features(dev); | ||
528 | return 0; | ||
529 | } | ||
530 | |||
531 | /* Driver is not converted to ndo_fix_features or does not | ||
532 | * support changing this offload. In the latter case it won't | ||
533 | * have corresponding ethtool_ops field set. | ||
534 | * | ||
535 | * Following part is to be removed after all drivers advertise | ||
536 | * their changeable features in netdev->hw_features and stop | ||
537 | * using discrete offload setting ops. | ||
538 | */ | ||
539 | |||
540 | switch (ethcmd) { | ||
541 | case ETHTOOL_STXCSUM: | ||
542 | return __ethtool_set_tx_csum(dev, edata.data); | ||
543 | case ETHTOOL_SRXCSUM: | ||
544 | return __ethtool_set_rx_csum(dev, edata.data); | ||
545 | case ETHTOOL_SSG: | ||
546 | return __ethtool_set_sg(dev, edata.data); | ||
547 | case ETHTOOL_STSO: | ||
548 | return __ethtool_set_tso(dev, edata.data); | ||
549 | case ETHTOOL_SUFO: | ||
550 | return __ethtool_set_ufo(dev, edata.data); | ||
551 | default: | ||
552 | return -EOPNOTSUPP; | ||
553 | } | ||
554 | } | ||
555 | |||
556 | int __ethtool_set_flags(struct net_device *dev, u32 data) | ||
557 | { | ||
558 | u32 changed; | ||
559 | |||
560 | if (data & ~flags_dup_features) | ||
561 | return -EINVAL; | ||
562 | |||
563 | /* legacy set_flags() op */ | ||
564 | if (dev->ethtool_ops->set_flags) { | ||
565 | if (unlikely(dev->hw_features & flags_dup_features)) | ||
566 | netdev_warn(dev, | ||
567 | "driver BUG: mixed hw_features and set_flags()\n"); | ||
568 | return dev->ethtool_ops->set_flags(dev, data); | ||
569 | } | ||
570 | |||
571 | /* allow changing only bits set in hw_features */ | ||
572 | changed = (data ^ dev->features) & flags_dup_features; | ||
573 | if (changed & ~dev->hw_features) | ||
574 | return (changed & dev->hw_features) ? -EINVAL : -EOPNOTSUPP; | ||
575 | |||
576 | dev->wanted_features = | ||
577 | (dev->wanted_features & ~changed) | (data & dev->hw_features); | ||
578 | |||
579 | __netdev_update_features(dev); | ||
580 | |||
581 | return 0; | ||
582 | } | ||
583 | |||
172 | static int ethtool_get_settings(struct net_device *dev, void __user *useraddr) | 584 | static int ethtool_get_settings(struct net_device *dev, void __user *useraddr) |
173 | { | 585 | { |
174 | struct ethtool_cmd cmd = { .cmd = ETHTOOL_GSET }; | 586 | struct ethtool_cmd cmd = { .cmd = ETHTOOL_GSET }; |
@@ -205,18 +617,24 @@ static noinline_for_stack int ethtool_get_drvinfo(struct net_device *dev, | |||
205 | struct ethtool_drvinfo info; | 617 | struct ethtool_drvinfo info; |
206 | const struct ethtool_ops *ops = dev->ethtool_ops; | 618 | const struct ethtool_ops *ops = dev->ethtool_ops; |
207 | 619 | ||
208 | if (!ops->get_drvinfo) | ||
209 | return -EOPNOTSUPP; | ||
210 | |||
211 | memset(&info, 0, sizeof(info)); | 620 | memset(&info, 0, sizeof(info)); |
212 | info.cmd = ETHTOOL_GDRVINFO; | 621 | info.cmd = ETHTOOL_GDRVINFO; |
213 | ops->get_drvinfo(dev, &info); | 622 | if (ops && ops->get_drvinfo) { |
623 | ops->get_drvinfo(dev, &info); | ||
624 | } else if (dev->dev.parent && dev->dev.parent->driver) { | ||
625 | strlcpy(info.bus_info, dev_name(dev->dev.parent), | ||
626 | sizeof(info.bus_info)); | ||
627 | strlcpy(info.driver, dev->dev.parent->driver->name, | ||
628 | sizeof(info.driver)); | ||
629 | } else { | ||
630 | return -EOPNOTSUPP; | ||
631 | } | ||
214 | 632 | ||
215 | /* | 633 | /* |
216 | * this method of obtaining string set info is deprecated; | 634 | * this method of obtaining string set info is deprecated; |
217 | * Use ETHTOOL_GSSET_INFO instead. | 635 | * Use ETHTOOL_GSSET_INFO instead. |
218 | */ | 636 | */ |
219 | if (ops->get_sset_count) { | 637 | if (ops && ops->get_sset_count) { |
220 | int rc; | 638 | int rc; |
221 | 639 | ||
222 | rc = ops->get_sset_count(dev, ETH_SS_TEST); | 640 | rc = ops->get_sset_count(dev, ETH_SS_TEST); |
@@ -229,9 +647,9 @@ static noinline_for_stack int ethtool_get_drvinfo(struct net_device *dev, | |||
229 | if (rc >= 0) | 647 | if (rc >= 0) |
230 | info.n_priv_flags = rc; | 648 | info.n_priv_flags = rc; |
231 | } | 649 | } |
232 | if (ops->get_regs_len) | 650 | if (ops && ops->get_regs_len) |
233 | info.regdump_len = ops->get_regs_len(dev); | 651 | info.regdump_len = ops->get_regs_len(dev); |
234 | if (ops->get_eeprom_len) | 652 | if (ops && ops->get_eeprom_len) |
235 | info.eedump_len = ops->get_eeprom_len(dev); | 653 | info.eedump_len = ops->get_eeprom_len(dev); |
236 | 654 | ||
237 | if (copy_to_user(useraddr, &info, sizeof(info))) | 655 | if (copy_to_user(useraddr, &info, sizeof(info))) |
@@ -243,14 +661,10 @@ static noinline_for_stack int ethtool_get_sset_info(struct net_device *dev, | |||
243 | void __user *useraddr) | 661 | void __user *useraddr) |
244 | { | 662 | { |
245 | struct ethtool_sset_info info; | 663 | struct ethtool_sset_info info; |
246 | const struct ethtool_ops *ops = dev->ethtool_ops; | ||
247 | u64 sset_mask; | 664 | u64 sset_mask; |
248 | int i, idx = 0, n_bits = 0, ret, rc; | 665 | int i, idx = 0, n_bits = 0, ret, rc; |
249 | u32 *info_buf = NULL; | 666 | u32 *info_buf = NULL; |
250 | 667 | ||
251 | if (!ops->get_sset_count) | ||
252 | return -EOPNOTSUPP; | ||
253 | |||
254 | if (copy_from_user(&info, useraddr, sizeof(info))) | 668 | if (copy_from_user(&info, useraddr, sizeof(info))) |
255 | return -EFAULT; | 669 | return -EFAULT; |
256 | 670 | ||
@@ -277,7 +691,7 @@ static noinline_for_stack int ethtool_get_sset_info(struct net_device *dev, | |||
277 | if (!(sset_mask & (1ULL << i))) | 691 | if (!(sset_mask & (1ULL << i))) |
278 | continue; | 692 | continue; |
279 | 693 | ||
280 | rc = ops->get_sset_count(dev, i); | 694 | rc = __ethtool_get_sset_count(dev, i); |
281 | if (rc >= 0) { | 695 | if (rc >= 0) { |
282 | info.sset_mask |= (1ULL << i); | 696 | info.sset_mask |= (1ULL << i); |
283 | info_buf[idx++] = rc; | 697 | info_buf[idx++] = rc; |
@@ -479,6 +893,38 @@ static void __rx_ntuple_filter_add(struct ethtool_rx_ntuple_list *list, | |||
479 | list->count++; | 893 | list->count++; |
480 | } | 894 | } |
481 | 895 | ||
896 | /* | ||
897 | * ethtool does not (or did not) set masks for flow parameters that are | ||
898 | * not specified, so if both value and mask are 0 then this must be | ||
899 | * treated as equivalent to a mask with all bits set. Implement that | ||
900 | * here rather than in drivers. | ||
901 | */ | ||
902 | static void rx_ntuple_fix_masks(struct ethtool_rx_ntuple_flow_spec *fs) | ||
903 | { | ||
904 | struct ethtool_tcpip4_spec *entry = &fs->h_u.tcp_ip4_spec; | ||
905 | struct ethtool_tcpip4_spec *mask = &fs->m_u.tcp_ip4_spec; | ||
906 | |||
907 | if (fs->flow_type != TCP_V4_FLOW && | ||
908 | fs->flow_type != UDP_V4_FLOW && | ||
909 | fs->flow_type != SCTP_V4_FLOW) | ||
910 | return; | ||
911 | |||
912 | if (!(entry->ip4src | mask->ip4src)) | ||
913 | mask->ip4src = htonl(0xffffffff); | ||
914 | if (!(entry->ip4dst | mask->ip4dst)) | ||
915 | mask->ip4dst = htonl(0xffffffff); | ||
916 | if (!(entry->psrc | mask->psrc)) | ||
917 | mask->psrc = htons(0xffff); | ||
918 | if (!(entry->pdst | mask->pdst)) | ||
919 | mask->pdst = htons(0xffff); | ||
920 | if (!(entry->tos | mask->tos)) | ||
921 | mask->tos = 0xff; | ||
922 | if (!(fs->vlan_tag | fs->vlan_tag_mask)) | ||
923 | fs->vlan_tag_mask = 0xffff; | ||
924 | if (!(fs->data | fs->data_mask)) | ||
925 | fs->data_mask = 0xffffffffffffffffULL; | ||
926 | } | ||
927 | |||
482 | static noinline_for_stack int ethtool_set_rx_ntuple(struct net_device *dev, | 928 | static noinline_for_stack int ethtool_set_rx_ntuple(struct net_device *dev, |
483 | void __user *useraddr) | 929 | void __user *useraddr) |
484 | { | 930 | { |
@@ -487,12 +933,17 @@ static noinline_for_stack int ethtool_set_rx_ntuple(struct net_device *dev, | |||
487 | struct ethtool_rx_ntuple_flow_spec_container *fsc = NULL; | 933 | struct ethtool_rx_ntuple_flow_spec_container *fsc = NULL; |
488 | int ret; | 934 | int ret; |
489 | 935 | ||
936 | if (!ops->set_rx_ntuple) | ||
937 | return -EOPNOTSUPP; | ||
938 | |||
490 | if (!(dev->features & NETIF_F_NTUPLE)) | 939 | if (!(dev->features & NETIF_F_NTUPLE)) |
491 | return -EINVAL; | 940 | return -EINVAL; |
492 | 941 | ||
493 | if (copy_from_user(&cmd, useraddr, sizeof(cmd))) | 942 | if (copy_from_user(&cmd, useraddr, sizeof(cmd))) |
494 | return -EFAULT; | 943 | return -EFAULT; |
495 | 944 | ||
945 | rx_ntuple_fix_masks(&cmd.fs); | ||
946 | |||
496 | /* | 947 | /* |
497 | * Cache filter in dev struct for GET operation only if | 948 | * Cache filter in dev struct for GET operation only if |
498 | * the underlying driver doesn't have its own GET operation, and | 949 | * the underlying driver doesn't have its own GET operation, and |
@@ -667,19 +1118,19 @@ static int ethtool_get_rx_ntuple(struct net_device *dev, void __user *useraddr) | |||
667 | break; | 1118 | break; |
668 | case IP_USER_FLOW: | 1119 | case IP_USER_FLOW: |
669 | sprintf(p, "\tSrc IP addr: 0x%x\n", | 1120 | sprintf(p, "\tSrc IP addr: 0x%x\n", |
670 | fsc->fs.h_u.raw_ip4_spec.ip4src); | 1121 | fsc->fs.h_u.usr_ip4_spec.ip4src); |
671 | p += ETH_GSTRING_LEN; | 1122 | p += ETH_GSTRING_LEN; |
672 | num_strings++; | 1123 | num_strings++; |
673 | sprintf(p, "\tSrc IP mask: 0x%x\n", | 1124 | sprintf(p, "\tSrc IP mask: 0x%x\n", |
674 | fsc->fs.m_u.raw_ip4_spec.ip4src); | 1125 | fsc->fs.m_u.usr_ip4_spec.ip4src); |
675 | p += ETH_GSTRING_LEN; | 1126 | p += ETH_GSTRING_LEN; |
676 | num_strings++; | 1127 | num_strings++; |
677 | sprintf(p, "\tDest IP addr: 0x%x\n", | 1128 | sprintf(p, "\tDest IP addr: 0x%x\n", |
678 | fsc->fs.h_u.raw_ip4_spec.ip4dst); | 1129 | fsc->fs.h_u.usr_ip4_spec.ip4dst); |
679 | p += ETH_GSTRING_LEN; | 1130 | p += ETH_GSTRING_LEN; |
680 | num_strings++; | 1131 | num_strings++; |
681 | sprintf(p, "\tDest IP mask: 0x%x\n", | 1132 | sprintf(p, "\tDest IP mask: 0x%x\n", |
682 | fsc->fs.m_u.raw_ip4_spec.ip4dst); | 1133 | fsc->fs.m_u.usr_ip4_spec.ip4dst); |
683 | p += ETH_GSTRING_LEN; | 1134 | p += ETH_GSTRING_LEN; |
684 | num_strings++; | 1135 | num_strings++; |
685 | break; | 1136 | break; |
@@ -775,7 +1226,7 @@ static int ethtool_get_regs(struct net_device *dev, char __user *useraddr) | |||
775 | if (regs.len > reglen) | 1226 | if (regs.len > reglen) |
776 | regs.len = reglen; | 1227 | regs.len = reglen; |
777 | 1228 | ||
778 | regbuf = kzalloc(reglen, GFP_USER); | 1229 | regbuf = vzalloc(reglen); |
779 | if (!regbuf) | 1230 | if (!regbuf) |
780 | return -ENOMEM; | 1231 | return -ENOMEM; |
781 | 1232 | ||
@@ -790,7 +1241,7 @@ static int ethtool_get_regs(struct net_device *dev, char __user *useraddr) | |||
790 | ret = 0; | 1241 | ret = 0; |
791 | 1242 | ||
792 | out: | 1243 | out: |
793 | kfree(regbuf); | 1244 | vfree(regbuf); |
794 | return ret; | 1245 | return ret; |
795 | } | 1246 | } |
796 | 1247 | ||
@@ -849,6 +1300,20 @@ static int ethtool_nway_reset(struct net_device *dev) | |||
849 | return dev->ethtool_ops->nway_reset(dev); | 1300 | return dev->ethtool_ops->nway_reset(dev); |
850 | } | 1301 | } |
851 | 1302 | ||
1303 | static int ethtool_get_link(struct net_device *dev, char __user *useraddr) | ||
1304 | { | ||
1305 | struct ethtool_value edata = { .cmd = ETHTOOL_GLINK }; | ||
1306 | |||
1307 | if (!dev->ethtool_ops->get_link) | ||
1308 | return -EOPNOTSUPP; | ||
1309 | |||
1310 | edata.data = netif_running(dev) && dev->ethtool_ops->get_link(dev); | ||
1311 | |||
1312 | if (copy_to_user(useraddr, &edata, sizeof(edata))) | ||
1313 | return -EFAULT; | ||
1314 | return 0; | ||
1315 | } | ||
1316 | |||
852 | static int ethtool_get_eeprom(struct net_device *dev, void __user *useraddr) | 1317 | static int ethtool_get_eeprom(struct net_device *dev, void __user *useraddr) |
853 | { | 1318 | { |
854 | struct ethtool_eeprom eeprom; | 1319 | struct ethtool_eeprom eeprom; |
@@ -1004,6 +1469,35 @@ static int ethtool_set_ringparam(struct net_device *dev, void __user *useraddr) | |||
1004 | return dev->ethtool_ops->set_ringparam(dev, &ringparam); | 1469 | return dev->ethtool_ops->set_ringparam(dev, &ringparam); |
1005 | } | 1470 | } |
1006 | 1471 | ||
1472 | static noinline_for_stack int ethtool_get_channels(struct net_device *dev, | ||
1473 | void __user *useraddr) | ||
1474 | { | ||
1475 | struct ethtool_channels channels = { .cmd = ETHTOOL_GCHANNELS }; | ||
1476 | |||
1477 | if (!dev->ethtool_ops->get_channels) | ||
1478 | return -EOPNOTSUPP; | ||
1479 | |||
1480 | dev->ethtool_ops->get_channels(dev, &channels); | ||
1481 | |||
1482 | if (copy_to_user(useraddr, &channels, sizeof(channels))) | ||
1483 | return -EFAULT; | ||
1484 | return 0; | ||
1485 | } | ||
1486 | |||
1487 | static noinline_for_stack int ethtool_set_channels(struct net_device *dev, | ||
1488 | void __user *useraddr) | ||
1489 | { | ||
1490 | struct ethtool_channels channels; | ||
1491 | |||
1492 | if (!dev->ethtool_ops->set_channels) | ||
1493 | return -EOPNOTSUPP; | ||
1494 | |||
1495 | if (copy_from_user(&channels, useraddr, sizeof(channels))) | ||
1496 | return -EFAULT; | ||
1497 | |||
1498 | return dev->ethtool_ops->set_channels(dev, &channels); | ||
1499 | } | ||
1500 | |||
1007 | static int ethtool_get_pauseparam(struct net_device *dev, void __user *useraddr) | 1501 | static int ethtool_get_pauseparam(struct net_device *dev, void __user *useraddr) |
1008 | { | 1502 | { |
1009 | struct ethtool_pauseparam pauseparam = { ETHTOOL_GPAUSEPARAM }; | 1503 | struct ethtool_pauseparam pauseparam = { ETHTOOL_GPAUSEPARAM }; |
@@ -1035,6 +1529,12 @@ static int __ethtool_set_sg(struct net_device *dev, u32 data) | |||
1035 | { | 1529 | { |
1036 | int err; | 1530 | int err; |
1037 | 1531 | ||
1532 | if (!dev->ethtool_ops->set_sg) | ||
1533 | return -EOPNOTSUPP; | ||
1534 | |||
1535 | if (data && !(dev->features & NETIF_F_ALL_CSUM)) | ||
1536 | return -EINVAL; | ||
1537 | |||
1038 | if (!data && dev->ethtool_ops->set_tso) { | 1538 | if (!data && dev->ethtool_ops->set_tso) { |
1039 | err = dev->ethtool_ops->set_tso(dev, 0); | 1539 | err = dev->ethtool_ops->set_tso(dev, 0); |
1040 | if (err) | 1540 | if (err) |
@@ -1049,140 +1549,55 @@ static int __ethtool_set_sg(struct net_device *dev, u32 data) | |||
1049 | return dev->ethtool_ops->set_sg(dev, data); | 1549 | return dev->ethtool_ops->set_sg(dev, data); |
1050 | } | 1550 | } |
1051 | 1551 | ||
1052 | static int ethtool_set_tx_csum(struct net_device *dev, char __user *useraddr) | 1552 | static int __ethtool_set_tx_csum(struct net_device *dev, u32 data) |
1053 | { | 1553 | { |
1054 | struct ethtool_value edata; | ||
1055 | int err; | 1554 | int err; |
1056 | 1555 | ||
1057 | if (!dev->ethtool_ops->set_tx_csum) | 1556 | if (!dev->ethtool_ops->set_tx_csum) |
1058 | return -EOPNOTSUPP; | 1557 | return -EOPNOTSUPP; |
1059 | 1558 | ||
1060 | if (copy_from_user(&edata, useraddr, sizeof(edata))) | 1559 | if (!data && dev->ethtool_ops->set_sg) { |
1061 | return -EFAULT; | ||
1062 | |||
1063 | if (!edata.data && dev->ethtool_ops->set_sg) { | ||
1064 | err = __ethtool_set_sg(dev, 0); | 1560 | err = __ethtool_set_sg(dev, 0); |
1065 | if (err) | 1561 | if (err) |
1066 | return err; | 1562 | return err; |
1067 | } | 1563 | } |
1068 | 1564 | ||
1069 | return dev->ethtool_ops->set_tx_csum(dev, edata.data); | 1565 | return dev->ethtool_ops->set_tx_csum(dev, data); |
1070 | } | 1566 | } |
1071 | EXPORT_SYMBOL(ethtool_op_set_tx_csum); | ||
1072 | 1567 | ||
1073 | static int ethtool_set_rx_csum(struct net_device *dev, char __user *useraddr) | 1568 | static int __ethtool_set_rx_csum(struct net_device *dev, u32 data) |
1074 | { | 1569 | { |
1075 | struct ethtool_value edata; | ||
1076 | |||
1077 | if (!dev->ethtool_ops->set_rx_csum) | 1570 | if (!dev->ethtool_ops->set_rx_csum) |
1078 | return -EOPNOTSUPP; | 1571 | return -EOPNOTSUPP; |
1079 | 1572 | ||
1080 | if (copy_from_user(&edata, useraddr, sizeof(edata))) | 1573 | if (!data) |
1081 | return -EFAULT; | ||
1082 | |||
1083 | if (!edata.data && dev->ethtool_ops->set_sg) | ||
1084 | dev->features &= ~NETIF_F_GRO; | 1574 | dev->features &= ~NETIF_F_GRO; |
1085 | 1575 | ||
1086 | return dev->ethtool_ops->set_rx_csum(dev, edata.data); | 1576 | return dev->ethtool_ops->set_rx_csum(dev, data); |
1087 | } | 1577 | } |
1088 | 1578 | ||
1089 | static int ethtool_set_sg(struct net_device *dev, char __user *useraddr) | 1579 | static int __ethtool_set_tso(struct net_device *dev, u32 data) |
1090 | { | 1580 | { |
1091 | struct ethtool_value edata; | ||
1092 | |||
1093 | if (!dev->ethtool_ops->set_sg) | ||
1094 | return -EOPNOTSUPP; | ||
1095 | |||
1096 | if (copy_from_user(&edata, useraddr, sizeof(edata))) | ||
1097 | return -EFAULT; | ||
1098 | |||
1099 | if (edata.data && | ||
1100 | !(dev->features & NETIF_F_ALL_CSUM)) | ||
1101 | return -EINVAL; | ||
1102 | |||
1103 | return __ethtool_set_sg(dev, edata.data); | ||
1104 | } | ||
1105 | |||
1106 | static int ethtool_set_tso(struct net_device *dev, char __user *useraddr) | ||
1107 | { | ||
1108 | struct ethtool_value edata; | ||
1109 | |||
1110 | if (!dev->ethtool_ops->set_tso) | 1581 | if (!dev->ethtool_ops->set_tso) |
1111 | return -EOPNOTSUPP; | 1582 | return -EOPNOTSUPP; |
1112 | 1583 | ||
1113 | if (copy_from_user(&edata, useraddr, sizeof(edata))) | 1584 | if (data && !(dev->features & NETIF_F_SG)) |
1114 | return -EFAULT; | ||
1115 | |||
1116 | if (edata.data && !(dev->features & NETIF_F_SG)) | ||
1117 | return -EINVAL; | 1585 | return -EINVAL; |
1118 | 1586 | ||
1119 | return dev->ethtool_ops->set_tso(dev, edata.data); | 1587 | return dev->ethtool_ops->set_tso(dev, data); |
1120 | } | 1588 | } |
1121 | 1589 | ||
1122 | static int ethtool_set_ufo(struct net_device *dev, char __user *useraddr) | 1590 | static int __ethtool_set_ufo(struct net_device *dev, u32 data) |
1123 | { | 1591 | { |
1124 | struct ethtool_value edata; | ||
1125 | |||
1126 | if (!dev->ethtool_ops->set_ufo) | 1592 | if (!dev->ethtool_ops->set_ufo) |
1127 | return -EOPNOTSUPP; | 1593 | return -EOPNOTSUPP; |
1128 | if (copy_from_user(&edata, useraddr, sizeof(edata))) | 1594 | if (data && !(dev->features & NETIF_F_SG)) |
1129 | return -EFAULT; | ||
1130 | if (edata.data && !(dev->features & NETIF_F_SG)) | ||
1131 | return -EINVAL; | 1595 | return -EINVAL; |
1132 | if (edata.data && !(dev->features & NETIF_F_HW_CSUM)) | 1596 | if (data && !((dev->features & NETIF_F_GEN_CSUM) || |
1597 | (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM)) | ||
1598 | == (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) | ||
1133 | return -EINVAL; | 1599 | return -EINVAL; |
1134 | return dev->ethtool_ops->set_ufo(dev, edata.data); | 1600 | return dev->ethtool_ops->set_ufo(dev, data); |
1135 | } | ||
1136 | |||
1137 | static int ethtool_get_gso(struct net_device *dev, char __user *useraddr) | ||
1138 | { | ||
1139 | struct ethtool_value edata = { ETHTOOL_GGSO }; | ||
1140 | |||
1141 | edata.data = dev->features & NETIF_F_GSO; | ||
1142 | if (copy_to_user(useraddr, &edata, sizeof(edata))) | ||
1143 | return -EFAULT; | ||
1144 | return 0; | ||
1145 | } | ||
1146 | |||
1147 | static int ethtool_set_gso(struct net_device *dev, char __user *useraddr) | ||
1148 | { | ||
1149 | struct ethtool_value edata; | ||
1150 | |||
1151 | if (copy_from_user(&edata, useraddr, sizeof(edata))) | ||
1152 | return -EFAULT; | ||
1153 | if (edata.data) | ||
1154 | dev->features |= NETIF_F_GSO; | ||
1155 | else | ||
1156 | dev->features &= ~NETIF_F_GSO; | ||
1157 | return 0; | ||
1158 | } | ||
1159 | |||
1160 | static int ethtool_get_gro(struct net_device *dev, char __user *useraddr) | ||
1161 | { | ||
1162 | struct ethtool_value edata = { ETHTOOL_GGRO }; | ||
1163 | |||
1164 | edata.data = dev->features & NETIF_F_GRO; | ||
1165 | if (copy_to_user(useraddr, &edata, sizeof(edata))) | ||
1166 | return -EFAULT; | ||
1167 | return 0; | ||
1168 | } | ||
1169 | |||
1170 | static int ethtool_set_gro(struct net_device *dev, char __user *useraddr) | ||
1171 | { | ||
1172 | struct ethtool_value edata; | ||
1173 | |||
1174 | if (copy_from_user(&edata, useraddr, sizeof(edata))) | ||
1175 | return -EFAULT; | ||
1176 | |||
1177 | if (edata.data) { | ||
1178 | if (!dev->ethtool_ops->get_rx_csum || | ||
1179 | !dev->ethtool_ops->get_rx_csum(dev)) | ||
1180 | return -EINVAL; | ||
1181 | dev->features |= NETIF_F_GRO; | ||
1182 | } else | ||
1183 | dev->features &= ~NETIF_F_GRO; | ||
1184 | |||
1185 | return 0; | ||
1186 | } | 1601 | } |
1187 | 1602 | ||
1188 | static int ethtool_self_test(struct net_device *dev, char __user *useraddr) | 1603 | static int ethtool_self_test(struct net_device *dev, char __user *useraddr) |
@@ -1226,17 +1641,13 @@ static int ethtool_self_test(struct net_device *dev, char __user *useraddr) | |||
1226 | static int ethtool_get_strings(struct net_device *dev, void __user *useraddr) | 1641 | static int ethtool_get_strings(struct net_device *dev, void __user *useraddr) |
1227 | { | 1642 | { |
1228 | struct ethtool_gstrings gstrings; | 1643 | struct ethtool_gstrings gstrings; |
1229 | const struct ethtool_ops *ops = dev->ethtool_ops; | ||
1230 | u8 *data; | 1644 | u8 *data; |
1231 | int ret; | 1645 | int ret; |
1232 | 1646 | ||
1233 | if (!ops->get_strings || !ops->get_sset_count) | ||
1234 | return -EOPNOTSUPP; | ||
1235 | |||
1236 | if (copy_from_user(&gstrings, useraddr, sizeof(gstrings))) | 1647 | if (copy_from_user(&gstrings, useraddr, sizeof(gstrings))) |
1237 | return -EFAULT; | 1648 | return -EFAULT; |
1238 | 1649 | ||
1239 | ret = ops->get_sset_count(dev, gstrings.string_set); | 1650 | ret = __ethtool_get_sset_count(dev, gstrings.string_set); |
1240 | if (ret < 0) | 1651 | if (ret < 0) |
1241 | return ret; | 1652 | return ret; |
1242 | 1653 | ||
@@ -1246,7 +1657,7 @@ static int ethtool_get_strings(struct net_device *dev, void __user *useraddr) | |||
1246 | if (!data) | 1657 | if (!data) |
1247 | return -ENOMEM; | 1658 | return -ENOMEM; |
1248 | 1659 | ||
1249 | ops->get_strings(dev, gstrings.string_set, data); | 1660 | __ethtool_get_strings(dev, gstrings.string_set, data); |
1250 | 1661 | ||
1251 | ret = -EFAULT; | 1662 | ret = -EFAULT; |
1252 | if (copy_to_user(useraddr, &gstrings, sizeof(gstrings))) | 1663 | if (copy_to_user(useraddr, &gstrings, sizeof(gstrings))) |
@@ -1256,7 +1667,7 @@ static int ethtool_get_strings(struct net_device *dev, void __user *useraddr) | |||
1256 | goto out; | 1667 | goto out; |
1257 | ret = 0; | 1668 | ret = 0; |
1258 | 1669 | ||
1259 | out: | 1670 | out: |
1260 | kfree(data); | 1671 | kfree(data); |
1261 | return ret; | 1672 | return ret; |
1262 | } | 1673 | } |
@@ -1264,14 +1675,60 @@ static int ethtool_get_strings(struct net_device *dev, void __user *useraddr) | |||
1264 | static int ethtool_phys_id(struct net_device *dev, void __user *useraddr) | 1675 | static int ethtool_phys_id(struct net_device *dev, void __user *useraddr) |
1265 | { | 1676 | { |
1266 | struct ethtool_value id; | 1677 | struct ethtool_value id; |
1678 | static bool busy; | ||
1679 | int rc; | ||
1267 | 1680 | ||
1268 | if (!dev->ethtool_ops->phys_id) | 1681 | if (!dev->ethtool_ops->set_phys_id) |
1269 | return -EOPNOTSUPP; | 1682 | return -EOPNOTSUPP; |
1270 | 1683 | ||
1684 | if (busy) | ||
1685 | return -EBUSY; | ||
1686 | |||
1271 | if (copy_from_user(&id, useraddr, sizeof(id))) | 1687 | if (copy_from_user(&id, useraddr, sizeof(id))) |
1272 | return -EFAULT; | 1688 | return -EFAULT; |
1273 | 1689 | ||
1274 | return dev->ethtool_ops->phys_id(dev, id.data); | 1690 | rc = dev->ethtool_ops->set_phys_id(dev, ETHTOOL_ID_ACTIVE); |
1691 | if (rc < 0) | ||
1692 | return rc; | ||
1693 | |||
1694 | /* Drop the RTNL lock while waiting, but prevent reentry or | ||
1695 | * removal of the device. | ||
1696 | */ | ||
1697 | busy = true; | ||
1698 | dev_hold(dev); | ||
1699 | rtnl_unlock(); | ||
1700 | |||
1701 | if (rc == 0) { | ||
1702 | /* Driver will handle this itself */ | ||
1703 | schedule_timeout_interruptible( | ||
1704 | id.data ? (id.data * HZ) : MAX_SCHEDULE_TIMEOUT); | ||
1705 | } else { | ||
1706 | /* Driver expects to be called at twice the frequency in rc */ | ||
1707 | int n = rc * 2, i, interval = HZ / n; | ||
1708 | |||
1709 | /* Count down seconds */ | ||
1710 | do { | ||
1711 | /* Count down iterations per second */ | ||
1712 | i = n; | ||
1713 | do { | ||
1714 | rtnl_lock(); | ||
1715 | rc = dev->ethtool_ops->set_phys_id(dev, | ||
1716 | (i & 1) ? ETHTOOL_ID_OFF : ETHTOOL_ID_ON); | ||
1717 | rtnl_unlock(); | ||
1718 | if (rc) | ||
1719 | break; | ||
1720 | schedule_timeout_interruptible(interval); | ||
1721 | } while (!signal_pending(current) && --i != 0); | ||
1722 | } while (!signal_pending(current) && | ||
1723 | (id.data == 0 || --id.data != 0)); | ||
1724 | } | ||
1725 | |||
1726 | rtnl_lock(); | ||
1727 | dev_put(dev); | ||
1728 | busy = false; | ||
1729 | |||
1730 | (void)dev->ethtool_ops->set_phys_id(dev, ETHTOOL_ID_INACTIVE); | ||
1731 | return rc; | ||
1275 | } | 1732 | } |
1276 | 1733 | ||
1277 | static int ethtool_get_stats(struct net_device *dev, void __user *useraddr) | 1734 | static int ethtool_get_stats(struct net_device *dev, void __user *useraddr) |
@@ -1389,6 +1846,87 @@ static noinline_for_stack int ethtool_flash_device(struct net_device *dev, | |||
1389 | return dev->ethtool_ops->flash_device(dev, &efl); | 1846 | return dev->ethtool_ops->flash_device(dev, &efl); |
1390 | } | 1847 | } |
1391 | 1848 | ||
1849 | static int ethtool_set_dump(struct net_device *dev, | ||
1850 | void __user *useraddr) | ||
1851 | { | ||
1852 | struct ethtool_dump dump; | ||
1853 | |||
1854 | if (!dev->ethtool_ops->set_dump) | ||
1855 | return -EOPNOTSUPP; | ||
1856 | |||
1857 | if (copy_from_user(&dump, useraddr, sizeof(dump))) | ||
1858 | return -EFAULT; | ||
1859 | |||
1860 | return dev->ethtool_ops->set_dump(dev, &dump); | ||
1861 | } | ||
1862 | |||
1863 | static int ethtool_get_dump_flag(struct net_device *dev, | ||
1864 | void __user *useraddr) | ||
1865 | { | ||
1866 | int ret; | ||
1867 | struct ethtool_dump dump; | ||
1868 | const struct ethtool_ops *ops = dev->ethtool_ops; | ||
1869 | |||
1870 | if (!dev->ethtool_ops->get_dump_flag) | ||
1871 | return -EOPNOTSUPP; | ||
1872 | |||
1873 | if (copy_from_user(&dump, useraddr, sizeof(dump))) | ||
1874 | return -EFAULT; | ||
1875 | |||
1876 | ret = ops->get_dump_flag(dev, &dump); | ||
1877 | if (ret) | ||
1878 | return ret; | ||
1879 | |||
1880 | if (copy_to_user(useraddr, &dump, sizeof(dump))) | ||
1881 | return -EFAULT; | ||
1882 | return 0; | ||
1883 | } | ||
1884 | |||
1885 | static int ethtool_get_dump_data(struct net_device *dev, | ||
1886 | void __user *useraddr) | ||
1887 | { | ||
1888 | int ret; | ||
1889 | __u32 len; | ||
1890 | struct ethtool_dump dump, tmp; | ||
1891 | const struct ethtool_ops *ops = dev->ethtool_ops; | ||
1892 | void *data = NULL; | ||
1893 | |||
1894 | if (!dev->ethtool_ops->get_dump_data || | ||
1895 | !dev->ethtool_ops->get_dump_flag) | ||
1896 | return -EOPNOTSUPP; | ||
1897 | |||
1898 | if (copy_from_user(&dump, useraddr, sizeof(dump))) | ||
1899 | return -EFAULT; | ||
1900 | |||
1901 | memset(&tmp, 0, sizeof(tmp)); | ||
1902 | tmp.cmd = ETHTOOL_GET_DUMP_FLAG; | ||
1903 | ret = ops->get_dump_flag(dev, &tmp); | ||
1904 | if (ret) | ||
1905 | return ret; | ||
1906 | |||
1907 | len = (tmp.len > dump.len) ? dump.len : tmp.len; | ||
1908 | if (!len) | ||
1909 | return -EFAULT; | ||
1910 | |||
1911 | data = vzalloc(tmp.len); | ||
1912 | if (!data) | ||
1913 | return -ENOMEM; | ||
1914 | ret = ops->get_dump_data(dev, &dump, data); | ||
1915 | if (ret) | ||
1916 | goto out; | ||
1917 | |||
1918 | if (copy_to_user(useraddr, &dump, sizeof(dump))) { | ||
1919 | ret = -EFAULT; | ||
1920 | goto out; | ||
1921 | } | ||
1922 | useraddr += offsetof(struct ethtool_dump, data); | ||
1923 | if (copy_to_user(useraddr, data, len)) | ||
1924 | ret = -EFAULT; | ||
1925 | out: | ||
1926 | vfree(data); | ||
1927 | return ret; | ||
1928 | } | ||
1929 | |||
1392 | /* The main entry point in this file. Called from net/core/dev.c */ | 1930 | /* The main entry point in this file. Called from net/core/dev.c */ |
1393 | 1931 | ||
1394 | int dev_ethtool(struct net *net, struct ifreq *ifr) | 1932 | int dev_ethtool(struct net *net, struct ifreq *ifr) |
@@ -1397,19 +1935,27 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) | |||
1397 | void __user *useraddr = ifr->ifr_data; | 1935 | void __user *useraddr = ifr->ifr_data; |
1398 | u32 ethcmd; | 1936 | u32 ethcmd; |
1399 | int rc; | 1937 | int rc; |
1400 | unsigned long old_features; | 1938 | u32 old_features; |
1401 | 1939 | ||
1402 | if (!dev || !netif_device_present(dev)) | 1940 | if (!dev || !netif_device_present(dev)) |
1403 | return -ENODEV; | 1941 | return -ENODEV; |
1404 | 1942 | ||
1405 | if (!dev->ethtool_ops) | ||
1406 | return -EOPNOTSUPP; | ||
1407 | |||
1408 | if (copy_from_user(&ethcmd, useraddr, sizeof(ethcmd))) | 1943 | if (copy_from_user(&ethcmd, useraddr, sizeof(ethcmd))) |
1409 | return -EFAULT; | 1944 | return -EFAULT; |
1410 | 1945 | ||
1946 | if (!dev->ethtool_ops) { | ||
1947 | /* ETHTOOL_GDRVINFO does not require any driver support. | ||
1948 | * It is also unprivileged and does not change anything, | ||
1949 | * so we can take a shortcut to it. */ | ||
1950 | if (ethcmd == ETHTOOL_GDRVINFO) | ||
1951 | return ethtool_get_drvinfo(dev, useraddr); | ||
1952 | else | ||
1953 | return -EOPNOTSUPP; | ||
1954 | } | ||
1955 | |||
1411 | /* Allow some commands to be done by anyone */ | 1956 | /* Allow some commands to be done by anyone */ |
1412 | switch (ethcmd) { | 1957 | switch (ethcmd) { |
1958 | case ETHTOOL_GSET: | ||
1413 | case ETHTOOL_GDRVINFO: | 1959 | case ETHTOOL_GDRVINFO: |
1414 | case ETHTOOL_GMSGLVL: | 1960 | case ETHTOOL_GMSGLVL: |
1415 | case ETHTOOL_GCOALESCE: | 1961 | case ETHTOOL_GCOALESCE: |
@@ -1431,6 +1977,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) | |||
1431 | case ETHTOOL_GRXCLSRLCNT: | 1977 | case ETHTOOL_GRXCLSRLCNT: |
1432 | case ETHTOOL_GRXCLSRULE: | 1978 | case ETHTOOL_GRXCLSRULE: |
1433 | case ETHTOOL_GRXCLSRLALL: | 1979 | case ETHTOOL_GRXCLSRLALL: |
1980 | case ETHTOOL_GFEATURES: | ||
1434 | break; | 1981 | break; |
1435 | default: | 1982 | default: |
1436 | if (!capable(CAP_NET_ADMIN)) | 1983 | if (!capable(CAP_NET_ADMIN)) |
@@ -1475,8 +2022,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) | |||
1475 | rc = ethtool_nway_reset(dev); | 2022 | rc = ethtool_nway_reset(dev); |
1476 | break; | 2023 | break; |
1477 | case ETHTOOL_GLINK: | 2024 | case ETHTOOL_GLINK: |
1478 | rc = ethtool_get_value(dev, useraddr, ethcmd, | 2025 | rc = ethtool_get_link(dev, useraddr); |
1479 | dev->ethtool_ops->get_link); | ||
1480 | break; | 2026 | break; |
1481 | case ETHTOOL_GEEPROM: | 2027 | case ETHTOOL_GEEPROM: |
1482 | rc = ethtool_get_eeprom(dev, useraddr); | 2028 | rc = ethtool_get_eeprom(dev, useraddr); |
@@ -1502,42 +2048,6 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) | |||
1502 | case ETHTOOL_SPAUSEPARAM: | 2048 | case ETHTOOL_SPAUSEPARAM: |
1503 | rc = ethtool_set_pauseparam(dev, useraddr); | 2049 | rc = ethtool_set_pauseparam(dev, useraddr); |
1504 | break; | 2050 | break; |
1505 | case ETHTOOL_GRXCSUM: | ||
1506 | rc = ethtool_get_value(dev, useraddr, ethcmd, | ||
1507 | (dev->ethtool_ops->get_rx_csum ? | ||
1508 | dev->ethtool_ops->get_rx_csum : | ||
1509 | ethtool_op_get_rx_csum)); | ||
1510 | break; | ||
1511 | case ETHTOOL_SRXCSUM: | ||
1512 | rc = ethtool_set_rx_csum(dev, useraddr); | ||
1513 | break; | ||
1514 | case ETHTOOL_GTXCSUM: | ||
1515 | rc = ethtool_get_value(dev, useraddr, ethcmd, | ||
1516 | (dev->ethtool_ops->get_tx_csum ? | ||
1517 | dev->ethtool_ops->get_tx_csum : | ||
1518 | ethtool_op_get_tx_csum)); | ||
1519 | break; | ||
1520 | case ETHTOOL_STXCSUM: | ||
1521 | rc = ethtool_set_tx_csum(dev, useraddr); | ||
1522 | break; | ||
1523 | case ETHTOOL_GSG: | ||
1524 | rc = ethtool_get_value(dev, useraddr, ethcmd, | ||
1525 | (dev->ethtool_ops->get_sg ? | ||
1526 | dev->ethtool_ops->get_sg : | ||
1527 | ethtool_op_get_sg)); | ||
1528 | break; | ||
1529 | case ETHTOOL_SSG: | ||
1530 | rc = ethtool_set_sg(dev, useraddr); | ||
1531 | break; | ||
1532 | case ETHTOOL_GTSO: | ||
1533 | rc = ethtool_get_value(dev, useraddr, ethcmd, | ||
1534 | (dev->ethtool_ops->get_tso ? | ||
1535 | dev->ethtool_ops->get_tso : | ||
1536 | ethtool_op_get_tso)); | ||
1537 | break; | ||
1538 | case ETHTOOL_STSO: | ||
1539 | rc = ethtool_set_tso(dev, useraddr); | ||
1540 | break; | ||
1541 | case ETHTOOL_TEST: | 2051 | case ETHTOOL_TEST: |
1542 | rc = ethtool_self_test(dev, useraddr); | 2052 | rc = ethtool_self_test(dev, useraddr); |
1543 | break; | 2053 | break; |
@@ -1553,21 +2063,6 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) | |||
1553 | case ETHTOOL_GPERMADDR: | 2063 | case ETHTOOL_GPERMADDR: |
1554 | rc = ethtool_get_perm_addr(dev, useraddr); | 2064 | rc = ethtool_get_perm_addr(dev, useraddr); |
1555 | break; | 2065 | break; |
1556 | case ETHTOOL_GUFO: | ||
1557 | rc = ethtool_get_value(dev, useraddr, ethcmd, | ||
1558 | (dev->ethtool_ops->get_ufo ? | ||
1559 | dev->ethtool_ops->get_ufo : | ||
1560 | ethtool_op_get_ufo)); | ||
1561 | break; | ||
1562 | case ETHTOOL_SUFO: | ||
1563 | rc = ethtool_set_ufo(dev, useraddr); | ||
1564 | break; | ||
1565 | case ETHTOOL_GGSO: | ||
1566 | rc = ethtool_get_gso(dev, useraddr); | ||
1567 | break; | ||
1568 | case ETHTOOL_SGSO: | ||
1569 | rc = ethtool_set_gso(dev, useraddr); | ||
1570 | break; | ||
1571 | case ETHTOOL_GFLAGS: | 2066 | case ETHTOOL_GFLAGS: |
1572 | rc = ethtool_get_value(dev, useraddr, ethcmd, | 2067 | rc = ethtool_get_value(dev, useraddr, ethcmd, |
1573 | (dev->ethtool_ops->get_flags ? | 2068 | (dev->ethtool_ops->get_flags ? |
@@ -1575,8 +2070,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) | |||
1575 | ethtool_op_get_flags)); | 2070 | ethtool_op_get_flags)); |
1576 | break; | 2071 | break; |
1577 | case ETHTOOL_SFLAGS: | 2072 | case ETHTOOL_SFLAGS: |
1578 | rc = ethtool_set_value(dev, useraddr, | 2073 | rc = ethtool_set_value(dev, useraddr, __ethtool_set_flags); |
1579 | dev->ethtool_ops->set_flags); | ||
1580 | break; | 2074 | break; |
1581 | case ETHTOOL_GPFLAGS: | 2075 | case ETHTOOL_GPFLAGS: |
1582 | rc = ethtool_get_value(dev, useraddr, ethcmd, | 2076 | rc = ethtool_get_value(dev, useraddr, ethcmd, |
@@ -1598,12 +2092,6 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) | |||
1598 | case ETHTOOL_SRXCLSRLINS: | 2092 | case ETHTOOL_SRXCLSRLINS: |
1599 | rc = ethtool_set_rxnfc(dev, ethcmd, useraddr); | 2093 | rc = ethtool_set_rxnfc(dev, ethcmd, useraddr); |
1600 | break; | 2094 | break; |
1601 | case ETHTOOL_GGRO: | ||
1602 | rc = ethtool_get_gro(dev, useraddr); | ||
1603 | break; | ||
1604 | case ETHTOOL_SGRO: | ||
1605 | rc = ethtool_set_gro(dev, useraddr); | ||
1606 | break; | ||
1607 | case ETHTOOL_FLASHDEV: | 2095 | case ETHTOOL_FLASHDEV: |
1608 | rc = ethtool_flash_device(dev, useraddr); | 2096 | rc = ethtool_flash_device(dev, useraddr); |
1609 | break; | 2097 | break; |
@@ -1625,6 +2113,45 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) | |||
1625 | case ETHTOOL_SRXFHINDIR: | 2113 | case ETHTOOL_SRXFHINDIR: |
1626 | rc = ethtool_set_rxfh_indir(dev, useraddr); | 2114 | rc = ethtool_set_rxfh_indir(dev, useraddr); |
1627 | break; | 2115 | break; |
2116 | case ETHTOOL_GFEATURES: | ||
2117 | rc = ethtool_get_features(dev, useraddr); | ||
2118 | break; | ||
2119 | case ETHTOOL_SFEATURES: | ||
2120 | rc = ethtool_set_features(dev, useraddr); | ||
2121 | break; | ||
2122 | case ETHTOOL_GTXCSUM: | ||
2123 | case ETHTOOL_GRXCSUM: | ||
2124 | case ETHTOOL_GSG: | ||
2125 | case ETHTOOL_GTSO: | ||
2126 | case ETHTOOL_GUFO: | ||
2127 | case ETHTOOL_GGSO: | ||
2128 | case ETHTOOL_GGRO: | ||
2129 | rc = ethtool_get_one_feature(dev, useraddr, ethcmd); | ||
2130 | break; | ||
2131 | case ETHTOOL_STXCSUM: | ||
2132 | case ETHTOOL_SRXCSUM: | ||
2133 | case ETHTOOL_SSG: | ||
2134 | case ETHTOOL_STSO: | ||
2135 | case ETHTOOL_SUFO: | ||
2136 | case ETHTOOL_SGSO: | ||
2137 | case ETHTOOL_SGRO: | ||
2138 | rc = ethtool_set_one_feature(dev, useraddr, ethcmd); | ||
2139 | break; | ||
2140 | case ETHTOOL_GCHANNELS: | ||
2141 | rc = ethtool_get_channels(dev, useraddr); | ||
2142 | break; | ||
2143 | case ETHTOOL_SCHANNELS: | ||
2144 | rc = ethtool_set_channels(dev, useraddr); | ||
2145 | break; | ||
2146 | case ETHTOOL_SET_DUMP: | ||
2147 | rc = ethtool_set_dump(dev, useraddr); | ||
2148 | break; | ||
2149 | case ETHTOOL_GET_DUMP_FLAG: | ||
2150 | rc = ethtool_get_dump_flag(dev, useraddr); | ||
2151 | break; | ||
2152 | case ETHTOOL_GET_DUMP_DATA: | ||
2153 | rc = ethtool_get_dump_data(dev, useraddr); | ||
2154 | break; | ||
1628 | default: | 2155 | default: |
1629 | rc = -EOPNOTSUPP; | 2156 | rc = -EOPNOTSUPP; |
1630 | } | 2157 | } |
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 42e84e08a1be..008dc70b064b 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c | |||
@@ -144,7 +144,7 @@ fib_rules_register(const struct fib_rules_ops *tmpl, struct net *net) | |||
144 | } | 144 | } |
145 | EXPORT_SYMBOL_GPL(fib_rules_register); | 145 | EXPORT_SYMBOL_GPL(fib_rules_register); |
146 | 146 | ||
147 | void fib_rules_cleanup_ops(struct fib_rules_ops *ops) | 147 | static void fib_rules_cleanup_ops(struct fib_rules_ops *ops) |
148 | { | 148 | { |
149 | struct fib_rule *rule, *tmp; | 149 | struct fib_rule *rule, *tmp; |
150 | 150 | ||
@@ -153,7 +153,6 @@ void fib_rules_cleanup_ops(struct fib_rules_ops *ops) | |||
153 | fib_rule_put(rule); | 153 | fib_rule_put(rule); |
154 | } | 154 | } |
155 | } | 155 | } |
156 | EXPORT_SYMBOL_GPL(fib_rules_cleanup_ops); | ||
157 | 156 | ||
158 | static void fib_rules_put_rcu(struct rcu_head *head) | 157 | static void fib_rules_put_rcu(struct rcu_head *head) |
159 | { | 158 | { |
@@ -182,13 +181,13 @@ static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops, | |||
182 | { | 181 | { |
183 | int ret = 0; | 182 | int ret = 0; |
184 | 183 | ||
185 | if (rule->iifindex && (rule->iifindex != fl->iif)) | 184 | if (rule->iifindex && (rule->iifindex != fl->flowi_iif)) |
186 | goto out; | 185 | goto out; |
187 | 186 | ||
188 | if (rule->oifindex && (rule->oifindex != fl->oif)) | 187 | if (rule->oifindex && (rule->oifindex != fl->flowi_oif)) |
189 | goto out; | 188 | goto out; |
190 | 189 | ||
191 | if ((rule->mark ^ fl->mark) & rule->mark_mask) | 190 | if ((rule->mark ^ fl->flowi_mark) & rule->mark_mask) |
192 | goto out; | 191 | goto out; |
193 | 192 | ||
194 | ret = ops->match(rule, fl, flags); | 193 | ret = ops->match(rule, fl, flags); |
@@ -225,9 +224,12 @@ jumped: | |||
225 | err = ops->action(rule, fl, flags, arg); | 224 | err = ops->action(rule, fl, flags, arg); |
226 | 225 | ||
227 | if (err != -EAGAIN) { | 226 | if (err != -EAGAIN) { |
228 | fib_rule_get(rule); | 227 | if ((arg->flags & FIB_LOOKUP_NOREF) || |
229 | arg->rule = rule; | 228 | likely(atomic_inc_not_zero(&rule->refcnt))) { |
230 | goto out; | 229 | arg->rule = rule; |
230 | goto out; | ||
231 | } | ||
232 | break; | ||
231 | } | 233 | } |
232 | } | 234 | } |
233 | 235 | ||
@@ -348,12 +350,12 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) | |||
348 | 350 | ||
349 | list_for_each_entry(r, &ops->rules_list, list) { | 351 | list_for_each_entry(r, &ops->rules_list, list) { |
350 | if (r->pref == rule->target) { | 352 | if (r->pref == rule->target) { |
351 | rule->ctarget = r; | 353 | RCU_INIT_POINTER(rule->ctarget, r); |
352 | break; | 354 | break; |
353 | } | 355 | } |
354 | } | 356 | } |
355 | 357 | ||
356 | if (rule->ctarget == NULL) | 358 | if (rcu_dereference_protected(rule->ctarget, 1) == NULL) |
357 | unresolved = 1; | 359 | unresolved = 1; |
358 | } else if (rule->action == FR_ACT_GOTO) | 360 | } else if (rule->action == FR_ACT_GOTO) |
359 | goto errout_free; | 361 | goto errout_free; |
@@ -370,6 +372,11 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) | |||
370 | 372 | ||
371 | fib_rule_get(rule); | 373 | fib_rule_get(rule); |
372 | 374 | ||
375 | if (last) | ||
376 | list_add_rcu(&rule->list, &last->list); | ||
377 | else | ||
378 | list_add_rcu(&rule->list, &ops->rules_list); | ||
379 | |||
373 | if (ops->unresolved_rules) { | 380 | if (ops->unresolved_rules) { |
374 | /* | 381 | /* |
375 | * There are unresolved goto rules in the list, check if | 382 | * There are unresolved goto rules in the list, check if |
@@ -378,7 +385,7 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) | |||
378 | list_for_each_entry(r, &ops->rules_list, list) { | 385 | list_for_each_entry(r, &ops->rules_list, list) { |
379 | if (r->action == FR_ACT_GOTO && | 386 | if (r->action == FR_ACT_GOTO && |
380 | r->target == rule->pref) { | 387 | r->target == rule->pref) { |
381 | BUG_ON(r->ctarget != NULL); | 388 | BUG_ON(rtnl_dereference(r->ctarget) != NULL); |
382 | rcu_assign_pointer(r->ctarget, rule); | 389 | rcu_assign_pointer(r->ctarget, rule); |
383 | if (--ops->unresolved_rules == 0) | 390 | if (--ops->unresolved_rules == 0) |
384 | break; | 391 | break; |
@@ -392,11 +399,6 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) | |||
392 | if (unresolved) | 399 | if (unresolved) |
393 | ops->unresolved_rules++; | 400 | ops->unresolved_rules++; |
394 | 401 | ||
395 | if (last) | ||
396 | list_add_rcu(&rule->list, &last->list); | ||
397 | else | ||
398 | list_add_rcu(&rule->list, &ops->rules_list); | ||
399 | |||
400 | notify_rule_change(RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).pid); | 402 | notify_rule_change(RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).pid); |
401 | flush_route_cache(ops); | 403 | flush_route_cache(ops); |
402 | rules_ops_put(ops); | 404 | rules_ops_put(ops); |
@@ -484,14 +486,13 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) | |||
484 | */ | 486 | */ |
485 | if (ops->nr_goto_rules > 0) { | 487 | if (ops->nr_goto_rules > 0) { |
486 | list_for_each_entry(tmp, &ops->rules_list, list) { | 488 | list_for_each_entry(tmp, &ops->rules_list, list) { |
487 | if (tmp->ctarget == rule) { | 489 | if (rtnl_dereference(tmp->ctarget) == rule) { |
488 | rcu_assign_pointer(tmp->ctarget, NULL); | 490 | rcu_assign_pointer(tmp->ctarget, NULL); |
489 | ops->unresolved_rules++; | 491 | ops->unresolved_rules++; |
490 | } | 492 | } |
491 | } | 493 | } |
492 | } | 494 | } |
493 | 495 | ||
494 | synchronize_rcu(); | ||
495 | notify_rule_change(RTM_DELRULE, rule, ops, nlh, | 496 | notify_rule_change(RTM_DELRULE, rule, ops, nlh, |
496 | NETLINK_CB(skb).pid); | 497 | NETLINK_CB(skb).pid); |
497 | fib_rule_put(rule); | 498 | fib_rule_put(rule); |
@@ -543,7 +544,8 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule, | |||
543 | frh->action = rule->action; | 544 | frh->action = rule->action; |
544 | frh->flags = rule->flags; | 545 | frh->flags = rule->flags; |
545 | 546 | ||
546 | if (rule->action == FR_ACT_GOTO && rule->ctarget == NULL) | 547 | if (rule->action == FR_ACT_GOTO && |
548 | rcu_dereference_raw(rule->ctarget) == NULL) | ||
547 | frh->flags |= FIB_RULE_UNRESOLVED; | 549 | frh->flags |= FIB_RULE_UNRESOLVED; |
548 | 550 | ||
549 | if (rule->iifname[0]) { | 551 | if (rule->iifname[0]) { |
@@ -588,7 +590,8 @@ static int dump_rules(struct sk_buff *skb, struct netlink_callback *cb, | |||
588 | int idx = 0; | 590 | int idx = 0; |
589 | struct fib_rule *rule; | 591 | struct fib_rule *rule; |
590 | 592 | ||
591 | list_for_each_entry(rule, &ops->rules_list, list) { | 593 | rcu_read_lock(); |
594 | list_for_each_entry_rcu(rule, &ops->rules_list, list) { | ||
592 | if (idx < cb->args[1]) | 595 | if (idx < cb->args[1]) |
593 | goto skip; | 596 | goto skip; |
594 | 597 | ||
@@ -599,6 +602,7 @@ static int dump_rules(struct sk_buff *skb, struct netlink_callback *cb, | |||
599 | skip: | 602 | skip: |
600 | idx++; | 603 | idx++; |
601 | } | 604 | } |
605 | rcu_read_unlock(); | ||
602 | cb->args[1] = idx; | 606 | cb->args[1] = idx; |
603 | rules_ops_put(ops); | 607 | rules_ops_put(ops); |
604 | 608 | ||
diff --git a/net/core/filter.c b/net/core/filter.c index 52b051f82a01..36f975fa87cb 100644 --- a/net/core/filter.c +++ b/net/core/filter.c | |||
@@ -37,9 +37,11 @@ | |||
37 | #include <asm/uaccess.h> | 37 | #include <asm/uaccess.h> |
38 | #include <asm/unaligned.h> | 38 | #include <asm/unaligned.h> |
39 | #include <linux/filter.h> | 39 | #include <linux/filter.h> |
40 | #include <linux/reciprocal_div.h> | ||
41 | #include <linux/ratelimit.h> | ||
40 | 42 | ||
41 | /* No hurry in this branch */ | 43 | /* No hurry in this branch */ |
42 | static void *__load_pointer(struct sk_buff *skb, int k) | 44 | static void *__load_pointer(const struct sk_buff *skb, int k, unsigned int size) |
43 | { | 45 | { |
44 | u8 *ptr = NULL; | 46 | u8 *ptr = NULL; |
45 | 47 | ||
@@ -48,21 +50,17 @@ static void *__load_pointer(struct sk_buff *skb, int k) | |||
48 | else if (k >= SKF_LL_OFF) | 50 | else if (k >= SKF_LL_OFF) |
49 | ptr = skb_mac_header(skb) + k - SKF_LL_OFF; | 51 | ptr = skb_mac_header(skb) + k - SKF_LL_OFF; |
50 | 52 | ||
51 | if (ptr >= skb->head && ptr < skb_tail_pointer(skb)) | 53 | if (ptr >= skb->head && ptr + size <= skb_tail_pointer(skb)) |
52 | return ptr; | 54 | return ptr; |
53 | return NULL; | 55 | return NULL; |
54 | } | 56 | } |
55 | 57 | ||
56 | static inline void *load_pointer(struct sk_buff *skb, int k, | 58 | static inline void *load_pointer(const struct sk_buff *skb, int k, |
57 | unsigned int size, void *buffer) | 59 | unsigned int size, void *buffer) |
58 | { | 60 | { |
59 | if (k >= 0) | 61 | if (k >= 0) |
60 | return skb_header_pointer(skb, k, size, buffer); | 62 | return skb_header_pointer(skb, k, size, buffer); |
61 | else { | 63 | return __load_pointer(skb, k, size); |
62 | if (k >= SKF_AD_OFF) | ||
63 | return NULL; | ||
64 | return __load_pointer(skb, k); | ||
65 | } | ||
66 | } | 64 | } |
67 | 65 | ||
68 | /** | 66 | /** |
@@ -86,14 +84,14 @@ int sk_filter(struct sock *sk, struct sk_buff *skb) | |||
86 | if (err) | 84 | if (err) |
87 | return err; | 85 | return err; |
88 | 86 | ||
89 | rcu_read_lock_bh(); | 87 | rcu_read_lock(); |
90 | filter = rcu_dereference_bh(sk->sk_filter); | 88 | filter = rcu_dereference(sk->sk_filter); |
91 | if (filter) { | 89 | if (filter) { |
92 | unsigned int pkt_len = sk_run_filter(skb, filter->insns, | 90 | unsigned int pkt_len = SK_RUN_FILTER(filter, skb); |
93 | filter->len); | 91 | |
94 | err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM; | 92 | err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM; |
95 | } | 93 | } |
96 | rcu_read_unlock_bh(); | 94 | rcu_read_unlock(); |
97 | 95 | ||
98 | return err; | 96 | return err; |
99 | } | 97 | } |
@@ -102,49 +100,53 @@ EXPORT_SYMBOL(sk_filter); | |||
102 | /** | 100 | /** |
103 | * sk_run_filter - run a filter on a socket | 101 | * sk_run_filter - run a filter on a socket |
104 | * @skb: buffer to run the filter on | 102 | * @skb: buffer to run the filter on |
105 | * @filter: filter to apply | 103 | * @fentry: filter to apply |
106 | * @flen: length of filter | ||
107 | * | 104 | * |
108 | * Decode and apply filter instructions to the skb->data. | 105 | * Decode and apply filter instructions to the skb->data. |
109 | * Return length to keep, 0 for none. skb is the data we are | 106 | * Return length to keep, 0 for none. @skb is the data we are |
110 | * filtering, filter is the array of filter instructions, and | 107 | * filtering, @filter is the array of filter instructions. |
111 | * len is the number of filter blocks in the array. | 108 | * Because all jumps are guaranteed to be before last instruction, |
109 | * and last instruction guaranteed to be a RET, we dont need to check | ||
110 | * flen. (We used to pass to this function the length of filter) | ||
112 | */ | 111 | */ |
113 | unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int flen) | 112 | unsigned int sk_run_filter(const struct sk_buff *skb, |
113 | const struct sock_filter *fentry) | ||
114 | { | 114 | { |
115 | struct sock_filter *fentry; /* We walk down these */ | ||
116 | void *ptr; | 115 | void *ptr; |
117 | u32 A = 0; /* Accumulator */ | 116 | u32 A = 0; /* Accumulator */ |
118 | u32 X = 0; /* Index Register */ | 117 | u32 X = 0; /* Index Register */ |
119 | u32 mem[BPF_MEMWORDS]; /* Scratch Memory Store */ | 118 | u32 mem[BPF_MEMWORDS]; /* Scratch Memory Store */ |
120 | u32 tmp; | 119 | u32 tmp; |
121 | int k; | 120 | int k; |
122 | int pc; | ||
123 | 121 | ||
124 | /* | 122 | /* |
125 | * Process array of filter instructions. | 123 | * Process array of filter instructions. |
126 | */ | 124 | */ |
127 | for (pc = 0; pc < flen; pc++) { | 125 | for (;; fentry++) { |
128 | fentry = &filter[pc]; | 126 | #if defined(CONFIG_X86_32) |
127 | #define K (fentry->k) | ||
128 | #else | ||
129 | const u32 K = fentry->k; | ||
130 | #endif | ||
129 | 131 | ||
130 | switch (fentry->code) { | 132 | switch (fentry->code) { |
131 | case BPF_S_ALU_ADD_X: | 133 | case BPF_S_ALU_ADD_X: |
132 | A += X; | 134 | A += X; |
133 | continue; | 135 | continue; |
134 | case BPF_S_ALU_ADD_K: | 136 | case BPF_S_ALU_ADD_K: |
135 | A += fentry->k; | 137 | A += K; |
136 | continue; | 138 | continue; |
137 | case BPF_S_ALU_SUB_X: | 139 | case BPF_S_ALU_SUB_X: |
138 | A -= X; | 140 | A -= X; |
139 | continue; | 141 | continue; |
140 | case BPF_S_ALU_SUB_K: | 142 | case BPF_S_ALU_SUB_K: |
141 | A -= fentry->k; | 143 | A -= K; |
142 | continue; | 144 | continue; |
143 | case BPF_S_ALU_MUL_X: | 145 | case BPF_S_ALU_MUL_X: |
144 | A *= X; | 146 | A *= X; |
145 | continue; | 147 | continue; |
146 | case BPF_S_ALU_MUL_K: | 148 | case BPF_S_ALU_MUL_K: |
147 | A *= fentry->k; | 149 | A *= K; |
148 | continue; | 150 | continue; |
149 | case BPF_S_ALU_DIV_X: | 151 | case BPF_S_ALU_DIV_X: |
150 | if (X == 0) | 152 | if (X == 0) |
@@ -152,89 +154,89 @@ unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int | |||
152 | A /= X; | 154 | A /= X; |
153 | continue; | 155 | continue; |
154 | case BPF_S_ALU_DIV_K: | 156 | case BPF_S_ALU_DIV_K: |
155 | A /= fentry->k; | 157 | A = reciprocal_divide(A, K); |
156 | continue; | 158 | continue; |
157 | case BPF_S_ALU_AND_X: | 159 | case BPF_S_ALU_AND_X: |
158 | A &= X; | 160 | A &= X; |
159 | continue; | 161 | continue; |
160 | case BPF_S_ALU_AND_K: | 162 | case BPF_S_ALU_AND_K: |
161 | A &= fentry->k; | 163 | A &= K; |
162 | continue; | 164 | continue; |
163 | case BPF_S_ALU_OR_X: | 165 | case BPF_S_ALU_OR_X: |
164 | A |= X; | 166 | A |= X; |
165 | continue; | 167 | continue; |
166 | case BPF_S_ALU_OR_K: | 168 | case BPF_S_ALU_OR_K: |
167 | A |= fentry->k; | 169 | A |= K; |
168 | continue; | 170 | continue; |
169 | case BPF_S_ALU_LSH_X: | 171 | case BPF_S_ALU_LSH_X: |
170 | A <<= X; | 172 | A <<= X; |
171 | continue; | 173 | continue; |
172 | case BPF_S_ALU_LSH_K: | 174 | case BPF_S_ALU_LSH_K: |
173 | A <<= fentry->k; | 175 | A <<= K; |
174 | continue; | 176 | continue; |
175 | case BPF_S_ALU_RSH_X: | 177 | case BPF_S_ALU_RSH_X: |
176 | A >>= X; | 178 | A >>= X; |
177 | continue; | 179 | continue; |
178 | case BPF_S_ALU_RSH_K: | 180 | case BPF_S_ALU_RSH_K: |
179 | A >>= fentry->k; | 181 | A >>= K; |
180 | continue; | 182 | continue; |
181 | case BPF_S_ALU_NEG: | 183 | case BPF_S_ALU_NEG: |
182 | A = -A; | 184 | A = -A; |
183 | continue; | 185 | continue; |
184 | case BPF_S_JMP_JA: | 186 | case BPF_S_JMP_JA: |
185 | pc += fentry->k; | 187 | fentry += K; |
186 | continue; | 188 | continue; |
187 | case BPF_S_JMP_JGT_K: | 189 | case BPF_S_JMP_JGT_K: |
188 | pc += (A > fentry->k) ? fentry->jt : fentry->jf; | 190 | fentry += (A > K) ? fentry->jt : fentry->jf; |
189 | continue; | 191 | continue; |
190 | case BPF_S_JMP_JGE_K: | 192 | case BPF_S_JMP_JGE_K: |
191 | pc += (A >= fentry->k) ? fentry->jt : fentry->jf; | 193 | fentry += (A >= K) ? fentry->jt : fentry->jf; |
192 | continue; | 194 | continue; |
193 | case BPF_S_JMP_JEQ_K: | 195 | case BPF_S_JMP_JEQ_K: |
194 | pc += (A == fentry->k) ? fentry->jt : fentry->jf; | 196 | fentry += (A == K) ? fentry->jt : fentry->jf; |
195 | continue; | 197 | continue; |
196 | case BPF_S_JMP_JSET_K: | 198 | case BPF_S_JMP_JSET_K: |
197 | pc += (A & fentry->k) ? fentry->jt : fentry->jf; | 199 | fentry += (A & K) ? fentry->jt : fentry->jf; |
198 | continue; | 200 | continue; |
199 | case BPF_S_JMP_JGT_X: | 201 | case BPF_S_JMP_JGT_X: |
200 | pc += (A > X) ? fentry->jt : fentry->jf; | 202 | fentry += (A > X) ? fentry->jt : fentry->jf; |
201 | continue; | 203 | continue; |
202 | case BPF_S_JMP_JGE_X: | 204 | case BPF_S_JMP_JGE_X: |
203 | pc += (A >= X) ? fentry->jt : fentry->jf; | 205 | fentry += (A >= X) ? fentry->jt : fentry->jf; |
204 | continue; | 206 | continue; |
205 | case BPF_S_JMP_JEQ_X: | 207 | case BPF_S_JMP_JEQ_X: |
206 | pc += (A == X) ? fentry->jt : fentry->jf; | 208 | fentry += (A == X) ? fentry->jt : fentry->jf; |
207 | continue; | 209 | continue; |
208 | case BPF_S_JMP_JSET_X: | 210 | case BPF_S_JMP_JSET_X: |
209 | pc += (A & X) ? fentry->jt : fentry->jf; | 211 | fentry += (A & X) ? fentry->jt : fentry->jf; |
210 | continue; | 212 | continue; |
211 | case BPF_S_LD_W_ABS: | 213 | case BPF_S_LD_W_ABS: |
212 | k = fentry->k; | 214 | k = K; |
213 | load_w: | 215 | load_w: |
214 | ptr = load_pointer(skb, k, 4, &tmp); | 216 | ptr = load_pointer(skb, k, 4, &tmp); |
215 | if (ptr != NULL) { | 217 | if (ptr != NULL) { |
216 | A = get_unaligned_be32(ptr); | 218 | A = get_unaligned_be32(ptr); |
217 | continue; | 219 | continue; |
218 | } | 220 | } |
219 | break; | 221 | return 0; |
220 | case BPF_S_LD_H_ABS: | 222 | case BPF_S_LD_H_ABS: |
221 | k = fentry->k; | 223 | k = K; |
222 | load_h: | 224 | load_h: |
223 | ptr = load_pointer(skb, k, 2, &tmp); | 225 | ptr = load_pointer(skb, k, 2, &tmp); |
224 | if (ptr != NULL) { | 226 | if (ptr != NULL) { |
225 | A = get_unaligned_be16(ptr); | 227 | A = get_unaligned_be16(ptr); |
226 | continue; | 228 | continue; |
227 | } | 229 | } |
228 | break; | 230 | return 0; |
229 | case BPF_S_LD_B_ABS: | 231 | case BPF_S_LD_B_ABS: |
230 | k = fentry->k; | 232 | k = K; |
231 | load_b: | 233 | load_b: |
232 | ptr = load_pointer(skb, k, 1, &tmp); | 234 | ptr = load_pointer(skb, k, 1, &tmp); |
233 | if (ptr != NULL) { | 235 | if (ptr != NULL) { |
234 | A = *(u8 *)ptr; | 236 | A = *(u8 *)ptr; |
235 | continue; | 237 | continue; |
236 | } | 238 | } |
237 | break; | 239 | return 0; |
238 | case BPF_S_LD_W_LEN: | 240 | case BPF_S_LD_W_LEN: |
239 | A = skb->len; | 241 | A = skb->len; |
240 | continue; | 242 | continue; |
@@ -242,32 +244,32 @@ load_b: | |||
242 | X = skb->len; | 244 | X = skb->len; |
243 | continue; | 245 | continue; |
244 | case BPF_S_LD_W_IND: | 246 | case BPF_S_LD_W_IND: |
245 | k = X + fentry->k; | 247 | k = X + K; |
246 | goto load_w; | 248 | goto load_w; |
247 | case BPF_S_LD_H_IND: | 249 | case BPF_S_LD_H_IND: |
248 | k = X + fentry->k; | 250 | k = X + K; |
249 | goto load_h; | 251 | goto load_h; |
250 | case BPF_S_LD_B_IND: | 252 | case BPF_S_LD_B_IND: |
251 | k = X + fentry->k; | 253 | k = X + K; |
252 | goto load_b; | 254 | goto load_b; |
253 | case BPF_S_LDX_B_MSH: | 255 | case BPF_S_LDX_B_MSH: |
254 | ptr = load_pointer(skb, fentry->k, 1, &tmp); | 256 | ptr = load_pointer(skb, K, 1, &tmp); |
255 | if (ptr != NULL) { | 257 | if (ptr != NULL) { |
256 | X = (*(u8 *)ptr & 0xf) << 2; | 258 | X = (*(u8 *)ptr & 0xf) << 2; |
257 | continue; | 259 | continue; |
258 | } | 260 | } |
259 | return 0; | 261 | return 0; |
260 | case BPF_S_LD_IMM: | 262 | case BPF_S_LD_IMM: |
261 | A = fentry->k; | 263 | A = K; |
262 | continue; | 264 | continue; |
263 | case BPF_S_LDX_IMM: | 265 | case BPF_S_LDX_IMM: |
264 | X = fentry->k; | 266 | X = K; |
265 | continue; | 267 | continue; |
266 | case BPF_S_LD_MEM: | 268 | case BPF_S_LD_MEM: |
267 | A = mem[fentry->k]; | 269 | A = mem[K]; |
268 | continue; | 270 | continue; |
269 | case BPF_S_LDX_MEM: | 271 | case BPF_S_LDX_MEM: |
270 | X = mem[fentry->k]; | 272 | X = mem[K]; |
271 | continue; | 273 | continue; |
272 | case BPF_S_MISC_TAX: | 274 | case BPF_S_MISC_TAX: |
273 | X = A; | 275 | X = A; |
@@ -276,48 +278,44 @@ load_b: | |||
276 | A = X; | 278 | A = X; |
277 | continue; | 279 | continue; |
278 | case BPF_S_RET_K: | 280 | case BPF_S_RET_K: |
279 | return fentry->k; | 281 | return K; |
280 | case BPF_S_RET_A: | 282 | case BPF_S_RET_A: |
281 | return A; | 283 | return A; |
282 | case BPF_S_ST: | 284 | case BPF_S_ST: |
283 | mem[fentry->k] = A; | 285 | mem[K] = A; |
284 | continue; | 286 | continue; |
285 | case BPF_S_STX: | 287 | case BPF_S_STX: |
286 | mem[fentry->k] = X; | 288 | mem[K] = X; |
287 | continue; | 289 | continue; |
288 | default: | 290 | case BPF_S_ANC_PROTOCOL: |
289 | WARN_ON(1); | ||
290 | return 0; | ||
291 | } | ||
292 | |||
293 | /* | ||
294 | * Handle ancillary data, which are impossible | ||
295 | * (or very difficult) to get parsing packet contents. | ||
296 | */ | ||
297 | switch (k-SKF_AD_OFF) { | ||
298 | case SKF_AD_PROTOCOL: | ||
299 | A = ntohs(skb->protocol); | 291 | A = ntohs(skb->protocol); |
300 | continue; | 292 | continue; |
301 | case SKF_AD_PKTTYPE: | 293 | case BPF_S_ANC_PKTTYPE: |
302 | A = skb->pkt_type; | 294 | A = skb->pkt_type; |
303 | continue; | 295 | continue; |
304 | case SKF_AD_IFINDEX: | 296 | case BPF_S_ANC_IFINDEX: |
305 | if (!skb->dev) | 297 | if (!skb->dev) |
306 | return 0; | 298 | return 0; |
307 | A = skb->dev->ifindex; | 299 | A = skb->dev->ifindex; |
308 | continue; | 300 | continue; |
309 | case SKF_AD_MARK: | 301 | case BPF_S_ANC_MARK: |
310 | A = skb->mark; | 302 | A = skb->mark; |
311 | continue; | 303 | continue; |
312 | case SKF_AD_QUEUE: | 304 | case BPF_S_ANC_QUEUE: |
313 | A = skb->queue_mapping; | 305 | A = skb->queue_mapping; |
314 | continue; | 306 | continue; |
315 | case SKF_AD_HATYPE: | 307 | case BPF_S_ANC_HATYPE: |
316 | if (!skb->dev) | 308 | if (!skb->dev) |
317 | return 0; | 309 | return 0; |
318 | A = skb->dev->type; | 310 | A = skb->dev->type; |
319 | continue; | 311 | continue; |
320 | case SKF_AD_NLATTR: { | 312 | case BPF_S_ANC_RXHASH: |
313 | A = skb->rxhash; | ||
314 | continue; | ||
315 | case BPF_S_ANC_CPU: | ||
316 | A = raw_smp_processor_id(); | ||
317 | continue; | ||
318 | case BPF_S_ANC_NLATTR: { | ||
321 | struct nlattr *nla; | 319 | struct nlattr *nla; |
322 | 320 | ||
323 | if (skb_is_nonlinear(skb)) | 321 | if (skb_is_nonlinear(skb)) |
@@ -333,7 +331,7 @@ load_b: | |||
333 | A = 0; | 331 | A = 0; |
334 | continue; | 332 | continue; |
335 | } | 333 | } |
336 | case SKF_AD_NLATTR_NEST: { | 334 | case BPF_S_ANC_NLATTR_NEST: { |
337 | struct nlattr *nla; | 335 | struct nlattr *nla; |
338 | 336 | ||
339 | if (skb_is_nonlinear(skb)) | 337 | if (skb_is_nonlinear(skb)) |
@@ -353,6 +351,9 @@ load_b: | |||
353 | continue; | 351 | continue; |
354 | } | 352 | } |
355 | default: | 353 | default: |
354 | WARN_RATELIMIT(1, "Unknown code:%u jt:%u tf:%u k:%u\n", | ||
355 | fentry->code, fentry->jt, | ||
356 | fentry->jf, fentry->k); | ||
356 | return 0; | 357 | return 0; |
357 | } | 358 | } |
358 | } | 359 | } |
@@ -361,6 +362,66 @@ load_b: | |||
361 | } | 362 | } |
362 | EXPORT_SYMBOL(sk_run_filter); | 363 | EXPORT_SYMBOL(sk_run_filter); |
363 | 364 | ||
365 | /* | ||
366 | * Security : | ||
367 | * A BPF program is able to use 16 cells of memory to store intermediate | ||
368 | * values (check u32 mem[BPF_MEMWORDS] in sk_run_filter()) | ||
369 | * As we dont want to clear mem[] array for each packet going through | ||
370 | * sk_run_filter(), we check that filter loaded by user never try to read | ||
371 | * a cell if not previously written, and we check all branches to be sure | ||
372 | * a malicious user doesn't try to abuse us. | ||
373 | */ | ||
374 | static int check_load_and_stores(struct sock_filter *filter, int flen) | ||
375 | { | ||
376 | u16 *masks, memvalid = 0; /* one bit per cell, 16 cells */ | ||
377 | int pc, ret = 0; | ||
378 | |||
379 | BUILD_BUG_ON(BPF_MEMWORDS > 16); | ||
380 | masks = kmalloc(flen * sizeof(*masks), GFP_KERNEL); | ||
381 | if (!masks) | ||
382 | return -ENOMEM; | ||
383 | memset(masks, 0xff, flen * sizeof(*masks)); | ||
384 | |||
385 | for (pc = 0; pc < flen; pc++) { | ||
386 | memvalid &= masks[pc]; | ||
387 | |||
388 | switch (filter[pc].code) { | ||
389 | case BPF_S_ST: | ||
390 | case BPF_S_STX: | ||
391 | memvalid |= (1 << filter[pc].k); | ||
392 | break; | ||
393 | case BPF_S_LD_MEM: | ||
394 | case BPF_S_LDX_MEM: | ||
395 | if (!(memvalid & (1 << filter[pc].k))) { | ||
396 | ret = -EINVAL; | ||
397 | goto error; | ||
398 | } | ||
399 | break; | ||
400 | case BPF_S_JMP_JA: | ||
401 | /* a jump must set masks on target */ | ||
402 | masks[pc + 1 + filter[pc].k] &= memvalid; | ||
403 | memvalid = ~0; | ||
404 | break; | ||
405 | case BPF_S_JMP_JEQ_K: | ||
406 | case BPF_S_JMP_JEQ_X: | ||
407 | case BPF_S_JMP_JGE_K: | ||
408 | case BPF_S_JMP_JGE_X: | ||
409 | case BPF_S_JMP_JGT_K: | ||
410 | case BPF_S_JMP_JGT_X: | ||
411 | case BPF_S_JMP_JSET_X: | ||
412 | case BPF_S_JMP_JSET_K: | ||
413 | /* a jump must set masks on targets */ | ||
414 | masks[pc + 1 + filter[pc].jt] &= memvalid; | ||
415 | masks[pc + 1 + filter[pc].jf] &= memvalid; | ||
416 | memvalid = ~0; | ||
417 | break; | ||
418 | } | ||
419 | } | ||
420 | error: | ||
421 | kfree(masks); | ||
422 | return ret; | ||
423 | } | ||
424 | |||
364 | /** | 425 | /** |
365 | * sk_chk_filter - verify socket filter code | 426 | * sk_chk_filter - verify socket filter code |
366 | * @filter: filter to verify | 427 | * @filter: filter to verify |
@@ -377,7 +438,57 @@ EXPORT_SYMBOL(sk_run_filter); | |||
377 | */ | 438 | */ |
378 | int sk_chk_filter(struct sock_filter *filter, int flen) | 439 | int sk_chk_filter(struct sock_filter *filter, int flen) |
379 | { | 440 | { |
380 | struct sock_filter *ftest; | 441 | /* |
442 | * Valid instructions are initialized to non-0. | ||
443 | * Invalid instructions are initialized to 0. | ||
444 | */ | ||
445 | static const u8 codes[] = { | ||
446 | [BPF_ALU|BPF_ADD|BPF_K] = BPF_S_ALU_ADD_K, | ||
447 | [BPF_ALU|BPF_ADD|BPF_X] = BPF_S_ALU_ADD_X, | ||
448 | [BPF_ALU|BPF_SUB|BPF_K] = BPF_S_ALU_SUB_K, | ||
449 | [BPF_ALU|BPF_SUB|BPF_X] = BPF_S_ALU_SUB_X, | ||
450 | [BPF_ALU|BPF_MUL|BPF_K] = BPF_S_ALU_MUL_K, | ||
451 | [BPF_ALU|BPF_MUL|BPF_X] = BPF_S_ALU_MUL_X, | ||
452 | [BPF_ALU|BPF_DIV|BPF_X] = BPF_S_ALU_DIV_X, | ||
453 | [BPF_ALU|BPF_AND|BPF_K] = BPF_S_ALU_AND_K, | ||
454 | [BPF_ALU|BPF_AND|BPF_X] = BPF_S_ALU_AND_X, | ||
455 | [BPF_ALU|BPF_OR|BPF_K] = BPF_S_ALU_OR_K, | ||
456 | [BPF_ALU|BPF_OR|BPF_X] = BPF_S_ALU_OR_X, | ||
457 | [BPF_ALU|BPF_LSH|BPF_K] = BPF_S_ALU_LSH_K, | ||
458 | [BPF_ALU|BPF_LSH|BPF_X] = BPF_S_ALU_LSH_X, | ||
459 | [BPF_ALU|BPF_RSH|BPF_K] = BPF_S_ALU_RSH_K, | ||
460 | [BPF_ALU|BPF_RSH|BPF_X] = BPF_S_ALU_RSH_X, | ||
461 | [BPF_ALU|BPF_NEG] = BPF_S_ALU_NEG, | ||
462 | [BPF_LD|BPF_W|BPF_ABS] = BPF_S_LD_W_ABS, | ||
463 | [BPF_LD|BPF_H|BPF_ABS] = BPF_S_LD_H_ABS, | ||
464 | [BPF_LD|BPF_B|BPF_ABS] = BPF_S_LD_B_ABS, | ||
465 | [BPF_LD|BPF_W|BPF_LEN] = BPF_S_LD_W_LEN, | ||
466 | [BPF_LD|BPF_W|BPF_IND] = BPF_S_LD_W_IND, | ||
467 | [BPF_LD|BPF_H|BPF_IND] = BPF_S_LD_H_IND, | ||
468 | [BPF_LD|BPF_B|BPF_IND] = BPF_S_LD_B_IND, | ||
469 | [BPF_LD|BPF_IMM] = BPF_S_LD_IMM, | ||
470 | [BPF_LDX|BPF_W|BPF_LEN] = BPF_S_LDX_W_LEN, | ||
471 | [BPF_LDX|BPF_B|BPF_MSH] = BPF_S_LDX_B_MSH, | ||
472 | [BPF_LDX|BPF_IMM] = BPF_S_LDX_IMM, | ||
473 | [BPF_MISC|BPF_TAX] = BPF_S_MISC_TAX, | ||
474 | [BPF_MISC|BPF_TXA] = BPF_S_MISC_TXA, | ||
475 | [BPF_RET|BPF_K] = BPF_S_RET_K, | ||
476 | [BPF_RET|BPF_A] = BPF_S_RET_A, | ||
477 | [BPF_ALU|BPF_DIV|BPF_K] = BPF_S_ALU_DIV_K, | ||
478 | [BPF_LD|BPF_MEM] = BPF_S_LD_MEM, | ||
479 | [BPF_LDX|BPF_MEM] = BPF_S_LDX_MEM, | ||
480 | [BPF_ST] = BPF_S_ST, | ||
481 | [BPF_STX] = BPF_S_STX, | ||
482 | [BPF_JMP|BPF_JA] = BPF_S_JMP_JA, | ||
483 | [BPF_JMP|BPF_JEQ|BPF_K] = BPF_S_JMP_JEQ_K, | ||
484 | [BPF_JMP|BPF_JEQ|BPF_X] = BPF_S_JMP_JEQ_X, | ||
485 | [BPF_JMP|BPF_JGE|BPF_K] = BPF_S_JMP_JGE_K, | ||
486 | [BPF_JMP|BPF_JGE|BPF_X] = BPF_S_JMP_JGE_X, | ||
487 | [BPF_JMP|BPF_JGT|BPF_K] = BPF_S_JMP_JGT_K, | ||
488 | [BPF_JMP|BPF_JGT|BPF_X] = BPF_S_JMP_JGT_X, | ||
489 | [BPF_JMP|BPF_JSET|BPF_K] = BPF_S_JMP_JSET_K, | ||
490 | [BPF_JMP|BPF_JSET|BPF_X] = BPF_S_JMP_JSET_X, | ||
491 | }; | ||
381 | int pc; | 492 | int pc; |
382 | 493 | ||
383 | if (flen == 0 || flen > BPF_MAXINSNS) | 494 | if (flen == 0 || flen > BPF_MAXINSNS) |
@@ -385,136 +496,31 @@ int sk_chk_filter(struct sock_filter *filter, int flen) | |||
385 | 496 | ||
386 | /* check the filter code now */ | 497 | /* check the filter code now */ |
387 | for (pc = 0; pc < flen; pc++) { | 498 | for (pc = 0; pc < flen; pc++) { |
388 | ftest = &filter[pc]; | 499 | struct sock_filter *ftest = &filter[pc]; |
389 | 500 | u16 code = ftest->code; | |
390 | /* Only allow valid instructions */ | ||
391 | switch (ftest->code) { | ||
392 | case BPF_ALU|BPF_ADD|BPF_K: | ||
393 | ftest->code = BPF_S_ALU_ADD_K; | ||
394 | break; | ||
395 | case BPF_ALU|BPF_ADD|BPF_X: | ||
396 | ftest->code = BPF_S_ALU_ADD_X; | ||
397 | break; | ||
398 | case BPF_ALU|BPF_SUB|BPF_K: | ||
399 | ftest->code = BPF_S_ALU_SUB_K; | ||
400 | break; | ||
401 | case BPF_ALU|BPF_SUB|BPF_X: | ||
402 | ftest->code = BPF_S_ALU_SUB_X; | ||
403 | break; | ||
404 | case BPF_ALU|BPF_MUL|BPF_K: | ||
405 | ftest->code = BPF_S_ALU_MUL_K; | ||
406 | break; | ||
407 | case BPF_ALU|BPF_MUL|BPF_X: | ||
408 | ftest->code = BPF_S_ALU_MUL_X; | ||
409 | break; | ||
410 | case BPF_ALU|BPF_DIV|BPF_X: | ||
411 | ftest->code = BPF_S_ALU_DIV_X; | ||
412 | break; | ||
413 | case BPF_ALU|BPF_AND|BPF_K: | ||
414 | ftest->code = BPF_S_ALU_AND_K; | ||
415 | break; | ||
416 | case BPF_ALU|BPF_AND|BPF_X: | ||
417 | ftest->code = BPF_S_ALU_AND_X; | ||
418 | break; | ||
419 | case BPF_ALU|BPF_OR|BPF_K: | ||
420 | ftest->code = BPF_S_ALU_OR_K; | ||
421 | break; | ||
422 | case BPF_ALU|BPF_OR|BPF_X: | ||
423 | ftest->code = BPF_S_ALU_OR_X; | ||
424 | break; | ||
425 | case BPF_ALU|BPF_LSH|BPF_K: | ||
426 | ftest->code = BPF_S_ALU_LSH_K; | ||
427 | break; | ||
428 | case BPF_ALU|BPF_LSH|BPF_X: | ||
429 | ftest->code = BPF_S_ALU_LSH_X; | ||
430 | break; | ||
431 | case BPF_ALU|BPF_RSH|BPF_K: | ||
432 | ftest->code = BPF_S_ALU_RSH_K; | ||
433 | break; | ||
434 | case BPF_ALU|BPF_RSH|BPF_X: | ||
435 | ftest->code = BPF_S_ALU_RSH_X; | ||
436 | break; | ||
437 | case BPF_ALU|BPF_NEG: | ||
438 | ftest->code = BPF_S_ALU_NEG; | ||
439 | break; | ||
440 | case BPF_LD|BPF_W|BPF_ABS: | ||
441 | ftest->code = BPF_S_LD_W_ABS; | ||
442 | break; | ||
443 | case BPF_LD|BPF_H|BPF_ABS: | ||
444 | ftest->code = BPF_S_LD_H_ABS; | ||
445 | break; | ||
446 | case BPF_LD|BPF_B|BPF_ABS: | ||
447 | ftest->code = BPF_S_LD_B_ABS; | ||
448 | break; | ||
449 | case BPF_LD|BPF_W|BPF_LEN: | ||
450 | ftest->code = BPF_S_LD_W_LEN; | ||
451 | break; | ||
452 | case BPF_LD|BPF_W|BPF_IND: | ||
453 | ftest->code = BPF_S_LD_W_IND; | ||
454 | break; | ||
455 | case BPF_LD|BPF_H|BPF_IND: | ||
456 | ftest->code = BPF_S_LD_H_IND; | ||
457 | break; | ||
458 | case BPF_LD|BPF_B|BPF_IND: | ||
459 | ftest->code = BPF_S_LD_B_IND; | ||
460 | break; | ||
461 | case BPF_LD|BPF_IMM: | ||
462 | ftest->code = BPF_S_LD_IMM; | ||
463 | break; | ||
464 | case BPF_LDX|BPF_W|BPF_LEN: | ||
465 | ftest->code = BPF_S_LDX_W_LEN; | ||
466 | break; | ||
467 | case BPF_LDX|BPF_B|BPF_MSH: | ||
468 | ftest->code = BPF_S_LDX_B_MSH; | ||
469 | break; | ||
470 | case BPF_LDX|BPF_IMM: | ||
471 | ftest->code = BPF_S_LDX_IMM; | ||
472 | break; | ||
473 | case BPF_MISC|BPF_TAX: | ||
474 | ftest->code = BPF_S_MISC_TAX; | ||
475 | break; | ||
476 | case BPF_MISC|BPF_TXA: | ||
477 | ftest->code = BPF_S_MISC_TXA; | ||
478 | break; | ||
479 | case BPF_RET|BPF_K: | ||
480 | ftest->code = BPF_S_RET_K; | ||
481 | break; | ||
482 | case BPF_RET|BPF_A: | ||
483 | ftest->code = BPF_S_RET_A; | ||
484 | break; | ||
485 | 501 | ||
502 | if (code >= ARRAY_SIZE(codes)) | ||
503 | return -EINVAL; | ||
504 | code = codes[code]; | ||
505 | if (!code) | ||
506 | return -EINVAL; | ||
486 | /* Some instructions need special checks */ | 507 | /* Some instructions need special checks */ |
487 | 508 | switch (code) { | |
509 | case BPF_S_ALU_DIV_K: | ||
488 | /* check for division by zero */ | 510 | /* check for division by zero */ |
489 | case BPF_ALU|BPF_DIV|BPF_K: | ||
490 | if (ftest->k == 0) | 511 | if (ftest->k == 0) |
491 | return -EINVAL; | 512 | return -EINVAL; |
492 | ftest->code = BPF_S_ALU_DIV_K; | 513 | ftest->k = reciprocal_value(ftest->k); |
493 | break; | ||
494 | |||
495 | /* check for invalid memory addresses */ | ||
496 | case BPF_LD|BPF_MEM: | ||
497 | if (ftest->k >= BPF_MEMWORDS) | ||
498 | return -EINVAL; | ||
499 | ftest->code = BPF_S_LD_MEM; | ||
500 | break; | ||
501 | case BPF_LDX|BPF_MEM: | ||
502 | if (ftest->k >= BPF_MEMWORDS) | ||
503 | return -EINVAL; | ||
504 | ftest->code = BPF_S_LDX_MEM; | ||
505 | break; | ||
506 | case BPF_ST: | ||
507 | if (ftest->k >= BPF_MEMWORDS) | ||
508 | return -EINVAL; | ||
509 | ftest->code = BPF_S_ST; | ||
510 | break; | 514 | break; |
511 | case BPF_STX: | 515 | case BPF_S_LD_MEM: |
516 | case BPF_S_LDX_MEM: | ||
517 | case BPF_S_ST: | ||
518 | case BPF_S_STX: | ||
519 | /* check for invalid memory addresses */ | ||
512 | if (ftest->k >= BPF_MEMWORDS) | 520 | if (ftest->k >= BPF_MEMWORDS) |
513 | return -EINVAL; | 521 | return -EINVAL; |
514 | ftest->code = BPF_S_STX; | ||
515 | break; | 522 | break; |
516 | 523 | case BPF_S_JMP_JA: | |
517 | case BPF_JMP|BPF_JA: | ||
518 | /* | 524 | /* |
519 | * Note, the large ftest->k might cause loops. | 525 | * Note, the large ftest->k might cause loops. |
520 | * Compare this with conditional jumps below, | 526 | * Compare this with conditional jumps below, |
@@ -522,40 +528,7 @@ int sk_chk_filter(struct sock_filter *filter, int flen) | |||
522 | */ | 528 | */ |
523 | if (ftest->k >= (unsigned)(flen-pc-1)) | 529 | if (ftest->k >= (unsigned)(flen-pc-1)) |
524 | return -EINVAL; | 530 | return -EINVAL; |
525 | ftest->code = BPF_S_JMP_JA; | ||
526 | break; | ||
527 | |||
528 | case BPF_JMP|BPF_JEQ|BPF_K: | ||
529 | ftest->code = BPF_S_JMP_JEQ_K; | ||
530 | break; | ||
531 | case BPF_JMP|BPF_JEQ|BPF_X: | ||
532 | ftest->code = BPF_S_JMP_JEQ_X; | ||
533 | break; | ||
534 | case BPF_JMP|BPF_JGE|BPF_K: | ||
535 | ftest->code = BPF_S_JMP_JGE_K; | ||
536 | break; | ||
537 | case BPF_JMP|BPF_JGE|BPF_X: | ||
538 | ftest->code = BPF_S_JMP_JGE_X; | ||
539 | break; | ||
540 | case BPF_JMP|BPF_JGT|BPF_K: | ||
541 | ftest->code = BPF_S_JMP_JGT_K; | ||
542 | break; | 531 | break; |
543 | case BPF_JMP|BPF_JGT|BPF_X: | ||
544 | ftest->code = BPF_S_JMP_JGT_X; | ||
545 | break; | ||
546 | case BPF_JMP|BPF_JSET|BPF_K: | ||
547 | ftest->code = BPF_S_JMP_JSET_K; | ||
548 | break; | ||
549 | case BPF_JMP|BPF_JSET|BPF_X: | ||
550 | ftest->code = BPF_S_JMP_JSET_X; | ||
551 | break; | ||
552 | |||
553 | default: | ||
554 | return -EINVAL; | ||
555 | } | ||
556 | |||
557 | /* for conditionals both must be safe */ | ||
558 | switch (ftest->code) { | ||
559 | case BPF_S_JMP_JEQ_K: | 532 | case BPF_S_JMP_JEQ_K: |
560 | case BPF_S_JMP_JEQ_X: | 533 | case BPF_S_JMP_JEQ_X: |
561 | case BPF_S_JMP_JGE_K: | 534 | case BPF_S_JMP_JGE_K: |
@@ -564,42 +537,55 @@ int sk_chk_filter(struct sock_filter *filter, int flen) | |||
564 | case BPF_S_JMP_JGT_X: | 537 | case BPF_S_JMP_JGT_X: |
565 | case BPF_S_JMP_JSET_X: | 538 | case BPF_S_JMP_JSET_X: |
566 | case BPF_S_JMP_JSET_K: | 539 | case BPF_S_JMP_JSET_K: |
540 | /* for conditionals both must be safe */ | ||
567 | if (pc + ftest->jt + 1 >= flen || | 541 | if (pc + ftest->jt + 1 >= flen || |
568 | pc + ftest->jf + 1 >= flen) | 542 | pc + ftest->jf + 1 >= flen) |
569 | return -EINVAL; | 543 | return -EINVAL; |
544 | break; | ||
545 | case BPF_S_LD_W_ABS: | ||
546 | case BPF_S_LD_H_ABS: | ||
547 | case BPF_S_LD_B_ABS: | ||
548 | #define ANCILLARY(CODE) case SKF_AD_OFF + SKF_AD_##CODE: \ | ||
549 | code = BPF_S_ANC_##CODE; \ | ||
550 | break | ||
551 | switch (ftest->k) { | ||
552 | ANCILLARY(PROTOCOL); | ||
553 | ANCILLARY(PKTTYPE); | ||
554 | ANCILLARY(IFINDEX); | ||
555 | ANCILLARY(NLATTR); | ||
556 | ANCILLARY(NLATTR_NEST); | ||
557 | ANCILLARY(MARK); | ||
558 | ANCILLARY(QUEUE); | ||
559 | ANCILLARY(HATYPE); | ||
560 | ANCILLARY(RXHASH); | ||
561 | ANCILLARY(CPU); | ||
562 | } | ||
570 | } | 563 | } |
564 | ftest->code = code; | ||
571 | } | 565 | } |
572 | 566 | ||
573 | /* last instruction must be a RET code */ | 567 | /* last instruction must be a RET code */ |
574 | switch (filter[flen - 1].code) { | 568 | switch (filter[flen - 1].code) { |
575 | case BPF_S_RET_K: | 569 | case BPF_S_RET_K: |
576 | case BPF_S_RET_A: | 570 | case BPF_S_RET_A: |
577 | return 0; | 571 | return check_load_and_stores(filter, flen); |
578 | break; | 572 | } |
579 | default: | 573 | return -EINVAL; |
580 | return -EINVAL; | ||
581 | } | ||
582 | } | 574 | } |
583 | EXPORT_SYMBOL(sk_chk_filter); | 575 | EXPORT_SYMBOL(sk_chk_filter); |
584 | 576 | ||
585 | /** | 577 | /** |
586 | * sk_filter_rcu_release: Release a socket filter by rcu_head | 578 | * sk_filter_release_rcu - Release a socket filter by rcu_head |
587 | * @rcu: rcu_head that contains the sk_filter to free | 579 | * @rcu: rcu_head that contains the sk_filter to free |
588 | */ | 580 | */ |
589 | static void sk_filter_rcu_release(struct rcu_head *rcu) | 581 | void sk_filter_release_rcu(struct rcu_head *rcu) |
590 | { | 582 | { |
591 | struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu); | 583 | struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu); |
592 | 584 | ||
593 | sk_filter_release(fp); | 585 | bpf_jit_free(fp); |
594 | } | 586 | kfree(fp); |
595 | |||
596 | static void sk_filter_delayed_uncharge(struct sock *sk, struct sk_filter *fp) | ||
597 | { | ||
598 | unsigned int size = sk_filter_len(fp); | ||
599 | |||
600 | atomic_sub(size, &sk->sk_omem_alloc); | ||
601 | call_rcu_bh(&fp->rcu, sk_filter_rcu_release); | ||
602 | } | 587 | } |
588 | EXPORT_SYMBOL(sk_filter_release_rcu); | ||
603 | 589 | ||
604 | /** | 590 | /** |
605 | * sk_attach_filter - attach a socket filter | 591 | * sk_attach_filter - attach a socket filter |
@@ -631,6 +617,7 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) | |||
631 | 617 | ||
632 | atomic_set(&fp->refcnt, 1); | 618 | atomic_set(&fp->refcnt, 1); |
633 | fp->len = fprog->len; | 619 | fp->len = fprog->len; |
620 | fp->bpf_func = sk_run_filter; | ||
634 | 621 | ||
635 | err = sk_chk_filter(fp->insns, fp->len); | 622 | err = sk_chk_filter(fp->insns, fp->len); |
636 | if (err) { | 623 | if (err) { |
@@ -638,13 +625,14 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) | |||
638 | return err; | 625 | return err; |
639 | } | 626 | } |
640 | 627 | ||
641 | rcu_read_lock_bh(); | 628 | bpf_jit_compile(fp); |
642 | old_fp = rcu_dereference_bh(sk->sk_filter); | 629 | |
630 | old_fp = rcu_dereference_protected(sk->sk_filter, | ||
631 | sock_owned_by_user(sk)); | ||
643 | rcu_assign_pointer(sk->sk_filter, fp); | 632 | rcu_assign_pointer(sk->sk_filter, fp); |
644 | rcu_read_unlock_bh(); | ||
645 | 633 | ||
646 | if (old_fp) | 634 | if (old_fp) |
647 | sk_filter_delayed_uncharge(sk, old_fp); | 635 | sk_filter_uncharge(sk, old_fp); |
648 | return 0; | 636 | return 0; |
649 | } | 637 | } |
650 | EXPORT_SYMBOL_GPL(sk_attach_filter); | 638 | EXPORT_SYMBOL_GPL(sk_attach_filter); |
@@ -654,14 +642,13 @@ int sk_detach_filter(struct sock *sk) | |||
654 | int ret = -ENOENT; | 642 | int ret = -ENOENT; |
655 | struct sk_filter *filter; | 643 | struct sk_filter *filter; |
656 | 644 | ||
657 | rcu_read_lock_bh(); | 645 | filter = rcu_dereference_protected(sk->sk_filter, |
658 | filter = rcu_dereference_bh(sk->sk_filter); | 646 | sock_owned_by_user(sk)); |
659 | if (filter) { | 647 | if (filter) { |
660 | rcu_assign_pointer(sk->sk_filter, NULL); | 648 | rcu_assign_pointer(sk->sk_filter, NULL); |
661 | sk_filter_delayed_uncharge(sk, filter); | 649 | sk_filter_uncharge(sk, filter); |
662 | ret = 0; | 650 | ret = 0; |
663 | } | 651 | } |
664 | rcu_read_unlock_bh(); | ||
665 | return ret; | 652 | return ret; |
666 | } | 653 | } |
667 | EXPORT_SYMBOL_GPL(sk_detach_filter); | 654 | EXPORT_SYMBOL_GPL(sk_detach_filter); |
diff --git a/net/core/flow.c b/net/core/flow.c index f67dcbfe54ef..990703b8863b 100644 --- a/net/core/flow.c +++ b/net/core/flow.c | |||
@@ -53,8 +53,7 @@ struct flow_flush_info { | |||
53 | 53 | ||
54 | struct flow_cache { | 54 | struct flow_cache { |
55 | u32 hash_shift; | 55 | u32 hash_shift; |
56 | unsigned long order; | 56 | struct flow_cache_percpu __percpu *percpu; |
57 | struct flow_cache_percpu *percpu; | ||
58 | struct notifier_block hotcpu_notifier; | 57 | struct notifier_block hotcpu_notifier; |
59 | int low_watermark; | 58 | int low_watermark; |
60 | int high_watermark; | 59 | int high_watermark; |
@@ -64,7 +63,7 @@ struct flow_cache { | |||
64 | atomic_t flow_cache_genid = ATOMIC_INIT(0); | 63 | atomic_t flow_cache_genid = ATOMIC_INIT(0); |
65 | EXPORT_SYMBOL(flow_cache_genid); | 64 | EXPORT_SYMBOL(flow_cache_genid); |
66 | static struct flow_cache flow_cache_global; | 65 | static struct flow_cache flow_cache_global; |
67 | static struct kmem_cache *flow_cachep; | 66 | static struct kmem_cache *flow_cachep __read_mostly; |
68 | 67 | ||
69 | static DEFINE_SPINLOCK(flow_cache_gc_lock); | 68 | static DEFINE_SPINLOCK(flow_cache_gc_lock); |
70 | static LIST_HEAD(flow_cache_gc_list); | 69 | static LIST_HEAD(flow_cache_gc_list); |
@@ -173,35 +172,31 @@ static void flow_new_hash_rnd(struct flow_cache *fc, | |||
173 | 172 | ||
174 | static u32 flow_hash_code(struct flow_cache *fc, | 173 | static u32 flow_hash_code(struct flow_cache *fc, |
175 | struct flow_cache_percpu *fcp, | 174 | struct flow_cache_percpu *fcp, |
176 | struct flowi *key) | 175 | const struct flowi *key) |
177 | { | 176 | { |
178 | u32 *k = (u32 *) key; | 177 | const u32 *k = (const u32 *) key; |
179 | 178 | ||
180 | return (jhash2(k, (sizeof(*key) / sizeof(u32)), fcp->hash_rnd) | 179 | return jhash2(k, (sizeof(*key) / sizeof(u32)), fcp->hash_rnd) |
181 | & (flow_cache_hash_size(fc) - 1)); | 180 | & (flow_cache_hash_size(fc) - 1); |
182 | } | 181 | } |
183 | 182 | ||
184 | #if (BITS_PER_LONG == 64) | 183 | typedef unsigned long flow_compare_t; |
185 | typedef u64 flow_compare_t; | ||
186 | #else | ||
187 | typedef u32 flow_compare_t; | ||
188 | #endif | ||
189 | 184 | ||
190 | /* I hear what you're saying, use memcmp. But memcmp cannot make | 185 | /* I hear what you're saying, use memcmp. But memcmp cannot make |
191 | * important assumptions that we can here, such as alignment and | 186 | * important assumptions that we can here, such as alignment and |
192 | * constant size. | 187 | * constant size. |
193 | */ | 188 | */ |
194 | static int flow_key_compare(struct flowi *key1, struct flowi *key2) | 189 | static int flow_key_compare(const struct flowi *key1, const struct flowi *key2) |
195 | { | 190 | { |
196 | flow_compare_t *k1, *k1_lim, *k2; | 191 | const flow_compare_t *k1, *k1_lim, *k2; |
197 | const int n_elem = sizeof(struct flowi) / sizeof(flow_compare_t); | 192 | const int n_elem = sizeof(struct flowi) / sizeof(flow_compare_t); |
198 | 193 | ||
199 | BUILD_BUG_ON(sizeof(struct flowi) % sizeof(flow_compare_t)); | 194 | BUILD_BUG_ON(sizeof(struct flowi) % sizeof(flow_compare_t)); |
200 | 195 | ||
201 | k1 = (flow_compare_t *) key1; | 196 | k1 = (const flow_compare_t *) key1; |
202 | k1_lim = k1 + n_elem; | 197 | k1_lim = k1 + n_elem; |
203 | 198 | ||
204 | k2 = (flow_compare_t *) key2; | 199 | k2 = (const flow_compare_t *) key2; |
205 | 200 | ||
206 | do { | 201 | do { |
207 | if (*k1++ != *k2++) | 202 | if (*k1++ != *k2++) |
@@ -212,7 +207,7 @@ static int flow_key_compare(struct flowi *key1, struct flowi *key2) | |||
212 | } | 207 | } |
213 | 208 | ||
214 | struct flow_cache_object * | 209 | struct flow_cache_object * |
215 | flow_cache_lookup(struct net *net, struct flowi *key, u16 family, u8 dir, | 210 | flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir, |
216 | flow_resolve_t resolver, void *ctx) | 211 | flow_resolve_t resolver, void *ctx) |
217 | { | 212 | { |
218 | struct flow_cache *fc = &flow_cache_global; | 213 | struct flow_cache *fc = &flow_cache_global; |
@@ -357,62 +352,73 @@ void flow_cache_flush(void) | |||
357 | put_online_cpus(); | 352 | put_online_cpus(); |
358 | } | 353 | } |
359 | 354 | ||
360 | static void __init flow_cache_cpu_prepare(struct flow_cache *fc, | 355 | static int __cpuinit flow_cache_cpu_prepare(struct flow_cache *fc, int cpu) |
361 | struct flow_cache_percpu *fcp) | ||
362 | { | 356 | { |
363 | fcp->hash_table = (struct hlist_head *) | 357 | struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, cpu); |
364 | __get_free_pages(GFP_KERNEL|__GFP_ZERO, fc->order); | 358 | size_t sz = sizeof(struct hlist_head) * flow_cache_hash_size(fc); |
365 | if (!fcp->hash_table) | ||
366 | panic("NET: failed to allocate flow cache order %lu\n", fc->order); | ||
367 | 359 | ||
368 | fcp->hash_rnd_recalc = 1; | 360 | if (!fcp->hash_table) { |
369 | fcp->hash_count = 0; | 361 | fcp->hash_table = kzalloc_node(sz, GFP_KERNEL, cpu_to_node(cpu)); |
370 | tasklet_init(&fcp->flush_tasklet, flow_cache_flush_tasklet, 0); | 362 | if (!fcp->hash_table) { |
363 | pr_err("NET: failed to allocate flow cache sz %zu\n", sz); | ||
364 | return -ENOMEM; | ||
365 | } | ||
366 | fcp->hash_rnd_recalc = 1; | ||
367 | fcp->hash_count = 0; | ||
368 | tasklet_init(&fcp->flush_tasklet, flow_cache_flush_tasklet, 0); | ||
369 | } | ||
370 | return 0; | ||
371 | } | 371 | } |
372 | 372 | ||
373 | static int flow_cache_cpu(struct notifier_block *nfb, | 373 | static int __cpuinit flow_cache_cpu(struct notifier_block *nfb, |
374 | unsigned long action, | 374 | unsigned long action, |
375 | void *hcpu) | 375 | void *hcpu) |
376 | { | 376 | { |
377 | struct flow_cache *fc = container_of(nfb, struct flow_cache, hotcpu_notifier); | 377 | struct flow_cache *fc = container_of(nfb, struct flow_cache, hotcpu_notifier); |
378 | int cpu = (unsigned long) hcpu; | 378 | int res, cpu = (unsigned long) hcpu; |
379 | struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, cpu); | 379 | struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, cpu); |
380 | 380 | ||
381 | if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) | 381 | switch (action) { |
382 | case CPU_UP_PREPARE: | ||
383 | case CPU_UP_PREPARE_FROZEN: | ||
384 | res = flow_cache_cpu_prepare(fc, cpu); | ||
385 | if (res) | ||
386 | return notifier_from_errno(res); | ||
387 | break; | ||
388 | case CPU_DEAD: | ||
389 | case CPU_DEAD_FROZEN: | ||
382 | __flow_cache_shrink(fc, fcp, 0); | 390 | __flow_cache_shrink(fc, fcp, 0); |
391 | break; | ||
392 | } | ||
383 | return NOTIFY_OK; | 393 | return NOTIFY_OK; |
384 | } | 394 | } |
385 | 395 | ||
386 | static int flow_cache_init(struct flow_cache *fc) | 396 | static int __init flow_cache_init(struct flow_cache *fc) |
387 | { | 397 | { |
388 | unsigned long order; | ||
389 | int i; | 398 | int i; |
390 | 399 | ||
391 | fc->hash_shift = 10; | 400 | fc->hash_shift = 10; |
392 | fc->low_watermark = 2 * flow_cache_hash_size(fc); | 401 | fc->low_watermark = 2 * flow_cache_hash_size(fc); |
393 | fc->high_watermark = 4 * flow_cache_hash_size(fc); | 402 | fc->high_watermark = 4 * flow_cache_hash_size(fc); |
394 | 403 | ||
395 | for (order = 0; | ||
396 | (PAGE_SIZE << order) < | ||
397 | (sizeof(struct hlist_head)*flow_cache_hash_size(fc)); | ||
398 | order++) | ||
399 | /* NOTHING */; | ||
400 | fc->order = order; | ||
401 | fc->percpu = alloc_percpu(struct flow_cache_percpu); | 404 | fc->percpu = alloc_percpu(struct flow_cache_percpu); |
405 | if (!fc->percpu) | ||
406 | return -ENOMEM; | ||
402 | 407 | ||
403 | setup_timer(&fc->rnd_timer, flow_cache_new_hashrnd, | 408 | for_each_online_cpu(i) { |
404 | (unsigned long) fc); | 409 | if (flow_cache_cpu_prepare(fc, i)) |
405 | fc->rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD; | 410 | return -ENOMEM; |
406 | add_timer(&fc->rnd_timer); | 411 | } |
407 | |||
408 | for_each_possible_cpu(i) | ||
409 | flow_cache_cpu_prepare(fc, per_cpu_ptr(fc->percpu, i)); | ||
410 | |||
411 | fc->hotcpu_notifier = (struct notifier_block){ | 412 | fc->hotcpu_notifier = (struct notifier_block){ |
412 | .notifier_call = flow_cache_cpu, | 413 | .notifier_call = flow_cache_cpu, |
413 | }; | 414 | }; |
414 | register_hotcpu_notifier(&fc->hotcpu_notifier); | 415 | register_hotcpu_notifier(&fc->hotcpu_notifier); |
415 | 416 | ||
417 | setup_timer(&fc->rnd_timer, flow_cache_new_hashrnd, | ||
418 | (unsigned long) fc); | ||
419 | fc->rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD; | ||
420 | add_timer(&fc->rnd_timer); | ||
421 | |||
416 | return 0; | 422 | return 0; |
417 | } | 423 | } |
418 | 424 | ||
diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c index 6743146e4d6b..43b03dd71e85 100644 --- a/net/core/gen_estimator.c +++ b/net/core/gen_estimator.c | |||
@@ -249,13 +249,6 @@ int gen_new_estimator(struct gnet_stats_basic_packed *bstats, | |||
249 | } | 249 | } |
250 | EXPORT_SYMBOL(gen_new_estimator); | 250 | EXPORT_SYMBOL(gen_new_estimator); |
251 | 251 | ||
252 | static void __gen_kill_estimator(struct rcu_head *head) | ||
253 | { | ||
254 | struct gen_estimator *e = container_of(head, | ||
255 | struct gen_estimator, e_rcu); | ||
256 | kfree(e); | ||
257 | } | ||
258 | |||
259 | /** | 252 | /** |
260 | * gen_kill_estimator - remove a rate estimator | 253 | * gen_kill_estimator - remove a rate estimator |
261 | * @bstats: basic statistics | 254 | * @bstats: basic statistics |
@@ -274,12 +267,12 @@ void gen_kill_estimator(struct gnet_stats_basic_packed *bstats, | |||
274 | while ((e = gen_find_node(bstats, rate_est))) { | 267 | while ((e = gen_find_node(bstats, rate_est))) { |
275 | rb_erase(&e->node, &est_root); | 268 | rb_erase(&e->node, &est_root); |
276 | 269 | ||
277 | write_lock_bh(&est_lock); | 270 | write_lock(&est_lock); |
278 | e->bstats = NULL; | 271 | e->bstats = NULL; |
279 | write_unlock_bh(&est_lock); | 272 | write_unlock(&est_lock); |
280 | 273 | ||
281 | list_del_rcu(&e->list); | 274 | list_del_rcu(&e->list); |
282 | call_rcu(&e->e_rcu, __gen_kill_estimator); | 275 | kfree_rcu(e, e_rcu); |
283 | } | 276 | } |
284 | spin_unlock_bh(&est_tree_lock); | 277 | spin_unlock_bh(&est_tree_lock); |
285 | } | 278 | } |
diff --git a/net/core/iovec.c b/net/core/iovec.c index e6b133b77ccb..c40f27e7d208 100644 --- a/net/core/iovec.c +++ b/net/core/iovec.c | |||
@@ -35,14 +35,15 @@ | |||
35 | * in any case. | 35 | * in any case. |
36 | */ | 36 | */ |
37 | 37 | ||
38 | long verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr *address, int mode) | 38 | int verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr *address, int mode) |
39 | { | 39 | { |
40 | int size, ct; | 40 | int size, ct, err; |
41 | long err; | ||
42 | 41 | ||
43 | if (m->msg_namelen) { | 42 | if (m->msg_namelen) { |
44 | if (mode == VERIFY_READ) { | 43 | if (mode == VERIFY_READ) { |
45 | err = move_addr_to_kernel(m->msg_name, m->msg_namelen, | 44 | void __user *namep; |
45 | namep = (void __user __force *) m->msg_name; | ||
46 | err = move_addr_to_kernel(namep, m->msg_namelen, | ||
46 | address); | 47 | address); |
47 | if (err < 0) | 48 | if (err < 0) |
48 | return err; | 49 | return err; |
@@ -53,21 +54,20 @@ long verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr *address, | |||
53 | } | 54 | } |
54 | 55 | ||
55 | size = m->msg_iovlen * sizeof(struct iovec); | 56 | size = m->msg_iovlen * sizeof(struct iovec); |
56 | if (copy_from_user(iov, m->msg_iov, size)) | 57 | if (copy_from_user(iov, (void __user __force *) m->msg_iov, size)) |
57 | return -EFAULT; | 58 | return -EFAULT; |
58 | 59 | ||
59 | m->msg_iov = iov; | 60 | m->msg_iov = iov; |
60 | err = 0; | 61 | err = 0; |
61 | 62 | ||
62 | for (ct = 0; ct < m->msg_iovlen; ct++) { | 63 | for (ct = 0; ct < m->msg_iovlen; ct++) { |
63 | err += iov[ct].iov_len; | 64 | size_t len = iov[ct].iov_len; |
64 | /* | 65 | |
65 | * Goal is not to verify user data, but to prevent returning | 66 | if (len > INT_MAX - err) { |
66 | * negative value, which is interpreted as errno. | 67 | len = INT_MAX - err; |
67 | * Overflow is still possible, but it is harmless. | 68 | iov[ct].iov_len = len; |
68 | */ | 69 | } |
69 | if (err < 0) | 70 | err += len; |
70 | return -EMSGSIZE; | ||
71 | } | 71 | } |
72 | 72 | ||
73 | return err; | 73 | return err; |
diff --git a/net/core/link_watch.c b/net/core/link_watch.c index 01a1101b5936..a7b342131869 100644 --- a/net/core/link_watch.c +++ b/net/core/link_watch.c | |||
@@ -129,7 +129,7 @@ static void linkwatch_schedule_work(int urgent) | |||
129 | if (!cancel_delayed_work(&linkwatch_work)) | 129 | if (!cancel_delayed_work(&linkwatch_work)) |
130 | return; | 130 | return; |
131 | 131 | ||
132 | /* Otherwise we reschedule it again for immediate exection. */ | 132 | /* Otherwise we reschedule it again for immediate execution. */ |
133 | schedule_delayed_work(&linkwatch_work, 0); | 133 | schedule_delayed_work(&linkwatch_work, 0); |
134 | } | 134 | } |
135 | 135 | ||
diff --git a/net/core/neighbour.c b/net/core/neighbour.c index a4e0a7482c2b..799f06e03a22 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c | |||
@@ -41,7 +41,6 @@ | |||
41 | 41 | ||
42 | #define NEIGH_PRINTK(x...) printk(x) | 42 | #define NEIGH_PRINTK(x...) printk(x) |
43 | #define NEIGH_NOPRINTK(x...) do { ; } while(0) | 43 | #define NEIGH_NOPRINTK(x...) do { ; } while(0) |
44 | #define NEIGH_PRINTK0 NEIGH_PRINTK | ||
45 | #define NEIGH_PRINTK1 NEIGH_NOPRINTK | 44 | #define NEIGH_PRINTK1 NEIGH_NOPRINTK |
46 | #define NEIGH_PRINTK2 NEIGH_NOPRINTK | 45 | #define NEIGH_PRINTK2 NEIGH_NOPRINTK |
47 | 46 | ||
@@ -122,7 +121,7 @@ static void neigh_cleanup_and_release(struct neighbour *neigh) | |||
122 | 121 | ||
123 | unsigned long neigh_rand_reach_time(unsigned long base) | 122 | unsigned long neigh_rand_reach_time(unsigned long base) |
124 | { | 123 | { |
125 | return (base ? (net_random() % base) + (base >> 1) : 0); | 124 | return base ? (net_random() % base) + (base >> 1) : 0; |
126 | } | 125 | } |
127 | EXPORT_SYMBOL(neigh_rand_reach_time); | 126 | EXPORT_SYMBOL(neigh_rand_reach_time); |
128 | 127 | ||
@@ -131,15 +130,20 @@ static int neigh_forced_gc(struct neigh_table *tbl) | |||
131 | { | 130 | { |
132 | int shrunk = 0; | 131 | int shrunk = 0; |
133 | int i; | 132 | int i; |
133 | struct neigh_hash_table *nht; | ||
134 | 134 | ||
135 | NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs); | 135 | NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs); |
136 | 136 | ||
137 | write_lock_bh(&tbl->lock); | 137 | write_lock_bh(&tbl->lock); |
138 | for (i = 0; i <= tbl->hash_mask; i++) { | 138 | nht = rcu_dereference_protected(tbl->nht, |
139 | struct neighbour *n, **np; | 139 | lockdep_is_held(&tbl->lock)); |
140 | for (i = 0; i <= nht->hash_mask; i++) { | ||
141 | struct neighbour *n; | ||
142 | struct neighbour __rcu **np; | ||
140 | 143 | ||
141 | np = &tbl->hash_buckets[i]; | 144 | np = &nht->hash_buckets[i]; |
142 | while ((n = *np) != NULL) { | 145 | while ((n = rcu_dereference_protected(*np, |
146 | lockdep_is_held(&tbl->lock))) != NULL) { | ||
143 | /* Neighbour record may be discarded if: | 147 | /* Neighbour record may be discarded if: |
144 | * - nobody refers to it. | 148 | * - nobody refers to it. |
145 | * - it is not permanent | 149 | * - it is not permanent |
@@ -147,7 +151,9 @@ static int neigh_forced_gc(struct neigh_table *tbl) | |||
147 | write_lock(&n->lock); | 151 | write_lock(&n->lock); |
148 | if (atomic_read(&n->refcnt) == 1 && | 152 | if (atomic_read(&n->refcnt) == 1 && |
149 | !(n->nud_state & NUD_PERMANENT)) { | 153 | !(n->nud_state & NUD_PERMANENT)) { |
150 | *np = n->next; | 154 | rcu_assign_pointer(*np, |
155 | rcu_dereference_protected(n->next, | ||
156 | lockdep_is_held(&tbl->lock))); | ||
151 | n->dead = 1; | 157 | n->dead = 1; |
152 | shrunk = 1; | 158 | shrunk = 1; |
153 | write_unlock(&n->lock); | 159 | write_unlock(&n->lock); |
@@ -199,16 +205,24 @@ static void pneigh_queue_purge(struct sk_buff_head *list) | |||
199 | static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev) | 205 | static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev) |
200 | { | 206 | { |
201 | int i; | 207 | int i; |
208 | struct neigh_hash_table *nht; | ||
202 | 209 | ||
203 | for (i = 0; i <= tbl->hash_mask; i++) { | 210 | nht = rcu_dereference_protected(tbl->nht, |
204 | struct neighbour *n, **np = &tbl->hash_buckets[i]; | 211 | lockdep_is_held(&tbl->lock)); |
205 | 212 | ||
206 | while ((n = *np) != NULL) { | 213 | for (i = 0; i <= nht->hash_mask; i++) { |
214 | struct neighbour *n; | ||
215 | struct neighbour __rcu **np = &nht->hash_buckets[i]; | ||
216 | |||
217 | while ((n = rcu_dereference_protected(*np, | ||
218 | lockdep_is_held(&tbl->lock))) != NULL) { | ||
207 | if (dev && n->dev != dev) { | 219 | if (dev && n->dev != dev) { |
208 | np = &n->next; | 220 | np = &n->next; |
209 | continue; | 221 | continue; |
210 | } | 222 | } |
211 | *np = n->next; | 223 | rcu_assign_pointer(*np, |
224 | rcu_dereference_protected(n->next, | ||
225 | lockdep_is_held(&tbl->lock))); | ||
212 | write_lock(&n->lock); | 226 | write_lock(&n->lock); |
213 | neigh_del_timer(n); | 227 | neigh_del_timer(n); |
214 | n->dead = 1; | 228 | n->dead = 1; |
@@ -279,6 +293,7 @@ static struct neighbour *neigh_alloc(struct neigh_table *tbl) | |||
279 | 293 | ||
280 | skb_queue_head_init(&n->arp_queue); | 294 | skb_queue_head_init(&n->arp_queue); |
281 | rwlock_init(&n->lock); | 295 | rwlock_init(&n->lock); |
296 | seqlock_init(&n->ha_lock); | ||
282 | n->updated = n->used = now; | 297 | n->updated = n->used = now; |
283 | n->nud_state = NUD_NONE; | 298 | n->nud_state = NUD_NONE; |
284 | n->output = neigh_blackhole; | 299 | n->output = neigh_blackhole; |
@@ -297,64 +312,86 @@ out_entries: | |||
297 | goto out; | 312 | goto out; |
298 | } | 313 | } |
299 | 314 | ||
300 | static struct neighbour **neigh_hash_alloc(unsigned int entries) | 315 | static struct neigh_hash_table *neigh_hash_alloc(unsigned int entries) |
301 | { | 316 | { |
302 | unsigned long size = entries * sizeof(struct neighbour *); | 317 | size_t size = entries * sizeof(struct neighbour *); |
303 | struct neighbour **ret; | 318 | struct neigh_hash_table *ret; |
319 | struct neighbour __rcu **buckets; | ||
304 | 320 | ||
305 | if (size <= PAGE_SIZE) { | 321 | ret = kmalloc(sizeof(*ret), GFP_ATOMIC); |
306 | ret = kzalloc(size, GFP_ATOMIC); | 322 | if (!ret) |
307 | } else { | 323 | return NULL; |
308 | ret = (struct neighbour **) | 324 | if (size <= PAGE_SIZE) |
309 | __get_free_pages(GFP_ATOMIC|__GFP_ZERO, get_order(size)); | 325 | buckets = kzalloc(size, GFP_ATOMIC); |
326 | else | ||
327 | buckets = (struct neighbour __rcu **) | ||
328 | __get_free_pages(GFP_ATOMIC | __GFP_ZERO, | ||
329 | get_order(size)); | ||
330 | if (!buckets) { | ||
331 | kfree(ret); | ||
332 | return NULL; | ||
310 | } | 333 | } |
334 | ret->hash_buckets = buckets; | ||
335 | ret->hash_mask = entries - 1; | ||
336 | get_random_bytes(&ret->hash_rnd, sizeof(ret->hash_rnd)); | ||
311 | return ret; | 337 | return ret; |
312 | } | 338 | } |
313 | 339 | ||
314 | static void neigh_hash_free(struct neighbour **hash, unsigned int entries) | 340 | static void neigh_hash_free_rcu(struct rcu_head *head) |
315 | { | 341 | { |
316 | unsigned long size = entries * sizeof(struct neighbour *); | 342 | struct neigh_hash_table *nht = container_of(head, |
343 | struct neigh_hash_table, | ||
344 | rcu); | ||
345 | size_t size = (nht->hash_mask + 1) * sizeof(struct neighbour *); | ||
346 | struct neighbour __rcu **buckets = nht->hash_buckets; | ||
317 | 347 | ||
318 | if (size <= PAGE_SIZE) | 348 | if (size <= PAGE_SIZE) |
319 | kfree(hash); | 349 | kfree(buckets); |
320 | else | 350 | else |
321 | free_pages((unsigned long)hash, get_order(size)); | 351 | free_pages((unsigned long)buckets, get_order(size)); |
352 | kfree(nht); | ||
322 | } | 353 | } |
323 | 354 | ||
324 | static void neigh_hash_grow(struct neigh_table *tbl, unsigned long new_entries) | 355 | static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl, |
356 | unsigned long new_entries) | ||
325 | { | 357 | { |
326 | struct neighbour **new_hash, **old_hash; | 358 | unsigned int i, hash; |
327 | unsigned int i, new_hash_mask, old_entries; | 359 | struct neigh_hash_table *new_nht, *old_nht; |
328 | 360 | ||
329 | NEIGH_CACHE_STAT_INC(tbl, hash_grows); | 361 | NEIGH_CACHE_STAT_INC(tbl, hash_grows); |
330 | 362 | ||
331 | BUG_ON(!is_power_of_2(new_entries)); | 363 | BUG_ON(!is_power_of_2(new_entries)); |
332 | new_hash = neigh_hash_alloc(new_entries); | 364 | old_nht = rcu_dereference_protected(tbl->nht, |
333 | if (!new_hash) | 365 | lockdep_is_held(&tbl->lock)); |
334 | return; | 366 | new_nht = neigh_hash_alloc(new_entries); |
335 | 367 | if (!new_nht) | |
336 | old_entries = tbl->hash_mask + 1; | 368 | return old_nht; |
337 | new_hash_mask = new_entries - 1; | ||
338 | old_hash = tbl->hash_buckets; | ||
339 | 369 | ||
340 | get_random_bytes(&tbl->hash_rnd, sizeof(tbl->hash_rnd)); | 370 | for (i = 0; i <= old_nht->hash_mask; i++) { |
341 | for (i = 0; i < old_entries; i++) { | ||
342 | struct neighbour *n, *next; | 371 | struct neighbour *n, *next; |
343 | 372 | ||
344 | for (n = old_hash[i]; n; n = next) { | 373 | for (n = rcu_dereference_protected(old_nht->hash_buckets[i], |
345 | unsigned int hash_val = tbl->hash(n->primary_key, n->dev); | 374 | lockdep_is_held(&tbl->lock)); |
346 | 375 | n != NULL; | |
347 | hash_val &= new_hash_mask; | 376 | n = next) { |
348 | next = n->next; | 377 | hash = tbl->hash(n->primary_key, n->dev, |
349 | 378 | new_nht->hash_rnd); | |
350 | n->next = new_hash[hash_val]; | 379 | |
351 | new_hash[hash_val] = n; | 380 | hash &= new_nht->hash_mask; |
381 | next = rcu_dereference_protected(n->next, | ||
382 | lockdep_is_held(&tbl->lock)); | ||
383 | |||
384 | rcu_assign_pointer(n->next, | ||
385 | rcu_dereference_protected( | ||
386 | new_nht->hash_buckets[hash], | ||
387 | lockdep_is_held(&tbl->lock))); | ||
388 | rcu_assign_pointer(new_nht->hash_buckets[hash], n); | ||
352 | } | 389 | } |
353 | } | 390 | } |
354 | tbl->hash_buckets = new_hash; | ||
355 | tbl->hash_mask = new_hash_mask; | ||
356 | 391 | ||
357 | neigh_hash_free(old_hash, old_entries); | 392 | rcu_assign_pointer(tbl->nht, new_nht); |
393 | call_rcu(&old_nht->rcu, neigh_hash_free_rcu); | ||
394 | return new_nht; | ||
358 | } | 395 | } |
359 | 396 | ||
360 | struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey, | 397 | struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey, |
@@ -363,19 +400,26 @@ struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey, | |||
363 | struct neighbour *n; | 400 | struct neighbour *n; |
364 | int key_len = tbl->key_len; | 401 | int key_len = tbl->key_len; |
365 | u32 hash_val; | 402 | u32 hash_val; |
403 | struct neigh_hash_table *nht; | ||
366 | 404 | ||
367 | NEIGH_CACHE_STAT_INC(tbl, lookups); | 405 | NEIGH_CACHE_STAT_INC(tbl, lookups); |
368 | 406 | ||
369 | read_lock_bh(&tbl->lock); | 407 | rcu_read_lock_bh(); |
370 | hash_val = tbl->hash(pkey, dev); | 408 | nht = rcu_dereference_bh(tbl->nht); |
371 | for (n = tbl->hash_buckets[hash_val & tbl->hash_mask]; n; n = n->next) { | 409 | hash_val = tbl->hash(pkey, dev, nht->hash_rnd) & nht->hash_mask; |
410 | |||
411 | for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]); | ||
412 | n != NULL; | ||
413 | n = rcu_dereference_bh(n->next)) { | ||
372 | if (dev == n->dev && !memcmp(n->primary_key, pkey, key_len)) { | 414 | if (dev == n->dev && !memcmp(n->primary_key, pkey, key_len)) { |
373 | neigh_hold(n); | 415 | if (!atomic_inc_not_zero(&n->refcnt)) |
416 | n = NULL; | ||
374 | NEIGH_CACHE_STAT_INC(tbl, hits); | 417 | NEIGH_CACHE_STAT_INC(tbl, hits); |
375 | break; | 418 | break; |
376 | } | 419 | } |
377 | } | 420 | } |
378 | read_unlock_bh(&tbl->lock); | 421 | |
422 | rcu_read_unlock_bh(); | ||
379 | return n; | 423 | return n; |
380 | } | 424 | } |
381 | EXPORT_SYMBOL(neigh_lookup); | 425 | EXPORT_SYMBOL(neigh_lookup); |
@@ -386,20 +430,27 @@ struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net, | |||
386 | struct neighbour *n; | 430 | struct neighbour *n; |
387 | int key_len = tbl->key_len; | 431 | int key_len = tbl->key_len; |
388 | u32 hash_val; | 432 | u32 hash_val; |
433 | struct neigh_hash_table *nht; | ||
389 | 434 | ||
390 | NEIGH_CACHE_STAT_INC(tbl, lookups); | 435 | NEIGH_CACHE_STAT_INC(tbl, lookups); |
391 | 436 | ||
392 | read_lock_bh(&tbl->lock); | 437 | rcu_read_lock_bh(); |
393 | hash_val = tbl->hash(pkey, NULL); | 438 | nht = rcu_dereference_bh(tbl->nht); |
394 | for (n = tbl->hash_buckets[hash_val & tbl->hash_mask]; n; n = n->next) { | 439 | hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) & nht->hash_mask; |
440 | |||
441 | for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]); | ||
442 | n != NULL; | ||
443 | n = rcu_dereference_bh(n->next)) { | ||
395 | if (!memcmp(n->primary_key, pkey, key_len) && | 444 | if (!memcmp(n->primary_key, pkey, key_len) && |
396 | net_eq(dev_net(n->dev), net)) { | 445 | net_eq(dev_net(n->dev), net)) { |
397 | neigh_hold(n); | 446 | if (!atomic_inc_not_zero(&n->refcnt)) |
447 | n = NULL; | ||
398 | NEIGH_CACHE_STAT_INC(tbl, hits); | 448 | NEIGH_CACHE_STAT_INC(tbl, hits); |
399 | break; | 449 | break; |
400 | } | 450 | } |
401 | } | 451 | } |
402 | read_unlock_bh(&tbl->lock); | 452 | |
453 | rcu_read_unlock_bh(); | ||
403 | return n; | 454 | return n; |
404 | } | 455 | } |
405 | EXPORT_SYMBOL(neigh_lookup_nodev); | 456 | EXPORT_SYMBOL(neigh_lookup_nodev); |
@@ -411,6 +462,7 @@ struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey, | |||
411 | int key_len = tbl->key_len; | 462 | int key_len = tbl->key_len; |
412 | int error; | 463 | int error; |
413 | struct neighbour *n1, *rc, *n = neigh_alloc(tbl); | 464 | struct neighbour *n1, *rc, *n = neigh_alloc(tbl); |
465 | struct neigh_hash_table *nht; | ||
414 | 466 | ||
415 | if (!n) { | 467 | if (!n) { |
416 | rc = ERR_PTR(-ENOBUFS); | 468 | rc = ERR_PTR(-ENOBUFS); |
@@ -437,18 +489,24 @@ struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey, | |||
437 | n->confirmed = jiffies - (n->parms->base_reachable_time << 1); | 489 | n->confirmed = jiffies - (n->parms->base_reachable_time << 1); |
438 | 490 | ||
439 | write_lock_bh(&tbl->lock); | 491 | write_lock_bh(&tbl->lock); |
492 | nht = rcu_dereference_protected(tbl->nht, | ||
493 | lockdep_is_held(&tbl->lock)); | ||
440 | 494 | ||
441 | if (atomic_read(&tbl->entries) > (tbl->hash_mask + 1)) | 495 | if (atomic_read(&tbl->entries) > (nht->hash_mask + 1)) |
442 | neigh_hash_grow(tbl, (tbl->hash_mask + 1) << 1); | 496 | nht = neigh_hash_grow(tbl, (nht->hash_mask + 1) << 1); |
443 | 497 | ||
444 | hash_val = tbl->hash(pkey, dev) & tbl->hash_mask; | 498 | hash_val = tbl->hash(pkey, dev, nht->hash_rnd) & nht->hash_mask; |
445 | 499 | ||
446 | if (n->parms->dead) { | 500 | if (n->parms->dead) { |
447 | rc = ERR_PTR(-EINVAL); | 501 | rc = ERR_PTR(-EINVAL); |
448 | goto out_tbl_unlock; | 502 | goto out_tbl_unlock; |
449 | } | 503 | } |
450 | 504 | ||
451 | for (n1 = tbl->hash_buckets[hash_val]; n1; n1 = n1->next) { | 505 | for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val], |
506 | lockdep_is_held(&tbl->lock)); | ||
507 | n1 != NULL; | ||
508 | n1 = rcu_dereference_protected(n1->next, | ||
509 | lockdep_is_held(&tbl->lock))) { | ||
452 | if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) { | 510 | if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) { |
453 | neigh_hold(n1); | 511 | neigh_hold(n1); |
454 | rc = n1; | 512 | rc = n1; |
@@ -456,10 +514,12 @@ struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey, | |||
456 | } | 514 | } |
457 | } | 515 | } |
458 | 516 | ||
459 | n->next = tbl->hash_buckets[hash_val]; | ||
460 | tbl->hash_buckets[hash_val] = n; | ||
461 | n->dead = 0; | 517 | n->dead = 0; |
462 | neigh_hold(n); | 518 | neigh_hold(n); |
519 | rcu_assign_pointer(n->next, | ||
520 | rcu_dereference_protected(nht->hash_buckets[hash_val], | ||
521 | lockdep_is_held(&tbl->lock))); | ||
522 | rcu_assign_pointer(nht->hash_buckets[hash_val], n); | ||
463 | write_unlock_bh(&tbl->lock); | 523 | write_unlock_bh(&tbl->lock); |
464 | NEIGH_PRINTK2("neigh %p is created.\n", n); | 524 | NEIGH_PRINTK2("neigh %p is created.\n", n); |
465 | rc = n; | 525 | rc = n; |
@@ -616,6 +676,12 @@ static inline void neigh_parms_put(struct neigh_parms *parms) | |||
616 | neigh_parms_destroy(parms); | 676 | neigh_parms_destroy(parms); |
617 | } | 677 | } |
618 | 678 | ||
679 | static void neigh_destroy_rcu(struct rcu_head *head) | ||
680 | { | ||
681 | struct neighbour *neigh = container_of(head, struct neighbour, rcu); | ||
682 | |||
683 | kmem_cache_free(neigh->tbl->kmem_cachep, neigh); | ||
684 | } | ||
619 | /* | 685 | /* |
620 | * neighbour must already be out of the table; | 686 | * neighbour must already be out of the table; |
621 | * | 687 | * |
@@ -643,8 +709,7 @@ void neigh_destroy(struct neighbour *neigh) | |||
643 | write_seqlock_bh(&hh->hh_lock); | 709 | write_seqlock_bh(&hh->hh_lock); |
644 | hh->hh_output = neigh_blackhole; | 710 | hh->hh_output = neigh_blackhole; |
645 | write_sequnlock_bh(&hh->hh_lock); | 711 | write_sequnlock_bh(&hh->hh_lock); |
646 | if (atomic_dec_and_test(&hh->hh_refcnt)) | 712 | hh_cache_put(hh); |
647 | kfree(hh); | ||
648 | } | 713 | } |
649 | 714 | ||
650 | skb_queue_purge(&neigh->arp_queue); | 715 | skb_queue_purge(&neigh->arp_queue); |
@@ -655,7 +720,7 @@ void neigh_destroy(struct neighbour *neigh) | |||
655 | NEIGH_PRINTK2("neigh %p is destroyed.\n", neigh); | 720 | NEIGH_PRINTK2("neigh %p is destroyed.\n", neigh); |
656 | 721 | ||
657 | atomic_dec(&neigh->tbl->entries); | 722 | atomic_dec(&neigh->tbl->entries); |
658 | kmem_cache_free(neigh->tbl->kmem_cachep, neigh); | 723 | call_rcu(&neigh->rcu, neigh_destroy_rcu); |
659 | } | 724 | } |
660 | EXPORT_SYMBOL(neigh_destroy); | 725 | EXPORT_SYMBOL(neigh_destroy); |
661 | 726 | ||
@@ -696,12 +761,16 @@ static void neigh_connect(struct neighbour *neigh) | |||
696 | static void neigh_periodic_work(struct work_struct *work) | 761 | static void neigh_periodic_work(struct work_struct *work) |
697 | { | 762 | { |
698 | struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work); | 763 | struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work); |
699 | struct neighbour *n, **np; | 764 | struct neighbour *n; |
765 | struct neighbour __rcu **np; | ||
700 | unsigned int i; | 766 | unsigned int i; |
767 | struct neigh_hash_table *nht; | ||
701 | 768 | ||
702 | NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs); | 769 | NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs); |
703 | 770 | ||
704 | write_lock_bh(&tbl->lock); | 771 | write_lock_bh(&tbl->lock); |
772 | nht = rcu_dereference_protected(tbl->nht, | ||
773 | lockdep_is_held(&tbl->lock)); | ||
705 | 774 | ||
706 | /* | 775 | /* |
707 | * periodically recompute ReachableTime from random function | 776 | * periodically recompute ReachableTime from random function |
@@ -715,10 +784,11 @@ static void neigh_periodic_work(struct work_struct *work) | |||
715 | neigh_rand_reach_time(p->base_reachable_time); | 784 | neigh_rand_reach_time(p->base_reachable_time); |
716 | } | 785 | } |
717 | 786 | ||
718 | for (i = 0 ; i <= tbl->hash_mask; i++) { | 787 | for (i = 0 ; i <= nht->hash_mask; i++) { |
719 | np = &tbl->hash_buckets[i]; | 788 | np = &nht->hash_buckets[i]; |
720 | 789 | ||
721 | while ((n = *np) != NULL) { | 790 | while ((n = rcu_dereference_protected(*np, |
791 | lockdep_is_held(&tbl->lock))) != NULL) { | ||
722 | unsigned int state; | 792 | unsigned int state; |
723 | 793 | ||
724 | write_lock(&n->lock); | 794 | write_lock(&n->lock); |
@@ -766,9 +836,9 @@ next_elt: | |||
766 | static __inline__ int neigh_max_probes(struct neighbour *n) | 836 | static __inline__ int neigh_max_probes(struct neighbour *n) |
767 | { | 837 | { |
768 | struct neigh_parms *p = n->parms; | 838 | struct neigh_parms *p = n->parms; |
769 | return (n->nud_state & NUD_PROBE ? | 839 | return (n->nud_state & NUD_PROBE) ? |
770 | p->ucast_probes : | 840 | p->ucast_probes : |
771 | p->ucast_probes + p->app_probes + p->mcast_probes); | 841 | p->ucast_probes + p->app_probes + p->mcast_probes; |
772 | } | 842 | } |
773 | 843 | ||
774 | static void neigh_invalidate(struct neighbour *neigh) | 844 | static void neigh_invalidate(struct neighbour *neigh) |
@@ -945,7 +1015,7 @@ out_unlock_bh: | |||
945 | } | 1015 | } |
946 | EXPORT_SYMBOL(__neigh_event_send); | 1016 | EXPORT_SYMBOL(__neigh_event_send); |
947 | 1017 | ||
948 | static void neigh_update_hhs(struct neighbour *neigh) | 1018 | static void neigh_update_hhs(const struct neighbour *neigh) |
949 | { | 1019 | { |
950 | struct hh_cache *hh; | 1020 | struct hh_cache *hh; |
951 | void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *) | 1021 | void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *) |
@@ -1081,7 +1151,9 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, | |||
1081 | } | 1151 | } |
1082 | 1152 | ||
1083 | if (lladdr != neigh->ha) { | 1153 | if (lladdr != neigh->ha) { |
1154 | write_seqlock(&neigh->ha_lock); | ||
1084 | memcpy(&neigh->ha, lladdr, dev->addr_len); | 1155 | memcpy(&neigh->ha, lladdr, dev->addr_len); |
1156 | write_sequnlock(&neigh->ha_lock); | ||
1085 | neigh_update_hhs(neigh); | 1157 | neigh_update_hhs(neigh); |
1086 | if (!(new & NUD_CONNECTED)) | 1158 | if (!(new & NUD_CONNECTED)) |
1087 | neigh->confirmed = jiffies - | 1159 | neigh->confirmed = jiffies - |
@@ -1139,44 +1211,73 @@ struct neighbour *neigh_event_ns(struct neigh_table *tbl, | |||
1139 | } | 1211 | } |
1140 | EXPORT_SYMBOL(neigh_event_ns); | 1212 | EXPORT_SYMBOL(neigh_event_ns); |
1141 | 1213 | ||
1214 | static inline bool neigh_hh_lookup(struct neighbour *n, struct dst_entry *dst, | ||
1215 | __be16 protocol) | ||
1216 | { | ||
1217 | struct hh_cache *hh; | ||
1218 | |||
1219 | smp_rmb(); /* paired with smp_wmb() in neigh_hh_init() */ | ||
1220 | for (hh = n->hh; hh; hh = hh->hh_next) { | ||
1221 | if (hh->hh_type == protocol) { | ||
1222 | atomic_inc(&hh->hh_refcnt); | ||
1223 | if (unlikely(cmpxchg(&dst->hh, NULL, hh) != NULL)) | ||
1224 | hh_cache_put(hh); | ||
1225 | return true; | ||
1226 | } | ||
1227 | } | ||
1228 | return false; | ||
1229 | } | ||
1230 | |||
1231 | /* called with read_lock_bh(&n->lock); */ | ||
1142 | static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst, | 1232 | static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst, |
1143 | __be16 protocol) | 1233 | __be16 protocol) |
1144 | { | 1234 | { |
1145 | struct hh_cache *hh; | 1235 | struct hh_cache *hh; |
1146 | struct net_device *dev = dst->dev; | 1236 | struct net_device *dev = dst->dev; |
1147 | 1237 | ||
1148 | for (hh = n->hh; hh; hh = hh->hh_next) | 1238 | if (likely(neigh_hh_lookup(n, dst, protocol))) |
1149 | if (hh->hh_type == protocol) | 1239 | return; |
1150 | break; | ||
1151 | 1240 | ||
1152 | if (!hh && (hh = kzalloc(sizeof(*hh), GFP_ATOMIC)) != NULL) { | 1241 | /* slow path */ |
1153 | seqlock_init(&hh->hh_lock); | 1242 | hh = kzalloc(sizeof(*hh), GFP_ATOMIC); |
1154 | hh->hh_type = protocol; | 1243 | if (!hh) |
1155 | atomic_set(&hh->hh_refcnt, 0); | 1244 | return; |
1156 | hh->hh_next = NULL; | ||
1157 | 1245 | ||
1158 | if (dev->header_ops->cache(n, hh)) { | 1246 | seqlock_init(&hh->hh_lock); |
1159 | kfree(hh); | 1247 | hh->hh_type = protocol; |
1160 | hh = NULL; | 1248 | atomic_set(&hh->hh_refcnt, 2); |
1161 | } else { | 1249 | |
1162 | atomic_inc(&hh->hh_refcnt); | 1250 | if (dev->header_ops->cache(n, hh)) { |
1163 | hh->hh_next = n->hh; | 1251 | kfree(hh); |
1164 | n->hh = hh; | 1252 | return; |
1165 | if (n->nud_state & NUD_CONNECTED) | ||
1166 | hh->hh_output = n->ops->hh_output; | ||
1167 | else | ||
1168 | hh->hh_output = n->ops->output; | ||
1169 | } | ||
1170 | } | 1253 | } |
1171 | if (hh) { | 1254 | |
1172 | atomic_inc(&hh->hh_refcnt); | 1255 | write_lock_bh(&n->lock); |
1173 | dst->hh = hh; | 1256 | |
1257 | /* must check if another thread already did the insert */ | ||
1258 | if (neigh_hh_lookup(n, dst, protocol)) { | ||
1259 | kfree(hh); | ||
1260 | goto end; | ||
1174 | } | 1261 | } |
1262 | |||
1263 | if (n->nud_state & NUD_CONNECTED) | ||
1264 | hh->hh_output = n->ops->hh_output; | ||
1265 | else | ||
1266 | hh->hh_output = n->ops->output; | ||
1267 | |||
1268 | hh->hh_next = n->hh; | ||
1269 | smp_wmb(); /* paired with smp_rmb() in neigh_hh_lookup() */ | ||
1270 | n->hh = hh; | ||
1271 | |||
1272 | if (unlikely(cmpxchg(&dst->hh, NULL, hh) != NULL)) | ||
1273 | hh_cache_put(hh); | ||
1274 | end: | ||
1275 | write_unlock_bh(&n->lock); | ||
1175 | } | 1276 | } |
1176 | 1277 | ||
1177 | /* This function can be used in contexts, where only old dev_queue_xmit | 1278 | /* This function can be used in contexts, where only old dev_queue_xmit |
1178 | worked, f.e. if you want to override normal output path (eql, shaper), | 1279 | * worked, f.e. if you want to override normal output path (eql, shaper), |
1179 | but resolution is not made yet. | 1280 | * but resolution is not made yet. |
1180 | */ | 1281 | */ |
1181 | 1282 | ||
1182 | int neigh_compat_output(struct sk_buff *skb) | 1283 | int neigh_compat_output(struct sk_buff *skb) |
@@ -1210,19 +1311,19 @@ int neigh_resolve_output(struct sk_buff *skb) | |||
1210 | if (!neigh_event_send(neigh, skb)) { | 1311 | if (!neigh_event_send(neigh, skb)) { |
1211 | int err; | 1312 | int err; |
1212 | struct net_device *dev = neigh->dev; | 1313 | struct net_device *dev = neigh->dev; |
1213 | if (dev->header_ops->cache && !dst->hh) { | 1314 | unsigned int seq; |
1214 | write_lock_bh(&neigh->lock); | 1315 | |
1215 | if (!dst->hh) | 1316 | if (dev->header_ops->cache && |
1216 | neigh_hh_init(neigh, dst, dst->ops->protocol); | 1317 | !dst->hh && |
1217 | err = dev_hard_header(skb, dev, ntohs(skb->protocol), | 1318 | !(dst->flags & DST_NOCACHE)) |
1218 | neigh->ha, NULL, skb->len); | 1319 | neigh_hh_init(neigh, dst, dst->ops->protocol); |
1219 | write_unlock_bh(&neigh->lock); | 1320 | |
1220 | } else { | 1321 | do { |
1221 | read_lock_bh(&neigh->lock); | 1322 | seq = read_seqbegin(&neigh->ha_lock); |
1222 | err = dev_hard_header(skb, dev, ntohs(skb->protocol), | 1323 | err = dev_hard_header(skb, dev, ntohs(skb->protocol), |
1223 | neigh->ha, NULL, skb->len); | 1324 | neigh->ha, NULL, skb->len); |
1224 | read_unlock_bh(&neigh->lock); | 1325 | } while (read_seqretry(&neigh->ha_lock, seq)); |
1225 | } | 1326 | |
1226 | if (err >= 0) | 1327 | if (err >= 0) |
1227 | rc = neigh->ops->queue_xmit(skb); | 1328 | rc = neigh->ops->queue_xmit(skb); |
1228 | else | 1329 | else |
@@ -1248,13 +1349,16 @@ int neigh_connected_output(struct sk_buff *skb) | |||
1248 | struct dst_entry *dst = skb_dst(skb); | 1349 | struct dst_entry *dst = skb_dst(skb); |
1249 | struct neighbour *neigh = dst->neighbour; | 1350 | struct neighbour *neigh = dst->neighbour; |
1250 | struct net_device *dev = neigh->dev; | 1351 | struct net_device *dev = neigh->dev; |
1352 | unsigned int seq; | ||
1251 | 1353 | ||
1252 | __skb_pull(skb, skb_network_offset(skb)); | 1354 | __skb_pull(skb, skb_network_offset(skb)); |
1253 | 1355 | ||
1254 | read_lock_bh(&neigh->lock); | 1356 | do { |
1255 | err = dev_hard_header(skb, dev, ntohs(skb->protocol), | 1357 | seq = read_seqbegin(&neigh->ha_lock); |
1256 | neigh->ha, NULL, skb->len); | 1358 | err = dev_hard_header(skb, dev, ntohs(skb->protocol), |
1257 | read_unlock_bh(&neigh->lock); | 1359 | neigh->ha, NULL, skb->len); |
1360 | } while (read_seqretry(&neigh->ha_lock, seq)); | ||
1361 | |||
1258 | if (err >= 0) | 1362 | if (err >= 0) |
1259 | err = neigh->ops->queue_xmit(skb); | 1363 | err = neigh->ops->queue_xmit(skb); |
1260 | else { | 1364 | else { |
@@ -1436,17 +1540,14 @@ void neigh_table_init_no_netlink(struct neigh_table *tbl) | |||
1436 | panic("cannot create neighbour proc dir entry"); | 1540 | panic("cannot create neighbour proc dir entry"); |
1437 | #endif | 1541 | #endif |
1438 | 1542 | ||
1439 | tbl->hash_mask = 1; | 1543 | RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(8)); |
1440 | tbl->hash_buckets = neigh_hash_alloc(tbl->hash_mask + 1); | ||
1441 | 1544 | ||
1442 | phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *); | 1545 | phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *); |
1443 | tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL); | 1546 | tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL); |
1444 | 1547 | ||
1445 | if (!tbl->hash_buckets || !tbl->phash_buckets) | 1548 | if (!tbl->nht || !tbl->phash_buckets) |
1446 | panic("cannot allocate neighbour cache hashes"); | 1549 | panic("cannot allocate neighbour cache hashes"); |
1447 | 1550 | ||
1448 | get_random_bytes(&tbl->hash_rnd, sizeof(tbl->hash_rnd)); | ||
1449 | |||
1450 | rwlock_init(&tbl->lock); | 1551 | rwlock_init(&tbl->lock); |
1451 | INIT_DELAYED_WORK_DEFERRABLE(&tbl->gc_work, neigh_periodic_work); | 1552 | INIT_DELAYED_WORK_DEFERRABLE(&tbl->gc_work, neigh_periodic_work); |
1452 | schedule_delayed_work(&tbl->gc_work, tbl->parms.reachable_time); | 1553 | schedule_delayed_work(&tbl->gc_work, tbl->parms.reachable_time); |
@@ -1486,8 +1587,7 @@ int neigh_table_clear(struct neigh_table *tbl) | |||
1486 | struct neigh_table **tp; | 1587 | struct neigh_table **tp; |
1487 | 1588 | ||
1488 | /* It is not clean... Fix it to unload IPv6 module safely */ | 1589 | /* It is not clean... Fix it to unload IPv6 module safely */ |
1489 | cancel_delayed_work(&tbl->gc_work); | 1590 | cancel_delayed_work_sync(&tbl->gc_work); |
1490 | flush_scheduled_work(); | ||
1491 | del_timer_sync(&tbl->proxy_timer); | 1591 | del_timer_sync(&tbl->proxy_timer); |
1492 | pneigh_queue_purge(&tbl->proxy_queue); | 1592 | pneigh_queue_purge(&tbl->proxy_queue); |
1493 | neigh_ifdown(tbl, NULL); | 1593 | neigh_ifdown(tbl, NULL); |
@@ -1502,8 +1602,9 @@ int neigh_table_clear(struct neigh_table *tbl) | |||
1502 | } | 1602 | } |
1503 | write_unlock(&neigh_tbl_lock); | 1603 | write_unlock(&neigh_tbl_lock); |
1504 | 1604 | ||
1505 | neigh_hash_free(tbl->hash_buckets, tbl->hash_mask + 1); | 1605 | call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu, |
1506 | tbl->hash_buckets = NULL; | 1606 | neigh_hash_free_rcu); |
1607 | tbl->nht = NULL; | ||
1507 | 1608 | ||
1508 | kfree(tbl->phash_buckets); | 1609 | kfree(tbl->phash_buckets); |
1509 | tbl->phash_buckets = NULL; | 1610 | tbl->phash_buckets = NULL; |
@@ -1529,6 +1630,7 @@ static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) | |||
1529 | struct net_device *dev = NULL; | 1630 | struct net_device *dev = NULL; |
1530 | int err = -EINVAL; | 1631 | int err = -EINVAL; |
1531 | 1632 | ||
1633 | ASSERT_RTNL(); | ||
1532 | if (nlmsg_len(nlh) < sizeof(*ndm)) | 1634 | if (nlmsg_len(nlh) < sizeof(*ndm)) |
1533 | goto out; | 1635 | goto out; |
1534 | 1636 | ||
@@ -1538,7 +1640,7 @@ static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) | |||
1538 | 1640 | ||
1539 | ndm = nlmsg_data(nlh); | 1641 | ndm = nlmsg_data(nlh); |
1540 | if (ndm->ndm_ifindex) { | 1642 | if (ndm->ndm_ifindex) { |
1541 | dev = dev_get_by_index(net, ndm->ndm_ifindex); | 1643 | dev = __dev_get_by_index(net, ndm->ndm_ifindex); |
1542 | if (dev == NULL) { | 1644 | if (dev == NULL) { |
1543 | err = -ENODEV; | 1645 | err = -ENODEV; |
1544 | goto out; | 1646 | goto out; |
@@ -1554,34 +1656,31 @@ static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) | |||
1554 | read_unlock(&neigh_tbl_lock); | 1656 | read_unlock(&neigh_tbl_lock); |
1555 | 1657 | ||
1556 | if (nla_len(dst_attr) < tbl->key_len) | 1658 | if (nla_len(dst_attr) < tbl->key_len) |
1557 | goto out_dev_put; | 1659 | goto out; |
1558 | 1660 | ||
1559 | if (ndm->ndm_flags & NTF_PROXY) { | 1661 | if (ndm->ndm_flags & NTF_PROXY) { |
1560 | err = pneigh_delete(tbl, net, nla_data(dst_attr), dev); | 1662 | err = pneigh_delete(tbl, net, nla_data(dst_attr), dev); |
1561 | goto out_dev_put; | 1663 | goto out; |
1562 | } | 1664 | } |
1563 | 1665 | ||
1564 | if (dev == NULL) | 1666 | if (dev == NULL) |
1565 | goto out_dev_put; | 1667 | goto out; |
1566 | 1668 | ||
1567 | neigh = neigh_lookup(tbl, nla_data(dst_attr), dev); | 1669 | neigh = neigh_lookup(tbl, nla_data(dst_attr), dev); |
1568 | if (neigh == NULL) { | 1670 | if (neigh == NULL) { |
1569 | err = -ENOENT; | 1671 | err = -ENOENT; |
1570 | goto out_dev_put; | 1672 | goto out; |
1571 | } | 1673 | } |
1572 | 1674 | ||
1573 | err = neigh_update(neigh, NULL, NUD_FAILED, | 1675 | err = neigh_update(neigh, NULL, NUD_FAILED, |
1574 | NEIGH_UPDATE_F_OVERRIDE | | 1676 | NEIGH_UPDATE_F_OVERRIDE | |
1575 | NEIGH_UPDATE_F_ADMIN); | 1677 | NEIGH_UPDATE_F_ADMIN); |
1576 | neigh_release(neigh); | 1678 | neigh_release(neigh); |
1577 | goto out_dev_put; | 1679 | goto out; |
1578 | } | 1680 | } |
1579 | read_unlock(&neigh_tbl_lock); | 1681 | read_unlock(&neigh_tbl_lock); |
1580 | err = -EAFNOSUPPORT; | 1682 | err = -EAFNOSUPPORT; |
1581 | 1683 | ||
1582 | out_dev_put: | ||
1583 | if (dev) | ||
1584 | dev_put(dev); | ||
1585 | out: | 1684 | out: |
1586 | return err; | 1685 | return err; |
1587 | } | 1686 | } |
@@ -1595,6 +1694,7 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) | |||
1595 | struct net_device *dev = NULL; | 1694 | struct net_device *dev = NULL; |
1596 | int err; | 1695 | int err; |
1597 | 1696 | ||
1697 | ASSERT_RTNL(); | ||
1598 | err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL); | 1698 | err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL); |
1599 | if (err < 0) | 1699 | if (err < 0) |
1600 | goto out; | 1700 | goto out; |
@@ -1605,14 +1705,14 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) | |||
1605 | 1705 | ||
1606 | ndm = nlmsg_data(nlh); | 1706 | ndm = nlmsg_data(nlh); |
1607 | if (ndm->ndm_ifindex) { | 1707 | if (ndm->ndm_ifindex) { |
1608 | dev = dev_get_by_index(net, ndm->ndm_ifindex); | 1708 | dev = __dev_get_by_index(net, ndm->ndm_ifindex); |
1609 | if (dev == NULL) { | 1709 | if (dev == NULL) { |
1610 | err = -ENODEV; | 1710 | err = -ENODEV; |
1611 | goto out; | 1711 | goto out; |
1612 | } | 1712 | } |
1613 | 1713 | ||
1614 | if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len) | 1714 | if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len) |
1615 | goto out_dev_put; | 1715 | goto out; |
1616 | } | 1716 | } |
1617 | 1717 | ||
1618 | read_lock(&neigh_tbl_lock); | 1718 | read_lock(&neigh_tbl_lock); |
@@ -1626,7 +1726,7 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) | |||
1626 | read_unlock(&neigh_tbl_lock); | 1726 | read_unlock(&neigh_tbl_lock); |
1627 | 1727 | ||
1628 | if (nla_len(tb[NDA_DST]) < tbl->key_len) | 1728 | if (nla_len(tb[NDA_DST]) < tbl->key_len) |
1629 | goto out_dev_put; | 1729 | goto out; |
1630 | dst = nla_data(tb[NDA_DST]); | 1730 | dst = nla_data(tb[NDA_DST]); |
1631 | lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL; | 1731 | lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL; |
1632 | 1732 | ||
@@ -1639,29 +1739,29 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) | |||
1639 | pn->flags = ndm->ndm_flags; | 1739 | pn->flags = ndm->ndm_flags; |
1640 | err = 0; | 1740 | err = 0; |
1641 | } | 1741 | } |
1642 | goto out_dev_put; | 1742 | goto out; |
1643 | } | 1743 | } |
1644 | 1744 | ||
1645 | if (dev == NULL) | 1745 | if (dev == NULL) |
1646 | goto out_dev_put; | 1746 | goto out; |
1647 | 1747 | ||
1648 | neigh = neigh_lookup(tbl, dst, dev); | 1748 | neigh = neigh_lookup(tbl, dst, dev); |
1649 | if (neigh == NULL) { | 1749 | if (neigh == NULL) { |
1650 | if (!(nlh->nlmsg_flags & NLM_F_CREATE)) { | 1750 | if (!(nlh->nlmsg_flags & NLM_F_CREATE)) { |
1651 | err = -ENOENT; | 1751 | err = -ENOENT; |
1652 | goto out_dev_put; | 1752 | goto out; |
1653 | } | 1753 | } |
1654 | 1754 | ||
1655 | neigh = __neigh_lookup_errno(tbl, dst, dev); | 1755 | neigh = __neigh_lookup_errno(tbl, dst, dev); |
1656 | if (IS_ERR(neigh)) { | 1756 | if (IS_ERR(neigh)) { |
1657 | err = PTR_ERR(neigh); | 1757 | err = PTR_ERR(neigh); |
1658 | goto out_dev_put; | 1758 | goto out; |
1659 | } | 1759 | } |
1660 | } else { | 1760 | } else { |
1661 | if (nlh->nlmsg_flags & NLM_F_EXCL) { | 1761 | if (nlh->nlmsg_flags & NLM_F_EXCL) { |
1662 | err = -EEXIST; | 1762 | err = -EEXIST; |
1663 | neigh_release(neigh); | 1763 | neigh_release(neigh); |
1664 | goto out_dev_put; | 1764 | goto out; |
1665 | } | 1765 | } |
1666 | 1766 | ||
1667 | if (!(nlh->nlmsg_flags & NLM_F_REPLACE)) | 1767 | if (!(nlh->nlmsg_flags & NLM_F_REPLACE)) |
@@ -1674,15 +1774,11 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) | |||
1674 | } else | 1774 | } else |
1675 | err = neigh_update(neigh, lladdr, ndm->ndm_state, flags); | 1775 | err = neigh_update(neigh, lladdr, ndm->ndm_state, flags); |
1676 | neigh_release(neigh); | 1776 | neigh_release(neigh); |
1677 | goto out_dev_put; | 1777 | goto out; |
1678 | } | 1778 | } |
1679 | 1779 | ||
1680 | read_unlock(&neigh_tbl_lock); | 1780 | read_unlock(&neigh_tbl_lock); |
1681 | err = -EAFNOSUPPORT; | 1781 | err = -EAFNOSUPPORT; |
1682 | |||
1683 | out_dev_put: | ||
1684 | if (dev) | ||
1685 | dev_put(dev); | ||
1686 | out: | 1782 | out: |
1687 | return err; | 1783 | return err; |
1688 | } | 1784 | } |
@@ -1748,18 +1844,22 @@ static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl, | |||
1748 | unsigned long now = jiffies; | 1844 | unsigned long now = jiffies; |
1749 | unsigned int flush_delta = now - tbl->last_flush; | 1845 | unsigned int flush_delta = now - tbl->last_flush; |
1750 | unsigned int rand_delta = now - tbl->last_rand; | 1846 | unsigned int rand_delta = now - tbl->last_rand; |
1751 | 1847 | struct neigh_hash_table *nht; | |
1752 | struct ndt_config ndc = { | 1848 | struct ndt_config ndc = { |
1753 | .ndtc_key_len = tbl->key_len, | 1849 | .ndtc_key_len = tbl->key_len, |
1754 | .ndtc_entry_size = tbl->entry_size, | 1850 | .ndtc_entry_size = tbl->entry_size, |
1755 | .ndtc_entries = atomic_read(&tbl->entries), | 1851 | .ndtc_entries = atomic_read(&tbl->entries), |
1756 | .ndtc_last_flush = jiffies_to_msecs(flush_delta), | 1852 | .ndtc_last_flush = jiffies_to_msecs(flush_delta), |
1757 | .ndtc_last_rand = jiffies_to_msecs(rand_delta), | 1853 | .ndtc_last_rand = jiffies_to_msecs(rand_delta), |
1758 | .ndtc_hash_rnd = tbl->hash_rnd, | ||
1759 | .ndtc_hash_mask = tbl->hash_mask, | ||
1760 | .ndtc_proxy_qlen = tbl->proxy_queue.qlen, | 1854 | .ndtc_proxy_qlen = tbl->proxy_queue.qlen, |
1761 | }; | 1855 | }; |
1762 | 1856 | ||
1857 | rcu_read_lock_bh(); | ||
1858 | nht = rcu_dereference_bh(tbl->nht); | ||
1859 | ndc.ndtc_hash_rnd = nht->hash_rnd; | ||
1860 | ndc.ndtc_hash_mask = nht->hash_mask; | ||
1861 | rcu_read_unlock_bh(); | ||
1862 | |||
1763 | NLA_PUT(skb, NDTA_CONFIG, sizeof(ndc), &ndc); | 1863 | NLA_PUT(skb, NDTA_CONFIG, sizeof(ndc), &ndc); |
1764 | } | 1864 | } |
1765 | 1865 | ||
@@ -2056,10 +2156,14 @@ static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh, | |||
2056 | 2156 | ||
2057 | read_lock_bh(&neigh->lock); | 2157 | read_lock_bh(&neigh->lock); |
2058 | ndm->ndm_state = neigh->nud_state; | 2158 | ndm->ndm_state = neigh->nud_state; |
2059 | if ((neigh->nud_state & NUD_VALID) && | 2159 | if (neigh->nud_state & NUD_VALID) { |
2060 | nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, neigh->ha) < 0) { | 2160 | char haddr[MAX_ADDR_LEN]; |
2061 | read_unlock_bh(&neigh->lock); | 2161 | |
2062 | goto nla_put_failure; | 2162 | neigh_ha_snapshot(haddr, neigh, neigh->dev); |
2163 | if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) { | ||
2164 | read_unlock_bh(&neigh->lock); | ||
2165 | goto nla_put_failure; | ||
2166 | } | ||
2063 | } | 2167 | } |
2064 | 2168 | ||
2065 | ci.ndm_used = jiffies_to_clock_t(now - neigh->used); | 2169 | ci.ndm_used = jiffies_to_clock_t(now - neigh->used); |
@@ -2087,18 +2191,23 @@ static void neigh_update_notify(struct neighbour *neigh) | |||
2087 | static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, | 2191 | static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, |
2088 | struct netlink_callback *cb) | 2192 | struct netlink_callback *cb) |
2089 | { | 2193 | { |
2090 | struct net * net = sock_net(skb->sk); | 2194 | struct net *net = sock_net(skb->sk); |
2091 | struct neighbour *n; | 2195 | struct neighbour *n; |
2092 | int rc, h, s_h = cb->args[1]; | 2196 | int rc, h, s_h = cb->args[1]; |
2093 | int idx, s_idx = idx = cb->args[2]; | 2197 | int idx, s_idx = idx = cb->args[2]; |
2198 | struct neigh_hash_table *nht; | ||
2094 | 2199 | ||
2095 | read_lock_bh(&tbl->lock); | 2200 | rcu_read_lock_bh(); |
2096 | for (h = 0; h <= tbl->hash_mask; h++) { | 2201 | nht = rcu_dereference_bh(tbl->nht); |
2202 | |||
2203 | for (h = 0; h <= nht->hash_mask; h++) { | ||
2097 | if (h < s_h) | 2204 | if (h < s_h) |
2098 | continue; | 2205 | continue; |
2099 | if (h > s_h) | 2206 | if (h > s_h) |
2100 | s_idx = 0; | 2207 | s_idx = 0; |
2101 | for (n = tbl->hash_buckets[h], idx = 0; n; n = n->next) { | 2208 | for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0; |
2209 | n != NULL; | ||
2210 | n = rcu_dereference_bh(n->next)) { | ||
2102 | if (!net_eq(dev_net(n->dev), net)) | 2211 | if (!net_eq(dev_net(n->dev), net)) |
2103 | continue; | 2212 | continue; |
2104 | if (idx < s_idx) | 2213 | if (idx < s_idx) |
@@ -2107,17 +2216,16 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, | |||
2107 | cb->nlh->nlmsg_seq, | 2216 | cb->nlh->nlmsg_seq, |
2108 | RTM_NEWNEIGH, | 2217 | RTM_NEWNEIGH, |
2109 | NLM_F_MULTI) <= 0) { | 2218 | NLM_F_MULTI) <= 0) { |
2110 | read_unlock_bh(&tbl->lock); | ||
2111 | rc = -1; | 2219 | rc = -1; |
2112 | goto out; | 2220 | goto out; |
2113 | } | 2221 | } |
2114 | next: | 2222 | next: |
2115 | idx++; | 2223 | idx++; |
2116 | } | 2224 | } |
2117 | } | 2225 | } |
2118 | read_unlock_bh(&tbl->lock); | ||
2119 | rc = skb->len; | 2226 | rc = skb->len; |
2120 | out: | 2227 | out: |
2228 | rcu_read_unlock_bh(); | ||
2121 | cb->args[1] = h; | 2229 | cb->args[1] = h; |
2122 | cb->args[2] = idx; | 2230 | cb->args[2] = idx; |
2123 | return rc; | 2231 | return rc; |
@@ -2150,15 +2258,22 @@ static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb) | |||
2150 | void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie) | 2258 | void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie) |
2151 | { | 2259 | { |
2152 | int chain; | 2260 | int chain; |
2261 | struct neigh_hash_table *nht; | ||
2153 | 2262 | ||
2154 | read_lock_bh(&tbl->lock); | 2263 | rcu_read_lock_bh(); |
2155 | for (chain = 0; chain <= tbl->hash_mask; chain++) { | 2264 | nht = rcu_dereference_bh(tbl->nht); |
2265 | |||
2266 | read_lock(&tbl->lock); /* avoid resizes */ | ||
2267 | for (chain = 0; chain <= nht->hash_mask; chain++) { | ||
2156 | struct neighbour *n; | 2268 | struct neighbour *n; |
2157 | 2269 | ||
2158 | for (n = tbl->hash_buckets[chain]; n; n = n->next) | 2270 | for (n = rcu_dereference_bh(nht->hash_buckets[chain]); |
2271 | n != NULL; | ||
2272 | n = rcu_dereference_bh(n->next)) | ||
2159 | cb(n, cookie); | 2273 | cb(n, cookie); |
2160 | } | 2274 | } |
2161 | read_unlock_bh(&tbl->lock); | 2275 | read_unlock(&tbl->lock); |
2276 | rcu_read_unlock_bh(); | ||
2162 | } | 2277 | } |
2163 | EXPORT_SYMBOL(neigh_for_each); | 2278 | EXPORT_SYMBOL(neigh_for_each); |
2164 | 2279 | ||
@@ -2167,18 +2282,25 @@ void __neigh_for_each_release(struct neigh_table *tbl, | |||
2167 | int (*cb)(struct neighbour *)) | 2282 | int (*cb)(struct neighbour *)) |
2168 | { | 2283 | { |
2169 | int chain; | 2284 | int chain; |
2285 | struct neigh_hash_table *nht; | ||
2170 | 2286 | ||
2171 | for (chain = 0; chain <= tbl->hash_mask; chain++) { | 2287 | nht = rcu_dereference_protected(tbl->nht, |
2172 | struct neighbour *n, **np; | 2288 | lockdep_is_held(&tbl->lock)); |
2289 | for (chain = 0; chain <= nht->hash_mask; chain++) { | ||
2290 | struct neighbour *n; | ||
2291 | struct neighbour __rcu **np; | ||
2173 | 2292 | ||
2174 | np = &tbl->hash_buckets[chain]; | 2293 | np = &nht->hash_buckets[chain]; |
2175 | while ((n = *np) != NULL) { | 2294 | while ((n = rcu_dereference_protected(*np, |
2295 | lockdep_is_held(&tbl->lock))) != NULL) { | ||
2176 | int release; | 2296 | int release; |
2177 | 2297 | ||
2178 | write_lock(&n->lock); | 2298 | write_lock(&n->lock); |
2179 | release = cb(n); | 2299 | release = cb(n); |
2180 | if (release) { | 2300 | if (release) { |
2181 | *np = n->next; | 2301 | rcu_assign_pointer(*np, |
2302 | rcu_dereference_protected(n->next, | ||
2303 | lockdep_is_held(&tbl->lock))); | ||
2182 | n->dead = 1; | 2304 | n->dead = 1; |
2183 | } else | 2305 | } else |
2184 | np = &n->next; | 2306 | np = &n->next; |
@@ -2196,13 +2318,13 @@ static struct neighbour *neigh_get_first(struct seq_file *seq) | |||
2196 | { | 2318 | { |
2197 | struct neigh_seq_state *state = seq->private; | 2319 | struct neigh_seq_state *state = seq->private; |
2198 | struct net *net = seq_file_net(seq); | 2320 | struct net *net = seq_file_net(seq); |
2199 | struct neigh_table *tbl = state->tbl; | 2321 | struct neigh_hash_table *nht = state->nht; |
2200 | struct neighbour *n = NULL; | 2322 | struct neighbour *n = NULL; |
2201 | int bucket = state->bucket; | 2323 | int bucket = state->bucket; |
2202 | 2324 | ||
2203 | state->flags &= ~NEIGH_SEQ_IS_PNEIGH; | 2325 | state->flags &= ~NEIGH_SEQ_IS_PNEIGH; |
2204 | for (bucket = 0; bucket <= tbl->hash_mask; bucket++) { | 2326 | for (bucket = 0; bucket <= nht->hash_mask; bucket++) { |
2205 | n = tbl->hash_buckets[bucket]; | 2327 | n = rcu_dereference_bh(nht->hash_buckets[bucket]); |
2206 | 2328 | ||
2207 | while (n) { | 2329 | while (n) { |
2208 | if (!net_eq(dev_net(n->dev), net)) | 2330 | if (!net_eq(dev_net(n->dev), net)) |
@@ -2219,8 +2341,8 @@ static struct neighbour *neigh_get_first(struct seq_file *seq) | |||
2219 | break; | 2341 | break; |
2220 | if (n->nud_state & ~NUD_NOARP) | 2342 | if (n->nud_state & ~NUD_NOARP) |
2221 | break; | 2343 | break; |
2222 | next: | 2344 | next: |
2223 | n = n->next; | 2345 | n = rcu_dereference_bh(n->next); |
2224 | } | 2346 | } |
2225 | 2347 | ||
2226 | if (n) | 2348 | if (n) |
@@ -2237,14 +2359,14 @@ static struct neighbour *neigh_get_next(struct seq_file *seq, | |||
2237 | { | 2359 | { |
2238 | struct neigh_seq_state *state = seq->private; | 2360 | struct neigh_seq_state *state = seq->private; |
2239 | struct net *net = seq_file_net(seq); | 2361 | struct net *net = seq_file_net(seq); |
2240 | struct neigh_table *tbl = state->tbl; | 2362 | struct neigh_hash_table *nht = state->nht; |
2241 | 2363 | ||
2242 | if (state->neigh_sub_iter) { | 2364 | if (state->neigh_sub_iter) { |
2243 | void *v = state->neigh_sub_iter(state, n, pos); | 2365 | void *v = state->neigh_sub_iter(state, n, pos); |
2244 | if (v) | 2366 | if (v) |
2245 | return n; | 2367 | return n; |
2246 | } | 2368 | } |
2247 | n = n->next; | 2369 | n = rcu_dereference_bh(n->next); |
2248 | 2370 | ||
2249 | while (1) { | 2371 | while (1) { |
2250 | while (n) { | 2372 | while (n) { |
@@ -2261,17 +2383,17 @@ static struct neighbour *neigh_get_next(struct seq_file *seq, | |||
2261 | 2383 | ||
2262 | if (n->nud_state & ~NUD_NOARP) | 2384 | if (n->nud_state & ~NUD_NOARP) |
2263 | break; | 2385 | break; |
2264 | next: | 2386 | next: |
2265 | n = n->next; | 2387 | n = rcu_dereference_bh(n->next); |
2266 | } | 2388 | } |
2267 | 2389 | ||
2268 | if (n) | 2390 | if (n) |
2269 | break; | 2391 | break; |
2270 | 2392 | ||
2271 | if (++state->bucket > tbl->hash_mask) | 2393 | if (++state->bucket > nht->hash_mask) |
2272 | break; | 2394 | break; |
2273 | 2395 | ||
2274 | n = tbl->hash_buckets[state->bucket]; | 2396 | n = rcu_dereference_bh(nht->hash_buckets[state->bucket]); |
2275 | } | 2397 | } |
2276 | 2398 | ||
2277 | if (n && pos) | 2399 | if (n && pos) |
@@ -2369,7 +2491,7 @@ static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos) | |||
2369 | } | 2491 | } |
2370 | 2492 | ||
2371 | void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags) | 2493 | void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags) |
2372 | __acquires(tbl->lock) | 2494 | __acquires(rcu_bh) |
2373 | { | 2495 | { |
2374 | struct neigh_seq_state *state = seq->private; | 2496 | struct neigh_seq_state *state = seq->private; |
2375 | 2497 | ||
@@ -2377,7 +2499,8 @@ void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl | |||
2377 | state->bucket = 0; | 2499 | state->bucket = 0; |
2378 | state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH); | 2500 | state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH); |
2379 | 2501 | ||
2380 | read_lock_bh(&tbl->lock); | 2502 | rcu_read_lock_bh(); |
2503 | state->nht = rcu_dereference_bh(tbl->nht); | ||
2381 | 2504 | ||
2382 | return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN; | 2505 | return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN; |
2383 | } | 2506 | } |
@@ -2411,12 +2534,9 @@ out: | |||
2411 | EXPORT_SYMBOL(neigh_seq_next); | 2534 | EXPORT_SYMBOL(neigh_seq_next); |
2412 | 2535 | ||
2413 | void neigh_seq_stop(struct seq_file *seq, void *v) | 2536 | void neigh_seq_stop(struct seq_file *seq, void *v) |
2414 | __releases(tbl->lock) | 2537 | __releases(rcu_bh) |
2415 | { | 2538 | { |
2416 | struct neigh_seq_state *state = seq->private; | 2539 | rcu_read_unlock_bh(); |
2417 | struct neigh_table *tbl = state->tbl; | ||
2418 | |||
2419 | read_unlock_bh(&tbl->lock); | ||
2420 | } | 2540 | } |
2421 | EXPORT_SYMBOL(neigh_seq_stop); | 2541 | EXPORT_SYMBOL(neigh_seq_stop); |
2422 | 2542 | ||
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index af4dfbadf2a0..33d2a1fba131 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c | |||
@@ -28,6 +28,7 @@ | |||
28 | static const char fmt_hex[] = "%#x\n"; | 28 | static const char fmt_hex[] = "%#x\n"; |
29 | static const char fmt_long_hex[] = "%#lx\n"; | 29 | static const char fmt_long_hex[] = "%#lx\n"; |
30 | static const char fmt_dec[] = "%d\n"; | 30 | static const char fmt_dec[] = "%d\n"; |
31 | static const char fmt_udec[] = "%u\n"; | ||
31 | static const char fmt_ulong[] = "%lu\n"; | 32 | static const char fmt_ulong[] = "%lu\n"; |
32 | static const char fmt_u64[] = "%llu\n"; | 33 | static const char fmt_u64[] = "%llu\n"; |
33 | 34 | ||
@@ -99,7 +100,7 @@ NETDEVICE_SHOW(addr_assign_type, fmt_dec); | |||
99 | NETDEVICE_SHOW(addr_len, fmt_dec); | 100 | NETDEVICE_SHOW(addr_len, fmt_dec); |
100 | NETDEVICE_SHOW(iflink, fmt_dec); | 101 | NETDEVICE_SHOW(iflink, fmt_dec); |
101 | NETDEVICE_SHOW(ifindex, fmt_dec); | 102 | NETDEVICE_SHOW(ifindex, fmt_dec); |
102 | NETDEVICE_SHOW(features, fmt_long_hex); | 103 | NETDEVICE_SHOW(features, fmt_hex); |
103 | NETDEVICE_SHOW(type, fmt_dec); | 104 | NETDEVICE_SHOW(type, fmt_dec); |
104 | NETDEVICE_SHOW(link_mode, fmt_dec); | 105 | NETDEVICE_SHOW(link_mode, fmt_dec); |
105 | 106 | ||
@@ -145,13 +146,10 @@ static ssize_t show_speed(struct device *dev, | |||
145 | if (!rtnl_trylock()) | 146 | if (!rtnl_trylock()) |
146 | return restart_syscall(); | 147 | return restart_syscall(); |
147 | 148 | ||
148 | if (netif_running(netdev) && | 149 | if (netif_running(netdev)) { |
149 | netdev->ethtool_ops && | 150 | struct ethtool_cmd cmd; |
150 | netdev->ethtool_ops->get_settings) { | 151 | if (!dev_ethtool_get_settings(netdev, &cmd)) |
151 | struct ethtool_cmd cmd = { ETHTOOL_GSET }; | 152 | ret = sprintf(buf, fmt_udec, ethtool_cmd_speed(&cmd)); |
152 | |||
153 | if (!netdev->ethtool_ops->get_settings(netdev, &cmd)) | ||
154 | ret = sprintf(buf, fmt_dec, ethtool_cmd_speed(&cmd)); | ||
155 | } | 153 | } |
156 | rtnl_unlock(); | 154 | rtnl_unlock(); |
157 | return ret; | 155 | return ret; |
@@ -166,13 +164,11 @@ static ssize_t show_duplex(struct device *dev, | |||
166 | if (!rtnl_trylock()) | 164 | if (!rtnl_trylock()) |
167 | return restart_syscall(); | 165 | return restart_syscall(); |
168 | 166 | ||
169 | if (netif_running(netdev) && | 167 | if (netif_running(netdev)) { |
170 | netdev->ethtool_ops && | 168 | struct ethtool_cmd cmd; |
171 | netdev->ethtool_ops->get_settings) { | 169 | if (!dev_ethtool_get_settings(netdev, &cmd)) |
172 | struct ethtool_cmd cmd = { ETHTOOL_GSET }; | 170 | ret = sprintf(buf, "%s\n", |
173 | 171 | cmd.duplex ? "full" : "half"); | |
174 | if (!netdev->ethtool_ops->get_settings(netdev, &cmd)) | ||
175 | ret = sprintf(buf, "%s\n", cmd.duplex ? "full" : "half"); | ||
176 | } | 172 | } |
177 | rtnl_unlock(); | 173 | rtnl_unlock(); |
178 | return ret; | 174 | return ret; |
@@ -295,6 +291,20 @@ static ssize_t show_ifalias(struct device *dev, | |||
295 | return ret; | 291 | return ret; |
296 | } | 292 | } |
297 | 293 | ||
294 | NETDEVICE_SHOW(group, fmt_dec); | ||
295 | |||
296 | static int change_group(struct net_device *net, unsigned long new_group) | ||
297 | { | ||
298 | dev_set_group(net, (int) new_group); | ||
299 | return 0; | ||
300 | } | ||
301 | |||
302 | static ssize_t store_group(struct device *dev, struct device_attribute *attr, | ||
303 | const char *buf, size_t len) | ||
304 | { | ||
305 | return netdev_store(dev, attr, buf, len, change_group); | ||
306 | } | ||
307 | |||
298 | static struct device_attribute net_class_attributes[] = { | 308 | static struct device_attribute net_class_attributes[] = { |
299 | __ATTR(addr_assign_type, S_IRUGO, show_addr_assign_type, NULL), | 309 | __ATTR(addr_assign_type, S_IRUGO, show_addr_assign_type, NULL), |
300 | __ATTR(addr_len, S_IRUGO, show_addr_len, NULL), | 310 | __ATTR(addr_len, S_IRUGO, show_addr_len, NULL), |
@@ -316,6 +326,7 @@ static struct device_attribute net_class_attributes[] = { | |||
316 | __ATTR(flags, S_IRUGO | S_IWUSR, show_flags, store_flags), | 326 | __ATTR(flags, S_IRUGO | S_IWUSR, show_flags, store_flags), |
317 | __ATTR(tx_queue_len, S_IRUGO | S_IWUSR, show_tx_queue_len, | 327 | __ATTR(tx_queue_len, S_IRUGO | S_IWUSR, show_tx_queue_len, |
318 | store_tx_queue_len), | 328 | store_tx_queue_len), |
329 | __ATTR(netdev_group, S_IRUGO | S_IWUSR, show_group, store_group), | ||
319 | {} | 330 | {} |
320 | }; | 331 | }; |
321 | 332 | ||
@@ -515,7 +526,7 @@ static ssize_t rx_queue_attr_store(struct kobject *kobj, struct attribute *attr, | |||
515 | return attribute->store(queue, attribute, buf, count); | 526 | return attribute->store(queue, attribute, buf, count); |
516 | } | 527 | } |
517 | 528 | ||
518 | static struct sysfs_ops rx_queue_sysfs_ops = { | 529 | static const struct sysfs_ops rx_queue_sysfs_ops = { |
519 | .show = rx_queue_attr_show, | 530 | .show = rx_queue_attr_show, |
520 | .store = rx_queue_attr_store, | 531 | .store = rx_queue_attr_store, |
521 | }; | 532 | }; |
@@ -550,13 +561,6 @@ static ssize_t show_rps_map(struct netdev_rx_queue *queue, | |||
550 | return len; | 561 | return len; |
551 | } | 562 | } |
552 | 563 | ||
553 | static void rps_map_release(struct rcu_head *rcu) | ||
554 | { | ||
555 | struct rps_map *map = container_of(rcu, struct rps_map, rcu); | ||
556 | |||
557 | kfree(map); | ||
558 | } | ||
559 | |||
560 | static ssize_t store_rps_map(struct netdev_rx_queue *queue, | 564 | static ssize_t store_rps_map(struct netdev_rx_queue *queue, |
561 | struct rx_queue_attribute *attribute, | 565 | struct rx_queue_attribute *attribute, |
562 | const char *buf, size_t len) | 566 | const char *buf, size_t len) |
@@ -598,12 +602,13 @@ static ssize_t store_rps_map(struct netdev_rx_queue *queue, | |||
598 | } | 602 | } |
599 | 603 | ||
600 | spin_lock(&rps_map_lock); | 604 | spin_lock(&rps_map_lock); |
601 | old_map = queue->rps_map; | 605 | old_map = rcu_dereference_protected(queue->rps_map, |
606 | lockdep_is_held(&rps_map_lock)); | ||
602 | rcu_assign_pointer(queue->rps_map, map); | 607 | rcu_assign_pointer(queue->rps_map, map); |
603 | spin_unlock(&rps_map_lock); | 608 | spin_unlock(&rps_map_lock); |
604 | 609 | ||
605 | if (old_map) | 610 | if (old_map) |
606 | call_rcu(&old_map->rcu, rps_map_release); | 611 | kfree_rcu(old_map, rcu); |
607 | 612 | ||
608 | free_cpumask_var(mask); | 613 | free_cpumask_var(mask); |
609 | return len; | 614 | return len; |
@@ -677,7 +682,8 @@ static ssize_t store_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue, | |||
677 | table = NULL; | 682 | table = NULL; |
678 | 683 | ||
679 | spin_lock(&rps_dev_flow_lock); | 684 | spin_lock(&rps_dev_flow_lock); |
680 | old_table = queue->rps_flow_table; | 685 | old_table = rcu_dereference_protected(queue->rps_flow_table, |
686 | lockdep_is_held(&rps_dev_flow_lock)); | ||
681 | rcu_assign_pointer(queue->rps_flow_table, table); | 687 | rcu_assign_pointer(queue->rps_flow_table, table); |
682 | spin_unlock(&rps_dev_flow_lock); | 688 | spin_unlock(&rps_dev_flow_lock); |
683 | 689 | ||
@@ -704,17 +710,24 @@ static struct attribute *rx_queue_default_attrs[] = { | |||
704 | static void rx_queue_release(struct kobject *kobj) | 710 | static void rx_queue_release(struct kobject *kobj) |
705 | { | 711 | { |
706 | struct netdev_rx_queue *queue = to_rx_queue(kobj); | 712 | struct netdev_rx_queue *queue = to_rx_queue(kobj); |
707 | struct netdev_rx_queue *first = queue->first; | 713 | struct rps_map *map; |
714 | struct rps_dev_flow_table *flow_table; | ||
708 | 715 | ||
709 | if (queue->rps_map) | ||
710 | call_rcu(&queue->rps_map->rcu, rps_map_release); | ||
711 | 716 | ||
712 | if (queue->rps_flow_table) | 717 | map = rcu_dereference_raw(queue->rps_map); |
713 | call_rcu(&queue->rps_flow_table->rcu, | 718 | if (map) { |
714 | rps_dev_flow_table_release); | 719 | RCU_INIT_POINTER(queue->rps_map, NULL); |
720 | kfree_rcu(map, rcu); | ||
721 | } | ||
715 | 722 | ||
716 | if (atomic_dec_and_test(&first->count)) | 723 | flow_table = rcu_dereference_raw(queue->rps_flow_table); |
717 | kfree(first); | 724 | if (flow_table) { |
725 | RCU_INIT_POINTER(queue->rps_flow_table, NULL); | ||
726 | call_rcu(&flow_table->rcu, rps_dev_flow_table_release); | ||
727 | } | ||
728 | |||
729 | memset(kobj, 0, sizeof(*kobj)); | ||
730 | dev_put(queue->dev); | ||
718 | } | 731 | } |
719 | 732 | ||
720 | static struct kobj_type rx_queue_ktype = { | 733 | static struct kobj_type rx_queue_ktype = { |
@@ -738,45 +751,442 @@ static int rx_queue_add_kobject(struct net_device *net, int index) | |||
738 | } | 751 | } |
739 | 752 | ||
740 | kobject_uevent(kobj, KOBJ_ADD); | 753 | kobject_uevent(kobj, KOBJ_ADD); |
754 | dev_hold(queue->dev); | ||
741 | 755 | ||
742 | return error; | 756 | return error; |
743 | } | 757 | } |
758 | #endif /* CONFIG_RPS */ | ||
744 | 759 | ||
745 | static int rx_queue_register_kobjects(struct net_device *net) | 760 | int |
761 | net_rx_queue_update_kobjects(struct net_device *net, int old_num, int new_num) | ||
746 | { | 762 | { |
763 | #ifdef CONFIG_RPS | ||
747 | int i; | 764 | int i; |
748 | int error = 0; | 765 | int error = 0; |
749 | 766 | ||
767 | for (i = old_num; i < new_num; i++) { | ||
768 | error = rx_queue_add_kobject(net, i); | ||
769 | if (error) { | ||
770 | new_num = old_num; | ||
771 | break; | ||
772 | } | ||
773 | } | ||
774 | |||
775 | while (--i >= new_num) | ||
776 | kobject_put(&net->_rx[i].kobj); | ||
777 | |||
778 | return error; | ||
779 | #else | ||
780 | return 0; | ||
781 | #endif | ||
782 | } | ||
783 | |||
784 | #ifdef CONFIG_XPS | ||
785 | /* | ||
786 | * netdev_queue sysfs structures and functions. | ||
787 | */ | ||
788 | struct netdev_queue_attribute { | ||
789 | struct attribute attr; | ||
790 | ssize_t (*show)(struct netdev_queue *queue, | ||
791 | struct netdev_queue_attribute *attr, char *buf); | ||
792 | ssize_t (*store)(struct netdev_queue *queue, | ||
793 | struct netdev_queue_attribute *attr, const char *buf, size_t len); | ||
794 | }; | ||
795 | #define to_netdev_queue_attr(_attr) container_of(_attr, \ | ||
796 | struct netdev_queue_attribute, attr) | ||
797 | |||
798 | #define to_netdev_queue(obj) container_of(obj, struct netdev_queue, kobj) | ||
799 | |||
800 | static ssize_t netdev_queue_attr_show(struct kobject *kobj, | ||
801 | struct attribute *attr, char *buf) | ||
802 | { | ||
803 | struct netdev_queue_attribute *attribute = to_netdev_queue_attr(attr); | ||
804 | struct netdev_queue *queue = to_netdev_queue(kobj); | ||
805 | |||
806 | if (!attribute->show) | ||
807 | return -EIO; | ||
808 | |||
809 | return attribute->show(queue, attribute, buf); | ||
810 | } | ||
811 | |||
812 | static ssize_t netdev_queue_attr_store(struct kobject *kobj, | ||
813 | struct attribute *attr, | ||
814 | const char *buf, size_t count) | ||
815 | { | ||
816 | struct netdev_queue_attribute *attribute = to_netdev_queue_attr(attr); | ||
817 | struct netdev_queue *queue = to_netdev_queue(kobj); | ||
818 | |||
819 | if (!attribute->store) | ||
820 | return -EIO; | ||
821 | |||
822 | return attribute->store(queue, attribute, buf, count); | ||
823 | } | ||
824 | |||
825 | static const struct sysfs_ops netdev_queue_sysfs_ops = { | ||
826 | .show = netdev_queue_attr_show, | ||
827 | .store = netdev_queue_attr_store, | ||
828 | }; | ||
829 | |||
830 | static inline unsigned int get_netdev_queue_index(struct netdev_queue *queue) | ||
831 | { | ||
832 | struct net_device *dev = queue->dev; | ||
833 | int i; | ||
834 | |||
835 | for (i = 0; i < dev->num_tx_queues; i++) | ||
836 | if (queue == &dev->_tx[i]) | ||
837 | break; | ||
838 | |||
839 | BUG_ON(i >= dev->num_tx_queues); | ||
840 | |||
841 | return i; | ||
842 | } | ||
843 | |||
844 | |||
845 | static ssize_t show_xps_map(struct netdev_queue *queue, | ||
846 | struct netdev_queue_attribute *attribute, char *buf) | ||
847 | { | ||
848 | struct net_device *dev = queue->dev; | ||
849 | struct xps_dev_maps *dev_maps; | ||
850 | cpumask_var_t mask; | ||
851 | unsigned long index; | ||
852 | size_t len = 0; | ||
853 | int i; | ||
854 | |||
855 | if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) | ||
856 | return -ENOMEM; | ||
857 | |||
858 | index = get_netdev_queue_index(queue); | ||
859 | |||
860 | rcu_read_lock(); | ||
861 | dev_maps = rcu_dereference(dev->xps_maps); | ||
862 | if (dev_maps) { | ||
863 | for_each_possible_cpu(i) { | ||
864 | struct xps_map *map = | ||
865 | rcu_dereference(dev_maps->cpu_map[i]); | ||
866 | if (map) { | ||
867 | int j; | ||
868 | for (j = 0; j < map->len; j++) { | ||
869 | if (map->queues[j] == index) { | ||
870 | cpumask_set_cpu(i, mask); | ||
871 | break; | ||
872 | } | ||
873 | } | ||
874 | } | ||
875 | } | ||
876 | } | ||
877 | rcu_read_unlock(); | ||
878 | |||
879 | len += cpumask_scnprintf(buf + len, PAGE_SIZE, mask); | ||
880 | if (PAGE_SIZE - len < 3) { | ||
881 | free_cpumask_var(mask); | ||
882 | return -EINVAL; | ||
883 | } | ||
884 | |||
885 | free_cpumask_var(mask); | ||
886 | len += sprintf(buf + len, "\n"); | ||
887 | return len; | ||
888 | } | ||
889 | |||
890 | static DEFINE_MUTEX(xps_map_mutex); | ||
891 | #define xmap_dereference(P) \ | ||
892 | rcu_dereference_protected((P), lockdep_is_held(&xps_map_mutex)) | ||
893 | |||
894 | static ssize_t store_xps_map(struct netdev_queue *queue, | ||
895 | struct netdev_queue_attribute *attribute, | ||
896 | const char *buf, size_t len) | ||
897 | { | ||
898 | struct net_device *dev = queue->dev; | ||
899 | cpumask_var_t mask; | ||
900 | int err, i, cpu, pos, map_len, alloc_len, need_set; | ||
901 | unsigned long index; | ||
902 | struct xps_map *map, *new_map; | ||
903 | struct xps_dev_maps *dev_maps, *new_dev_maps; | ||
904 | int nonempty = 0; | ||
905 | int numa_node = -2; | ||
906 | |||
907 | if (!capable(CAP_NET_ADMIN)) | ||
908 | return -EPERM; | ||
909 | |||
910 | if (!alloc_cpumask_var(&mask, GFP_KERNEL)) | ||
911 | return -ENOMEM; | ||
912 | |||
913 | index = get_netdev_queue_index(queue); | ||
914 | |||
915 | err = bitmap_parse(buf, len, cpumask_bits(mask), nr_cpumask_bits); | ||
916 | if (err) { | ||
917 | free_cpumask_var(mask); | ||
918 | return err; | ||
919 | } | ||
920 | |||
921 | new_dev_maps = kzalloc(max_t(unsigned, | ||
922 | XPS_DEV_MAPS_SIZE, L1_CACHE_BYTES), GFP_KERNEL); | ||
923 | if (!new_dev_maps) { | ||
924 | free_cpumask_var(mask); | ||
925 | return -ENOMEM; | ||
926 | } | ||
927 | |||
928 | mutex_lock(&xps_map_mutex); | ||
929 | |||
930 | dev_maps = xmap_dereference(dev->xps_maps); | ||
931 | |||
932 | for_each_possible_cpu(cpu) { | ||
933 | map = dev_maps ? | ||
934 | xmap_dereference(dev_maps->cpu_map[cpu]) : NULL; | ||
935 | new_map = map; | ||
936 | if (map) { | ||
937 | for (pos = 0; pos < map->len; pos++) | ||
938 | if (map->queues[pos] == index) | ||
939 | break; | ||
940 | map_len = map->len; | ||
941 | alloc_len = map->alloc_len; | ||
942 | } else | ||
943 | pos = map_len = alloc_len = 0; | ||
944 | |||
945 | need_set = cpumask_test_cpu(cpu, mask) && cpu_online(cpu); | ||
946 | #ifdef CONFIG_NUMA | ||
947 | if (need_set) { | ||
948 | if (numa_node == -2) | ||
949 | numa_node = cpu_to_node(cpu); | ||
950 | else if (numa_node != cpu_to_node(cpu)) | ||
951 | numa_node = -1; | ||
952 | } | ||
953 | #endif | ||
954 | if (need_set && pos >= map_len) { | ||
955 | /* Need to add queue to this CPU's map */ | ||
956 | if (map_len >= alloc_len) { | ||
957 | alloc_len = alloc_len ? | ||
958 | 2 * alloc_len : XPS_MIN_MAP_ALLOC; | ||
959 | new_map = kzalloc_node(XPS_MAP_SIZE(alloc_len), | ||
960 | GFP_KERNEL, | ||
961 | cpu_to_node(cpu)); | ||
962 | if (!new_map) | ||
963 | goto error; | ||
964 | new_map->alloc_len = alloc_len; | ||
965 | for (i = 0; i < map_len; i++) | ||
966 | new_map->queues[i] = map->queues[i]; | ||
967 | new_map->len = map_len; | ||
968 | } | ||
969 | new_map->queues[new_map->len++] = index; | ||
970 | } else if (!need_set && pos < map_len) { | ||
971 | /* Need to remove queue from this CPU's map */ | ||
972 | if (map_len > 1) | ||
973 | new_map->queues[pos] = | ||
974 | new_map->queues[--new_map->len]; | ||
975 | else | ||
976 | new_map = NULL; | ||
977 | } | ||
978 | RCU_INIT_POINTER(new_dev_maps->cpu_map[cpu], new_map); | ||
979 | } | ||
980 | |||
981 | /* Cleanup old maps */ | ||
982 | for_each_possible_cpu(cpu) { | ||
983 | map = dev_maps ? | ||
984 | xmap_dereference(dev_maps->cpu_map[cpu]) : NULL; | ||
985 | if (map && xmap_dereference(new_dev_maps->cpu_map[cpu]) != map) | ||
986 | kfree_rcu(map, rcu); | ||
987 | if (new_dev_maps->cpu_map[cpu]) | ||
988 | nonempty = 1; | ||
989 | } | ||
990 | |||
991 | if (nonempty) | ||
992 | rcu_assign_pointer(dev->xps_maps, new_dev_maps); | ||
993 | else { | ||
994 | kfree(new_dev_maps); | ||
995 | rcu_assign_pointer(dev->xps_maps, NULL); | ||
996 | } | ||
997 | |||
998 | if (dev_maps) | ||
999 | kfree_rcu(dev_maps, rcu); | ||
1000 | |||
1001 | netdev_queue_numa_node_write(queue, (numa_node >= 0) ? numa_node : | ||
1002 | NUMA_NO_NODE); | ||
1003 | |||
1004 | mutex_unlock(&xps_map_mutex); | ||
1005 | |||
1006 | free_cpumask_var(mask); | ||
1007 | return len; | ||
1008 | |||
1009 | error: | ||
1010 | mutex_unlock(&xps_map_mutex); | ||
1011 | |||
1012 | if (new_dev_maps) | ||
1013 | for_each_possible_cpu(i) | ||
1014 | kfree(rcu_dereference_protected( | ||
1015 | new_dev_maps->cpu_map[i], | ||
1016 | 1)); | ||
1017 | kfree(new_dev_maps); | ||
1018 | free_cpumask_var(mask); | ||
1019 | return -ENOMEM; | ||
1020 | } | ||
1021 | |||
1022 | static struct netdev_queue_attribute xps_cpus_attribute = | ||
1023 | __ATTR(xps_cpus, S_IRUGO | S_IWUSR, show_xps_map, store_xps_map); | ||
1024 | |||
1025 | static struct attribute *netdev_queue_default_attrs[] = { | ||
1026 | &xps_cpus_attribute.attr, | ||
1027 | NULL | ||
1028 | }; | ||
1029 | |||
1030 | static void netdev_queue_release(struct kobject *kobj) | ||
1031 | { | ||
1032 | struct netdev_queue *queue = to_netdev_queue(kobj); | ||
1033 | struct net_device *dev = queue->dev; | ||
1034 | struct xps_dev_maps *dev_maps; | ||
1035 | struct xps_map *map; | ||
1036 | unsigned long index; | ||
1037 | int i, pos, nonempty = 0; | ||
1038 | |||
1039 | index = get_netdev_queue_index(queue); | ||
1040 | |||
1041 | mutex_lock(&xps_map_mutex); | ||
1042 | dev_maps = xmap_dereference(dev->xps_maps); | ||
1043 | |||
1044 | if (dev_maps) { | ||
1045 | for_each_possible_cpu(i) { | ||
1046 | map = xmap_dereference(dev_maps->cpu_map[i]); | ||
1047 | if (!map) | ||
1048 | continue; | ||
1049 | |||
1050 | for (pos = 0; pos < map->len; pos++) | ||
1051 | if (map->queues[pos] == index) | ||
1052 | break; | ||
1053 | |||
1054 | if (pos < map->len) { | ||
1055 | if (map->len > 1) | ||
1056 | map->queues[pos] = | ||
1057 | map->queues[--map->len]; | ||
1058 | else { | ||
1059 | RCU_INIT_POINTER(dev_maps->cpu_map[i], | ||
1060 | NULL); | ||
1061 | kfree_rcu(map, rcu); | ||
1062 | map = NULL; | ||
1063 | } | ||
1064 | } | ||
1065 | if (map) | ||
1066 | nonempty = 1; | ||
1067 | } | ||
1068 | |||
1069 | if (!nonempty) { | ||
1070 | RCU_INIT_POINTER(dev->xps_maps, NULL); | ||
1071 | kfree_rcu(dev_maps, rcu); | ||
1072 | } | ||
1073 | } | ||
1074 | |||
1075 | mutex_unlock(&xps_map_mutex); | ||
1076 | |||
1077 | memset(kobj, 0, sizeof(*kobj)); | ||
1078 | dev_put(queue->dev); | ||
1079 | } | ||
1080 | |||
1081 | static struct kobj_type netdev_queue_ktype = { | ||
1082 | .sysfs_ops = &netdev_queue_sysfs_ops, | ||
1083 | .release = netdev_queue_release, | ||
1084 | .default_attrs = netdev_queue_default_attrs, | ||
1085 | }; | ||
1086 | |||
1087 | static int netdev_queue_add_kobject(struct net_device *net, int index) | ||
1088 | { | ||
1089 | struct netdev_queue *queue = net->_tx + index; | ||
1090 | struct kobject *kobj = &queue->kobj; | ||
1091 | int error = 0; | ||
1092 | |||
1093 | kobj->kset = net->queues_kset; | ||
1094 | error = kobject_init_and_add(kobj, &netdev_queue_ktype, NULL, | ||
1095 | "tx-%u", index); | ||
1096 | if (error) { | ||
1097 | kobject_put(kobj); | ||
1098 | return error; | ||
1099 | } | ||
1100 | |||
1101 | kobject_uevent(kobj, KOBJ_ADD); | ||
1102 | dev_hold(queue->dev); | ||
1103 | |||
1104 | return error; | ||
1105 | } | ||
1106 | #endif /* CONFIG_XPS */ | ||
1107 | |||
1108 | int | ||
1109 | netdev_queue_update_kobjects(struct net_device *net, int old_num, int new_num) | ||
1110 | { | ||
1111 | #ifdef CONFIG_XPS | ||
1112 | int i; | ||
1113 | int error = 0; | ||
1114 | |||
1115 | for (i = old_num; i < new_num; i++) { | ||
1116 | error = netdev_queue_add_kobject(net, i); | ||
1117 | if (error) { | ||
1118 | new_num = old_num; | ||
1119 | break; | ||
1120 | } | ||
1121 | } | ||
1122 | |||
1123 | while (--i >= new_num) | ||
1124 | kobject_put(&net->_tx[i].kobj); | ||
1125 | |||
1126 | return error; | ||
1127 | #else | ||
1128 | return 0; | ||
1129 | #endif | ||
1130 | } | ||
1131 | |||
1132 | static int register_queue_kobjects(struct net_device *net) | ||
1133 | { | ||
1134 | int error = 0, txq = 0, rxq = 0, real_rx = 0, real_tx = 0; | ||
1135 | |||
1136 | #if defined(CONFIG_RPS) || defined(CONFIG_XPS) | ||
750 | net->queues_kset = kset_create_and_add("queues", | 1137 | net->queues_kset = kset_create_and_add("queues", |
751 | NULL, &net->dev.kobj); | 1138 | NULL, &net->dev.kobj); |
752 | if (!net->queues_kset) | 1139 | if (!net->queues_kset) |
753 | return -ENOMEM; | 1140 | return -ENOMEM; |
754 | for (i = 0; i < net->num_rx_queues; i++) { | 1141 | #endif |
755 | error = rx_queue_add_kobject(net, i); | 1142 | |
756 | if (error) | 1143 | #ifdef CONFIG_RPS |
757 | break; | 1144 | real_rx = net->real_num_rx_queues; |
758 | } | 1145 | #endif |
1146 | real_tx = net->real_num_tx_queues; | ||
759 | 1147 | ||
1148 | error = net_rx_queue_update_kobjects(net, 0, real_rx); | ||
760 | if (error) | 1149 | if (error) |
761 | while (--i >= 0) | 1150 | goto error; |
762 | kobject_put(&net->_rx[i].kobj); | 1151 | rxq = real_rx; |
763 | 1152 | ||
1153 | error = netdev_queue_update_kobjects(net, 0, real_tx); | ||
1154 | if (error) | ||
1155 | goto error; | ||
1156 | txq = real_tx; | ||
1157 | |||
1158 | return 0; | ||
1159 | |||
1160 | error: | ||
1161 | netdev_queue_update_kobjects(net, txq, 0); | ||
1162 | net_rx_queue_update_kobjects(net, rxq, 0); | ||
764 | return error; | 1163 | return error; |
765 | } | 1164 | } |
766 | 1165 | ||
767 | static void rx_queue_remove_kobjects(struct net_device *net) | 1166 | static void remove_queue_kobjects(struct net_device *net) |
768 | { | 1167 | { |
769 | int i; | 1168 | int real_rx = 0, real_tx = 0; |
770 | 1169 | ||
771 | for (i = 0; i < net->num_rx_queues; i++) | 1170 | #ifdef CONFIG_RPS |
772 | kobject_put(&net->_rx[i].kobj); | 1171 | real_rx = net->real_num_rx_queues; |
1172 | #endif | ||
1173 | real_tx = net->real_num_tx_queues; | ||
1174 | |||
1175 | net_rx_queue_update_kobjects(net, real_rx, 0); | ||
1176 | netdev_queue_update_kobjects(net, real_tx, 0); | ||
1177 | #if defined(CONFIG_RPS) || defined(CONFIG_XPS) | ||
773 | kset_unregister(net->queues_kset); | 1178 | kset_unregister(net->queues_kset); |
1179 | #endif | ||
774 | } | 1180 | } |
775 | #endif /* CONFIG_RPS */ | ||
776 | 1181 | ||
777 | static const void *net_current_ns(void) | 1182 | static void *net_grab_current_ns(void) |
778 | { | 1183 | { |
779 | return current->nsproxy->net_ns; | 1184 | struct net *ns = current->nsproxy->net_ns; |
1185 | #ifdef CONFIG_NET_NS | ||
1186 | if (ns) | ||
1187 | atomic_inc(&ns->passive); | ||
1188 | #endif | ||
1189 | return ns; | ||
780 | } | 1190 | } |
781 | 1191 | ||
782 | static const void *net_initial_ns(void) | 1192 | static const void *net_initial_ns(void) |
@@ -789,22 +1199,14 @@ static const void *net_netlink_ns(struct sock *sk) | |||
789 | return sock_net(sk); | 1199 | return sock_net(sk); |
790 | } | 1200 | } |
791 | 1201 | ||
792 | static struct kobj_ns_type_operations net_ns_type_operations = { | 1202 | struct kobj_ns_type_operations net_ns_type_operations = { |
793 | .type = KOBJ_NS_TYPE_NET, | 1203 | .type = KOBJ_NS_TYPE_NET, |
794 | .current_ns = net_current_ns, | 1204 | .grab_current_ns = net_grab_current_ns, |
795 | .netlink_ns = net_netlink_ns, | 1205 | .netlink_ns = net_netlink_ns, |
796 | .initial_ns = net_initial_ns, | 1206 | .initial_ns = net_initial_ns, |
1207 | .drop_ns = net_drop_ns, | ||
797 | }; | 1208 | }; |
798 | 1209 | EXPORT_SYMBOL_GPL(net_ns_type_operations); | |
799 | static void net_kobj_ns_exit(struct net *net) | ||
800 | { | ||
801 | kobj_ns_exit(KOBJ_NS_TYPE_NET, net); | ||
802 | } | ||
803 | |||
804 | static struct pernet_operations kobj_net_ops = { | ||
805 | .exit = net_kobj_ns_exit, | ||
806 | }; | ||
807 | |||
808 | 1210 | ||
809 | #ifdef CONFIG_HOTPLUG | 1211 | #ifdef CONFIG_HOTPLUG |
810 | static int netdev_uevent(struct device *d, struct kobj_uevent_env *env) | 1212 | static int netdev_uevent(struct device *d, struct kobj_uevent_env *env) |
@@ -870,9 +1272,7 @@ void netdev_unregister_kobject(struct net_device * net) | |||
870 | 1272 | ||
871 | kobject_get(&dev->kobj); | 1273 | kobject_get(&dev->kobj); |
872 | 1274 | ||
873 | #ifdef CONFIG_RPS | 1275 | remove_queue_kobjects(net); |
874 | rx_queue_remove_kobjects(net); | ||
875 | #endif | ||
876 | 1276 | ||
877 | device_del(dev); | 1277 | device_del(dev); |
878 | } | 1278 | } |
@@ -911,13 +1311,11 @@ int netdev_register_kobject(struct net_device *net) | |||
911 | if (error) | 1311 | if (error) |
912 | return error; | 1312 | return error; |
913 | 1313 | ||
914 | #ifdef CONFIG_RPS | 1314 | error = register_queue_kobjects(net); |
915 | error = rx_queue_register_kobjects(net); | ||
916 | if (error) { | 1315 | if (error) { |
917 | device_del(dev); | 1316 | device_del(dev); |
918 | return error; | 1317 | return error; |
919 | } | 1318 | } |
920 | #endif | ||
921 | 1319 | ||
922 | return error; | 1320 | return error; |
923 | } | 1321 | } |
@@ -937,6 +1335,5 @@ EXPORT_SYMBOL(netdev_class_remove_file); | |||
937 | int netdev_kobject_init(void) | 1335 | int netdev_kobject_init(void) |
938 | { | 1336 | { |
939 | kobj_ns_type_register(&net_ns_type_operations); | 1337 | kobj_ns_type_register(&net_ns_type_operations); |
940 | register_pernet_subsys(&kobj_net_ops); | ||
941 | return class_register(&net_class); | 1338 | return class_register(&net_class); |
942 | } | 1339 | } |
diff --git a/net/core/net-sysfs.h b/net/core/net-sysfs.h index 805555e8b187..bd7751ec1c4d 100644 --- a/net/core/net-sysfs.h +++ b/net/core/net-sysfs.h | |||
@@ -4,4 +4,8 @@ | |||
4 | int netdev_kobject_init(void); | 4 | int netdev_kobject_init(void); |
5 | int netdev_register_kobject(struct net_device *); | 5 | int netdev_register_kobject(struct net_device *); |
6 | void netdev_unregister_kobject(struct net_device *); | 6 | void netdev_unregister_kobject(struct net_device *); |
7 | int net_rx_queue_update_kobjects(struct net_device *, int old_num, int new_num); | ||
8 | int netdev_queue_update_kobjects(struct net_device *net, | ||
9 | int old_num, int new_num); | ||
10 | |||
7 | #endif | 11 | #endif |
diff --git a/net/core/net-traces.c b/net/core/net-traces.c index afa6380ed88a..7f1bb2aba03b 100644 --- a/net/core/net-traces.c +++ b/net/core/net-traces.c | |||
@@ -26,6 +26,7 @@ | |||
26 | 26 | ||
27 | #define CREATE_TRACE_POINTS | 27 | #define CREATE_TRACE_POINTS |
28 | #include <trace/events/skb.h> | 28 | #include <trace/events/skb.h> |
29 | #include <trace/events/net.h> | ||
29 | #include <trace/events/napi.h> | 30 | #include <trace/events/napi.h> |
30 | 31 | ||
31 | EXPORT_TRACEPOINT_SYMBOL_GPL(kfree_skb); | 32 | EXPORT_TRACEPOINT_SYMBOL_GPL(kfree_skb); |
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index c988e685433a..ea489db1bc23 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c | |||
@@ -8,6 +8,8 @@ | |||
8 | #include <linux/idr.h> | 8 | #include <linux/idr.h> |
9 | #include <linux/rculist.h> | 9 | #include <linux/rculist.h> |
10 | #include <linux/nsproxy.h> | 10 | #include <linux/nsproxy.h> |
11 | #include <linux/proc_fs.h> | ||
12 | #include <linux/file.h> | ||
11 | #include <net/net_namespace.h> | 13 | #include <net/net_namespace.h> |
12 | #include <net/netns/generic.h> | 14 | #include <net/netns/generic.h> |
13 | 15 | ||
@@ -27,14 +29,6 @@ EXPORT_SYMBOL(init_net); | |||
27 | 29 | ||
28 | #define INITIAL_NET_GEN_PTRS 13 /* +1 for len +2 for rcu_head */ | 30 | #define INITIAL_NET_GEN_PTRS 13 /* +1 for len +2 for rcu_head */ |
29 | 31 | ||
30 | static void net_generic_release(struct rcu_head *rcu) | ||
31 | { | ||
32 | struct net_generic *ng; | ||
33 | |||
34 | ng = container_of(rcu, struct net_generic, rcu); | ||
35 | kfree(ng); | ||
36 | } | ||
37 | |||
38 | static int net_assign_generic(struct net *net, int id, void *data) | 32 | static int net_assign_generic(struct net *net, int id, void *data) |
39 | { | 33 | { |
40 | struct net_generic *ng, *old_ng; | 34 | struct net_generic *ng, *old_ng; |
@@ -42,7 +36,9 @@ static int net_assign_generic(struct net *net, int id, void *data) | |||
42 | BUG_ON(!mutex_is_locked(&net_mutex)); | 36 | BUG_ON(!mutex_is_locked(&net_mutex)); |
43 | BUG_ON(id == 0); | 37 | BUG_ON(id == 0); |
44 | 38 | ||
45 | ng = old_ng = net->gen; | 39 | old_ng = rcu_dereference_protected(net->gen, |
40 | lockdep_is_held(&net_mutex)); | ||
41 | ng = old_ng; | ||
46 | if (old_ng->len >= id) | 42 | if (old_ng->len >= id) |
47 | goto assign; | 43 | goto assign; |
48 | 44 | ||
@@ -66,7 +62,7 @@ static int net_assign_generic(struct net *net, int id, void *data) | |||
66 | memcpy(&ng->ptr, &old_ng->ptr, old_ng->len * sizeof(void*)); | 62 | memcpy(&ng->ptr, &old_ng->ptr, old_ng->len * sizeof(void*)); |
67 | 63 | ||
68 | rcu_assign_pointer(net->gen, ng); | 64 | rcu_assign_pointer(net->gen, ng); |
69 | call_rcu(&old_ng->rcu, net_generic_release); | 65 | kfree_rcu(old_ng, rcu); |
70 | assign: | 66 | assign: |
71 | ng->ptr[id - 1] = data; | 67 | ng->ptr[id - 1] = data; |
72 | return 0; | 68 | return 0; |
@@ -132,6 +128,7 @@ static __net_init int setup_net(struct net *net) | |||
132 | LIST_HEAD(net_exit_list); | 128 | LIST_HEAD(net_exit_list); |
133 | 129 | ||
134 | atomic_set(&net->count, 1); | 130 | atomic_set(&net->count, 1); |
131 | atomic_set(&net->passive, 1); | ||
135 | 132 | ||
136 | #ifdef NETNS_REFCNT_DEBUG | 133 | #ifdef NETNS_REFCNT_DEBUG |
137 | atomic_set(&net->use_count, 0); | 134 | atomic_set(&net->use_count, 0); |
@@ -214,11 +211,21 @@ static void net_free(struct net *net) | |||
214 | kmem_cache_free(net_cachep, net); | 211 | kmem_cache_free(net_cachep, net); |
215 | } | 212 | } |
216 | 213 | ||
217 | static struct net *net_create(void) | 214 | void net_drop_ns(void *p) |
215 | { | ||
216 | struct net *ns = p; | ||
217 | if (ns && atomic_dec_and_test(&ns->passive)) | ||
218 | net_free(ns); | ||
219 | } | ||
220 | |||
221 | struct net *copy_net_ns(unsigned long flags, struct net *old_net) | ||
218 | { | 222 | { |
219 | struct net *net; | 223 | struct net *net; |
220 | int rv; | 224 | int rv; |
221 | 225 | ||
226 | if (!(flags & CLONE_NEWNET)) | ||
227 | return get_net(old_net); | ||
228 | |||
222 | net = net_alloc(); | 229 | net = net_alloc(); |
223 | if (!net) | 230 | if (!net) |
224 | return ERR_PTR(-ENOMEM); | 231 | return ERR_PTR(-ENOMEM); |
@@ -231,19 +238,12 @@ static struct net *net_create(void) | |||
231 | } | 238 | } |
232 | mutex_unlock(&net_mutex); | 239 | mutex_unlock(&net_mutex); |
233 | if (rv < 0) { | 240 | if (rv < 0) { |
234 | net_free(net); | 241 | net_drop_ns(net); |
235 | return ERR_PTR(rv); | 242 | return ERR_PTR(rv); |
236 | } | 243 | } |
237 | return net; | 244 | return net; |
238 | } | 245 | } |
239 | 246 | ||
240 | struct net *copy_net_ns(unsigned long flags, struct net *old_net) | ||
241 | { | ||
242 | if (!(flags & CLONE_NEWNET)) | ||
243 | return get_net(old_net); | ||
244 | return net_create(); | ||
245 | } | ||
246 | |||
247 | static DEFINE_SPINLOCK(cleanup_list_lock); | 247 | static DEFINE_SPINLOCK(cleanup_list_lock); |
248 | static LIST_HEAD(cleanup_list); /* Must hold cleanup_list_lock to touch */ | 248 | static LIST_HEAD(cleanup_list); /* Must hold cleanup_list_lock to touch */ |
249 | 249 | ||
@@ -294,7 +294,7 @@ static void cleanup_net(struct work_struct *work) | |||
294 | /* Finally it is safe to free my network namespace structure */ | 294 | /* Finally it is safe to free my network namespace structure */ |
295 | list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) { | 295 | list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) { |
296 | list_del_init(&net->exit_list); | 296 | list_del_init(&net->exit_list); |
297 | net_free(net); | 297 | net_drop_ns(net); |
298 | } | 298 | } |
299 | } | 299 | } |
300 | static DECLARE_WORK(net_cleanup_work, cleanup_net); | 300 | static DECLARE_WORK(net_cleanup_work, cleanup_net); |
@@ -312,6 +312,26 @@ void __put_net(struct net *net) | |||
312 | } | 312 | } |
313 | EXPORT_SYMBOL_GPL(__put_net); | 313 | EXPORT_SYMBOL_GPL(__put_net); |
314 | 314 | ||
315 | struct net *get_net_ns_by_fd(int fd) | ||
316 | { | ||
317 | struct proc_inode *ei; | ||
318 | struct file *file; | ||
319 | struct net *net; | ||
320 | |||
321 | file = proc_ns_fget(fd); | ||
322 | if (IS_ERR(file)) | ||
323 | return ERR_CAST(file); | ||
324 | |||
325 | ei = PROC_I(file->f_dentry->d_inode); | ||
326 | if (ei->ns_ops == &netns_operations) | ||
327 | net = get_net(ei->ns); | ||
328 | else | ||
329 | net = ERR_PTR(-EINVAL); | ||
330 | |||
331 | fput(file); | ||
332 | return net; | ||
333 | } | ||
334 | |||
315 | #else | 335 | #else |
316 | struct net *copy_net_ns(unsigned long flags, struct net *old_net) | 336 | struct net *copy_net_ns(unsigned long flags, struct net *old_net) |
317 | { | 337 | { |
@@ -319,6 +339,11 @@ struct net *copy_net_ns(unsigned long flags, struct net *old_net) | |||
319 | return ERR_PTR(-EINVAL); | 339 | return ERR_PTR(-EINVAL); |
320 | return old_net; | 340 | return old_net; |
321 | } | 341 | } |
342 | |||
343 | struct net *get_net_ns_by_fd(int fd) | ||
344 | { | ||
345 | return ERR_PTR(-EINVAL); | ||
346 | } | ||
322 | #endif | 347 | #endif |
323 | 348 | ||
324 | struct net *get_net_ns_by_pid(pid_t pid) | 349 | struct net *get_net_ns_by_pid(pid_t pid) |
@@ -571,3 +596,39 @@ void unregister_pernet_device(struct pernet_operations *ops) | |||
571 | mutex_unlock(&net_mutex); | 596 | mutex_unlock(&net_mutex); |
572 | } | 597 | } |
573 | EXPORT_SYMBOL_GPL(unregister_pernet_device); | 598 | EXPORT_SYMBOL_GPL(unregister_pernet_device); |
599 | |||
600 | #ifdef CONFIG_NET_NS | ||
601 | static void *netns_get(struct task_struct *task) | ||
602 | { | ||
603 | struct net *net = NULL; | ||
604 | struct nsproxy *nsproxy; | ||
605 | |||
606 | rcu_read_lock(); | ||
607 | nsproxy = task_nsproxy(task); | ||
608 | if (nsproxy) | ||
609 | net = get_net(nsproxy->net_ns); | ||
610 | rcu_read_unlock(); | ||
611 | |||
612 | return net; | ||
613 | } | ||
614 | |||
615 | static void netns_put(void *ns) | ||
616 | { | ||
617 | put_net(ns); | ||
618 | } | ||
619 | |||
620 | static int netns_install(struct nsproxy *nsproxy, void *ns) | ||
621 | { | ||
622 | put_net(nsproxy->net_ns); | ||
623 | nsproxy->net_ns = get_net(ns); | ||
624 | return 0; | ||
625 | } | ||
626 | |||
627 | const struct proc_ns_operations netns_operations = { | ||
628 | .name = "net", | ||
629 | .type = CLONE_NEWNET, | ||
630 | .get = netns_get, | ||
631 | .put = netns_put, | ||
632 | .install = netns_install, | ||
633 | }; | ||
634 | #endif | ||
diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 537e01afd81b..18d9cbda3a39 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c | |||
@@ -35,7 +35,6 @@ | |||
35 | 35 | ||
36 | #define MAX_UDP_CHUNK 1460 | 36 | #define MAX_UDP_CHUNK 1460 |
37 | #define MAX_SKBS 32 | 37 | #define MAX_SKBS 32 |
38 | #define MAX_QUEUE_DEPTH (MAX_SKBS / 2) | ||
39 | 38 | ||
40 | static struct sk_buff_head skb_pool; | 39 | static struct sk_buff_head skb_pool; |
41 | 40 | ||
@@ -76,8 +75,7 @@ static void queue_process(struct work_struct *work) | |||
76 | 75 | ||
77 | local_irq_save(flags); | 76 | local_irq_save(flags); |
78 | __netif_tx_lock(txq, smp_processor_id()); | 77 | __netif_tx_lock(txq, smp_processor_id()); |
79 | if (netif_tx_queue_stopped(txq) || | 78 | if (netif_tx_queue_frozen_or_stopped(txq) || |
80 | netif_tx_queue_frozen(txq) || | ||
81 | ops->ndo_start_xmit(skb, dev) != NETDEV_TX_OK) { | 79 | ops->ndo_start_xmit(skb, dev) != NETDEV_TX_OK) { |
82 | skb_queue_head(&npinfo->txq, skb); | 80 | skb_queue_head(&npinfo->txq, skb); |
83 | __netif_tx_unlock(txq); | 81 | __netif_tx_unlock(txq); |
@@ -195,6 +193,17 @@ void netpoll_poll_dev(struct net_device *dev) | |||
195 | 193 | ||
196 | poll_napi(dev); | 194 | poll_napi(dev); |
197 | 195 | ||
196 | if (dev->priv_flags & IFF_SLAVE) { | ||
197 | if (dev->npinfo) { | ||
198 | struct net_device *bond_dev = dev->master; | ||
199 | struct sk_buff *skb; | ||
200 | while ((skb = skb_dequeue(&dev->npinfo->arp_tx))) { | ||
201 | skb->dev = bond_dev; | ||
202 | skb_queue_tail(&bond_dev->npinfo->arp_tx, skb); | ||
203 | } | ||
204 | } | ||
205 | } | ||
206 | |||
198 | service_arp_queue(dev->npinfo); | 207 | service_arp_queue(dev->npinfo); |
199 | 208 | ||
200 | zap_completion_queue(); | 209 | zap_completion_queue(); |
@@ -288,11 +297,11 @@ static int netpoll_owner_active(struct net_device *dev) | |||
288 | return 0; | 297 | return 0; |
289 | } | 298 | } |
290 | 299 | ||
291 | void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) | 300 | void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb, |
301 | struct net_device *dev) | ||
292 | { | 302 | { |
293 | int status = NETDEV_TX_BUSY; | 303 | int status = NETDEV_TX_BUSY; |
294 | unsigned long tries; | 304 | unsigned long tries; |
295 | struct net_device *dev = np->dev; | ||
296 | const struct net_device_ops *ops = dev->netdev_ops; | 305 | const struct net_device_ops *ops = dev->netdev_ops; |
297 | /* It is up to the caller to keep npinfo alive. */ | 306 | /* It is up to the caller to keep npinfo alive. */ |
298 | struct netpoll_info *npinfo = np->dev->npinfo; | 307 | struct netpoll_info *npinfo = np->dev->npinfo; |
@@ -315,9 +324,7 @@ void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) | |||
315 | tries > 0; --tries) { | 324 | tries > 0; --tries) { |
316 | if (__netif_tx_trylock(txq)) { | 325 | if (__netif_tx_trylock(txq)) { |
317 | if (!netif_tx_queue_stopped(txq)) { | 326 | if (!netif_tx_queue_stopped(txq)) { |
318 | dev->priv_flags |= IFF_IN_NETPOLL; | ||
319 | status = ops->ndo_start_xmit(skb, dev); | 327 | status = ops->ndo_start_xmit(skb, dev); |
320 | dev->priv_flags &= ~IFF_IN_NETPOLL; | ||
321 | if (status == NETDEV_TX_OK) | 328 | if (status == NETDEV_TX_OK) |
322 | txq_trans_update(txq); | 329 | txq_trans_update(txq); |
323 | } | 330 | } |
@@ -346,7 +353,7 @@ void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) | |||
346 | schedule_delayed_work(&npinfo->tx_work,0); | 353 | schedule_delayed_work(&npinfo->tx_work,0); |
347 | } | 354 | } |
348 | } | 355 | } |
349 | EXPORT_SYMBOL(netpoll_send_skb); | 356 | EXPORT_SYMBOL(netpoll_send_skb_on_dev); |
350 | 357 | ||
351 | void netpoll_send_udp(struct netpoll *np, const char *msg, int len) | 358 | void netpoll_send_udp(struct netpoll *np, const char *msg, int len) |
352 | { | 359 | { |
@@ -532,7 +539,7 @@ int __netpoll_rx(struct sk_buff *skb) | |||
532 | { | 539 | { |
533 | int proto, len, ulen; | 540 | int proto, len, ulen; |
534 | int hits = 0; | 541 | int hits = 0; |
535 | struct iphdr *iph; | 542 | const struct iphdr *iph; |
536 | struct udphdr *uh; | 543 | struct udphdr *uh; |
537 | struct netpoll_info *npinfo = skb->dev->npinfo; | 544 | struct netpoll_info *npinfo = skb->dev->npinfo; |
538 | struct netpoll *np, *tmp; | 545 | struct netpoll *np, *tmp; |
@@ -691,32 +698,8 @@ int netpoll_parse_options(struct netpoll *np, char *opt) | |||
691 | 698 | ||
692 | if (*cur != 0) { | 699 | if (*cur != 0) { |
693 | /* MAC address */ | 700 | /* MAC address */ |
694 | if ((delim = strchr(cur, ':')) == NULL) | 701 | if (!mac_pton(cur, np->remote_mac)) |
695 | goto parse_failed; | ||
696 | *delim = 0; | ||
697 | np->remote_mac[0] = simple_strtol(cur, NULL, 16); | ||
698 | cur = delim + 1; | ||
699 | if ((delim = strchr(cur, ':')) == NULL) | ||
700 | goto parse_failed; | ||
701 | *delim = 0; | ||
702 | np->remote_mac[1] = simple_strtol(cur, NULL, 16); | ||
703 | cur = delim + 1; | ||
704 | if ((delim = strchr(cur, ':')) == NULL) | ||
705 | goto parse_failed; | 702 | goto parse_failed; |
706 | *delim = 0; | ||
707 | np->remote_mac[2] = simple_strtol(cur, NULL, 16); | ||
708 | cur = delim + 1; | ||
709 | if ((delim = strchr(cur, ':')) == NULL) | ||
710 | goto parse_failed; | ||
711 | *delim = 0; | ||
712 | np->remote_mac[3] = simple_strtol(cur, NULL, 16); | ||
713 | cur = delim + 1; | ||
714 | if ((delim = strchr(cur, ':')) == NULL) | ||
715 | goto parse_failed; | ||
716 | *delim = 0; | ||
717 | np->remote_mac[4] = simple_strtol(cur, NULL, 16); | ||
718 | cur = delim + 1; | ||
719 | np->remote_mac[5] = simple_strtol(cur, NULL, 16); | ||
720 | } | 703 | } |
721 | 704 | ||
722 | netpoll_print_options(np); | 705 | netpoll_print_options(np); |
@@ -809,6 +792,13 @@ int netpoll_setup(struct netpoll *np) | |||
809 | return -ENODEV; | 792 | return -ENODEV; |
810 | } | 793 | } |
811 | 794 | ||
795 | if (ndev->master) { | ||
796 | printk(KERN_ERR "%s: %s is a slave device, aborting.\n", | ||
797 | np->name, np->dev_name); | ||
798 | err = -EBUSY; | ||
799 | goto put; | ||
800 | } | ||
801 | |||
812 | if (!netif_running(ndev)) { | 802 | if (!netif_running(ndev)) { |
813 | unsigned long atmost, atleast; | 803 | unsigned long atmost, atleast; |
814 | 804 | ||
@@ -925,7 +915,7 @@ void __netpoll_cleanup(struct netpoll *np) | |||
925 | 915 | ||
926 | skb_queue_purge(&npinfo->arp_tx); | 916 | skb_queue_purge(&npinfo->arp_tx); |
927 | skb_queue_purge(&npinfo->txq); | 917 | skb_queue_purge(&npinfo->txq); |
928 | cancel_rearming_delayed_work(&npinfo->tx_work); | 918 | cancel_delayed_work_sync(&npinfo->tx_work); |
929 | 919 | ||
930 | /* clean after last, unfinished work */ | 920 | /* clean after last, unfinished work */ |
931 | __skb_queue_purge(&npinfo->txq); | 921 | __skb_queue_purge(&npinfo->txq); |
diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 10a1ea72010d..f76079cd750c 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c | |||
@@ -156,6 +156,7 @@ | |||
156 | #include <linux/wait.h> | 156 | #include <linux/wait.h> |
157 | #include <linux/etherdevice.h> | 157 | #include <linux/etherdevice.h> |
158 | #include <linux/kthread.h> | 158 | #include <linux/kthread.h> |
159 | #include <linux/prefetch.h> | ||
159 | #include <net/net_namespace.h> | 160 | #include <net/net_namespace.h> |
160 | #include <net/checksum.h> | 161 | #include <net/checksum.h> |
161 | #include <net/ipv6.h> | 162 | #include <net/ipv6.h> |
@@ -251,6 +252,7 @@ struct pktgen_dev { | |||
251 | int max_pkt_size; /* = ETH_ZLEN; */ | 252 | int max_pkt_size; /* = ETH_ZLEN; */ |
252 | int pkt_overhead; /* overhead for MPLS, VLANs, IPSEC etc */ | 253 | int pkt_overhead; /* overhead for MPLS, VLANs, IPSEC etc */ |
253 | int nfrags; | 254 | int nfrags; |
255 | struct page *page; | ||
254 | u64 delay; /* nano-seconds */ | 256 | u64 delay; /* nano-seconds */ |
255 | 257 | ||
256 | __u64 count; /* Default No packets to send */ | 258 | __u64 count; /* Default No packets to send */ |
@@ -378,6 +380,7 @@ struct pktgen_dev { | |||
378 | 380 | ||
379 | u16 queue_map_min; | 381 | u16 queue_map_min; |
380 | u16 queue_map_max; | 382 | u16 queue_map_max; |
383 | __u32 skb_priority; /* skb priority field */ | ||
381 | int node; /* Memory node */ | 384 | int node; /* Memory node */ |
382 | 385 | ||
383 | #ifdef CONFIG_XFRM | 386 | #ifdef CONFIG_XFRM |
@@ -394,6 +397,8 @@ struct pktgen_hdr { | |||
394 | __be32 tv_usec; | 397 | __be32 tv_usec; |
395 | }; | 398 | }; |
396 | 399 | ||
400 | static bool pktgen_exiting __read_mostly; | ||
401 | |||
397 | struct pktgen_thread { | 402 | struct pktgen_thread { |
398 | spinlock_t if_lock; /* for list of devices */ | 403 | spinlock_t if_lock; /* for list of devices */ |
399 | struct list_head if_list; /* All device here */ | 404 | struct list_head if_list; /* All device here */ |
@@ -445,7 +450,6 @@ static void pktgen_stop(struct pktgen_thread *t); | |||
445 | static void pktgen_clear_counters(struct pktgen_dev *pkt_dev); | 450 | static void pktgen_clear_counters(struct pktgen_dev *pkt_dev); |
446 | 451 | ||
447 | static unsigned int scan_ip6(const char *s, char ip[16]); | 452 | static unsigned int scan_ip6(const char *s, char ip[16]); |
448 | static unsigned int fmt_ip6(char *s, const char ip[16]); | ||
449 | 453 | ||
450 | /* Module parameters, defaults. */ | 454 | /* Module parameters, defaults. */ |
451 | static int pg_count_d __read_mostly = 1000; | 455 | static int pg_count_d __read_mostly = 1000; |
@@ -547,22 +551,18 @@ static int pktgen_if_show(struct seq_file *seq, void *v) | |||
547 | pkt_dev->queue_map_min, | 551 | pkt_dev->queue_map_min, |
548 | pkt_dev->queue_map_max); | 552 | pkt_dev->queue_map_max); |
549 | 553 | ||
550 | if (pkt_dev->flags & F_IPV6) { | 554 | if (pkt_dev->skb_priority) |
551 | char b1[128], b2[128], b3[128]; | 555 | seq_printf(seq, " skb_priority: %u\n", |
552 | fmt_ip6(b1, pkt_dev->in6_saddr.s6_addr); | 556 | pkt_dev->skb_priority); |
553 | fmt_ip6(b2, pkt_dev->min_in6_saddr.s6_addr); | ||
554 | fmt_ip6(b3, pkt_dev->max_in6_saddr.s6_addr); | ||
555 | seq_printf(seq, | ||
556 | " saddr: %s min_saddr: %s max_saddr: %s\n", b1, | ||
557 | b2, b3); | ||
558 | 557 | ||
559 | fmt_ip6(b1, pkt_dev->in6_daddr.s6_addr); | 558 | if (pkt_dev->flags & F_IPV6) { |
560 | fmt_ip6(b2, pkt_dev->min_in6_daddr.s6_addr); | ||
561 | fmt_ip6(b3, pkt_dev->max_in6_daddr.s6_addr); | ||
562 | seq_printf(seq, | 559 | seq_printf(seq, |
563 | " daddr: %s min_daddr: %s max_daddr: %s\n", b1, | 560 | " saddr: %pI6c min_saddr: %pI6c max_saddr: %pI6c\n" |
564 | b2, b3); | 561 | " daddr: %pI6c min_daddr: %pI6c max_daddr: %pI6c\n", |
565 | 562 | &pkt_dev->in6_saddr, | |
563 | &pkt_dev->min_in6_saddr, &pkt_dev->max_in6_saddr, | ||
564 | &pkt_dev->in6_daddr, | ||
565 | &pkt_dev->min_in6_daddr, &pkt_dev->max_in6_daddr); | ||
566 | } else { | 566 | } else { |
567 | seq_printf(seq, | 567 | seq_printf(seq, |
568 | " dst_min: %s dst_max: %s\n", | 568 | " dst_min: %s dst_max: %s\n", |
@@ -698,10 +698,9 @@ static int pktgen_if_show(struct seq_file *seq, void *v) | |||
698 | pkt_dev->cur_src_mac_offset); | 698 | pkt_dev->cur_src_mac_offset); |
699 | 699 | ||
700 | if (pkt_dev->flags & F_IPV6) { | 700 | if (pkt_dev->flags & F_IPV6) { |
701 | char b1[128], b2[128]; | 701 | seq_printf(seq, " cur_saddr: %pI6c cur_daddr: %pI6c\n", |
702 | fmt_ip6(b1, pkt_dev->cur_in6_daddr.s6_addr); | 702 | &pkt_dev->cur_in6_saddr, |
703 | fmt_ip6(b2, pkt_dev->cur_in6_saddr.s6_addr); | 703 | &pkt_dev->cur_in6_daddr); |
704 | seq_printf(seq, " cur_saddr: %s cur_daddr: %s\n", b2, b1); | ||
705 | } else | 704 | } else |
706 | seq_printf(seq, " cur_saddr: 0x%x cur_daddr: 0x%x\n", | 705 | seq_printf(seq, " cur_saddr: 0x%x cur_daddr: 0x%x\n", |
707 | pkt_dev->cur_saddr, pkt_dev->cur_daddr); | 706 | pkt_dev->cur_saddr, pkt_dev->cur_daddr); |
@@ -729,16 +728,14 @@ static int hex32_arg(const char __user *user_buffer, unsigned long maxlen, | |||
729 | *num = 0; | 728 | *num = 0; |
730 | 729 | ||
731 | for (; i < maxlen; i++) { | 730 | for (; i < maxlen; i++) { |
731 | int value; | ||
732 | char c; | 732 | char c; |
733 | *num <<= 4; | 733 | *num <<= 4; |
734 | if (get_user(c, &user_buffer[i])) | 734 | if (get_user(c, &user_buffer[i])) |
735 | return -EFAULT; | 735 | return -EFAULT; |
736 | if ((c >= '0') && (c <= '9')) | 736 | value = hex_to_bin(c); |
737 | *num |= c - '0'; | 737 | if (value >= 0) |
738 | else if ((c >= 'a') && (c <= 'f')) | 738 | *num |= value; |
739 | *num |= c - 'a' + 10; | ||
740 | else if ((c >= 'A') && (c <= 'F')) | ||
741 | *num |= c - 'A' + 10; | ||
742 | else | 739 | else |
743 | break; | 740 | break; |
744 | } | 741 | } |
@@ -773,10 +770,10 @@ done: | |||
773 | static unsigned long num_arg(const char __user * user_buffer, | 770 | static unsigned long num_arg(const char __user * user_buffer, |
774 | unsigned long maxlen, unsigned long *num) | 771 | unsigned long maxlen, unsigned long *num) |
775 | { | 772 | { |
776 | int i = 0; | 773 | int i; |
777 | *num = 0; | 774 | *num = 0; |
778 | 775 | ||
779 | for (; i < maxlen; i++) { | 776 | for (i = 0; i < maxlen; i++) { |
780 | char c; | 777 | char c; |
781 | if (get_user(c, &user_buffer[i])) | 778 | if (get_user(c, &user_buffer[i])) |
782 | return -EFAULT; | 779 | return -EFAULT; |
@@ -791,9 +788,9 @@ static unsigned long num_arg(const char __user * user_buffer, | |||
791 | 788 | ||
792 | static int strn_len(const char __user * user_buffer, unsigned int maxlen) | 789 | static int strn_len(const char __user * user_buffer, unsigned int maxlen) |
793 | { | 790 | { |
794 | int i = 0; | 791 | int i; |
795 | 792 | ||
796 | for (; i < maxlen; i++) { | 793 | for (i = 0; i < maxlen; i++) { |
797 | char c; | 794 | char c; |
798 | if (get_user(c, &user_buffer[i])) | 795 | if (get_user(c, &user_buffer[i])) |
799 | return -EFAULT; | 796 | return -EFAULT; |
@@ -848,7 +845,7 @@ static ssize_t pktgen_if_write(struct file *file, | |||
848 | { | 845 | { |
849 | struct seq_file *seq = file->private_data; | 846 | struct seq_file *seq = file->private_data; |
850 | struct pktgen_dev *pkt_dev = seq->private; | 847 | struct pktgen_dev *pkt_dev = seq->private; |
851 | int i = 0, max, len; | 848 | int i, max, len; |
852 | char name[16], valstr[32]; | 849 | char name[16], valstr[32]; |
853 | unsigned long value = 0; | 850 | unsigned long value = 0; |
854 | char *pg_result = NULL; | 851 | char *pg_result = NULL; |
@@ -862,13 +859,13 @@ static ssize_t pktgen_if_write(struct file *file, | |||
862 | return -EINVAL; | 859 | return -EINVAL; |
863 | } | 860 | } |
864 | 861 | ||
865 | max = count - i; | 862 | max = count; |
866 | tmp = count_trail_chars(&user_buffer[i], max); | 863 | tmp = count_trail_chars(user_buffer, max); |
867 | if (tmp < 0) { | 864 | if (tmp < 0) { |
868 | pr_warning("illegal format\n"); | 865 | pr_warning("illegal format\n"); |
869 | return tmp; | 866 | return tmp; |
870 | } | 867 | } |
871 | i += tmp; | 868 | i = tmp; |
872 | 869 | ||
873 | /* Read variable name */ | 870 | /* Read variable name */ |
874 | 871 | ||
@@ -889,10 +886,11 @@ static ssize_t pktgen_if_write(struct file *file, | |||
889 | i += len; | 886 | i += len; |
890 | 887 | ||
891 | if (debug) { | 888 | if (debug) { |
892 | char tb[count + 1]; | 889 | size_t copy = min_t(size_t, count, 1023); |
893 | if (copy_from_user(tb, user_buffer, count)) | 890 | char tb[copy + 1]; |
891 | if (copy_from_user(tb, user_buffer, copy)) | ||
894 | return -EFAULT; | 892 | return -EFAULT; |
895 | tb[count] = 0; | 893 | tb[copy] = 0; |
896 | printk(KERN_DEBUG "pktgen: %s,%lu buffer -:%s:-\n", name, | 894 | printk(KERN_DEBUG "pktgen: %s,%lu buffer -:%s:-\n", name, |
897 | (unsigned long)count, tb); | 895 | (unsigned long)count, tb); |
898 | } | 896 | } |
@@ -1128,6 +1126,10 @@ static ssize_t pktgen_if_write(struct file *file, | |||
1128 | if (node_possible(value)) { | 1126 | if (node_possible(value)) { |
1129 | pkt_dev->node = value; | 1127 | pkt_dev->node = value; |
1130 | sprintf(pg_result, "OK: node=%d", pkt_dev->node); | 1128 | sprintf(pg_result, "OK: node=%d", pkt_dev->node); |
1129 | if (pkt_dev->page) { | ||
1130 | put_page(pkt_dev->page); | ||
1131 | pkt_dev->page = NULL; | ||
1132 | } | ||
1131 | } | 1133 | } |
1132 | else | 1134 | else |
1133 | sprintf(pg_result, "ERROR: node not possible"); | 1135 | sprintf(pg_result, "ERROR: node not possible"); |
@@ -1298,7 +1300,7 @@ static ssize_t pktgen_if_write(struct file *file, | |||
1298 | buf[len] = 0; | 1300 | buf[len] = 0; |
1299 | 1301 | ||
1300 | scan_ip6(buf, pkt_dev->in6_daddr.s6_addr); | 1302 | scan_ip6(buf, pkt_dev->in6_daddr.s6_addr); |
1301 | fmt_ip6(buf, pkt_dev->in6_daddr.s6_addr); | 1303 | snprintf(buf, sizeof(buf), "%pI6c", &pkt_dev->in6_daddr); |
1302 | 1304 | ||
1303 | ipv6_addr_copy(&pkt_dev->cur_in6_daddr, &pkt_dev->in6_daddr); | 1305 | ipv6_addr_copy(&pkt_dev->cur_in6_daddr, &pkt_dev->in6_daddr); |
1304 | 1306 | ||
@@ -1321,7 +1323,7 @@ static ssize_t pktgen_if_write(struct file *file, | |||
1321 | buf[len] = 0; | 1323 | buf[len] = 0; |
1322 | 1324 | ||
1323 | scan_ip6(buf, pkt_dev->min_in6_daddr.s6_addr); | 1325 | scan_ip6(buf, pkt_dev->min_in6_daddr.s6_addr); |
1324 | fmt_ip6(buf, pkt_dev->min_in6_daddr.s6_addr); | 1326 | snprintf(buf, sizeof(buf), "%pI6c", &pkt_dev->min_in6_daddr); |
1325 | 1327 | ||
1326 | ipv6_addr_copy(&pkt_dev->cur_in6_daddr, | 1328 | ipv6_addr_copy(&pkt_dev->cur_in6_daddr, |
1327 | &pkt_dev->min_in6_daddr); | 1329 | &pkt_dev->min_in6_daddr); |
@@ -1344,7 +1346,7 @@ static ssize_t pktgen_if_write(struct file *file, | |||
1344 | buf[len] = 0; | 1346 | buf[len] = 0; |
1345 | 1347 | ||
1346 | scan_ip6(buf, pkt_dev->max_in6_daddr.s6_addr); | 1348 | scan_ip6(buf, pkt_dev->max_in6_daddr.s6_addr); |
1347 | fmt_ip6(buf, pkt_dev->max_in6_daddr.s6_addr); | 1349 | snprintf(buf, sizeof(buf), "%pI6c", &pkt_dev->max_in6_daddr); |
1348 | 1350 | ||
1349 | if (debug) | 1351 | if (debug) |
1350 | printk(KERN_DEBUG "pktgen: dst6_max set to: %s\n", buf); | 1352 | printk(KERN_DEBUG "pktgen: dst6_max set to: %s\n", buf); |
@@ -1365,7 +1367,7 @@ static ssize_t pktgen_if_write(struct file *file, | |||
1365 | buf[len] = 0; | 1367 | buf[len] = 0; |
1366 | 1368 | ||
1367 | scan_ip6(buf, pkt_dev->in6_saddr.s6_addr); | 1369 | scan_ip6(buf, pkt_dev->in6_saddr.s6_addr); |
1368 | fmt_ip6(buf, pkt_dev->in6_saddr.s6_addr); | 1370 | snprintf(buf, sizeof(buf), "%pI6c", &pkt_dev->in6_saddr); |
1369 | 1371 | ||
1370 | ipv6_addr_copy(&pkt_dev->cur_in6_saddr, &pkt_dev->in6_saddr); | 1372 | ipv6_addr_copy(&pkt_dev->cur_in6_saddr, &pkt_dev->in6_saddr); |
1371 | 1373 | ||
@@ -1419,11 +1421,6 @@ static ssize_t pktgen_if_write(struct file *file, | |||
1419 | return count; | 1421 | return count; |
1420 | } | 1422 | } |
1421 | if (!strcmp(name, "dst_mac")) { | 1423 | if (!strcmp(name, "dst_mac")) { |
1422 | char *v = valstr; | ||
1423 | unsigned char old_dmac[ETH_ALEN]; | ||
1424 | unsigned char *m = pkt_dev->dst_mac; | ||
1425 | memcpy(old_dmac, pkt_dev->dst_mac, ETH_ALEN); | ||
1426 | |||
1427 | len = strn_len(&user_buffer[i], sizeof(valstr) - 1); | 1424 | len = strn_len(&user_buffer[i], sizeof(valstr) - 1); |
1428 | if (len < 0) | 1425 | if (len < 0) |
1429 | return len; | 1426 | return len; |
@@ -1431,35 +1428,16 @@ static ssize_t pktgen_if_write(struct file *file, | |||
1431 | memset(valstr, 0, sizeof(valstr)); | 1428 | memset(valstr, 0, sizeof(valstr)); |
1432 | if (copy_from_user(valstr, &user_buffer[i], len)) | 1429 | if (copy_from_user(valstr, &user_buffer[i], len)) |
1433 | return -EFAULT; | 1430 | return -EFAULT; |
1434 | i += len; | ||
1435 | |||
1436 | for (*m = 0; *v && m < pkt_dev->dst_mac + 6; v++) { | ||
1437 | int value; | ||
1438 | |||
1439 | value = hex_to_bin(*v); | ||
1440 | if (value >= 0) | ||
1441 | *m = *m * 16 + value; | ||
1442 | |||
1443 | if (*v == ':') { | ||
1444 | m++; | ||
1445 | *m = 0; | ||
1446 | } | ||
1447 | } | ||
1448 | 1431 | ||
1432 | if (!mac_pton(valstr, pkt_dev->dst_mac)) | ||
1433 | return -EINVAL; | ||
1449 | /* Set up Dest MAC */ | 1434 | /* Set up Dest MAC */ |
1450 | if (compare_ether_addr(old_dmac, pkt_dev->dst_mac)) | 1435 | memcpy(&pkt_dev->hh[0], pkt_dev->dst_mac, ETH_ALEN); |
1451 | memcpy(&(pkt_dev->hh[0]), pkt_dev->dst_mac, ETH_ALEN); | ||
1452 | 1436 | ||
1453 | sprintf(pg_result, "OK: dstmac"); | 1437 | sprintf(pg_result, "OK: dstmac %pM", pkt_dev->dst_mac); |
1454 | return count; | 1438 | return count; |
1455 | } | 1439 | } |
1456 | if (!strcmp(name, "src_mac")) { | 1440 | if (!strcmp(name, "src_mac")) { |
1457 | char *v = valstr; | ||
1458 | unsigned char old_smac[ETH_ALEN]; | ||
1459 | unsigned char *m = pkt_dev->src_mac; | ||
1460 | |||
1461 | memcpy(old_smac, pkt_dev->src_mac, ETH_ALEN); | ||
1462 | |||
1463 | len = strn_len(&user_buffer[i], sizeof(valstr) - 1); | 1441 | len = strn_len(&user_buffer[i], sizeof(valstr) - 1); |
1464 | if (len < 0) | 1442 | if (len < 0) |
1465 | return len; | 1443 | return len; |
@@ -1467,26 +1445,13 @@ static ssize_t pktgen_if_write(struct file *file, | |||
1467 | memset(valstr, 0, sizeof(valstr)); | 1445 | memset(valstr, 0, sizeof(valstr)); |
1468 | if (copy_from_user(valstr, &user_buffer[i], len)) | 1446 | if (copy_from_user(valstr, &user_buffer[i], len)) |
1469 | return -EFAULT; | 1447 | return -EFAULT; |
1470 | i += len; | ||
1471 | |||
1472 | for (*m = 0; *v && m < pkt_dev->src_mac + 6; v++) { | ||
1473 | int value; | ||
1474 | |||
1475 | value = hex_to_bin(*v); | ||
1476 | if (value >= 0) | ||
1477 | *m = *m * 16 + value; | ||
1478 | |||
1479 | if (*v == ':') { | ||
1480 | m++; | ||
1481 | *m = 0; | ||
1482 | } | ||
1483 | } | ||
1484 | 1448 | ||
1449 | if (!mac_pton(valstr, pkt_dev->src_mac)) | ||
1450 | return -EINVAL; | ||
1485 | /* Set up Src MAC */ | 1451 | /* Set up Src MAC */ |
1486 | if (compare_ether_addr(old_smac, pkt_dev->src_mac)) | 1452 | memcpy(&pkt_dev->hh[6], pkt_dev->src_mac, ETH_ALEN); |
1487 | memcpy(&(pkt_dev->hh[6]), pkt_dev->src_mac, ETH_ALEN); | ||
1488 | 1453 | ||
1489 | sprintf(pg_result, "OK: srcmac"); | 1454 | sprintf(pg_result, "OK: srcmac %pM", pkt_dev->src_mac); |
1490 | return count; | 1455 | return count; |
1491 | } | 1456 | } |
1492 | 1457 | ||
@@ -1712,6 +1677,18 @@ static ssize_t pktgen_if_write(struct file *file, | |||
1712 | return count; | 1677 | return count; |
1713 | } | 1678 | } |
1714 | 1679 | ||
1680 | if (!strcmp(name, "skb_priority")) { | ||
1681 | len = num_arg(&user_buffer[i], 9, &value); | ||
1682 | if (len < 0) | ||
1683 | return len; | ||
1684 | |||
1685 | i += len; | ||
1686 | pkt_dev->skb_priority = value; | ||
1687 | sprintf(pg_result, "OK: skb_priority=%i", | ||
1688 | pkt_dev->skb_priority); | ||
1689 | return count; | ||
1690 | } | ||
1691 | |||
1715 | sprintf(pkt_dev->result, "No such parameter \"%s\"", name); | 1692 | sprintf(pkt_dev->result, "No such parameter \"%s\"", name); |
1716 | return -EINVAL; | 1693 | return -EINVAL; |
1717 | } | 1694 | } |
@@ -1766,7 +1743,7 @@ static ssize_t pktgen_thread_write(struct file *file, | |||
1766 | { | 1743 | { |
1767 | struct seq_file *seq = file->private_data; | 1744 | struct seq_file *seq = file->private_data; |
1768 | struct pktgen_thread *t = seq->private; | 1745 | struct pktgen_thread *t = seq->private; |
1769 | int i = 0, max, len, ret; | 1746 | int i, max, len, ret; |
1770 | char name[40]; | 1747 | char name[40]; |
1771 | char *pg_result; | 1748 | char *pg_result; |
1772 | 1749 | ||
@@ -1775,12 +1752,12 @@ static ssize_t pktgen_thread_write(struct file *file, | |||
1775 | return -EINVAL; | 1752 | return -EINVAL; |
1776 | } | 1753 | } |
1777 | 1754 | ||
1778 | max = count - i; | 1755 | max = count; |
1779 | len = count_trail_chars(&user_buffer[i], max); | 1756 | len = count_trail_chars(user_buffer, max); |
1780 | if (len < 0) | 1757 | if (len < 0) |
1781 | return len; | 1758 | return len; |
1782 | 1759 | ||
1783 | i += len; | 1760 | i = len; |
1784 | 1761 | ||
1785 | /* Read variable name */ | 1762 | /* Read variable name */ |
1786 | 1763 | ||
@@ -1977,7 +1954,7 @@ static struct net_device *pktgen_dev_get_by_name(struct pktgen_dev *pkt_dev, | |||
1977 | const char *ifname) | 1954 | const char *ifname) |
1978 | { | 1955 | { |
1979 | char b[IFNAMSIZ+5]; | 1956 | char b[IFNAMSIZ+5]; |
1980 | int i = 0; | 1957 | int i; |
1981 | 1958 | ||
1982 | for (i = 0; ifname[i] != '@'; i++) { | 1959 | for (i = 0; ifname[i] != '@'; i++) { |
1983 | if (i == IFNAMSIZ) | 1960 | if (i == IFNAMSIZ) |
@@ -2491,7 +2468,6 @@ static int pktgen_output_ipsec(struct sk_buff *skb, struct pktgen_dev *pkt_dev) | |||
2491 | { | 2468 | { |
2492 | struct xfrm_state *x = pkt_dev->flows[pkt_dev->curfl].x; | 2469 | struct xfrm_state *x = pkt_dev->flows[pkt_dev->curfl].x; |
2493 | int err = 0; | 2470 | int err = 0; |
2494 | struct iphdr *iph; | ||
2495 | 2471 | ||
2496 | if (!x) | 2472 | if (!x) |
2497 | return 0; | 2473 | return 0; |
@@ -2501,7 +2477,6 @@ static int pktgen_output_ipsec(struct sk_buff *skb, struct pktgen_dev *pkt_dev) | |||
2501 | return 0; | 2477 | return 0; |
2502 | 2478 | ||
2503 | spin_lock(&x->lock); | 2479 | spin_lock(&x->lock); |
2504 | iph = ip_hdr(skb); | ||
2505 | 2480 | ||
2506 | err = x->outer_mode->output(x, skb); | 2481 | err = x->outer_mode->output(x, skb); |
2507 | if (err) | 2482 | if (err) |
@@ -2521,8 +2496,8 @@ static void free_SAs(struct pktgen_dev *pkt_dev) | |||
2521 | { | 2496 | { |
2522 | if (pkt_dev->cflows) { | 2497 | if (pkt_dev->cflows) { |
2523 | /* let go of the SAs if we have them */ | 2498 | /* let go of the SAs if we have them */ |
2524 | int i = 0; | 2499 | int i; |
2525 | for (; i < pkt_dev->cflows; i++) { | 2500 | for (i = 0; i < pkt_dev->cflows; i++) { |
2526 | struct xfrm_state *x = pkt_dev->flows[i].x; | 2501 | struct xfrm_state *x = pkt_dev->flows[i].x; |
2527 | if (x) { | 2502 | if (x) { |
2528 | xfrm_state_put(x); | 2503 | xfrm_state_put(x); |
@@ -2587,6 +2562,72 @@ static inline __be16 build_tci(unsigned int id, unsigned int cfi, | |||
2587 | return htons(id | (cfi << 12) | (prio << 13)); | 2562 | return htons(id | (cfi << 12) | (prio << 13)); |
2588 | } | 2563 | } |
2589 | 2564 | ||
2565 | static void pktgen_finalize_skb(struct pktgen_dev *pkt_dev, struct sk_buff *skb, | ||
2566 | int datalen) | ||
2567 | { | ||
2568 | struct timeval timestamp; | ||
2569 | struct pktgen_hdr *pgh; | ||
2570 | |||
2571 | pgh = (struct pktgen_hdr *)skb_put(skb, sizeof(*pgh)); | ||
2572 | datalen -= sizeof(*pgh); | ||
2573 | |||
2574 | if (pkt_dev->nfrags <= 0) { | ||
2575 | memset(skb_put(skb, datalen), 0, datalen); | ||
2576 | } else { | ||
2577 | int frags = pkt_dev->nfrags; | ||
2578 | int i, len; | ||
2579 | int frag_len; | ||
2580 | |||
2581 | |||
2582 | if (frags > MAX_SKB_FRAGS) | ||
2583 | frags = MAX_SKB_FRAGS; | ||
2584 | len = datalen - frags * PAGE_SIZE; | ||
2585 | if (len > 0) { | ||
2586 | memset(skb_put(skb, len), 0, len); | ||
2587 | datalen = frags * PAGE_SIZE; | ||
2588 | } | ||
2589 | |||
2590 | i = 0; | ||
2591 | frag_len = (datalen/frags) < PAGE_SIZE ? | ||
2592 | (datalen/frags) : PAGE_SIZE; | ||
2593 | while (datalen > 0) { | ||
2594 | if (unlikely(!pkt_dev->page)) { | ||
2595 | int node = numa_node_id(); | ||
2596 | |||
2597 | if (pkt_dev->node >= 0 && (pkt_dev->flags & F_NODE)) | ||
2598 | node = pkt_dev->node; | ||
2599 | pkt_dev->page = alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, 0); | ||
2600 | if (!pkt_dev->page) | ||
2601 | break; | ||
2602 | } | ||
2603 | skb_shinfo(skb)->frags[i].page = pkt_dev->page; | ||
2604 | get_page(pkt_dev->page); | ||
2605 | skb_shinfo(skb)->frags[i].page_offset = 0; | ||
2606 | /*last fragment, fill rest of data*/ | ||
2607 | if (i == (frags - 1)) | ||
2608 | skb_shinfo(skb)->frags[i].size = | ||
2609 | (datalen < PAGE_SIZE ? datalen : PAGE_SIZE); | ||
2610 | else | ||
2611 | skb_shinfo(skb)->frags[i].size = frag_len; | ||
2612 | datalen -= skb_shinfo(skb)->frags[i].size; | ||
2613 | skb->len += skb_shinfo(skb)->frags[i].size; | ||
2614 | skb->data_len += skb_shinfo(skb)->frags[i].size; | ||
2615 | i++; | ||
2616 | skb_shinfo(skb)->nr_frags = i; | ||
2617 | } | ||
2618 | } | ||
2619 | |||
2620 | /* Stamp the time, and sequence number, | ||
2621 | * convert them to network byte order | ||
2622 | */ | ||
2623 | pgh->pgh_magic = htonl(PKTGEN_MAGIC); | ||
2624 | pgh->seq_num = htonl(pkt_dev->seq_num); | ||
2625 | |||
2626 | do_gettimeofday(&timestamp); | ||
2627 | pgh->tv_sec = htonl(timestamp.tv_sec); | ||
2628 | pgh->tv_usec = htonl(timestamp.tv_usec); | ||
2629 | } | ||
2630 | |||
2590 | static struct sk_buff *fill_packet_ipv4(struct net_device *odev, | 2631 | static struct sk_buff *fill_packet_ipv4(struct net_device *odev, |
2591 | struct pktgen_dev *pkt_dev) | 2632 | struct pktgen_dev *pkt_dev) |
2592 | { | 2633 | { |
@@ -2595,7 +2636,6 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, | |||
2595 | struct udphdr *udph; | 2636 | struct udphdr *udph; |
2596 | int datalen, iplen; | 2637 | int datalen, iplen; |
2597 | struct iphdr *iph; | 2638 | struct iphdr *iph; |
2598 | struct pktgen_hdr *pgh = NULL; | ||
2599 | __be16 protocol = htons(ETH_P_IP); | 2639 | __be16 protocol = htons(ETH_P_IP); |
2600 | __be32 *mpls; | 2640 | __be32 *mpls; |
2601 | __be16 *vlan_tci = NULL; /* Encapsulates priority and VLAN ID */ | 2641 | __be16 *vlan_tci = NULL; /* Encapsulates priority and VLAN ID */ |
@@ -2613,8 +2653,8 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, | |||
2613 | /* Update any of the values, used when we're incrementing various | 2653 | /* Update any of the values, used when we're incrementing various |
2614 | * fields. | 2654 | * fields. |
2615 | */ | 2655 | */ |
2616 | queue_map = pkt_dev->cur_queue_map; | ||
2617 | mod_cur_headers(pkt_dev); | 2656 | mod_cur_headers(pkt_dev); |
2657 | queue_map = pkt_dev->cur_queue_map; | ||
2618 | 2658 | ||
2619 | datalen = (odev->hard_header_len + 16) & ~0xf; | 2659 | datalen = (odev->hard_header_len + 16) & ~0xf; |
2620 | 2660 | ||
@@ -2642,6 +2682,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, | |||
2642 | sprintf(pkt_dev->result, "No memory"); | 2682 | sprintf(pkt_dev->result, "No memory"); |
2643 | return NULL; | 2683 | return NULL; |
2644 | } | 2684 | } |
2685 | prefetchw(skb->data); | ||
2645 | 2686 | ||
2646 | skb_reserve(skb, datalen); | 2687 | skb_reserve(skb, datalen); |
2647 | 2688 | ||
@@ -2672,6 +2713,8 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, | |||
2672 | skb->transport_header = skb->network_header + sizeof(struct iphdr); | 2713 | skb->transport_header = skb->network_header + sizeof(struct iphdr); |
2673 | skb_put(skb, sizeof(struct iphdr) + sizeof(struct udphdr)); | 2714 | skb_put(skb, sizeof(struct iphdr) + sizeof(struct udphdr)); |
2674 | skb_set_queue_mapping(skb, queue_map); | 2715 | skb_set_queue_mapping(skb, queue_map); |
2716 | skb->priority = pkt_dev->skb_priority; | ||
2717 | |||
2675 | iph = ip_hdr(skb); | 2718 | iph = ip_hdr(skb); |
2676 | udph = udp_hdr(skb); | 2719 | udph = udp_hdr(skb); |
2677 | 2720 | ||
@@ -2708,76 +2751,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, | |||
2708 | pkt_dev->pkt_overhead); | 2751 | pkt_dev->pkt_overhead); |
2709 | skb->dev = odev; | 2752 | skb->dev = odev; |
2710 | skb->pkt_type = PACKET_HOST; | 2753 | skb->pkt_type = PACKET_HOST; |
2711 | 2754 | pktgen_finalize_skb(pkt_dev, skb, datalen); | |
2712 | if (pkt_dev->nfrags <= 0) { | ||
2713 | pgh = (struct pktgen_hdr *)skb_put(skb, datalen); | ||
2714 | memset(pgh + 1, 0, datalen - sizeof(struct pktgen_hdr)); | ||
2715 | } else { | ||
2716 | int frags = pkt_dev->nfrags; | ||
2717 | int i, len; | ||
2718 | |||
2719 | pgh = (struct pktgen_hdr *)(((char *)(udph)) + 8); | ||
2720 | |||
2721 | if (frags > MAX_SKB_FRAGS) | ||
2722 | frags = MAX_SKB_FRAGS; | ||
2723 | if (datalen > frags * PAGE_SIZE) { | ||
2724 | len = datalen - frags * PAGE_SIZE; | ||
2725 | memset(skb_put(skb, len), 0, len); | ||
2726 | datalen = frags * PAGE_SIZE; | ||
2727 | } | ||
2728 | |||
2729 | i = 0; | ||
2730 | while (datalen > 0) { | ||
2731 | struct page *page = alloc_pages(GFP_KERNEL | __GFP_ZERO, 0); | ||
2732 | skb_shinfo(skb)->frags[i].page = page; | ||
2733 | skb_shinfo(skb)->frags[i].page_offset = 0; | ||
2734 | skb_shinfo(skb)->frags[i].size = | ||
2735 | (datalen < PAGE_SIZE ? datalen : PAGE_SIZE); | ||
2736 | datalen -= skb_shinfo(skb)->frags[i].size; | ||
2737 | skb->len += skb_shinfo(skb)->frags[i].size; | ||
2738 | skb->data_len += skb_shinfo(skb)->frags[i].size; | ||
2739 | i++; | ||
2740 | skb_shinfo(skb)->nr_frags = i; | ||
2741 | } | ||
2742 | |||
2743 | while (i < frags) { | ||
2744 | int rem; | ||
2745 | |||
2746 | if (i == 0) | ||
2747 | break; | ||
2748 | |||
2749 | rem = skb_shinfo(skb)->frags[i - 1].size / 2; | ||
2750 | if (rem == 0) | ||
2751 | break; | ||
2752 | |||
2753 | skb_shinfo(skb)->frags[i - 1].size -= rem; | ||
2754 | |||
2755 | skb_shinfo(skb)->frags[i] = | ||
2756 | skb_shinfo(skb)->frags[i - 1]; | ||
2757 | get_page(skb_shinfo(skb)->frags[i].page); | ||
2758 | skb_shinfo(skb)->frags[i].page = | ||
2759 | skb_shinfo(skb)->frags[i - 1].page; | ||
2760 | skb_shinfo(skb)->frags[i].page_offset += | ||
2761 | skb_shinfo(skb)->frags[i - 1].size; | ||
2762 | skb_shinfo(skb)->frags[i].size = rem; | ||
2763 | i++; | ||
2764 | skb_shinfo(skb)->nr_frags = i; | ||
2765 | } | ||
2766 | } | ||
2767 | |||
2768 | /* Stamp the time, and sequence number, | ||
2769 | * convert them to network byte order | ||
2770 | */ | ||
2771 | if (pgh) { | ||
2772 | struct timeval timestamp; | ||
2773 | |||
2774 | pgh->pgh_magic = htonl(PKTGEN_MAGIC); | ||
2775 | pgh->seq_num = htonl(pkt_dev->seq_num); | ||
2776 | |||
2777 | do_gettimeofday(&timestamp); | ||
2778 | pgh->tv_sec = htonl(timestamp.tv_sec); | ||
2779 | pgh->tv_usec = htonl(timestamp.tv_usec); | ||
2780 | } | ||
2781 | 2755 | ||
2782 | #ifdef CONFIG_XFRM | 2756 | #ifdef CONFIG_XFRM |
2783 | if (!process_ipsec(pkt_dev, skb, protocol)) | 2757 | if (!process_ipsec(pkt_dev, skb, protocol)) |
@@ -2878,79 +2852,6 @@ static unsigned int scan_ip6(const char *s, char ip[16]) | |||
2878 | return len; | 2852 | return len; |
2879 | } | 2853 | } |
2880 | 2854 | ||
2881 | static char tohex(char hexdigit) | ||
2882 | { | ||
2883 | return hexdigit > 9 ? hexdigit + 'a' - 10 : hexdigit + '0'; | ||
2884 | } | ||
2885 | |||
2886 | static int fmt_xlong(char *s, unsigned int i) | ||
2887 | { | ||
2888 | char *bak = s; | ||
2889 | *s = tohex((i >> 12) & 0xf); | ||
2890 | if (s != bak || *s != '0') | ||
2891 | ++s; | ||
2892 | *s = tohex((i >> 8) & 0xf); | ||
2893 | if (s != bak || *s != '0') | ||
2894 | ++s; | ||
2895 | *s = tohex((i >> 4) & 0xf); | ||
2896 | if (s != bak || *s != '0') | ||
2897 | ++s; | ||
2898 | *s = tohex(i & 0xf); | ||
2899 | return s - bak + 1; | ||
2900 | } | ||
2901 | |||
2902 | static unsigned int fmt_ip6(char *s, const char ip[16]) | ||
2903 | { | ||
2904 | unsigned int len; | ||
2905 | unsigned int i; | ||
2906 | unsigned int temp; | ||
2907 | unsigned int compressing; | ||
2908 | int j; | ||
2909 | |||
2910 | len = 0; | ||
2911 | compressing = 0; | ||
2912 | for (j = 0; j < 16; j += 2) { | ||
2913 | |||
2914 | #ifdef V4MAPPEDPREFIX | ||
2915 | if (j == 12 && !memcmp(ip, V4mappedprefix, 12)) { | ||
2916 | inet_ntoa_r(*(struct in_addr *)(ip + 12), s); | ||
2917 | temp = strlen(s); | ||
2918 | return len + temp; | ||
2919 | } | ||
2920 | #endif | ||
2921 | temp = ((unsigned long)(unsigned char)ip[j] << 8) + | ||
2922 | (unsigned long)(unsigned char)ip[j + 1]; | ||
2923 | if (temp == 0) { | ||
2924 | if (!compressing) { | ||
2925 | compressing = 1; | ||
2926 | if (j == 0) { | ||
2927 | *s++ = ':'; | ||
2928 | ++len; | ||
2929 | } | ||
2930 | } | ||
2931 | } else { | ||
2932 | if (compressing) { | ||
2933 | compressing = 0; | ||
2934 | *s++ = ':'; | ||
2935 | ++len; | ||
2936 | } | ||
2937 | i = fmt_xlong(s, temp); | ||
2938 | len += i; | ||
2939 | s += i; | ||
2940 | if (j < 14) { | ||
2941 | *s++ = ':'; | ||
2942 | ++len; | ||
2943 | } | ||
2944 | } | ||
2945 | } | ||
2946 | if (compressing) { | ||
2947 | *s++ = ':'; | ||
2948 | ++len; | ||
2949 | } | ||
2950 | *s = 0; | ||
2951 | return len; | ||
2952 | } | ||
2953 | |||
2954 | static struct sk_buff *fill_packet_ipv6(struct net_device *odev, | 2855 | static struct sk_buff *fill_packet_ipv6(struct net_device *odev, |
2955 | struct pktgen_dev *pkt_dev) | 2856 | struct pktgen_dev *pkt_dev) |
2956 | { | 2857 | { |
@@ -2959,7 +2860,6 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, | |||
2959 | struct udphdr *udph; | 2860 | struct udphdr *udph; |
2960 | int datalen; | 2861 | int datalen; |
2961 | struct ipv6hdr *iph; | 2862 | struct ipv6hdr *iph; |
2962 | struct pktgen_hdr *pgh = NULL; | ||
2963 | __be16 protocol = htons(ETH_P_IPV6); | 2863 | __be16 protocol = htons(ETH_P_IPV6); |
2964 | __be32 *mpls; | 2864 | __be32 *mpls; |
2965 | __be16 *vlan_tci = NULL; /* Encapsulates priority and VLAN ID */ | 2865 | __be16 *vlan_tci = NULL; /* Encapsulates priority and VLAN ID */ |
@@ -2977,8 +2877,8 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, | |||
2977 | /* Update any of the values, used when we're incrementing various | 2877 | /* Update any of the values, used when we're incrementing various |
2978 | * fields. | 2878 | * fields. |
2979 | */ | 2879 | */ |
2980 | queue_map = pkt_dev->cur_queue_map; | ||
2981 | mod_cur_headers(pkt_dev); | 2880 | mod_cur_headers(pkt_dev); |
2881 | queue_map = pkt_dev->cur_queue_map; | ||
2982 | 2882 | ||
2983 | skb = __netdev_alloc_skb(odev, | 2883 | skb = __netdev_alloc_skb(odev, |
2984 | pkt_dev->cur_pkt_size + 64 | 2884 | pkt_dev->cur_pkt_size + 64 |
@@ -2987,6 +2887,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, | |||
2987 | sprintf(pkt_dev->result, "No memory"); | 2887 | sprintf(pkt_dev->result, "No memory"); |
2988 | return NULL; | 2888 | return NULL; |
2989 | } | 2889 | } |
2890 | prefetchw(skb->data); | ||
2990 | 2891 | ||
2991 | skb_reserve(skb, 16); | 2892 | skb_reserve(skb, 16); |
2992 | 2893 | ||
@@ -3017,6 +2918,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, | |||
3017 | skb->transport_header = skb->network_header + sizeof(struct ipv6hdr); | 2918 | skb->transport_header = skb->network_header + sizeof(struct ipv6hdr); |
3018 | skb_put(skb, sizeof(struct ipv6hdr) + sizeof(struct udphdr)); | 2919 | skb_put(skb, sizeof(struct ipv6hdr) + sizeof(struct udphdr)); |
3019 | skb_set_queue_mapping(skb, queue_map); | 2920 | skb_set_queue_mapping(skb, queue_map); |
2921 | skb->priority = pkt_dev->skb_priority; | ||
3020 | iph = ipv6_hdr(skb); | 2922 | iph = ipv6_hdr(skb); |
3021 | udph = udp_hdr(skb); | 2923 | udph = udp_hdr(skb); |
3022 | 2924 | ||
@@ -3060,75 +2962,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, | |||
3060 | skb->dev = odev; | 2962 | skb->dev = odev; |
3061 | skb->pkt_type = PACKET_HOST; | 2963 | skb->pkt_type = PACKET_HOST; |
3062 | 2964 | ||
3063 | if (pkt_dev->nfrags <= 0) | 2965 | pktgen_finalize_skb(pkt_dev, skb, datalen); |
3064 | pgh = (struct pktgen_hdr *)skb_put(skb, datalen); | ||
3065 | else { | ||
3066 | int frags = pkt_dev->nfrags; | ||
3067 | int i; | ||
3068 | |||
3069 | pgh = (struct pktgen_hdr *)(((char *)(udph)) + 8); | ||
3070 | |||
3071 | if (frags > MAX_SKB_FRAGS) | ||
3072 | frags = MAX_SKB_FRAGS; | ||
3073 | if (datalen > frags * PAGE_SIZE) { | ||
3074 | skb_put(skb, datalen - frags * PAGE_SIZE); | ||
3075 | datalen = frags * PAGE_SIZE; | ||
3076 | } | ||
3077 | |||
3078 | i = 0; | ||
3079 | while (datalen > 0) { | ||
3080 | struct page *page = alloc_pages(GFP_KERNEL, 0); | ||
3081 | skb_shinfo(skb)->frags[i].page = page; | ||
3082 | skb_shinfo(skb)->frags[i].page_offset = 0; | ||
3083 | skb_shinfo(skb)->frags[i].size = | ||
3084 | (datalen < PAGE_SIZE ? datalen : PAGE_SIZE); | ||
3085 | datalen -= skb_shinfo(skb)->frags[i].size; | ||
3086 | skb->len += skb_shinfo(skb)->frags[i].size; | ||
3087 | skb->data_len += skb_shinfo(skb)->frags[i].size; | ||
3088 | i++; | ||
3089 | skb_shinfo(skb)->nr_frags = i; | ||
3090 | } | ||
3091 | |||
3092 | while (i < frags) { | ||
3093 | int rem; | ||
3094 | |||
3095 | if (i == 0) | ||
3096 | break; | ||
3097 | |||
3098 | rem = skb_shinfo(skb)->frags[i - 1].size / 2; | ||
3099 | if (rem == 0) | ||
3100 | break; | ||
3101 | |||
3102 | skb_shinfo(skb)->frags[i - 1].size -= rem; | ||
3103 | |||
3104 | skb_shinfo(skb)->frags[i] = | ||
3105 | skb_shinfo(skb)->frags[i - 1]; | ||
3106 | get_page(skb_shinfo(skb)->frags[i].page); | ||
3107 | skb_shinfo(skb)->frags[i].page = | ||
3108 | skb_shinfo(skb)->frags[i - 1].page; | ||
3109 | skb_shinfo(skb)->frags[i].page_offset += | ||
3110 | skb_shinfo(skb)->frags[i - 1].size; | ||
3111 | skb_shinfo(skb)->frags[i].size = rem; | ||
3112 | i++; | ||
3113 | skb_shinfo(skb)->nr_frags = i; | ||
3114 | } | ||
3115 | } | ||
3116 | |||
3117 | /* Stamp the time, and sequence number, | ||
3118 | * convert them to network byte order | ||
3119 | * should we update cloned packets too ? | ||
3120 | */ | ||
3121 | if (pgh) { | ||
3122 | struct timeval timestamp; | ||
3123 | |||
3124 | pgh->pgh_magic = htonl(PKTGEN_MAGIC); | ||
3125 | pgh->seq_num = htonl(pkt_dev->seq_num); | ||
3126 | |||
3127 | do_gettimeofday(&timestamp); | ||
3128 | pgh->tv_sec = htonl(timestamp.tv_sec); | ||
3129 | pgh->tv_usec = htonl(timestamp.tv_usec); | ||
3130 | } | ||
3131 | /* pkt_dev->seq_num++; FF: you really mean this? */ | ||
3132 | 2966 | ||
3133 | return skb; | 2967 | return skb; |
3134 | } | 2968 | } |
@@ -3298,7 +3132,7 @@ static void show_results(struct pktgen_dev *pkt_dev, int nr_frags) | |||
3298 | pkt_dev->started_at); | 3132 | pkt_dev->started_at); |
3299 | ktime_t idle = ns_to_ktime(pkt_dev->idle_acc); | 3133 | ktime_t idle = ns_to_ktime(pkt_dev->idle_acc); |
3300 | 3134 | ||
3301 | p += sprintf(p, "OK: %llu(c%llu+d%llu) nsec, %llu (%dbyte,%dfrags)\n", | 3135 | p += sprintf(p, "OK: %llu(c%llu+d%llu) usec, %llu (%dbyte,%dfrags)\n", |
3302 | (unsigned long long)ktime_to_us(elapsed), | 3136 | (unsigned long long)ktime_to_us(elapsed), |
3303 | (unsigned long long)ktime_to_us(ktime_sub(elapsed, idle)), | 3137 | (unsigned long long)ktime_to_us(ktime_sub(elapsed, idle)), |
3304 | (unsigned long long)ktime_to_us(idle), | 3138 | (unsigned long long)ktime_to_us(idle), |
@@ -3432,11 +3266,6 @@ static void pktgen_rem_thread(struct pktgen_thread *t) | |||
3432 | 3266 | ||
3433 | remove_proc_entry(t->tsk->comm, pg_proc_dir); | 3267 | remove_proc_entry(t->tsk->comm, pg_proc_dir); |
3434 | 3268 | ||
3435 | mutex_lock(&pktgen_thread_lock); | ||
3436 | |||
3437 | list_del(&t->th_list); | ||
3438 | |||
3439 | mutex_unlock(&pktgen_thread_lock); | ||
3440 | } | 3269 | } |
3441 | 3270 | ||
3442 | static void pktgen_resched(struct pktgen_dev *pkt_dev) | 3271 | static void pktgen_resched(struct pktgen_dev *pkt_dev) |
@@ -3511,7 +3340,7 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev) | |||
3511 | 3340 | ||
3512 | __netif_tx_lock_bh(txq); | 3341 | __netif_tx_lock_bh(txq); |
3513 | 3342 | ||
3514 | if (unlikely(netif_tx_queue_stopped(txq) || netif_tx_queue_frozen(txq))) { | 3343 | if (unlikely(netif_tx_queue_frozen_or_stopped(txq))) { |
3515 | ret = NETDEV_TX_BUSY; | 3344 | ret = NETDEV_TX_BUSY; |
3516 | pkt_dev->last_ok = 0; | 3345 | pkt_dev->last_ok = 0; |
3517 | goto unlock; | 3346 | goto unlock; |
@@ -3535,8 +3364,7 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev) | |||
3535 | break; | 3364 | break; |
3536 | default: /* Drivers are not supposed to return other values! */ | 3365 | default: /* Drivers are not supposed to return other values! */ |
3537 | if (net_ratelimit()) | 3366 | if (net_ratelimit()) |
3538 | pr_info("pktgen: %s xmit error: %d\n", | 3367 | pr_info("%s xmit error: %d\n", pkt_dev->odevname, ret); |
3539 | pkt_dev->odevname, ret); | ||
3540 | pkt_dev->errors++; | 3368 | pkt_dev->errors++; |
3541 | /* fallthru */ | 3369 | /* fallthru */ |
3542 | case NETDEV_TX_LOCKED: | 3370 | case NETDEV_TX_LOCKED: |
@@ -3583,6 +3411,8 @@ static int pktgen_thread_worker(void *arg) | |||
3583 | pkt_dev = next_to_run(t); | 3411 | pkt_dev = next_to_run(t); |
3584 | 3412 | ||
3585 | if (unlikely(!pkt_dev && t->control == 0)) { | 3413 | if (unlikely(!pkt_dev && t->control == 0)) { |
3414 | if (pktgen_exiting) | ||
3415 | break; | ||
3586 | wait_event_interruptible_timeout(t->queue, | 3416 | wait_event_interruptible_timeout(t->queue, |
3587 | t->control != 0, | 3417 | t->control != 0, |
3588 | HZ/10); | 3418 | HZ/10); |
@@ -3635,6 +3465,13 @@ static int pktgen_thread_worker(void *arg) | |||
3635 | pr_debug("%s removing thread\n", t->tsk->comm); | 3465 | pr_debug("%s removing thread\n", t->tsk->comm); |
3636 | pktgen_rem_thread(t); | 3466 | pktgen_rem_thread(t); |
3637 | 3467 | ||
3468 | /* Wait for kthread_stop */ | ||
3469 | while (!kthread_should_stop()) { | ||
3470 | set_current_state(TASK_INTERRUPTIBLE); | ||
3471 | schedule(); | ||
3472 | } | ||
3473 | __set_current_state(TASK_RUNNING); | ||
3474 | |||
3638 | return 0; | 3475 | return 0; |
3639 | } | 3476 | } |
3640 | 3477 | ||
@@ -3707,13 +3544,12 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname) | |||
3707 | return -ENOMEM; | 3544 | return -ENOMEM; |
3708 | 3545 | ||
3709 | strcpy(pkt_dev->odevname, ifname); | 3546 | strcpy(pkt_dev->odevname, ifname); |
3710 | pkt_dev->flows = vmalloc_node(MAX_CFLOWS * sizeof(struct flow_state), | 3547 | pkt_dev->flows = vzalloc_node(MAX_CFLOWS * sizeof(struct flow_state), |
3711 | node); | 3548 | node); |
3712 | if (pkt_dev->flows == NULL) { | 3549 | if (pkt_dev->flows == NULL) { |
3713 | kfree(pkt_dev); | 3550 | kfree(pkt_dev); |
3714 | return -ENOMEM; | 3551 | return -ENOMEM; |
3715 | } | 3552 | } |
3716 | memset(pkt_dev->flows, 0, MAX_CFLOWS * sizeof(struct flow_state)); | ||
3717 | 3553 | ||
3718 | pkt_dev->removal_mark = 0; | 3554 | pkt_dev->removal_mark = 0; |
3719 | pkt_dev->min_pkt_size = ETH_ZLEN; | 3555 | pkt_dev->min_pkt_size = ETH_ZLEN; |
@@ -3786,7 +3622,10 @@ static int __init pktgen_create_thread(int cpu) | |||
3786 | list_add_tail(&t->th_list, &pktgen_threads); | 3622 | list_add_tail(&t->th_list, &pktgen_threads); |
3787 | init_completion(&t->start_done); | 3623 | init_completion(&t->start_done); |
3788 | 3624 | ||
3789 | p = kthread_create(pktgen_thread_worker, t, "kpktgend_%d", cpu); | 3625 | p = kthread_create_on_node(pktgen_thread_worker, |
3626 | t, | ||
3627 | cpu_to_node(cpu), | ||
3628 | "kpktgend_%d", cpu); | ||
3790 | if (IS_ERR(p)) { | 3629 | if (IS_ERR(p)) { |
3791 | pr_err("kernel_thread() failed for cpu %d\n", t->cpu); | 3630 | pr_err("kernel_thread() failed for cpu %d\n", t->cpu); |
3792 | list_del(&t->th_list); | 3631 | list_del(&t->th_list); |
@@ -3858,6 +3697,8 @@ static int pktgen_remove_device(struct pktgen_thread *t, | |||
3858 | free_SAs(pkt_dev); | 3697 | free_SAs(pkt_dev); |
3859 | #endif | 3698 | #endif |
3860 | vfree(pkt_dev->flows); | 3699 | vfree(pkt_dev->flows); |
3700 | if (pkt_dev->page) | ||
3701 | put_page(pkt_dev->page); | ||
3861 | kfree(pkt_dev); | 3702 | kfree(pkt_dev); |
3862 | return 0; | 3703 | return 0; |
3863 | } | 3704 | } |
@@ -3866,6 +3707,7 @@ static int __init pg_init(void) | |||
3866 | { | 3707 | { |
3867 | int cpu; | 3708 | int cpu; |
3868 | struct proc_dir_entry *pe; | 3709 | struct proc_dir_entry *pe; |
3710 | int ret = 0; | ||
3869 | 3711 | ||
3870 | pr_info("%s", version); | 3712 | pr_info("%s", version); |
3871 | 3713 | ||
@@ -3876,11 +3718,10 @@ static int __init pg_init(void) | |||
3876 | pe = proc_create(PGCTRL, 0600, pg_proc_dir, &pktgen_fops); | 3718 | pe = proc_create(PGCTRL, 0600, pg_proc_dir, &pktgen_fops); |
3877 | if (pe == NULL) { | 3719 | if (pe == NULL) { |
3878 | pr_err("ERROR: cannot create %s procfs entry\n", PGCTRL); | 3720 | pr_err("ERROR: cannot create %s procfs entry\n", PGCTRL); |
3879 | proc_net_remove(&init_net, PG_PROC_DIR); | 3721 | ret = -EINVAL; |
3880 | return -EINVAL; | 3722 | goto remove_dir; |
3881 | } | 3723 | } |
3882 | 3724 | ||
3883 | /* Register us to receive netdevice events */ | ||
3884 | register_netdevice_notifier(&pktgen_notifier_block); | 3725 | register_netdevice_notifier(&pktgen_notifier_block); |
3885 | 3726 | ||
3886 | for_each_online_cpu(cpu) { | 3727 | for_each_online_cpu(cpu) { |
@@ -3894,23 +3735,27 @@ static int __init pg_init(void) | |||
3894 | 3735 | ||
3895 | if (list_empty(&pktgen_threads)) { | 3736 | if (list_empty(&pktgen_threads)) { |
3896 | pr_err("ERROR: Initialization failed for all threads\n"); | 3737 | pr_err("ERROR: Initialization failed for all threads\n"); |
3897 | unregister_netdevice_notifier(&pktgen_notifier_block); | 3738 | ret = -ENODEV; |
3898 | remove_proc_entry(PGCTRL, pg_proc_dir); | 3739 | goto unregister; |
3899 | proc_net_remove(&init_net, PG_PROC_DIR); | ||
3900 | return -ENODEV; | ||
3901 | } | 3740 | } |
3902 | 3741 | ||
3903 | return 0; | 3742 | return 0; |
3743 | |||
3744 | unregister: | ||
3745 | unregister_netdevice_notifier(&pktgen_notifier_block); | ||
3746 | remove_proc_entry(PGCTRL, pg_proc_dir); | ||
3747 | remove_dir: | ||
3748 | proc_net_remove(&init_net, PG_PROC_DIR); | ||
3749 | return ret; | ||
3904 | } | 3750 | } |
3905 | 3751 | ||
3906 | static void __exit pg_cleanup(void) | 3752 | static void __exit pg_cleanup(void) |
3907 | { | 3753 | { |
3908 | struct pktgen_thread *t; | 3754 | struct pktgen_thread *t; |
3909 | struct list_head *q, *n; | 3755 | struct list_head *q, *n; |
3910 | wait_queue_head_t queue; | ||
3911 | init_waitqueue_head(&queue); | ||
3912 | 3756 | ||
3913 | /* Stop all interfaces & threads */ | 3757 | /* Stop all interfaces & threads */ |
3758 | pktgen_exiting = true; | ||
3914 | 3759 | ||
3915 | list_for_each_safe(q, n, &pktgen_threads) { | 3760 | list_for_each_safe(q, n, &pktgen_threads) { |
3916 | t = list_entry(q, struct pktgen_thread, th_list); | 3761 | t = list_entry(q, struct pktgen_thread, th_list); |
diff --git a/net/core/request_sock.c b/net/core/request_sock.c index 7552495aff7a..182236b2510a 100644 --- a/net/core/request_sock.c +++ b/net/core/request_sock.c | |||
@@ -33,6 +33,7 @@ | |||
33 | * Note : Dont forget somaxconn that may limit backlog too. | 33 | * Note : Dont forget somaxconn that may limit backlog too. |
34 | */ | 34 | */ |
35 | int sysctl_max_syn_backlog = 256; | 35 | int sysctl_max_syn_backlog = 256; |
36 | EXPORT_SYMBOL(sysctl_max_syn_backlog); | ||
36 | 37 | ||
37 | int reqsk_queue_alloc(struct request_sock_queue *queue, | 38 | int reqsk_queue_alloc(struct request_sock_queue *queue, |
38 | unsigned int nr_table_entries) | 39 | unsigned int nr_table_entries) |
@@ -45,9 +46,7 @@ int reqsk_queue_alloc(struct request_sock_queue *queue, | |||
45 | nr_table_entries = roundup_pow_of_two(nr_table_entries + 1); | 46 | nr_table_entries = roundup_pow_of_two(nr_table_entries + 1); |
46 | lopt_size += nr_table_entries * sizeof(struct request_sock *); | 47 | lopt_size += nr_table_entries * sizeof(struct request_sock *); |
47 | if (lopt_size > PAGE_SIZE) | 48 | if (lopt_size > PAGE_SIZE) |
48 | lopt = __vmalloc(lopt_size, | 49 | lopt = vzalloc(lopt_size); |
49 | GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO, | ||
50 | PAGE_KERNEL); | ||
51 | else | 50 | else |
52 | lopt = kzalloc(lopt_size, GFP_KERNEL); | 51 | lopt = kzalloc(lopt_size, GFP_KERNEL); |
53 | if (lopt == NULL) | 52 | if (lopt == NULL) |
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index f78d821bd935..abd936d8a716 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c | |||
@@ -196,7 +196,7 @@ EXPORT_SYMBOL_GPL(__rtnl_register); | |||
196 | * as failure of this function is very unlikely, it can only happen due | 196 | * as failure of this function is very unlikely, it can only happen due |
197 | * to lack of memory when allocating the chain to store all message | 197 | * to lack of memory when allocating the chain to store all message |
198 | * handlers for a protocol. Meant for use in init functions where lack | 198 | * handlers for a protocol. Meant for use in init functions where lack |
199 | * of memory implies no sense in continueing. | 199 | * of memory implies no sense in continuing. |
200 | */ | 200 | */ |
201 | void rtnl_register(int protocol, int msgtype, | 201 | void rtnl_register(int protocol, int msgtype, |
202 | rtnl_doit_func doit, rtnl_dumpit_func dumpit) | 202 | rtnl_doit_func doit, rtnl_dumpit_func dumpit) |
@@ -299,14 +299,6 @@ static void __rtnl_kill_links(struct net *net, struct rtnl_link_ops *ops) | |||
299 | unregister_netdevice_many(&list_kill); | 299 | unregister_netdevice_many(&list_kill); |
300 | } | 300 | } |
301 | 301 | ||
302 | void rtnl_kill_links(struct net *net, struct rtnl_link_ops *ops) | ||
303 | { | ||
304 | rtnl_lock(); | ||
305 | __rtnl_kill_links(net, ops); | ||
306 | rtnl_unlock(); | ||
307 | } | ||
308 | EXPORT_SYMBOL_GPL(rtnl_kill_links); | ||
309 | |||
310 | /** | 302 | /** |
311 | * __rtnl_link_unregister - Unregister rtnl_link_ops from rtnetlink. | 303 | * __rtnl_link_unregister - Unregister rtnl_link_ops from rtnetlink. |
312 | * @ops: struct rtnl_link_ops * to unregister | 304 | * @ops: struct rtnl_link_ops * to unregister |
@@ -355,16 +347,106 @@ static size_t rtnl_link_get_size(const struct net_device *dev) | |||
355 | if (!ops) | 347 | if (!ops) |
356 | return 0; | 348 | return 0; |
357 | 349 | ||
358 | size = nlmsg_total_size(sizeof(struct nlattr)) + /* IFLA_LINKINFO */ | 350 | size = nla_total_size(sizeof(struct nlattr)) + /* IFLA_LINKINFO */ |
359 | nlmsg_total_size(strlen(ops->kind) + 1); /* IFLA_INFO_KIND */ | 351 | nla_total_size(strlen(ops->kind) + 1); /* IFLA_INFO_KIND */ |
360 | 352 | ||
361 | if (ops->get_size) | 353 | if (ops->get_size) |
362 | /* IFLA_INFO_DATA + nested data */ | 354 | /* IFLA_INFO_DATA + nested data */ |
363 | size += nlmsg_total_size(sizeof(struct nlattr)) + | 355 | size += nla_total_size(sizeof(struct nlattr)) + |
364 | ops->get_size(dev); | 356 | ops->get_size(dev); |
365 | 357 | ||
366 | if (ops->get_xstats_size) | 358 | if (ops->get_xstats_size) |
367 | size += ops->get_xstats_size(dev); /* IFLA_INFO_XSTATS */ | 359 | /* IFLA_INFO_XSTATS */ |
360 | size += nla_total_size(ops->get_xstats_size(dev)); | ||
361 | |||
362 | return size; | ||
363 | } | ||
364 | |||
365 | static LIST_HEAD(rtnl_af_ops); | ||
366 | |||
367 | static const struct rtnl_af_ops *rtnl_af_lookup(const int family) | ||
368 | { | ||
369 | const struct rtnl_af_ops *ops; | ||
370 | |||
371 | list_for_each_entry(ops, &rtnl_af_ops, list) { | ||
372 | if (ops->family == family) | ||
373 | return ops; | ||
374 | } | ||
375 | |||
376 | return NULL; | ||
377 | } | ||
378 | |||
379 | /** | ||
380 | * __rtnl_af_register - Register rtnl_af_ops with rtnetlink. | ||
381 | * @ops: struct rtnl_af_ops * to register | ||
382 | * | ||
383 | * The caller must hold the rtnl_mutex. | ||
384 | * | ||
385 | * Returns 0 on success or a negative error code. | ||
386 | */ | ||
387 | int __rtnl_af_register(struct rtnl_af_ops *ops) | ||
388 | { | ||
389 | list_add_tail(&ops->list, &rtnl_af_ops); | ||
390 | return 0; | ||
391 | } | ||
392 | EXPORT_SYMBOL_GPL(__rtnl_af_register); | ||
393 | |||
394 | /** | ||
395 | * rtnl_af_register - Register rtnl_af_ops with rtnetlink. | ||
396 | * @ops: struct rtnl_af_ops * to register | ||
397 | * | ||
398 | * Returns 0 on success or a negative error code. | ||
399 | */ | ||
400 | int rtnl_af_register(struct rtnl_af_ops *ops) | ||
401 | { | ||
402 | int err; | ||
403 | |||
404 | rtnl_lock(); | ||
405 | err = __rtnl_af_register(ops); | ||
406 | rtnl_unlock(); | ||
407 | return err; | ||
408 | } | ||
409 | EXPORT_SYMBOL_GPL(rtnl_af_register); | ||
410 | |||
411 | /** | ||
412 | * __rtnl_af_unregister - Unregister rtnl_af_ops from rtnetlink. | ||
413 | * @ops: struct rtnl_af_ops * to unregister | ||
414 | * | ||
415 | * The caller must hold the rtnl_mutex. | ||
416 | */ | ||
417 | void __rtnl_af_unregister(struct rtnl_af_ops *ops) | ||
418 | { | ||
419 | list_del(&ops->list); | ||
420 | } | ||
421 | EXPORT_SYMBOL_GPL(__rtnl_af_unregister); | ||
422 | |||
423 | /** | ||
424 | * rtnl_af_unregister - Unregister rtnl_af_ops from rtnetlink. | ||
425 | * @ops: struct rtnl_af_ops * to unregister | ||
426 | */ | ||
427 | void rtnl_af_unregister(struct rtnl_af_ops *ops) | ||
428 | { | ||
429 | rtnl_lock(); | ||
430 | __rtnl_af_unregister(ops); | ||
431 | rtnl_unlock(); | ||
432 | } | ||
433 | EXPORT_SYMBOL_GPL(rtnl_af_unregister); | ||
434 | |||
435 | static size_t rtnl_link_get_af_size(const struct net_device *dev) | ||
436 | { | ||
437 | struct rtnl_af_ops *af_ops; | ||
438 | size_t size; | ||
439 | |||
440 | /* IFLA_AF_SPEC */ | ||
441 | size = nla_total_size(sizeof(struct nlattr)); | ||
442 | |||
443 | list_for_each_entry(af_ops, &rtnl_af_ops, list) { | ||
444 | if (af_ops->get_link_af_size) { | ||
445 | /* AF_* + nested data */ | ||
446 | size += nla_total_size(sizeof(struct nlattr)) + | ||
447 | af_ops->get_link_af_size(dev); | ||
448 | } | ||
449 | } | ||
368 | 450 | ||
369 | return size; | 451 | return size; |
370 | } | 452 | } |
@@ -612,36 +694,7 @@ static void copy_rtnl_link_stats(struct rtnl_link_stats *a, | |||
612 | 694 | ||
613 | static void copy_rtnl_link_stats64(void *v, const struct rtnl_link_stats64 *b) | 695 | static void copy_rtnl_link_stats64(void *v, const struct rtnl_link_stats64 *b) |
614 | { | 696 | { |
615 | struct rtnl_link_stats64 a; | 697 | memcpy(v, b, sizeof(*b)); |
616 | |||
617 | a.rx_packets = b->rx_packets; | ||
618 | a.tx_packets = b->tx_packets; | ||
619 | a.rx_bytes = b->rx_bytes; | ||
620 | a.tx_bytes = b->tx_bytes; | ||
621 | a.rx_errors = b->rx_errors; | ||
622 | a.tx_errors = b->tx_errors; | ||
623 | a.rx_dropped = b->rx_dropped; | ||
624 | a.tx_dropped = b->tx_dropped; | ||
625 | |||
626 | a.multicast = b->multicast; | ||
627 | a.collisions = b->collisions; | ||
628 | |||
629 | a.rx_length_errors = b->rx_length_errors; | ||
630 | a.rx_over_errors = b->rx_over_errors; | ||
631 | a.rx_crc_errors = b->rx_crc_errors; | ||
632 | a.rx_frame_errors = b->rx_frame_errors; | ||
633 | a.rx_fifo_errors = b->rx_fifo_errors; | ||
634 | a.rx_missed_errors = b->rx_missed_errors; | ||
635 | |||
636 | a.tx_aborted_errors = b->tx_aborted_errors; | ||
637 | a.tx_carrier_errors = b->tx_carrier_errors; | ||
638 | a.tx_fifo_errors = b->tx_fifo_errors; | ||
639 | a.tx_heartbeat_errors = b->tx_heartbeat_errors; | ||
640 | a.tx_window_errors = b->tx_window_errors; | ||
641 | |||
642 | a.rx_compressed = b->rx_compressed; | ||
643 | a.tx_compressed = b->tx_compressed; | ||
644 | memcpy(v, &a, sizeof(a)); | ||
645 | } | 698 | } |
646 | 699 | ||
647 | /* All VF info */ | 700 | /* All VF info */ |
@@ -707,7 +760,8 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev) | |||
707 | + nla_total_size(4) /* IFLA_NUM_VF */ | 760 | + nla_total_size(4) /* IFLA_NUM_VF */ |
708 | + rtnl_vfinfo_size(dev) /* IFLA_VFINFO_LIST */ | 761 | + rtnl_vfinfo_size(dev) /* IFLA_VFINFO_LIST */ |
709 | + rtnl_port_size(dev) /* IFLA_VF_PORTS + IFLA_PORT_SELF */ | 762 | + rtnl_port_size(dev) /* IFLA_VF_PORTS + IFLA_PORT_SELF */ |
710 | + rtnl_link_get_size(dev); /* IFLA_LINKINFO */ | 763 | + rtnl_link_get_size(dev) /* IFLA_LINKINFO */ |
764 | + rtnl_link_get_af_size(dev); /* IFLA_AF_SPEC */ | ||
711 | } | 765 | } |
712 | 766 | ||
713 | static int rtnl_vf_ports_fill(struct sk_buff *skb, struct net_device *dev) | 767 | static int rtnl_vf_ports_fill(struct sk_buff *skb, struct net_device *dev) |
@@ -793,8 +847,10 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, | |||
793 | struct nlmsghdr *nlh; | 847 | struct nlmsghdr *nlh; |
794 | struct rtnl_link_stats64 temp; | 848 | struct rtnl_link_stats64 temp; |
795 | const struct rtnl_link_stats64 *stats; | 849 | const struct rtnl_link_stats64 *stats; |
796 | struct nlattr *attr; | 850 | struct nlattr *attr, *af_spec; |
851 | struct rtnl_af_ops *af_ops; | ||
797 | 852 | ||
853 | ASSERT_RTNL(); | ||
798 | nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifm), flags); | 854 | nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifm), flags); |
799 | if (nlh == NULL) | 855 | if (nlh == NULL) |
800 | return -EMSGSIZE; | 856 | return -EMSGSIZE; |
@@ -813,6 +869,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, | |||
813 | netif_running(dev) ? dev->operstate : IF_OPER_DOWN); | 869 | netif_running(dev) ? dev->operstate : IF_OPER_DOWN); |
814 | NLA_PUT_U8(skb, IFLA_LINKMODE, dev->link_mode); | 870 | NLA_PUT_U8(skb, IFLA_LINKMODE, dev->link_mode); |
815 | NLA_PUT_U32(skb, IFLA_MTU, dev->mtu); | 871 | NLA_PUT_U32(skb, IFLA_MTU, dev->mtu); |
872 | NLA_PUT_U32(skb, IFLA_GROUP, dev->group); | ||
816 | 873 | ||
817 | if (dev->ifindex != dev->iflink) | 874 | if (dev->ifindex != dev->iflink) |
818 | NLA_PUT_U32(skb, IFLA_LINK, dev->iflink); | 875 | NLA_PUT_U32(skb, IFLA_LINK, dev->iflink); |
@@ -902,6 +959,36 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, | |||
902 | goto nla_put_failure; | 959 | goto nla_put_failure; |
903 | } | 960 | } |
904 | 961 | ||
962 | if (!(af_spec = nla_nest_start(skb, IFLA_AF_SPEC))) | ||
963 | goto nla_put_failure; | ||
964 | |||
965 | list_for_each_entry(af_ops, &rtnl_af_ops, list) { | ||
966 | if (af_ops->fill_link_af) { | ||
967 | struct nlattr *af; | ||
968 | int err; | ||
969 | |||
970 | if (!(af = nla_nest_start(skb, af_ops->family))) | ||
971 | goto nla_put_failure; | ||
972 | |||
973 | err = af_ops->fill_link_af(skb, dev); | ||
974 | |||
975 | /* | ||
976 | * Caller may return ENODATA to indicate that there | ||
977 | * was no data to be dumped. This is not an error, it | ||
978 | * means we should trim the attribute header and | ||
979 | * continue. | ||
980 | */ | ||
981 | if (err == -ENODATA) | ||
982 | nla_nest_cancel(skb, af); | ||
983 | else if (err < 0) | ||
984 | goto nla_put_failure; | ||
985 | |||
986 | nla_nest_end(skb, af); | ||
987 | } | ||
988 | } | ||
989 | |||
990 | nla_nest_end(skb, af_spec); | ||
991 | |||
905 | return nlmsg_end(skb, nlh); | 992 | return nlmsg_end(skb, nlh); |
906 | 993 | ||
907 | nla_put_failure: | 994 | nla_put_failure: |
@@ -921,10 +1008,11 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) | |||
921 | s_h = cb->args[0]; | 1008 | s_h = cb->args[0]; |
922 | s_idx = cb->args[1]; | 1009 | s_idx = cb->args[1]; |
923 | 1010 | ||
1011 | rcu_read_lock(); | ||
924 | for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { | 1012 | for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { |
925 | idx = 0; | 1013 | idx = 0; |
926 | head = &net->dev_index_head[h]; | 1014 | head = &net->dev_index_head[h]; |
927 | hlist_for_each_entry(dev, node, head, index_hlist) { | 1015 | hlist_for_each_entry_rcu(dev, node, head, index_hlist) { |
928 | if (idx < s_idx) | 1016 | if (idx < s_idx) |
929 | goto cont; | 1017 | goto cont; |
930 | if (rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK, | 1018 | if (rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK, |
@@ -937,6 +1025,7 @@ cont: | |||
937 | } | 1025 | } |
938 | } | 1026 | } |
939 | out: | 1027 | out: |
1028 | rcu_read_unlock(); | ||
940 | cb->args[1] = idx; | 1029 | cb->args[1] = idx; |
941 | cb->args[0] = h; | 1030 | cb->args[0] = h; |
942 | 1031 | ||
@@ -950,16 +1039,19 @@ const struct nla_policy ifla_policy[IFLA_MAX+1] = { | |||
950 | [IFLA_MAP] = { .len = sizeof(struct rtnl_link_ifmap) }, | 1039 | [IFLA_MAP] = { .len = sizeof(struct rtnl_link_ifmap) }, |
951 | [IFLA_MTU] = { .type = NLA_U32 }, | 1040 | [IFLA_MTU] = { .type = NLA_U32 }, |
952 | [IFLA_LINK] = { .type = NLA_U32 }, | 1041 | [IFLA_LINK] = { .type = NLA_U32 }, |
1042 | [IFLA_MASTER] = { .type = NLA_U32 }, | ||
953 | [IFLA_TXQLEN] = { .type = NLA_U32 }, | 1043 | [IFLA_TXQLEN] = { .type = NLA_U32 }, |
954 | [IFLA_WEIGHT] = { .type = NLA_U32 }, | 1044 | [IFLA_WEIGHT] = { .type = NLA_U32 }, |
955 | [IFLA_OPERSTATE] = { .type = NLA_U8 }, | 1045 | [IFLA_OPERSTATE] = { .type = NLA_U8 }, |
956 | [IFLA_LINKMODE] = { .type = NLA_U8 }, | 1046 | [IFLA_LINKMODE] = { .type = NLA_U8 }, |
957 | [IFLA_LINKINFO] = { .type = NLA_NESTED }, | 1047 | [IFLA_LINKINFO] = { .type = NLA_NESTED }, |
958 | [IFLA_NET_NS_PID] = { .type = NLA_U32 }, | 1048 | [IFLA_NET_NS_PID] = { .type = NLA_U32 }, |
1049 | [IFLA_NET_NS_FD] = { .type = NLA_U32 }, | ||
959 | [IFLA_IFALIAS] = { .type = NLA_STRING, .len = IFALIASZ-1 }, | 1050 | [IFLA_IFALIAS] = { .type = NLA_STRING, .len = IFALIASZ-1 }, |
960 | [IFLA_VFINFO_LIST] = {. type = NLA_NESTED }, | 1051 | [IFLA_VFINFO_LIST] = {. type = NLA_NESTED }, |
961 | [IFLA_VF_PORTS] = { .type = NLA_NESTED }, | 1052 | [IFLA_VF_PORTS] = { .type = NLA_NESTED }, |
962 | [IFLA_PORT_SELF] = { .type = NLA_NESTED }, | 1053 | [IFLA_PORT_SELF] = { .type = NLA_NESTED }, |
1054 | [IFLA_AF_SPEC] = { .type = NLA_NESTED }, | ||
963 | }; | 1055 | }; |
964 | EXPORT_SYMBOL(ifla_policy); | 1056 | EXPORT_SYMBOL(ifla_policy); |
965 | 1057 | ||
@@ -1003,6 +1095,8 @@ struct net *rtnl_link_get_net(struct net *src_net, struct nlattr *tb[]) | |||
1003 | */ | 1095 | */ |
1004 | if (tb[IFLA_NET_NS_PID]) | 1096 | if (tb[IFLA_NET_NS_PID]) |
1005 | net = get_net_ns_by_pid(nla_get_u32(tb[IFLA_NET_NS_PID])); | 1097 | net = get_net_ns_by_pid(nla_get_u32(tb[IFLA_NET_NS_PID])); |
1098 | else if (tb[IFLA_NET_NS_FD]) | ||
1099 | net = get_net_ns_by_fd(nla_get_u32(tb[IFLA_NET_NS_FD])); | ||
1006 | else | 1100 | else |
1007 | net = get_net(src_net); | 1101 | net = get_net(src_net); |
1008 | return net; | 1102 | return net; |
@@ -1021,6 +1115,27 @@ static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[]) | |||
1021 | return -EINVAL; | 1115 | return -EINVAL; |
1022 | } | 1116 | } |
1023 | 1117 | ||
1118 | if (tb[IFLA_AF_SPEC]) { | ||
1119 | struct nlattr *af; | ||
1120 | int rem, err; | ||
1121 | |||
1122 | nla_for_each_nested(af, tb[IFLA_AF_SPEC], rem) { | ||
1123 | const struct rtnl_af_ops *af_ops; | ||
1124 | |||
1125 | if (!(af_ops = rtnl_af_lookup(nla_type(af)))) | ||
1126 | return -EAFNOSUPPORT; | ||
1127 | |||
1128 | if (!af_ops->set_link_af) | ||
1129 | return -EOPNOTSUPP; | ||
1130 | |||
1131 | if (af_ops->validate_link_af) { | ||
1132 | err = af_ops->validate_link_af(dev, af); | ||
1133 | if (err < 0) | ||
1134 | return err; | ||
1135 | } | ||
1136 | } | ||
1137 | } | ||
1138 | |||
1024 | return 0; | 1139 | return 0; |
1025 | } | 1140 | } |
1026 | 1141 | ||
@@ -1070,6 +1185,41 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr *attr) | |||
1070 | return err; | 1185 | return err; |
1071 | } | 1186 | } |
1072 | 1187 | ||
1188 | static int do_set_master(struct net_device *dev, int ifindex) | ||
1189 | { | ||
1190 | struct net_device *master_dev; | ||
1191 | const struct net_device_ops *ops; | ||
1192 | int err; | ||
1193 | |||
1194 | if (dev->master) { | ||
1195 | if (dev->master->ifindex == ifindex) | ||
1196 | return 0; | ||
1197 | ops = dev->master->netdev_ops; | ||
1198 | if (ops->ndo_del_slave) { | ||
1199 | err = ops->ndo_del_slave(dev->master, dev); | ||
1200 | if (err) | ||
1201 | return err; | ||
1202 | } else { | ||
1203 | return -EOPNOTSUPP; | ||
1204 | } | ||
1205 | } | ||
1206 | |||
1207 | if (ifindex) { | ||
1208 | master_dev = __dev_get_by_index(dev_net(dev), ifindex); | ||
1209 | if (!master_dev) | ||
1210 | return -EINVAL; | ||
1211 | ops = master_dev->netdev_ops; | ||
1212 | if (ops->ndo_add_slave) { | ||
1213 | err = ops->ndo_add_slave(master_dev, dev); | ||
1214 | if (err) | ||
1215 | return err; | ||
1216 | } else { | ||
1217 | return -EOPNOTSUPP; | ||
1218 | } | ||
1219 | } | ||
1220 | return 0; | ||
1221 | } | ||
1222 | |||
1073 | static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, | 1223 | static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, |
1074 | struct nlattr **tb, char *ifname, int modified) | 1224 | struct nlattr **tb, char *ifname, int modified) |
1075 | { | 1225 | { |
@@ -1077,7 +1227,7 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, | |||
1077 | int send_addr_notify = 0; | 1227 | int send_addr_notify = 0; |
1078 | int err; | 1228 | int err; |
1079 | 1229 | ||
1080 | if (tb[IFLA_NET_NS_PID]) { | 1230 | if (tb[IFLA_NET_NS_PID] || tb[IFLA_NET_NS_FD]) { |
1081 | struct net *net = rtnl_link_get_net(dev_net(dev), tb); | 1231 | struct net *net = rtnl_link_get_net(dev_net(dev), tb); |
1082 | if (IS_ERR(net)) { | 1232 | if (IS_ERR(net)) { |
1083 | err = PTR_ERR(net); | 1233 | err = PTR_ERR(net); |
@@ -1157,6 +1307,11 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, | |||
1157 | modified = 1; | 1307 | modified = 1; |
1158 | } | 1308 | } |
1159 | 1309 | ||
1310 | if (tb[IFLA_GROUP]) { | ||
1311 | dev_set_group(dev, nla_get_u32(tb[IFLA_GROUP])); | ||
1312 | modified = 1; | ||
1313 | } | ||
1314 | |||
1160 | /* | 1315 | /* |
1161 | * Interface selected by interface index but interface | 1316 | * Interface selected by interface index but interface |
1162 | * name provided implies that a name change has been | 1317 | * name provided implies that a name change has been |
@@ -1188,6 +1343,13 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, | |||
1188 | goto errout; | 1343 | goto errout; |
1189 | } | 1344 | } |
1190 | 1345 | ||
1346 | if (tb[IFLA_MASTER]) { | ||
1347 | err = do_set_master(dev, nla_get_u32(tb[IFLA_MASTER])); | ||
1348 | if (err) | ||
1349 | goto errout; | ||
1350 | modified = 1; | ||
1351 | } | ||
1352 | |||
1191 | if (tb[IFLA_TXQLEN]) | 1353 | if (tb[IFLA_TXQLEN]) |
1192 | dev->tx_queue_len = nla_get_u32(tb[IFLA_TXQLEN]); | 1354 | dev->tx_queue_len = nla_get_u32(tb[IFLA_TXQLEN]); |
1193 | 1355 | ||
@@ -1261,12 +1423,30 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, | |||
1261 | goto errout; | 1423 | goto errout; |
1262 | modified = 1; | 1424 | modified = 1; |
1263 | } | 1425 | } |
1426 | |||
1427 | if (tb[IFLA_AF_SPEC]) { | ||
1428 | struct nlattr *af; | ||
1429 | int rem; | ||
1430 | |||
1431 | nla_for_each_nested(af, tb[IFLA_AF_SPEC], rem) { | ||
1432 | const struct rtnl_af_ops *af_ops; | ||
1433 | |||
1434 | if (!(af_ops = rtnl_af_lookup(nla_type(af)))) | ||
1435 | BUG(); | ||
1436 | |||
1437 | err = af_ops->set_link_af(dev, af); | ||
1438 | if (err < 0) | ||
1439 | goto errout; | ||
1440 | |||
1441 | modified = 1; | ||
1442 | } | ||
1443 | } | ||
1264 | err = 0; | 1444 | err = 0; |
1265 | 1445 | ||
1266 | errout: | 1446 | errout: |
1267 | if (err < 0 && modified && net_ratelimit()) | 1447 | if (err < 0 && modified && net_ratelimit()) |
1268 | printk(KERN_WARNING "A link change request failed with " | 1448 | printk(KERN_WARNING "A link change request failed with " |
1269 | "some changes comitted already. Interface %s may " | 1449 | "some changes committed already. Interface %s may " |
1270 | "have been left with an inconsistent configuration, " | 1450 | "have been left with an inconsistent configuration, " |
1271 | "please check.\n", dev->name); | 1451 | "please check.\n", dev->name); |
1272 | 1452 | ||
@@ -1325,6 +1505,7 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) | |||
1325 | char ifname[IFNAMSIZ]; | 1505 | char ifname[IFNAMSIZ]; |
1326 | struct nlattr *tb[IFLA_MAX+1]; | 1506 | struct nlattr *tb[IFLA_MAX+1]; |
1327 | int err; | 1507 | int err; |
1508 | LIST_HEAD(list_kill); | ||
1328 | 1509 | ||
1329 | err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy); | 1510 | err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy); |
1330 | if (err < 0) | 1511 | if (err < 0) |
@@ -1348,7 +1529,9 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) | |||
1348 | if (!ops) | 1529 | if (!ops) |
1349 | return -EOPNOTSUPP; | 1530 | return -EOPNOTSUPP; |
1350 | 1531 | ||
1351 | ops->dellink(dev, NULL); | 1532 | ops->dellink(dev, &list_kill); |
1533 | unregister_netdevice_many(&list_kill); | ||
1534 | list_del(&list_kill); | ||
1352 | return 0; | 1535 | return 0; |
1353 | } | 1536 | } |
1354 | 1537 | ||
@@ -1396,12 +1579,6 @@ struct net_device *rtnl_create_link(struct net *src_net, struct net *net, | |||
1396 | dev->rtnl_link_state = RTNL_LINK_INITIALIZING; | 1579 | dev->rtnl_link_state = RTNL_LINK_INITIALIZING; |
1397 | dev->real_num_tx_queues = real_num_queues; | 1580 | dev->real_num_tx_queues = real_num_queues; |
1398 | 1581 | ||
1399 | if (strchr(dev->name, '%')) { | ||
1400 | err = dev_alloc_name(dev, dev->name); | ||
1401 | if (err < 0) | ||
1402 | goto err_free; | ||
1403 | } | ||
1404 | |||
1405 | if (tb[IFLA_MTU]) | 1582 | if (tb[IFLA_MTU]) |
1406 | dev->mtu = nla_get_u32(tb[IFLA_MTU]); | 1583 | dev->mtu = nla_get_u32(tb[IFLA_MTU]); |
1407 | if (tb[IFLA_ADDRESS]) | 1584 | if (tb[IFLA_ADDRESS]) |
@@ -1416,16 +1593,34 @@ struct net_device *rtnl_create_link(struct net *src_net, struct net *net, | |||
1416 | set_operstate(dev, nla_get_u8(tb[IFLA_OPERSTATE])); | 1593 | set_operstate(dev, nla_get_u8(tb[IFLA_OPERSTATE])); |
1417 | if (tb[IFLA_LINKMODE]) | 1594 | if (tb[IFLA_LINKMODE]) |
1418 | dev->link_mode = nla_get_u8(tb[IFLA_LINKMODE]); | 1595 | dev->link_mode = nla_get_u8(tb[IFLA_LINKMODE]); |
1596 | if (tb[IFLA_GROUP]) | ||
1597 | dev_set_group(dev, nla_get_u32(tb[IFLA_GROUP])); | ||
1419 | 1598 | ||
1420 | return dev; | 1599 | return dev; |
1421 | 1600 | ||
1422 | err_free: | ||
1423 | free_netdev(dev); | ||
1424 | err: | 1601 | err: |
1425 | return ERR_PTR(err); | 1602 | return ERR_PTR(err); |
1426 | } | 1603 | } |
1427 | EXPORT_SYMBOL(rtnl_create_link); | 1604 | EXPORT_SYMBOL(rtnl_create_link); |
1428 | 1605 | ||
1606 | static int rtnl_group_changelink(struct net *net, int group, | ||
1607 | struct ifinfomsg *ifm, | ||
1608 | struct nlattr **tb) | ||
1609 | { | ||
1610 | struct net_device *dev; | ||
1611 | int err; | ||
1612 | |||
1613 | for_each_netdev(net, dev) { | ||
1614 | if (dev->group == group) { | ||
1615 | err = do_setlink(dev, ifm, tb, NULL, 0); | ||
1616 | if (err < 0) | ||
1617 | return err; | ||
1618 | } | ||
1619 | } | ||
1620 | |||
1621 | return 0; | ||
1622 | } | ||
1623 | |||
1429 | static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) | 1624 | static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) |
1430 | { | 1625 | { |
1431 | struct net *net = sock_net(skb->sk); | 1626 | struct net *net = sock_net(skb->sk); |
@@ -1453,10 +1648,12 @@ replay: | |||
1453 | ifm = nlmsg_data(nlh); | 1648 | ifm = nlmsg_data(nlh); |
1454 | if (ifm->ifi_index > 0) | 1649 | if (ifm->ifi_index > 0) |
1455 | dev = __dev_get_by_index(net, ifm->ifi_index); | 1650 | dev = __dev_get_by_index(net, ifm->ifi_index); |
1456 | else if (ifname[0]) | 1651 | else { |
1457 | dev = __dev_get_by_name(net, ifname); | 1652 | if (ifname[0]) |
1458 | else | 1653 | dev = __dev_get_by_name(net, ifname); |
1459 | dev = NULL; | 1654 | else |
1655 | dev = NULL; | ||
1656 | } | ||
1460 | 1657 | ||
1461 | err = validate_linkmsg(dev, tb); | 1658 | err = validate_linkmsg(dev, tb); |
1462 | if (err < 0) | 1659 | if (err < 0) |
@@ -1520,8 +1717,13 @@ replay: | |||
1520 | return do_setlink(dev, ifm, tb, ifname, modified); | 1717 | return do_setlink(dev, ifm, tb, ifname, modified); |
1521 | } | 1718 | } |
1522 | 1719 | ||
1523 | if (!(nlh->nlmsg_flags & NLM_F_CREATE)) | 1720 | if (!(nlh->nlmsg_flags & NLM_F_CREATE)) { |
1721 | if (ifm->ifi_index == 0 && tb[IFLA_GROUP]) | ||
1722 | return rtnl_group_changelink(net, | ||
1723 | nla_get_u32(tb[IFLA_GROUP]), | ||
1724 | ifm, tb); | ||
1524 | return -ENODEV; | 1725 | return -ENODEV; |
1726 | } | ||
1525 | 1727 | ||
1526 | if (ifm->ifi_index) | 1728 | if (ifm->ifi_index) |
1527 | return -EOPNOTSUPP; | 1729 | return -EOPNOTSUPP; |
@@ -1546,6 +1748,9 @@ replay: | |||
1546 | snprintf(ifname, IFNAMSIZ, "%s%%d", ops->kind); | 1748 | snprintf(ifname, IFNAMSIZ, "%s%%d", ops->kind); |
1547 | 1749 | ||
1548 | dest_net = rtnl_link_get_net(net, tb); | 1750 | dest_net = rtnl_link_get_net(net, tb); |
1751 | if (IS_ERR(dest_net)) | ||
1752 | return PTR_ERR(dest_net); | ||
1753 | |||
1549 | dev = rtnl_create_link(net, dest_net, ifname, ops, tb); | 1754 | dev = rtnl_create_link(net, dest_net, ifname, ops, tb); |
1550 | 1755 | ||
1551 | if (IS_ERR(dev)) | 1756 | if (IS_ERR(dev)) |
@@ -1759,6 +1964,8 @@ static int rtnetlink_event(struct notifier_block *this, unsigned long event, voi | |||
1759 | case NETDEV_GOING_DOWN: | 1964 | case NETDEV_GOING_DOWN: |
1760 | case NETDEV_UNREGISTER: | 1965 | case NETDEV_UNREGISTER: |
1761 | case NETDEV_UNREGISTER_BATCH: | 1966 | case NETDEV_UNREGISTER_BATCH: |
1967 | case NETDEV_RELEASE: | ||
1968 | case NETDEV_JOIN: | ||
1762 | break; | 1969 | break; |
1763 | default: | 1970 | default: |
1764 | rtmsg_ifinfo(RTM_NEWLINK, dev, 0); | 1971 | rtmsg_ifinfo(RTM_NEWLINK, dev, 0); |
diff --git a/net/core/scm.c b/net/core/scm.c index 413cab89017d..4c1ef026d695 100644 --- a/net/core/scm.c +++ b/net/core/scm.c | |||
@@ -79,10 +79,11 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp) | |||
79 | return -ENOMEM; | 79 | return -ENOMEM; |
80 | *fplp = fpl; | 80 | *fplp = fpl; |
81 | fpl->count = 0; | 81 | fpl->count = 0; |
82 | fpl->max = SCM_MAX_FD; | ||
82 | } | 83 | } |
83 | fpp = &fpl->fp[fpl->count]; | 84 | fpp = &fpl->fp[fpl->count]; |
84 | 85 | ||
85 | if (fpl->count + num > SCM_MAX_FD) | 86 | if (fpl->count + num > fpl->max) |
86 | return -EINVAL; | 87 | return -EINVAL; |
87 | 88 | ||
88 | /* | 89 | /* |
@@ -94,7 +95,7 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp) | |||
94 | int fd = fdp[i]; | 95 | int fd = fdp[i]; |
95 | struct file *file; | 96 | struct file *file; |
96 | 97 | ||
97 | if (fd < 0 || !(file = fget(fd))) | 98 | if (fd < 0 || !(file = fget_raw(fd))) |
98 | return -EBADF; | 99 | return -EBADF; |
99 | *fpp++ = file; | 100 | *fpp++ = file; |
100 | fpl->count++; | 101 | fpl->count++; |
@@ -331,11 +332,12 @@ struct scm_fp_list *scm_fp_dup(struct scm_fp_list *fpl) | |||
331 | if (!fpl) | 332 | if (!fpl) |
332 | return NULL; | 333 | return NULL; |
333 | 334 | ||
334 | new_fpl = kmalloc(sizeof(*fpl), GFP_KERNEL); | 335 | new_fpl = kmemdup(fpl, offsetof(struct scm_fp_list, fp[fpl->count]), |
336 | GFP_KERNEL); | ||
335 | if (new_fpl) { | 337 | if (new_fpl) { |
336 | for (i=fpl->count-1; i>=0; i--) | 338 | for (i = 0; i < fpl->count; i++) |
337 | get_file(fpl->fp[i]); | 339 | get_file(fpl->fp[i]); |
338 | memcpy(new_fpl, fpl, sizeof(*fpl)); | 340 | new_fpl->max = new_fpl->count; |
339 | } | 341 | } |
340 | return new_fpl; | 342 | return new_fpl; |
341 | } | 343 | } |
diff --git a/net/core/skbuff.c b/net/core/skbuff.c index c83b421341c0..46cbd28f40f9 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c | |||
@@ -57,6 +57,7 @@ | |||
57 | #include <linux/init.h> | 57 | #include <linux/init.h> |
58 | #include <linux/scatterlist.h> | 58 | #include <linux/scatterlist.h> |
59 | #include <linux/errqueue.h> | 59 | #include <linux/errqueue.h> |
60 | #include <linux/prefetch.h> | ||
60 | 61 | ||
61 | #include <net/protocol.h> | 62 | #include <net/protocol.h> |
62 | #include <net/dst.h> | 63 | #include <net/dst.h> |
@@ -202,8 +203,6 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, | |||
202 | skb->data = data; | 203 | skb->data = data; |
203 | skb_reset_tail_pointer(skb); | 204 | skb_reset_tail_pointer(skb); |
204 | skb->end = skb->tail + size; | 205 | skb->end = skb->tail + size; |
205 | kmemcheck_annotate_bitfield(skb, flags1); | ||
206 | kmemcheck_annotate_bitfield(skb, flags2); | ||
207 | #ifdef NET_SKBUFF_DATA_USES_OFFSET | 206 | #ifdef NET_SKBUFF_DATA_USES_OFFSET |
208 | skb->mac_header = ~0U; | 207 | skb->mac_header = ~0U; |
209 | #endif | 208 | #endif |
@@ -212,6 +211,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, | |||
212 | shinfo = skb_shinfo(skb); | 211 | shinfo = skb_shinfo(skb); |
213 | memset(shinfo, 0, offsetof(struct skb_shared_info, dataref)); | 212 | memset(shinfo, 0, offsetof(struct skb_shared_info, dataref)); |
214 | atomic_set(&shinfo->dataref, 1); | 213 | atomic_set(&shinfo->dataref, 1); |
214 | kmemcheck_annotate_variable(shinfo->destructor_arg); | ||
215 | 215 | ||
216 | if (fclone) { | 216 | if (fclone) { |
217 | struct sk_buff *child = skb + 1; | 217 | struct sk_buff *child = skb + 1; |
@@ -249,10 +249,9 @@ EXPORT_SYMBOL(__alloc_skb); | |||
249 | struct sk_buff *__netdev_alloc_skb(struct net_device *dev, | 249 | struct sk_buff *__netdev_alloc_skb(struct net_device *dev, |
250 | unsigned int length, gfp_t gfp_mask) | 250 | unsigned int length, gfp_t gfp_mask) |
251 | { | 251 | { |
252 | int node = dev->dev.parent ? dev_to_node(dev->dev.parent) : -1; | ||
253 | struct sk_buff *skb; | 252 | struct sk_buff *skb; |
254 | 253 | ||
255 | skb = __alloc_skb(length + NET_SKB_PAD, gfp_mask, 0, node); | 254 | skb = __alloc_skb(length + NET_SKB_PAD, gfp_mask, 0, NUMA_NO_NODE); |
256 | if (likely(skb)) { | 255 | if (likely(skb)) { |
257 | skb_reserve(skb, NET_SKB_PAD); | 256 | skb_reserve(skb, NET_SKB_PAD); |
258 | skb->dev = dev; | 257 | skb->dev = dev; |
@@ -261,16 +260,6 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev, | |||
261 | } | 260 | } |
262 | EXPORT_SYMBOL(__netdev_alloc_skb); | 261 | EXPORT_SYMBOL(__netdev_alloc_skb); |
263 | 262 | ||
264 | struct page *__netdev_alloc_page(struct net_device *dev, gfp_t gfp_mask) | ||
265 | { | ||
266 | int node = dev->dev.parent ? dev_to_node(dev->dev.parent) : -1; | ||
267 | struct page *page; | ||
268 | |||
269 | page = alloc_pages_node(node, gfp_mask, 0); | ||
270 | return page; | ||
271 | } | ||
272 | EXPORT_SYMBOL(__netdev_alloc_page); | ||
273 | |||
274 | void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off, | 263 | void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off, |
275 | int size) | 264 | int size) |
276 | { | 265 | { |
@@ -340,7 +329,7 @@ static void skb_release_data(struct sk_buff *skb) | |||
340 | put_page(skb_shinfo(skb)->frags[i].page); | 329 | put_page(skb_shinfo(skb)->frags[i].page); |
341 | } | 330 | } |
342 | 331 | ||
343 | if (skb_has_frags(skb)) | 332 | if (skb_has_frag_list(skb)) |
344 | skb_drop_fraglist(skb); | 333 | skb_drop_fraglist(skb); |
345 | 334 | ||
346 | kfree(skb->head); | 335 | kfree(skb->head); |
@@ -393,6 +382,8 @@ static void skb_release_head_state(struct sk_buff *skb) | |||
393 | } | 382 | } |
394 | #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) | 383 | #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) |
395 | nf_conntrack_put(skb->nfct); | 384 | nf_conntrack_put(skb->nfct); |
385 | #endif | ||
386 | #ifdef NET_SKBUFF_NF_DEFRAG_NEEDED | ||
396 | nf_conntrack_put_reasm(skb->nfct_reasm); | 387 | nf_conntrack_put_reasm(skb->nfct_reasm); |
397 | #endif | 388 | #endif |
398 | #ifdef CONFIG_BRIDGE_NETFILTER | 389 | #ifdef CONFIG_BRIDGE_NETFILTER |
@@ -466,6 +457,7 @@ void consume_skb(struct sk_buff *skb) | |||
466 | smp_rmb(); | 457 | smp_rmb(); |
467 | else if (likely(!atomic_dec_and_test(&skb->users))) | 458 | else if (likely(!atomic_dec_and_test(&skb->users))) |
468 | return; | 459 | return; |
460 | trace_consume_skb(skb); | ||
469 | __kfree_skb(skb); | 461 | __kfree_skb(skb); |
470 | } | 462 | } |
471 | EXPORT_SYMBOL(consume_skb); | 463 | EXPORT_SYMBOL(consume_skb); |
@@ -532,7 +524,6 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) | |||
532 | new->ip_summed = old->ip_summed; | 524 | new->ip_summed = old->ip_summed; |
533 | skb_copy_queue_mapping(new, old); | 525 | skb_copy_queue_mapping(new, old); |
534 | new->priority = old->priority; | 526 | new->priority = old->priority; |
535 | new->deliver_no_wcard = old->deliver_no_wcard; | ||
536 | #if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE) | 527 | #if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE) |
537 | new->ipvs_property = old->ipvs_property; | 528 | new->ipvs_property = old->ipvs_property; |
538 | #endif | 529 | #endif |
@@ -685,16 +676,10 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) | |||
685 | 676 | ||
686 | struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask) | 677 | struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask) |
687 | { | 678 | { |
688 | int headerlen = skb->data - skb->head; | 679 | int headerlen = skb_headroom(skb); |
689 | /* | 680 | unsigned int size = (skb_end_pointer(skb) - skb->head) + skb->data_len; |
690 | * Allocate the copy buffer | 681 | struct sk_buff *n = alloc_skb(size, gfp_mask); |
691 | */ | 682 | |
692 | struct sk_buff *n; | ||
693 | #ifdef NET_SKBUFF_DATA_USES_OFFSET | ||
694 | n = alloc_skb(skb->end + skb->data_len, gfp_mask); | ||
695 | #else | ||
696 | n = alloc_skb(skb->end - skb->head + skb->data_len, gfp_mask); | ||
697 | #endif | ||
698 | if (!n) | 683 | if (!n) |
699 | return NULL; | 684 | return NULL; |
700 | 685 | ||
@@ -726,20 +711,14 @@ EXPORT_SYMBOL(skb_copy); | |||
726 | 711 | ||
727 | struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask) | 712 | struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask) |
728 | { | 713 | { |
729 | /* | 714 | unsigned int size = skb_end_pointer(skb) - skb->head; |
730 | * Allocate the copy buffer | 715 | struct sk_buff *n = alloc_skb(size, gfp_mask); |
731 | */ | 716 | |
732 | struct sk_buff *n; | ||
733 | #ifdef NET_SKBUFF_DATA_USES_OFFSET | ||
734 | n = alloc_skb(skb->end, gfp_mask); | ||
735 | #else | ||
736 | n = alloc_skb(skb->end - skb->head, gfp_mask); | ||
737 | #endif | ||
738 | if (!n) | 717 | if (!n) |
739 | goto out; | 718 | goto out; |
740 | 719 | ||
741 | /* Set the data pointer */ | 720 | /* Set the data pointer */ |
742 | skb_reserve(n, skb->data - skb->head); | 721 | skb_reserve(n, skb_headroom(skb)); |
743 | /* Set the tail pointer and length */ | 722 | /* Set the tail pointer and length */ |
744 | skb_put(n, skb_headlen(skb)); | 723 | skb_put(n, skb_headlen(skb)); |
745 | /* Copy the bytes */ | 724 | /* Copy the bytes */ |
@@ -759,7 +738,7 @@ struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask) | |||
759 | skb_shinfo(n)->nr_frags = i; | 738 | skb_shinfo(n)->nr_frags = i; |
760 | } | 739 | } |
761 | 740 | ||
762 | if (skb_has_frags(skb)) { | 741 | if (skb_has_frag_list(skb)) { |
763 | skb_shinfo(n)->frag_list = skb_shinfo(skb)->frag_list; | 742 | skb_shinfo(n)->frag_list = skb_shinfo(skb)->frag_list; |
764 | skb_clone_fraglist(n); | 743 | skb_clone_fraglist(n); |
765 | } | 744 | } |
@@ -791,12 +770,9 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, | |||
791 | { | 770 | { |
792 | int i; | 771 | int i; |
793 | u8 *data; | 772 | u8 *data; |
794 | #ifdef NET_SKBUFF_DATA_USES_OFFSET | 773 | int size = nhead + (skb_end_pointer(skb) - skb->head) + ntail; |
795 | int size = nhead + skb->end + ntail; | ||
796 | #else | ||
797 | int size = nhead + (skb->end - skb->head) + ntail; | ||
798 | #endif | ||
799 | long off; | 774 | long off; |
775 | bool fastpath; | ||
800 | 776 | ||
801 | BUG_ON(nhead < 0); | 777 | BUG_ON(nhead < 0); |
802 | 778 | ||
@@ -805,31 +781,56 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, | |||
805 | 781 | ||
806 | size = SKB_DATA_ALIGN(size); | 782 | size = SKB_DATA_ALIGN(size); |
807 | 783 | ||
784 | /* Check if we can avoid taking references on fragments if we own | ||
785 | * the last reference on skb->head. (see skb_release_data()) | ||
786 | */ | ||
787 | if (!skb->cloned) | ||
788 | fastpath = true; | ||
789 | else { | ||
790 | int delta = skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1; | ||
791 | |||
792 | fastpath = atomic_read(&skb_shinfo(skb)->dataref) == delta; | ||
793 | } | ||
794 | |||
795 | if (fastpath && | ||
796 | size + sizeof(struct skb_shared_info) <= ksize(skb->head)) { | ||
797 | memmove(skb->head + size, skb_shinfo(skb), | ||
798 | offsetof(struct skb_shared_info, | ||
799 | frags[skb_shinfo(skb)->nr_frags])); | ||
800 | memmove(skb->head + nhead, skb->head, | ||
801 | skb_tail_pointer(skb) - skb->head); | ||
802 | off = nhead; | ||
803 | goto adjust_others; | ||
804 | } | ||
805 | |||
808 | data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask); | 806 | data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask); |
809 | if (!data) | 807 | if (!data) |
810 | goto nodata; | 808 | goto nodata; |
811 | 809 | ||
812 | /* Copy only real data... and, alas, header. This should be | 810 | /* Copy only real data... and, alas, header. This should be |
813 | * optimized for the cases when header is void. */ | 811 | * optimized for the cases when header is void. |
814 | #ifdef NET_SKBUFF_DATA_USES_OFFSET | 812 | */ |
815 | memcpy(data + nhead, skb->head, skb->tail); | 813 | memcpy(data + nhead, skb->head, skb_tail_pointer(skb) - skb->head); |
816 | #else | ||
817 | memcpy(data + nhead, skb->head, skb->tail - skb->head); | ||
818 | #endif | ||
819 | memcpy(data + size, skb_end_pointer(skb), | ||
820 | offsetof(struct skb_shared_info, frags[skb_shinfo(skb)->nr_frags])); | ||
821 | 814 | ||
822 | for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) | 815 | memcpy((struct skb_shared_info *)(data + size), |
823 | get_page(skb_shinfo(skb)->frags[i].page); | 816 | skb_shinfo(skb), |
817 | offsetof(struct skb_shared_info, frags[skb_shinfo(skb)->nr_frags])); | ||
824 | 818 | ||
825 | if (skb_has_frags(skb)) | 819 | if (fastpath) { |
826 | skb_clone_fraglist(skb); | 820 | kfree(skb->head); |
821 | } else { | ||
822 | for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) | ||
823 | get_page(skb_shinfo(skb)->frags[i].page); | ||
827 | 824 | ||
828 | skb_release_data(skb); | 825 | if (skb_has_frag_list(skb)) |
826 | skb_clone_fraglist(skb); | ||
829 | 827 | ||
828 | skb_release_data(skb); | ||
829 | } | ||
830 | off = (data + nhead) - skb->head; | 830 | off = (data + nhead) - skb->head; |
831 | 831 | ||
832 | skb->head = data; | 832 | skb->head = data; |
833 | adjust_others: | ||
833 | skb->data += off; | 834 | skb->data += off; |
834 | #ifdef NET_SKBUFF_DATA_USES_OFFSET | 835 | #ifdef NET_SKBUFF_DATA_USES_OFFSET |
835 | skb->end = size; | 836 | skb->end = size; |
@@ -1099,7 +1100,7 @@ drop_pages: | |||
1099 | for (; i < nfrags; i++) | 1100 | for (; i < nfrags; i++) |
1100 | put_page(skb_shinfo(skb)->frags[i].page); | 1101 | put_page(skb_shinfo(skb)->frags[i].page); |
1101 | 1102 | ||
1102 | if (skb_has_frags(skb)) | 1103 | if (skb_has_frag_list(skb)) |
1103 | skb_drop_fraglist(skb); | 1104 | skb_drop_fraglist(skb); |
1104 | goto done; | 1105 | goto done; |
1105 | } | 1106 | } |
@@ -1194,7 +1195,7 @@ unsigned char *__pskb_pull_tail(struct sk_buff *skb, int delta) | |||
1194 | /* Optimization: no fragments, no reasons to preestimate | 1195 | /* Optimization: no fragments, no reasons to preestimate |
1195 | * size of pulled pages. Superb. | 1196 | * size of pulled pages. Superb. |
1196 | */ | 1197 | */ |
1197 | if (!skb_has_frags(skb)) | 1198 | if (!skb_has_frag_list(skb)) |
1198 | goto pull_pages; | 1199 | goto pull_pages; |
1199 | 1200 | ||
1200 | /* Estimate size of pulled pages. */ | 1201 | /* Estimate size of pulled pages. */ |
@@ -1826,7 +1827,7 @@ void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to) | |||
1826 | long csstart; | 1827 | long csstart; |
1827 | 1828 | ||
1828 | if (skb->ip_summed == CHECKSUM_PARTIAL) | 1829 | if (skb->ip_summed == CHECKSUM_PARTIAL) |
1829 | csstart = skb->csum_start - skb_headroom(skb); | 1830 | csstart = skb_checksum_start_offset(skb); |
1830 | else | 1831 | else |
1831 | csstart = skb_headlen(skb); | 1832 | csstart = skb_headlen(skb); |
1832 | 1833 | ||
@@ -2267,7 +2268,7 @@ EXPORT_SYMBOL(skb_prepare_seq_read); | |||
2267 | * of bytes already consumed and the next call to | 2268 | * of bytes already consumed and the next call to |
2268 | * skb_seq_read() will return the remaining part of the block. | 2269 | * skb_seq_read() will return the remaining part of the block. |
2269 | * | 2270 | * |
2270 | * Note 1: The size of each block of data returned can be arbitary, | 2271 | * Note 1: The size of each block of data returned can be arbitrary, |
2271 | * this limitation is the cost for zerocopy seqeuental | 2272 | * this limitation is the cost for zerocopy seqeuental |
2272 | * reads of potentially non linear data. | 2273 | * reads of potentially non linear data. |
2273 | * | 2274 | * |
@@ -2323,7 +2324,7 @@ next_skb: | |||
2323 | st->frag_data = NULL; | 2324 | st->frag_data = NULL; |
2324 | } | 2325 | } |
2325 | 2326 | ||
2326 | if (st->root_skb == st->cur_skb && skb_has_frags(st->root_skb)) { | 2327 | if (st->root_skb == st->cur_skb && skb_has_frag_list(st->root_skb)) { |
2327 | st->cur_skb = skb_shinfo(st->root_skb)->frag_list; | 2328 | st->cur_skb = skb_shinfo(st->root_skb)->frag_list; |
2328 | st->frag_idx = 0; | 2329 | st->frag_idx = 0; |
2329 | goto next_skb; | 2330 | goto next_skb; |
@@ -2433,8 +2434,6 @@ int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb, | |||
2433 | return -ENOMEM; | 2434 | return -ENOMEM; |
2434 | 2435 | ||
2435 | /* initialize the next frag */ | 2436 | /* initialize the next frag */ |
2436 | sk->sk_sndmsg_page = page; | ||
2437 | sk->sk_sndmsg_off = 0; | ||
2438 | skb_fill_page_desc(skb, frg_cnt, page, 0, 0); | 2437 | skb_fill_page_desc(skb, frg_cnt, page, 0, 0); |
2439 | skb->truesize += PAGE_SIZE; | 2438 | skb->truesize += PAGE_SIZE; |
2440 | atomic_add(PAGE_SIZE, &sk->sk_wmem_alloc); | 2439 | atomic_add(PAGE_SIZE, &sk->sk_wmem_alloc); |
@@ -2454,7 +2453,6 @@ int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb, | |||
2454 | return -EFAULT; | 2453 | return -EFAULT; |
2455 | 2454 | ||
2456 | /* copy was successful so update the size parameters */ | 2455 | /* copy was successful so update the size parameters */ |
2457 | sk->sk_sndmsg_off += copy; | ||
2458 | frag->size += copy; | 2456 | frag->size += copy; |
2459 | skb->len += copy; | 2457 | skb->len += copy; |
2460 | skb->data_len += copy; | 2458 | skb->data_len += copy; |
@@ -2497,7 +2495,7 @@ EXPORT_SYMBOL_GPL(skb_pull_rcsum); | |||
2497 | * a pointer to the first in a list of new skbs for the segments. | 2495 | * a pointer to the first in a list of new skbs for the segments. |
2498 | * In case of error it returns ERR_PTR(err). | 2496 | * In case of error it returns ERR_PTR(err). |
2499 | */ | 2497 | */ |
2500 | struct sk_buff *skb_segment(struct sk_buff *skb, int features) | 2498 | struct sk_buff *skb_segment(struct sk_buff *skb, u32 features) |
2501 | { | 2499 | { |
2502 | struct sk_buff *segs = NULL; | 2500 | struct sk_buff *segs = NULL; |
2503 | struct sk_buff *tail = NULL; | 2501 | struct sk_buff *tail = NULL; |
@@ -2507,7 +2505,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features) | |||
2507 | unsigned int offset = doffset; | 2505 | unsigned int offset = doffset; |
2508 | unsigned int headroom; | 2506 | unsigned int headroom; |
2509 | unsigned int len; | 2507 | unsigned int len; |
2510 | int sg = features & NETIF_F_SG; | 2508 | int sg = !!(features & NETIF_F_SG); |
2511 | int nfrags = skb_shinfo(skb)->nr_frags; | 2509 | int nfrags = skb_shinfo(skb)->nr_frags; |
2512 | int err = -ENOMEM; | 2510 | int err = -ENOMEM; |
2513 | int i = 0; | 2511 | int i = 0; |
@@ -2744,8 +2742,12 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb) | |||
2744 | 2742 | ||
2745 | merge: | 2743 | merge: |
2746 | if (offset > headlen) { | 2744 | if (offset > headlen) { |
2747 | skbinfo->frags[0].page_offset += offset - headlen; | 2745 | unsigned int eat = offset - headlen; |
2748 | skbinfo->frags[0].size -= offset - headlen; | 2746 | |
2747 | skbinfo->frags[0].page_offset += eat; | ||
2748 | skbinfo->frags[0].size -= eat; | ||
2749 | skb->data_len -= eat; | ||
2750 | skb->len -= eat; | ||
2749 | offset = headlen; | 2751 | offset = headlen; |
2750 | } | 2752 | } |
2751 | 2753 | ||
@@ -2893,7 +2895,7 @@ int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer) | |||
2893 | return -ENOMEM; | 2895 | return -ENOMEM; |
2894 | 2896 | ||
2895 | /* Easy case. Most of packets will go this way. */ | 2897 | /* Easy case. Most of packets will go this way. */ |
2896 | if (!skb_has_frags(skb)) { | 2898 | if (!skb_has_frag_list(skb)) { |
2897 | /* A little of trouble, not enough of space for trailer. | 2899 | /* A little of trouble, not enough of space for trailer. |
2898 | * This should not happen, when stack is tuned to generate | 2900 | * This should not happen, when stack is tuned to generate |
2899 | * good frames. OK, on miss we reallocate and reserve even more | 2901 | * good frames. OK, on miss we reallocate and reserve even more |
@@ -2928,7 +2930,7 @@ int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer) | |||
2928 | 2930 | ||
2929 | if (skb1->next == NULL && tailbits) { | 2931 | if (skb1->next == NULL && tailbits) { |
2930 | if (skb_shinfo(skb1)->nr_frags || | 2932 | if (skb_shinfo(skb1)->nr_frags || |
2931 | skb_has_frags(skb1) || | 2933 | skb_has_frag_list(skb1) || |
2932 | skb_tailroom(skb1) < tailbits) | 2934 | skb_tailroom(skb1) < tailbits) |
2933 | ntail = tailbits + 128; | 2935 | ntail = tailbits + 128; |
2934 | } | 2936 | } |
@@ -2937,7 +2939,7 @@ int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer) | |||
2937 | skb_cloned(skb1) || | 2939 | skb_cloned(skb1) || |
2938 | ntail || | 2940 | ntail || |
2939 | skb_shinfo(skb1)->nr_frags || | 2941 | skb_shinfo(skb1)->nr_frags || |
2940 | skb_has_frags(skb1)) { | 2942 | skb_has_frag_list(skb1)) { |
2941 | struct sk_buff *skb2; | 2943 | struct sk_buff *skb2; |
2942 | 2944 | ||
2943 | /* Fuck, we are miserable poor guys... */ | 2945 | /* Fuck, we are miserable poor guys... */ |
@@ -2992,6 +2994,9 @@ int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb) | |||
2992 | skb->destructor = sock_rmem_free; | 2994 | skb->destructor = sock_rmem_free; |
2993 | atomic_add(skb->truesize, &sk->sk_rmem_alloc); | 2995 | atomic_add(skb->truesize, &sk->sk_rmem_alloc); |
2994 | 2996 | ||
2997 | /* before exiting rcu section, make sure dst is refcounted */ | ||
2998 | skb_dst_force(skb); | ||
2999 | |||
2995 | skb_queue_tail(&sk->sk_error_queue, skb); | 3000 | skb_queue_tail(&sk->sk_error_queue, skb); |
2996 | if (!sock_flag(sk, SOCK_DEAD)) | 3001 | if (!sock_flag(sk, SOCK_DEAD)) |
2997 | sk->sk_data_ready(sk, skb->len); | 3002 | sk->sk_data_ready(sk, skb->len); |
@@ -3020,7 +3025,7 @@ void skb_tstamp_tx(struct sk_buff *orig_skb, | |||
3020 | } else { | 3025 | } else { |
3021 | /* | 3026 | /* |
3022 | * no hardware time stamps available, | 3027 | * no hardware time stamps available, |
3023 | * so keep the skb_shared_tx and only | 3028 | * so keep the shared tx_flags and only |
3024 | * store software time stamp | 3029 | * store software time stamp |
3025 | */ | 3030 | */ |
3026 | skb->tstamp = ktime_get_real(); | 3031 | skb->tstamp = ktime_get_real(); |
diff --git a/net/core/sock.c b/net/core/sock.c index ef30e9d286e7..6e819780c232 100644 --- a/net/core/sock.c +++ b/net/core/sock.c | |||
@@ -157,7 +157,7 @@ static const char *const af_family_key_strings[AF_MAX+1] = { | |||
157 | "sk_lock-27" , "sk_lock-28" , "sk_lock-AF_CAN" , | 157 | "sk_lock-27" , "sk_lock-28" , "sk_lock-AF_CAN" , |
158 | "sk_lock-AF_TIPC" , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV" , | 158 | "sk_lock-AF_TIPC" , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV" , |
159 | "sk_lock-AF_RXRPC" , "sk_lock-AF_ISDN" , "sk_lock-AF_PHONET" , | 159 | "sk_lock-AF_RXRPC" , "sk_lock-AF_ISDN" , "sk_lock-AF_PHONET" , |
160 | "sk_lock-AF_IEEE802154", "sk_lock-AF_CAIF" , | 160 | "sk_lock-AF_IEEE802154", "sk_lock-AF_CAIF" , "sk_lock-AF_ALG" , |
161 | "sk_lock-AF_MAX" | 161 | "sk_lock-AF_MAX" |
162 | }; | 162 | }; |
163 | static const char *const af_family_slock_key_strings[AF_MAX+1] = { | 163 | static const char *const af_family_slock_key_strings[AF_MAX+1] = { |
@@ -173,7 +173,7 @@ static const char *const af_family_slock_key_strings[AF_MAX+1] = { | |||
173 | "slock-27" , "slock-28" , "slock-AF_CAN" , | 173 | "slock-27" , "slock-28" , "slock-AF_CAN" , |
174 | "slock-AF_TIPC" , "slock-AF_BLUETOOTH", "slock-AF_IUCV" , | 174 | "slock-AF_TIPC" , "slock-AF_BLUETOOTH", "slock-AF_IUCV" , |
175 | "slock-AF_RXRPC" , "slock-AF_ISDN" , "slock-AF_PHONET" , | 175 | "slock-AF_RXRPC" , "slock-AF_ISDN" , "slock-AF_PHONET" , |
176 | "slock-AF_IEEE802154", "slock-AF_CAIF" , | 176 | "slock-AF_IEEE802154", "slock-AF_CAIF" , "slock-AF_ALG" , |
177 | "slock-AF_MAX" | 177 | "slock-AF_MAX" |
178 | }; | 178 | }; |
179 | static const char *const af_family_clock_key_strings[AF_MAX+1] = { | 179 | static const char *const af_family_clock_key_strings[AF_MAX+1] = { |
@@ -189,7 +189,7 @@ static const char *const af_family_clock_key_strings[AF_MAX+1] = { | |||
189 | "clock-27" , "clock-28" , "clock-AF_CAN" , | 189 | "clock-27" , "clock-28" , "clock-AF_CAN" , |
190 | "clock-AF_TIPC" , "clock-AF_BLUETOOTH", "clock-AF_IUCV" , | 190 | "clock-AF_TIPC" , "clock-AF_BLUETOOTH", "clock-AF_IUCV" , |
191 | "clock-AF_RXRPC" , "clock-AF_ISDN" , "clock-AF_PHONET" , | 191 | "clock-AF_RXRPC" , "clock-AF_ISDN" , "clock-AF_PHONET" , |
192 | "clock-AF_IEEE802154", "clock-AF_CAIF" , | 192 | "clock-AF_IEEE802154", "clock-AF_CAIF" , "clock-AF_ALG" , |
193 | "clock-AF_MAX" | 193 | "clock-AF_MAX" |
194 | }; | 194 | }; |
195 | 195 | ||
@@ -215,7 +215,7 @@ __u32 sysctl_rmem_max __read_mostly = SK_RMEM_MAX; | |||
215 | __u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX; | 215 | __u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX; |
216 | __u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX; | 216 | __u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX; |
217 | 217 | ||
218 | /* Maximal space eaten by iovec or ancilliary data plus some space */ | 218 | /* Maximal space eaten by iovec or ancillary data plus some space */ |
219 | int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512); | 219 | int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512); |
220 | EXPORT_SYMBOL(sysctl_optmem_max); | 220 | EXPORT_SYMBOL(sysctl_optmem_max); |
221 | 221 | ||
@@ -992,23 +992,54 @@ static inline void sock_lock_init(struct sock *sk) | |||
992 | /* | 992 | /* |
993 | * Copy all fields from osk to nsk but nsk->sk_refcnt must not change yet, | 993 | * Copy all fields from osk to nsk but nsk->sk_refcnt must not change yet, |
994 | * even temporarly, because of RCU lookups. sk_node should also be left as is. | 994 | * even temporarly, because of RCU lookups. sk_node should also be left as is. |
995 | * We must not copy fields between sk_dontcopy_begin and sk_dontcopy_end | ||
995 | */ | 996 | */ |
996 | static void sock_copy(struct sock *nsk, const struct sock *osk) | 997 | static void sock_copy(struct sock *nsk, const struct sock *osk) |
997 | { | 998 | { |
998 | #ifdef CONFIG_SECURITY_NETWORK | 999 | #ifdef CONFIG_SECURITY_NETWORK |
999 | void *sptr = nsk->sk_security; | 1000 | void *sptr = nsk->sk_security; |
1000 | #endif | 1001 | #endif |
1001 | BUILD_BUG_ON(offsetof(struct sock, sk_copy_start) != | 1002 | memcpy(nsk, osk, offsetof(struct sock, sk_dontcopy_begin)); |
1002 | sizeof(osk->sk_node) + sizeof(osk->sk_refcnt) + | 1003 | |
1003 | sizeof(osk->sk_tx_queue_mapping)); | 1004 | memcpy(&nsk->sk_dontcopy_end, &osk->sk_dontcopy_end, |
1004 | memcpy(&nsk->sk_copy_start, &osk->sk_copy_start, | 1005 | osk->sk_prot->obj_size - offsetof(struct sock, sk_dontcopy_end)); |
1005 | osk->sk_prot->obj_size - offsetof(struct sock, sk_copy_start)); | 1006 | |
1006 | #ifdef CONFIG_SECURITY_NETWORK | 1007 | #ifdef CONFIG_SECURITY_NETWORK |
1007 | nsk->sk_security = sptr; | 1008 | nsk->sk_security = sptr; |
1008 | security_sk_clone(osk, nsk); | 1009 | security_sk_clone(osk, nsk); |
1009 | #endif | 1010 | #endif |
1010 | } | 1011 | } |
1011 | 1012 | ||
1013 | /* | ||
1014 | * caches using SLAB_DESTROY_BY_RCU should let .next pointer from nulls nodes | ||
1015 | * un-modified. Special care is taken when initializing object to zero. | ||
1016 | */ | ||
1017 | static inline void sk_prot_clear_nulls(struct sock *sk, int size) | ||
1018 | { | ||
1019 | if (offsetof(struct sock, sk_node.next) != 0) | ||
1020 | memset(sk, 0, offsetof(struct sock, sk_node.next)); | ||
1021 | memset(&sk->sk_node.pprev, 0, | ||
1022 | size - offsetof(struct sock, sk_node.pprev)); | ||
1023 | } | ||
1024 | |||
1025 | void sk_prot_clear_portaddr_nulls(struct sock *sk, int size) | ||
1026 | { | ||
1027 | unsigned long nulls1, nulls2; | ||
1028 | |||
1029 | nulls1 = offsetof(struct sock, __sk_common.skc_node.next); | ||
1030 | nulls2 = offsetof(struct sock, __sk_common.skc_portaddr_node.next); | ||
1031 | if (nulls1 > nulls2) | ||
1032 | swap(nulls1, nulls2); | ||
1033 | |||
1034 | if (nulls1 != 0) | ||
1035 | memset((char *)sk, 0, nulls1); | ||
1036 | memset((char *)sk + nulls1 + sizeof(void *), 0, | ||
1037 | nulls2 - nulls1 - sizeof(void *)); | ||
1038 | memset((char *)sk + nulls2 + sizeof(void *), 0, | ||
1039 | size - nulls2 - sizeof(void *)); | ||
1040 | } | ||
1041 | EXPORT_SYMBOL(sk_prot_clear_portaddr_nulls); | ||
1042 | |||
1012 | static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority, | 1043 | static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority, |
1013 | int family) | 1044 | int family) |
1014 | { | 1045 | { |
@@ -1021,19 +1052,12 @@ static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority, | |||
1021 | if (!sk) | 1052 | if (!sk) |
1022 | return sk; | 1053 | return sk; |
1023 | if (priority & __GFP_ZERO) { | 1054 | if (priority & __GFP_ZERO) { |
1024 | /* | 1055 | if (prot->clear_sk) |
1025 | * caches using SLAB_DESTROY_BY_RCU should let | 1056 | prot->clear_sk(sk, prot->obj_size); |
1026 | * sk_node.next un-modified. Special care is taken | 1057 | else |
1027 | * when initializing object to zero. | 1058 | sk_prot_clear_nulls(sk, prot->obj_size); |
1028 | */ | ||
1029 | if (offsetof(struct sock, sk_node.next) != 0) | ||
1030 | memset(sk, 0, offsetof(struct sock, sk_node.next)); | ||
1031 | memset(&sk->sk_node.pprev, 0, | ||
1032 | prot->obj_size - offsetof(struct sock, | ||
1033 | sk_node.pprev)); | ||
1034 | } | 1059 | } |
1035 | } | 1060 | } else |
1036 | else | ||
1037 | sk = kmalloc(prot->obj_size, priority); | 1061 | sk = kmalloc(prot->obj_size, priority); |
1038 | 1062 | ||
1039 | if (sk != NULL) { | 1063 | if (sk != NULL) { |
@@ -1078,8 +1102,11 @@ static void sk_prot_free(struct proto *prot, struct sock *sk) | |||
1078 | #ifdef CONFIG_CGROUPS | 1102 | #ifdef CONFIG_CGROUPS |
1079 | void sock_update_classid(struct sock *sk) | 1103 | void sock_update_classid(struct sock *sk) |
1080 | { | 1104 | { |
1081 | u32 classid = task_cls_classid(current); | 1105 | u32 classid; |
1082 | 1106 | ||
1107 | rcu_read_lock(); /* doing current task, which cannot vanish. */ | ||
1108 | classid = task_cls_classid(current); | ||
1109 | rcu_read_unlock(); | ||
1083 | if (classid && classid != sk->sk_classid) | 1110 | if (classid && classid != sk->sk_classid) |
1084 | sk->sk_classid = classid; | 1111 | sk->sk_classid = classid; |
1085 | } | 1112 | } |
@@ -1148,7 +1175,7 @@ static void __sk_free(struct sock *sk) | |||
1148 | void sk_free(struct sock *sk) | 1175 | void sk_free(struct sock *sk) |
1149 | { | 1176 | { |
1150 | /* | 1177 | /* |
1151 | * We substract one from sk_wmem_alloc and can know if | 1178 | * We subtract one from sk_wmem_alloc and can know if |
1152 | * some packets are still in some tx queue. | 1179 | * some packets are still in some tx queue. |
1153 | * If not null, sock_wfree() will call __sk_free(sk) later | 1180 | * If not null, sock_wfree() will call __sk_free(sk) later |
1154 | */ | 1181 | */ |
@@ -1158,10 +1185,10 @@ void sk_free(struct sock *sk) | |||
1158 | EXPORT_SYMBOL(sk_free); | 1185 | EXPORT_SYMBOL(sk_free); |
1159 | 1186 | ||
1160 | /* | 1187 | /* |
1161 | * Last sock_put should drop referrence to sk->sk_net. It has already | 1188 | * Last sock_put should drop reference to sk->sk_net. It has already |
1162 | * been dropped in sk_change_net. Taking referrence to stopping namespace | 1189 | * been dropped in sk_change_net. Taking reference to stopping namespace |
1163 | * is not an option. | 1190 | * is not an option. |
1164 | * Take referrence to a socket to remove it from hash _alive_ and after that | 1191 | * Take reference to a socket to remove it from hash _alive_ and after that |
1165 | * destroy it in the context of init_net. | 1192 | * destroy it in the context of init_net. |
1166 | */ | 1193 | */ |
1167 | void sk_release_kernel(struct sock *sk) | 1194 | void sk_release_kernel(struct sock *sk) |
@@ -1222,7 +1249,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority) | |||
1222 | sock_reset_flag(newsk, SOCK_DONE); | 1249 | sock_reset_flag(newsk, SOCK_DONE); |
1223 | skb_queue_head_init(&newsk->sk_error_queue); | 1250 | skb_queue_head_init(&newsk->sk_error_queue); |
1224 | 1251 | ||
1225 | filter = newsk->sk_filter; | 1252 | filter = rcu_dereference_protected(newsk->sk_filter, 1); |
1226 | if (filter != NULL) | 1253 | if (filter != NULL) |
1227 | sk_filter_charge(newsk, filter); | 1254 | sk_filter_charge(newsk, filter); |
1228 | 1255 | ||
@@ -1557,6 +1584,8 @@ struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size, | |||
1557 | EXPORT_SYMBOL(sock_alloc_send_skb); | 1584 | EXPORT_SYMBOL(sock_alloc_send_skb); |
1558 | 1585 | ||
1559 | static void __lock_sock(struct sock *sk) | 1586 | static void __lock_sock(struct sock *sk) |
1587 | __releases(&sk->sk_lock.slock) | ||
1588 | __acquires(&sk->sk_lock.slock) | ||
1560 | { | 1589 | { |
1561 | DEFINE_WAIT(wait); | 1590 | DEFINE_WAIT(wait); |
1562 | 1591 | ||
@@ -1573,6 +1602,8 @@ static void __lock_sock(struct sock *sk) | |||
1573 | } | 1602 | } |
1574 | 1603 | ||
1575 | static void __release_sock(struct sock *sk) | 1604 | static void __release_sock(struct sock *sk) |
1605 | __releases(&sk->sk_lock.slock) | ||
1606 | __acquires(&sk->sk_lock.slock) | ||
1576 | { | 1607 | { |
1577 | struct sk_buff *skb = sk->sk_backlog.head; | 1608 | struct sk_buff *skb = sk->sk_backlog.head; |
1578 | 1609 | ||
@@ -1646,10 +1677,10 @@ int __sk_mem_schedule(struct sock *sk, int size, int kind) | |||
1646 | { | 1677 | { |
1647 | struct proto *prot = sk->sk_prot; | 1678 | struct proto *prot = sk->sk_prot; |
1648 | int amt = sk_mem_pages(size); | 1679 | int amt = sk_mem_pages(size); |
1649 | int allocated; | 1680 | long allocated; |
1650 | 1681 | ||
1651 | sk->sk_forward_alloc += amt * SK_MEM_QUANTUM; | 1682 | sk->sk_forward_alloc += amt * SK_MEM_QUANTUM; |
1652 | allocated = atomic_add_return(amt, prot->memory_allocated); | 1683 | allocated = atomic_long_add_return(amt, prot->memory_allocated); |
1653 | 1684 | ||
1654 | /* Under limit. */ | 1685 | /* Under limit. */ |
1655 | if (allocated <= prot->sysctl_mem[0]) { | 1686 | if (allocated <= prot->sysctl_mem[0]) { |
@@ -1707,7 +1738,7 @@ suppress_allocation: | |||
1707 | 1738 | ||
1708 | /* Alas. Undo changes. */ | 1739 | /* Alas. Undo changes. */ |
1709 | sk->sk_forward_alloc -= amt * SK_MEM_QUANTUM; | 1740 | sk->sk_forward_alloc -= amt * SK_MEM_QUANTUM; |
1710 | atomic_sub(amt, prot->memory_allocated); | 1741 | atomic_long_sub(amt, prot->memory_allocated); |
1711 | return 0; | 1742 | return 0; |
1712 | } | 1743 | } |
1713 | EXPORT_SYMBOL(__sk_mem_schedule); | 1744 | EXPORT_SYMBOL(__sk_mem_schedule); |
@@ -1720,12 +1751,12 @@ void __sk_mem_reclaim(struct sock *sk) | |||
1720 | { | 1751 | { |
1721 | struct proto *prot = sk->sk_prot; | 1752 | struct proto *prot = sk->sk_prot; |
1722 | 1753 | ||
1723 | atomic_sub(sk->sk_forward_alloc >> SK_MEM_QUANTUM_SHIFT, | 1754 | atomic_long_sub(sk->sk_forward_alloc >> SK_MEM_QUANTUM_SHIFT, |
1724 | prot->memory_allocated); | 1755 | prot->memory_allocated); |
1725 | sk->sk_forward_alloc &= SK_MEM_QUANTUM - 1; | 1756 | sk->sk_forward_alloc &= SK_MEM_QUANTUM - 1; |
1726 | 1757 | ||
1727 | if (prot->memory_pressure && *prot->memory_pressure && | 1758 | if (prot->memory_pressure && *prot->memory_pressure && |
1728 | (atomic_read(prot->memory_allocated) < prot->sysctl_mem[0])) | 1759 | (atomic_long_read(prot->memory_allocated) < prot->sysctl_mem[0])) |
1729 | *prot->memory_pressure = 0; | 1760 | *prot->memory_pressure = 0; |
1730 | } | 1761 | } |
1731 | EXPORT_SYMBOL(__sk_mem_reclaim); | 1762 | EXPORT_SYMBOL(__sk_mem_reclaim); |
@@ -1877,7 +1908,7 @@ static void sock_def_readable(struct sock *sk, int len) | |||
1877 | rcu_read_lock(); | 1908 | rcu_read_lock(); |
1878 | wq = rcu_dereference(sk->sk_wq); | 1909 | wq = rcu_dereference(sk->sk_wq); |
1879 | if (wq_has_sleeper(wq)) | 1910 | if (wq_has_sleeper(wq)) |
1880 | wake_up_interruptible_sync_poll(&wq->wait, POLLIN | | 1911 | wake_up_interruptible_sync_poll(&wq->wait, POLLIN | POLLPRI | |
1881 | POLLRDNORM | POLLRDBAND); | 1912 | POLLRDNORM | POLLRDBAND); |
1882 | sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN); | 1913 | sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN); |
1883 | rcu_read_unlock(); | 1914 | rcu_read_unlock(); |
@@ -2445,12 +2476,12 @@ static char proto_method_implemented(const void *method) | |||
2445 | 2476 | ||
2446 | static void proto_seq_printf(struct seq_file *seq, struct proto *proto) | 2477 | static void proto_seq_printf(struct seq_file *seq, struct proto *proto) |
2447 | { | 2478 | { |
2448 | seq_printf(seq, "%-9s %4u %6d %6d %-3s %6u %-3s %-10s " | 2479 | seq_printf(seq, "%-9s %4u %6d %6ld %-3s %6u %-3s %-10s " |
2449 | "%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n", | 2480 | "%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n", |
2450 | proto->name, | 2481 | proto->name, |
2451 | proto->obj_size, | 2482 | proto->obj_size, |
2452 | sock_prot_inuse_get(seq_file_net(seq), proto), | 2483 | sock_prot_inuse_get(seq_file_net(seq), proto), |
2453 | proto->memory_allocated != NULL ? atomic_read(proto->memory_allocated) : -1, | 2484 | proto->memory_allocated != NULL ? atomic_long_read(proto->memory_allocated) : -1L, |
2454 | proto->memory_pressure != NULL ? *proto->memory_pressure ? "yes" : "no" : "NI", | 2485 | proto->memory_pressure != NULL ? *proto->memory_pressure ? "yes" : "no" : "NI", |
2455 | proto->max_header, | 2486 | proto->max_header, |
2456 | proto->slab == NULL ? "no" : "yes", | 2487 | proto->slab == NULL ? "no" : "yes", |
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 01eee5d984be..77a65f031488 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c | |||
@@ -17,6 +17,7 @@ | |||
17 | 17 | ||
18 | #include <net/ip.h> | 18 | #include <net/ip.h> |
19 | #include <net/sock.h> | 19 | #include <net/sock.h> |
20 | #include <net/net_ratelimit.h> | ||
20 | 21 | ||
21 | #ifdef CONFIG_RPS | 22 | #ifdef CONFIG_RPS |
22 | static int rps_sock_flow_sysctl(ctl_table *table, int write, | 23 | static int rps_sock_flow_sysctl(ctl_table *table, int write, |
@@ -34,7 +35,8 @@ static int rps_sock_flow_sysctl(ctl_table *table, int write, | |||
34 | 35 | ||
35 | mutex_lock(&sock_flow_mutex); | 36 | mutex_lock(&sock_flow_mutex); |
36 | 37 | ||
37 | orig_sock_table = rps_sock_flow_table; | 38 | orig_sock_table = rcu_dereference_protected(rps_sock_flow_table, |
39 | lockdep_is_held(&sock_flow_mutex)); | ||
38 | size = orig_size = orig_sock_table ? orig_sock_table->mask + 1 : 0; | 40 | size = orig_size = orig_sock_table ? orig_sock_table->mask + 1 : 0; |
39 | 41 | ||
40 | ret = proc_dointvec(&tmp, write, buffer, lenp, ppos); | 42 | ret = proc_dointvec(&tmp, write, buffer, lenp, ppos); |
@@ -121,6 +123,15 @@ static struct ctl_table net_core_table[] = { | |||
121 | .mode = 0644, | 123 | .mode = 0644, |
122 | .proc_handler = proc_dointvec | 124 | .proc_handler = proc_dointvec |
123 | }, | 125 | }, |
126 | #ifdef CONFIG_BPF_JIT | ||
127 | { | ||
128 | .procname = "bpf_jit_enable", | ||
129 | .data = &bpf_jit_enable, | ||
130 | .maxlen = sizeof(int), | ||
131 | .mode = 0644, | ||
132 | .proc_handler = proc_dointvec | ||
133 | }, | ||
134 | #endif | ||
124 | { | 135 | { |
125 | .procname = "netdev_tstamp_prequeue", | 136 | .procname = "netdev_tstamp_prequeue", |
126 | .data = &netdev_tstamp_prequeue, | 137 | .data = &netdev_tstamp_prequeue, |
diff --git a/net/core/timestamping.c b/net/core/timestamping.c index 0ae6c22da85b..7e7ca375d431 100644 --- a/net/core/timestamping.c +++ b/net/core/timestamping.c | |||
@@ -26,12 +26,12 @@ static struct sock_filter ptp_filter[] = { | |||
26 | PTP_FILTER | 26 | PTP_FILTER |
27 | }; | 27 | }; |
28 | 28 | ||
29 | static unsigned int classify(struct sk_buff *skb) | 29 | static unsigned int classify(const struct sk_buff *skb) |
30 | { | 30 | { |
31 | if (likely(skb->dev && | 31 | if (likely(skb->dev && |
32 | skb->dev->phydev && | 32 | skb->dev->phydev && |
33 | skb->dev->phydev->drv)) | 33 | skb->dev->phydev->drv)) |
34 | return sk_run_filter(skb, ptp_filter, ARRAY_SIZE(ptp_filter)); | 34 | return sk_run_filter(skb, ptp_filter); |
35 | else | 35 | else |
36 | return PTP_CLASS_NONE; | 36 | return PTP_CLASS_NONE; |
37 | } | 37 | } |
@@ -96,11 +96,13 @@ bool skb_defer_rx_timestamp(struct sk_buff *skb) | |||
96 | struct phy_device *phydev; | 96 | struct phy_device *phydev; |
97 | unsigned int type; | 97 | unsigned int type; |
98 | 98 | ||
99 | skb_push(skb, ETH_HLEN); | 99 | if (skb_headroom(skb) < ETH_HLEN) |
100 | return false; | ||
101 | __skb_push(skb, ETH_HLEN); | ||
100 | 102 | ||
101 | type = classify(skb); | 103 | type = classify(skb); |
102 | 104 | ||
103 | skb_pull(skb, ETH_HLEN); | 105 | __skb_pull(skb, ETH_HLEN); |
104 | 106 | ||
105 | switch (type) { | 107 | switch (type) { |
106 | case PTP_CLASS_V1_IPV4: | 108 | case PTP_CLASS_V1_IPV4: |
diff --git a/net/core/utils.c b/net/core/utils.c index f41854470539..386e263f6066 100644 --- a/net/core/utils.c +++ b/net/core/utils.c | |||
@@ -27,6 +27,7 @@ | |||
27 | #include <linux/ratelimit.h> | 27 | #include <linux/ratelimit.h> |
28 | 28 | ||
29 | #include <net/sock.h> | 29 | #include <net/sock.h> |
30 | #include <net/net_ratelimit.h> | ||
30 | 31 | ||
31 | #include <asm/byteorder.h> | 32 | #include <asm/byteorder.h> |
32 | #include <asm/system.h> | 33 | #include <asm/system.h> |
@@ -75,7 +76,7 @@ __be32 in_aton(const char *str) | |||
75 | str++; | 76 | str++; |
76 | } | 77 | } |
77 | } | 78 | } |
78 | return(htonl(l)); | 79 | return htonl(l); |
79 | } | 80 | } |
80 | EXPORT_SYMBOL(in_aton); | 81 | EXPORT_SYMBOL(in_aton); |
81 | 82 | ||
@@ -92,18 +93,19 @@ EXPORT_SYMBOL(in_aton); | |||
92 | 93 | ||
93 | static inline int xdigit2bin(char c, int delim) | 94 | static inline int xdigit2bin(char c, int delim) |
94 | { | 95 | { |
96 | int val; | ||
97 | |||
95 | if (c == delim || c == '\0') | 98 | if (c == delim || c == '\0') |
96 | return IN6PTON_DELIM; | 99 | return IN6PTON_DELIM; |
97 | if (c == ':') | 100 | if (c == ':') |
98 | return IN6PTON_COLON_MASK; | 101 | return IN6PTON_COLON_MASK; |
99 | if (c == '.') | 102 | if (c == '.') |
100 | return IN6PTON_DOT; | 103 | return IN6PTON_DOT; |
101 | if (c >= '0' && c <= '9') | 104 | |
102 | return (IN6PTON_XDIGIT | IN6PTON_DIGIT| (c - '0')); | 105 | val = hex_to_bin(c); |
103 | if (c >= 'a' && c <= 'f') | 106 | if (val >= 0) |
104 | return (IN6PTON_XDIGIT | (c - 'a' + 10)); | 107 | return val | IN6PTON_XDIGIT | (val < 10 ? IN6PTON_DIGIT : 0); |
105 | if (c >= 'A' && c <= 'F') | 108 | |
106 | return (IN6PTON_XDIGIT | (c - 'A' + 10)); | ||
107 | if (delim == -1) | 109 | if (delim == -1) |
108 | return IN6PTON_DELIM; | 110 | return IN6PTON_DELIM; |
109 | return IN6PTON_UNKNOWN; | 111 | return IN6PTON_UNKNOWN; |
@@ -295,3 +297,27 @@ void inet_proto_csum_replace4(__sum16 *sum, struct sk_buff *skb, | |||
295 | csum_unfold(*sum))); | 297 | csum_unfold(*sum))); |
296 | } | 298 | } |
297 | EXPORT_SYMBOL(inet_proto_csum_replace4); | 299 | EXPORT_SYMBOL(inet_proto_csum_replace4); |
300 | |||
301 | int mac_pton(const char *s, u8 *mac) | ||
302 | { | ||
303 | int i; | ||
304 | |||
305 | /* XX:XX:XX:XX:XX:XX */ | ||
306 | if (strlen(s) < 3 * ETH_ALEN - 1) | ||
307 | return 0; | ||
308 | |||
309 | /* Don't dirty result unless string is valid MAC. */ | ||
310 | for (i = 0; i < ETH_ALEN; i++) { | ||
311 | if (!strchr("0123456789abcdefABCDEF", s[i * 3])) | ||
312 | return 0; | ||
313 | if (!strchr("0123456789abcdefABCDEF", s[i * 3 + 1])) | ||
314 | return 0; | ||
315 | if (i != ETH_ALEN - 1 && s[i * 3 + 2] != ':') | ||
316 | return 0; | ||
317 | } | ||
318 | for (i = 0; i < ETH_ALEN; i++) { | ||
319 | mac[i] = (hex_to_bin(s[i * 3]) << 4) | hex_to_bin(s[i * 3 + 1]); | ||
320 | } | ||
321 | return 1; | ||
322 | } | ||
323 | EXPORT_SYMBOL(mac_pton); | ||