author     Dmitry Torokhov <dmitry.torokhov@gmail.com>	2012-03-19 20:02:01 -0400
committer  Dmitry Torokhov <dmitry.torokhov@gmail.com>	2012-03-19 20:02:01 -0400
commit     10ce3cc919f50c2043b41ca968b43c26a3672600
tree       ea409366a5208aced495bc0516a08b81fd43222e /net/core
parent     24e3e5ae1e4c2a3a32f5b1f96b4e3fd721806acd
parent     5c6a7a62c130afef3d61c1dee153012231ff5cd9

Merge branch 'next' into for-linus
Diffstat (limited to 'net/core')

 net/core/Makefile          |    6
 net/core/dev.c             |  352
 net/core/dev_addr_lists.c  |   19
 net/core/dst.c             |    2
 net/core/ethtool.c         |  715
 net/core/flow.c            |   12
 net/core/flow_dissector.c  |  144
 net/core/neighbour.c       |  229
 net/core/net-sysfs.c       |  330
 net/core/net_namespace.c   |   31
 net/core/netpoll.c         |   14
 net/core/netprio_cgroup.c  |  339
 net/core/pktgen.c          |   25
 net/core/request_sock.c    |    7
 net/core/rtnetlink.c       |  104
 net/core/secure_seq.c      |   10
 net/core/skbuff.c          |   91
 net/core/sock.c            |  209
 net/core/sock_diag.c       |  192
 net/core/sysctl_net_core.c |    9

 20 files changed, 1784 insertions(+), 1056 deletions(-)
diff --git a/net/core/Makefile b/net/core/Makefile
index 0d357b1c4e57..674641b13aea 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -3,12 +3,13 @@
 #
 
 obj-y := sock.o request_sock.o skbuff.o iovec.o datagram.o stream.o scm.o \
-	 gen_stats.o gen_estimator.o net_namespace.o secure_seq.o
+	 gen_stats.o gen_estimator.o net_namespace.o secure_seq.o flow_dissector.o
 
 obj-$(CONFIG_SYSCTL) += sysctl_net_core.o
 
 obj-y += dev.o ethtool.o dev_addr_lists.o dst.o netevent.o \
-	 neighbour.o rtnetlink.o utils.o link_watch.o filter.o
+	 neighbour.o rtnetlink.o utils.o link_watch.o filter.o \
+	 sock_diag.o
 
 obj-$(CONFIG_XFRM) += flow.o
 obj-y += net-sysfs.o
@@ -19,3 +20,4 @@ obj-$(CONFIG_FIB_RULES) += fib_rules.o
 obj-$(CONFIG_TRACEPOINTS) += net-traces.o
 obj-$(CONFIG_NET_DROP_MONITOR) += drop_monitor.o
 obj-$(CONFIG_NETWORK_PHY_TIMESTAMPING) += timestamping.o
+obj-$(CONFIG_NETPRIO_CGROUP) += netprio_cgroup.o
diff --git a/net/core/dev.c b/net/core/dev.c
index 6ba50a1e404c..6ca32f6b3105 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -133,10 +133,9 @@
 #include <linux/pci.h>
 #include <linux/inetdevice.h>
 #include <linux/cpu_rmap.h>
-#include <linux/if_tunnel.h>
-#include <linux/if_pppox.h>
-#include <linux/ppp_defs.h>
 #include <linux/net_tstamp.h>
+#include <linux/jump_label.h>
+#include <net/flow_keys.h>
 
 #include "net-sysfs.h"
 
@@ -1320,8 +1319,6 @@ EXPORT_SYMBOL(dev_close);
  */
 void dev_disable_lro(struct net_device *dev)
 {
-	u32 flags;
-
 	/*
 	 * If we're trying to disable lro on a vlan device
 	 * use the underlying physical device instead
@@ -1329,15 +1326,9 @@ void dev_disable_lro(struct net_device *dev)
 	if (is_vlan_dev(dev))
 		dev = vlan_dev_real_dev(dev);
 
-	if (dev->ethtool_ops && dev->ethtool_ops->get_flags)
-		flags = dev->ethtool_ops->get_flags(dev);
-	else
-		flags = ethtool_op_get_flags(dev);
-
-	if (!(flags & ETH_FLAG_LRO))
-		return;
+	dev->wanted_features &= ~NETIF_F_LRO;
+	netdev_update_features(dev);
 
-	__ethtool_set_flags(dev, flags & ~ETH_FLAG_LRO);
 	if (unlikely(dev->features & NETIF_F_LRO))
 		netdev_WARN(dev, "failed to disable LRO!\n");
 }
@@ -1396,7 +1387,7 @@ rollback:
 	for_each_net(net) {
 		for_each_netdev(net, dev) {
 			if (dev == last)
-				break;
+				goto outroll;
 
 			if (dev->flags & IFF_UP) {
 				nb->notifier_call(nb, NETDEV_GOING_DOWN, dev);
@@ -1407,6 +1398,7 @@ rollback:
 		}
 	}
 
+outroll:
 	raw_notifier_chain_unregister(&netdev_chain, nb);
 	goto unlock;
 }
@@ -1449,34 +1441,55 @@ int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
 }
 EXPORT_SYMBOL(call_netdevice_notifiers);
 
-/* When > 0 there are consumers of rx skb time stamps */
-static atomic_t netstamp_needed = ATOMIC_INIT(0);
+static struct jump_label_key netstamp_needed __read_mostly;
+#ifdef HAVE_JUMP_LABEL
+/* We are not allowed to call jump_label_dec() from irq context
+ * If net_disable_timestamp() is called from irq context, defer the
+ * jump_label_dec() calls.
+ */
+static atomic_t netstamp_needed_deferred;
+#endif
 
 void net_enable_timestamp(void)
 {
-	atomic_inc(&netstamp_needed);
+#ifdef HAVE_JUMP_LABEL
+	int deferred = atomic_xchg(&netstamp_needed_deferred, 0);
+
+	if (deferred) {
+		while (--deferred)
+			jump_label_dec(&netstamp_needed);
+		return;
+	}
+#endif
+	WARN_ON(in_interrupt());
+	jump_label_inc(&netstamp_needed);
 }
 EXPORT_SYMBOL(net_enable_timestamp);
 
 void net_disable_timestamp(void)
 {
-	atomic_dec(&netstamp_needed);
+#ifdef HAVE_JUMP_LABEL
+	if (in_interrupt()) {
+		atomic_inc(&netstamp_needed_deferred);
+		return;
+	}
+#endif
+	jump_label_dec(&netstamp_needed);
 }
 EXPORT_SYMBOL(net_disable_timestamp);
 
 static inline void net_timestamp_set(struct sk_buff *skb)
 {
-	if (atomic_read(&netstamp_needed))
+	skb->tstamp.tv64 = 0;
+	if (static_branch(&netstamp_needed))
 		__net_timestamp(skb);
-	else
-		skb->tstamp.tv64 = 0;
 }
 
-static inline void net_timestamp_check(struct sk_buff *skb)
-{
-	if (!skb->tstamp.tv64 && atomic_read(&netstamp_needed))
-		__net_timestamp(skb);
-}
+#define net_timestamp_check(COND, SKB)			\
+	if (static_branch(&netstamp_needed)) {		\
+		if ((COND) && !(SKB)->tstamp.tv64)	\
+			__net_timestamp(SKB);		\
+	}						\
 
 static int net_hwtstamp_validate(struct ifreq *ifr)
 {
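The hunk above replaces an atomic counter read in the packet fast path with a jump label (static branch): when no one needs receive timestamps, the test is patched out of the code entirely. Because patching cannot happen in irq context, net_disable_timestamp() parks its decrement in netstamp_needed_deferred and the next net_enable_timestamp() settles the balance. A minimal userspace model of that deferral logic, using C11 atomics in place of jump_label_inc()/jump_label_dec(); fake_key and the other names are invented for the sketch:

#include <stdatomic.h>
#include <stdio.h>

/* Userspace model only: fake_key stands in for the jump-label key.
 * In the kernel, changing the key patches code and must not be done
 * from irq context, hence the deferred counter.
 */
static atomic_int fake_key;	/* jump_label_inc()/dec() target */
static atomic_int deferred;	/* decrements postponed from irq */

static void enable_timestamp(void)
{
	/* Settle decrements that irq context postponed: one cancels
	 * against this increment, the rest are applied now.
	 */
	int d = atomic_exchange(&deferred, 0);

	if (d) {
		while (--d)
			atomic_fetch_sub(&fake_key, 1);
		return;
	}
	atomic_fetch_add(&fake_key, 1);
}

static void disable_timestamp(int in_irq)
{
	if (in_irq) {		/* kernel: in_interrupt() */
		atomic_fetch_add(&deferred, 1);
		return;
	}
	atomic_fetch_sub(&fake_key, 1);
}

int main(void)
{
	enable_timestamp();	/* key: 1 */
	disable_timestamp(1);	/* deferred; key stays 1 */
	disable_timestamp(1);	/* deferred again */
	enable_timestamp();	/* two enables, two disables */
	printf("key=%d deferred=%d\n",	/* key=0 deferred=0 */
	       atomic_load(&fake_key), atomic_load(&deferred));
	return 0;
}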
@@ -1874,6 +1887,23 @@ void skb_set_dev(struct sk_buff *skb, struct net_device *dev)
 EXPORT_SYMBOL(skb_set_dev);
 #endif /* CONFIG_NET_NS */
 
+static void skb_warn_bad_offload(const struct sk_buff *skb)
+{
+	static const netdev_features_t null_features = 0;
+	struct net_device *dev = skb->dev;
+	const char *driver = "";
+
+	if (dev && dev->dev.parent)
+		driver = dev_driver_string(dev->dev.parent);
+
+	WARN(1, "%s: caps=(%pNF, %pNF) len=%d data_len=%d gso_size=%d "
+	     "gso_type=%d ip_summed=%d\n",
+	     driver, dev ? &dev->features : &null_features,
+	     skb->sk ? &skb->sk->sk_route_caps : &null_features,
+	     skb->len, skb->data_len, skb_shinfo(skb)->gso_size,
+	     skb_shinfo(skb)->gso_type, skb->ip_summed);
+}
+
 /*
  * Invalidate hardware checksum when packet is to be mangled, and
  * complete checksum manually on outgoing path.
@@ -1887,8 +1917,8 @@ int skb_checksum_help(struct sk_buff *skb)
 		goto out_set_summed;
 
 	if (unlikely(skb_shinfo(skb)->gso_size)) {
-		/* Let GSO fix up the checksum. */
-		goto out_set_summed;
+		skb_warn_bad_offload(skb);
+		return -EINVAL;
 	}
 
 	offset = skb_checksum_start_offset(skb);
@@ -1923,7 +1953,8 @@ EXPORT_SYMBOL(skb_checksum_help);
  * It may return NULL if the skb requires no segmentation. This is
  * only possible when GSO is used for verifying header integrity.
  */
-struct sk_buff *skb_gso_segment(struct sk_buff *skb, u32 features)
+struct sk_buff *skb_gso_segment(struct sk_buff *skb,
+	netdev_features_t features)
 {
 	struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
 	struct packet_type *ptype;
@@ -1947,16 +1978,7 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, u32 features)
 	__skb_pull(skb, skb->mac_len);
 
 	if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
-		struct net_device *dev = skb->dev;
-		struct ethtool_drvinfo info = {};
-
-		if (dev && dev->ethtool_ops && dev->ethtool_ops->get_drvinfo)
-			dev->ethtool_ops->get_drvinfo(dev, &info);
-
-		WARN(1, "%s: caps=(0x%lx, 0x%lx) len=%d data_len=%d ip_summed=%d\n",
-		     info.driver, dev ? dev->features : 0L,
-		     skb->sk ? skb->sk->sk_route_caps : 0L,
-		     skb->len, skb->data_len, skb->ip_summed);
+		skb_warn_bad_offload(skb);
 
 		if (skb_header_cloned(skb) &&
 		    (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
@@ -2064,7 +2086,7 @@ static void dev_gso_skb_destructor(struct sk_buff *skb)
  * This function segments the given skb and stores the list of segments
  * in skb->next.
  */
-static int dev_gso_segment(struct sk_buff *skb, int features)
+static int dev_gso_segment(struct sk_buff *skb, netdev_features_t features)
 {
 	struct sk_buff *segs;
 
@@ -2103,7 +2125,7 @@ static inline void skb_orphan_try(struct sk_buff *skb)
 	}
 }
 
-static bool can_checksum_protocol(unsigned long features, __be16 protocol)
+static bool can_checksum_protocol(netdev_features_t features, __be16 protocol)
 {
 	return ((features & NETIF_F_GEN_CSUM) ||
 		((features & NETIF_F_V4_CSUM) &&
@@ -2114,7 +2136,8 @@ static bool can_checksum_protocol(unsigned long features, __be16 protocol)
 		 protocol == htons(ETH_P_FCOE)));
 }
 
-static u32 harmonize_features(struct sk_buff *skb, __be16 protocol, u32 features)
+static netdev_features_t harmonize_features(struct sk_buff *skb,
+	__be16 protocol, netdev_features_t features)
 {
 	if (!can_checksum_protocol(features, protocol)) {
 		features &= ~NETIF_F_ALL_CSUM;
@@ -2126,10 +2149,10 @@ static u32 harmonize_features(struct sk_buff *skb, __be16 protocol, u32 features
 	return features;
 }
 
-u32 netif_skb_features(struct sk_buff *skb)
+netdev_features_t netif_skb_features(struct sk_buff *skb)
 {
 	__be16 protocol = skb->protocol;
-	u32 features = skb->dev->features;
+	netdev_features_t features = skb->dev->features;
 
 	if (protocol == htons(ETH_P_8021Q)) {
 		struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
@@ -2175,7 +2198,7 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 	unsigned int skb_len;
 
 	if (likely(!skb->next)) {
-		u32 features;
+		netdev_features_t features;
 
 		/*
 		 * If device doesn't need skb->dst, release it right now while
@@ -2256,7 +2279,7 @@ gso:
 			return rc;
 		}
 		txq_trans_update(txq);
-		if (unlikely(netif_tx_queue_stopped(txq) && skb->next))
+		if (unlikely(netif_xmit_stopped(txq) && skb->next))
 			return NETDEV_TX_BUSY;
 	} while (skb->next);
 
@@ -2456,6 +2479,18 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
 	return rc;
 }
 
+#if IS_ENABLED(CONFIG_NETPRIO_CGROUP)
+static void skb_update_prio(struct sk_buff *skb)
+{
+	struct netprio_map *map = rcu_dereference_bh(skb->dev->priomap);
+
+	if ((!skb->priority) && (skb->sk) && map)
+		skb->priority = map->priomap[skb->sk->sk_cgrp_prioidx];
+}
+#else
+#define skb_update_prio(skb)
+#endif
+
 static DEFINE_PER_CPU(int, xmit_recursion);
 #define RECURSION_LIMIT 10
 
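skb_update_prio() above is the transmit-side hook for the new netprio cgroup (added to the build in the Makefile hunk earlier): a packet with no explicit priority inherits one from a per-device table indexed by the sending socket's cgroup index. A self-contained model of the lookup, with simplified stand-ins for the kernel structures (all types here are invented for the sketch):

#include <stdint.h>
#include <stdio.h>

struct netprio_map { uint32_t priomap[8]; };

struct sock { uint16_t sk_cgrp_prioidx; };

struct sk_buff {
	uint32_t priority;
	struct sock *sk;
	struct netprio_map *dev_priomap;	/* kernel: skb->dev->priomap */
};

/* Same shape as the hunk above: only fill in a priority if the
 * packet has none, a socket is attached, and the device has a map.
 */
static void update_prio(struct sk_buff *skb)
{
	struct netprio_map *map = skb->dev_priomap;

	if (!skb->priority && skb->sk && map)
		skb->priority = map->priomap[skb->sk->sk_cgrp_prioidx];
}

int main(void)
{
	struct netprio_map map = { .priomap = { [2] = 6 } };
	struct sock sk = { .sk_cgrp_prioidx = 2 };
	struct sk_buff skb = { .priority = 0, .sk = &sk, .dev_priomap = &map };

	update_prio(&skb);
	printf("priority=%u\n", (unsigned)skb.priority);	/* prints 6 */
	return 0;
}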
@@ -2496,6 +2531,8 @@ int dev_queue_xmit(struct sk_buff *skb)
 	 */
 	rcu_read_lock_bh();
 
+	skb_update_prio(skb);
+
 	txq = dev_pick_tx(dev, skb);
 	q = rcu_dereference_bh(txq->qdisc);
 
@@ -2530,7 +2567,7 @@ int dev_queue_xmit(struct sk_buff *skb)
 
 			HARD_TX_LOCK(dev, txq, cpu);
 
-			if (!netif_tx_queue_stopped(txq)) {
+			if (!netif_xmit_stopped(txq)) {
 				__this_cpu_inc(xmit_recursion);
 				rc = dev_hard_start_xmit(skb, dev, txq);
 				__this_cpu_dec(xmit_recursion);
@@ -2591,123 +2628,28 @@ static inline void ____napi_schedule(struct softnet_data *sd,
  */
 void __skb_get_rxhash(struct sk_buff *skb)
 {
-	int nhoff, hash = 0, poff;
-	const struct ipv6hdr *ip6;
-	const struct iphdr *ip;
-	const struct vlan_hdr *vlan;
-	u8 ip_proto;
-	u32 addr1, addr2;
-	u16 proto;
-	union {
-		u32 v32;
-		u16 v16[2];
-	} ports;
-
-	nhoff = skb_network_offset(skb);
-	proto = skb->protocol;
-
-again:
-	switch (proto) {
-	case __constant_htons(ETH_P_IP):
-ip:
-		if (!pskb_may_pull(skb, sizeof(*ip) + nhoff))
-			goto done;
-
-		ip = (const struct iphdr *) (skb->data + nhoff);
-		if (ip_is_fragment(ip))
-			ip_proto = 0;
-		else
-			ip_proto = ip->protocol;
-		addr1 = (__force u32) ip->saddr;
-		addr2 = (__force u32) ip->daddr;
-		nhoff += ip->ihl * 4;
-		break;
-	case __constant_htons(ETH_P_IPV6):
-ipv6:
-		if (!pskb_may_pull(skb, sizeof(*ip6) + nhoff))
-			goto done;
-
-		ip6 = (const struct ipv6hdr *) (skb->data + nhoff);
-		ip_proto = ip6->nexthdr;
-		addr1 = (__force u32) ip6->saddr.s6_addr32[3];
-		addr2 = (__force u32) ip6->daddr.s6_addr32[3];
-		nhoff += 40;
-		break;
-	case __constant_htons(ETH_P_8021Q):
-		if (!pskb_may_pull(skb, sizeof(*vlan) + nhoff))
-			goto done;
-		vlan = (const struct vlan_hdr *) (skb->data + nhoff);
-		proto = vlan->h_vlan_encapsulated_proto;
-		nhoff += sizeof(*vlan);
-		goto again;
-	case __constant_htons(ETH_P_PPP_SES):
-		if (!pskb_may_pull(skb, PPPOE_SES_HLEN + nhoff))
-			goto done;
-		proto = *((__be16 *) (skb->data + nhoff +
-				      sizeof(struct pppoe_hdr)));
-		nhoff += PPPOE_SES_HLEN;
-		switch (proto) {
-		case __constant_htons(PPP_IP):
-			goto ip;
-		case __constant_htons(PPP_IPV6):
-			goto ipv6;
-		default:
-			goto done;
-		}
-	default:
-		goto done;
-	}
+	struct flow_keys keys;
+	u32 hash;
 
-	switch (ip_proto) {
-	case IPPROTO_GRE:
-		if (pskb_may_pull(skb, nhoff + 16)) {
-			u8 *h = skb->data + nhoff;
-			__be16 flags = *(__be16 *)h;
+	if (!skb_flow_dissect(skb, &keys))
+		return;
 
-			/*
-			 * Only look inside GRE if version zero and no
-			 * routing
-			 */
-			if (!(flags & (GRE_VERSION|GRE_ROUTING))) {
-				proto = *(__be16 *)(h + 2);
-				nhoff += 4;
-				if (flags & GRE_CSUM)
-					nhoff += 4;
-				if (flags & GRE_KEY)
-					nhoff += 4;
-				if (flags & GRE_SEQ)
-					nhoff += 4;
-				goto again;
-			}
-		}
-		break;
-	case IPPROTO_IPIP:
-		goto again;
-	default:
-		break;
-	}
-
-	ports.v32 = 0;
-	poff = proto_ports_offset(ip_proto);
-	if (poff >= 0) {
-		nhoff += poff;
-		if (pskb_may_pull(skb, nhoff + 4)) {
-			ports.v32 = * (__force u32 *) (skb->data + nhoff);
-			if (ports.v16[1] < ports.v16[0])
-				swap(ports.v16[0], ports.v16[1]);
-			skb->l4_rxhash = 1;
-		}
+	if (keys.ports) {
+		if ((__force u16)keys.port16[1] < (__force u16)keys.port16[0])
+			swap(keys.port16[0], keys.port16[1]);
+		skb->l4_rxhash = 1;
 	}
 
 	/* get a consistent hash (same value on both flow directions) */
-	if (addr2 < addr1)
-		swap(addr1, addr2);
+	if ((__force u32)keys.dst < (__force u32)keys.src)
+		swap(keys.dst, keys.src);
 
-	hash = jhash_3words(addr1, addr2, ports.v32, hashrnd);
+	hash = jhash_3words((__force u32)keys.dst,
+			    (__force u32)keys.src,
+			    (__force u32)keys.ports, hashrnd);
 	if (!hash)
 		hash = 1;
 
-done:
 	skb->rxhash = hash;
 }
 EXPORT_SYMBOL(__skb_get_rxhash);
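The rewrite above moves all header parsing into the new shared skb_flow_dissect() helper (net/core/flow_dissector.c in the diffstat) and keeps only the hashing policy here. The key property, preserved from the old code, is that the hash is direction-agnostic: addresses and ports are put in a canonical order before hashing, so both directions of a flow land on the same value. A standalone sketch of that property; mix3() is an arbitrary stand-in for the kernel's seeded jhash_3words(), and the canonicalization details are simplified:

#include <stdint.h>
#include <stdio.h>

/* Stand-in mixer; the kernel uses jhash_3words() with a random seed. */
static uint32_t mix3(uint32_t a, uint32_t b, uint32_t c)
{
	uint32_t h = a * 0x9e3779b1u;

	h ^= b + 0x85ebca6bu + (h << 6) + (h >> 2);
	h ^= c + 0xc2b2ae35u + (h << 6) + (h >> 2);
	return h;
}

/* Direction-agnostic flow hash: sort the address pair and the port
 * pair into a canonical order first, so A->B and B->A collapse to
 * the same value, which is the property the hunk above preserves.
 */
static uint32_t flow_hash(uint32_t src, uint32_t dst,
			  uint16_t sport, uint16_t dport)
{
	uint32_t ports;

	if (dport < sport) {	/* canonical port order */
		uint16_t t = sport; sport = dport; dport = t;
	}
	if (dst < src) {	/* canonical address order */
		uint32_t t = src; src = dst; dst = t;
	}
	ports = ((uint32_t)dport << 16) | sport;
	return mix3(dst, src, ports);
}

int main(void)
{
	/* Both directions of the same flow hash identically. */
	printf("%u\n", flow_hash(0x0a000001, 0x0a000002, 1234, 80));
	printf("%u\n", flow_hash(0x0a000002, 0x0a000001, 80, 1234));
	return 0;
}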
@@ -2718,6 +2660,8 @@ EXPORT_SYMBOL(__skb_get_rxhash);
 struct rps_sock_flow_table __rcu *rps_sock_flow_table __read_mostly;
 EXPORT_SYMBOL(rps_sock_flow_table);
 
+struct jump_label_key rps_needed __read_mostly;
+
 static struct rps_dev_flow *
 set_rps_cpu(struct net_device *dev, struct sk_buff *skb,
 	    struct rps_dev_flow *rflow, u16 next_cpu)
@@ -2997,12 +2941,11 @@ int netif_rx(struct sk_buff *skb)
 	if (netpoll_rx(skb))
 		return NET_RX_DROP;
 
-	if (netdev_tstamp_prequeue)
-		net_timestamp_check(skb);
+	net_timestamp_check(netdev_tstamp_prequeue, skb);
 
 	trace_netif_rx(skb);
 #ifdef CONFIG_RPS
-	{
+	if (static_branch(&rps_needed)) {
 		struct rps_dev_flow voidflow, *rflow = &voidflow;
 		int cpu;
 
@@ -3017,14 +2960,13 @@ int netif_rx(struct sk_buff *skb)
 
 		rcu_read_unlock();
 		preempt_enable();
-	}
-#else
+	} else
+#endif
 	{
 		unsigned int qtail;
 		ret = enqueue_to_backlog(skb, get_cpu(), &qtail);
 		put_cpu();
 	}
-#endif
 	return ret;
 }
 EXPORT_SYMBOL(netif_rx);
@@ -3230,8 +3172,7 @@ static int __netif_receive_skb(struct sk_buff *skb)
 	int ret = NET_RX_DROP;
 	__be16 type;
 
-	if (!netdev_tstamp_prequeue)
-		net_timestamp_check(skb);
+	net_timestamp_check(!netdev_tstamp_prequeue, skb);
 
 	trace_netif_receive_skb(skb);
 
@@ -3362,14 +3303,13 @@ out:
  */
 int netif_receive_skb(struct sk_buff *skb)
 {
-	if (netdev_tstamp_prequeue)
-		net_timestamp_check(skb);
+	net_timestamp_check(netdev_tstamp_prequeue, skb);
 
 	if (skb_defer_rx_timestamp(skb))
 		return NET_RX_SUCCESS;
 
 #ifdef CONFIG_RPS
-	{
+	if (static_branch(&rps_needed)) {
 		struct rps_dev_flow voidflow, *rflow = &voidflow;
 		int cpu, ret;
 
@@ -3380,16 +3320,12 @@ int netif_receive_skb(struct sk_buff *skb)
 		if (cpu >= 0) {
 			ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
 			rcu_read_unlock();
-		} else {
-			rcu_read_unlock();
-			ret = __netif_receive_skb(skb);
+			return ret;
 		}
-
-		return ret;
+		rcu_read_unlock();
 	}
-#else
-	return __netif_receive_skb(skb);
 #endif
+	return __netif_receive_skb(skb);
 }
 EXPORT_SYMBOL(netif_receive_skb);
 
@@ -3564,14 +3500,20 @@ static inline gro_result_t
 __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 {
 	struct sk_buff *p;
+	unsigned int maclen = skb->dev->hard_header_len;
 
 	for (p = napi->gro_list; p; p = p->next) {
 		unsigned long diffs;
 
 		diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev;
 		diffs |= p->vlan_tci ^ skb->vlan_tci;
-		diffs |= compare_ether_header(skb_mac_header(p),
-					      skb_gro_mac_header(skb));
+		if (maclen == ETH_HLEN)
+			diffs |= compare_ether_header(skb_mac_header(p),
+						      skb_gro_mac_header(skb));
+		else if (!diffs)
+			diffs = memcmp(skb_mac_header(p),
+				       skb_gro_mac_header(skb),
+				       maclen);
 		NAPI_GRO_CB(p)->same_flow = !diffs;
 		NAPI_GRO_CB(p)->flush = 0;
 	}
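The GRO hunk above stops assuming every device has a 14-byte Ethernet header: the word-sized comparison of compare_ether_header() is kept as the fast path when hard_header_len is ETH_HLEN, and anything longer falls back to memcmp() over the full MAC header. Roughly how the fixed-size fast path works; the layout and helper below are illustrative, not the kernel's exact implementation:

#include <stdint.h>
#include <string.h>
#include <stdio.h>

/* Compare two 14-byte Ethernet headers with word-sized XORs instead
 * of a byte loop; a nonzero result means "headers differ".  This
 * mirrors the compare_ether_header() idea only.
 */
static unsigned long cmp_eth_fast(const unsigned char *a,
				  const unsigned char *b)
{
	uint32_t a32[3], b32[3];
	uint16_t a16, b16;

	memcpy(a32, a, 12);		/* dest MAC + src MAC */
	memcpy(b32, b, 12);
	memcpy(&a16, a + 12, 2);	/* EtherType */
	memcpy(&b16, b + 12, 2);
	return (a32[0] ^ b32[0]) | (a32[1] ^ b32[1]) |
	       (a32[2] ^ b32[2]) | (uint32_t)(a16 ^ b16);
}

int main(void)
{
	unsigned char h1[14] = { 0xaa, 1, 2, 3, 4, 5,
				 0xbb, 1, 2, 3, 4, 5, 0x08, 0x00 };
	unsigned char h2[14] = { 0xaa, 1, 2, 3, 4, 5,
				 0xbb, 1, 2, 3, 4, 5, 0x86, 0xdd };

	printf("same=%d diff=%d\n", cmp_eth_fast(h1, h1) == 0,
	       cmp_eth_fast(h1, h2) == 0);	/* same=1 diff=0 */
	return 0;
}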
@@ -4282,6 +4224,12 @@ static int dev_seq_open(struct inode *inode, struct file *file)
 			    sizeof(struct dev_iter_state));
 }
 
+int dev_seq_open_ops(struct inode *inode, struct file *file,
+		     const struct seq_operations *ops)
+{
+	return seq_open_net(inode, file, ops, sizeof(struct dev_iter_state));
+}
+
 static const struct file_operations dev_seq_fops = {
 	.owner	 = THIS_MODULE,
 	.open    = dev_seq_open,
@@ -4532,7 +4480,7 @@ static void dev_change_rx_flags(struct net_device *dev, int flags)
 
 static int __dev_set_promiscuity(struct net_device *dev, int inc)
 {
-	unsigned short old_flags = dev->flags;
+	unsigned int old_flags = dev->flags;
 	uid_t uid;
 	gid_t gid;
 
@@ -4589,7 +4537,7 @@ static int __dev_set_promiscuity(struct net_device *dev, int inc)
  */
 int dev_set_promiscuity(struct net_device *dev, int inc)
 {
-	unsigned short old_flags = dev->flags;
+	unsigned int old_flags = dev->flags;
 	int err;
 
 	err = __dev_set_promiscuity(dev, inc);
@@ -4616,7 +4564,7 @@ EXPORT_SYMBOL(dev_set_promiscuity);
 
 int dev_set_allmulti(struct net_device *dev, int inc)
 {
-	unsigned short old_flags = dev->flags;
+	unsigned int old_flags = dev->flags;
 
 	ASSERT_RTNL();
 
@@ -4719,7 +4667,7 @@ EXPORT_SYMBOL(dev_get_flags);
 
 int __dev_change_flags(struct net_device *dev, unsigned int flags)
 {
-	int old_flags = dev->flags;
+	unsigned int old_flags = dev->flags;
 	int ret;
 
 	ASSERT_RTNL();
@@ -4802,10 +4750,10 @@ void __dev_notify_flags(struct net_device *dev, unsigned int old_flags)
  * Change settings on device based state flags. The flags are
  * in the userspace exported format.
  */
-int dev_change_flags(struct net_device *dev, unsigned flags)
+int dev_change_flags(struct net_device *dev, unsigned int flags)
 {
-	int ret, changes;
-	int old_flags = dev->flags;
+	int ret;
+	unsigned int changes, old_flags = dev->flags;
 
 	ret = __dev_change_flags(dev, flags);
 	if (ret < 0)
@@ -5362,7 +5310,8 @@ static void rollback_registered(struct net_device *dev)
 	list_del(&single);
 }
 
-static u32 netdev_fix_features(struct net_device *dev, u32 features)
+static netdev_features_t netdev_fix_features(struct net_device *dev,
+	netdev_features_t features)
 {
 	/* Fix illegal checksum combinations */
 	if ((features & NETIF_F_HW_CSUM) &&
@@ -5371,12 +5320,6 @@ static u32 netdev_fix_features(struct net_device *dev, u32 features)
 		features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
 	}
 
-	if ((features & NETIF_F_NO_CSUM) &&
-	    (features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
-		netdev_warn(dev, "mixed no checksumming and other settings.\n");
-		features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM);
-	}
-
 	/* Fix illegal SG+CSUM combinations. */
 	if ((features & NETIF_F_SG) &&
 	    !(features & NETIF_F_ALL_CSUM)) {
@@ -5424,7 +5367,7 @@ static u32 netdev_fix_features(struct net_device *dev, u32 features)
 
 int __netdev_update_features(struct net_device *dev)
 {
-	u32 features;
+	netdev_features_t features;
 	int err = 0;
 
 	ASSERT_RTNL();
@@ -5440,16 +5383,16 @@ int __netdev_update_features(struct net_device *dev)
 	if (dev->features == features)
 		return 0;
 
-	netdev_dbg(dev, "Features changed: 0x%08x -> 0x%08x\n",
-		dev->features, features);
+	netdev_dbg(dev, "Features changed: %pNF -> %pNF\n",
+		&dev->features, &features);
 
 	if (dev->netdev_ops->ndo_set_features)
 		err = dev->netdev_ops->ndo_set_features(dev, features);
 
 	if (unlikely(err < 0)) {
 		netdev_err(dev,
-			"set_features() failed (%d); wanted 0x%08x, left 0x%08x\n",
-			err, features, dev->features);
+			"set_features() failed (%d); wanted %pNF, left %pNF\n",
+			err, &features, &dev->features);
 		return -1;
 	}
 
@@ -5548,6 +5491,9 @@ static void netdev_init_one_queue(struct net_device *dev,
 	queue->xmit_lock_owner = -1;
 	netdev_queue_numa_node_write(queue, NUMA_NO_NODE);
 	queue->dev = dev;
+#ifdef CONFIG_BQL
+	dql_init(&queue->dql, HZ);
+#endif
 }
 
 static int netif_alloc_netdev_queues(struct net_device *dev)
@@ -5633,11 +5579,12 @@ int register_netdevice(struct net_device *dev)
 	dev->wanted_features = dev->features & dev->hw_features;
 
 	/* Turn on no cache copy if HW is doing checksum */
-	dev->hw_features |= NETIF_F_NOCACHE_COPY;
-	if ((dev->features & NETIF_F_ALL_CSUM) &&
-	    !(dev->features & NETIF_F_NO_CSUM)) {
-		dev->wanted_features |= NETIF_F_NOCACHE_COPY;
-		dev->features |= NETIF_F_NOCACHE_COPY;
+	if (!(dev->flags & IFF_LOOPBACK)) {
+		dev->hw_features |= NETIF_F_NOCACHE_COPY;
+		if (dev->features & NETIF_F_ALL_CSUM) {
+			dev->wanted_features |= NETIF_F_NOCACHE_COPY;
+			dev->features |= NETIF_F_NOCACHE_COPY;
+		}
 	}
 
 	/* Make NETIF_F_HIGHDMA inheritable to VLAN devices.
@@ -6373,7 +6320,8 @@ static int dev_cpu_callback(struct notifier_block *nfb,
 * @one to the master device with current feature set @all.  Will not
 * enable anything that is off in @mask. Returns the new feature set.
 */
-u32 netdev_increment_features(u32 all, u32 one, u32 mask)
+netdev_features_t netdev_increment_features(netdev_features_t all,
+	netdev_features_t one, netdev_features_t mask)
 {
 	if (mask & NETIF_F_GEN_CSUM)
 		mask |= NETIF_F_ALL_CSUM;
@@ -6382,10 +6330,6 @@ u32 netdev_increment_features(u32 all, u32 one, u32 mask)
 	all |= one & (NETIF_F_ONE_FOR_ALL|NETIF_F_ALL_CSUM) & mask;
 	all &= one | ~NETIF_F_ALL_FOR_ALL;
 
-	/* If device needs checksumming, downgrade to it. */
-	if (all & (NETIF_F_ALL_CSUM & ~NETIF_F_NO_CSUM))
-		all &= ~NETIF_F_NO_CSUM;
-
 	/* If one device supports hw checksumming, set for all. */
 	if (all & NETIF_F_GEN_CSUM)
 		all &= ~(NETIF_F_ALL_CSUM & ~NETIF_F_GEN_CSUM);
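A large share of this dev.c diff is the mechanical retyping of feature masks from u32 to netdev_features_t, together with the %pNF printk format for printing them; the old 0x%08x format and u32 locals would silently truncate once feature bits grow past 32. The point in miniature, with the width, bit positions, and helper macro invented for the example:

#include <stdint.h>
#include <stdio.h>

/* Illustration only: features move to a wider dedicated type so the
 * bit set can grow past 32.  The kernel derives real bit positions
 * from the NETIF_F_*_BIT enums in netdev_features.h.
 */
typedef uint64_t netdev_features_t;

#define FEATURE(bit)	((netdev_features_t)1 << (bit))
#define F_SG		FEATURE(0)
#define F_TSO		FEATURE(15)
#define F_HYPOTHETICAL	FEATURE(40)	/* would not fit in a u32 */

int main(void)
{
	netdev_features_t features = F_SG | F_TSO | F_HYPOTHETICAL;

	/* Storing this in a u32 silently drops bit 40: */
	printf("full:      %#llx\n", (unsigned long long)features);
	printf("truncated: %#llx\n",
	       (unsigned long long)(uint32_t)features);
	return 0;
}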
diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c
index 277faef9148d..29c07fef9228 100644
--- a/net/core/dev_addr_lists.c
+++ b/net/core/dev_addr_lists.c
@@ -427,7 +427,7 @@ EXPORT_SYMBOL(dev_uc_del);
  *
  *	Add newly added addresses to the destination device and release
  *	addresses that have no users left. The source device must be
- *	locked by netif_tx_lock_bh.
+ *	locked by netif_addr_lock_bh.
  *
  *	This function is intended to be called from the dev->set_rx_mode
  *	function of layered software devices.
@@ -439,11 +439,11 @@ int dev_uc_sync(struct net_device *to, struct net_device *from)
 	if (to->addr_len != from->addr_len)
 		return -EINVAL;
 
-	netif_addr_lock_bh(to);
+	netif_addr_lock_nested(to);
 	err = __hw_addr_sync(&to->uc, &from->uc, to->addr_len);
 	if (!err)
 		__dev_set_rx_mode(to);
-	netif_addr_unlock_bh(to);
+	netif_addr_unlock(to);
 	return err;
 }
 EXPORT_SYMBOL(dev_uc_sync);
@@ -463,7 +463,7 @@ void dev_uc_unsync(struct net_device *to, struct net_device *from)
 		return;
 
 	netif_addr_lock_bh(from);
-	netif_addr_lock(to);
+	netif_addr_lock_nested(to);
 	__hw_addr_unsync(&to->uc, &from->uc, to->addr_len);
 	__dev_set_rx_mode(to);
 	netif_addr_unlock(to);
@@ -590,7 +590,7 @@ EXPORT_SYMBOL(dev_mc_del_global);
  *
  *	Add newly added addresses to the destination device and release
  *	addresses that have no users left. The source device must be
- *	locked by netif_tx_lock_bh.
+ *	locked by netif_addr_lock_bh.
  *
  *	This function is intended to be called from the ndo_set_rx_mode
  *	function of layered software devices.
@@ -602,11 +602,11 @@ int dev_mc_sync(struct net_device *to, struct net_device *from)
 	if (to->addr_len != from->addr_len)
 		return -EINVAL;
 
-	netif_addr_lock_bh(to);
+	netif_addr_lock_nested(to);
 	err = __hw_addr_sync(&to->mc, &from->mc, to->addr_len);
 	if (!err)
 		__dev_set_rx_mode(to);
-	netif_addr_unlock_bh(to);
+	netif_addr_unlock(to);
 	return err;
 }
 EXPORT_SYMBOL(dev_mc_sync);
@@ -626,7 +626,7 @@ void dev_mc_unsync(struct net_device *to, struct net_device *from)
 		return;
 
 	netif_addr_lock_bh(from);
-	netif_addr_lock(to);
+	netif_addr_lock_nested(to);
 	__hw_addr_unsync(&to->mc, &from->mc, to->addr_len);
 	__dev_set_rx_mode(to);
 	netif_addr_unlock(to);
@@ -696,8 +696,7 @@ static const struct seq_operations dev_mc_seq_ops = {
 
 static int dev_mc_seq_open(struct inode *inode, struct file *file)
 {
-	return seq_open_net(inode, file, &dev_mc_seq_ops,
-			    sizeof(struct seq_net_private));
+	return dev_seq_open_ops(inode, file, &dev_mc_seq_ops);
 }
 
 static const struct file_operations dev_mc_seq_fops = {
diff --git a/net/core/dst.c b/net/core/dst.c
index d5e2c4c09107..43d94cedbf7c 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -366,7 +366,7 @@ static void dst_ifdown(struct dst_entry *dst, struct net_device *dev,
 		dev_hold(dst->dev);
 		dev_put(dev);
 		rcu_read_lock();
-		neigh = dst_get_neighbour(dst);
+		neigh = dst_get_neighbour_noref(dst);
 		if (neigh && neigh->dev == dev) {
 			neigh->dev = dst->dev;
 			dev_hold(dst->dev);
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index f44481707124..3f79db1b612a 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -36,235 +36,44 @@ u32 ethtool_op_get_link(struct net_device *dev)
 }
 EXPORT_SYMBOL(ethtool_op_get_link);
 
-u32 ethtool_op_get_tx_csum(struct net_device *dev)
-{
-	return (dev->features & NETIF_F_ALL_CSUM) != 0;
-}
-EXPORT_SYMBOL(ethtool_op_get_tx_csum);
-
-int ethtool_op_set_tx_csum(struct net_device *dev, u32 data)
-{
-	if (data)
-		dev->features |= NETIF_F_IP_CSUM;
-	else
-		dev->features &= ~NETIF_F_IP_CSUM;
-
-	return 0;
-}
-EXPORT_SYMBOL(ethtool_op_set_tx_csum);
-
-int ethtool_op_set_tx_hw_csum(struct net_device *dev, u32 data)
-{
-	if (data)
-		dev->features |= NETIF_F_HW_CSUM;
-	else
-		dev->features &= ~NETIF_F_HW_CSUM;
-
-	return 0;
-}
-EXPORT_SYMBOL(ethtool_op_set_tx_hw_csum);
-
-int ethtool_op_set_tx_ipv6_csum(struct net_device *dev, u32 data)
-{
-	if (data)
-		dev->features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
-	else
-		dev->features &= ~(NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM);
-
-	return 0;
-}
-EXPORT_SYMBOL(ethtool_op_set_tx_ipv6_csum);
-
-u32 ethtool_op_get_sg(struct net_device *dev)
-{
-	return (dev->features & NETIF_F_SG) != 0;
-}
-EXPORT_SYMBOL(ethtool_op_get_sg);
-
-int ethtool_op_set_sg(struct net_device *dev, u32 data)
-{
-	if (data)
-		dev->features |= NETIF_F_SG;
-	else
-		dev->features &= ~NETIF_F_SG;
-
-	return 0;
-}
-EXPORT_SYMBOL(ethtool_op_set_sg);
-
-u32 ethtool_op_get_tso(struct net_device *dev)
-{
-	return (dev->features & NETIF_F_TSO) != 0;
-}
-EXPORT_SYMBOL(ethtool_op_get_tso);
-
-int ethtool_op_set_tso(struct net_device *dev, u32 data)
-{
-	if (data)
-		dev->features |= NETIF_F_TSO;
-	else
-		dev->features &= ~NETIF_F_TSO;
-
-	return 0;
-}
-EXPORT_SYMBOL(ethtool_op_set_tso);
-
-u32 ethtool_op_get_ufo(struct net_device *dev)
-{
-	return (dev->features & NETIF_F_UFO) != 0;
-}
-EXPORT_SYMBOL(ethtool_op_get_ufo);
-
-int ethtool_op_set_ufo(struct net_device *dev, u32 data)
-{
-	if (data)
-		dev->features |= NETIF_F_UFO;
-	else
-		dev->features &= ~NETIF_F_UFO;
-	return 0;
-}
-EXPORT_SYMBOL(ethtool_op_set_ufo);
-
-/* the following list of flags are the same as their associated
- * NETIF_F_xxx values in include/linux/netdevice.h
- */
-static const u32 flags_dup_features =
-	(ETH_FLAG_LRO | ETH_FLAG_RXVLAN | ETH_FLAG_TXVLAN | ETH_FLAG_NTUPLE |
-	 ETH_FLAG_RXHASH);
-
-u32 ethtool_op_get_flags(struct net_device *dev)
-{
-	/* in the future, this function will probably contain additional
-	 * handling for flags which are not so easily handled
-	 * by a simple masking operation
-	 */
-
-	return dev->features & flags_dup_features;
-}
-EXPORT_SYMBOL(ethtool_op_get_flags);
-
-/* Check if device can enable (or disable) particular feature coded in "data"
- * argument. Flags "supported" describe features that can be toggled by device.
- * If feature can not be toggled, it state (enabled or disabled) must match
- * hardcoded device features state, otherwise flags are marked as invalid.
- */
-bool ethtool_invalid_flags(struct net_device *dev, u32 data, u32 supported)
-{
-	u32 features = dev->features & flags_dup_features;
-	/* "data" can contain only flags_dup_features bits,
-	 * see __ethtool_set_flags */
-
-	return (features & ~supported) != (data & ~supported);
-}
-EXPORT_SYMBOL(ethtool_invalid_flags);
-
-int ethtool_op_set_flags(struct net_device *dev, u32 data, u32 supported)
-{
-	if (ethtool_invalid_flags(dev, data, supported))
-		return -EINVAL;
-
-	dev->features = ((dev->features & ~flags_dup_features) |
-			 (data & flags_dup_features));
-	return 0;
-}
-EXPORT_SYMBOL(ethtool_op_set_flags);
-
 /* Handlers for each ethtool command */
 
-#define ETHTOOL_DEV_FEATURE_WORDS	1
+#define ETHTOOL_DEV_FEATURE_WORDS	((NETDEV_FEATURE_COUNT + 31) / 32)
 
-static void ethtool_get_features_compat(struct net_device *dev,
-	struct ethtool_get_features_block *features)
-{
-	if (!dev->ethtool_ops)
-		return;
-
-	/* getting RX checksum */
-	if (dev->ethtool_ops->get_rx_csum)
-		if (dev->ethtool_ops->get_rx_csum(dev))
-			features[0].active |= NETIF_F_RXCSUM;
-
-	/* mark legacy-changeable features */
-	if (dev->ethtool_ops->set_sg)
-		features[0].available |= NETIF_F_SG;
-	if (dev->ethtool_ops->set_tx_csum)
-		features[0].available |= NETIF_F_ALL_CSUM;
-	if (dev->ethtool_ops->set_tso)
-		features[0].available |= NETIF_F_ALL_TSO;
-	if (dev->ethtool_ops->set_rx_csum)
-		features[0].available |= NETIF_F_RXCSUM;
-	if (dev->ethtool_ops->set_flags)
-		features[0].available |= flags_dup_features;
-}
-
-static int ethtool_set_feature_compat(struct net_device *dev,
-	int (*legacy_set)(struct net_device *, u32),
-	struct ethtool_set_features_block *features, u32 mask)
-{
-	u32 do_set;
-
-	if (!legacy_set)
-		return 0;
-
-	if (!(features[0].valid & mask))
-		return 0;
-
-	features[0].valid &= ~mask;
-
-	do_set = !!(features[0].requested & mask);
-
-	if (legacy_set(dev, do_set) < 0)
-		netdev_info(dev,
-			"Legacy feature change (%s) failed for 0x%08x\n",
-			do_set ? "set" : "clear", mask);
-
-	return 1;
-}
-
-static int ethtool_set_flags_compat(struct net_device *dev,
-	int (*legacy_set)(struct net_device *, u32),
-	struct ethtool_set_features_block *features, u32 mask)
-{
-	u32 value;
-
-	if (!legacy_set)
-		return 0;
-
-	if (!(features[0].valid & mask))
-		return 0;
-
-	value = dev->features & ~features[0].valid;
-	value |= features[0].requested;
-
-	features[0].valid &= ~mask;
-
-	if (legacy_set(dev, value & mask) < 0)
-		netdev_info(dev, "Legacy flags change failed\n");
-
-	return 1;
-}
-
-static int ethtool_set_features_compat(struct net_device *dev,
-	struct ethtool_set_features_block *features)
-{
-	int compat;
-
-	if (!dev->ethtool_ops)
-		return 0;
-
-	compat = ethtool_set_feature_compat(dev, dev->ethtool_ops->set_sg,
-		features, NETIF_F_SG);
-	compat |= ethtool_set_feature_compat(dev, dev->ethtool_ops->set_tx_csum,
-		features, NETIF_F_ALL_CSUM);
-	compat |= ethtool_set_feature_compat(dev, dev->ethtool_ops->set_tso,
-		features, NETIF_F_ALL_TSO);
-	compat |= ethtool_set_feature_compat(dev, dev->ethtool_ops->set_rx_csum,
-		features, NETIF_F_RXCSUM);
-	compat |= ethtool_set_flags_compat(dev, dev->ethtool_ops->set_flags,
-		features, flags_dup_features);
-
-	return compat;
-}
+static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] = {
+	[NETIF_F_SG_BIT] =               "tx-scatter-gather",
+	[NETIF_F_IP_CSUM_BIT] =          "tx-checksum-ipv4",
+	[NETIF_F_HW_CSUM_BIT] =          "tx-checksum-ip-generic",
+	[NETIF_F_IPV6_CSUM_BIT] =        "tx-checksum-ipv6",
+	[NETIF_F_HIGHDMA_BIT] =          "highdma",
+	[NETIF_F_FRAGLIST_BIT] =         "tx-scatter-gather-fraglist",
+	[NETIF_F_HW_VLAN_TX_BIT] =       "tx-vlan-hw-insert",
+
+	[NETIF_F_HW_VLAN_RX_BIT] =       "rx-vlan-hw-parse",
+	[NETIF_F_HW_VLAN_FILTER_BIT] =   "rx-vlan-filter",
+	[NETIF_F_VLAN_CHALLENGED_BIT] =  "vlan-challenged",
+	[NETIF_F_GSO_BIT] =              "tx-generic-segmentation",
+	[NETIF_F_LLTX_BIT] =             "tx-lockless",
+	[NETIF_F_NETNS_LOCAL_BIT] =      "netns-local",
+	[NETIF_F_GRO_BIT] =              "rx-gro",
+	[NETIF_F_LRO_BIT] =              "rx-lro",
+
+	[NETIF_F_TSO_BIT] =              "tx-tcp-segmentation",
+	[NETIF_F_UFO_BIT] =              "tx-udp-fragmentation",
+	[NETIF_F_GSO_ROBUST_BIT] =       "tx-gso-robust",
+	[NETIF_F_TSO_ECN_BIT] =          "tx-tcp-ecn-segmentation",
+	[NETIF_F_TSO6_BIT] =             "tx-tcp6-segmentation",
+	[NETIF_F_FSO_BIT] =              "tx-fcoe-segmentation",
+
+	[NETIF_F_FCOE_CRC_BIT] =         "tx-checksum-fcoe-crc",
+	[NETIF_F_SCTP_CSUM_BIT] =        "tx-checksum-sctp",
+	[NETIF_F_FCOE_MTU_BIT] =         "fcoe-mtu",
+	[NETIF_F_NTUPLE_BIT] =           "rx-ntuple-filter",
+	[NETIF_F_RXHASH_BIT] =           "rx-hashing",
+	[NETIF_F_RXCSUM_BIT] =           "rx-checksum",
+	[NETIF_F_NOCACHE_COPY_BIT] =     "tx-nocache-copy",
+	[NETIF_F_LOOPBACK_BIT] =         "loopback",
+};
 
 static int ethtool_get_features(struct net_device *dev, void __user *useraddr)
 {
@@ -272,18 +81,21 @@ static int ethtool_get_features(struct net_device *dev, void __user *useraddr)
 		.cmd = ETHTOOL_GFEATURES,
 		.size = ETHTOOL_DEV_FEATURE_WORDS,
 	};
-	struct ethtool_get_features_block features[ETHTOOL_DEV_FEATURE_WORDS] = {
-		{
-			.available = dev->hw_features,
-			.requested = dev->wanted_features,
-			.active = dev->features,
-			.never_changed = NETIF_F_NEVER_CHANGE,
-		},
-	};
+	struct ethtool_get_features_block features[ETHTOOL_DEV_FEATURE_WORDS];
 	u32 __user *sizeaddr;
 	u32 copy_size;
+	int i;
 
-	ethtool_get_features_compat(dev, features);
+	/* in case feature bits run out again */
+	BUILD_BUG_ON(ETHTOOL_DEV_FEATURE_WORDS * sizeof(u32) > sizeof(netdev_features_t));
+
+	for (i = 0; i < ETHTOOL_DEV_FEATURE_WORDS; ++i) {
+		features[i].available = (u32)(dev->hw_features >> (32 * i));
+		features[i].requested = (u32)(dev->wanted_features >> (32 * i));
+		features[i].active = (u32)(dev->features >> (32 * i));
+		features[i].never_changed =
+			(u32)(NETIF_F_NEVER_CHANGE >> (32 * i));
+	}
 
 	sizeaddr = useraddr + offsetof(struct ethtool_gfeatures, size);
 	if (get_user(copy_size, sizeaddr))
@@ -305,7 +117,8 @@ static int ethtool_set_features(struct net_device *dev, void __user *useraddr)
 {
 	struct ethtool_sfeatures cmd;
 	struct ethtool_set_features_block features[ETHTOOL_DEV_FEATURE_WORDS];
-	int ret = 0;
+	netdev_features_t wanted = 0, valid = 0;
+	int i, ret = 0;
 
 	if (copy_from_user(&cmd, useraddr, sizeof(cmd)))
 		return -EFAULT;
@@ -317,65 +130,29 @@ static int ethtool_set_features(struct net_device *dev, void __user *useraddr)
 	if (copy_from_user(features, useraddr, sizeof(features)))
 		return -EFAULT;
 
-	if (features[0].valid & ~NETIF_F_ETHTOOL_BITS)
-		return -EINVAL;
+	for (i = 0; i < ETHTOOL_DEV_FEATURE_WORDS; ++i) {
+		valid |= (netdev_features_t)features[i].valid << (32 * i);
+		wanted |= (netdev_features_t)features[i].requested << (32 * i);
+	}
 
-	if (ethtool_set_features_compat(dev, features))
-		ret |= ETHTOOL_F_COMPAT;
+	if (valid & ~NETIF_F_ETHTOOL_BITS)
+		return -EINVAL;
 
-	if (features[0].valid & ~dev->hw_features) {
-		features[0].valid &= dev->hw_features;
+	if (valid & ~dev->hw_features) {
+		valid &= dev->hw_features;
 		ret |= ETHTOOL_F_UNSUPPORTED;
 	}
 
-	dev->wanted_features &= ~features[0].valid;
-	dev->wanted_features |= features[0].valid & features[0].requested;
+	dev->wanted_features &= ~valid;
+	dev->wanted_features |= wanted & valid;
 	__netdev_update_features(dev);
 
-	if ((dev->wanted_features ^ dev->features) & features[0].valid)
+	if ((dev->wanted_features ^ dev->features) & valid)
 		ret |= ETHTOOL_F_WISH;
 
 	return ret;
 }
 
-static const char netdev_features_strings[ETHTOOL_DEV_FEATURE_WORDS * 32][ETH_GSTRING_LEN] = {
-	/* NETIF_F_SG */              "tx-scatter-gather",
-	/* NETIF_F_IP_CSUM */         "tx-checksum-ipv4",
-	/* NETIF_F_NO_CSUM */         "tx-checksum-unneeded",
-	/* NETIF_F_HW_CSUM */         "tx-checksum-ip-generic",
-	/* NETIF_F_IPV6_CSUM */       "tx-checksum-ipv6",
-	/* NETIF_F_HIGHDMA */         "highdma",
-	/* NETIF_F_FRAGLIST */        "tx-scatter-gather-fraglist",
-	/* NETIF_F_HW_VLAN_TX */      "tx-vlan-hw-insert",
-
-	/* NETIF_F_HW_VLAN_RX */      "rx-vlan-hw-parse",
-	/* NETIF_F_HW_VLAN_FILTER */  "rx-vlan-filter",
-	/* NETIF_F_VLAN_CHALLENGED */ "vlan-challenged",
-	/* NETIF_F_GSO */             "tx-generic-segmentation",
-	/* NETIF_F_LLTX */            "tx-lockless",
-	/* NETIF_F_NETNS_LOCAL */     "netns-local",
-	/* NETIF_F_GRO */             "rx-gro",
-	/* NETIF_F_LRO */             "rx-lro",
-
-	/* NETIF_F_TSO */             "tx-tcp-segmentation",
-	/* NETIF_F_UFO */             "tx-udp-fragmentation",
-	/* NETIF_F_GSO_ROBUST */      "tx-gso-robust",
-	/* NETIF_F_TSO_ECN */         "tx-tcp-ecn-segmentation",
-	/* NETIF_F_TSO6 */            "tx-tcp6-segmentation",
-	/* NETIF_F_FSO */             "tx-fcoe-segmentation",
-	"",
-	"",
-
-	/* NETIF_F_FCOE_CRC */        "tx-checksum-fcoe-crc",
-	/* NETIF_F_SCTP_CSUM */       "tx-checksum-sctp",
-	/* NETIF_F_FCOE_MTU */        "fcoe-mtu",
-	/* NETIF_F_NTUPLE */          "rx-ntuple-filter",
-	/* NETIF_F_RXHASH */          "rx-hashing",
-	/* NETIF_F_RXCSUM */          "rx-checksum",
-	/* NETIF_F_NOCACHE_COPY */    "tx-nocache-copy",
-	/* NETIF_F_LOOPBACK */        "loopback",
-};
-
 static int __ethtool_get_sset_count(struct net_device *dev, int sset)
 {
 	const struct ethtool_ops *ops = dev->ethtool_ops;
@@ -402,7 +179,7 @@ static void __ethtool_get_strings(struct net_device *dev, | |||
402 | ops->get_strings(dev, stringset, data); | 179 | ops->get_strings(dev, stringset, data); |
403 | } | 180 | } |
404 | 181 | ||
405 | static u32 ethtool_get_feature_mask(u32 eth_cmd) | 182 | static netdev_features_t ethtool_get_feature_mask(u32 eth_cmd) |
406 | { | 183 | { |
407 | /* feature masks of legacy discrete ethtool ops */ | 184 | /* feature masks of legacy discrete ethtool ops */ |
408 | 185 | ||
@@ -433,136 +210,82 @@ static u32 ethtool_get_feature_mask(u32 eth_cmd) | |||
433 | } | 210 | } |
434 | } | 211 | } |
435 | 212 | ||
436 | static void *__ethtool_get_one_feature_actor(struct net_device *dev, u32 ethcmd) | ||
437 | { | ||
438 | const struct ethtool_ops *ops = dev->ethtool_ops; | ||
439 | |||
440 | if (!ops) | ||
441 | return NULL; | ||
442 | |||
443 | switch (ethcmd) { | ||
444 | case ETHTOOL_GTXCSUM: | ||
445 | return ops->get_tx_csum; | ||
446 | case ETHTOOL_GRXCSUM: | ||
447 | return ops->get_rx_csum; | ||
448 | case ETHTOOL_SSG: | ||
449 | return ops->get_sg; | ||
450 | case ETHTOOL_STSO: | ||
451 | return ops->get_tso; | ||
452 | case ETHTOOL_SUFO: | ||
453 | return ops->get_ufo; | ||
454 | default: | ||
455 | return NULL; | ||
456 | } | ||
457 | } | ||
458 | |||
459 | static u32 __ethtool_get_rx_csum_oldbug(struct net_device *dev) | ||
460 | { | ||
461 | return !!(dev->features & NETIF_F_ALL_CSUM); | ||
462 | } | ||
463 | |||
464 | static int ethtool_get_one_feature(struct net_device *dev, | 213 | static int ethtool_get_one_feature(struct net_device *dev, |
465 | char __user *useraddr, u32 ethcmd) | 214 | char __user *useraddr, u32 ethcmd) |
466 | { | 215 | { |
467 | u32 mask = ethtool_get_feature_mask(ethcmd); | 216 | netdev_features_t mask = ethtool_get_feature_mask(ethcmd); |
468 | struct ethtool_value edata = { | 217 | struct ethtool_value edata = { |
469 | .cmd = ethcmd, | 218 | .cmd = ethcmd, |
470 | .data = !!(dev->features & mask), | 219 | .data = !!(dev->features & mask), |
471 | }; | 220 | }; |
472 | 221 | ||
473 | /* compatibility with discrete get_ ops */ | ||
474 | if (!(dev->hw_features & mask)) { | ||
475 | u32 (*actor)(struct net_device *); | ||
476 | |||
477 | actor = __ethtool_get_one_feature_actor(dev, ethcmd); | ||
478 | |||
479 | /* bug compatibility with old get_rx_csum */ | ||
480 | if (ethcmd == ETHTOOL_GRXCSUM && !actor) | ||
481 | actor = __ethtool_get_rx_csum_oldbug; | ||
482 | |||
483 | if (actor) | ||
484 | edata.data = actor(dev); | ||
485 | } | ||
486 | |||
487 | if (copy_to_user(useraddr, &edata, sizeof(edata))) | 222 | if (copy_to_user(useraddr, &edata, sizeof(edata))) |
488 | return -EFAULT; | 223 | return -EFAULT; |
489 | return 0; | 224 | return 0; |
490 | } | 225 | } |
491 | 226 | ||
492 | static int __ethtool_set_tx_csum(struct net_device *dev, u32 data); | ||
493 | static int __ethtool_set_rx_csum(struct net_device *dev, u32 data); | ||
494 | static int __ethtool_set_sg(struct net_device *dev, u32 data); | ||
495 | static int __ethtool_set_tso(struct net_device *dev, u32 data); | ||
496 | static int __ethtool_set_ufo(struct net_device *dev, u32 data); | ||
497 | |||
498 | static int ethtool_set_one_feature(struct net_device *dev, | 227 | static int ethtool_set_one_feature(struct net_device *dev, |
499 | void __user *useraddr, u32 ethcmd) | 228 | void __user *useraddr, u32 ethcmd) |
500 | { | 229 | { |
501 | struct ethtool_value edata; | 230 | struct ethtool_value edata; |
502 | u32 mask; | 231 | netdev_features_t mask; |
503 | 232 | ||
504 | if (copy_from_user(&edata, useraddr, sizeof(edata))) | 233 | if (copy_from_user(&edata, useraddr, sizeof(edata))) |
505 | return -EFAULT; | 234 | return -EFAULT; |
506 | 235 | ||
507 | mask = ethtool_get_feature_mask(ethcmd); | 236 | mask = ethtool_get_feature_mask(ethcmd); |
508 | mask &= dev->hw_features; | 237 | mask &= dev->hw_features; |
509 | if (mask) { | 238 | if (!mask) |
510 | if (edata.data) | 239 | return -EOPNOTSUPP; |
511 | dev->wanted_features |= mask; | ||
512 | else | ||
513 | dev->wanted_features &= ~mask; | ||
514 | 240 | ||
515 | __netdev_update_features(dev); | 241 | if (edata.data) |
516 | return 0; | 242 | dev->wanted_features |= mask; |
517 | } | 243 | else |
244 | dev->wanted_features &= ~mask; | ||
518 | 245 | ||
519 | /* Driver is not converted to ndo_fix_features or does not | 246 | __netdev_update_features(dev); |
520 | * support changing this offload. In the latter case it won't | ||
521 | * have corresponding ethtool_ops field set. | ||
522 | * | ||
523 | * Following part is to be removed after all drivers advertise | ||
524 | * their changeable features in netdev->hw_features and stop | ||
525 | * using discrete offload setting ops. | ||
526 | */ | ||
527 | 247 | ||
528 | switch (ethcmd) { | 248 | return 0; |
529 | case ETHTOOL_STXCSUM: | 249 | } |
530 | return __ethtool_set_tx_csum(dev, edata.data); | 250 | |
531 | case ETHTOOL_SRXCSUM: | 251 | #define ETH_ALL_FLAGS (ETH_FLAG_LRO | ETH_FLAG_RXVLAN | ETH_FLAG_TXVLAN | \ |
532 | return __ethtool_set_rx_csum(dev, edata.data); | 252 | ETH_FLAG_NTUPLE | ETH_FLAG_RXHASH) |
533 | case ETHTOOL_SSG: | 253 | #define ETH_ALL_FEATURES (NETIF_F_LRO | NETIF_F_HW_VLAN_RX | \ |
534 | return __ethtool_set_sg(dev, edata.data); | 254 | NETIF_F_HW_VLAN_TX | NETIF_F_NTUPLE | NETIF_F_RXHASH) |
535 | case ETHTOOL_STSO: | 255 | |
536 | return __ethtool_set_tso(dev, edata.data); | 256 | static u32 __ethtool_get_flags(struct net_device *dev) |
537 | case ETHTOOL_SUFO: | 257 | { |
538 | return __ethtool_set_ufo(dev, edata.data); | 258 | u32 flags = 0; |
539 | default: | 259 | |
540 | return -EOPNOTSUPP; | 260 | if (dev->features & NETIF_F_LRO) flags |= ETH_FLAG_LRO; |
541 | } | 261 | if (dev->features & NETIF_F_HW_VLAN_RX) flags |= ETH_FLAG_RXVLAN; |
262 | if (dev->features & NETIF_F_HW_VLAN_TX) flags |= ETH_FLAG_TXVLAN; | ||
263 | if (dev->features & NETIF_F_NTUPLE) flags |= ETH_FLAG_NTUPLE; | ||
264 | if (dev->features & NETIF_F_RXHASH) flags |= ETH_FLAG_RXHASH; | ||
265 | |||
266 | return flags; | ||
542 | } | 267 | } |
543 | 268 | ||
544 | int __ethtool_set_flags(struct net_device *dev, u32 data) | 269 | static int __ethtool_set_flags(struct net_device *dev, u32 data) |
545 | { | 270 | { |
546 | u32 changed; | 271 | netdev_features_t features = 0, changed; |
547 | 272 | ||
548 | if (data & ~flags_dup_features) | 273 | if (data & ~ETH_ALL_FLAGS) |
549 | return -EINVAL; | 274 | return -EINVAL; |
550 | 275 | ||
551 | /* legacy set_flags() op */ | 276 | if (data & ETH_FLAG_LRO) features |= NETIF_F_LRO; |
552 | if (dev->ethtool_ops->set_flags) { | 277 | if (data & ETH_FLAG_RXVLAN) features |= NETIF_F_HW_VLAN_RX; |
553 | if (unlikely(dev->hw_features & flags_dup_features)) | 278 | if (data & ETH_FLAG_TXVLAN) features |= NETIF_F_HW_VLAN_TX; |
554 | netdev_warn(dev, | 279 | if (data & ETH_FLAG_NTUPLE) features |= NETIF_F_NTUPLE; |
555 | "driver BUG: mixed hw_features and set_flags()\n"); | 280 | if (data & ETH_FLAG_RXHASH) features |= NETIF_F_RXHASH; |
556 | return dev->ethtool_ops->set_flags(dev, data); | ||
557 | } | ||
558 | 281 | ||
559 | /* allow changing only bits set in hw_features */ | 282 | /* allow changing only bits set in hw_features */ |
560 | changed = (data ^ dev->features) & flags_dup_features; | 283 | changed = (features ^ dev->features) & ETH_ALL_FEATURES; |
561 | if (changed & ~dev->hw_features) | 284 | if (changed & ~dev->hw_features) |
562 | return (changed & dev->hw_features) ? -EINVAL : -EOPNOTSUPP; | 285 | return (changed & dev->hw_features) ? -EINVAL : -EOPNOTSUPP; |
563 | 286 | ||
564 | dev->wanted_features = | 287 | dev->wanted_features = |
565 | (dev->wanted_features & ~changed) | (data & dev->hw_features); | 288 | (dev->wanted_features & ~changed) | (features & changed); |
566 | 289 | ||
567 | __netdev_update_features(dev); | 290 | __netdev_update_features(dev); |
568 | 291 | ||
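With the legacy get_flags/set_flags ops gone, ETH_FLAG_* requests are translated into NETIF_F_* bits and pushed through the same wanted_features path as everything else. A standalone model of the two mapping directions, using illustrative stand-in bit values rather than the real header constants:

    #include <stdint.h>
    #include <stdio.h>

    /* Illustrative stand-in values; the real ETH_FLAG_* and NETIF_F_*
     * constants live in the uapi and netdevice headers. */
    enum { FLAG_LRO = 1 << 0, FLAG_RXVLAN = 1 << 1 };
    enum { FEAT_LRO = 1 << 8, FEAT_RXVLAN = 1 << 9 };

    static uint64_t flags_to_features(uint32_t flags)
    {
        uint64_t f = 0;
        if (flags & FLAG_LRO)    f |= FEAT_LRO;
        if (flags & FLAG_RXVLAN) f |= FEAT_RXVLAN;
        return f;
    }

    static uint32_t features_to_flags(uint64_t feat)
    {
        uint32_t f = 0;
        if (feat & FEAT_LRO)    f |= FLAG_LRO;
        if (feat & FEAT_RXVLAN) f |= FLAG_RXVLAN;
        return f;
    }

    int main(void)
    {
        uint32_t flags = FLAG_LRO | FLAG_RXVLAN;
        printf("round-trip ok: %d\n",
               features_to_flags(flags_to_features(flags)) == flags);
        return 0;
    }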
@@ -716,6 +439,7 @@ static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev, | |||
716 | { | 439 | { |
717 | struct ethtool_rxnfc info; | 440 | struct ethtool_rxnfc info; |
718 | size_t info_size = sizeof(info); | 441 | size_t info_size = sizeof(info); |
442 | int rc; | ||
719 | 443 | ||
720 | if (!dev->ethtool_ops->set_rxnfc) | 444 | if (!dev->ethtool_ops->set_rxnfc) |
721 | return -EOPNOTSUPP; | 445 | return -EOPNOTSUPP; |
@@ -731,7 +455,15 @@ static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev, | |||
731 | if (copy_from_user(&info, useraddr, info_size)) | 455 | if (copy_from_user(&info, useraddr, info_size)) |
732 | return -EFAULT; | 456 | return -EFAULT; |
733 | 457 | ||
734 | return dev->ethtool_ops->set_rxnfc(dev, &info); | 458 | rc = dev->ethtool_ops->set_rxnfc(dev, &info); |
459 | if (rc) | ||
460 | return rc; | ||
461 | |||
462 | if (cmd == ETHTOOL_SRXCLSRLINS && | ||
463 | copy_to_user(useraddr, &info, info_size)) | ||
464 | return -EFAULT; | ||
465 | |||
466 | return 0; | ||
735 | } | 467 | } |
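The new copy-back matters to userspace: a driver that chooses the rule slot itself returns it in fs.location, and the caller can only see that if the struct is copied back after ETHTOOL_SRXCLSRLINS. A hedged userspace sketch of the ioctl round-trip (error handling trimmed; RX_CLS_LOC_ANY is assumed to be available in this uapi):

    #include <string.h>
    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <sys/socket.h>
    #include <net/if.h>
    #include <linux/ethtool.h>
    #include <linux/sockios.h>

    int insert_rule(int fd, const char *ifname, struct ethtool_rx_flow_spec *fs)
    {
        struct ethtool_rxnfc nfc;
        struct ifreq ifr;

        memset(&nfc, 0, sizeof(nfc));
        nfc.cmd = ETHTOOL_SRXCLSRLINS;
        nfc.fs = *fs;               /* fs.location may be RX_CLS_LOC_ANY */

        memset(&ifr, 0, sizeof(ifr));
        strncpy(ifr.ifr_name, ifname, IFNAMSIZ - 1);
        ifr.ifr_data = (void *)&nfc;

        if (ioctl(fd, SIOCETHTOOL, &ifr) < 0)
            return -1;
        /* the kernel copied the struct back; see which slot was used */
        printf("rule installed at location %u\n", nfc.fs.location);
        return 0;
    }

fd here would be any socket, e.g. socket(AF_INET, SOCK_DGRAM, 0), the usual carrier for SIOCETHTOOL.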
736 | 468 | ||
737 | static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev, | 469 | static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev, |
@@ -792,34 +524,44 @@ err_out: | |||
792 | static noinline_for_stack int ethtool_get_rxfh_indir(struct net_device *dev, | 524 | static noinline_for_stack int ethtool_get_rxfh_indir(struct net_device *dev, |
793 | void __user *useraddr) | 525 | void __user *useraddr) |
794 | { | 526 | { |
795 | struct ethtool_rxfh_indir *indir; | 527 | u32 user_size, dev_size; |
796 | u32 table_size; | 528 | u32 *indir; |
797 | size_t full_size; | ||
798 | int ret; | 529 | int ret; |
799 | 530 | ||
800 | if (!dev->ethtool_ops->get_rxfh_indir) | 531 | if (!dev->ethtool_ops->get_rxfh_indir_size || |
532 | !dev->ethtool_ops->get_rxfh_indir) | ||
533 | return -EOPNOTSUPP; | ||
534 | dev_size = dev->ethtool_ops->get_rxfh_indir_size(dev); | ||
535 | if (dev_size == 0) | ||
801 | return -EOPNOTSUPP; | 536 | return -EOPNOTSUPP; |
802 | 537 | ||
803 | if (copy_from_user(&table_size, | 538 | if (copy_from_user(&user_size, |
804 | useraddr + offsetof(struct ethtool_rxfh_indir, size), | 539 | useraddr + offsetof(struct ethtool_rxfh_indir, size), |
805 | sizeof(table_size))) | 540 | sizeof(user_size))) |
806 | return -EFAULT; | 541 | return -EFAULT; |
807 | 542 | ||
808 | if (table_size > | 543 | if (copy_to_user(useraddr + offsetof(struct ethtool_rxfh_indir, size), |
809 | (KMALLOC_MAX_SIZE - sizeof(*indir)) / sizeof(*indir->ring_index)) | 544 | &dev_size, sizeof(dev_size))) |
810 | return -ENOMEM; | 545 | return -EFAULT; |
811 | full_size = sizeof(*indir) + sizeof(*indir->ring_index) * table_size; | 546 | |
812 | indir = kzalloc(full_size, GFP_USER); | 547 | /* If the user buffer size is 0, this is just a query for the |
548 | * device table size. Otherwise, if it's smaller than the | ||
549 | * device table size, it's an error. | ||
550 | */ | ||
551 | if (user_size < dev_size) | ||
552 | return user_size == 0 ? 0 : -EINVAL; | ||
553 | |||
554 | indir = kcalloc(dev_size, sizeof(indir[0]), GFP_USER); | ||
813 | if (!indir) | 555 | if (!indir) |
814 | return -ENOMEM; | 556 | return -ENOMEM; |
815 | 557 | ||
816 | indir->cmd = ETHTOOL_GRXFHINDIR; | ||
817 | indir->size = table_size; | ||
818 | ret = dev->ethtool_ops->get_rxfh_indir(dev, indir); | 558 | ret = dev->ethtool_ops->get_rxfh_indir(dev, indir); |
819 | if (ret) | 559 | if (ret) |
820 | goto out; | 560 | goto out; |
821 | 561 | ||
822 | if (copy_to_user(useraddr, indir, full_size)) | 562 | if (copy_to_user(useraddr + |
563 | offsetof(struct ethtool_rxfh_indir, ring_index[0]), | ||
564 | indir, dev_size * sizeof(indir[0]))) | ||
823 | ret = -EFAULT; | 565 | ret = -EFAULT; |
824 | 566 | ||
825 | out: | 567 | out: |
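The rewritten GRXFHINDIR is a two-step protocol: a call with size 0 is a pure size query that the kernel answers by writing the device table size back, and a second call with a matching buffer fetches the table itself. A userspace sketch of the handshake, error handling kept minimal:

    #include <stdlib.h>
    #include <sys/ioctl.h>
    #include <net/if.h>
    #include <linux/ethtool.h>
    #include <linux/sockios.h>

    static int do_ethtool(int fd, struct ifreq *ifr, void *data)
    {
        ifr->ifr_data = data;
        return ioctl(fd, SIOCETHTOOL, ifr);
    }

    struct ethtool_rxfh_indir *get_indir(int fd, struct ifreq *ifr)
    {
        struct ethtool_rxfh_indir query = {
            .cmd = ETHTOOL_GRXFHINDIR, .size = 0 };
        struct ethtool_rxfh_indir *table;

        if (do_ethtool(fd, ifr, &query) < 0)   /* step 1: size query */
            return NULL;

        table = malloc(sizeof(*table) +
                       query.size * sizeof(table->ring_index[0]));
        if (!table)
            return NULL;
        table->cmd = ETHTOOL_GRXFHINDIR;
        table->size = query.size;              /* step 2: real fetch */
        if (do_ethtool(fd, ifr, table) < 0) {
            free(table);
            return NULL;
        }
        return table;
    }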
@@ -830,30 +572,56 @@ out: | |||
830 | static noinline_for_stack int ethtool_set_rxfh_indir(struct net_device *dev, | 572 | static noinline_for_stack int ethtool_set_rxfh_indir(struct net_device *dev, |
831 | void __user *useraddr) | 573 | void __user *useraddr) |
832 | { | 574 | { |
833 | struct ethtool_rxfh_indir *indir; | 575 | struct ethtool_rxnfc rx_rings; |
834 | u32 table_size; | 576 | u32 user_size, dev_size, i; |
835 | size_t full_size; | 577 | u32 *indir; |
836 | int ret; | 578 | int ret; |
837 | 579 | ||
838 | if (!dev->ethtool_ops->set_rxfh_indir) | 580 | if (!dev->ethtool_ops->get_rxfh_indir_size || |
581 | !dev->ethtool_ops->set_rxfh_indir || | ||
582 | !dev->ethtool_ops->get_rxnfc) | ||
583 | return -EOPNOTSUPP; | ||
584 | dev_size = dev->ethtool_ops->get_rxfh_indir_size(dev); | ||
585 | if (dev_size == 0) | ||
839 | return -EOPNOTSUPP; | 586 | return -EOPNOTSUPP; |
840 | 587 | ||
841 | if (copy_from_user(&table_size, | 588 | if (copy_from_user(&user_size, |
842 | useraddr + offsetof(struct ethtool_rxfh_indir, size), | 589 | useraddr + offsetof(struct ethtool_rxfh_indir, size), |
843 | sizeof(table_size))) | 590 | sizeof(user_size))) |
844 | return -EFAULT; | 591 | return -EFAULT; |
845 | 592 | ||
846 | if (table_size > | 593 | if (user_size != 0 && user_size != dev_size) |
847 | (KMALLOC_MAX_SIZE - sizeof(*indir)) / sizeof(*indir->ring_index)) | 594 | return -EINVAL; |
848 | return -ENOMEM; | 595 | |
849 | full_size = sizeof(*indir) + sizeof(*indir->ring_index) * table_size; | 596 | indir = kcalloc(dev_size, sizeof(indir[0]), GFP_USER); |
850 | indir = kmalloc(full_size, GFP_USER); | ||
851 | if (!indir) | 597 | if (!indir) |
852 | return -ENOMEM; | 598 | return -ENOMEM; |
853 | 599 | ||
854 | if (copy_from_user(indir, useraddr, full_size)) { | 600 | rx_rings.cmd = ETHTOOL_GRXRINGS; |
855 | ret = -EFAULT; | 601 | ret = dev->ethtool_ops->get_rxnfc(dev, &rx_rings, NULL); |
602 | if (ret) | ||
856 | goto out; | 603 | goto out; |
604 | |||
605 | if (user_size == 0) { | ||
606 | for (i = 0; i < dev_size; i++) | ||
607 | indir[i] = ethtool_rxfh_indir_default(i, rx_rings.data); | ||
608 | } else { | ||
609 | if (copy_from_user(indir, | ||
610 | useraddr + | ||
611 | offsetof(struct ethtool_rxfh_indir, | ||
612 | ring_index[0]), | ||
613 | dev_size * sizeof(indir[0]))) { | ||
614 | ret = -EFAULT; | ||
615 | goto out; | ||
616 | } | ||
617 | |||
618 | /* Validate ring indices */ | ||
619 | for (i = 0; i < dev_size; i++) { | ||
620 | if (indir[i] >= rx_rings.data) { | ||
621 | ret = -EINVAL; | ||
622 | goto out; | ||
623 | } | ||
624 | } | ||
857 | } | 625 | } |
858 | 626 | ||
859 | ret = dev->ethtool_ops->set_rxfh_indir(dev, indir); | 627 | ret = dev->ethtool_ops->set_rxfh_indir(dev, indir); |
@@ -863,58 +631,6 @@ out: | |||
863 | return ret; | 631 | return ret; |
864 | } | 632 | } |
865 | 633 | ||
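Symmetrically, an SRXFHINDIR call with size 0 now resets the table to the driver default spread instead of failing. The ethtool_rxfh_indir_default() helper is assumed here to be a plain round-robin over the RX ring count, along the lines of:

    /* assumed shape of the default-spread helper */
    static inline u32 ethtool_rxfh_indir_default(u32 index, u32 n_rx_rings)
    {
        return index % n_rx_rings;
    }

so a 128-entry table over 4 rings would come out as 0, 1, 2, 3, 0, 1, 2, 3, and so on.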
866 | /* | ||
867 | * ethtool does not (or did not) set masks for flow parameters that are | ||
868 | * not specified, so if both value and mask are 0 then this must be | ||
869 | * treated as equivalent to a mask with all bits set. Implement that | ||
870 | * here rather than in drivers. | ||
871 | */ | ||
872 | static void rx_ntuple_fix_masks(struct ethtool_rx_ntuple_flow_spec *fs) | ||
873 | { | ||
874 | struct ethtool_tcpip4_spec *entry = &fs->h_u.tcp_ip4_spec; | ||
875 | struct ethtool_tcpip4_spec *mask = &fs->m_u.tcp_ip4_spec; | ||
876 | |||
877 | if (fs->flow_type != TCP_V4_FLOW && | ||
878 | fs->flow_type != UDP_V4_FLOW && | ||
879 | fs->flow_type != SCTP_V4_FLOW) | ||
880 | return; | ||
881 | |||
882 | if (!(entry->ip4src | mask->ip4src)) | ||
883 | mask->ip4src = htonl(0xffffffff); | ||
884 | if (!(entry->ip4dst | mask->ip4dst)) | ||
885 | mask->ip4dst = htonl(0xffffffff); | ||
886 | if (!(entry->psrc | mask->psrc)) | ||
887 | mask->psrc = htons(0xffff); | ||
888 | if (!(entry->pdst | mask->pdst)) | ||
889 | mask->pdst = htons(0xffff); | ||
890 | if (!(entry->tos | mask->tos)) | ||
891 | mask->tos = 0xff; | ||
892 | if (!(fs->vlan_tag | fs->vlan_tag_mask)) | ||
893 | fs->vlan_tag_mask = 0xffff; | ||
894 | if (!(fs->data | fs->data_mask)) | ||
895 | fs->data_mask = 0xffffffffffffffffULL; | ||
896 | } | ||
897 | |||
898 | static noinline_for_stack int ethtool_set_rx_ntuple(struct net_device *dev, | ||
899 | void __user *useraddr) | ||
900 | { | ||
901 | struct ethtool_rx_ntuple cmd; | ||
902 | const struct ethtool_ops *ops = dev->ethtool_ops; | ||
903 | |||
904 | if (!ops->set_rx_ntuple) | ||
905 | return -EOPNOTSUPP; | ||
906 | |||
907 | if (!(dev->features & NETIF_F_NTUPLE)) | ||
908 | return -EINVAL; | ||
909 | |||
910 | if (copy_from_user(&cmd, useraddr, sizeof(cmd))) | ||
911 | return -EFAULT; | ||
912 | |||
913 | rx_ntuple_fix_masks(&cmd.fs); | ||
914 | |||
915 | return ops->set_rx_ntuple(dev, &cmd); | ||
916 | } | ||
917 | |||
918 | static int ethtool_get_regs(struct net_device *dev, char __user *useraddr) | 634 | static int ethtool_get_regs(struct net_device *dev, char __user *useraddr) |
919 | { | 635 | { |
920 | struct ethtool_regs regs; | 636 | struct ethtool_regs regs; |
@@ -1231,81 +947,6 @@ static int ethtool_set_pauseparam(struct net_device *dev, void __user *useraddr) | |||
1231 | return dev->ethtool_ops->set_pauseparam(dev, &pauseparam); | 947 | return dev->ethtool_ops->set_pauseparam(dev, &pauseparam); |
1232 | } | 948 | } |
1233 | 949 | ||
1234 | static int __ethtool_set_sg(struct net_device *dev, u32 data) | ||
1235 | { | ||
1236 | int err; | ||
1237 | |||
1238 | if (!dev->ethtool_ops->set_sg) | ||
1239 | return -EOPNOTSUPP; | ||
1240 | |||
1241 | if (data && !(dev->features & NETIF_F_ALL_CSUM)) | ||
1242 | return -EINVAL; | ||
1243 | |||
1244 | if (!data && dev->ethtool_ops->set_tso) { | ||
1245 | err = dev->ethtool_ops->set_tso(dev, 0); | ||
1246 | if (err) | ||
1247 | return err; | ||
1248 | } | ||
1249 | |||
1250 | if (!data && dev->ethtool_ops->set_ufo) { | ||
1251 | err = dev->ethtool_ops->set_ufo(dev, 0); | ||
1252 | if (err) | ||
1253 | return err; | ||
1254 | } | ||
1255 | return dev->ethtool_ops->set_sg(dev, data); | ||
1256 | } | ||
1257 | |||
1258 | static int __ethtool_set_tx_csum(struct net_device *dev, u32 data) | ||
1259 | { | ||
1260 | int err; | ||
1261 | |||
1262 | if (!dev->ethtool_ops->set_tx_csum) | ||
1263 | return -EOPNOTSUPP; | ||
1264 | |||
1265 | if (!data && dev->ethtool_ops->set_sg) { | ||
1266 | err = __ethtool_set_sg(dev, 0); | ||
1267 | if (err) | ||
1268 | return err; | ||
1269 | } | ||
1270 | |||
1271 | return dev->ethtool_ops->set_tx_csum(dev, data); | ||
1272 | } | ||
1273 | |||
1274 | static int __ethtool_set_rx_csum(struct net_device *dev, u32 data) | ||
1275 | { | ||
1276 | if (!dev->ethtool_ops->set_rx_csum) | ||
1277 | return -EOPNOTSUPP; | ||
1278 | |||
1279 | if (!data) | ||
1280 | dev->features &= ~NETIF_F_GRO; | ||
1281 | |||
1282 | return dev->ethtool_ops->set_rx_csum(dev, data); | ||
1283 | } | ||
1284 | |||
1285 | static int __ethtool_set_tso(struct net_device *dev, u32 data) | ||
1286 | { | ||
1287 | if (!dev->ethtool_ops->set_tso) | ||
1288 | return -EOPNOTSUPP; | ||
1289 | |||
1290 | if (data && !(dev->features & NETIF_F_SG)) | ||
1291 | return -EINVAL; | ||
1292 | |||
1293 | return dev->ethtool_ops->set_tso(dev, data); | ||
1294 | } | ||
1295 | |||
1296 | static int __ethtool_set_ufo(struct net_device *dev, u32 data) | ||
1297 | { | ||
1298 | if (!dev->ethtool_ops->set_ufo) | ||
1299 | return -EOPNOTSUPP; | ||
1300 | if (data && !(dev->features & NETIF_F_SG)) | ||
1301 | return -EINVAL; | ||
1302 | if (data && !((dev->features & NETIF_F_GEN_CSUM) || | ||
1303 | (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM)) | ||
1304 | == (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) | ||
1305 | return -EINVAL; | ||
1306 | return dev->ethtool_ops->set_ufo(dev, data); | ||
1307 | } | ||
1308 | |||
1309 | static int ethtool_self_test(struct net_device *dev, char __user *useraddr) | 950 | static int ethtool_self_test(struct net_device *dev, char __user *useraddr) |
1310 | { | 951 | { |
1311 | struct ethtool_test test; | 952 | struct ethtool_test test; |
@@ -1549,6 +1190,8 @@ static noinline_for_stack int ethtool_flash_device(struct net_device *dev, | |||
1549 | if (!dev->ethtool_ops->flash_device) | 1190 | if (!dev->ethtool_ops->flash_device) |
1550 | return -EOPNOTSUPP; | 1191 | return -EOPNOTSUPP; |
1551 | 1192 | ||
1193 | efl.data[ETHTOOL_FLASH_MAX_FILENAME - 1] = 0; | ||
1194 | |||
1552 | return dev->ethtool_ops->flash_device(dev, &efl); | 1195 | return dev->ethtool_ops->flash_device(dev, &efl); |
1553 | } | 1196 | } |
1554 | 1197 | ||
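The one-line fix above closes an unterminated-string hole: efl.data arrives straight from userspace and drivers treat it as a C string. The general defensive pattern for any fixed-size user-supplied buffer is to force-terminate before use; a minimal sketch:

    #include <string.h>

    /* Copy an untrusted, possibly unterminated buffer and guarantee
     * a NUL terminator before handing it to string functions. */
    static void copy_user_name(char *dst, const char *src, size_t size)
    {
        memcpy(dst, src, size);
        dst[size - 1] = '\0';
    }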
@@ -1670,6 +1313,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) | |||
1670 | case ETHTOOL_GRXCSUM: | 1313 | case ETHTOOL_GRXCSUM: |
1671 | case ETHTOOL_GTXCSUM: | 1314 | case ETHTOOL_GTXCSUM: |
1672 | case ETHTOOL_GSG: | 1315 | case ETHTOOL_GSG: |
1316 | case ETHTOOL_GSSET_INFO: | ||
1673 | case ETHTOOL_GSTRINGS: | 1317 | case ETHTOOL_GSTRINGS: |
1674 | case ETHTOOL_GTSO: | 1318 | case ETHTOOL_GTSO: |
1675 | case ETHTOOL_GPERMADDR: | 1319 | case ETHTOOL_GPERMADDR: |
@@ -1771,9 +1415,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) | |||
1771 | break; | 1415 | break; |
1772 | case ETHTOOL_GFLAGS: | 1416 | case ETHTOOL_GFLAGS: |
1773 | rc = ethtool_get_value(dev, useraddr, ethcmd, | 1417 | rc = ethtool_get_value(dev, useraddr, ethcmd, |
1774 | (dev->ethtool_ops->get_flags ? | 1418 | __ethtool_get_flags); |
1775 | dev->ethtool_ops->get_flags : | ||
1776 | ethtool_op_get_flags)); | ||
1777 | break; | 1419 | break; |
1778 | case ETHTOOL_SFLAGS: | 1420 | case ETHTOOL_SFLAGS: |
1779 | rc = ethtool_set_value(dev, useraddr, __ethtool_set_flags); | 1421 | rc = ethtool_set_value(dev, useraddr, __ethtool_set_flags); |
@@ -1804,9 +1446,6 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) | |||
1804 | case ETHTOOL_RESET: | 1446 | case ETHTOOL_RESET: |
1805 | rc = ethtool_reset(dev, useraddr); | 1447 | rc = ethtool_reset(dev, useraddr); |
1806 | break; | 1448 | break; |
1807 | case ETHTOOL_SRXNTUPLE: | ||
1808 | rc = ethtool_set_rx_ntuple(dev, useraddr); | ||
1809 | break; | ||
1810 | case ETHTOOL_GSSET_INFO: | 1449 | case ETHTOOL_GSSET_INFO: |
1811 | rc = ethtool_get_sset_info(dev, useraddr); | 1450 | rc = ethtool_get_sset_info(dev, useraddr); |
1812 | break; | 1451 | break; |
diff --git a/net/core/flow.c b/net/core/flow.c index 8ae42de9c79e..e318c7e98042 100644 --- a/net/core/flow.c +++ b/net/core/flow.c | |||
@@ -358,6 +358,18 @@ void flow_cache_flush(void) | |||
358 | put_online_cpus(); | 358 | put_online_cpus(); |
359 | } | 359 | } |
360 | 360 | ||
361 | static void flow_cache_flush_task(struct work_struct *work) | ||
362 | { | ||
363 | flow_cache_flush(); | ||
364 | } | ||
365 | |||
366 | static DECLARE_WORK(flow_cache_flush_work, flow_cache_flush_task); | ||
367 | |||
368 | void flow_cache_flush_deferred(void) | ||
369 | { | ||
370 | schedule_work(&flow_cache_flush_work); | ||
371 | } | ||
372 | |||
361 | static int __cpuinit flow_cache_cpu_prepare(struct flow_cache *fc, int cpu) | 373 | static int __cpuinit flow_cache_cpu_prepare(struct flow_cache *fc, int cpu) |
362 | { | 374 | { |
363 | struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, cpu); | 375 | struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, cpu); |
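flow_cache_flush() blocks on cpu hotplug and per-cpu tasklets, so callers in atomic context get a deferred variant that only queues work. The DECLARE_WORK pattern above is the stock one-shot deferral idiom; a kernel-side sketch under the same assumptions (the my_* names are hypothetical):

    #include <linux/workqueue.h>

    static void my_flush_task(struct work_struct *work)
    {
        /* runs later in process context, may sleep and take locks */
    }

    static DECLARE_WORK(my_flush_work, my_flush_task);

    void my_flush_deferred(void)
    {
        /* safe from atomic context; duplicate calls coalesce while
         * the work item is still pending */
        schedule_work(&my_flush_work);
    }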
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c new file mode 100644 index 000000000000..a225089df5b6 --- /dev/null +++ b/net/core/flow_dissector.c | |||
@@ -0,0 +1,144 @@ | |||
1 | #include <linux/skbuff.h> | ||
2 | #include <linux/export.h> | ||
3 | #include <linux/ip.h> | ||
4 | #include <linux/ipv6.h> | ||
5 | #include <linux/if_vlan.h> | ||
6 | #include <net/ip.h> | ||
7 | #include <linux/if_tunnel.h> | ||
8 | #include <linux/if_pppox.h> | ||
9 | #include <linux/ppp_defs.h> | ||
10 | #include <net/flow_keys.h> | ||
11 | |||
12 | /* copy saddr & daddr, possibly using 64-bit load/store | ||
13 | * Equivalent to: flow->src = iph->saddr; | ||
14 | * flow->dst = iph->daddr; | ||
15 | */ | ||
16 | static void iph_to_flow_copy_addrs(struct flow_keys *flow, const struct iphdr *iph) | ||
17 | { | ||
18 | BUILD_BUG_ON(offsetof(typeof(*flow), dst) != | ||
19 | offsetof(typeof(*flow), src) + sizeof(flow->src)); | ||
20 | memcpy(&flow->src, &iph->saddr, sizeof(flow->src) + sizeof(flow->dst)); | ||
21 | } | ||
22 | |||
23 | bool skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow) | ||
24 | { | ||
25 | int poff, nhoff = skb_network_offset(skb); | ||
26 | u8 ip_proto; | ||
27 | __be16 proto = skb->protocol; | ||
28 | |||
29 | memset(flow, 0, sizeof(*flow)); | ||
30 | |||
31 | again: | ||
32 | switch (proto) { | ||
33 | case __constant_htons(ETH_P_IP): { | ||
34 | const struct iphdr *iph; | ||
35 | struct iphdr _iph; | ||
36 | ip: | ||
37 | iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph); | ||
38 | if (!iph) | ||
39 | return false; | ||
40 | |||
41 | if (ip_is_fragment(iph)) | ||
42 | ip_proto = 0; | ||
43 | else | ||
44 | ip_proto = iph->protocol; | ||
45 | iph_to_flow_copy_addrs(flow, iph); | ||
46 | nhoff += iph->ihl * 4; | ||
47 | break; | ||
48 | } | ||
49 | case __constant_htons(ETH_P_IPV6): { | ||
50 | const struct ipv6hdr *iph; | ||
51 | struct ipv6hdr _iph; | ||
52 | ipv6: | ||
53 | iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph); | ||
54 | if (!iph) | ||
55 | return false; | ||
56 | |||
57 | ip_proto = iph->nexthdr; | ||
58 | flow->src = iph->saddr.s6_addr32[3]; | ||
59 | flow->dst = iph->daddr.s6_addr32[3]; | ||
60 | nhoff += sizeof(struct ipv6hdr); | ||
61 | break; | ||
62 | } | ||
63 | case __constant_htons(ETH_P_8021Q): { | ||
64 | const struct vlan_hdr *vlan; | ||
65 | struct vlan_hdr _vlan; | ||
66 | |||
67 | vlan = skb_header_pointer(skb, nhoff, sizeof(_vlan), &_vlan); | ||
68 | if (!vlan) | ||
69 | return false; | ||
70 | |||
71 | proto = vlan->h_vlan_encapsulated_proto; | ||
72 | nhoff += sizeof(*vlan); | ||
73 | goto again; | ||
74 | } | ||
75 | case __constant_htons(ETH_P_PPP_SES): { | ||
76 | struct { | ||
77 | struct pppoe_hdr hdr; | ||
78 | __be16 proto; | ||
79 | } *hdr, _hdr; | ||
80 | hdr = skb_header_pointer(skb, nhoff, sizeof(_hdr), &_hdr); | ||
81 | if (!hdr) | ||
82 | return false; | ||
83 | proto = hdr->proto; | ||
84 | nhoff += PPPOE_SES_HLEN; | ||
85 | switch (proto) { | ||
86 | case __constant_htons(PPP_IP): | ||
87 | goto ip; | ||
88 | case __constant_htons(PPP_IPV6): | ||
89 | goto ipv6; | ||
90 | default: | ||
91 | return false; | ||
92 | } | ||
93 | } | ||
94 | default: | ||
95 | return false; | ||
96 | } | ||
97 | |||
98 | switch (ip_proto) { | ||
99 | case IPPROTO_GRE: { | ||
100 | struct gre_hdr { | ||
101 | __be16 flags; | ||
102 | __be16 proto; | ||
103 | } *hdr, _hdr; | ||
104 | |||
105 | hdr = skb_header_pointer(skb, nhoff, sizeof(_hdr), &_hdr); | ||
106 | if (!hdr) | ||
107 | return false; | ||
108 | /* | ||
109 | * Only look inside GRE if version zero and no | ||
110 | * routing | ||
111 | */ | ||
112 | if (!(hdr->flags & (GRE_VERSION|GRE_ROUTING))) { | ||
113 | proto = hdr->proto; | ||
114 | nhoff += 4; | ||
115 | if (hdr->flags & GRE_CSUM) | ||
116 | nhoff += 4; | ||
117 | if (hdr->flags & GRE_KEY) | ||
118 | nhoff += 4; | ||
119 | if (hdr->flags & GRE_SEQ) | ||
120 | nhoff += 4; | ||
121 | goto again; | ||
122 | } | ||
123 | break; | ||
124 | } | ||
125 | case IPPROTO_IPIP: | ||
126 | goto again; | ||
127 | default: | ||
128 | break; | ||
129 | } | ||
130 | |||
131 | flow->ip_proto = ip_proto; | ||
132 | poff = proto_ports_offset(ip_proto); | ||
133 | if (poff >= 0) { | ||
134 | __be32 *ports, _ports; | ||
135 | |||
136 | nhoff += poff; | ||
137 | ports = skb_header_pointer(skb, nhoff, sizeof(_ports), &_ports); | ||
138 | if (ports) | ||
139 | flow->ports = *ports; | ||
140 | } | ||
141 | |||
142 | return true; | ||
143 | } | ||
144 | EXPORT_SYMBOL(skb_flow_dissect); | ||
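Callers are expected to feed the extracted keys into a hash. A hedged kernel-side sketch of how an RX hash could be derived from skb_flow_dissect(); the exact mixing used by the core may differ:

    #include <linux/skbuff.h>
    #include <linux/jhash.h>
    #include <net/flow_keys.h>

    static u32 my_flow_hash(const struct sk_buff *skb, u32 seed)
    {
        struct flow_keys keys;
        u32 hash;

        if (!skb_flow_dissect(skb, &keys))
            return 0;

        hash = jhash_3words((__force u32)keys.dst,
                            (__force u32)keys.src,
                            (__force u32)keys.ports, seed);
        return hash ?: 1;  /* reserve 0 for "no hash computed" */
    }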
diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 039d51e6c284..2a83914b0277 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c | |||
@@ -238,6 +238,7 @@ static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev) | |||
238 | it to safe state. | 238 | it to safe state. |
239 | */ | 239 | */ |
240 | skb_queue_purge(&n->arp_queue); | 240 | skb_queue_purge(&n->arp_queue); |
241 | n->arp_queue_len_bytes = 0; | ||
241 | n->output = neigh_blackhole; | 242 | n->output = neigh_blackhole; |
242 | if (n->nud_state & NUD_VALID) | 243 | if (n->nud_state & NUD_VALID) |
243 | n->nud_state = NUD_NOARP; | 244 | n->nud_state = NUD_NOARP; |
@@ -272,7 +273,7 @@ int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev) | |||
272 | } | 273 | } |
273 | EXPORT_SYMBOL(neigh_ifdown); | 274 | EXPORT_SYMBOL(neigh_ifdown); |
274 | 275 | ||
275 | static struct neighbour *neigh_alloc(struct neigh_table *tbl) | 276 | static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device *dev) |
276 | { | 277 | { |
277 | struct neighbour *n = NULL; | 278 | struct neighbour *n = NULL; |
278 | unsigned long now = jiffies; | 279 | unsigned long now = jiffies; |
@@ -287,7 +288,15 @@ static struct neighbour *neigh_alloc(struct neigh_table *tbl) | |||
287 | goto out_entries; | 288 | goto out_entries; |
288 | } | 289 | } |
289 | 290 | ||
290 | n = kmem_cache_zalloc(tbl->kmem_cachep, GFP_ATOMIC); | 291 | if (tbl->entry_size) |
292 | n = kzalloc(tbl->entry_size, GFP_ATOMIC); | ||
293 | else { | ||
294 | int sz = sizeof(*n) + tbl->key_len; | ||
295 | |||
296 | sz = ALIGN(sz, NEIGH_PRIV_ALIGN); | ||
297 | sz += dev->neigh_priv_len; | ||
298 | n = kzalloc(sz, GFP_ATOMIC); | ||
299 | } | ||
291 | if (!n) | 300 | if (!n) |
292 | goto out_entries; | 301 | goto out_entries; |
293 | 302 | ||
@@ -313,11 +322,18 @@ out_entries: | |||
313 | goto out; | 322 | goto out; |
314 | } | 323 | } |
315 | 324 | ||
325 | static void neigh_get_hash_rnd(u32 *x) | ||
326 | { | ||
327 | get_random_bytes(x, sizeof(*x)); | ||
328 | *x |= 1; | ||
329 | } | ||
330 | |||
316 | static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift) | 331 | static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift) |
317 | { | 332 | { |
318 | size_t size = (1 << shift) * sizeof(struct neighbour *); | 333 | size_t size = (1 << shift) * sizeof(struct neighbour *); |
319 | struct neigh_hash_table *ret; | 334 | struct neigh_hash_table *ret; |
320 | struct neighbour __rcu **buckets; | 335 | struct neighbour __rcu **buckets; |
336 | int i; | ||
321 | 337 | ||
322 | ret = kmalloc(sizeof(*ret), GFP_ATOMIC); | 338 | ret = kmalloc(sizeof(*ret), GFP_ATOMIC); |
323 | if (!ret) | 339 | if (!ret) |
@@ -334,8 +350,8 @@ static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift) | |||
334 | } | 350 | } |
335 | ret->hash_buckets = buckets; | 351 | ret->hash_buckets = buckets; |
336 | ret->hash_shift = shift; | 352 | ret->hash_shift = shift; |
337 | get_random_bytes(&ret->hash_rnd, sizeof(ret->hash_rnd)); | 353 | for (i = 0; i < NEIGH_NUM_HASH_RND; i++) |
338 | ret->hash_rnd |= 1; | 354 | neigh_get_hash_rnd(&ret->hash_rnd[i]); |
339 | return ret; | 355 | return ret; |
340 | } | 356 | } |
341 | 357 | ||
@@ -462,7 +478,7 @@ struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey, | |||
462 | u32 hash_val; | 478 | u32 hash_val; |
463 | int key_len = tbl->key_len; | 479 | int key_len = tbl->key_len; |
464 | int error; | 480 | int error; |
465 | struct neighbour *n1, *rc, *n = neigh_alloc(tbl); | 481 | struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev); |
466 | struct neigh_hash_table *nht; | 482 | struct neigh_hash_table *nht; |
467 | 483 | ||
468 | if (!n) { | 484 | if (!n) { |
@@ -480,6 +496,14 @@ struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey, | |||
480 | goto out_neigh_release; | 496 | goto out_neigh_release; |
481 | } | 497 | } |
482 | 498 | ||
499 | if (dev->netdev_ops->ndo_neigh_construct) { | ||
500 | error = dev->netdev_ops->ndo_neigh_construct(n); | ||
501 | if (error < 0) { | ||
502 | rc = ERR_PTR(error); | ||
503 | goto out_neigh_release; | ||
504 | } | ||
505 | } | ||
506 | |||
483 | /* Device specific setup. */ | 507 | /* Device specific setup. */ |
484 | if (n->parms->neigh_setup && | 508 | if (n->parms->neigh_setup && |
485 | (error = n->parms->neigh_setup(n)) < 0) { | 509 | (error = n->parms->neigh_setup(n)) < 0) { |
@@ -677,18 +701,14 @@ static inline void neigh_parms_put(struct neigh_parms *parms) | |||
677 | neigh_parms_destroy(parms); | 701 | neigh_parms_destroy(parms); |
678 | } | 702 | } |
679 | 703 | ||
680 | static void neigh_destroy_rcu(struct rcu_head *head) | ||
681 | { | ||
682 | struct neighbour *neigh = container_of(head, struct neighbour, rcu); | ||
683 | |||
684 | kmem_cache_free(neigh->tbl->kmem_cachep, neigh); | ||
685 | } | ||
686 | /* | 704 | /* |
687 | * neighbour must already be out of the table; | 705 | * neighbour must already be out of the table; |
688 | * | 706 | * |
689 | */ | 707 | */ |
690 | void neigh_destroy(struct neighbour *neigh) | 708 | void neigh_destroy(struct neighbour *neigh) |
691 | { | 709 | { |
710 | struct net_device *dev = neigh->dev; | ||
711 | |||
692 | NEIGH_CACHE_STAT_INC(neigh->tbl, destroys); | 712 | NEIGH_CACHE_STAT_INC(neigh->tbl, destroys); |
693 | 713 | ||
694 | if (!neigh->dead) { | 714 | if (!neigh->dead) { |
@@ -702,14 +722,18 @@ void neigh_destroy(struct neighbour *neigh) | |||
702 | printk(KERN_WARNING "Impossible event.\n"); | 722 | printk(KERN_WARNING "Impossible event.\n"); |
703 | 723 | ||
704 | skb_queue_purge(&neigh->arp_queue); | 724 | skb_queue_purge(&neigh->arp_queue); |
725 | neigh->arp_queue_len_bytes = 0; | ||
726 | |||
727 | if (dev->netdev_ops->ndo_neigh_destroy) | ||
728 | dev->netdev_ops->ndo_neigh_destroy(neigh); | ||
705 | 729 | ||
706 | dev_put(neigh->dev); | 730 | dev_put(dev); |
707 | neigh_parms_put(neigh->parms); | 731 | neigh_parms_put(neigh->parms); |
708 | 732 | ||
709 | NEIGH_PRINTK2("neigh %p is destroyed.\n", neigh); | 733 | NEIGH_PRINTK2("neigh %p is destroyed.\n", neigh); |
710 | 734 | ||
711 | atomic_dec(&neigh->tbl->entries); | 735 | atomic_dec(&neigh->tbl->entries); |
712 | call_rcu(&neigh->rcu, neigh_destroy_rcu); | 736 | kfree_rcu(neigh, rcu); |
713 | } | 737 | } |
714 | EXPORT_SYMBOL(neigh_destroy); | 738 | EXPORT_SYMBOL(neigh_destroy); |
715 | 739 | ||
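The call_rcu() plus dedicated free-callback pair collapses into kfree_rcu() whenever the callback does nothing but free the object; the macro only needs the name of the embedded rcu_head member. A minimal sketch of the idiom (struct item is hypothetical):

    #include <linux/rcupdate.h>
    #include <linux/slab.h>

    struct item {
        int payload;
        struct rcu_head rcu;   /* must be embedded in the object */
    };

    static void release_item(struct item *it)
    {
        /* equivalent to call_rcu(&it->rcu, <callback that kfree()s>),
         * with no callback function left to maintain */
        kfree_rcu(it, rcu);
    }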
@@ -802,6 +826,8 @@ next_elt: | |||
802 | write_unlock_bh(&tbl->lock); | 826 | write_unlock_bh(&tbl->lock); |
803 | cond_resched(); | 827 | cond_resched(); |
804 | write_lock_bh(&tbl->lock); | 828 | write_lock_bh(&tbl->lock); |
829 | nht = rcu_dereference_protected(tbl->nht, | ||
830 | lockdep_is_held(&tbl->lock)); | ||
805 | } | 831 | } |
806 | /* Cycle through all hash buckets every base_reachable_time/2 ticks. | 832 | /* Cycle through all hash buckets every base_reachable_time/2 ticks. |
807 | * ARP entry timeouts range from 1/2 base_reachable_time to 3/2 | 833 | * ARP entry timeouts range from 1/2 base_reachable_time to 3/2 |
@@ -842,6 +868,7 @@ static void neigh_invalidate(struct neighbour *neigh) | |||
842 | write_lock(&neigh->lock); | 868 | write_lock(&neigh->lock); |
843 | } | 869 | } |
844 | skb_queue_purge(&neigh->arp_queue); | 870 | skb_queue_purge(&neigh->arp_queue); |
871 | neigh->arp_queue_len_bytes = 0; | ||
845 | } | 872 | } |
846 | 873 | ||
847 | static void neigh_probe(struct neighbour *neigh) | 874 | static void neigh_probe(struct neighbour *neigh) |
@@ -980,15 +1007,20 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) | |||
980 | 1007 | ||
981 | if (neigh->nud_state == NUD_INCOMPLETE) { | 1008 | if (neigh->nud_state == NUD_INCOMPLETE) { |
982 | if (skb) { | 1009 | if (skb) { |
983 | if (skb_queue_len(&neigh->arp_queue) >= | 1010 | while (neigh->arp_queue_len_bytes + skb->truesize > |
984 | neigh->parms->queue_len) { | 1011 | neigh->parms->queue_len_bytes) { |
985 | struct sk_buff *buff; | 1012 | struct sk_buff *buff; |
1013 | |||
986 | buff = __skb_dequeue(&neigh->arp_queue); | 1014 | buff = __skb_dequeue(&neigh->arp_queue); |
1015 | if (!buff) | ||
1016 | break; | ||
1017 | neigh->arp_queue_len_bytes -= buff->truesize; | ||
987 | kfree_skb(buff); | 1018 | kfree_skb(buff); |
988 | NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards); | 1019 | NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards); |
989 | } | 1020 | } |
990 | skb_dst_force(skb); | 1021 | skb_dst_force(skb); |
991 | __skb_queue_tail(&neigh->arp_queue, skb); | 1022 | __skb_queue_tail(&neigh->arp_queue, skb); |
1023 | neigh->arp_queue_len_bytes += skb->truesize; | ||
992 | } | 1024 | } |
993 | rc = 1; | 1025 | rc = 1; |
994 | } | 1026 | } |
@@ -1167,7 +1199,7 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, | |||
1167 | 1199 | ||
1168 | rcu_read_lock(); | 1200 | rcu_read_lock(); |
1169 | /* On shaper/eql skb->dst->neighbour != neigh :( */ | 1201 | /* On shaper/eql skb->dst->neighbour != neigh :( */ |
1170 | if (dst && (n2 = dst_get_neighbour(dst)) != NULL) | 1202 | if (dst && (n2 = dst_get_neighbour_noref(dst)) != NULL) |
1171 | n1 = n2; | 1203 | n1 = n2; |
1172 | n1->output(n1, skb); | 1204 | n1->output(n1, skb); |
1173 | rcu_read_unlock(); | 1205 | rcu_read_unlock(); |
@@ -1175,6 +1207,7 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, | |||
1175 | write_lock_bh(&neigh->lock); | 1207 | write_lock_bh(&neigh->lock); |
1176 | } | 1208 | } |
1177 | skb_queue_purge(&neigh->arp_queue); | 1209 | skb_queue_purge(&neigh->arp_queue); |
1210 | neigh->arp_queue_len_bytes = 0; | ||
1178 | } | 1211 | } |
1179 | out: | 1212 | out: |
1180 | if (update_isrouter) { | 1213 | if (update_isrouter) { |
@@ -1477,11 +1510,6 @@ void neigh_table_init_no_netlink(struct neigh_table *tbl) | |||
1477 | tbl->parms.reachable_time = | 1510 | tbl->parms.reachable_time = |
1478 | neigh_rand_reach_time(tbl->parms.base_reachable_time); | 1511 | neigh_rand_reach_time(tbl->parms.base_reachable_time); |
1479 | 1512 | ||
1480 | if (!tbl->kmem_cachep) | ||
1481 | tbl->kmem_cachep = | ||
1482 | kmem_cache_create(tbl->id, tbl->entry_size, 0, | ||
1483 | SLAB_HWCACHE_ALIGN|SLAB_PANIC, | ||
1484 | NULL); | ||
1485 | tbl->stats = alloc_percpu(struct neigh_statistics); | 1513 | tbl->stats = alloc_percpu(struct neigh_statistics); |
1486 | if (!tbl->stats) | 1514 | if (!tbl->stats) |
1487 | panic("cannot create neighbour cache statistics"); | 1515 | panic("cannot create neighbour cache statistics"); |
@@ -1566,9 +1594,6 @@ int neigh_table_clear(struct neigh_table *tbl) | |||
1566 | free_percpu(tbl->stats); | 1594 | free_percpu(tbl->stats); |
1567 | tbl->stats = NULL; | 1595 | tbl->stats = NULL; |
1568 | 1596 | ||
1569 | kmem_cache_destroy(tbl->kmem_cachep); | ||
1570 | tbl->kmem_cachep = NULL; | ||
1571 | |||
1572 | return 0; | 1597 | return 0; |
1573 | } | 1598 | } |
1574 | EXPORT_SYMBOL(neigh_table_clear); | 1599 | EXPORT_SYMBOL(neigh_table_clear); |
@@ -1747,7 +1772,11 @@ static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms) | |||
1747 | NLA_PUT_U32(skb, NDTPA_IFINDEX, parms->dev->ifindex); | 1772 | NLA_PUT_U32(skb, NDTPA_IFINDEX, parms->dev->ifindex); |
1748 | 1773 | ||
1749 | NLA_PUT_U32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt)); | 1774 | NLA_PUT_U32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt)); |
1750 | NLA_PUT_U32(skb, NDTPA_QUEUE_LEN, parms->queue_len); | 1775 | NLA_PUT_U32(skb, NDTPA_QUEUE_LENBYTES, parms->queue_len_bytes); |
1776 | /* approximate value for deprecated QUEUE_LEN (in packets) */ | ||
1777 | NLA_PUT_U32(skb, NDTPA_QUEUE_LEN, | ||
1778 | DIV_ROUND_UP(parms->queue_len_bytes, | ||
1779 | SKB_TRUESIZE(ETH_FRAME_LEN))); | ||
1751 | NLA_PUT_U32(skb, NDTPA_PROXY_QLEN, parms->proxy_qlen); | 1780 | NLA_PUT_U32(skb, NDTPA_PROXY_QLEN, parms->proxy_qlen); |
1752 | NLA_PUT_U32(skb, NDTPA_APP_PROBES, parms->app_probes); | 1781 | NLA_PUT_U32(skb, NDTPA_APP_PROBES, parms->app_probes); |
1753 | NLA_PUT_U32(skb, NDTPA_UCAST_PROBES, parms->ucast_probes); | 1782 | NLA_PUT_U32(skb, NDTPA_UCAST_PROBES, parms->ucast_probes); |
@@ -1808,7 +1837,7 @@ static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl, | |||
1808 | 1837 | ||
1809 | rcu_read_lock_bh(); | 1838 | rcu_read_lock_bh(); |
1810 | nht = rcu_dereference_bh(tbl->nht); | 1839 | nht = rcu_dereference_bh(tbl->nht); |
1811 | ndc.ndtc_hash_rnd = nht->hash_rnd; | 1840 | ndc.ndtc_hash_rnd = nht->hash_rnd[0]; |
1812 | ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1); | 1841 | ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1); |
1813 | rcu_read_unlock_bh(); | 1842 | rcu_read_unlock_bh(); |
1814 | 1843 | ||
@@ -1974,7 +2003,11 @@ static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) | |||
1974 | 2003 | ||
1975 | switch (i) { | 2004 | switch (i) { |
1976 | case NDTPA_QUEUE_LEN: | 2005 | case NDTPA_QUEUE_LEN: |
1977 | p->queue_len = nla_get_u32(tbp[i]); | 2006 | p->queue_len_bytes = nla_get_u32(tbp[i]) * |
2007 | SKB_TRUESIZE(ETH_FRAME_LEN); | ||
2008 | break; | ||
2009 | case NDTPA_QUEUE_LENBYTES: | ||
2010 | p->queue_len_bytes = nla_get_u32(tbp[i]); | ||
1978 | break; | 2011 | break; |
1979 | case NDTPA_PROXY_QLEN: | 2012 | case NDTPA_PROXY_QLEN: |
1980 | p->proxy_qlen = nla_get_u32(tbp[i]); | 2013 | p->proxy_qlen = nla_get_u32(tbp[i]); |
@@ -2397,7 +2430,10 @@ static struct pneigh_entry *pneigh_get_next(struct seq_file *seq, | |||
2397 | struct net *net = seq_file_net(seq); | 2430 | struct net *net = seq_file_net(seq); |
2398 | struct neigh_table *tbl = state->tbl; | 2431 | struct neigh_table *tbl = state->tbl; |
2399 | 2432 | ||
2400 | pn = pn->next; | 2433 | do { |
2434 | pn = pn->next; | ||
2435 | } while (pn && !net_eq(pneigh_net(pn), net)); | ||
2436 | |||
2401 | while (!pn) { | 2437 | while (!pn) { |
2402 | if (++state->bucket > PNEIGH_HASHMASK) | 2438 | if (++state->bucket > PNEIGH_HASHMASK) |
2403 | break; | 2439 | break; |
@@ -2635,117 +2671,158 @@ EXPORT_SYMBOL(neigh_app_ns); | |||
2635 | 2671 | ||
2636 | #ifdef CONFIG_SYSCTL | 2672 | #ifdef CONFIG_SYSCTL |
2637 | 2673 | ||
2638 | #define NEIGH_VARS_MAX 19 | 2674 | static int proc_unres_qlen(ctl_table *ctl, int write, void __user *buffer, |
2675 | size_t *lenp, loff_t *ppos) | ||
2676 | { | ||
2677 | int size, ret; | ||
2678 | ctl_table tmp = *ctl; | ||
2679 | |||
2680 | tmp.data = &size; | ||
2681 | size = DIV_ROUND_UP(*(int *)ctl->data, SKB_TRUESIZE(ETH_FRAME_LEN)); | ||
2682 | ret = proc_dointvec(&tmp, write, buffer, lenp, ppos); | ||
2683 | if (write && !ret) | ||
2684 | *(int *)ctl->data = size * SKB_TRUESIZE(ETH_FRAME_LEN); | ||
2685 | return ret; | ||
2686 | } | ||
2687 | |||
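proc_unres_qlen() above is the stock idiom for exposing a value in different units than it is stored: copy the ctl_table, point .data at a stack temporary, and convert on the way in and out. A standalone model of the same round-trip arithmetic, with TRUESIZE standing in for SKB_TRUESIZE(ETH_FRAME_LEN):

    #include <stdio.h>

    #define TRUESIZE 2048  /* stand-in for SKB_TRUESIZE(ETH_FRAME_LEN) */
    #define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

    int main(void)
    {
        int bytes = 3 * TRUESIZE;                    /* stored value  */
        int pkts  = DIV_ROUND_UP(bytes, TRUESIZE);   /* shown to user */
        int back  = pkts * TRUESIZE;                 /* written back  */

        /* read->write round-trips are stable: 3 packets stays 3 */
        printf("pkts=%d bytes=%d stable=%d\n", pkts, back, back == bytes);
        return 0;
    }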
2688 | enum { | ||
2689 | NEIGH_VAR_MCAST_PROBE, | ||
2690 | NEIGH_VAR_UCAST_PROBE, | ||
2691 | NEIGH_VAR_APP_PROBE, | ||
2692 | NEIGH_VAR_RETRANS_TIME, | ||
2693 | NEIGH_VAR_BASE_REACHABLE_TIME, | ||
2694 | NEIGH_VAR_DELAY_PROBE_TIME, | ||
2695 | NEIGH_VAR_GC_STALETIME, | ||
2696 | NEIGH_VAR_QUEUE_LEN, | ||
2697 | NEIGH_VAR_QUEUE_LEN_BYTES, | ||
2698 | NEIGH_VAR_PROXY_QLEN, | ||
2699 | NEIGH_VAR_ANYCAST_DELAY, | ||
2700 | NEIGH_VAR_PROXY_DELAY, | ||
2701 | NEIGH_VAR_LOCKTIME, | ||
2702 | NEIGH_VAR_RETRANS_TIME_MS, | ||
2703 | NEIGH_VAR_BASE_REACHABLE_TIME_MS, | ||
2704 | NEIGH_VAR_GC_INTERVAL, | ||
2705 | NEIGH_VAR_GC_THRESH1, | ||
2706 | NEIGH_VAR_GC_THRESH2, | ||
2707 | NEIGH_VAR_GC_THRESH3, | ||
2708 | NEIGH_VAR_MAX | ||
2709 | }; | ||
2639 | 2710 | ||
2640 | static struct neigh_sysctl_table { | 2711 | static struct neigh_sysctl_table { |
2641 | struct ctl_table_header *sysctl_header; | 2712 | struct ctl_table_header *sysctl_header; |
2642 | struct ctl_table neigh_vars[NEIGH_VARS_MAX]; | 2713 | struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1]; |
2643 | char *dev_name; | 2714 | char *dev_name; |
2644 | } neigh_sysctl_template __read_mostly = { | 2715 | } neigh_sysctl_template __read_mostly = { |
2645 | .neigh_vars = { | 2716 | .neigh_vars = { |
2646 | { | 2717 | [NEIGH_VAR_MCAST_PROBE] = { |
2647 | .procname = "mcast_solicit", | 2718 | .procname = "mcast_solicit", |
2648 | .maxlen = sizeof(int), | 2719 | .maxlen = sizeof(int), |
2649 | .mode = 0644, | 2720 | .mode = 0644, |
2650 | .proc_handler = proc_dointvec, | 2721 | .proc_handler = proc_dointvec, |
2651 | }, | 2722 | }, |
2652 | { | 2723 | [NEIGH_VAR_UCAST_PROBE] = { |
2653 | .procname = "ucast_solicit", | 2724 | .procname = "ucast_solicit", |
2654 | .maxlen = sizeof(int), | 2725 | .maxlen = sizeof(int), |
2655 | .mode = 0644, | 2726 | .mode = 0644, |
2656 | .proc_handler = proc_dointvec, | 2727 | .proc_handler = proc_dointvec, |
2657 | }, | 2728 | }, |
2658 | { | 2729 | [NEIGH_VAR_APP_PROBE] = { |
2659 | .procname = "app_solicit", | 2730 | .procname = "app_solicit", |
2660 | .maxlen = sizeof(int), | 2731 | .maxlen = sizeof(int), |
2661 | .mode = 0644, | 2732 | .mode = 0644, |
2662 | .proc_handler = proc_dointvec, | 2733 | .proc_handler = proc_dointvec, |
2663 | }, | 2734 | }, |
2664 | { | 2735 | [NEIGH_VAR_RETRANS_TIME] = { |
2665 | .procname = "retrans_time", | 2736 | .procname = "retrans_time", |
2666 | .maxlen = sizeof(int), | 2737 | .maxlen = sizeof(int), |
2667 | .mode = 0644, | 2738 | .mode = 0644, |
2668 | .proc_handler = proc_dointvec_userhz_jiffies, | 2739 | .proc_handler = proc_dointvec_userhz_jiffies, |
2669 | }, | 2740 | }, |
2670 | { | 2741 | [NEIGH_VAR_BASE_REACHABLE_TIME] = { |
2671 | .procname = "base_reachable_time", | 2742 | .procname = "base_reachable_time", |
2672 | .maxlen = sizeof(int), | 2743 | .maxlen = sizeof(int), |
2673 | .mode = 0644, | 2744 | .mode = 0644, |
2674 | .proc_handler = proc_dointvec_jiffies, | 2745 | .proc_handler = proc_dointvec_jiffies, |
2675 | }, | 2746 | }, |
2676 | { | 2747 | [NEIGH_VAR_DELAY_PROBE_TIME] = { |
2677 | .procname = "delay_first_probe_time", | 2748 | .procname = "delay_first_probe_time", |
2678 | .maxlen = sizeof(int), | 2749 | .maxlen = sizeof(int), |
2679 | .mode = 0644, | 2750 | .mode = 0644, |
2680 | .proc_handler = proc_dointvec_jiffies, | 2751 | .proc_handler = proc_dointvec_jiffies, |
2681 | }, | 2752 | }, |
2682 | { | 2753 | [NEIGH_VAR_GC_STALETIME] = { |
2683 | .procname = "gc_stale_time", | 2754 | .procname = "gc_stale_time", |
2684 | .maxlen = sizeof(int), | 2755 | .maxlen = sizeof(int), |
2685 | .mode = 0644, | 2756 | .mode = 0644, |
2686 | .proc_handler = proc_dointvec_jiffies, | 2757 | .proc_handler = proc_dointvec_jiffies, |
2687 | }, | 2758 | }, |
2688 | { | 2759 | [NEIGH_VAR_QUEUE_LEN] = { |
2689 | .procname = "unres_qlen", | 2760 | .procname = "unres_qlen", |
2690 | .maxlen = sizeof(int), | 2761 | .maxlen = sizeof(int), |
2691 | .mode = 0644, | 2762 | .mode = 0644, |
2763 | .proc_handler = proc_unres_qlen, | ||
2764 | }, | ||
2765 | [NEIGH_VAR_QUEUE_LEN_BYTES] = { | ||
2766 | .procname = "unres_qlen_bytes", | ||
2767 | .maxlen = sizeof(int), | ||
2768 | .mode = 0644, | ||
2692 | .proc_handler = proc_dointvec, | 2769 | .proc_handler = proc_dointvec, |
2693 | }, | 2770 | }, |
2694 | { | 2771 | [NEIGH_VAR_PROXY_QLEN] = { |
2695 | .procname = "proxy_qlen", | 2772 | .procname = "proxy_qlen", |
2696 | .maxlen = sizeof(int), | 2773 | .maxlen = sizeof(int), |
2697 | .mode = 0644, | 2774 | .mode = 0644, |
2698 | .proc_handler = proc_dointvec, | 2775 | .proc_handler = proc_dointvec, |
2699 | }, | 2776 | }, |
2700 | { | 2777 | [NEIGH_VAR_ANYCAST_DELAY] = { |
2701 | .procname = "anycast_delay", | 2778 | .procname = "anycast_delay", |
2702 | .maxlen = sizeof(int), | 2779 | .maxlen = sizeof(int), |
2703 | .mode = 0644, | 2780 | .mode = 0644, |
2704 | .proc_handler = proc_dointvec_userhz_jiffies, | 2781 | .proc_handler = proc_dointvec_userhz_jiffies, |
2705 | }, | 2782 | }, |
2706 | { | 2783 | [NEIGH_VAR_PROXY_DELAY] = { |
2707 | .procname = "proxy_delay", | 2784 | .procname = "proxy_delay", |
2708 | .maxlen = sizeof(int), | 2785 | .maxlen = sizeof(int), |
2709 | .mode = 0644, | 2786 | .mode = 0644, |
2710 | .proc_handler = proc_dointvec_userhz_jiffies, | 2787 | .proc_handler = proc_dointvec_userhz_jiffies, |
2711 | }, | 2788 | }, |
2712 | { | 2789 | [NEIGH_VAR_LOCKTIME] = { |
2713 | .procname = "locktime", | 2790 | .procname = "locktime", |
2714 | .maxlen = sizeof(int), | 2791 | .maxlen = sizeof(int), |
2715 | .mode = 0644, | 2792 | .mode = 0644, |
2716 | .proc_handler = proc_dointvec_userhz_jiffies, | 2793 | .proc_handler = proc_dointvec_userhz_jiffies, |
2717 | }, | 2794 | }, |
2718 | { | 2795 | [NEIGH_VAR_RETRANS_TIME_MS] = { |
2719 | .procname = "retrans_time_ms", | 2796 | .procname = "retrans_time_ms", |
2720 | .maxlen = sizeof(int), | 2797 | .maxlen = sizeof(int), |
2721 | .mode = 0644, | 2798 | .mode = 0644, |
2722 | .proc_handler = proc_dointvec_ms_jiffies, | 2799 | .proc_handler = proc_dointvec_ms_jiffies, |
2723 | }, | 2800 | }, |
2724 | { | 2801 | [NEIGH_VAR_BASE_REACHABLE_TIME_MS] = { |
2725 | .procname = "base_reachable_time_ms", | 2802 | .procname = "base_reachable_time_ms", |
2726 | .maxlen = sizeof(int), | 2803 | .maxlen = sizeof(int), |
2727 | .mode = 0644, | 2804 | .mode = 0644, |
2728 | .proc_handler = proc_dointvec_ms_jiffies, | 2805 | .proc_handler = proc_dointvec_ms_jiffies, |
2729 | }, | 2806 | }, |
2730 | { | 2807 | [NEIGH_VAR_GC_INTERVAL] = { |
2731 | .procname = "gc_interval", | 2808 | .procname = "gc_interval", |
2732 | .maxlen = sizeof(int), | 2809 | .maxlen = sizeof(int), |
2733 | .mode = 0644, | 2810 | .mode = 0644, |
2734 | .proc_handler = proc_dointvec_jiffies, | 2811 | .proc_handler = proc_dointvec_jiffies, |
2735 | }, | 2812 | }, |
2736 | { | 2813 | [NEIGH_VAR_GC_THRESH1] = { |
2737 | .procname = "gc_thresh1", | 2814 | .procname = "gc_thresh1", |
2738 | .maxlen = sizeof(int), | 2815 | .maxlen = sizeof(int), |
2739 | .mode = 0644, | 2816 | .mode = 0644, |
2740 | .proc_handler = proc_dointvec, | 2817 | .proc_handler = proc_dointvec, |
2741 | }, | 2818 | }, |
2742 | { | 2819 | [NEIGH_VAR_GC_THRESH2] = { |
2743 | .procname = "gc_thresh2", | 2820 | .procname = "gc_thresh2", |
2744 | .maxlen = sizeof(int), | 2821 | .maxlen = sizeof(int), |
2745 | .mode = 0644, | 2822 | .mode = 0644, |
2746 | .proc_handler = proc_dointvec, | 2823 | .proc_handler = proc_dointvec, |
2747 | }, | 2824 | }, |
2748 | { | 2825 | [NEIGH_VAR_GC_THRESH3] = { |
2749 | .procname = "gc_thresh3", | 2826 | .procname = "gc_thresh3", |
2750 | .maxlen = sizeof(int), | 2827 | .maxlen = sizeof(int), |
2751 | .mode = 0644, | 2828 | .mode = 0644, |
@@ -2778,47 +2855,49 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p, | |||
2778 | if (!t) | 2855 | if (!t) |
2779 | goto err; | 2856 | goto err; |
2780 | 2857 | ||
2781 | t->neigh_vars[0].data = &p->mcast_probes; | 2858 | t->neigh_vars[NEIGH_VAR_MCAST_PROBE].data = &p->mcast_probes; |
2782 | t->neigh_vars[1].data = &p->ucast_probes; | 2859 | t->neigh_vars[NEIGH_VAR_UCAST_PROBE].data = &p->ucast_probes; |
2783 | t->neigh_vars[2].data = &p->app_probes; | 2860 | t->neigh_vars[NEIGH_VAR_APP_PROBE].data = &p->app_probes; |
2784 | t->neigh_vars[3].data = &p->retrans_time; | 2861 | t->neigh_vars[NEIGH_VAR_RETRANS_TIME].data = &p->retrans_time; |
2785 | t->neigh_vars[4].data = &p->base_reachable_time; | 2862 | t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].data = &p->base_reachable_time; |
2786 | t->neigh_vars[5].data = &p->delay_probe_time; | 2863 | t->neigh_vars[NEIGH_VAR_DELAY_PROBE_TIME].data = &p->delay_probe_time; |
2787 | t->neigh_vars[6].data = &p->gc_staletime; | 2864 | t->neigh_vars[NEIGH_VAR_GC_STALETIME].data = &p->gc_staletime; |
2788 | t->neigh_vars[7].data = &p->queue_len; | 2865 | t->neigh_vars[NEIGH_VAR_QUEUE_LEN].data = &p->queue_len_bytes; |
2789 | t->neigh_vars[8].data = &p->proxy_qlen; | 2866 | t->neigh_vars[NEIGH_VAR_QUEUE_LEN_BYTES].data = &p->queue_len_bytes; |
2790 | t->neigh_vars[9].data = &p->anycast_delay; | 2867 | t->neigh_vars[NEIGH_VAR_PROXY_QLEN].data = &p->proxy_qlen; |
2791 | t->neigh_vars[10].data = &p->proxy_delay; | 2868 | t->neigh_vars[NEIGH_VAR_ANYCAST_DELAY].data = &p->anycast_delay; |
2792 | t->neigh_vars[11].data = &p->locktime; | 2869 | t->neigh_vars[NEIGH_VAR_PROXY_DELAY].data = &p->proxy_delay; |
2793 | t->neigh_vars[12].data = &p->retrans_time; | 2870 | t->neigh_vars[NEIGH_VAR_LOCKTIME].data = &p->locktime; |
2794 | t->neigh_vars[13].data = &p->base_reachable_time; | 2871 | t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].data = &p->retrans_time; |
2872 | t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].data = &p->base_reachable_time; | ||
2795 | 2873 | ||
2796 | if (dev) { | 2874 | if (dev) { |
2797 | dev_name_source = dev->name; | 2875 | dev_name_source = dev->name; |
2798 | /* Terminate the table early */ | 2876 | /* Terminate the table early */ |
2799 | memset(&t->neigh_vars[14], 0, sizeof(t->neigh_vars[14])); | 2877 | memset(&t->neigh_vars[NEIGH_VAR_GC_INTERVAL], 0, |
2878 | sizeof(t->neigh_vars[NEIGH_VAR_GC_INTERVAL])); | ||
2800 | } else { | 2879 | } else { |
2801 | dev_name_source = neigh_path[NEIGH_CTL_PATH_DEV].procname; | 2880 | dev_name_source = neigh_path[NEIGH_CTL_PATH_DEV].procname; |
2802 | t->neigh_vars[14].data = (int *)(p + 1); | 2881 | t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = (int *)(p + 1); |
2803 | t->neigh_vars[15].data = (int *)(p + 1) + 1; | 2882 | t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = (int *)(p + 1) + 1; |
2804 | t->neigh_vars[16].data = (int *)(p + 1) + 2; | 2883 | t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = (int *)(p + 1) + 2; |
2805 | t->neigh_vars[17].data = (int *)(p + 1) + 3; | 2884 | t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = (int *)(p + 1) + 3; |
2806 | } | 2885 | } |
2807 | 2886 | ||
2808 | 2887 | ||
2809 | if (handler) { | 2888 | if (handler) { |
2810 | /* RetransTime */ | 2889 | /* RetransTime */ |
2811 | t->neigh_vars[3].proc_handler = handler; | 2890 | t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler; |
2812 | t->neigh_vars[3].extra1 = dev; | 2891 | t->neigh_vars[NEIGH_VAR_RETRANS_TIME].extra1 = dev; |
2813 | /* ReachableTime */ | 2892 | /* ReachableTime */ |
2814 | t->neigh_vars[4].proc_handler = handler; | 2893 | t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler; |
2815 | t->neigh_vars[4].extra1 = dev; | 2894 | t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].extra1 = dev; |
2816 | /* RetransTime (in milliseconds)*/ | 2895 | /* RetransTime (in milliseconds)*/ |
2817 | t->neigh_vars[12].proc_handler = handler; | 2896 | t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler; |
2818 | t->neigh_vars[12].extra1 = dev; | 2897 | t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].extra1 = dev; |
2819 | /* ReachableTime (in milliseconds) */ | 2898 | /* ReachableTime (in milliseconds) */ |
2820 | t->neigh_vars[13].proc_handler = handler; | 2899 | t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler; |
2821 | t->neigh_vars[13].extra1 = dev; | 2900 | t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].extra1 = dev; |
2822 | } | 2901 | } |
2823 | 2902 | ||
2824 | t->dev_name = kstrdup(dev_name_source, GFP_KERNEL); | 2903 | t->dev_name = kstrdup(dev_name_source, GFP_KERNEL); |
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index c71c434a4c05..a1727cda03d7 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c | |||
@@ -21,6 +21,7 @@ | |||
21 | #include <linux/wireless.h> | 21 | #include <linux/wireless.h> |
22 | #include <linux/vmalloc.h> | 22 | #include <linux/vmalloc.h> |
23 | #include <linux/export.h> | 23 | #include <linux/export.h> |
24 | #include <linux/jiffies.h> | ||
24 | #include <net/wext.h> | 25 | #include <net/wext.h> |
25 | 26 | ||
26 | #include "net-sysfs.h" | 27 | #include "net-sysfs.h" |
@@ -606,9 +607,12 @@ static ssize_t store_rps_map(struct netdev_rx_queue *queue, | |||
606 | rcu_assign_pointer(queue->rps_map, map); | 607 | rcu_assign_pointer(queue->rps_map, map); |
607 | spin_unlock(&rps_map_lock); | 608 | spin_unlock(&rps_map_lock); |
608 | 609 | ||
609 | if (old_map) | 610 | if (map) |
611 | jump_label_inc(&rps_needed); | ||
612 | if (old_map) { | ||
610 | kfree_rcu(old_map, rcu); | 613 | kfree_rcu(old_map, rcu); |
611 | 614 | jump_label_dec(&rps_needed); | |
615 | } | ||
612 | free_cpumask_var(mask); | 616 | free_cpumask_var(mask); |
613 | return len; | 617 | return len; |
614 | } | 618 | } |
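The store_rps_map() change pairs every installed map with jump_label_inc(&rps_needed) and every removed one with jump_label_dec(), so the receive path can skip RPS entirely while no queue has a map. A small userspace sketch of how this sysfs knob is driven (device path and mask are illustrative):

#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/sys/class/net/eth0/queues/rx-0/rps_cpus", "w");

	if (!f) {
		perror("fopen");
		return 1;
	}
	/* "f" = steer to CPUs 0-3; writing "0" clears the map again,
	 * which is what drops the rps_needed jump label count */
	fputs("f\n", f);
	fclose(f);
	return 0;
}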
@@ -618,15 +622,15 @@ static ssize_t show_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue, | |||
618 | char *buf) | 622 | char *buf) |
619 | { | 623 | { |
620 | struct rps_dev_flow_table *flow_table; | 624 | struct rps_dev_flow_table *flow_table; |
621 | unsigned int val = 0; | 625 | unsigned long val = 0; |
622 | 626 | ||
623 | rcu_read_lock(); | 627 | rcu_read_lock(); |
624 | flow_table = rcu_dereference(queue->rps_flow_table); | 628 | flow_table = rcu_dereference(queue->rps_flow_table); |
625 | if (flow_table) | 629 | if (flow_table) |
626 | val = flow_table->mask + 1; | 630 | val = (unsigned long)flow_table->mask + 1; |
627 | rcu_read_unlock(); | 631 | rcu_read_unlock(); |
628 | 632 | ||
629 | return sprintf(buf, "%u\n", val); | 633 | return sprintf(buf, "%lu\n", val); |
630 | } | 634 | } |
631 | 635 | ||
632 | static void rps_dev_flow_table_release_work(struct work_struct *work) | 636 | static void rps_dev_flow_table_release_work(struct work_struct *work) |
@@ -650,33 +654,46 @@ static ssize_t store_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue, | |||
650 | struct rx_queue_attribute *attr, | 654 | struct rx_queue_attribute *attr, |
651 | const char *buf, size_t len) | 655 | const char *buf, size_t len) |
652 | { | 656 | { |
653 | unsigned int count; | 657 | unsigned long mask, count; |
654 | char *endp; | ||
655 | struct rps_dev_flow_table *table, *old_table; | 658 | struct rps_dev_flow_table *table, *old_table; |
656 | static DEFINE_SPINLOCK(rps_dev_flow_lock); | 659 | static DEFINE_SPINLOCK(rps_dev_flow_lock); |
660 | int rc; | ||
657 | 661 | ||
658 | if (!capable(CAP_NET_ADMIN)) | 662 | if (!capable(CAP_NET_ADMIN)) |
659 | return -EPERM; | 663 | return -EPERM; |
660 | 664 | ||
661 | count = simple_strtoul(buf, &endp, 0); | 665 | rc = kstrtoul(buf, 0, &count); |
662 | if (endp == buf) | 666 | if (rc < 0) |
663 | return -EINVAL; | 667 | return rc; |
664 | 668 | ||
665 | if (count) { | 669 | if (count) { |
666 | int i; | 670 | mask = count - 1; |
667 | 671 | /* mask = roundup_pow_of_two(count) - 1; | |
668 | if (count > 1<<30) { | 672 | * without overflows... |
673 | */ | ||
674 | while ((mask | (mask >> 1)) != mask) | ||
675 | mask |= (mask >> 1); | ||
676 | /* On 64-bit arches, must check mask fits in table->mask (u32), | ||
677 | * and on 32-bit arches, must check RPS_DEV_FLOW_TABLE_SIZE(mask + 1) | ||
678 | * doesn't overflow. | ||
679 | */ | ||
680 | #if BITS_PER_LONG > 32 | ||
681 | if (mask > (unsigned long)(u32)mask) | ||
682 | return -EINVAL; | ||
683 | #else | ||
684 | if (mask > (ULONG_MAX - RPS_DEV_FLOW_TABLE_SIZE(1)) | ||
685 | / sizeof(struct rps_dev_flow)) { | ||
669 | /* Enforce a limit to prevent overflow */ | 686 | /* Enforce a limit to prevent overflow */ |
670 | return -EINVAL; | 687 | return -EINVAL; |
671 | } | 688 | } |
672 | count = roundup_pow_of_two(count); | 689 | #endif |
673 | table = vmalloc(RPS_DEV_FLOW_TABLE_SIZE(count)); | 690 | table = vmalloc(RPS_DEV_FLOW_TABLE_SIZE(mask + 1)); |
674 | if (!table) | 691 | if (!table) |
675 | return -ENOMEM; | 692 | return -ENOMEM; |
676 | 693 | ||
677 | table->mask = count - 1; | 694 | table->mask = mask; |
678 | for (i = 0; i < count; i++) | 695 | for (count = 0; count <= mask; count++) |
679 | table->flows[i].cpu = RPS_NO_CPU; | 696 | table->flows[count].cpu = RPS_NO_CPU; |
680 | } else | 697 | } else |
681 | table = NULL; | 698 | table = NULL; |
682 | 699 | ||
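The bit-smearing loop above is the overflow-safe replacement for roundup_pow_of_two(count): it propagates the top set bit of count - 1 downward until the value has the form 2^k - 1, never computing anything larger than the final mask. A runnable sketch:

#include <stdio.h>

static unsigned long round_mask(unsigned long count)
{
	unsigned long mask = count - 1;

	/* smear the highest set bit down until mask is all ones */
	while ((mask | (mask >> 1)) != mask)
		mask |= (mask >> 1);
	return mask;
}

int main(void)
{
	unsigned long c;

	for (c = 1; c <= 9; c++)
		printf("count=%lu -> mask=%lu (table size %lu)\n",
		       c, round_mask(c), round_mask(c) + 1);
	return 0;
}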
@@ -780,7 +797,7 @@ net_rx_queue_update_kobjects(struct net_device *net, int old_num, int new_num) | |||
780 | #endif | 797 | #endif |
781 | } | 798 | } |
782 | 799 | ||
783 | #ifdef CONFIG_XPS | 800 | #ifdef CONFIG_SYSFS |
784 | /* | 801 | /* |
785 | * netdev_queue sysfs structures and functions. | 802 | * netdev_queue sysfs structures and functions. |
786 | */ | 803 | */ |
@@ -826,6 +843,133 @@ static const struct sysfs_ops netdev_queue_sysfs_ops = { | |||
826 | .store = netdev_queue_attr_store, | 843 | .store = netdev_queue_attr_store, |
827 | }; | 844 | }; |
828 | 845 | ||
846 | static ssize_t show_trans_timeout(struct netdev_queue *queue, | ||
847 | struct netdev_queue_attribute *attribute, | ||
848 | char *buf) | ||
849 | { | ||
850 | unsigned long trans_timeout; | ||
851 | |||
852 | spin_lock_irq(&queue->_xmit_lock); | ||
853 | trans_timeout = queue->trans_timeout; | ||
854 | spin_unlock_irq(&queue->_xmit_lock); | ||
855 | |||
856 | return sprintf(buf, "%lu", trans_timeout); | ||
857 | } | ||
858 | |||
859 | static struct netdev_queue_attribute queue_trans_timeout = | ||
860 | __ATTR(tx_timeout, S_IRUGO, show_trans_timeout, NULL); | ||
861 | |||
862 | #ifdef CONFIG_BQL | ||
863 | /* | ||
864 | * Byte queue limits sysfs structures and functions. | ||
865 | */ | ||
866 | static ssize_t bql_show(char *buf, unsigned int value) | ||
867 | { | ||
868 | return sprintf(buf, "%u\n", value); | ||
869 | } | ||
870 | |||
871 | static ssize_t bql_set(const char *buf, const size_t count, | ||
872 | unsigned int *pvalue) | ||
873 | { | ||
874 | unsigned int value; | ||
875 | int err; | ||
876 | |||
877 | if (!strcmp(buf, "max") || !strcmp(buf, "max\n")) | ||
878 | value = DQL_MAX_LIMIT; | ||
879 | else { | ||
880 | err = kstrtouint(buf, 10, &value); | ||
881 | if (err < 0) | ||
882 | return err; | ||
883 | if (value > DQL_MAX_LIMIT) | ||
884 | return -EINVAL; | ||
885 | } | ||
886 | |||
887 | *pvalue = value; | ||
888 | |||
889 | return count; | ||
890 | } | ||
891 | |||
892 | static ssize_t bql_show_hold_time(struct netdev_queue *queue, | ||
893 | struct netdev_queue_attribute *attr, | ||
894 | char *buf) | ||
895 | { | ||
896 | struct dql *dql = &queue->dql; | ||
897 | |||
898 | return sprintf(buf, "%u\n", jiffies_to_msecs(dql->slack_hold_time)); | ||
899 | } | ||
900 | |||
901 | static ssize_t bql_set_hold_time(struct netdev_queue *queue, | ||
902 | struct netdev_queue_attribute *attribute, | ||
903 | const char *buf, size_t len) | ||
904 | { | ||
905 | struct dql *dql = &queue->dql; | ||
906 | unsigned value; | ||
907 | int err; | ||
908 | |||
909 | err = kstrtouint(buf, 10, &value); | ||
910 | if (err < 0) | ||
911 | return err; | ||
912 | |||
913 | dql->slack_hold_time = msecs_to_jiffies(value); | ||
914 | |||
915 | return len; | ||
916 | } | ||
917 | |||
918 | static struct netdev_queue_attribute bql_hold_time_attribute = | ||
919 | __ATTR(hold_time, S_IRUGO | S_IWUSR, bql_show_hold_time, | ||
920 | bql_set_hold_time); | ||
921 | |||
922 | static ssize_t bql_show_inflight(struct netdev_queue *queue, | ||
923 | struct netdev_queue_attribute *attr, | ||
924 | char *buf) | ||
925 | { | ||
926 | struct dql *dql = &queue->dql; | ||
927 | |||
928 | return sprintf(buf, "%u\n", dql->num_queued - dql->num_completed); | ||
929 | } | ||
930 | |||
931 | static struct netdev_queue_attribute bql_inflight_attribute = | ||
932 | __ATTR(inflight, S_IRUGO, bql_show_inflight, NULL); | ||
933 | |||
934 | #define BQL_ATTR(NAME, FIELD) \ | ||
935 | static ssize_t bql_show_ ## NAME(struct netdev_queue *queue, \ | ||
936 | struct netdev_queue_attribute *attr, \ | ||
937 | char *buf) \ | ||
938 | { \ | ||
939 | return bql_show(buf, queue->dql.FIELD); \ | ||
940 | } \ | ||
941 | \ | ||
942 | static ssize_t bql_set_ ## NAME(struct netdev_queue *queue, \ | ||
943 | struct netdev_queue_attribute *attr, \ | ||
944 | const char *buf, size_t len) \ | ||
945 | { \ | ||
946 | return bql_set(buf, len, &queue->dql.FIELD); \ | ||
947 | } \ | ||
948 | \ | ||
949 | static struct netdev_queue_attribute bql_ ## NAME ## _attribute = \ | ||
950 | __ATTR(NAME, S_IRUGO | S_IWUSR, bql_show_ ## NAME, \ | ||
951 | bql_set_ ## NAME); | ||
952 | |||
953 | BQL_ATTR(limit, limit) | ||
954 | BQL_ATTR(limit_max, max_limit) | ||
955 | BQL_ATTR(limit_min, min_limit) | ||
956 | |||
957 | static struct attribute *dql_attrs[] = { | ||
958 | &bql_limit_attribute.attr, | ||
959 | &bql_limit_max_attribute.attr, | ||
960 | &bql_limit_min_attribute.attr, | ||
961 | &bql_hold_time_attribute.attr, | ||
962 | &bql_inflight_attribute.attr, | ||
963 | NULL | ||
964 | }; | ||
965 | |||
966 | static struct attribute_group dql_group = { | ||
967 | .name = "byte_queue_limits", | ||
968 | .attrs = dql_attrs, | ||
969 | }; | ||
970 | #endif /* CONFIG_BQL */ | ||
971 | |||
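Each BQL attribute above ends up as a file under the queue's byte_queue_limits directory. A hedged userspace sketch that dumps them (interface and queue names are illustrative):

#include <stdio.h>

int main(void)
{
	static const char *files[] = {
		"limit", "limit_max", "limit_min", "hold_time", "inflight"
	};
	char path[128], buf[64];
	unsigned int i;

	for (i = 0; i < sizeof(files) / sizeof(files[0]); i++) {
		FILE *f;

		snprintf(path, sizeof(path),
			 "/sys/class/net/eth0/queues/tx-0/byte_queue_limits/%s",
			 files[i]);
		f = fopen(path, "r");
		if (!f)
			continue;
		if (fgets(buf, sizeof(buf), f))
			printf("%-10s %s", files[i], buf);
		fclose(f);
	}
	return 0;
}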
972 | #ifdef CONFIG_XPS | ||
829 | static inline unsigned int get_netdev_queue_index(struct netdev_queue *queue) | 973 | static inline unsigned int get_netdev_queue_index(struct netdev_queue *queue) |
830 | { | 974 | { |
831 | struct net_device *dev = queue->dev; | 975 | struct net_device *dev = queue->dev; |
@@ -890,6 +1034,52 @@ static DEFINE_MUTEX(xps_map_mutex); | |||
890 | #define xmap_dereference(P) \ | 1034 | #define xmap_dereference(P) \ |
891 | rcu_dereference_protected((P), lockdep_is_held(&xps_map_mutex)) | 1035 | rcu_dereference_protected((P), lockdep_is_held(&xps_map_mutex)) |
892 | 1036 | ||
1037 | static void xps_queue_release(struct netdev_queue *queue) | ||
1038 | { | ||
1039 | struct net_device *dev = queue->dev; | ||
1040 | struct xps_dev_maps *dev_maps; | ||
1041 | struct xps_map *map; | ||
1042 | unsigned long index; | ||
1043 | int i, pos, nonempty = 0; | ||
1044 | |||
1045 | index = get_netdev_queue_index(queue); | ||
1046 | |||
1047 | mutex_lock(&xps_map_mutex); | ||
1048 | dev_maps = xmap_dereference(dev->xps_maps); | ||
1049 | |||
1050 | if (dev_maps) { | ||
1051 | for_each_possible_cpu(i) { | ||
1052 | map = xmap_dereference(dev_maps->cpu_map[i]); | ||
1053 | if (!map) | ||
1054 | continue; | ||
1055 | |||
1056 | for (pos = 0; pos < map->len; pos++) | ||
1057 | if (map->queues[pos] == index) | ||
1058 | break; | ||
1059 | |||
1060 | if (pos < map->len) { | ||
1061 | if (map->len > 1) | ||
1062 | map->queues[pos] = | ||
1063 | map->queues[--map->len]; | ||
1064 | else { | ||
1065 | RCU_INIT_POINTER(dev_maps->cpu_map[i], | ||
1066 | NULL); | ||
1067 | kfree_rcu(map, rcu); | ||
1068 | map = NULL; | ||
1069 | } | ||
1070 | } | ||
1071 | if (map) | ||
1072 | nonempty = 1; | ||
1073 | } | ||
1074 | |||
1075 | if (!nonempty) { | ||
1076 | RCU_INIT_POINTER(dev->xps_maps, NULL); | ||
1077 | kfree_rcu(dev_maps, rcu); | ||
1078 | } | ||
1079 | } | ||
1080 | mutex_unlock(&xps_map_mutex); | ||
1081 | } | ||
1082 | |||
893 | static ssize_t store_xps_map(struct netdev_queue *queue, | 1083 | static ssize_t store_xps_map(struct netdev_queue *queue, |
894 | struct netdev_queue_attribute *attribute, | 1084 | struct netdev_queue_attribute *attribute, |
895 | const char *buf, size_t len) | 1085 | const char *buf, size_t len) |
@@ -901,7 +1091,7 @@ static ssize_t store_xps_map(struct netdev_queue *queue, | |||
901 | struct xps_map *map, *new_map; | 1091 | struct xps_map *map, *new_map; |
902 | struct xps_dev_maps *dev_maps, *new_dev_maps; | 1092 | struct xps_dev_maps *dev_maps, *new_dev_maps; |
903 | int nonempty = 0; | 1093 | int nonempty = 0; |
904 | int numa_node = -2; | 1094 | int numa_node_id = -2; |
905 | 1095 | ||
906 | if (!capable(CAP_NET_ADMIN)) | 1096 | if (!capable(CAP_NET_ADMIN)) |
907 | return -EPERM; | 1097 | return -EPERM; |
@@ -944,10 +1134,10 @@ static ssize_t store_xps_map(struct netdev_queue *queue, | |||
944 | need_set = cpumask_test_cpu(cpu, mask) && cpu_online(cpu); | 1134 | need_set = cpumask_test_cpu(cpu, mask) && cpu_online(cpu); |
945 | #ifdef CONFIG_NUMA | 1135 | #ifdef CONFIG_NUMA |
946 | if (need_set) { | 1136 | if (need_set) { |
947 | if (numa_node == -2) | 1137 | if (numa_node_id == -2) |
948 | numa_node = cpu_to_node(cpu); | 1138 | numa_node_id = cpu_to_node(cpu); |
949 | else if (numa_node != cpu_to_node(cpu)) | 1139 | else if (numa_node_id != cpu_to_node(cpu)) |
950 | numa_node = -1; | 1140 | numa_node_id = -1; |
951 | } | 1141 | } |
952 | #endif | 1142 | #endif |
953 | if (need_set && pos >= map_len) { | 1143 | if (need_set && pos >= map_len) { |
@@ -987,9 +1177,9 @@ static ssize_t store_xps_map(struct netdev_queue *queue, | |||
987 | nonempty = 1; | 1177 | nonempty = 1; |
988 | } | 1178 | } |
989 | 1179 | ||
990 | if (nonempty) | 1180 | if (nonempty) { |
991 | RCU_INIT_POINTER(dev->xps_maps, new_dev_maps); | 1181 | rcu_assign_pointer(dev->xps_maps, new_dev_maps); |
992 | else { | 1182 | } else { |
993 | kfree(new_dev_maps); | 1183 | kfree(new_dev_maps); |
994 | RCU_INIT_POINTER(dev->xps_maps, NULL); | 1184 | RCU_INIT_POINTER(dev->xps_maps, NULL); |
995 | } | 1185 | } |
@@ -997,7 +1187,7 @@ static ssize_t store_xps_map(struct netdev_queue *queue, | |||
997 | if (dev_maps) | 1187 | if (dev_maps) |
998 | kfree_rcu(dev_maps, rcu); | 1188 | kfree_rcu(dev_maps, rcu); |
999 | 1189 | ||
1000 | netdev_queue_numa_node_write(queue, (numa_node >= 0) ? numa_node : | 1190 | netdev_queue_numa_node_write(queue, (numa_node_id >= 0) ? numa_node_id : |
1001 | NUMA_NO_NODE); | 1191 | NUMA_NO_NODE); |
1002 | 1192 | ||
1003 | mutex_unlock(&xps_map_mutex); | 1193 | mutex_unlock(&xps_map_mutex); |
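The nonempty branch above now uses rcu_assign_pointer() rather than RCU_INIT_POINTER() because it may republish a map that concurrent readers are already dereferencing, so the store needs release ordering. A sketch of the distinction in C11 atomics (the kernel primitives are not these, but the ordering contract is the same):

#include <stdatomic.h>
#include <stdio.h>

struct map { int len; };

static _Atomic(struct map *) shared;

static void publish(struct map *m)
{
	/* like rcu_assign_pointer(): m's fields become visible to any
	 * reader that subsequently loads the pointer */
	atomic_store_explicit(&shared, m, memory_order_release);
}

static void init_unreachable(struct map *m)
{
	/* like RCU_INIT_POINTER(): a plain store, legal only while no
	 * reader can reach the pointer (or when storing NULL) */
	atomic_store_explicit(&shared, m, memory_order_relaxed);
}

int main(void)
{
	static struct map m = { .len = 4 };

	init_unreachable(NULL);	/* clearing needs no ordering */
	publish(&m);		/* republishing live data does */
	printf("len=%d\n",
	       atomic_load_explicit(&shared, memory_order_acquire)->len);
	return 0;
}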
@@ -1020,58 +1210,23 @@ error: | |||
1020 | 1210 | ||
1021 | static struct netdev_queue_attribute xps_cpus_attribute = | 1211 | static struct netdev_queue_attribute xps_cpus_attribute = |
1022 | __ATTR(xps_cpus, S_IRUGO | S_IWUSR, show_xps_map, store_xps_map); | 1212 | __ATTR(xps_cpus, S_IRUGO | S_IWUSR, show_xps_map, store_xps_map); |
1213 | #endif /* CONFIG_XPS */ | ||
1023 | 1214 | ||
1024 | static struct attribute *netdev_queue_default_attrs[] = { | 1215 | static struct attribute *netdev_queue_default_attrs[] = { |
1216 | &queue_trans_timeout.attr, | ||
1217 | #ifdef CONFIG_XPS | ||
1025 | &xps_cpus_attribute.attr, | 1218 | &xps_cpus_attribute.attr, |
1219 | #endif | ||
1026 | NULL | 1220 | NULL |
1027 | }; | 1221 | }; |
1028 | 1222 | ||
1029 | static void netdev_queue_release(struct kobject *kobj) | 1223 | static void netdev_queue_release(struct kobject *kobj) |
1030 | { | 1224 | { |
1031 | struct netdev_queue *queue = to_netdev_queue(kobj); | 1225 | struct netdev_queue *queue = to_netdev_queue(kobj); |
1032 | struct net_device *dev = queue->dev; | ||
1033 | struct xps_dev_maps *dev_maps; | ||
1034 | struct xps_map *map; | ||
1035 | unsigned long index; | ||
1036 | int i, pos, nonempty = 0; | ||
1037 | |||
1038 | index = get_netdev_queue_index(queue); | ||
1039 | |||
1040 | mutex_lock(&xps_map_mutex); | ||
1041 | dev_maps = xmap_dereference(dev->xps_maps); | ||
1042 | |||
1043 | if (dev_maps) { | ||
1044 | for_each_possible_cpu(i) { | ||
1045 | map = xmap_dereference(dev_maps->cpu_map[i]); | ||
1046 | if (!map) | ||
1047 | continue; | ||
1048 | 1226 | ||
1049 | for (pos = 0; pos < map->len; pos++) | 1227 | #ifdef CONFIG_XPS |
1050 | if (map->queues[pos] == index) | 1228 | xps_queue_release(queue); |
1051 | break; | 1229 | #endif |
1052 | |||
1053 | if (pos < map->len) { | ||
1054 | if (map->len > 1) | ||
1055 | map->queues[pos] = | ||
1056 | map->queues[--map->len]; | ||
1057 | else { | ||
1058 | RCU_INIT_POINTER(dev_maps->cpu_map[i], | ||
1059 | NULL); | ||
1060 | kfree_rcu(map, rcu); | ||
1061 | map = NULL; | ||
1062 | } | ||
1063 | } | ||
1064 | if (map) | ||
1065 | nonempty = 1; | ||
1066 | } | ||
1067 | |||
1068 | if (!nonempty) { | ||
1069 | RCU_INIT_POINTER(dev->xps_maps, NULL); | ||
1070 | kfree_rcu(dev_maps, rcu); | ||
1071 | } | ||
1072 | } | ||
1073 | |||
1074 | mutex_unlock(&xps_map_mutex); | ||
1075 | 1230 | ||
1076 | memset(kobj, 0, sizeof(*kobj)); | 1231 | memset(kobj, 0, sizeof(*kobj)); |
1077 | dev_put(queue->dev); | 1232 | dev_put(queue->dev); |
@@ -1092,22 +1247,29 @@ static int netdev_queue_add_kobject(struct net_device *net, int index) | |||
1092 | kobj->kset = net->queues_kset; | 1247 | kobj->kset = net->queues_kset; |
1093 | error = kobject_init_and_add(kobj, &netdev_queue_ktype, NULL, | 1248 | error = kobject_init_and_add(kobj, &netdev_queue_ktype, NULL, |
1094 | "tx-%u", index); | 1249 | "tx-%u", index); |
1095 | if (error) { | 1250 | if (error) |
1096 | kobject_put(kobj); | 1251 | goto exit; |
1097 | return error; | 1252 | |
1098 | } | 1253 | #ifdef CONFIG_BQL |
1254 | error = sysfs_create_group(kobj, &dql_group); | ||
1255 | if (error) | ||
1256 | goto exit; | ||
1257 | #endif | ||
1099 | 1258 | ||
1100 | kobject_uevent(kobj, KOBJ_ADD); | 1259 | kobject_uevent(kobj, KOBJ_ADD); |
1101 | dev_hold(queue->dev); | 1260 | dev_hold(queue->dev); |
1102 | 1261 | ||
1262 | return 0; | ||
1263 | exit: | ||
1264 | kobject_put(kobj); | ||
1103 | return error; | 1265 | return error; |
1104 | } | 1266 | } |
1105 | #endif /* CONFIG_XPS */ | 1267 | #endif /* CONFIG_SYSFS */ |
1106 | 1268 | ||
1107 | int | 1269 | int |
1108 | netdev_queue_update_kobjects(struct net_device *net, int old_num, int new_num) | 1270 | netdev_queue_update_kobjects(struct net_device *net, int old_num, int new_num) |
1109 | { | 1271 | { |
1110 | #ifdef CONFIG_XPS | 1272 | #ifdef CONFIG_SYSFS |
1111 | int i; | 1273 | int i; |
1112 | int error = 0; | 1274 | int error = 0; |
1113 | 1275 | ||
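netdev_queue_add_kobject() now unwinds through a single exit label instead of cleaning up inline at each failure site, which keeps the kobject_put() in one place as more failure points (the BQL group) are added. The shape, as a standalone sketch with illustrative helpers:

#include <stdio.h>

struct obj {
	int refs;
};

static void obj_put(struct obj *o)
{
	if (--o->refs == 0)
		printf("released\n");
}

static int add_group(struct obj *o)
{
	(void)o;
	return -1;		/* simulate sysfs_create_group() failing */
}

static int register_obj(struct obj *o)
{
	int error;

	o->refs = 1;		/* initialization takes the first reference */

	error = add_group(o);
	if (error)
		goto exit;

	printf("registered\n");
	return 0;
exit:
	obj_put(o);		/* one unwind point for every failure */
	return error;
}

int main(void)
{
	struct obj o;

	return register_obj(&o) ? 1 : 0;
}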
@@ -1119,20 +1281,26 @@ netdev_queue_update_kobjects(struct net_device *net, int old_num, int new_num) | |||
1119 | } | 1281 | } |
1120 | } | 1282 | } |
1121 | 1283 | ||
1122 | while (--i >= new_num) | 1284 | while (--i >= new_num) { |
1123 | kobject_put(&net->_tx[i].kobj); | 1285 | struct netdev_queue *queue = net->_tx + i; |
1286 | |||
1287 | #ifdef CONFIG_BQL | ||
1288 | sysfs_remove_group(&queue->kobj, &dql_group); | ||
1289 | #endif | ||
1290 | kobject_put(&queue->kobj); | ||
1291 | } | ||
1124 | 1292 | ||
1125 | return error; | 1293 | return error; |
1126 | #else | 1294 | #else |
1127 | return 0; | 1295 | return 0; |
1128 | #endif | 1296 | #endif /* CONFIG_SYSFS */ |
1129 | } | 1297 | } |
1130 | 1298 | ||
1131 | static int register_queue_kobjects(struct net_device *net) | 1299 | static int register_queue_kobjects(struct net_device *net) |
1132 | { | 1300 | { |
1133 | int error = 0, txq = 0, rxq = 0, real_rx = 0, real_tx = 0; | 1301 | int error = 0, txq = 0, rxq = 0, real_rx = 0, real_tx = 0; |
1134 | 1302 | ||
1135 | #if defined(CONFIG_RPS) || defined(CONFIG_XPS) | 1303 | #ifdef CONFIG_SYSFS |
1136 | net->queues_kset = kset_create_and_add("queues", | 1304 | net->queues_kset = kset_create_and_add("queues", |
1137 | NULL, &net->dev.kobj); | 1305 | NULL, &net->dev.kobj); |
1138 | if (!net->queues_kset) | 1306 | if (!net->queues_kset) |
@@ -1173,7 +1341,7 @@ static void remove_queue_kobjects(struct net_device *net) | |||
1173 | 1341 | ||
1174 | net_rx_queue_update_kobjects(net, real_rx, 0); | 1342 | net_rx_queue_update_kobjects(net, real_rx, 0); |
1175 | netdev_queue_update_kobjects(net, real_tx, 0); | 1343 | netdev_queue_update_kobjects(net, real_tx, 0); |
1176 | #if defined(CONFIG_RPS) || defined(CONFIG_XPS) | 1344 | #ifdef CONFIG_SYSFS |
1177 | kset_unregister(net->queues_kset); | 1345 | kset_unregister(net->queues_kset); |
1178 | #endif | 1346 | #endif |
1179 | } | 1347 | } |
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index aefcd7acbffa..0e950fda9a0a 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c | |||
@@ -30,6 +30,20 @@ EXPORT_SYMBOL(init_net); | |||
30 | 30 | ||
31 | #define INITIAL_NET_GEN_PTRS 13 /* +1 for len +2 for rcu_head */ | 31 | #define INITIAL_NET_GEN_PTRS 13 /* +1 for len +2 for rcu_head */ |
32 | 32 | ||
33 | static unsigned int max_gen_ptrs = INITIAL_NET_GEN_PTRS; | ||
34 | |||
35 | static struct net_generic *net_alloc_generic(void) | ||
36 | { | ||
37 | struct net_generic *ng; | ||
38 | size_t generic_size = offsetof(struct net_generic, ptr[max_gen_ptrs]); | ||
39 | |||
40 | ng = kzalloc(generic_size, GFP_KERNEL); | ||
41 | if (ng) | ||
42 | ng->len = max_gen_ptrs; | ||
43 | |||
44 | return ng; | ||
45 | } | ||
46 | |||
33 | static int net_assign_generic(struct net *net, int id, void *data) | 47 | static int net_assign_generic(struct net *net, int id, void *data) |
34 | { | 48 | { |
35 | struct net_generic *ng, *old_ng; | 49 | struct net_generic *ng, *old_ng; |
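net_alloc_generic(), moved above and now sized by max_gen_ptrs, uses offsetof(struct net_generic, ptr[n]) to size a structure ending in a flexible array, which is clearer than the open-coded sizeof() plus product it replaces. A runnable sketch of the idiom (indexing offsetof with a runtime value follows the kernel's usage and is accepted by gcc and clang):

#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>

struct generic {
	unsigned int len;
	void *ptr[];		/* flexible array member */
};

int main(void)
{
	unsigned int n = 13;
	struct generic *g = calloc(1, offsetof(struct generic, ptr[n]));

	if (!g)
		return 1;
	g->len = n;
	printf("allocated %zu bytes for %u slots\n",
	       offsetof(struct generic, ptr[n]), g->len);
	free(g);
	return 0;
}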
@@ -43,8 +57,7 @@ static int net_assign_generic(struct net *net, int id, void *data) | |||
43 | if (old_ng->len >= id) | 57 | if (old_ng->len >= id) |
44 | goto assign; | 58 | goto assign; |
45 | 59 | ||
46 | ng = kzalloc(sizeof(struct net_generic) + | 60 | ng = net_alloc_generic(); |
47 | id * sizeof(void *), GFP_KERNEL); | ||
48 | if (ng == NULL) | 61 | if (ng == NULL) |
49 | return -ENOMEM; | 62 | return -ENOMEM; |
50 | 63 | ||
@@ -59,7 +72,6 @@ static int net_assign_generic(struct net *net, int id, void *data) | |||
59 | * the old copy for kfree after a grace period. | 72 | * the old copy for kfree after a grace period. |
60 | */ | 73 | */ |
61 | 74 | ||
62 | ng->len = id; | ||
63 | memcpy(&ng->ptr, &old_ng->ptr, old_ng->len * sizeof(void*)); | 75 | memcpy(&ng->ptr, &old_ng->ptr, old_ng->len * sizeof(void*)); |
64 | 76 | ||
65 | rcu_assign_pointer(net->gen, ng); | 77 | rcu_assign_pointer(net->gen, ng); |
@@ -161,18 +173,6 @@ out_undo: | |||
161 | goto out; | 173 | goto out; |
162 | } | 174 | } |
163 | 175 | ||
164 | static struct net_generic *net_alloc_generic(void) | ||
165 | { | ||
166 | struct net_generic *ng; | ||
167 | size_t generic_size = sizeof(struct net_generic) + | ||
168 | INITIAL_NET_GEN_PTRS * sizeof(void *); | ||
169 | |||
170 | ng = kzalloc(generic_size, GFP_KERNEL); | ||
171 | if (ng) | ||
172 | ng->len = INITIAL_NET_GEN_PTRS; | ||
173 | |||
174 | return ng; | ||
175 | } | ||
176 | 176 | ||
177 | #ifdef CONFIG_NET_NS | 177 | #ifdef CONFIG_NET_NS |
178 | static struct kmem_cache *net_cachep; | 178 | static struct kmem_cache *net_cachep; |
@@ -483,6 +483,7 @@ again: | |||
483 | } | 483 | } |
484 | return error; | 484 | return error; |
485 | } | 485 | } |
486 | max_gen_ptrs = max_t(unsigned int, max_gen_ptrs, *ops->id); | ||
486 | } | 487 | } |
487 | error = __register_pernet_operations(list, ops); | 488 | error = __register_pernet_operations(list, ops); |
488 | if (error) { | 489 | if (error) { |
diff --git a/net/core/netpoll.c b/net/core/netpoll.c index cf64c1ffa4cd..ddefc513b44a 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c | |||
@@ -76,7 +76,7 @@ static void queue_process(struct work_struct *work) | |||
76 | 76 | ||
77 | local_irq_save(flags); | 77 | local_irq_save(flags); |
78 | __netif_tx_lock(txq, smp_processor_id()); | 78 | __netif_tx_lock(txq, smp_processor_id()); |
79 | if (netif_tx_queue_frozen_or_stopped(txq) || | 79 | if (netif_xmit_frozen_or_stopped(txq) || |
80 | ops->ndo_start_xmit(skb, dev) != NETDEV_TX_OK) { | 80 | ops->ndo_start_xmit(skb, dev) != NETDEV_TX_OK) { |
81 | skb_queue_head(&npinfo->txq, skb); | 81 | skb_queue_head(&npinfo->txq, skb); |
82 | __netif_tx_unlock(txq); | 82 | __netif_tx_unlock(txq); |
@@ -194,7 +194,7 @@ static void netpoll_poll_dev(struct net_device *dev) | |||
194 | 194 | ||
195 | poll_napi(dev); | 195 | poll_napi(dev); |
196 | 196 | ||
197 | if (dev->priv_flags & IFF_SLAVE) { | 197 | if (dev->flags & IFF_SLAVE) { |
198 | if (dev->npinfo) { | 198 | if (dev->npinfo) { |
199 | struct net_device *bond_dev = dev->master; | 199 | struct net_device *bond_dev = dev->master; |
200 | struct sk_buff *skb; | 200 | struct sk_buff *skb; |
@@ -317,7 +317,7 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb, | |||
317 | for (tries = jiffies_to_usecs(1)/USEC_PER_POLL; | 317 | for (tries = jiffies_to_usecs(1)/USEC_PER_POLL; |
318 | tries > 0; --tries) { | 318 | tries > 0; --tries) { |
319 | if (__netif_tx_trylock(txq)) { | 319 | if (__netif_tx_trylock(txq)) { |
320 | if (!netif_tx_queue_stopped(txq)) { | 320 | if (!netif_xmit_stopped(txq)) { |
321 | status = ops->ndo_start_xmit(skb, dev); | 321 | status = ops->ndo_start_xmit(skb, dev); |
322 | if (status == NETDEV_TX_OK) | 322 | if (status == NETDEV_TX_OK) |
323 | txq_trans_update(txq); | 323 | txq_trans_update(txq); |
@@ -422,6 +422,7 @@ static void arp_reply(struct sk_buff *skb) | |||
422 | struct sk_buff *send_skb; | 422 | struct sk_buff *send_skb; |
423 | struct netpoll *np, *tmp; | 423 | struct netpoll *np, *tmp; |
424 | unsigned long flags; | 424 | unsigned long flags; |
425 | int hlen, tlen; | ||
425 | int hits = 0; | 426 | int hits = 0; |
426 | 427 | ||
427 | if (list_empty(&npinfo->rx_np)) | 428 | if (list_empty(&npinfo->rx_np)) |
@@ -479,8 +480,9 @@ static void arp_reply(struct sk_buff *skb) | |||
479 | if (tip != np->local_ip) | 480 | if (tip != np->local_ip) |
480 | continue; | 481 | continue; |
481 | 482 | ||
482 | send_skb = find_skb(np, size + LL_ALLOCATED_SPACE(np->dev), | 483 | hlen = LL_RESERVED_SPACE(np->dev); |
483 | LL_RESERVED_SPACE(np->dev)); | 484 | tlen = np->dev->needed_tailroom; |
485 | send_skb = find_skb(np, size + hlen + tlen, hlen); | ||
484 | if (!send_skb) | 486 | if (!send_skb) |
485 | continue; | 487 | continue; |
486 | 488 | ||
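find_skb() is now asked for size + hlen + tlen bytes with hlen reserved as headroom, replacing LL_ALLOCATED_SPACE() and accounting for the device's needed tailroom. The arithmetic, sketched with a plain buffer and illustrative sizes:

#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	size_t size = 28;	/* ARP payload, illustrative */
	size_t hlen = 16;	/* stand-in for LL_RESERVED_SPACE(dev) */
	size_t tlen = 4;	/* stand-in for dev->needed_tailroom */
	unsigned char *buf = malloc(size + hlen + tlen);
	unsigned char *data;

	if (!buf)
		return 1;
	data = buf + hlen;	/* like skb_reserve(skb, hlen) */
	printf("%zu bytes total, payload at %zu, %zu spare at the tail\n",
	       size + hlen + tlen, (size_t)(data - buf), tlen);
	free(buf);
	return 0;
}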
@@ -763,7 +765,7 @@ int __netpoll_setup(struct netpoll *np) | |||
763 | } | 765 | } |
764 | 766 | ||
765 | /* last thing to do is link it to the net device structure */ | 767 | /* last thing to do is link it to the net device structure */ |
766 | RCU_INIT_POINTER(ndev->npinfo, npinfo); | 768 | rcu_assign_pointer(ndev->npinfo, npinfo); |
767 | 769 | ||
768 | return 0; | 770 | return 0; |
769 | 771 | ||
diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c new file mode 100644 index 000000000000..4dacc44637ef --- /dev/null +++ b/net/core/netprio_cgroup.c | |||
@@ -0,0 +1,339 @@ | |||
1 | /* | ||
2 | * net/core/netprio_cgroup.c Priority Control Group | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of the GNU General Public License | ||
6 | * as published by the Free Software Foundation; either version | ||
7 | * 2 of the License, or (at your option) any later version. | ||
8 | * | ||
9 | * Authors: Neil Horman <nhorman@tuxdriver.com> | ||
10 | */ | ||
11 | |||
12 | #include <linux/module.h> | ||
13 | #include <linux/slab.h> | ||
14 | #include <linux/types.h> | ||
15 | #include <linux/string.h> | ||
16 | #include <linux/errno.h> | ||
17 | #include <linux/skbuff.h> | ||
18 | #include <linux/cgroup.h> | ||
19 | #include <linux/rcupdate.h> | ||
20 | #include <linux/atomic.h> | ||
21 | #include <net/rtnetlink.h> | ||
22 | #include <net/pkt_cls.h> | ||
23 | #include <net/sock.h> | ||
24 | #include <net/netprio_cgroup.h> | ||
25 | |||
26 | static struct cgroup_subsys_state *cgrp_create(struct cgroup_subsys *ss, | ||
27 | struct cgroup *cgrp); | ||
28 | static void cgrp_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp); | ||
29 | static int cgrp_populate(struct cgroup_subsys *ss, struct cgroup *cgrp); | ||
30 | |||
31 | struct cgroup_subsys net_prio_subsys = { | ||
32 | .name = "net_prio", | ||
33 | .create = cgrp_create, | ||
34 | .destroy = cgrp_destroy, | ||
35 | .populate = cgrp_populate, | ||
36 | #ifdef CONFIG_NETPRIO_CGROUP | ||
37 | .subsys_id = net_prio_subsys_id, | ||
38 | #endif | ||
39 | .module = THIS_MODULE | ||
40 | }; | ||
41 | |||
42 | #define PRIOIDX_SZ 128 | ||
43 | |||
44 | static unsigned long prioidx_map[PRIOIDX_SZ]; | ||
45 | static DEFINE_SPINLOCK(prioidx_map_lock); | ||
46 | static atomic_t max_prioidx = ATOMIC_INIT(0); | ||
47 | |||
48 | static inline struct cgroup_netprio_state *cgrp_netprio_state(struct cgroup *cgrp) | ||
49 | { | ||
50 | return container_of(cgroup_subsys_state(cgrp, net_prio_subsys_id), | ||
51 | struct cgroup_netprio_state, css); | ||
52 | } | ||
53 | |||
54 | static int get_prioidx(u32 *prio) | ||
55 | { | ||
56 | unsigned long flags; | ||
57 | u32 prioidx; | ||
58 | |||
59 | spin_lock_irqsave(&prioidx_map_lock, flags); | ||
60 | prioidx = find_first_zero_bit(prioidx_map, sizeof(unsigned long) * PRIOIDX_SZ); | ||
61 | if (prioidx == sizeof(unsigned long) * PRIOIDX_SZ) { | ||
62 | spin_unlock_irqrestore(&prioidx_map_lock, flags); | ||
63 | return -ENOSPC; | ||
64 | } | ||
65 | set_bit(prioidx, prioidx_map); | ||
66 | spin_unlock_irqrestore(&prioidx_map_lock, flags); | ||
67 | atomic_set(&max_prioidx, prioidx); | ||
68 | *prio = prioidx; | ||
69 | return 0; | ||
70 | } | ||
71 | |||
72 | static void put_prioidx(u32 idx) | ||
73 | { | ||
74 | unsigned long flags; | ||
75 | |||
76 | spin_lock_irqsave(&prioidx_map_lock, flags); | ||
77 | clear_bit(idx, prioidx_map); | ||
78 | spin_unlock_irqrestore(&prioidx_map_lock, flags); | ||
79 | } | ||
80 | |||
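get_prioidx()/put_prioidx() implement a small ID allocator over a static bitmap: find the first zero bit, set it, and hand the index out; freeing just clears the bit. A userspace sketch of the same allocator (no locking, unlike the spinlock-protected kernel version):

#include <limits.h>
#include <stdio.h>

#define SZ 4
#define BITS (sizeof(unsigned long) * CHAR_BIT)
static unsigned long map[SZ];

static int get_idx(unsigned int *idx)
{
	unsigned int i;

	for (i = 0; i < SZ * BITS; i++) {
		unsigned long *w = &map[i / BITS];
		unsigned long bit = 1UL << (i % BITS);

		if (!(*w & bit)) {
			*w |= bit;	/* like set_bit() */
			*idx = i;
			return 0;
		}
	}
	return -1;			/* like -ENOSPC */
}

static void put_idx(unsigned int i)
{
	map[i / BITS] &= ~(1UL << (i % BITS));	/* like clear_bit() */
}

int main(void)
{
	unsigned int a, b;

	get_idx(&a);
	get_idx(&b);
	put_idx(a);
	get_idx(&a);	/* reuses the freed slot */
	printf("a=%u b=%u\n", a, b);
	return 0;
}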
81 | static void extend_netdev_table(struct net_device *dev, u32 new_len) | ||
82 | { | ||
83 | size_t new_size = sizeof(struct netprio_map) + | ||
84 | ((sizeof(u32) * new_len)); | ||
85 | struct netprio_map *new_priomap = kzalloc(new_size, GFP_KERNEL); | ||
86 | struct netprio_map *old_priomap; | ||
87 | int i; | ||
88 | |||
89 | old_priomap = rtnl_dereference(dev->priomap); | ||
90 | |||
91 | if (!new_priomap) { | ||
92 | printk(KERN_WARNING "Unable to alloc new priomap!\n"); | ||
93 | return; | ||
94 | } | ||
95 | |||
96 | for (i = 0; | ||
97 | old_priomap && (i < old_priomap->priomap_len); | ||
98 | i++) | ||
99 | new_priomap->priomap[i] = old_priomap->priomap[i]; | ||
100 | |||
101 | new_priomap->priomap_len = new_len; | ||
102 | |||
103 | rcu_assign_pointer(dev->priomap, new_priomap); | ||
104 | if (old_priomap) | ||
105 | kfree_rcu(old_priomap, rcu); | ||
106 | } | ||
107 | |||
108 | static void update_netdev_tables(void) | ||
109 | { | ||
110 | struct net_device *dev; | ||
111 | u32 max_len = atomic_read(&max_prioidx) + 1; | ||
112 | struct netprio_map *map; | ||
113 | |||
114 | rtnl_lock(); | ||
115 | for_each_netdev(&init_net, dev) { | ||
116 | map = rtnl_dereference(dev->priomap); | ||
117 | if ((!map) || | ||
118 | (map->priomap_len < max_len)) | ||
119 | extend_netdev_table(dev, max_len); | ||
120 | } | ||
121 | rtnl_unlock(); | ||
122 | } | ||
123 | |||
124 | static struct cgroup_subsys_state *cgrp_create(struct cgroup_subsys *ss, | ||
125 | struct cgroup *cgrp) | ||
126 | { | ||
127 | struct cgroup_netprio_state *cs; | ||
128 | int ret; | ||
129 | |||
130 | cs = kzalloc(sizeof(*cs), GFP_KERNEL); | ||
131 | if (!cs) | ||
132 | return ERR_PTR(-ENOMEM); | ||
133 | |||
134 | if (cgrp->parent && cgrp_netprio_state(cgrp->parent)->prioidx) { | ||
135 | kfree(cs); | ||
136 | return ERR_PTR(-EINVAL); | ||
137 | } | ||
138 | |||
139 | ret = get_prioidx(&cs->prioidx); | ||
140 | if (ret != 0) { | ||
141 | printk(KERN_WARNING "No space in priority index array\n"); | ||
142 | kfree(cs); | ||
143 | return ERR_PTR(ret); | ||
144 | } | ||
145 | |||
146 | return &cs->css; | ||
147 | } | ||
148 | |||
149 | static void cgrp_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp) | ||
150 | { | ||
151 | struct cgroup_netprio_state *cs; | ||
152 | struct net_device *dev; | ||
153 | struct netprio_map *map; | ||
154 | |||
155 | cs = cgrp_netprio_state(cgrp); | ||
156 | rtnl_lock(); | ||
157 | for_each_netdev(&init_net, dev) { | ||
158 | map = rtnl_dereference(dev->priomap); | ||
159 | if (map) | ||
160 | map->priomap[cs->prioidx] = 0; | ||
161 | } | ||
162 | rtnl_unlock(); | ||
163 | put_prioidx(cs->prioidx); | ||
164 | kfree(cs); | ||
165 | } | ||
166 | |||
167 | static u64 read_prioidx(struct cgroup *cgrp, struct cftype *cft) | ||
168 | { | ||
169 | return (u64)cgrp_netprio_state(cgrp)->prioidx; | ||
170 | } | ||
171 | |||
172 | static int read_priomap(struct cgroup *cont, struct cftype *cft, | ||
173 | struct cgroup_map_cb *cb) | ||
174 | { | ||
175 | struct net_device *dev; | ||
176 | u32 prioidx = cgrp_netprio_state(cont)->prioidx; | ||
177 | u32 priority; | ||
178 | struct netprio_map *map; | ||
179 | |||
180 | rcu_read_lock(); | ||
181 | for_each_netdev_rcu(&init_net, dev) { | ||
182 | map = rcu_dereference(dev->priomap); | ||
183 | priority = map ? map->priomap[prioidx] : 0; | ||
184 | cb->fill(cb, dev->name, priority); | ||
185 | } | ||
186 | rcu_read_unlock(); | ||
187 | return 0; | ||
188 | } | ||
189 | |||
190 | static int write_priomap(struct cgroup *cgrp, struct cftype *cft, | ||
191 | const char *buffer) | ||
192 | { | ||
193 | char *devname = kstrdup(buffer, GFP_KERNEL); | ||
194 | int ret = -EINVAL; | ||
195 | u32 prioidx = cgrp_netprio_state(cgrp)->prioidx; | ||
196 | unsigned long priority; | ||
197 | char *priostr; | ||
198 | struct net_device *dev; | ||
199 | struct netprio_map *map; | ||
200 | |||
201 | if (!devname) | ||
202 | return -ENOMEM; | ||
203 | |||
204 | /* | ||
205 | * Minimally sized valid priomap string | ||
206 | */ | ||
207 | if (strlen(devname) < 3) | ||
208 | goto out_free_devname; | ||
209 | |||
210 | priostr = strstr(devname, " "); | ||
211 | if (!priostr) | ||
212 | goto out_free_devname; | ||
213 | |||
214 | /* | ||
215 | * Separate the devname from the associated priority | ||
216 | * and advance the priostr pointer to the priority value | ||
217 | */ | ||
218 | *priostr = '\0'; | ||
219 | priostr++; | ||
220 | |||
221 | /* | ||
222 | * If the priostr points to NULL, we're at the end of the passed | ||
223 | * in string, and it's not a valid write | ||
224 | */ | ||
225 | if (*priostr == '\0') | ||
226 | goto out_free_devname; | ||
227 | |||
228 | ret = kstrtoul(priostr, 10, &priority); | ||
229 | if (ret < 0) | ||
230 | goto out_free_devname; | ||
231 | |||
232 | ret = -ENODEV; | ||
233 | |||
234 | dev = dev_get_by_name(&init_net, devname); | ||
235 | if (!dev) | ||
236 | goto out_free_devname; | ||
237 | |||
238 | update_netdev_tables(); | ||
239 | ret = 0; | ||
240 | rcu_read_lock(); | ||
241 | map = rcu_dereference(dev->priomap); | ||
242 | if (map) | ||
243 | map->priomap[prioidx] = priority; | ||
244 | rcu_read_unlock(); | ||
245 | dev_put(dev); | ||
246 | |||
247 | out_free_devname: | ||
248 | kfree(devname); | ||
249 | return ret; | ||
250 | } | ||
251 | |||
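write_priomap() parses an "<ifname> <priority>" pair by splitting the copied buffer at the first space and running kstrtoul() on the remainder. The same parse, as a runnable userspace sketch:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static int parse(const char *buffer, char *dev, size_t devsz,
		 unsigned long *prio)
{
	char *copy = strdup(buffer);
	char *sp, *end;
	int ret = -1;

	if (!copy)
		return -1;
	sp = strstr(copy, " ");
	if (!sp || sp[1] == '\0')
		goto out;
	*sp = '\0';			/* split devname / priority */
	*prio = strtoul(sp + 1, &end, 10);
	if (end == sp + 1)
		goto out;
	snprintf(dev, devsz, "%s", copy);
	ret = 0;
out:
	free(copy);
	return ret;
}

int main(void)
{
	char dev[16];
	unsigned long prio;

	if (!parse("eth0 5", dev, sizeof(dev), &prio))
		printf("%s -> %lu\n", dev, prio);
	return 0;
}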
252 | static struct cftype ss_files[] = { | ||
253 | { | ||
254 | .name = "prioidx", | ||
255 | .read_u64 = read_prioidx, | ||
256 | }, | ||
257 | { | ||
258 | .name = "ifpriomap", | ||
259 | .read_map = read_priomap, | ||
260 | .write_string = write_priomap, | ||
261 | }, | ||
262 | }; | ||
263 | |||
264 | static int cgrp_populate(struct cgroup_subsys *ss, struct cgroup *cgrp) | ||
265 | { | ||
266 | return cgroup_add_files(cgrp, ss, ss_files, ARRAY_SIZE(ss_files)); | ||
267 | } | ||
268 | |||
269 | static int netprio_device_event(struct notifier_block *unused, | ||
270 | unsigned long event, void *ptr) | ||
271 | { | ||
272 | struct net_device *dev = ptr; | ||
273 | struct netprio_map *old; | ||
274 | |||
275 | /* | ||
276 | * Note this is called with rtnl_lock held so we have update side | ||
277 | * protection on our rcu assignments | ||
278 | */ | ||
279 | |||
280 | switch (event) { | ||
281 | case NETDEV_UNREGISTER: | ||
282 | old = rtnl_dereference(dev->priomap); | ||
283 | RCU_INIT_POINTER(dev->priomap, NULL); | ||
284 | if (old) | ||
285 | kfree_rcu(old, rcu); | ||
286 | break; | ||
287 | } | ||
288 | return NOTIFY_DONE; | ||
289 | } | ||
290 | |||
291 | static struct notifier_block netprio_device_notifier = { | ||
292 | .notifier_call = netprio_device_event | ||
293 | }; | ||
294 | |||
295 | static int __init init_cgroup_netprio(void) | ||
296 | { | ||
297 | int ret; | ||
298 | |||
299 | ret = cgroup_load_subsys(&net_prio_subsys); | ||
300 | if (ret) | ||
301 | goto out; | ||
302 | #ifndef CONFIG_NETPRIO_CGROUP | ||
303 | smp_wmb(); | ||
304 | net_prio_subsys_id = net_prio_subsys.subsys_id; | ||
305 | #endif | ||
306 | |||
307 | register_netdevice_notifier(&netprio_device_notifier); | ||
308 | |||
309 | out: | ||
310 | return ret; | ||
311 | } | ||
312 | |||
313 | static void __exit exit_cgroup_netprio(void) | ||
314 | { | ||
315 | struct netprio_map *old; | ||
316 | struct net_device *dev; | ||
317 | |||
318 | unregister_netdevice_notifier(&netprio_device_notifier); | ||
319 | |||
320 | cgroup_unload_subsys(&net_prio_subsys); | ||
321 | |||
322 | #ifndef CONFIG_NETPRIO_CGROUP | ||
323 | net_prio_subsys_id = -1; | ||
324 | synchronize_rcu(); | ||
325 | #endif | ||
326 | |||
327 | rtnl_lock(); | ||
328 | for_each_netdev(&init_net, dev) { | ||
329 | old = rtnl_dereference(dev->priomap); | ||
330 | RCU_INIT_POINTER(dev->priomap, NULL); | ||
331 | if (old) | ||
332 | kfree_rcu(old, rcu); | ||
333 | } | ||
334 | rtnl_unlock(); | ||
335 | } | ||
336 | |||
337 | module_init(init_cgroup_netprio); | ||
338 | module_exit(exit_cgroup_netprio); | ||
339 | MODULE_LICENSE("GPL v2"); | ||
diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 0001c243b35c..4d8ce93cd503 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c | |||
@@ -767,8 +767,8 @@ done: | |||
767 | return i; | 767 | return i; |
768 | } | 768 | } |
769 | 769 | ||
770 | static unsigned long num_arg(const char __user * user_buffer, | 770 | static long num_arg(const char __user *user_buffer, unsigned long maxlen, |
771 | unsigned long maxlen, unsigned long *num) | 771 | unsigned long *num) |
772 | { | 772 | { |
773 | int i; | 773 | int i; |
774 | *num = 0; | 774 | *num = 0; |
@@ -1304,7 +1304,7 @@ static ssize_t pktgen_if_write(struct file *file, | |||
1304 | scan_ip6(buf, pkt_dev->in6_daddr.s6_addr); | 1304 | scan_ip6(buf, pkt_dev->in6_daddr.s6_addr); |
1305 | snprintf(buf, sizeof(buf), "%pI6c", &pkt_dev->in6_daddr); | 1305 | snprintf(buf, sizeof(buf), "%pI6c", &pkt_dev->in6_daddr); |
1306 | 1306 | ||
1307 | ipv6_addr_copy(&pkt_dev->cur_in6_daddr, &pkt_dev->in6_daddr); | 1307 | pkt_dev->cur_in6_daddr = pkt_dev->in6_daddr; |
1308 | 1308 | ||
1309 | if (debug) | 1309 | if (debug) |
1310 | printk(KERN_DEBUG "pktgen: dst6 set to: %s\n", buf); | 1310 | printk(KERN_DEBUG "pktgen: dst6 set to: %s\n", buf); |
@@ -1327,8 +1327,7 @@ static ssize_t pktgen_if_write(struct file *file, | |||
1327 | scan_ip6(buf, pkt_dev->min_in6_daddr.s6_addr); | 1327 | scan_ip6(buf, pkt_dev->min_in6_daddr.s6_addr); |
1328 | snprintf(buf, sizeof(buf), "%pI6c", &pkt_dev->min_in6_daddr); | 1328 | snprintf(buf, sizeof(buf), "%pI6c", &pkt_dev->min_in6_daddr); |
1329 | 1329 | ||
1330 | ipv6_addr_copy(&pkt_dev->cur_in6_daddr, | 1330 | pkt_dev->cur_in6_daddr = pkt_dev->min_in6_daddr; |
1331 | &pkt_dev->min_in6_daddr); | ||
1332 | if (debug) | 1331 | if (debug) |
1333 | printk(KERN_DEBUG "pktgen: dst6_min set to: %s\n", buf); | 1332 | printk(KERN_DEBUG "pktgen: dst6_min set to: %s\n", buf); |
1334 | 1333 | ||
@@ -1371,7 +1370,7 @@ static ssize_t pktgen_if_write(struct file *file, | |||
1371 | scan_ip6(buf, pkt_dev->in6_saddr.s6_addr); | 1370 | scan_ip6(buf, pkt_dev->in6_saddr.s6_addr); |
1372 | snprintf(buf, sizeof(buf), "%pI6c", &pkt_dev->in6_saddr); | 1371 | snprintf(buf, sizeof(buf), "%pI6c", &pkt_dev->in6_saddr); |
1373 | 1372 | ||
1374 | ipv6_addr_copy(&pkt_dev->cur_in6_saddr, &pkt_dev->in6_saddr); | 1373 | pkt_dev->cur_in6_saddr = pkt_dev->in6_saddr; |
1375 | 1374 | ||
1376 | if (debug) | 1375 | if (debug) |
1377 | printk(KERN_DEBUG "pktgen: src6 set to: %s\n", buf); | 1376 | printk(KERN_DEBUG "pktgen: src6 set to: %s\n", buf); |
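All the ipv6_addr_copy() calls in this file collapse into plain struct assignments; C copies embedded arrays when a struct is assigned, so the helper was pure ceremony. A short demonstration:

#include <stdio.h>
#include <string.h>

struct in6_addr_like {
	unsigned char s6_addr[16];
};

int main(void)
{
	struct in6_addr_like src, dst;

	memset(&src, 0xab, sizeof(src));
	dst = src;		/* copies all 16 bytes of s6_addr */
	printf("%s\n", memcmp(&dst, &src, sizeof(src)) ? "different" : "equal");
	return 0;
}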
@@ -2025,13 +2024,13 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev) | |||
2025 | pr_warning("WARNING: Requested queue_map_min (zero-based) (%d) exceeds valid range [0 - %d] for (%d) queues on %s, resetting\n", | 2024 | pr_warning("WARNING: Requested queue_map_min (zero-based) (%d) exceeds valid range [0 - %d] for (%d) queues on %s, resetting\n", |
2026 | pkt_dev->queue_map_min, (ntxq ?: 1) - 1, ntxq, | 2025 | pkt_dev->queue_map_min, (ntxq ?: 1) - 1, ntxq, |
2027 | pkt_dev->odevname); | 2026 | pkt_dev->odevname); |
2028 | pkt_dev->queue_map_min = ntxq - 1; | 2027 | pkt_dev->queue_map_min = (ntxq ?: 1) - 1; |
2029 | } | 2028 | } |
2030 | if (pkt_dev->queue_map_max >= ntxq) { | 2029 | if (pkt_dev->queue_map_max >= ntxq) { |
2031 | pr_warning("WARNING: Requested queue_map_max (zero-based) (%d) exceeds valid range [0 - %d] for (%d) queues on %s, resetting\n", | 2030 | pr_warning("WARNING: Requested queue_map_max (zero-based) (%d) exceeds valid range [0 - %d] for (%d) queues on %s, resetting\n", |
2032 | pkt_dev->queue_map_max, (ntxq ?: 1) - 1, ntxq, | 2031 | pkt_dev->queue_map_max, (ntxq ?: 1) - 1, ntxq, |
2033 | pkt_dev->odevname); | 2032 | pkt_dev->odevname); |
2034 | pkt_dev->queue_map_max = ntxq - 1; | 2033 | pkt_dev->queue_map_max = (ntxq ?: 1) - 1; |
2035 | } | 2034 | } |
2036 | 2035 | ||
2037 | /* Default to the interface's mac if not explicitly set. */ | 2036 | /* Default to the interface's mac if not explicitly set. */ |
@@ -2079,9 +2078,7 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev) | |||
2079 | ifp = ifp->if_next) { | 2078 | ifp = ifp->if_next) { |
2080 | if (ifp->scope == IFA_LINK && | 2079 | if (ifp->scope == IFA_LINK && |
2081 | !(ifp->flags & IFA_F_TENTATIVE)) { | 2080 | !(ifp->flags & IFA_F_TENTATIVE)) { |
2082 | ipv6_addr_copy(&pkt_dev-> | 2081 | pkt_dev->cur_in6_saddr = ifp->addr; |
2083 | cur_in6_saddr, | ||
2084 | &ifp->addr); | ||
2085 | err = 0; | 2082 | err = 0; |
2086 | break; | 2083 | break; |
2087 | } | 2084 | } |
@@ -2958,8 +2955,8 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, | |||
2958 | iph->payload_len = htons(sizeof(struct udphdr) + datalen); | 2955 | iph->payload_len = htons(sizeof(struct udphdr) + datalen); |
2959 | iph->nexthdr = IPPROTO_UDP; | 2956 | iph->nexthdr = IPPROTO_UDP; |
2960 | 2957 | ||
2961 | ipv6_addr_copy(&iph->daddr, &pkt_dev->cur_in6_daddr); | 2958 | iph->daddr = pkt_dev->cur_in6_daddr; |
2962 | ipv6_addr_copy(&iph->saddr, &pkt_dev->cur_in6_saddr); | 2959 | iph->saddr = pkt_dev->cur_in6_saddr; |
2963 | 2960 | ||
2964 | skb->mac_header = (skb->network_header - ETH_HLEN - | 2961 | skb->mac_header = (skb->network_header - ETH_HLEN - |
2965 | pkt_dev->pkt_overhead); | 2962 | pkt_dev->pkt_overhead); |
@@ -3345,7 +3342,7 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev) | |||
3345 | 3342 | ||
3346 | __netif_tx_lock_bh(txq); | 3343 | __netif_tx_lock_bh(txq); |
3347 | 3344 | ||
3348 | if (unlikely(netif_tx_queue_frozen_or_stopped(txq))) { | 3345 | if (unlikely(netif_xmit_frozen_or_stopped(txq))) { |
3349 | ret = NETDEV_TX_BUSY; | 3346 | ret = NETDEV_TX_BUSY; |
3350 | pkt_dev->last_ok = 0; | 3347 | pkt_dev->last_ok = 0; |
3351 | goto unlock; | 3348 | goto unlock; |
diff --git a/net/core/request_sock.c b/net/core/request_sock.c index 182236b2510a..9b570a6a33c5 100644 --- a/net/core/request_sock.c +++ b/net/core/request_sock.c | |||
@@ -26,10 +26,11 @@ | |||
26 | * but then some measure against one socket starving all other sockets | 26 | * but then some measure against one socket starving all other sockets |
27 | * would be needed. | 27 | * would be needed. |
28 | * | 28 | * |
29 | * It was 128 by default. Experiments with real servers show, that | 29 | * Its minimum value is 128. Experiments with real servers show that |
30 | * it is absolutely not enough even at 100conn/sec. 256 cures most | 30 | * it is absolutely not enough even at 100conn/sec. 256 cures most |
31 | * of problems. This value is adjusted to 128 for very small machines | 31 | * of problems. |
32 | * (<=32Mb of memory) and to 1024 on normal or better ones (>=256Mb). | 32 | * This value is adjusted to 128 for low-memory machines, |
33 | * and it increases in proportion to the machine's memory. | ||
33 | * Note: Don't forget that somaxconn may limit the backlog too. | 34 | * Note: Don't forget that somaxconn may limit the backlog too. |
34 | */ | 35 | */ |
35 | int sysctl_max_syn_backlog = 256; | 36 | int sysctl_max_syn_backlog = 256; |
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 9083e82bdae5..606a6e8f3671 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c | |||
@@ -60,7 +60,6 @@ struct rtnl_link { | |||
60 | }; | 60 | }; |
61 | 61 | ||
62 | static DEFINE_MUTEX(rtnl_mutex); | 62 | static DEFINE_MUTEX(rtnl_mutex); |
63 | static u16 min_ifinfo_dump_size; | ||
64 | 63 | ||
65 | void rtnl_lock(void) | 64 | void rtnl_lock(void) |
66 | { | 65 | { |
@@ -273,6 +272,17 @@ EXPORT_SYMBOL_GPL(rtnl_unregister_all); | |||
273 | 272 | ||
274 | static LIST_HEAD(link_ops); | 273 | static LIST_HEAD(link_ops); |
275 | 274 | ||
275 | static const struct rtnl_link_ops *rtnl_link_ops_get(const char *kind) | ||
276 | { | ||
277 | const struct rtnl_link_ops *ops; | ||
278 | |||
279 | list_for_each_entry(ops, &link_ops, list) { | ||
280 | if (!strcmp(ops->kind, kind)) | ||
281 | return ops; | ||
282 | } | ||
283 | return NULL; | ||
284 | } | ||
285 | |||
276 | /** | 286 | /** |
277 | * __rtnl_link_register - Register rtnl_link_ops with rtnetlink. | 287 | * __rtnl_link_register - Register rtnl_link_ops with rtnetlink. |
278 | * @ops: struct rtnl_link_ops * to register | 288 | * @ops: struct rtnl_link_ops * to register |
@@ -285,6 +295,9 @@ static LIST_HEAD(link_ops); | |||
285 | */ | 295 | */ |
286 | int __rtnl_link_register(struct rtnl_link_ops *ops) | 296 | int __rtnl_link_register(struct rtnl_link_ops *ops) |
287 | { | 297 | { |
298 | if (rtnl_link_ops_get(ops->kind)) | ||
299 | return -EEXIST; | ||
300 | |||
288 | if (!ops->dellink) | 301 | if (!ops->dellink) |
289 | ops->dellink = unregister_netdevice_queue; | 302 | ops->dellink = unregister_netdevice_queue; |
290 | 303 | ||
@@ -351,17 +364,6 @@ void rtnl_link_unregister(struct rtnl_link_ops *ops) | |||
351 | } | 364 | } |
352 | EXPORT_SYMBOL_GPL(rtnl_link_unregister); | 365 | EXPORT_SYMBOL_GPL(rtnl_link_unregister); |
353 | 366 | ||
354 | static const struct rtnl_link_ops *rtnl_link_ops_get(const char *kind) | ||
355 | { | ||
356 | const struct rtnl_link_ops *ops; | ||
357 | |||
358 | list_for_each_entry(ops, &link_ops, list) { | ||
359 | if (!strcmp(ops->kind, kind)) | ||
360 | return ops; | ||
361 | } | ||
362 | return NULL; | ||
363 | } | ||
364 | |||
365 | static size_t rtnl_link_get_size(const struct net_device *dev) | 367 | static size_t rtnl_link_get_size(const struct net_device *dev) |
366 | { | 368 | { |
367 | const struct rtnl_link_ops *ops = dev->rtnl_link_ops; | 369 | const struct rtnl_link_ops *ops = dev->rtnl_link_ops; |
@@ -721,10 +723,11 @@ static void copy_rtnl_link_stats64(void *v, const struct rtnl_link_stats64 *b) | |||
721 | } | 723 | } |
722 | 724 | ||
723 | /* All VF info */ | 725 | /* All VF info */ |
724 | static inline int rtnl_vfinfo_size(const struct net_device *dev) | 726 | static inline int rtnl_vfinfo_size(const struct net_device *dev, |
727 | u32 ext_filter_mask) | ||
725 | { | 728 | { |
726 | if (dev->dev.parent && dev_is_pci(dev->dev.parent)) { | 729 | if (dev->dev.parent && dev_is_pci(dev->dev.parent) && |
727 | 730 | (ext_filter_mask & RTEXT_FILTER_VF)) { | |
728 | int num_vfs = dev_num_vf(dev->dev.parent); | 731 | int num_vfs = dev_num_vf(dev->dev.parent); |
729 | size_t size = nla_total_size(sizeof(struct nlattr)); | 732 | size_t size = nla_total_size(sizeof(struct nlattr)); |
730 | size += nla_total_size(num_vfs * sizeof(struct nlattr)); | 733 | size += nla_total_size(num_vfs * sizeof(struct nlattr)); |
@@ -763,7 +766,8 @@ static size_t rtnl_port_size(const struct net_device *dev) | |||
763 | return port_self_size; | 766 | return port_self_size; |
764 | } | 767 | } |
765 | 768 | ||
766 | static noinline size_t if_nlmsg_size(const struct net_device *dev) | 769 | static noinline size_t if_nlmsg_size(const struct net_device *dev, |
770 | u32 ext_filter_mask) | ||
767 | { | 771 | { |
768 | return NLMSG_ALIGN(sizeof(struct ifinfomsg)) | 772 | return NLMSG_ALIGN(sizeof(struct ifinfomsg)) |
769 | + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */ | 773 | + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */ |
@@ -781,8 +785,9 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev) | |||
781 | + nla_total_size(4) /* IFLA_MASTER */ | 785 | + nla_total_size(4) /* IFLA_MASTER */ |
782 | + nla_total_size(1) /* IFLA_OPERSTATE */ | 786 | + nla_total_size(1) /* IFLA_OPERSTATE */ |
783 | + nla_total_size(1) /* IFLA_LINKMODE */ | 787 | + nla_total_size(1) /* IFLA_LINKMODE */ |
784 | + nla_total_size(4) /* IFLA_NUM_VF */ | 788 | + nla_total_size(ext_filter_mask |
785 | + rtnl_vfinfo_size(dev) /* IFLA_VFINFO_LIST */ | 789 | & RTEXT_FILTER_VF ? 4 : 0) /* IFLA_NUM_VF */ |
790 | + rtnl_vfinfo_size(dev, ext_filter_mask) /* IFLA_VFINFO_LIST */ | ||
786 | + rtnl_port_size(dev) /* IFLA_VF_PORTS + IFLA_PORT_SELF */ | 791 | + rtnl_port_size(dev) /* IFLA_VF_PORTS + IFLA_PORT_SELF */ |
787 | + rtnl_link_get_size(dev) /* IFLA_LINKINFO */ | 792 | + rtnl_link_get_size(dev) /* IFLA_LINKINFO */ |
788 | + rtnl_link_get_af_size(dev); /* IFLA_AF_SPEC */ | 793 | + rtnl_link_get_af_size(dev); /* IFLA_AF_SPEC */ |
@@ -865,7 +870,7 @@ static int rtnl_port_fill(struct sk_buff *skb, struct net_device *dev) | |||
865 | 870 | ||
866 | static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, | 871 | static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, |
867 | int type, u32 pid, u32 seq, u32 change, | 872 | int type, u32 pid, u32 seq, u32 change, |
868 | unsigned int flags) | 873 | unsigned int flags, u32 ext_filter_mask) |
869 | { | 874 | { |
870 | struct ifinfomsg *ifm; | 875 | struct ifinfomsg *ifm; |
871 | struct nlmsghdr *nlh; | 876 | struct nlmsghdr *nlh; |
@@ -938,10 +943,11 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, | |||
938 | goto nla_put_failure; | 943 | goto nla_put_failure; |
939 | copy_rtnl_link_stats64(nla_data(attr), stats); | 944 | copy_rtnl_link_stats64(nla_data(attr), stats); |
940 | 945 | ||
941 | if (dev->dev.parent) | 946 | if (dev->dev.parent && (ext_filter_mask & RTEXT_FILTER_VF)) |
942 | NLA_PUT_U32(skb, IFLA_NUM_VF, dev_num_vf(dev->dev.parent)); | 947 | NLA_PUT_U32(skb, IFLA_NUM_VF, dev_num_vf(dev->dev.parent)); |
943 | 948 | ||
944 | if (dev->netdev_ops->ndo_get_vf_config && dev->dev.parent) { | 949 | if (dev->netdev_ops->ndo_get_vf_config && dev->dev.parent |
950 | && (ext_filter_mask & RTEXT_FILTER_VF)) { | ||
945 | int i; | 951 | int i; |
946 | 952 | ||
947 | struct nlattr *vfinfo, *vf; | 953 | struct nlattr *vfinfo, *vf; |
@@ -1045,6 +1051,8 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) | |||
1045 | struct net_device *dev; | 1051 | struct net_device *dev; |
1046 | struct hlist_head *head; | 1052 | struct hlist_head *head; |
1047 | struct hlist_node *node; | 1053 | struct hlist_node *node; |
1054 | struct nlattr *tb[IFLA_MAX+1]; | ||
1055 | u32 ext_filter_mask = 0; | ||
1048 | 1056 | ||
1049 | s_h = cb->args[0]; | 1057 | s_h = cb->args[0]; |
1050 | s_idx = cb->args[1]; | 1058 | s_idx = cb->args[1]; |
@@ -1052,6 +1060,12 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) | |||
1052 | rcu_read_lock(); | 1060 | rcu_read_lock(); |
1053 | cb->seq = net->dev_base_seq; | 1061 | cb->seq = net->dev_base_seq; |
1054 | 1062 | ||
1063 | nlmsg_parse(cb->nlh, sizeof(struct rtgenmsg), tb, IFLA_MAX, | ||
1064 | ifla_policy); | ||
1065 | |||
1066 | if (tb[IFLA_EXT_MASK]) | ||
1067 | ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]); | ||
1068 | |||
1055 | for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { | 1069 | for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { |
1056 | idx = 0; | 1070 | idx = 0; |
1057 | head = &net->dev_index_head[h]; | 1071 | head = &net->dev_index_head[h]; |
@@ -1061,7 +1075,8 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) | |||
1061 | if (rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK, | 1075 | if (rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK, |
1062 | NETLINK_CB(cb->skb).pid, | 1076 | NETLINK_CB(cb->skb).pid, |
1063 | cb->nlh->nlmsg_seq, 0, | 1077 | cb->nlh->nlmsg_seq, 0, |
1064 | NLM_F_MULTI) <= 0) | 1078 | NLM_F_MULTI, |
1079 | ext_filter_mask) <= 0) | ||
1065 | goto out; | 1080 | goto out; |
1066 | 1081 | ||
1067 | nl_dump_check_consistent(cb, nlmsg_hdr(skb)); | 1082 | nl_dump_check_consistent(cb, nlmsg_hdr(skb)); |
@@ -1097,6 +1112,7 @@ const struct nla_policy ifla_policy[IFLA_MAX+1] = { | |||
1097 | [IFLA_VF_PORTS] = { .type = NLA_NESTED }, | 1112 | [IFLA_VF_PORTS] = { .type = NLA_NESTED }, |
1098 | [IFLA_PORT_SELF] = { .type = NLA_NESTED }, | 1113 | [IFLA_PORT_SELF] = { .type = NLA_NESTED }, |
1099 | [IFLA_AF_SPEC] = { .type = NLA_NESTED }, | 1114 | [IFLA_AF_SPEC] = { .type = NLA_NESTED }, |
1115 | [IFLA_EXT_MASK] = { .type = NLA_U32 }, | ||
1100 | }; | 1116 | }; |
1101 | EXPORT_SYMBOL(ifla_policy); | 1117 | EXPORT_SYMBOL(ifla_policy); |
1102 | 1118 | ||
@@ -1506,6 +1522,7 @@ errout: | |||
1506 | 1522 | ||
1507 | if (send_addr_notify) | 1523 | if (send_addr_notify) |
1508 | call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); | 1524 | call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); |
1525 | |||
1509 | return err; | 1526 | return err; |
1510 | } | 1527 | } |
1511 | 1528 | ||
@@ -1836,6 +1853,7 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) | |||
1836 | struct net_device *dev = NULL; | 1853 | struct net_device *dev = NULL; |
1837 | struct sk_buff *nskb; | 1854 | struct sk_buff *nskb; |
1838 | int err; | 1855 | int err; |
1856 | u32 ext_filter_mask = 0; | ||
1839 | 1857 | ||
1840 | err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy); | 1858 | err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy); |
1841 | if (err < 0) | 1859 | if (err < 0) |
@@ -1844,6 +1862,9 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) | |||
1844 | if (tb[IFLA_IFNAME]) | 1862 | if (tb[IFLA_IFNAME]) |
1845 | nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ); | 1863 | nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ); |
1846 | 1864 | ||
1865 | if (tb[IFLA_EXT_MASK]) | ||
1866 | ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]); | ||
1867 | |||
1847 | ifm = nlmsg_data(nlh); | 1868 | ifm = nlmsg_data(nlh); |
1848 | if (ifm->ifi_index > 0) | 1869 | if (ifm->ifi_index > 0) |
1849 | dev = __dev_get_by_index(net, ifm->ifi_index); | 1870 | dev = __dev_get_by_index(net, ifm->ifi_index); |
@@ -1855,12 +1876,12 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) | |||
1855 | if (dev == NULL) | 1876 | if (dev == NULL) |
1856 | return -ENODEV; | 1877 | return -ENODEV; |
1857 | 1878 | ||
1858 | nskb = nlmsg_new(if_nlmsg_size(dev), GFP_KERNEL); | 1879 | nskb = nlmsg_new(if_nlmsg_size(dev, ext_filter_mask), GFP_KERNEL); |
1859 | if (nskb == NULL) | 1880 | if (nskb == NULL) |
1860 | return -ENOBUFS; | 1881 | return -ENOBUFS; |
1861 | 1882 | ||
1862 | err = rtnl_fill_ifinfo(nskb, dev, RTM_NEWLINK, NETLINK_CB(skb).pid, | 1883 | err = rtnl_fill_ifinfo(nskb, dev, RTM_NEWLINK, NETLINK_CB(skb).pid, |
1863 | nlh->nlmsg_seq, 0, 0); | 1884 | nlh->nlmsg_seq, 0, 0, ext_filter_mask); |
1864 | if (err < 0) { | 1885 | if (err < 0) { |
1865 | /* -EMSGSIZE implies BUG in if_nlmsg_size */ | 1886 | /* -EMSGSIZE implies BUG in if_nlmsg_size */ |
1866 | WARN_ON(err == -EMSGSIZE); | 1887 | WARN_ON(err == -EMSGSIZE); |
@@ -1871,8 +1892,31 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) | |||
1871 | return err; | 1892 | return err; |
1872 | } | 1893 | } |
1873 | 1894 | ||
1874 | static u16 rtnl_calcit(struct sk_buff *skb) | 1895 | static u16 rtnl_calcit(struct sk_buff *skb, struct nlmsghdr *nlh) |
1875 | { | 1896 | { |
1897 | struct net *net = sock_net(skb->sk); | ||
1898 | struct net_device *dev; | ||
1899 | struct nlattr *tb[IFLA_MAX+1]; | ||
1900 | u32 ext_filter_mask = 0; | ||
1901 | u16 min_ifinfo_dump_size = 0; | ||
1902 | |||
1903 | nlmsg_parse(nlh, sizeof(struct rtgenmsg), tb, IFLA_MAX, ifla_policy); | ||
1904 | |||
1905 | if (tb[IFLA_EXT_MASK]) | ||
1906 | ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]); | ||
1907 | |||
1908 | if (!ext_filter_mask) | ||
1909 | return NLMSG_GOODSIZE; | ||
1910 | /* | ||
1911 | * traverse the list of net devices and compute the minimum | ||
1912 | * buffer size based upon the filter mask. | ||
1913 | */ | ||
1914 | list_for_each_entry(dev, &net->dev_base_head, dev_list) { | ||
1915 | min_ifinfo_dump_size = max_t(u16, min_ifinfo_dump_size, | ||
1916 | if_nlmsg_size(dev, | ||
1917 | ext_filter_mask)); | ||
1918 | } | ||
1919 | |||
1876 | return min_ifinfo_dump_size; | 1920 | return min_ifinfo_dump_size; |
1877 | } | 1921 | } |
1878 | 1922 | ||
@@ -1907,13 +1951,11 @@ void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change) | |||
1907 | int err = -ENOBUFS; | 1951 | int err = -ENOBUFS; |
1908 | size_t if_info_size; | 1952 | size_t if_info_size; |
1909 | 1953 | ||
1910 | skb = nlmsg_new((if_info_size = if_nlmsg_size(dev)), GFP_KERNEL); | 1954 | skb = nlmsg_new((if_info_size = if_nlmsg_size(dev, 0)), GFP_KERNEL); |
1911 | if (skb == NULL) | 1955 | if (skb == NULL) |
1912 | goto errout; | 1956 | goto errout; |
1913 | 1957 | ||
1914 | min_ifinfo_dump_size = max_t(u16, if_info_size, min_ifinfo_dump_size); | 1958 | err = rtnl_fill_ifinfo(skb, dev, type, 0, 0, change, 0, 0); |
1915 | |||
1916 | err = rtnl_fill_ifinfo(skb, dev, type, 0, 0, change, 0); | ||
1917 | if (err < 0) { | 1959 | if (err < 0) { |
1918 | /* -EMSGSIZE implies BUG in if_nlmsg_size() */ | 1960 | /* -EMSGSIZE implies BUG in if_nlmsg_size() */ |
1919 | WARN_ON(err == -EMSGSIZE); | 1961 | WARN_ON(err == -EMSGSIZE); |
@@ -1957,7 +1999,7 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) | |||
1957 | sz_idx = type>>2; | 1999 | sz_idx = type>>2; |
1958 | kind = type&3; | 2000 | kind = type&3; |
1959 | 2001 | ||
1960 | if (kind != 2 && security_netlink_recv(skb, CAP_NET_ADMIN)) | 2002 | if (kind != 2 && !capable(CAP_NET_ADMIN)) |
1961 | return -EPERM; | 2003 | return -EPERM; |
1962 | 2004 | ||
1963 | if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) { | 2005 | if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) { |
@@ -1971,7 +2013,7 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) | |||
1971 | return -EOPNOTSUPP; | 2013 | return -EOPNOTSUPP; |
1972 | calcit = rtnl_get_calcit(family, type); | 2014 | calcit = rtnl_get_calcit(family, type); |
1973 | if (calcit) | 2015 | if (calcit) |
1974 | min_dump_alloc = calcit(skb); | 2016 | min_dump_alloc = calcit(skb, nlh); |
1975 | 2017 | ||
1976 | __rtnl_unlock(); | 2018 | __rtnl_unlock(); |
1977 | rtnl = net->rtnl; | 2019 | rtnl = net->rtnl; |
diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c index 025233de25f9..99b2596531bb 100644 --- a/net/core/secure_seq.c +++ b/net/core/secure_seq.c | |||
@@ -19,6 +19,7 @@ static int __init net_secret_init(void) | |||
19 | } | 19 | } |
20 | late_initcall(net_secret_init); | 20 | late_initcall(net_secret_init); |
21 | 21 | ||
22 | #ifdef CONFIG_INET | ||
22 | static u32 seq_scale(u32 seq) | 23 | static u32 seq_scale(u32 seq) |
23 | { | 24 | { |
24 | /* | 25 | /* |
@@ -33,8 +34,9 @@ static u32 seq_scale(u32 seq) | |||
33 | */ | 34 | */ |
34 | return seq + (ktime_to_ns(ktime_get_real()) >> 6); | 35 | return seq + (ktime_to_ns(ktime_get_real()) >> 6); |
35 | } | 36 | } |
37 | #endif | ||
36 | 38 | ||
37 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) | 39 | #if IS_ENABLED(CONFIG_IPV6) |
38 | __u32 secure_tcpv6_sequence_number(const __be32 *saddr, const __be32 *daddr, | 40 | __u32 secure_tcpv6_sequence_number(const __be32 *saddr, const __be32 *daddr, |
39 | __be16 sport, __be16 dport) | 41 | __be16 sport, __be16 dport) |
40 | { | 42 | { |
@@ -44,7 +46,7 @@ __u32 secure_tcpv6_sequence_number(const __be32 *saddr, const __be32 *daddr, | |||
44 | 46 | ||
45 | memcpy(hash, saddr, 16); | 47 | memcpy(hash, saddr, 16); |
46 | for (i = 0; i < 4; i++) | 48 | for (i = 0; i < 4; i++) |
47 | secret[i] = net_secret[i] + daddr[i]; | 49 | secret[i] = net_secret[i] + (__force u32)daddr[i]; |
48 | secret[4] = net_secret[4] + | 50 | secret[4] = net_secret[4] + |
49 | (((__force u16)sport << 16) + (__force u16)dport); | 51 | (((__force u16)sport << 16) + (__force u16)dport); |
50 | for (i = 5; i < MD5_MESSAGE_BYTES / 4; i++) | 52 | for (i = 5; i < MD5_MESSAGE_BYTES / 4; i++) |
@@ -132,7 +134,7 @@ u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport) | |||
132 | EXPORT_SYMBOL_GPL(secure_ipv4_port_ephemeral); | 134 | EXPORT_SYMBOL_GPL(secure_ipv4_port_ephemeral); |
133 | #endif | 135 | #endif |
134 | 136 | ||
135 | #if defined(CONFIG_IP_DCCP) || defined(CONFIG_IP_DCCP_MODULE) | 137 | #if IS_ENABLED(CONFIG_IP_DCCP) |
136 | u64 secure_dccp_sequence_number(__be32 saddr, __be32 daddr, | 138 | u64 secure_dccp_sequence_number(__be32 saddr, __be32 daddr, |
137 | __be16 sport, __be16 dport) | 139 | __be16 sport, __be16 dport) |
138 | { | 140 | { |
@@ -154,7 +156,7 @@ u64 secure_dccp_sequence_number(__be32 saddr, __be32 daddr, | |||
154 | } | 156 | } |
155 | EXPORT_SYMBOL(secure_dccp_sequence_number); | 157 | EXPORT_SYMBOL(secure_dccp_sequence_number); |
156 | 158 | ||
157 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) | 159 | #if IS_ENABLED(CONFIG_IPV6) |
158 | u64 secure_dccpv6_sequence_number(__be32 *saddr, __be32 *daddr, | 160 | u64 secure_dccpv6_sequence_number(__be32 *saddr, __be32 *daddr, |
159 | __be16 sport, __be16 dport) | 161 | __be16 sport, __be16 dport) |
160 | { | 162 | { |
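The conversions above are part of a tree-wide cleanup: IS_ENABLED() from <linux/kconfig.h> folds the two-macro built-in-or-module test into one. The equivalence, for illustration:

/* Both tests pass when the option is =y or =m; the second form is
 * the new idiom used throughout this diff.
 */
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)         /* old */
#endif

#if IS_ENABLED(CONFIG_IPV6)                                     /* new */
#endif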
diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 18a3cebb753d..da0c97f2fab4 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c | |||
@@ -245,6 +245,55 @@ nodata: | |||
245 | EXPORT_SYMBOL(__alloc_skb); | 245 | EXPORT_SYMBOL(__alloc_skb); |
246 | 246 | ||
247 | /** | 247 | /** |
248 | * build_skb - build a network buffer | ||
249 | * @data: data buffer provided by caller | ||
250 | * | ||
251 | * Allocate a new &sk_buff. Caller provides space holding head and | ||
252 | * skb_shared_info. @data must have been allocated by kmalloc(). | ||
253 | * The return is the new skb buffer. | ||
254 | * On a failure the return is %NULL, and @data is not freed. | ||
255 | * Notes: | ||
256 | * Before IO, the driver allocates only the data buffer, where the NIC | ||
257 | * puts the incoming frame. The driver should add room at the head | ||
258 | * (NET_SKB_PAD) and MUST add room at the tail (SKB_DATA_ALIGN(skb_shared_info)). | ||
259 | * After IO, the driver calls build_skb() to allocate the sk_buff and | ||
260 | * populate it before giving the packet to the stack. | ||
261 | * RX rings contain only data buffers, not full skbs. | ||
262 | */ | ||
263 | struct sk_buff *build_skb(void *data) | ||
264 | { | ||
265 | struct skb_shared_info *shinfo; | ||
266 | struct sk_buff *skb; | ||
267 | unsigned int size; | ||
268 | |||
269 | skb = kmem_cache_alloc(skbuff_head_cache, GFP_ATOMIC); | ||
270 | if (!skb) | ||
271 | return NULL; | ||
272 | |||
273 | size = ksize(data) - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); | ||
274 | |||
275 | memset(skb, 0, offsetof(struct sk_buff, tail)); | ||
276 | skb->truesize = SKB_TRUESIZE(size); | ||
277 | atomic_set(&skb->users, 1); | ||
278 | skb->head = data; | ||
279 | skb->data = data; | ||
280 | skb_reset_tail_pointer(skb); | ||
281 | skb->end = skb->tail + size; | ||
282 | #ifdef NET_SKBUFF_DATA_USES_OFFSET | ||
283 | skb->mac_header = ~0U; | ||
284 | #endif | ||
285 | |||
286 | /* make sure we initialize shinfo sequentially */ | ||
287 | shinfo = skb_shinfo(skb); | ||
288 | memset(shinfo, 0, offsetof(struct skb_shared_info, dataref)); | ||
289 | atomic_set(&shinfo->dataref, 1); | ||
290 | kmemcheck_annotate_variable(shinfo->destructor_arg); | ||
291 | |||
292 | return skb; | ||
293 | } | ||
294 | EXPORT_SYMBOL(build_skb); | ||
295 | |||
296 | /** | ||
248 | * __netdev_alloc_skb - allocate an skbuff for rx on a specific device | 297 | * __netdev_alloc_skb - allocate an skbuff for rx on a specific device |
249 | * @dev: network device to receive on | 298 | * @dev: network device to receive on |
250 | * @length: length to allocate | 299 | * @length: length to allocate |
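As a concrete reading of the build_skb() notes above, a hedged driver-side sketch; names like rx_to_skb and RX_BUF_LEN are illustrative, not from this patch:

#include <linux/skbuff.h>
#include <linux/slab.h>

#define RX_BUF_LEN 1536                         /* illustrative */

/* RX ring slots hold bare kmalloc() buffers sized for headroom,
 * frame, and the tail skb_shared_info that build_skb() expects.
 */
static void *rx_buf_alloc(void)
{
        return kmalloc(NET_SKB_PAD + RX_BUF_LEN +
                       SKB_DATA_ALIGN(sizeof(struct skb_shared_info)),
                       GFP_ATOMIC);
}

static struct sk_buff *rx_to_skb(void *data, unsigned int frame_len)
{
        struct sk_buff *skb = build_skb(data);

        if (!skb) {
                kfree(data);            /* build_skb() leaves @data alone */
                return NULL;
        }
        skb_reserve(skb, NET_SKB_PAD);  /* NIC wrote after the headroom */
        skb_put(skb, frame_len);
        return skb;
}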
@@ -403,7 +452,7 @@ static void skb_release_head_state(struct sk_buff *skb) | |||
403 | WARN_ON(in_irq()); | 452 | WARN_ON(in_irq()); |
404 | skb->destructor(skb); | 453 | skb->destructor(skb); |
405 | } | 454 | } |
406 | #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) | 455 | #if IS_ENABLED(CONFIG_NF_CONNTRACK) |
407 | nf_conntrack_put(skb->nfct); | 456 | nf_conntrack_put(skb->nfct); |
408 | #endif | 457 | #endif |
409 | #ifdef NET_SKBUFF_NF_DEFRAG_NEEDED | 458 | #ifdef NET_SKBUFF_NF_DEFRAG_NEEDED |
@@ -553,15 +602,14 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) | |||
553 | new->ip_summed = old->ip_summed; | 602 | new->ip_summed = old->ip_summed; |
554 | skb_copy_queue_mapping(new, old); | 603 | skb_copy_queue_mapping(new, old); |
555 | new->priority = old->priority; | 604 | new->priority = old->priority; |
556 | #if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE) | 605 | #if IS_ENABLED(CONFIG_IP_VS) |
557 | new->ipvs_property = old->ipvs_property; | 606 | new->ipvs_property = old->ipvs_property; |
558 | #endif | 607 | #endif |
559 | new->protocol = old->protocol; | 608 | new->protocol = old->protocol; |
560 | new->mark = old->mark; | 609 | new->mark = old->mark; |
561 | new->skb_iif = old->skb_iif; | 610 | new->skb_iif = old->skb_iif; |
562 | __nf_copy(new, old); | 611 | __nf_copy(new, old); |
563 | #if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ | 612 | #if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) |
564 | defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE) | ||
565 | new->nf_trace = old->nf_trace; | 613 | new->nf_trace = old->nf_trace; |
566 | #endif | 614 | #endif |
567 | #ifdef CONFIG_NET_SCHED | 615 | #ifdef CONFIG_NET_SCHED |
@@ -791,8 +839,9 @@ struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask) | |||
791 | EXPORT_SYMBOL(skb_copy); | 839 | EXPORT_SYMBOL(skb_copy); |
792 | 840 | ||
793 | /** | 841 | /** |
794 | * pskb_copy - create copy of an sk_buff with private head. | 842 | * __pskb_copy - create copy of an sk_buff with private head. |
795 | * @skb: buffer to copy | 843 | * @skb: buffer to copy |
844 | * @headroom: headroom of new skb | ||
796 | * @gfp_mask: allocation priority | 845 | * @gfp_mask: allocation priority |
797 | * | 846 | * |
798 | * Make a copy of both an &sk_buff and part of its data, located | 847 | * Make a copy of both an &sk_buff and part of its data, located |
@@ -803,16 +852,16 @@ EXPORT_SYMBOL(skb_copy); | |||
803 | * The returned buffer has a reference count of 1. | 852 | * The returned buffer has a reference count of 1. |
804 | */ | 853 | */ |
805 | 854 | ||
806 | struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask) | 855 | struct sk_buff *__pskb_copy(struct sk_buff *skb, int headroom, gfp_t gfp_mask) |
807 | { | 856 | { |
808 | unsigned int size = skb_end_pointer(skb) - skb->head; | 857 | unsigned int size = skb_headlen(skb) + headroom; |
809 | struct sk_buff *n = alloc_skb(size, gfp_mask); | 858 | struct sk_buff *n = alloc_skb(size, gfp_mask); |
810 | 859 | ||
811 | if (!n) | 860 | if (!n) |
812 | goto out; | 861 | goto out; |
813 | 862 | ||
814 | /* Set the data pointer */ | 863 | /* Set the data pointer */ |
815 | skb_reserve(n, skb_headroom(skb)); | 864 | skb_reserve(n, headroom); |
816 | /* Set the tail pointer and length */ | 865 | /* Set the tail pointer and length */ |
817 | skb_put(n, skb_headlen(skb)); | 866 | skb_put(n, skb_headlen(skb)); |
818 | /* Copy the bytes */ | 867 | /* Copy the bytes */ |
@@ -848,7 +897,7 @@ struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask) | |||
848 | out: | 897 | out: |
849 | return n; | 898 | return n; |
850 | } | 899 | } |
851 | EXPORT_SYMBOL(pskb_copy); | 900 | EXPORT_SYMBOL(__pskb_copy); |
852 | 901 | ||
853 | /** | 902 | /** |
854 | * pskb_expand_head - reallocate header of &sk_buff | 903 | * pskb_expand_head - reallocate header of &sk_buff |
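pskb_copy() itself is not deleted by this series; presumably it survives as a header wrapper preserving the old headroom behaviour, roughly:

/* Assumed shape of the compatibility wrapper in <linux/skbuff.h>
 * (not shown in this hunk): callers that want less headroom than the
 * original skb can now call __pskb_copy() directly.
 */
static inline struct sk_buff *pskb_copy(struct sk_buff *skb,
                                        gfp_t gfp_mask)
{
        return __pskb_copy(skb, skb_headroom(skb), gfp_mask);
}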
@@ -2230,7 +2279,7 @@ static int skb_prepare_for_shift(struct sk_buff *skb) | |||
2230 | * @shiftlen: shift up to this many bytes | 2279 | * @shiftlen: shift up to this many bytes |
2231 | * | 2280 | * |
2232 | * Attempts to shift up to shiftlen worth of bytes, which may be less than | 2281 | * Attempts to shift up to shiftlen worth of bytes, which may be less than |
2233 | * the length of the skb, from tgt to skb. Returns the number of bytes shifted. | 2282 | * the length of the skb, from skb to tgt. Returns the number of bytes shifted. |
2234 | * It's up to caller to free skb if everything was shifted. | 2283 | * It's up to caller to free skb if everything was shifted. |
2235 | * | 2284 | * |
2236 | * If @tgt runs out of frags, the whole operation is aborted. | 2285 | * If @tgt runs out of frags, the whole operation is aborted. |
@@ -2621,7 +2670,7 @@ EXPORT_SYMBOL_GPL(skb_pull_rcsum); | |||
2621 | * a pointer to the first in a list of new skbs for the segments. | 2670 | * a pointer to the first in a list of new skbs for the segments. |
2622 | * In case of error it returns ERR_PTR(err). | 2671 | * In case of error it returns ERR_PTR(err). |
2623 | */ | 2672 | */ |
2624 | struct sk_buff *skb_segment(struct sk_buff *skb, u32 features) | 2673 | struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) |
2625 | { | 2674 | { |
2626 | struct sk_buff *segs = NULL; | 2675 | struct sk_buff *segs = NULL; |
2627 | struct sk_buff *tail = NULL; | 2676 | struct sk_buff *tail = NULL; |
@@ -3169,6 +3218,26 @@ void skb_tstamp_tx(struct sk_buff *orig_skb, | |||
3169 | } | 3218 | } |
3170 | EXPORT_SYMBOL_GPL(skb_tstamp_tx); | 3219 | EXPORT_SYMBOL_GPL(skb_tstamp_tx); |
3171 | 3220 | ||
3221 | void skb_complete_wifi_ack(struct sk_buff *skb, bool acked) | ||
3222 | { | ||
3223 | struct sock *sk = skb->sk; | ||
3224 | struct sock_exterr_skb *serr; | ||
3225 | int err; | ||
3226 | |||
3227 | skb->wifi_acked_valid = 1; | ||
3228 | skb->wifi_acked = acked; | ||
3229 | |||
3230 | serr = SKB_EXT_ERR(skb); | ||
3231 | memset(serr, 0, sizeof(*serr)); | ||
3232 | serr->ee.ee_errno = ENOMSG; | ||
3233 | serr->ee.ee_origin = SO_EE_ORIGIN_TXSTATUS; | ||
3234 | |||
3235 | err = sock_queue_err_skb(sk, skb); | ||
3236 | if (err) | ||
3237 | kfree_skb(skb); | ||
3238 | } | ||
3239 | EXPORT_SYMBOL_GPL(skb_complete_wifi_ack); | ||
3240 | |||
3172 | 3241 | ||
3173 | /** | 3242 | /** |
3174 | * skb_partial_csum_set - set up and verify partial csum values for packet | 3243 | * skb_partial_csum_set - set up and verify partial csum values for packet |
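skb_complete_wifi_ack() queues the TX-status skb on the socket error queue with origin SO_EE_ORIGIN_TXSTATUS; in the companion patches of this series the acked bit is delivered to userspace as an SCM_WIFI_STATUS control message when reading MSG_ERRQUEUE. A hedged userspace sketch (the SCM_WIFI_STATUS value is an assumption):

#include <string.h>
#include <sys/socket.h>
#include <sys/uio.h>

#ifndef SCM_WIFI_STATUS
#define SCM_WIFI_STATUS 41              /* assumed; equals SO_WIFI_STATUS */
#endif

/* Returns 1 if the frame was ACKed over the air, 0 if not or if no
 * status cmsg arrived, -1 on recvmsg() error.
 */
static int read_wifi_status(int fd)
{
        char data[2048], ctrl[512];
        struct iovec iov = { .iov_base = data, .iov_len = sizeof(data) };
        struct msghdr msg = {
                .msg_iov = &iov, .msg_iovlen = 1,
                .msg_control = ctrl, .msg_controllen = sizeof(ctrl),
        };
        struct cmsghdr *cm;

        if (recvmsg(fd, &msg, MSG_ERRQUEUE) < 0)
                return -1;

        for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm)) {
                if (cm->cmsg_level == SOL_SOCKET &&
                    cm->cmsg_type == SCM_WIFI_STATUS) {
                        int acked;

                        memcpy(&acked, CMSG_DATA(cm), sizeof(acked));
                        return acked;
                }
        }
        return 0;
}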
diff --git a/net/core/sock.c b/net/core/sock.c index 4ed7b1d12f5e..02f8dfe320b7 100644 --- a/net/core/sock.c +++ b/net/core/sock.c | |||
@@ -111,6 +111,8 @@ | |||
111 | #include <linux/init.h> | 111 | #include <linux/init.h> |
112 | #include <linux/highmem.h> | 112 | #include <linux/highmem.h> |
113 | #include <linux/user_namespace.h> | 113 | #include <linux/user_namespace.h> |
114 | #include <linux/jump_label.h> | ||
115 | #include <linux/memcontrol.h> | ||
114 | 116 | ||
115 | #include <asm/uaccess.h> | 117 | #include <asm/uaccess.h> |
116 | #include <asm/system.h> | 118 | #include <asm/system.h> |
@@ -125,6 +127,7 @@ | |||
125 | #include <net/xfrm.h> | 127 | #include <net/xfrm.h> |
126 | #include <linux/ipsec.h> | 128 | #include <linux/ipsec.h> |
127 | #include <net/cls_cgroup.h> | 129 | #include <net/cls_cgroup.h> |
130 | #include <net/netprio_cgroup.h> | ||
128 | 131 | ||
129 | #include <linux/filter.h> | 132 | #include <linux/filter.h> |
130 | 133 | ||
@@ -134,6 +137,46 @@ | |||
134 | #include <net/tcp.h> | 137 | #include <net/tcp.h> |
135 | #endif | 138 | #endif |
136 | 139 | ||
140 | static DEFINE_MUTEX(proto_list_mutex); | ||
141 | static LIST_HEAD(proto_list); | ||
142 | |||
143 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM | ||
144 | int mem_cgroup_sockets_init(struct cgroup *cgrp, struct cgroup_subsys *ss) | ||
145 | { | ||
146 | struct proto *proto; | ||
147 | int ret = 0; | ||
148 | |||
149 | mutex_lock(&proto_list_mutex); | ||
150 | list_for_each_entry(proto, &proto_list, node) { | ||
151 | if (proto->init_cgroup) { | ||
152 | ret = proto->init_cgroup(cgrp, ss); | ||
153 | if (ret) | ||
154 | goto out; | ||
155 | } | ||
156 | } | ||
157 | |||
158 | mutex_unlock(&proto_list_mutex); | ||
159 | return ret; | ||
160 | out: | ||
161 | list_for_each_entry_continue_reverse(proto, &proto_list, node) | ||
162 | if (proto->destroy_cgroup) | ||
163 | proto->destroy_cgroup(cgrp, ss); | ||
164 | mutex_unlock(&proto_list_mutex); | ||
165 | return ret; | ||
166 | } | ||
167 | |||
168 | void mem_cgroup_sockets_destroy(struct cgroup *cgrp, struct cgroup_subsys *ss) | ||
169 | { | ||
170 | struct proto *proto; | ||
171 | |||
172 | mutex_lock(&proto_list_mutex); | ||
173 | list_for_each_entry_reverse(proto, &proto_list, node) | ||
174 | if (proto->destroy_cgroup) | ||
175 | proto->destroy_cgroup(cgrp, ss); | ||
176 | mutex_unlock(&proto_list_mutex); | ||
177 | } | ||
178 | #endif | ||
179 | |||
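mem_cgroup_sockets_init() simply walks proto_list and defers to per-protocol hooks; a protocol opts in by filling the two new callbacks. A sketch, assuming the tcp_init_cgroup/tcp_destroy_cgroup helpers this series adds elsewhere:

static struct proto example_prot = {
        .name           = "EXAMPLE",
        /* ... usual proto fields ... */
#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
        .init_cgroup    = tcp_init_cgroup,      /* assumed in-tree user */
        .destroy_cgroup = tcp_destroy_cgroup,
#endif
};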
137 | /* | 180 | /* |
138 | * Each address family might have different locking rules, so we have | 181 | * Each address family might have different locking rules, so we have |
139 | * one slock key per address family: | 182 | * one slock key per address family: |
@@ -141,6 +184,9 @@ | |||
141 | static struct lock_class_key af_family_keys[AF_MAX]; | 184 | static struct lock_class_key af_family_keys[AF_MAX]; |
142 | static struct lock_class_key af_family_slock_keys[AF_MAX]; | 185 | static struct lock_class_key af_family_slock_keys[AF_MAX]; |
143 | 186 | ||
187 | struct jump_label_key memcg_socket_limit_enabled; | ||
188 | EXPORT_SYMBOL(memcg_socket_limit_enabled); | ||
189 | |||
144 | /* | 190 | /* |
145 | * Make lock validator output more readable. (we pre-construct these | 191 | * Make lock validator output more readable. (we pre-construct these |
146 | * strings build-time, so that runtime initialization of socket | 192 | * strings build-time, so that runtime initialization of socket |
@@ -221,10 +267,16 @@ __u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX; | |||
221 | int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512); | 267 | int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512); |
222 | EXPORT_SYMBOL(sysctl_optmem_max); | 268 | EXPORT_SYMBOL(sysctl_optmem_max); |
223 | 269 | ||
224 | #if defined(CONFIG_CGROUPS) && !defined(CONFIG_NET_CLS_CGROUP) | 270 | #if defined(CONFIG_CGROUPS) |
271 | #if !defined(CONFIG_NET_CLS_CGROUP) | ||
225 | int net_cls_subsys_id = -1; | 272 | int net_cls_subsys_id = -1; |
226 | EXPORT_SYMBOL_GPL(net_cls_subsys_id); | 273 | EXPORT_SYMBOL_GPL(net_cls_subsys_id); |
227 | #endif | 274 | #endif |
275 | #if !defined(CONFIG_NETPRIO_CGROUP) | ||
276 | int net_prio_subsys_id = -1; | ||
277 | EXPORT_SYMBOL_GPL(net_prio_subsys_id); | ||
278 | #endif | ||
279 | #endif | ||
228 | 280 | ||
229 | static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen) | 281 | static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen) |
230 | { | 282 | { |
@@ -269,14 +321,14 @@ static void sock_warn_obsolete_bsdism(const char *name) | |||
269 | } | 321 | } |
270 | } | 322 | } |
271 | 323 | ||
272 | static void sock_disable_timestamp(struct sock *sk, int flag) | 324 | #define SK_FLAGS_TIMESTAMP ((1UL << SOCK_TIMESTAMP) | (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE)) |
325 | |||
326 | static void sock_disable_timestamp(struct sock *sk, unsigned long flags) | ||
273 | { | 327 | { |
274 | if (sock_flag(sk, flag)) { | 328 | if (sk->sk_flags & flags) { |
275 | sock_reset_flag(sk, flag); | 329 | sk->sk_flags &= ~flags; |
276 | if (!sock_flag(sk, SOCK_TIMESTAMP) && | 330 | if (!(sk->sk_flags & SK_FLAGS_TIMESTAMP)) |
277 | !sock_flag(sk, SOCK_TIMESTAMPING_RX_SOFTWARE)) { | ||
278 | net_disable_timestamp(); | 331 | net_disable_timestamp(); |
279 | } | ||
280 | } | 332 | } |
281 | } | 333 | } |
282 | 334 | ||
@@ -288,11 +340,7 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) | |||
288 | unsigned long flags; | 340 | unsigned long flags; |
289 | struct sk_buff_head *list = &sk->sk_receive_queue; | 341 | struct sk_buff_head *list = &sk->sk_receive_queue; |
290 | 342 | ||
291 | /* Cast sk->rcvbuf to unsigned... It's pointless, but reduces | 343 | if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) { |
292 | number of warnings when compiling with -W --ANK | ||
293 | */ | ||
294 | if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >= | ||
295 | (unsigned)sk->sk_rcvbuf) { | ||
296 | atomic_inc(&sk->sk_drops); | 344 | atomic_inc(&sk->sk_drops); |
297 | trace_sock_rcvqueue_full(sk, skb); | 345 | trace_sock_rcvqueue_full(sk, skb); |
298 | return -ENOMEM; | 346 | return -ENOMEM; |
@@ -682,7 +730,7 @@ set_rcvbuf: | |||
682 | SOCK_TIMESTAMPING_RX_SOFTWARE); | 730 | SOCK_TIMESTAMPING_RX_SOFTWARE); |
683 | else | 731 | else |
684 | sock_disable_timestamp(sk, | 732 | sock_disable_timestamp(sk, |
685 | SOCK_TIMESTAMPING_RX_SOFTWARE); | 733 | (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE)); |
686 | sock_valbool_flag(sk, SOCK_TIMESTAMPING_SOFTWARE, | 734 | sock_valbool_flag(sk, SOCK_TIMESTAMPING_SOFTWARE, |
687 | val & SOF_TIMESTAMPING_SOFTWARE); | 735 | val & SOF_TIMESTAMPING_SOFTWARE); |
688 | sock_valbool_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE, | 736 | sock_valbool_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE, |
@@ -740,6 +788,11 @@ set_rcvbuf: | |||
740 | case SO_RXQ_OVFL: | 788 | case SO_RXQ_OVFL: |
741 | sock_valbool_flag(sk, SOCK_RXQ_OVFL, valbool); | 789 | sock_valbool_flag(sk, SOCK_RXQ_OVFL, valbool); |
742 | break; | 790 | break; |
791 | |||
792 | case SO_WIFI_STATUS: | ||
793 | sock_valbool_flag(sk, SOCK_WIFI_STATUS, valbool); | ||
794 | break; | ||
795 | |||
743 | default: | 796 | default: |
744 | ret = -ENOPROTOOPT; | 797 | ret = -ENOPROTOOPT; |
745 | break; | 798 | break; |
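The userspace half of SO_WIFI_STATUS is a plain boolean socket option; a minimal sketch (the value 41 is assumed from asm-generic/socket.h in this series):

#include <sys/socket.h>

#ifndef SO_WIFI_STATUS
#define SO_WIFI_STATUS 41               /* assumed value */
#endif

static int enable_wifi_status(int fd)
{
        int one = 1;

        return setsockopt(fd, SOL_SOCKET, SO_WIFI_STATUS,
                          &one, sizeof(one));
}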
@@ -961,6 +1014,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname, | |||
961 | v.val = !!sock_flag(sk, SOCK_RXQ_OVFL); | 1014 | v.val = !!sock_flag(sk, SOCK_RXQ_OVFL); |
962 | break; | 1015 | break; |
963 | 1016 | ||
1017 | case SO_WIFI_STATUS: | ||
1018 | v.val = !!sock_flag(sk, SOCK_WIFI_STATUS); | ||
1019 | break; | ||
1020 | |||
964 | default: | 1021 | default: |
965 | return -ENOPROTOOPT; | 1022 | return -ENOPROTOOPT; |
966 | } | 1023 | } |
@@ -1111,6 +1168,15 @@ void sock_update_classid(struct sock *sk) | |||
1111 | sk->sk_classid = classid; | 1168 | sk->sk_classid = classid; |
1112 | } | 1169 | } |
1113 | EXPORT_SYMBOL(sock_update_classid); | 1170 | EXPORT_SYMBOL(sock_update_classid); |
1171 | |||
1172 | void sock_update_netprioidx(struct sock *sk) | ||
1173 | { | ||
1174 | if (in_interrupt()) | ||
1175 | return; | ||
1176 | |||
1177 | sk->sk_cgrp_prioidx = task_netprioidx(current); | ||
1178 | } | ||
1179 | EXPORT_SYMBOL_GPL(sock_update_netprioidx); | ||
1114 | #endif | 1180 | #endif |
1115 | 1181 | ||
1116 | /** | 1182 | /** |
@@ -1138,6 +1204,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority, | |||
1138 | atomic_set(&sk->sk_wmem_alloc, 1); | 1204 | atomic_set(&sk->sk_wmem_alloc, 1); |
1139 | 1205 | ||
1140 | sock_update_classid(sk); | 1206 | sock_update_classid(sk); |
1207 | sock_update_netprioidx(sk); | ||
1141 | } | 1208 | } |
1142 | 1209 | ||
1143 | return sk; | 1210 | return sk; |
@@ -1158,8 +1225,7 @@ static void __sk_free(struct sock *sk) | |||
1158 | RCU_INIT_POINTER(sk->sk_filter, NULL); | 1225 | RCU_INIT_POINTER(sk->sk_filter, NULL); |
1159 | } | 1226 | } |
1160 | 1227 | ||
1161 | sock_disable_timestamp(sk, SOCK_TIMESTAMP); | 1228 | sock_disable_timestamp(sk, SK_FLAGS_TIMESTAMP); |
1162 | sock_disable_timestamp(sk, SOCK_TIMESTAMPING_RX_SOFTWARE); | ||
1163 | 1229 | ||
1164 | if (atomic_read(&sk->sk_omem_alloc)) | 1230 | if (atomic_read(&sk->sk_omem_alloc)) |
1165 | printk(KERN_DEBUG "%s: optmem leakage (%d bytes) detected.\n", | 1231 | printk(KERN_DEBUG "%s: optmem leakage (%d bytes) detected.\n", |
@@ -1204,7 +1270,20 @@ void sk_release_kernel(struct sock *sk) | |||
1204 | } | 1270 | } |
1205 | EXPORT_SYMBOL(sk_release_kernel); | 1271 | EXPORT_SYMBOL(sk_release_kernel); |
1206 | 1272 | ||
1207 | struct sock *sk_clone(const struct sock *sk, const gfp_t priority) | 1273 | static void sk_update_clone(const struct sock *sk, struct sock *newsk) |
1274 | { | ||
1275 | if (mem_cgroup_sockets_enabled && sk->sk_cgrp) | ||
1276 | sock_update_memcg(newsk); | ||
1277 | } | ||
1278 | |||
1279 | /** | ||
1280 | * sk_clone_lock - clone a socket, and lock its clone | ||
1281 | * @sk: the socket to clone | ||
1282 | * @priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc) | ||
1283 | * | ||
1284 | * Caller must unlock socket even in error path (bh_unlock_sock(newsk)) | ||
1285 | */ | ||
1286 | struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) | ||
1208 | { | 1287 | { |
1209 | struct sock *newsk; | 1288 | struct sock *newsk; |
1210 | 1289 | ||
@@ -1287,17 +1366,18 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority) | |||
1287 | sk_set_socket(newsk, NULL); | 1366 | sk_set_socket(newsk, NULL); |
1288 | newsk->sk_wq = NULL; | 1367 | newsk->sk_wq = NULL; |
1289 | 1368 | ||
1369 | sk_update_clone(sk, newsk); | ||
1370 | |||
1290 | if (newsk->sk_prot->sockets_allocated) | 1371 | if (newsk->sk_prot->sockets_allocated) |
1291 | percpu_counter_inc(newsk->sk_prot->sockets_allocated); | 1372 | sk_sockets_allocated_inc(newsk); |
1292 | 1373 | ||
1293 | if (sock_flag(newsk, SOCK_TIMESTAMP) || | 1374 | if (newsk->sk_flags & SK_FLAGS_TIMESTAMP) |
1294 | sock_flag(newsk, SOCK_TIMESTAMPING_RX_SOFTWARE)) | ||
1295 | net_enable_timestamp(); | 1375 | net_enable_timestamp(); |
1296 | } | 1376 | } |
1297 | out: | 1377 | out: |
1298 | return newsk; | 1378 | return newsk; |
1299 | } | 1379 | } |
1300 | EXPORT_SYMBOL_GPL(sk_clone); | 1380 | EXPORT_SYMBOL_GPL(sk_clone_lock); |
1301 | 1381 | ||
1302 | void sk_setup_caps(struct sock *sk, struct dst_entry *dst) | 1382 | void sk_setup_caps(struct sock *sk, struct dst_entry *dst) |
1303 | { | 1383 | { |
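Per the new kerneldoc, every sk_clone_lock() caller owns the bottom-half lock on the clone; a hedged caller sketch:

/* Illustrative caller, modelled on how request-sock fast paths are
 * expected to use the renamed helper.
 */
struct sock *newsk = sk_clone_lock(sk, GFP_ATOMIC);

if (newsk) {
        /* ... protocol-private initialisation, may fail ... */
        bh_unlock_sock(newsk);          /* required even on error */
}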
@@ -1677,30 +1757,34 @@ int __sk_mem_schedule(struct sock *sk, int size, int kind) | |||
1677 | struct proto *prot = sk->sk_prot; | 1757 | struct proto *prot = sk->sk_prot; |
1678 | int amt = sk_mem_pages(size); | 1758 | int amt = sk_mem_pages(size); |
1679 | long allocated; | 1759 | long allocated; |
1760 | int parent_status = UNDER_LIMIT; | ||
1680 | 1761 | ||
1681 | sk->sk_forward_alloc += amt * SK_MEM_QUANTUM; | 1762 | sk->sk_forward_alloc += amt * SK_MEM_QUANTUM; |
1682 | allocated = atomic_long_add_return(amt, prot->memory_allocated); | 1763 | |
1764 | allocated = sk_memory_allocated_add(sk, amt, &parent_status); | ||
1683 | 1765 | ||
1684 | /* Under limit. */ | 1766 | /* Under limit. */ |
1685 | if (allocated <= prot->sysctl_mem[0]) { | 1767 | if (parent_status == UNDER_LIMIT && |
1686 | if (prot->memory_pressure && *prot->memory_pressure) | 1768 | allocated <= sk_prot_mem_limits(sk, 0)) { |
1687 | *prot->memory_pressure = 0; | 1769 | sk_leave_memory_pressure(sk); |
1688 | return 1; | 1770 | return 1; |
1689 | } | 1771 | } |
1690 | 1772 | ||
1691 | /* Under pressure. */ | 1773 | /* Under pressure. (we or our parents) */ |
1692 | if (allocated > prot->sysctl_mem[1]) | 1774 | if ((parent_status > SOFT_LIMIT) || |
1693 | if (prot->enter_memory_pressure) | 1775 | allocated > sk_prot_mem_limits(sk, 1)) |
1694 | prot->enter_memory_pressure(sk); | 1776 | sk_enter_memory_pressure(sk); |
1695 | 1777 | ||
1696 | /* Over hard limit. */ | 1778 | /* Over hard limit (we or our parents) */ |
1697 | if (allocated > prot->sysctl_mem[2]) | 1779 | if ((parent_status == OVER_LIMIT) || |
1780 | (allocated > sk_prot_mem_limits(sk, 2))) | ||
1698 | goto suppress_allocation; | 1781 | goto suppress_allocation; |
1699 | 1782 | ||
1700 | /* guarantee minimum buffer size under pressure */ | 1783 | /* guarantee minimum buffer size under pressure */ |
1701 | if (kind == SK_MEM_RECV) { | 1784 | if (kind == SK_MEM_RECV) { |
1702 | if (atomic_read(&sk->sk_rmem_alloc) < prot->sysctl_rmem[0]) | 1785 | if (atomic_read(&sk->sk_rmem_alloc) < prot->sysctl_rmem[0]) |
1703 | return 1; | 1786 | return 1; |
1787 | |||
1704 | } else { /* SK_MEM_SEND */ | 1788 | } else { /* SK_MEM_SEND */ |
1705 | if (sk->sk_type == SOCK_STREAM) { | 1789 | if (sk->sk_type == SOCK_STREAM) { |
1706 | if (sk->sk_wmem_queued < prot->sysctl_wmem[0]) | 1790 | if (sk->sk_wmem_queued < prot->sysctl_wmem[0]) |
@@ -1710,13 +1794,13 @@ int __sk_mem_schedule(struct sock *sk, int size, int kind) | |||
1710 | return 1; | 1794 | return 1; |
1711 | } | 1795 | } |
1712 | 1796 | ||
1713 | if (prot->memory_pressure) { | 1797 | if (sk_has_memory_pressure(sk)) { |
1714 | int alloc; | 1798 | int alloc; |
1715 | 1799 | ||
1716 | if (!*prot->memory_pressure) | 1800 | if (!sk_under_memory_pressure(sk)) |
1717 | return 1; | 1801 | return 1; |
1718 | alloc = percpu_counter_read_positive(prot->sockets_allocated); | 1802 | alloc = sk_sockets_allocated_read_positive(sk); |
1719 | if (prot->sysctl_mem[2] > alloc * | 1803 | if (sk_prot_mem_limits(sk, 2) > alloc * |
1720 | sk_mem_pages(sk->sk_wmem_queued + | 1804 | sk_mem_pages(sk->sk_wmem_queued + |
1721 | atomic_read(&sk->sk_rmem_alloc) + | 1805 | atomic_read(&sk->sk_rmem_alloc) + |
1722 | sk->sk_forward_alloc)) | 1806 | sk->sk_forward_alloc)) |
@@ -1739,7 +1823,9 @@ suppress_allocation: | |||
1739 | 1823 | ||
1740 | /* Alas. Undo changes. */ | 1824 | /* Alas. Undo changes. */ |
1741 | sk->sk_forward_alloc -= amt * SK_MEM_QUANTUM; | 1825 | sk->sk_forward_alloc -= amt * SK_MEM_QUANTUM; |
1742 | atomic_long_sub(amt, prot->memory_allocated); | 1826 | |
1827 | sk_memory_allocated_sub(sk, amt); | ||
1828 | |||
1743 | return 0; | 1829 | return 0; |
1744 | } | 1830 | } |
1745 | EXPORT_SYMBOL(__sk_mem_schedule); | 1831 | EXPORT_SYMBOL(__sk_mem_schedule); |
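A hedged reconstruction of the new helper used above (it lives in <net/sock.h> in this series and is not part of this hunk): charging goes through the socket's cgroup first, and the strictest verdict along the hierarchy (UNDER_LIMIT / SOFT_LIMIT / OVER_LIMIT) comes back through @parent_status.

/* Plausible shape only; details assumed from the surrounding series. */
static inline long
sk_memory_allocated_add(struct sock *sk, int amt, int *parent_status)
{
        struct proto *prot = sk->sk_prot;

        if (mem_cgroup_sockets_enabled && sk->sk_cgrp) {
                memcg_memory_allocated_add(sk->sk_cgrp, amt, parent_status);
                /* the global protocol counter is still maintained */
                atomic_long_add_return(amt, prot->memory_allocated);
                return memcg_memory_allocated_read(sk->sk_cgrp);
        }

        return atomic_long_add_return(amt, prot->memory_allocated);
}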
@@ -1750,15 +1836,13 @@ EXPORT_SYMBOL(__sk_mem_schedule); | |||
1750 | */ | 1836 | */ |
1751 | void __sk_mem_reclaim(struct sock *sk) | 1837 | void __sk_mem_reclaim(struct sock *sk) |
1752 | { | 1838 | { |
1753 | struct proto *prot = sk->sk_prot; | 1839 | sk_memory_allocated_sub(sk, |
1754 | 1840 | sk->sk_forward_alloc >> SK_MEM_QUANTUM_SHIFT); | |
1755 | atomic_long_sub(sk->sk_forward_alloc >> SK_MEM_QUANTUM_SHIFT, | ||
1756 | prot->memory_allocated); | ||
1757 | sk->sk_forward_alloc &= SK_MEM_QUANTUM - 1; | 1841 | sk->sk_forward_alloc &= SK_MEM_QUANTUM - 1; |
1758 | 1842 | ||
1759 | if (prot->memory_pressure && *prot->memory_pressure && | 1843 | if (sk_under_memory_pressure(sk) && |
1760 | (atomic_long_read(prot->memory_allocated) < prot->sysctl_mem[0])) | 1844 | (sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0))) |
1761 | *prot->memory_pressure = 0; | 1845 | sk_leave_memory_pressure(sk); |
1762 | } | 1846 | } |
1763 | EXPORT_SYMBOL(__sk_mem_reclaim); | 1847 | EXPORT_SYMBOL(__sk_mem_reclaim); |
1764 | 1848 | ||
@@ -2129,16 +2213,15 @@ EXPORT_SYMBOL(sock_get_timestampns); | |||
2129 | void sock_enable_timestamp(struct sock *sk, int flag) | 2213 | void sock_enable_timestamp(struct sock *sk, int flag) |
2130 | { | 2214 | { |
2131 | if (!sock_flag(sk, flag)) { | 2215 | if (!sock_flag(sk, flag)) { |
2216 | unsigned long previous_flags = sk->sk_flags; | ||
2217 | |||
2132 | sock_set_flag(sk, flag); | 2218 | sock_set_flag(sk, flag); |
2133 | /* | 2219 | /* |
2134 | * we just set one of the two flags which require net | 2220 | * we just set one of the two flags which require net |
2135 | * time stamping, but time stamping might have been on | 2221 | * time stamping, but time stamping might have been on |
2136 | * already because of the other one | 2222 | * already because of the other one |
2137 | */ | 2223 | */ |
2138 | if (!sock_flag(sk, | 2224 | if (!(previous_flags & SK_FLAGS_TIMESTAMP)) |
2139 | flag == SOCK_TIMESTAMP ? | ||
2140 | SOCK_TIMESTAMPING_RX_SOFTWARE : | ||
2141 | SOCK_TIMESTAMP)) | ||
2142 | net_enable_timestamp(); | 2225 | net_enable_timestamp(); |
2143 | } | 2226 | } |
2144 | } | 2227 | } |
@@ -2250,9 +2333,6 @@ void sk_common_release(struct sock *sk) | |||
2250 | } | 2333 | } |
2251 | EXPORT_SYMBOL(sk_common_release); | 2334 | EXPORT_SYMBOL(sk_common_release); |
2252 | 2335 | ||
2253 | static DEFINE_RWLOCK(proto_list_lock); | ||
2254 | static LIST_HEAD(proto_list); | ||
2255 | |||
2256 | #ifdef CONFIG_PROC_FS | 2336 | #ifdef CONFIG_PROC_FS |
2257 | #define PROTO_INUSE_NR 64 /* should be enough for the first time */ | 2337 | #define PROTO_INUSE_NR 64 /* should be enough for the first time */ |
2258 | struct prot_inuse { | 2338 | struct prot_inuse { |
@@ -2401,10 +2481,10 @@ int proto_register(struct proto *prot, int alloc_slab) | |||
2401 | } | 2481 | } |
2402 | } | 2482 | } |
2403 | 2483 | ||
2404 | write_lock(&proto_list_lock); | 2484 | mutex_lock(&proto_list_mutex); |
2405 | list_add(&prot->node, &proto_list); | 2485 | list_add(&prot->node, &proto_list); |
2406 | assign_proto_idx(prot); | 2486 | assign_proto_idx(prot); |
2407 | write_unlock(&proto_list_lock); | 2487 | mutex_unlock(&proto_list_mutex); |
2408 | return 0; | 2488 | return 0; |
2409 | 2489 | ||
2410 | out_free_timewait_sock_slab_name: | 2490 | out_free_timewait_sock_slab_name: |
@@ -2427,10 +2507,10 @@ EXPORT_SYMBOL(proto_register); | |||
2427 | 2507 | ||
2428 | void proto_unregister(struct proto *prot) | 2508 | void proto_unregister(struct proto *prot) |
2429 | { | 2509 | { |
2430 | write_lock(&proto_list_lock); | 2510 | mutex_lock(&proto_list_mutex); |
2431 | release_proto_idx(prot); | 2511 | release_proto_idx(prot); |
2432 | list_del(&prot->node); | 2512 | list_del(&prot->node); |
2433 | write_unlock(&proto_list_lock); | 2513 | mutex_unlock(&proto_list_mutex); |
2434 | 2514 | ||
2435 | if (prot->slab != NULL) { | 2515 | if (prot->slab != NULL) { |
2436 | kmem_cache_destroy(prot->slab); | 2516 | kmem_cache_destroy(prot->slab); |
@@ -2453,9 +2533,9 @@ EXPORT_SYMBOL(proto_unregister); | |||
2453 | 2533 | ||
2454 | #ifdef CONFIG_PROC_FS | 2534 | #ifdef CONFIG_PROC_FS |
2455 | static void *proto_seq_start(struct seq_file *seq, loff_t *pos) | 2535 | static void *proto_seq_start(struct seq_file *seq, loff_t *pos) |
2456 | __acquires(proto_list_lock) | 2536 | __acquires(proto_list_mutex) |
2457 | { | 2537 | { |
2458 | read_lock(&proto_list_lock); | 2538 | mutex_lock(&proto_list_mutex); |
2459 | return seq_list_start_head(&proto_list, *pos); | 2539 | return seq_list_start_head(&proto_list, *pos); |
2460 | } | 2540 | } |
2461 | 2541 | ||
@@ -2465,25 +2545,36 @@ static void *proto_seq_next(struct seq_file *seq, void *v, loff_t *pos) | |||
2465 | } | 2545 | } |
2466 | 2546 | ||
2467 | static void proto_seq_stop(struct seq_file *seq, void *v) | 2547 | static void proto_seq_stop(struct seq_file *seq, void *v) |
2468 | __releases(proto_list_lock) | 2548 | __releases(proto_list_mutex) |
2469 | { | 2549 | { |
2470 | read_unlock(&proto_list_lock); | 2550 | mutex_unlock(&proto_list_mutex); |
2471 | } | 2551 | } |
2472 | 2552 | ||
2473 | static char proto_method_implemented(const void *method) | 2553 | static char proto_method_implemented(const void *method) |
2474 | { | 2554 | { |
2475 | return method == NULL ? 'n' : 'y'; | 2555 | return method == NULL ? 'n' : 'y'; |
2476 | } | 2556 | } |
2557 | static long sock_prot_memory_allocated(struct proto *proto) | ||
2558 | { | ||
2559 | return proto->memory_allocated != NULL ? proto_memory_allocated(proto): -1L; | ||
2560 | } | ||
2561 | |||
2562 | static char *sock_prot_memory_pressure(struct proto *proto) | ||
2563 | { | ||
2564 | return proto->memory_pressure != NULL ? | ||
2565 | proto_memory_pressure(proto) ? "yes" : "no" : "NI"; | ||
2566 | } | ||
2477 | 2567 | ||
2478 | static void proto_seq_printf(struct seq_file *seq, struct proto *proto) | 2568 | static void proto_seq_printf(struct seq_file *seq, struct proto *proto) |
2479 | { | 2569 | { |
2570 | |||
2480 | seq_printf(seq, "%-9s %4u %6d %6ld %-3s %6u %-3s %-10s " | 2571 | seq_printf(seq, "%-9s %4u %6d %6ld %-3s %6u %-3s %-10s " |
2481 | "%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n", | 2572 | "%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n", |
2482 | proto->name, | 2573 | proto->name, |
2483 | proto->obj_size, | 2574 | proto->obj_size, |
2484 | sock_prot_inuse_get(seq_file_net(seq), proto), | 2575 | sock_prot_inuse_get(seq_file_net(seq), proto), |
2485 | proto->memory_allocated != NULL ? atomic_long_read(proto->memory_allocated) : -1L, | 2576 | sock_prot_memory_allocated(proto), |
2486 | proto->memory_pressure != NULL ? *proto->memory_pressure ? "yes" : "no" : "NI", | 2577 | sock_prot_memory_pressure(proto), |
2487 | proto->max_header, | 2578 | proto->max_header, |
2488 | proto->slab == NULL ? "no" : "yes", | 2579 | proto->slab == NULL ? "no" : "yes", |
2489 | module_name(proto->owner), | 2580 | module_name(proto->owner), |
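The /proc/net/protocols format is unchanged; only the memory and pressure columns are now routed through the wrappers above so cgroup-managed protocols report sensibly. An illustrative excerpt with invented values:

protocol  size sockets  memory press maxhdr  slab module ...
TCP       1680      14     322   no    320   yes  kernel ...
UDP        880       6      12   NI      0   yes  kernel ...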
diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c new file mode 100644 index 000000000000..b9868e1fd62c --- /dev/null +++ b/net/core/sock_diag.c | |||
@@ -0,0 +1,192 @@ | |||
1 | #include <linux/mutex.h> | ||
2 | #include <linux/socket.h> | ||
3 | #include <linux/skbuff.h> | ||
4 | #include <net/netlink.h> | ||
5 | #include <net/net_namespace.h> | ||
6 | #include <linux/module.h> | ||
7 | #include <linux/rtnetlink.h> | ||
8 | #include <net/sock.h> | ||
9 | |||
10 | #include <linux/inet_diag.h> | ||
11 | #include <linux/sock_diag.h> | ||
12 | |||
13 | static struct sock_diag_handler *sock_diag_handlers[AF_MAX]; | ||
14 | static int (*inet_rcv_compat)(struct sk_buff *skb, struct nlmsghdr *nlh); | ||
15 | static DEFINE_MUTEX(sock_diag_table_mutex); | ||
16 | |||
17 | int sock_diag_check_cookie(void *sk, __u32 *cookie) | ||
18 | { | ||
19 | if ((cookie[0] != INET_DIAG_NOCOOKIE || | ||
20 | cookie[1] != INET_DIAG_NOCOOKIE) && | ||
21 | ((u32)(unsigned long)sk != cookie[0] || | ||
22 | (u32)((((unsigned long)sk) >> 31) >> 1) != cookie[1])) | ||
23 | return -ESTALE; | ||
24 | else | ||
25 | return 0; | ||
26 | } | ||
27 | EXPORT_SYMBOL_GPL(sock_diag_check_cookie); | ||
28 | |||
29 | void sock_diag_save_cookie(void *sk, __u32 *cookie) | ||
30 | { | ||
31 | cookie[0] = (u32)(unsigned long)sk; | ||
32 | cookie[1] = (u32)(((unsigned long)sk >> 31) >> 1); | ||
33 | } | ||
34 | EXPORT_SYMBOL_GPL(sock_diag_save_cookie); | ||
35 | |||
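The cookie pair is simply the socket pointer split into two 32-bit halves (the double shift avoids undefined behaviour on 32-bit builds). A hedged round-trip sketch from a diag module's perspective:

__u32 cookie[2];
int err;

sock_diag_save_cookie(sk, cookie);      /* emitted with the dump reply */

/* ... later, a targeted request echoes the cookie back ... */
err = sock_diag_check_cookie(sk, req_cookie);
if (err)
        return err;                     /* -ESTALE: socket was recycled */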
36 | int sock_diag_put_meminfo(struct sock *sk, struct sk_buff *skb, int attrtype) | ||
37 | { | ||
38 | __u32 *mem; | ||
39 | |||
40 | mem = RTA_DATA(__RTA_PUT(skb, attrtype, SK_MEMINFO_VARS * sizeof(__u32))); | ||
41 | |||
42 | mem[SK_MEMINFO_RMEM_ALLOC] = sk_rmem_alloc_get(sk); | ||
43 | mem[SK_MEMINFO_RCVBUF] = sk->sk_rcvbuf; | ||
44 | mem[SK_MEMINFO_WMEM_ALLOC] = sk_wmem_alloc_get(sk); | ||
45 | mem[SK_MEMINFO_SNDBUF] = sk->sk_sndbuf; | ||
46 | mem[SK_MEMINFO_FWD_ALLOC] = sk->sk_forward_alloc; | ||
47 | mem[SK_MEMINFO_WMEM_QUEUED] = sk->sk_wmem_queued; | ||
48 | mem[SK_MEMINFO_OPTMEM] = atomic_read(&sk->sk_omem_alloc); | ||
49 | |||
50 | return 0; | ||
51 | |||
52 | rtattr_failure: | ||
53 | return -EMSGSIZE; | ||
54 | } | ||
55 | EXPORT_SYMBOL_GPL(sock_diag_put_meminfo); | ||
56 | |||
57 | void sock_diag_register_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh)) | ||
58 | { | ||
59 | mutex_lock(&sock_diag_table_mutex); | ||
60 | inet_rcv_compat = fn; | ||
61 | mutex_unlock(&sock_diag_table_mutex); | ||
62 | } | ||
63 | EXPORT_SYMBOL_GPL(sock_diag_register_inet_compat); | ||
64 | |||
65 | void sock_diag_unregister_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh)) | ||
66 | { | ||
67 | mutex_lock(&sock_diag_table_mutex); | ||
68 | inet_rcv_compat = NULL; | ||
69 | mutex_unlock(&sock_diag_table_mutex); | ||
70 | } | ||
71 | EXPORT_SYMBOL_GPL(sock_diag_unregister_inet_compat); | ||
72 | |||
73 | int sock_diag_register(struct sock_diag_handler *hndl) | ||
74 | { | ||
75 | int err = 0; | ||
76 | |||
77 | if (hndl->family >= AF_MAX) | ||
78 | return -EINVAL; | ||
79 | |||
80 | mutex_lock(&sock_diag_table_mutex); | ||
81 | if (sock_diag_handlers[hndl->family]) | ||
82 | err = -EBUSY; | ||
83 | else | ||
84 | sock_diag_handlers[hndl->family] = hndl; | ||
85 | mutex_unlock(&sock_diag_table_mutex); | ||
86 | |||
87 | return err; | ||
88 | } | ||
89 | EXPORT_SYMBOL_GPL(sock_diag_register); | ||
90 | |||
91 | void sock_diag_unregister(struct sock_diag_handler *hnld) | ||
92 | { | ||
93 | int family = hnld->family; | ||
94 | |||
95 | if (family >= AF_MAX) | ||
96 | return; | ||
97 | |||
98 | mutex_lock(&sock_diag_table_mutex); | ||
99 | BUG_ON(sock_diag_handlers[family] != hnld); | ||
100 | sock_diag_handlers[family] = NULL; | ||
101 | mutex_unlock(&sock_diag_table_mutex); | ||
102 | } | ||
103 | EXPORT_SYMBOL_GPL(sock_diag_unregister); | ||
104 | |||
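A family-specific diag module plugs into the table above; its module alias must match the request_module() format used in sock_diag_lock_handler() below so the handler can be demand-loaded. A sketch with illustrative names:

static int example_diag_dump(struct sk_buff *skb, struct nlmsghdr *nlh)
{
        /* parse the sock_diag_req payload and start a netlink dump */
        return 0;
}

static struct sock_diag_handler example_diag_handler = {
        .family = AF_UNIX,              /* illustrative */
        .dump   = example_diag_dump,
};

static int __init example_diag_init(void)
{
        return sock_diag_register(&example_diag_handler);
}

static void __exit example_diag_exit(void)
{
        sock_diag_unregister(&example_diag_handler);
}

module_init(example_diag_init);
module_exit(example_diag_exit);
/* matches "net-pf-16-proto-4-type-1" demand-load requests */
MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, AF_UNIX);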
105 | static inline struct sock_diag_handler *sock_diag_lock_handler(int family) | ||
106 | { | ||
107 | if (sock_diag_handlers[family] == NULL) | ||
108 | request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK, | ||
109 | NETLINK_SOCK_DIAG, family); | ||
110 | |||
111 | mutex_lock(&sock_diag_table_mutex); | ||
112 | return sock_diag_handlers[family]; | ||
113 | } | ||
114 | |||
115 | static inline void sock_diag_unlock_handler(struct sock_diag_handler *h) | ||
116 | { | ||
117 | mutex_unlock(&sock_diag_table_mutex); | ||
118 | } | ||
119 | |||
120 | static int __sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) | ||
121 | { | ||
122 | int err; | ||
123 | struct sock_diag_req *req = NLMSG_DATA(nlh); | ||
124 | struct sock_diag_handler *hndl; | ||
125 | |||
126 | if (nlmsg_len(nlh) < sizeof(*req)) | ||
127 | return -EINVAL; | ||
128 | |||
129 | hndl = sock_diag_lock_handler(req->sdiag_family); | ||
130 | if (hndl == NULL) | ||
131 | err = -ENOENT; | ||
132 | else | ||
133 | err = hndl->dump(skb, nlh); | ||
134 | sock_diag_unlock_handler(hndl); | ||
135 | |||
136 | return err; | ||
137 | } | ||
138 | |||
139 | static int sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) | ||
140 | { | ||
141 | int ret; | ||
142 | |||
143 | switch (nlh->nlmsg_type) { | ||
144 | case TCPDIAG_GETSOCK: | ||
145 | case DCCPDIAG_GETSOCK: | ||
146 | if (inet_rcv_compat == NULL) | ||
147 | request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK, | ||
148 | NETLINK_SOCK_DIAG, AF_INET); | ||
149 | |||
150 | mutex_lock(&sock_diag_table_mutex); | ||
151 | if (inet_rcv_compat != NULL) | ||
152 | ret = inet_rcv_compat(skb, nlh); | ||
153 | else | ||
154 | ret = -EOPNOTSUPP; | ||
155 | mutex_unlock(&sock_diag_table_mutex); | ||
156 | |||
157 | return ret; | ||
158 | case SOCK_DIAG_BY_FAMILY: | ||
159 | return __sock_diag_rcv_msg(skb, nlh); | ||
160 | default: | ||
161 | return -EINVAL; | ||
162 | } | ||
163 | } | ||
164 | |||
165 | static DEFINE_MUTEX(sock_diag_mutex); | ||
166 | |||
167 | static void sock_diag_rcv(struct sk_buff *skb) | ||
168 | { | ||
169 | mutex_lock(&sock_diag_mutex); | ||
170 | netlink_rcv_skb(skb, &sock_diag_rcv_msg); | ||
171 | mutex_unlock(&sock_diag_mutex); | ||
172 | } | ||
173 | |||
174 | struct sock *sock_diag_nlsk; | ||
175 | EXPORT_SYMBOL_GPL(sock_diag_nlsk); | ||
176 | |||
177 | static int __init sock_diag_init(void) | ||
178 | { | ||
179 | sock_diag_nlsk = netlink_kernel_create(&init_net, NETLINK_SOCK_DIAG, 0, | ||
180 | sock_diag_rcv, NULL, THIS_MODULE); | ||
181 | return sock_diag_nlsk == NULL ? -ENOMEM : 0; | ||
182 | } | ||
183 | |||
184 | static void __exit sock_diag_exit(void) | ||
185 | { | ||
186 | netlink_kernel_release(sock_diag_nlsk); | ||
187 | } | ||
188 | |||
189 | module_init(sock_diag_init); | ||
190 | module_exit(sock_diag_exit); | ||
191 | MODULE_LICENSE("GPL"); | ||
192 | MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_SOCK_DIAG); | ||
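From userspace, the minimal request that sock_diag_rcv_msg() routes to a family handler looks roughly like the sketch below; real users embed a family-specific structure (e.g. a unix_diag request) whose first byte is sdiag_family, and the header name is assumed from this series.

#include <sys/socket.h>
#include <linux/netlink.h>
#include <linux/sock_diag.h>

int main(void)
{
        int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_SOCK_DIAG);
        struct {
                struct nlmsghdr nlh;
                struct sock_diag_req req;
        } msg = {
                .nlh.nlmsg_len      = sizeof(msg),
                .nlh.nlmsg_type     = SOCK_DIAG_BY_FAMILY,
                .nlh.nlmsg_flags    = NLM_F_REQUEST | NLM_F_DUMP,
                .req.sdiag_family   = AF_UNIX,  /* illustrative target */
                .req.sdiag_protocol = 0,
        };

        send(fd, &msg, sizeof(msg), 0);
        /* recv() loop over the dump elided */
        return 0;
}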
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 77a65f031488..d05559d4d9cd 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c | |||
@@ -68,8 +68,13 @@ static int rps_sock_flow_sysctl(ctl_table *table, int write, | |||
68 | 68 | ||
69 | if (sock_table != orig_sock_table) { | 69 | if (sock_table != orig_sock_table) { |
70 | rcu_assign_pointer(rps_sock_flow_table, sock_table); | 70 | rcu_assign_pointer(rps_sock_flow_table, sock_table); |
71 | synchronize_rcu(); | 71 | if (sock_table) |
72 | vfree(orig_sock_table); | 72 | jump_label_inc(&rps_needed); |
73 | if (orig_sock_table) { | ||
74 | jump_label_dec(&rps_needed); | ||
75 | synchronize_rcu(); | ||
76 | vfree(orig_sock_table); | ||
77 | } | ||
73 | } | 78 | } |
74 | } | 79 | } |
75 | 80 | ||
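With rps_needed a jump_label key incremented only while a flow table is installed, the RPS hook in the receive path can compile down to a patched no-op; the consumer side (in dev.c, not shown here) is presumably of the form:

/* Sketch of the hot-path guard this reference count enables. */
if (static_branch(&rps_needed)) {
        /* consult rps_sock_flow_table and steer the packet */
}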