author:    Linus Torvalds <torvalds@linux-foundation.org>  2012-01-06 20:22:09 -0500
committer: Linus Torvalds <torvalds@linux-foundation.org>  2012-01-06 20:22:09 -0500
commit:    9753dfe19a85e7e45a34a56f4cb2048bb4f50e27 (patch)
tree:      c017a1b4a70b8447c71b01d8b320e071546b5c9d /net/core
parent:    edf7c8148ec40c0fd27c0ef3f688defcc65e3913 (diff)
parent:    9f42f126154786e6e76df513004800c8c633f020 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1958 commits)
net: pack skb_shared_info more efficiently
net_sched: red: split red_parms into parms and vars
net_sched: sfq: extend limits
cnic: Improve error recovery on bnx2x devices
cnic: Re-init dev->stats_addr after chip reset
net_sched: Bug in netem reordering
bna: fix sparse warnings/errors
bna: make ethtool_ops and strings const
xgmac: cleanups
net: make ethtool_ops const
vmxnet3" make ethtool ops const
xen-netback: make ops structs const
virtio_net: Pass gfp flags when allocating rx buffers.
ixgbe: FCoE: Add support for ndo_get_fcoe_hbainfo() call
netdev: FCoE: Add new ndo_get_fcoe_hbainfo() call
igb: reset PHY after recovering from PHY power down
igb: add basic runtime PM support
igb: Add support for byte queue limits.
e1000: cleanup CE4100 MDIO registers access
e1000: unmap ce4100_gbe_mdio_base_virt in e1000_remove
...
Diffstat (limited to 'net/core')

-rw-r--r--  net/core/Makefile              6
-rw-r--r--  net/core/dev.c               307
-rw-r--r--  net/core/dst.c                 2
-rw-r--r--  net/core/ethtool.c           712
-rw-r--r--  net/core/flow_dissector.c    143
-rw-r--r--  net/core/neighbour.c         222
-rw-r--r--  net/core/net-sysfs.c         323
-rw-r--r--  net/core/netpoll.c            10
-rw-r--r--  net/core/netprio_cgroup.c    344
-rw-r--r--  net/core/pktgen.c             17
-rw-r--r--  net/core/rtnetlink.c          25
-rw-r--r--  net/core/secure_seq.c          6
-rw-r--r--  net/core/skbuff.c             89
-rw-r--r--  net/core/sock.c              197
-rw-r--r--  net/core/sock_diag.c         192
-rw-r--r--  net/core/sysctl_net_core.c     9

16 files changed, 1624 insertions, 980 deletions
diff --git a/net/core/Makefile b/net/core/Makefile
index 0d357b1c4e57..674641b13aea 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -3,12 +3,13 @@
 #

 obj-y := sock.o request_sock.o skbuff.o iovec.o datagram.o stream.o scm.o \
-         gen_stats.o gen_estimator.o net_namespace.o secure_seq.o
+         gen_stats.o gen_estimator.o net_namespace.o secure_seq.o flow_dissector.o

 obj-$(CONFIG_SYSCTL) += sysctl_net_core.o

 obj-y += dev.o ethtool.o dev_addr_lists.o dst.o netevent.o \
-         neighbour.o rtnetlink.o utils.o link_watch.o filter.o
+         neighbour.o rtnetlink.o utils.o link_watch.o filter.o \
+         sock_diag.o

 obj-$(CONFIG_XFRM) += flow.o
 obj-y += net-sysfs.o
@@ -19,3 +20,4 @@ obj-$(CONFIG_FIB_RULES) += fib_rules.o
 obj-$(CONFIG_TRACEPOINTS) += net-traces.o
 obj-$(CONFIG_NET_DROP_MONITOR) += drop_monitor.o
 obj-$(CONFIG_NETWORK_PHY_TIMESTAMPING) += timestamping.o
+obj-$(CONFIG_NETPRIO_CGROUP) += netprio_cgroup.o
diff --git a/net/core/dev.c b/net/core/dev.c
index 5a13edfc9f73..f494675471a9 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -133,10 +133,9 @@
 #include <linux/pci.h>
 #include <linux/inetdevice.h>
 #include <linux/cpu_rmap.h>
-#include <linux/if_tunnel.h>
-#include <linux/if_pppox.h>
-#include <linux/ppp_defs.h>
 #include <linux/net_tstamp.h>
+#include <linux/jump_label.h>
+#include <net/flow_keys.h>

 #include "net-sysfs.h"

@@ -1320,8 +1319,6 @@ EXPORT_SYMBOL(dev_close);
  */
 void dev_disable_lro(struct net_device *dev)
 {
-        u32 flags;
-
         /*
          * If we're trying to disable lro on a vlan device
          * use the underlying physical device instead
@@ -1329,15 +1326,9 @@ void dev_disable_lro(struct net_device *dev)
         if (is_vlan_dev(dev))
                 dev = vlan_dev_real_dev(dev);

-        if (dev->ethtool_ops && dev->ethtool_ops->get_flags)
-                flags = dev->ethtool_ops->get_flags(dev);
-        else
-                flags = ethtool_op_get_flags(dev);
+        dev->wanted_features &= ~NETIF_F_LRO;
+        netdev_update_features(dev);

-        if (!(flags & ETH_FLAG_LRO))
-                return;
-
-        __ethtool_set_flags(dev, flags & ~ETH_FLAG_LRO);
         if (unlikely(dev->features & NETIF_F_LRO))
                 netdev_WARN(dev, "failed to disable LRO!\n");
 }
@@ -1450,34 +1441,55 @@ int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
 }
 EXPORT_SYMBOL(call_netdevice_notifiers);

-/* When > 0 there are consumers of rx skb time stamps */
-static atomic_t netstamp_needed = ATOMIC_INIT(0);
+static struct jump_label_key netstamp_needed __read_mostly;
+#ifdef HAVE_JUMP_LABEL
+/* We are not allowed to call jump_label_dec() from irq context
+ * If net_disable_timestamp() is called from irq context, defer the
+ * jump_label_dec() calls.
+ */
+static atomic_t netstamp_needed_deferred;
+#endif

 void net_enable_timestamp(void)
 {
-        atomic_inc(&netstamp_needed);
+#ifdef HAVE_JUMP_LABEL
+        int deferred = atomic_xchg(&netstamp_needed_deferred, 0);
+
+        if (deferred) {
+                while (--deferred)
+                        jump_label_dec(&netstamp_needed);
+                return;
+        }
+#endif
+        WARN_ON(in_interrupt());
+        jump_label_inc(&netstamp_needed);
 }
 EXPORT_SYMBOL(net_enable_timestamp);

 void net_disable_timestamp(void)
 {
-        atomic_dec(&netstamp_needed);
+#ifdef HAVE_JUMP_LABEL
+        if (in_interrupt()) {
+                atomic_inc(&netstamp_needed_deferred);
+                return;
+        }
+#endif
+        jump_label_dec(&netstamp_needed);
 }
 EXPORT_SYMBOL(net_disable_timestamp);

 static inline void net_timestamp_set(struct sk_buff *skb)
 {
-        if (atomic_read(&netstamp_needed))
+        skb->tstamp.tv64 = 0;
+        if (static_branch(&netstamp_needed))
                 __net_timestamp(skb);
-        else
-                skb->tstamp.tv64 = 0;
 }

-static inline void net_timestamp_check(struct sk_buff *skb)
-{
-        if (!skb->tstamp.tv64 && atomic_read(&netstamp_needed))
-                __net_timestamp(skb);
-}
+#define net_timestamp_check(COND, SKB)                  \
+        if (static_branch(&netstamp_needed)) {          \
+                if ((COND) && !(SKB)->tstamp.tv64)      \
+                        __net_timestamp(SKB);           \
+        }                                               \

 static int net_hwtstamp_validate(struct ifreq *ifr)
 {
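The netstamp conversion above is the jump label ("static branch") pattern: while nothing has called net_enable_timestamp(), the timestamp test in the packet hot path is patched down to a no-op. The subtle part is that code patching must not happen in irq context, so net_disable_timestamp() banks its decrements and the next enabler settles them. A standalone C sketch of that deferred bookkeeping, with a plain atomic standing in for the patched branch (names mirror the kernel's, but this is not kernel code):

```c
/* Userspace model of the deferred jump_label_dec() scheme above.
 * The "patchable branch" is modelled as an ordinary atomic counter. */
#include <stdatomic.h>
#include <stdio.h>

static atomic_int netstamp_needed;          /* stands in for the jump label */
static atomic_int netstamp_needed_deferred; /* decrements banked from irq */

static void net_enable_timestamp(void)
{
        int deferred = atomic_exchange(&netstamp_needed_deferred, 0);

        if (deferred) {
                /* one banked decrement cancels this enable; apply the rest */
                while (--deferred)
                        atomic_fetch_sub(&netstamp_needed, 1);
                return;
        }
        atomic_fetch_add(&netstamp_needed, 1);
}

static void net_disable_timestamp(int in_irq)
{
        if (in_irq) {           /* cannot patch code here: defer instead */
                atomic_fetch_add(&netstamp_needed_deferred, 1);
                return;
        }
        atomic_fetch_sub(&netstamp_needed, 1);
}

int main(void)
{
        net_enable_timestamp();    /* key: 1 */
        net_disable_timestamp(1);  /* deferred: 1, key unchanged */
        net_enable_timestamp();    /* banked dec cancels this inc: key 1 */
        net_disable_timestamp(0);  /* key: 0 */
        printf("key=%d deferred=%d\n", atomic_load(&netstamp_needed),
               atomic_load(&netstamp_needed_deferred));
        return 0;
}
```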
@@ -1924,7 +1936,8 @@ EXPORT_SYMBOL(skb_checksum_help);
  * It may return NULL if the skb requires no segmentation. This is
  * only possible when GSO is used for verifying header integrity.
  */
-struct sk_buff *skb_gso_segment(struct sk_buff *skb, u32 features)
+struct sk_buff *skb_gso_segment(struct sk_buff *skb,
+        netdev_features_t features)
 {
         struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
         struct packet_type *ptype;
@@ -1954,9 +1967,9 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, u32 features)
         if (dev && dev->ethtool_ops && dev->ethtool_ops->get_drvinfo)
                 dev->ethtool_ops->get_drvinfo(dev, &info);

-        WARN(1, "%s: caps=(0x%lx, 0x%lx) len=%d data_len=%d ip_summed=%d\n",
-             info.driver, dev ? dev->features : 0L,
-             skb->sk ? skb->sk->sk_route_caps : 0L,
+        WARN(1, "%s: caps=(%pNF, %pNF) len=%d data_len=%d ip_summed=%d\n",
+             info.driver, dev ? &dev->features : NULL,
+             skb->sk ? &skb->sk->sk_route_caps : NULL,
              skb->len, skb->data_len, skb->ip_summed);

         if (skb_header_cloned(skb) &&
@@ -2065,7 +2078,7 @@ static void dev_gso_skb_destructor(struct sk_buff *skb)
  * This function segments the given skb and stores the list of segments
  * in skb->next.
  */
-static int dev_gso_segment(struct sk_buff *skb, int features)
+static int dev_gso_segment(struct sk_buff *skb, netdev_features_t features)
 {
         struct sk_buff *segs;

@@ -2104,7 +2117,7 @@ static inline void skb_orphan_try(struct sk_buff *skb)
         }
 }

-static bool can_checksum_protocol(unsigned long features, __be16 protocol)
+static bool can_checksum_protocol(netdev_features_t features, __be16 protocol)
 {
         return ((features & NETIF_F_GEN_CSUM) ||
                 ((features & NETIF_F_V4_CSUM) &&
@@ -2115,7 +2128,8 @@ static bool can_checksum_protocol(unsigned long features, __be16 protocol)
                  protocol == htons(ETH_P_FCOE)));
 }

-static u32 harmonize_features(struct sk_buff *skb, __be16 protocol, u32 features)
+static netdev_features_t harmonize_features(struct sk_buff *skb,
+        __be16 protocol, netdev_features_t features)
 {
         if (!can_checksum_protocol(features, protocol)) {
                 features &= ~NETIF_F_ALL_CSUM;
@@ -2127,10 +2141,10 @@ static u32 harmonize_features(struct sk_buff *skb, __be16 protocol, u32 features
         return features;
 }

-u32 netif_skb_features(struct sk_buff *skb)
+netdev_features_t netif_skb_features(struct sk_buff *skb)
 {
         __be16 protocol = skb->protocol;
-        u32 features = skb->dev->features;
+        netdev_features_t features = skb->dev->features;

         if (protocol == htons(ETH_P_8021Q)) {
                 struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
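Most of the churn in this file is the same mechanical conversion: u32 feature masks become netdev_features_t, because the 32 available NETIF_F_* bits had run out. Roughly, the new header derives each NETIF_F_* flag from a *_BIT index over a 64-bit type; a standalone sketch reduced to a few bits (the real list lives in include/linux/netdev_features.h):

```c
#include <stdint.h>
#include <stdio.h>

typedef uint64_t netdev_features_t;   /* was a plain u32 before this merge */

enum {
        NETIF_F_SG_BIT,        /* scatter/gather IO */
        NETIF_F_HW_CSUM_BIT,   /* checksum all packets */
        NETIF_F_LRO_BIT,       /* large receive offload */
};

#define __NETIF_F(name) ((netdev_features_t)1 << (NETIF_F_##name##_BIT))
#define NETIF_F_SG      __NETIF_F(SG)
#define NETIF_F_HW_CSUM __NETIF_F(HW_CSUM)
#define NETIF_F_LRO     __NETIF_F(LRO)

int main(void)
{
        netdev_features_t features = NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_LRO;

        features &= ~NETIF_F_LRO;   /* the dev_disable_lro() idiom above */
        printf("features=%#llx sg=%d\n", (unsigned long long)features,
               !!(features & NETIF_F_SG));
        return 0;
}
```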
@@ -2176,7 +2190,7 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
         unsigned int skb_len;

         if (likely(!skb->next)) {
-                u32 features;
+                netdev_features_t features;

                 /*
                  * If device doesn't need skb->dst, release it right now while
@@ -2257,7 +2271,7 @@ gso:
                         return rc;
                 }
                 txq_trans_update(txq);
-                if (unlikely(netif_tx_queue_stopped(txq) && skb->next))
+                if (unlikely(netif_xmit_stopped(txq) && skb->next))
                         return NETDEV_TX_BUSY;
         } while (skb->next);

@@ -2457,6 +2471,18 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
         return rc;
 }

+#if IS_ENABLED(CONFIG_NETPRIO_CGROUP)
+static void skb_update_prio(struct sk_buff *skb)
+{
+        struct netprio_map *map = rcu_dereference_bh(skb->dev->priomap);
+
+        if ((!skb->priority) && (skb->sk) && map)
+                skb->priority = map->priomap[skb->sk->sk_cgrp_prioidx];
+}
+#else
+#define skb_update_prio(skb)
+#endif
+
 static DEFINE_PER_CPU(int, xmit_recursion);
 #define RECURSION_LIMIT 10

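skb_update_prio() only exists when IS_ENABLED(CONFIG_NETPRIO_CGROUP), which is true for both =y and =m builds. A standalone sketch of how that macro works — the trick from the kernel's include/linux/kconfig.h, trimmed, with the config symbol defined by hand to simulate Kconfig:

```c
#include <stdio.h>

/* Kconfig defines CONFIG_FOO as 1 for =y (and CONFIG_FOO_MODULE for =m).
 * __ARG_PLACEHOLDER_1 turns "defined to 1" into an extra macro argument,
 * so __take_second_arg() yields 1 for defined symbols and 0 otherwise. */
#define __ARG_PLACEHOLDER_1 0,
#define __take_second_arg(ignored, val, ...) val
#define __is_defined(x) ___is_defined(x)
#define ___is_defined(val) ____is_defined(__ARG_PLACEHOLDER_##val)
#define ____is_defined(arg1_or_junk) __take_second_arg(arg1_or_junk 1, 0)

#define IS_ENABLED(option) \
        (__is_defined(option) || __is_defined(option##_MODULE))

#define CONFIG_NETPRIO_CGROUP 1   /* pretend the option is built in */

int main(void)
{
#if IS_ENABLED(CONFIG_NETPRIO_CGROUP)
        puts("skb_update_prio() compiled in");
#else
        puts("skb_update_prio() expands to nothing");
#endif
        return 0;
}
```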
@@ -2497,6 +2523,8 @@ int dev_queue_xmit(struct sk_buff *skb)
          */
         rcu_read_lock_bh();

+        skb_update_prio(skb);
+
         txq = dev_pick_tx(dev, skb);
         q = rcu_dereference_bh(txq->qdisc);

@@ -2531,7 +2559,7 @@ int dev_queue_xmit(struct sk_buff *skb)

                         HARD_TX_LOCK(dev, txq, cpu);

-                        if (!netif_tx_queue_stopped(txq)) {
+                        if (!netif_xmit_stopped(txq)) {
                                 __this_cpu_inc(xmit_recursion);
                                 rc = dev_hard_start_xmit(skb, dev, txq);
                                 __this_cpu_dec(xmit_recursion);
@@ -2592,123 +2620,28 @@ static inline void ____napi_schedule(struct softnet_data *sd,
  */
 void __skb_get_rxhash(struct sk_buff *skb)
 {
-        int nhoff, hash = 0, poff;
-        const struct ipv6hdr *ip6;
-        const struct iphdr *ip;
-        const struct vlan_hdr *vlan;
-        u8 ip_proto;
-        u32 addr1, addr2;
-        u16 proto;
-        union {
-                u32 v32;
-                u16 v16[2];
-        } ports;
-
-        nhoff = skb_network_offset(skb);
-        proto = skb->protocol;
-
-again:
-        switch (proto) {
-        case __constant_htons(ETH_P_IP):
-ip:
-                if (!pskb_may_pull(skb, sizeof(*ip) + nhoff))
-                        goto done;
-
-                ip = (const struct iphdr *) (skb->data + nhoff);
-                if (ip_is_fragment(ip))
-                        ip_proto = 0;
-                else
-                        ip_proto = ip->protocol;
-                addr1 = (__force u32) ip->saddr;
-                addr2 = (__force u32) ip->daddr;
-                nhoff += ip->ihl * 4;
-                break;
-        case __constant_htons(ETH_P_IPV6):
-ipv6:
-                if (!pskb_may_pull(skb, sizeof(*ip6) + nhoff))
-                        goto done;
-
-                ip6 = (const struct ipv6hdr *) (skb->data + nhoff);
-                ip_proto = ip6->nexthdr;
-                addr1 = (__force u32) ip6->saddr.s6_addr32[3];
-                addr2 = (__force u32) ip6->daddr.s6_addr32[3];
-                nhoff += 40;
-                break;
-        case __constant_htons(ETH_P_8021Q):
-                if (!pskb_may_pull(skb, sizeof(*vlan) + nhoff))
-                        goto done;
-                vlan = (const struct vlan_hdr *) (skb->data + nhoff);
-                proto = vlan->h_vlan_encapsulated_proto;
-                nhoff += sizeof(*vlan);
-                goto again;
-        case __constant_htons(ETH_P_PPP_SES):
-                if (!pskb_may_pull(skb, PPPOE_SES_HLEN + nhoff))
-                        goto done;
-                proto = *((__be16 *) (skb->data + nhoff +
-                                      sizeof(struct pppoe_hdr)));
-                nhoff += PPPOE_SES_HLEN;
-                switch (proto) {
-                case __constant_htons(PPP_IP):
-                        goto ip;
-                case __constant_htons(PPP_IPV6):
-                        goto ipv6;
-                default:
-                        goto done;
-                }
-        default:
-                goto done;
-        }
-
-        switch (ip_proto) {
-        case IPPROTO_GRE:
-                if (pskb_may_pull(skb, nhoff + 16)) {
-                        u8 *h = skb->data + nhoff;
-                        __be16 flags = *(__be16 *)h;
+        struct flow_keys keys;
+        u32 hash;

-                        /*
-                         * Only look inside GRE if version zero and no
-                         * routing
-                         */
-                        if (!(flags & (GRE_VERSION|GRE_ROUTING))) {
-                                proto = *(__be16 *)(h + 2);
-                                nhoff += 4;
-                                if (flags & GRE_CSUM)
-                                        nhoff += 4;
-                                if (flags & GRE_KEY)
-                                        nhoff += 4;
-                                if (flags & GRE_SEQ)
-                                        nhoff += 4;
-                                goto again;
-                        }
-                }
-                break;
-        case IPPROTO_IPIP:
-                goto again;
-        default:
-                break;
-        }
+        if (!skb_flow_dissect(skb, &keys))
+                return;

-        ports.v32 = 0;
-        poff = proto_ports_offset(ip_proto);
-        if (poff >= 0) {
-                nhoff += poff;
-                if (pskb_may_pull(skb, nhoff + 4)) {
-                        ports.v32 = * (__force u32 *) (skb->data + nhoff);
-                        if (ports.v16[1] < ports.v16[0])
-                                swap(ports.v16[0], ports.v16[1]);
-                        skb->l4_rxhash = 1;
-                }
+        if (keys.ports) {
+                if ((__force u16)keys.port16[1] < (__force u16)keys.port16[0])
+                        swap(keys.port16[0], keys.port16[1]);
+                skb->l4_rxhash = 1;
         }

         /* get a consistent hash (same value on both flow directions) */
-        if (addr2 < addr1)
-                swap(addr1, addr2);
+        if ((__force u32)keys.dst < (__force u32)keys.src)
+                swap(keys.dst, keys.src);

-        hash = jhash_3words(addr1, addr2, ports.v32, hashrnd);
+        hash = jhash_3words((__force u32)keys.dst,
+                            (__force u32)keys.src,
+                            (__force u32)keys.ports, hashrnd);
         if (!hash)
                 hash = 1;

-done:
         skb->rxhash = hash;
 }
 EXPORT_SYMBOL(__skb_get_rxhash);
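__skb_get_rxhash() now delegates all header parsing to skb_flow_dissect() from the new net/core/flow_dissector.c (see the Makefile change above) and keeps only the hashing policy: order the addresses and ports canonically so both directions of a flow produce the same rxhash. A userspace sketch of that policy; mix3() below is a stand-in for the kernel's jhash_3words(), not the real function:

```c
#include <stdint.h>
#include <stdio.h>

struct flow_keys {              /* pared down from net/flow_keys.h */
        uint32_t src, dst;      /* IPv4 addresses */
        uint16_t port16[2];     /* source and destination ports */
};

static uint32_t mix3(uint32_t a, uint32_t b, uint32_t c)
{
        /* stand-in mixer; the kernel uses jhash_3words() with a
         * boot-time random seed */
        a ^= b * 0x9e3779b9u;
        a ^= c * 0x85ebca6bu;
        a ^= a >> 16;
        return a ? a : 1;       /* 0 is reserved for "no hash", as above */
}

static uint32_t flow_rxhash(struct flow_keys k)
{
        uint32_t ports;

        if (k.port16[1] < k.port16[0]) {        /* canonical port order */
                uint16_t t = k.port16[0];
                k.port16[0] = k.port16[1];
                k.port16[1] = t;
        }
        if (k.dst < k.src) {                    /* canonical address order */
                uint32_t t = k.src;
                k.src = k.dst;
                k.dst = t;
        }
        ports = ((uint32_t)k.port16[0] << 16) | k.port16[1];
        return mix3(k.dst, k.src, ports);
}

int main(void)
{
        struct flow_keys ab = { 0x0a000001, 0x0a000002, { 12345, 80 } };
        struct flow_keys ba = { 0x0a000002, 0x0a000001, { 80, 12345 } };

        /* both directions of the same flow hash identically */
        printf("%08x %08x\n", flow_rxhash(ab), flow_rxhash(ba));
        return 0;
}
```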
@@ -2719,6 +2652,8 @@ EXPORT_SYMBOL(__skb_get_rxhash);
 struct rps_sock_flow_table __rcu *rps_sock_flow_table __read_mostly;
 EXPORT_SYMBOL(rps_sock_flow_table);

+struct jump_label_key rps_needed __read_mostly;
+
 static struct rps_dev_flow *
 set_rps_cpu(struct net_device *dev, struct sk_buff *skb,
             struct rps_dev_flow *rflow, u16 next_cpu)
@@ -2998,12 +2933,11 @@ int netif_rx(struct sk_buff *skb)
         if (netpoll_rx(skb))
                 return NET_RX_DROP;

-        if (netdev_tstamp_prequeue)
-                net_timestamp_check(skb);
+        net_timestamp_check(netdev_tstamp_prequeue, skb);

         trace_netif_rx(skb);
 #ifdef CONFIG_RPS
-        {
+        if (static_branch(&rps_needed)) {
                 struct rps_dev_flow voidflow, *rflow = &voidflow;
                 int cpu;

@@ -3018,14 +2952,13 @@ int netif_rx(struct sk_buff *skb)

                 rcu_read_unlock();
                 preempt_enable();
-        }
-#else
+        } else
+#endif
         {
                 unsigned int qtail;
                 ret = enqueue_to_backlog(skb, get_cpu(), &qtail);
                 put_cpu();
         }
-#endif
         return ret;
 }
 EXPORT_SYMBOL(netif_rx);
@@ -3231,8 +3164,7 @@ static int __netif_receive_skb(struct sk_buff *skb)
         int ret = NET_RX_DROP;
         __be16 type;

-        if (!netdev_tstamp_prequeue)
-                net_timestamp_check(skb);
+        net_timestamp_check(!netdev_tstamp_prequeue, skb);

         trace_netif_receive_skb(skb);

@@ -3363,14 +3295,13 @@ out:
  */
 int netif_receive_skb(struct sk_buff *skb)
 {
-        if (netdev_tstamp_prequeue)
-                net_timestamp_check(skb);
+        net_timestamp_check(netdev_tstamp_prequeue, skb);

         if (skb_defer_rx_timestamp(skb))
                 return NET_RX_SUCCESS;

 #ifdef CONFIG_RPS
-        {
+        if (static_branch(&rps_needed)) {
                 struct rps_dev_flow voidflow, *rflow = &voidflow;
                 int cpu, ret;

@@ -3381,16 +3312,12 @@ int netif_receive_skb(struct sk_buff *skb)
                 if (cpu >= 0) {
                         ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
                         rcu_read_unlock();
-                } else {
-                        rcu_read_unlock();
-                        ret = __netif_receive_skb(skb);
+                        return ret;
                 }
-
-                return ret;
+                rcu_read_unlock();
         }
-#else
-        return __netif_receive_skb(skb);
 #endif
+        return __netif_receive_skb(skb);
 }
 EXPORT_SYMBOL(netif_receive_skb);

@@ -4539,7 +4466,7 @@ static void dev_change_rx_flags(struct net_device *dev, int flags)

 static int __dev_set_promiscuity(struct net_device *dev, int inc)
 {
-        unsigned short old_flags = dev->flags;
+        unsigned int old_flags = dev->flags;
         uid_t uid;
         gid_t gid;

@@ -4596,7 +4523,7 @@ static int __dev_set_promiscuity(struct net_device *dev, int inc)
  */
 int dev_set_promiscuity(struct net_device *dev, int inc)
 {
-        unsigned short old_flags = dev->flags;
+        unsigned int old_flags = dev->flags;
         int err;

         err = __dev_set_promiscuity(dev, inc);
@@ -4623,7 +4550,7 @@ EXPORT_SYMBOL(dev_set_promiscuity);

 int dev_set_allmulti(struct net_device *dev, int inc)
 {
-        unsigned short old_flags = dev->flags;
+        unsigned int old_flags = dev->flags;

         ASSERT_RTNL();

@@ -4726,7 +4653,7 @@ EXPORT_SYMBOL(dev_get_flags);

 int __dev_change_flags(struct net_device *dev, unsigned int flags)
 {
-        int old_flags = dev->flags;
+        unsigned int old_flags = dev->flags;
         int ret;

         ASSERT_RTNL();
@@ -4809,10 +4736,10 @@ void __dev_notify_flags(struct net_device *dev, unsigned int old_flags)
  * Change settings on device based state flags. The flags are
  * in the userspace exported format.
  */
-int dev_change_flags(struct net_device *dev, unsigned flags)
+int dev_change_flags(struct net_device *dev, unsigned int flags)
 {
-        int ret, changes;
-        int old_flags = dev->flags;
+        int ret;
+        unsigned int changes, old_flags = dev->flags;

         ret = __dev_change_flags(dev, flags);
         if (ret < 0)
@@ -5369,7 +5296,8 @@ static void rollback_registered(struct net_device *dev)
         list_del(&single);
 }

-static u32 netdev_fix_features(struct net_device *dev, u32 features)
+static netdev_features_t netdev_fix_features(struct net_device *dev,
+        netdev_features_t features)
 {
         /* Fix illegal checksum combinations */
         if ((features & NETIF_F_HW_CSUM) &&
@@ -5378,12 +5306,6 @@ static u32 netdev_fix_features(struct net_device *dev, u32 features)
                 features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
         }

-        if ((features & NETIF_F_NO_CSUM) &&
-            (features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
-                netdev_warn(dev, "mixed no checksumming and other settings.\n");
-                features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM);
-        }
-
         /* Fix illegal SG+CSUM combinations. */
         if ((features & NETIF_F_SG) &&
             !(features & NETIF_F_ALL_CSUM)) {
@@ -5431,7 +5353,7 @@ static u32 netdev_fix_features(struct net_device *dev, u32 features)

 int __netdev_update_features(struct net_device *dev)
 {
-        u32 features;
+        netdev_features_t features;
         int err = 0;

         ASSERT_RTNL();
@@ -5447,16 +5369,16 @@ int __netdev_update_features(struct net_device *dev)
         if (dev->features == features)
                 return 0;

-        netdev_dbg(dev, "Features changed: 0x%08x -> 0x%08x\n",
-                dev->features, features);
+        netdev_dbg(dev, "Features changed: %pNF -> %pNF\n",
+                &dev->features, &features);

         if (dev->netdev_ops->ndo_set_features)
                 err = dev->netdev_ops->ndo_set_features(dev, features);

         if (unlikely(err < 0)) {
                 netdev_err(dev,
-                        "set_features() failed (%d); wanted 0x%08x, left 0x%08x\n",
-                        err, features, dev->features);
+                        "set_features() failed (%d); wanted %pNF, left %pNF\n",
+                        err, &features, &dev->features);
                 return -1;
         }

@@ -5555,6 +5477,9 @@ static void netdev_init_one_queue(struct net_device *dev,
         queue->xmit_lock_owner = -1;
         netdev_queue_numa_node_write(queue, NUMA_NO_NODE);
         queue->dev = dev;
+#ifdef CONFIG_BQL
+        dql_init(&queue->dql, HZ);
+#endif
 }

 static int netif_alloc_netdev_queues(struct net_device *dev)
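dql_init() wires each TX queue into the new Byte Queue Limits machinery (CONFIG_BQL, lib/dql.c, also part of this merge): the driver reports bytes posted to the ring with netdev_tx_sent_queue() and bytes finished with netdev_tx_completed_queue(), and the stack stops the queue once the in-flight byte budget is spent. A toy model of the accounting half only — the real dql also grows and shrinks the limit adaptively, which is omitted here:

```c
#include <stdbool.h>
#include <stdio.h>

struct dql {
        unsigned int num_queued;    /* bytes handed to the ring */
        unsigned int num_completed; /* bytes the NIC has sent */
        unsigned int limit;         /* in-flight budget (fixed here) */
};

static bool dql_avail(const struct dql *dql)
{
        return dql->num_queued - dql->num_completed < dql->limit;
}

static void tx_sent(struct dql *dql, unsigned int bytes)
{
        dql->num_queued += bytes;   /* netdev_tx_sent_queue() analogue */
        if (!dql_avail(dql))
                printf("queue stopped at %u in-flight bytes\n",
                       dql->num_queued - dql->num_completed);
}

static void tx_completed(struct dql *dql, unsigned int bytes)
{
        dql->num_completed += bytes; /* netdev_tx_completed_queue() analogue */
        if (dql_avail(dql))
                printf("queue restarted, %u in-flight bytes\n",
                       dql->num_queued - dql->num_completed);
}

int main(void)
{
        struct dql dql = { 0, 0, 3000 };  /* budget of ~two full frames */

        tx_sent(&dql, 1500);
        tx_sent(&dql, 1500);        /* budget exhausted: queue stops */
        tx_completed(&dql, 1500);   /* completion frees it: restart  */
        return 0;
}
```

The point of BQL is latency: the ring holds just enough bytes to keep the NIC busy, instead of whatever the driver's descriptor count happens to allow.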
@@ -5640,11 +5565,12 @@ int register_netdevice(struct net_device *dev)
         dev->wanted_features = dev->features & dev->hw_features;

         /* Turn on no cache copy if HW is doing checksum */
-        dev->hw_features |= NETIF_F_NOCACHE_COPY;
-        if ((dev->features & NETIF_F_ALL_CSUM) &&
-            !(dev->features & NETIF_F_NO_CSUM)) {
-                dev->wanted_features |= NETIF_F_NOCACHE_COPY;
-                dev->features |= NETIF_F_NOCACHE_COPY;
+        if (!(dev->flags & IFF_LOOPBACK)) {
+                dev->hw_features |= NETIF_F_NOCACHE_COPY;
+                if (dev->features & NETIF_F_ALL_CSUM) {
+                        dev->wanted_features |= NETIF_F_NOCACHE_COPY;
+                        dev->features |= NETIF_F_NOCACHE_COPY;
+                }
         }

         /* Make NETIF_F_HIGHDMA inheritable to VLAN devices.
@@ -6380,7 +6306,8 @@ static int dev_cpu_callback(struct notifier_block *nfb,
  * @one to the master device with current feature set @all. Will not
  * enable anything that is off in @mask. Returns the new feature set.
  */
-u32 netdev_increment_features(u32 all, u32 one, u32 mask)
+netdev_features_t netdev_increment_features(netdev_features_t all,
+        netdev_features_t one, netdev_features_t mask)
 {
         if (mask & NETIF_F_GEN_CSUM)
                 mask |= NETIF_F_ALL_CSUM;
@@ -6389,10 +6316,6 @@ u32 netdev_increment_features(u32 all, u32 one, u32 mask)
         all |= one & (NETIF_F_ONE_FOR_ALL|NETIF_F_ALL_CSUM) & mask;
         all &= one | ~NETIF_F_ALL_FOR_ALL;

-        /* If device needs checksumming, downgrade to it. */
-        if (all & (NETIF_F_ALL_CSUM & ~NETIF_F_NO_CSUM))
-                all &= ~NETIF_F_NO_CSUM;
-
         /* If one device supports hw checksumming, set for all. */
         if (all & NETIF_F_GEN_CSUM)
                 all &= ~(NETIF_F_ALL_CSUM & ~NETIF_F_GEN_CSUM);
diff --git a/net/core/dst.c b/net/core/dst.c
index d5e2c4c09107..43d94cedbf7c 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -366,7 +366,7 @@ static void dst_ifdown(struct dst_entry *dst, struct net_device *dev,
                 dev_hold(dst->dev);
                 dev_put(dev);
                 rcu_read_lock();
-                neigh = dst_get_neighbour(dst);
+                neigh = dst_get_neighbour_noref(dst);
                 if (neigh && neigh->dev == dev) {
                         neigh->dev = dst->dev;
                         dev_hold(dst->dev);
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index f44481707124..921aa2b4b415 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -36,235 +36,44 @@ u32 ethtool_op_get_link(struct net_device *dev)
 }
 EXPORT_SYMBOL(ethtool_op_get_link);

-u32 ethtool_op_get_tx_csum(struct net_device *dev)
-{
-        return (dev->features & NETIF_F_ALL_CSUM) != 0;
-}
-EXPORT_SYMBOL(ethtool_op_get_tx_csum);
-
-int ethtool_op_set_tx_csum(struct net_device *dev, u32 data)
-{
-        if (data)
-                dev->features |= NETIF_F_IP_CSUM;
-        else
-                dev->features &= ~NETIF_F_IP_CSUM;
-
-        return 0;
-}
-EXPORT_SYMBOL(ethtool_op_set_tx_csum);
-
-int ethtool_op_set_tx_hw_csum(struct net_device *dev, u32 data)
-{
-        if (data)
-                dev->features |= NETIF_F_HW_CSUM;
-        else
-                dev->features &= ~NETIF_F_HW_CSUM;
-
-        return 0;
-}
-EXPORT_SYMBOL(ethtool_op_set_tx_hw_csum);
-
-int ethtool_op_set_tx_ipv6_csum(struct net_device *dev, u32 data)
-{
-        if (data)
-                dev->features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
-        else
-                dev->features &= ~(NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM);
-
-        return 0;
-}
-EXPORT_SYMBOL(ethtool_op_set_tx_ipv6_csum);
-
-u32 ethtool_op_get_sg(struct net_device *dev)
-{
-        return (dev->features & NETIF_F_SG) != 0;
-}
-EXPORT_SYMBOL(ethtool_op_get_sg);
-
-int ethtool_op_set_sg(struct net_device *dev, u32 data)
-{
-        if (data)
-                dev->features |= NETIF_F_SG;
-        else
-                dev->features &= ~NETIF_F_SG;
-
-        return 0;
-}
-EXPORT_SYMBOL(ethtool_op_set_sg);
-
-u32 ethtool_op_get_tso(struct net_device *dev)
-{
-        return (dev->features & NETIF_F_TSO) != 0;
-}
-EXPORT_SYMBOL(ethtool_op_get_tso);
-
-int ethtool_op_set_tso(struct net_device *dev, u32 data)
-{
-        if (data)
-                dev->features |= NETIF_F_TSO;
-        else
-                dev->features &= ~NETIF_F_TSO;
-
-        return 0;
-}
-EXPORT_SYMBOL(ethtool_op_set_tso);
-
-u32 ethtool_op_get_ufo(struct net_device *dev)
-{
-        return (dev->features & NETIF_F_UFO) != 0;
-}
-EXPORT_SYMBOL(ethtool_op_get_ufo);
-
-int ethtool_op_set_ufo(struct net_device *dev, u32 data)
-{
-        if (data)
-                dev->features |= NETIF_F_UFO;
-        else
-                dev->features &= ~NETIF_F_UFO;
-        return 0;
-}
-EXPORT_SYMBOL(ethtool_op_set_ufo);
-
-/* the following list of flags are the same as their associated
- * NETIF_F_xxx values in include/linux/netdevice.h
- */
-static const u32 flags_dup_features =
-        (ETH_FLAG_LRO | ETH_FLAG_RXVLAN | ETH_FLAG_TXVLAN | ETH_FLAG_NTUPLE |
-         ETH_FLAG_RXHASH);
-
-u32 ethtool_op_get_flags(struct net_device *dev)
-{
-        /* in the future, this function will probably contain additional
-         * handling for flags which are not so easily handled
-         * by a simple masking operation
-         */
-
-        return dev->features & flags_dup_features;
-}
-EXPORT_SYMBOL(ethtool_op_get_flags);
-
-/* Check if device can enable (or disable) particular feature coded in "data"
- * argument. Flags "supported" describe features that can be toggled by device.
- * If feature can not be toggled, it state (enabled or disabled) must match
- * hardcoded device features state, otherwise flags are marked as invalid.
- */
-bool ethtool_invalid_flags(struct net_device *dev, u32 data, u32 supported)
-{
-        u32 features = dev->features & flags_dup_features;
-        /* "data" can contain only flags_dup_features bits,
-         * see __ethtool_set_flags */
-
-        return (features & ~supported) != (data & ~supported);
-}
-EXPORT_SYMBOL(ethtool_invalid_flags);
-
-int ethtool_op_set_flags(struct net_device *dev, u32 data, u32 supported)
-{
-        if (ethtool_invalid_flags(dev, data, supported))
-                return -EINVAL;
-
-        dev->features = ((dev->features & ~flags_dup_features) |
-                         (data & flags_dup_features));
-        return 0;
-}
-EXPORT_SYMBOL(ethtool_op_set_flags);
-
 /* Handlers for each ethtool command */

-#define ETHTOOL_DEV_FEATURE_WORDS 1
+#define ETHTOOL_DEV_FEATURE_WORDS ((NETDEV_FEATURE_COUNT + 31) / 32)

-static void ethtool_get_features_compat(struct net_device *dev,
-        struct ethtool_get_features_block *features)
-{
-        if (!dev->ethtool_ops)
-                return;
-
-        /* getting RX checksum */
-        if (dev->ethtool_ops->get_rx_csum)
-                if (dev->ethtool_ops->get_rx_csum(dev))
-                        features[0].active |= NETIF_F_RXCSUM;
-
-        /* mark legacy-changeable features */
-        if (dev->ethtool_ops->set_sg)
-                features[0].available |= NETIF_F_SG;
-        if (dev->ethtool_ops->set_tx_csum)
-                features[0].available |= NETIF_F_ALL_CSUM;
-        if (dev->ethtool_ops->set_tso)
-                features[0].available |= NETIF_F_ALL_TSO;
-        if (dev->ethtool_ops->set_rx_csum)
-                features[0].available |= NETIF_F_RXCSUM;
-        if (dev->ethtool_ops->set_flags)
-                features[0].available |= flags_dup_features;
-}
-
-static int ethtool_set_feature_compat(struct net_device *dev,
-        int (*legacy_set)(struct net_device *, u32),
-        struct ethtool_set_features_block *features, u32 mask)
-{
-        u32 do_set;
-
-        if (!legacy_set)
-                return 0;
-
-        if (!(features[0].valid & mask))
-                return 0;
-
-        features[0].valid &= ~mask;
-
-        do_set = !!(features[0].requested & mask);
-
-        if (legacy_set(dev, do_set) < 0)
-                netdev_info(dev,
-                        "Legacy feature change (%s) failed for 0x%08x\n",
-                        do_set ? "set" : "clear", mask);
-
-        return 1;
-}
-
-static int ethtool_set_flags_compat(struct net_device *dev,
-        int (*legacy_set)(struct net_device *, u32),
-        struct ethtool_set_features_block *features, u32 mask)
-{
-        u32 value;
-
-        if (!legacy_set)
-                return 0;
-
-        if (!(features[0].valid & mask))
-                return 0;
-
-        value = dev->features & ~features[0].valid;
-        value |= features[0].requested;
-
-        features[0].valid &= ~mask;
-
-        if (legacy_set(dev, value & mask) < 0)
-                netdev_info(dev, "Legacy flags change failed\n");
-
-        return 1;
-}
-
-static int ethtool_set_features_compat(struct net_device *dev,
-        struct ethtool_set_features_block *features)
-{
-        int compat;
-
-        if (!dev->ethtool_ops)
-                return 0;
-
-        compat = ethtool_set_feature_compat(dev, dev->ethtool_ops->set_sg,
-                features, NETIF_F_SG);
-        compat |= ethtool_set_feature_compat(dev, dev->ethtool_ops->set_tx_csum,
-                features, NETIF_F_ALL_CSUM);
-        compat |= ethtool_set_feature_compat(dev, dev->ethtool_ops->set_tso,
-                features, NETIF_F_ALL_TSO);
-        compat |= ethtool_set_feature_compat(dev, dev->ethtool_ops->set_rx_csum,
-                features, NETIF_F_RXCSUM);
-        compat |= ethtool_set_flags_compat(dev, dev->ethtool_ops->set_flags,
-                features, flags_dup_features);
-
-        return compat;
-}
+static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] = {
+        [NETIF_F_SG_BIT] =               "tx-scatter-gather",
+        [NETIF_F_IP_CSUM_BIT] =          "tx-checksum-ipv4",
+        [NETIF_F_HW_CSUM_BIT] =          "tx-checksum-ip-generic",
+        [NETIF_F_IPV6_CSUM_BIT] =        "tx-checksum-ipv6",
+        [NETIF_F_HIGHDMA_BIT] =          "highdma",
+        [NETIF_F_FRAGLIST_BIT] =         "tx-scatter-gather-fraglist",
+        [NETIF_F_HW_VLAN_TX_BIT] =       "tx-vlan-hw-insert",
+
+        [NETIF_F_HW_VLAN_RX_BIT] =       "rx-vlan-hw-parse",
+        [NETIF_F_HW_VLAN_FILTER_BIT] =   "rx-vlan-filter",
+        [NETIF_F_VLAN_CHALLENGED_BIT] =  "vlan-challenged",
+        [NETIF_F_GSO_BIT] =              "tx-generic-segmentation",
+        [NETIF_F_LLTX_BIT] =             "tx-lockless",
+        [NETIF_F_NETNS_LOCAL_BIT] =      "netns-local",
+        [NETIF_F_GRO_BIT] =              "rx-gro",
+        [NETIF_F_LRO_BIT] =              "rx-lro",
+
+        [NETIF_F_TSO_BIT] =              "tx-tcp-segmentation",
+        [NETIF_F_UFO_BIT] =              "tx-udp-fragmentation",
+        [NETIF_F_GSO_ROBUST_BIT] =       "tx-gso-robust",
+        [NETIF_F_TSO_ECN_BIT] =          "tx-tcp-ecn-segmentation",
+        [NETIF_F_TSO6_BIT] =             "tx-tcp6-segmentation",
+        [NETIF_F_FSO_BIT] =              "tx-fcoe-segmentation",
+
+        [NETIF_F_FCOE_CRC_BIT] =         "tx-checksum-fcoe-crc",
+        [NETIF_F_SCTP_CSUM_BIT] =        "tx-checksum-sctp",
+        [NETIF_F_FCOE_MTU_BIT] =         "fcoe-mtu",
+        [NETIF_F_NTUPLE_BIT] =           "rx-ntuple-filter",
+        [NETIF_F_RXHASH_BIT] =           "rx-hashing",
+        [NETIF_F_RXCSUM_BIT] =           "rx-checksum",
+        [NETIF_F_NOCACHE_COPY_BIT] =     "tx-nocache-copy",
+        [NETIF_F_LOOPBACK_BIT] =         "loopback",
+};

 static int ethtool_get_features(struct net_device *dev, void __user *useraddr)
 {
@@ -272,18 +81,21 @@ static int ethtool_get_features(struct net_device *dev, void __user *useraddr)
                 .cmd = ETHTOOL_GFEATURES,
                 .size = ETHTOOL_DEV_FEATURE_WORDS,
         };
-        struct ethtool_get_features_block features[ETHTOOL_DEV_FEATURE_WORDS] = {
-                {
-                        .available = dev->hw_features,
-                        .requested = dev->wanted_features,
-                        .active = dev->features,
-                        .never_changed = NETIF_F_NEVER_CHANGE,
-                },
-        };
+        struct ethtool_get_features_block features[ETHTOOL_DEV_FEATURE_WORDS];
         u32 __user *sizeaddr;
         u32 copy_size;
+        int i;

-        ethtool_get_features_compat(dev, features);
+        /* in case feature bits run out again */
+        BUILD_BUG_ON(ETHTOOL_DEV_FEATURE_WORDS * sizeof(u32) > sizeof(netdev_features_t));
+
+        for (i = 0; i < ETHTOOL_DEV_FEATURE_WORDS; ++i) {
+                features[i].available = (u32)(dev->hw_features >> (32 * i));
+                features[i].requested = (u32)(dev->wanted_features >> (32 * i));
+                features[i].active = (u32)(dev->features >> (32 * i));
+                features[i].never_changed =
+                        (u32)(NETIF_F_NEVER_CHANGE >> (32 * i));
+        }

         sizeaddr = useraddr + offsetof(struct ethtool_gfeatures, size);
         if (get_user(copy_size, sizeaddr))
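The ethtool ABI keeps its u32-based layout, so with 64-bit features the kernel now round-trips the mask through ETHTOOL_DEV_FEATURE_WORDS 32-bit blocks, as the loop above does. A standalone round-trip of that packing (NETDEV_FEATURE_COUNT is set to 64 here purely for illustration; the real count is whatever netdev_features.h defines):

```c
#include <stdint.h>
#include <stdio.h>

typedef uint64_t netdev_features_t;

#define NETDEV_FEATURE_COUNT 64   /* illustrative, not the kernel's value */
#define ETHTOOL_DEV_FEATURE_WORDS ((NETDEV_FEATURE_COUNT + 31) / 32)

int main(void)
{
        netdev_features_t features = 0x00000002deadbeefULL;
        uint32_t words[ETHTOOL_DEV_FEATURE_WORDS];
        netdev_features_t back = 0;
        int i;

        for (i = 0; i < ETHTOOL_DEV_FEATURE_WORDS; ++i)
                words[i] = (uint32_t)(features >> (32 * i)); /* kernel -> ABI */

        for (i = 0; i < ETHTOOL_DEV_FEATURE_WORDS; ++i)
                back |= (netdev_features_t)words[i] << (32 * i); /* and back */

        printf("round-trip %s\n", back == features ? "ok" : "broken");
        return 0;
}
```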
@@ -305,7 +117,8 @@ static int ethtool_set_features(struct net_device *dev, void __user *useraddr) | |||
305 | { | 117 | { |
306 | struct ethtool_sfeatures cmd; | 118 | struct ethtool_sfeatures cmd; |
307 | struct ethtool_set_features_block features[ETHTOOL_DEV_FEATURE_WORDS]; | 119 | struct ethtool_set_features_block features[ETHTOOL_DEV_FEATURE_WORDS]; |
308 | int ret = 0; | 120 | netdev_features_t wanted = 0, valid = 0; |
121 | int i, ret = 0; | ||
309 | 122 | ||
310 | if (copy_from_user(&cmd, useraddr, sizeof(cmd))) | 123 | if (copy_from_user(&cmd, useraddr, sizeof(cmd))) |
311 | return -EFAULT; | 124 | return -EFAULT; |
@@ -317,65 +130,29 @@ static int ethtool_set_features(struct net_device *dev, void __user *useraddr) | |||
317 | if (copy_from_user(features, useraddr, sizeof(features))) | 130 | if (copy_from_user(features, useraddr, sizeof(features))) |
318 | return -EFAULT; | 131 | return -EFAULT; |
319 | 132 | ||
320 | if (features[0].valid & ~NETIF_F_ETHTOOL_BITS) | 133 | for (i = 0; i < ETHTOOL_DEV_FEATURE_WORDS; ++i) { |
321 | return -EINVAL; | 134 | valid |= (netdev_features_t)features[i].valid << (32 * i); |
135 | wanted |= (netdev_features_t)features[i].requested << (32 * i); | ||
136 | } | ||
322 | 137 | ||
323 | if (ethtool_set_features_compat(dev, features)) | 138 | if (valid & ~NETIF_F_ETHTOOL_BITS) |
324 | ret |= ETHTOOL_F_COMPAT; | 139 | return -EINVAL; |
325 | 140 | ||
326 | if (features[0].valid & ~dev->hw_features) { | 141 | if (valid & ~dev->hw_features) { |
327 | features[0].valid &= dev->hw_features; | 142 | valid &= dev->hw_features; |
328 | ret |= ETHTOOL_F_UNSUPPORTED; | 143 | ret |= ETHTOOL_F_UNSUPPORTED; |
329 | } | 144 | } |
330 | 145 | ||
331 | dev->wanted_features &= ~features[0].valid; | 146 | dev->wanted_features &= ~valid; |
332 | dev->wanted_features |= features[0].valid & features[0].requested; | 147 | dev->wanted_features |= wanted & valid; |
333 | __netdev_update_features(dev); | 148 | __netdev_update_features(dev); |
334 | 149 | ||
335 | if ((dev->wanted_features ^ dev->features) & features[0].valid) | 150 | if ((dev->wanted_features ^ dev->features) & valid) |
336 | ret |= ETHTOOL_F_WISH; | 151 | ret |= ETHTOOL_F_WISH; |
337 | 152 | ||
338 | return ret; | 153 | return ret; |
339 | } | 154 | } |
340 | 155 | ||
341 | static const char netdev_features_strings[ETHTOOL_DEV_FEATURE_WORDS * 32][ETH_GSTRING_LEN] = { | ||
342 | /* NETIF_F_SG */ "tx-scatter-gather", | ||
343 | /* NETIF_F_IP_CSUM */ "tx-checksum-ipv4", | ||
344 | /* NETIF_F_NO_CSUM */ "tx-checksum-unneeded", | ||
345 | /* NETIF_F_HW_CSUM */ "tx-checksum-ip-generic", | ||
346 | /* NETIF_F_IPV6_CSUM */ "tx-checksum-ipv6", | ||
347 | /* NETIF_F_HIGHDMA */ "highdma", | ||
348 | /* NETIF_F_FRAGLIST */ "tx-scatter-gather-fraglist", | ||
349 | /* NETIF_F_HW_VLAN_TX */ "tx-vlan-hw-insert", | ||
350 | |||
351 | /* NETIF_F_HW_VLAN_RX */ "rx-vlan-hw-parse", | ||
352 | /* NETIF_F_HW_VLAN_FILTER */ "rx-vlan-filter", | ||
353 | /* NETIF_F_VLAN_CHALLENGED */ "vlan-challenged", | ||
354 | /* NETIF_F_GSO */ "tx-generic-segmentation", | ||
355 | /* NETIF_F_LLTX */ "tx-lockless", | ||
356 | /* NETIF_F_NETNS_LOCAL */ "netns-local", | ||
357 | /* NETIF_F_GRO */ "rx-gro", | ||
358 | /* NETIF_F_LRO */ "rx-lro", | ||
359 | |||
360 | /* NETIF_F_TSO */ "tx-tcp-segmentation", | ||
361 | /* NETIF_F_UFO */ "tx-udp-fragmentation", | ||
362 | /* NETIF_F_GSO_ROBUST */ "tx-gso-robust", | ||
363 | /* NETIF_F_TSO_ECN */ "tx-tcp-ecn-segmentation", | ||
364 | /* NETIF_F_TSO6 */ "tx-tcp6-segmentation", | ||
365 | /* NETIF_F_FSO */ "tx-fcoe-segmentation", | ||
366 | "", | ||
367 | "", | ||
368 | |||
369 | /* NETIF_F_FCOE_CRC */ "tx-checksum-fcoe-crc", | ||
370 | /* NETIF_F_SCTP_CSUM */ "tx-checksum-sctp", | ||
371 | /* NETIF_F_FCOE_MTU */ "fcoe-mtu", | ||
372 | /* NETIF_F_NTUPLE */ "rx-ntuple-filter", | ||
373 | /* NETIF_F_RXHASH */ "rx-hashing", | ||
374 | /* NETIF_F_RXCSUM */ "rx-checksum", | ||
375 | /* NETIF_F_NOCACHE_COPY */ "tx-nocache-copy", | ||
376 | /* NETIF_F_LOOPBACK */ "loopback", | ||
377 | }; | ||
378 | |||
379 | static int __ethtool_get_sset_count(struct net_device *dev, int sset) | 156 | static int __ethtool_get_sset_count(struct net_device *dev, int sset) |
380 | { | 157 | { |
381 | const struct ethtool_ops *ops = dev->ethtool_ops; | 158 | const struct ethtool_ops *ops = dev->ethtool_ops; |
@@ -402,7 +179,7 @@ static void __ethtool_get_strings(struct net_device *dev, | |||
402 | ops->get_strings(dev, stringset, data); | 179 | ops->get_strings(dev, stringset, data); |
403 | } | 180 | } |
404 | 181 | ||
405 | static u32 ethtool_get_feature_mask(u32 eth_cmd) | 182 | static netdev_features_t ethtool_get_feature_mask(u32 eth_cmd) |
406 | { | 183 | { |
407 | /* feature masks of legacy discrete ethtool ops */ | 184 | /* feature masks of legacy discrete ethtool ops */ |
408 | 185 | ||
@@ -433,136 +210,82 @@ static u32 ethtool_get_feature_mask(u32 eth_cmd) | |||
433 | } | 210 | } |
434 | } | 211 | } |
435 | 212 | ||
436 | static void *__ethtool_get_one_feature_actor(struct net_device *dev, u32 ethcmd) | ||
437 | { | ||
438 | const struct ethtool_ops *ops = dev->ethtool_ops; | ||
439 | |||
440 | if (!ops) | ||
441 | return NULL; | ||
442 | |||
443 | switch (ethcmd) { | ||
444 | case ETHTOOL_GTXCSUM: | ||
445 | return ops->get_tx_csum; | ||
446 | case ETHTOOL_GRXCSUM: | ||
447 | return ops->get_rx_csum; | ||
448 | case ETHTOOL_SSG: | ||
449 | return ops->get_sg; | ||
450 | case ETHTOOL_STSO: | ||
451 | return ops->get_tso; | ||
452 | case ETHTOOL_SUFO: | ||
453 | return ops->get_ufo; | ||
454 | default: | ||
455 | return NULL; | ||
456 | } | ||
457 | } | ||
458 | |||
459 | static u32 __ethtool_get_rx_csum_oldbug(struct net_device *dev) | ||
460 | { | ||
461 | return !!(dev->features & NETIF_F_ALL_CSUM); | ||
462 | } | ||
463 | |||
464 | static int ethtool_get_one_feature(struct net_device *dev, | 213 | static int ethtool_get_one_feature(struct net_device *dev, |
465 | char __user *useraddr, u32 ethcmd) | 214 | char __user *useraddr, u32 ethcmd) |
466 | { | 215 | { |
467 | u32 mask = ethtool_get_feature_mask(ethcmd); | 216 | netdev_features_t mask = ethtool_get_feature_mask(ethcmd); |
468 | struct ethtool_value edata = { | 217 | struct ethtool_value edata = { |
469 | .cmd = ethcmd, | 218 | .cmd = ethcmd, |
470 | .data = !!(dev->features & mask), | 219 | .data = !!(dev->features & mask), |
471 | }; | 220 | }; |
472 | 221 | ||
473 | /* compatibility with discrete get_ ops */ | ||
474 | if (!(dev->hw_features & mask)) { | ||
475 | u32 (*actor)(struct net_device *); | ||
476 | |||
477 | actor = __ethtool_get_one_feature_actor(dev, ethcmd); | ||
478 | |||
479 | /* bug compatibility with old get_rx_csum */ | ||
480 | if (ethcmd == ETHTOOL_GRXCSUM && !actor) | ||
481 | actor = __ethtool_get_rx_csum_oldbug; | ||
482 | |||
483 | if (actor) | ||
484 | edata.data = actor(dev); | ||
485 | } | ||
486 | |||
487 | if (copy_to_user(useraddr, &edata, sizeof(edata))) | 222 | if (copy_to_user(useraddr, &edata, sizeof(edata))) |
488 | return -EFAULT; | 223 | return -EFAULT; |
489 | return 0; | 224 | return 0; |
490 | } | 225 | } |
491 | 226 | ||
492 | static int __ethtool_set_tx_csum(struct net_device *dev, u32 data); | ||
493 | static int __ethtool_set_rx_csum(struct net_device *dev, u32 data); | ||
494 | static int __ethtool_set_sg(struct net_device *dev, u32 data); | ||
495 | static int __ethtool_set_tso(struct net_device *dev, u32 data); | ||
496 | static int __ethtool_set_ufo(struct net_device *dev, u32 data); | ||
497 | |||
498 | static int ethtool_set_one_feature(struct net_device *dev, | 227 | static int ethtool_set_one_feature(struct net_device *dev, |
499 | void __user *useraddr, u32 ethcmd) | 228 | void __user *useraddr, u32 ethcmd) |
500 | { | 229 | { |
501 | struct ethtool_value edata; | 230 | struct ethtool_value edata; |
502 | u32 mask; | 231 | netdev_features_t mask; |
503 | 232 | ||
504 | if (copy_from_user(&edata, useraddr, sizeof(edata))) | 233 | if (copy_from_user(&edata, useraddr, sizeof(edata))) |
505 | return -EFAULT; | 234 | return -EFAULT; |
506 | 235 | ||
507 | mask = ethtool_get_feature_mask(ethcmd); | 236 | mask = ethtool_get_feature_mask(ethcmd); |
508 | mask &= dev->hw_features; | 237 | mask &= dev->hw_features; |
509 | if (mask) { | 238 | if (!mask) |
510 | if (edata.data) | 239 | return -EOPNOTSUPP; |
511 | dev->wanted_features |= mask; | ||
512 | else | ||
513 | dev->wanted_features &= ~mask; | ||
514 | 240 | ||
515 | __netdev_update_features(dev); | 241 | if (edata.data) |
516 | return 0; | 242 | dev->wanted_features |= mask; |
517 | } | 243 | else |
244 | dev->wanted_features &= ~mask; | ||
518 | 245 | ||
519 | /* Driver is not converted to ndo_fix_features or does not | 246 | __netdev_update_features(dev); |
520 | * support changing this offload. In the latter case it won't | ||
521 | * have corresponding ethtool_ops field set. | ||
522 | * | ||
523 | * Following part is to be removed after all drivers advertise | ||
524 | * their changeable features in netdev->hw_features and stop | ||
525 | * using discrete offload setting ops. | ||
526 | */ | ||
527 | 247 | ||
528 | switch (ethcmd) { | 248 | return 0; |
529 | case ETHTOOL_STXCSUM: | 249 | } |
530 | return __ethtool_set_tx_csum(dev, edata.data); | 250 | |
531 | case ETHTOOL_SRXCSUM: | 251 | #define ETH_ALL_FLAGS (ETH_FLAG_LRO | ETH_FLAG_RXVLAN | ETH_FLAG_TXVLAN | \ |
532 | return __ethtool_set_rx_csum(dev, edata.data); | 252 | ETH_FLAG_NTUPLE | ETH_FLAG_RXHASH) |
533 | case ETHTOOL_SSG: | 253 | #define ETH_ALL_FEATURES (NETIF_F_LRO | NETIF_F_HW_VLAN_RX | \ |
534 | return __ethtool_set_sg(dev, edata.data); | 254 | NETIF_F_HW_VLAN_TX | NETIF_F_NTUPLE | NETIF_F_RXHASH) |
535 | case ETHTOOL_STSO: | 255 | |
536 | return __ethtool_set_tso(dev, edata.data); | 256 | static u32 __ethtool_get_flags(struct net_device *dev) |
537 | case ETHTOOL_SUFO: | 257 | { |
538 | return __ethtool_set_ufo(dev, edata.data); | 258 | u32 flags = 0; |
539 | default: | 259 | |
540 | return -EOPNOTSUPP; | 260 | if (dev->features & NETIF_F_LRO) flags |= ETH_FLAG_LRO; |
541 | } | 261 | if (dev->features & NETIF_F_HW_VLAN_RX) flags |= ETH_FLAG_RXVLAN; |
262 | if (dev->features & NETIF_F_HW_VLAN_TX) flags |= ETH_FLAG_TXVLAN; | ||
263 | if (dev->features & NETIF_F_NTUPLE) flags |= ETH_FLAG_NTUPLE; | ||
264 | if (dev->features & NETIF_F_RXHASH) flags |= ETH_FLAG_RXHASH; | ||
265 | |||
266 | return flags; | ||
542 | } | 267 | } |
543 | 268 | ||
544 | int __ethtool_set_flags(struct net_device *dev, u32 data) | 269 | static int __ethtool_set_flags(struct net_device *dev, u32 data) |
545 | { | 270 | { |
546 | u32 changed; | 271 | netdev_features_t features = 0, changed; |
547 | 272 | ||
548 | if (data & ~flags_dup_features) | 273 | if (data & ~ETH_ALL_FLAGS) |
549 | return -EINVAL; | 274 | return -EINVAL; |
550 | 275 | ||
551 | /* legacy set_flags() op */ | 276 | if (data & ETH_FLAG_LRO) features |= NETIF_F_LRO; |
552 | if (dev->ethtool_ops->set_flags) { | 277 | if (data & ETH_FLAG_RXVLAN) features |= NETIF_F_HW_VLAN_RX; |
553 | if (unlikely(dev->hw_features & flags_dup_features)) | 278 | if (data & ETH_FLAG_TXVLAN) features |= NETIF_F_HW_VLAN_TX; |
554 | netdev_warn(dev, | 279 | if (data & ETH_FLAG_NTUPLE) features |= NETIF_F_NTUPLE; |
555 | "driver BUG: mixed hw_features and set_flags()\n"); | 280 | if (data & ETH_FLAG_RXHASH) features |= NETIF_F_RXHASH; |
556 | return dev->ethtool_ops->set_flags(dev, data); | ||
557 | } | ||
558 | 281 | ||
559 | /* allow changing only bits set in hw_features */ | 282 | /* allow changing only bits set in hw_features */ |
560 | changed = (data ^ dev->features) & flags_dup_features; | 283 | changed = (features ^ dev->features) & ETH_ALL_FEATURES; |
561 | if (changed & ~dev->hw_features) | 284 | if (changed & ~dev->hw_features) |
562 | return (changed & dev->hw_features) ? -EINVAL : -EOPNOTSUPP; | 285 | return (changed & dev->hw_features) ? -EINVAL : -EOPNOTSUPP; |
563 | 286 | ||
564 | dev->wanted_features = | 287 | dev->wanted_features = |
565 | (dev->wanted_features & ~changed) | (data & dev->hw_features); | 288 | (dev->wanted_features & ~changed) | (features & changed); |
566 | 289 | ||
567 | __netdev_update_features(dev); | 290 | __netdev_update_features(dev); |
568 | 291 | ||
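For reference, a minimal userspace sketch of the legacy flags path just rewritten; set_lro() is an illustrative name, and since ETHTOOL_SFLAGS takes the complete flag word, a well-behaved caller reads ETHTOOL_GFLAGS first:

#include <sys/ioctl.h>
#include <linux/ethtool.h>
#include <linux/sockios.h>
#include <net/if.h>

/* fd is an AF_INET datagram socket, ifr->ifr_name is already filled in */
static int set_lro(int fd, struct ifreq *ifr, int on)
{
        struct ethtool_value eval = { .cmd = ETHTOOL_GFLAGS };

        ifr->ifr_data = (void *)&eval;
        if (ioctl(fd, SIOCETHTOOL, ifr) < 0)
                return -1;

        eval.cmd = ETHTOOL_SFLAGS;
        if (on)
                eval.data |= ETH_FLAG_LRO;
        else
                eval.data &= ~ETH_FLAG_LRO;
        return ioctl(fd, SIOCETHTOOL, ifr);     /* lands in __ethtool_set_flags() */
}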
@@ -716,6 +439,7 @@ static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev, | |||
716 | { | 439 | { |
717 | struct ethtool_rxnfc info; | 440 | struct ethtool_rxnfc info; |
718 | size_t info_size = sizeof(info); | 441 | size_t info_size = sizeof(info); |
442 | int rc; | ||
719 | 443 | ||
720 | if (!dev->ethtool_ops->set_rxnfc) | 444 | if (!dev->ethtool_ops->set_rxnfc) |
721 | return -EOPNOTSUPP; | 445 | return -EOPNOTSUPP; |
@@ -731,7 +455,15 @@ static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev, | |||
731 | if (copy_from_user(&info, useraddr, info_size)) | 455 | if (copy_from_user(&info, useraddr, info_size)) |
732 | return -EFAULT; | 456 | return -EFAULT; |
733 | 457 | ||
734 | return dev->ethtool_ops->set_rxnfc(dev, &info); | 458 | rc = dev->ethtool_ops->set_rxnfc(dev, &info); |
459 | if (rc) | ||
460 | return rc; | ||
461 | |||
462 | if (cmd == ETHTOOL_SRXCLSRLINS && | ||
463 | copy_to_user(useraddr, &info, info_size)) | ||
464 | return -EFAULT; | ||
465 | |||
466 | return 0; | ||
735 | } | 467 | } |
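The added copy_to_user() matters for ETHTOOL_SRXCLSRLINS: the driver may rewrite parts of the rule it actually installed, notably fs.location, and userspace wants those back. A hedged sketch with illustrative field values, reusing the fd/ifr setup from the earlier sketch:

#include <stdio.h>
#include <string.h>
#include <arpa/inet.h>
#include <sys/ioctl.h>
#include <linux/ethtool.h>
#include <linux/sockios.h>
#include <net/if.h>

/* steer TCP/IPv4 packets with destination port 80 to RX ring 2 */
static int insert_rule(int fd, struct ifreq *ifr)
{
        struct ethtool_rxnfc nfc;

        memset(&nfc, 0, sizeof(nfc));
        nfc.cmd = ETHTOOL_SRXCLSRLINS;
        nfc.fs.flow_type = TCP_V4_FLOW;
        nfc.fs.h_u.tcp_ip4_spec.pdst = htons(80);
        nfc.fs.m_u.tcp_ip4_spec.pdst = htons(0xffff);   /* all port bits significant */
        nfc.fs.ring_cookie = 2;
        nfc.fs.location = 0;

        ifr->ifr_data = (void *)&nfc;
        if (ioctl(fd, SIOCETHTOOL, ifr) < 0)
                return -1;
        /* the kernel copied the possibly-updated rule back to us */
        printf("rule installed at location %u\n", nfc.fs.location);
        return 0;
}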
736 | 468 | ||
737 | static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev, | 469 | static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev, |
@@ -792,34 +524,44 @@ err_out: | |||
792 | static noinline_for_stack int ethtool_get_rxfh_indir(struct net_device *dev, | 524 | static noinline_for_stack int ethtool_get_rxfh_indir(struct net_device *dev, |
793 | void __user *useraddr) | 525 | void __user *useraddr) |
794 | { | 526 | { |
795 | struct ethtool_rxfh_indir *indir; | 527 | u32 user_size, dev_size; |
796 | u32 table_size; | 528 | u32 *indir; |
797 | size_t full_size; | ||
798 | int ret; | 529 | int ret; |
799 | 530 | ||
800 | if (!dev->ethtool_ops->get_rxfh_indir) | 531 | if (!dev->ethtool_ops->get_rxfh_indir_size || |
532 | !dev->ethtool_ops->get_rxfh_indir) | ||
533 | return -EOPNOTSUPP; | ||
534 | dev_size = dev->ethtool_ops->get_rxfh_indir_size(dev); | ||
535 | if (dev_size == 0) | ||
801 | return -EOPNOTSUPP; | 536 | return -EOPNOTSUPP; |
802 | 537 | ||
803 | if (copy_from_user(&table_size, | 538 | if (copy_from_user(&user_size, |
804 | useraddr + offsetof(struct ethtool_rxfh_indir, size), | 539 | useraddr + offsetof(struct ethtool_rxfh_indir, size), |
805 | sizeof(table_size))) | 540 | sizeof(user_size))) |
806 | return -EFAULT; | 541 | return -EFAULT; |
807 | 542 | ||
808 | if (table_size > | 543 | if (copy_to_user(useraddr + offsetof(struct ethtool_rxfh_indir, size), |
809 | (KMALLOC_MAX_SIZE - sizeof(*indir)) / sizeof(*indir->ring_index)) | 544 | &dev_size, sizeof(dev_size))) |
810 | return -ENOMEM; | 545 | return -EFAULT; |
811 | full_size = sizeof(*indir) + sizeof(*indir->ring_index) * table_size; | 546 | |
812 | indir = kzalloc(full_size, GFP_USER); | 547 | /* If the user buffer size is 0, this is just a query for the |
548 | * device table size. Otherwise, if it's smaller than the | ||
549 | * device table size it's an error. | ||
550 | */ | ||
551 | if (user_size < dev_size) | ||
552 | return user_size == 0 ? 0 : -EINVAL; | ||
553 | |||
554 | indir = kcalloc(dev_size, sizeof(indir[0]), GFP_USER); | ||
813 | if (!indir) | 555 | if (!indir) |
814 | return -ENOMEM; | 556 | return -ENOMEM; |
815 | 557 | ||
816 | indir->cmd = ETHTOOL_GRXFHINDIR; | ||
817 | indir->size = table_size; | ||
818 | ret = dev->ethtool_ops->get_rxfh_indir(dev, indir); | 558 | ret = dev->ethtool_ops->get_rxfh_indir(dev, indir); |
819 | if (ret) | 559 | if (ret) |
820 | goto out; | 560 | goto out; |
821 | 561 | ||
822 | if (copy_to_user(useraddr, indir, full_size)) | 562 | if (copy_to_user(useraddr + |
563 | offsetof(struct ethtool_rxfh_indir, ring_index[0]), | ||
564 | indir, dev_size * sizeof(indir[0]))) | ||
823 | ret = -EFAULT; | 565 | ret = -EFAULT; |
824 | 566 | ||
825 | out: | 567 | out: |
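The rewritten handler defines a two-step protocol: a call with size == 0 is a pure size query (the kernel writes dev_size back unconditionally), and a second call with a matching buffer fetches the table. A userspace sketch of that protocol (get_rss_indir is an illustrative name):

#include <stdlib.h>
#include <sys/ioctl.h>
#include <linux/ethtool.h>
#include <linux/sockios.h>
#include <net/if.h>

/* fd is an AF_INET datagram socket, ifr->ifr_name is already filled in */
static __u32 *get_rss_indir(int fd, struct ifreq *ifr, __u32 *dev_size)
{
        struct ethtool_rxfh_indir query = {
                .cmd = ETHTOOL_GRXFHINDIR,
                .size = 0,                      /* step 1: just ask for the size */
        };
        struct ethtool_rxfh_indir *full;

        ifr->ifr_data = (void *)&query;
        if (ioctl(fd, SIOCETHTOOL, ifr) < 0)
                return NULL;
        *dev_size = query.size;                 /* written back by the kernel */

        full = calloc(1, sizeof(*full) + *dev_size * sizeof(full->ring_index[0]));
        if (!full)
                return NULL;
        full->cmd = ETHTOOL_GRXFHINDIR;
        full->size = *dev_size;                 /* step 2: fetch the table */
        ifr->ifr_data = (void *)full;
        if (ioctl(fd, SIOCETHTOOL, ifr) < 0) {
                free(full);
                return NULL;
        }
        return full->ring_index;                /* caller frees the container */
}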
@@ -830,30 +572,56 @@ out: | |||
830 | static noinline_for_stack int ethtool_set_rxfh_indir(struct net_device *dev, | 572 | static noinline_for_stack int ethtool_set_rxfh_indir(struct net_device *dev, |
831 | void __user *useraddr) | 573 | void __user *useraddr) |
832 | { | 574 | { |
833 | struct ethtool_rxfh_indir *indir; | 575 | struct ethtool_rxnfc rx_rings; |
834 | u32 table_size; | 576 | u32 user_size, dev_size, i; |
835 | size_t full_size; | 577 | u32 *indir; |
836 | int ret; | 578 | int ret; |
837 | 579 | ||
838 | if (!dev->ethtool_ops->set_rxfh_indir) | 580 | if (!dev->ethtool_ops->get_rxfh_indir_size || |
581 | !dev->ethtool_ops->set_rxfh_indir || | ||
582 | !dev->ethtool_ops->get_rxnfc) | ||
583 | return -EOPNOTSUPP; | ||
584 | dev_size = dev->ethtool_ops->get_rxfh_indir_size(dev); | ||
585 | if (dev_size == 0) | ||
839 | return -EOPNOTSUPP; | 586 | return -EOPNOTSUPP; |
840 | 587 | ||
841 | if (copy_from_user(&table_size, | 588 | if (copy_from_user(&user_size, |
842 | useraddr + offsetof(struct ethtool_rxfh_indir, size), | 589 | useraddr + offsetof(struct ethtool_rxfh_indir, size), |
843 | sizeof(table_size))) | 590 | sizeof(user_size))) |
844 | return -EFAULT; | 591 | return -EFAULT; |
845 | 592 | ||
846 | if (table_size > | 593 | if (user_size != 0 && user_size != dev_size) |
847 | (KMALLOC_MAX_SIZE - sizeof(*indir)) / sizeof(*indir->ring_index)) | 594 | return -EINVAL; |
848 | return -ENOMEM; | 595 | |
849 | full_size = sizeof(*indir) + sizeof(*indir->ring_index) * table_size; | 596 | indir = kcalloc(dev_size, sizeof(indir[0]), GFP_USER); |
850 | indir = kmalloc(full_size, GFP_USER); | ||
851 | if (!indir) | 597 | if (!indir) |
852 | return -ENOMEM; | 598 | return -ENOMEM; |
853 | 599 | ||
854 | if (copy_from_user(indir, useraddr, full_size)) { | 600 | rx_rings.cmd = ETHTOOL_GRXRINGS; |
855 | ret = -EFAULT; | 601 | ret = dev->ethtool_ops->get_rxnfc(dev, &rx_rings, NULL); |
602 | if (ret) | ||
856 | goto out; | 603 | goto out; |
604 | |||
605 | if (user_size == 0) { | ||
606 | for (i = 0; i < dev_size; i++) | ||
607 | indir[i] = ethtool_rxfh_indir_default(i, rx_rings.data); | ||
608 | } else { | ||
609 | if (copy_from_user(indir, | ||
610 | useraddr + | ||
611 | offsetof(struct ethtool_rxfh_indir, | ||
612 | ring_index[0]), | ||
613 | dev_size * sizeof(indir[0]))) { | ||
614 | ret = -EFAULT; | ||
615 | goto out; | ||
616 | } | ||
617 | |||
618 | /* Validate ring indices */ | ||
619 | for (i = 0; i < dev_size; i++) { | ||
620 | if (indir[i] >= rx_rings.data) { | ||
621 | ret = -EINVAL; | ||
622 | goto out; | ||
623 | } | ||
624 | } | ||
857 | } | 625 | } |
858 | 626 | ||
859 | ret = dev->ethtool_ops->set_rxfh_indir(dev, indir); | 627 | ret = dev->ethtool_ops->set_rxfh_indir(dev, indir); |
@@ -863,58 +631,6 @@ out: | |||
863 | return ret; | 631 | return ret; |
864 | } | 632 | } |
865 | 633 | ||
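When user_size is 0, the table is refilled with the default spread. This matches the inline helper added to include/linux/ethtool.h in this series, which simply round-robins table slots across the RX ring count reported by ETHTOOL_GRXRINGS:

static inline u32 ethtool_rxfh_indir_default(u32 index, u32 n_rx_rings)
{
        return index % n_rx_rings;      /* 4 rings -> 0,1,2,3,0,1,2,3,... */
}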
866 | /* | ||
867 | * ethtool does not (or did not) set masks for flow parameters that are | ||
868 | * not specified, so if both value and mask are 0 then this must be | ||
869 | * treated as equivalent to a mask with all bits set. Implement that | ||
870 | * here rather than in drivers. | ||
871 | */ | ||
872 | static void rx_ntuple_fix_masks(struct ethtool_rx_ntuple_flow_spec *fs) | ||
873 | { | ||
874 | struct ethtool_tcpip4_spec *entry = &fs->h_u.tcp_ip4_spec; | ||
875 | struct ethtool_tcpip4_spec *mask = &fs->m_u.tcp_ip4_spec; | ||
876 | |||
877 | if (fs->flow_type != TCP_V4_FLOW && | ||
878 | fs->flow_type != UDP_V4_FLOW && | ||
879 | fs->flow_type != SCTP_V4_FLOW) | ||
880 | return; | ||
881 | |||
882 | if (!(entry->ip4src | mask->ip4src)) | ||
883 | mask->ip4src = htonl(0xffffffff); | ||
884 | if (!(entry->ip4dst | mask->ip4dst)) | ||
885 | mask->ip4dst = htonl(0xffffffff); | ||
886 | if (!(entry->psrc | mask->psrc)) | ||
887 | mask->psrc = htons(0xffff); | ||
888 | if (!(entry->pdst | mask->pdst)) | ||
889 | mask->pdst = htons(0xffff); | ||
890 | if (!(entry->tos | mask->tos)) | ||
891 | mask->tos = 0xff; | ||
892 | if (!(fs->vlan_tag | fs->vlan_tag_mask)) | ||
893 | fs->vlan_tag_mask = 0xffff; | ||
894 | if (!(fs->data | fs->data_mask)) | ||
895 | fs->data_mask = 0xffffffffffffffffULL; | ||
896 | } | ||
897 | |||
898 | static noinline_for_stack int ethtool_set_rx_ntuple(struct net_device *dev, | ||
899 | void __user *useraddr) | ||
900 | { | ||
901 | struct ethtool_rx_ntuple cmd; | ||
902 | const struct ethtool_ops *ops = dev->ethtool_ops; | ||
903 | |||
904 | if (!ops->set_rx_ntuple) | ||
905 | return -EOPNOTSUPP; | ||
906 | |||
907 | if (!(dev->features & NETIF_F_NTUPLE)) | ||
908 | return -EINVAL; | ||
909 | |||
910 | if (copy_from_user(&cmd, useraddr, sizeof(cmd))) | ||
911 | return -EFAULT; | ||
912 | |||
913 | rx_ntuple_fix_masks(&cmd.fs); | ||
914 | |||
915 | return ops->set_rx_ntuple(dev, &cmd); | ||
916 | } | ||
917 | |||
918 | static int ethtool_get_regs(struct net_device *dev, char __user *useraddr) | 634 | static int ethtool_get_regs(struct net_device *dev, char __user *useraddr) |
919 | { | 635 | { |
920 | struct ethtool_regs regs; | 636 | struct ethtool_regs regs; |
@@ -1231,81 +947,6 @@ static int ethtool_set_pauseparam(struct net_device *dev, void __user *useraddr) | |||
1231 | return dev->ethtool_ops->set_pauseparam(dev, &pauseparam); | 947 | return dev->ethtool_ops->set_pauseparam(dev, &pauseparam); |
1232 | } | 948 | } |
1233 | 949 | ||
1234 | static int __ethtool_set_sg(struct net_device *dev, u32 data) | ||
1235 | { | ||
1236 | int err; | ||
1237 | |||
1238 | if (!dev->ethtool_ops->set_sg) | ||
1239 | return -EOPNOTSUPP; | ||
1240 | |||
1241 | if (data && !(dev->features & NETIF_F_ALL_CSUM)) | ||
1242 | return -EINVAL; | ||
1243 | |||
1244 | if (!data && dev->ethtool_ops->set_tso) { | ||
1245 | err = dev->ethtool_ops->set_tso(dev, 0); | ||
1246 | if (err) | ||
1247 | return err; | ||
1248 | } | ||
1249 | |||
1250 | if (!data && dev->ethtool_ops->set_ufo) { | ||
1251 | err = dev->ethtool_ops->set_ufo(dev, 0); | ||
1252 | if (err) | ||
1253 | return err; | ||
1254 | } | ||
1255 | return dev->ethtool_ops->set_sg(dev, data); | ||
1256 | } | ||
1257 | |||
1258 | static int __ethtool_set_tx_csum(struct net_device *dev, u32 data) | ||
1259 | { | ||
1260 | int err; | ||
1261 | |||
1262 | if (!dev->ethtool_ops->set_tx_csum) | ||
1263 | return -EOPNOTSUPP; | ||
1264 | |||
1265 | if (!data && dev->ethtool_ops->set_sg) { | ||
1266 | err = __ethtool_set_sg(dev, 0); | ||
1267 | if (err) | ||
1268 | return err; | ||
1269 | } | ||
1270 | |||
1271 | return dev->ethtool_ops->set_tx_csum(dev, data); | ||
1272 | } | ||
1273 | |||
1274 | static int __ethtool_set_rx_csum(struct net_device *dev, u32 data) | ||
1275 | { | ||
1276 | if (!dev->ethtool_ops->set_rx_csum) | ||
1277 | return -EOPNOTSUPP; | ||
1278 | |||
1279 | if (!data) | ||
1280 | dev->features &= ~NETIF_F_GRO; | ||
1281 | |||
1282 | return dev->ethtool_ops->set_rx_csum(dev, data); | ||
1283 | } | ||
1284 | |||
1285 | static int __ethtool_set_tso(struct net_device *dev, u32 data) | ||
1286 | { | ||
1287 | if (!dev->ethtool_ops->set_tso) | ||
1288 | return -EOPNOTSUPP; | ||
1289 | |||
1290 | if (data && !(dev->features & NETIF_F_SG)) | ||
1291 | return -EINVAL; | ||
1292 | |||
1293 | return dev->ethtool_ops->set_tso(dev, data); | ||
1294 | } | ||
1295 | |||
1296 | static int __ethtool_set_ufo(struct net_device *dev, u32 data) | ||
1297 | { | ||
1298 | if (!dev->ethtool_ops->set_ufo) | ||
1299 | return -EOPNOTSUPP; | ||
1300 | if (data && !(dev->features & NETIF_F_SG)) | ||
1301 | return -EINVAL; | ||
1302 | if (data && !((dev->features & NETIF_F_GEN_CSUM) || | ||
1303 | (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM)) | ||
1304 | == (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) | ||
1305 | return -EINVAL; | ||
1306 | return dev->ethtool_ops->set_ufo(dev, data); | ||
1307 | } | ||
1308 | |||
1309 | static int ethtool_self_test(struct net_device *dev, char __user *useraddr) | 950 | static int ethtool_self_test(struct net_device *dev, char __user *useraddr) |
1310 | { | 951 | { |
1311 | struct ethtool_test test; | 952 | struct ethtool_test test; |
@@ -1771,9 +1412,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) | |||
1771 | break; | 1412 | break; |
1772 | case ETHTOOL_GFLAGS: | 1413 | case ETHTOOL_GFLAGS: |
1773 | rc = ethtool_get_value(dev, useraddr, ethcmd, | 1414 | rc = ethtool_get_value(dev, useraddr, ethcmd, |
1774 | (dev->ethtool_ops->get_flags ? | 1415 | __ethtool_get_flags); |
1775 | dev->ethtool_ops->get_flags : | ||
1776 | ethtool_op_get_flags)); | ||
1777 | break; | 1416 | break; |
1778 | case ETHTOOL_SFLAGS: | 1417 | case ETHTOOL_SFLAGS: |
1779 | rc = ethtool_set_value(dev, useraddr, __ethtool_set_flags); | 1418 | rc = ethtool_set_value(dev, useraddr, __ethtool_set_flags); |
@@ -1804,9 +1443,6 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) | |||
1804 | case ETHTOOL_RESET: | 1443 | case ETHTOOL_RESET: |
1805 | rc = ethtool_reset(dev, useraddr); | 1444 | rc = ethtool_reset(dev, useraddr); |
1806 | break; | 1445 | break; |
1807 | case ETHTOOL_SRXNTUPLE: | ||
1808 | rc = ethtool_set_rx_ntuple(dev, useraddr); | ||
1809 | break; | ||
1810 | case ETHTOOL_GSSET_INFO: | 1446 | case ETHTOOL_GSSET_INFO: |
1811 | rc = ethtool_get_sset_info(dev, useraddr); | 1447 | rc = ethtool_get_sset_info(dev, useraddr); |
1812 | break; | 1448 | break; |
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c new file mode 100644 index 000000000000..0985b9b14b80 --- /dev/null +++ b/net/core/flow_dissector.c | |||
@@ -0,0 +1,143 @@ | |||
1 | #include <linux/skbuff.h> | ||
2 | #include <linux/ip.h> | ||
3 | #include <linux/ipv6.h> | ||
4 | #include <linux/if_vlan.h> | ||
5 | #include <net/ip.h> | ||
6 | #include <linux/if_tunnel.h> | ||
7 | #include <linux/if_pppox.h> | ||
8 | #include <linux/ppp_defs.h> | ||
9 | #include <net/flow_keys.h> | ||
10 | |||
11 | /* copy saddr & daddr, possibly using 64bit load/store | ||
12 | * Equivalent to : flow->src = iph->saddr; | ||
13 | * flow->dst = iph->daddr; | ||
14 | */ | ||
15 | static void iph_to_flow_copy_addrs(struct flow_keys *flow, const struct iphdr *iph) | ||
16 | { | ||
17 | BUILD_BUG_ON(offsetof(typeof(*flow), dst) != | ||
18 | offsetof(typeof(*flow), src) + sizeof(flow->src)); | ||
19 | memcpy(&flow->src, &iph->saddr, sizeof(flow->src) + sizeof(flow->dst)); | ||
20 | } | ||
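The single memcpy() leans on the layout of struct flow_keys as added alongside this file (include/net/flow_keys.h): src and dst are adjacent, so one 8-byte copy fills both, and the BUILD_BUG_ON pins that assumption at compile time.

struct flow_keys {
        /* (src, dst) must stay adjacent, matching the IP header order */
        __be32 src;
        __be32 dst;
        union {
                __be32 ports;
                __be16 port16[2];
        };
        u8 ip_proto;
};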
21 | |||
22 | bool skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow) | ||
23 | { | ||
24 | int poff, nhoff = skb_network_offset(skb); | ||
25 | u8 ip_proto; | ||
26 | __be16 proto = skb->protocol; | ||
27 | |||
28 | memset(flow, 0, sizeof(*flow)); | ||
29 | |||
30 | again: | ||
31 | switch (proto) { | ||
32 | case __constant_htons(ETH_P_IP): { | ||
33 | const struct iphdr *iph; | ||
34 | struct iphdr _iph; | ||
35 | ip: | ||
36 | iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph); | ||
37 | if (!iph) | ||
38 | return false; | ||
39 | |||
40 | if (ip_is_fragment(iph)) | ||
41 | ip_proto = 0; | ||
42 | else | ||
43 | ip_proto = iph->protocol; | ||
44 | iph_to_flow_copy_addrs(flow, iph); | ||
45 | nhoff += iph->ihl * 4; | ||
46 | break; | ||
47 | } | ||
48 | case __constant_htons(ETH_P_IPV6): { | ||
49 | const struct ipv6hdr *iph; | ||
50 | struct ipv6hdr _iph; | ||
51 | ipv6: | ||
52 | iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph); | ||
53 | if (!iph) | ||
54 | return false; | ||
55 | |||
56 | ip_proto = iph->nexthdr; | ||
57 | flow->src = iph->saddr.s6_addr32[3]; | ||
58 | flow->dst = iph->daddr.s6_addr32[3]; | ||
59 | nhoff += sizeof(struct ipv6hdr); | ||
60 | break; | ||
61 | } | ||
62 | case __constant_htons(ETH_P_8021Q): { | ||
63 | const struct vlan_hdr *vlan; | ||
64 | struct vlan_hdr _vlan; | ||
65 | |||
66 | vlan = skb_header_pointer(skb, nhoff, sizeof(_vlan), &_vlan); | ||
67 | if (!vlan) | ||
68 | return false; | ||
69 | |||
70 | proto = vlan->h_vlan_encapsulated_proto; | ||
71 | nhoff += sizeof(*vlan); | ||
72 | goto again; | ||
73 | } | ||
74 | case __constant_htons(ETH_P_PPP_SES): { | ||
75 | struct { | ||
76 | struct pppoe_hdr hdr; | ||
77 | __be16 proto; | ||
78 | } *hdr, _hdr; | ||
79 | hdr = skb_header_pointer(skb, nhoff, sizeof(_hdr), &_hdr); | ||
80 | if (!hdr) | ||
81 | return false; | ||
82 | proto = hdr->proto; | ||
83 | nhoff += PPPOE_SES_HLEN; | ||
84 | switch (proto) { | ||
85 | case __constant_htons(PPP_IP): | ||
86 | goto ip; | ||
87 | case __constant_htons(PPP_IPV6): | ||
88 | goto ipv6; | ||
89 | default: | ||
90 | return false; | ||
91 | } | ||
92 | } | ||
93 | default: | ||
94 | return false; | ||
95 | } | ||
96 | |||
97 | switch (ip_proto) { | ||
98 | case IPPROTO_GRE: { | ||
99 | struct gre_hdr { | ||
100 | __be16 flags; | ||
101 | __be16 proto; | ||
102 | } *hdr, _hdr; | ||
103 | |||
104 | hdr = skb_header_pointer(skb, nhoff, sizeof(_hdr), &_hdr); | ||
105 | if (!hdr) | ||
106 | return false; | ||
107 | /* | ||
108 | * Only look inside GRE if version zero and no | ||
109 | * routing | ||
110 | */ | ||
111 | if (!(hdr->flags & (GRE_VERSION|GRE_ROUTING))) { | ||
112 | proto = hdr->proto; | ||
113 | nhoff += 4; | ||
114 | if (hdr->flags & GRE_CSUM) | ||
115 | nhoff += 4; | ||
116 | if (hdr->flags & GRE_KEY) | ||
117 | nhoff += 4; | ||
118 | if (hdr->flags & GRE_SEQ) | ||
119 | nhoff += 4; | ||
120 | goto again; | ||
121 | } | ||
122 | break; | ||
123 | } | ||
124 | case IPPROTO_IPIP: | ||
125 | goto again; | ||
126 | default: | ||
127 | break; | ||
128 | } | ||
129 | |||
130 | flow->ip_proto = ip_proto; | ||
131 | poff = proto_ports_offset(ip_proto); | ||
132 | if (poff >= 0) { | ||
133 | __be32 *ports, _ports; | ||
134 | |||
135 | nhoff += poff; | ||
136 | ports = skb_header_pointer(skb, nhoff, sizeof(_ports), &_ports); | ||
137 | if (ports) | ||
138 | flow->ports = *ports; | ||
139 | } | ||
140 | |||
141 | return true; | ||
142 | } | ||
143 | EXPORT_SYMBOL(skb_flow_dissect); | ||
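A typical consumer, condensed from how __skb_get_rxhash() in this same series uses the dissector: fold the extracted tuple into a non-zero hash. flow_keys_to_hash is an illustrative name, and the real code first swaps src/dst and the two port halves so both directions of a flow hash alike:

#include <net/flow_keys.h>
#include <linux/jhash.h>

static u32 flow_keys_to_hash(const struct flow_keys *keys, u32 hashrnd)
{
        u32 hash;

        hash = jhash_3words((__force u32)keys->dst,
                            (__force u32)keys->src,
                            (__force u32)keys->ports, hashrnd);
        if (!hash)
                hash = 1;       /* 0 means "no hash computed" to callers */
        return hash;
}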
diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 5ac07d31fbc9..e287346e0934 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c | |||
@@ -238,6 +238,7 @@ static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev) | |||
238 | it to safe state. | 238 | it to safe state. |
239 | */ | 239 | */ |
240 | skb_queue_purge(&n->arp_queue); | 240 | skb_queue_purge(&n->arp_queue); |
241 | n->arp_queue_len_bytes = 0; | ||
241 | n->output = neigh_blackhole; | 242 | n->output = neigh_blackhole; |
242 | if (n->nud_state & NUD_VALID) | 243 | if (n->nud_state & NUD_VALID) |
243 | n->nud_state = NUD_NOARP; | 244 | n->nud_state = NUD_NOARP; |
@@ -272,7 +273,7 @@ int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev) | |||
272 | } | 273 | } |
273 | EXPORT_SYMBOL(neigh_ifdown); | 274 | EXPORT_SYMBOL(neigh_ifdown); |
274 | 275 | ||
275 | static struct neighbour *neigh_alloc(struct neigh_table *tbl) | 276 | static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device *dev) |
276 | { | 277 | { |
277 | struct neighbour *n = NULL; | 278 | struct neighbour *n = NULL; |
278 | unsigned long now = jiffies; | 279 | unsigned long now = jiffies; |
@@ -287,7 +288,15 @@ static struct neighbour *neigh_alloc(struct neigh_table *tbl) | |||
287 | goto out_entries; | 288 | goto out_entries; |
288 | } | 289 | } |
289 | 290 | ||
290 | n = kmem_cache_zalloc(tbl->kmem_cachep, GFP_ATOMIC); | 291 | if (tbl->entry_size) |
292 | n = kzalloc(tbl->entry_size, GFP_ATOMIC); | ||
293 | else { | ||
294 | int sz = sizeof(*n) + tbl->key_len; | ||
295 | |||
296 | sz = ALIGN(sz, NEIGH_PRIV_ALIGN); | ||
297 | sz += dev->neigh_priv_len; | ||
298 | n = kzalloc(sz, GFP_ATOMIC); | ||
299 | } | ||
291 | if (!n) | 300 | if (!n) |
292 | goto out_entries; | 301 | goto out_entries; |
293 | 302 | ||
@@ -313,11 +322,18 @@ out_entries: | |||
313 | goto out; | 322 | goto out; |
314 | } | 323 | } |
315 | 324 | ||
325 | static void neigh_get_hash_rnd(u32 *x) | ||
326 | { | ||
327 | get_random_bytes(x, sizeof(*x)); | ||
328 | *x |= 1; | ||
329 | } | ||
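The |= 1 is not decoration: bucket selection in the lookup paths is a 32-bit multiplicative hash shifted down, and an odd multiplier is invertible mod 2^32, so the multiply itself cannot collapse distinct keys. A minimal sketch of that selection (bucket_of is an illustrative name):

#include <linux/types.h>

static inline u32 bucket_of(u32 key, u32 hash_rnd, unsigned int hash_shift)
{
        return (key * hash_rnd) >> (32 - hash_shift);   /* hash_rnd must be odd */
}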
330 | |||
316 | static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift) | 331 | static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift) |
317 | { | 332 | { |
318 | size_t size = (1 << shift) * sizeof(struct neighbour *); | 333 | size_t size = (1 << shift) * sizeof(struct neighbour *); |
319 | struct neigh_hash_table *ret; | 334 | struct neigh_hash_table *ret; |
320 | struct neighbour __rcu **buckets; | 335 | struct neighbour __rcu **buckets; |
336 | int i; | ||
321 | 337 | ||
322 | ret = kmalloc(sizeof(*ret), GFP_ATOMIC); | 338 | ret = kmalloc(sizeof(*ret), GFP_ATOMIC); |
323 | if (!ret) | 339 | if (!ret) |
@@ -334,8 +350,8 @@ static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift) | |||
334 | } | 350 | } |
335 | ret->hash_buckets = buckets; | 351 | ret->hash_buckets = buckets; |
336 | ret->hash_shift = shift; | 352 | ret->hash_shift = shift; |
337 | get_random_bytes(&ret->hash_rnd, sizeof(ret->hash_rnd)); | 353 | for (i = 0; i < NEIGH_NUM_HASH_RND; i++) |
338 | ret->hash_rnd |= 1; | 354 | neigh_get_hash_rnd(&ret->hash_rnd[i]); |
339 | return ret; | 355 | return ret; |
340 | } | 356 | } |
341 | 357 | ||
@@ -462,7 +478,7 @@ struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey, | |||
462 | u32 hash_val; | 478 | u32 hash_val; |
463 | int key_len = tbl->key_len; | 479 | int key_len = tbl->key_len; |
464 | int error; | 480 | int error; |
465 | struct neighbour *n1, *rc, *n = neigh_alloc(tbl); | 481 | struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev); |
466 | struct neigh_hash_table *nht; | 482 | struct neigh_hash_table *nht; |
467 | 483 | ||
468 | if (!n) { | 484 | if (!n) { |
@@ -480,6 +496,14 @@ struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey, | |||
480 | goto out_neigh_release; | 496 | goto out_neigh_release; |
481 | } | 497 | } |
482 | 498 | ||
499 | if (dev->netdev_ops->ndo_neigh_construct) { | ||
500 | error = dev->netdev_ops->ndo_neigh_construct(n); | ||
501 | if (error < 0) { | ||
502 | rc = ERR_PTR(error); | ||
503 | goto out_neigh_release; | ||
504 | } | ||
505 | } | ||
506 | |||
483 | /* Device specific setup. */ | 507 | /* Device specific setup. */ |
484 | if (n->parms->neigh_setup && | 508 | if (n->parms->neigh_setup && |
485 | (error = n->parms->neigh_setup(n)) < 0) { | 509 | (error = n->parms->neigh_setup(n)) < 0) { |
@@ -677,18 +701,14 @@ static inline void neigh_parms_put(struct neigh_parms *parms) | |||
677 | neigh_parms_destroy(parms); | 701 | neigh_parms_destroy(parms); |
678 | } | 702 | } |
679 | 703 | ||
680 | static void neigh_destroy_rcu(struct rcu_head *head) | ||
681 | { | ||
682 | struct neighbour *neigh = container_of(head, struct neighbour, rcu); | ||
683 | |||
684 | kmem_cache_free(neigh->tbl->kmem_cachep, neigh); | ||
685 | } | ||
686 | /* | 704 | /* |
687 | * neighbour must already be out of the table; | 705 | * neighbour must already be out of the table; |
688 | * | 706 | * |
689 | */ | 707 | */ |
690 | void neigh_destroy(struct neighbour *neigh) | 708 | void neigh_destroy(struct neighbour *neigh) |
691 | { | 709 | { |
710 | struct net_device *dev = neigh->dev; | ||
711 | |||
692 | NEIGH_CACHE_STAT_INC(neigh->tbl, destroys); | 712 | NEIGH_CACHE_STAT_INC(neigh->tbl, destroys); |
693 | 713 | ||
694 | if (!neigh->dead) { | 714 | if (!neigh->dead) { |
@@ -702,14 +722,18 @@ void neigh_destroy(struct neighbour *neigh) | |||
702 | printk(KERN_WARNING "Impossible event.\n"); | 722 | printk(KERN_WARNING "Impossible event.\n"); |
703 | 723 | ||
704 | skb_queue_purge(&neigh->arp_queue); | 724 | skb_queue_purge(&neigh->arp_queue); |
725 | neigh->arp_queue_len_bytes = 0; | ||
726 | |||
727 | if (dev->netdev_ops->ndo_neigh_destroy) | ||
728 | dev->netdev_ops->ndo_neigh_destroy(neigh); | ||
705 | 729 | ||
706 | dev_put(neigh->dev); | 730 | dev_put(dev); |
707 | neigh_parms_put(neigh->parms); | 731 | neigh_parms_put(neigh->parms); |
708 | 732 | ||
709 | NEIGH_PRINTK2("neigh %p is destroyed.\n", neigh); | 733 | NEIGH_PRINTK2("neigh %p is destroyed.\n", neigh); |
710 | 734 | ||
711 | atomic_dec(&neigh->tbl->entries); | 735 | atomic_dec(&neigh->tbl->entries); |
712 | call_rcu(&neigh->rcu, neigh_destroy_rcu); | 736 | kfree_rcu(neigh, rcu); |
713 | } | 737 | } |
714 | EXPORT_SYMBOL(neigh_destroy); | 738 | EXPORT_SYMBOL(neigh_destroy); |
715 | 739 | ||
@@ -842,6 +866,7 @@ static void neigh_invalidate(struct neighbour *neigh) | |||
842 | write_lock(&neigh->lock); | 866 | write_lock(&neigh->lock); |
843 | } | 867 | } |
844 | skb_queue_purge(&neigh->arp_queue); | 868 | skb_queue_purge(&neigh->arp_queue); |
869 | neigh->arp_queue_len_bytes = 0; | ||
845 | } | 870 | } |
846 | 871 | ||
847 | static void neigh_probe(struct neighbour *neigh) | 872 | static void neigh_probe(struct neighbour *neigh) |
@@ -980,15 +1005,20 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) | |||
980 | 1005 | ||
981 | if (neigh->nud_state == NUD_INCOMPLETE) { | 1006 | if (neigh->nud_state == NUD_INCOMPLETE) { |
982 | if (skb) { | 1007 | if (skb) { |
983 | if (skb_queue_len(&neigh->arp_queue) >= | 1008 | while (neigh->arp_queue_len_bytes + skb->truesize > |
984 | neigh->parms->queue_len) { | 1009 | neigh->parms->queue_len_bytes) { |
985 | struct sk_buff *buff; | 1010 | struct sk_buff *buff; |
1011 | |||
986 | buff = __skb_dequeue(&neigh->arp_queue); | 1012 | buff = __skb_dequeue(&neigh->arp_queue); |
1013 | if (!buff) | ||
1014 | break; | ||
1015 | neigh->arp_queue_len_bytes -= buff->truesize; | ||
987 | kfree_skb(buff); | 1016 | kfree_skb(buff); |
988 | NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards); | 1017 | NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards); |
989 | } | 1018 | } |
990 | skb_dst_force(skb); | 1019 | skb_dst_force(skb); |
991 | __skb_queue_tail(&neigh->arp_queue, skb); | 1020 | __skb_queue_tail(&neigh->arp_queue, skb); |
1021 | neigh->arp_queue_len_bytes += skb->truesize; | ||
992 | } | 1022 | } |
993 | rc = 1; | 1023 | rc = 1; |
994 | } | 1024 | } |
@@ -1167,7 +1197,7 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, | |||
1167 | 1197 | ||
1168 | rcu_read_lock(); | 1198 | rcu_read_lock(); |
1169 | /* On shaper/eql skb->dst->neighbour != neigh :( */ | 1199 | /* On shaper/eql skb->dst->neighbour != neigh :( */ |
1170 | if (dst && (n2 = dst_get_neighbour(dst)) != NULL) | 1200 | if (dst && (n2 = dst_get_neighbour_noref(dst)) != NULL) |
1171 | n1 = n2; | 1201 | n1 = n2; |
1172 | n1->output(n1, skb); | 1202 | n1->output(n1, skb); |
1173 | rcu_read_unlock(); | 1203 | rcu_read_unlock(); |
@@ -1175,6 +1205,7 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, | |||
1175 | write_lock_bh(&neigh->lock); | 1205 | write_lock_bh(&neigh->lock); |
1176 | } | 1206 | } |
1177 | skb_queue_purge(&neigh->arp_queue); | 1207 | skb_queue_purge(&neigh->arp_queue); |
1208 | neigh->arp_queue_len_bytes = 0; | ||
1178 | } | 1209 | } |
1179 | out: | 1210 | out: |
1180 | if (update_isrouter) { | 1211 | if (update_isrouter) { |
@@ -1477,11 +1508,6 @@ void neigh_table_init_no_netlink(struct neigh_table *tbl) | |||
1477 | tbl->parms.reachable_time = | 1508 | tbl->parms.reachable_time = |
1478 | neigh_rand_reach_time(tbl->parms.base_reachable_time); | 1509 | neigh_rand_reach_time(tbl->parms.base_reachable_time); |
1479 | 1510 | ||
1480 | if (!tbl->kmem_cachep) | ||
1481 | tbl->kmem_cachep = | ||
1482 | kmem_cache_create(tbl->id, tbl->entry_size, 0, | ||
1483 | SLAB_HWCACHE_ALIGN|SLAB_PANIC, | ||
1484 | NULL); | ||
1485 | tbl->stats = alloc_percpu(struct neigh_statistics); | 1511 | tbl->stats = alloc_percpu(struct neigh_statistics); |
1486 | if (!tbl->stats) | 1512 | if (!tbl->stats) |
1487 | panic("cannot create neighbour cache statistics"); | 1513 | panic("cannot create neighbour cache statistics"); |
@@ -1566,9 +1592,6 @@ int neigh_table_clear(struct neigh_table *tbl) | |||
1566 | free_percpu(tbl->stats); | 1592 | free_percpu(tbl->stats); |
1567 | tbl->stats = NULL; | 1593 | tbl->stats = NULL; |
1568 | 1594 | ||
1569 | kmem_cache_destroy(tbl->kmem_cachep); | ||
1570 | tbl->kmem_cachep = NULL; | ||
1571 | |||
1572 | return 0; | 1595 | return 0; |
1573 | } | 1596 | } |
1574 | EXPORT_SYMBOL(neigh_table_clear); | 1597 | EXPORT_SYMBOL(neigh_table_clear); |
@@ -1747,7 +1770,11 @@ static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms) | |||
1747 | NLA_PUT_U32(skb, NDTPA_IFINDEX, parms->dev->ifindex); | 1770 | NLA_PUT_U32(skb, NDTPA_IFINDEX, parms->dev->ifindex); |
1748 | 1771 | ||
1749 | NLA_PUT_U32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt)); | 1772 | NLA_PUT_U32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt)); |
1750 | NLA_PUT_U32(skb, NDTPA_QUEUE_LEN, parms->queue_len); | 1773 | NLA_PUT_U32(skb, NDTPA_QUEUE_LENBYTES, parms->queue_len_bytes); |
1774 | /* approximative value for deprecated QUEUE_LEN (in packets) */ | ||
1775 | NLA_PUT_U32(skb, NDTPA_QUEUE_LEN, | ||
1776 | DIV_ROUND_UP(parms->queue_len_bytes, | ||
1777 | SKB_TRUESIZE(ETH_FRAME_LEN))); | ||
1751 | NLA_PUT_U32(skb, NDTPA_PROXY_QLEN, parms->proxy_qlen); | 1778 | NLA_PUT_U32(skb, NDTPA_PROXY_QLEN, parms->proxy_qlen); |
1752 | NLA_PUT_U32(skb, NDTPA_APP_PROBES, parms->app_probes); | 1779 | NLA_PUT_U32(skb, NDTPA_APP_PROBES, parms->app_probes); |
1753 | NLA_PUT_U32(skb, NDTPA_UCAST_PROBES, parms->ucast_probes); | 1780 | NLA_PUT_U32(skb, NDTPA_UCAST_PROBES, parms->ucast_probes); |
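SKB_TRUESIZE(ETH_FRAME_LEN) estimates what one full-sized Ethernet frame really costs in memory: the 1514-byte frame plus the struct sk_buff and skb_shared_info overhead. Both directions of the deprecated packet-count view reduce to a pair of helpers like these (illustrative names, mirroring the conversions in neightbl_set() and the fill above; the round trip is intentionally lossy, hence "approximate"):

#include <linux/kernel.h>       /* DIV_ROUND_UP */
#include <linux/skbuff.h>       /* SKB_TRUESIZE */
#include <linux/if_ether.h>     /* ETH_FRAME_LEN */

static inline u32 unres_qlen_pkts_to_bytes(u32 pkts)
{
        return pkts * SKB_TRUESIZE(ETH_FRAME_LEN);
}

static inline u32 unres_qlen_bytes_to_pkts(u32 bytes)
{
        return DIV_ROUND_UP(bytes, SKB_TRUESIZE(ETH_FRAME_LEN));
}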
@@ -1808,7 +1835,7 @@ static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl, | |||
1808 | 1835 | ||
1809 | rcu_read_lock_bh(); | 1836 | rcu_read_lock_bh(); |
1810 | nht = rcu_dereference_bh(tbl->nht); | 1837 | nht = rcu_dereference_bh(tbl->nht); |
1811 | ndc.ndtc_hash_rnd = nht->hash_rnd; | 1838 | ndc.ndtc_hash_rnd = nht->hash_rnd[0]; |
1812 | ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1); | 1839 | ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1); |
1813 | rcu_read_unlock_bh(); | 1840 | rcu_read_unlock_bh(); |
1814 | 1841 | ||
@@ -1974,7 +2001,11 @@ static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) | |||
1974 | 2001 | ||
1975 | switch (i) { | 2002 | switch (i) { |
1976 | case NDTPA_QUEUE_LEN: | 2003 | case NDTPA_QUEUE_LEN: |
1977 | p->queue_len = nla_get_u32(tbp[i]); | 2004 | p->queue_len_bytes = nla_get_u32(tbp[i]) * |
2005 | SKB_TRUESIZE(ETH_FRAME_LEN); | ||
2006 | break; | ||
2007 | case NDTPA_QUEUE_LENBYTES: | ||
2008 | p->queue_len_bytes = nla_get_u32(tbp[i]); | ||
1978 | break; | 2009 | break; |
1979 | case NDTPA_PROXY_QLEN: | 2010 | case NDTPA_PROXY_QLEN: |
1980 | p->proxy_qlen = nla_get_u32(tbp[i]); | 2011 | p->proxy_qlen = nla_get_u32(tbp[i]); |
@@ -2638,117 +2669,158 @@ EXPORT_SYMBOL(neigh_app_ns); | |||
2638 | 2669 | ||
2639 | #ifdef CONFIG_SYSCTL | 2670 | #ifdef CONFIG_SYSCTL |
2640 | 2671 | ||
2641 | #define NEIGH_VARS_MAX 19 | 2672 | static int proc_unres_qlen(ctl_table *ctl, int write, void __user *buffer, |
2673 | size_t *lenp, loff_t *ppos) | ||
2674 | { | ||
2675 | int size, ret; | ||
2676 | ctl_table tmp = *ctl; | ||
2677 | |||
2678 | tmp.data = &size; | ||
2679 | size = DIV_ROUND_UP(*(int *)ctl->data, SKB_TRUESIZE(ETH_FRAME_LEN)); | ||
2680 | ret = proc_dointvec(&tmp, write, buffer, lenp, ppos); | ||
2681 | if (write && !ret) | ||
2682 | *(int *)ctl->data = size * SKB_TRUESIZE(ETH_FRAME_LEN); | ||
2683 | return ret; | ||
2684 | } | ||
2685 | |||
2686 | enum { | ||
2687 | NEIGH_VAR_MCAST_PROBE, | ||
2688 | NEIGH_VAR_UCAST_PROBE, | ||
2689 | NEIGH_VAR_APP_PROBE, | ||
2690 | NEIGH_VAR_RETRANS_TIME, | ||
2691 | NEIGH_VAR_BASE_REACHABLE_TIME, | ||
2692 | NEIGH_VAR_DELAY_PROBE_TIME, | ||
2693 | NEIGH_VAR_GC_STALETIME, | ||
2694 | NEIGH_VAR_QUEUE_LEN, | ||
2695 | NEIGH_VAR_QUEUE_LEN_BYTES, | ||
2696 | NEIGH_VAR_PROXY_QLEN, | ||
2697 | NEIGH_VAR_ANYCAST_DELAY, | ||
2698 | NEIGH_VAR_PROXY_DELAY, | ||
2699 | NEIGH_VAR_LOCKTIME, | ||
2700 | NEIGH_VAR_RETRANS_TIME_MS, | ||
2701 | NEIGH_VAR_BASE_REACHABLE_TIME_MS, | ||
2702 | NEIGH_VAR_GC_INTERVAL, | ||
2703 | NEIGH_VAR_GC_THRESH1, | ||
2704 | NEIGH_VAR_GC_THRESH2, | ||
2705 | NEIGH_VAR_GC_THRESH3, | ||
2706 | NEIGH_VAR_MAX | ||
2707 | }; | ||
2642 | 2708 | ||
2643 | static struct neigh_sysctl_table { | 2709 | static struct neigh_sysctl_table { |
2644 | struct ctl_table_header *sysctl_header; | 2710 | struct ctl_table_header *sysctl_header; |
2645 | struct ctl_table neigh_vars[NEIGH_VARS_MAX]; | 2711 | struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1]; |
2646 | char *dev_name; | 2712 | char *dev_name; |
2647 | } neigh_sysctl_template __read_mostly = { | 2713 | } neigh_sysctl_template __read_mostly = { |
2648 | .neigh_vars = { | 2714 | .neigh_vars = { |
2649 | { | 2715 | [NEIGH_VAR_MCAST_PROBE] = { |
2650 | .procname = "mcast_solicit", | 2716 | .procname = "mcast_solicit", |
2651 | .maxlen = sizeof(int), | 2717 | .maxlen = sizeof(int), |
2652 | .mode = 0644, | 2718 | .mode = 0644, |
2653 | .proc_handler = proc_dointvec, | 2719 | .proc_handler = proc_dointvec, |
2654 | }, | 2720 | }, |
2655 | { | 2721 | [NEIGH_VAR_UCAST_PROBE] = { |
2656 | .procname = "ucast_solicit", | 2722 | .procname = "ucast_solicit", |
2657 | .maxlen = sizeof(int), | 2723 | .maxlen = sizeof(int), |
2658 | .mode = 0644, | 2724 | .mode = 0644, |
2659 | .proc_handler = proc_dointvec, | 2725 | .proc_handler = proc_dointvec, |
2660 | }, | 2726 | }, |
2661 | { | 2727 | [NEIGH_VAR_APP_PROBE] = { |
2662 | .procname = "app_solicit", | 2728 | .procname = "app_solicit", |
2663 | .maxlen = sizeof(int), | 2729 | .maxlen = sizeof(int), |
2664 | .mode = 0644, | 2730 | .mode = 0644, |
2665 | .proc_handler = proc_dointvec, | 2731 | .proc_handler = proc_dointvec, |
2666 | }, | 2732 | }, |
2667 | { | 2733 | [NEIGH_VAR_RETRANS_TIME] = { |
2668 | .procname = "retrans_time", | 2734 | .procname = "retrans_time", |
2669 | .maxlen = sizeof(int), | 2735 | .maxlen = sizeof(int), |
2670 | .mode = 0644, | 2736 | .mode = 0644, |
2671 | .proc_handler = proc_dointvec_userhz_jiffies, | 2737 | .proc_handler = proc_dointvec_userhz_jiffies, |
2672 | }, | 2738 | }, |
2673 | { | 2739 | [NEIGH_VAR_BASE_REACHABLE_TIME] = { |
2674 | .procname = "base_reachable_time", | 2740 | .procname = "base_reachable_time", |
2675 | .maxlen = sizeof(int), | 2741 | .maxlen = sizeof(int), |
2676 | .mode = 0644, | 2742 | .mode = 0644, |
2677 | .proc_handler = proc_dointvec_jiffies, | 2743 | .proc_handler = proc_dointvec_jiffies, |
2678 | }, | 2744 | }, |
2679 | { | 2745 | [NEIGH_VAR_DELAY_PROBE_TIME] = { |
2680 | .procname = "delay_first_probe_time", | 2746 | .procname = "delay_first_probe_time", |
2681 | .maxlen = sizeof(int), | 2747 | .maxlen = sizeof(int), |
2682 | .mode = 0644, | 2748 | .mode = 0644, |
2683 | .proc_handler = proc_dointvec_jiffies, | 2749 | .proc_handler = proc_dointvec_jiffies, |
2684 | }, | 2750 | }, |
2685 | { | 2751 | [NEIGH_VAR_GC_STALETIME] = { |
2686 | .procname = "gc_stale_time", | 2752 | .procname = "gc_stale_time", |
2687 | .maxlen = sizeof(int), | 2753 | .maxlen = sizeof(int), |
2688 | .mode = 0644, | 2754 | .mode = 0644, |
2689 | .proc_handler = proc_dointvec_jiffies, | 2755 | .proc_handler = proc_dointvec_jiffies, |
2690 | }, | 2756 | }, |
2691 | { | 2757 | [NEIGH_VAR_QUEUE_LEN] = { |
2692 | .procname = "unres_qlen", | 2758 | .procname = "unres_qlen", |
2693 | .maxlen = sizeof(int), | 2759 | .maxlen = sizeof(int), |
2694 | .mode = 0644, | 2760 | .mode = 0644, |
2761 | .proc_handler = proc_unres_qlen, | ||
2762 | }, | ||
2763 | [NEIGH_VAR_QUEUE_LEN_BYTES] = { | ||
2764 | .procname = "unres_qlen_bytes", | ||
2765 | .maxlen = sizeof(int), | ||
2766 | .mode = 0644, | ||
2695 | .proc_handler = proc_dointvec, | 2767 | .proc_handler = proc_dointvec, |
2696 | }, | 2768 | }, |
2697 | { | 2769 | [NEIGH_VAR_PROXY_QLEN] = { |
2698 | .procname = "proxy_qlen", | 2770 | .procname = "proxy_qlen", |
2699 | .maxlen = sizeof(int), | 2771 | .maxlen = sizeof(int), |
2700 | .mode = 0644, | 2772 | .mode = 0644, |
2701 | .proc_handler = proc_dointvec, | 2773 | .proc_handler = proc_dointvec, |
2702 | }, | 2774 | }, |
2703 | { | 2775 | [NEIGH_VAR_ANYCAST_DELAY] = { |
2704 | .procname = "anycast_delay", | 2776 | .procname = "anycast_delay", |
2705 | .maxlen = sizeof(int), | 2777 | .maxlen = sizeof(int), |
2706 | .mode = 0644, | 2778 | .mode = 0644, |
2707 | .proc_handler = proc_dointvec_userhz_jiffies, | 2779 | .proc_handler = proc_dointvec_userhz_jiffies, |
2708 | }, | 2780 | }, |
2709 | { | 2781 | [NEIGH_VAR_PROXY_DELAY] = { |
2710 | .procname = "proxy_delay", | 2782 | .procname = "proxy_delay", |
2711 | .maxlen = sizeof(int), | 2783 | .maxlen = sizeof(int), |
2712 | .mode = 0644, | 2784 | .mode = 0644, |
2713 | .proc_handler = proc_dointvec_userhz_jiffies, | 2785 | .proc_handler = proc_dointvec_userhz_jiffies, |
2714 | }, | 2786 | }, |
2715 | { | 2787 | [NEIGH_VAR_LOCKTIME] = { |
2716 | .procname = "locktime", | 2788 | .procname = "locktime", |
2717 | .maxlen = sizeof(int), | 2789 | .maxlen = sizeof(int), |
2718 | .mode = 0644, | 2790 | .mode = 0644, |
2719 | .proc_handler = proc_dointvec_userhz_jiffies, | 2791 | .proc_handler = proc_dointvec_userhz_jiffies, |
2720 | }, | 2792 | }, |
2721 | { | 2793 | [NEIGH_VAR_RETRANS_TIME_MS] = { |
2722 | .procname = "retrans_time_ms", | 2794 | .procname = "retrans_time_ms", |
2723 | .maxlen = sizeof(int), | 2795 | .maxlen = sizeof(int), |
2724 | .mode = 0644, | 2796 | .mode = 0644, |
2725 | .proc_handler = proc_dointvec_ms_jiffies, | 2797 | .proc_handler = proc_dointvec_ms_jiffies, |
2726 | }, | 2798 | }, |
2727 | { | 2799 | [NEIGH_VAR_BASE_REACHABLE_TIME_MS] = { |
2728 | .procname = "base_reachable_time_ms", | 2800 | .procname = "base_reachable_time_ms", |
2729 | .maxlen = sizeof(int), | 2801 | .maxlen = sizeof(int), |
2730 | .mode = 0644, | 2802 | .mode = 0644, |
2731 | .proc_handler = proc_dointvec_ms_jiffies, | 2803 | .proc_handler = proc_dointvec_ms_jiffies, |
2732 | }, | 2804 | }, |
2733 | { | 2805 | [NEIGH_VAR_GC_INTERVAL] = { |
2734 | .procname = "gc_interval", | 2806 | .procname = "gc_interval", |
2735 | .maxlen = sizeof(int), | 2807 | .maxlen = sizeof(int), |
2736 | .mode = 0644, | 2808 | .mode = 0644, |
2737 | .proc_handler = proc_dointvec_jiffies, | 2809 | .proc_handler = proc_dointvec_jiffies, |
2738 | }, | 2810 | }, |
2739 | { | 2811 | [NEIGH_VAR_GC_THRESH1] = { |
2740 | .procname = "gc_thresh1", | 2812 | .procname = "gc_thresh1", |
2741 | .maxlen = sizeof(int), | 2813 | .maxlen = sizeof(int), |
2742 | .mode = 0644, | 2814 | .mode = 0644, |
2743 | .proc_handler = proc_dointvec, | 2815 | .proc_handler = proc_dointvec, |
2744 | }, | 2816 | }, |
2745 | { | 2817 | [NEIGH_VAR_GC_THRESH2] = { |
2746 | .procname = "gc_thresh2", | 2818 | .procname = "gc_thresh2", |
2747 | .maxlen = sizeof(int), | 2819 | .maxlen = sizeof(int), |
2748 | .mode = 0644, | 2820 | .mode = 0644, |
2749 | .proc_handler = proc_dointvec, | 2821 | .proc_handler = proc_dointvec, |
2750 | }, | 2822 | }, |
2751 | { | 2823 | [NEIGH_VAR_GC_THRESH3] = { |
2752 | .procname = "gc_thresh3", | 2824 | .procname = "gc_thresh3", |
2753 | .maxlen = sizeof(int), | 2825 | .maxlen = sizeof(int), |
2754 | .mode = 0644, | 2826 | .mode = 0644, |
@@ -2781,47 +2853,49 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p, | |||
2781 | if (!t) | 2853 | if (!t) |
2782 | goto err; | 2854 | goto err; |
2783 | 2855 | ||
2784 | t->neigh_vars[0].data = &p->mcast_probes; | 2856 | t->neigh_vars[NEIGH_VAR_MCAST_PROBE].data = &p->mcast_probes; |
2785 | t->neigh_vars[1].data = &p->ucast_probes; | 2857 | t->neigh_vars[NEIGH_VAR_UCAST_PROBE].data = &p->ucast_probes; |
2786 | t->neigh_vars[2].data = &p->app_probes; | 2858 | t->neigh_vars[NEIGH_VAR_APP_PROBE].data = &p->app_probes; |
2787 | t->neigh_vars[3].data = &p->retrans_time; | 2859 | t->neigh_vars[NEIGH_VAR_RETRANS_TIME].data = &p->retrans_time; |
2788 | t->neigh_vars[4].data = &p->base_reachable_time; | 2860 | t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].data = &p->base_reachable_time; |
2789 | t->neigh_vars[5].data = &p->delay_probe_time; | 2861 | t->neigh_vars[NEIGH_VAR_DELAY_PROBE_TIME].data = &p->delay_probe_time; |
2790 | t->neigh_vars[6].data = &p->gc_staletime; | 2862 | t->neigh_vars[NEIGH_VAR_GC_STALETIME].data = &p->gc_staletime; |
2791 | t->neigh_vars[7].data = &p->queue_len; | 2863 | t->neigh_vars[NEIGH_VAR_QUEUE_LEN].data = &p->queue_len_bytes; |
2792 | t->neigh_vars[8].data = &p->proxy_qlen; | 2864 | t->neigh_vars[NEIGH_VAR_QUEUE_LEN_BYTES].data = &p->queue_len_bytes; |
2793 | t->neigh_vars[9].data = &p->anycast_delay; | 2865 | t->neigh_vars[NEIGH_VAR_PROXY_QLEN].data = &p->proxy_qlen; |
2794 | t->neigh_vars[10].data = &p->proxy_delay; | 2866 | t->neigh_vars[NEIGH_VAR_ANYCAST_DELAY].data = &p->anycast_delay; |
2795 | t->neigh_vars[11].data = &p->locktime; | 2867 | t->neigh_vars[NEIGH_VAR_PROXY_DELAY].data = &p->proxy_delay; |
2796 | t->neigh_vars[12].data = &p->retrans_time; | 2868 | t->neigh_vars[NEIGH_VAR_LOCKTIME].data = &p->locktime; |
2797 | t->neigh_vars[13].data = &p->base_reachable_time; | 2869 | t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].data = &p->retrans_time; |
2870 | t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].data = &p->base_reachable_time; | ||
2798 | 2871 | ||
2799 | if (dev) { | 2872 | if (dev) { |
2800 | dev_name_source = dev->name; | 2873 | dev_name_source = dev->name; |
2801 | /* Terminate the table early */ | 2874 | /* Terminate the table early */ |
2802 | memset(&t->neigh_vars[14], 0, sizeof(t->neigh_vars[14])); | 2875 | memset(&t->neigh_vars[NEIGH_VAR_GC_INTERVAL], 0, |
2876 | sizeof(t->neigh_vars[NEIGH_VAR_GC_INTERVAL])); | ||
2803 | } else { | 2877 | } else { |
2804 | dev_name_source = neigh_path[NEIGH_CTL_PATH_DEV].procname; | 2878 | dev_name_source = neigh_path[NEIGH_CTL_PATH_DEV].procname; |
2805 | t->neigh_vars[14].data = (int *)(p + 1); | 2879 | t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = (int *)(p + 1); |
2806 | t->neigh_vars[15].data = (int *)(p + 1) + 1; | 2880 | t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = (int *)(p + 1) + 1; |
2807 | t->neigh_vars[16].data = (int *)(p + 1) + 2; | 2881 | t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = (int *)(p + 1) + 2; |
2808 | t->neigh_vars[17].data = (int *)(p + 1) + 3; | 2882 | t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = (int *)(p + 1) + 3; |
2809 | } | 2883 | } |
2810 | 2884 | ||
2811 | 2885 | ||
2812 | if (handler) { | 2886 | if (handler) { |
2813 | /* RetransTime */ | 2887 | /* RetransTime */ |
2814 | t->neigh_vars[3].proc_handler = handler; | 2888 | t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler; |
2815 | t->neigh_vars[3].extra1 = dev; | 2889 | t->neigh_vars[NEIGH_VAR_RETRANS_TIME].extra1 = dev; |
2816 | /* ReachableTime */ | 2890 | /* ReachableTime */ |
2817 | t->neigh_vars[4].proc_handler = handler; | 2891 | t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler; |
2818 | t->neigh_vars[4].extra1 = dev; | 2892 | t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].extra1 = dev; |
2819 | /* RetransTime (in milliseconds)*/ | 2893 | /* RetransTime (in milliseconds)*/ |
2820 | t->neigh_vars[12].proc_handler = handler; | 2894 | t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler; |
2821 | t->neigh_vars[12].extra1 = dev; | 2895 | t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].extra1 = dev; |
2822 | /* ReachableTime (in milliseconds) */ | 2896 | /* ReachableTime (in milliseconds) */ |
2823 | t->neigh_vars[13].proc_handler = handler; | 2897 | t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler; |
2824 | t->neigh_vars[13].extra1 = dev; | 2898 | t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].extra1 = dev; |
2825 | } | 2899 | } |
2826 | 2900 | ||
2827 | t->dev_name = kstrdup(dev_name_source, GFP_KERNEL); | 2901 | t->dev_name = kstrdup(dev_name_source, GFP_KERNEL); |
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 385aefe53648..abf4393a77b3 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c | |||
@@ -21,6 +21,7 @@ | |||
21 | #include <linux/wireless.h> | 21 | #include <linux/wireless.h> |
22 | #include <linux/vmalloc.h> | 22 | #include <linux/vmalloc.h> |
23 | #include <linux/export.h> | 23 | #include <linux/export.h> |
24 | #include <linux/jiffies.h> | ||
24 | #include <net/wext.h> | 25 | #include <net/wext.h> |
25 | 26 | ||
26 | #include "net-sysfs.h" | 27 | #include "net-sysfs.h" |
@@ -606,9 +607,12 @@ static ssize_t store_rps_map(struct netdev_rx_queue *queue, | |||
606 | rcu_assign_pointer(queue->rps_map, map); | 607 | rcu_assign_pointer(queue->rps_map, map); |
607 | spin_unlock(&rps_map_lock); | 608 | spin_unlock(&rps_map_lock); |
608 | 609 | ||
609 | if (old_map) | 610 | if (map) |
611 | jump_label_inc(&rps_needed); | ||
612 | if (old_map) { | ||
610 | kfree_rcu(old_map, rcu); | 613 | kfree_rcu(old_map, rcu); |
611 | 614 | jump_label_dec(&rps_needed); | |
615 | } | ||
612 | free_cpumask_var(mask); | 616 | free_cpumask_var(mask); |
613 | return len; | 617 | return len; |
614 | } | 618 | } |
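The jump_label_inc()/jump_label_dec() pair keeps a global count of configured rps_cpus maps so the RX hot path can skip RPS entirely when none exist. Condensed from netif_rx() in this same series (netif_rx_sketch is an illustrative name):

#include <linux/jump_label.h>
#include <linux/netdevice.h>

static int netif_rx_sketch(struct sk_buff *skb)
{
        if (static_branch(&rps_needed)) {
                /* look up the target CPU via get_rps_cpu() and
                 * enqueue to that CPU's backlog */
        }
        /* ... otherwise process on the local CPU ... */
        return NET_RX_SUCCESS;
}

While the count is zero, static_branch() compiles down to a patched no-op, so systems that never configure RPS pay nothing for it.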
@@ -618,15 +622,15 @@ static ssize_t show_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue, | |||
618 | char *buf) | 622 | char *buf) |
619 | { | 623 | { |
620 | struct rps_dev_flow_table *flow_table; | 624 | struct rps_dev_flow_table *flow_table; |
621 | unsigned int val = 0; | 625 | unsigned long val = 0; |
622 | 626 | ||
623 | rcu_read_lock(); | 627 | rcu_read_lock(); |
624 | flow_table = rcu_dereference(queue->rps_flow_table); | 628 | flow_table = rcu_dereference(queue->rps_flow_table); |
625 | if (flow_table) | 629 | if (flow_table) |
626 | val = flow_table->mask + 1; | 630 | val = (unsigned long)flow_table->mask + 1; |
627 | rcu_read_unlock(); | 631 | rcu_read_unlock(); |
628 | 632 | ||
629 | return sprintf(buf, "%u\n", val); | 633 | return sprintf(buf, "%lu\n", val); |
630 | } | 634 | } |
631 | 635 | ||
632 | static void rps_dev_flow_table_release_work(struct work_struct *work) | 636 | static void rps_dev_flow_table_release_work(struct work_struct *work) |
@@ -650,36 +654,46 @@ static ssize_t store_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue, | |||
650 | struct rx_queue_attribute *attr, | 654 | struct rx_queue_attribute *attr, |
651 | const char *buf, size_t len) | 655 | const char *buf, size_t len) |
652 | { | 656 | { |
653 | unsigned int count; | 657 | unsigned long mask, count; |
654 | char *endp; | ||
655 | struct rps_dev_flow_table *table, *old_table; | 658 | struct rps_dev_flow_table *table, *old_table; |
656 | static DEFINE_SPINLOCK(rps_dev_flow_lock); | 659 | static DEFINE_SPINLOCK(rps_dev_flow_lock); |
660 | int rc; | ||
657 | 661 | ||
658 | if (!capable(CAP_NET_ADMIN)) | 662 | if (!capable(CAP_NET_ADMIN)) |
659 | return -EPERM; | 663 | return -EPERM; |
660 | 664 | ||
661 | count = simple_strtoul(buf, &endp, 0); | 665 | rc = kstrtoul(buf, 0, &count); |
662 | if (endp == buf) | 666 | if (rc < 0) |
663 | return -EINVAL; | 667 | return rc; |
664 | 668 | ||
665 | if (count) { | 669 | if (count) { |
666 | int i; | 670 | mask = count - 1; |
667 | 671 | /* mask = roundup_pow_of_two(count) - 1; | |
668 | if (count > INT_MAX) | 672 | * without overflows... |
673 | */ | ||
674 | while ((mask | (mask >> 1)) != mask) | ||
675 | mask |= (mask >> 1); | ||
676 | /* On 64-bit arches, must check mask fits in table->mask (u32), | ||
677 | * and on 32-bit arches, must check RPS_DEV_FLOW_TABLE_SIZE(mask + 1) | ||
678 | * doesn't overflow. | ||
679 | */ | ||
680 | #if BITS_PER_LONG > 32 | ||
681 | if (mask > (unsigned long)(u32)mask) | ||
669 | return -EINVAL; | 682 | return -EINVAL; |
670 | count = roundup_pow_of_two(count); | 683 | #else |
671 | if (count > (ULONG_MAX - sizeof(struct rps_dev_flow_table)) | 684 | if (mask > (ULONG_MAX - RPS_DEV_FLOW_TABLE_SIZE(1)) |
672 | / sizeof(struct rps_dev_flow)) { | 685 | / sizeof(struct rps_dev_flow)) { |
673 | /* Enforce a limit to prevent overflow */ | 686 | /* Enforce a limit to prevent overflow */ |
674 | return -EINVAL; | 687 | return -EINVAL; |
675 | } | 688 | } |
676 | table = vmalloc(RPS_DEV_FLOW_TABLE_SIZE(count)); | 689 | #endif |
690 | table = vmalloc(RPS_DEV_FLOW_TABLE_SIZE(mask + 1)); | ||
677 | if (!table) | 691 | if (!table) |
678 | return -ENOMEM; | 692 | return -ENOMEM; |
679 | 693 | ||
680 | table->mask = count - 1; | 694 | table->mask = mask; |
681 | for (i = 0; i < count; i++) | 695 | for (count = 0; count <= mask; count++) |
682 | table->flows[i].cpu = RPS_NO_CPU; | 696 | table->flows[count].cpu = RPS_NO_CPU; |
683 | } else | 697 | } else |
684 | table = NULL; | 698 | table = NULL; |
685 | 699 | ||
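A worked trace of the rounding above for count = 100, showing why the bit-smearing loop replaces roundup_pow_of_two(), which would overflow for counts above 2^(BITS_PER_LONG - 1):

unsigned long mask = 100 - 1;           /* 99 = 0b1100011 */
while ((mask | (mask >> 1)) != mask)
        mask |= (mask >> 1);            /* 0b1110011 -> 0b1111011 -> 0b1111111 */
/* mask == 127, and the table is sized for mask + 1 = 128 flow slots */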
@@ -783,7 +797,7 @@ net_rx_queue_update_kobjects(struct net_device *net, int old_num, int new_num) | |||
783 | #endif | 797 | #endif |
784 | } | 798 | } |
785 | 799 | ||
786 | #ifdef CONFIG_XPS | 800 | #ifdef CONFIG_SYSFS |
787 | /* | 801 | /* |
788 | * netdev_queue sysfs structures and functions. | 802 | * netdev_queue sysfs structures and functions. |
789 | */ | 803 | */ |
@@ -829,6 +843,133 @@ static const struct sysfs_ops netdev_queue_sysfs_ops = { | |||
829 | .store = netdev_queue_attr_store, | 843 | .store = netdev_queue_attr_store, |
830 | }; | 844 | }; |
831 | 845 | ||
846 | static ssize_t show_trans_timeout(struct netdev_queue *queue, | ||
847 | struct netdev_queue_attribute *attribute, | ||
848 | char *buf) | ||
849 | { | ||
850 | unsigned long trans_timeout; | ||
851 | |||
852 | spin_lock_irq(&queue->_xmit_lock); | ||
853 | trans_timeout = queue->trans_timeout; | ||
854 | spin_unlock_irq(&queue->_xmit_lock); | ||
855 | |||
856 | return sprintf(buf, "%lu", trans_timeout); | ||
857 | } | ||
858 | |||
859 | static struct netdev_queue_attribute queue_trans_timeout = | ||
860 | __ATTR(tx_timeout, S_IRUGO, show_trans_timeout, NULL); | ||
861 | |||
862 | #ifdef CONFIG_BQL | ||
863 | /* | ||
864 | * Byte queue limits sysfs structures and functions. | ||
865 | */ | ||
866 | static ssize_t bql_show(char *buf, unsigned int value) | ||
867 | { | ||
868 | return sprintf(buf, "%u\n", value); | ||
869 | } | ||
870 | |||
871 | static ssize_t bql_set(const char *buf, const size_t count, | ||
872 | unsigned int *pvalue) | ||
873 | { | ||
874 | unsigned int value; | ||
875 | int err; | ||
876 | |||
877 | if (!strcmp(buf, "max") || !strcmp(buf, "max\n")) | ||
878 | value = DQL_MAX_LIMIT; | ||
879 | else { | ||
880 | err = kstrtouint(buf, 10, &value); | ||
881 | if (err < 0) | ||
882 | return err; | ||
883 | if (value > DQL_MAX_LIMIT) | ||
884 | return -EINVAL; | ||
885 | } | ||
886 | |||
887 | *pvalue = value; | ||
888 | |||
889 | return count; | ||
890 | } | ||
891 | |||
892 | static ssize_t bql_show_hold_time(struct netdev_queue *queue, | ||
893 | struct netdev_queue_attribute *attr, | ||
894 | char *buf) | ||
895 | { | ||
896 | struct dql *dql = &queue->dql; | ||
897 | |||
898 | return sprintf(buf, "%u\n", jiffies_to_msecs(dql->slack_hold_time)); | ||
899 | } | ||
900 | |||
901 | static ssize_t bql_set_hold_time(struct netdev_queue *queue, | ||
902 | struct netdev_queue_attribute *attribute, | ||
903 | const char *buf, size_t len) | ||
904 | { | ||
905 | struct dql *dql = &queue->dql; | ||
906 | unsigned value; | ||
907 | int err; | ||
908 | |||
909 | err = kstrtouint(buf, 10, &value); | ||
910 | if (err < 0) | ||
911 | return err; | ||
912 | |||
913 | dql->slack_hold_time = msecs_to_jiffies(value); | ||
914 | |||
915 | return len; | ||
916 | } | ||
917 | |||
918 | static struct netdev_queue_attribute bql_hold_time_attribute = | ||
919 | __ATTR(hold_time, S_IRUGO | S_IWUSR, bql_show_hold_time, | ||
920 | bql_set_hold_time); | ||
921 | |||
922 | static ssize_t bql_show_inflight(struct netdev_queue *queue, | ||
923 | struct netdev_queue_attribute *attr, | ||
924 | char *buf) | ||
925 | { | ||
926 | struct dql *dql = &queue->dql; | ||
927 | |||
928 | return sprintf(buf, "%u\n", dql->num_queued - dql->num_completed); | ||
929 | } | ||
930 | |||
931 | static struct netdev_queue_attribute bql_inflight_attribute = | ||
932 | __ATTR(inflight, S_IRUGO | S_IWUSR, bql_show_inflight, NULL); | ||
933 | |||
934 | #define BQL_ATTR(NAME, FIELD) \ | ||
935 | static ssize_t bql_show_ ## NAME(struct netdev_queue *queue, \ | ||
936 | struct netdev_queue_attribute *attr, \ | ||
937 | char *buf) \ | ||
938 | { \ | ||
939 | return bql_show(buf, queue->dql.FIELD); \ | ||
940 | } \ | ||
941 | \ | ||
942 | static ssize_t bql_set_ ## NAME(struct netdev_queue *queue, \ | ||
943 | struct netdev_queue_attribute *attr, \ | ||
944 | const char *buf, size_t len) \ | ||
945 | { \ | ||
946 | return bql_set(buf, len, &queue->dql.FIELD); \ | ||
947 | } \ | ||
948 | \ | ||
949 | static struct netdev_queue_attribute bql_ ## NAME ## _attribute = \ | ||
950 | __ATTR(NAME, S_IRUGO | S_IWUSR, bql_show_ ## NAME, \ | ||
951 | bql_set_ ## NAME); | ||
952 | |||
953 | BQL_ATTR(limit, limit) | ||
954 | BQL_ATTR(limit_max, max_limit) | ||
955 | BQL_ATTR(limit_min, min_limit) | ||
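Each BQL_ATTR() invocation expands to a show/store pair bound to one struct dql field plus its sysfs attribute. Written out for the first invocation, BQL_ATTR(limit, limit) produces exactly:

static ssize_t bql_show_limit(struct netdev_queue *queue,
                              struct netdev_queue_attribute *attr,
                              char *buf)
{
        return bql_show(buf, queue->dql.limit);
}

static ssize_t bql_set_limit(struct netdev_queue *queue,
                             struct netdev_queue_attribute *attr,
                             const char *buf, size_t len)
{
        return bql_set(buf, len, &queue->dql.limit);
}

static struct netdev_queue_attribute bql_limit_attribute =
        __ATTR(limit, S_IRUGO | S_IWUSR, bql_show_limit, bql_set_limit);

Via the dql_group below, these files surface as /sys/class/net/<dev>/queues/tx-<n>/byte_queue_limits/{limit,limit_max,limit_min,hold_time,inflight}.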
956 | |||
957 | static struct attribute *dql_attrs[] = { | ||
958 | &bql_limit_attribute.attr, | ||
959 | &bql_limit_max_attribute.attr, | ||
960 | &bql_limit_min_attribute.attr, | ||
961 | &bql_hold_time_attribute.attr, | ||
962 | &bql_inflight_attribute.attr, | ||
963 | NULL | ||
964 | }; | ||
965 | |||
966 | static struct attribute_group dql_group = { | ||
967 | .name = "byte_queue_limits", | ||
968 | .attrs = dql_attrs, | ||
969 | }; | ||
970 | #endif /* CONFIG_BQL */ | ||
971 | |||
972 | #ifdef CONFIG_XPS | ||
832 | static inline unsigned int get_netdev_queue_index(struct netdev_queue *queue) | 973 | static inline unsigned int get_netdev_queue_index(struct netdev_queue *queue) |
833 | { | 974 | { |
834 | struct net_device *dev = queue->dev; | 975 | struct net_device *dev = queue->dev; |
@@ -893,6 +1034,52 @@ static DEFINE_MUTEX(xps_map_mutex); | |||
893 | #define xmap_dereference(P) \ | 1034 | #define xmap_dereference(P) \ |
894 | rcu_dereference_protected((P), lockdep_is_held(&xps_map_mutex)) | 1035 | rcu_dereference_protected((P), lockdep_is_held(&xps_map_mutex)) |
895 | 1036 | ||
1037 | static void xps_queue_release(struct netdev_queue *queue) | ||
1038 | { | ||
1039 | struct net_device *dev = queue->dev; | ||
1040 | struct xps_dev_maps *dev_maps; | ||
1041 | struct xps_map *map; | ||
1042 | unsigned long index; | ||
1043 | int i, pos, nonempty = 0; | ||
1044 | |||
1045 | index = get_netdev_queue_index(queue); | ||
1046 | |||
1047 | mutex_lock(&xps_map_mutex); | ||
1048 | dev_maps = xmap_dereference(dev->xps_maps); | ||
1049 | |||
1050 | if (dev_maps) { | ||
1051 | for_each_possible_cpu(i) { | ||
1052 | map = xmap_dereference(dev_maps->cpu_map[i]); | ||
1053 | if (!map) | ||
1054 | continue; | ||
1055 | |||
1056 | for (pos = 0; pos < map->len; pos++) | ||
1057 | if (map->queues[pos] == index) | ||
1058 | break; | ||
1059 | |||
1060 | if (pos < map->len) { | ||
1061 | if (map->len > 1) | ||
1062 | map->queues[pos] = | ||
1063 | map->queues[--map->len]; | ||
1064 | else { | ||
1065 | RCU_INIT_POINTER(dev_maps->cpu_map[i], | ||
1066 | NULL); | ||
1067 | kfree_rcu(map, rcu); | ||
1068 | map = NULL; | ||
1069 | } | ||
1070 | } | ||
1071 | if (map) | ||
1072 | nonempty = 1; | ||
1073 | } | ||
1074 | |||
1075 | if (!nonempty) { | ||
1076 | RCU_INIT_POINTER(dev->xps_maps, NULL); | ||
1077 | kfree_rcu(dev_maps, rcu); | ||
1078 | } | ||
1079 | } | ||
1080 | mutex_unlock(&xps_map_mutex); | ||
1081 | } | ||
1082 | |||
896 | static ssize_t store_xps_map(struct netdev_queue *queue, | 1083 | static ssize_t store_xps_map(struct netdev_queue *queue, |
897 | struct netdev_queue_attribute *attribute, | 1084 | struct netdev_queue_attribute *attribute, |
898 | const char *buf, size_t len) | 1085 | const char *buf, size_t len) |
@@ -904,7 +1091,7 @@ static ssize_t store_xps_map(struct netdev_queue *queue, | |||
904 | struct xps_map *map, *new_map; | 1091 | struct xps_map *map, *new_map; |
905 | struct xps_dev_maps *dev_maps, *new_dev_maps; | 1092 | struct xps_dev_maps *dev_maps, *new_dev_maps; |
906 | int nonempty = 0; | 1093 | int nonempty = 0; |
907 | int numa_node = -2; | 1094 | int numa_node_id = -2; |
908 | 1095 | ||
909 | if (!capable(CAP_NET_ADMIN)) | 1096 | if (!capable(CAP_NET_ADMIN)) |
910 | return -EPERM; | 1097 | return -EPERM; |
@@ -947,10 +1134,10 @@ static ssize_t store_xps_map(struct netdev_queue *queue, | |||
947 | need_set = cpumask_test_cpu(cpu, mask) && cpu_online(cpu); | 1134 | need_set = cpumask_test_cpu(cpu, mask) && cpu_online(cpu); |
948 | #ifdef CONFIG_NUMA | 1135 | #ifdef CONFIG_NUMA |
949 | if (need_set) { | 1136 | if (need_set) { |
950 | if (numa_node == -2) | 1137 | if (numa_node_id == -2) |
951 | numa_node = cpu_to_node(cpu); | 1138 | numa_node_id = cpu_to_node(cpu); |
952 | else if (numa_node != cpu_to_node(cpu)) | 1139 | else if (numa_node_id != cpu_to_node(cpu)) |
953 | numa_node = -1; | 1140 | numa_node_id = -1; |
954 | } | 1141 | } |
955 | #endif | 1142 | #endif |
956 | if (need_set && pos >= map_len) { | 1143 | if (need_set && pos >= map_len) { |
@@ -1000,7 +1187,7 @@ static ssize_t store_xps_map(struct netdev_queue *queue, | |||
1000 | if (dev_maps) | 1187 | if (dev_maps) |
1001 | kfree_rcu(dev_maps, rcu); | 1188 | kfree_rcu(dev_maps, rcu); |
1002 | 1189 | ||
1003 | netdev_queue_numa_node_write(queue, (numa_node >= 0) ? numa_node : | 1190 | netdev_queue_numa_node_write(queue, (numa_node_id >= 0) ? numa_node_id : |
1004 | NUMA_NO_NODE); | 1191 | NUMA_NO_NODE); |
1005 | 1192 | ||
1006 | mutex_unlock(&xps_map_mutex); | 1193 | mutex_unlock(&xps_map_mutex); |
@@ -1023,58 +1210,23 @@ error: | |||
1023 | 1210 | ||
1024 | static struct netdev_queue_attribute xps_cpus_attribute = | 1211 | static struct netdev_queue_attribute xps_cpus_attribute = |
1025 | __ATTR(xps_cpus, S_IRUGO | S_IWUSR, show_xps_map, store_xps_map); | 1212 | __ATTR(xps_cpus, S_IRUGO | S_IWUSR, show_xps_map, store_xps_map); |
1213 | #endif /* CONFIG_XPS */ | ||
1026 | 1214 | ||
1027 | static struct attribute *netdev_queue_default_attrs[] = { | 1215 | static struct attribute *netdev_queue_default_attrs[] = { |
1216 | &queue_trans_timeout.attr, | ||
1217 | #ifdef CONFIG_XPS | ||
1028 | &xps_cpus_attribute.attr, | 1218 | &xps_cpus_attribute.attr, |
1219 | #endif | ||
1029 | NULL | 1220 | NULL |
1030 | }; | 1221 | }; |
1031 | 1222 | ||
1032 | static void netdev_queue_release(struct kobject *kobj) | 1223 | static void netdev_queue_release(struct kobject *kobj) |
1033 | { | 1224 | { |
1034 | struct netdev_queue *queue = to_netdev_queue(kobj); | 1225 | struct netdev_queue *queue = to_netdev_queue(kobj); |
1035 | struct net_device *dev = queue->dev; | ||
1036 | struct xps_dev_maps *dev_maps; | ||
1037 | struct xps_map *map; | ||
1038 | unsigned long index; | ||
1039 | int i, pos, nonempty = 0; | ||
1040 | |||
1041 | index = get_netdev_queue_index(queue); | ||
1042 | |||
1043 | mutex_lock(&xps_map_mutex); | ||
1044 | dev_maps = xmap_dereference(dev->xps_maps); | ||
1045 | |||
1046 | if (dev_maps) { | ||
1047 | for_each_possible_cpu(i) { | ||
1048 | map = xmap_dereference(dev_maps->cpu_map[i]); | ||
1049 | if (!map) | ||
1050 | continue; | ||
1051 | |||
1052 | for (pos = 0; pos < map->len; pos++) | ||
1053 | if (map->queues[pos] == index) | ||
1054 | break; | ||
1055 | |||
1056 | if (pos < map->len) { | ||
1057 | if (map->len > 1) | ||
1058 | map->queues[pos] = | ||
1059 | map->queues[--map->len]; | ||
1060 | else { | ||
1061 | RCU_INIT_POINTER(dev_maps->cpu_map[i], | ||
1062 | NULL); | ||
1063 | kfree_rcu(map, rcu); | ||
1064 | map = NULL; | ||
1065 | } | ||
1066 | } | ||
1067 | if (map) | ||
1068 | nonempty = 1; | ||
1069 | } | ||
1070 | 1226 | ||
1071 | if (!nonempty) { | 1227 | #ifdef CONFIG_XPS |
1072 | RCU_INIT_POINTER(dev->xps_maps, NULL); | 1228 | xps_queue_release(queue); |
1073 | kfree_rcu(dev_maps, rcu); | 1229 | #endif |
1074 | } | ||
1075 | } | ||
1076 | |||
1077 | mutex_unlock(&xps_map_mutex); | ||
1078 | 1230 | ||
1079 | memset(kobj, 0, sizeof(*kobj)); | 1231 | memset(kobj, 0, sizeof(*kobj)); |
1080 | dev_put(queue->dev); | 1232 | dev_put(queue->dev); |
@@ -1095,22 +1247,29 @@ static int netdev_queue_add_kobject(struct net_device *net, int index) | |||
1095 | kobj->kset = net->queues_kset; | 1247 | kobj->kset = net->queues_kset; |
1096 | error = kobject_init_and_add(kobj, &netdev_queue_ktype, NULL, | 1248 | error = kobject_init_and_add(kobj, &netdev_queue_ktype, NULL, |
1097 | "tx-%u", index); | 1249 | "tx-%u", index); |
1098 | if (error) { | 1250 | if (error) |
1099 | kobject_put(kobj); | 1251 | goto exit; |
1100 | return error; | 1252 | |
1101 | } | 1253 | #ifdef CONFIG_BQL |
1254 | error = sysfs_create_group(kobj, &dql_group); | ||
1255 | if (error) | ||
1256 | goto exit; | ||
1257 | #endif | ||
1102 | 1258 | ||
1103 | kobject_uevent(kobj, KOBJ_ADD); | 1259 | kobject_uevent(kobj, KOBJ_ADD); |
1104 | dev_hold(queue->dev); | 1260 | dev_hold(queue->dev); |
1105 | 1261 | ||
1262 | return 0; | ||
1263 | exit: | ||
1264 | kobject_put(kobj); | ||
1106 | return error; | 1265 | return error; |
1107 | } | 1266 | } |
1108 | #endif /* CONFIG_XPS */ | 1267 | #endif /* CONFIG_SYSFS */ |
1109 | 1268 | ||
1110 | int | 1269 | int |
1111 | netdev_queue_update_kobjects(struct net_device *net, int old_num, int new_num) | 1270 | netdev_queue_update_kobjects(struct net_device *net, int old_num, int new_num) |
1112 | { | 1271 | { |
1113 | #ifdef CONFIG_XPS | 1272 | #ifdef CONFIG_SYSFS |
1114 | int i; | 1273 | int i; |
1115 | int error = 0; | 1274 | int error = 0; |
1116 | 1275 | ||
@@ -1122,20 +1281,26 @@ netdev_queue_update_kobjects(struct net_device *net, int old_num, int new_num) | |||
1122 | } | 1281 | } |
1123 | } | 1282 | } |
1124 | 1283 | ||
1125 | while (--i >= new_num) | 1284 | while (--i >= new_num) { |
1126 | kobject_put(&net->_tx[i].kobj); | 1285 | struct netdev_queue *queue = net->_tx + i; |
1286 | |||
1287 | #ifdef CONFIG_BQL | ||
1288 | sysfs_remove_group(&queue->kobj, &dql_group); | ||
1289 | #endif | ||
1290 | kobject_put(&queue->kobj); | ||
1291 | } | ||
1127 | 1292 | ||
1128 | return error; | 1293 | return error; |
1129 | #else | 1294 | #else |
1130 | return 0; | 1295 | return 0; |
1131 | #endif | 1296 | #endif /* CONFIG_SYSFS */ |
1132 | } | 1297 | } |
1133 | 1298 | ||
1134 | static int register_queue_kobjects(struct net_device *net) | 1299 | static int register_queue_kobjects(struct net_device *net) |
1135 | { | 1300 | { |
1136 | int error = 0, txq = 0, rxq = 0, real_rx = 0, real_tx = 0; | 1301 | int error = 0, txq = 0, rxq = 0, real_rx = 0, real_tx = 0; |
1137 | 1302 | ||
1138 | #if defined(CONFIG_RPS) || defined(CONFIG_XPS) | 1303 | #ifdef CONFIG_SYSFS |
1139 | net->queues_kset = kset_create_and_add("queues", | 1304 | net->queues_kset = kset_create_and_add("queues", |
1140 | NULL, &net->dev.kobj); | 1305 | NULL, &net->dev.kobj); |
1141 | if (!net->queues_kset) | 1306 | if (!net->queues_kset) |
@@ -1176,7 +1341,7 @@ static void remove_queue_kobjects(struct net_device *net) | |||
1176 | 1341 | ||
1177 | net_rx_queue_update_kobjects(net, real_rx, 0); | 1342 | net_rx_queue_update_kobjects(net, real_rx, 0); |
1178 | netdev_queue_update_kobjects(net, real_tx, 0); | 1343 | netdev_queue_update_kobjects(net, real_tx, 0); |
1179 | #if defined(CONFIG_RPS) || defined(CONFIG_XPS) | 1344 | #ifdef CONFIG_SYSFS |
1180 | kset_unregister(net->queues_kset); | 1345 | kset_unregister(net->queues_kset); |
1181 | #endif | 1346 | #endif |
1182 | } | 1347 | } |
diff --git a/net/core/netpoll.c b/net/core/netpoll.c index cf64c1ffa4cd..0d38808a2305 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c | |||
@@ -76,7 +76,7 @@ static void queue_process(struct work_struct *work) | |||
76 | 76 | ||
77 | local_irq_save(flags); | 77 | local_irq_save(flags); |
78 | __netif_tx_lock(txq, smp_processor_id()); | 78 | __netif_tx_lock(txq, smp_processor_id()); |
79 | if (netif_tx_queue_frozen_or_stopped(txq) || | 79 | if (netif_xmit_frozen_or_stopped(txq) || |
80 | ops->ndo_start_xmit(skb, dev) != NETDEV_TX_OK) { | 80 | ops->ndo_start_xmit(skb, dev) != NETDEV_TX_OK) { |
81 | skb_queue_head(&npinfo->txq, skb); | 81 | skb_queue_head(&npinfo->txq, skb); |
82 | __netif_tx_unlock(txq); | 82 | __netif_tx_unlock(txq); |
@@ -317,7 +317,7 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb, | |||
317 | for (tries = jiffies_to_usecs(1)/USEC_PER_POLL; | 317 | for (tries = jiffies_to_usecs(1)/USEC_PER_POLL; |
318 | tries > 0; --tries) { | 318 | tries > 0; --tries) { |
319 | if (__netif_tx_trylock(txq)) { | 319 | if (__netif_tx_trylock(txq)) { |
320 | if (!netif_tx_queue_stopped(txq)) { | 320 | if (!netif_xmit_stopped(txq)) { |
321 | status = ops->ndo_start_xmit(skb, dev); | 321 | status = ops->ndo_start_xmit(skb, dev); |
322 | if (status == NETDEV_TX_OK) | 322 | if (status == NETDEV_TX_OK) |
323 | txq_trans_update(txq); | 323 | txq_trans_update(txq); |
@@ -422,6 +422,7 @@ static void arp_reply(struct sk_buff *skb) | |||
422 | struct sk_buff *send_skb; | 422 | struct sk_buff *send_skb; |
423 | struct netpoll *np, *tmp; | 423 | struct netpoll *np, *tmp; |
424 | unsigned long flags; | 424 | unsigned long flags; |
425 | int hlen, tlen; | ||
425 | int hits = 0; | 426 | int hits = 0; |
426 | 427 | ||
427 | if (list_empty(&npinfo->rx_np)) | 428 | if (list_empty(&npinfo->rx_np)) |
@@ -479,8 +480,9 @@ static void arp_reply(struct sk_buff *skb) | |||
479 | if (tip != np->local_ip) | 480 | if (tip != np->local_ip) |
480 | continue; | 481 | continue; |
481 | 482 | ||
482 | send_skb = find_skb(np, size + LL_ALLOCATED_SPACE(np->dev), | 483 | hlen = LL_RESERVED_SPACE(np->dev); |
483 | LL_RESERVED_SPACE(np->dev)); | 484 | tlen = np->dev->needed_tailroom; |
485 | send_skb = find_skb(np, size + hlen + tlen, hlen); | ||
484 | if (!send_skb) | 486 | if (!send_skb) |
485 | continue; | 487 | continue; |
486 | 488 | ||
diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c new file mode 100644 index 000000000000..3a9fd4826b75 --- /dev/null +++ b/net/core/netprio_cgroup.c | |||
@@ -0,0 +1,344 @@ | |||
1 | /* | ||
2 | * net/core/netprio_cgroup.c Priority Control Group | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of the GNU General Public License | ||
6 | * as published by the Free Software Foundation; either version | ||
7 | * 2 of the License, or (at your option) any later version. | ||
8 | * | ||
9 | * Authors: Neil Horman <nhorman@tuxdriver.com> | ||
10 | */ | ||
11 | |||
12 | #include <linux/module.h> | ||
13 | #include <linux/slab.h> | ||
14 | #include <linux/types.h> | ||
15 | #include <linux/string.h> | ||
16 | #include <linux/errno.h> | ||
17 | #include <linux/skbuff.h> | ||
18 | #include <linux/cgroup.h> | ||
19 | #include <linux/rcupdate.h> | ||
20 | #include <linux/atomic.h> | ||
21 | #include <net/rtnetlink.h> | ||
22 | #include <net/pkt_cls.h> | ||
23 | #include <net/sock.h> | ||
24 | #include <net/netprio_cgroup.h> | ||
25 | |||
26 | static struct cgroup_subsys_state *cgrp_create(struct cgroup_subsys *ss, | ||
27 | struct cgroup *cgrp); | ||
28 | static void cgrp_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp); | ||
29 | static int cgrp_populate(struct cgroup_subsys *ss, struct cgroup *cgrp); | ||
30 | |||
31 | struct cgroup_subsys net_prio_subsys = { | ||
32 | .name = "net_prio", | ||
33 | .create = cgrp_create, | ||
34 | .destroy = cgrp_destroy, | ||
35 | .populate = cgrp_populate, | ||
36 | #ifdef CONFIG_NETPRIO_CGROUP | ||
37 | .subsys_id = net_prio_subsys_id, | ||
38 | #endif | ||
39 | .module = THIS_MODULE | ||
40 | }; | ||
41 | |||
42 | #define PRIOIDX_SZ 128 | ||
43 | |||
44 | static unsigned long prioidx_map[PRIOIDX_SZ]; | ||
45 | static DEFINE_SPINLOCK(prioidx_map_lock); | ||
46 | static atomic_t max_prioidx = ATOMIC_INIT(0); | ||
47 | |||
48 | static inline struct cgroup_netprio_state *cgrp_netprio_state(struct cgroup *cgrp) | ||
49 | { | ||
50 | return container_of(cgroup_subsys_state(cgrp, net_prio_subsys_id), | ||
51 | struct cgroup_netprio_state, css); | ||
52 | } | ||
53 | |||
54 | static int get_prioidx(u32 *prio) | ||
55 | { | ||
56 | unsigned long flags; | ||
57 | u32 prioidx; | ||
58 | |||
59 | spin_lock_irqsave(&prioidx_map_lock, flags); | ||
60 | prioidx = find_first_zero_bit(prioidx_map, sizeof(unsigned long) * PRIOIDX_SZ); | ||
61 | set_bit(prioidx, prioidx_map); | ||
62 | spin_unlock_irqrestore(&prioidx_map_lock, flags); | ||
63 | if (prioidx == sizeof(unsigned long) * PRIOIDX_SZ) | ||
64 | return -ENOSPC; | ||
65 | |||
66 | atomic_set(&max_prioidx, prioidx); | ||
67 | *prio = prioidx; | ||
68 | return 0; | ||
69 | } | ||
70 | |||
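Editorial note on get_prioidx() above: set_bit() runs before the range check, so when the bitmap is full the code sets a bit one past the end of prioidx_map before returning -ENOSPC. A safer ordering would test first (sketch only, not what the patch does):

	prioidx = find_first_zero_bit(prioidx_map,
				      sizeof(unsigned long) * PRIOIDX_SZ);
	if (prioidx == sizeof(unsigned long) * PRIOIDX_SZ) {
		spin_unlock_irqrestore(&prioidx_map_lock, flags);
		return -ENOSPC;
	}
	set_bit(prioidx, prioidx_map);
	spin_unlock_irqrestore(&prioidx_map_lock, flags);
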
71 | static void put_prioidx(u32 idx) | ||
72 | { | ||
73 | unsigned long flags; | ||
74 | |||
75 | spin_lock_irqsave(&prioidx_map_lock, flags); | ||
76 | clear_bit(idx, prioidx_map); | ||
77 | spin_unlock_irqrestore(&prioidx_map_lock, flags); | ||
78 | } | ||
79 | |||
80 | static void extend_netdev_table(struct net_device *dev, u32 new_len) | ||
81 | { | ||
82 | size_t new_size = sizeof(struct netprio_map) + | ||
83 | ((sizeof(u32) * new_len)); | ||
84 | struct netprio_map *new_priomap = kzalloc(new_size, GFP_KERNEL); | ||
85 | struct netprio_map *old_priomap; | ||
86 | int i; | ||
87 | |||
88 | old_priomap = rtnl_dereference(dev->priomap); | ||
89 | |||
90 | if (!new_priomap) { | ||
91 | printk(KERN_WARNING "Unable to alloc new priomap!\n"); | ||
92 | return; | ||
93 | } | ||
94 | |||
95 | for (i = 0; | ||
96 | old_priomap && (i < old_priomap->priomap_len); | ||
97 | i++) | ||
98 | new_priomap->priomap[i] = old_priomap->priomap[i]; | ||
99 | |||
100 | new_priomap->priomap_len = new_len; | ||
101 | |||
102 | rcu_assign_pointer(dev->priomap, new_priomap); | ||
103 | if (old_priomap) | ||
104 | kfree_rcu(old_priomap, rcu); | ||
105 | } | ||
106 | |||
107 | static void update_netdev_tables(void) | ||
108 | { | ||
109 | struct net_device *dev; | ||
110 | u32 max_len = atomic_read(&max_prioidx); | ||
111 | struct netprio_map *map; | ||
112 | |||
113 | rtnl_lock(); | ||
114 | for_each_netdev(&init_net, dev) { | ||
115 | map = rtnl_dereference(dev->priomap); | ||
116 | if ((!map) || | ||
117 | (map->priomap_len < max_len)) | ||
118 | extend_netdev_table(dev, max_len); | ||
119 | } | ||
120 | rtnl_unlock(); | ||
121 | } | ||
122 | |||
123 | static struct cgroup_subsys_state *cgrp_create(struct cgroup_subsys *ss, | ||
124 | struct cgroup *cgrp) | ||
125 | { | ||
126 | struct cgroup_netprio_state *cs; | ||
127 | int ret; | ||
128 | |||
129 | cs = kzalloc(sizeof(*cs), GFP_KERNEL); | ||
130 | if (!cs) | ||
131 | return ERR_PTR(-ENOMEM); | ||
132 | |||
133 | if (cgrp->parent && cgrp_netprio_state(cgrp->parent)->prioidx) { | ||
134 | kfree(cs); | ||
135 | return ERR_PTR(-EINVAL); | ||
136 | } | ||
137 | |||
138 | ret = get_prioidx(&cs->prioidx); | ||
139 | if (ret != 0) { | ||
140 | printk(KERN_WARNING "No space in priority index array\n"); | ||
141 | kfree(cs); | ||
142 | return ERR_PTR(ret); | ||
143 | } | ||
144 | |||
145 | return &cs->css; | ||
146 | } | ||
147 | |||
148 | static void cgrp_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp) | ||
149 | { | ||
150 | struct cgroup_netprio_state *cs; | ||
151 | struct net_device *dev; | ||
152 | struct netprio_map *map; | ||
153 | |||
154 | cs = cgrp_netprio_state(cgrp); | ||
155 | rtnl_lock(); | ||
156 | for_each_netdev(&init_net, dev) { | ||
157 | map = rtnl_dereference(dev->priomap); | ||
158 | if (map) | ||
159 | map->priomap[cs->prioidx] = 0; | ||
160 | } | ||
161 | rtnl_unlock(); | ||
162 | put_prioidx(cs->prioidx); | ||
163 | kfree(cs); | ||
164 | } | ||
165 | |||
166 | static u64 read_prioidx(struct cgroup *cgrp, struct cftype *cft) | ||
167 | { | ||
168 | return (u64)cgrp_netprio_state(cgrp)->prioidx; | ||
169 | } | ||
170 | |||
171 | static int read_priomap(struct cgroup *cont, struct cftype *cft, | ||
172 | struct cgroup_map_cb *cb) | ||
173 | { | ||
174 | struct net_device *dev; | ||
175 | u32 prioidx = cgrp_netprio_state(cont)->prioidx; | ||
176 | u32 priority; | ||
177 | struct netprio_map *map; | ||
178 | |||
179 | rcu_read_lock(); | ||
180 | for_each_netdev_rcu(&init_net, dev) { | ||
181 | map = rcu_dereference(dev->priomap); | ||
182 | priority = map ? map->priomap[prioidx] : 0; | ||
183 | cb->fill(cb, dev->name, priority); | ||
184 | } | ||
185 | rcu_read_unlock(); | ||
186 | return 0; | ||
187 | } | ||
188 | |||
189 | static int write_priomap(struct cgroup *cgrp, struct cftype *cft, | ||
190 | const char *buffer) | ||
191 | { | ||
192 | char *devname = kstrdup(buffer, GFP_KERNEL); | ||
193 | int ret = -EINVAL; | ||
194 | u32 prioidx = cgrp_netprio_state(cgrp)->prioidx; | ||
195 | unsigned long priority; | ||
196 | char *priostr; | ||
197 | struct net_device *dev; | ||
198 | struct netprio_map *map; | ||
199 | |||
200 | if (!devname) | ||
201 | return -ENOMEM; | ||
202 | |||
203 | /* | ||
204 | * Minimally sized valid priomap string | ||
205 | */ | ||
206 | if (strlen(devname) < 3) | ||
207 | goto out_free_devname; | ||
208 | |||
209 | priostr = strstr(devname, " "); | ||
210 | if (!priostr) | ||
211 | goto out_free_devname; | ||
212 | |||
213 | /* | ||
214 | * Separate the devname from the associated priority | ||
215 | * and advance the priostr pointer to the priority value | ||
216 | */ | ||
217 | *priostr = '\0'; | ||
218 | priostr++; | ||
219 | |||
220 | /* | ||
221 | * If the priostr points to NULL, we're at the end of the passed | ||
222 | * in string, and its not a valid write | ||
223 | */ | ||
224 | if (*priostr == '\0') | ||
225 | goto out_free_devname; | ||
226 | |||
227 | ret = kstrtoul(priostr, 10, &priority); | ||
228 | if (ret < 0) | ||
229 | goto out_free_devname; | ||
230 | |||
231 | ret = -ENODEV; | ||
232 | |||
233 | dev = dev_get_by_name(&init_net, devname); | ||
234 | if (!dev) | ||
235 | goto out_free_devname; | ||
236 | |||
237 | update_netdev_tables(); | ||
238 | ret = 0; | ||
239 | rcu_read_lock(); | ||
240 | map = rcu_dereference(dev->priomap); | ||
241 | if (map) | ||
242 | map->priomap[prioidx] = priority; | ||
243 | rcu_read_unlock(); | ||
244 | dev_put(dev); | ||
245 | |||
246 | out_free_devname: | ||
247 | kfree(devname); | ||
248 | return ret; | ||
249 | } | ||
250 | |||
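write_priomap() accepts a single "<ifname> <priority>" pair per write. A hypothetical userspace caller might look like this; the cgroup mount point and the helper name are assumptions, not fixed by this patch:

	#include <fcntl.h>
	#include <string.h>
	#include <unistd.h>

	static int set_ifprio(const char *path, const char *map)
	{
		int fd = open(path, O_WRONLY);

		if (fd < 0)
			return -1;
		write(fd, map, strlen(map));
		close(fd);
		return 0;
	}

	/* e.g. set_ifprio("/sys/fs/cgroup/net_prio/grp/net_prio.ifpriomap",
	 *		   "eth0 5"); */
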
251 | static struct cftype ss_files[] = { | ||
252 | { | ||
253 | .name = "prioidx", | ||
254 | .read_u64 = read_prioidx, | ||
255 | }, | ||
256 | { | ||
257 | .name = "ifpriomap", | ||
258 | .read_map = read_priomap, | ||
259 | .write_string = write_priomap, | ||
260 | }, | ||
261 | }; | ||
262 | |||
263 | static int cgrp_populate(struct cgroup_subsys *ss, struct cgroup *cgrp) | ||
264 | { | ||
265 | return cgroup_add_files(cgrp, ss, ss_files, ARRAY_SIZE(ss_files)); | ||
266 | } | ||
267 | |||
268 | static int netprio_device_event(struct notifier_block *unused, | ||
269 | unsigned long event, void *ptr) | ||
270 | { | ||
271 | struct net_device *dev = ptr; | ||
272 | struct netprio_map *old; | ||
273 | u32 max_len = atomic_read(&max_prioidx); | ||
274 | |||
275 | /* | ||
276 | * Note this is called with rtnl_lock held, so we have update-side | ||
277 | * protection for our rcu assignments | ||
278 | */ | ||
279 | |||
280 | switch (event) { | ||
281 | |||
282 | case NETDEV_REGISTER: | ||
283 | if (max_len) | ||
284 | extend_netdev_table(dev, max_len); | ||
285 | break; | ||
286 | case NETDEV_UNREGISTER: | ||
287 | old = rtnl_dereference(dev->priomap); | ||
288 | RCU_INIT_POINTER(dev->priomap, NULL); | ||
289 | if (old) | ||
290 | kfree_rcu(old, rcu); | ||
291 | break; | ||
292 | } | ||
293 | return NOTIFY_DONE; | ||
294 | } | ||
295 | |||
296 | static struct notifier_block netprio_device_notifier = { | ||
297 | .notifier_call = netprio_device_event | ||
298 | }; | ||
299 | |||
300 | static int __init init_cgroup_netprio(void) | ||
301 | { | ||
302 | int ret; | ||
303 | |||
304 | ret = cgroup_load_subsys(&net_prio_subsys); | ||
305 | if (ret) | ||
306 | goto out; | ||
307 | #ifndef CONFIG_NETPRIO_CGROUP | ||
308 | smp_wmb(); | ||
309 | net_prio_subsys_id = net_prio_subsys.subsys_id; | ||
310 | #endif | ||
311 | |||
312 | register_netdevice_notifier(&netprio_device_notifier); | ||
313 | |||
314 | out: | ||
315 | return ret; | ||
316 | } | ||
317 | |||
318 | static void __exit exit_cgroup_netprio(void) | ||
319 | { | ||
320 | struct netprio_map *old; | ||
321 | struct net_device *dev; | ||
322 | |||
323 | unregister_netdevice_notifier(&netprio_device_notifier); | ||
324 | |||
325 | cgroup_unload_subsys(&net_prio_subsys); | ||
326 | |||
327 | #ifndef CONFIG_NETPRIO_CGROUP | ||
328 | net_prio_subsys_id = -1; | ||
329 | synchronize_rcu(); | ||
330 | #endif | ||
331 | |||
332 | rtnl_lock(); | ||
333 | for_each_netdev(&init_net, dev) { | ||
334 | old = rtnl_dereference(dev->priomap); | ||
335 | RCU_INIT_POINTER(dev->priomap, NULL); | ||
336 | if (old) | ||
337 | kfree_rcu(old, rcu); | ||
338 | } | ||
339 | rtnl_unlock(); | ||
340 | } | ||
341 | |||
342 | module_init(init_cgroup_netprio); | ||
343 | module_exit(exit_cgroup_netprio); | ||
344 | MODULE_LICENSE("GPL v2"); | ||
diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 0001c243b35c..449fe0f068f8 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c | |||
@@ -1304,7 +1304,7 @@ static ssize_t pktgen_if_write(struct file *file, | |||
1304 | scan_ip6(buf, pkt_dev->in6_daddr.s6_addr); | 1304 | scan_ip6(buf, pkt_dev->in6_daddr.s6_addr); |
1305 | snprintf(buf, sizeof(buf), "%pI6c", &pkt_dev->in6_daddr); | 1305 | snprintf(buf, sizeof(buf), "%pI6c", &pkt_dev->in6_daddr); |
1306 | 1306 | ||
1307 | ipv6_addr_copy(&pkt_dev->cur_in6_daddr, &pkt_dev->in6_daddr); | 1307 | pkt_dev->cur_in6_daddr = pkt_dev->in6_daddr; |
1308 | 1308 | ||
1309 | if (debug) | 1309 | if (debug) |
1310 | printk(KERN_DEBUG "pktgen: dst6 set to: %s\n", buf); | 1310 | printk(KERN_DEBUG "pktgen: dst6 set to: %s\n", buf); |
@@ -1327,8 +1327,7 @@ static ssize_t pktgen_if_write(struct file *file, | |||
1327 | scan_ip6(buf, pkt_dev->min_in6_daddr.s6_addr); | 1327 | scan_ip6(buf, pkt_dev->min_in6_daddr.s6_addr); |
1328 | snprintf(buf, sizeof(buf), "%pI6c", &pkt_dev->min_in6_daddr); | 1328 | snprintf(buf, sizeof(buf), "%pI6c", &pkt_dev->min_in6_daddr); |
1329 | 1329 | ||
1330 | ipv6_addr_copy(&pkt_dev->cur_in6_daddr, | 1330 | pkt_dev->cur_in6_daddr = pkt_dev->min_in6_daddr; |
1331 | &pkt_dev->min_in6_daddr); | ||
1332 | if (debug) | 1331 | if (debug) |
1333 | printk(KERN_DEBUG "pktgen: dst6_min set to: %s\n", buf); | 1332 | printk(KERN_DEBUG "pktgen: dst6_min set to: %s\n", buf); |
1334 | 1333 | ||
@@ -1371,7 +1370,7 @@ static ssize_t pktgen_if_write(struct file *file, | |||
1371 | scan_ip6(buf, pkt_dev->in6_saddr.s6_addr); | 1370 | scan_ip6(buf, pkt_dev->in6_saddr.s6_addr); |
1372 | snprintf(buf, sizeof(buf), "%pI6c", &pkt_dev->in6_saddr); | 1371 | snprintf(buf, sizeof(buf), "%pI6c", &pkt_dev->in6_saddr); |
1373 | 1372 | ||
1374 | ipv6_addr_copy(&pkt_dev->cur_in6_saddr, &pkt_dev->in6_saddr); | 1373 | pkt_dev->cur_in6_saddr = pkt_dev->in6_saddr; |
1375 | 1374 | ||
1376 | if (debug) | 1375 | if (debug) |
1377 | printk(KERN_DEBUG "pktgen: src6 set to: %s\n", buf); | 1376 | printk(KERN_DEBUG "pktgen: src6 set to: %s\n", buf); |
@@ -2079,9 +2078,7 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev) | |||
2079 | ifp = ifp->if_next) { | 2078 | ifp = ifp->if_next) { |
2080 | if (ifp->scope == IFA_LINK && | 2079 | if (ifp->scope == IFA_LINK && |
2081 | !(ifp->flags & IFA_F_TENTATIVE)) { | 2080 | !(ifp->flags & IFA_F_TENTATIVE)) { |
2082 | ipv6_addr_copy(&pkt_dev-> | 2081 | pkt_dev->cur_in6_saddr = ifp->addr; |
2083 | cur_in6_saddr, | ||
2084 | &ifp->addr); | ||
2085 | err = 0; | 2082 | err = 0; |
2086 | break; | 2083 | break; |
2087 | } | 2084 | } |
@@ -2958,8 +2955,8 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, | |||
2958 | iph->payload_len = htons(sizeof(struct udphdr) + datalen); | 2955 | iph->payload_len = htons(sizeof(struct udphdr) + datalen); |
2959 | iph->nexthdr = IPPROTO_UDP; | 2956 | iph->nexthdr = IPPROTO_UDP; |
2960 | 2957 | ||
2961 | ipv6_addr_copy(&iph->daddr, &pkt_dev->cur_in6_daddr); | 2958 | iph->daddr = pkt_dev->cur_in6_daddr; |
2962 | ipv6_addr_copy(&iph->saddr, &pkt_dev->cur_in6_saddr); | 2959 | iph->saddr = pkt_dev->cur_in6_saddr; |
2963 | 2960 | ||
2964 | skb->mac_header = (skb->network_header - ETH_HLEN - | 2961 | skb->mac_header = (skb->network_header - ETH_HLEN - |
2965 | pkt_dev->pkt_overhead); | 2962 | pkt_dev->pkt_overhead); |
@@ -3345,7 +3342,7 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev) | |||
3345 | 3342 | ||
3346 | __netif_tx_lock_bh(txq); | 3343 | __netif_tx_lock_bh(txq); |
3347 | 3344 | ||
3348 | if (unlikely(netif_tx_queue_frozen_or_stopped(txq))) { | 3345 | if (unlikely(netif_xmit_frozen_or_stopped(txq))) { |
3349 | ret = NETDEV_TX_BUSY; | 3346 | ret = NETDEV_TX_BUSY; |
3350 | pkt_dev->last_ok = 0; | 3347 | pkt_dev->last_ok = 0; |
3351 | goto unlock; | 3348 | goto unlock; |
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 9083e82bdae5..dbf2ddafd52d 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c | |||
@@ -273,6 +273,17 @@ EXPORT_SYMBOL_GPL(rtnl_unregister_all); | |||
273 | 273 | ||
274 | static LIST_HEAD(link_ops); | 274 | static LIST_HEAD(link_ops); |
275 | 275 | ||
276 | static const struct rtnl_link_ops *rtnl_link_ops_get(const char *kind) | ||
277 | { | ||
278 | const struct rtnl_link_ops *ops; | ||
279 | |||
280 | list_for_each_entry(ops, &link_ops, list) { | ||
281 | if (!strcmp(ops->kind, kind)) | ||
282 | return ops; | ||
283 | } | ||
284 | return NULL; | ||
285 | } | ||
286 | |||
276 | /** | 287 | /** |
277 | * __rtnl_link_register - Register rtnl_link_ops with rtnetlink. | 288 | * __rtnl_link_register - Register rtnl_link_ops with rtnetlink. |
278 | * @ops: struct rtnl_link_ops * to register | 289 | * @ops: struct rtnl_link_ops * to register |
@@ -285,6 +296,9 @@ static LIST_HEAD(link_ops); | |||
285 | */ | 296 | */ |
286 | int __rtnl_link_register(struct rtnl_link_ops *ops) | 297 | int __rtnl_link_register(struct rtnl_link_ops *ops) |
287 | { | 298 | { |
299 | if (rtnl_link_ops_get(ops->kind)) | ||
300 | return -EEXIST; | ||
301 | |||
288 | if (!ops->dellink) | 302 | if (!ops->dellink) |
289 | ops->dellink = unregister_netdevice_queue; | 303 | ops->dellink = unregister_netdevice_queue; |
290 | 304 | ||
@@ -351,17 +365,6 @@ void rtnl_link_unregister(struct rtnl_link_ops *ops) | |||
351 | } | 365 | } |
352 | EXPORT_SYMBOL_GPL(rtnl_link_unregister); | 366 | EXPORT_SYMBOL_GPL(rtnl_link_unregister); |
353 | 367 | ||
354 | static const struct rtnl_link_ops *rtnl_link_ops_get(const char *kind) | ||
355 | { | ||
356 | const struct rtnl_link_ops *ops; | ||
357 | |||
358 | list_for_each_entry(ops, &link_ops, list) { | ||
359 | if (!strcmp(ops->kind, kind)) | ||
360 | return ops; | ||
361 | } | ||
362 | return NULL; | ||
363 | } | ||
364 | |||
365 | static size_t rtnl_link_get_size(const struct net_device *dev) | 368 | static size_t rtnl_link_get_size(const struct net_device *dev) |
366 | { | 369 | { |
367 | const struct rtnl_link_ops *ops = dev->rtnl_link_ops; | 370 | const struct rtnl_link_ops *ops = dev->rtnl_link_ops; |
diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c index 925991ae6f52..6fd44606fdd1 100644 --- a/net/core/secure_seq.c +++ b/net/core/secure_seq.c | |||
@@ -36,7 +36,7 @@ static u32 seq_scale(u32 seq) | |||
36 | } | 36 | } |
37 | #endif | 37 | #endif |
38 | 38 | ||
39 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) | 39 | #if IS_ENABLED(CONFIG_IPV6) |
40 | __u32 secure_tcpv6_sequence_number(const __be32 *saddr, const __be32 *daddr, | 40 | __u32 secure_tcpv6_sequence_number(const __be32 *saddr, const __be32 *daddr, |
41 | __be16 sport, __be16 dport) | 41 | __be16 sport, __be16 dport) |
42 | { | 42 | { |
@@ -134,7 +134,7 @@ u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport) | |||
134 | EXPORT_SYMBOL_GPL(secure_ipv4_port_ephemeral); | 134 | EXPORT_SYMBOL_GPL(secure_ipv4_port_ephemeral); |
135 | #endif | 135 | #endif |
136 | 136 | ||
137 | #if defined(CONFIG_IP_DCCP) || defined(CONFIG_IP_DCCP_MODULE) | 137 | #if IS_ENABLED(CONFIG_IP_DCCP) |
138 | u64 secure_dccp_sequence_number(__be32 saddr, __be32 daddr, | 138 | u64 secure_dccp_sequence_number(__be32 saddr, __be32 daddr, |
139 | __be16 sport, __be16 dport) | 139 | __be16 sport, __be16 dport) |
140 | { | 140 | { |
@@ -156,7 +156,7 @@ u64 secure_dccp_sequence_number(__be32 saddr, __be32 daddr, | |||
156 | } | 156 | } |
157 | EXPORT_SYMBOL(secure_dccp_sequence_number); | 157 | EXPORT_SYMBOL(secure_dccp_sequence_number); |
158 | 158 | ||
159 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) | 159 | #if IS_ENABLED(CONFIG_IPV6) |
160 | u64 secure_dccpv6_sequence_number(__be32 *saddr, __be32 *daddr, | 160 | u64 secure_dccpv6_sequence_number(__be32 *saddr, __be32 *daddr, |
161 | __be16 sport, __be16 dport) | 161 | __be16 sport, __be16 dport) |
162 | { | 162 | { |
diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 3c30ee4a5710..da0c97f2fab4 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c | |||
@@ -245,6 +245,55 @@ nodata: | |||
245 | EXPORT_SYMBOL(__alloc_skb); | 245 | EXPORT_SYMBOL(__alloc_skb); |
246 | 246 | ||
247 | /** | 247 | /** |
248 | * build_skb - build a network buffer | ||
249 | * @data: data buffer provided by caller | ||
250 | * | ||
251 | * Allocate a new &sk_buff. Caller provides space holding head and | ||
252 | * skb_shared_info. @data must have been allocated by kmalloc(). | ||
253 | * The return is the new skb buffer. | ||
254 | * On failure the return is %NULL, and @data is not freed. | ||
255 | * Notes: | ||
256 | * Before IO, the driver allocates only the data buffer where the NIC puts the incoming frame. | ||
257 | * The driver should add room at head (NET_SKB_PAD) and | ||
258 | * MUST add room at tail (SKB_DATA_ALIGN(skb_shared_info)). | ||
259 | * After IO, the driver calls build_skb() to allocate the sk_buff and populate it | ||
260 | * before handing the packet to the stack. | ||
261 | * RX rings contain only data buffers, not full skbs. | ||
262 | */ | ||
263 | struct sk_buff *build_skb(void *data) | ||
264 | { | ||
265 | struct skb_shared_info *shinfo; | ||
266 | struct sk_buff *skb; | ||
267 | unsigned int size; | ||
268 | |||
269 | skb = kmem_cache_alloc(skbuff_head_cache, GFP_ATOMIC); | ||
270 | if (!skb) | ||
271 | return NULL; | ||
272 | |||
273 | size = ksize(data) - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); | ||
274 | |||
275 | memset(skb, 0, offsetof(struct sk_buff, tail)); | ||
276 | skb->truesize = SKB_TRUESIZE(size); | ||
277 | atomic_set(&skb->users, 1); | ||
278 | skb->head = data; | ||
279 | skb->data = data; | ||
280 | skb_reset_tail_pointer(skb); | ||
281 | skb->end = skb->tail + size; | ||
282 | #ifdef NET_SKBUFF_DATA_USES_OFFSET | ||
283 | skb->mac_header = ~0U; | ||
284 | #endif | ||
285 | |||
286 | /* make sure we initialize shinfo sequentially */ | ||
287 | shinfo = skb_shinfo(skb); | ||
288 | memset(shinfo, 0, offsetof(struct skb_shared_info, dataref)); | ||
289 | atomic_set(&shinfo->dataref, 1); | ||
290 | kmemcheck_annotate_variable(shinfo->destructor_arg); | ||
291 | |||
292 | return skb; | ||
293 | } | ||
294 | EXPORT_SYMBOL(build_skb); | ||
295 | |||
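To make the contract in the kernel-doc concrete, here is a minimal sketch of a driver RX completion path using build_skb(); the function and variable names are hypothetical, and @data is assumed to have been kmalloc()ed with NET_SKB_PAD of headroom plus SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) of tailroom:

	static void drv_rx_complete(struct net_device *netdev, void *data,
				    unsigned int frame_len)
	{
		struct sk_buff *skb = build_skb(data);

		if (!skb) {
			kfree(data);	/* build_skb() does not free @data */
			return;
		}
		skb_reserve(skb, NET_SKB_PAD);	/* headroom reserved pre-DMA */
		skb_put(skb, frame_len);
		skb->protocol = eth_type_trans(skb, netdev);
		netif_receive_skb(skb);
	}
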
296 | /** | ||
248 | * __netdev_alloc_skb - allocate an skbuff for rx on a specific device | 297 | * __netdev_alloc_skb - allocate an skbuff for rx on a specific device |
249 | * @dev: network device to receive on | 298 | * @dev: network device to receive on |
250 | * @length: length to allocate | 299 | * @length: length to allocate |
@@ -403,7 +452,7 @@ static void skb_release_head_state(struct sk_buff *skb) | |||
403 | WARN_ON(in_irq()); | 452 | WARN_ON(in_irq()); |
404 | skb->destructor(skb); | 453 | skb->destructor(skb); |
405 | } | 454 | } |
406 | #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) | 455 | #if IS_ENABLED(CONFIG_NF_CONNTRACK) |
407 | nf_conntrack_put(skb->nfct); | 456 | nf_conntrack_put(skb->nfct); |
408 | #endif | 457 | #endif |
409 | #ifdef NET_SKBUFF_NF_DEFRAG_NEEDED | 458 | #ifdef NET_SKBUFF_NF_DEFRAG_NEEDED |
@@ -553,15 +602,14 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) | |||
553 | new->ip_summed = old->ip_summed; | 602 | new->ip_summed = old->ip_summed; |
554 | skb_copy_queue_mapping(new, old); | 603 | skb_copy_queue_mapping(new, old); |
555 | new->priority = old->priority; | 604 | new->priority = old->priority; |
556 | #if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE) | 605 | #if IS_ENABLED(CONFIG_IP_VS) |
557 | new->ipvs_property = old->ipvs_property; | 606 | new->ipvs_property = old->ipvs_property; |
558 | #endif | 607 | #endif |
559 | new->protocol = old->protocol; | 608 | new->protocol = old->protocol; |
560 | new->mark = old->mark; | 609 | new->mark = old->mark; |
561 | new->skb_iif = old->skb_iif; | 610 | new->skb_iif = old->skb_iif; |
562 | __nf_copy(new, old); | 611 | __nf_copy(new, old); |
563 | #if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ | 612 | #if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) |
564 | defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE) | ||
565 | new->nf_trace = old->nf_trace; | 613 | new->nf_trace = old->nf_trace; |
566 | #endif | 614 | #endif |
567 | #ifdef CONFIG_NET_SCHED | 615 | #ifdef CONFIG_NET_SCHED |
@@ -791,8 +839,9 @@ struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask) | |||
791 | EXPORT_SYMBOL(skb_copy); | 839 | EXPORT_SYMBOL(skb_copy); |
792 | 840 | ||
793 | /** | 841 | /** |
794 | * pskb_copy - create copy of an sk_buff with private head. | 842 | * __pskb_copy - create copy of an sk_buff with private head. |
795 | * @skb: buffer to copy | 843 | * @skb: buffer to copy |
844 | * @headroom: headroom of new skb | ||
796 | * @gfp_mask: allocation priority | 845 | * @gfp_mask: allocation priority |
797 | * | 846 | * |
798 | * Make a copy of both an &sk_buff and part of its data, located | 847 | * Make a copy of both an &sk_buff and part of its data, located |
@@ -803,16 +852,16 @@ EXPORT_SYMBOL(skb_copy); | |||
803 | * The returned buffer has a reference count of 1. | 852 | * The returned buffer has a reference count of 1. |
804 | */ | 853 | */ |
805 | 854 | ||
806 | struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask) | 855 | struct sk_buff *__pskb_copy(struct sk_buff *skb, int headroom, gfp_t gfp_mask) |
807 | { | 856 | { |
808 | unsigned int size = skb_end_pointer(skb) - skb->head; | 857 | unsigned int size = skb_headlen(skb) + headroom; |
809 | struct sk_buff *n = alloc_skb(size, gfp_mask); | 858 | struct sk_buff *n = alloc_skb(size, gfp_mask); |
810 | 859 | ||
811 | if (!n) | 860 | if (!n) |
812 | goto out; | 861 | goto out; |
813 | 862 | ||
814 | /* Set the data pointer */ | 863 | /* Set the data pointer */ |
815 | skb_reserve(n, skb_headroom(skb)); | 864 | skb_reserve(n, headroom); |
816 | /* Set the tail pointer and length */ | 865 | /* Set the tail pointer and length */ |
817 | skb_put(n, skb_headlen(skb)); | 866 | skb_put(n, skb_headlen(skb)); |
818 | /* Copy the bytes */ | 867 | /* Copy the bytes */ |
@@ -848,7 +897,7 @@ struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask) | |||
848 | out: | 897 | out: |
849 | return n; | 898 | return n; |
850 | } | 899 | } |
851 | EXPORT_SYMBOL(pskb_copy); | 900 | EXPORT_SYMBOL(__pskb_copy); |
852 | 901 | ||
853 | /** | 902 | /** |
854 | * pskb_expand_head - reallocate header of &sk_buff | 903 | * pskb_expand_head - reallocate header of &sk_buff |
@@ -2621,7 +2670,7 @@ EXPORT_SYMBOL_GPL(skb_pull_rcsum); | |||
2621 | * a pointer to the first in a list of new skbs for the segments. | 2670 | * a pointer to the first in a list of new skbs for the segments. |
2622 | * In case of error it returns ERR_PTR(err). | 2671 | * In case of error it returns ERR_PTR(err). |
2623 | */ | 2672 | */ |
2624 | struct sk_buff *skb_segment(struct sk_buff *skb, u32 features) | 2673 | struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) |
2625 | { | 2674 | { |
2626 | struct sk_buff *segs = NULL; | 2675 | struct sk_buff *segs = NULL; |
2627 | struct sk_buff *tail = NULL; | 2676 | struct sk_buff *tail = NULL; |
@@ -3169,6 +3218,26 @@ void skb_tstamp_tx(struct sk_buff *orig_skb, | |||
3169 | } | 3218 | } |
3170 | EXPORT_SYMBOL_GPL(skb_tstamp_tx); | 3219 | EXPORT_SYMBOL_GPL(skb_tstamp_tx); |
3171 | 3220 | ||
3221 | void skb_complete_wifi_ack(struct sk_buff *skb, bool acked) | ||
3222 | { | ||
3223 | struct sock *sk = skb->sk; | ||
3224 | struct sock_exterr_skb *serr; | ||
3225 | int err; | ||
3226 | |||
3227 | skb->wifi_acked_valid = 1; | ||
3228 | skb->wifi_acked = acked; | ||
3229 | |||
3230 | serr = SKB_EXT_ERR(skb); | ||
3231 | memset(serr, 0, sizeof(*serr)); | ||
3232 | serr->ee.ee_errno = ENOMSG; | ||
3233 | serr->ee.ee_origin = SO_EE_ORIGIN_TXSTATUS; | ||
3234 | |||
3235 | err = sock_queue_err_skb(sk, skb); | ||
3236 | if (err) | ||
3237 | kfree_skb(skb); | ||
3238 | } | ||
3239 | EXPORT_SYMBOL_GPL(skb_complete_wifi_ack); | ||
3240 | |||
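Note the ownership semantics above: skb_complete_wifi_ack() always consumes the skb, either queuing it on the socket's error queue or freeing it when sock_queue_err_skb() fails, so callers must not touch the buffer afterwards. A hedged sketch of such a caller (the wireless-stack glue is illustrative):

	static void drv_report_wifi_ack(struct sk_buff *skb, bool acked)
	{
		skb_complete_wifi_ack(skb, acked);
		/* skb is gone here: queued to sk_error_queue or freed */
	}
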
3172 | 3241 | ||
3173 | /** | 3242 | /** |
3174 | * skb_partial_csum_set - set up and verify partial csum values for packet | 3243 | * skb_partial_csum_set - set up and verify partial csum values for packet |
diff --git a/net/core/sock.c b/net/core/sock.c index b23f174ab84c..002939cfc069 100644 --- a/net/core/sock.c +++ b/net/core/sock.c | |||
@@ -111,6 +111,7 @@ | |||
111 | #include <linux/init.h> | 111 | #include <linux/init.h> |
112 | #include <linux/highmem.h> | 112 | #include <linux/highmem.h> |
113 | #include <linux/user_namespace.h> | 113 | #include <linux/user_namespace.h> |
114 | #include <linux/jump_label.h> | ||
114 | 115 | ||
115 | #include <asm/uaccess.h> | 116 | #include <asm/uaccess.h> |
116 | #include <asm/system.h> | 117 | #include <asm/system.h> |
@@ -125,6 +126,7 @@ | |||
125 | #include <net/xfrm.h> | 126 | #include <net/xfrm.h> |
126 | #include <linux/ipsec.h> | 127 | #include <linux/ipsec.h> |
127 | #include <net/cls_cgroup.h> | 128 | #include <net/cls_cgroup.h> |
129 | #include <net/netprio_cgroup.h> | ||
128 | 130 | ||
129 | #include <linux/filter.h> | 131 | #include <linux/filter.h> |
130 | 132 | ||
@@ -134,6 +136,46 @@ | |||
134 | #include <net/tcp.h> | 136 | #include <net/tcp.h> |
135 | #endif | 137 | #endif |
136 | 138 | ||
139 | static DEFINE_MUTEX(proto_list_mutex); | ||
140 | static LIST_HEAD(proto_list); | ||
141 | |||
142 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM | ||
143 | int mem_cgroup_sockets_init(struct cgroup *cgrp, struct cgroup_subsys *ss) | ||
144 | { | ||
145 | struct proto *proto; | ||
146 | int ret = 0; | ||
147 | |||
148 | mutex_lock(&proto_list_mutex); | ||
149 | list_for_each_entry(proto, &proto_list, node) { | ||
150 | if (proto->init_cgroup) { | ||
151 | ret = proto->init_cgroup(cgrp, ss); | ||
152 | if (ret) | ||
153 | goto out; | ||
154 | } | ||
155 | } | ||
156 | |||
157 | mutex_unlock(&proto_list_mutex); | ||
158 | return ret; | ||
159 | out: | ||
160 | list_for_each_entry_continue_reverse(proto, &proto_list, node) | ||
161 | if (proto->destroy_cgroup) | ||
162 | proto->destroy_cgroup(cgrp, ss); | ||
163 | mutex_unlock(&proto_list_mutex); | ||
164 | return ret; | ||
165 | } | ||
166 | |||
167 | void mem_cgroup_sockets_destroy(struct cgroup *cgrp, struct cgroup_subsys *ss) | ||
168 | { | ||
169 | struct proto *proto; | ||
170 | |||
171 | mutex_lock(&proto_list_mutex); | ||
172 | list_for_each_entry_reverse(proto, &proto_list, node) | ||
173 | if (proto->destroy_cgroup) | ||
174 | proto->destroy_cgroup(cgrp, ss); | ||
175 | mutex_unlock(&proto_list_mutex); | ||
176 | } | ||
177 | #endif | ||
178 | |||
137 | /* | 179 | /* |
138 | * Each address family might have different locking rules, so we have | 180 | * Each address family might have different locking rules, so we have |
139 | * one slock key per address family: | 181 | * one slock key per address family: |
@@ -141,6 +183,9 @@ | |||
141 | static struct lock_class_key af_family_keys[AF_MAX]; | 183 | static struct lock_class_key af_family_keys[AF_MAX]; |
142 | static struct lock_class_key af_family_slock_keys[AF_MAX]; | 184 | static struct lock_class_key af_family_slock_keys[AF_MAX]; |
143 | 185 | ||
186 | struct jump_label_key memcg_socket_limit_enabled; | ||
187 | EXPORT_SYMBOL(memcg_socket_limit_enabled); | ||
188 | |||
144 | /* | 189 | /* |
145 | * Make lock validator output more readable. (we pre-construct these | 190 | * Make lock validator output more readable. (we pre-construct these |
146 | * strings build-time, so that runtime initialization of socket | 191 | * strings build-time, so that runtime initialization of socket |
@@ -221,10 +266,16 @@ __u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX; | |||
221 | int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512); | 266 | int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512); |
222 | EXPORT_SYMBOL(sysctl_optmem_max); | 267 | EXPORT_SYMBOL(sysctl_optmem_max); |
223 | 268 | ||
224 | #if defined(CONFIG_CGROUPS) && !defined(CONFIG_NET_CLS_CGROUP) | 269 | #if defined(CONFIG_CGROUPS) |
270 | #if !defined(CONFIG_NET_CLS_CGROUP) | ||
225 | int net_cls_subsys_id = -1; | 271 | int net_cls_subsys_id = -1; |
226 | EXPORT_SYMBOL_GPL(net_cls_subsys_id); | 272 | EXPORT_SYMBOL_GPL(net_cls_subsys_id); |
227 | #endif | 273 | #endif |
274 | #if !defined(CONFIG_NETPRIO_CGROUP) | ||
275 | int net_prio_subsys_id = -1; | ||
276 | EXPORT_SYMBOL_GPL(net_prio_subsys_id); | ||
277 | #endif | ||
278 | #endif | ||
228 | 279 | ||
229 | static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen) | 280 | static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen) |
230 | { | 281 | { |
@@ -269,14 +320,14 @@ static void sock_warn_obsolete_bsdism(const char *name) | |||
269 | } | 320 | } |
270 | } | 321 | } |
271 | 322 | ||
272 | static void sock_disable_timestamp(struct sock *sk, int flag) | 323 | #define SK_FLAGS_TIMESTAMP ((1UL << SOCK_TIMESTAMP) | (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE)) |
324 | |||
325 | static void sock_disable_timestamp(struct sock *sk, unsigned long flags) | ||
273 | { | 326 | { |
274 | if (sock_flag(sk, flag)) { | 327 | if (sk->sk_flags & flags) { |
275 | sock_reset_flag(sk, flag); | 328 | sk->sk_flags &= ~flags; |
276 | if (!sock_flag(sk, SOCK_TIMESTAMP) && | 329 | if (!(sk->sk_flags & SK_FLAGS_TIMESTAMP)) |
277 | !sock_flag(sk, SOCK_TIMESTAMPING_RX_SOFTWARE)) { | ||
278 | net_disable_timestamp(); | 330 | net_disable_timestamp(); |
279 | } | ||
280 | } | 331 | } |
281 | } | 332 | } |
282 | 333 | ||
@@ -678,7 +729,7 @@ set_rcvbuf: | |||
678 | SOCK_TIMESTAMPING_RX_SOFTWARE); | 729 | SOCK_TIMESTAMPING_RX_SOFTWARE); |
679 | else | 730 | else |
680 | sock_disable_timestamp(sk, | 731 | sock_disable_timestamp(sk, |
681 | SOCK_TIMESTAMPING_RX_SOFTWARE); | 732 | (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE)); |
682 | sock_valbool_flag(sk, SOCK_TIMESTAMPING_SOFTWARE, | 733 | sock_valbool_flag(sk, SOCK_TIMESTAMPING_SOFTWARE, |
683 | val & SOF_TIMESTAMPING_SOFTWARE); | 734 | val & SOF_TIMESTAMPING_SOFTWARE); |
684 | sock_valbool_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE, | 735 | sock_valbool_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE, |
@@ -736,6 +787,11 @@ set_rcvbuf: | |||
736 | case SO_RXQ_OVFL: | 787 | case SO_RXQ_OVFL: |
737 | sock_valbool_flag(sk, SOCK_RXQ_OVFL, valbool); | 788 | sock_valbool_flag(sk, SOCK_RXQ_OVFL, valbool); |
738 | break; | 789 | break; |
790 | |||
791 | case SO_WIFI_STATUS: | ||
792 | sock_valbool_flag(sk, SOCK_WIFI_STATUS, valbool); | ||
793 | break; | ||
794 | |||
739 | default: | 795 | default: |
740 | ret = -ENOPROTOOPT; | 796 | ret = -ENOPROTOOPT; |
741 | break; | 797 | break; |
@@ -957,6 +1013,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname, | |||
957 | v.val = !!sock_flag(sk, SOCK_RXQ_OVFL); | 1013 | v.val = !!sock_flag(sk, SOCK_RXQ_OVFL); |
958 | break; | 1014 | break; |
959 | 1015 | ||
1016 | case SO_WIFI_STATUS: | ||
1017 | v.val = !!sock_flag(sk, SOCK_WIFI_STATUS); | ||
1018 | break; | ||
1019 | |||
960 | default: | 1020 | default: |
961 | return -ENOPROTOOPT; | 1021 | return -ENOPROTOOPT; |
962 | } | 1022 | } |
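For reference, a hypothetical userspace opt-in to the new option (fd is an assumed connected socket; delivery of the ack status itself, via an SCM_WIFI_STATUS control message, is wired up elsewhere in this series):

	int on = 1;

	if (setsockopt(fd, SOL_SOCKET, SO_WIFI_STATUS, &on, sizeof(on)) < 0)
		perror("setsockopt(SO_WIFI_STATUS)");
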
@@ -1107,6 +1167,18 @@ void sock_update_classid(struct sock *sk) | |||
1107 | sk->sk_classid = classid; | 1167 | sk->sk_classid = classid; |
1108 | } | 1168 | } |
1109 | EXPORT_SYMBOL(sock_update_classid); | 1169 | EXPORT_SYMBOL(sock_update_classid); |
1170 | |||
1171 | void sock_update_netprioidx(struct sock *sk) | ||
1172 | { | ||
1173 | struct cgroup_netprio_state *state; | ||
1174 | if (in_interrupt()) | ||
1175 | return; | ||
1176 | rcu_read_lock(); | ||
1177 | state = task_netprio_state(current); | ||
1178 | sk->sk_cgrp_prioidx = state ? state->prioidx : 0; | ||
1179 | rcu_read_unlock(); | ||
1180 | } | ||
1181 | EXPORT_SYMBOL_GPL(sock_update_netprioidx); | ||
1110 | #endif | 1182 | #endif |
1111 | 1183 | ||
1112 | /** | 1184 | /** |
@@ -1134,6 +1206,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority, | |||
1134 | atomic_set(&sk->sk_wmem_alloc, 1); | 1206 | atomic_set(&sk->sk_wmem_alloc, 1); |
1135 | 1207 | ||
1136 | sock_update_classid(sk); | 1208 | sock_update_classid(sk); |
1209 | sock_update_netprioidx(sk); | ||
1137 | } | 1210 | } |
1138 | 1211 | ||
1139 | return sk; | 1212 | return sk; |
@@ -1154,8 +1227,7 @@ static void __sk_free(struct sock *sk) | |||
1154 | RCU_INIT_POINTER(sk->sk_filter, NULL); | 1227 | RCU_INIT_POINTER(sk->sk_filter, NULL); |
1155 | } | 1228 | } |
1156 | 1229 | ||
1157 | sock_disable_timestamp(sk, SOCK_TIMESTAMP); | 1230 | sock_disable_timestamp(sk, SK_FLAGS_TIMESTAMP); |
1158 | sock_disable_timestamp(sk, SOCK_TIMESTAMPING_RX_SOFTWARE); | ||
1159 | 1231 | ||
1160 | if (atomic_read(&sk->sk_omem_alloc)) | 1232 | if (atomic_read(&sk->sk_omem_alloc)) |
1161 | printk(KERN_DEBUG "%s: optmem leakage (%d bytes) detected.\n", | 1233 | printk(KERN_DEBUG "%s: optmem leakage (%d bytes) detected.\n", |
@@ -1200,7 +1272,14 @@ void sk_release_kernel(struct sock *sk) | |||
1200 | } | 1272 | } |
1201 | EXPORT_SYMBOL(sk_release_kernel); | 1273 | EXPORT_SYMBOL(sk_release_kernel); |
1202 | 1274 | ||
1203 | struct sock *sk_clone(const struct sock *sk, const gfp_t priority) | 1275 | /** |
1276 | * sk_clone_lock - clone a socket, and lock its clone | ||
1277 | * @sk: the socket to clone | ||
1278 | * @priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc) | ||
1279 | * | ||
1280 | * Caller must unlock socket even in error path (bh_unlock_sock(newsk)) | ||
1281 | */ | ||
1282 | struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) | ||
1204 | { | 1283 | { |
1205 | struct sock *newsk; | 1284 | struct sock *newsk; |
1206 | 1285 | ||
@@ -1284,16 +1363,15 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority) | |||
1284 | newsk->sk_wq = NULL; | 1363 | newsk->sk_wq = NULL; |
1285 | 1364 | ||
1286 | if (newsk->sk_prot->sockets_allocated) | 1365 | if (newsk->sk_prot->sockets_allocated) |
1287 | percpu_counter_inc(newsk->sk_prot->sockets_allocated); | 1366 | sk_sockets_allocated_inc(newsk); |
1288 | 1367 | ||
1289 | if (sock_flag(newsk, SOCK_TIMESTAMP) || | 1368 | if (newsk->sk_flags & SK_FLAGS_TIMESTAMP) |
1290 | sock_flag(newsk, SOCK_TIMESTAMPING_RX_SOFTWARE)) | ||
1291 | net_enable_timestamp(); | 1369 | net_enable_timestamp(); |
1292 | } | 1370 | } |
1293 | out: | 1371 | out: |
1294 | return newsk; | 1372 | return newsk; |
1295 | } | 1373 | } |
1296 | EXPORT_SYMBOL_GPL(sk_clone); | 1374 | EXPORT_SYMBOL_GPL(sk_clone_lock); |
1297 | 1375 | ||
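The rename to sk_clone_lock() makes the locking contract explicit: the clone is returned bh-locked and, per the new kernel-doc, the caller must unlock it on every path. An illustrative conforming caller:

	struct sock *newsk = sk_clone_lock(sk, GFP_ATOMIC);

	if (newsk) {
		/* ... protocol-specific initialisation ... */
		bh_unlock_sock(newsk);
	}
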
1298 | void sk_setup_caps(struct sock *sk, struct dst_entry *dst) | 1376 | void sk_setup_caps(struct sock *sk, struct dst_entry *dst) |
1299 | { | 1377 | { |
@@ -1673,30 +1751,34 @@ int __sk_mem_schedule(struct sock *sk, int size, int kind) | |||
1673 | struct proto *prot = sk->sk_prot; | 1751 | struct proto *prot = sk->sk_prot; |
1674 | int amt = sk_mem_pages(size); | 1752 | int amt = sk_mem_pages(size); |
1675 | long allocated; | 1753 | long allocated; |
1754 | int parent_status = UNDER_LIMIT; | ||
1676 | 1755 | ||
1677 | sk->sk_forward_alloc += amt * SK_MEM_QUANTUM; | 1756 | sk->sk_forward_alloc += amt * SK_MEM_QUANTUM; |
1678 | allocated = atomic_long_add_return(amt, prot->memory_allocated); | 1757 | |
1758 | allocated = sk_memory_allocated_add(sk, amt, &parent_status); | ||
1679 | 1759 | ||
1680 | /* Under limit. */ | 1760 | /* Under limit. */ |
1681 | if (allocated <= prot->sysctl_mem[0]) { | 1761 | if (parent_status == UNDER_LIMIT && |
1682 | if (prot->memory_pressure && *prot->memory_pressure) | 1762 | allocated <= sk_prot_mem_limits(sk, 0)) { |
1683 | *prot->memory_pressure = 0; | 1763 | sk_leave_memory_pressure(sk); |
1684 | return 1; | 1764 | return 1; |
1685 | } | 1765 | } |
1686 | 1766 | ||
1687 | /* Under pressure. */ | 1767 | /* Under pressure. (we or our parents) */ |
1688 | if (allocated > prot->sysctl_mem[1]) | 1768 | if ((parent_status > SOFT_LIMIT) || |
1689 | if (prot->enter_memory_pressure) | 1769 | allocated > sk_prot_mem_limits(sk, 1)) |
1690 | prot->enter_memory_pressure(sk); | 1770 | sk_enter_memory_pressure(sk); |
1691 | 1771 | ||
1692 | /* Over hard limit. */ | 1772 | /* Over hard limit (we or our parents) */ |
1693 | if (allocated > prot->sysctl_mem[2]) | 1773 | if ((parent_status == OVER_LIMIT) || |
1774 | (allocated > sk_prot_mem_limits(sk, 2))) | ||
1694 | goto suppress_allocation; | 1775 | goto suppress_allocation; |
1695 | 1776 | ||
1696 | /* guarantee minimum buffer size under pressure */ | 1777 | /* guarantee minimum buffer size under pressure */ |
1697 | if (kind == SK_MEM_RECV) { | 1778 | if (kind == SK_MEM_RECV) { |
1698 | if (atomic_read(&sk->sk_rmem_alloc) < prot->sysctl_rmem[0]) | 1779 | if (atomic_read(&sk->sk_rmem_alloc) < prot->sysctl_rmem[0]) |
1699 | return 1; | 1780 | return 1; |
1781 | |||
1700 | } else { /* SK_MEM_SEND */ | 1782 | } else { /* SK_MEM_SEND */ |
1701 | if (sk->sk_type == SOCK_STREAM) { | 1783 | if (sk->sk_type == SOCK_STREAM) { |
1702 | if (sk->sk_wmem_queued < prot->sysctl_wmem[0]) | 1784 | if (sk->sk_wmem_queued < prot->sysctl_wmem[0]) |
@@ -1706,13 +1788,13 @@ int __sk_mem_schedule(struct sock *sk, int size, int kind) | |||
1706 | return 1; | 1788 | return 1; |
1707 | } | 1789 | } |
1708 | 1790 | ||
1709 | if (prot->memory_pressure) { | 1791 | if (sk_has_memory_pressure(sk)) { |
1710 | int alloc; | 1792 | int alloc; |
1711 | 1793 | ||
1712 | if (!*prot->memory_pressure) | 1794 | if (!sk_under_memory_pressure(sk)) |
1713 | return 1; | 1795 | return 1; |
1714 | alloc = percpu_counter_read_positive(prot->sockets_allocated); | 1796 | alloc = sk_sockets_allocated_read_positive(sk); |
1715 | if (prot->sysctl_mem[2] > alloc * | 1797 | if (sk_prot_mem_limits(sk, 2) > alloc * |
1716 | sk_mem_pages(sk->sk_wmem_queued + | 1798 | sk_mem_pages(sk->sk_wmem_queued + |
1717 | atomic_read(&sk->sk_rmem_alloc) + | 1799 | atomic_read(&sk->sk_rmem_alloc) + |
1718 | sk->sk_forward_alloc)) | 1800 | sk->sk_forward_alloc)) |
@@ -1735,7 +1817,9 @@ suppress_allocation: | |||
1735 | 1817 | ||
1736 | /* Alas. Undo changes. */ | 1818 | /* Alas. Undo changes. */ |
1737 | sk->sk_forward_alloc -= amt * SK_MEM_QUANTUM; | 1819 | sk->sk_forward_alloc -= amt * SK_MEM_QUANTUM; |
1738 | atomic_long_sub(amt, prot->memory_allocated); | 1820 | |
1821 | sk_memory_allocated_sub(sk, amt, parent_status); | ||
1822 | |||
1739 | return 0; | 1823 | return 0; |
1740 | } | 1824 | } |
1741 | EXPORT_SYMBOL(__sk_mem_schedule); | 1825 | EXPORT_SYMBOL(__sk_mem_schedule); |
@@ -1746,15 +1830,13 @@ EXPORT_SYMBOL(__sk_mem_schedule); | |||
1746 | */ | 1830 | */ |
1747 | void __sk_mem_reclaim(struct sock *sk) | 1831 | void __sk_mem_reclaim(struct sock *sk) |
1748 | { | 1832 | { |
1749 | struct proto *prot = sk->sk_prot; | 1833 | sk_memory_allocated_sub(sk, |
1750 | 1834 | sk->sk_forward_alloc >> SK_MEM_QUANTUM_SHIFT, 0); | |
1751 | atomic_long_sub(sk->sk_forward_alloc >> SK_MEM_QUANTUM_SHIFT, | ||
1752 | prot->memory_allocated); | ||
1753 | sk->sk_forward_alloc &= SK_MEM_QUANTUM - 1; | 1835 | sk->sk_forward_alloc &= SK_MEM_QUANTUM - 1; |
1754 | 1836 | ||
1755 | if (prot->memory_pressure && *prot->memory_pressure && | 1837 | if (sk_under_memory_pressure(sk) && |
1756 | (atomic_long_read(prot->memory_allocated) < prot->sysctl_mem[0])) | 1838 | (sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0))) |
1757 | *prot->memory_pressure = 0; | 1839 | sk_leave_memory_pressure(sk); |
1758 | } | 1840 | } |
1759 | EXPORT_SYMBOL(__sk_mem_reclaim); | 1841 | EXPORT_SYMBOL(__sk_mem_reclaim); |
1760 | 1842 | ||
@@ -2125,16 +2207,15 @@ EXPORT_SYMBOL(sock_get_timestampns); | |||
2125 | void sock_enable_timestamp(struct sock *sk, int flag) | 2207 | void sock_enable_timestamp(struct sock *sk, int flag) |
2126 | { | 2208 | { |
2127 | if (!sock_flag(sk, flag)) { | 2209 | if (!sock_flag(sk, flag)) { |
2210 | unsigned long previous_flags = sk->sk_flags; | ||
2211 | |||
2128 | sock_set_flag(sk, flag); | 2212 | sock_set_flag(sk, flag); |
2129 | /* | 2213 | /* |
2130 | * we just set one of the two flags which require net | 2214 | * we just set one of the two flags which require net |
2131 | * time stamping, but time stamping might have been on | 2215 | * time stamping, but time stamping might have been on |
2132 | * already because of the other one | 2216 | * already because of the other one |
2133 | */ | 2217 | */ |
2134 | if (!sock_flag(sk, | 2218 | if (!(previous_flags & SK_FLAGS_TIMESTAMP)) |
2135 | flag == SOCK_TIMESTAMP ? | ||
2136 | SOCK_TIMESTAMPING_RX_SOFTWARE : | ||
2137 | SOCK_TIMESTAMP)) | ||
2138 | net_enable_timestamp(); | 2219 | net_enable_timestamp(); |
2139 | } | 2220 | } |
2140 | } | 2221 | } |
@@ -2246,9 +2327,6 @@ void sk_common_release(struct sock *sk) | |||
2246 | } | 2327 | } |
2247 | EXPORT_SYMBOL(sk_common_release); | 2328 | EXPORT_SYMBOL(sk_common_release); |
2248 | 2329 | ||
2249 | static DEFINE_RWLOCK(proto_list_lock); | ||
2250 | static LIST_HEAD(proto_list); | ||
2251 | |||
2252 | #ifdef CONFIG_PROC_FS | 2330 | #ifdef CONFIG_PROC_FS |
2253 | #define PROTO_INUSE_NR 64 /* should be enough for the first time */ | 2331 | #define PROTO_INUSE_NR 64 /* should be enough for the first time */ |
2254 | struct prot_inuse { | 2332 | struct prot_inuse { |
@@ -2397,10 +2475,10 @@ int proto_register(struct proto *prot, int alloc_slab) | |||
2397 | } | 2475 | } |
2398 | } | 2476 | } |
2399 | 2477 | ||
2400 | write_lock(&proto_list_lock); | 2478 | mutex_lock(&proto_list_mutex); |
2401 | list_add(&prot->node, &proto_list); | 2479 | list_add(&prot->node, &proto_list); |
2402 | assign_proto_idx(prot); | 2480 | assign_proto_idx(prot); |
2403 | write_unlock(&proto_list_lock); | 2481 | mutex_unlock(&proto_list_mutex); |
2404 | return 0; | 2482 | return 0; |
2405 | 2483 | ||
2406 | out_free_timewait_sock_slab_name: | 2484 | out_free_timewait_sock_slab_name: |
@@ -2423,10 +2501,10 @@ EXPORT_SYMBOL(proto_register); | |||
2423 | 2501 | ||
2424 | void proto_unregister(struct proto *prot) | 2502 | void proto_unregister(struct proto *prot) |
2425 | { | 2503 | { |
2426 | write_lock(&proto_list_lock); | 2504 | mutex_lock(&proto_list_mutex); |
2427 | release_proto_idx(prot); | 2505 | release_proto_idx(prot); |
2428 | list_del(&prot->node); | 2506 | list_del(&prot->node); |
2429 | write_unlock(&proto_list_lock); | 2507 | mutex_unlock(&proto_list_mutex); |
2430 | 2508 | ||
2431 | if (prot->slab != NULL) { | 2509 | if (prot->slab != NULL) { |
2432 | kmem_cache_destroy(prot->slab); | 2510 | kmem_cache_destroy(prot->slab); |
@@ -2449,9 +2527,9 @@ EXPORT_SYMBOL(proto_unregister); | |||
2449 | 2527 | ||
2450 | #ifdef CONFIG_PROC_FS | 2528 | #ifdef CONFIG_PROC_FS |
2451 | static void *proto_seq_start(struct seq_file *seq, loff_t *pos) | 2529 | static void *proto_seq_start(struct seq_file *seq, loff_t *pos) |
2452 | __acquires(proto_list_lock) | 2530 | __acquires(proto_list_mutex) |
2453 | { | 2531 | { |
2454 | read_lock(&proto_list_lock); | 2532 | mutex_lock(&proto_list_mutex); |
2455 | return seq_list_start_head(&proto_list, *pos); | 2533 | return seq_list_start_head(&proto_list, *pos); |
2456 | } | 2534 | } |
2457 | 2535 | ||
@@ -2461,25 +2539,36 @@ static void *proto_seq_next(struct seq_file *seq, void *v, loff_t *pos) | |||
2461 | } | 2539 | } |
2462 | 2540 | ||
2463 | static void proto_seq_stop(struct seq_file *seq, void *v) | 2541 | static void proto_seq_stop(struct seq_file *seq, void *v) |
2464 | __releases(proto_list_lock) | 2542 | __releases(proto_list_mutex) |
2465 | { | 2543 | { |
2466 | read_unlock(&proto_list_lock); | 2544 | mutex_unlock(&proto_list_mutex); |
2467 | } | 2545 | } |
2468 | 2546 | ||
2469 | static char proto_method_implemented(const void *method) | 2547 | static char proto_method_implemented(const void *method) |
2470 | { | 2548 | { |
2471 | return method == NULL ? 'n' : 'y'; | 2549 | return method == NULL ? 'n' : 'y'; |
2472 | } | 2550 | } |
2551 | static long sock_prot_memory_allocated(struct proto *proto) | ||
2552 | { | ||
2553 | return proto->memory_allocated != NULL ? proto_memory_allocated(proto) : -1L; | ||
2554 | } | ||
2555 | |||
2556 | static char *sock_prot_memory_pressure(struct proto *proto) | ||
2557 | { | ||
2558 | return proto->memory_pressure != NULL ? | ||
2559 | proto_memory_pressure(proto) ? "yes" : "no" : "NI"; | ||
2560 | } | ||
2473 | 2561 | ||
2474 | static void proto_seq_printf(struct seq_file *seq, struct proto *proto) | 2562 | static void proto_seq_printf(struct seq_file *seq, struct proto *proto) |
2475 | { | 2563 | { |
2564 | |||
2476 | seq_printf(seq, "%-9s %4u %6d %6ld %-3s %6u %-3s %-10s " | 2565 | seq_printf(seq, "%-9s %4u %6d %6ld %-3s %6u %-3s %-10s " |
2477 | "%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n", | 2566 | "%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n", |
2478 | proto->name, | 2567 | proto->name, |
2479 | proto->obj_size, | 2568 | proto->obj_size, |
2480 | sock_prot_inuse_get(seq_file_net(seq), proto), | 2569 | sock_prot_inuse_get(seq_file_net(seq), proto), |
2481 | proto->memory_allocated != NULL ? atomic_long_read(proto->memory_allocated) : -1L, | 2570 | sock_prot_memory_allocated(proto), |
2482 | proto->memory_pressure != NULL ? *proto->memory_pressure ? "yes" : "no" : "NI", | 2571 | sock_prot_memory_pressure(proto), |
2483 | proto->max_header, | 2572 | proto->max_header, |
2484 | proto->slab == NULL ? "no" : "yes", | 2573 | proto->slab == NULL ? "no" : "yes", |
2485 | module_name(proto->owner), | 2574 | module_name(proto->owner), |
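The two new helpers factor the ternaries out of proto_seq_printf(), and in doing so switch it from reading proto->memory_allocated and *proto->memory_pressure by hand to the proto_memory_allocated()/proto_memory_pressure() accessors, giving the per-cgroup memory-pressure work elsewhere in this merge a single interposition point. A protocol that does no memory accounting leaves those pointers NULL and shows up as -1 and "NI" (not implemented) in /proc/net/protocols. The convention, restated as an illustrative report_mem() sketch:

    #include <net/sock.h>

    static long report_mem(struct proto *proto)
    {
            if (proto->memory_allocated == NULL)
                    return -1L;     /* no accounting: reported as -1 */
            return proto_memory_allocated(proto);   /* accessor, not a raw atomic_long_read() */
    }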
diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c new file mode 100644 index 000000000000..b9868e1fd62c --- /dev/null +++ b/net/core/sock_diag.c | |||
@@ -0,0 +1,192 @@ | |||
1 | #include <linux/mutex.h> | ||
2 | #include <linux/socket.h> | ||
3 | #include <linux/skbuff.h> | ||
4 | #include <net/netlink.h> | ||
5 | #include <net/net_namespace.h> | ||
6 | #include <linux/module.h> | ||
7 | #include <linux/rtnetlink.h> | ||
8 | #include <net/sock.h> | ||
9 | |||
10 | #include <linux/inet_diag.h> | ||
11 | #include <linux/sock_diag.h> | ||
12 | |||
13 | static struct sock_diag_handler *sock_diag_handlers[AF_MAX]; | ||
14 | static int (*inet_rcv_compat)(struct sk_buff *skb, struct nlmsghdr *nlh); | ||
15 | static DEFINE_MUTEX(sock_diag_table_mutex); | ||
16 | |||
17 | int sock_diag_check_cookie(void *sk, __u32 *cookie) | ||
18 | { | ||
19 | if ((cookie[0] != INET_DIAG_NOCOOKIE || | ||
20 | cookie[1] != INET_DIAG_NOCOOKIE) && | ||
21 | ((u32)(unsigned long)sk != cookie[0] || | ||
22 | (u32)((((unsigned long)sk) >> 31) >> 1) != cookie[1])) | ||
23 | return -ESTALE; | ||
24 | else | ||
25 | return 0; | ||
26 | } | ||
27 | EXPORT_SYMBOL_GPL(sock_diag_check_cookie); | ||
28 | |||
29 | void sock_diag_save_cookie(void *sk, __u32 *cookie) | ||
30 | { | ||
31 | cookie[0] = (u32)(unsigned long)sk; | ||
32 | cookie[1] = (u32)(((unsigned long)sk >> 31) >> 1); | ||
33 | } | ||
34 | EXPORT_SYMBOL_GPL(sock_diag_save_cookie); | ||
35 | |||
36 | int sock_diag_put_meminfo(struct sock *sk, struct sk_buff *skb, int attrtype) | ||
37 | { | ||
38 | __u32 *mem; | ||
39 | |||
40 | mem = RTA_DATA(__RTA_PUT(skb, attrtype, SK_MEMINFO_VARS * sizeof(__u32))); | ||
41 | |||
42 | mem[SK_MEMINFO_RMEM_ALLOC] = sk_rmem_alloc_get(sk); | ||
43 | mem[SK_MEMINFO_RCVBUF] = sk->sk_rcvbuf; | ||
44 | mem[SK_MEMINFO_WMEM_ALLOC] = sk_wmem_alloc_get(sk); | ||
45 | mem[SK_MEMINFO_SNDBUF] = sk->sk_sndbuf; | ||
46 | mem[SK_MEMINFO_FWD_ALLOC] = sk->sk_forward_alloc; | ||
47 | mem[SK_MEMINFO_WMEM_QUEUED] = sk->sk_wmem_queued; | ||
48 | mem[SK_MEMINFO_OPTMEM] = atomic_read(&sk->sk_omem_alloc); | ||
49 | |||
50 | return 0; | ||
51 | |||
52 | rtattr_failure: | ||
53 | return -EMSGSIZE; | ||
54 | } | ||
55 | EXPORT_SYMBOL_GPL(sock_diag_put_meminfo); | ||
56 | |||
57 | void sock_diag_register_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh)) | ||
58 | { | ||
59 | mutex_lock(&sock_diag_table_mutex); | ||
60 | inet_rcv_compat = fn; | ||
61 | mutex_unlock(&sock_diag_table_mutex); | ||
62 | } | ||
63 | EXPORT_SYMBOL_GPL(sock_diag_register_inet_compat); | ||
64 | |||
65 | void sock_diag_unregister_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh)) | ||
66 | { | ||
67 | mutex_lock(&sock_diag_table_mutex); | ||
68 | inet_rcv_compat = NULL; | ||
69 | mutex_unlock(&sock_diag_table_mutex); | ||
70 | } | ||
71 | EXPORT_SYMBOL_GPL(sock_diag_unregister_inet_compat); | ||
72 | |||
73 | int sock_diag_register(struct sock_diag_handler *hndl) | ||
74 | { | ||
75 | int err = 0; | ||
76 | |||
77 | if (hndl->family >= AF_MAX) | ||
78 | return -EINVAL; | ||
79 | |||
80 | mutex_lock(&sock_diag_table_mutex); | ||
81 | if (sock_diag_handlers[hndl->family]) | ||
82 | err = -EBUSY; | ||
83 | else | ||
84 | sock_diag_handlers[hndl->family] = hndl; | ||
85 | mutex_unlock(&sock_diag_table_mutex); | ||
86 | |||
87 | return err; | ||
88 | } | ||
89 | EXPORT_SYMBOL_GPL(sock_diag_register); | ||
90 | |||
91 | void sock_diag_unregister(struct sock_diag_handler *hnld) | ||
92 | { | ||
93 | int family = hnld->family; | ||
94 | |||
95 | if (family >= AF_MAX) | ||
96 | return; | ||
97 | |||
98 | mutex_lock(&sock_diag_table_mutex); | ||
99 | BUG_ON(sock_diag_handlers[family] != hnld); | ||
100 | sock_diag_handlers[family] = NULL; | ||
101 | mutex_unlock(&sock_diag_table_mutex); | ||
102 | } | ||
103 | EXPORT_SYMBOL_GPL(sock_diag_unregister); | ||
104 | |||
105 | static inline struct sock_diag_handler *sock_diag_lock_handler(int family) | ||
106 | { | ||
107 | if (sock_diag_handlers[family] == NULL) | ||
108 | request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK, | ||
109 | NETLINK_SOCK_DIAG, family); | ||
110 | |||
111 | mutex_lock(&sock_diag_table_mutex); | ||
112 | return sock_diag_handlers[family]; | ||
113 | } | ||
114 | |||
115 | static inline void sock_diag_unlock_handler(struct sock_diag_handler *h) | ||
116 | { | ||
117 | mutex_unlock(&sock_diag_table_mutex); | ||
118 | } | ||
119 | |||
120 | static int __sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) | ||
121 | { | ||
122 | int err; | ||
123 | struct sock_diag_req *req = NLMSG_DATA(nlh); | ||
124 | struct sock_diag_handler *hndl; | ||
125 | |||
126 | if (nlmsg_len(nlh) < sizeof(*req)) | ||
127 | return -EINVAL; | ||
128 | |||
129 | hndl = sock_diag_lock_handler(req->sdiag_family); | ||
130 | if (hndl == NULL) | ||
131 | err = -ENOENT; | ||
132 | else | ||
133 | err = hndl->dump(skb, nlh); | ||
134 | sock_diag_unlock_handler(hndl); | ||
135 | |||
136 | return err; | ||
137 | } | ||
138 | |||
139 | static int sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) | ||
140 | { | ||
141 | int ret; | ||
142 | |||
143 | switch (nlh->nlmsg_type) { | ||
144 | case TCPDIAG_GETSOCK: | ||
145 | case DCCPDIAG_GETSOCK: | ||
146 | if (inet_rcv_compat == NULL) | ||
147 | request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK, | ||
148 | NETLINK_SOCK_DIAG, AF_INET); | ||
149 | |||
150 | mutex_lock(&sock_diag_table_mutex); | ||
151 | if (inet_rcv_compat != NULL) | ||
152 | ret = inet_rcv_compat(skb, nlh); | ||
153 | else | ||
154 | ret = -EOPNOTSUPP; | ||
155 | mutex_unlock(&sock_diag_table_mutex); | ||
156 | |||
157 | return ret; | ||
158 | case SOCK_DIAG_BY_FAMILY: | ||
159 | return __sock_diag_rcv_msg(skb, nlh); | ||
160 | default: | ||
161 | return -EINVAL; | ||
162 | } | ||
163 | } | ||
164 | |||
165 | static DEFINE_MUTEX(sock_diag_mutex); | ||
166 | |||
167 | static void sock_diag_rcv(struct sk_buff *skb) | ||
168 | { | ||
169 | mutex_lock(&sock_diag_mutex); | ||
170 | netlink_rcv_skb(skb, &sock_diag_rcv_msg); | ||
171 | mutex_unlock(&sock_diag_mutex); | ||
172 | } | ||
173 | |||
174 | struct sock *sock_diag_nlsk; | ||
175 | EXPORT_SYMBOL_GPL(sock_diag_nlsk); | ||
176 | |||
177 | static int __init sock_diag_init(void) | ||
178 | { | ||
179 | sock_diag_nlsk = netlink_kernel_create(&init_net, NETLINK_SOCK_DIAG, 0, | ||
180 | sock_diag_rcv, NULL, THIS_MODULE); | ||
181 | return sock_diag_nlsk == NULL ? -ENOMEM : 0; | ||
182 | } | ||
183 | |||
184 | static void __exit sock_diag_exit(void) | ||
185 | { | ||
186 | netlink_kernel_release(sock_diag_nlsk); | ||
187 | } | ||
188 | |||
189 | module_init(sock_diag_init); | ||
190 | module_exit(sock_diag_exit); | ||
191 | MODULE_LICENSE("GPL"); | ||
192 | MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_SOCK_DIAG); | ||
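The new sock_diag.c hoists the address-family-independent half of inet_diag into a generic NETLINK_SOCK_DIAG multiplexer: per-family handlers register themselves in sock_diag_handlers[], SOCK_DIAG_BY_FAMILY requests are dispatched to them, and the legacy TCPDIAG_GETSOCK/DCCPDIAG_GETSOCK message types are forwarded through the inet_rcv_compat hook so existing inet_diag binaries keep working. Two details are easy to miss. First, sock_diag_lock_handler() and sock_diag_unlock_handler() are an intentionally asymmetric pair: the former returns with sock_diag_table_mutex held, pinning the handler against unregistration for the duration of its ->dump(), and it calls request_module() before taking the mutex because the loaded module's init will itself call sock_diag_register(). (Relatedly, sock_diag_unregister_inet_compat() clears the hook unconditionally; its fn argument is documentation only.) Second, the cookie code packs the socket pointer into two __u32s with a split (>> 31) >> 1 shift: on a 32-bit kernel a shift of an unsigned long by 32 would be undefined behavior, while two smaller shifts are well defined and simply yield 0 for the high word. That trick in isolation, as a standalone sketch (split_cookie() is illustrative, not a kernel symbol):

    #include <stdint.h>

    static void split_cookie(unsigned long ptr, uint32_t cookie[2])
    {
            cookie[0] = (uint32_t)ptr;
            /* equals ptr >> 32 on 64-bit; on 32-bit the two well-defined
             * shifts produce 0 instead of invoking undefined behavior */
            cookie[1] = (uint32_t)((ptr >> 31) >> 1);
    }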
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 77a65f031488..d05559d4d9cd 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c | |||
@@ -68,8 +68,13 @@ static int rps_sock_flow_sysctl(ctl_table *table, int write, | |||
68 | 68 | ||
69 | if (sock_table != orig_sock_table) { | 69 | if (sock_table != orig_sock_table) { |
70 | rcu_assign_pointer(rps_sock_flow_table, sock_table); | 70 | rcu_assign_pointer(rps_sock_flow_table, sock_table); |
71 | synchronize_rcu(); | 71 | if (sock_table) |
72 | vfree(orig_sock_table); | 72 | jump_label_inc(&rps_needed); |
73 | if (orig_sock_table) { | ||
74 | jump_label_dec(&rps_needed); | ||
75 | synchronize_rcu(); | ||
76 | vfree(orig_sock_table); | ||
77 | } | ||
73 | } | 78 | } |
74 | } | 79 | } |
75 | 80 | ||
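This sysctl hunk ties the RPS flow table's lifetime to the rps_needed jump label: installing a table increments the key, patching the static branch in the receive fast path from a no-op into a taken branch, and removing a table decrements it again. The synchronize_rcu() now runs only when an old table is actually freed, which is still mandatory: readers reach the table via rcu_dereference(), so a grace period must elapse before vfree(). The pattern, sketched with the jump-label API of this kernel generation and illustrative my_* names:

    #include <linux/jump_label.h>

    static struct jump_label_key my_key;

    static void my_rx_fast_path(void)
    {
            if (static_branch(&my_key)) {   /* compiled to a NOP until enabled */
                    /* expensive flow-steering work, only when configured */
            }
    }

    /* control path:
     *   jump_label_inc(&my_key);   first user patches the branch in
     *   jump_label_dec(&my_key);   last user patches it back out
     */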