author     Linus Torvalds <torvalds@linux-foundation.org>  2012-01-06 20:22:09 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>  2012-01-06 20:22:09 -0500
commit     9753dfe19a85e7e45a34a56f4cb2048bb4f50e27 (patch)
tree       c017a1b4a70b8447c71b01d8b320e071546b5c9d /net/core
parent     edf7c8148ec40c0fd27c0ef3f688defcc65e3913 (diff)
parent     9f42f126154786e6e76df513004800c8c633f020 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1958 commits)
  net: pack skb_shared_info more efficiently
  net_sched: red: split red_parms into parms and vars
  net_sched: sfq: extend limits
  cnic: Improve error recovery on bnx2x devices
  cnic: Re-init dev->stats_addr after chip reset
  net_sched: Bug in netem reordering
  bna: fix sparse warnings/errors
  bna: make ethtool_ops and strings const
  xgmac: cleanups
  net: make ethtool_ops const
  vmxnet3" make ethtool ops const
  xen-netback: make ops structs const
  virtio_net: Pass gfp flags when allocating rx buffers.
  ixgbe: FCoE: Add support for ndo_get_fcoe_hbainfo() call
  netdev: FCoE: Add new ndo_get_fcoe_hbainfo() call
  igb: reset PHY after recovering from PHY power down
  igb: add basic runtime PM support
  igb: Add support for byte queue limits.
  e1000: cleanup CE4100 MDIO registers access
  e1000: unmap ce4100_gbe_mdio_base_virt in e1000_remove
  ...
Diffstat (limited to 'net/core')
-rw-r--r--  net/core/Makefile           |   6
-rw-r--r--  net/core/dev.c              | 307
-rw-r--r--  net/core/dst.c              |   2
-rw-r--r--  net/core/ethtool.c          | 712
-rw-r--r--  net/core/flow_dissector.c   | 143
-rw-r--r--  net/core/neighbour.c        | 222
-rw-r--r--  net/core/net-sysfs.c        | 323
-rw-r--r--  net/core/netpoll.c          |  10
-rw-r--r--  net/core/netprio_cgroup.c   | 344
-rw-r--r--  net/core/pktgen.c           |  17
-rw-r--r--  net/core/rtnetlink.c        |  25
-rw-r--r--  net/core/secure_seq.c       |   6
-rw-r--r--  net/core/skbuff.c           |  89
-rw-r--r--  net/core/sock.c             | 197
-rw-r--r--  net/core/sock_diag.c        | 192
-rw-r--r--  net/core/sysctl_net_core.c  |   9
16 files changed, 1624 insertions(+), 980 deletions(-)
diff --git a/net/core/Makefile b/net/core/Makefile
index 0d357b1c4e57..674641b13aea 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -3,12 +3,13 @@
 #
 
 obj-y := sock.o request_sock.o skbuff.o iovec.o datagram.o stream.o scm.o \
-	 gen_stats.o gen_estimator.o net_namespace.o secure_seq.o
+	 gen_stats.o gen_estimator.o net_namespace.o secure_seq.o flow_dissector.o
 
 obj-$(CONFIG_SYSCTL) += sysctl_net_core.o
 
 obj-y += dev.o ethtool.o dev_addr_lists.o dst.o netevent.o \
-	 neighbour.o rtnetlink.o utils.o link_watch.o filter.o
+	 neighbour.o rtnetlink.o utils.o link_watch.o filter.o \
+	 sock_diag.o
 
 obj-$(CONFIG_XFRM) += flow.o
 obj-y += net-sysfs.o
@@ -19,3 +20,4 @@ obj-$(CONFIG_FIB_RULES) += fib_rules.o
 obj-$(CONFIG_TRACEPOINTS) += net-traces.o
 obj-$(CONFIG_NET_DROP_MONITOR) += drop_monitor.o
 obj-$(CONFIG_NETWORK_PHY_TIMESTAMPING) += timestamping.o
+obj-$(CONFIG_NETPRIO_CGROUP) += netprio_cgroup.o
diff --git a/net/core/dev.c b/net/core/dev.c
index 5a13edfc9f73..f494675471a9 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -133,10 +133,9 @@
133#include <linux/pci.h> 133#include <linux/pci.h>
134#include <linux/inetdevice.h> 134#include <linux/inetdevice.h>
135#include <linux/cpu_rmap.h> 135#include <linux/cpu_rmap.h>
136#include <linux/if_tunnel.h>
137#include <linux/if_pppox.h>
138#include <linux/ppp_defs.h>
139#include <linux/net_tstamp.h> 136#include <linux/net_tstamp.h>
137#include <linux/jump_label.h>
138#include <net/flow_keys.h>
140 139
141#include "net-sysfs.h" 140#include "net-sysfs.h"
142 141
@@ -1320,8 +1319,6 @@ EXPORT_SYMBOL(dev_close);
1320 */ 1319 */
1321void dev_disable_lro(struct net_device *dev) 1320void dev_disable_lro(struct net_device *dev)
1322{ 1321{
1323 u32 flags;
1324
1325 /* 1322 /*
1326 * If we're trying to disable lro on a vlan device 1323 * If we're trying to disable lro on a vlan device
1327 * use the underlying physical device instead 1324 * use the underlying physical device instead
@@ -1329,15 +1326,9 @@ void dev_disable_lro(struct net_device *dev)
1329 if (is_vlan_dev(dev)) 1326 if (is_vlan_dev(dev))
1330 dev = vlan_dev_real_dev(dev); 1327 dev = vlan_dev_real_dev(dev);
1331 1328
1332 if (dev->ethtool_ops && dev->ethtool_ops->get_flags) 1329 dev->wanted_features &= ~NETIF_F_LRO;
1333 flags = dev->ethtool_ops->get_flags(dev); 1330 netdev_update_features(dev);
1334 else
1335 flags = ethtool_op_get_flags(dev);
1336 1331
1337 if (!(flags & ETH_FLAG_LRO))
1338 return;
1339
1340 __ethtool_set_flags(dev, flags & ~ETH_FLAG_LRO);
1341 if (unlikely(dev->features & NETIF_F_LRO)) 1332 if (unlikely(dev->features & NETIF_F_LRO))
1342 netdev_WARN(dev, "failed to disable LRO!\n"); 1333 netdev_WARN(dev, "failed to disable LRO!\n");
1343} 1334}
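
With the legacy ethtool flag ops gone, dev_disable_lro() simply clears NETIF_F_LRO from wanted_features and lets the feature machinery apply it. A minimal, hypothetical driver-side sketch of how LRO is exposed under that model (the foo_* names and foo_hw_set_lro() helper are illustrative, not part of this patch):

    static int foo_set_features(struct net_device *dev, netdev_features_t features)
    {
    	netdev_features_t changed = dev->features ^ features;
    
    	if (changed & NETIF_F_LRO)
    		/* illustrative hardware helper */
    		foo_hw_set_lro(netdev_priv(dev), !!(features & NETIF_F_LRO));
    	return 0;
    }
    
    static void foo_setup_features(struct net_device *dev)
    {
    	dev->hw_features |= NETIF_F_LRO;	/* toggleable by core/user */
    	dev->features    |= NETIF_F_LRO;	/* enabled by default */
    }
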
@@ -1450,34 +1441,55 @@ int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
1450} 1441}
1451EXPORT_SYMBOL(call_netdevice_notifiers); 1442EXPORT_SYMBOL(call_netdevice_notifiers);
1452 1443
1453/* When > 0 there are consumers of rx skb time stamps */ 1444static struct jump_label_key netstamp_needed __read_mostly;
1454static atomic_t netstamp_needed = ATOMIC_INIT(0); 1445#ifdef HAVE_JUMP_LABEL
1446/* We are not allowed to call jump_label_dec() from irq context
1447 * If net_disable_timestamp() is called from irq context, defer the
1448 * jump_label_dec() calls.
1449 */
1450static atomic_t netstamp_needed_deferred;
1451#endif
1455 1452
1456void net_enable_timestamp(void) 1453void net_enable_timestamp(void)
1457{ 1454{
1458 atomic_inc(&netstamp_needed); 1455#ifdef HAVE_JUMP_LABEL
1456 int deferred = atomic_xchg(&netstamp_needed_deferred, 0);
1457
1458 if (deferred) {
1459 while (--deferred)
1460 jump_label_dec(&netstamp_needed);
1461 return;
1462 }
1463#endif
1464 WARN_ON(in_interrupt());
1465 jump_label_inc(&netstamp_needed);
1459} 1466}
1460EXPORT_SYMBOL(net_enable_timestamp); 1467EXPORT_SYMBOL(net_enable_timestamp);
1461 1468
1462void net_disable_timestamp(void) 1469void net_disable_timestamp(void)
1463{ 1470{
1464 atomic_dec(&netstamp_needed); 1471#ifdef HAVE_JUMP_LABEL
1472 if (in_interrupt()) {
1473 atomic_inc(&netstamp_needed_deferred);
1474 return;
1475 }
1476#endif
1477 jump_label_dec(&netstamp_needed);
1465} 1478}
1466EXPORT_SYMBOL(net_disable_timestamp); 1479EXPORT_SYMBOL(net_disable_timestamp);
1467 1480
1468static inline void net_timestamp_set(struct sk_buff *skb) 1481static inline void net_timestamp_set(struct sk_buff *skb)
1469{ 1482{
1470 if (atomic_read(&netstamp_needed)) 1483 skb->tstamp.tv64 = 0;
1484 if (static_branch(&netstamp_needed))
1471 __net_timestamp(skb); 1485 __net_timestamp(skb);
1472 else
1473 skb->tstamp.tv64 = 0;
1474} 1486}
1475 1487
1476static inline void net_timestamp_check(struct sk_buff *skb) 1488#define net_timestamp_check(COND, SKB) \
1477{ 1489 if (static_branch(&netstamp_needed)) { \
1478 if (!skb->tstamp.tv64 && atomic_read(&netstamp_needed)) 1490 if ((COND) && !(SKB)->tstamp.tv64) \
1479 __net_timestamp(skb); 1491 __net_timestamp(SKB); \
1480} 1492 } \
1481 1493
1482static int net_hwtstamp_validate(struct ifreq *ifr) 1494static int net_hwtstamp_validate(struct ifreq *ifr)
1483{ 1495{
@@ -1924,7 +1936,8 @@ EXPORT_SYMBOL(skb_checksum_help);
1924 * It may return NULL if the skb requires no segmentation. This is 1936 * It may return NULL if the skb requires no segmentation. This is
1925 * only possible when GSO is used for verifying header integrity. 1937 * only possible when GSO is used for verifying header integrity.
1926 */ 1938 */
1927struct sk_buff *skb_gso_segment(struct sk_buff *skb, u32 features) 1939struct sk_buff *skb_gso_segment(struct sk_buff *skb,
1940 netdev_features_t features)
1928{ 1941{
1929 struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT); 1942 struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
1930 struct packet_type *ptype; 1943 struct packet_type *ptype;
@@ -1954,9 +1967,9 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, u32 features)
1954 if (dev && dev->ethtool_ops && dev->ethtool_ops->get_drvinfo) 1967 if (dev && dev->ethtool_ops && dev->ethtool_ops->get_drvinfo)
1955 dev->ethtool_ops->get_drvinfo(dev, &info); 1968 dev->ethtool_ops->get_drvinfo(dev, &info);
1956 1969
1957 WARN(1, "%s: caps=(0x%lx, 0x%lx) len=%d data_len=%d ip_summed=%d\n", 1970 WARN(1, "%s: caps=(%pNF, %pNF) len=%d data_len=%d ip_summed=%d\n",
1958 info.driver, dev ? dev->features : 0L, 1971 info.driver, dev ? &dev->features : NULL,
1959 skb->sk ? skb->sk->sk_route_caps : 0L, 1972 skb->sk ? &skb->sk->sk_route_caps : NULL,
1960 skb->len, skb->data_len, skb->ip_summed); 1973 skb->len, skb->data_len, skb->ip_summed);
1961 1974
1962 if (skb_header_cloned(skb) && 1975 if (skb_header_cloned(skb) &&
@@ -2065,7 +2078,7 @@ static void dev_gso_skb_destructor(struct sk_buff *skb)
2065 * This function segments the given skb and stores the list of segments 2078 * This function segments the given skb and stores the list of segments
2066 * in skb->next. 2079 * in skb->next.
2067 */ 2080 */
2068static int dev_gso_segment(struct sk_buff *skb, int features) 2081static int dev_gso_segment(struct sk_buff *skb, netdev_features_t features)
2069{ 2082{
2070 struct sk_buff *segs; 2083 struct sk_buff *segs;
2071 2084
@@ -2104,7 +2117,7 @@ static inline void skb_orphan_try(struct sk_buff *skb)
2104 } 2117 }
2105} 2118}
2106 2119
2107static bool can_checksum_protocol(unsigned long features, __be16 protocol) 2120static bool can_checksum_protocol(netdev_features_t features, __be16 protocol)
2108{ 2121{
2109 return ((features & NETIF_F_GEN_CSUM) || 2122 return ((features & NETIF_F_GEN_CSUM) ||
2110 ((features & NETIF_F_V4_CSUM) && 2123 ((features & NETIF_F_V4_CSUM) &&
@@ -2115,7 +2128,8 @@ static bool can_checksum_protocol(unsigned long features, __be16 protocol)
2115 protocol == htons(ETH_P_FCOE))); 2128 protocol == htons(ETH_P_FCOE)));
2116} 2129}
2117 2130
2118static u32 harmonize_features(struct sk_buff *skb, __be16 protocol, u32 features) 2131static netdev_features_t harmonize_features(struct sk_buff *skb,
2132 __be16 protocol, netdev_features_t features)
2119{ 2133{
2120 if (!can_checksum_protocol(features, protocol)) { 2134 if (!can_checksum_protocol(features, protocol)) {
2121 features &= ~NETIF_F_ALL_CSUM; 2135 features &= ~NETIF_F_ALL_CSUM;
@@ -2127,10 +2141,10 @@ static u32 harmonize_features(struct sk_buff *skb, __be16 protocol, u32 features
2127 return features; 2141 return features;
2128} 2142}
2129 2143
2130u32 netif_skb_features(struct sk_buff *skb) 2144netdev_features_t netif_skb_features(struct sk_buff *skb)
2131{ 2145{
2132 __be16 protocol = skb->protocol; 2146 __be16 protocol = skb->protocol;
2133 u32 features = skb->dev->features; 2147 netdev_features_t features = skb->dev->features;
2134 2148
2135 if (protocol == htons(ETH_P_8021Q)) { 2149 if (protocol == htons(ETH_P_8021Q)) {
2136 struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data; 2150 struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
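
Throughout this diff, u32 feature masks become netdev_features_t (a 64-bit type), which is why the helpers above change signature. A hedged sketch of a driver-side ndo_fix_features callback written against the wider type (the foo_ name is illustrative):

    static netdev_features_t foo_fix_features(struct net_device *dev,
    					      netdev_features_t features)
    {
    	/* TSO is only meaningful with scatter/gather enabled */
    	if (!(features & NETIF_F_SG))
    		features &= ~NETIF_F_ALL_TSO;
    
    	return features;
    }
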
@@ -2176,7 +2190,7 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
2176 unsigned int skb_len; 2190 unsigned int skb_len;
2177 2191
2178 if (likely(!skb->next)) { 2192 if (likely(!skb->next)) {
2179 u32 features; 2193 netdev_features_t features;
2180 2194
2181 /* 2195 /*
2182 * If device doesn't need skb->dst, release it right now while 2196 * If device doesn't need skb->dst, release it right now while
@@ -2257,7 +2271,7 @@ gso:
2257 return rc; 2271 return rc;
2258 } 2272 }
2259 txq_trans_update(txq); 2273 txq_trans_update(txq);
2260 if (unlikely(netif_tx_queue_stopped(txq) && skb->next)) 2274 if (unlikely(netif_xmit_stopped(txq) && skb->next))
2261 return NETDEV_TX_BUSY; 2275 return NETDEV_TX_BUSY;
2262 } while (skb->next); 2276 } while (skb->next);
2263 2277
@@ -2457,6 +2471,18 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
2457 return rc; 2471 return rc;
2458} 2472}
2459 2473
2474#if IS_ENABLED(CONFIG_NETPRIO_CGROUP)
2475static void skb_update_prio(struct sk_buff *skb)
2476{
2477 struct netprio_map *map = rcu_dereference_bh(skb->dev->priomap);
2478
2479 if ((!skb->priority) && (skb->sk) && map)
2480 skb->priority = map->priomap[skb->sk->sk_cgrp_prioidx];
2481}
2482#else
2483#define skb_update_prio(skb)
2484#endif
2485
2460static DEFINE_PER_CPU(int, xmit_recursion); 2486static DEFINE_PER_CPU(int, xmit_recursion);
2461#define RECURSION_LIMIT 10 2487#define RECURSION_LIMIT 10
2462 2488
@@ -2497,6 +2523,8 @@ int dev_queue_xmit(struct sk_buff *skb)
2497 */ 2523 */
2498 rcu_read_lock_bh(); 2524 rcu_read_lock_bh();
2499 2525
2526 skb_update_prio(skb);
2527
2500 txq = dev_pick_tx(dev, skb); 2528 txq = dev_pick_tx(dev, skb);
2501 q = rcu_dereference_bh(txq->qdisc); 2529 q = rcu_dereference_bh(txq->qdisc);
2502 2530
@@ -2531,7 +2559,7 @@ int dev_queue_xmit(struct sk_buff *skb)
2531 2559
2532 HARD_TX_LOCK(dev, txq, cpu); 2560 HARD_TX_LOCK(dev, txq, cpu);
2533 2561
2534 if (!netif_tx_queue_stopped(txq)) { 2562 if (!netif_xmit_stopped(txq)) {
2535 __this_cpu_inc(xmit_recursion); 2563 __this_cpu_inc(xmit_recursion);
2536 rc = dev_hard_start_xmit(skb, dev, txq); 2564 rc = dev_hard_start_xmit(skb, dev, txq);
2537 __this_cpu_dec(xmit_recursion); 2565 __this_cpu_dec(xmit_recursion);
@@ -2592,123 +2620,28 @@ static inline void ____napi_schedule(struct softnet_data *sd,
2592 */ 2620 */
2593void __skb_get_rxhash(struct sk_buff *skb) 2621void __skb_get_rxhash(struct sk_buff *skb)
2594{ 2622{
2595 int nhoff, hash = 0, poff; 2623 struct flow_keys keys;
2596 const struct ipv6hdr *ip6; 2624 u32 hash;
2597 const struct iphdr *ip;
2598 const struct vlan_hdr *vlan;
2599 u8 ip_proto;
2600 u32 addr1, addr2;
2601 u16 proto;
2602 union {
2603 u32 v32;
2604 u16 v16[2];
2605 } ports;
2606
2607 nhoff = skb_network_offset(skb);
2608 proto = skb->protocol;
2609
2610again:
2611 switch (proto) {
2612 case __constant_htons(ETH_P_IP):
2613ip:
2614 if (!pskb_may_pull(skb, sizeof(*ip) + nhoff))
2615 goto done;
2616
2617 ip = (const struct iphdr *) (skb->data + nhoff);
2618 if (ip_is_fragment(ip))
2619 ip_proto = 0;
2620 else
2621 ip_proto = ip->protocol;
2622 addr1 = (__force u32) ip->saddr;
2623 addr2 = (__force u32) ip->daddr;
2624 nhoff += ip->ihl * 4;
2625 break;
2626 case __constant_htons(ETH_P_IPV6):
2627ipv6:
2628 if (!pskb_may_pull(skb, sizeof(*ip6) + nhoff))
2629 goto done;
2630
2631 ip6 = (const struct ipv6hdr *) (skb->data + nhoff);
2632 ip_proto = ip6->nexthdr;
2633 addr1 = (__force u32) ip6->saddr.s6_addr32[3];
2634 addr2 = (__force u32) ip6->daddr.s6_addr32[3];
2635 nhoff += 40;
2636 break;
2637 case __constant_htons(ETH_P_8021Q):
2638 if (!pskb_may_pull(skb, sizeof(*vlan) + nhoff))
2639 goto done;
2640 vlan = (const struct vlan_hdr *) (skb->data + nhoff);
2641 proto = vlan->h_vlan_encapsulated_proto;
2642 nhoff += sizeof(*vlan);
2643 goto again;
2644 case __constant_htons(ETH_P_PPP_SES):
2645 if (!pskb_may_pull(skb, PPPOE_SES_HLEN + nhoff))
2646 goto done;
2647 proto = *((__be16 *) (skb->data + nhoff +
2648 sizeof(struct pppoe_hdr)));
2649 nhoff += PPPOE_SES_HLEN;
2650 switch (proto) {
2651 case __constant_htons(PPP_IP):
2652 goto ip;
2653 case __constant_htons(PPP_IPV6):
2654 goto ipv6;
2655 default:
2656 goto done;
2657 }
2658 default:
2659 goto done;
2660 }
2661
2662 switch (ip_proto) {
2663 case IPPROTO_GRE:
2664 if (pskb_may_pull(skb, nhoff + 16)) {
2665 u8 *h = skb->data + nhoff;
2666 __be16 flags = *(__be16 *)h;
2667 2625
2668 /* 2626 if (!skb_flow_dissect(skb, &keys))
2669 * Only look inside GRE if version zero and no 2627 return;
2670 * routing
2671 */
2672 if (!(flags & (GRE_VERSION|GRE_ROUTING))) {
2673 proto = *(__be16 *)(h + 2);
2674 nhoff += 4;
2675 if (flags & GRE_CSUM)
2676 nhoff += 4;
2677 if (flags & GRE_KEY)
2678 nhoff += 4;
2679 if (flags & GRE_SEQ)
2680 nhoff += 4;
2681 goto again;
2682 }
2683 }
2684 break;
2685 case IPPROTO_IPIP:
2686 goto again;
2687 default:
2688 break;
2689 }
2690 2628
2691 ports.v32 = 0; 2629 if (keys.ports) {
2692 poff = proto_ports_offset(ip_proto); 2630 if ((__force u16)keys.port16[1] < (__force u16)keys.port16[0])
2693 if (poff >= 0) { 2631 swap(keys.port16[0], keys.port16[1]);
2694 nhoff += poff; 2632 skb->l4_rxhash = 1;
2695 if (pskb_may_pull(skb, nhoff + 4)) {
2696 ports.v32 = * (__force u32 *) (skb->data + nhoff);
2697 if (ports.v16[1] < ports.v16[0])
2698 swap(ports.v16[0], ports.v16[1]);
2699 skb->l4_rxhash = 1;
2700 }
2701 } 2633 }
2702 2634
2703 /* get a consistent hash (same value on both flow directions) */ 2635 /* get a consistent hash (same value on both flow directions) */
2704 if (addr2 < addr1) 2636 if ((__force u32)keys.dst < (__force u32)keys.src)
2705 swap(addr1, addr2); 2637 swap(keys.dst, keys.src);
2706 2638
2707 hash = jhash_3words(addr1, addr2, ports.v32, hashrnd); 2639 hash = jhash_3words((__force u32)keys.dst,
2640 (__force u32)keys.src,
2641 (__force u32)keys.ports, hashrnd);
2708 if (!hash) 2642 if (!hash)
2709 hash = 1; 2643 hash = 1;
2710 2644
2711done:
2712 skb->rxhash = hash; 2645 skb->rxhash = hash;
2713} 2646}
2714EXPORT_SYMBOL(__skb_get_rxhash); 2647EXPORT_SYMBOL(__skb_get_rxhash);
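
__skb_get_rxhash() now delegates all header parsing to the new flow dissector (net/core/flow_dissector.c). A hedged sketch of another caller reusing the same helper; foo_classify() and the hash seed are illustrative:

    #include <linux/jhash.h>
    #include <net/flow_keys.h>
    
    static u32 foo_classify(struct sk_buff *skb)
    {
    	struct flow_keys keys;
    
    	if (!skb_flow_dissect(skb, &keys))
    		return 0;	/* headers we cannot parse */
    
    	/* keys.src/keys.dst are the IP addresses, keys.ports the L4 ports */
    	return jhash_3words((__force u32)keys.dst, (__force u32)keys.src,
    			    (__force u32)keys.ports, 0x12345678);
    }
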
@@ -2719,6 +2652,8 @@ EXPORT_SYMBOL(__skb_get_rxhash);
2719struct rps_sock_flow_table __rcu *rps_sock_flow_table __read_mostly; 2652struct rps_sock_flow_table __rcu *rps_sock_flow_table __read_mostly;
2720EXPORT_SYMBOL(rps_sock_flow_table); 2653EXPORT_SYMBOL(rps_sock_flow_table);
2721 2654
2655struct jump_label_key rps_needed __read_mostly;
2656
2722static struct rps_dev_flow * 2657static struct rps_dev_flow *
2723set_rps_cpu(struct net_device *dev, struct sk_buff *skb, 2658set_rps_cpu(struct net_device *dev, struct sk_buff *skb,
2724 struct rps_dev_flow *rflow, u16 next_cpu) 2659 struct rps_dev_flow *rflow, u16 next_cpu)
@@ -2998,12 +2933,11 @@ int netif_rx(struct sk_buff *skb)
2998 if (netpoll_rx(skb)) 2933 if (netpoll_rx(skb))
2999 return NET_RX_DROP; 2934 return NET_RX_DROP;
3000 2935
3001 if (netdev_tstamp_prequeue) 2936 net_timestamp_check(netdev_tstamp_prequeue, skb);
3002 net_timestamp_check(skb);
3003 2937
3004 trace_netif_rx(skb); 2938 trace_netif_rx(skb);
3005#ifdef CONFIG_RPS 2939#ifdef CONFIG_RPS
3006 { 2940 if (static_branch(&rps_needed)) {
3007 struct rps_dev_flow voidflow, *rflow = &voidflow; 2941 struct rps_dev_flow voidflow, *rflow = &voidflow;
3008 int cpu; 2942 int cpu;
3009 2943
@@ -3018,14 +2952,13 @@ int netif_rx(struct sk_buff *skb)
3018 2952
3019 rcu_read_unlock(); 2953 rcu_read_unlock();
3020 preempt_enable(); 2954 preempt_enable();
3021 } 2955 } else
3022#else 2956#endif
3023 { 2957 {
3024 unsigned int qtail; 2958 unsigned int qtail;
3025 ret = enqueue_to_backlog(skb, get_cpu(), &qtail); 2959 ret = enqueue_to_backlog(skb, get_cpu(), &qtail);
3026 put_cpu(); 2960 put_cpu();
3027 } 2961 }
3028#endif
3029 return ret; 2962 return ret;
3030} 2963}
3031EXPORT_SYMBOL(netif_rx); 2964EXPORT_SYMBOL(netif_rx);
@@ -3231,8 +3164,7 @@ static int __netif_receive_skb(struct sk_buff *skb)
3231 int ret = NET_RX_DROP; 3164 int ret = NET_RX_DROP;
3232 __be16 type; 3165 __be16 type;
3233 3166
3234 if (!netdev_tstamp_prequeue) 3167 net_timestamp_check(!netdev_tstamp_prequeue, skb);
3235 net_timestamp_check(skb);
3236 3168
3237 trace_netif_receive_skb(skb); 3169 trace_netif_receive_skb(skb);
3238 3170
@@ -3363,14 +3295,13 @@ out:
3363 */ 3295 */
3364int netif_receive_skb(struct sk_buff *skb) 3296int netif_receive_skb(struct sk_buff *skb)
3365{ 3297{
3366 if (netdev_tstamp_prequeue) 3298 net_timestamp_check(netdev_tstamp_prequeue, skb);
3367 net_timestamp_check(skb);
3368 3299
3369 if (skb_defer_rx_timestamp(skb)) 3300 if (skb_defer_rx_timestamp(skb))
3370 return NET_RX_SUCCESS; 3301 return NET_RX_SUCCESS;
3371 3302
3372#ifdef CONFIG_RPS 3303#ifdef CONFIG_RPS
3373 { 3304 if (static_branch(&rps_needed)) {
3374 struct rps_dev_flow voidflow, *rflow = &voidflow; 3305 struct rps_dev_flow voidflow, *rflow = &voidflow;
3375 int cpu, ret; 3306 int cpu, ret;
3376 3307
@@ -3381,16 +3312,12 @@ int netif_receive_skb(struct sk_buff *skb)
3381 if (cpu >= 0) { 3312 if (cpu >= 0) {
3382 ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail); 3313 ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
3383 rcu_read_unlock(); 3314 rcu_read_unlock();
3384 } else { 3315 return ret;
3385 rcu_read_unlock();
3386 ret = __netif_receive_skb(skb);
3387 } 3316 }
3388 3317 rcu_read_unlock();
3389 return ret;
3390 } 3318 }
3391#else
3392 return __netif_receive_skb(skb);
3393#endif 3319#endif
3320 return __netif_receive_skb(skb);
3394} 3321}
3395EXPORT_SYMBOL(netif_receive_skb); 3322EXPORT_SYMBOL(netif_receive_skb);
3396 3323
@@ -4539,7 +4466,7 @@ static void dev_change_rx_flags(struct net_device *dev, int flags)
4539 4466
4540static int __dev_set_promiscuity(struct net_device *dev, int inc) 4467static int __dev_set_promiscuity(struct net_device *dev, int inc)
4541{ 4468{
4542 unsigned short old_flags = dev->flags; 4469 unsigned int old_flags = dev->flags;
4543 uid_t uid; 4470 uid_t uid;
4544 gid_t gid; 4471 gid_t gid;
4545 4472
@@ -4596,7 +4523,7 @@ static int __dev_set_promiscuity(struct net_device *dev, int inc)
4596 */ 4523 */
4597int dev_set_promiscuity(struct net_device *dev, int inc) 4524int dev_set_promiscuity(struct net_device *dev, int inc)
4598{ 4525{
4599 unsigned short old_flags = dev->flags; 4526 unsigned int old_flags = dev->flags;
4600 int err; 4527 int err;
4601 4528
4602 err = __dev_set_promiscuity(dev, inc); 4529 err = __dev_set_promiscuity(dev, inc);
@@ -4623,7 +4550,7 @@ EXPORT_SYMBOL(dev_set_promiscuity);
4623 4550
4624int dev_set_allmulti(struct net_device *dev, int inc) 4551int dev_set_allmulti(struct net_device *dev, int inc)
4625{ 4552{
4626 unsigned short old_flags = dev->flags; 4553 unsigned int old_flags = dev->flags;
4627 4554
4628 ASSERT_RTNL(); 4555 ASSERT_RTNL();
4629 4556
@@ -4726,7 +4653,7 @@ EXPORT_SYMBOL(dev_get_flags);
4726 4653
4727int __dev_change_flags(struct net_device *dev, unsigned int flags) 4654int __dev_change_flags(struct net_device *dev, unsigned int flags)
4728{ 4655{
4729 int old_flags = dev->flags; 4656 unsigned int old_flags = dev->flags;
4730 int ret; 4657 int ret;
4731 4658
4732 ASSERT_RTNL(); 4659 ASSERT_RTNL();
@@ -4809,10 +4736,10 @@ void __dev_notify_flags(struct net_device *dev, unsigned int old_flags)
4809 * Change settings on device based state flags. The flags are 4736 * Change settings on device based state flags. The flags are
4810 * in the userspace exported format. 4737 * in the userspace exported format.
4811 */ 4738 */
4812int dev_change_flags(struct net_device *dev, unsigned flags) 4739int dev_change_flags(struct net_device *dev, unsigned int flags)
4813{ 4740{
4814 int ret, changes; 4741 int ret;
4815 int old_flags = dev->flags; 4742 unsigned int changes, old_flags = dev->flags;
4816 4743
4817 ret = __dev_change_flags(dev, flags); 4744 ret = __dev_change_flags(dev, flags);
4818 if (ret < 0) 4745 if (ret < 0)
@@ -5369,7 +5296,8 @@ static void rollback_registered(struct net_device *dev)
5369 list_del(&single); 5296 list_del(&single);
5370} 5297}
5371 5298
5372static u32 netdev_fix_features(struct net_device *dev, u32 features) 5299static netdev_features_t netdev_fix_features(struct net_device *dev,
5300 netdev_features_t features)
5373{ 5301{
5374 /* Fix illegal checksum combinations */ 5302 /* Fix illegal checksum combinations */
5375 if ((features & NETIF_F_HW_CSUM) && 5303 if ((features & NETIF_F_HW_CSUM) &&
@@ -5378,12 +5306,6 @@ static u32 netdev_fix_features(struct net_device *dev, u32 features)
5378 features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM); 5306 features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
5379 } 5307 }
5380 5308
5381 if ((features & NETIF_F_NO_CSUM) &&
5382 (features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
5383 netdev_warn(dev, "mixed no checksumming and other settings.\n");
5384 features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM);
5385 }
5386
5387 /* Fix illegal SG+CSUM combinations. */ 5309 /* Fix illegal SG+CSUM combinations. */
5388 if ((features & NETIF_F_SG) && 5310 if ((features & NETIF_F_SG) &&
5389 !(features & NETIF_F_ALL_CSUM)) { 5311 !(features & NETIF_F_ALL_CSUM)) {
@@ -5431,7 +5353,7 @@ static u32 netdev_fix_features(struct net_device *dev, u32 features)
5431 5353
5432int __netdev_update_features(struct net_device *dev) 5354int __netdev_update_features(struct net_device *dev)
5433{ 5355{
5434 u32 features; 5356 netdev_features_t features;
5435 int err = 0; 5357 int err = 0;
5436 5358
5437 ASSERT_RTNL(); 5359 ASSERT_RTNL();
@@ -5447,16 +5369,16 @@ int __netdev_update_features(struct net_device *dev)
5447 if (dev->features == features) 5369 if (dev->features == features)
5448 return 0; 5370 return 0;
5449 5371
5450 netdev_dbg(dev, "Features changed: 0x%08x -> 0x%08x\n", 5372 netdev_dbg(dev, "Features changed: %pNF -> %pNF\n",
5451 dev->features, features); 5373 &dev->features, &features);
5452 5374
5453 if (dev->netdev_ops->ndo_set_features) 5375 if (dev->netdev_ops->ndo_set_features)
5454 err = dev->netdev_ops->ndo_set_features(dev, features); 5376 err = dev->netdev_ops->ndo_set_features(dev, features);
5455 5377
5456 if (unlikely(err < 0)) { 5378 if (unlikely(err < 0)) {
5457 netdev_err(dev, 5379 netdev_err(dev,
5458 "set_features() failed (%d); wanted 0x%08x, left 0x%08x\n", 5380 "set_features() failed (%d); wanted %pNF, left %pNF\n",
5459 err, features, dev->features); 5381 err, &features, &dev->features);
5460 return -1; 5382 return -1;
5461 } 5383 }
5462 5384
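
The format strings above switch from 0x%08x to %pNF, a printk extension added along with netdev_features_t so 64-bit masks print correctly even on 32-bit builds; it takes a pointer to the mask, e.g.:

    netdev_dbg(dev, "features now %pNF, wanted %pNF\n",
    	   &dev->features, &dev->wanted_features);
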
@@ -5555,6 +5477,9 @@ static void netdev_init_one_queue(struct net_device *dev,
5555 queue->xmit_lock_owner = -1; 5477 queue->xmit_lock_owner = -1;
5556 netdev_queue_numa_node_write(queue, NUMA_NO_NODE); 5478 netdev_queue_numa_node_write(queue, NUMA_NO_NODE);
5557 queue->dev = dev; 5479 queue->dev = dev;
5480#ifdef CONFIG_BQL
5481 dql_init(&queue->dql, HZ);
5482#endif
5558} 5483}
5559 5484
5560static int netif_alloc_netdev_queues(struct net_device *dev) 5485static int netif_alloc_netdev_queues(struct net_device *dev)
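
dql_init() wires each TX queue up for byte queue limits (BQL). A hedged sketch of the driver half of BQL, using the accounting helpers introduced in this cycle; the foo_* functions and ring bookkeeping are illustrative:

    static netdev_tx_t foo_start_xmit(struct sk_buff *skb, struct net_device *dev)
    {
    	struct netdev_queue *txq =
    		netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
    
    	/* ...post the skb to the hardware ring (illustrative)... */
    	netdev_tx_sent_queue(txq, skb->len);	/* BQL: bytes handed to HW */
    	return NETDEV_TX_OK;
    }
    
    static void foo_tx_complete(struct net_device *dev, unsigned int queue,
    			    unsigned int pkts, unsigned int bytes)
    {
    	struct netdev_queue *txq = netdev_get_tx_queue(dev, queue);
    
    	/* BQL: may re-enable the queue once enough bytes have drained */
    	netdev_tx_completed_queue(txq, pkts, bytes);
    }
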
@@ -5640,11 +5565,12 @@ int register_netdevice(struct net_device *dev)
5640 dev->wanted_features = dev->features & dev->hw_features; 5565 dev->wanted_features = dev->features & dev->hw_features;
5641 5566
5642 /* Turn on no cache copy if HW is doing checksum */ 5567 /* Turn on no cache copy if HW is doing checksum */
5643 dev->hw_features |= NETIF_F_NOCACHE_COPY; 5568 if (!(dev->flags & IFF_LOOPBACK)) {
5644 if ((dev->features & NETIF_F_ALL_CSUM) && 5569 dev->hw_features |= NETIF_F_NOCACHE_COPY;
5645 !(dev->features & NETIF_F_NO_CSUM)) { 5570 if (dev->features & NETIF_F_ALL_CSUM) {
5646 dev->wanted_features |= NETIF_F_NOCACHE_COPY; 5571 dev->wanted_features |= NETIF_F_NOCACHE_COPY;
5647 dev->features |= NETIF_F_NOCACHE_COPY; 5572 dev->features |= NETIF_F_NOCACHE_COPY;
5573 }
5648 } 5574 }
5649 5575
5650 /* Make NETIF_F_HIGHDMA inheritable to VLAN devices. 5576 /* Make NETIF_F_HIGHDMA inheritable to VLAN devices.
@@ -6380,7 +6306,8 @@ static int dev_cpu_callback(struct notifier_block *nfb,
6380 * @one to the master device with current feature set @all. Will not 6306 * @one to the master device with current feature set @all. Will not
6381 * enable anything that is off in @mask. Returns the new feature set. 6307 * enable anything that is off in @mask. Returns the new feature set.
6382 */ 6308 */
6383u32 netdev_increment_features(u32 all, u32 one, u32 mask) 6309netdev_features_t netdev_increment_features(netdev_features_t all,
6310 netdev_features_t one, netdev_features_t mask)
6384{ 6311{
6385 if (mask & NETIF_F_GEN_CSUM) 6312 if (mask & NETIF_F_GEN_CSUM)
6386 mask |= NETIF_F_ALL_CSUM; 6313 mask |= NETIF_F_ALL_CSUM;
@@ -6389,10 +6316,6 @@ u32 netdev_increment_features(u32 all, u32 one, u32 mask)
6389 all |= one & (NETIF_F_ONE_FOR_ALL|NETIF_F_ALL_CSUM) & mask; 6316 all |= one & (NETIF_F_ONE_FOR_ALL|NETIF_F_ALL_CSUM) & mask;
6390 all &= one | ~NETIF_F_ALL_FOR_ALL; 6317 all &= one | ~NETIF_F_ALL_FOR_ALL;
6391 6318
6392 /* If device needs checksumming, downgrade to it. */
6393 if (all & (NETIF_F_ALL_CSUM & ~NETIF_F_NO_CSUM))
6394 all &= ~NETIF_F_NO_CSUM;
6395
6396 /* If one device supports hw checksumming, set for all. */ 6319 /* If one device supports hw checksumming, set for all. */
6397 if (all & NETIF_F_GEN_CSUM) 6320 if (all & NETIF_F_GEN_CSUM)
6398 all &= ~(NETIF_F_ALL_CSUM & ~NETIF_F_GEN_CSUM); 6321 all &= ~(NETIF_F_ALL_CSUM & ~NETIF_F_GEN_CSUM);
diff --git a/net/core/dst.c b/net/core/dst.c
index d5e2c4c09107..43d94cedbf7c 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -366,7 +366,7 @@ static void dst_ifdown(struct dst_entry *dst, struct net_device *dev,
366 dev_hold(dst->dev); 366 dev_hold(dst->dev);
367 dev_put(dev); 367 dev_put(dev);
368 rcu_read_lock(); 368 rcu_read_lock();
369 neigh = dst_get_neighbour(dst); 369 neigh = dst_get_neighbour_noref(dst);
370 if (neigh && neigh->dev == dev) { 370 if (neigh && neigh->dev == dev) {
371 neigh->dev = dst->dev; 371 neigh->dev = dst->dev;
372 dev_hold(dst->dev); 372 dev_hold(dst->dev);
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index f44481707124..921aa2b4b415 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -36,235 +36,44 @@ u32 ethtool_op_get_link(struct net_device *dev)
36} 36}
37EXPORT_SYMBOL(ethtool_op_get_link); 37EXPORT_SYMBOL(ethtool_op_get_link);
38 38
39u32 ethtool_op_get_tx_csum(struct net_device *dev)
40{
41 return (dev->features & NETIF_F_ALL_CSUM) != 0;
42}
43EXPORT_SYMBOL(ethtool_op_get_tx_csum);
44
45int ethtool_op_set_tx_csum(struct net_device *dev, u32 data)
46{
47 if (data)
48 dev->features |= NETIF_F_IP_CSUM;
49 else
50 dev->features &= ~NETIF_F_IP_CSUM;
51
52 return 0;
53}
54EXPORT_SYMBOL(ethtool_op_set_tx_csum);
55
56int ethtool_op_set_tx_hw_csum(struct net_device *dev, u32 data)
57{
58 if (data)
59 dev->features |= NETIF_F_HW_CSUM;
60 else
61 dev->features &= ~NETIF_F_HW_CSUM;
62
63 return 0;
64}
65EXPORT_SYMBOL(ethtool_op_set_tx_hw_csum);
66
67int ethtool_op_set_tx_ipv6_csum(struct net_device *dev, u32 data)
68{
69 if (data)
70 dev->features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
71 else
72 dev->features &= ~(NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM);
73
74 return 0;
75}
76EXPORT_SYMBOL(ethtool_op_set_tx_ipv6_csum);
77
78u32 ethtool_op_get_sg(struct net_device *dev)
79{
80 return (dev->features & NETIF_F_SG) != 0;
81}
82EXPORT_SYMBOL(ethtool_op_get_sg);
83
84int ethtool_op_set_sg(struct net_device *dev, u32 data)
85{
86 if (data)
87 dev->features |= NETIF_F_SG;
88 else
89 dev->features &= ~NETIF_F_SG;
90
91 return 0;
92}
93EXPORT_SYMBOL(ethtool_op_set_sg);
94
95u32 ethtool_op_get_tso(struct net_device *dev)
96{
97 return (dev->features & NETIF_F_TSO) != 0;
98}
99EXPORT_SYMBOL(ethtool_op_get_tso);
100
101int ethtool_op_set_tso(struct net_device *dev, u32 data)
102{
103 if (data)
104 dev->features |= NETIF_F_TSO;
105 else
106 dev->features &= ~NETIF_F_TSO;
107
108 return 0;
109}
110EXPORT_SYMBOL(ethtool_op_set_tso);
111
112u32 ethtool_op_get_ufo(struct net_device *dev)
113{
114 return (dev->features & NETIF_F_UFO) != 0;
115}
116EXPORT_SYMBOL(ethtool_op_get_ufo);
117
118int ethtool_op_set_ufo(struct net_device *dev, u32 data)
119{
120 if (data)
121 dev->features |= NETIF_F_UFO;
122 else
123 dev->features &= ~NETIF_F_UFO;
124 return 0;
125}
126EXPORT_SYMBOL(ethtool_op_set_ufo);
127
128/* the following list of flags are the same as their associated
129 * NETIF_F_xxx values in include/linux/netdevice.h
130 */
131static const u32 flags_dup_features =
132 (ETH_FLAG_LRO | ETH_FLAG_RXVLAN | ETH_FLAG_TXVLAN | ETH_FLAG_NTUPLE |
133 ETH_FLAG_RXHASH);
134
135u32 ethtool_op_get_flags(struct net_device *dev)
136{
137 /* in the future, this function will probably contain additional
138 * handling for flags which are not so easily handled
139 * by a simple masking operation
140 */
141
142 return dev->features & flags_dup_features;
143}
144EXPORT_SYMBOL(ethtool_op_get_flags);
145
146/* Check if device can enable (or disable) particular feature coded in "data"
147 * argument. Flags "supported" describe features that can be toggled by device.
148 * If feature can not be toggled, it state (enabled or disabled) must match
149 * hardcoded device features state, otherwise flags are marked as invalid.
150 */
151bool ethtool_invalid_flags(struct net_device *dev, u32 data, u32 supported)
152{
153 u32 features = dev->features & flags_dup_features;
154 /* "data" can contain only flags_dup_features bits,
155 * see __ethtool_set_flags */
156
157 return (features & ~supported) != (data & ~supported);
158}
159EXPORT_SYMBOL(ethtool_invalid_flags);
160
161int ethtool_op_set_flags(struct net_device *dev, u32 data, u32 supported)
162{
163 if (ethtool_invalid_flags(dev, data, supported))
164 return -EINVAL;
165
166 dev->features = ((dev->features & ~flags_dup_features) |
167 (data & flags_dup_features));
168 return 0;
169}
170EXPORT_SYMBOL(ethtool_op_set_flags);
171
172/* Handlers for each ethtool command */ 39/* Handlers for each ethtool command */
173 40
174#define ETHTOOL_DEV_FEATURE_WORDS 1 41#define ETHTOOL_DEV_FEATURE_WORDS ((NETDEV_FEATURE_COUNT + 31) / 32)
175 42
176static void ethtool_get_features_compat(struct net_device *dev, 43static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] = {
177 struct ethtool_get_features_block *features) 44 [NETIF_F_SG_BIT] = "tx-scatter-gather",
178{ 45 [NETIF_F_IP_CSUM_BIT] = "tx-checksum-ipv4",
179 if (!dev->ethtool_ops) 46 [NETIF_F_HW_CSUM_BIT] = "tx-checksum-ip-generic",
180 return; 47 [NETIF_F_IPV6_CSUM_BIT] = "tx-checksum-ipv6",
181 48 [NETIF_F_HIGHDMA_BIT] = "highdma",
182 /* getting RX checksum */ 49 [NETIF_F_FRAGLIST_BIT] = "tx-scatter-gather-fraglist",
183 if (dev->ethtool_ops->get_rx_csum) 50 [NETIF_F_HW_VLAN_TX_BIT] = "tx-vlan-hw-insert",
184 if (dev->ethtool_ops->get_rx_csum(dev)) 51
185 features[0].active |= NETIF_F_RXCSUM; 52 [NETIF_F_HW_VLAN_RX_BIT] = "rx-vlan-hw-parse",
186 53 [NETIF_F_HW_VLAN_FILTER_BIT] = "rx-vlan-filter",
187 /* mark legacy-changeable features */ 54 [NETIF_F_VLAN_CHALLENGED_BIT] = "vlan-challenged",
188 if (dev->ethtool_ops->set_sg) 55 [NETIF_F_GSO_BIT] = "tx-generic-segmentation",
189 features[0].available |= NETIF_F_SG; 56 [NETIF_F_LLTX_BIT] = "tx-lockless",
190 if (dev->ethtool_ops->set_tx_csum) 57 [NETIF_F_NETNS_LOCAL_BIT] = "netns-local",
191 features[0].available |= NETIF_F_ALL_CSUM; 58 [NETIF_F_GRO_BIT] = "rx-gro",
192 if (dev->ethtool_ops->set_tso) 59 [NETIF_F_LRO_BIT] = "rx-lro",
193 features[0].available |= NETIF_F_ALL_TSO; 60
194 if (dev->ethtool_ops->set_rx_csum) 61 [NETIF_F_TSO_BIT] = "tx-tcp-segmentation",
195 features[0].available |= NETIF_F_RXCSUM; 62 [NETIF_F_UFO_BIT] = "tx-udp-fragmentation",
196 if (dev->ethtool_ops->set_flags) 63 [NETIF_F_GSO_ROBUST_BIT] = "tx-gso-robust",
197 features[0].available |= flags_dup_features; 64 [NETIF_F_TSO_ECN_BIT] = "tx-tcp-ecn-segmentation",
198} 65 [NETIF_F_TSO6_BIT] = "tx-tcp6-segmentation",
199 66 [NETIF_F_FSO_BIT] = "tx-fcoe-segmentation",
200static int ethtool_set_feature_compat(struct net_device *dev, 67
201 int (*legacy_set)(struct net_device *, u32), 68 [NETIF_F_FCOE_CRC_BIT] = "tx-checksum-fcoe-crc",
202 struct ethtool_set_features_block *features, u32 mask) 69 [NETIF_F_SCTP_CSUM_BIT] = "tx-checksum-sctp",
203{ 70 [NETIF_F_FCOE_MTU_BIT] = "fcoe-mtu",
204 u32 do_set; 71 [NETIF_F_NTUPLE_BIT] = "rx-ntuple-filter",
205 72 [NETIF_F_RXHASH_BIT] = "rx-hashing",
206 if (!legacy_set) 73 [NETIF_F_RXCSUM_BIT] = "rx-checksum",
207 return 0; 74 [NETIF_F_NOCACHE_COPY_BIT] = "tx-nocache-copy",
208 75 [NETIF_F_LOOPBACK_BIT] = "loopback",
209 if (!(features[0].valid & mask)) 76};
210 return 0;
211
212 features[0].valid &= ~mask;
213
214 do_set = !!(features[0].requested & mask);
215
216 if (legacy_set(dev, do_set) < 0)
217 netdev_info(dev,
218 "Legacy feature change (%s) failed for 0x%08x\n",
219 do_set ? "set" : "clear", mask);
220
221 return 1;
222}
223
224static int ethtool_set_flags_compat(struct net_device *dev,
225 int (*legacy_set)(struct net_device *, u32),
226 struct ethtool_set_features_block *features, u32 mask)
227{
228 u32 value;
229
230 if (!legacy_set)
231 return 0;
232
233 if (!(features[0].valid & mask))
234 return 0;
235
236 value = dev->features & ~features[0].valid;
237 value |= features[0].requested;
238
239 features[0].valid &= ~mask;
240
241 if (legacy_set(dev, value & mask) < 0)
242 netdev_info(dev, "Legacy flags change failed\n");
243
244 return 1;
245}
246
247static int ethtool_set_features_compat(struct net_device *dev,
248 struct ethtool_set_features_block *features)
249{
250 int compat;
251
252 if (!dev->ethtool_ops)
253 return 0;
254
255 compat = ethtool_set_feature_compat(dev, dev->ethtool_ops->set_sg,
256 features, NETIF_F_SG);
257 compat |= ethtool_set_feature_compat(dev, dev->ethtool_ops->set_tx_csum,
258 features, NETIF_F_ALL_CSUM);
259 compat |= ethtool_set_feature_compat(dev, dev->ethtool_ops->set_tso,
260 features, NETIF_F_ALL_TSO);
261 compat |= ethtool_set_feature_compat(dev, dev->ethtool_ops->set_rx_csum,
262 features, NETIF_F_RXCSUM);
263 compat |= ethtool_set_flags_compat(dev, dev->ethtool_ops->set_flags,
264 features, flags_dup_features);
265
266 return compat;
267}
268 77
269static int ethtool_get_features(struct net_device *dev, void __user *useraddr) 78static int ethtool_get_features(struct net_device *dev, void __user *useraddr)
270{ 79{
@@ -272,18 +81,21 @@ static int ethtool_get_features(struct net_device *dev, void __user *useraddr)
272 .cmd = ETHTOOL_GFEATURES, 81 .cmd = ETHTOOL_GFEATURES,
273 .size = ETHTOOL_DEV_FEATURE_WORDS, 82 .size = ETHTOOL_DEV_FEATURE_WORDS,
274 }; 83 };
275 struct ethtool_get_features_block features[ETHTOOL_DEV_FEATURE_WORDS] = { 84 struct ethtool_get_features_block features[ETHTOOL_DEV_FEATURE_WORDS];
276 {
277 .available = dev->hw_features,
278 .requested = dev->wanted_features,
279 .active = dev->features,
280 .never_changed = NETIF_F_NEVER_CHANGE,
281 },
282 };
283 u32 __user *sizeaddr; 85 u32 __user *sizeaddr;
284 u32 copy_size; 86 u32 copy_size;
87 int i;
285 88
286 ethtool_get_features_compat(dev, features); 89 /* in case feature bits run out again */
90 BUILD_BUG_ON(ETHTOOL_DEV_FEATURE_WORDS * sizeof(u32) > sizeof(netdev_features_t));
91
92 for (i = 0; i < ETHTOOL_DEV_FEATURE_WORDS; ++i) {
93 features[i].available = (u32)(dev->hw_features >> (32 * i));
94 features[i].requested = (u32)(dev->wanted_features >> (32 * i));
95 features[i].active = (u32)(dev->features >> (32 * i));
96 features[i].never_changed =
97 (u32)(NETIF_F_NEVER_CHANGE >> (32 * i));
98 }
287 99
288 sizeaddr = useraddr + offsetof(struct ethtool_gfeatures, size); 100 sizeaddr = useraddr + offsetof(struct ethtool_gfeatures, size);
289 if (get_user(copy_size, sizeaddr)) 101 if (get_user(copy_size, sizeaddr))
@@ -305,7 +117,8 @@ static int ethtool_set_features(struct net_device *dev, void __user *useraddr)
305{ 117{
306 struct ethtool_sfeatures cmd; 118 struct ethtool_sfeatures cmd;
307 struct ethtool_set_features_block features[ETHTOOL_DEV_FEATURE_WORDS]; 119 struct ethtool_set_features_block features[ETHTOOL_DEV_FEATURE_WORDS];
308 int ret = 0; 120 netdev_features_t wanted = 0, valid = 0;
121 int i, ret = 0;
309 122
310 if (copy_from_user(&cmd, useraddr, sizeof(cmd))) 123 if (copy_from_user(&cmd, useraddr, sizeof(cmd)))
311 return -EFAULT; 124 return -EFAULT;
@@ -317,65 +130,29 @@ static int ethtool_set_features(struct net_device *dev, void __user *useraddr)
317 if (copy_from_user(features, useraddr, sizeof(features))) 130 if (copy_from_user(features, useraddr, sizeof(features)))
318 return -EFAULT; 131 return -EFAULT;
319 132
320 if (features[0].valid & ~NETIF_F_ETHTOOL_BITS) 133 for (i = 0; i < ETHTOOL_DEV_FEATURE_WORDS; ++i) {
321 return -EINVAL; 134 valid |= (netdev_features_t)features[i].valid << (32 * i);
135 wanted |= (netdev_features_t)features[i].requested << (32 * i);
136 }
322 137
323 if (ethtool_set_features_compat(dev, features)) 138 if (valid & ~NETIF_F_ETHTOOL_BITS)
324 ret |= ETHTOOL_F_COMPAT; 139 return -EINVAL;
325 140
326 if (features[0].valid & ~dev->hw_features) { 141 if (valid & ~dev->hw_features) {
327 features[0].valid &= dev->hw_features; 142 valid &= dev->hw_features;
328 ret |= ETHTOOL_F_UNSUPPORTED; 143 ret |= ETHTOOL_F_UNSUPPORTED;
329 } 144 }
330 145
331 dev->wanted_features &= ~features[0].valid; 146 dev->wanted_features &= ~valid;
332 dev->wanted_features |= features[0].valid & features[0].requested; 147 dev->wanted_features |= wanted & valid;
333 __netdev_update_features(dev); 148 __netdev_update_features(dev);
334 149
335 if ((dev->wanted_features ^ dev->features) & features[0].valid) 150 if ((dev->wanted_features ^ dev->features) & valid)
336 ret |= ETHTOOL_F_WISH; 151 ret |= ETHTOOL_F_WISH;
337 152
338 return ret; 153 return ret;
339} 154}
340 155
341static const char netdev_features_strings[ETHTOOL_DEV_FEATURE_WORDS * 32][ETH_GSTRING_LEN] = {
342 /* NETIF_F_SG */ "tx-scatter-gather",
343 /* NETIF_F_IP_CSUM */ "tx-checksum-ipv4",
344 /* NETIF_F_NO_CSUM */ "tx-checksum-unneeded",
345 /* NETIF_F_HW_CSUM */ "tx-checksum-ip-generic",
346 /* NETIF_F_IPV6_CSUM */ "tx-checksum-ipv6",
347 /* NETIF_F_HIGHDMA */ "highdma",
348 /* NETIF_F_FRAGLIST */ "tx-scatter-gather-fraglist",
349 /* NETIF_F_HW_VLAN_TX */ "tx-vlan-hw-insert",
350
351 /* NETIF_F_HW_VLAN_RX */ "rx-vlan-hw-parse",
352 /* NETIF_F_HW_VLAN_FILTER */ "rx-vlan-filter",
353 /* NETIF_F_VLAN_CHALLENGED */ "vlan-challenged",
354 /* NETIF_F_GSO */ "tx-generic-segmentation",
355 /* NETIF_F_LLTX */ "tx-lockless",
356 /* NETIF_F_NETNS_LOCAL */ "netns-local",
357 /* NETIF_F_GRO */ "rx-gro",
358 /* NETIF_F_LRO */ "rx-lro",
359
360 /* NETIF_F_TSO */ "tx-tcp-segmentation",
361 /* NETIF_F_UFO */ "tx-udp-fragmentation",
362 /* NETIF_F_GSO_ROBUST */ "tx-gso-robust",
363 /* NETIF_F_TSO_ECN */ "tx-tcp-ecn-segmentation",
364 /* NETIF_F_TSO6 */ "tx-tcp6-segmentation",
365 /* NETIF_F_FSO */ "tx-fcoe-segmentation",
366 "",
367 "",
368
369 /* NETIF_F_FCOE_CRC */ "tx-checksum-fcoe-crc",
370 /* NETIF_F_SCTP_CSUM */ "tx-checksum-sctp",
371 /* NETIF_F_FCOE_MTU */ "fcoe-mtu",
372 /* NETIF_F_NTUPLE */ "rx-ntuple-filter",
373 /* NETIF_F_RXHASH */ "rx-hashing",
374 /* NETIF_F_RXCSUM */ "rx-checksum",
375 /* NETIF_F_NOCACHE_COPY */ "tx-nocache-copy",
376 /* NETIF_F_LOOPBACK */ "loopback",
377};
378
379static int __ethtool_get_sset_count(struct net_device *dev, int sset) 156static int __ethtool_get_sset_count(struct net_device *dev, int sset)
380{ 157{
381 const struct ethtool_ops *ops = dev->ethtool_ops; 158 const struct ethtool_ops *ops = dev->ethtool_ops;
@@ -402,7 +179,7 @@ static void __ethtool_get_strings(struct net_device *dev,
402 ops->get_strings(dev, stringset, data); 179 ops->get_strings(dev, stringset, data);
403} 180}
404 181
405static u32 ethtool_get_feature_mask(u32 eth_cmd) 182static netdev_features_t ethtool_get_feature_mask(u32 eth_cmd)
406{ 183{
407 /* feature masks of legacy discrete ethtool ops */ 184 /* feature masks of legacy discrete ethtool ops */
408 185
@@ -433,136 +210,82 @@ static u32 ethtool_get_feature_mask(u32 eth_cmd)
433 } 210 }
434} 211}
435 212
436static void *__ethtool_get_one_feature_actor(struct net_device *dev, u32 ethcmd)
437{
438 const struct ethtool_ops *ops = dev->ethtool_ops;
439
440 if (!ops)
441 return NULL;
442
443 switch (ethcmd) {
444 case ETHTOOL_GTXCSUM:
445 return ops->get_tx_csum;
446 case ETHTOOL_GRXCSUM:
447 return ops->get_rx_csum;
448 case ETHTOOL_SSG:
449 return ops->get_sg;
450 case ETHTOOL_STSO:
451 return ops->get_tso;
452 case ETHTOOL_SUFO:
453 return ops->get_ufo;
454 default:
455 return NULL;
456 }
457}
458
459static u32 __ethtool_get_rx_csum_oldbug(struct net_device *dev)
460{
461 return !!(dev->features & NETIF_F_ALL_CSUM);
462}
463
464static int ethtool_get_one_feature(struct net_device *dev, 213static int ethtool_get_one_feature(struct net_device *dev,
465 char __user *useraddr, u32 ethcmd) 214 char __user *useraddr, u32 ethcmd)
466{ 215{
467 u32 mask = ethtool_get_feature_mask(ethcmd); 216 netdev_features_t mask = ethtool_get_feature_mask(ethcmd);
468 struct ethtool_value edata = { 217 struct ethtool_value edata = {
469 .cmd = ethcmd, 218 .cmd = ethcmd,
470 .data = !!(dev->features & mask), 219 .data = !!(dev->features & mask),
471 }; 220 };
472 221
473 /* compatibility with discrete get_ ops */
474 if (!(dev->hw_features & mask)) {
475 u32 (*actor)(struct net_device *);
476
477 actor = __ethtool_get_one_feature_actor(dev, ethcmd);
478
479 /* bug compatibility with old get_rx_csum */
480 if (ethcmd == ETHTOOL_GRXCSUM && !actor)
481 actor = __ethtool_get_rx_csum_oldbug;
482
483 if (actor)
484 edata.data = actor(dev);
485 }
486
487 if (copy_to_user(useraddr, &edata, sizeof(edata))) 222 if (copy_to_user(useraddr, &edata, sizeof(edata)))
488 return -EFAULT; 223 return -EFAULT;
489 return 0; 224 return 0;
490} 225}
491 226
492static int __ethtool_set_tx_csum(struct net_device *dev, u32 data);
493static int __ethtool_set_rx_csum(struct net_device *dev, u32 data);
494static int __ethtool_set_sg(struct net_device *dev, u32 data);
495static int __ethtool_set_tso(struct net_device *dev, u32 data);
496static int __ethtool_set_ufo(struct net_device *dev, u32 data);
497
498static int ethtool_set_one_feature(struct net_device *dev, 227static int ethtool_set_one_feature(struct net_device *dev,
499 void __user *useraddr, u32 ethcmd) 228 void __user *useraddr, u32 ethcmd)
500{ 229{
501 struct ethtool_value edata; 230 struct ethtool_value edata;
502 u32 mask; 231 netdev_features_t mask;
503 232
504 if (copy_from_user(&edata, useraddr, sizeof(edata))) 233 if (copy_from_user(&edata, useraddr, sizeof(edata)))
505 return -EFAULT; 234 return -EFAULT;
506 235
507 mask = ethtool_get_feature_mask(ethcmd); 236 mask = ethtool_get_feature_mask(ethcmd);
508 mask &= dev->hw_features; 237 mask &= dev->hw_features;
509 if (mask) { 238 if (!mask)
510 if (edata.data) 239 return -EOPNOTSUPP;
511 dev->wanted_features |= mask;
512 else
513 dev->wanted_features &= ~mask;
514 240
515 __netdev_update_features(dev); 241 if (edata.data)
516 return 0; 242 dev->wanted_features |= mask;
517 } 243 else
244 dev->wanted_features &= ~mask;
518 245
519 /* Driver is not converted to ndo_fix_features or does not 246 __netdev_update_features(dev);
520 * support changing this offload. In the latter case it won't
521 * have corresponding ethtool_ops field set.
522 *
523 * Following part is to be removed after all drivers advertise
524 * their changeable features in netdev->hw_features and stop
525 * using discrete offload setting ops.
526 */
527 247
528 switch (ethcmd) { 248 return 0;
529 case ETHTOOL_STXCSUM: 249}
530 return __ethtool_set_tx_csum(dev, edata.data); 250
531 case ETHTOOL_SRXCSUM: 251#define ETH_ALL_FLAGS (ETH_FLAG_LRO | ETH_FLAG_RXVLAN | ETH_FLAG_TXVLAN | \
532 return __ethtool_set_rx_csum(dev, edata.data); 252 ETH_FLAG_NTUPLE | ETH_FLAG_RXHASH)
533 case ETHTOOL_SSG: 253#define ETH_ALL_FEATURES (NETIF_F_LRO | NETIF_F_HW_VLAN_RX | \
534 return __ethtool_set_sg(dev, edata.data); 254 NETIF_F_HW_VLAN_TX | NETIF_F_NTUPLE | NETIF_F_RXHASH)
535 case ETHTOOL_STSO: 255
536 return __ethtool_set_tso(dev, edata.data); 256static u32 __ethtool_get_flags(struct net_device *dev)
537 case ETHTOOL_SUFO: 257{
538 return __ethtool_set_ufo(dev, edata.data); 258 u32 flags = 0;
539 default: 259
540 return -EOPNOTSUPP; 260 if (dev->features & NETIF_F_LRO) flags |= ETH_FLAG_LRO;
541 } 261 if (dev->features & NETIF_F_HW_VLAN_RX) flags |= ETH_FLAG_RXVLAN;
262 if (dev->features & NETIF_F_HW_VLAN_TX) flags |= ETH_FLAG_TXVLAN;
263 if (dev->features & NETIF_F_NTUPLE) flags |= ETH_FLAG_NTUPLE;
264 if (dev->features & NETIF_F_RXHASH) flags |= ETH_FLAG_RXHASH;
265
266 return flags;
542} 267}
543 268
544int __ethtool_set_flags(struct net_device *dev, u32 data) 269static int __ethtool_set_flags(struct net_device *dev, u32 data)
545{ 270{
546 u32 changed; 271 netdev_features_t features = 0, changed;
547 272
548 if (data & ~flags_dup_features) 273 if (data & ~ETH_ALL_FLAGS)
549 return -EINVAL; 274 return -EINVAL;
550 275
551 /* legacy set_flags() op */ 276 if (data & ETH_FLAG_LRO) features |= NETIF_F_LRO;
552 if (dev->ethtool_ops->set_flags) { 277 if (data & ETH_FLAG_RXVLAN) features |= NETIF_F_HW_VLAN_RX;
553 if (unlikely(dev->hw_features & flags_dup_features)) 278 if (data & ETH_FLAG_TXVLAN) features |= NETIF_F_HW_VLAN_TX;
554 netdev_warn(dev, 279 if (data & ETH_FLAG_NTUPLE) features |= NETIF_F_NTUPLE;
555 "driver BUG: mixed hw_features and set_flags()\n"); 280 if (data & ETH_FLAG_RXHASH) features |= NETIF_F_RXHASH;
556 return dev->ethtool_ops->set_flags(dev, data);
557 }
558 281
559 /* allow changing only bits set in hw_features */ 282 /* allow changing only bits set in hw_features */
560 changed = (data ^ dev->features) & flags_dup_features; 283 changed = (features ^ dev->features) & ETH_ALL_FEATURES;
561 if (changed & ~dev->hw_features) 284 if (changed & ~dev->hw_features)
562 return (changed & dev->hw_features) ? -EINVAL : -EOPNOTSUPP; 285 return (changed & dev->hw_features) ? -EINVAL : -EOPNOTSUPP;
563 286
564 dev->wanted_features = 287 dev->wanted_features =
565 (dev->wanted_features & ~changed) | (data & dev->hw_features); 288 (dev->wanted_features & ~changed) | (features & changed);
566 289
567 __netdev_update_features(dev); 290 __netdev_update_features(dev);
568 291
@@ -716,6 +439,7 @@ static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev,
716{ 439{
717 struct ethtool_rxnfc info; 440 struct ethtool_rxnfc info;
718 size_t info_size = sizeof(info); 441 size_t info_size = sizeof(info);
442 int rc;
719 443
720 if (!dev->ethtool_ops->set_rxnfc) 444 if (!dev->ethtool_ops->set_rxnfc)
721 return -EOPNOTSUPP; 445 return -EOPNOTSUPP;
@@ -731,7 +455,15 @@ static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev,
731 if (copy_from_user(&info, useraddr, info_size)) 455 if (copy_from_user(&info, useraddr, info_size))
732 return -EFAULT; 456 return -EFAULT;
733 457
734 return dev->ethtool_ops->set_rxnfc(dev, &info); 458 rc = dev->ethtool_ops->set_rxnfc(dev, &info);
459 if (rc)
460 return rc;
461
462 if (cmd == ETHTOOL_SRXCLSRLINS &&
463 copy_to_user(useraddr, &info, info_size))
464 return -EFAULT;
465
466 return 0;
735} 467}
736 468
737static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev, 469static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev,
@@ -792,34 +524,44 @@ err_out:
792static noinline_for_stack int ethtool_get_rxfh_indir(struct net_device *dev, 524static noinline_for_stack int ethtool_get_rxfh_indir(struct net_device *dev,
793 void __user *useraddr) 525 void __user *useraddr)
794{ 526{
795 struct ethtool_rxfh_indir *indir; 527 u32 user_size, dev_size;
796 u32 table_size; 528 u32 *indir;
797 size_t full_size;
798 int ret; 529 int ret;
799 530
800 if (!dev->ethtool_ops->get_rxfh_indir) 531 if (!dev->ethtool_ops->get_rxfh_indir_size ||
532 !dev->ethtool_ops->get_rxfh_indir)
533 return -EOPNOTSUPP;
534 dev_size = dev->ethtool_ops->get_rxfh_indir_size(dev);
535 if (dev_size == 0)
801 return -EOPNOTSUPP; 536 return -EOPNOTSUPP;
802 537
803 if (copy_from_user(&table_size, 538 if (copy_from_user(&user_size,
804 useraddr + offsetof(struct ethtool_rxfh_indir, size), 539 useraddr + offsetof(struct ethtool_rxfh_indir, size),
805 sizeof(table_size))) 540 sizeof(user_size)))
806 return -EFAULT; 541 return -EFAULT;
807 542
808 if (table_size > 543 if (copy_to_user(useraddr + offsetof(struct ethtool_rxfh_indir, size),
809 (KMALLOC_MAX_SIZE - sizeof(*indir)) / sizeof(*indir->ring_index)) 544 &dev_size, sizeof(dev_size)))
810 return -ENOMEM; 545 return -EFAULT;
811 full_size = sizeof(*indir) + sizeof(*indir->ring_index) * table_size; 546
812 indir = kzalloc(full_size, GFP_USER); 547 /* If the user buffer size is 0, this is just a query for the
548 * device table size. Otherwise, if it's smaller than the
549 * device table size it's an error.
550 */
551 if (user_size < dev_size)
552 return user_size == 0 ? 0 : -EINVAL;
553
554 indir = kcalloc(dev_size, sizeof(indir[0]), GFP_USER);
813 if (!indir) 555 if (!indir)
814 return -ENOMEM; 556 return -ENOMEM;
815 557
816 indir->cmd = ETHTOOL_GRXFHINDIR;
817 indir->size = table_size;
818 ret = dev->ethtool_ops->get_rxfh_indir(dev, indir); 558 ret = dev->ethtool_ops->get_rxfh_indir(dev, indir);
819 if (ret) 559 if (ret)
820 goto out; 560 goto out;
821 561
822 if (copy_to_user(useraddr, indir, full_size)) 562 if (copy_to_user(useraddr +
563 offsetof(struct ethtool_rxfh_indir, ring_index[0]),
564 indir, dev_size * sizeof(indir[0])))
823 ret = -EFAULT; 565 ret = -EFAULT;
824 566
825out: 567out:
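
The RXFH indirection ioctls now query the table size from the driver and exchange a bare u32 array instead of a variable-length struct. A hedged sketch of the corresponding driver callbacks (FOO_RSS_TABLE_SIZE and the priv layout are illustrative):

    #define FOO_RSS_TABLE_SIZE 128	/* illustrative */
    
    static u32 foo_get_rxfh_indir_size(struct net_device *dev)
    {
    	return FOO_RSS_TABLE_SIZE;
    }
    
    static int foo_get_rxfh_indir(struct net_device *dev, u32 *indir)
    {
    	struct foo_priv *priv = netdev_priv(dev);
    	unsigned int i;
    
    	for (i = 0; i < FOO_RSS_TABLE_SIZE; i++)
    		indir[i] = priv->rss_table[i];	/* RX ring for each hash bucket */
    	return 0;
    }
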
@@ -830,30 +572,56 @@ out:
830static noinline_for_stack int ethtool_set_rxfh_indir(struct net_device *dev, 572static noinline_for_stack int ethtool_set_rxfh_indir(struct net_device *dev,
831 void __user *useraddr) 573 void __user *useraddr)
832{ 574{
833 struct ethtool_rxfh_indir *indir; 575 struct ethtool_rxnfc rx_rings;
834 u32 table_size; 576 u32 user_size, dev_size, i;
835 size_t full_size; 577 u32 *indir;
836 int ret; 578 int ret;
837 579
838 if (!dev->ethtool_ops->set_rxfh_indir) 580 if (!dev->ethtool_ops->get_rxfh_indir_size ||
581 !dev->ethtool_ops->set_rxfh_indir ||
582 !dev->ethtool_ops->get_rxnfc)
583 return -EOPNOTSUPP;
584 dev_size = dev->ethtool_ops->get_rxfh_indir_size(dev);
585 if (dev_size == 0)
839 return -EOPNOTSUPP; 586 return -EOPNOTSUPP;
840 587
841 if (copy_from_user(&table_size, 588 if (copy_from_user(&user_size,
842 useraddr + offsetof(struct ethtool_rxfh_indir, size), 589 useraddr + offsetof(struct ethtool_rxfh_indir, size),
843 sizeof(table_size))) 590 sizeof(user_size)))
844 return -EFAULT; 591 return -EFAULT;
845 592
846 if (table_size > 593 if (user_size != 0 && user_size != dev_size)
847 (KMALLOC_MAX_SIZE - sizeof(*indir)) / sizeof(*indir->ring_index)) 594 return -EINVAL;
848 return -ENOMEM; 595
849 full_size = sizeof(*indir) + sizeof(*indir->ring_index) * table_size; 596 indir = kcalloc(dev_size, sizeof(indir[0]), GFP_USER);
850 indir = kmalloc(full_size, GFP_USER);
851 if (!indir) 597 if (!indir)
852 return -ENOMEM; 598 return -ENOMEM;
853 599
854 if (copy_from_user(indir, useraddr, full_size)) { 600 rx_rings.cmd = ETHTOOL_GRXRINGS;
855 ret = -EFAULT; 601 ret = dev->ethtool_ops->get_rxnfc(dev, &rx_rings, NULL);
602 if (ret)
856 goto out; 603 goto out;
604
605 if (user_size == 0) {
606 for (i = 0; i < dev_size; i++)
607 indir[i] = ethtool_rxfh_indir_default(i, rx_rings.data);
608 } else {
609 if (copy_from_user(indir,
610 useraddr +
611 offsetof(struct ethtool_rxfh_indir,
612 ring_index[0]),
613 dev_size * sizeof(indir[0]))) {
614 ret = -EFAULT;
615 goto out;
616 }
617
618 /* Validate ring indices */
619 for (i = 0; i < dev_size; i++) {
620 if (indir[i] >= rx_rings.data) {
621 ret = -EINVAL;
622 goto out;
623 }
624 }
857 } 625 }
858 626
859 ret = dev->ethtool_ops->set_rxfh_indir(dev, indir); 627 ret = dev->ethtool_ops->set_rxfh_indir(dev, indir);
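
When userspace passes a table size of 0, the indirection table is reset to the default spreading via ethtool_rxfh_indir_default(); assuming that helper's definition in this cycle, it is a simple modulo over the RX ring count:

    /* assumed inline from <linux/ethtool.h> in this cycle */
    static inline u32 ethtool_rxfh_indir_default(u32 index, u32 n_rx_rings)
    {
    	return index % n_rx_rings;
    }
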
@@ -863,58 +631,6 @@ out:
863 return ret; 631 return ret;
864} 632}
865 633
866/*
867 * ethtool does not (or did not) set masks for flow parameters that are
868 * not specified, so if both value and mask are 0 then this must be
869 * treated as equivalent to a mask with all bits set. Implement that
870 * here rather than in drivers.
871 */
872static void rx_ntuple_fix_masks(struct ethtool_rx_ntuple_flow_spec *fs)
873{
874 struct ethtool_tcpip4_spec *entry = &fs->h_u.tcp_ip4_spec;
875 struct ethtool_tcpip4_spec *mask = &fs->m_u.tcp_ip4_spec;
876
877 if (fs->flow_type != TCP_V4_FLOW &&
878 fs->flow_type != UDP_V4_FLOW &&
879 fs->flow_type != SCTP_V4_FLOW)
880 return;
881
882 if (!(entry->ip4src | mask->ip4src))
883 mask->ip4src = htonl(0xffffffff);
884 if (!(entry->ip4dst | mask->ip4dst))
885 mask->ip4dst = htonl(0xffffffff);
886 if (!(entry->psrc | mask->psrc))
887 mask->psrc = htons(0xffff);
888 if (!(entry->pdst | mask->pdst))
889 mask->pdst = htons(0xffff);
890 if (!(entry->tos | mask->tos))
891 mask->tos = 0xff;
892 if (!(fs->vlan_tag | fs->vlan_tag_mask))
893 fs->vlan_tag_mask = 0xffff;
894 if (!(fs->data | fs->data_mask))
895 fs->data_mask = 0xffffffffffffffffULL;
896}
897
898static noinline_for_stack int ethtool_set_rx_ntuple(struct net_device *dev,
899 void __user *useraddr)
900{
901 struct ethtool_rx_ntuple cmd;
902 const struct ethtool_ops *ops = dev->ethtool_ops;
903
904 if (!ops->set_rx_ntuple)
905 return -EOPNOTSUPP;
906
907 if (!(dev->features & NETIF_F_NTUPLE))
908 return -EINVAL;
909
910 if (copy_from_user(&cmd, useraddr, sizeof(cmd)))
911 return -EFAULT;
912
913 rx_ntuple_fix_masks(&cmd.fs);
914
915 return ops->set_rx_ntuple(dev, &cmd);
916}
917
918static int ethtool_get_regs(struct net_device *dev, char __user *useraddr) 634static int ethtool_get_regs(struct net_device *dev, char __user *useraddr)
919{ 635{
920 struct ethtool_regs regs; 636 struct ethtool_regs regs;
@@ -1231,81 +947,6 @@ static int ethtool_set_pauseparam(struct net_device *dev, void __user *useraddr)
1231 return dev->ethtool_ops->set_pauseparam(dev, &pauseparam); 947 return dev->ethtool_ops->set_pauseparam(dev, &pauseparam);
1232} 948}
1233 949
1234static int __ethtool_set_sg(struct net_device *dev, u32 data)
1235{
1236 int err;
1237
1238 if (!dev->ethtool_ops->set_sg)
1239 return -EOPNOTSUPP;
1240
1241 if (data && !(dev->features & NETIF_F_ALL_CSUM))
1242 return -EINVAL;
1243
1244 if (!data && dev->ethtool_ops->set_tso) {
1245 err = dev->ethtool_ops->set_tso(dev, 0);
1246 if (err)
1247 return err;
1248 }
1249
1250 if (!data && dev->ethtool_ops->set_ufo) {
1251 err = dev->ethtool_ops->set_ufo(dev, 0);
1252 if (err)
1253 return err;
1254 }
1255 return dev->ethtool_ops->set_sg(dev, data);
1256}
1257
1258static int __ethtool_set_tx_csum(struct net_device *dev, u32 data)
1259{
1260 int err;
1261
1262 if (!dev->ethtool_ops->set_tx_csum)
1263 return -EOPNOTSUPP;
1264
1265 if (!data && dev->ethtool_ops->set_sg) {
1266 err = __ethtool_set_sg(dev, 0);
1267 if (err)
1268 return err;
1269 }
1270
1271 return dev->ethtool_ops->set_tx_csum(dev, data);
1272}
1273
1274static int __ethtool_set_rx_csum(struct net_device *dev, u32 data)
1275{
1276 if (!dev->ethtool_ops->set_rx_csum)
1277 return -EOPNOTSUPP;
1278
1279 if (!data)
1280 dev->features &= ~NETIF_F_GRO;
1281
1282 return dev->ethtool_ops->set_rx_csum(dev, data);
1283}
1284
1285static int __ethtool_set_tso(struct net_device *dev, u32 data)
1286{
1287 if (!dev->ethtool_ops->set_tso)
1288 return -EOPNOTSUPP;
1289
1290 if (data && !(dev->features & NETIF_F_SG))
1291 return -EINVAL;
1292
1293 return dev->ethtool_ops->set_tso(dev, data);
1294}
1295
1296static int __ethtool_set_ufo(struct net_device *dev, u32 data)
1297{
1298 if (!dev->ethtool_ops->set_ufo)
1299 return -EOPNOTSUPP;
1300 if (data && !(dev->features & NETIF_F_SG))
1301 return -EINVAL;
1302 if (data && !((dev->features & NETIF_F_GEN_CSUM) ||
1303 (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))
1304 == (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM)))
1305 return -EINVAL;
1306 return dev->ethtool_ops->set_ufo(dev, data);
1307}
1308
1309static int ethtool_self_test(struct net_device *dev, char __user *useraddr) 950static int ethtool_self_test(struct net_device *dev, char __user *useraddr)
1310{ 951{
1311 struct ethtool_test test; 952 struct ethtool_test test;
@@ -1771,9 +1412,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
1771 break; 1412 break;
1772 case ETHTOOL_GFLAGS: 1413 case ETHTOOL_GFLAGS:
1773 rc = ethtool_get_value(dev, useraddr, ethcmd, 1414 rc = ethtool_get_value(dev, useraddr, ethcmd,
1774 (dev->ethtool_ops->get_flags ? 1415 __ethtool_get_flags);
1775 dev->ethtool_ops->get_flags :
1776 ethtool_op_get_flags));
1777 break; 1416 break;
1778 case ETHTOOL_SFLAGS: 1417 case ETHTOOL_SFLAGS:
1779 rc = ethtool_set_value(dev, useraddr, __ethtool_set_flags); 1418 rc = ethtool_set_value(dev, useraddr, __ethtool_set_flags);
@@ -1804,9 +1443,6 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
1804 case ETHTOOL_RESET: 1443 case ETHTOOL_RESET:
1805 rc = ethtool_reset(dev, useraddr); 1444 rc = ethtool_reset(dev, useraddr);
1806 break; 1445 break;
1807 case ETHTOOL_SRXNTUPLE:
1808 rc = ethtool_set_rx_ntuple(dev, useraddr);
1809 break;
1810 case ETHTOOL_GSSET_INFO: 1446 case ETHTOOL_GSSET_INFO:
1811 rc = ethtool_get_sset_info(dev, useraddr); 1447 rc = ethtool_get_sset_info(dev, useraddr);
1812 break; 1448 break;
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
new file mode 100644
index 000000000000..0985b9b14b80
--- /dev/null
+++ b/net/core/flow_dissector.c
@@ -0,0 +1,143 @@
1#include <linux/skbuff.h>
2#include <linux/ip.h>
3#include <linux/ipv6.h>
4#include <linux/if_vlan.h>
5#include <net/ip.h>
6#include <linux/if_tunnel.h>
7#include <linux/if_pppox.h>
8#include <linux/ppp_defs.h>
9#include <net/flow_keys.h>
10
11/* copy saddr & daddr, possibly using 64bit load/store
12 * Equivalent to : flow->src = iph->saddr;
13 * flow->dst = iph->daddr;
14 */
15static void iph_to_flow_copy_addrs(struct flow_keys *flow, const struct iphdr *iph)
16{
17 BUILD_BUG_ON(offsetof(typeof(*flow), dst) !=
18 offsetof(typeof(*flow), src) + sizeof(flow->src));
19 memcpy(&flow->src, &iph->saddr, sizeof(flow->src) + sizeof(flow->dst));
20}
21
22bool skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow)
23{
24 int poff, nhoff = skb_network_offset(skb);
25 u8 ip_proto;
26 __be16 proto = skb->protocol;
27
28 memset(flow, 0, sizeof(*flow));
29
30again:
31 switch (proto) {
32 case __constant_htons(ETH_P_IP): {
33 const struct iphdr *iph;
34 struct iphdr _iph;
35ip:
36 iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph);
37 if (!iph)
38 return false;
39
40 if (ip_is_fragment(iph))
41 ip_proto = 0;
42 else
43 ip_proto = iph->protocol;
44 iph_to_flow_copy_addrs(flow, iph);
45 nhoff += iph->ihl * 4;
46 break;
47 }
48 case __constant_htons(ETH_P_IPV6): {
49 const struct ipv6hdr *iph;
50 struct ipv6hdr _iph;
51ipv6:
52 iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph);
53 if (!iph)
54 return false;
55
56 ip_proto = iph->nexthdr;
57 flow->src = iph->saddr.s6_addr32[3];
58 flow->dst = iph->daddr.s6_addr32[3];
59 nhoff += sizeof(struct ipv6hdr);
60 break;
61 }
62 case __constant_htons(ETH_P_8021Q): {
63 const struct vlan_hdr *vlan;
64 struct vlan_hdr _vlan;
65
66 vlan = skb_header_pointer(skb, nhoff, sizeof(_vlan), &_vlan);
67 if (!vlan)
68 return false;
69
70 proto = vlan->h_vlan_encapsulated_proto;
71 nhoff += sizeof(*vlan);
72 goto again;
73 }
74 case __constant_htons(ETH_P_PPP_SES): {
75 struct {
76 struct pppoe_hdr hdr;
77 __be16 proto;
78 } *hdr, _hdr;
79 hdr = skb_header_pointer(skb, nhoff, sizeof(_hdr), &_hdr);
80 if (!hdr)
81 return false;
82 proto = hdr->proto;
83 nhoff += PPPOE_SES_HLEN;
84 switch (proto) {
85 case __constant_htons(PPP_IP):
86 goto ip;
87 case __constant_htons(PPP_IPV6):
88 goto ipv6;
89 default:
90 return false;
91 }
92 }
93 default:
94 return false;
95 }
96
97 switch (ip_proto) {
98 case IPPROTO_GRE: {
99 struct gre_hdr {
100 __be16 flags;
101 __be16 proto;
102 } *hdr, _hdr;
103
104 hdr = skb_header_pointer(skb, nhoff, sizeof(_hdr), &_hdr);
105 if (!hdr)
106 return false;
107 /*
108 * Only look inside GRE if version zero and no
109 * routing
110 */
111 if (!(hdr->flags & (GRE_VERSION|GRE_ROUTING))) {
112 proto = hdr->proto;
113 nhoff += 4;
114 if (hdr->flags & GRE_CSUM)
115 nhoff += 4;
116 if (hdr->flags & GRE_KEY)
117 nhoff += 4;
118 if (hdr->flags & GRE_SEQ)
119 nhoff += 4;
120 goto again;
121 }
122 break;
123 }
124 case IPPROTO_IPIP:
125 goto again;
126 default:
127 break;
128 }
129
130 flow->ip_proto = ip_proto;
131 poff = proto_ports_offset(ip_proto);
132 if (poff >= 0) {
133 __be32 *ports, _ports;
134
135 nhoff += poff;
136 ports = skb_header_pointer(skb, nhoff, sizeof(_ports), &_ports);
137 if (ports)
138 flow->ports = *ports;
139 }
140
141 return true;
142}
143EXPORT_SYMBOL(skb_flow_dissect);
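
As a rough illustration of how the new helper is meant to be consumed (a sketch, not code from this series): a caller hands skb_flow_dissect() a struct flow_keys and can then hash the src/dst/ports/ip_proto fields it fills in, for example with jhash_3words() from <linux/jhash.h>.

/* Hedged sketch of a skb_flow_dissect() consumer: derive a simple flow
 * hash from the fields the dissector fills in above.  jhash_3words() is
 * assumed available from <linux/jhash.h>; this is not the in-tree rxhash
 * implementation, just an example of the calling convention.
 */
#include <linux/jhash.h>
#include <linux/skbuff.h>
#include <net/flow_keys.h>

static u32 example_flow_hash(const struct sk_buff *skb)
{
	struct flow_keys keys;

	if (!skb_flow_dissect(skb, &keys))
		return 0;		/* not a flow we understand */

	/* mix addresses, ports and L4 protocol into one 32bit value */
	return jhash_3words((__force u32)keys.dst,
			    (__force u32)keys.src,
			    (__force u32)keys.ports ^ keys.ip_proto,
			    0);
}
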
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 5ac07d31fbc9..e287346e0934 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -238,6 +238,7 @@ static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
238 it to safe state. 238 it to safe state.
239 */ 239 */
240 skb_queue_purge(&n->arp_queue); 240 skb_queue_purge(&n->arp_queue);
241 n->arp_queue_len_bytes = 0;
241 n->output = neigh_blackhole; 242 n->output = neigh_blackhole;
242 if (n->nud_state & NUD_VALID) 243 if (n->nud_state & NUD_VALID)
243 n->nud_state = NUD_NOARP; 244 n->nud_state = NUD_NOARP;
@@ -272,7 +273,7 @@ int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
272} 273}
273EXPORT_SYMBOL(neigh_ifdown); 274EXPORT_SYMBOL(neigh_ifdown);
274 275
275static struct neighbour *neigh_alloc(struct neigh_table *tbl) 276static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device *dev)
276{ 277{
277 struct neighbour *n = NULL; 278 struct neighbour *n = NULL;
278 unsigned long now = jiffies; 279 unsigned long now = jiffies;
@@ -287,7 +288,15 @@ static struct neighbour *neigh_alloc(struct neigh_table *tbl)
287 goto out_entries; 288 goto out_entries;
288 } 289 }
289 290
290 n = kmem_cache_zalloc(tbl->kmem_cachep, GFP_ATOMIC); 291 if (tbl->entry_size)
292 n = kzalloc(tbl->entry_size, GFP_ATOMIC);
293 else {
294 int sz = sizeof(*n) + tbl->key_len;
295
296 sz = ALIGN(sz, NEIGH_PRIV_ALIGN);
297 sz += dev->neigh_priv_len;
298 n = kzalloc(sz, GFP_ATOMIC);
299 }
291 if (!n) 300 if (!n)
292 goto out_entries; 301 goto out_entries;
293 302
@@ -313,11 +322,18 @@ out_entries:
313 goto out; 322 goto out;
314} 323}
315 324
325static void neigh_get_hash_rnd(u32 *x)
326{
327 get_random_bytes(x, sizeof(*x));
328 *x |= 1;
329}
330
316static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift) 331static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
317{ 332{
318 size_t size = (1 << shift) * sizeof(struct neighbour *); 333 size_t size = (1 << shift) * sizeof(struct neighbour *);
319 struct neigh_hash_table *ret; 334 struct neigh_hash_table *ret;
320 struct neighbour __rcu **buckets; 335 struct neighbour __rcu **buckets;
336 int i;
321 337
322 ret = kmalloc(sizeof(*ret), GFP_ATOMIC); 338 ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
323 if (!ret) 339 if (!ret)
@@ -334,8 +350,8 @@ static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
334 } 350 }
335 ret->hash_buckets = buckets; 351 ret->hash_buckets = buckets;
336 ret->hash_shift = shift; 352 ret->hash_shift = shift;
337 get_random_bytes(&ret->hash_rnd, sizeof(ret->hash_rnd)); 353 for (i = 0; i < NEIGH_NUM_HASH_RND; i++)
338 ret->hash_rnd |= 1; 354 neigh_get_hash_rnd(&ret->hash_rnd[i]);
339 return ret; 355 return ret;
340} 356}
341 357
@@ -462,7 +478,7 @@ struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey,
462 u32 hash_val; 478 u32 hash_val;
463 int key_len = tbl->key_len; 479 int key_len = tbl->key_len;
464 int error; 480 int error;
465 struct neighbour *n1, *rc, *n = neigh_alloc(tbl); 481 struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev);
466 struct neigh_hash_table *nht; 482 struct neigh_hash_table *nht;
467 483
468 if (!n) { 484 if (!n) {
@@ -480,6 +496,14 @@ struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey,
480 goto out_neigh_release; 496 goto out_neigh_release;
481 } 497 }
482 498
499 if (dev->netdev_ops->ndo_neigh_construct) {
500 error = dev->netdev_ops->ndo_neigh_construct(n);
501 if (error < 0) {
502 rc = ERR_PTR(error);
503 goto out_neigh_release;
504 }
505 }
506
483 /* Device specific setup. */ 507 /* Device specific setup. */
484 if (n->parms->neigh_setup && 508 if (n->parms->neigh_setup &&
485 (error = n->parms->neigh_setup(n)) < 0) { 509 (error = n->parms->neigh_setup(n)) < 0) {
@@ -677,18 +701,14 @@ static inline void neigh_parms_put(struct neigh_parms *parms)
677 neigh_parms_destroy(parms); 701 neigh_parms_destroy(parms);
678} 702}
679 703
680static void neigh_destroy_rcu(struct rcu_head *head)
681{
682 struct neighbour *neigh = container_of(head, struct neighbour, rcu);
683
684 kmem_cache_free(neigh->tbl->kmem_cachep, neigh);
685}
686/* 704/*
687 * neighbour must already be out of the table; 705 * neighbour must already be out of the table;
688 * 706 *
689 */ 707 */
690void neigh_destroy(struct neighbour *neigh) 708void neigh_destroy(struct neighbour *neigh)
691{ 709{
710 struct net_device *dev = neigh->dev;
711
692 NEIGH_CACHE_STAT_INC(neigh->tbl, destroys); 712 NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);
693 713
694 if (!neigh->dead) { 714 if (!neigh->dead) {
@@ -702,14 +722,18 @@ void neigh_destroy(struct neighbour *neigh)
702 printk(KERN_WARNING "Impossible event.\n"); 722 printk(KERN_WARNING "Impossible event.\n");
703 723
704 skb_queue_purge(&neigh->arp_queue); 724 skb_queue_purge(&neigh->arp_queue);
725 neigh->arp_queue_len_bytes = 0;
726
727 if (dev->netdev_ops->ndo_neigh_destroy)
728 dev->netdev_ops->ndo_neigh_destroy(neigh);
705 729
706 dev_put(neigh->dev); 730 dev_put(dev);
707 neigh_parms_put(neigh->parms); 731 neigh_parms_put(neigh->parms);
708 732
709 NEIGH_PRINTK2("neigh %p is destroyed.\n", neigh); 733 NEIGH_PRINTK2("neigh %p is destroyed.\n", neigh);
710 734
711 atomic_dec(&neigh->tbl->entries); 735 atomic_dec(&neigh->tbl->entries);
712 call_rcu(&neigh->rcu, neigh_destroy_rcu); 736 kfree_rcu(neigh, rcu);
713} 737}
714EXPORT_SYMBOL(neigh_destroy); 738EXPORT_SYMBOL(neigh_destroy);
715 739
@@ -842,6 +866,7 @@ static void neigh_invalidate(struct neighbour *neigh)
842 write_lock(&neigh->lock); 866 write_lock(&neigh->lock);
843 } 867 }
844 skb_queue_purge(&neigh->arp_queue); 868 skb_queue_purge(&neigh->arp_queue);
869 neigh->arp_queue_len_bytes = 0;
845} 870}
846 871
847static void neigh_probe(struct neighbour *neigh) 872static void neigh_probe(struct neighbour *neigh)
@@ -980,15 +1005,20 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
980 1005
981 if (neigh->nud_state == NUD_INCOMPLETE) { 1006 if (neigh->nud_state == NUD_INCOMPLETE) {
982 if (skb) { 1007 if (skb) {
983 if (skb_queue_len(&neigh->arp_queue) >= 1008 while (neigh->arp_queue_len_bytes + skb->truesize >
984 neigh->parms->queue_len) { 1009 neigh->parms->queue_len_bytes) {
985 struct sk_buff *buff; 1010 struct sk_buff *buff;
1011
986 buff = __skb_dequeue(&neigh->arp_queue); 1012 buff = __skb_dequeue(&neigh->arp_queue);
1013 if (!buff)
1014 break;
1015 neigh->arp_queue_len_bytes -= buff->truesize;
987 kfree_skb(buff); 1016 kfree_skb(buff);
988 NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards); 1017 NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
989 } 1018 }
990 skb_dst_force(skb); 1019 skb_dst_force(skb);
991 __skb_queue_tail(&neigh->arp_queue, skb); 1020 __skb_queue_tail(&neigh->arp_queue, skb);
1021 neigh->arp_queue_len_bytes += skb->truesize;
992 } 1022 }
993 rc = 1; 1023 rc = 1;
994 } 1024 }
@@ -1167,7 +1197,7 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
1167 1197
1168 rcu_read_lock(); 1198 rcu_read_lock();
1169 /* On shaper/eql skb->dst->neighbour != neigh :( */ 1199 /* On shaper/eql skb->dst->neighbour != neigh :( */
1170 if (dst && (n2 = dst_get_neighbour(dst)) != NULL) 1200 if (dst && (n2 = dst_get_neighbour_noref(dst)) != NULL)
1171 n1 = n2; 1201 n1 = n2;
1172 n1->output(n1, skb); 1202 n1->output(n1, skb);
1173 rcu_read_unlock(); 1203 rcu_read_unlock();
@@ -1175,6 +1205,7 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
1175 write_lock_bh(&neigh->lock); 1205 write_lock_bh(&neigh->lock);
1176 } 1206 }
1177 skb_queue_purge(&neigh->arp_queue); 1207 skb_queue_purge(&neigh->arp_queue);
1208 neigh->arp_queue_len_bytes = 0;
1178 } 1209 }
1179out: 1210out:
1180 if (update_isrouter) { 1211 if (update_isrouter) {
@@ -1477,11 +1508,6 @@ void neigh_table_init_no_netlink(struct neigh_table *tbl)
1477 tbl->parms.reachable_time = 1508 tbl->parms.reachable_time =
1478 neigh_rand_reach_time(tbl->parms.base_reachable_time); 1509 neigh_rand_reach_time(tbl->parms.base_reachable_time);
1479 1510
1480 if (!tbl->kmem_cachep)
1481 tbl->kmem_cachep =
1482 kmem_cache_create(tbl->id, tbl->entry_size, 0,
1483 SLAB_HWCACHE_ALIGN|SLAB_PANIC,
1484 NULL);
1485 tbl->stats = alloc_percpu(struct neigh_statistics); 1511 tbl->stats = alloc_percpu(struct neigh_statistics);
1486 if (!tbl->stats) 1512 if (!tbl->stats)
1487 panic("cannot create neighbour cache statistics"); 1513 panic("cannot create neighbour cache statistics");
@@ -1566,9 +1592,6 @@ int neigh_table_clear(struct neigh_table *tbl)
1566 free_percpu(tbl->stats); 1592 free_percpu(tbl->stats);
1567 tbl->stats = NULL; 1593 tbl->stats = NULL;
1568 1594
1569 kmem_cache_destroy(tbl->kmem_cachep);
1570 tbl->kmem_cachep = NULL;
1571
1572 return 0; 1595 return 0;
1573} 1596}
1574EXPORT_SYMBOL(neigh_table_clear); 1597EXPORT_SYMBOL(neigh_table_clear);
@@ -1747,7 +1770,11 @@ static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
1747 NLA_PUT_U32(skb, NDTPA_IFINDEX, parms->dev->ifindex); 1770 NLA_PUT_U32(skb, NDTPA_IFINDEX, parms->dev->ifindex);
1748 1771
1749 NLA_PUT_U32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt)); 1772 NLA_PUT_U32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt));
1750 NLA_PUT_U32(skb, NDTPA_QUEUE_LEN, parms->queue_len); 1773 NLA_PUT_U32(skb, NDTPA_QUEUE_LENBYTES, parms->queue_len_bytes);
1774 /* approximative value for deprecated QUEUE_LEN (in packets) */
1775 NLA_PUT_U32(skb, NDTPA_QUEUE_LEN,
1776 DIV_ROUND_UP(parms->queue_len_bytes,
1777 SKB_TRUESIZE(ETH_FRAME_LEN)));
1751 NLA_PUT_U32(skb, NDTPA_PROXY_QLEN, parms->proxy_qlen); 1778 NLA_PUT_U32(skb, NDTPA_PROXY_QLEN, parms->proxy_qlen);
1752 NLA_PUT_U32(skb, NDTPA_APP_PROBES, parms->app_probes); 1779 NLA_PUT_U32(skb, NDTPA_APP_PROBES, parms->app_probes);
1753 NLA_PUT_U32(skb, NDTPA_UCAST_PROBES, parms->ucast_probes); 1780 NLA_PUT_U32(skb, NDTPA_UCAST_PROBES, parms->ucast_probes);
@@ -1808,7 +1835,7 @@ static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
1808 1835
1809 rcu_read_lock_bh(); 1836 rcu_read_lock_bh();
1810 nht = rcu_dereference_bh(tbl->nht); 1837 nht = rcu_dereference_bh(tbl->nht);
1811 ndc.ndtc_hash_rnd = nht->hash_rnd; 1838 ndc.ndtc_hash_rnd = nht->hash_rnd[0];
1812 ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1); 1839 ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1);
1813 rcu_read_unlock_bh(); 1840 rcu_read_unlock_bh();
1814 1841
@@ -1974,7 +2001,11 @@ static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1974 2001
1975 switch (i) { 2002 switch (i) {
1976 case NDTPA_QUEUE_LEN: 2003 case NDTPA_QUEUE_LEN:
1977 p->queue_len = nla_get_u32(tbp[i]); 2004 p->queue_len_bytes = nla_get_u32(tbp[i]) *
2005 SKB_TRUESIZE(ETH_FRAME_LEN);
2006 break;
2007 case NDTPA_QUEUE_LENBYTES:
2008 p->queue_len_bytes = nla_get_u32(tbp[i]);
1978 break; 2009 break;
1979 case NDTPA_PROXY_QLEN: 2010 case NDTPA_PROXY_QLEN:
1980 p->proxy_qlen = nla_get_u32(tbp[i]); 2011 p->proxy_qlen = nla_get_u32(tbp[i]);
@@ -2638,117 +2669,158 @@ EXPORT_SYMBOL(neigh_app_ns);
2638 2669
2639#ifdef CONFIG_SYSCTL 2670#ifdef CONFIG_SYSCTL
2640 2671
2641#define NEIGH_VARS_MAX 19 2672static int proc_unres_qlen(ctl_table *ctl, int write, void __user *buffer,
2673 size_t *lenp, loff_t *ppos)
2674{
2675 int size, ret;
2676 ctl_table tmp = *ctl;
2677
2678 tmp.data = &size;
2679 size = DIV_ROUND_UP(*(int *)ctl->data, SKB_TRUESIZE(ETH_FRAME_LEN));
2680 ret = proc_dointvec(&tmp, write, buffer, lenp, ppos);
2681 if (write && !ret)
2682 *(int *)ctl->data = size * SKB_TRUESIZE(ETH_FRAME_LEN);
2683 return ret;
2684}
2685
2686enum {
2687 NEIGH_VAR_MCAST_PROBE,
2688 NEIGH_VAR_UCAST_PROBE,
2689 NEIGH_VAR_APP_PROBE,
2690 NEIGH_VAR_RETRANS_TIME,
2691 NEIGH_VAR_BASE_REACHABLE_TIME,
2692 NEIGH_VAR_DELAY_PROBE_TIME,
2693 NEIGH_VAR_GC_STALETIME,
2694 NEIGH_VAR_QUEUE_LEN,
2695 NEIGH_VAR_QUEUE_LEN_BYTES,
2696 NEIGH_VAR_PROXY_QLEN,
2697 NEIGH_VAR_ANYCAST_DELAY,
2698 NEIGH_VAR_PROXY_DELAY,
2699 NEIGH_VAR_LOCKTIME,
2700 NEIGH_VAR_RETRANS_TIME_MS,
2701 NEIGH_VAR_BASE_REACHABLE_TIME_MS,
2702 NEIGH_VAR_GC_INTERVAL,
2703 NEIGH_VAR_GC_THRESH1,
2704 NEIGH_VAR_GC_THRESH2,
2705 NEIGH_VAR_GC_THRESH3,
2706 NEIGH_VAR_MAX
2707};
2642 2708
2643static struct neigh_sysctl_table { 2709static struct neigh_sysctl_table {
2644 struct ctl_table_header *sysctl_header; 2710 struct ctl_table_header *sysctl_header;
2645 struct ctl_table neigh_vars[NEIGH_VARS_MAX]; 2711 struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1];
2646 char *dev_name; 2712 char *dev_name;
2647} neigh_sysctl_template __read_mostly = { 2713} neigh_sysctl_template __read_mostly = {
2648 .neigh_vars = { 2714 .neigh_vars = {
2649 { 2715 [NEIGH_VAR_MCAST_PROBE] = {
2650 .procname = "mcast_solicit", 2716 .procname = "mcast_solicit",
2651 .maxlen = sizeof(int), 2717 .maxlen = sizeof(int),
2652 .mode = 0644, 2718 .mode = 0644,
2653 .proc_handler = proc_dointvec, 2719 .proc_handler = proc_dointvec,
2654 }, 2720 },
2655 { 2721 [NEIGH_VAR_UCAST_PROBE] = {
2656 .procname = "ucast_solicit", 2722 .procname = "ucast_solicit",
2657 .maxlen = sizeof(int), 2723 .maxlen = sizeof(int),
2658 .mode = 0644, 2724 .mode = 0644,
2659 .proc_handler = proc_dointvec, 2725 .proc_handler = proc_dointvec,
2660 }, 2726 },
2661 { 2727 [NEIGH_VAR_APP_PROBE] = {
2662 .procname = "app_solicit", 2728 .procname = "app_solicit",
2663 .maxlen = sizeof(int), 2729 .maxlen = sizeof(int),
2664 .mode = 0644, 2730 .mode = 0644,
2665 .proc_handler = proc_dointvec, 2731 .proc_handler = proc_dointvec,
2666 }, 2732 },
2667 { 2733 [NEIGH_VAR_RETRANS_TIME] = {
2668 .procname = "retrans_time", 2734 .procname = "retrans_time",
2669 .maxlen = sizeof(int), 2735 .maxlen = sizeof(int),
2670 .mode = 0644, 2736 .mode = 0644,
2671 .proc_handler = proc_dointvec_userhz_jiffies, 2737 .proc_handler = proc_dointvec_userhz_jiffies,
2672 }, 2738 },
2673 { 2739 [NEIGH_VAR_BASE_REACHABLE_TIME] = {
2674 .procname = "base_reachable_time", 2740 .procname = "base_reachable_time",
2675 .maxlen = sizeof(int), 2741 .maxlen = sizeof(int),
2676 .mode = 0644, 2742 .mode = 0644,
2677 .proc_handler = proc_dointvec_jiffies, 2743 .proc_handler = proc_dointvec_jiffies,
2678 }, 2744 },
2679 { 2745 [NEIGH_VAR_DELAY_PROBE_TIME] = {
2680 .procname = "delay_first_probe_time", 2746 .procname = "delay_first_probe_time",
2681 .maxlen = sizeof(int), 2747 .maxlen = sizeof(int),
2682 .mode = 0644, 2748 .mode = 0644,
2683 .proc_handler = proc_dointvec_jiffies, 2749 .proc_handler = proc_dointvec_jiffies,
2684 }, 2750 },
2685 { 2751 [NEIGH_VAR_GC_STALETIME] = {
2686 .procname = "gc_stale_time", 2752 .procname = "gc_stale_time",
2687 .maxlen = sizeof(int), 2753 .maxlen = sizeof(int),
2688 .mode = 0644, 2754 .mode = 0644,
2689 .proc_handler = proc_dointvec_jiffies, 2755 .proc_handler = proc_dointvec_jiffies,
2690 }, 2756 },
2691 { 2757 [NEIGH_VAR_QUEUE_LEN] = {
2692 .procname = "unres_qlen", 2758 .procname = "unres_qlen",
2693 .maxlen = sizeof(int), 2759 .maxlen = sizeof(int),
2694 .mode = 0644, 2760 .mode = 0644,
2761 .proc_handler = proc_unres_qlen,
2762 },
2763 [NEIGH_VAR_QUEUE_LEN_BYTES] = {
2764 .procname = "unres_qlen_bytes",
2765 .maxlen = sizeof(int),
2766 .mode = 0644,
2695 .proc_handler = proc_dointvec, 2767 .proc_handler = proc_dointvec,
2696 }, 2768 },
2697 { 2769 [NEIGH_VAR_PROXY_QLEN] = {
2698 .procname = "proxy_qlen", 2770 .procname = "proxy_qlen",
2699 .maxlen = sizeof(int), 2771 .maxlen = sizeof(int),
2700 .mode = 0644, 2772 .mode = 0644,
2701 .proc_handler = proc_dointvec, 2773 .proc_handler = proc_dointvec,
2702 }, 2774 },
2703 { 2775 [NEIGH_VAR_ANYCAST_DELAY] = {
2704 .procname = "anycast_delay", 2776 .procname = "anycast_delay",
2705 .maxlen = sizeof(int), 2777 .maxlen = sizeof(int),
2706 .mode = 0644, 2778 .mode = 0644,
2707 .proc_handler = proc_dointvec_userhz_jiffies, 2779 .proc_handler = proc_dointvec_userhz_jiffies,
2708 }, 2780 },
2709 { 2781 [NEIGH_VAR_PROXY_DELAY] = {
2710 .procname = "proxy_delay", 2782 .procname = "proxy_delay",
2711 .maxlen = sizeof(int), 2783 .maxlen = sizeof(int),
2712 .mode = 0644, 2784 .mode = 0644,
2713 .proc_handler = proc_dointvec_userhz_jiffies, 2785 .proc_handler = proc_dointvec_userhz_jiffies,
2714 }, 2786 },
2715 { 2787 [NEIGH_VAR_LOCKTIME] = {
2716 .procname = "locktime", 2788 .procname = "locktime",
2717 .maxlen = sizeof(int), 2789 .maxlen = sizeof(int),
2718 .mode = 0644, 2790 .mode = 0644,
2719 .proc_handler = proc_dointvec_userhz_jiffies, 2791 .proc_handler = proc_dointvec_userhz_jiffies,
2720 }, 2792 },
2721 { 2793 [NEIGH_VAR_RETRANS_TIME_MS] = {
2722 .procname = "retrans_time_ms", 2794 .procname = "retrans_time_ms",
2723 .maxlen = sizeof(int), 2795 .maxlen = sizeof(int),
2724 .mode = 0644, 2796 .mode = 0644,
2725 .proc_handler = proc_dointvec_ms_jiffies, 2797 .proc_handler = proc_dointvec_ms_jiffies,
2726 }, 2798 },
2727 { 2799 [NEIGH_VAR_BASE_REACHABLE_TIME_MS] = {
2728 .procname = "base_reachable_time_ms", 2800 .procname = "base_reachable_time_ms",
2729 .maxlen = sizeof(int), 2801 .maxlen = sizeof(int),
2730 .mode = 0644, 2802 .mode = 0644,
2731 .proc_handler = proc_dointvec_ms_jiffies, 2803 .proc_handler = proc_dointvec_ms_jiffies,
2732 }, 2804 },
2733 { 2805 [NEIGH_VAR_GC_INTERVAL] = {
2734 .procname = "gc_interval", 2806 .procname = "gc_interval",
2735 .maxlen = sizeof(int), 2807 .maxlen = sizeof(int),
2736 .mode = 0644, 2808 .mode = 0644,
2737 .proc_handler = proc_dointvec_jiffies, 2809 .proc_handler = proc_dointvec_jiffies,
2738 }, 2810 },
2739 { 2811 [NEIGH_VAR_GC_THRESH1] = {
2740 .procname = "gc_thresh1", 2812 .procname = "gc_thresh1",
2741 .maxlen = sizeof(int), 2813 .maxlen = sizeof(int),
2742 .mode = 0644, 2814 .mode = 0644,
2743 .proc_handler = proc_dointvec, 2815 .proc_handler = proc_dointvec,
2744 }, 2816 },
2745 { 2817 [NEIGH_VAR_GC_THRESH2] = {
2746 .procname = "gc_thresh2", 2818 .procname = "gc_thresh2",
2747 .maxlen = sizeof(int), 2819 .maxlen = sizeof(int),
2748 .mode = 0644, 2820 .mode = 0644,
2749 .proc_handler = proc_dointvec, 2821 .proc_handler = proc_dointvec,
2750 }, 2822 },
2751 { 2823 [NEIGH_VAR_GC_THRESH3] = {
2752 .procname = "gc_thresh3", 2824 .procname = "gc_thresh3",
2753 .maxlen = sizeof(int), 2825 .maxlen = sizeof(int),
2754 .mode = 0644, 2826 .mode = 0644,
@@ -2781,47 +2853,49 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
2781 if (!t) 2853 if (!t)
2782 goto err; 2854 goto err;
2783 2855
2784 t->neigh_vars[0].data = &p->mcast_probes; 2856 t->neigh_vars[NEIGH_VAR_MCAST_PROBE].data = &p->mcast_probes;
2785 t->neigh_vars[1].data = &p->ucast_probes; 2857 t->neigh_vars[NEIGH_VAR_UCAST_PROBE].data = &p->ucast_probes;
2786 t->neigh_vars[2].data = &p->app_probes; 2858 t->neigh_vars[NEIGH_VAR_APP_PROBE].data = &p->app_probes;
2787 t->neigh_vars[3].data = &p->retrans_time; 2859 t->neigh_vars[NEIGH_VAR_RETRANS_TIME].data = &p->retrans_time;
2788 t->neigh_vars[4].data = &p->base_reachable_time; 2860 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].data = &p->base_reachable_time;
2789 t->neigh_vars[5].data = &p->delay_probe_time; 2861 t->neigh_vars[NEIGH_VAR_DELAY_PROBE_TIME].data = &p->delay_probe_time;
2790 t->neigh_vars[6].data = &p->gc_staletime; 2862 t->neigh_vars[NEIGH_VAR_GC_STALETIME].data = &p->gc_staletime;
2791 t->neigh_vars[7].data = &p->queue_len; 2863 t->neigh_vars[NEIGH_VAR_QUEUE_LEN].data = &p->queue_len_bytes;
2792 t->neigh_vars[8].data = &p->proxy_qlen; 2864 t->neigh_vars[NEIGH_VAR_QUEUE_LEN_BYTES].data = &p->queue_len_bytes;
2793 t->neigh_vars[9].data = &p->anycast_delay; 2865 t->neigh_vars[NEIGH_VAR_PROXY_QLEN].data = &p->proxy_qlen;
2794 t->neigh_vars[10].data = &p->proxy_delay; 2866 t->neigh_vars[NEIGH_VAR_ANYCAST_DELAY].data = &p->anycast_delay;
2795 t->neigh_vars[11].data = &p->locktime; 2867 t->neigh_vars[NEIGH_VAR_PROXY_DELAY].data = &p->proxy_delay;
2796 t->neigh_vars[12].data = &p->retrans_time; 2868 t->neigh_vars[NEIGH_VAR_LOCKTIME].data = &p->locktime;
2797 t->neigh_vars[13].data = &p->base_reachable_time; 2869 t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].data = &p->retrans_time;
2870 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].data = &p->base_reachable_time;
2798 2871
2799 if (dev) { 2872 if (dev) {
2800 dev_name_source = dev->name; 2873 dev_name_source = dev->name;
2801 /* Terminate the table early */ 2874 /* Terminate the table early */
2802 memset(&t->neigh_vars[14], 0, sizeof(t->neigh_vars[14])); 2875 memset(&t->neigh_vars[NEIGH_VAR_GC_INTERVAL], 0,
2876 sizeof(t->neigh_vars[NEIGH_VAR_GC_INTERVAL]));
2803 } else { 2877 } else {
2804 dev_name_source = neigh_path[NEIGH_CTL_PATH_DEV].procname; 2878 dev_name_source = neigh_path[NEIGH_CTL_PATH_DEV].procname;
2805 t->neigh_vars[14].data = (int *)(p + 1); 2879 t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = (int *)(p + 1);
2806 t->neigh_vars[15].data = (int *)(p + 1) + 1; 2880 t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = (int *)(p + 1) + 1;
2807 t->neigh_vars[16].data = (int *)(p + 1) + 2; 2881 t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = (int *)(p + 1) + 2;
2808 t->neigh_vars[17].data = (int *)(p + 1) + 3; 2882 t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = (int *)(p + 1) + 3;
2809 } 2883 }
2810 2884
2811 2885
2812 if (handler) { 2886 if (handler) {
2813 /* RetransTime */ 2887 /* RetransTime */
2814 t->neigh_vars[3].proc_handler = handler; 2888 t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler;
2815 t->neigh_vars[3].extra1 = dev; 2889 t->neigh_vars[NEIGH_VAR_RETRANS_TIME].extra1 = dev;
2816 /* ReachableTime */ 2890 /* ReachableTime */
2817 t->neigh_vars[4].proc_handler = handler; 2891 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler;
2818 t->neigh_vars[4].extra1 = dev; 2892 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].extra1 = dev;
2819 /* RetransTime (in milliseconds)*/ 2893 /* RetransTime (in milliseconds)*/
2820 t->neigh_vars[12].proc_handler = handler; 2894 t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler;
2821 t->neigh_vars[12].extra1 = dev; 2895 t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].extra1 = dev;
2822 /* ReachableTime (in milliseconds) */ 2896 /* ReachableTime (in milliseconds) */
2823 t->neigh_vars[13].proc_handler = handler; 2897 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler;
2824 t->neigh_vars[13].extra1 = dev; 2898 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].extra1 = dev;
2825 } 2899 }
2826 2900
2827 t->dev_name = kstrdup(dev_name_source, GFP_KERNEL); 2901 t->dev_name = kstrdup(dev_name_source, GFP_KERNEL);
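
A hedged sketch of how a driver could consume the per-neighbour private area and the construct/destroy callbacks wired up above; the neighbour_priv() accessor and the exact hook signatures are assumptions inferred from the neigh_alloc()/neigh_create()/neigh_destroy() changes in this file, not shown in this hunk.

/* Hedged driver-side sketch (assumptions noted above): reserve a private
 * area in every neighbour entry and get notified on create/destroy.
 */
#include <linux/netdevice.h>
#include <net/neighbour.h>

struct example_neigh_priv {
	u32 hw_index;			/* e.g. an offload table slot */
};

static int example_neigh_construct(struct neighbour *n)
{
	struct example_neigh_priv *priv = neighbour_priv(n);

	priv->hw_index = 0;		/* nothing programmed yet */
	return 0;
}

static void example_neigh_destroy(struct neighbour *n)
{
	struct example_neigh_priv *priv = neighbour_priv(n);

	priv->hw_index = 0;		/* release hardware state here */
}

/* During device setup the driver would set
 *	dev->neigh_priv_len = sizeof(struct example_neigh_priv);
 * and hook
 *	.ndo_neigh_construct = example_neigh_construct,
 *	.ndo_neigh_destroy   = example_neigh_destroy,
 * into its struct net_device_ops.
 */
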
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 385aefe53648..abf4393a77b3 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -21,6 +21,7 @@
21#include <linux/wireless.h> 21#include <linux/wireless.h>
22#include <linux/vmalloc.h> 22#include <linux/vmalloc.h>
23#include <linux/export.h> 23#include <linux/export.h>
24#include <linux/jiffies.h>
24#include <net/wext.h> 25#include <net/wext.h>
25 26
26#include "net-sysfs.h" 27#include "net-sysfs.h"
@@ -606,9 +607,12 @@ static ssize_t store_rps_map(struct netdev_rx_queue *queue,
606 rcu_assign_pointer(queue->rps_map, map); 607 rcu_assign_pointer(queue->rps_map, map);
607 spin_unlock(&rps_map_lock); 608 spin_unlock(&rps_map_lock);
608 609
609 if (old_map) 610 if (map)
611 jump_label_inc(&rps_needed);
612 if (old_map) {
610 kfree_rcu(old_map, rcu); 613 kfree_rcu(old_map, rcu);
611 614 jump_label_dec(&rps_needed);
615 }
612 free_cpumask_var(mask); 616 free_cpumask_var(mask);
613 return len; 617 return len;
614} 618}
@@ -618,15 +622,15 @@ static ssize_t show_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue,
618 char *buf) 622 char *buf)
619{ 623{
620 struct rps_dev_flow_table *flow_table; 624 struct rps_dev_flow_table *flow_table;
621 unsigned int val = 0; 625 unsigned long val = 0;
622 626
623 rcu_read_lock(); 627 rcu_read_lock();
624 flow_table = rcu_dereference(queue->rps_flow_table); 628 flow_table = rcu_dereference(queue->rps_flow_table);
625 if (flow_table) 629 if (flow_table)
626 val = flow_table->mask + 1; 630 val = (unsigned long)flow_table->mask + 1;
627 rcu_read_unlock(); 631 rcu_read_unlock();
628 632
629 return sprintf(buf, "%u\n", val); 633 return sprintf(buf, "%lu\n", val);
630} 634}
631 635
632static void rps_dev_flow_table_release_work(struct work_struct *work) 636static void rps_dev_flow_table_release_work(struct work_struct *work)
@@ -650,36 +654,46 @@ static ssize_t store_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue,
650 struct rx_queue_attribute *attr, 654 struct rx_queue_attribute *attr,
651 const char *buf, size_t len) 655 const char *buf, size_t len)
652{ 656{
653 unsigned int count; 657 unsigned long mask, count;
654 char *endp;
655 struct rps_dev_flow_table *table, *old_table; 658 struct rps_dev_flow_table *table, *old_table;
656 static DEFINE_SPINLOCK(rps_dev_flow_lock); 659 static DEFINE_SPINLOCK(rps_dev_flow_lock);
660 int rc;
657 661
658 if (!capable(CAP_NET_ADMIN)) 662 if (!capable(CAP_NET_ADMIN))
659 return -EPERM; 663 return -EPERM;
660 664
661 count = simple_strtoul(buf, &endp, 0); 665 rc = kstrtoul(buf, 0, &count);
662 if (endp == buf) 666 if (rc < 0)
663 return -EINVAL; 667 return rc;
664 668
665 if (count) { 669 if (count) {
666 int i; 670 mask = count - 1;
667 671 /* mask = roundup_pow_of_two(count) - 1;
668 if (count > INT_MAX) 672 * without overflows...
673 */
674 while ((mask | (mask >> 1)) != mask)
675 mask |= (mask >> 1);
676 /* On 64 bit arches, must check mask fits in table->mask (u32),
677 * and on 32bit arches, must check RPS_DEV_FLOW_TABLE_SIZE(mask + 1)
678 * doesnt overflow.
679 */
680#if BITS_PER_LONG > 32
681 if (mask > (unsigned long)(u32)mask)
669 return -EINVAL; 682 return -EINVAL;
670 count = roundup_pow_of_two(count); 683#else
671 if (count > (ULONG_MAX - sizeof(struct rps_dev_flow_table)) 684 if (mask > (ULONG_MAX - RPS_DEV_FLOW_TABLE_SIZE(1))
672 / sizeof(struct rps_dev_flow)) { 685 / sizeof(struct rps_dev_flow)) {
673 /* Enforce a limit to prevent overflow */ 686 /* Enforce a limit to prevent overflow */
674 return -EINVAL; 687 return -EINVAL;
675 } 688 }
676 table = vmalloc(RPS_DEV_FLOW_TABLE_SIZE(count)); 689#endif
690 table = vmalloc(RPS_DEV_FLOW_TABLE_SIZE(mask + 1));
677 if (!table) 691 if (!table)
678 return -ENOMEM; 692 return -ENOMEM;
679 693
680 table->mask = count - 1; 694 table->mask = mask;
681 for (i = 0; i < count; i++) 695 for (count = 0; count <= mask; count++)
682 table->flows[i].cpu = RPS_NO_CPU; 696 table->flows[count].cpu = RPS_NO_CPU;
683 } else 697 } else
684 table = NULL; 698 table = NULL;
685 699
@@ -783,7 +797,7 @@ net_rx_queue_update_kobjects(struct net_device *net, int old_num, int new_num)
783#endif 797#endif
784} 798}
785 799
786#ifdef CONFIG_XPS 800#ifdef CONFIG_SYSFS
787/* 801/*
788 * netdev_queue sysfs structures and functions. 802 * netdev_queue sysfs structures and functions.
789 */ 803 */
@@ -829,6 +843,133 @@ static const struct sysfs_ops netdev_queue_sysfs_ops = {
829 .store = netdev_queue_attr_store, 843 .store = netdev_queue_attr_store,
830}; 844};
831 845
846static ssize_t show_trans_timeout(struct netdev_queue *queue,
847 struct netdev_queue_attribute *attribute,
848 char *buf)
849{
850 unsigned long trans_timeout;
851
852 spin_lock_irq(&queue->_xmit_lock);
853 trans_timeout = queue->trans_timeout;
854 spin_unlock_irq(&queue->_xmit_lock);
855
856 return sprintf(buf, "%lu", trans_timeout);
857}
858
859static struct netdev_queue_attribute queue_trans_timeout =
860 __ATTR(tx_timeout, S_IRUGO, show_trans_timeout, NULL);
861
862#ifdef CONFIG_BQL
863/*
864 * Byte queue limits sysfs structures and functions.
865 */
866static ssize_t bql_show(char *buf, unsigned int value)
867{
868 return sprintf(buf, "%u\n", value);
869}
870
871static ssize_t bql_set(const char *buf, const size_t count,
872 unsigned int *pvalue)
873{
874 unsigned int value;
875 int err;
876
877 if (!strcmp(buf, "max") || !strcmp(buf, "max\n"))
878 value = DQL_MAX_LIMIT;
879 else {
880 err = kstrtouint(buf, 10, &value);
881 if (err < 0)
882 return err;
883 if (value > DQL_MAX_LIMIT)
884 return -EINVAL;
885 }
886
887 *pvalue = value;
888
889 return count;
890}
891
892static ssize_t bql_show_hold_time(struct netdev_queue *queue,
893 struct netdev_queue_attribute *attr,
894 char *buf)
895{
896 struct dql *dql = &queue->dql;
897
898 return sprintf(buf, "%u\n", jiffies_to_msecs(dql->slack_hold_time));
899}
900
901static ssize_t bql_set_hold_time(struct netdev_queue *queue,
902 struct netdev_queue_attribute *attribute,
903 const char *buf, size_t len)
904{
905 struct dql *dql = &queue->dql;
906 unsigned value;
907 int err;
908
909 err = kstrtouint(buf, 10, &value);
910 if (err < 0)
911 return err;
912
913 dql->slack_hold_time = msecs_to_jiffies(value);
914
915 return len;
916}
917
918static struct netdev_queue_attribute bql_hold_time_attribute =
919 __ATTR(hold_time, S_IRUGO | S_IWUSR, bql_show_hold_time,
920 bql_set_hold_time);
921
922static ssize_t bql_show_inflight(struct netdev_queue *queue,
923 struct netdev_queue_attribute *attr,
924 char *buf)
925{
926 struct dql *dql = &queue->dql;
927
928 return sprintf(buf, "%u\n", dql->num_queued - dql->num_completed);
929}
930
931static struct netdev_queue_attribute bql_inflight_attribute =
932 __ATTR(inflight, S_IRUGO | S_IWUSR, bql_show_inflight, NULL);
933
934#define BQL_ATTR(NAME, FIELD) \
935static ssize_t bql_show_ ## NAME(struct netdev_queue *queue, \
936 struct netdev_queue_attribute *attr, \
937 char *buf) \
938{ \
939 return bql_show(buf, queue->dql.FIELD); \
940} \
941 \
942static ssize_t bql_set_ ## NAME(struct netdev_queue *queue, \
943 struct netdev_queue_attribute *attr, \
944 const char *buf, size_t len) \
945{ \
946 return bql_set(buf, len, &queue->dql.FIELD); \
947} \
948 \
949static struct netdev_queue_attribute bql_ ## NAME ## _attribute = \
950 __ATTR(NAME, S_IRUGO | S_IWUSR, bql_show_ ## NAME, \
951 bql_set_ ## NAME);
952
953BQL_ATTR(limit, limit)
954BQL_ATTR(limit_max, max_limit)
955BQL_ATTR(limit_min, min_limit)
956
957static struct attribute *dql_attrs[] = {
958 &bql_limit_attribute.attr,
959 &bql_limit_max_attribute.attr,
960 &bql_limit_min_attribute.attr,
961 &bql_hold_time_attribute.attr,
962 &bql_inflight_attribute.attr,
963 NULL
964};
965
966static struct attribute_group dql_group = {
967 .name = "byte_queue_limits",
968 .attrs = dql_attrs,
969};
970#endif /* CONFIG_BQL */
971
972#ifdef CONFIG_XPS
832static inline unsigned int get_netdev_queue_index(struct netdev_queue *queue) 973static inline unsigned int get_netdev_queue_index(struct netdev_queue *queue)
833{ 974{
834 struct net_device *dev = queue->dev; 975 struct net_device *dev = queue->dev;
@@ -893,6 +1034,52 @@ static DEFINE_MUTEX(xps_map_mutex);
893#define xmap_dereference(P) \ 1034#define xmap_dereference(P) \
894 rcu_dereference_protected((P), lockdep_is_held(&xps_map_mutex)) 1035 rcu_dereference_protected((P), lockdep_is_held(&xps_map_mutex))
895 1036
1037static void xps_queue_release(struct netdev_queue *queue)
1038{
1039 struct net_device *dev = queue->dev;
1040 struct xps_dev_maps *dev_maps;
1041 struct xps_map *map;
1042 unsigned long index;
1043 int i, pos, nonempty = 0;
1044
1045 index = get_netdev_queue_index(queue);
1046
1047 mutex_lock(&xps_map_mutex);
1048 dev_maps = xmap_dereference(dev->xps_maps);
1049
1050 if (dev_maps) {
1051 for_each_possible_cpu(i) {
1052 map = xmap_dereference(dev_maps->cpu_map[i]);
1053 if (!map)
1054 continue;
1055
1056 for (pos = 0; pos < map->len; pos++)
1057 if (map->queues[pos] == index)
1058 break;
1059
1060 if (pos < map->len) {
1061 if (map->len > 1)
1062 map->queues[pos] =
1063 map->queues[--map->len];
1064 else {
1065 RCU_INIT_POINTER(dev_maps->cpu_map[i],
1066 NULL);
1067 kfree_rcu(map, rcu);
1068 map = NULL;
1069 }
1070 }
1071 if (map)
1072 nonempty = 1;
1073 }
1074
1075 if (!nonempty) {
1076 RCU_INIT_POINTER(dev->xps_maps, NULL);
1077 kfree_rcu(dev_maps, rcu);
1078 }
1079 }
1080 mutex_unlock(&xps_map_mutex);
1081}
1082
896static ssize_t store_xps_map(struct netdev_queue *queue, 1083static ssize_t store_xps_map(struct netdev_queue *queue,
897 struct netdev_queue_attribute *attribute, 1084 struct netdev_queue_attribute *attribute,
898 const char *buf, size_t len) 1085 const char *buf, size_t len)
@@ -904,7 +1091,7 @@ static ssize_t store_xps_map(struct netdev_queue *queue,
904 struct xps_map *map, *new_map; 1091 struct xps_map *map, *new_map;
905 struct xps_dev_maps *dev_maps, *new_dev_maps; 1092 struct xps_dev_maps *dev_maps, *new_dev_maps;
906 int nonempty = 0; 1093 int nonempty = 0;
907 int numa_node = -2; 1094 int numa_node_id = -2;
908 1095
909 if (!capable(CAP_NET_ADMIN)) 1096 if (!capable(CAP_NET_ADMIN))
910 return -EPERM; 1097 return -EPERM;
@@ -947,10 +1134,10 @@ static ssize_t store_xps_map(struct netdev_queue *queue,
947 need_set = cpumask_test_cpu(cpu, mask) && cpu_online(cpu); 1134 need_set = cpumask_test_cpu(cpu, mask) && cpu_online(cpu);
948#ifdef CONFIG_NUMA 1135#ifdef CONFIG_NUMA
949 if (need_set) { 1136 if (need_set) {
950 if (numa_node == -2) 1137 if (numa_node_id == -2)
951 numa_node = cpu_to_node(cpu); 1138 numa_node_id = cpu_to_node(cpu);
952 else if (numa_node != cpu_to_node(cpu)) 1139 else if (numa_node_id != cpu_to_node(cpu))
953 numa_node = -1; 1140 numa_node_id = -1;
954 } 1141 }
955#endif 1142#endif
956 if (need_set && pos >= map_len) { 1143 if (need_set && pos >= map_len) {
@@ -1000,7 +1187,7 @@ static ssize_t store_xps_map(struct netdev_queue *queue,
1000 if (dev_maps) 1187 if (dev_maps)
1001 kfree_rcu(dev_maps, rcu); 1188 kfree_rcu(dev_maps, rcu);
1002 1189
1003 netdev_queue_numa_node_write(queue, (numa_node >= 0) ? numa_node : 1190 netdev_queue_numa_node_write(queue, (numa_node_id >= 0) ? numa_node_id :
1004 NUMA_NO_NODE); 1191 NUMA_NO_NODE);
1005 1192
1006 mutex_unlock(&xps_map_mutex); 1193 mutex_unlock(&xps_map_mutex);
@@ -1023,58 +1210,23 @@ error:
1023 1210
1024static struct netdev_queue_attribute xps_cpus_attribute = 1211static struct netdev_queue_attribute xps_cpus_attribute =
1025 __ATTR(xps_cpus, S_IRUGO | S_IWUSR, show_xps_map, store_xps_map); 1212 __ATTR(xps_cpus, S_IRUGO | S_IWUSR, show_xps_map, store_xps_map);
1213#endif /* CONFIG_XPS */
1026 1214
1027static struct attribute *netdev_queue_default_attrs[] = { 1215static struct attribute *netdev_queue_default_attrs[] = {
1216 &queue_trans_timeout.attr,
1217#ifdef CONFIG_XPS
1028 &xps_cpus_attribute.attr, 1218 &xps_cpus_attribute.attr,
1219#endif
1029 NULL 1220 NULL
1030}; 1221};
1031 1222
1032static void netdev_queue_release(struct kobject *kobj) 1223static void netdev_queue_release(struct kobject *kobj)
1033{ 1224{
1034 struct netdev_queue *queue = to_netdev_queue(kobj); 1225 struct netdev_queue *queue = to_netdev_queue(kobj);
1035 struct net_device *dev = queue->dev;
1036 struct xps_dev_maps *dev_maps;
1037 struct xps_map *map;
1038 unsigned long index;
1039 int i, pos, nonempty = 0;
1040
1041 index = get_netdev_queue_index(queue);
1042
1043 mutex_lock(&xps_map_mutex);
1044 dev_maps = xmap_dereference(dev->xps_maps);
1045
1046 if (dev_maps) {
1047 for_each_possible_cpu(i) {
1048 map = xmap_dereference(dev_maps->cpu_map[i]);
1049 if (!map)
1050 continue;
1051
1052 for (pos = 0; pos < map->len; pos++)
1053 if (map->queues[pos] == index)
1054 break;
1055
1056 if (pos < map->len) {
1057 if (map->len > 1)
1058 map->queues[pos] =
1059 map->queues[--map->len];
1060 else {
1061 RCU_INIT_POINTER(dev_maps->cpu_map[i],
1062 NULL);
1063 kfree_rcu(map, rcu);
1064 map = NULL;
1065 }
1066 }
1067 if (map)
1068 nonempty = 1;
1069 }
1070 1226
1071 if (!nonempty) { 1227#ifdef CONFIG_XPS
1072 RCU_INIT_POINTER(dev->xps_maps, NULL); 1228 xps_queue_release(queue);
1073 kfree_rcu(dev_maps, rcu); 1229#endif
1074 }
1075 }
1076
1077 mutex_unlock(&xps_map_mutex);
1078 1230
1079 memset(kobj, 0, sizeof(*kobj)); 1231 memset(kobj, 0, sizeof(*kobj));
1080 dev_put(queue->dev); 1232 dev_put(queue->dev);
@@ -1095,22 +1247,29 @@ static int netdev_queue_add_kobject(struct net_device *net, int index)
1095 kobj->kset = net->queues_kset; 1247 kobj->kset = net->queues_kset;
1096 error = kobject_init_and_add(kobj, &netdev_queue_ktype, NULL, 1248 error = kobject_init_and_add(kobj, &netdev_queue_ktype, NULL,
1097 "tx-%u", index); 1249 "tx-%u", index);
1098 if (error) { 1250 if (error)
1099 kobject_put(kobj); 1251 goto exit;
1100 return error; 1252
1101 } 1253#ifdef CONFIG_BQL
1254 error = sysfs_create_group(kobj, &dql_group);
1255 if (error)
1256 goto exit;
1257#endif
1102 1258
1103 kobject_uevent(kobj, KOBJ_ADD); 1259 kobject_uevent(kobj, KOBJ_ADD);
1104 dev_hold(queue->dev); 1260 dev_hold(queue->dev);
1105 1261
1262 return 0;
1263exit:
1264 kobject_put(kobj);
1106 return error; 1265 return error;
1107} 1266}
1108#endif /* CONFIG_XPS */ 1267#endif /* CONFIG_SYSFS */
1109 1268
1110int 1269int
1111netdev_queue_update_kobjects(struct net_device *net, int old_num, int new_num) 1270netdev_queue_update_kobjects(struct net_device *net, int old_num, int new_num)
1112{ 1271{
1113#ifdef CONFIG_XPS 1272#ifdef CONFIG_SYSFS
1114 int i; 1273 int i;
1115 int error = 0; 1274 int error = 0;
1116 1275
@@ -1122,20 +1281,26 @@ netdev_queue_update_kobjects(struct net_device *net, int old_num, int new_num)
1122 } 1281 }
1123 } 1282 }
1124 1283
1125 while (--i >= new_num) 1284 while (--i >= new_num) {
1126 kobject_put(&net->_tx[i].kobj); 1285 struct netdev_queue *queue = net->_tx + i;
1286
1287#ifdef CONFIG_BQL
1288 sysfs_remove_group(&queue->kobj, &dql_group);
1289#endif
1290 kobject_put(&queue->kobj);
1291 }
1127 1292
1128 return error; 1293 return error;
1129#else 1294#else
1130 return 0; 1295 return 0;
1131#endif 1296#endif /* CONFIG_SYSFS */
1132} 1297}
1133 1298
1134static int register_queue_kobjects(struct net_device *net) 1299static int register_queue_kobjects(struct net_device *net)
1135{ 1300{
1136 int error = 0, txq = 0, rxq = 0, real_rx = 0, real_tx = 0; 1301 int error = 0, txq = 0, rxq = 0, real_rx = 0, real_tx = 0;
1137 1302
1138#if defined(CONFIG_RPS) || defined(CONFIG_XPS) 1303#ifdef CONFIG_SYSFS
1139 net->queues_kset = kset_create_and_add("queues", 1304 net->queues_kset = kset_create_and_add("queues",
1140 NULL, &net->dev.kobj); 1305 NULL, &net->dev.kobj);
1141 if (!net->queues_kset) 1306 if (!net->queues_kset)
@@ -1176,7 +1341,7 @@ static void remove_queue_kobjects(struct net_device *net)
1176 1341
1177 net_rx_queue_update_kobjects(net, real_rx, 0); 1342 net_rx_queue_update_kobjects(net, real_rx, 0);
1178 netdev_queue_update_kobjects(net, real_tx, 0); 1343 netdev_queue_update_kobjects(net, real_tx, 0);
1179#if defined(CONFIG_RPS) || defined(CONFIG_XPS) 1344#ifdef CONFIG_SYSFS
1180 kset_unregister(net->queues_kset); 1345 kset_unregister(net->queues_kset);
1181#endif 1346#endif
1182} 1347}
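
The rps_flow_cnt store above rounds the requested entry count up to a power of two by smearing bits into a mask instead of calling roundup_pow_of_two(), so the intermediate value can never overflow. A small stand-alone illustration of that step, with example values only:

/* Hedged stand-alone illustration of the bit-smearing loop used in
 * store_rps_dev_flow_table_cnt() above; count must be non-zero, as in
 * the kernel code, where count == 0 is handled separately.
 */
#include <stdio.h>

static unsigned long fill_mask(unsigned long count)
{
	unsigned long mask = count - 1;

	/* equivalent to roundup_pow_of_two(count) - 1, without overflow */
	while ((mask | (mask >> 1)) != mask)
		mask |= mask >> 1;
	return mask;
}

int main(void)
{
	/* 1000 requested entries -> mask 0x3ff -> a 1024-entry table */
	printf("count=1000 -> mask=%#lx, table size %lu\n",
	       fill_mask(1000), fill_mask(1000) + 1);
	return 0;
}
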
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index cf64c1ffa4cd..0d38808a2305 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -76,7 +76,7 @@ static void queue_process(struct work_struct *work)
76 76
77 local_irq_save(flags); 77 local_irq_save(flags);
78 __netif_tx_lock(txq, smp_processor_id()); 78 __netif_tx_lock(txq, smp_processor_id());
79 if (netif_tx_queue_frozen_or_stopped(txq) || 79 if (netif_xmit_frozen_or_stopped(txq) ||
80 ops->ndo_start_xmit(skb, dev) != NETDEV_TX_OK) { 80 ops->ndo_start_xmit(skb, dev) != NETDEV_TX_OK) {
81 skb_queue_head(&npinfo->txq, skb); 81 skb_queue_head(&npinfo->txq, skb);
82 __netif_tx_unlock(txq); 82 __netif_tx_unlock(txq);
@@ -317,7 +317,7 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
317 for (tries = jiffies_to_usecs(1)/USEC_PER_POLL; 317 for (tries = jiffies_to_usecs(1)/USEC_PER_POLL;
318 tries > 0; --tries) { 318 tries > 0; --tries) {
319 if (__netif_tx_trylock(txq)) { 319 if (__netif_tx_trylock(txq)) {
320 if (!netif_tx_queue_stopped(txq)) { 320 if (!netif_xmit_stopped(txq)) {
321 status = ops->ndo_start_xmit(skb, dev); 321 status = ops->ndo_start_xmit(skb, dev);
322 if (status == NETDEV_TX_OK) 322 if (status == NETDEV_TX_OK)
323 txq_trans_update(txq); 323 txq_trans_update(txq);
@@ -422,6 +422,7 @@ static void arp_reply(struct sk_buff *skb)
422 struct sk_buff *send_skb; 422 struct sk_buff *send_skb;
423 struct netpoll *np, *tmp; 423 struct netpoll *np, *tmp;
424 unsigned long flags; 424 unsigned long flags;
425 int hlen, tlen;
425 int hits = 0; 426 int hits = 0;
426 427
427 if (list_empty(&npinfo->rx_np)) 428 if (list_empty(&npinfo->rx_np))
@@ -479,8 +480,9 @@ static void arp_reply(struct sk_buff *skb)
479 if (tip != np->local_ip) 480 if (tip != np->local_ip)
480 continue; 481 continue;
481 482
482 send_skb = find_skb(np, size + LL_ALLOCATED_SPACE(np->dev), 483 hlen = LL_RESERVED_SPACE(np->dev);
483 LL_RESERVED_SPACE(np->dev)); 484 tlen = np->dev->needed_tailroom;
485 send_skb = find_skb(np, size + hlen + tlen, hlen);
484 if (!send_skb) 486 if (!send_skb)
485 continue; 487 continue;
486 488
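
The netpoll change above sizes the ARP reply skb with the device's required headroom and tailroom explicitly instead of the old LL_ALLOCATED_SPACE() macro. The usual pattern, sketched here with the generic skb allocator (the in-tree code goes through its own find_skb() pool helper):

/* Hedged sketch of the headroom/tailroom sizing pattern used above,
 * written against the generic alloc_skb()/skb_reserve() API rather than
 * netpoll's internal find_skb() pool.
 */
#include <linux/netdevice.h>
#include <linux/skbuff.h>

static struct sk_buff *example_alloc_reply(struct net_device *dev,
					   unsigned int payload)
{
	int hlen = LL_RESERVED_SPACE(dev);	/* link-layer headroom */
	int tlen = dev->needed_tailroom;	/* e.g. trailers/padding */
	struct sk_buff *skb;

	skb = alloc_skb(payload + hlen + tlen, GFP_ATOMIC);
	if (!skb)
		return NULL;

	skb_reserve(skb, hlen);		/* leave room for the link header */
	return skb;
}
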
diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c
new file mode 100644
index 000000000000..3a9fd4826b75
--- /dev/null
+++ b/net/core/netprio_cgroup.c
@@ -0,0 +1,344 @@
1/*
2 * net/core/netprio_cgroup.c Priority Control Group
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * Authors: Neil Horman <nhorman@tuxdriver.com>
10 */
11
12#include <linux/module.h>
13#include <linux/slab.h>
14#include <linux/types.h>
15#include <linux/string.h>
16#include <linux/errno.h>
17#include <linux/skbuff.h>
18#include <linux/cgroup.h>
19#include <linux/rcupdate.h>
20#include <linux/atomic.h>
21#include <net/rtnetlink.h>
22#include <net/pkt_cls.h>
23#include <net/sock.h>
24#include <net/netprio_cgroup.h>
25
26static struct cgroup_subsys_state *cgrp_create(struct cgroup_subsys *ss,
27 struct cgroup *cgrp);
28static void cgrp_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp);
29static int cgrp_populate(struct cgroup_subsys *ss, struct cgroup *cgrp);
30
31struct cgroup_subsys net_prio_subsys = {
32 .name = "net_prio",
33 .create = cgrp_create,
34 .destroy = cgrp_destroy,
35 .populate = cgrp_populate,
36#ifdef CONFIG_NETPRIO_CGROUP
37 .subsys_id = net_prio_subsys_id,
38#endif
39 .module = THIS_MODULE
40};
41
42#define PRIOIDX_SZ 128
43
44static unsigned long prioidx_map[PRIOIDX_SZ];
45static DEFINE_SPINLOCK(prioidx_map_lock);
46static atomic_t max_prioidx = ATOMIC_INIT(0);
47
48static inline struct cgroup_netprio_state *cgrp_netprio_state(struct cgroup *cgrp)
49{
50 return container_of(cgroup_subsys_state(cgrp, net_prio_subsys_id),
51 struct cgroup_netprio_state, css);
52}
53
54static int get_prioidx(u32 *prio)
55{
56 unsigned long flags;
57 u32 prioidx;
58
59 spin_lock_irqsave(&prioidx_map_lock, flags);
60 prioidx = find_first_zero_bit(prioidx_map, sizeof(unsigned long) * PRIOIDX_SZ);
61 set_bit(prioidx, prioidx_map);
62 spin_unlock_irqrestore(&prioidx_map_lock, flags);
63 if (prioidx == sizeof(unsigned long) * PRIOIDX_SZ)
64 return -ENOSPC;
65
66 atomic_set(&max_prioidx, prioidx);
67 *prio = prioidx;
68 return 0;
69}
70
71static void put_prioidx(u32 idx)
72{
73 unsigned long flags;
74
75 spin_lock_irqsave(&prioidx_map_lock, flags);
76 clear_bit(idx, prioidx_map);
77 spin_unlock_irqrestore(&prioidx_map_lock, flags);
78}
79
80static void extend_netdev_table(struct net_device *dev, u32 new_len)
81{
82 size_t new_size = sizeof(struct netprio_map) +
83 ((sizeof(u32) * new_len));
84 struct netprio_map *new_priomap = kzalloc(new_size, GFP_KERNEL);
85 struct netprio_map *old_priomap;
86 int i;
87
88 old_priomap = rtnl_dereference(dev->priomap);
89
90 if (!new_priomap) {
91 printk(KERN_WARNING "Unable to alloc new priomap!\n");
92 return;
93 }
94
95 for (i = 0;
96 old_priomap && (i < old_priomap->priomap_len);
97 i++)
98 new_priomap->priomap[i] = old_priomap->priomap[i];
99
100 new_priomap->priomap_len = new_len;
101
102 rcu_assign_pointer(dev->priomap, new_priomap);
103 if (old_priomap)
104 kfree_rcu(old_priomap, rcu);
105}
106
107static void update_netdev_tables(void)
108{
109 struct net_device *dev;
110 u32 max_len = atomic_read(&max_prioidx);
111 struct netprio_map *map;
112
113 rtnl_lock();
114 for_each_netdev(&init_net, dev) {
115 map = rtnl_dereference(dev->priomap);
116 if ((!map) ||
117 (map->priomap_len < max_len))
118 extend_netdev_table(dev, max_len);
119 }
120 rtnl_unlock();
121}
122
123static struct cgroup_subsys_state *cgrp_create(struct cgroup_subsys *ss,
124 struct cgroup *cgrp)
125{
126 struct cgroup_netprio_state *cs;
127 int ret;
128
129 cs = kzalloc(sizeof(*cs), GFP_KERNEL);
130 if (!cs)
131 return ERR_PTR(-ENOMEM);
132
133 if (cgrp->parent && cgrp_netprio_state(cgrp->parent)->prioidx) {
134 kfree(cs);
135 return ERR_PTR(-EINVAL);
136 }
137
138 ret = get_prioidx(&cs->prioidx);
139 if (ret != 0) {
140 printk(KERN_WARNING "No space in priority index array\n");
141 kfree(cs);
142 return ERR_PTR(ret);
143 }
144
145 return &cs->css;
146}
147
148static void cgrp_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp)
149{
150 struct cgroup_netprio_state *cs;
151 struct net_device *dev;
152 struct netprio_map *map;
153
154 cs = cgrp_netprio_state(cgrp);
155 rtnl_lock();
156 for_each_netdev(&init_net, dev) {
157 map = rtnl_dereference(dev->priomap);
158 if (map)
159 map->priomap[cs->prioidx] = 0;
160 }
161 rtnl_unlock();
162 put_prioidx(cs->prioidx);
163 kfree(cs);
164}
165
166static u64 read_prioidx(struct cgroup *cgrp, struct cftype *cft)
167{
168 return (u64)cgrp_netprio_state(cgrp)->prioidx;
169}
170
171static int read_priomap(struct cgroup *cont, struct cftype *cft,
172 struct cgroup_map_cb *cb)
173{
174 struct net_device *dev;
175 u32 prioidx = cgrp_netprio_state(cont)->prioidx;
176 u32 priority;
177 struct netprio_map *map;
178
179 rcu_read_lock();
180 for_each_netdev_rcu(&init_net, dev) {
181 map = rcu_dereference(dev->priomap);
182 priority = map ? map->priomap[prioidx] : 0;
183 cb->fill(cb, dev->name, priority);
184 }
185 rcu_read_unlock();
186 return 0;
187}
188
189static int write_priomap(struct cgroup *cgrp, struct cftype *cft,
190 const char *buffer)
191{
192 char *devname = kstrdup(buffer, GFP_KERNEL);
193 int ret = -EINVAL;
194 u32 prioidx = cgrp_netprio_state(cgrp)->prioidx;
195 unsigned long priority;
196 char *priostr;
197 struct net_device *dev;
198 struct netprio_map *map;
199
200 if (!devname)
201 return -ENOMEM;
202
203 /*
204 * Minimally sized valid priomap string
205 */
206 if (strlen(devname) < 3)
207 goto out_free_devname;
208
209 priostr = strstr(devname, " ");
210 if (!priostr)
211 goto out_free_devname;
212
213 /*
 214 * Separate the devname from the associated priority
 215 * and advance the priostr pointer to the priority value
216 */
217 *priostr = '\0';
218 priostr++;
219
220 /*
221 * If the priostr points to NULL, we're at the end of the passed
 222 * in string, and it's not a valid write
223 */
224 if (*priostr == '\0')
225 goto out_free_devname;
226
227 ret = kstrtoul(priostr, 10, &priority);
228 if (ret < 0)
229 goto out_free_devname;
230
231 ret = -ENODEV;
232
233 dev = dev_get_by_name(&init_net, devname);
234 if (!dev)
235 goto out_free_devname;
236
237 update_netdev_tables();
238 ret = 0;
239 rcu_read_lock();
240 map = rcu_dereference(dev->priomap);
241 if (map)
242 map->priomap[prioidx] = priority;
243 rcu_read_unlock();
244 dev_put(dev);
245
246out_free_devname:
247 kfree(devname);
248 return ret;
249}
250
251static struct cftype ss_files[] = {
252 {
253 .name = "prioidx",
254 .read_u64 = read_prioidx,
255 },
256 {
257 .name = "ifpriomap",
258 .read_map = read_priomap,
259 .write_string = write_priomap,
260 },
261};
262
263static int cgrp_populate(struct cgroup_subsys *ss, struct cgroup *cgrp)
264{
265 return cgroup_add_files(cgrp, ss, ss_files, ARRAY_SIZE(ss_files));
266}
267
268static int netprio_device_event(struct notifier_block *unused,
269 unsigned long event, void *ptr)
270{
271 struct net_device *dev = ptr;
272 struct netprio_map *old;
273 u32 max_len = atomic_read(&max_prioidx);
274
275 /*
276 * Note this is called with rtnl_lock held so we have update side
277 * protection on our rcu assignments
278 */
279
280 switch (event) {
281
282 case NETDEV_REGISTER:
283 if (max_len)
284 extend_netdev_table(dev, max_len);
285 break;
286 case NETDEV_UNREGISTER:
287 old = rtnl_dereference(dev->priomap);
288 RCU_INIT_POINTER(dev->priomap, NULL);
289 if (old)
290 kfree_rcu(old, rcu);
291 break;
292 }
293 return NOTIFY_DONE;
294}
295
296static struct notifier_block netprio_device_notifier = {
297 .notifier_call = netprio_device_event
298};
299
300static int __init init_cgroup_netprio(void)
301{
302 int ret;
303
304 ret = cgroup_load_subsys(&net_prio_subsys);
305 if (ret)
306 goto out;
307#ifndef CONFIG_NETPRIO_CGROUP
308 smp_wmb();
309 net_prio_subsys_id = net_prio_subsys.subsys_id;
310#endif
311
312 register_netdevice_notifier(&netprio_device_notifier);
313
314out:
315 return ret;
316}
317
318static void __exit exit_cgroup_netprio(void)
319{
320 struct netprio_map *old;
321 struct net_device *dev;
322
323 unregister_netdevice_notifier(&netprio_device_notifier);
324
325 cgroup_unload_subsys(&net_prio_subsys);
326
327#ifndef CONFIG_NETPRIO_CGROUP
328 net_prio_subsys_id = -1;
329 synchronize_rcu();
330#endif
331
332 rtnl_lock();
333 for_each_netdev(&init_net, dev) {
334 old = rtnl_dereference(dev->priomap);
335 RCU_INIT_POINTER(dev->priomap, NULL);
336 if (old)
337 kfree_rcu(old, rcu);
338 }
339 rtnl_unlock();
340}
341
342module_init(init_cgroup_netprio);
343module_exit(exit_cgroup_netprio);
344MODULE_LICENSE("GPL v2");
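
For context, a hedged user-space sketch of the interface write_priomap() above parses: a "<ifname> <priority>" string written to a cgroup's net_prio.ifpriomap file. The mount point and cgroup name below are assumptions about the local setup; only the payload format comes from the code above.

/* Hedged user-space sketch: assign priority 4 to eth0 traffic from one
 * net_prio cgroup.  The cgroup path is an assumption; only the
 * "<devname> <priority>" payload format comes from write_priomap() above.
 */
#include <stdio.h>

int main(void)
{
	const char *path =
		"/sys/fs/cgroup/net_prio/example/net_prio.ifpriomap";
	FILE *f = fopen(path, "w");

	if (!f) {
		perror("fopen");
		return 1;
	}
	fprintf(f, "eth0 4");		/* parsed by write_priomap() */
	fclose(f);
	return 0;
}
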
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 0001c243b35c..449fe0f068f8 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -1304,7 +1304,7 @@ static ssize_t pktgen_if_write(struct file *file,
1304 scan_ip6(buf, pkt_dev->in6_daddr.s6_addr); 1304 scan_ip6(buf, pkt_dev->in6_daddr.s6_addr);
1305 snprintf(buf, sizeof(buf), "%pI6c", &pkt_dev->in6_daddr); 1305 snprintf(buf, sizeof(buf), "%pI6c", &pkt_dev->in6_daddr);
1306 1306
1307 ipv6_addr_copy(&pkt_dev->cur_in6_daddr, &pkt_dev->in6_daddr); 1307 pkt_dev->cur_in6_daddr = pkt_dev->in6_daddr;
1308 1308
1309 if (debug) 1309 if (debug)
1310 printk(KERN_DEBUG "pktgen: dst6 set to: %s\n", buf); 1310 printk(KERN_DEBUG "pktgen: dst6 set to: %s\n", buf);
@@ -1327,8 +1327,7 @@ static ssize_t pktgen_if_write(struct file *file,
1327 scan_ip6(buf, pkt_dev->min_in6_daddr.s6_addr); 1327 scan_ip6(buf, pkt_dev->min_in6_daddr.s6_addr);
1328 snprintf(buf, sizeof(buf), "%pI6c", &pkt_dev->min_in6_daddr); 1328 snprintf(buf, sizeof(buf), "%pI6c", &pkt_dev->min_in6_daddr);
1329 1329
1330 ipv6_addr_copy(&pkt_dev->cur_in6_daddr, 1330 pkt_dev->cur_in6_daddr = pkt_dev->min_in6_daddr;
1331 &pkt_dev->min_in6_daddr);
1332 if (debug) 1331 if (debug)
1333 printk(KERN_DEBUG "pktgen: dst6_min set to: %s\n", buf); 1332 printk(KERN_DEBUG "pktgen: dst6_min set to: %s\n", buf);
1334 1333
@@ -1371,7 +1370,7 @@ static ssize_t pktgen_if_write(struct file *file,
1371 scan_ip6(buf, pkt_dev->in6_saddr.s6_addr); 1370 scan_ip6(buf, pkt_dev->in6_saddr.s6_addr);
1372 snprintf(buf, sizeof(buf), "%pI6c", &pkt_dev->in6_saddr); 1371 snprintf(buf, sizeof(buf), "%pI6c", &pkt_dev->in6_saddr);
1373 1372
1374 ipv6_addr_copy(&pkt_dev->cur_in6_saddr, &pkt_dev->in6_saddr); 1373 pkt_dev->cur_in6_saddr = pkt_dev->in6_saddr;
1375 1374
1376 if (debug) 1375 if (debug)
1377 printk(KERN_DEBUG "pktgen: src6 set to: %s\n", buf); 1376 printk(KERN_DEBUG "pktgen: src6 set to: %s\n", buf);
@@ -2079,9 +2078,7 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev)
2079 ifp = ifp->if_next) { 2078 ifp = ifp->if_next) {
2080 if (ifp->scope == IFA_LINK && 2079 if (ifp->scope == IFA_LINK &&
2081 !(ifp->flags & IFA_F_TENTATIVE)) { 2080 !(ifp->flags & IFA_F_TENTATIVE)) {
2082 ipv6_addr_copy(&pkt_dev-> 2081 pkt_dev->cur_in6_saddr = ifp->addr;
2083 cur_in6_saddr,
2084 &ifp->addr);
2085 err = 0; 2082 err = 0;
2086 break; 2083 break;
2087 } 2084 }
@@ -2958,8 +2955,8 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
2958 iph->payload_len = htons(sizeof(struct udphdr) + datalen); 2955 iph->payload_len = htons(sizeof(struct udphdr) + datalen);
2959 iph->nexthdr = IPPROTO_UDP; 2956 iph->nexthdr = IPPROTO_UDP;
2960 2957
2961 ipv6_addr_copy(&iph->daddr, &pkt_dev->cur_in6_daddr); 2958 iph->daddr = pkt_dev->cur_in6_daddr;
2962 ipv6_addr_copy(&iph->saddr, &pkt_dev->cur_in6_saddr); 2959 iph->saddr = pkt_dev->cur_in6_saddr;
2963 2960
2964 skb->mac_header = (skb->network_header - ETH_HLEN - 2961 skb->mac_header = (skb->network_header - ETH_HLEN -
2965 pkt_dev->pkt_overhead); 2962 pkt_dev->pkt_overhead);
@@ -3345,7 +3342,7 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev)
3345 3342
3346 __netif_tx_lock_bh(txq); 3343 __netif_tx_lock_bh(txq);
3347 3344
3348 if (unlikely(netif_tx_queue_frozen_or_stopped(txq))) { 3345 if (unlikely(netif_xmit_frozen_or_stopped(txq))) {
3349 ret = NETDEV_TX_BUSY; 3346 ret = NETDEV_TX_BUSY;
3350 pkt_dev->last_ok = 0; 3347 pkt_dev->last_ok = 0;
3351 goto unlock; 3348 goto unlock;
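
The pktgen hunks above follow a tree-wide conversion in this merge: ipv6_addr_copy(&dst, &src) becomes a plain structure assignment. A one-line sketch of the pattern, with illustrative variable names only:

#include <linux/in6.h>

struct in6_addr dst, src = IN6ADDR_LOOPBACK_INIT;

/* old: ipv6_addr_copy(&dst, &src);  new: */
dst = src;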
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 9083e82bdae5..dbf2ddafd52d 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -273,6 +273,17 @@ EXPORT_SYMBOL_GPL(rtnl_unregister_all);
273 273
274static LIST_HEAD(link_ops); 274static LIST_HEAD(link_ops);
275 275
276static const struct rtnl_link_ops *rtnl_link_ops_get(const char *kind)
277{
278 const struct rtnl_link_ops *ops;
279
280 list_for_each_entry(ops, &link_ops, list) {
281 if (!strcmp(ops->kind, kind))
282 return ops;
283 }
284 return NULL;
285}
286
276/** 287/**
277 * __rtnl_link_register - Register rtnl_link_ops with rtnetlink. 288 * __rtnl_link_register - Register rtnl_link_ops with rtnetlink.
278 * @ops: struct rtnl_link_ops * to register 289 * @ops: struct rtnl_link_ops * to register
@@ -285,6 +296,9 @@ static LIST_HEAD(link_ops);
285 */ 296 */
286int __rtnl_link_register(struct rtnl_link_ops *ops) 297int __rtnl_link_register(struct rtnl_link_ops *ops)
287{ 298{
299 if (rtnl_link_ops_get(ops->kind))
300 return -EEXIST;
301
288 if (!ops->dellink) 302 if (!ops->dellink)
289 ops->dellink = unregister_netdevice_queue; 303 ops->dellink = unregister_netdevice_queue;
290 304
@@ -351,17 +365,6 @@ void rtnl_link_unregister(struct rtnl_link_ops *ops)
351} 365}
352EXPORT_SYMBOL_GPL(rtnl_link_unregister); 366EXPORT_SYMBOL_GPL(rtnl_link_unregister);
353 367
354static const struct rtnl_link_ops *rtnl_link_ops_get(const char *kind)
355{
356 const struct rtnl_link_ops *ops;
357
358 list_for_each_entry(ops, &link_ops, list) {
359 if (!strcmp(ops->kind, kind))
360 return ops;
361 }
362 return NULL;
363}
364
365static size_t rtnl_link_get_size(const struct net_device *dev) 368static size_t rtnl_link_get_size(const struct net_device *dev)
366{ 369{
367 const struct rtnl_link_ops *ops = dev->rtnl_link_ops; 370 const struct rtnl_link_ops *ops = dev->rtnl_link_ops;
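
With the lookup moved ahead of registration, __rtnl_link_register() now rejects a second rtnl_link_ops reusing an existing kind string. A hedged sketch of what a caller sees; the "mydev" ops and module init are illustrative, not from this patch:

#include <linux/module.h>
#include <net/rtnetlink.h>

static struct rtnl_link_ops mydev_link_ops = {
	.kind = "mydev",
};

static int __init mydev_init(void)
{
	/* Returns -EEXIST if another module already registered "mydev". */
	return rtnl_link_register(&mydev_link_ops);
}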
diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c
index 925991ae6f52..6fd44606fdd1 100644
--- a/net/core/secure_seq.c
+++ b/net/core/secure_seq.c
@@ -36,7 +36,7 @@ static u32 seq_scale(u32 seq)
36} 36}
37#endif 37#endif
38 38
39#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 39#if IS_ENABLED(CONFIG_IPV6)
40__u32 secure_tcpv6_sequence_number(const __be32 *saddr, const __be32 *daddr, 40__u32 secure_tcpv6_sequence_number(const __be32 *saddr, const __be32 *daddr,
41 __be16 sport, __be16 dport) 41 __be16 sport, __be16 dport)
42{ 42{
@@ -134,7 +134,7 @@ u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport)
134EXPORT_SYMBOL_GPL(secure_ipv4_port_ephemeral); 134EXPORT_SYMBOL_GPL(secure_ipv4_port_ephemeral);
135#endif 135#endif
136 136
137#if defined(CONFIG_IP_DCCP) || defined(CONFIG_IP_DCCP_MODULE) 137#if IS_ENABLED(CONFIG_IP_DCCP)
138u64 secure_dccp_sequence_number(__be32 saddr, __be32 daddr, 138u64 secure_dccp_sequence_number(__be32 saddr, __be32 daddr,
139 __be16 sport, __be16 dport) 139 __be16 sport, __be16 dport)
140{ 140{
@@ -156,7 +156,7 @@ u64 secure_dccp_sequence_number(__be32 saddr, __be32 daddr,
156} 156}
157EXPORT_SYMBOL(secure_dccp_sequence_number); 157EXPORT_SYMBOL(secure_dccp_sequence_number);
158 158
159#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 159#if IS_ENABLED(CONFIG_IPV6)
160u64 secure_dccpv6_sequence_number(__be32 *saddr, __be32 *daddr, 160u64 secure_dccpv6_sequence_number(__be32 *saddr, __be32 *daddr,
161 __be16 sport, __be16 dport) 161 __be16 sport, __be16 dport)
162{ 162{
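
The secure_seq.c hunks above are part of a broader cleanup in this series: paired defined(CONFIG_FOO) || defined(CONFIG_FOO_MODULE) tests collapse into IS_ENABLED(CONFIG_FOO), which is true whether the option is built in or modular. A minimal sketch of the pattern:

#include <linux/kconfig.h>

#if IS_ENABLED(CONFIG_IPV6)
/* compiled when CONFIG_IPV6=y or CONFIG_IPV6=m */
#endif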
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 3c30ee4a5710..da0c97f2fab4 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -245,6 +245,55 @@ nodata:
245EXPORT_SYMBOL(__alloc_skb); 245EXPORT_SYMBOL(__alloc_skb);
246 246
247/** 247/**
248 * build_skb - build a network buffer
249 * @data: data buffer provided by caller
250 *
251 * Allocate a new &sk_buff. Caller provides space holding head and
252 * skb_shared_info. @data must have been allocated by kmalloc().
253 * The return value is the new skb buffer.
254 * On failure the return is %NULL, and @data is not freed.
255 * Notes:
256 * Before IO, the driver allocates only the data buffer where the NIC puts the incoming frame.
257 * The driver should add room at the head (NET_SKB_PAD) and
258 * MUST add room at the tail (SKB_DATA_ALIGN(skb_shared_info)).
259 * After IO, the driver calls build_skb() to allocate the sk_buff and populate it
260 * before giving the packet to the stack.
261 * RX rings contain only data buffers, not full skbs.
262 */
263struct sk_buff *build_skb(void *data)
264{
265 struct skb_shared_info *shinfo;
266 struct sk_buff *skb;
267 unsigned int size;
268
269 skb = kmem_cache_alloc(skbuff_head_cache, GFP_ATOMIC);
270 if (!skb)
271 return NULL;
272
273 size = ksize(data) - SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
274
275 memset(skb, 0, offsetof(struct sk_buff, tail));
276 skb->truesize = SKB_TRUESIZE(size);
277 atomic_set(&skb->users, 1);
278 skb->head = data;
279 skb->data = data;
280 skb_reset_tail_pointer(skb);
281 skb->end = skb->tail + size;
282#ifdef NET_SKBUFF_DATA_USES_OFFSET
283 skb->mac_header = ~0U;
284#endif
285
286 /* make sure we initialize shinfo sequentially */
287 shinfo = skb_shinfo(skb);
288 memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
289 atomic_set(&shinfo->dataref, 1);
290 kmemcheck_annotate_variable(shinfo->destructor_arg);
291
292 return skb;
293}
294EXPORT_SYMBOL(build_skb);
295
296/**
248 * __netdev_alloc_skb - allocate an skbuff for rx on a specific device 297 * __netdev_alloc_skb - allocate an skbuff for rx on a specific device
249 * @dev: network device to receive on 298 * @dev: network device to receive on
250 * @length: length to allocate 299 * @length: length to allocate
@@ -403,7 +452,7 @@ static void skb_release_head_state(struct sk_buff *skb)
403 WARN_ON(in_irq()); 452 WARN_ON(in_irq());
404 skb->destructor(skb); 453 skb->destructor(skb);
405 } 454 }
406#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) 455#if IS_ENABLED(CONFIG_NF_CONNTRACK)
407 nf_conntrack_put(skb->nfct); 456 nf_conntrack_put(skb->nfct);
408#endif 457#endif
409#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED 458#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED
@@ -553,15 +602,14 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
553 new->ip_summed = old->ip_summed; 602 new->ip_summed = old->ip_summed;
554 skb_copy_queue_mapping(new, old); 603 skb_copy_queue_mapping(new, old);
555 new->priority = old->priority; 604 new->priority = old->priority;
556#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE) 605#if IS_ENABLED(CONFIG_IP_VS)
557 new->ipvs_property = old->ipvs_property; 606 new->ipvs_property = old->ipvs_property;
558#endif 607#endif
559 new->protocol = old->protocol; 608 new->protocol = old->protocol;
560 new->mark = old->mark; 609 new->mark = old->mark;
561 new->skb_iif = old->skb_iif; 610 new->skb_iif = old->skb_iif;
562 __nf_copy(new, old); 611 __nf_copy(new, old);
563#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ 612#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
564 defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
565 new->nf_trace = old->nf_trace; 613 new->nf_trace = old->nf_trace;
566#endif 614#endif
567#ifdef CONFIG_NET_SCHED 615#ifdef CONFIG_NET_SCHED
@@ -791,8 +839,9 @@ struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask)
791EXPORT_SYMBOL(skb_copy); 839EXPORT_SYMBOL(skb_copy);
792 840
793/** 841/**
794 * pskb_copy - create copy of an sk_buff with private head. 842 * __pskb_copy - create copy of an sk_buff with private head.
795 * @skb: buffer to copy 843 * @skb: buffer to copy
844 * @headroom: headroom of new skb
796 * @gfp_mask: allocation priority 845 * @gfp_mask: allocation priority
797 * 846 *
798 * Make a copy of both an &sk_buff and part of its data, located 847 * Make a copy of both an &sk_buff and part of its data, located
@@ -803,16 +852,16 @@ EXPORT_SYMBOL(skb_copy);
803 * The returned buffer has a reference count of 1. 852 * The returned buffer has a reference count of 1.
804 */ 853 */
805 854
806struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask) 855struct sk_buff *__pskb_copy(struct sk_buff *skb, int headroom, gfp_t gfp_mask)
807{ 856{
808 unsigned int size = skb_end_pointer(skb) - skb->head; 857 unsigned int size = skb_headlen(skb) + headroom;
809 struct sk_buff *n = alloc_skb(size, gfp_mask); 858 struct sk_buff *n = alloc_skb(size, gfp_mask);
810 859
811 if (!n) 860 if (!n)
812 goto out; 861 goto out;
813 862
814 /* Set the data pointer */ 863 /* Set the data pointer */
815 skb_reserve(n, skb_headroom(skb)); 864 skb_reserve(n, headroom);
816 /* Set the tail pointer and length */ 865 /* Set the tail pointer and length */
817 skb_put(n, skb_headlen(skb)); 866 skb_put(n, skb_headlen(skb));
818 /* Copy the bytes */ 867 /* Copy the bytes */
@@ -848,7 +897,7 @@ struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask)
848out: 897out:
849 return n; 898 return n;
850} 899}
851EXPORT_SYMBOL(pskb_copy); 900EXPORT_SYMBOL(__pskb_copy);
852 901
853/** 902/**
854 * pskb_expand_head - reallocate header of &sk_buff 903 * pskb_expand_head - reallocate header of &sk_buff
@@ -2621,7 +2670,7 @@ EXPORT_SYMBOL_GPL(skb_pull_rcsum);
2621 * a pointer to the first in a list of new skbs for the segments. 2670 * a pointer to the first in a list of new skbs for the segments.
2622 * In case of error it returns ERR_PTR(err). 2671 * In case of error it returns ERR_PTR(err).
2623 */ 2672 */
2624struct sk_buff *skb_segment(struct sk_buff *skb, u32 features) 2673struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features)
2625{ 2674{
2626 struct sk_buff *segs = NULL; 2675 struct sk_buff *segs = NULL;
2627 struct sk_buff *tail = NULL; 2676 struct sk_buff *tail = NULL;
@@ -3169,6 +3218,26 @@ void skb_tstamp_tx(struct sk_buff *orig_skb,
3169} 3218}
3170EXPORT_SYMBOL_GPL(skb_tstamp_tx); 3219EXPORT_SYMBOL_GPL(skb_tstamp_tx);
3171 3220
3221void skb_complete_wifi_ack(struct sk_buff *skb, bool acked)
3222{
3223 struct sock *sk = skb->sk;
3224 struct sock_exterr_skb *serr;
3225 int err;
3226
3227 skb->wifi_acked_valid = 1;
3228 skb->wifi_acked = acked;
3229
3230 serr = SKB_EXT_ERR(skb);
3231 memset(serr, 0, sizeof(*serr));
3232 serr->ee.ee_errno = ENOMSG;
3233 serr->ee.ee_origin = SO_EE_ORIGIN_TXSTATUS;
3234
3235 err = sock_queue_err_skb(sk, skb);
3236 if (err)
3237 kfree_skb(skb);
3238}
3239EXPORT_SYMBOL_GPL(skb_complete_wifi_ack);
3240
3172 3241
3173/** 3242/**
3174 * skb_partial_csum_set - set up and verify partial csum values for packet 3243 * skb_partial_csum_set - set up and verify partial csum values for packet
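
Of the skbuff.c changes above, build_skb() is the main new entry point. A minimal sketch of the receive flow its kernel-doc describes, assuming a driver that kmalloc()s its RX buffers; the helper and variable names here are illustrative, not from this patch:

#include <linux/skbuff.h>
#include <linux/slab.h>

/* Hypothetical RX-path helper: frame_len is the length of the frame the
 * NIC wrote at data + NET_SKB_PAD. */
static struct sk_buff *example_rx_to_skb(unsigned int frame_len)
{
	unsigned int size = NET_SKB_PAD + frame_len +
			    SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
	void *data = kmalloc(size, GFP_ATOMIC);
	struct sk_buff *skb;

	if (!data)
		return NULL;

	/* ... hardware DMAs the received frame to data + NET_SKB_PAD ... */

	skb = build_skb(data);
	if (!skb) {
		kfree(data);	/* build_skb() leaves @data untouched on failure */
		return NULL;
	}
	skb_reserve(skb, NET_SKB_PAD);	/* skip the headroom reserved above */
	skb_put(skb, frame_len);	/* expose the received payload */
	return skb;
}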
diff --git a/net/core/sock.c b/net/core/sock.c
index b23f174ab84c..002939cfc069 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -111,6 +111,7 @@
111#include <linux/init.h> 111#include <linux/init.h>
112#include <linux/highmem.h> 112#include <linux/highmem.h>
113#include <linux/user_namespace.h> 113#include <linux/user_namespace.h>
114#include <linux/jump_label.h>
114 115
115#include <asm/uaccess.h> 116#include <asm/uaccess.h>
116#include <asm/system.h> 117#include <asm/system.h>
@@ -125,6 +126,7 @@
125#include <net/xfrm.h> 126#include <net/xfrm.h>
126#include <linux/ipsec.h> 127#include <linux/ipsec.h>
127#include <net/cls_cgroup.h> 128#include <net/cls_cgroup.h>
129#include <net/netprio_cgroup.h>
128 130
129#include <linux/filter.h> 131#include <linux/filter.h>
130 132
@@ -134,6 +136,46 @@
134#include <net/tcp.h> 136#include <net/tcp.h>
135#endif 137#endif
136 138
139static DEFINE_MUTEX(proto_list_mutex);
140static LIST_HEAD(proto_list);
141
142#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
143int mem_cgroup_sockets_init(struct cgroup *cgrp, struct cgroup_subsys *ss)
144{
145 struct proto *proto;
146 int ret = 0;
147
148 mutex_lock(&proto_list_mutex);
149 list_for_each_entry(proto, &proto_list, node) {
150 if (proto->init_cgroup) {
151 ret = proto->init_cgroup(cgrp, ss);
152 if (ret)
153 goto out;
154 }
155 }
156
157 mutex_unlock(&proto_list_mutex);
158 return ret;
159out:
160 list_for_each_entry_continue_reverse(proto, &proto_list, node)
161 if (proto->destroy_cgroup)
162 proto->destroy_cgroup(cgrp, ss);
163 mutex_unlock(&proto_list_mutex);
164 return ret;
165}
166
167void mem_cgroup_sockets_destroy(struct cgroup *cgrp, struct cgroup_subsys *ss)
168{
169 struct proto *proto;
170
171 mutex_lock(&proto_list_mutex);
172 list_for_each_entry_reverse(proto, &proto_list, node)
173 if (proto->destroy_cgroup)
174 proto->destroy_cgroup(cgrp, ss);
175 mutex_unlock(&proto_list_mutex);
176}
177#endif
178
137/* 179/*
138 * Each address family might have different locking rules, so we have 180 * Each address family might have different locking rules, so we have
139 * one slock key per address family: 181 * one slock key per address family:
@@ -141,6 +183,9 @@
141static struct lock_class_key af_family_keys[AF_MAX]; 183static struct lock_class_key af_family_keys[AF_MAX];
142static struct lock_class_key af_family_slock_keys[AF_MAX]; 184static struct lock_class_key af_family_slock_keys[AF_MAX];
143 185
186struct jump_label_key memcg_socket_limit_enabled;
187EXPORT_SYMBOL(memcg_socket_limit_enabled);
188
144/* 189/*
145 * Make lock validator output more readable. (we pre-construct these 190 * Make lock validator output more readable. (we pre-construct these
146 * strings build-time, so that runtime initialization of socket 191 * strings build-time, so that runtime initialization of socket
@@ -221,10 +266,16 @@ __u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;
221int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512); 266int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512);
222EXPORT_SYMBOL(sysctl_optmem_max); 267EXPORT_SYMBOL(sysctl_optmem_max);
223 268
224#if defined(CONFIG_CGROUPS) && !defined(CONFIG_NET_CLS_CGROUP) 269#if defined(CONFIG_CGROUPS)
270#if !defined(CONFIG_NET_CLS_CGROUP)
225int net_cls_subsys_id = -1; 271int net_cls_subsys_id = -1;
226EXPORT_SYMBOL_GPL(net_cls_subsys_id); 272EXPORT_SYMBOL_GPL(net_cls_subsys_id);
227#endif 273#endif
274#if !defined(CONFIG_NETPRIO_CGROUP)
275int net_prio_subsys_id = -1;
276EXPORT_SYMBOL_GPL(net_prio_subsys_id);
277#endif
278#endif
228 279
229static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen) 280static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
230{ 281{
@@ -269,14 +320,14 @@ static void sock_warn_obsolete_bsdism(const char *name)
269 } 320 }
270} 321}
271 322
272static void sock_disable_timestamp(struct sock *sk, int flag) 323#define SK_FLAGS_TIMESTAMP ((1UL << SOCK_TIMESTAMP) | (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE))
324
325static void sock_disable_timestamp(struct sock *sk, unsigned long flags)
273{ 326{
274 if (sock_flag(sk, flag)) { 327 if (sk->sk_flags & flags) {
275 sock_reset_flag(sk, flag); 328 sk->sk_flags &= ~flags;
276 if (!sock_flag(sk, SOCK_TIMESTAMP) && 329 if (!(sk->sk_flags & SK_FLAGS_TIMESTAMP))
277 !sock_flag(sk, SOCK_TIMESTAMPING_RX_SOFTWARE)) {
278 net_disable_timestamp(); 330 net_disable_timestamp();
279 }
280 } 331 }
281} 332}
282 333
@@ -678,7 +729,7 @@ set_rcvbuf:
678 SOCK_TIMESTAMPING_RX_SOFTWARE); 729 SOCK_TIMESTAMPING_RX_SOFTWARE);
679 else 730 else
680 sock_disable_timestamp(sk, 731 sock_disable_timestamp(sk,
681 SOCK_TIMESTAMPING_RX_SOFTWARE); 732 (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE));
682 sock_valbool_flag(sk, SOCK_TIMESTAMPING_SOFTWARE, 733 sock_valbool_flag(sk, SOCK_TIMESTAMPING_SOFTWARE,
683 val & SOF_TIMESTAMPING_SOFTWARE); 734 val & SOF_TIMESTAMPING_SOFTWARE);
684 sock_valbool_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE, 735 sock_valbool_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE,
@@ -736,6 +787,11 @@ set_rcvbuf:
736 case SO_RXQ_OVFL: 787 case SO_RXQ_OVFL:
737 sock_valbool_flag(sk, SOCK_RXQ_OVFL, valbool); 788 sock_valbool_flag(sk, SOCK_RXQ_OVFL, valbool);
738 break; 789 break;
790
791 case SO_WIFI_STATUS:
792 sock_valbool_flag(sk, SOCK_WIFI_STATUS, valbool);
793 break;
794
739 default: 795 default:
740 ret = -ENOPROTOOPT; 796 ret = -ENOPROTOOPT;
741 break; 797 break;
@@ -957,6 +1013,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
957 v.val = !!sock_flag(sk, SOCK_RXQ_OVFL); 1013 v.val = !!sock_flag(sk, SOCK_RXQ_OVFL);
958 break; 1014 break;
959 1015
1016 case SO_WIFI_STATUS:
1017 v.val = !!sock_flag(sk, SOCK_WIFI_STATUS);
1018 break;
1019
960 default: 1020 default:
961 return -ENOPROTOOPT; 1021 return -ENOPROTOOPT;
962 } 1022 }
@@ -1107,6 +1167,18 @@ void sock_update_classid(struct sock *sk)
1107 sk->sk_classid = classid; 1167 sk->sk_classid = classid;
1108} 1168}
1109EXPORT_SYMBOL(sock_update_classid); 1169EXPORT_SYMBOL(sock_update_classid);
1170
1171void sock_update_netprioidx(struct sock *sk)
1172{
1173 struct cgroup_netprio_state *state;
1174 if (in_interrupt())
1175 return;
1176 rcu_read_lock();
1177 state = task_netprio_state(current);
1178 sk->sk_cgrp_prioidx = state ? state->prioidx : 0;
1179 rcu_read_unlock();
1180}
1181EXPORT_SYMBOL_GPL(sock_update_netprioidx);
1110#endif 1182#endif
1111 1183
1112/** 1184/**
@@ -1134,6 +1206,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
1134 atomic_set(&sk->sk_wmem_alloc, 1); 1206 atomic_set(&sk->sk_wmem_alloc, 1);
1135 1207
1136 sock_update_classid(sk); 1208 sock_update_classid(sk);
1209 sock_update_netprioidx(sk);
1137 } 1210 }
1138 1211
1139 return sk; 1212 return sk;
@@ -1154,8 +1227,7 @@ static void __sk_free(struct sock *sk)
1154 RCU_INIT_POINTER(sk->sk_filter, NULL); 1227 RCU_INIT_POINTER(sk->sk_filter, NULL);
1155 } 1228 }
1156 1229
1157 sock_disable_timestamp(sk, SOCK_TIMESTAMP); 1230 sock_disable_timestamp(sk, SK_FLAGS_TIMESTAMP);
1158 sock_disable_timestamp(sk, SOCK_TIMESTAMPING_RX_SOFTWARE);
1159 1231
1160 if (atomic_read(&sk->sk_omem_alloc)) 1232 if (atomic_read(&sk->sk_omem_alloc))
1161 printk(KERN_DEBUG "%s: optmem leakage (%d bytes) detected.\n", 1233 printk(KERN_DEBUG "%s: optmem leakage (%d bytes) detected.\n",
@@ -1200,7 +1272,14 @@ void sk_release_kernel(struct sock *sk)
1200} 1272}
1201EXPORT_SYMBOL(sk_release_kernel); 1273EXPORT_SYMBOL(sk_release_kernel);
1202 1274
1203struct sock *sk_clone(const struct sock *sk, const gfp_t priority) 1275/**
1276 * sk_clone_lock - clone a socket, and lock its clone
1277 * @sk: the socket to clone
1278 * @priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
1279 *
1280 * Caller must unlock socket even in error path (bh_unlock_sock(newsk))
1281 */
1282struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
1204{ 1283{
1205 struct sock *newsk; 1284 struct sock *newsk;
1206 1285
@@ -1284,16 +1363,15 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
1284 newsk->sk_wq = NULL; 1363 newsk->sk_wq = NULL;
1285 1364
1286 if (newsk->sk_prot->sockets_allocated) 1365 if (newsk->sk_prot->sockets_allocated)
1287 percpu_counter_inc(newsk->sk_prot->sockets_allocated); 1366 sk_sockets_allocated_inc(newsk);
1288 1367
1289 if (sock_flag(newsk, SOCK_TIMESTAMP) || 1368 if (newsk->sk_flags & SK_FLAGS_TIMESTAMP)
1290 sock_flag(newsk, SOCK_TIMESTAMPING_RX_SOFTWARE))
1291 net_enable_timestamp(); 1369 net_enable_timestamp();
1292 } 1370 }
1293out: 1371out:
1294 return newsk; 1372 return newsk;
1295} 1373}
1296EXPORT_SYMBOL_GPL(sk_clone); 1374EXPORT_SYMBOL_GPL(sk_clone_lock);
1297 1375
1298void sk_setup_caps(struct sock *sk, struct dst_entry *dst) 1376void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
1299{ 1377{
@@ -1673,30 +1751,34 @@ int __sk_mem_schedule(struct sock *sk, int size, int kind)
1673 struct proto *prot = sk->sk_prot; 1751 struct proto *prot = sk->sk_prot;
1674 int amt = sk_mem_pages(size); 1752 int amt = sk_mem_pages(size);
1675 long allocated; 1753 long allocated;
1754 int parent_status = UNDER_LIMIT;
1676 1755
1677 sk->sk_forward_alloc += amt * SK_MEM_QUANTUM; 1756 sk->sk_forward_alloc += amt * SK_MEM_QUANTUM;
1678 allocated = atomic_long_add_return(amt, prot->memory_allocated); 1757
1758 allocated = sk_memory_allocated_add(sk, amt, &parent_status);
1679 1759
1680 /* Under limit. */ 1760 /* Under limit. */
1681 if (allocated <= prot->sysctl_mem[0]) { 1761 if (parent_status == UNDER_LIMIT &&
1682 if (prot->memory_pressure && *prot->memory_pressure) 1762 allocated <= sk_prot_mem_limits(sk, 0)) {
1683 *prot->memory_pressure = 0; 1763 sk_leave_memory_pressure(sk);
1684 return 1; 1764 return 1;
1685 } 1765 }
1686 1766
1687 /* Under pressure. */ 1767 /* Under pressure. (we or our parents) */
1688 if (allocated > prot->sysctl_mem[1]) 1768 if ((parent_status > SOFT_LIMIT) ||
1689 if (prot->enter_memory_pressure) 1769 allocated > sk_prot_mem_limits(sk, 1))
1690 prot->enter_memory_pressure(sk); 1770 sk_enter_memory_pressure(sk);
1691 1771
1692 /* Over hard limit. */ 1772 /* Over hard limit (we or our parents) */
1693 if (allocated > prot->sysctl_mem[2]) 1773 if ((parent_status == OVER_LIMIT) ||
1774 (allocated > sk_prot_mem_limits(sk, 2)))
1694 goto suppress_allocation; 1775 goto suppress_allocation;
1695 1776
1696 /* guarantee minimum buffer size under pressure */ 1777 /* guarantee minimum buffer size under pressure */
1697 if (kind == SK_MEM_RECV) { 1778 if (kind == SK_MEM_RECV) {
1698 if (atomic_read(&sk->sk_rmem_alloc) < prot->sysctl_rmem[0]) 1779 if (atomic_read(&sk->sk_rmem_alloc) < prot->sysctl_rmem[0])
1699 return 1; 1780 return 1;
1781
1700 } else { /* SK_MEM_SEND */ 1782 } else { /* SK_MEM_SEND */
1701 if (sk->sk_type == SOCK_STREAM) { 1783 if (sk->sk_type == SOCK_STREAM) {
1702 if (sk->sk_wmem_queued < prot->sysctl_wmem[0]) 1784 if (sk->sk_wmem_queued < prot->sysctl_wmem[0])
@@ -1706,13 +1788,13 @@ int __sk_mem_schedule(struct sock *sk, int size, int kind)
1706 return 1; 1788 return 1;
1707 } 1789 }
1708 1790
1709 if (prot->memory_pressure) { 1791 if (sk_has_memory_pressure(sk)) {
1710 int alloc; 1792 int alloc;
1711 1793
1712 if (!*prot->memory_pressure) 1794 if (!sk_under_memory_pressure(sk))
1713 return 1; 1795 return 1;
1714 alloc = percpu_counter_read_positive(prot->sockets_allocated); 1796 alloc = sk_sockets_allocated_read_positive(sk);
1715 if (prot->sysctl_mem[2] > alloc * 1797 if (sk_prot_mem_limits(sk, 2) > alloc *
1716 sk_mem_pages(sk->sk_wmem_queued + 1798 sk_mem_pages(sk->sk_wmem_queued +
1717 atomic_read(&sk->sk_rmem_alloc) + 1799 atomic_read(&sk->sk_rmem_alloc) +
1718 sk->sk_forward_alloc)) 1800 sk->sk_forward_alloc))
@@ -1735,7 +1817,9 @@ suppress_allocation:
1735 1817
1736 /* Alas. Undo changes. */ 1818 /* Alas. Undo changes. */
1737 sk->sk_forward_alloc -= amt * SK_MEM_QUANTUM; 1819 sk->sk_forward_alloc -= amt * SK_MEM_QUANTUM;
1738 atomic_long_sub(amt, prot->memory_allocated); 1820
1821 sk_memory_allocated_sub(sk, amt, parent_status);
1822
1739 return 0; 1823 return 0;
1740} 1824}
1741EXPORT_SYMBOL(__sk_mem_schedule); 1825EXPORT_SYMBOL(__sk_mem_schedule);
@@ -1746,15 +1830,13 @@ EXPORT_SYMBOL(__sk_mem_schedule);
1746 */ 1830 */
1747void __sk_mem_reclaim(struct sock *sk) 1831void __sk_mem_reclaim(struct sock *sk)
1748{ 1832{
1749 struct proto *prot = sk->sk_prot; 1833 sk_memory_allocated_sub(sk,
1750 1834 sk->sk_forward_alloc >> SK_MEM_QUANTUM_SHIFT, 0);
1751 atomic_long_sub(sk->sk_forward_alloc >> SK_MEM_QUANTUM_SHIFT,
1752 prot->memory_allocated);
1753 sk->sk_forward_alloc &= SK_MEM_QUANTUM - 1; 1835 sk->sk_forward_alloc &= SK_MEM_QUANTUM - 1;
1754 1836
1755 if (prot->memory_pressure && *prot->memory_pressure && 1837 if (sk_under_memory_pressure(sk) &&
1756 (atomic_long_read(prot->memory_allocated) < prot->sysctl_mem[0])) 1838 (sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)))
1757 *prot->memory_pressure = 0; 1839 sk_leave_memory_pressure(sk);
1758} 1840}
1759EXPORT_SYMBOL(__sk_mem_reclaim); 1841EXPORT_SYMBOL(__sk_mem_reclaim);
1760 1842
@@ -2125,16 +2207,15 @@ EXPORT_SYMBOL(sock_get_timestampns);
2125void sock_enable_timestamp(struct sock *sk, int flag) 2207void sock_enable_timestamp(struct sock *sk, int flag)
2126{ 2208{
2127 if (!sock_flag(sk, flag)) { 2209 if (!sock_flag(sk, flag)) {
2210 unsigned long previous_flags = sk->sk_flags;
2211
2128 sock_set_flag(sk, flag); 2212 sock_set_flag(sk, flag);
2129 /* 2213 /*
2130 * we just set one of the two flags which require net 2214 * we just set one of the two flags which require net
2131 * time stamping, but time stamping might have been on 2215 * time stamping, but time stamping might have been on
2132 * already because of the other one 2216 * already because of the other one
2133 */ 2217 */
2134 if (!sock_flag(sk, 2218 if (!(previous_flags & SK_FLAGS_TIMESTAMP))
2135 flag == SOCK_TIMESTAMP ?
2136 SOCK_TIMESTAMPING_RX_SOFTWARE :
2137 SOCK_TIMESTAMP))
2138 net_enable_timestamp(); 2219 net_enable_timestamp();
2139 } 2220 }
2140} 2221}
@@ -2246,9 +2327,6 @@ void sk_common_release(struct sock *sk)
2246} 2327}
2247EXPORT_SYMBOL(sk_common_release); 2328EXPORT_SYMBOL(sk_common_release);
2248 2329
2249static DEFINE_RWLOCK(proto_list_lock);
2250static LIST_HEAD(proto_list);
2251
2252#ifdef CONFIG_PROC_FS 2330#ifdef CONFIG_PROC_FS
2253#define PROTO_INUSE_NR 64 /* should be enough for the first time */ 2331#define PROTO_INUSE_NR 64 /* should be enough for the first time */
2254struct prot_inuse { 2332struct prot_inuse {
@@ -2397,10 +2475,10 @@ int proto_register(struct proto *prot, int alloc_slab)
2397 } 2475 }
2398 } 2476 }
2399 2477
2400 write_lock(&proto_list_lock); 2478 mutex_lock(&proto_list_mutex);
2401 list_add(&prot->node, &proto_list); 2479 list_add(&prot->node, &proto_list);
2402 assign_proto_idx(prot); 2480 assign_proto_idx(prot);
2403 write_unlock(&proto_list_lock); 2481 mutex_unlock(&proto_list_mutex);
2404 return 0; 2482 return 0;
2405 2483
2406out_free_timewait_sock_slab_name: 2484out_free_timewait_sock_slab_name:
@@ -2423,10 +2501,10 @@ EXPORT_SYMBOL(proto_register);
2423 2501
2424void proto_unregister(struct proto *prot) 2502void proto_unregister(struct proto *prot)
2425{ 2503{
2426 write_lock(&proto_list_lock); 2504 mutex_lock(&proto_list_mutex);
2427 release_proto_idx(prot); 2505 release_proto_idx(prot);
2428 list_del(&prot->node); 2506 list_del(&prot->node);
2429 write_unlock(&proto_list_lock); 2507 mutex_unlock(&proto_list_mutex);
2430 2508
2431 if (prot->slab != NULL) { 2509 if (prot->slab != NULL) {
2432 kmem_cache_destroy(prot->slab); 2510 kmem_cache_destroy(prot->slab);
@@ -2449,9 +2527,9 @@ EXPORT_SYMBOL(proto_unregister);
2449 2527
2450#ifdef CONFIG_PROC_FS 2528#ifdef CONFIG_PROC_FS
2451static void *proto_seq_start(struct seq_file *seq, loff_t *pos) 2529static void *proto_seq_start(struct seq_file *seq, loff_t *pos)
2452 __acquires(proto_list_lock) 2530 __acquires(proto_list_mutex)
2453{ 2531{
2454 read_lock(&proto_list_lock); 2532 mutex_lock(&proto_list_mutex);
2455 return seq_list_start_head(&proto_list, *pos); 2533 return seq_list_start_head(&proto_list, *pos);
2456} 2534}
2457 2535
@@ -2461,25 +2539,36 @@ static void *proto_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2461} 2539}
2462 2540
2463static void proto_seq_stop(struct seq_file *seq, void *v) 2541static void proto_seq_stop(struct seq_file *seq, void *v)
2464 __releases(proto_list_lock) 2542 __releases(proto_list_mutex)
2465{ 2543{
2466 read_unlock(&proto_list_lock); 2544 mutex_unlock(&proto_list_mutex);
2467} 2545}
2468 2546
2469static char proto_method_implemented(const void *method) 2547static char proto_method_implemented(const void *method)
2470{ 2548{
2471 return method == NULL ? 'n' : 'y'; 2549 return method == NULL ? 'n' : 'y';
2472} 2550}
2551static long sock_prot_memory_allocated(struct proto *proto)
2552{
2553 return proto->memory_allocated != NULL ? proto_memory_allocated(proto): -1L;
2554}
2555
2556static char *sock_prot_memory_pressure(struct proto *proto)
2557{
2558 return proto->memory_pressure != NULL ?
2559 proto_memory_pressure(proto) ? "yes" : "no" : "NI";
2560}
2473 2561
2474static void proto_seq_printf(struct seq_file *seq, struct proto *proto) 2562static void proto_seq_printf(struct seq_file *seq, struct proto *proto)
2475{ 2563{
2564
2476 seq_printf(seq, "%-9s %4u %6d %6ld %-3s %6u %-3s %-10s " 2565 seq_printf(seq, "%-9s %4u %6d %6ld %-3s %6u %-3s %-10s "
2477 "%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n", 2566 "%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n",
2478 proto->name, 2567 proto->name,
2479 proto->obj_size, 2568 proto->obj_size,
2480 sock_prot_inuse_get(seq_file_net(seq), proto), 2569 sock_prot_inuse_get(seq_file_net(seq), proto),
2481 proto->memory_allocated != NULL ? atomic_long_read(proto->memory_allocated) : -1L, 2570 sock_prot_memory_allocated(proto),
2482 proto->memory_pressure != NULL ? *proto->memory_pressure ? "yes" : "no" : "NI", 2571 sock_prot_memory_pressure(proto),
2483 proto->max_header, 2572 proto->max_header,
2484 proto->slab == NULL ? "no" : "yes", 2573 proto->slab == NULL ? "no" : "yes",
2485 module_name(proto->owner), 2574 module_name(proto->owner),
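
The SO_WIFI_STATUS plumbing added to sock.c pairs with skb_complete_wifi_ack() in skbuff.c: once the option is set, per-packet TX status is queued on the socket error queue with origin SO_EE_ORIGIN_TXSTATUS. A userspace sketch of enabling it; the socket fd is hypothetical and the fallback option value is an assumption taken from this series' asm-generic/socket.h:

#include <sys/socket.h>

#ifndef SO_WIFI_STATUS
#define SO_WIFI_STATUS 41	/* assumed asm-generic value from this series */
#endif

int enable_wifi_status(int fd)
{
	int one = 1;

	/* Ask the kernel to report wifi ACK status for transmitted packets
	 * on the socket error queue (SO_EE_ORIGIN_TXSTATUS). */
	return setsockopt(fd, SOL_SOCKET, SO_WIFI_STATUS, &one, sizeof(one));
}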
diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c
new file mode 100644
index 000000000000..b9868e1fd62c
--- /dev/null
+++ b/net/core/sock_diag.c
@@ -0,0 +1,192 @@
1#include <linux/mutex.h>
2#include <linux/socket.h>
3#include <linux/skbuff.h>
4#include <net/netlink.h>
5#include <net/net_namespace.h>
6#include <linux/module.h>
7#include <linux/rtnetlink.h>
8#include <net/sock.h>
9
10#include <linux/inet_diag.h>
11#include <linux/sock_diag.h>
12
13static struct sock_diag_handler *sock_diag_handlers[AF_MAX];
14static int (*inet_rcv_compat)(struct sk_buff *skb, struct nlmsghdr *nlh);
15static DEFINE_MUTEX(sock_diag_table_mutex);
16
17int sock_diag_check_cookie(void *sk, __u32 *cookie)
18{
19 if ((cookie[0] != INET_DIAG_NOCOOKIE ||
20 cookie[1] != INET_DIAG_NOCOOKIE) &&
21 ((u32)(unsigned long)sk != cookie[0] ||
22 (u32)((((unsigned long)sk) >> 31) >> 1) != cookie[1]))
23 return -ESTALE;
24 else
25 return 0;
26}
27EXPORT_SYMBOL_GPL(sock_diag_check_cookie);
28
29void sock_diag_save_cookie(void *sk, __u32 *cookie)
30{
31 cookie[0] = (u32)(unsigned long)sk;
32 cookie[1] = (u32)(((unsigned long)sk >> 31) >> 1);
33}
34EXPORT_SYMBOL_GPL(sock_diag_save_cookie);
35
36int sock_diag_put_meminfo(struct sock *sk, struct sk_buff *skb, int attrtype)
37{
38 __u32 *mem;
39
40 mem = RTA_DATA(__RTA_PUT(skb, attrtype, SK_MEMINFO_VARS * sizeof(__u32)));
41
42 mem[SK_MEMINFO_RMEM_ALLOC] = sk_rmem_alloc_get(sk);
43 mem[SK_MEMINFO_RCVBUF] = sk->sk_rcvbuf;
44 mem[SK_MEMINFO_WMEM_ALLOC] = sk_wmem_alloc_get(sk);
45 mem[SK_MEMINFO_SNDBUF] = sk->sk_sndbuf;
46 mem[SK_MEMINFO_FWD_ALLOC] = sk->sk_forward_alloc;
47 mem[SK_MEMINFO_WMEM_QUEUED] = sk->sk_wmem_queued;
48 mem[SK_MEMINFO_OPTMEM] = atomic_read(&sk->sk_omem_alloc);
49
50 return 0;
51
52rtattr_failure:
53 return -EMSGSIZE;
54}
55EXPORT_SYMBOL_GPL(sock_diag_put_meminfo);
56
57void sock_diag_register_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh))
58{
59 mutex_lock(&sock_diag_table_mutex);
60 inet_rcv_compat = fn;
61 mutex_unlock(&sock_diag_table_mutex);
62}
63EXPORT_SYMBOL_GPL(sock_diag_register_inet_compat);
64
65void sock_diag_unregister_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh))
66{
67 mutex_lock(&sock_diag_table_mutex);
68 inet_rcv_compat = NULL;
69 mutex_unlock(&sock_diag_table_mutex);
70}
71EXPORT_SYMBOL_GPL(sock_diag_unregister_inet_compat);
72
73int sock_diag_register(struct sock_diag_handler *hndl)
74{
75 int err = 0;
76
77 if (hndl->family >= AF_MAX)
78 return -EINVAL;
79
80 mutex_lock(&sock_diag_table_mutex);
81 if (sock_diag_handlers[hndl->family])
82 err = -EBUSY;
83 else
84 sock_diag_handlers[hndl->family] = hndl;
85 mutex_unlock(&sock_diag_table_mutex);
86
87 return err;
88}
89EXPORT_SYMBOL_GPL(sock_diag_register);
90
91void sock_diag_unregister(struct sock_diag_handler *hnld)
92{
93 int family = hnld->family;
94
95 if (family >= AF_MAX)
96 return;
97
98 mutex_lock(&sock_diag_table_mutex);
99 BUG_ON(sock_diag_handlers[family] != hnld);
100 sock_diag_handlers[family] = NULL;
101 mutex_unlock(&sock_diag_table_mutex);
102}
103EXPORT_SYMBOL_GPL(sock_diag_unregister);
104
105static inline struct sock_diag_handler *sock_diag_lock_handler(int family)
106{
107 if (sock_diag_handlers[family] == NULL)
108 request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK,
109 NETLINK_SOCK_DIAG, family);
110
111 mutex_lock(&sock_diag_table_mutex);
112 return sock_diag_handlers[family];
113}
114
115static inline void sock_diag_unlock_handler(struct sock_diag_handler *h)
116{
117 mutex_unlock(&sock_diag_table_mutex);
118}
119
120static int __sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
121{
122 int err;
123 struct sock_diag_req *req = NLMSG_DATA(nlh);
124 struct sock_diag_handler *hndl;
125
126 if (nlmsg_len(nlh) < sizeof(*req))
127 return -EINVAL;
128
129 hndl = sock_diag_lock_handler(req->sdiag_family);
130 if (hndl == NULL)
131 err = -ENOENT;
132 else
133 err = hndl->dump(skb, nlh);
134 sock_diag_unlock_handler(hndl);
135
136 return err;
137}
138
139static int sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
140{
141 int ret;
142
143 switch (nlh->nlmsg_type) {
144 case TCPDIAG_GETSOCK:
145 case DCCPDIAG_GETSOCK:
146 if (inet_rcv_compat == NULL)
147 request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK,
148 NETLINK_SOCK_DIAG, AF_INET);
149
150 mutex_lock(&sock_diag_table_mutex);
151 if (inet_rcv_compat != NULL)
152 ret = inet_rcv_compat(skb, nlh);
153 else
154 ret = -EOPNOTSUPP;
155 mutex_unlock(&sock_diag_table_mutex);
156
157 return ret;
158 case SOCK_DIAG_BY_FAMILY:
159 return __sock_diag_rcv_msg(skb, nlh);
160 default:
161 return -EINVAL;
162 }
163}
164
165static DEFINE_MUTEX(sock_diag_mutex);
166
167static void sock_diag_rcv(struct sk_buff *skb)
168{
169 mutex_lock(&sock_diag_mutex);
170 netlink_rcv_skb(skb, &sock_diag_rcv_msg);
171 mutex_unlock(&sock_diag_mutex);
172}
173
174struct sock *sock_diag_nlsk;
175EXPORT_SYMBOL_GPL(sock_diag_nlsk);
176
177static int __init sock_diag_init(void)
178{
179 sock_diag_nlsk = netlink_kernel_create(&init_net, NETLINK_SOCK_DIAG, 0,
180 sock_diag_rcv, NULL, THIS_MODULE);
181 return sock_diag_nlsk == NULL ? -ENOMEM : 0;
182}
183
184static void __exit sock_diag_exit(void)
185{
186 netlink_kernel_release(sock_diag_nlsk);
187}
188
189module_init(sock_diag_init);
190module_exit(sock_diag_exit);
191MODULE_LICENSE("GPL");
192MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_SOCK_DIAG);
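
The new sock_diag core multiplexes NETLINK_SOCK_DIAG requests to per-family handlers. A minimal sketch of how a protocol module would plug in, under the sock_diag_handler layout used above; the example handler and its dump routine are hypothetical:

#include <linux/module.h>
#include <linux/socket.h>
#include <linux/skbuff.h>
#include <linux/netlink.h>
#include <linux/sock_diag.h>

static int example_diag_dump(struct sk_buff *skb, struct nlmsghdr *nlh)
{
	/* Answer a SOCK_DIAG_BY_FAMILY request for this family here. */
	return -EOPNOTSUPP;
}

static struct sock_diag_handler example_diag_handler = {
	.family	= AF_INET,		/* any value below AF_MAX */
	.dump	= example_diag_dump,
};

static int __init example_diag_init(void)
{
	/* Returns -EBUSY if another handler already owns this family. */
	return sock_diag_register(&example_diag_handler);
}

static void __exit example_diag_exit(void)
{
	sock_diag_unregister(&example_diag_handler);
}

module_init(example_diag_init);
module_exit(example_diag_exit);
MODULE_LICENSE("GPL");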
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 77a65f031488..d05559d4d9cd 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -68,8 +68,13 @@ static int rps_sock_flow_sysctl(ctl_table *table, int write,
68 68
69 if (sock_table != orig_sock_table) { 69 if (sock_table != orig_sock_table) {
70 rcu_assign_pointer(rps_sock_flow_table, sock_table); 70 rcu_assign_pointer(rps_sock_flow_table, sock_table);
71 synchronize_rcu(); 71 if (sock_table)
72 vfree(orig_sock_table); 72 jump_label_inc(&rps_needed);
73 if (orig_sock_table) {
74 jump_label_dec(&rps_needed);
75 synchronize_rcu();
76 vfree(orig_sock_table);
77 }
73 } 78 }
74 } 79 }
75 80