path: root/net/core
author     Linus Torvalds <torvalds@linux-foundation.org>   2014-08-06 12:38:14 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>   2014-08-06 12:38:14 -0400
commit     ae045e2455429c418a418a3376301a9e5753a0a8 (patch)
tree       b445bdeecd3f38aa0d0a29c9585cee49e4ccb0f1 /net/core
parent     f4f142ed4ef835709c7e6d12eaca10d190bcebed (diff)
parent     d247b6ab3ce6dd43665780865ec5fa145d9ab6bd (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller:
 "Highlights:

   1) Steady transitioning of the BPF infrastructure to a generic spot so
      all kernel subsystems can make use of it, from Alexei Starovoitov.

   2) SFC driver supports busy polling, from Alexandre Rames.

   3) Take advantage of hash table in UDP multicast delivery, from David
      Held.

   4) Lighten locking, in particular by getting rid of the LRU lists, in
      inet frag handling.  From Florian Westphal.

   5) Add support for various RFC6458 control messages in SCTP, from Geir
      Ola Vaagland.

   6) Allow to filter bridge forwarding database dumps by device, from
      Jamal Hadi Salim.

   7) virtio-net also now supports busy polling, from Jason Wang.

   8) Some low level optimization tweaks in pktgen from Jesper Dangaard
      Brouer.

   9) Add support for ipv6 address generation modes, so that userland can
      have some input into the process.  From Jiri Pirko.

  10) Consolidate common TCP connection request code in ipv4 and ipv6,
      from Octavian Purdila.

  11) New ARP packet logger in netfilter, from Pablo Neira Ayuso.

  12) Generic resizable RCU hash table, with initial users in netlink and
      nftables.  From Thomas Graf.

  13) Maintain a name assignment type so that userspace can see where a
      network device name came from (enumerated by kernel, assigned
      explicitly by userspace, etc).  From Tom Gundersen.

  14) Automatic flow label generation on transmit in ipv6, from Tom
      Herbert.

  15) New packet timestamping facilities from Willem de Bruijn, meant to
      assist in measuring latencies going into/out-of the packet
      scheduler, latency from TCP data transmission to ACK, etc"

* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1536 commits)
  cxgb4 : Disable recursive mailbox commands when enabling vi
  net: reduce USB network driver config options.
  tg3: Modify tg3_tso_bug() to handle multiple TX rings
  amd-xgbe: Perform phy connect/disconnect at dev open/stop
  amd-xgbe: Use dma_set_mask_and_coherent to set DMA mask
  net: sun4i-emac: fix memory leak on bad packet
  sctp: fix possible seqlock seadlock in sctp_packet_transmit()
  Revert "net: phy: Set the driver when registering an MDIO bus device"
  cxgb4vf: Turn off SGE RX/TX Callback Timers and interrupts in PCI shutdown routine
  team: Simplify return path of team_newlink
  bridge: Update outdated comment on promiscuous mode
  net-timestamp: ACK timestamp for bytestreams
  net-timestamp: TCP timestamping
  net-timestamp: SCHED timestamp on entering packet scheduler
  net-timestamp: add key to disambiguate concurrent datagrams
  net-timestamp: move timestamp flags out of sk_flags
  net-timestamp: extend SCM_TIMESTAMPING ancillary data struct
  cxgb4i : Move stray CPL definitions to cxgb4 driver
  tcp: reduce spurious retransmits due to transient SACK reneging
  qlcnic: Initialize dcbnl_ops before register_netdev
  ...
Diffstat (limited to 'net/core')
-rw-r--r--  net/core/dev.c              53
-rw-r--r--  net/core/drop_monitor.c      2
-rw-r--r--  net/core/filter.c          728
-rw-r--r--  net/core/flow_dissector.c   85
-rw-r--r--  net/core/net-sysfs.c       154
-rw-r--r--  net/core/netpoll.c           3
-rw-r--r--  net/core/pktgen.c          111
-rw-r--r--  net/core/ptp_classifier.c   70
-rw-r--r--  net/core/request_sock.c     43
-rw-r--r--  net/core/rtnetlink.c       107
-rw-r--r--  net/core/skbuff.c           22
-rw-r--r--  net/core/sock.c             52
-rw-r--r--  net/core/sock_diag.c         4
-rw-r--r--  net/core/timestamping.c     57
14 files changed, 549 insertions, 942 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index 367a586d0c8a..1c15b189c52b 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -132,6 +132,7 @@
132#include <linux/hashtable.h> 132#include <linux/hashtable.h>
133#include <linux/vmalloc.h> 133#include <linux/vmalloc.h>
134#include <linux/if_macvlan.h> 134#include <linux/if_macvlan.h>
135#include <linux/errqueue.h>
135 136
136#include "net-sysfs.h" 137#include "net-sysfs.h"
137 138
@@ -1085,6 +1086,7 @@ static int dev_get_valid_name(struct net *net,
1085 */ 1086 */
1086int dev_change_name(struct net_device *dev, const char *newname) 1087int dev_change_name(struct net_device *dev, const char *newname)
1087{ 1088{
1089 unsigned char old_assign_type;
1088 char oldname[IFNAMSIZ]; 1090 char oldname[IFNAMSIZ];
1089 int err = 0; 1091 int err = 0;
1090 int ret; 1092 int ret;
@@ -1112,10 +1114,17 @@ int dev_change_name(struct net_device *dev, const char *newname)
1112 return err; 1114 return err;
1113 } 1115 }
1114 1116
1117 if (oldname[0] && !strchr(oldname, '%'))
1118 netdev_info(dev, "renamed from %s\n", oldname);
1119
1120 old_assign_type = dev->name_assign_type;
1121 dev->name_assign_type = NET_NAME_RENAMED;
1122
1115rollback: 1123rollback:
1116 ret = device_rename(&dev->dev, dev->name); 1124 ret = device_rename(&dev->dev, dev->name);
1117 if (ret) { 1125 if (ret) {
1118 memcpy(dev->name, oldname, IFNAMSIZ); 1126 memcpy(dev->name, oldname, IFNAMSIZ);
1127 dev->name_assign_type = old_assign_type;
1119 write_seqcount_end(&devnet_rename_seq); 1128 write_seqcount_end(&devnet_rename_seq);
1120 return ret; 1129 return ret;
1121 } 1130 }
@@ -1144,6 +1153,8 @@ rollback:
1144 write_seqcount_begin(&devnet_rename_seq); 1153 write_seqcount_begin(&devnet_rename_seq);
1145 memcpy(dev->name, oldname, IFNAMSIZ); 1154 memcpy(dev->name, oldname, IFNAMSIZ);
1146 memcpy(oldname, newname, IFNAMSIZ); 1155 memcpy(oldname, newname, IFNAMSIZ);
1156 dev->name_assign_type = old_assign_type;
1157 old_assign_type = NET_NAME_RENAMED;
1147 goto rollback; 1158 goto rollback;
1148 } else { 1159 } else {
1149 pr_err("%s: name change rollback failed: %d\n", 1160 pr_err("%s: name change rollback failed: %d\n",
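The two hunks above make dev_change_name() track how an interface got its current name: the rename path records the old assignment type, switches to NET_NAME_RENAMED, and restores the previous value if the sysfs rename has to be rolled back. For context, a hedged userspace sketch of the operation that exercises this path (the SIOCSIFNAME ioctl); header and macro availability should be checked against your libc, and the link generally has to be down for the rename to succeed:

/* Sketch: rename a network interface from userspace; this is the request
 * that ends up in dev_change_name() and flips name_assign_type to
 * NET_NAME_RENAMED.  Illustrative only. */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>

static int rename_iface(const char *oldname, const char *newname)
{
	struct ifreq ifr;
	int ret, fd = socket(AF_INET, SOCK_DGRAM, 0);

	if (fd < 0)
		return -1;

	memset(&ifr, 0, sizeof(ifr));
	strncpy(ifr.ifr_name, oldname, IFNAMSIZ - 1);
	strncpy(ifr.ifr_newname, newname, IFNAMSIZ - 1);

	ret = ioctl(fd, SIOCSIFNAME, &ifr);	/* fails with EBUSY while the link is up */
	if (ret < 0)
		perror("SIOCSIFNAME");

	close(fd);
	return ret;
}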
@@ -2316,7 +2327,7 @@ __be16 skb_network_protocol(struct sk_buff *skb, int *depth)
2316 */ 2327 */
2317 if (type == htons(ETH_P_8021Q) || type == htons(ETH_P_8021AD)) { 2328 if (type == htons(ETH_P_8021Q) || type == htons(ETH_P_8021AD)) {
2318 if (vlan_depth) { 2329 if (vlan_depth) {
2319 if (unlikely(WARN_ON(vlan_depth < VLAN_HLEN))) 2330 if (WARN_ON(vlan_depth < VLAN_HLEN))
2320 return 0; 2331 return 0;
2321 vlan_depth -= VLAN_HLEN; 2332 vlan_depth -= VLAN_HLEN;
2322 } else { 2333 } else {
@@ -2414,8 +2425,8 @@ struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
2414 2425
2415 skb_warn_bad_offload(skb); 2426 skb_warn_bad_offload(skb);
2416 2427
2417 if (skb_header_cloned(skb) && 2428 err = skb_cow_head(skb, 0);
2418 (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) 2429 if (err < 0)
2419 return ERR_PTR(err); 2430 return ERR_PTR(err);
2420 } 2431 }
2421 2432
@@ -2745,8 +2756,8 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
2745 /* 2756 /*
2746 * Heuristic to force contended enqueues to serialize on a 2757 * Heuristic to force contended enqueues to serialize on a
2747 * separate lock before trying to get qdisc main lock. 2758 * separate lock before trying to get qdisc main lock.
2748 * This permits __QDISC_STATE_RUNNING owner to get the lock more often 2759 * This permits __QDISC___STATE_RUNNING owner to get the lock more
2749 * and dequeue packets faster. 2760 * often and dequeue packets faster.
2750 */ 2761 */
2751 contended = qdisc_is_running(q); 2762 contended = qdisc_is_running(q);
2752 if (unlikely(contended)) 2763 if (unlikely(contended))
@@ -2866,6 +2877,9 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv)
2866 2877
2867 skb_reset_mac_header(skb); 2878 skb_reset_mac_header(skb);
2868 2879
2880 if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_SCHED_TSTAMP))
2881 __skb_tstamp_tx(skb, NULL, skb->sk, SCM_TSTAMP_SCHED);
2882
2869 /* Disable soft irqs for various locks below. Also 2883 /* Disable soft irqs for various locks below. Also
2870 * stops preemption for RCU. 2884 * stops preemption for RCU.
2871 */ 2885 */
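The hunk above is the transmit-side hook for the new SCHED timestamps: when a packet carrying SKBTX_SCHED_TSTAMP enters __dev_queue_xmit(), a software timestamp with the SCM_TSTAMP_SCHED id is emitted before the packet reaches the qdisc. A hedged userspace sketch of opting in to these timestamps, assuming the SOF_TIMESTAMPING_TX_SCHED and SOF_TIMESTAMPING_OPT_ID flag names match the uapi added by this net-timestamp series:

/* Sketch: request SCHED timestamps on a socket.  Flag names are assumed to
 * match linux/net_tstamp.h after this merge; timestamps come back on the
 * socket error queue as SCM_TIMESTAMPING control messages. */
#include <stdio.h>
#include <sys/socket.h>
#include <linux/net_tstamp.h>

#ifndef SO_TIMESTAMPING
#define SO_TIMESTAMPING 37	/* asm-generic/socket.h value */
#endif

static int enable_sched_tstamps(int fd)
{
	unsigned int val = SOF_TIMESTAMPING_TX_SCHED |	/* stamp on entering the qdisc */
			   SOF_TIMESTAMPING_SOFTWARE |	/* report software stamps */
			   SOF_TIMESTAMPING_OPT_ID;	/* tag each stamp with a counter */

	if (setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING, &val, sizeof(val)) < 0) {
		perror("SO_TIMESTAMPING");
		return -1;
	}

	/* After sending, poll the error queue and read the stamps back with
	 * recvmsg(fd, &msg, MSG_ERRQUEUE). */
	return 0;
}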
@@ -5440,13 +5454,9 @@ int __dev_change_flags(struct net_device *dev, unsigned int flags)
5440 */ 5454 */
5441 5455
5442 ret = 0; 5456 ret = 0;
5443 if ((old_flags ^ flags) & IFF_UP) { /* Bit is different ? */ 5457 if ((old_flags ^ flags) & IFF_UP)
5444 ret = ((old_flags & IFF_UP) ? __dev_close : __dev_open)(dev); 5458 ret = ((old_flags & IFF_UP) ? __dev_close : __dev_open)(dev);
5445 5459
5446 if (!ret)
5447 dev_set_rx_mode(dev);
5448 }
5449
5450 if ((flags ^ dev->gflags) & IFF_PROMISC) { 5460 if ((flags ^ dev->gflags) & IFF_PROMISC) {
5451 int inc = (flags & IFF_PROMISC) ? 1 : -1; 5461 int inc = (flags & IFF_PROMISC) ? 1 : -1;
5452 unsigned int old_flags = dev->flags; 5462 unsigned int old_flags = dev->flags;
@@ -6446,17 +6456,19 @@ void netdev_freemem(struct net_device *dev)
6446 6456
6447/** 6457/**
6448 * alloc_netdev_mqs - allocate network device 6458 * alloc_netdev_mqs - allocate network device
6449 * @sizeof_priv: size of private data to allocate space for 6459 * @sizeof_priv: size of private data to allocate space for
6450 * @name: device name format string 6460 * @name: device name format string
6451 * @setup: callback to initialize device 6461 * @name_assign_type: origin of device name
6452 * @txqs: the number of TX subqueues to allocate 6462 * @setup: callback to initialize device
6453 * @rxqs: the number of RX subqueues to allocate 6463 * @txqs: the number of TX subqueues to allocate
6464 * @rxqs: the number of RX subqueues to allocate
6454 * 6465 *
6455 * Allocates a struct net_device with private data area for driver use 6466 * Allocates a struct net_device with private data area for driver use
6456 * and performs basic initialization. Also allocates subqueue structs 6467 * and performs basic initialization. Also allocates subqueue structs
6457 * for each queue on the device. 6468 * for each queue on the device.
6458 */ 6469 */
6459struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, 6470struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
6471 unsigned char name_assign_type,
6460 void (*setup)(struct net_device *), 6472 void (*setup)(struct net_device *),
6461 unsigned int txqs, unsigned int rxqs) 6473 unsigned int txqs, unsigned int rxqs)
6462{ 6474{
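Because alloc_netdev_mqs() grew a name_assign_type argument, every caller (including the alloc_netdev()/alloc_etherdev() wrappers) now states where the requested name comes from. A hedged sketch of a converted caller; struct my_priv and my_setup are placeholders, and the NET_NAME_* constants are assumed to be the uapi values introduced alongside this series:

/* Sketch (kernel context): allocating a netdev after this change.
 * my_priv/my_setup are hypothetical; a "%d" template filled in by the
 * kernel would typically be tagged NET_NAME_ENUM, a name dictated by
 * userspace NET_NAME_USER. */
static struct net_device *my_alloc(void)
{
	struct net_device *ndev;

	ndev = alloc_netdev_mqs(sizeof(struct my_priv), "dummy%d",
				NET_NAME_UNKNOWN, my_setup,
				1, 1);		/* one TX and one RX queue */
	if (!ndev)
		return NULL;

	/* dev->name_assign_type is copied in by alloc_netdev_mqs() and later
	 * exposed through sysfs; dev_change_name() switches it to
	 * NET_NAME_RENAMED. */
	return ndev;
}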
@@ -6535,6 +6547,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
6535#endif 6547#endif
6536 6548
6537 strcpy(dev->name, name); 6549 strcpy(dev->name, name);
6550 dev->name_assign_type = name_assign_type;
6538 dev->group = INIT_NETDEV_GROUP; 6551 dev->group = INIT_NETDEV_GROUP;
6539 if (!dev->ethtool_ops) 6552 if (!dev->ethtool_ops)
6540 dev->ethtool_ops = &default_ethtool_ops; 6553 dev->ethtool_ops = &default_ethtool_ops;
@@ -6946,12 +6959,14 @@ static int __netdev_printk(const char *level, const struct net_device *dev,
6946 if (dev && dev->dev.parent) { 6959 if (dev && dev->dev.parent) {
6947 r = dev_printk_emit(level[1] - '0', 6960 r = dev_printk_emit(level[1] - '0',
6948 dev->dev.parent, 6961 dev->dev.parent,
6949 "%s %s %s: %pV", 6962 "%s %s %s%s: %pV",
6950 dev_driver_string(dev->dev.parent), 6963 dev_driver_string(dev->dev.parent),
6951 dev_name(dev->dev.parent), 6964 dev_name(dev->dev.parent),
6952 netdev_name(dev), vaf); 6965 netdev_name(dev), netdev_reg_state(dev),
6966 vaf);
6953 } else if (dev) { 6967 } else if (dev) {
6954 r = printk("%s%s: %pV", level, netdev_name(dev), vaf); 6968 r = printk("%s%s%s: %pV", level, netdev_name(dev),
6969 netdev_reg_state(dev), vaf);
6955 } else { 6970 } else {
6956 r = printk("%s(NULL net_device): %pV", level, vaf); 6971 r = printk("%s(NULL net_device): %pV", level, vaf);
6957 } 6972 }
@@ -7103,7 +7118,7 @@ static void __net_exit default_device_exit_batch(struct list_head *net_list)
7103 rtnl_lock_unregistering(net_list); 7118 rtnl_lock_unregistering(net_list);
7104 list_for_each_entry(net, net_list, exit_list) { 7119 list_for_each_entry(net, net_list, exit_list) {
7105 for_each_netdev_reverse(net, dev) { 7120 for_each_netdev_reverse(net, dev) {
7106 if (dev->rtnl_link_ops) 7121 if (dev->rtnl_link_ops && dev->rtnl_link_ops->dellink)
7107 dev->rtnl_link_ops->dellink(dev, &dev_kill_list); 7122 dev->rtnl_link_ops->dellink(dev, &dev_kill_list);
7108 else 7123 else
7109 unregister_netdevice_queue(dev, &dev_kill_list); 7124 unregister_netdevice_queue(dev, &dev_kill_list);
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index e70301eb7a4a..50f9a9db5792 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -289,10 +289,8 @@ static int net_dm_cmd_trace(struct sk_buff *skb,
289 switch (info->genlhdr->cmd) { 289 switch (info->genlhdr->cmd) {
290 case NET_DM_CMD_START: 290 case NET_DM_CMD_START:
291 return set_all_monitor_traces(TRACE_ON); 291 return set_all_monitor_traces(TRACE_ON);
292 break;
293 case NET_DM_CMD_STOP: 292 case NET_DM_CMD_STOP:
294 return set_all_monitor_traces(TRACE_OFF); 293 return set_all_monitor_traces(TRACE_OFF);
295 break;
296 } 294 }
297 295
298 return -ENOTSUPP; 296 return -ENOTSUPP;
diff --git a/net/core/filter.c b/net/core/filter.c
index 1dbf6462f766..d814b8a89d0f 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -18,7 +18,7 @@
18 * 2 of the License, or (at your option) any later version. 18 * 2 of the License, or (at your option) any later version.
19 * 19 *
20 * Andi Kleen - Fix a few bad bugs and races. 20 * Andi Kleen - Fix a few bad bugs and races.
21 * Kris Katterjohn - Added many additional checks in sk_chk_filter() 21 * Kris Katterjohn - Added many additional checks in bpf_check_classic()
22 */ 22 */
23 23
24#include <linux/module.h> 24#include <linux/module.h>
@@ -45,54 +45,6 @@
45#include <linux/seccomp.h> 45#include <linux/seccomp.h>
46#include <linux/if_vlan.h> 46#include <linux/if_vlan.h>
47 47
48/* Registers */
49#define BPF_R0 regs[BPF_REG_0]
50#define BPF_R1 regs[BPF_REG_1]
51#define BPF_R2 regs[BPF_REG_2]
52#define BPF_R3 regs[BPF_REG_3]
53#define BPF_R4 regs[BPF_REG_4]
54#define BPF_R5 regs[BPF_REG_5]
55#define BPF_R6 regs[BPF_REG_6]
56#define BPF_R7 regs[BPF_REG_7]
57#define BPF_R8 regs[BPF_REG_8]
58#define BPF_R9 regs[BPF_REG_9]
59#define BPF_R10 regs[BPF_REG_10]
60
61/* Named registers */
62#define DST regs[insn->dst_reg]
63#define SRC regs[insn->src_reg]
64#define FP regs[BPF_REG_FP]
65#define ARG1 regs[BPF_REG_ARG1]
66#define CTX regs[BPF_REG_CTX]
67#define IMM insn->imm
68
69/* No hurry in this branch
70 *
71 * Exported for the bpf jit load helper.
72 */
73void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, int k, unsigned int size)
74{
75 u8 *ptr = NULL;
76
77 if (k >= SKF_NET_OFF)
78 ptr = skb_network_header(skb) + k - SKF_NET_OFF;
79 else if (k >= SKF_LL_OFF)
80 ptr = skb_mac_header(skb) + k - SKF_LL_OFF;
81 if (ptr >= skb->head && ptr + size <= skb_tail_pointer(skb))
82 return ptr;
83
84 return NULL;
85}
86
87static inline void *load_pointer(const struct sk_buff *skb, int k,
88 unsigned int size, void *buffer)
89{
90 if (k >= 0)
91 return skb_header_pointer(skb, k, size, buffer);
92
93 return bpf_internal_load_pointer_neg_helper(skb, k, size);
94}
95
96/** 48/**
97 * sk_filter - run a packet through a socket filter 49 * sk_filter - run a packet through a socket filter
98 * @sk: sock associated with &sk_buff 50 * @sk: sock associated with &sk_buff
@@ -135,451 +87,6 @@ int sk_filter(struct sock *sk, struct sk_buff *skb)
135} 87}
136EXPORT_SYMBOL(sk_filter); 88EXPORT_SYMBOL(sk_filter);
137 89
138/* Base function for offset calculation. Needs to go into .text section,
139 * therefore keeping it non-static as well; will also be used by JITs
140 * anyway later on, so do not let the compiler omit it.
141 */
142noinline u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
143{
144 return 0;
145}
146
147/**
148 * __sk_run_filter - run a filter on a given context
149 * @ctx: buffer to run the filter on
150 * @insn: filter to apply
151 *
152 * Decode and apply filter instructions to the skb->data. Return length to
153 * keep, 0 for none. @ctx is the data we are operating on, @insn is the
154 * array of filter instructions.
155 */
156static unsigned int __sk_run_filter(void *ctx, const struct sock_filter_int *insn)
157{
158 u64 stack[MAX_BPF_STACK / sizeof(u64)];
159 u64 regs[MAX_BPF_REG], tmp;
160 static const void *jumptable[256] = {
161 [0 ... 255] = &&default_label,
162 /* Now overwrite non-defaults ... */
163 /* 32 bit ALU operations */
164 [BPF_ALU | BPF_ADD | BPF_X] = &&ALU_ADD_X,
165 [BPF_ALU | BPF_ADD | BPF_K] = &&ALU_ADD_K,
166 [BPF_ALU | BPF_SUB | BPF_X] = &&ALU_SUB_X,
167 [BPF_ALU | BPF_SUB | BPF_K] = &&ALU_SUB_K,
168 [BPF_ALU | BPF_AND | BPF_X] = &&ALU_AND_X,
169 [BPF_ALU | BPF_AND | BPF_K] = &&ALU_AND_K,
170 [BPF_ALU | BPF_OR | BPF_X] = &&ALU_OR_X,
171 [BPF_ALU | BPF_OR | BPF_K] = &&ALU_OR_K,
172 [BPF_ALU | BPF_LSH | BPF_X] = &&ALU_LSH_X,
173 [BPF_ALU | BPF_LSH | BPF_K] = &&ALU_LSH_K,
174 [BPF_ALU | BPF_RSH | BPF_X] = &&ALU_RSH_X,
175 [BPF_ALU | BPF_RSH | BPF_K] = &&ALU_RSH_K,
176 [BPF_ALU | BPF_XOR | BPF_X] = &&ALU_XOR_X,
177 [BPF_ALU | BPF_XOR | BPF_K] = &&ALU_XOR_K,
178 [BPF_ALU | BPF_MUL | BPF_X] = &&ALU_MUL_X,
179 [BPF_ALU | BPF_MUL | BPF_K] = &&ALU_MUL_K,
180 [BPF_ALU | BPF_MOV | BPF_X] = &&ALU_MOV_X,
181 [BPF_ALU | BPF_MOV | BPF_K] = &&ALU_MOV_K,
182 [BPF_ALU | BPF_DIV | BPF_X] = &&ALU_DIV_X,
183 [BPF_ALU | BPF_DIV | BPF_K] = &&ALU_DIV_K,
184 [BPF_ALU | BPF_MOD | BPF_X] = &&ALU_MOD_X,
185 [BPF_ALU | BPF_MOD | BPF_K] = &&ALU_MOD_K,
186 [BPF_ALU | BPF_NEG] = &&ALU_NEG,
187 [BPF_ALU | BPF_END | BPF_TO_BE] = &&ALU_END_TO_BE,
188 [BPF_ALU | BPF_END | BPF_TO_LE] = &&ALU_END_TO_LE,
189 /* 64 bit ALU operations */
190 [BPF_ALU64 | BPF_ADD | BPF_X] = &&ALU64_ADD_X,
191 [BPF_ALU64 | BPF_ADD | BPF_K] = &&ALU64_ADD_K,
192 [BPF_ALU64 | BPF_SUB | BPF_X] = &&ALU64_SUB_X,
193 [BPF_ALU64 | BPF_SUB | BPF_K] = &&ALU64_SUB_K,
194 [BPF_ALU64 | BPF_AND | BPF_X] = &&ALU64_AND_X,
195 [BPF_ALU64 | BPF_AND | BPF_K] = &&ALU64_AND_K,
196 [BPF_ALU64 | BPF_OR | BPF_X] = &&ALU64_OR_X,
197 [BPF_ALU64 | BPF_OR | BPF_K] = &&ALU64_OR_K,
198 [BPF_ALU64 | BPF_LSH | BPF_X] = &&ALU64_LSH_X,
199 [BPF_ALU64 | BPF_LSH | BPF_K] = &&ALU64_LSH_K,
200 [BPF_ALU64 | BPF_RSH | BPF_X] = &&ALU64_RSH_X,
201 [BPF_ALU64 | BPF_RSH | BPF_K] = &&ALU64_RSH_K,
202 [BPF_ALU64 | BPF_XOR | BPF_X] = &&ALU64_XOR_X,
203 [BPF_ALU64 | BPF_XOR | BPF_K] = &&ALU64_XOR_K,
204 [BPF_ALU64 | BPF_MUL | BPF_X] = &&ALU64_MUL_X,
205 [BPF_ALU64 | BPF_MUL | BPF_K] = &&ALU64_MUL_K,
206 [BPF_ALU64 | BPF_MOV | BPF_X] = &&ALU64_MOV_X,
207 [BPF_ALU64 | BPF_MOV | BPF_K] = &&ALU64_MOV_K,
208 [BPF_ALU64 | BPF_ARSH | BPF_X] = &&ALU64_ARSH_X,
209 [BPF_ALU64 | BPF_ARSH | BPF_K] = &&ALU64_ARSH_K,
210 [BPF_ALU64 | BPF_DIV | BPF_X] = &&ALU64_DIV_X,
211 [BPF_ALU64 | BPF_DIV | BPF_K] = &&ALU64_DIV_K,
212 [BPF_ALU64 | BPF_MOD | BPF_X] = &&ALU64_MOD_X,
213 [BPF_ALU64 | BPF_MOD | BPF_K] = &&ALU64_MOD_K,
214 [BPF_ALU64 | BPF_NEG] = &&ALU64_NEG,
215 /* Call instruction */
216 [BPF_JMP | BPF_CALL] = &&JMP_CALL,
217 /* Jumps */
218 [BPF_JMP | BPF_JA] = &&JMP_JA,
219 [BPF_JMP | BPF_JEQ | BPF_X] = &&JMP_JEQ_X,
220 [BPF_JMP | BPF_JEQ | BPF_K] = &&JMP_JEQ_K,
221 [BPF_JMP | BPF_JNE | BPF_X] = &&JMP_JNE_X,
222 [BPF_JMP | BPF_JNE | BPF_K] = &&JMP_JNE_K,
223 [BPF_JMP | BPF_JGT | BPF_X] = &&JMP_JGT_X,
224 [BPF_JMP | BPF_JGT | BPF_K] = &&JMP_JGT_K,
225 [BPF_JMP | BPF_JGE | BPF_X] = &&JMP_JGE_X,
226 [BPF_JMP | BPF_JGE | BPF_K] = &&JMP_JGE_K,
227 [BPF_JMP | BPF_JSGT | BPF_X] = &&JMP_JSGT_X,
228 [BPF_JMP | BPF_JSGT | BPF_K] = &&JMP_JSGT_K,
229 [BPF_JMP | BPF_JSGE | BPF_X] = &&JMP_JSGE_X,
230 [BPF_JMP | BPF_JSGE | BPF_K] = &&JMP_JSGE_K,
231 [BPF_JMP | BPF_JSET | BPF_X] = &&JMP_JSET_X,
232 [BPF_JMP | BPF_JSET | BPF_K] = &&JMP_JSET_K,
233 /* Program return */
234 [BPF_JMP | BPF_EXIT] = &&JMP_EXIT,
235 /* Store instructions */
236 [BPF_STX | BPF_MEM | BPF_B] = &&STX_MEM_B,
237 [BPF_STX | BPF_MEM | BPF_H] = &&STX_MEM_H,
238 [BPF_STX | BPF_MEM | BPF_W] = &&STX_MEM_W,
239 [BPF_STX | BPF_MEM | BPF_DW] = &&STX_MEM_DW,
240 [BPF_STX | BPF_XADD | BPF_W] = &&STX_XADD_W,
241 [BPF_STX | BPF_XADD | BPF_DW] = &&STX_XADD_DW,
242 [BPF_ST | BPF_MEM | BPF_B] = &&ST_MEM_B,
243 [BPF_ST | BPF_MEM | BPF_H] = &&ST_MEM_H,
244 [BPF_ST | BPF_MEM | BPF_W] = &&ST_MEM_W,
245 [BPF_ST | BPF_MEM | BPF_DW] = &&ST_MEM_DW,
246 /* Load instructions */
247 [BPF_LDX | BPF_MEM | BPF_B] = &&LDX_MEM_B,
248 [BPF_LDX | BPF_MEM | BPF_H] = &&LDX_MEM_H,
249 [BPF_LDX | BPF_MEM | BPF_W] = &&LDX_MEM_W,
250 [BPF_LDX | BPF_MEM | BPF_DW] = &&LDX_MEM_DW,
251 [BPF_LD | BPF_ABS | BPF_W] = &&LD_ABS_W,
252 [BPF_LD | BPF_ABS | BPF_H] = &&LD_ABS_H,
253 [BPF_LD | BPF_ABS | BPF_B] = &&LD_ABS_B,
254 [BPF_LD | BPF_IND | BPF_W] = &&LD_IND_W,
255 [BPF_LD | BPF_IND | BPF_H] = &&LD_IND_H,
256 [BPF_LD | BPF_IND | BPF_B] = &&LD_IND_B,
257 };
258 void *ptr;
259 int off;
260
261#define CONT ({ insn++; goto select_insn; })
262#define CONT_JMP ({ insn++; goto select_insn; })
263
264 FP = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)];
265 ARG1 = (u64) (unsigned long) ctx;
266
267 /* Registers used in classic BPF programs need to be reset first. */
268 regs[BPF_REG_A] = 0;
269 regs[BPF_REG_X] = 0;
270
271select_insn:
272 goto *jumptable[insn->code];
273
274 /* ALU */
275#define ALU(OPCODE, OP) \
276 ALU64_##OPCODE##_X: \
277 DST = DST OP SRC; \
278 CONT; \
279 ALU_##OPCODE##_X: \
280 DST = (u32) DST OP (u32) SRC; \
281 CONT; \
282 ALU64_##OPCODE##_K: \
283 DST = DST OP IMM; \
284 CONT; \
285 ALU_##OPCODE##_K: \
286 DST = (u32) DST OP (u32) IMM; \
287 CONT;
288
289 ALU(ADD, +)
290 ALU(SUB, -)
291 ALU(AND, &)
292 ALU(OR, |)
293 ALU(LSH, <<)
294 ALU(RSH, >>)
295 ALU(XOR, ^)
296 ALU(MUL, *)
297#undef ALU
298 ALU_NEG:
299 DST = (u32) -DST;
300 CONT;
301 ALU64_NEG:
302 DST = -DST;
303 CONT;
304 ALU_MOV_X:
305 DST = (u32) SRC;
306 CONT;
307 ALU_MOV_K:
308 DST = (u32) IMM;
309 CONT;
310 ALU64_MOV_X:
311 DST = SRC;
312 CONT;
313 ALU64_MOV_K:
314 DST = IMM;
315 CONT;
316 ALU64_ARSH_X:
317 (*(s64 *) &DST) >>= SRC;
318 CONT;
319 ALU64_ARSH_K:
320 (*(s64 *) &DST) >>= IMM;
321 CONT;
322 ALU64_MOD_X:
323 if (unlikely(SRC == 0))
324 return 0;
325 tmp = DST;
326 DST = do_div(tmp, SRC);
327 CONT;
328 ALU_MOD_X:
329 if (unlikely(SRC == 0))
330 return 0;
331 tmp = (u32) DST;
332 DST = do_div(tmp, (u32) SRC);
333 CONT;
334 ALU64_MOD_K:
335 tmp = DST;
336 DST = do_div(tmp, IMM);
337 CONT;
338 ALU_MOD_K:
339 tmp = (u32) DST;
340 DST = do_div(tmp, (u32) IMM);
341 CONT;
342 ALU64_DIV_X:
343 if (unlikely(SRC == 0))
344 return 0;
345 do_div(DST, SRC);
346 CONT;
347 ALU_DIV_X:
348 if (unlikely(SRC == 0))
349 return 0;
350 tmp = (u32) DST;
351 do_div(tmp, (u32) SRC);
352 DST = (u32) tmp;
353 CONT;
354 ALU64_DIV_K:
355 do_div(DST, IMM);
356 CONT;
357 ALU_DIV_K:
358 tmp = (u32) DST;
359 do_div(tmp, (u32) IMM);
360 DST = (u32) tmp;
361 CONT;
362 ALU_END_TO_BE:
363 switch (IMM) {
364 case 16:
365 DST = (__force u16) cpu_to_be16(DST);
366 break;
367 case 32:
368 DST = (__force u32) cpu_to_be32(DST);
369 break;
370 case 64:
371 DST = (__force u64) cpu_to_be64(DST);
372 break;
373 }
374 CONT;
375 ALU_END_TO_LE:
376 switch (IMM) {
377 case 16:
378 DST = (__force u16) cpu_to_le16(DST);
379 break;
380 case 32:
381 DST = (__force u32) cpu_to_le32(DST);
382 break;
383 case 64:
384 DST = (__force u64) cpu_to_le64(DST);
385 break;
386 }
387 CONT;
388
389 /* CALL */
390 JMP_CALL:
391 /* Function call scratches BPF_R1-BPF_R5 registers,
392 * preserves BPF_R6-BPF_R9, and stores return value
393 * into BPF_R0.
394 */
395 BPF_R0 = (__bpf_call_base + insn->imm)(BPF_R1, BPF_R2, BPF_R3,
396 BPF_R4, BPF_R5);
397 CONT;
398
399 /* JMP */
400 JMP_JA:
401 insn += insn->off;
402 CONT;
403 JMP_JEQ_X:
404 if (DST == SRC) {
405 insn += insn->off;
406 CONT_JMP;
407 }
408 CONT;
409 JMP_JEQ_K:
410 if (DST == IMM) {
411 insn += insn->off;
412 CONT_JMP;
413 }
414 CONT;
415 JMP_JNE_X:
416 if (DST != SRC) {
417 insn += insn->off;
418 CONT_JMP;
419 }
420 CONT;
421 JMP_JNE_K:
422 if (DST != IMM) {
423 insn += insn->off;
424 CONT_JMP;
425 }
426 CONT;
427 JMP_JGT_X:
428 if (DST > SRC) {
429 insn += insn->off;
430 CONT_JMP;
431 }
432 CONT;
433 JMP_JGT_K:
434 if (DST > IMM) {
435 insn += insn->off;
436 CONT_JMP;
437 }
438 CONT;
439 JMP_JGE_X:
440 if (DST >= SRC) {
441 insn += insn->off;
442 CONT_JMP;
443 }
444 CONT;
445 JMP_JGE_K:
446 if (DST >= IMM) {
447 insn += insn->off;
448 CONT_JMP;
449 }
450 CONT;
451 JMP_JSGT_X:
452 if (((s64) DST) > ((s64) SRC)) {
453 insn += insn->off;
454 CONT_JMP;
455 }
456 CONT;
457 JMP_JSGT_K:
458 if (((s64) DST) > ((s64) IMM)) {
459 insn += insn->off;
460 CONT_JMP;
461 }
462 CONT;
463 JMP_JSGE_X:
464 if (((s64) DST) >= ((s64) SRC)) {
465 insn += insn->off;
466 CONT_JMP;
467 }
468 CONT;
469 JMP_JSGE_K:
470 if (((s64) DST) >= ((s64) IMM)) {
471 insn += insn->off;
472 CONT_JMP;
473 }
474 CONT;
475 JMP_JSET_X:
476 if (DST & SRC) {
477 insn += insn->off;
478 CONT_JMP;
479 }
480 CONT;
481 JMP_JSET_K:
482 if (DST & IMM) {
483 insn += insn->off;
484 CONT_JMP;
485 }
486 CONT;
487 JMP_EXIT:
488 return BPF_R0;
489
490 /* STX and ST and LDX*/
491#define LDST(SIZEOP, SIZE) \
492 STX_MEM_##SIZEOP: \
493 *(SIZE *)(unsigned long) (DST + insn->off) = SRC; \
494 CONT; \
495 ST_MEM_##SIZEOP: \
496 *(SIZE *)(unsigned long) (DST + insn->off) = IMM; \
497 CONT; \
498 LDX_MEM_##SIZEOP: \
499 DST = *(SIZE *)(unsigned long) (SRC + insn->off); \
500 CONT;
501
502 LDST(B, u8)
503 LDST(H, u16)
504 LDST(W, u32)
505 LDST(DW, u64)
506#undef LDST
507 STX_XADD_W: /* lock xadd *(u32 *)(dst_reg + off16) += src_reg */
508 atomic_add((u32) SRC, (atomic_t *)(unsigned long)
509 (DST + insn->off));
510 CONT;
511 STX_XADD_DW: /* lock xadd *(u64 *)(dst_reg + off16) += src_reg */
512 atomic64_add((u64) SRC, (atomic64_t *)(unsigned long)
513 (DST + insn->off));
514 CONT;
515 LD_ABS_W: /* BPF_R0 = ntohl(*(u32 *) (skb->data + imm32)) */
516 off = IMM;
517load_word:
518 /* BPF_LD + BPD_ABS and BPF_LD + BPF_IND insns are
519 * only appearing in the programs where ctx ==
520 * skb. All programs keep 'ctx' in regs[BPF_REG_CTX]
521 * == BPF_R6, sk_convert_filter() saves it in BPF_R6,
522 * internal BPF verifier will check that BPF_R6 ==
523 * ctx.
524 *
525 * BPF_ABS and BPF_IND are wrappers of function calls,
526 * so they scratch BPF_R1-BPF_R5 registers, preserve
527 * BPF_R6-BPF_R9, and store return value into BPF_R0.
528 *
529 * Implicit input:
530 * ctx == skb == BPF_R6 == CTX
531 *
532 * Explicit input:
533 * SRC == any register
534 * IMM == 32-bit immediate
535 *
536 * Output:
537 * BPF_R0 - 8/16/32-bit skb data converted to cpu endianness
538 */
539
540 ptr = load_pointer((struct sk_buff *) (unsigned long) CTX, off, 4, &tmp);
541 if (likely(ptr != NULL)) {
542 BPF_R0 = get_unaligned_be32(ptr);
543 CONT;
544 }
545
546 return 0;
547 LD_ABS_H: /* BPF_R0 = ntohs(*(u16 *) (skb->data + imm32)) */
548 off = IMM;
549load_half:
550 ptr = load_pointer((struct sk_buff *) (unsigned long) CTX, off, 2, &tmp);
551 if (likely(ptr != NULL)) {
552 BPF_R0 = get_unaligned_be16(ptr);
553 CONT;
554 }
555
556 return 0;
557 LD_ABS_B: /* BPF_R0 = *(u8 *) (skb->data + imm32) */
558 off = IMM;
559load_byte:
560 ptr = load_pointer((struct sk_buff *) (unsigned long) CTX, off, 1, &tmp);
561 if (likely(ptr != NULL)) {
562 BPF_R0 = *(u8 *)ptr;
563 CONT;
564 }
565
566 return 0;
567 LD_IND_W: /* BPF_R0 = ntohl(*(u32 *) (skb->data + src_reg + imm32)) */
568 off = IMM + SRC;
569 goto load_word;
570 LD_IND_H: /* BPF_R0 = ntohs(*(u16 *) (skb->data + src_reg + imm32)) */
571 off = IMM + SRC;
572 goto load_half;
573 LD_IND_B: /* BPF_R0 = *(u8 *) (skb->data + src_reg + imm32) */
574 off = IMM + SRC;
575 goto load_byte;
576
577 default_label:
578 /* If we ever reach this, we have a bug somewhere. */
579 WARN_RATELIMIT(1, "unknown opcode %02x\n", insn->code);
580 return 0;
581}
582
583/* Helper to find the offset of pkt_type in sk_buff structure. We want 90/* Helper to find the offset of pkt_type in sk_buff structure. We want
584 * to make sure its still a 3bit field starting at a byte boundary; 91 * to make sure its still a 3bit field starting at a byte boundary;
585 * taken from arch/x86/net/bpf_jit_comp.c. 92 * taken from arch/x86/net/bpf_jit_comp.c.
@@ -667,9 +174,9 @@ static u64 __get_random_u32(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
667} 174}
668 175
669static bool convert_bpf_extensions(struct sock_filter *fp, 176static bool convert_bpf_extensions(struct sock_filter *fp,
670 struct sock_filter_int **insnp) 177 struct bpf_insn **insnp)
671{ 178{
672 struct sock_filter_int *insn = *insnp; 179 struct bpf_insn *insn = *insnp;
673 180
674 switch (fp->k) { 181 switch (fp->k) {
675 case SKF_AD_OFF + SKF_AD_PROTOCOL: 182 case SKF_AD_OFF + SKF_AD_PROTOCOL:
@@ -805,7 +312,7 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
805} 312}
806 313
807/** 314/**
808 * sk_convert_filter - convert filter program 315 * bpf_convert_filter - convert filter program
809 * @prog: the user passed filter program 316 * @prog: the user passed filter program
810 * @len: the length of the user passed filter program 317 * @len: the length of the user passed filter program
811 * @new_prog: buffer where converted program will be stored 318 * @new_prog: buffer where converted program will be stored
@@ -815,12 +322,12 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
815 * Conversion workflow: 322 * Conversion workflow:
816 * 323 *
817 * 1) First pass for calculating the new program length: 324 * 1) First pass for calculating the new program length:
818 * sk_convert_filter(old_prog, old_len, NULL, &new_len) 325 * bpf_convert_filter(old_prog, old_len, NULL, &new_len)
819 * 326 *
820 * 2) 2nd pass to remap in two passes: 1st pass finds new 327 * 2) 2nd pass to remap in two passes: 1st pass finds new
821 * jump offsets, 2nd pass remapping: 328 * jump offsets, 2nd pass remapping:
822 * new_prog = kmalloc(sizeof(struct sock_filter_int) * new_len); 329 * new_prog = kmalloc(sizeof(struct bpf_insn) * new_len);
823 * sk_convert_filter(old_prog, old_len, new_prog, &new_len); 330 * bpf_convert_filter(old_prog, old_len, new_prog, &new_len);
824 * 331 *
825 * User BPF's register A is mapped to our BPF register 6, user BPF 332 * User BPF's register A is mapped to our BPF register 6, user BPF
826 * register X is mapped to BPF register 7; frame pointer is always 333 * register X is mapped to BPF register 7; frame pointer is always
@@ -828,11 +335,11 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
828 * for socket filters: ctx == 'struct sk_buff *', for seccomp: 335 * for socket filters: ctx == 'struct sk_buff *', for seccomp:
829 * ctx == 'struct seccomp_data *'. 336 * ctx == 'struct seccomp_data *'.
830 */ 337 */
831int sk_convert_filter(struct sock_filter *prog, int len, 338int bpf_convert_filter(struct sock_filter *prog, int len,
832 struct sock_filter_int *new_prog, int *new_len) 339 struct bpf_insn *new_prog, int *new_len)
833{ 340{
834 int new_flen = 0, pass = 0, target, i; 341 int new_flen = 0, pass = 0, target, i;
835 struct sock_filter_int *new_insn; 342 struct bpf_insn *new_insn;
836 struct sock_filter *fp; 343 struct sock_filter *fp;
837 int *addrs = NULL; 344 int *addrs = NULL;
838 u8 bpf_src; 345 u8 bpf_src;
@@ -858,8 +365,8 @@ do_pass:
858 new_insn++; 365 new_insn++;
859 366
860 for (i = 0; i < len; fp++, i++) { 367 for (i = 0; i < len; fp++, i++) {
861 struct sock_filter_int tmp_insns[6] = { }; 368 struct bpf_insn tmp_insns[6] = { };
862 struct sock_filter_int *insn = tmp_insns; 369 struct bpf_insn *insn = tmp_insns;
863 370
864 if (addrs) 371 if (addrs)
865 addrs[i] = new_insn - new_prog; 372 addrs[i] = new_insn - new_prog;
@@ -1094,7 +601,7 @@ err:
1094 * a cell if not previously written, and we check all branches to be sure 601 * a cell if not previously written, and we check all branches to be sure
1095 * a malicious user doesn't try to abuse us. 602 * a malicious user doesn't try to abuse us.
1096 */ 603 */
1097static int check_load_and_stores(struct sock_filter *filter, int flen) 604static int check_load_and_stores(const struct sock_filter *filter, int flen)
1098{ 605{
1099 u16 *masks, memvalid = 0; /* One bit per cell, 16 cells */ 606 u16 *masks, memvalid = 0; /* One bit per cell, 16 cells */
1100 int pc, ret = 0; 607 int pc, ret = 0;
@@ -1214,7 +721,7 @@ static bool chk_code_allowed(u16 code_to_probe)
1214} 721}
1215 722
1216/** 723/**
1217 * sk_chk_filter - verify socket filter code 724 * bpf_check_classic - verify socket filter code
1218 * @filter: filter to verify 725 * @filter: filter to verify
1219 * @flen: length of filter 726 * @flen: length of filter
1220 * 727 *
@@ -1227,7 +734,7 @@ static bool chk_code_allowed(u16 code_to_probe)
1227 * 734 *
1228 * Returns 0 if the rule set is legal or -EINVAL if not. 735 * Returns 0 if the rule set is legal or -EINVAL if not.
1229 */ 736 */
1230int sk_chk_filter(struct sock_filter *filter, unsigned int flen) 737int bpf_check_classic(const struct sock_filter *filter, unsigned int flen)
1231{ 738{
1232 bool anc_found; 739 bool anc_found;
1233 int pc; 740 int pc;
@@ -1237,7 +744,7 @@ int sk_chk_filter(struct sock_filter *filter, unsigned int flen)
1237 744
1238 /* Check the filter code now */ 745 /* Check the filter code now */
1239 for (pc = 0; pc < flen; pc++) { 746 for (pc = 0; pc < flen; pc++) {
1240 struct sock_filter *ftest = &filter[pc]; 747 const struct sock_filter *ftest = &filter[pc];
1241 748
1242 /* May we actually operate on this code? */ 749 /* May we actually operate on this code? */
1243 if (!chk_code_allowed(ftest->code)) 750 if (!chk_code_allowed(ftest->code))
@@ -1301,12 +808,12 @@ int sk_chk_filter(struct sock_filter *filter, unsigned int flen)
1301 808
1302 return -EINVAL; 809 return -EINVAL;
1303} 810}
1304EXPORT_SYMBOL(sk_chk_filter); 811EXPORT_SYMBOL(bpf_check_classic);
1305 812
1306static int sk_store_orig_filter(struct sk_filter *fp, 813static int bpf_prog_store_orig_filter(struct bpf_prog *fp,
1307 const struct sock_fprog *fprog) 814 const struct sock_fprog *fprog)
1308{ 815{
1309 unsigned int fsize = sk_filter_proglen(fprog); 816 unsigned int fsize = bpf_classic_proglen(fprog);
1310 struct sock_fprog_kern *fkprog; 817 struct sock_fprog_kern *fkprog;
1311 818
1312 fp->orig_prog = kmalloc(sizeof(*fkprog), GFP_KERNEL); 819 fp->orig_prog = kmalloc(sizeof(*fkprog), GFP_KERNEL);
@@ -1324,7 +831,7 @@ static int sk_store_orig_filter(struct sk_filter *fp,
1324 return 0; 831 return 0;
1325} 832}
1326 833
1327static void sk_release_orig_filter(struct sk_filter *fp) 834static void bpf_release_orig_filter(struct bpf_prog *fp)
1328{ 835{
1329 struct sock_fprog_kern *fprog = fp->orig_prog; 836 struct sock_fprog_kern *fprog = fp->orig_prog;
1330 837
@@ -1334,6 +841,18 @@ static void sk_release_orig_filter(struct sk_filter *fp)
1334 } 841 }
1335} 842}
1336 843
844static void __bpf_prog_release(struct bpf_prog *prog)
845{
846 bpf_release_orig_filter(prog);
847 bpf_prog_free(prog);
848}
849
850static void __sk_filter_release(struct sk_filter *fp)
851{
852 __bpf_prog_release(fp->prog);
853 kfree(fp);
854}
855
1337/** 856/**
1338 * sk_filter_release_rcu - Release a socket filter by rcu_head 857 * sk_filter_release_rcu - Release a socket filter by rcu_head
1339 * @rcu: rcu_head that contains the sk_filter to free 858 * @rcu: rcu_head that contains the sk_filter to free
@@ -1342,8 +861,7 @@ static void sk_filter_release_rcu(struct rcu_head *rcu)
1342{ 861{
1343 struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu); 862 struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu);
1344 863
1345 sk_release_orig_filter(fp); 864 __sk_filter_release(fp);
1346 sk_filter_free(fp);
1347} 865}
1348 866
1349/** 867/**
@@ -1360,44 +878,33 @@ static void sk_filter_release(struct sk_filter *fp)
1360 878
1361void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp) 879void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp)
1362{ 880{
1363 atomic_sub(sk_filter_size(fp->len), &sk->sk_omem_alloc); 881 u32 filter_size = bpf_prog_size(fp->prog->len);
1364 sk_filter_release(fp);
1365}
1366 882
1367void sk_filter_charge(struct sock *sk, struct sk_filter *fp) 883 atomic_sub(filter_size, &sk->sk_omem_alloc);
1368{ 884 sk_filter_release(fp);
1369 atomic_inc(&fp->refcnt);
1370 atomic_add(sk_filter_size(fp->len), &sk->sk_omem_alloc);
1371} 885}
1372 886
1373static struct sk_filter *__sk_migrate_realloc(struct sk_filter *fp, 887/* try to charge the socket memory if there is space available
1374 struct sock *sk, 888 * return true on success
1375 unsigned int len) 889 */
890bool sk_filter_charge(struct sock *sk, struct sk_filter *fp)
1376{ 891{
1377 struct sk_filter *fp_new; 892 u32 filter_size = bpf_prog_size(fp->prog->len);
1378 893
1379 if (sk == NULL) 894 /* same check as in sock_kmalloc() */
1380 return krealloc(fp, len, GFP_KERNEL); 895 if (filter_size <= sysctl_optmem_max &&
1381 896 atomic_read(&sk->sk_omem_alloc) + filter_size < sysctl_optmem_max) {
1382 fp_new = sock_kmalloc(sk, len, GFP_KERNEL); 897 atomic_inc(&fp->refcnt);
1383 if (fp_new) { 898 atomic_add(filter_size, &sk->sk_omem_alloc);
1384 *fp_new = *fp; 899 return true;
1385 /* As we're keeping orig_prog in fp_new along,
1386 * we need to make sure we're not evicting it
1387 * from the old fp.
1388 */
1389 fp->orig_prog = NULL;
1390 sk_filter_uncharge(sk, fp);
1391 } 900 }
1392 901 return false;
1393 return fp_new;
1394} 902}
1395 903
1396static struct sk_filter *__sk_migrate_filter(struct sk_filter *fp, 904static struct bpf_prog *bpf_migrate_filter(struct bpf_prog *fp)
1397 struct sock *sk)
1398{ 905{
1399 struct sock_filter *old_prog; 906 struct sock_filter *old_prog;
1400 struct sk_filter *old_fp; 907 struct bpf_prog *old_fp;
1401 int err, new_len, old_len = fp->len; 908 int err, new_len, old_len = fp->len;
1402 909
1403 /* We are free to overwrite insns et al right here as it 910 /* We are free to overwrite insns et al right here as it
@@ -1406,7 +913,7 @@ static struct sk_filter *__sk_migrate_filter(struct sk_filter *fp,
1406 * representation. 913 * representation.
1407 */ 914 */
1408 BUILD_BUG_ON(sizeof(struct sock_filter) != 915 BUILD_BUG_ON(sizeof(struct sock_filter) !=
1409 sizeof(struct sock_filter_int)); 916 sizeof(struct bpf_insn));
1410 917
1411 /* Conversion cannot happen on overlapping memory areas, 918 /* Conversion cannot happen on overlapping memory areas,
1412 * so we need to keep the user BPF around until the 2nd 919 * so we need to keep the user BPF around until the 2nd
@@ -1420,13 +927,13 @@ static struct sk_filter *__sk_migrate_filter(struct sk_filter *fp,
1420 } 927 }
1421 928
1422 /* 1st pass: calculate the new program length. */ 929 /* 1st pass: calculate the new program length. */
1423 err = sk_convert_filter(old_prog, old_len, NULL, &new_len); 930 err = bpf_convert_filter(old_prog, old_len, NULL, &new_len);
1424 if (err) 931 if (err)
1425 goto out_err_free; 932 goto out_err_free;
1426 933
1427 /* Expand fp for appending the new filter representation. */ 934 /* Expand fp for appending the new filter representation. */
1428 old_fp = fp; 935 old_fp = fp;
1429 fp = __sk_migrate_realloc(old_fp, sk, sk_filter_size(new_len)); 936 fp = krealloc(old_fp, bpf_prog_size(new_len), GFP_KERNEL);
1430 if (!fp) { 937 if (!fp) {
1431 /* The old_fp is still around in case we couldn't 938 /* The old_fp is still around in case we couldn't
1432 * allocate new memory, so uncharge on that one. 939 * allocate new memory, so uncharge on that one.
@@ -1438,17 +945,17 @@ static struct sk_filter *__sk_migrate_filter(struct sk_filter *fp,
1438 945
1439 fp->len = new_len; 946 fp->len = new_len;
1440 947
1441 /* 2nd pass: remap sock_filter insns into sock_filter_int insns. */ 948 /* 2nd pass: remap sock_filter insns into bpf_insn insns. */
1442 err = sk_convert_filter(old_prog, old_len, fp->insnsi, &new_len); 949 err = bpf_convert_filter(old_prog, old_len, fp->insnsi, &new_len);
1443 if (err) 950 if (err)
1444 /* 2nd sk_convert_filter() can fail only if it fails 951 /* 2nd bpf_convert_filter() can fail only if it fails
1445 * to allocate memory, remapping must succeed. Note, 952 * to allocate memory, remapping must succeed. Note,
1446 * that at this time old_fp has already been released 953 * that at this time old_fp has already been released
1447 * by __sk_migrate_realloc(). 954 * by krealloc().
1448 */ 955 */
1449 goto out_err_free; 956 goto out_err_free;
1450 957
1451 sk_filter_select_runtime(fp); 958 bpf_prog_select_runtime(fp);
1452 959
1453 kfree(old_prog); 960 kfree(old_prog);
1454 return fp; 961 return fp;
@@ -1456,55 +963,20 @@ static struct sk_filter *__sk_migrate_filter(struct sk_filter *fp,
1456out_err_free: 963out_err_free:
1457 kfree(old_prog); 964 kfree(old_prog);
1458out_err: 965out_err:
1459 /* Rollback filter setup. */ 966 __bpf_prog_release(fp);
1460 if (sk != NULL)
1461 sk_filter_uncharge(sk, fp);
1462 else
1463 kfree(fp);
1464 return ERR_PTR(err); 967 return ERR_PTR(err);
1465} 968}
1466 969
1467void __weak bpf_int_jit_compile(struct sk_filter *prog) 970static struct bpf_prog *bpf_prepare_filter(struct bpf_prog *fp)
1468{
1469}
1470
1471/**
1472 * sk_filter_select_runtime - select execution runtime for BPF program
1473 * @fp: sk_filter populated with internal BPF program
1474 *
1475 * try to JIT internal BPF program, if JIT is not available select interpreter
1476 * BPF program will be executed via SK_RUN_FILTER() macro
1477 */
1478void sk_filter_select_runtime(struct sk_filter *fp)
1479{
1480 fp->bpf_func = (void *) __sk_run_filter;
1481
1482 /* Probe if internal BPF can be JITed */
1483 bpf_int_jit_compile(fp);
1484}
1485EXPORT_SYMBOL_GPL(sk_filter_select_runtime);
1486
1487/* free internal BPF program */
1488void sk_filter_free(struct sk_filter *fp)
1489{
1490 bpf_jit_free(fp);
1491}
1492EXPORT_SYMBOL_GPL(sk_filter_free);
1493
1494static struct sk_filter *__sk_prepare_filter(struct sk_filter *fp,
1495 struct sock *sk)
1496{ 971{
1497 int err; 972 int err;
1498 973
1499 fp->bpf_func = NULL; 974 fp->bpf_func = NULL;
1500 fp->jited = 0; 975 fp->jited = 0;
1501 976
1502 err = sk_chk_filter(fp->insns, fp->len); 977 err = bpf_check_classic(fp->insns, fp->len);
1503 if (err) { 978 if (err) {
1504 if (sk != NULL) 979 __bpf_prog_release(fp);
1505 sk_filter_uncharge(sk, fp);
1506 else
1507 kfree(fp);
1508 return ERR_PTR(err); 980 return ERR_PTR(err);
1509 } 981 }
1510 982
@@ -1517,13 +989,13 @@ static struct sk_filter *__sk_prepare_filter(struct sk_filter *fp,
1517 * internal BPF translation for the optimized interpreter. 989 * internal BPF translation for the optimized interpreter.
1518 */ 990 */
1519 if (!fp->jited) 991 if (!fp->jited)
1520 fp = __sk_migrate_filter(fp, sk); 992 fp = bpf_migrate_filter(fp);
1521 993
1522 return fp; 994 return fp;
1523} 995}
1524 996
1525/** 997/**
1526 * sk_unattached_filter_create - create an unattached filter 998 * bpf_prog_create - create an unattached filter
1527 * @pfp: the unattached filter that is created 999 * @pfp: the unattached filter that is created
1528 * @fprog: the filter program 1000 * @fprog: the filter program
1529 * 1001 *
@@ -1532,23 +1004,21 @@ static struct sk_filter *__sk_prepare_filter(struct sk_filter *fp,
1532 * If an error occurs or there is insufficient memory for the filter 1004 * If an error occurs or there is insufficient memory for the filter
1533 * a negative errno code is returned. On success the return is zero. 1005 * a negative errno code is returned. On success the return is zero.
1534 */ 1006 */
1535int sk_unattached_filter_create(struct sk_filter **pfp, 1007int bpf_prog_create(struct bpf_prog **pfp, struct sock_fprog_kern *fprog)
1536 struct sock_fprog_kern *fprog)
1537{ 1008{
1538 unsigned int fsize = sk_filter_proglen(fprog); 1009 unsigned int fsize = bpf_classic_proglen(fprog);
1539 struct sk_filter *fp; 1010 struct bpf_prog *fp;
1540 1011
1541 /* Make sure new filter is there and in the right amounts. */ 1012 /* Make sure new filter is there and in the right amounts. */
1542 if (fprog->filter == NULL) 1013 if (fprog->filter == NULL)
1543 return -EINVAL; 1014 return -EINVAL;
1544 1015
1545 fp = kmalloc(sk_filter_size(fprog->len), GFP_KERNEL); 1016 fp = kmalloc(bpf_prog_size(fprog->len), GFP_KERNEL);
1546 if (!fp) 1017 if (!fp)
1547 return -ENOMEM; 1018 return -ENOMEM;
1548 1019
1549 memcpy(fp->insns, fprog->filter, fsize); 1020 memcpy(fp->insns, fprog->filter, fsize);
1550 1021
1551 atomic_set(&fp->refcnt, 1);
1552 fp->len = fprog->len; 1022 fp->len = fprog->len;
1553 /* Since unattached filters are not copied back to user 1023 /* Since unattached filters are not copied back to user
1554 * space through sk_get_filter(), we do not need to hold 1024 * space through sk_get_filter(), we do not need to hold
@@ -1556,23 +1026,23 @@ int sk_unattached_filter_create(struct sk_filter **pfp,
1556 */ 1026 */
1557 fp->orig_prog = NULL; 1027 fp->orig_prog = NULL;
1558 1028
1559 /* __sk_prepare_filter() already takes care of uncharging 1029 /* bpf_prepare_filter() already takes care of freeing
1560 * memory in case something goes wrong. 1030 * memory in case something goes wrong.
1561 */ 1031 */
1562 fp = __sk_prepare_filter(fp, NULL); 1032 fp = bpf_prepare_filter(fp);
1563 if (IS_ERR(fp)) 1033 if (IS_ERR(fp))
1564 return PTR_ERR(fp); 1034 return PTR_ERR(fp);
1565 1035
1566 *pfp = fp; 1036 *pfp = fp;
1567 return 0; 1037 return 0;
1568} 1038}
1569EXPORT_SYMBOL_GPL(sk_unattached_filter_create); 1039EXPORT_SYMBOL_GPL(bpf_prog_create);
1570 1040
1571void sk_unattached_filter_destroy(struct sk_filter *fp) 1041void bpf_prog_destroy(struct bpf_prog *fp)
1572{ 1042{
1573 sk_filter_release(fp); 1043 __bpf_prog_release(fp);
1574} 1044}
1575EXPORT_SYMBOL_GPL(sk_unattached_filter_destroy); 1045EXPORT_SYMBOL_GPL(bpf_prog_destroy);
1576 1046
1577/** 1047/**
1578 * sk_attach_filter - attach a socket filter 1048 * sk_attach_filter - attach a socket filter
@@ -1587,8 +1057,9 @@ EXPORT_SYMBOL_GPL(sk_unattached_filter_destroy);
1587int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) 1057int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
1588{ 1058{
1589 struct sk_filter *fp, *old_fp; 1059 struct sk_filter *fp, *old_fp;
1590 unsigned int fsize = sk_filter_proglen(fprog); 1060 unsigned int fsize = bpf_classic_proglen(fprog);
1591 unsigned int sk_fsize = sk_filter_size(fprog->len); 1061 unsigned int bpf_fsize = bpf_prog_size(fprog->len);
1062 struct bpf_prog *prog;
1592 int err; 1063 int err;
1593 1064
1594 if (sock_flag(sk, SOCK_FILTER_LOCKED)) 1065 if (sock_flag(sk, SOCK_FILTER_LOCKED))
@@ -1598,30 +1069,43 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
1598 if (fprog->filter == NULL) 1069 if (fprog->filter == NULL)
1599 return -EINVAL; 1070 return -EINVAL;
1600 1071
1601 fp = sock_kmalloc(sk, sk_fsize, GFP_KERNEL); 1072 prog = kmalloc(bpf_fsize, GFP_KERNEL);
1602 if (!fp) 1073 if (!prog)
1603 return -ENOMEM; 1074 return -ENOMEM;
1604 1075
1605 if (copy_from_user(fp->insns, fprog->filter, fsize)) { 1076 if (copy_from_user(prog->insns, fprog->filter, fsize)) {
1606 sock_kfree_s(sk, fp, sk_fsize); 1077 kfree(prog);
1607 return -EFAULT; 1078 return -EFAULT;
1608 } 1079 }
1609 1080
1610 atomic_set(&fp->refcnt, 1); 1081 prog->len = fprog->len;
1611 fp->len = fprog->len;
1612 1082
1613 err = sk_store_orig_filter(fp, fprog); 1083 err = bpf_prog_store_orig_filter(prog, fprog);
1614 if (err) { 1084 if (err) {
1615 sk_filter_uncharge(sk, fp); 1085 kfree(prog);
1616 return -ENOMEM; 1086 return -ENOMEM;
1617 } 1087 }
1618 1088
1619 /* __sk_prepare_filter() already takes care of uncharging 1089 /* bpf_prepare_filter() already takes care of freeing
1620 * memory in case something goes wrong. 1090 * memory in case something goes wrong.
1621 */ 1091 */
1622 fp = __sk_prepare_filter(fp, sk); 1092 prog = bpf_prepare_filter(prog);
1623 if (IS_ERR(fp)) 1093 if (IS_ERR(prog))
1624 return PTR_ERR(fp); 1094 return PTR_ERR(prog);
1095
1096 fp = kmalloc(sizeof(*fp), GFP_KERNEL);
1097 if (!fp) {
1098 __bpf_prog_release(prog);
1099 return -ENOMEM;
1100 }
1101 fp->prog = prog;
1102
1103 atomic_set(&fp->refcnt, 0);
1104
1105 if (!sk_filter_charge(sk, fp)) {
1106 __sk_filter_release(fp);
1107 return -ENOMEM;
1108 }
1625 1109
1626 old_fp = rcu_dereference_protected(sk->sk_filter, 1110 old_fp = rcu_dereference_protected(sk->sk_filter,
1627 sock_owned_by_user(sk)); 1111 sock_owned_by_user(sk));
@@ -1670,7 +1154,7 @@ int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf,
1670 /* We're copying the filter that has been originally attached, 1154 /* We're copying the filter that has been originally attached,
1671 * so no conversion/decode needed anymore. 1155 * so no conversion/decode needed anymore.
1672 */ 1156 */
1673 fprog = filter->orig_prog; 1157 fprog = filter->prog->orig_prog;
1674 1158
1675 ret = fprog->len; 1159 ret = fprog->len;
1676 if (!len) 1160 if (!len)
@@ -1682,7 +1166,7 @@ int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf,
1682 goto out; 1166 goto out;
1683 1167
1684 ret = -EFAULT; 1168 ret = -EFAULT;
1685 if (copy_to_user(ubuf, fprog->filter, sk_filter_proglen(fprog))) 1169 if (copy_to_user(ubuf, fprog->filter, bpf_classic_proglen(fprog)))
1686 goto out; 1170 goto out;
1687 1171
1688 /* Instead of bytes, the API requests to return the number 1172 /* Instead of bytes, the API requests to return the number
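Taken together, the filter.c changes split the socket-facing struct sk_filter (refcount plus optmem accounting) from the underlying struct bpf_prog and rename the unattached-filter entry points to bpf_prog_create()/bpf_prog_destroy(). A hedged kernel-context sketch of an in-kernel user of the renamed API, invoking the program through the bpf_func/insnsi pair that bpf_prog_select_runtime() sets up; BPF_STMT is the long-standing classic-BPF macro from linux/filter.h:

/* Sketch (kernel context): build, run and free an unattached classic BPF
 * program with the renamed API.  The program accepts every packet. */
static struct sock_filter accept_all[] = {
	BPF_STMT(BPF_RET | BPF_K, 0xffffffff),	/* return ~0: keep whole packet */
};

static int demo_unattached_filter(struct sk_buff *skb)
{
	struct sock_fprog_kern fprog = {
		.len	= ARRAY_SIZE(accept_all),
		.filter	= accept_all,
	};
	struct bpf_prog *prog;
	unsigned int res;
	int err;

	err = bpf_prog_create(&prog, &fprog);	/* bpf_check_classic() + conversion/JIT */
	if (err)
		return err;

	/* bpf_prog_select_runtime() picked either the interpreter or a JIT
	 * image; both are reached through prog->bpf_func. */
	res = prog->bpf_func(skb, prog->insnsi);

	bpf_prog_destroy(prog);
	return res ? 0 : -EPERM;
}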
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 107ed12a5323..5f362c1d0332 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -80,6 +80,8 @@ ip:
80 case htons(ETH_P_IPV6): { 80 case htons(ETH_P_IPV6): {
81 const struct ipv6hdr *iph; 81 const struct ipv6hdr *iph;
82 struct ipv6hdr _iph; 82 struct ipv6hdr _iph;
83 __be32 flow_label;
84
83ipv6: 85ipv6:
84 iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph); 86 iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph);
85 if (!iph) 87 if (!iph)
@@ -89,6 +91,21 @@ ipv6:
89 flow->src = (__force __be32)ipv6_addr_hash(&iph->saddr); 91 flow->src = (__force __be32)ipv6_addr_hash(&iph->saddr);
90 flow->dst = (__force __be32)ipv6_addr_hash(&iph->daddr); 92 flow->dst = (__force __be32)ipv6_addr_hash(&iph->daddr);
91 nhoff += sizeof(struct ipv6hdr); 93 nhoff += sizeof(struct ipv6hdr);
94
95 flow_label = ip6_flowlabel(iph);
96 if (flow_label) {
97 /* Awesome, IPv6 packet has a flow label so we can
98 * use that to represent the ports without any
99 * further dissection.
100 */
101 flow->n_proto = proto;
102 flow->ip_proto = ip_proto;
103 flow->ports = flow_label;
104 flow->thoff = (u16)nhoff;
105
106 return true;
107 }
108
92 break; 109 break;
93 } 110 }
94 case htons(ETH_P_8021AD): 111 case htons(ETH_P_8021AD):
@@ -175,6 +192,7 @@ ipv6:
175 break; 192 break;
176 } 193 }
177 194
195 flow->n_proto = proto;
178 flow->ip_proto = ip_proto; 196 flow->ip_proto = ip_proto;
179 flow->ports = skb_flow_get_ports(skb, nhoff, ip_proto); 197 flow->ports = skb_flow_get_ports(skb, nhoff, ip_proto);
180 flow->thoff = (u16) nhoff; 198 flow->thoff = (u16) nhoff;
@@ -195,12 +213,33 @@ static __always_inline u32 __flow_hash_3words(u32 a, u32 b, u32 c)
195 return jhash_3words(a, b, c, hashrnd); 213 return jhash_3words(a, b, c, hashrnd);
196} 214}
197 215
198static __always_inline u32 __flow_hash_1word(u32 a) 216static inline u32 __flow_hash_from_keys(struct flow_keys *keys)
199{ 217{
200 __flow_hash_secret_init(); 218 u32 hash;
201 return jhash_1word(a, hashrnd); 219
220 /* get a consistent hash (same value on both flow directions) */
221 if (((__force u32)keys->dst < (__force u32)keys->src) ||
222 (((__force u32)keys->dst == (__force u32)keys->src) &&
223 ((__force u16)keys->port16[1] < (__force u16)keys->port16[0]))) {
224 swap(keys->dst, keys->src);
225 swap(keys->port16[0], keys->port16[1]);
226 }
227
228 hash = __flow_hash_3words((__force u32)keys->dst,
229 (__force u32)keys->src,
230 (__force u32)keys->ports);
231 if (!hash)
232 hash = 1;
233
234 return hash;
202} 235}
203 236
237u32 flow_hash_from_keys(struct flow_keys *keys)
238{
239 return __flow_hash_from_keys(keys);
240}
241EXPORT_SYMBOL(flow_hash_from_keys);
242
204/* 243/*
205 * __skb_get_hash: calculate a flow hash based on src/dst addresses 244 * __skb_get_hash: calculate a flow hash based on src/dst addresses
206 * and src/dst port numbers. Sets hash in skb to non-zero hash value 245 * and src/dst port numbers. Sets hash in skb to non-zero hash value
@@ -210,7 +249,6 @@ static __always_inline u32 __flow_hash_1word(u32 a)
210void __skb_get_hash(struct sk_buff *skb) 249void __skb_get_hash(struct sk_buff *skb)
211{ 250{
212 struct flow_keys keys; 251 struct flow_keys keys;
213 u32 hash;
214 252
215 if (!skb_flow_dissect(skb, &keys)) 253 if (!skb_flow_dissect(skb, &keys))
216 return; 254 return;
@@ -218,21 +256,9 @@ void __skb_get_hash(struct sk_buff *skb)
218 if (keys.ports) 256 if (keys.ports)
219 skb->l4_hash = 1; 257 skb->l4_hash = 1;
220 258
221 /* get a consistent hash (same value on both flow directions) */ 259 skb->sw_hash = 1;
222 if (((__force u32)keys.dst < (__force u32)keys.src) ||
223 (((__force u32)keys.dst == (__force u32)keys.src) &&
224 ((__force u16)keys.port16[1] < (__force u16)keys.port16[0]))) {
225 swap(keys.dst, keys.src);
226 swap(keys.port16[0], keys.port16[1]);
227 }
228
229 hash = __flow_hash_3words((__force u32)keys.dst,
230 (__force u32)keys.src,
231 (__force u32)keys.ports);
232 if (!hash)
233 hash = 1;
234 260
235 skb->hash = hash; 261 skb->hash = __flow_hash_from_keys(&keys);
236} 262}
237EXPORT_SYMBOL(__skb_get_hash); 263EXPORT_SYMBOL(__skb_get_hash);
238 264
@@ -240,7 +266,7 @@ EXPORT_SYMBOL(__skb_get_hash);
240 * Returns a Tx hash based on the given packet descriptor a Tx queues' number 266 * Returns a Tx hash based on the given packet descriptor a Tx queues' number
241 * to be used as a distribution range. 267 * to be used as a distribution range.
242 */ 268 */
243u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb, 269u16 __skb_tx_hash(const struct net_device *dev, struct sk_buff *skb,
244 unsigned int num_tx_queues) 270 unsigned int num_tx_queues)
245{ 271{
246 u32 hash; 272 u32 hash;
@@ -260,13 +286,7 @@ u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb,
260 qcount = dev->tc_to_txq[tc].count; 286 qcount = dev->tc_to_txq[tc].count;
261 } 287 }
262 288
263 if (skb->sk && skb->sk->sk_hash) 289 return (u16) (((u64)skb_get_hash(skb) * qcount) >> 32) + qoffset;
264 hash = skb->sk->sk_hash;
265 else
266 hash = (__force u16) skb->protocol;
267 hash = __flow_hash_1word(hash);
268
269 return (u16) (((u64) hash * qcount) >> 32) + qoffset;
270} 290}
271EXPORT_SYMBOL(__skb_tx_hash); 291EXPORT_SYMBOL(__skb_tx_hash);
272 292
@@ -338,17 +358,10 @@ static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
338 if (map) { 358 if (map) {
339 if (map->len == 1) 359 if (map->len == 1)
340 queue_index = map->queues[0]; 360 queue_index = map->queues[0];
341 else { 361 else
342 u32 hash;
343 if (skb->sk && skb->sk->sk_hash)
344 hash = skb->sk->sk_hash;
345 else
346 hash = (__force u16) skb->protocol ^
347 skb->hash;
348 hash = __flow_hash_1word(hash);
349 queue_index = map->queues[ 362 queue_index = map->queues[
350 ((u64)hash * map->len) >> 32]; 363 ((u64)skb_get_hash(skb) * map->len) >> 32];
351 } 364
352 if (unlikely(queue_index >= dev->real_num_tx_queues)) 365 if (unlikely(queue_index >= dev->real_num_tx_queues))
353 queue_index = -1; 366 queue_index = -1;
354 } 367 }
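The flow_dissector.c changes move the direction-canonicalizing hash out of __skb_get_hash() into __flow_hash_from_keys(), export it as flow_hash_from_keys(), and make the TX-queue selection helpers reuse skb_get_hash() instead of the old one-word hash. The key property: the (src, dst, ports) tuple is reordered before hashing, so both directions of a flow get the same value, and 0 is reserved to mean "no hash". A hedged stand-alone C sketch of that idea; mix3() is only a stand-in for the kernel's seeded jhash_3words():

/* Sketch: direction-insensitive flow hash, mirroring __flow_hash_from_keys().
 * mix3() is a toy mixer, purely for illustration; it is not the kernel code. */
#include <stdint.h>
#include <stdio.h>

struct flow_keys {
	uint32_t src, dst;	/* addresses (or address hashes for IPv6) */
	uint16_t port16[2];	/* [0] = source port, [1] = destination port */
};

static uint32_t mix3(uint32_t a, uint32_t b, uint32_t c)
{
	/* stand-in for jhash_3words() with a random seed */
	a ^= b * 0x9e3779b1u;
	a ^= c * 0x85ebca77u;
	return a * 0xc2b2ae3du;
}

static uint32_t flow_hash(struct flow_keys k)
{
	uint32_t ports, hash;

	/* canonical order, so A->B and B->A collapse to the same tuple */
	if (k.dst < k.src ||
	    (k.dst == k.src && k.port16[1] < k.port16[0])) {
		uint32_t t = k.dst;  k.dst = k.src;  k.src = t;
		uint16_t p = k.port16[0]; k.port16[0] = k.port16[1]; k.port16[1] = p;
	}
	ports = ((uint32_t)k.port16[0] << 16) | k.port16[1];

	hash = mix3(k.dst, k.src, ports);
	return hash ? hash : 1;		/* 0 is reserved for "no hash computed" */
}

int main(void)
{
	struct flow_keys ab = { .src = 0x0a000001, .dst = 0x0a000002,
				.port16 = { 1234, 80 } };
	struct flow_keys ba = { .src = 0x0a000002, .dst = 0x0a000001,
				.port16 = { 80, 1234 } };

	printf("%08x %08x\n", flow_hash(ab), flow_hash(ba));	/* identical */
	return 0;
}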
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 1cac29ebb05b..9dd06699b09c 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -43,12 +43,12 @@ static ssize_t netdev_show(const struct device *dev,
43 struct device_attribute *attr, char *buf, 43 struct device_attribute *attr, char *buf,
44 ssize_t (*format)(const struct net_device *, char *)) 44 ssize_t (*format)(const struct net_device *, char *))
45{ 45{
46 struct net_device *net = to_net_dev(dev); 46 struct net_device *ndev = to_net_dev(dev);
47 ssize_t ret = -EINVAL; 47 ssize_t ret = -EINVAL;
48 48
49 read_lock(&dev_base_lock); 49 read_lock(&dev_base_lock);
50 if (dev_isalive(net)) 50 if (dev_isalive(ndev))
51 ret = (*format)(net, buf); 51 ret = (*format)(ndev, buf);
52 read_unlock(&dev_base_lock); 52 read_unlock(&dev_base_lock);
53 53
54 return ret; 54 return ret;
@@ -56,9 +56,9 @@ static ssize_t netdev_show(const struct device *dev,
56 56
57/* generate a show function for simple field */ 57/* generate a show function for simple field */
58#define NETDEVICE_SHOW(field, format_string) \ 58#define NETDEVICE_SHOW(field, format_string) \
59static ssize_t format_##field(const struct net_device *net, char *buf) \ 59static ssize_t format_##field(const struct net_device *dev, char *buf) \
60{ \ 60{ \
61 return sprintf(buf, format_string, net->field); \ 61 return sprintf(buf, format_string, dev->field); \
62} \ 62} \
63static ssize_t field##_show(struct device *dev, \ 63static ssize_t field##_show(struct device *dev, \
64 struct device_attribute *attr, char *buf) \ 64 struct device_attribute *attr, char *buf) \
@@ -112,16 +112,35 @@ NETDEVICE_SHOW_RO(ifindex, fmt_dec);
112NETDEVICE_SHOW_RO(type, fmt_dec); 112NETDEVICE_SHOW_RO(type, fmt_dec);
113NETDEVICE_SHOW_RO(link_mode, fmt_dec); 113NETDEVICE_SHOW_RO(link_mode, fmt_dec);
114 114
115static ssize_t format_name_assign_type(const struct net_device *dev, char *buf)
116{
117 return sprintf(buf, fmt_dec, dev->name_assign_type);
118}
119
120static ssize_t name_assign_type_show(struct device *dev,
121 struct device_attribute *attr,
122 char *buf)
123{
124 struct net_device *ndev = to_net_dev(dev);
125 ssize_t ret = -EINVAL;
126
127 if (ndev->name_assign_type != NET_NAME_UNKNOWN)
128 ret = netdev_show(dev, attr, buf, format_name_assign_type);
129
130 return ret;
131}
132static DEVICE_ATTR_RO(name_assign_type);
133
115/* use same locking rules as GIFHWADDR ioctl's */ 134/* use same locking rules as GIFHWADDR ioctl's */
116static ssize_t address_show(struct device *dev, struct device_attribute *attr, 135static ssize_t address_show(struct device *dev, struct device_attribute *attr,
117 char *buf) 136 char *buf)
118{ 137{
119 struct net_device *net = to_net_dev(dev); 138 struct net_device *ndev = to_net_dev(dev);
120 ssize_t ret = -EINVAL; 139 ssize_t ret = -EINVAL;
121 140
122 read_lock(&dev_base_lock); 141 read_lock(&dev_base_lock);
123 if (dev_isalive(net)) 142 if (dev_isalive(ndev))
124 ret = sysfs_format_mac(buf, net->dev_addr, net->addr_len); 143 ret = sysfs_format_mac(buf, ndev->dev_addr, ndev->addr_len);
125 read_unlock(&dev_base_lock); 144 read_unlock(&dev_base_lock);
126 return ret; 145 return ret;
127} 146}
@@ -130,18 +149,18 @@ static DEVICE_ATTR_RO(address);
130static ssize_t broadcast_show(struct device *dev, 149static ssize_t broadcast_show(struct device *dev,
131 struct device_attribute *attr, char *buf) 150 struct device_attribute *attr, char *buf)
132{ 151{
133 struct net_device *net = to_net_dev(dev); 152 struct net_device *ndev = to_net_dev(dev);
134 if (dev_isalive(net)) 153 if (dev_isalive(ndev))
135 return sysfs_format_mac(buf, net->broadcast, net->addr_len); 154 return sysfs_format_mac(buf, ndev->broadcast, ndev->addr_len);
136 return -EINVAL; 155 return -EINVAL;
137} 156}
138static DEVICE_ATTR_RO(broadcast); 157static DEVICE_ATTR_RO(broadcast);
139 158
140static int change_carrier(struct net_device *net, unsigned long new_carrier) 159static int change_carrier(struct net_device *dev, unsigned long new_carrier)
141{ 160{
142 if (!netif_running(net)) 161 if (!netif_running(dev))
143 return -EINVAL; 162 return -EINVAL;
144 return dev_change_carrier(net, (bool) new_carrier); 163 return dev_change_carrier(dev, (bool) new_carrier);
145} 164}
146 165
147static ssize_t carrier_store(struct device *dev, struct device_attribute *attr, 166static ssize_t carrier_store(struct device *dev, struct device_attribute *attr,
@@ -265,9 +284,9 @@ static DEVICE_ATTR_RO(carrier_changes);
265 284
266/* read-write attributes */ 285/* read-write attributes */
267 286
268static int change_mtu(struct net_device *net, unsigned long new_mtu) 287static int change_mtu(struct net_device *dev, unsigned long new_mtu)
269{ 288{
270 return dev_set_mtu(net, (int) new_mtu); 289 return dev_set_mtu(dev, (int) new_mtu);
271} 290}
272 291
273static ssize_t mtu_store(struct device *dev, struct device_attribute *attr, 292static ssize_t mtu_store(struct device *dev, struct device_attribute *attr,
@@ -277,9 +296,9 @@ static ssize_t mtu_store(struct device *dev, struct device_attribute *attr,
277} 296}
278NETDEVICE_SHOW_RW(mtu, fmt_dec); 297NETDEVICE_SHOW_RW(mtu, fmt_dec);
279 298
280static int change_flags(struct net_device *net, unsigned long new_flags) 299static int change_flags(struct net_device *dev, unsigned long new_flags)
281{ 300{
282 return dev_change_flags(net, (unsigned int) new_flags); 301 return dev_change_flags(dev, (unsigned int) new_flags);
283} 302}
284 303
285static ssize_t flags_store(struct device *dev, struct device_attribute *attr, 304static ssize_t flags_store(struct device *dev, struct device_attribute *attr,
@@ -289,9 +308,9 @@ static ssize_t flags_store(struct device *dev, struct device_attribute *attr,
289} 308}
290NETDEVICE_SHOW_RW(flags, fmt_hex); 309NETDEVICE_SHOW_RW(flags, fmt_hex);
291 310
292static int change_tx_queue_len(struct net_device *net, unsigned long new_len) 311static int change_tx_queue_len(struct net_device *dev, unsigned long new_len)
293{ 312{
294 net->tx_queue_len = new_len; 313 dev->tx_queue_len = new_len;
295 return 0; 314 return 0;
296} 315}
297 316
@@ -344,9 +363,9 @@ static ssize_t ifalias_show(struct device *dev,
344} 363}
345static DEVICE_ATTR_RW(ifalias); 364static DEVICE_ATTR_RW(ifalias);
346 365
347static int change_group(struct net_device *net, unsigned long new_group) 366static int change_group(struct net_device *dev, unsigned long new_group)
348{ 367{
349 dev_set_group(net, (int) new_group); 368 dev_set_group(dev, (int) new_group);
350 return 0; 369 return 0;
351} 370}
352 371
@@ -387,6 +406,7 @@ static struct attribute *net_class_attrs[] = {
387 &dev_attr_dev_port.attr, 406 &dev_attr_dev_port.attr,
388 &dev_attr_iflink.attr, 407 &dev_attr_iflink.attr,
389 &dev_attr_ifindex.attr, 408 &dev_attr_ifindex.attr,
409 &dev_attr_name_assign_type.attr,
390 &dev_attr_addr_assign_type.attr, 410 &dev_attr_addr_assign_type.attr,
391 &dev_attr_addr_len.attr, 411 &dev_attr_addr_len.attr,
392 &dev_attr_link_mode.attr, 412 &dev_attr_link_mode.attr,
@@ -776,20 +796,20 @@ static struct kobj_type rx_queue_ktype = {
776 .namespace = rx_queue_namespace 796 .namespace = rx_queue_namespace
777}; 797};
778 798
779static int rx_queue_add_kobject(struct net_device *net, int index) 799static int rx_queue_add_kobject(struct net_device *dev, int index)
780{ 800{
781 struct netdev_rx_queue *queue = net->_rx + index; 801 struct netdev_rx_queue *queue = dev->_rx + index;
782 struct kobject *kobj = &queue->kobj; 802 struct kobject *kobj = &queue->kobj;
783 int error = 0; 803 int error = 0;
784 804
785 kobj->kset = net->queues_kset; 805 kobj->kset = dev->queues_kset;
786 error = kobject_init_and_add(kobj, &rx_queue_ktype, NULL, 806 error = kobject_init_and_add(kobj, &rx_queue_ktype, NULL,
787 "rx-%u", index); 807 "rx-%u", index);
788 if (error) 808 if (error)
789 goto exit; 809 goto exit;
790 810
791 if (net->sysfs_rx_queue_group) { 811 if (dev->sysfs_rx_queue_group) {
792 error = sysfs_create_group(kobj, net->sysfs_rx_queue_group); 812 error = sysfs_create_group(kobj, dev->sysfs_rx_queue_group);
793 if (error) 813 if (error)
794 goto exit; 814 goto exit;
795 } 815 }
@@ -805,18 +825,18 @@ exit:
805#endif /* CONFIG_SYSFS */ 825#endif /* CONFIG_SYSFS */
806 826
807int 827int
808net_rx_queue_update_kobjects(struct net_device *net, int old_num, int new_num) 828net_rx_queue_update_kobjects(struct net_device *dev, int old_num, int new_num)
809{ 829{
810#ifdef CONFIG_SYSFS 830#ifdef CONFIG_SYSFS
811 int i; 831 int i;
812 int error = 0; 832 int error = 0;
813 833
814#ifndef CONFIG_RPS 834#ifndef CONFIG_RPS
815 if (!net->sysfs_rx_queue_group) 835 if (!dev->sysfs_rx_queue_group)
816 return 0; 836 return 0;
817#endif 837#endif
818 for (i = old_num; i < new_num; i++) { 838 for (i = old_num; i < new_num; i++) {
819 error = rx_queue_add_kobject(net, i); 839 error = rx_queue_add_kobject(dev, i);
820 if (error) { 840 if (error) {
821 new_num = old_num; 841 new_num = old_num;
822 break; 842 break;
@@ -824,10 +844,10 @@ net_rx_queue_update_kobjects(struct net_device *net, int old_num, int new_num)
824 } 844 }
825 845
826 while (--i >= new_num) { 846 while (--i >= new_num) {
827 if (net->sysfs_rx_queue_group) 847 if (dev->sysfs_rx_queue_group)
828 sysfs_remove_group(&net->_rx[i].kobj, 848 sysfs_remove_group(&dev->_rx[i].kobj,
829 net->sysfs_rx_queue_group); 849 dev->sysfs_rx_queue_group);
830 kobject_put(&net->_rx[i].kobj); 850 kobject_put(&dev->_rx[i].kobj);
831 } 851 }
832 852
833 return error; 853 return error;
@@ -1135,13 +1155,13 @@ static struct kobj_type netdev_queue_ktype = {
1135 .namespace = netdev_queue_namespace, 1155 .namespace = netdev_queue_namespace,
1136}; 1156};
1137 1157
1138static int netdev_queue_add_kobject(struct net_device *net, int index) 1158static int netdev_queue_add_kobject(struct net_device *dev, int index)
1139{ 1159{
1140 struct netdev_queue *queue = net->_tx + index; 1160 struct netdev_queue *queue = dev->_tx + index;
1141 struct kobject *kobj = &queue->kobj; 1161 struct kobject *kobj = &queue->kobj;
1142 int error = 0; 1162 int error = 0;
1143 1163
1144 kobj->kset = net->queues_kset; 1164 kobj->kset = dev->queues_kset;
1145 error = kobject_init_and_add(kobj, &netdev_queue_ktype, NULL, 1165 error = kobject_init_and_add(kobj, &netdev_queue_ktype, NULL,
1146 "tx-%u", index); 1166 "tx-%u", index);
1147 if (error) 1167 if (error)
@@ -1164,14 +1184,14 @@ exit:
1164#endif /* CONFIG_SYSFS */ 1184#endif /* CONFIG_SYSFS */
1165 1185
1166int 1186int
1167netdev_queue_update_kobjects(struct net_device *net, int old_num, int new_num) 1187netdev_queue_update_kobjects(struct net_device *dev, int old_num, int new_num)
1168{ 1188{
1169#ifdef CONFIG_SYSFS 1189#ifdef CONFIG_SYSFS
1170 int i; 1190 int i;
1171 int error = 0; 1191 int error = 0;
1172 1192
1173 for (i = old_num; i < new_num; i++) { 1193 for (i = old_num; i < new_num; i++) {
1174 error = netdev_queue_add_kobject(net, i); 1194 error = netdev_queue_add_kobject(dev, i);
1175 if (error) { 1195 if (error) {
1176 new_num = old_num; 1196 new_num = old_num;
1177 break; 1197 break;
@@ -1179,7 +1199,7 @@ netdev_queue_update_kobjects(struct net_device *net, int old_num, int new_num)
1179 } 1199 }
1180 1200
1181 while (--i >= new_num) { 1201 while (--i >= new_num) {
1182 struct netdev_queue *queue = net->_tx + i; 1202 struct netdev_queue *queue = dev->_tx + i;
1183 1203
1184#ifdef CONFIG_BQL 1204#ifdef CONFIG_BQL
1185 sysfs_remove_group(&queue->kobj, &dql_group); 1205 sysfs_remove_group(&queue->kobj, &dql_group);
@@ -1193,25 +1213,25 @@ netdev_queue_update_kobjects(struct net_device *net, int old_num, int new_num)
1193#endif /* CONFIG_SYSFS */ 1213#endif /* CONFIG_SYSFS */
1194} 1214}
1195 1215
1196static int register_queue_kobjects(struct net_device *net) 1216static int register_queue_kobjects(struct net_device *dev)
1197{ 1217{
1198 int error = 0, txq = 0, rxq = 0, real_rx = 0, real_tx = 0; 1218 int error = 0, txq = 0, rxq = 0, real_rx = 0, real_tx = 0;
1199 1219
1200#ifdef CONFIG_SYSFS 1220#ifdef CONFIG_SYSFS
1201 net->queues_kset = kset_create_and_add("queues", 1221 dev->queues_kset = kset_create_and_add("queues",
1202 NULL, &net->dev.kobj); 1222 NULL, &dev->dev.kobj);
1203 if (!net->queues_kset) 1223 if (!dev->queues_kset)
1204 return -ENOMEM; 1224 return -ENOMEM;
1205 real_rx = net->real_num_rx_queues; 1225 real_rx = dev->real_num_rx_queues;
1206#endif 1226#endif
1207 real_tx = net->real_num_tx_queues; 1227 real_tx = dev->real_num_tx_queues;
1208 1228
1209 error = net_rx_queue_update_kobjects(net, 0, real_rx); 1229 error = net_rx_queue_update_kobjects(dev, 0, real_rx);
1210 if (error) 1230 if (error)
1211 goto error; 1231 goto error;
1212 rxq = real_rx; 1232 rxq = real_rx;
1213 1233
1214 error = netdev_queue_update_kobjects(net, 0, real_tx); 1234 error = netdev_queue_update_kobjects(dev, 0, real_tx);
1215 if (error) 1235 if (error)
1216 goto error; 1236 goto error;
1217 txq = real_tx; 1237 txq = real_tx;
@@ -1219,24 +1239,24 @@ static int register_queue_kobjects(struct net_device *net)
1219 return 0; 1239 return 0;
1220 1240
1221error: 1241error:
1222 netdev_queue_update_kobjects(net, txq, 0); 1242 netdev_queue_update_kobjects(dev, txq, 0);
1223 net_rx_queue_update_kobjects(net, rxq, 0); 1243 net_rx_queue_update_kobjects(dev, rxq, 0);
1224 return error; 1244 return error;
1225} 1245}
1226 1246
1227static void remove_queue_kobjects(struct net_device *net) 1247static void remove_queue_kobjects(struct net_device *dev)
1228{ 1248{
1229 int real_rx = 0, real_tx = 0; 1249 int real_rx = 0, real_tx = 0;
1230 1250
1231#ifdef CONFIG_SYSFS 1251#ifdef CONFIG_SYSFS
1232 real_rx = net->real_num_rx_queues; 1252 real_rx = dev->real_num_rx_queues;
1233#endif 1253#endif
1234 real_tx = net->real_num_tx_queues; 1254 real_tx = dev->real_num_tx_queues;
1235 1255
1236 net_rx_queue_update_kobjects(net, real_rx, 0); 1256 net_rx_queue_update_kobjects(dev, real_rx, 0);
1237 netdev_queue_update_kobjects(net, real_tx, 0); 1257 netdev_queue_update_kobjects(dev, real_tx, 0);
1238#ifdef CONFIG_SYSFS 1258#ifdef CONFIG_SYSFS
1239 kset_unregister(net->queues_kset); 1259 kset_unregister(dev->queues_kset);
1240#endif 1260#endif
1241} 1261}
1242 1262
@@ -1329,13 +1349,13 @@ static struct class net_class = {
1329/* Delete sysfs entries but hold kobject reference until after all 1349/* Delete sysfs entries but hold kobject reference until after all
1330 * netdev references are gone. 1350 * netdev references are gone.
1331 */ 1351 */
1332void netdev_unregister_kobject(struct net_device * net) 1352void netdev_unregister_kobject(struct net_device *ndev)
1333{ 1353{
1334 struct device *dev = &(net->dev); 1354 struct device *dev = &(ndev->dev);
1335 1355
1336 kobject_get(&dev->kobj); 1356 kobject_get(&dev->kobj);
1337 1357
1338 remove_queue_kobjects(net); 1358 remove_queue_kobjects(ndev);
1339 1359
1340 pm_runtime_set_memalloc_noio(dev, false); 1360 pm_runtime_set_memalloc_noio(dev, false);
1341 1361
@@ -1343,18 +1363,18 @@ void netdev_unregister_kobject(struct net_device * net)
1343} 1363}
1344 1364
1345/* Create sysfs entries for network device. */ 1365/* Create sysfs entries for network device. */
1346int netdev_register_kobject(struct net_device *net) 1366int netdev_register_kobject(struct net_device *ndev)
1347{ 1367{
1348 struct device *dev = &(net->dev); 1368 struct device *dev = &(ndev->dev);
1349 const struct attribute_group **groups = net->sysfs_groups; 1369 const struct attribute_group **groups = ndev->sysfs_groups;
1350 int error = 0; 1370 int error = 0;
1351 1371
1352 device_initialize(dev); 1372 device_initialize(dev);
1353 dev->class = &net_class; 1373 dev->class = &net_class;
1354 dev->platform_data = net; 1374 dev->platform_data = ndev;
1355 dev->groups = groups; 1375 dev->groups = groups;
1356 1376
1357 dev_set_name(dev, "%s", net->name); 1377 dev_set_name(dev, "%s", ndev->name);
1358 1378
1359#ifdef CONFIG_SYSFS 1379#ifdef CONFIG_SYSFS
1360 /* Allow for a device specific group */ 1380 /* Allow for a device specific group */
@@ -1364,10 +1384,10 @@ int netdev_register_kobject(struct net_device *net)
1364 *groups++ = &netstat_group; 1384 *groups++ = &netstat_group;
1365 1385
1366#if IS_ENABLED(CONFIG_WIRELESS_EXT) || IS_ENABLED(CONFIG_CFG80211) 1386#if IS_ENABLED(CONFIG_WIRELESS_EXT) || IS_ENABLED(CONFIG_CFG80211)
1367 if (net->ieee80211_ptr) 1387 if (ndev->ieee80211_ptr)
1368 *groups++ = &wireless_group; 1388 *groups++ = &wireless_group;
1369#if IS_ENABLED(CONFIG_WIRELESS_EXT) 1389#if IS_ENABLED(CONFIG_WIRELESS_EXT)
1370 else if (net->wireless_handlers) 1390 else if (ndev->wireless_handlers)
1371 *groups++ = &wireless_group; 1391 *groups++ = &wireless_group;
1372#endif 1392#endif
1373#endif 1393#endif
@@ -1377,7 +1397,7 @@ int netdev_register_kobject(struct net_device *net)
1377 if (error) 1397 if (error)
1378 return error; 1398 return error;
1379 1399
1380 error = register_queue_kobjects(net); 1400 error = register_queue_kobjects(ndev);
1381 if (error) { 1401 if (error) {
1382 device_del(dev); 1402 device_del(dev);
1383 return error; 1403 return error;
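
Alongside the net -> ndev/dev renames, the net-sysfs.c hunks add the read-only name_assign_type attribute; per name_assign_type_show() above, the read fails with EINVAL as long as the device is still NET_NAME_UNKNOWN. A rough userspace sketch of consuming it, assuming nothing beyond the sysfs path added above (the default "eth0" and the printed wording are illustrative):

#include <stdio.h>

int main(int argc, char **argv)
{
        const char *ifname = argc > 1 ? argv[1] : "eth0";
        char path[128], buf[32];
        FILE *f;

        snprintf(path, sizeof(path), "/sys/class/net/%s/name_assign_type", ifname);
        f = fopen(path, "r");
        if (!f) {
                perror(path);
                return 1;
        }
        if (!fgets(buf, sizeof(buf), f)) {
                /* read fails (EINVAL) while the type is NET_NAME_UNKNOWN */
                perror(path);
                fclose(f);
                return 1;
        }
        /* the number maps to the kernel's NET_NAME_* constants, e.g.
         * kernel-enumerated vs. userspace-assigned names */
        printf("%s: name_assign_type=%s", ifname, buf);
        fclose(f);
        return 0;
}
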
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index e33937fb32a0..907fb5e36c02 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -822,7 +822,8 @@ void __netpoll_cleanup(struct netpoll *np)
822 822
823 RCU_INIT_POINTER(np->dev->npinfo, NULL); 823 RCU_INIT_POINTER(np->dev->npinfo, NULL);
824 call_rcu_bh(&npinfo->rcu, rcu_cleanup_netpoll_info); 824 call_rcu_bh(&npinfo->rcu, rcu_cleanup_netpoll_info);
825 } 825 } else
826 RCU_INIT_POINTER(np->dev->npinfo, NULL);
826} 827}
827EXPORT_SYMBOL_GPL(__netpoll_cleanup); 828EXPORT_SYMBOL_GPL(__netpoll_cleanup);
828 829
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index fc17a9d309ac..8b849ddfef2e 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -69,8 +69,9 @@
69 * for running devices in the if_list and sends packets until count is 0 it 69 * for running devices in the if_list and sends packets until count is 0 it
70 * also the thread checks the thread->control which is used for inter-process 70 * also the thread checks the thread->control which is used for inter-process
71 * communication. controlling process "posts" operations to the threads this 71 * communication. controlling process "posts" operations to the threads this
72 * way. The if_lock should be possible to remove when add/rem_device is merged 72 * way.
73 * into this too. 73 * The if_list is RCU protected, and the if_lock remains to protect updating
74 * of if_list, from "add_device" as it invoked from userspace (via proc write).
74 * 75 *
75 * By design there should only be *one* "controlling" process. In practice 76 * By design there should only be *one* "controlling" process. In practice
76 * multiple write accesses gives unpredictable result. Understood by "write" 77 * multiple write accesses gives unpredictable result. Understood by "write"
@@ -208,7 +209,7 @@
208#define T_REMDEVALL (1<<2) /* Remove all devs */ 209#define T_REMDEVALL (1<<2) /* Remove all devs */
209#define T_REMDEV (1<<3) /* Remove one dev */ 210#define T_REMDEV (1<<3) /* Remove one dev */
210 211
211/* If lock -- can be removed after some work */ 212/* If lock -- protects updating of if_list */
212#define if_lock(t) spin_lock(&(t->if_lock)); 213#define if_lock(t) spin_lock(&(t->if_lock));
213#define if_unlock(t) spin_unlock(&(t->if_lock)); 214#define if_unlock(t) spin_unlock(&(t->if_lock));
214 215
@@ -241,6 +242,7 @@ struct pktgen_dev {
241 struct proc_dir_entry *entry; /* proc file */ 242 struct proc_dir_entry *entry; /* proc file */
242 struct pktgen_thread *pg_thread;/* the owner */ 243 struct pktgen_thread *pg_thread;/* the owner */
243 struct list_head list; /* chaining in the thread's run-queue */ 244 struct list_head list; /* chaining in the thread's run-queue */
245 struct rcu_head rcu; /* freed by RCU */
244 246
245 int running; /* if false, the test will stop */ 247 int running; /* if false, the test will stop */
246 248
@@ -802,7 +804,6 @@ static int strn_len(const char __user * user_buffer, unsigned int maxlen)
802 case '\t': 804 case '\t':
803 case ' ': 805 case ' ':
804 goto done_str; 806 goto done_str;
805 break;
806 default: 807 default:
807 break; 808 break;
808 } 809 }
@@ -1737,14 +1738,14 @@ static int pktgen_thread_show(struct seq_file *seq, void *v)
1737 1738
1738 seq_puts(seq, "Running: "); 1739 seq_puts(seq, "Running: ");
1739 1740
1740 if_lock(t); 1741 rcu_read_lock();
1741 list_for_each_entry(pkt_dev, &t->if_list, list) 1742 list_for_each_entry_rcu(pkt_dev, &t->if_list, list)
1742 if (pkt_dev->running) 1743 if (pkt_dev->running)
1743 seq_printf(seq, "%s ", pkt_dev->odevname); 1744 seq_printf(seq, "%s ", pkt_dev->odevname);
1744 1745
1745 seq_puts(seq, "\nStopped: "); 1746 seq_puts(seq, "\nStopped: ");
1746 1747
1747 list_for_each_entry(pkt_dev, &t->if_list, list) 1748 list_for_each_entry_rcu(pkt_dev, &t->if_list, list)
1748 if (!pkt_dev->running) 1749 if (!pkt_dev->running)
1749 seq_printf(seq, "%s ", pkt_dev->odevname); 1750 seq_printf(seq, "%s ", pkt_dev->odevname);
1750 1751
@@ -1753,7 +1754,7 @@ static int pktgen_thread_show(struct seq_file *seq, void *v)
1753 else 1754 else
1754 seq_puts(seq, "\nResult: NA\n"); 1755 seq_puts(seq, "\nResult: NA\n");
1755 1756
1756 if_unlock(t); 1757 rcu_read_unlock();
1757 1758
1758 return 0; 1759 return 0;
1759} 1760}
@@ -1878,10 +1879,8 @@ static struct pktgen_dev *__pktgen_NN_threads(const struct pktgen_net *pn,
1878 pkt_dev = pktgen_find_dev(t, ifname, exact); 1879 pkt_dev = pktgen_find_dev(t, ifname, exact);
1879 if (pkt_dev) { 1880 if (pkt_dev) {
1880 if (remove) { 1881 if (remove) {
1881 if_lock(t);
1882 pkt_dev->removal_mark = 1; 1882 pkt_dev->removal_mark = 1;
1883 t->control |= T_REMDEV; 1883 t->control |= T_REMDEV;
1884 if_unlock(t);
1885 } 1884 }
1886 break; 1885 break;
1887 } 1886 }
@@ -1931,7 +1930,8 @@ static void pktgen_change_name(const struct pktgen_net *pn, struct net_device *d
1931 list_for_each_entry(t, &pn->pktgen_threads, th_list) { 1930 list_for_each_entry(t, &pn->pktgen_threads, th_list) {
1932 struct pktgen_dev *pkt_dev; 1931 struct pktgen_dev *pkt_dev;
1933 1932
1934 list_for_each_entry(pkt_dev, &t->if_list, list) { 1933 rcu_read_lock();
1934 list_for_each_entry_rcu(pkt_dev, &t->if_list, list) {
1935 if (pkt_dev->odev != dev) 1935 if (pkt_dev->odev != dev)
1936 continue; 1936 continue;
1937 1937
@@ -1946,6 +1946,7 @@ static void pktgen_change_name(const struct pktgen_net *pn, struct net_device *d
1946 dev->name); 1946 dev->name);
1947 break; 1947 break;
1948 } 1948 }
1949 rcu_read_unlock();
1949 } 1950 }
1950} 1951}
1951 1952
@@ -2997,8 +2998,8 @@ static void pktgen_run(struct pktgen_thread *t)
2997 2998
2998 func_enter(); 2999 func_enter();
2999 3000
3000 if_lock(t); 3001 rcu_read_lock();
3001 list_for_each_entry(pkt_dev, &t->if_list, list) { 3002 list_for_each_entry_rcu(pkt_dev, &t->if_list, list) {
3002 3003
3003 /* 3004 /*
3004 * setup odev and create initial packet. 3005 * setup odev and create initial packet.
@@ -3007,18 +3008,18 @@ static void pktgen_run(struct pktgen_thread *t)
3007 3008
3008 if (pkt_dev->odev) { 3009 if (pkt_dev->odev) {
3009 pktgen_clear_counters(pkt_dev); 3010 pktgen_clear_counters(pkt_dev);
3010 pkt_dev->running = 1; /* Cranke yeself! */
3011 pkt_dev->skb = NULL; 3011 pkt_dev->skb = NULL;
3012 pkt_dev->started_at = pkt_dev->next_tx = ktime_get(); 3012 pkt_dev->started_at = pkt_dev->next_tx = ktime_get();
3013 3013
3014 set_pkt_overhead(pkt_dev); 3014 set_pkt_overhead(pkt_dev);
3015 3015
3016 strcpy(pkt_dev->result, "Starting"); 3016 strcpy(pkt_dev->result, "Starting");
3017 pkt_dev->running = 1; /* Cranke yeself! */
3017 started++; 3018 started++;
3018 } else 3019 } else
3019 strcpy(pkt_dev->result, "Error starting"); 3020 strcpy(pkt_dev->result, "Error starting");
3020 } 3021 }
3021 if_unlock(t); 3022 rcu_read_unlock();
3022 if (started) 3023 if (started)
3023 t->control &= ~(T_STOP); 3024 t->control &= ~(T_STOP);
3024} 3025}
@@ -3041,27 +3042,25 @@ static int thread_is_running(const struct pktgen_thread *t)
3041{ 3042{
3042 const struct pktgen_dev *pkt_dev; 3043 const struct pktgen_dev *pkt_dev;
3043 3044
3044 list_for_each_entry(pkt_dev, &t->if_list, list) 3045 rcu_read_lock();
3045 if (pkt_dev->running) 3046 list_for_each_entry_rcu(pkt_dev, &t->if_list, list)
3047 if (pkt_dev->running) {
3048 rcu_read_unlock();
3046 return 1; 3049 return 1;
3050 }
3051 rcu_read_unlock();
3047 return 0; 3052 return 0;
3048} 3053}
3049 3054
3050static int pktgen_wait_thread_run(struct pktgen_thread *t) 3055static int pktgen_wait_thread_run(struct pktgen_thread *t)
3051{ 3056{
3052 if_lock(t);
3053
3054 while (thread_is_running(t)) { 3057 while (thread_is_running(t)) {
3055 3058
3056 if_unlock(t);
3057
3058 msleep_interruptible(100); 3059 msleep_interruptible(100);
3059 3060
3060 if (signal_pending(current)) 3061 if (signal_pending(current))
3061 goto signal; 3062 goto signal;
3062 if_lock(t);
3063 } 3063 }
3064 if_unlock(t);
3065 return 1; 3064 return 1;
3066signal: 3065signal:
3067 return 0; 3066 return 0;
@@ -3166,10 +3165,10 @@ static int pktgen_stop_device(struct pktgen_dev *pkt_dev)
3166 return -EINVAL; 3165 return -EINVAL;
3167 } 3166 }
3168 3167
3168 pkt_dev->running = 0;
3169 kfree_skb(pkt_dev->skb); 3169 kfree_skb(pkt_dev->skb);
3170 pkt_dev->skb = NULL; 3170 pkt_dev->skb = NULL;
3171 pkt_dev->stopped_at = ktime_get(); 3171 pkt_dev->stopped_at = ktime_get();
3172 pkt_dev->running = 0;
3173 3172
3174 show_results(pkt_dev, nr_frags); 3173 show_results(pkt_dev, nr_frags);
3175 3174
@@ -3180,9 +3179,8 @@ static struct pktgen_dev *next_to_run(struct pktgen_thread *t)
3180{ 3179{
3181 struct pktgen_dev *pkt_dev, *best = NULL; 3180 struct pktgen_dev *pkt_dev, *best = NULL;
3182 3181
3183 if_lock(t); 3182 rcu_read_lock();
3184 3183 list_for_each_entry_rcu(pkt_dev, &t->if_list, list) {
3185 list_for_each_entry(pkt_dev, &t->if_list, list) {
3186 if (!pkt_dev->running) 3184 if (!pkt_dev->running)
3187 continue; 3185 continue;
3188 if (best == NULL) 3186 if (best == NULL)
@@ -3190,7 +3188,8 @@ static struct pktgen_dev *next_to_run(struct pktgen_thread *t)
3190 else if (ktime_compare(pkt_dev->next_tx, best->next_tx) < 0) 3188 else if (ktime_compare(pkt_dev->next_tx, best->next_tx) < 0)
3191 best = pkt_dev; 3189 best = pkt_dev;
3192 } 3190 }
3193 if_unlock(t); 3191 rcu_read_unlock();
3192
3194 return best; 3193 return best;
3195} 3194}
3196 3195
@@ -3200,13 +3199,13 @@ static void pktgen_stop(struct pktgen_thread *t)
3200 3199
3201 func_enter(); 3200 func_enter();
3202 3201
3203 if_lock(t); 3202 rcu_read_lock();
3204 3203
3205 list_for_each_entry(pkt_dev, &t->if_list, list) { 3204 list_for_each_entry_rcu(pkt_dev, &t->if_list, list) {
3206 pktgen_stop_device(pkt_dev); 3205 pktgen_stop_device(pkt_dev);
3207 } 3206 }
3208 3207
3209 if_unlock(t); 3208 rcu_read_unlock();
3210} 3209}
3211 3210
3212/* 3211/*
@@ -3220,8 +3219,6 @@ static void pktgen_rem_one_if(struct pktgen_thread *t)
3220 3219
3221 func_enter(); 3220 func_enter();
3222 3221
3223 if_lock(t);
3224
3225 list_for_each_safe(q, n, &t->if_list) { 3222 list_for_each_safe(q, n, &t->if_list) {
3226 cur = list_entry(q, struct pktgen_dev, list); 3223 cur = list_entry(q, struct pktgen_dev, list);
3227 3224
@@ -3235,8 +3232,6 @@ static void pktgen_rem_one_if(struct pktgen_thread *t)
3235 3232
3236 break; 3233 break;
3237 } 3234 }
3238
3239 if_unlock(t);
3240} 3235}
3241 3236
3242static void pktgen_rem_all_ifs(struct pktgen_thread *t) 3237static void pktgen_rem_all_ifs(struct pktgen_thread *t)
@@ -3248,8 +3243,6 @@ static void pktgen_rem_all_ifs(struct pktgen_thread *t)
3248 3243
3249 /* Remove all devices, free mem */ 3244 /* Remove all devices, free mem */
3250 3245
3251 if_lock(t);
3252
3253 list_for_each_safe(q, n, &t->if_list) { 3246 list_for_each_safe(q, n, &t->if_list) {
3254 cur = list_entry(q, struct pktgen_dev, list); 3247 cur = list_entry(q, struct pktgen_dev, list);
3255 3248
@@ -3258,8 +3251,6 @@ static void pktgen_rem_all_ifs(struct pktgen_thread *t)
3258 3251
3259 pktgen_remove_device(t, cur); 3252 pktgen_remove_device(t, cur);
3260 } 3253 }
3261
3262 if_unlock(t);
3263} 3254}
3264 3255
3265static void pktgen_rem_thread(struct pktgen_thread *t) 3256static void pktgen_rem_thread(struct pktgen_thread *t)
@@ -3407,10 +3398,10 @@ static int pktgen_thread_worker(void *arg)
3407 3398
3408 pr_debug("starting pktgen/%d: pid=%d\n", cpu, task_pid_nr(current)); 3399 pr_debug("starting pktgen/%d: pid=%d\n", cpu, task_pid_nr(current));
3409 3400
3410 set_current_state(TASK_INTERRUPTIBLE);
3411
3412 set_freezable(); 3401 set_freezable();
3413 3402
3403 __set_current_state(TASK_RUNNING);
3404
3414 while (!kthread_should_stop()) { 3405 while (!kthread_should_stop()) {
3415 pkt_dev = next_to_run(t); 3406 pkt_dev = next_to_run(t);
3416 3407
@@ -3424,8 +3415,6 @@ static int pktgen_thread_worker(void *arg)
3424 continue; 3415 continue;
3425 } 3416 }
3426 3417
3427 __set_current_state(TASK_RUNNING);
3428
3429 if (likely(pkt_dev)) { 3418 if (likely(pkt_dev)) {
3430 pktgen_xmit(pkt_dev); 3419 pktgen_xmit(pkt_dev);
3431 3420
@@ -3456,9 +3445,8 @@ static int pktgen_thread_worker(void *arg)
3456 } 3445 }
3457 3446
3458 try_to_freeze(); 3447 try_to_freeze();
3459
3460 set_current_state(TASK_INTERRUPTIBLE);
3461 } 3448 }
3449 set_current_state(TASK_INTERRUPTIBLE);
3462 3450
3463 pr_debug("%s stopping all device\n", t->tsk->comm); 3451 pr_debug("%s stopping all device\n", t->tsk->comm);
3464 pktgen_stop(t); 3452 pktgen_stop(t);
@@ -3485,8 +3473,8 @@ static struct pktgen_dev *pktgen_find_dev(struct pktgen_thread *t,
3485 struct pktgen_dev *p, *pkt_dev = NULL; 3473 struct pktgen_dev *p, *pkt_dev = NULL;
3486 size_t len = strlen(ifname); 3474 size_t len = strlen(ifname);
3487 3475
3488 if_lock(t); 3476 rcu_read_lock();
3489 list_for_each_entry(p, &t->if_list, list) 3477 list_for_each_entry_rcu(p, &t->if_list, list)
3490 if (strncmp(p->odevname, ifname, len) == 0) { 3478 if (strncmp(p->odevname, ifname, len) == 0) {
3491 if (p->odevname[len]) { 3479 if (p->odevname[len]) {
3492 if (exact || p->odevname[len] != '@') 3480 if (exact || p->odevname[len] != '@')
@@ -3496,7 +3484,7 @@ static struct pktgen_dev *pktgen_find_dev(struct pktgen_thread *t,
3496 break; 3484 break;
3497 } 3485 }
3498 3486
3499 if_unlock(t); 3487 rcu_read_unlock();
3500 pr_debug("find_dev(%s) returning %p\n", ifname, pkt_dev); 3488 pr_debug("find_dev(%s) returning %p\n", ifname, pkt_dev);
3501 return pkt_dev; 3489 return pkt_dev;
3502} 3490}
@@ -3510,6 +3498,12 @@ static int add_dev_to_thread(struct pktgen_thread *t,
3510{ 3498{
3511 int rv = 0; 3499 int rv = 0;
3512 3500
3501 /* This function cannot be called concurrently, as its called
3502 * under pktgen_thread_lock mutex, but it can run from
3503 * userspace on another CPU than the kthread. The if_lock()
3504 * is used here to sync with concurrent instances of
3505 * _rem_dev_from_if_list() invoked via kthread, which is also
3506 * updating the if_list */
3513 if_lock(t); 3507 if_lock(t);
3514 3508
3515 if (pkt_dev->pg_thread) { 3509 if (pkt_dev->pg_thread) {
@@ -3518,9 +3512,9 @@ static int add_dev_to_thread(struct pktgen_thread *t,
3518 goto out; 3512 goto out;
3519 } 3513 }
3520 3514
3521 list_add(&pkt_dev->list, &t->if_list);
3522 pkt_dev->pg_thread = t;
3523 pkt_dev->running = 0; 3515 pkt_dev->running = 0;
3516 pkt_dev->pg_thread = t;
3517 list_add_rcu(&pkt_dev->list, &t->if_list);
3524 3518
3525out: 3519out:
3526 if_unlock(t); 3520 if_unlock(t);
@@ -3675,11 +3669,13 @@ static void _rem_dev_from_if_list(struct pktgen_thread *t,
3675 struct list_head *q, *n; 3669 struct list_head *q, *n;
3676 struct pktgen_dev *p; 3670 struct pktgen_dev *p;
3677 3671
3672 if_lock(t);
3678 list_for_each_safe(q, n, &t->if_list) { 3673 list_for_each_safe(q, n, &t->if_list) {
3679 p = list_entry(q, struct pktgen_dev, list); 3674 p = list_entry(q, struct pktgen_dev, list);
3680 if (p == pkt_dev) 3675 if (p == pkt_dev)
3681 list_del(&p->list); 3676 list_del_rcu(&p->list);
3682 } 3677 }
3678 if_unlock(t);
3683} 3679}
3684 3680
3685static int pktgen_remove_device(struct pktgen_thread *t, 3681static int pktgen_remove_device(struct pktgen_thread *t,
@@ -3699,20 +3695,22 @@ static int pktgen_remove_device(struct pktgen_thread *t,
3699 pkt_dev->odev = NULL; 3695 pkt_dev->odev = NULL;
3700 } 3696 }
3701 3697
3702 /* And update the thread if_list */ 3698 /* Remove proc before if_list entry, because add_device uses
3703 3699 * list to determine if interface already exist, avoid race
3704 _rem_dev_from_if_list(t, pkt_dev); 3700 * with proc_create_data() */
3705
3706 if (pkt_dev->entry) 3701 if (pkt_dev->entry)
3707 proc_remove(pkt_dev->entry); 3702 proc_remove(pkt_dev->entry);
3708 3703
3704 /* And update the thread if_list */
3705 _rem_dev_from_if_list(t, pkt_dev);
3706
3709#ifdef CONFIG_XFRM 3707#ifdef CONFIG_XFRM
3710 free_SAs(pkt_dev); 3708 free_SAs(pkt_dev);
3711#endif 3709#endif
3712 vfree(pkt_dev->flows); 3710 vfree(pkt_dev->flows);
3713 if (pkt_dev->page) 3711 if (pkt_dev->page)
3714 put_page(pkt_dev->page); 3712 put_page(pkt_dev->page);
3715 kfree(pkt_dev); 3713 kfree_rcu(pkt_dev, rcu);
3716 return 0; 3714 return 0;
3717} 3715}
3718 3716
@@ -3812,6 +3810,7 @@ static void __exit pg_cleanup(void)
3812{ 3810{
3813 unregister_netdevice_notifier(&pktgen_notifier_block); 3811 unregister_netdevice_notifier(&pktgen_notifier_block);
3814 unregister_pernet_subsys(&pg_net_ops); 3812 unregister_pernet_subsys(&pg_net_ops);
3813 /* Don't need rcu_barrier() due to use of kfree_rcu() */
3815} 3814}
3816 3815
3817module_init(pg_init); 3816module_init(pg_init);
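
The pktgen changes above convert the per-thread if_list to RCU: readers (the proc show path, next_to_run(), pktgen_find_dev(), ...) now walk it with list_for_each_entry_rcu() under rcu_read_lock(), writers keep if_lock only to serialize updates to the list, and pktgen_remove_device() defers the free with kfree_rcu(), which is also why pg_cleanup() can note that no rcu_barrier() is needed. A condensed kernel-style sketch of the same pattern; struct item, item_list and item_lock are made-up names, not pktgen symbols.

#include <linux/list.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/types.h>

struct item {
        struct list_head list;
        struct rcu_head rcu;
        int running;
};

static LIST_HEAD(item_list);
static DEFINE_SPINLOCK(item_lock);

/* Reader side: no lock taken, safe against concurrent add/remove. */
static bool any_item_running(void)
{
        struct item *it;
        bool ret = false;

        rcu_read_lock();
        list_for_each_entry_rcu(it, &item_list, list) {
                if (it->running) {
                        ret = true;
                        break;
                }
        }
        rcu_read_unlock();
        return ret;
}

/* Update side: writers still serialize against each other. */
static void publish_item(struct item *it)
{
        spin_lock(&item_lock);
        list_add_rcu(&it->list, &item_list);
        spin_unlock(&item_lock);
}

static void retire_item(struct item *it)
{
        spin_lock(&item_lock);
        list_del_rcu(&it->list);
        spin_unlock(&item_lock);
        kfree_rcu(it, rcu);     /* freed only after current readers finish */
}
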
diff --git a/net/core/ptp_classifier.c b/net/core/ptp_classifier.c
index d3027a73fd4b..4eab4a94a59d 100644
--- a/net/core/ptp_classifier.c
+++ b/net/core/ptp_classifier.c
@@ -52,14 +52,43 @@
52 * test_8021q: 52 * test_8021q:
53 * jneq #0x8100, test_ieee1588 ; ETH_P_8021Q ? 53 * jneq #0x8100, test_ieee1588 ; ETH_P_8021Q ?
54 * ldh [16] ; load inner type 54 * ldh [16] ; load inner type
55 * jneq #0x88f7, drop_ieee1588 ; ETH_P_1588 ? 55 * jneq #0x88f7, test_8021q_ipv4 ; ETH_P_1588 ?
56 * ldb [18] ; load payload 56 * ldb [18] ; load payload
57 * and #0x8 ; as we don't have ports here, test 57 * and #0x8 ; as we don't have ports here, test
58 * jneq #0x0, drop_ieee1588 ; for PTP_GEN_BIT and drop these 58 * jneq #0x0, drop_ieee1588 ; for PTP_GEN_BIT and drop these
59 * ldh [18] ; reload payload 59 * ldh [18] ; reload payload
60 * and #0xf ; mask PTP_CLASS_VMASK 60 * and #0xf ; mask PTP_CLASS_VMASK
61 * or #0x40 ; PTP_CLASS_V2_VLAN 61 * or #0x70 ; PTP_CLASS_VLAN|PTP_CLASS_L2
62 * ret a ; return PTP class
63 *
64 * ; PTP over UDP over IPv4 over 802.1Q over Ethernet
65 * test_8021q_ipv4:
66 * jneq #0x800, test_8021q_ipv6 ; ETH_P_IP ?
67 * ldb [27] ; load proto
68 * jneq #17, drop_8021q_ipv4 ; IPPROTO_UDP ?
69 * ldh [24] ; load frag offset field
70 * jset #0x1fff, drop_8021q_ipv4; don't allow fragments
71 * ldxb 4*([18]&0xf) ; load IP header len
72 * ldh [x + 20] ; load UDP dst port
73 * jneq #319, drop_8021q_ipv4 ; is port PTP_EV_PORT ?
74 * ldh [x + 26] ; load payload
75 * and #0xf ; mask PTP_CLASS_VMASK
76 * or #0x50 ; PTP_CLASS_VLAN|PTP_CLASS_IPV4
77 * ret a ; return PTP class
78 * drop_8021q_ipv4: ret #0x0 ; PTP_CLASS_NONE
79 *
80 * ; PTP over UDP over IPv6 over 802.1Q over Ethernet
81 * test_8021q_ipv6:
82 * jneq #0x86dd, drop_8021q_ipv6 ; ETH_P_IPV6 ?
83 * ldb [24] ; load proto
84 * jneq #17, drop_8021q_ipv6 ; IPPROTO_UDP ?
85 * ldh [60] ; load UDP dst port
86 * jneq #319, drop_8021q_ipv6 ; is port PTP_EV_PORT ?
87 * ldh [66] ; load payload
88 * and #0xf ; mask PTP_CLASS_VMASK
89 * or #0x60 ; PTP_CLASS_VLAN|PTP_CLASS_IPV6
62 * ret a ; return PTP class 90 * ret a ; return PTP class
91 * drop_8021q_ipv6: ret #0x0 ; PTP_CLASS_NONE
63 * 92 *
64 * ; PTP over Ethernet 93 * ; PTP over Ethernet
65 * test_ieee1588: 94 * test_ieee1588:
@@ -78,11 +107,11 @@
78#include <linux/filter.h> 107#include <linux/filter.h>
79#include <linux/ptp_classify.h> 108#include <linux/ptp_classify.h>
80 109
81static struct sk_filter *ptp_insns __read_mostly; 110static struct bpf_prog *ptp_insns __read_mostly;
82 111
83unsigned int ptp_classify_raw(const struct sk_buff *skb) 112unsigned int ptp_classify_raw(const struct sk_buff *skb)
84{ 113{
85 return SK_RUN_FILTER(ptp_insns, skb); 114 return BPF_PROG_RUN(ptp_insns, skb);
86} 115}
87EXPORT_SYMBOL_GPL(ptp_classify_raw); 116EXPORT_SYMBOL_GPL(ptp_classify_raw);
88 117
@@ -113,16 +142,39 @@ void __init ptp_classifier_init(void)
113 { 0x44, 0, 0, 0x00000020 }, 142 { 0x44, 0, 0, 0x00000020 },
114 { 0x16, 0, 0, 0x00000000 }, 143 { 0x16, 0, 0, 0x00000000 },
115 { 0x06, 0, 0, 0x00000000 }, 144 { 0x06, 0, 0, 0x00000000 },
116 { 0x15, 0, 9, 0x00008100 }, 145 { 0x15, 0, 32, 0x00008100 },
117 { 0x28, 0, 0, 0x00000010 }, 146 { 0x28, 0, 0, 0x00000010 },
118 { 0x15, 0, 15, 0x000088f7 }, 147 { 0x15, 0, 7, 0x000088f7 },
119 { 0x30, 0, 0, 0x00000012 }, 148 { 0x30, 0, 0, 0x00000012 },
120 { 0x54, 0, 0, 0x00000008 }, 149 { 0x54, 0, 0, 0x00000008 },
121 { 0x15, 0, 12, 0x00000000 }, 150 { 0x15, 0, 35, 0x00000000 },
122 { 0x28, 0, 0, 0x00000012 }, 151 { 0x28, 0, 0, 0x00000012 },
123 { 0x54, 0, 0, 0x0000000f }, 152 { 0x54, 0, 0, 0x0000000f },
124 { 0x44, 0, 0, 0x00000040 }, 153 { 0x44, 0, 0, 0x00000070 },
154 { 0x16, 0, 0, 0x00000000 },
155 { 0x15, 0, 12, 0x00000800 },
156 { 0x30, 0, 0, 0x0000001b },
157 { 0x15, 0, 9, 0x00000011 },
158 { 0x28, 0, 0, 0x00000018 },
159 { 0x45, 7, 0, 0x00001fff },
160 { 0xb1, 0, 0, 0x00000012 },
161 { 0x48, 0, 0, 0x00000014 },
162 { 0x15, 0, 4, 0x0000013f },
163 { 0x48, 0, 0, 0x0000001a },
164 { 0x54, 0, 0, 0x0000000f },
165 { 0x44, 0, 0, 0x00000050 },
166 { 0x16, 0, 0, 0x00000000 },
167 { 0x06, 0, 0, 0x00000000 },
168 { 0x15, 0, 8, 0x000086dd },
169 { 0x30, 0, 0, 0x00000018 },
170 { 0x15, 0, 6, 0x00000011 },
171 { 0x28, 0, 0, 0x0000003c },
172 { 0x15, 0, 4, 0x0000013f },
173 { 0x28, 0, 0, 0x00000042 },
174 { 0x54, 0, 0, 0x0000000f },
175 { 0x44, 0, 0, 0x00000060 },
125 { 0x16, 0, 0, 0x00000000 }, 176 { 0x16, 0, 0, 0x00000000 },
177 { 0x06, 0, 0, 0x00000000 },
126 { 0x15, 0, 7, 0x000088f7 }, 178 { 0x15, 0, 7, 0x000088f7 },
127 { 0x30, 0, 0, 0x0000000e }, 179 { 0x30, 0, 0, 0x0000000e },
128 { 0x54, 0, 0, 0x00000008 }, 180 { 0x54, 0, 0, 0x00000008 },
@@ -137,5 +189,5 @@ void __init ptp_classifier_init(void)
137 .len = ARRAY_SIZE(ptp_filter), .filter = ptp_filter, 189 .len = ARRAY_SIZE(ptp_filter), .filter = ptp_filter,
138 }; 190 };
139 191
140 BUG_ON(sk_unattached_filter_create(&ptp_insns, &ptp_prog)); 192 BUG_ON(bpf_prog_create(&ptp_insns, &ptp_prog));
141} 193}
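
The classifier above is a classic BPF program kept as a raw sock_filter opcode array; with the filter.c rework in this merge it is wrapped in a struct bpf_prog by bpf_prog_create() and executed with BPF_PROG_RUN(), as the last hunks show. A stripped-down kernel-style sketch of that same pattern follows; the trivial "is it IPv4?" program and all demo_* names are illustrative and not part of the PTP classifier.

#include <linux/filter.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>

static struct bpf_prog *demo_prog __read_mostly;

/* ldh [12] ; jneq #0x800, drop ; ret #1 ; drop: ret #0 */
static struct sock_filter demo_filter[] = {
        { 0x28, 0, 0, 0x0000000c },     /* load the EtherType halfword  */
        { 0x15, 0, 1, 0x00000800 },     /* not ETH_P_IP? skip to "drop" */
        { 0x06, 0, 0, 0x00000001 },     /* return 1: IPv4 frame         */
        { 0x06, 0, 0, 0x00000000 },     /* drop: return 0               */
};

static int __init demo_classifier_init(void)
{
        struct sock_fprog_kern demo_fprog = {
                .len    = ARRAY_SIZE(demo_filter),
                .filter = demo_filter,
        };

        /* converts the classic program to internal BPF and may JIT it */
        return bpf_prog_create(&demo_prog, &demo_fprog);
}

static unsigned int demo_classify(const struct sk_buff *skb)
{
        return BPF_PROG_RUN(demo_prog, skb);
}
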
diff --git a/net/core/request_sock.c b/net/core/request_sock.c
index 467f326126e0..04db318e6218 100644
--- a/net/core/request_sock.c
+++ b/net/core/request_sock.c
@@ -41,27 +41,27 @@ int reqsk_queue_alloc(struct request_sock_queue *queue,
41 unsigned int nr_table_entries) 41 unsigned int nr_table_entries)
42{ 42{
43 size_t lopt_size = sizeof(struct listen_sock); 43 size_t lopt_size = sizeof(struct listen_sock);
44 struct listen_sock *lopt; 44 struct listen_sock *lopt = NULL;
45 45
46 nr_table_entries = min_t(u32, nr_table_entries, sysctl_max_syn_backlog); 46 nr_table_entries = min_t(u32, nr_table_entries, sysctl_max_syn_backlog);
47 nr_table_entries = max_t(u32, nr_table_entries, 8); 47 nr_table_entries = max_t(u32, nr_table_entries, 8);
48 nr_table_entries = roundup_pow_of_two(nr_table_entries + 1); 48 nr_table_entries = roundup_pow_of_two(nr_table_entries + 1);
49 lopt_size += nr_table_entries * sizeof(struct request_sock *); 49 lopt_size += nr_table_entries * sizeof(struct request_sock *);
50 if (lopt_size > PAGE_SIZE) 50
51 if (lopt_size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER))
52 lopt = kzalloc(lopt_size, GFP_KERNEL |
53 __GFP_NOWARN |
54 __GFP_NORETRY);
55 if (!lopt)
51 lopt = vzalloc(lopt_size); 56 lopt = vzalloc(lopt_size);
52 else 57 if (!lopt)
53 lopt = kzalloc(lopt_size, GFP_KERNEL);
54 if (lopt == NULL)
55 return -ENOMEM; 58 return -ENOMEM;
56 59
57 for (lopt->max_qlen_log = 3;
58 (1 << lopt->max_qlen_log) < nr_table_entries;
59 lopt->max_qlen_log++);
60
61 get_random_bytes(&lopt->hash_rnd, sizeof(lopt->hash_rnd)); 60 get_random_bytes(&lopt->hash_rnd, sizeof(lopt->hash_rnd));
62 rwlock_init(&queue->syn_wait_lock); 61 rwlock_init(&queue->syn_wait_lock);
63 queue->rskq_accept_head = NULL; 62 queue->rskq_accept_head = NULL;
64 lopt->nr_table_entries = nr_table_entries; 63 lopt->nr_table_entries = nr_table_entries;
64 lopt->max_qlen_log = ilog2(nr_table_entries);
65 65
66 write_lock_bh(&queue->syn_wait_lock); 66 write_lock_bh(&queue->syn_wait_lock);
67 queue->listen_opt = lopt; 67 queue->listen_opt = lopt;
@@ -72,22 +72,8 @@ int reqsk_queue_alloc(struct request_sock_queue *queue,
72 72
73void __reqsk_queue_destroy(struct request_sock_queue *queue) 73void __reqsk_queue_destroy(struct request_sock_queue *queue)
74{ 74{
75 struct listen_sock *lopt; 75 /* This is an error recovery path only, no locking needed */
76 size_t lopt_size; 76 kvfree(queue->listen_opt);
77
78 /*
79 * this is an error recovery path only
80 * no locking needed and the lopt is not NULL
81 */
82
83 lopt = queue->listen_opt;
84 lopt_size = sizeof(struct listen_sock) +
85 lopt->nr_table_entries * sizeof(struct request_sock *);
86
87 if (lopt_size > PAGE_SIZE)
88 vfree(lopt);
89 else
90 kfree(lopt);
91} 77}
92 78
93static inline struct listen_sock *reqsk_queue_yank_listen_sk( 79static inline struct listen_sock *reqsk_queue_yank_listen_sk(
@@ -107,8 +93,6 @@ void reqsk_queue_destroy(struct request_sock_queue *queue)
107{ 93{
108 /* make all the listen_opt local to us */ 94 /* make all the listen_opt local to us */
109 struct listen_sock *lopt = reqsk_queue_yank_listen_sk(queue); 95 struct listen_sock *lopt = reqsk_queue_yank_listen_sk(queue);
110 size_t lopt_size = sizeof(struct listen_sock) +
111 lopt->nr_table_entries * sizeof(struct request_sock *);
112 96
113 if (lopt->qlen != 0) { 97 if (lopt->qlen != 0) {
114 unsigned int i; 98 unsigned int i;
@@ -125,10 +109,7 @@ void reqsk_queue_destroy(struct request_sock_queue *queue)
125 } 109 }
126 110
127 WARN_ON(lopt->qlen != 0); 111 WARN_ON(lopt->qlen != 0);
128 if (lopt_size > PAGE_SIZE) 112 kvfree(lopt);
129 vfree(lopt);
130 else
131 kfree(lopt);
132} 113}
133 114
134/* 115/*
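
The reqsk_queue_alloc() change above replaces the old "> PAGE_SIZE means vzalloc" split with a gentler policy: try kzalloc() for anything up to the costly-order threshold, but with __GFP_NOWARN | __GFP_NORETRY so a fragmented page allocator quietly falls through to vzalloc(); kvfree() then frees either kind of memory, which is what lets __reqsk_queue_destroy() shrink to a single call. A small kernel-style sketch of the pattern with hypothetical names:

#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>

static void *table_zalloc(size_t size)
{
        void *p = NULL;

        /* Prefer physically contiguous memory for modest sizes, but do
         * not warn or retry hard; larger or fragmented requests simply
         * fall through to the vmalloc path. */
        if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER))
                p = kzalloc(size, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY);
        if (!p)
                p = vzalloc(size);
        return p;
}

static void table_free(void *p)
{
        kvfree(p);      /* handles both kmalloc'ed and vmalloc'ed pointers */
}
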
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 1063996f8317..8d39071f32d7 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -299,7 +299,12 @@ int __rtnl_link_register(struct rtnl_link_ops *ops)
299 if (rtnl_link_ops_get(ops->kind)) 299 if (rtnl_link_ops_get(ops->kind))
300 return -EEXIST; 300 return -EEXIST;
301 301
302 if (!ops->dellink) 302 /* The check for setup is here because if ops
303 * does not have that filled up, it is not possible
304 * to use the ops for creating device. So do not
305 * fill up dellink as well. That disables rtnl_dellink.
306 */
307 if (ops->setup && !ops->dellink)
303 ops->dellink = unregister_netdevice_queue; 308 ops->dellink = unregister_netdevice_queue;
304 309
305 list_add_tail(&ops->list, &link_ops); 310 list_add_tail(&ops->list, &link_ops);
@@ -1777,7 +1782,7 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh)
1777 return -ENODEV; 1782 return -ENODEV;
1778 1783
1779 ops = dev->rtnl_link_ops; 1784 ops = dev->rtnl_link_ops;
1780 if (!ops) 1785 if (!ops || !ops->dellink)
1781 return -EOPNOTSUPP; 1786 return -EOPNOTSUPP;
1782 1787
1783 ops->dellink(dev, &list_kill); 1788 ops->dellink(dev, &list_kill);
@@ -1805,7 +1810,8 @@ int rtnl_configure_link(struct net_device *dev, const struct ifinfomsg *ifm)
1805EXPORT_SYMBOL(rtnl_configure_link); 1810EXPORT_SYMBOL(rtnl_configure_link);
1806 1811
1807struct net_device *rtnl_create_link(struct net *net, 1812struct net_device *rtnl_create_link(struct net *net,
1808 char *ifname, const struct rtnl_link_ops *ops, struct nlattr *tb[]) 1813 char *ifname, unsigned char name_assign_type,
1814 const struct rtnl_link_ops *ops, struct nlattr *tb[])
1809{ 1815{
1810 int err; 1816 int err;
1811 struct net_device *dev; 1817 struct net_device *dev;
@@ -1823,8 +1829,8 @@ struct net_device *rtnl_create_link(struct net *net,
1823 num_rx_queues = ops->get_num_rx_queues(); 1829 num_rx_queues = ops->get_num_rx_queues();
1824 1830
1825 err = -ENOMEM; 1831 err = -ENOMEM;
1826 dev = alloc_netdev_mqs(ops->priv_size, ifname, ops->setup, 1832 dev = alloc_netdev_mqs(ops->priv_size, ifname, name_assign_type,
1827 num_tx_queues, num_rx_queues); 1833 ops->setup, num_tx_queues, num_rx_queues);
1828 if (!dev) 1834 if (!dev)
1829 goto err; 1835 goto err;
1830 1836
@@ -1889,6 +1895,7 @@ static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh)
1889 char ifname[IFNAMSIZ]; 1895 char ifname[IFNAMSIZ];
1890 struct nlattr *tb[IFLA_MAX+1]; 1896 struct nlattr *tb[IFLA_MAX+1];
1891 struct nlattr *linkinfo[IFLA_INFO_MAX+1]; 1897 struct nlattr *linkinfo[IFLA_INFO_MAX+1];
1898 unsigned char name_assign_type = NET_NAME_USER;
1892 int err; 1899 int err;
1893 1900
1894#ifdef CONFIG_MODULES 1901#ifdef CONFIG_MODULES
@@ -2038,14 +2045,19 @@ replay:
2038 return -EOPNOTSUPP; 2045 return -EOPNOTSUPP;
2039 } 2046 }
2040 2047
2041 if (!ifname[0]) 2048 if (!ops->setup)
2049 return -EOPNOTSUPP;
2050
2051 if (!ifname[0]) {
2042 snprintf(ifname, IFNAMSIZ, "%s%%d", ops->kind); 2052 snprintf(ifname, IFNAMSIZ, "%s%%d", ops->kind);
2053 name_assign_type = NET_NAME_ENUM;
2054 }
2043 2055
2044 dest_net = rtnl_link_get_net(net, tb); 2056 dest_net = rtnl_link_get_net(net, tb);
2045 if (IS_ERR(dest_net)) 2057 if (IS_ERR(dest_net))
2046 return PTR_ERR(dest_net); 2058 return PTR_ERR(dest_net);
2047 2059
2048 dev = rtnl_create_link(dest_net, ifname, ops, tb); 2060 dev = rtnl_create_link(dest_net, ifname, name_assign_type, ops, tb);
2049 if (IS_ERR(dev)) { 2061 if (IS_ERR(dev)) {
2050 err = PTR_ERR(dev); 2062 err = PTR_ERR(dev);
2051 goto out; 2063 goto out;
@@ -2380,22 +2392,20 @@ int ndo_dflt_fdb_del(struct ndmsg *ndm,
2380 struct net_device *dev, 2392 struct net_device *dev,
2381 const unsigned char *addr) 2393 const unsigned char *addr)
2382{ 2394{
2383 int err = -EOPNOTSUPP; 2395 int err = -EINVAL;
2384 2396
2385 /* If aging addresses are supported device will need to 2397 /* If aging addresses are supported device will need to
2386 * implement its own handler for this. 2398 * implement its own handler for this.
2387 */ 2399 */
2388 if (!(ndm->ndm_state & NUD_PERMANENT)) { 2400 if (!(ndm->ndm_state & NUD_PERMANENT)) {
2389 pr_info("%s: FDB only supports static addresses\n", dev->name); 2401 pr_info("%s: FDB only supports static addresses\n", dev->name);
2390 return -EINVAL; 2402 return err;
2391 } 2403 }
2392 2404
2393 if (is_unicast_ether_addr(addr) || is_link_local_ether_addr(addr)) 2405 if (is_unicast_ether_addr(addr) || is_link_local_ether_addr(addr))
2394 err = dev_uc_del(dev, addr); 2406 err = dev_uc_del(dev, addr);
2395 else if (is_multicast_ether_addr(addr)) 2407 else if (is_multicast_ether_addr(addr))
2396 err = dev_mc_del(dev, addr); 2408 err = dev_mc_del(dev, addr);
2397 else
2398 err = -EINVAL;
2399 2409
2400 return err; 2410 return err;
2401} 2411}
@@ -2509,6 +2519,7 @@ skip:
2509int ndo_dflt_fdb_dump(struct sk_buff *skb, 2519int ndo_dflt_fdb_dump(struct sk_buff *skb,
2510 struct netlink_callback *cb, 2520 struct netlink_callback *cb,
2511 struct net_device *dev, 2521 struct net_device *dev,
2522 struct net_device *filter_dev,
2512 int idx) 2523 int idx)
2513{ 2524{
2514 int err; 2525 int err;
@@ -2526,28 +2537,72 @@ EXPORT_SYMBOL(ndo_dflt_fdb_dump);
2526 2537
2527static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb) 2538static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb)
2528{ 2539{
2529 int idx = 0;
2530 struct net *net = sock_net(skb->sk);
2531 struct net_device *dev; 2540 struct net_device *dev;
2541 struct nlattr *tb[IFLA_MAX+1];
2542 struct net_device *bdev = NULL;
2543 struct net_device *br_dev = NULL;
2544 const struct net_device_ops *ops = NULL;
2545 const struct net_device_ops *cops = NULL;
2546 struct ifinfomsg *ifm = nlmsg_data(cb->nlh);
2547 struct net *net = sock_net(skb->sk);
2548 int brport_idx = 0;
2549 int br_idx = 0;
2550 int idx = 0;
2532 2551
2533 rcu_read_lock(); 2552 if (nlmsg_parse(cb->nlh, sizeof(struct ifinfomsg), tb, IFLA_MAX,
2534 for_each_netdev_rcu(net, dev) { 2553 ifla_policy) == 0) {
2535 if (dev->priv_flags & IFF_BRIDGE_PORT) { 2554 if (tb[IFLA_MASTER])
2536 struct net_device *br_dev; 2555 br_idx = nla_get_u32(tb[IFLA_MASTER]);
2537 const struct net_device_ops *ops; 2556 }
2557
2558 brport_idx = ifm->ifi_index;
2538 2559
2539 br_dev = netdev_master_upper_dev_get(dev); 2560 if (br_idx) {
2540 ops = br_dev->netdev_ops; 2561 br_dev = __dev_get_by_index(net, br_idx);
2541 if (ops->ndo_fdb_dump) 2562 if (!br_dev)
2542 idx = ops->ndo_fdb_dump(skb, cb, dev, idx); 2563 return -ENODEV;
2564
2565 ops = br_dev->netdev_ops;
2566 bdev = br_dev;
2567 }
2568
2569 for_each_netdev(net, dev) {
2570 if (brport_idx && (dev->ifindex != brport_idx))
2571 continue;
2572
2573 if (!br_idx) { /* user did not specify a specific bridge */
2574 if (dev->priv_flags & IFF_BRIDGE_PORT) {
2575 br_dev = netdev_master_upper_dev_get(dev);
2576 cops = br_dev->netdev_ops;
2577 }
2578
2579 bdev = dev;
2580 } else {
2581 if (dev != br_dev &&
2582 !(dev->priv_flags & IFF_BRIDGE_PORT))
2583 continue;
2584
2585 if (br_dev != netdev_master_upper_dev_get(dev) &&
2586 !(dev->priv_flags & IFF_EBRIDGE))
2587 continue;
2588
2589 bdev = br_dev;
2590 cops = ops;
2591 }
2592
2593 if (dev->priv_flags & IFF_BRIDGE_PORT) {
2594 if (cops && cops->ndo_fdb_dump)
2595 idx = cops->ndo_fdb_dump(skb, cb, br_dev, dev,
2596 idx);
2543 } 2597 }
2544 2598
2599 idx = ndo_dflt_fdb_dump(skb, cb, dev, NULL, idx);
2545 if (dev->netdev_ops->ndo_fdb_dump) 2600 if (dev->netdev_ops->ndo_fdb_dump)
2546 idx = dev->netdev_ops->ndo_fdb_dump(skb, cb, dev, idx); 2601 idx = dev->netdev_ops->ndo_fdb_dump(skb, cb, bdev, dev,
2547 else 2602 idx);
2548 idx = ndo_dflt_fdb_dump(skb, cb, dev, idx); 2603
2604 cops = NULL;
2549 } 2605 }
2550 rcu_read_unlock();
2551 2606
2552 cb->args[0] = idx; 2607 cb->args[0] = idx;
2553 return skb->len; 2608 return skb->len;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index c1a33033cbe2..224506a6fa80 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -2976,9 +2976,9 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
2976 tail = nskb; 2976 tail = nskb;
2977 2977
2978 __copy_skb_header(nskb, head_skb); 2978 __copy_skb_header(nskb, head_skb);
2979 nskb->mac_len = head_skb->mac_len;
2980 2979
2981 skb_headers_offset_update(nskb, skb_headroom(nskb) - headroom); 2980 skb_headers_offset_update(nskb, skb_headroom(nskb) - headroom);
2981 skb_reset_mac_len(nskb);
2982 2982
2983 skb_copy_from_linear_data_offset(head_skb, -tnl_hlen, 2983 skb_copy_from_linear_data_offset(head_skb, -tnl_hlen,
2984 nskb->data - tnl_hlen, 2984 nskb->data - tnl_hlen,
@@ -3490,10 +3490,10 @@ int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb)
3490} 3490}
3491EXPORT_SYMBOL(sock_queue_err_skb); 3491EXPORT_SYMBOL(sock_queue_err_skb);
3492 3492
3493void skb_tstamp_tx(struct sk_buff *orig_skb, 3493void __skb_tstamp_tx(struct sk_buff *orig_skb,
3494 struct skb_shared_hwtstamps *hwtstamps) 3494 struct skb_shared_hwtstamps *hwtstamps,
3495 struct sock *sk, int tstype)
3495{ 3496{
3496 struct sock *sk = orig_skb->sk;
3497 struct sock_exterr_skb *serr; 3497 struct sock_exterr_skb *serr;
3498 struct sk_buff *skb; 3498 struct sk_buff *skb;
3499 int err; 3499 int err;
@@ -3521,12 +3521,26 @@ void skb_tstamp_tx(struct sk_buff *orig_skb,
3521 memset(serr, 0, sizeof(*serr)); 3521 memset(serr, 0, sizeof(*serr));
3522 serr->ee.ee_errno = ENOMSG; 3522 serr->ee.ee_errno = ENOMSG;
3523 serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING; 3523 serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING;
3524 serr->ee.ee_info = tstype;
3525 if (sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID) {
3526 serr->ee.ee_data = skb_shinfo(skb)->tskey;
3527 if (sk->sk_protocol == IPPROTO_TCP)
3528 serr->ee.ee_data -= sk->sk_tskey;
3529 }
3524 3530
3525 err = sock_queue_err_skb(sk, skb); 3531 err = sock_queue_err_skb(sk, skb);
3526 3532
3527 if (err) 3533 if (err)
3528 kfree_skb(skb); 3534 kfree_skb(skb);
3529} 3535}
3536EXPORT_SYMBOL_GPL(__skb_tstamp_tx);
3537
3538void skb_tstamp_tx(struct sk_buff *orig_skb,
3539 struct skb_shared_hwtstamps *hwtstamps)
3540{
3541 return __skb_tstamp_tx(orig_skb, hwtstamps, orig_skb->sk,
3542 SCM_TSTAMP_SND);
3543}
3530EXPORT_SYMBOL_GPL(skb_tstamp_tx); 3544EXPORT_SYMBOL_GPL(skb_tstamp_tx);
3531 3545
3532void skb_complete_wifi_ack(struct sk_buff *skb, bool acked) 3546void skb_complete_wifi_ack(struct sk_buff *skb, bool acked)
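
With __skb_tstamp_tx() above, every generated TX timestamp carries its report type in ee_info (an SCM_TSTAMP_* value) and, when SOF_TIMESTAMPING_OPT_ID is set, a per-send counter in ee_data (offset against snd_una for TCP), so userspace can match reports to individual writes on a busy socket. A hedged userspace fragment of how that might be consumed: the recvmsg(MSG_ERRQUEUE) and cmsg parsing loop is trimmed, the SOF_TIMESTAMPING_TX_SCHED / TX_ACK flag names are assumed from the rest of this series rather than this hunk, and the SO_TIMESTAMPING fallback value only matches asm-generic architectures.

#include <errno.h>
#include <sys/socket.h>
#include <linux/errqueue.h>
#include <linux/net_tstamp.h>

#ifndef SO_TIMESTAMPING
#define SO_TIMESTAMPING 37      /* asm-generic value; differs on some arches */
#endif

/* Ask for software TX timestamps at scheduler entry, send and (TCP) ACK
 * time, tagged with a per-send id so reports can be matched to writes. */
static int enable_tx_timestamps(int fd)
{
        unsigned int val = SOF_TIMESTAMPING_TX_SCHED |
                           SOF_TIMESTAMPING_TX_SOFTWARE |
                           SOF_TIMESTAMPING_TX_ACK |
                           SOF_TIMESTAMPING_SOFTWARE |
                           SOF_TIMESTAMPING_OPT_ID;

        return setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING, &val, sizeof(val));
}

/* Reports arrive on the socket error queue; ee_info says which hop
 * generated the timestamp and ee_data is the id of the matching send. */
static void handle_report(const struct sock_extended_err *serr)
{
        if (serr->ee_errno != ENOMSG ||
            serr->ee_origin != SO_EE_ORIGIN_TIMESTAMPING)
                return;

        switch (serr->ee_info) {
        case SCM_TSTAMP_SCHED:  /* entered the packet scheduler */
        case SCM_TSTAMP_SND:    /* left the host */
        case SCM_TSTAMP_ACK:    /* acknowledged by the peer (TCP) */
                /* serr->ee_data identifies the originating send */
                break;
        }
}
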
diff --git a/net/core/sock.c b/net/core/sock.c
index 026e01f70274..2714811afbd8 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -491,7 +491,7 @@ int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested)
491 491
492 skb->dev = NULL; 492 skb->dev = NULL;
493 493
494 if (sk_rcvqueues_full(sk, skb, sk->sk_rcvbuf)) { 494 if (sk_rcvqueues_full(sk, sk->sk_rcvbuf)) {
495 atomic_inc(&sk->sk_drops); 495 atomic_inc(&sk->sk_drops);
496 goto discard_and_relse; 496 goto discard_and_relse;
497 } 497 }
@@ -848,24 +848,25 @@ set_rcvbuf:
848 ret = -EINVAL; 848 ret = -EINVAL;
849 break; 849 break;
850 } 850 }
851 sock_valbool_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE, 851 if (val & SOF_TIMESTAMPING_OPT_ID &&
852 val & SOF_TIMESTAMPING_TX_HARDWARE); 852 !(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)) {
853 sock_valbool_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE, 853 if (sk->sk_protocol == IPPROTO_TCP) {
854 val & SOF_TIMESTAMPING_TX_SOFTWARE); 854 if (sk->sk_state != TCP_ESTABLISHED) {
855 sock_valbool_flag(sk, SOCK_TIMESTAMPING_RX_HARDWARE, 855 ret = -EINVAL;
856 val & SOF_TIMESTAMPING_RX_HARDWARE); 856 break;
857 }
858 sk->sk_tskey = tcp_sk(sk)->snd_una;
859 } else {
860 sk->sk_tskey = 0;
861 }
862 }
863 sk->sk_tsflags = val;
857 if (val & SOF_TIMESTAMPING_RX_SOFTWARE) 864 if (val & SOF_TIMESTAMPING_RX_SOFTWARE)
858 sock_enable_timestamp(sk, 865 sock_enable_timestamp(sk,
859 SOCK_TIMESTAMPING_RX_SOFTWARE); 866 SOCK_TIMESTAMPING_RX_SOFTWARE);
860 else 867 else
861 sock_disable_timestamp(sk, 868 sock_disable_timestamp(sk,
862 (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE)); 869 (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE));
863 sock_valbool_flag(sk, SOCK_TIMESTAMPING_SOFTWARE,
864 val & SOF_TIMESTAMPING_SOFTWARE);
865 sock_valbool_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE,
866 val & SOF_TIMESTAMPING_SYS_HARDWARE);
867 sock_valbool_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE,
868 val & SOF_TIMESTAMPING_RAW_HARDWARE);
869 break; 870 break;
870 871
871 case SO_RCVLOWAT: 872 case SO_RCVLOWAT:
@@ -1091,21 +1092,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
1091 break; 1092 break;
1092 1093
1093 case SO_TIMESTAMPING: 1094 case SO_TIMESTAMPING:
1094 v.val = 0; 1095 v.val = sk->sk_tsflags;
1095 if (sock_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE))
1096 v.val |= SOF_TIMESTAMPING_TX_HARDWARE;
1097 if (sock_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE))
1098 v.val |= SOF_TIMESTAMPING_TX_SOFTWARE;
1099 if (sock_flag(sk, SOCK_TIMESTAMPING_RX_HARDWARE))
1100 v.val |= SOF_TIMESTAMPING_RX_HARDWARE;
1101 if (sock_flag(sk, SOCK_TIMESTAMPING_RX_SOFTWARE))
1102 v.val |= SOF_TIMESTAMPING_RX_SOFTWARE;
1103 if (sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE))
1104 v.val |= SOF_TIMESTAMPING_SOFTWARE;
1105 if (sock_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE))
1106 v.val |= SOF_TIMESTAMPING_SYS_HARDWARE;
1107 if (sock_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE))
1108 v.val |= SOF_TIMESTAMPING_RAW_HARDWARE;
1109 break; 1096 break;
1110 1097
1111 case SO_RCVTIMEO: 1098 case SO_RCVTIMEO:
@@ -1478,6 +1465,7 @@ static void sk_update_clone(const struct sock *sk, struct sock *newsk)
1478struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) 1465struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
1479{ 1466{
1480 struct sock *newsk; 1467 struct sock *newsk;
1468 bool is_charged = true;
1481 1469
1482 newsk = sk_prot_alloc(sk->sk_prot, priority, sk->sk_family); 1470 newsk = sk_prot_alloc(sk->sk_prot, priority, sk->sk_family);
1483 if (newsk != NULL) { 1471 if (newsk != NULL) {
@@ -1522,9 +1510,13 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
1522 1510
1523 filter = rcu_dereference_protected(newsk->sk_filter, 1); 1511 filter = rcu_dereference_protected(newsk->sk_filter, 1);
1524 if (filter != NULL) 1512 if (filter != NULL)
1525 sk_filter_charge(newsk, filter); 1513 /* though it's an empty new sock, the charging may fail
1514 * if sysctl_optmem_max was changed between creation of
1515 * original socket and cloning
1516 */
1517 is_charged = sk_filter_charge(newsk, filter);
1526 1518
1527 if (unlikely(xfrm_sk_clone_policy(newsk))) { 1519 if (unlikely(!is_charged || xfrm_sk_clone_policy(newsk))) {
1528 /* It is still raw copy of parent, so invalidate 1520 /* It is still raw copy of parent, so invalidate
1529 * destructor and make plain sk_free() */ 1521 * destructor and make plain sk_free() */
1530 newsk->sk_destruct = NULL; 1522 newsk->sk_destruct = NULL;
diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c
index a4216a4c9572..ad704c757bb4 100644
--- a/net/core/sock_diag.c
+++ b/net/core/sock_diag.c
@@ -68,8 +68,8 @@ int sock_diag_put_filterinfo(bool may_report_filterinfo, struct sock *sk,
68 if (!filter) 68 if (!filter)
69 goto out; 69 goto out;
70 70
71 fprog = filter->orig_prog; 71 fprog = filter->prog->orig_prog;
72 flen = sk_filter_proglen(fprog); 72 flen = bpf_classic_proglen(fprog);
73 73
74 attr = nla_reserve(skb, attrtype, flen); 74 attr = nla_reserve(skb, attrtype, flen);
75 if (attr == NULL) { 75 if (attr == NULL) {
diff --git a/net/core/timestamping.c b/net/core/timestamping.c
index 6521dfd8b7c8..a8770391ea5b 100644
--- a/net/core/timestamping.c
+++ b/net/core/timestamping.c
@@ -43,31 +43,22 @@ void skb_clone_tx_timestamp(struct sk_buff *skb)
43 return; 43 return;
44 44
45 type = classify(skb); 45 type = classify(skb);
46 if (type == PTP_CLASS_NONE)
47 return;
48
49 phydev = skb->dev->phydev;
50 if (likely(phydev->drv->txtstamp)) {
51 if (!atomic_inc_not_zero(&sk->sk_refcnt))
52 return;
46 53
47 switch (type) { 54 clone = skb_clone(skb, GFP_ATOMIC);
48 case PTP_CLASS_V1_IPV4: 55 if (!clone) {
49 case PTP_CLASS_V1_IPV6: 56 sock_put(sk);
50 case PTP_CLASS_V2_IPV4: 57 return;
51 case PTP_CLASS_V2_IPV6:
52 case PTP_CLASS_V2_L2:
53 case PTP_CLASS_V2_VLAN:
54 phydev = skb->dev->phydev;
55 if (likely(phydev->drv->txtstamp)) {
56 if (!atomic_inc_not_zero(&sk->sk_refcnt))
57 return;
58
59 clone = skb_clone(skb, GFP_ATOMIC);
60 if (!clone) {
61 sock_put(sk);
62 return;
63 }
64
65 clone->sk = sk;
66 phydev->drv->txtstamp(phydev, clone, type);
67 } 58 }
68 break; 59
69 default: 60 clone->sk = sk;
70 break; 61 phydev->drv->txtstamp(phydev, clone, type);
71 } 62 }
72} 63}
73EXPORT_SYMBOL_GPL(skb_clone_tx_timestamp); 64EXPORT_SYMBOL_GPL(skb_clone_tx_timestamp);
@@ -114,20 +105,12 @@ bool skb_defer_rx_timestamp(struct sk_buff *skb)
114 105
115 __skb_pull(skb, ETH_HLEN); 106 __skb_pull(skb, ETH_HLEN);
116 107
117 switch (type) { 108 if (type == PTP_CLASS_NONE)
118 case PTP_CLASS_V1_IPV4: 109 return false;
119 case PTP_CLASS_V1_IPV6: 110
120 case PTP_CLASS_V2_IPV4: 111 phydev = skb->dev->phydev;
121 case PTP_CLASS_V2_IPV6: 112 if (likely(phydev->drv->rxtstamp))
122 case PTP_CLASS_V2_L2: 113 return phydev->drv->rxtstamp(phydev, skb, type);
123 case PTP_CLASS_V2_VLAN:
124 phydev = skb->dev->phydev;
125 if (likely(phydev->drv->rxtstamp))
126 return phydev->drv->rxtstamp(phydev, skb, type);
127 break;
128 default:
129 break;
130 }
131 114
132 return false; 115 return false;
133} 116}