about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Craig Gallek <kraig@google.com>, 2016-01-04 17:41:47 -0500
committer: David S. Miller <davem@davemloft.net>, 2016-01-04 22:49:59 -0500
commit: 538950a1b7527a0a52ccd9337e3fcd304f027f13 (patch)
tree: 2ecd86127a55719e61ea9a37aeb1cc7be8022d0f
parent: e32ea7e747271a0abcd37e265005e97cc81d9df5 (diff)
soreuseport: setsockopt SO_ATTACH_REUSEPORT_[CE]BPF
Expose socket options for setting a classic or extended BPF program
for use when selecting sockets in an SO_REUSEPORT group. These options
can be used on the first socket to belong to a group before bind or
on any socket in the group after bind.

This change includes refactoring of the existing sk_filter code to
allow reuse of the existing BPF filter validation checks.

Signed-off-by: Craig Gallek <kraig@google.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- arch/alpha/include/uapi/asm/socket.h | 3
-rw-r--r-- arch/avr32/include/uapi/asm/socket.h | 3
-rw-r--r-- arch/frv/include/uapi/asm/socket.h | 3
-rw-r--r-- arch/ia64/include/uapi/asm/socket.h | 3
-rw-r--r-- arch/m32r/include/uapi/asm/socket.h | 3
-rw-r--r-- arch/mips/include/uapi/asm/socket.h | 3
-rw-r--r-- arch/mn10300/include/uapi/asm/socket.h | 3
-rw-r--r-- arch/parisc/include/uapi/asm/socket.h | 3
-rw-r--r-- arch/powerpc/include/uapi/asm/socket.h | 3
-rw-r--r-- arch/s390/include/uapi/asm/socket.h | 3
-rw-r--r-- arch/sparc/include/uapi/asm/socket.h | 3
-rw-r--r-- arch/xtensa/include/uapi/asm/socket.h | 3
-rw-r--r-- include/linux/filter.h | 2
-rw-r--r-- include/net/sock_reuseport.h | 10
-rw-r--r-- include/net/udp.h | 5
-rw-r--r-- include/uapi/asm-generic/socket.h | 3
-rw-r--r-- net/core/filter.c | 121
-rw-r--r-- net/core/sock.c | 29
-rw-r--r-- net/core/sock_reuseport.c | 88
-rw-r--r-- net/ipv4/udp.c | 14
-rw-r--r-- net/ipv4/udp_diag.c | 4
-rw-r--r-- net/ipv6/udp.c | 14
22 files changed, 282 insertions, 44 deletions
diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h
index 9a20821b111c..c5fb9e6bc3a5 100644
--- a/arch/alpha/include/uapi/asm/socket.h
+++ b/arch/alpha/include/uapi/asm/socket.h
@@ -92,4 +92,7 @@
92#define SO_ATTACH_BPF 50 92#define SO_ATTACH_BPF 50
93#define SO_DETACH_BPF SO_DETACH_FILTER 93#define SO_DETACH_BPF SO_DETACH_FILTER
94 94
95#define SO_ATTACH_REUSEPORT_CBPF 51
96#define SO_ATTACH_REUSEPORT_EBPF 52
97
95#endif /* _UAPI_ASM_SOCKET_H */ 98#endif /* _UAPI_ASM_SOCKET_H */
diff --git a/arch/avr32/include/uapi/asm/socket.h b/arch/avr32/include/uapi/asm/socket.h
index 2b65ed6b277c..9de0796240a0 100644
--- a/arch/avr32/include/uapi/asm/socket.h
+++ b/arch/avr32/include/uapi/asm/socket.h
@@ -85,4 +85,7 @@
85#define SO_ATTACH_BPF 50 85#define SO_ATTACH_BPF 50
86#define SO_DETACH_BPF SO_DETACH_FILTER 86#define SO_DETACH_BPF SO_DETACH_FILTER
87 87
88#define SO_ATTACH_REUSEPORT_CBPF 51
89#define SO_ATTACH_REUSEPORT_EBPF 52
90
88#endif /* _UAPI__ASM_AVR32_SOCKET_H */ 91#endif /* _UAPI__ASM_AVR32_SOCKET_H */
diff --git a/arch/frv/include/uapi/asm/socket.h b/arch/frv/include/uapi/asm/socket.h
index 4823ad125578..f02e4849ae83 100644
--- a/arch/frv/include/uapi/asm/socket.h
+++ b/arch/frv/include/uapi/asm/socket.h
@@ -85,5 +85,8 @@
85#define SO_ATTACH_BPF 50 85#define SO_ATTACH_BPF 50
86#define SO_DETACH_BPF SO_DETACH_FILTER 86#define SO_DETACH_BPF SO_DETACH_FILTER
87 87
88#define SO_ATTACH_REUSEPORT_CBPF 51
89#define SO_ATTACH_REUSEPORT_EBPF 52
90
88#endif /* _ASM_SOCKET_H */ 91#endif /* _ASM_SOCKET_H */
89 92
diff --git a/arch/ia64/include/uapi/asm/socket.h b/arch/ia64/include/uapi/asm/socket.h
index 59be3d87f86d..bce29166de1b 100644
--- a/arch/ia64/include/uapi/asm/socket.h
+++ b/arch/ia64/include/uapi/asm/socket.h
@@ -94,4 +94,7 @@
94#define SO_ATTACH_BPF 50 94#define SO_ATTACH_BPF 50
95#define SO_DETACH_BPF SO_DETACH_FILTER 95#define SO_DETACH_BPF SO_DETACH_FILTER
96 96
97#define SO_ATTACH_REUSEPORT_CBPF 51
98#define SO_ATTACH_REUSEPORT_EBPF 52
99
97#endif /* _ASM_IA64_SOCKET_H */ 100#endif /* _ASM_IA64_SOCKET_H */
diff --git a/arch/m32r/include/uapi/asm/socket.h b/arch/m32r/include/uapi/asm/socket.h
index 7bc4cb273856..14aa4a6bccf1 100644
--- a/arch/m32r/include/uapi/asm/socket.h
+++ b/arch/m32r/include/uapi/asm/socket.h
@@ -85,4 +85,7 @@
85#define SO_ATTACH_BPF 50 85#define SO_ATTACH_BPF 50
86#define SO_DETACH_BPF SO_DETACH_FILTER 86#define SO_DETACH_BPF SO_DETACH_FILTER
87 87
88#define SO_ATTACH_REUSEPORT_CBPF 51
89#define SO_ATTACH_REUSEPORT_EBPF 52
90
88#endif /* _ASM_M32R_SOCKET_H */ 91#endif /* _ASM_M32R_SOCKET_H */
diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h
index dec3c850f36b..5910fe294e93 100644
--- a/arch/mips/include/uapi/asm/socket.h
+++ b/arch/mips/include/uapi/asm/socket.h
@@ -103,4 +103,7 @@
103#define SO_ATTACH_BPF 50 103#define SO_ATTACH_BPF 50
104#define SO_DETACH_BPF SO_DETACH_FILTER 104#define SO_DETACH_BPF SO_DETACH_FILTER
105 105
106#define SO_ATTACH_REUSEPORT_CBPF 51
107#define SO_ATTACH_REUSEPORT_EBPF 52
108
106#endif /* _UAPI_ASM_SOCKET_H */ 109#endif /* _UAPI_ASM_SOCKET_H */
diff --git a/arch/mn10300/include/uapi/asm/socket.h b/arch/mn10300/include/uapi/asm/socket.h
index cab7d6d50051..58b1aa01ab9f 100644
--- a/arch/mn10300/include/uapi/asm/socket.h
+++ b/arch/mn10300/include/uapi/asm/socket.h
@@ -85,4 +85,7 @@
85#define SO_ATTACH_BPF 50 85#define SO_ATTACH_BPF 50
86#define SO_DETACH_BPF SO_DETACH_FILTER 86#define SO_DETACH_BPF SO_DETACH_FILTER
87 87
88#define SO_ATTACH_REUSEPORT_CBPF 51
89#define SO_ATTACH_REUSEPORT_EBPF 52
90
88#endif /* _ASM_SOCKET_H */ 91#endif /* _ASM_SOCKET_H */
diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h
index a5cd40cd8ee1..f9cf1223422c 100644
--- a/arch/parisc/include/uapi/asm/socket.h
+++ b/arch/parisc/include/uapi/asm/socket.h
@@ -84,4 +84,7 @@
84#define SO_ATTACH_BPF 0x402B 84#define SO_ATTACH_BPF 0x402B
85#define SO_DETACH_BPF SO_DETACH_FILTER 85#define SO_DETACH_BPF SO_DETACH_FILTER
86 86
87#define SO_ATTACH_REUSEPORT_CBPF 0x402C
88#define SO_ATTACH_REUSEPORT_EBPF 0x402D
89
87#endif /* _UAPI_ASM_SOCKET_H */ 90#endif /* _UAPI_ASM_SOCKET_H */
diff --git a/arch/powerpc/include/uapi/asm/socket.h b/arch/powerpc/include/uapi/asm/socket.h
index c046666038f8..dd54f28ecdec 100644
--- a/arch/powerpc/include/uapi/asm/socket.h
+++ b/arch/powerpc/include/uapi/asm/socket.h
@@ -92,4 +92,7 @@
92#define SO_ATTACH_BPF 50 92#define SO_ATTACH_BPF 50
93#define SO_DETACH_BPF SO_DETACH_FILTER 93#define SO_DETACH_BPF SO_DETACH_FILTER
94 94
95#define SO_ATTACH_REUSEPORT_CBPF 51
96#define SO_ATTACH_REUSEPORT_EBPF 52
97
95#endif /* _ASM_POWERPC_SOCKET_H */ 98#endif /* _ASM_POWERPC_SOCKET_H */
diff --git a/arch/s390/include/uapi/asm/socket.h b/arch/s390/include/uapi/asm/socket.h
index 296942d56e6a..d02e89d14fef 100644
--- a/arch/s390/include/uapi/asm/socket.h
+++ b/arch/s390/include/uapi/asm/socket.h
@@ -91,4 +91,7 @@
91#define SO_ATTACH_BPF 50 91#define SO_ATTACH_BPF 50
92#define SO_DETACH_BPF SO_DETACH_FILTER 92#define SO_DETACH_BPF SO_DETACH_FILTER
93 93
94#define SO_ATTACH_REUSEPORT_CBPF 51
95#define SO_ATTACH_REUSEPORT_EBPF 52
96
94#endif /* _ASM_SOCKET_H */ 97#endif /* _ASM_SOCKET_H */
diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h
index e6a16c40be5f..d270ee91968e 100644
--- a/arch/sparc/include/uapi/asm/socket.h
+++ b/arch/sparc/include/uapi/asm/socket.h
@@ -81,6 +81,9 @@
81#define SO_ATTACH_BPF 0x0034 81#define SO_ATTACH_BPF 0x0034
82#define SO_DETACH_BPF SO_DETACH_FILTER 82#define SO_DETACH_BPF SO_DETACH_FILTER
83 83
84#define SO_ATTACH_REUSEPORT_CBPF 0x0035
85#define SO_ATTACH_REUSEPORT_EBPF 0x0036
86
84/* Security levels - as per NRL IPv6 - don't actually do anything */ 87/* Security levels - as per NRL IPv6 - don't actually do anything */
85#define SO_SECURITY_AUTHENTICATION 0x5001 88#define SO_SECURITY_AUTHENTICATION 0x5001
86#define SO_SECURITY_ENCRYPTION_TRANSPORT 0x5002 89#define SO_SECURITY_ENCRYPTION_TRANSPORT 0x5002
diff --git a/arch/xtensa/include/uapi/asm/socket.h b/arch/xtensa/include/uapi/asm/socket.h
index 4120af086160..fd3b96d1153f 100644
--- a/arch/xtensa/include/uapi/asm/socket.h
+++ b/arch/xtensa/include/uapi/asm/socket.h
@@ -96,4 +96,7 @@
96#define SO_ATTACH_BPF 50 96#define SO_ATTACH_BPF 50
97#define SO_DETACH_BPF SO_DETACH_FILTER 97#define SO_DETACH_BPF SO_DETACH_FILTER
98 98
99#define SO_ATTACH_REUSEPORT_CBPF 51
100#define SO_ATTACH_REUSEPORT_EBPF 52
101
99#endif /* _XTENSA_SOCKET_H */ 102#endif /* _XTENSA_SOCKET_H */
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 4165e9ac9e36..294c3cdf07b3 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -447,6 +447,8 @@ void bpf_prog_destroy(struct bpf_prog *fp);
447 447
448int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk); 448int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk);
449int sk_attach_bpf(u32 ufd, struct sock *sk); 449int sk_attach_bpf(u32 ufd, struct sock *sk);
450int sk_reuseport_attach_filter(struct sock_fprog *fprog, struct sock *sk);
451int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk);
450int sk_detach_filter(struct sock *sk); 452int sk_detach_filter(struct sock *sk);
451int sk_get_filter(struct sock *sk, struct sock_filter __user *filter, 453int sk_get_filter(struct sock *sk, struct sock_filter __user *filter,
452 unsigned int len); 454 unsigned int len);
diff --git a/include/net/sock_reuseport.h b/include/net/sock_reuseport.h
index 67d1eb8fd7af..7dda3d7adba8 100644
--- a/include/net/sock_reuseport.h
+++ b/include/net/sock_reuseport.h
@@ -1,6 +1,8 @@
1#ifndef _SOCK_REUSEPORT_H 1#ifndef _SOCK_REUSEPORT_H
2#define _SOCK_REUSEPORT_H 2#define _SOCK_REUSEPORT_H
3 3
4#include <linux/filter.h>
5#include <linux/skbuff.h>
4#include <linux/types.h> 6#include <linux/types.h>
5#include <net/sock.h> 7#include <net/sock.h>
6 8
@@ -9,12 +11,18 @@ struct sock_reuseport {
9 11
10 u16 max_socks; /* length of socks */ 12 u16 max_socks; /* length of socks */
11 u16 num_socks; /* elements in socks */ 13 u16 num_socks; /* elements in socks */
14 struct bpf_prog __rcu *prog; /* optional BPF sock selector */
12 struct sock *socks[0]; /* array of sock pointers */ 15 struct sock *socks[0]; /* array of sock pointers */
13}; 16};
14 17
15extern int reuseport_alloc(struct sock *sk); 18extern int reuseport_alloc(struct sock *sk);
16extern int reuseport_add_sock(struct sock *sk, const struct sock *sk2); 19extern int reuseport_add_sock(struct sock *sk, const struct sock *sk2);
17extern void reuseport_detach_sock(struct sock *sk); 20extern void reuseport_detach_sock(struct sock *sk);
18extern struct sock *reuseport_select_sock(struct sock *sk, u32 hash); 21extern struct sock *reuseport_select_sock(struct sock *sk,
22 u32 hash,
23 struct sk_buff *skb,
24 int hdr_len);
25extern struct bpf_prog *reuseport_attach_prog(struct sock *sk,
26 struct bpf_prog *prog);
19 27
20#endif /* _SOCK_REUSEPORT_H */ 28#endif /* _SOCK_REUSEPORT_H */
diff --git a/include/net/udp.h b/include/net/udp.h
index 3b5d7f93bc23..2842541e28e7 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -258,7 +258,7 @@ struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport,
258 __be32 daddr, __be16 dport, int dif); 258 __be32 daddr, __be16 dport, int dif);
259struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, 259struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport,
260 __be32 daddr, __be16 dport, int dif, 260 __be32 daddr, __be16 dport, int dif,
261 struct udp_table *tbl); 261 struct udp_table *tbl, struct sk_buff *skb);
262struct sock *udp6_lib_lookup(struct net *net, 262struct sock *udp6_lib_lookup(struct net *net,
263 const struct in6_addr *saddr, __be16 sport, 263 const struct in6_addr *saddr, __be16 sport,
264 const struct in6_addr *daddr, __be16 dport, 264 const struct in6_addr *daddr, __be16 dport,
@@ -266,7 +266,8 @@ struct sock *udp6_lib_lookup(struct net *net,
266struct sock *__udp6_lib_lookup(struct net *net, 266struct sock *__udp6_lib_lookup(struct net *net,
267 const struct in6_addr *saddr, __be16 sport, 267 const struct in6_addr *saddr, __be16 sport,
268 const struct in6_addr *daddr, __be16 dport, 268 const struct in6_addr *daddr, __be16 dport,
269 int dif, struct udp_table *tbl); 269 int dif, struct udp_table *tbl,
270 struct sk_buff *skb);
270 271
271/* 272/*
272 * SNMP statistics for UDP and UDP-Lite 273 * SNMP statistics for UDP and UDP-Lite
diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h
index 5c15c2a5c123..fb8a41668382 100644
--- a/include/uapi/asm-generic/socket.h
+++ b/include/uapi/asm-generic/socket.h
@@ -87,4 +87,7 @@
87#define SO_ATTACH_BPF 50 87#define SO_ATTACH_BPF 50
88#define SO_DETACH_BPF SO_DETACH_FILTER 88#define SO_DETACH_BPF SO_DETACH_FILTER
89 89
90#define SO_ATTACH_REUSEPORT_CBPF 51
91#define SO_ATTACH_REUSEPORT_EBPF 52
92
90#endif /* __ASM_GENERIC_SOCKET_H */ 93#endif /* __ASM_GENERIC_SOCKET_H */
diff --git a/net/core/filter.c b/net/core/filter.c
index c770196ae8d5..35e6fed28709 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -50,6 +50,7 @@
50#include <net/cls_cgroup.h> 50#include <net/cls_cgroup.h>
51#include <net/dst_metadata.h> 51#include <net/dst_metadata.h>
52#include <net/dst.h> 52#include <net/dst.h>
53#include <net/sock_reuseport.h>
53 54
54/** 55/**
55 * sk_filter - run a packet through a socket filter 56 * sk_filter - run a packet through a socket filter
@@ -1167,17 +1168,32 @@ static int __sk_attach_prog(struct bpf_prog *prog, struct sock *sk)
1167 return 0; 1168 return 0;
1168} 1169}
1169 1170
1170/** 1171static int __reuseport_attach_prog(struct bpf_prog *prog, struct sock *sk)
1171 * sk_attach_filter - attach a socket filter 1172{
1172 * @fprog: the filter program 1173 struct bpf_prog *old_prog;
1173 * @sk: the socket to use 1174 int err;
1174 * 1175
1175 * Attach the user's filter code. We first run some sanity checks on 1176 if (bpf_prog_size(prog->len) > sysctl_optmem_max)
1176 * it to make sure it does not explode on us later. If an error 1177 return -ENOMEM;
1177 * occurs or there is insufficient memory for the filter a negative 1178
1178 * errno code is returned. On success the return is zero. 1179 if (sk_unhashed(sk)) {
1179 */ 1180 err = reuseport_alloc(sk);
1180int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) 1181 if (err)
1182 return err;
1183 } else if (!rcu_access_pointer(sk->sk_reuseport_cb)) {
1184 /* The socket wasn't bound with SO_REUSEPORT */
1185 return -EINVAL;
1186 }
1187
1188 old_prog = reuseport_attach_prog(sk, prog);
1189 if (old_prog)
1190 bpf_prog_destroy(old_prog);
1191
1192 return 0;
1193}
1194
1195static
1196struct bpf_prog *__get_filter(struct sock_fprog *fprog, struct sock *sk)
1181{ 1197{
1182 unsigned int fsize = bpf_classic_proglen(fprog); 1198 unsigned int fsize = bpf_classic_proglen(fprog);
1183 unsigned int bpf_fsize = bpf_prog_size(fprog->len); 1199 unsigned int bpf_fsize = bpf_prog_size(fprog->len);
@@ -1185,19 +1201,19 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
1185 int err; 1201 int err;
1186 1202
1187 if (sock_flag(sk, SOCK_FILTER_LOCKED)) 1203 if (sock_flag(sk, SOCK_FILTER_LOCKED))
1188 return -EPERM; 1204 return ERR_PTR(-EPERM);
1189 1205
1190 /* Make sure new filter is there and in the right amounts. */ 1206 /* Make sure new filter is there and in the right amounts. */
1191 if (fprog->filter == NULL) 1207 if (fprog->filter == NULL)
1192 return -EINVAL; 1208 return ERR_PTR(-EINVAL);
1193 1209
1194 prog = bpf_prog_alloc(bpf_fsize, 0); 1210 prog = bpf_prog_alloc(bpf_fsize, 0);
1195 if (!prog) 1211 if (!prog)
1196 return -ENOMEM; 1212 return ERR_PTR(-ENOMEM);
1197 1213
1198 if (copy_from_user(prog->insns, fprog->filter, fsize)) { 1214 if (copy_from_user(prog->insns, fprog->filter, fsize)) {
1199 __bpf_prog_free(prog); 1215 __bpf_prog_free(prog);
1200 return -EFAULT; 1216 return ERR_PTR(-EFAULT);
1201 } 1217 }
1202 1218
1203 prog->len = fprog->len; 1219 prog->len = fprog->len;
@@ -1205,13 +1221,30 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
1205 err = bpf_prog_store_orig_filter(prog, fprog); 1221 err = bpf_prog_store_orig_filter(prog, fprog);
1206 if (err) { 1222 if (err) {
1207 __bpf_prog_free(prog); 1223 __bpf_prog_free(prog);
1208 return -ENOMEM; 1224 return ERR_PTR(-ENOMEM);
1209 } 1225 }
1210 1226
1211 /* bpf_prepare_filter() already takes care of freeing 1227 /* bpf_prepare_filter() already takes care of freeing
1212 * memory in case something goes wrong. 1228 * memory in case something goes wrong.
1213 */ 1229 */
1214 prog = bpf_prepare_filter(prog, NULL); 1230 return bpf_prepare_filter(prog, NULL);
1231}
1232
1233/**
1234 * sk_attach_filter - attach a socket filter
1235 * @fprog: the filter program
1236 * @sk: the socket to use
1237 *
1238 * Attach the user's filter code. We first run some sanity checks on
1239 * it to make sure it does not explode on us later. If an error
1240 * occurs or there is insufficient memory for the filter a negative
1241 * errno code is returned. On success the return is zero.
1242 */
1243int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
1244{
1245 struct bpf_prog *prog = __get_filter(fprog, sk);
1246 int err;
1247
1215 if (IS_ERR(prog)) 1248 if (IS_ERR(prog))
1216 return PTR_ERR(prog); 1249 return PTR_ERR(prog);
1217 1250
@@ -1225,23 +1258,50 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
1225} 1258}
1226EXPORT_SYMBOL_GPL(sk_attach_filter); 1259EXPORT_SYMBOL_GPL(sk_attach_filter);
1227 1260
1228int sk_attach_bpf(u32 ufd, struct sock *sk) 1261int sk_reuseport_attach_filter(struct sock_fprog *fprog, struct sock *sk)
1229{ 1262{
1230 struct bpf_prog *prog; 1263 struct bpf_prog *prog = __get_filter(fprog, sk);
1231 int err; 1264 int err;
1232 1265
1266 if (IS_ERR(prog))
1267 return PTR_ERR(prog);
1268
1269 err = __reuseport_attach_prog(prog, sk);
1270 if (err < 0) {
1271 __bpf_prog_release(prog);
1272 return err;
1273 }
1274
1275 return 0;
1276}
1277
1278static struct bpf_prog *__get_bpf(u32 ufd, struct sock *sk)
1279{
1280 struct bpf_prog *prog;
1281
1233 if (sock_flag(sk, SOCK_FILTER_LOCKED)) 1282 if (sock_flag(sk, SOCK_FILTER_LOCKED))
1234 return -EPERM; 1283 return ERR_PTR(-EPERM);
1235 1284
1236 prog = bpf_prog_get(ufd); 1285 prog = bpf_prog_get(ufd);
1237 if (IS_ERR(prog)) 1286 if (IS_ERR(prog))
1238 return PTR_ERR(prog); 1287 return prog;
1239 1288
1240 if (prog->type != BPF_PROG_TYPE_SOCKET_FILTER) { 1289 if (prog->type != BPF_PROG_TYPE_SOCKET_FILTER) {
1241 bpf_prog_put(prog); 1290 bpf_prog_put(prog);
1242 return -EINVAL; 1291 return ERR_PTR(-EINVAL);
1243 } 1292 }
1244 1293
1294 return prog;
1295}
1296
1297int sk_attach_bpf(u32 ufd, struct sock *sk)
1298{
1299 struct bpf_prog *prog = __get_bpf(ufd, sk);
1300 int err;
1301
1302 if (IS_ERR(prog))
1303 return PTR_ERR(prog);
1304
1245 err = __sk_attach_prog(prog, sk); 1305 err = __sk_attach_prog(prog, sk);
1246 if (err < 0) { 1306 if (err < 0) {
1247 bpf_prog_put(prog); 1307 bpf_prog_put(prog);
@@ -1251,6 +1311,23 @@ int sk_attach_bpf(u32 ufd, struct sock *sk)
1251 return 0; 1311 return 0;
1252} 1312}
1253 1313
1314int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk)
1315{
1316 struct bpf_prog *prog = __get_bpf(ufd, sk);
1317 int err;
1318
1319 if (IS_ERR(prog))
1320 return PTR_ERR(prog);
1321
1322 err = __reuseport_attach_prog(prog, sk);
1323 if (err < 0) {
1324 bpf_prog_put(prog);
1325 return err;
1326 }
1327
1328 return 0;
1329}
1330
1254#define BPF_RECOMPUTE_CSUM(flags) ((flags) & 1) 1331#define BPF_RECOMPUTE_CSUM(flags) ((flags) & 1)
1255#define BPF_LDST_LEN 16U 1332#define BPF_LDST_LEN 16U
1256 1333
diff --git a/net/core/sock.c b/net/core/sock.c
index 565bab7baca9..51270238e269 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -134,6 +134,7 @@
134#include <linux/sock_diag.h> 134#include <linux/sock_diag.h>
135 135
136#include <linux/filter.h> 136#include <linux/filter.h>
137#include <net/sock_reuseport.h>
137 138
138#include <trace/events/sock.h> 139#include <trace/events/sock.h>
139 140
@@ -932,6 +933,32 @@ set_rcvbuf:
932 } 933 }
933 break; 934 break;
934 935
936 case SO_ATTACH_REUSEPORT_CBPF:
937 ret = -EINVAL;
938 if (optlen == sizeof(struct sock_fprog)) {
939 struct sock_fprog fprog;
940
941 ret = -EFAULT;
942 if (copy_from_user(&fprog, optval, sizeof(fprog)))
943 break;
944
945 ret = sk_reuseport_attach_filter(&fprog, sk);
946 }
947 break;
948
949 case SO_ATTACH_REUSEPORT_EBPF:
950 ret = -EINVAL;
951 if (optlen == sizeof(u32)) {
952 u32 ufd;
953
954 ret = -EFAULT;
955 if (copy_from_user(&ufd, optval, sizeof(ufd)))
956 break;
957
958 ret = sk_reuseport_attach_bpf(ufd, sk);
959 }
960 break;
961
935 case SO_DETACH_FILTER: 962 case SO_DETACH_FILTER:
936 ret = sk_detach_filter(sk); 963 ret = sk_detach_filter(sk);
937 break; 964 break;
@@ -1443,6 +1470,8 @@ void sk_destruct(struct sock *sk)
1443 sk_filter_uncharge(sk, filter); 1470 sk_filter_uncharge(sk, filter);
1444 RCU_INIT_POINTER(sk->sk_filter, NULL); 1471 RCU_INIT_POINTER(sk->sk_filter, NULL);
1445 } 1472 }
1473 if (rcu_access_pointer(sk->sk_reuseport_cb))
1474 reuseport_detach_sock(sk);
1446 1475
1447 sock_disable_timestamp(sk, SK_FLAGS_TIMESTAMP); 1476 sock_disable_timestamp(sk, SK_FLAGS_TIMESTAMP);
1448 1477
diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c
index 963c8d5f3027..ae0969c0fc2e 100644
--- a/net/core/sock_reuseport.c
+++ b/net/core/sock_reuseport.c
@@ -1,10 +1,12 @@
1/* 1/*
2 * To speed up listener socket lookup, create an array to store all sockets 2 * To speed up listener socket lookup, create an array to store all sockets
3 * listening on the same port. This allows a decision to be made after finding 3 * listening on the same port. This allows a decision to be made after finding
4 * the first socket. 4 * the first socket. An optional BPF program can also be configured for
5 * selecting the socket index from the array of available sockets.
5 */ 6 */
6 7
7#include <net/sock_reuseport.h> 8#include <net/sock_reuseport.h>
9#include <linux/bpf.h>
8#include <linux/rcupdate.h> 10#include <linux/rcupdate.h>
9 11
10#define INIT_SOCKS 128 12#define INIT_SOCKS 128
@@ -22,6 +24,7 @@ static struct sock_reuseport *__reuseport_alloc(u16 max_socks)
22 24
23 reuse->max_socks = max_socks; 25 reuse->max_socks = max_socks;
24 26
27 RCU_INIT_POINTER(reuse->prog, NULL);
25 return reuse; 28 return reuse;
26} 29}
27 30
@@ -67,6 +70,7 @@ static struct sock_reuseport *reuseport_grow(struct sock_reuseport *reuse)
67 70
68 more_reuse->max_socks = more_socks_size; 71 more_reuse->max_socks = more_socks_size;
69 more_reuse->num_socks = reuse->num_socks; 72 more_reuse->num_socks = reuse->num_socks;
73 more_reuse->prog = reuse->prog;
70 74
71 memcpy(more_reuse->socks, reuse->socks, 75 memcpy(more_reuse->socks, reuse->socks,
72 reuse->num_socks * sizeof(struct sock *)); 76 reuse->num_socks * sizeof(struct sock *));
@@ -75,6 +79,10 @@ static struct sock_reuseport *reuseport_grow(struct sock_reuseport *reuse)
75 rcu_assign_pointer(reuse->socks[i]->sk_reuseport_cb, 79 rcu_assign_pointer(reuse->socks[i]->sk_reuseport_cb,
76 more_reuse); 80 more_reuse);
77 81
82 /* Note: we use kfree_rcu here instead of reuseport_free_rcu so
83 * that reuse and more_reuse can temporarily share a reference
84 * to prog.
85 */
78 kfree_rcu(reuse, rcu); 86 kfree_rcu(reuse, rcu);
79 return more_reuse; 87 return more_reuse;
80} 88}
@@ -116,6 +124,16 @@ int reuseport_add_sock(struct sock *sk, const struct sock *sk2)
116} 124}
117EXPORT_SYMBOL(reuseport_add_sock); 125EXPORT_SYMBOL(reuseport_add_sock);
118 126
127static void reuseport_free_rcu(struct rcu_head *head)
128{
129 struct sock_reuseport *reuse;
130
131 reuse = container_of(head, struct sock_reuseport, rcu);
132 if (reuse->prog)
133 bpf_prog_destroy(reuse->prog);
134 kfree(reuse);
135}
136
119void reuseport_detach_sock(struct sock *sk) 137void reuseport_detach_sock(struct sock *sk)
120{ 138{
121 struct sock_reuseport *reuse; 139 struct sock_reuseport *reuse;
@@ -131,7 +149,7 @@ void reuseport_detach_sock(struct sock *sk)
131 reuse->socks[i] = reuse->socks[reuse->num_socks - 1]; 149 reuse->socks[i] = reuse->socks[reuse->num_socks - 1];
132 reuse->num_socks--; 150 reuse->num_socks--;
133 if (reuse->num_socks == 0) 151 if (reuse->num_socks == 0)
134 kfree_rcu(reuse, rcu); 152 call_rcu(&reuse->rcu, reuseport_free_rcu);
135 break; 153 break;
136 } 154 }
137 } 155 }
@@ -139,15 +157,53 @@ void reuseport_detach_sock(struct sock *sk)
139} 157}
140EXPORT_SYMBOL(reuseport_detach_sock); 158EXPORT_SYMBOL(reuseport_detach_sock);
141 159
160static struct sock *run_bpf(struct sock_reuseport *reuse, u16 socks,
161 struct bpf_prog *prog, struct sk_buff *skb,
162 int hdr_len)
163{
164 struct sk_buff *nskb = NULL;
165 u32 index;
166
167 if (skb_shared(skb)) {
168 nskb = skb_clone(skb, GFP_ATOMIC);
169 if (!nskb)
170 return NULL;
171 skb = nskb;
172 }
173
174 /* temporarily advance data past protocol header */
175 if (!pskb_pull(skb, hdr_len)) {
176 consume_skb(nskb);
177 return NULL;
178 }
179 index = bpf_prog_run_save_cb(prog, skb);
180 __skb_push(skb, hdr_len);
181
182 consume_skb(nskb);
183
184 if (index >= socks)
185 return NULL;
186
187 return reuse->socks[index];
188}
189
142/** 190/**
143 * reuseport_select_sock - Select a socket from an SO_REUSEPORT group. 191 * reuseport_select_sock - Select a socket from an SO_REUSEPORT group.
144 * @sk: First socket in the group. 192 * @sk: First socket in the group.
145 * @hash: Use this hash to select. 193 * @hash: When no BPF filter is available, use this hash to select.
194 * @skb: skb to run through BPF filter.
195 * @hdr_len: BPF filter expects skb data pointer at payload data. If
196 * the skb does not yet point at the payload, this parameter represents
197 * how far the pointer needs to advance to reach the payload.
146 * Returns a socket that should receive the packet (or NULL on error). 198 * Returns a socket that should receive the packet (or NULL on error).
147 */ 199 */
148struct sock *reuseport_select_sock(struct sock *sk, u32 hash) 200struct sock *reuseport_select_sock(struct sock *sk,
201 u32 hash,
202 struct sk_buff *skb,
203 int hdr_len)
149{ 204{
150 struct sock_reuseport *reuse; 205 struct sock_reuseport *reuse;
206 struct bpf_prog *prog;
151 struct sock *sk2 = NULL; 207 struct sock *sk2 = NULL;
152 u16 socks; 208 u16 socks;
153 209
@@ -158,12 +214,16 @@ struct sock *reuseport_select_sock(struct sock *sk, u32 hash)
158 if (!reuse) 214 if (!reuse)
159 goto out; 215 goto out;
160 216
217 prog = rcu_dereference(reuse->prog);
161 socks = READ_ONCE(reuse->num_socks); 218 socks = READ_ONCE(reuse->num_socks);
162 if (likely(socks)) { 219 if (likely(socks)) {
163 /* paired with smp_wmb() in reuseport_add_sock() */ 220 /* paired with smp_wmb() in reuseport_add_sock() */
164 smp_rmb(); 221 smp_rmb();
165 222
166 sk2 = reuse->socks[reciprocal_scale(hash, socks)]; 223 if (prog && skb)
224 sk2 = run_bpf(reuse, socks, prog, skb, hdr_len);
225 else
226 sk2 = reuse->socks[reciprocal_scale(hash, socks)];
167 } 227 }
168 228
169out: 229out:
@@ -171,3 +231,21 @@ out:
171 return sk2; 231 return sk2;
172} 232}
173EXPORT_SYMBOL(reuseport_select_sock); 233EXPORT_SYMBOL(reuseport_select_sock);
234
235struct bpf_prog *
236reuseport_attach_prog(struct sock *sk, struct bpf_prog *prog)
237{
238 struct sock_reuseport *reuse;
239 struct bpf_prog *old_prog;
240
241 spin_lock_bh(&reuseport_lock);
242 reuse = rcu_dereference_protected(sk->sk_reuseport_cb,
243 lockdep_is_held(&reuseport_lock));
244 old_prog = rcu_dereference_protected(reuse->prog,
245 lockdep_is_held(&reuseport_lock));
246 rcu_assign_pointer(reuse->prog, prog);
247 spin_unlock_bh(&reuseport_lock);
248
249 return old_prog;
250}
251EXPORT_SYMBOL(reuseport_attach_prog);
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 762b01f55707..835378365f25 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -514,7 +514,7 @@ begin:
514 struct sock *sk2; 514 struct sock *sk2;
515 hash = udp_ehashfn(net, daddr, hnum, 515 hash = udp_ehashfn(net, daddr, hnum,
516 saddr, sport); 516 saddr, sport);
517 sk2 = reuseport_select_sock(sk, hash); 517 sk2 = reuseport_select_sock(sk, hash, NULL, 0);
518 if (sk2) { 518 if (sk2) {
519 result = sk2; 519 result = sk2;
520 goto found; 520 goto found;
@@ -553,7 +553,7 @@ found:
553 */ 553 */
554struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, 554struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
555 __be16 sport, __be32 daddr, __be16 dport, 555 __be16 sport, __be32 daddr, __be16 dport,
556 int dif, struct udp_table *udptable) 556 int dif, struct udp_table *udptable, struct sk_buff *skb)
557{ 557{
558 struct sock *sk, *result; 558 struct sock *sk, *result;
559 struct hlist_nulls_node *node; 559 struct hlist_nulls_node *node;
@@ -602,7 +602,8 @@ begin:
602 struct sock *sk2; 602 struct sock *sk2;
603 hash = udp_ehashfn(net, daddr, hnum, 603 hash = udp_ehashfn(net, daddr, hnum,
604 saddr, sport); 604 saddr, sport);
605 sk2 = reuseport_select_sock(sk, hash); 605 sk2 = reuseport_select_sock(sk, hash, skb,
606 sizeof(struct udphdr));
606 if (sk2) { 607 if (sk2) {
607 result = sk2; 608 result = sk2;
608 goto found; 609 goto found;
@@ -647,14 +648,14 @@ static inline struct sock *__udp4_lib_lookup_skb(struct sk_buff *skb,
647 648
648 return __udp4_lib_lookup(dev_net(skb_dst(skb)->dev), iph->saddr, sport, 649 return __udp4_lib_lookup(dev_net(skb_dst(skb)->dev), iph->saddr, sport,
649 iph->daddr, dport, inet_iif(skb), 650 iph->daddr, dport, inet_iif(skb),
650 udptable); 651 udptable, skb);
651} 652}
652 653
653struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, 654struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport,
654 __be32 daddr, __be16 dport, int dif) 655 __be32 daddr, __be16 dport, int dif)
655{ 656{
656 return __udp4_lib_lookup(net, saddr, sport, daddr, dport, dif, 657 return __udp4_lib_lookup(net, saddr, sport, daddr, dport, dif,
657 &udp_table); 658 &udp_table, NULL);
658} 659}
659EXPORT_SYMBOL_GPL(udp4_lib_lookup); 660EXPORT_SYMBOL_GPL(udp4_lib_lookup);
660 661
@@ -702,7 +703,8 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable)
702 struct net *net = dev_net(skb->dev); 703 struct net *net = dev_net(skb->dev);
703 704
704 sk = __udp4_lib_lookup(net, iph->daddr, uh->dest, 705 sk = __udp4_lib_lookup(net, iph->daddr, uh->dest,
705 iph->saddr, uh->source, skb->dev->ifindex, udptable); 706 iph->saddr, uh->source, skb->dev->ifindex, udptable,
707 NULL);
706 if (!sk) { 708 if (!sk) {
707 ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); 709 ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
708 return; /* No socket for error */ 710 return; /* No socket for error */
diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c
index 6116604bf6e8..df1966f3b6ec 100644
--- a/net/ipv4/udp_diag.c
+++ b/net/ipv4/udp_diag.c
@@ -44,7 +44,7 @@ static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb,
44 sk = __udp4_lib_lookup(net, 44 sk = __udp4_lib_lookup(net,
45 req->id.idiag_src[0], req->id.idiag_sport, 45 req->id.idiag_src[0], req->id.idiag_sport,
46 req->id.idiag_dst[0], req->id.idiag_dport, 46 req->id.idiag_dst[0], req->id.idiag_dport,
47 req->id.idiag_if, tbl); 47 req->id.idiag_if, tbl, NULL);
48#if IS_ENABLED(CONFIG_IPV6) 48#if IS_ENABLED(CONFIG_IPV6)
49 else if (req->sdiag_family == AF_INET6) 49 else if (req->sdiag_family == AF_INET6)
50 sk = __udp6_lib_lookup(net, 50 sk = __udp6_lib_lookup(net,
@@ -52,7 +52,7 @@ static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb,
52 req->id.idiag_sport, 52 req->id.idiag_sport,
53 (struct in6_addr *)req->id.idiag_dst, 53 (struct in6_addr *)req->id.idiag_dst,
54 req->id.idiag_dport, 54 req->id.idiag_dport,
55 req->id.idiag_if, tbl); 55 req->id.idiag_if, tbl, NULL);
56#endif 56#endif
57 else 57 else
58 goto out_nosk; 58 goto out_nosk;
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 6204b8992de4..56fcb55fda31 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -272,7 +272,7 @@ begin:
272 struct sock *sk2; 272 struct sock *sk2;
273 hash = udp6_ehashfn(net, daddr, hnum, 273 hash = udp6_ehashfn(net, daddr, hnum,
274 saddr, sport); 274 saddr, sport);
275 sk2 = reuseport_select_sock(sk, hash); 275 sk2 = reuseport_select_sock(sk, hash, NULL, 0);
276 if (sk2) { 276 if (sk2) {
277 result = sk2; 277 result = sk2;
278 goto found; 278 goto found;
@@ -310,7 +310,8 @@ found:
310struct sock *__udp6_lib_lookup(struct net *net, 310struct sock *__udp6_lib_lookup(struct net *net,
311 const struct in6_addr *saddr, __be16 sport, 311 const struct in6_addr *saddr, __be16 sport,
312 const struct in6_addr *daddr, __be16 dport, 312 const struct in6_addr *daddr, __be16 dport,
313 int dif, struct udp_table *udptable) 313 int dif, struct udp_table *udptable,
314 struct sk_buff *skb)
314{ 315{
315 struct sock *sk, *result; 316 struct sock *sk, *result;
316 struct hlist_nulls_node *node; 317 struct hlist_nulls_node *node;
@@ -358,7 +359,8 @@ begin:
358 struct sock *sk2; 359 struct sock *sk2;
359 hash = udp6_ehashfn(net, daddr, hnum, 360 hash = udp6_ehashfn(net, daddr, hnum,
360 saddr, sport); 361 saddr, sport);
361 sk2 = reuseport_select_sock(sk, hash); 362 sk2 = reuseport_select_sock(sk, hash, skb,
363 sizeof(struct udphdr));
362 if (sk2) { 364 if (sk2) {
363 result = sk2; 365 result = sk2;
364 goto found; 366 goto found;
@@ -407,13 +409,13 @@ static struct sock *__udp6_lib_lookup_skb(struct sk_buff *skb,
407 return sk; 409 return sk;
408 return __udp6_lib_lookup(dev_net(skb_dst(skb)->dev), &iph->saddr, sport, 410 return __udp6_lib_lookup(dev_net(skb_dst(skb)->dev), &iph->saddr, sport,
409 &iph->daddr, dport, inet6_iif(skb), 411 &iph->daddr, dport, inet6_iif(skb),
410 udptable); 412 udptable, skb);
411} 413}
412 414
413struct sock *udp6_lib_lookup(struct net *net, const struct in6_addr *saddr, __be16 sport, 415struct sock *udp6_lib_lookup(struct net *net, const struct in6_addr *saddr, __be16 sport,
414 const struct in6_addr *daddr, __be16 dport, int dif) 416 const struct in6_addr *daddr, __be16 dport, int dif)
415{ 417{
416 return __udp6_lib_lookup(net, saddr, sport, daddr, dport, dif, &udp_table); 418 return __udp6_lib_lookup(net, saddr, sport, daddr, dport, dif, &udp_table, NULL);
417} 419}
418EXPORT_SYMBOL_GPL(udp6_lib_lookup); 420EXPORT_SYMBOL_GPL(udp6_lib_lookup);
419 421
@@ -580,7 +582,7 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
580 struct net *net = dev_net(skb->dev); 582 struct net *net = dev_net(skb->dev);
581 583
582 sk = __udp6_lib_lookup(net, daddr, uh->dest, saddr, uh->source, 584 sk = __udp6_lib_lookup(net, daddr, uh->dest, saddr, uh->source,
583 inet6_iif(skb), udptable); 585 inet6_iif(skb), udptable, skb);
584 if (!sk) { 586 if (!sk) {
585 ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev), 587 ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev),
586 ICMP6_MIB_INERRORS); 588 ICMP6_MIB_INERRORS);