aboutsummaryrefslogtreecommitdiffstats
path: root/tools
diff options
context:
space:
mode:
authorDaniel Borkmann <daniel@iogearbox.net>2018-08-10 19:58:47 -0400
committerDaniel Borkmann <daniel@iogearbox.net>2018-08-10 19:58:48 -0400
commit9d6f417714c3aaf67b23ffdc1d2b036cce3ecc1c (patch)
tree4bc949c2a15c2c0d1cdbb390740f508fe7644623 /tools
parent74b247f4c36315e5c08580700a68e0eb3b72de03 (diff)
parent91134d849a0e8fbc70b8607d280e0d325dcaf7bb (diff)
Merge branch 'bpf-reuseport-map'
Martin KaFai Lau says: ==================== This series introduces a new map type "BPF_MAP_TYPE_REUSEPORT_SOCKARRAY" and a new prog type BPF_PROG_TYPE_SK_REUSEPORT. Here is a snippet from a commit message: "To unleash the full potential of a bpf prog, it is essential for the userspace to be capable of directly setting up a bpf map which can then be consumed by the bpf prog to make decisions. In this case, decide which SO_REUSEPORT sk to serve the incoming request. By adding BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, the userspace has total control and visibility on where a SO_REUSEPORT sk should be located in a bpf map. The later patch will introduce BPF_PROG_TYPE_SK_REUSEPORT such that the bpf prog can directly select a sk from the bpf map. That will raise the programmability of the bpf prog attached to a reuseport group (a group of sk serving the same IP:PORT). For example, in UDP, the bpf prog can peek into the payload (e.g. through the "data" pointer introduced in the later patch) to learn the application level's connection information and then decide which sk to pick from a bpf map. The userspace can tightly couple the sk's location in a bpf map with the application logic in generating the UDP payload's connection information. This connection info contract/API stays within the userspace. Also, when used with map-in-map, the userspace can switch the old-server-process's inner map to a new-server-process's inner map in one call "bpf_map_update_elem(outer_map, &index, &new_reuseport_array)". The bpf prog will then direct incoming requests to the new process instead of the old process. The old process can finish draining the pending requests (e.g. by "accept()") before closing the old-fds. [Note that deleting a fd from a bpf map does not necessarily mean the fd is closed]" ==================== Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Diffstat (limited to 'tools')
-rw-r--r--tools/include/uapi/linux/bpf.h37
-rw-r--r--tools/lib/bpf/bpf.c1
-rw-r--r--tools/lib/bpf/bpf.h1
-rw-r--r--tools/lib/bpf/libbpf.c1
-rw-r--r--tools/testing/selftests/bpf/Makefile4
-rw-r--r--tools/testing/selftests/bpf/bpf_helpers.h4
-rw-r--r--tools/testing/selftests/bpf/bpf_util.h4
-rw-r--r--tools/testing/selftests/bpf/test_align.c5
-rw-r--r--tools/testing/selftests/bpf/test_btf.c5
-rw-r--r--tools/testing/selftests/bpf/test_maps.c262
-rw-r--r--tools/testing/selftests/bpf/test_select_reuseport.c688
-rw-r--r--tools/testing/selftests/bpf/test_select_reuseport_common.h36
-rw-r--r--tools/testing/selftests/bpf/test_select_reuseport_kern.c180
-rw-r--r--tools/testing/selftests/bpf/test_sock.c5
-rw-r--r--tools/testing/selftests/bpf/test_sock_addr.c5
-rw-r--r--tools/testing/selftests/bpf/test_verifier.c5
16 files changed, 1219 insertions, 24 deletions
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index dd5758dc35d3..3102a2a23c31 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -126,6 +126,7 @@ enum bpf_map_type {
126 BPF_MAP_TYPE_XSKMAP, 126 BPF_MAP_TYPE_XSKMAP,
127 BPF_MAP_TYPE_SOCKHASH, 127 BPF_MAP_TYPE_SOCKHASH,
128 BPF_MAP_TYPE_CGROUP_STORAGE, 128 BPF_MAP_TYPE_CGROUP_STORAGE,
129 BPF_MAP_TYPE_REUSEPORT_SOCKARRAY,
129}; 130};
130 131
131enum bpf_prog_type { 132enum bpf_prog_type {
@@ -150,6 +151,7 @@ enum bpf_prog_type {
150 BPF_PROG_TYPE_CGROUP_SOCK_ADDR, 151 BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
151 BPF_PROG_TYPE_LWT_SEG6LOCAL, 152 BPF_PROG_TYPE_LWT_SEG6LOCAL,
152 BPF_PROG_TYPE_LIRC_MODE2, 153 BPF_PROG_TYPE_LIRC_MODE2,
154 BPF_PROG_TYPE_SK_REUSEPORT,
153}; 155};
154 156
155enum bpf_attach_type { 157enum bpf_attach_type {
@@ -2113,6 +2115,14 @@ union bpf_attr {
2113 * the shared data. 2115 * the shared data.
2114 * Return 2116 * Return
2115 * Pointer to the local storage area. 2117 * Pointer to the local storage area.
2118 *
2119 * int bpf_sk_select_reuseport(struct sk_reuseport_md *reuse, struct bpf_map *map, void *key, u64 flags)
2120 * Description
2121 * Select a SO_REUSEPORT sk from a BPF_MAP_TYPE_REUSEPORT_SOCKARRAY map.
2122 * It checks that the selected sk matches the incoming
2123 * request in the skb.
2124 * Return
2125 * 0 on success, or a negative error in case of failure.
2116 */ 2126 */
2117#define __BPF_FUNC_MAPPER(FN) \ 2127#define __BPF_FUNC_MAPPER(FN) \
2118 FN(unspec), \ 2128 FN(unspec), \
@@ -2196,7 +2206,8 @@ union bpf_attr {
2196 FN(rc_keydown), \ 2206 FN(rc_keydown), \
2197 FN(skb_cgroup_id), \ 2207 FN(skb_cgroup_id), \
2198 FN(get_current_cgroup_id), \ 2208 FN(get_current_cgroup_id), \
2199 FN(get_local_storage), 2209 FN(get_local_storage), \
2210 FN(sk_select_reuseport),
2200 2211
2201/* integer value in 'imm' field of BPF_CALL instruction selects which helper 2212/* integer value in 'imm' field of BPF_CALL instruction selects which helper
2202 * function eBPF program intends to call 2213 * function eBPF program intends to call
@@ -2413,6 +2424,30 @@ struct sk_msg_md {
2413 __u32 local_port; /* stored in host byte order */ 2424 __u32 local_port; /* stored in host byte order */
2414}; 2425};
2415 2426
2427struct sk_reuseport_md {
2428 /*
2429 * Start of directly accessible data. It begins from
2430 * the tcp/udp header.
2431 */
2432 void *data;
2433 void *data_end; /* End of directly accessible data */
2434 /*
2435 * Total length of packet (starting from the tcp/udp header).
2436 * Note that the directly accessible bytes (data_end - data)
2437 * could be less than this "len". Those bytes could be
2438 * indirectly read by a helper "bpf_skb_load_bytes()".
2439 */
2440 __u32 len;
2441 /*
2442 * Eth protocol in the mac header (network byte order). e.g.
2443 * ETH_P_IP(0x0800) and ETH_P_IPV6(0x86DD)
2444 */
2445 __u32 eth_protocol;
2446 __u32 ip_protocol; /* IP protocol. e.g. IPPROTO_TCP, IPPROTO_UDP */
2447 __u32 bind_inany; /* Is sock bound to an INANY address? */
2448 __u32 hash; /* A hash of the packet 4 tuples */
2449};
2450
2416#define BPF_TAG_SIZE 8 2451#define BPF_TAG_SIZE 8
2417 2452
2418struct bpf_prog_info { 2453struct bpf_prog_info {
diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index 9ddc89dae962..60aa4ca8b2c5 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -92,6 +92,7 @@ int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr)
92 attr.btf_key_type_id = create_attr->btf_key_type_id; 92 attr.btf_key_type_id = create_attr->btf_key_type_id;
93 attr.btf_value_type_id = create_attr->btf_value_type_id; 93 attr.btf_value_type_id = create_attr->btf_value_type_id;
94 attr.map_ifindex = create_attr->map_ifindex; 94 attr.map_ifindex = create_attr->map_ifindex;
95 attr.inner_map_fd = create_attr->inner_map_fd;
95 96
96 return sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr)); 97 return sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
97} 98}
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index 0639a30a457d..6f38164b2618 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -39,6 +39,7 @@ struct bpf_create_map_attr {
39 __u32 btf_key_type_id; 39 __u32 btf_key_type_id;
40 __u32 btf_value_type_id; 40 __u32 btf_value_type_id;
41 __u32 map_ifindex; 41 __u32 map_ifindex;
42 __u32 inner_map_fd;
42}; 43};
43 44
44int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr); 45int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr);
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 40211b51427a..2abd0f112627 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -1501,6 +1501,7 @@ static bool bpf_prog_type__needs_kver(enum bpf_prog_type type)
1501 case BPF_PROG_TYPE_SK_MSG: 1501 case BPF_PROG_TYPE_SK_MSG:
1502 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: 1502 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
1503 case BPF_PROG_TYPE_LIRC_MODE2: 1503 case BPF_PROG_TYPE_LIRC_MODE2:
1504 case BPF_PROG_TYPE_SK_REUSEPORT:
1504 return false; 1505 return false;
1505 case BPF_PROG_TYPE_UNSPEC: 1506 case BPF_PROG_TYPE_UNSPEC:
1506 case BPF_PROG_TYPE_KPROBE: 1507 case BPF_PROG_TYPE_KPROBE:
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 17a7a5818ee1..daed162043c2 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -23,7 +23,7 @@ $(TEST_CUSTOM_PROGS): $(OUTPUT)/%: %.c
23TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \ 23TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \
24 test_align test_verifier_log test_dev_cgroup test_tcpbpf_user \ 24 test_align test_verifier_log test_dev_cgroup test_tcpbpf_user \
25 test_sock test_btf test_sockmap test_lirc_mode2_user get_cgroup_id_user \ 25 test_sock test_btf test_sockmap test_lirc_mode2_user get_cgroup_id_user \
26 test_socket_cookie test_cgroup_storage 26 test_socket_cookie test_cgroup_storage test_select_reuseport
27 27
28TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test_obj_id.o \ 28TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test_obj_id.o \
29 test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o \ 29 test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o \
@@ -34,7 +34,7 @@ TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test
34 test_btf_haskv.o test_btf_nokv.o test_sockmap_kern.o test_tunnel_kern.o \ 34 test_btf_haskv.o test_btf_nokv.o test_sockmap_kern.o test_tunnel_kern.o \
35 test_get_stack_rawtp.o test_sockmap_kern.o test_sockhash_kern.o \ 35 test_get_stack_rawtp.o test_sockmap_kern.o test_sockhash_kern.o \
36 test_lwt_seg6local.o sendmsg4_prog.o sendmsg6_prog.o test_lirc_mode2_kern.o \ 36 test_lwt_seg6local.o sendmsg4_prog.o sendmsg6_prog.o test_lirc_mode2_kern.o \
37 get_cgroup_id_kern.o socket_cookie_prog.o 37 get_cgroup_id_kern.o socket_cookie_prog.o test_select_reuseport_kern.o
38 38
39# Order correspond to 'make run_tests' order 39# Order correspond to 'make run_tests' order
40TEST_PROGS := test_kmod.sh \ 40TEST_PROGS := test_kmod.sh \
diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h
index 9ba1c72d7cf5..5c32266c2c38 100644
--- a/tools/testing/selftests/bpf/bpf_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_helpers.h
@@ -111,6 +111,8 @@ static int (*bpf_xdp_adjust_tail)(void *ctx, int offset) =
111static int (*bpf_skb_get_xfrm_state)(void *ctx, int index, void *state, 111static int (*bpf_skb_get_xfrm_state)(void *ctx, int index, void *state,
112 int size, int flags) = 112 int size, int flags) =
113 (void *) BPF_FUNC_skb_get_xfrm_state; 113 (void *) BPF_FUNC_skb_get_xfrm_state;
114static int (*bpf_sk_select_reuseport)(void *ctx, void *map, void *key, __u32 flags) =
115 (void *) BPF_FUNC_sk_select_reuseport;
114static int (*bpf_get_stack)(void *ctx, void *buf, int size, int flags) = 116static int (*bpf_get_stack)(void *ctx, void *buf, int size, int flags) =
115 (void *) BPF_FUNC_get_stack; 117 (void *) BPF_FUNC_get_stack;
116static int (*bpf_fib_lookup)(void *ctx, struct bpf_fib_lookup *params, 118static int (*bpf_fib_lookup)(void *ctx, struct bpf_fib_lookup *params,
@@ -173,6 +175,8 @@ struct bpf_map_def {
173 175
174static int (*bpf_skb_load_bytes)(void *ctx, int off, void *to, int len) = 176static int (*bpf_skb_load_bytes)(void *ctx, int off, void *to, int len) =
175 (void *) BPF_FUNC_skb_load_bytes; 177 (void *) BPF_FUNC_skb_load_bytes;
178static int (*bpf_skb_load_bytes_relative)(void *ctx, int off, void *to, int len, __u32 start_header) =
179 (void *) BPF_FUNC_skb_load_bytes_relative;
176static int (*bpf_skb_store_bytes)(void *ctx, int off, void *from, int len, int flags) = 180static int (*bpf_skb_store_bytes)(void *ctx, int off, void *from, int len, int flags) =
177 (void *) BPF_FUNC_skb_store_bytes; 181 (void *) BPF_FUNC_skb_store_bytes;
178static int (*bpf_l3_csum_replace)(void *ctx, int off, int from, int to, int flags) = 182static int (*bpf_l3_csum_replace)(void *ctx, int off, int from, int to, int flags) =
diff --git a/tools/testing/selftests/bpf/bpf_util.h b/tools/testing/selftests/bpf/bpf_util.h
index d0811b3d6a6f..315a44fa32af 100644
--- a/tools/testing/selftests/bpf/bpf_util.h
+++ b/tools/testing/selftests/bpf/bpf_util.h
@@ -44,4 +44,8 @@ static inline unsigned int bpf_num_possible_cpus(void)
44 name[bpf_num_possible_cpus()] 44 name[bpf_num_possible_cpus()]
45#define bpf_percpu(name, cpu) name[(cpu)].v 45#define bpf_percpu(name, cpu) name[(cpu)].v
46 46
47#ifndef ARRAY_SIZE
48# define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
49#endif
50
47#endif /* __BPF_UTIL__ */ 51#endif /* __BPF_UTIL__ */
diff --git a/tools/testing/selftests/bpf/test_align.c b/tools/testing/selftests/bpf/test_align.c
index 6b1b302310fe..5f377ec53f2f 100644
--- a/tools/testing/selftests/bpf/test_align.c
+++ b/tools/testing/selftests/bpf/test_align.c
@@ -18,10 +18,7 @@
18 18
19#include "../../../include/linux/filter.h" 19#include "../../../include/linux/filter.h"
20#include "bpf_rlimit.h" 20#include "bpf_rlimit.h"
21 21#include "bpf_util.h"
22#ifndef ARRAY_SIZE
23# define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
24#endif
25 22
26#define MAX_INSNS 512 23#define MAX_INSNS 512
27#define MAX_MATCHES 16 24#define MAX_MATCHES 16
diff --git a/tools/testing/selftests/bpf/test_btf.c b/tools/testing/selftests/bpf/test_btf.c
index 7fa8c800c540..6b5cfeb7a9cc 100644
--- a/tools/testing/selftests/bpf/test_btf.c
+++ b/tools/testing/selftests/bpf/test_btf.c
@@ -19,6 +19,7 @@
19#include <bpf/btf.h> 19#include <bpf/btf.h>
20 20
21#include "bpf_rlimit.h" 21#include "bpf_rlimit.h"
22#include "bpf_util.h"
22 23
23static uint32_t pass_cnt; 24static uint32_t pass_cnt;
24static uint32_t error_cnt; 25static uint32_t error_cnt;
@@ -93,10 +94,6 @@ static int __base_pr(const char *format, ...)
93#define MAX_NR_RAW_TYPES 1024 94#define MAX_NR_RAW_TYPES 1024
94#define BTF_LOG_BUF_SIZE 65535 95#define BTF_LOG_BUF_SIZE 65535
95 96
96#ifndef ARRAY_SIZE
97# define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
98#endif
99
100static struct args { 97static struct args {
101 unsigned int raw_test_num; 98 unsigned int raw_test_num;
102 unsigned int file_test_num; 99 unsigned int file_test_num;
diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c
index 6c253343a6f9..4b7c74f5faa7 100644
--- a/tools/testing/selftests/bpf/test_maps.c
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -17,7 +17,8 @@
17#include <stdlib.h> 17#include <stdlib.h>
18 18
19#include <sys/wait.h> 19#include <sys/wait.h>
20 20#include <sys/socket.h>
21#include <netinet/in.h>
21#include <linux/bpf.h> 22#include <linux/bpf.h>
22 23
23#include <bpf/bpf.h> 24#include <bpf/bpf.h>
@@ -26,8 +27,21 @@
26#include "bpf_util.h" 27#include "bpf_util.h"
27#include "bpf_rlimit.h" 28#include "bpf_rlimit.h"
28 29
30#ifndef ENOTSUPP
31#define ENOTSUPP 524
32#endif
33
29static int map_flags; 34static int map_flags;
30 35
36#define CHECK(condition, tag, format...) ({ \
37 int __ret = !!(condition); \
38 if (__ret) { \
39 printf("%s(%d):FAIL:%s ", __func__, __LINE__, tag); \
40 printf(format); \
41 exit(-1); \
42 } \
43})
44
31static void test_hashmap(int task, void *data) 45static void test_hashmap(int task, void *data)
32{ 46{
33 long long key, next_key, first_key, value; 47 long long key, next_key, first_key, value;
@@ -1150,6 +1164,250 @@ static void test_map_wronly(void)
1150 assert(bpf_map_get_next_key(fd, &key, &value) == -1 && errno == EPERM); 1164 assert(bpf_map_get_next_key(fd, &key, &value) == -1 && errno == EPERM);
1151} 1165}
1152 1166
1167static void prepare_reuseport_grp(int type, int map_fd,
1168 __s64 *fds64, __u64 *sk_cookies,
1169 unsigned int n)
1170{
1171 socklen_t optlen, addrlen;
1172 struct sockaddr_in6 s6;
1173 const __u32 index0 = 0;
1174 const int optval = 1;
1175 unsigned int i;
1176 u64 sk_cookie;
1177 __s64 fd64;
1178 int err;
1179
1180 s6.sin6_family = AF_INET6;
1181 s6.sin6_addr = in6addr_any;
1182 s6.sin6_port = 0;
1183 addrlen = sizeof(s6);
1184 optlen = sizeof(sk_cookie);
1185
1186 for (i = 0; i < n; i++) {
1187 fd64 = socket(AF_INET6, type, 0);
1188 CHECK(fd64 == -1, "socket()",
1189 "sock_type:%d fd64:%lld errno:%d\n",
1190 type, fd64, errno);
1191
1192 err = setsockopt(fd64, SOL_SOCKET, SO_REUSEPORT,
1193 &optval, sizeof(optval));
1194 CHECK(err == -1, "setsockopt(SO_REUSEEPORT)",
1195 "err:%d errno:%d\n", err, errno);
1196
1197 /* reuseport_array does not allow unbound sk */
1198 err = bpf_map_update_elem(map_fd, &index0, &fd64,
1199 BPF_ANY);
1200 CHECK(err != -1 || errno != EINVAL,
1201 "reuseport array update unbound sk",
1202 "sock_type:%d err:%d errno:%d\n",
1203 type, err, errno);
1204
1205 err = bind(fd64, (struct sockaddr *)&s6, sizeof(s6));
1206 CHECK(err == -1, "bind()",
1207 "sock_type:%d err:%d errno:%d\n", type, err, errno);
1208
1209 if (i == 0) {
1210 err = getsockname(fd64, (struct sockaddr *)&s6,
1211 &addrlen);
1212 CHECK(err == -1, "getsockname()",
1213 "sock_type:%d err:%d errno:%d\n",
1214 type, err, errno);
1215 }
1216
1217 err = getsockopt(fd64, SOL_SOCKET, SO_COOKIE, &sk_cookie,
1218 &optlen);
1219 CHECK(err == -1, "getsockopt(SO_COOKIE)",
1220 "sock_type:%d err:%d errno:%d\n", type, err, errno);
1221
1222 if (type == SOCK_STREAM) {
1223 /*
1224 * reuseport_array does not allow
1225 * non-listening tcp sk.
1226 */
1227 err = bpf_map_update_elem(map_fd, &index0, &fd64,
1228 BPF_ANY);
1229 CHECK(err != -1 || errno != EINVAL,
1230 "reuseport array update non-listening sk",
1231 "sock_type:%d err:%d errno:%d\n",
1232 type, err, errno);
1233 err = listen(fd64, 0);
1234 CHECK(err == -1, "listen()",
1235 "sock_type:%d, err:%d errno:%d\n",
1236 type, err, errno);
1237 }
1238
1239 fds64[i] = fd64;
1240 sk_cookies[i] = sk_cookie;
1241 }
1242}
1243
1244static void test_reuseport_array(void)
1245{
1246#define REUSEPORT_FD_IDX(err, last) ({ (err) ? last : !last; })
1247
1248 const __u32 array_size = 4, index0 = 0, index3 = 3;
1249 int types[2] = { SOCK_STREAM, SOCK_DGRAM }, type;
1250 __u64 grpa_cookies[2], sk_cookie, map_cookie;
1251 __s64 grpa_fds64[2] = { -1, -1 }, fd64 = -1;
1252 const __u32 bad_index = array_size;
1253 int map_fd, err, t, f;
1254 __u32 fds_idx = 0;
1255 int fd;
1256
1257 map_fd = bpf_create_map(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY,
1258 sizeof(__u32), sizeof(__u64), array_size, 0);
1259 CHECK(map_fd == -1, "reuseport array create",
1260 "map_fd:%d, errno:%d\n", map_fd, errno);
1261
1262 /* Test lookup/update/delete with invalid index */
1263 err = bpf_map_delete_elem(map_fd, &bad_index);
1264 CHECK(err != -1 || errno != E2BIG, "reuseport array del >=max_entries",
1265 "err:%d errno:%d\n", err, errno);
1266
1267 err = bpf_map_update_elem(map_fd, &bad_index, &fd64, BPF_ANY);
1268 CHECK(err != -1 || errno != E2BIG,
1269 "reuseport array update >=max_entries",
1270 "err:%d errno:%d\n", err, errno);
1271
1272 err = bpf_map_lookup_elem(map_fd, &bad_index, &map_cookie);
1273 CHECK(err != -1 || errno != ENOENT,
1274 "reuseport array update >=max_entries",
1275 "err:%d errno:%d\n", err, errno);
1276
1277 /* Test lookup/delete of a non-existent elem */
1278 err = bpf_map_lookup_elem(map_fd, &index3, &map_cookie);
1279 CHECK(err != -1 || errno != ENOENT,
1280 "reuseport array lookup not-exist elem",
1281 "err:%d errno:%d\n", err, errno);
1282 err = bpf_map_delete_elem(map_fd, &index3);
1283 CHECK(err != -1 || errno != ENOENT,
1284 "reuseport array del not-exist elem",
1285 "err:%d errno:%d\n", err, errno);
1286
1287 for (t = 0; t < ARRAY_SIZE(types); t++) {
1288 type = types[t];
1289
1290 prepare_reuseport_grp(type, map_fd, grpa_fds64,
1291 grpa_cookies, ARRAY_SIZE(grpa_fds64));
1292
1293 /* Test BPF_* update flags */
1294 /* BPF_EXIST failure case */
1295 err = bpf_map_update_elem(map_fd, &index3, &grpa_fds64[fds_idx],
1296 BPF_EXIST);
1297 CHECK(err != -1 || errno != ENOENT,
1298 "reuseport array update empty elem BPF_EXIST",
1299 "sock_type:%d err:%d errno:%d\n",
1300 type, err, errno);
1301 fds_idx = REUSEPORT_FD_IDX(err, fds_idx);
1302
1303 /* BPF_NOEXIST success case */
1304 err = bpf_map_update_elem(map_fd, &index3, &grpa_fds64[fds_idx],
1305 BPF_NOEXIST);
1306 CHECK(err == -1,
1307 "reuseport array update empty elem BPF_NOEXIST",
1308 "sock_type:%d err:%d errno:%d\n",
1309 type, err, errno);
1310 fds_idx = REUSEPORT_FD_IDX(err, fds_idx);
1311
1312 /* BPF_EXIST success case. */
1313 err = bpf_map_update_elem(map_fd, &index3, &grpa_fds64[fds_idx],
1314 BPF_EXIST);
1315 CHECK(err == -1,
1316 "reuseport array update same elem BPF_EXIST",
1317 "sock_type:%d err:%d errno:%d\n", type, err, errno);
1318 fds_idx = REUSEPORT_FD_IDX(err, fds_idx);
1319
1320 /* BPF_NOEXIST failure case */
1321 err = bpf_map_update_elem(map_fd, &index3, &grpa_fds64[fds_idx],
1322 BPF_NOEXIST);
1323 CHECK(err != -1 || errno != EEXIST,
1324 "reuseport array update non-empty elem BPF_NOEXIST",
1325 "sock_type:%d err:%d errno:%d\n",
1326 type, err, errno);
1327 fds_idx = REUSEPORT_FD_IDX(err, fds_idx);
1328
1329 /* BPF_ANY case (always succeed) */
1330 err = bpf_map_update_elem(map_fd, &index3, &grpa_fds64[fds_idx],
1331 BPF_ANY);
1332 CHECK(err == -1,
1333 "reuseport array update same sk with BPF_ANY",
1334 "sock_type:%d err:%d errno:%d\n", type, err, errno);
1335
1336 fd64 = grpa_fds64[fds_idx];
1337 sk_cookie = grpa_cookies[fds_idx];
1338
1339 /* The same sk cannot be added to reuseport_array twice */
1340 err = bpf_map_update_elem(map_fd, &index3, &fd64, BPF_ANY);
1341 CHECK(err != -1 || errno != EBUSY,
1342 "reuseport array update same sk with same index",
1343 "sock_type:%d err:%d errno:%d\n",
1344 type, err, errno);
1345
1346 err = bpf_map_update_elem(map_fd, &index0, &fd64, BPF_ANY);
1347 CHECK(err != -1 || errno != EBUSY,
1348 "reuseport array update same sk with different index",
1349 "sock_type:%d err:%d errno:%d\n",
1350 type, err, errno);
1351
1352 /* Test delete elem */
1353 err = bpf_map_delete_elem(map_fd, &index3);
1354 CHECK(err == -1, "reuseport array delete sk",
1355 "sock_type:%d err:%d errno:%d\n",
1356 type, err, errno);
1357
1358 /* Add it back with BPF_NOEXIST */
1359 err = bpf_map_update_elem(map_fd, &index3, &fd64, BPF_NOEXIST);
1360 CHECK(err == -1,
1361 "reuseport array re-add with BPF_NOEXIST after del",
1362 "sock_type:%d err:%d errno:%d\n", type, err, errno);
1363
1364 /* Test cookie */
1365 err = bpf_map_lookup_elem(map_fd, &index3, &map_cookie);
1366 CHECK(err == -1 || sk_cookie != map_cookie,
1367 "reuseport array lookup re-added sk",
1368 "sock_type:%d err:%d errno:%d sk_cookie:0x%llx map_cookie:0x%llxn",
1369 type, err, errno, sk_cookie, map_cookie);
1370
1371 /* Test elem removed by close() */
1372 for (f = 0; f < ARRAY_SIZE(grpa_fds64); f++)
1373 close(grpa_fds64[f]);
1374 err = bpf_map_lookup_elem(map_fd, &index3, &map_cookie);
1375 CHECK(err != -1 || errno != ENOENT,
1376 "reuseport array lookup after close()",
1377 "sock_type:%d err:%d errno:%d\n",
1378 type, err, errno);
1379 }
1380
1381 /* Test SOCK_RAW */
1382 fd64 = socket(AF_INET6, SOCK_RAW, IPPROTO_UDP);
1383 CHECK(fd64 == -1, "socket(SOCK_RAW)", "err:%d errno:%d\n",
1384 err, errno);
1385 err = bpf_map_update_elem(map_fd, &index3, &fd64, BPF_NOEXIST);
1386 CHECK(err != -1 || errno != ENOTSUPP, "reuseport array update SOCK_RAW",
1387 "err:%d errno:%d\n", err, errno);
1388 close(fd64);
1389
1390 /* Close the 64 bit value map */
1391 close(map_fd);
1392
1393 /* Test 32 bit fd */
1394 map_fd = bpf_create_map(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY,
1395 sizeof(__u32), sizeof(__u32), array_size, 0);
1396 CHECK(map_fd == -1, "reuseport array create",
1397 "map_fd:%d, errno:%d\n", map_fd, errno);
1398 prepare_reuseport_grp(SOCK_STREAM, map_fd, &fd64, &sk_cookie, 1);
1399 fd = fd64;
1400 err = bpf_map_update_elem(map_fd, &index3, &fd, BPF_NOEXIST);
1401 CHECK(err == -1, "reuseport array update 32 bit fd",
1402 "err:%d errno:%d\n", err, errno);
1403 err = bpf_map_lookup_elem(map_fd, &index3, &map_cookie);
1404 CHECK(err != -1 || errno != ENOSPC,
1405 "reuseport array lookup 32 bit fd",
1406 "err:%d errno:%d\n", err, errno);
1407 close(fd);
1408 close(map_fd);
1409}
1410
1153static void run_all_tests(void) 1411static void run_all_tests(void)
1154{ 1412{
1155 test_hashmap(0, NULL); 1413 test_hashmap(0, NULL);
@@ -1170,6 +1428,8 @@ static void run_all_tests(void)
1170 1428
1171 test_map_rdonly(); 1429 test_map_rdonly();
1172 test_map_wronly(); 1430 test_map_wronly();
1431
1432 test_reuseport_array();
1173} 1433}
1174 1434
1175int main(void) 1435int main(void)
diff --git a/tools/testing/selftests/bpf/test_select_reuseport.c b/tools/testing/selftests/bpf/test_select_reuseport.c
new file mode 100644
index 000000000000..75646d9b34aa
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_select_reuseport.c
@@ -0,0 +1,688 @@
1// SPDX-License-Identifier: GPL-2.0
2/* Copyright (c) 2018 Facebook */
3
4#include <stdlib.h>
5#include <unistd.h>
6#include <stdbool.h>
7#include <string.h>
8#include <errno.h>
9#include <assert.h>
10#include <fcntl.h>
11#include <linux/bpf.h>
12#include <linux/err.h>
13#include <linux/types.h>
14#include <linux/if_ether.h>
15#include <sys/types.h>
16#include <sys/epoll.h>
17#include <sys/socket.h>
18#include <netinet/in.h>
19#include <bpf/bpf.h>
20#include <bpf/libbpf.h>
21#include "bpf_rlimit.h"
22#include "bpf_util.h"
23#include "test_select_reuseport_common.h"
24
25#define MIN_TCPHDR_LEN 20
26#define UDPHDR_LEN 8
27
28#define TCP_SYNCOOKIE_SYSCTL "/proc/sys/net/ipv4/tcp_syncookies"
29#define TCP_FO_SYSCTL "/proc/sys/net/ipv4/tcp_fastopen"
30#define REUSEPORT_ARRAY_SIZE 32
31
32static int result_map, tmp_index_ovr_map, linum_map, data_check_map;
33static enum result expected_results[NR_RESULTS];
34static int sk_fds[REUSEPORT_ARRAY_SIZE];
35static int reuseport_array, outer_map;
36static int select_by_skb_data_prog;
37static int saved_tcp_syncookie;
38static struct bpf_object *obj;
39static int saved_tcp_fo;
40static __u32 index_zero;
41static int epfd;
42
43static union sa46 {
44 struct sockaddr_in6 v6;
45 struct sockaddr_in v4;
46 sa_family_t family;
47} srv_sa;
48
49#define CHECK(condition, tag, format...) ({ \
50 int __ret = !!(condition); \
51 if (__ret) { \
52 printf("%s(%d):FAIL:%s ", __func__, __LINE__, tag); \
53 printf(format); \
54 exit(-1); \
55 } \
56})
57
58static void create_maps(void)
59{
60 struct bpf_create_map_attr attr = {};
61
62 /* Creating reuseport_array */
63 attr.name = "reuseport_array";
64 attr.map_type = BPF_MAP_TYPE_REUSEPORT_SOCKARRAY;
65 attr.key_size = sizeof(__u32);
66 attr.value_size = sizeof(__u32);
67 attr.max_entries = REUSEPORT_ARRAY_SIZE;
68
69 reuseport_array = bpf_create_map_xattr(&attr);
70 CHECK(reuseport_array == -1, "creating reuseport_array",
71 "reuseport_array:%d errno:%d\n", reuseport_array, errno);
72
73 /* Creating outer_map */
74 attr.name = "outer_map";
75 attr.map_type = BPF_MAP_TYPE_ARRAY_OF_MAPS;
76 attr.key_size = sizeof(__u32);
77 attr.value_size = sizeof(__u32);
78 attr.max_entries = 1;
79 attr.inner_map_fd = reuseport_array;
80 outer_map = bpf_create_map_xattr(&attr);
81 CHECK(outer_map == -1, "creating outer_map",
82 "outer_map:%d errno:%d\n", outer_map, errno);
83}
84
85static void prepare_bpf_obj(void)
86{
87 struct bpf_program *prog;
88 struct bpf_map *map;
89 int err;
90 struct bpf_object_open_attr attr = {
91 .file = "test_select_reuseport_kern.o",
92 .prog_type = BPF_PROG_TYPE_SK_REUSEPORT,
93 };
94
95 obj = bpf_object__open_xattr(&attr);
96 CHECK(IS_ERR_OR_NULL(obj), "open test_select_reuseport_kern.o",
97 "obj:%p PTR_ERR(obj):%ld\n", obj, PTR_ERR(obj));
98
99 prog = bpf_program__next(NULL, obj);
100 CHECK(!prog, "get first bpf_program", "!prog\n");
101 bpf_program__set_type(prog, attr.prog_type);
102
103 map = bpf_object__find_map_by_name(obj, "outer_map");
104 CHECK(!map, "find outer_map", "!map\n");
105 err = bpf_map__reuse_fd(map, outer_map);
106 CHECK(err, "reuse outer_map", "err:%d\n", err);
107
108 err = bpf_object__load(obj);
109 CHECK(err, "load bpf_object", "err:%d\n", err);
110
111 select_by_skb_data_prog = bpf_program__fd(prog);
112 CHECK(select_by_skb_data_prog == -1, "get prog fd",
113 "select_by_skb_data_prog:%d\n", select_by_skb_data_prog);
114
115 map = bpf_object__find_map_by_name(obj, "result_map");
116 CHECK(!map, "find result_map", "!map\n");
117 result_map = bpf_map__fd(map);
118 CHECK(result_map == -1, "get result_map fd",
119 "result_map:%d\n", result_map);
120
121 map = bpf_object__find_map_by_name(obj, "tmp_index_ovr_map");
122 CHECK(!map, "find tmp_index_ovr_map", "!map\n");
123 tmp_index_ovr_map = bpf_map__fd(map);
124 CHECK(tmp_index_ovr_map == -1, "get tmp_index_ovr_map fd",
125 "tmp_index_ovr_map:%d\n", tmp_index_ovr_map);
126
127 map = bpf_object__find_map_by_name(obj, "linum_map");
128 CHECK(!map, "find linum_map", "!map\n");
129 linum_map = bpf_map__fd(map);
130 CHECK(linum_map == -1, "get linum_map fd",
131 "linum_map:%d\n", linum_map);
132
133 map = bpf_object__find_map_by_name(obj, "data_check_map");
134 CHECK(!map, "find data_check_map", "!map\n");
135 data_check_map = bpf_map__fd(map);
136 CHECK(data_check_map == -1, "get data_check_map fd",
137 "data_check_map:%d\n", data_check_map);
138}
139
140static void sa46_init_loopback(union sa46 *sa, sa_family_t family)
141{
142 memset(sa, 0, sizeof(*sa));
143 sa->family = family;
144 if (sa->family == AF_INET6)
145 sa->v6.sin6_addr = in6addr_loopback;
146 else
147 sa->v4.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
148}
149
150static void sa46_init_inany(union sa46 *sa, sa_family_t family)
151{
152 memset(sa, 0, sizeof(*sa));
153 sa->family = family;
154 if (sa->family == AF_INET6)
155 sa->v6.sin6_addr = in6addr_any;
156 else
157 sa->v4.sin_addr.s_addr = INADDR_ANY;
158}
159
160static int read_int_sysctl(const char *sysctl)
161{
162 char buf[16];
163 int fd, ret;
164
165 fd = open(sysctl, 0);
166 CHECK(fd == -1, "open(sysctl)", "sysctl:%s fd:%d errno:%d\n",
167 sysctl, fd, errno);
168
169 ret = read(fd, buf, sizeof(buf));
170 CHECK(ret <= 0, "read(sysctl)", "sysctl:%s ret:%d errno:%d\n",
171 sysctl, ret, errno);
172 close(fd);
173
174 return atoi(buf);
175}
176
177static void write_int_sysctl(const char *sysctl, int v)
178{
179 int fd, ret, size;
180 char buf[16];
181
182 fd = open(sysctl, O_RDWR);
183 CHECK(fd == -1, "open(sysctl)", "sysctl:%s fd:%d errno:%d\n",
184 sysctl, fd, errno);
185
186 size = snprintf(buf, sizeof(buf), "%d", v);
187 ret = write(fd, buf, size);
188 CHECK(ret != size, "write(sysctl)",
189 "sysctl:%s ret:%d size:%d errno:%d\n", sysctl, ret, size, errno);
190 close(fd);
191}
192
193static void restore_sysctls(void)
194{
195 write_int_sysctl(TCP_FO_SYSCTL, saved_tcp_fo);
196 write_int_sysctl(TCP_SYNCOOKIE_SYSCTL, saved_tcp_syncookie);
197}
198
199static void enable_fastopen(void)
200{
201 int fo;
202
203 fo = read_int_sysctl(TCP_FO_SYSCTL);
204 write_int_sysctl(TCP_FO_SYSCTL, fo | 7);
205}
206
207static void enable_syncookie(void)
208{
209 write_int_sysctl(TCP_SYNCOOKIE_SYSCTL, 2);
210}
211
212static void disable_syncookie(void)
213{
214 write_int_sysctl(TCP_SYNCOOKIE_SYSCTL, 0);
215}
216
217static __u32 get_linum(void)
218{
219 __u32 linum;
220 int err;
221
222 err = bpf_map_lookup_elem(linum_map, &index_zero, &linum);
223 CHECK(err == -1, "lookup_elem(linum_map)", "err:%d errno:%d\n",
224 err, errno);
225
226 return linum;
227}
228
229static void check_data(int type, sa_family_t family, const struct cmd *cmd,
230 int cli_fd)
231{
232 struct data_check expected = {}, result;
233 union sa46 cli_sa;
234 socklen_t addrlen;
235 int err;
236
237 addrlen = sizeof(cli_sa);
238 err = getsockname(cli_fd, (struct sockaddr *)&cli_sa,
239 &addrlen);
240 CHECK(err == -1, "getsockname(cli_fd)", "err:%d errno:%d\n",
241 err, errno);
242
243 err = bpf_map_lookup_elem(data_check_map, &index_zero, &result);
244 CHECK(err == -1, "lookup_elem(data_check_map)", "err:%d errno:%d\n",
245 err, errno);
246
247 if (type == SOCK_STREAM) {
248 expected.len = MIN_TCPHDR_LEN;
249 expected.ip_protocol = IPPROTO_TCP;
250 } else {
251 expected.len = UDPHDR_LEN;
252 expected.ip_protocol = IPPROTO_UDP;
253 }
254
255 if (family == AF_INET6) {
256 expected.eth_protocol = htons(ETH_P_IPV6);
257 expected.bind_inany = !srv_sa.v6.sin6_addr.s6_addr32[3] &&
258 !srv_sa.v6.sin6_addr.s6_addr32[2] &&
259 !srv_sa.v6.sin6_addr.s6_addr32[1] &&
260 !srv_sa.v6.sin6_addr.s6_addr32[0];
261
262 memcpy(&expected.skb_addrs[0], cli_sa.v6.sin6_addr.s6_addr32,
263 sizeof(cli_sa.v6.sin6_addr));
264 memcpy(&expected.skb_addrs[4], &in6addr_loopback,
265 sizeof(in6addr_loopback));
266 expected.skb_ports[0] = cli_sa.v6.sin6_port;
267 expected.skb_ports[1] = srv_sa.v6.sin6_port;
268 } else {
269 expected.eth_protocol = htons(ETH_P_IP);
270 expected.bind_inany = !srv_sa.v4.sin_addr.s_addr;
271
272 expected.skb_addrs[0] = cli_sa.v4.sin_addr.s_addr;
273 expected.skb_addrs[1] = htonl(INADDR_LOOPBACK);
274 expected.skb_ports[0] = cli_sa.v4.sin_port;
275 expected.skb_ports[1] = srv_sa.v4.sin_port;
276 }
277
278 if (memcmp(&result, &expected, offsetof(struct data_check,
279 equal_check_end))) {
280 printf("unexpected data_check\n");
281 printf(" result: (0x%x, %u, %u)\n",
282 result.eth_protocol, result.ip_protocol,
283 result.bind_inany);
284 printf("expected: (0x%x, %u, %u)\n",
285 expected.eth_protocol, expected.ip_protocol,
286 expected.bind_inany);
287 CHECK(1, "data_check result != expected",
288 "bpf_prog_linum:%u\n", get_linum());
289 }
290
291 CHECK(!result.hash, "data_check result.hash empty",
292 "result.hash:%u", result.hash);
293
294 expected.len += cmd ? sizeof(*cmd) : 0;
295 if (type == SOCK_STREAM)
296 CHECK(expected.len > result.len, "expected.len > result.len",
297 "expected.len:%u result.len:%u bpf_prog_linum:%u\n",
298 expected.len, result.len, get_linum());
299 else
300 CHECK(expected.len != result.len, "expected.len != result.len",
301 "expected.len:%u result.len:%u bpf_prog_linum:%u\n",
302 expected.len, result.len, get_linum());
303}
304
305static void check_results(void)
306{
307 __u32 results[NR_RESULTS];
308 __u32 i, broken = 0;
309 int err;
310
311 for (i = 0; i < NR_RESULTS; i++) {
312 err = bpf_map_lookup_elem(result_map, &i, &results[i]);
313 CHECK(err == -1, "lookup_elem(result_map)",
314 "i:%u err:%d errno:%d\n", i, err, errno);
315 }
316
317 for (i = 0; i < NR_RESULTS; i++) {
318 if (results[i] != expected_results[i]) {
319 broken = i;
320 break;
321 }
322 }
323
324 if (i == NR_RESULTS)
325 return;
326
327 printf("unexpected result\n");
328 printf(" result: [");
329 printf("%u", results[0]);
330 for (i = 1; i < NR_RESULTS; i++)
331 printf(", %u", results[i]);
332 printf("]\n");
333
334 printf("expected: [");
335 printf("%u", expected_results[0]);
336 for (i = 1; i < NR_RESULTS; i++)
337 printf(", %u", expected_results[i]);
338 printf("]\n");
339
340 CHECK(expected_results[broken] != results[broken],
341 "unexpected result",
342 "expected_results[%u] != results[%u] bpf_prog_linum:%u\n",
343 broken, broken, get_linum());
344}
345
346static int send_data(int type, sa_family_t family, void *data, size_t len,
347 enum result expected)
348{
349 union sa46 cli_sa;
350 int fd, err;
351
352 fd = socket(family, type, 0);
353 CHECK(fd == -1, "socket()", "fd:%d errno:%d\n", fd, errno);
354
355 sa46_init_loopback(&cli_sa, family);
356 err = bind(fd, (struct sockaddr *)&cli_sa, sizeof(cli_sa));
357 CHECK(fd == -1, "bind(cli_sa)", "err:%d errno:%d\n", err, errno);
358
359 err = sendto(fd, data, len, MSG_FASTOPEN, (struct sockaddr *)&srv_sa,
360 sizeof(srv_sa));
361 CHECK(err != len && expected >= PASS,
362 "sendto()", "family:%u err:%d errno:%d expected:%d\n",
363 family, err, errno, expected);
364
365 return fd;
366}
367
368static void do_test(int type, sa_family_t family, struct cmd *cmd,
369 enum result expected)
370{
371 int nev, srv_fd, cli_fd;
372 struct epoll_event ev;
373 struct cmd rcv_cmd;
374 ssize_t nread;
375
376 cli_fd = send_data(type, family, cmd, cmd ? sizeof(*cmd) : 0,
377 expected);
378 nev = epoll_wait(epfd, &ev, 1, expected >= PASS ? 5 : 0);
379 CHECK((nev <= 0 && expected >= PASS) ||
380 (nev > 0 && expected < PASS),
381 "nev <> expected",
382 "nev:%d expected:%d type:%d family:%d data:(%d, %d)\n",
383 nev, expected, type, family,
384 cmd ? cmd->reuseport_index : -1,
385 cmd ? cmd->pass_on_failure : -1);
386 check_results();
387 check_data(type, family, cmd, cli_fd);
388
389 if (expected < PASS)
390 return;
391
392 CHECK(expected != PASS_ERR_SK_SELECT_REUSEPORT &&
393 cmd->reuseport_index != ev.data.u32,
394 "check cmd->reuseport_index",
395 "cmd:(%u, %u) ev.data.u32:%u\n",
396 cmd->pass_on_failure, cmd->reuseport_index, ev.data.u32);
397
398 srv_fd = sk_fds[ev.data.u32];
399 if (type == SOCK_STREAM) {
400 int new_fd = accept(srv_fd, NULL, 0);
401
402 CHECK(new_fd == -1, "accept(srv_fd)",
403 "ev.data.u32:%u new_fd:%d errno:%d\n",
404 ev.data.u32, new_fd, errno);
405
406 nread = recv(new_fd, &rcv_cmd, sizeof(rcv_cmd), MSG_DONTWAIT);
407 CHECK(nread != sizeof(rcv_cmd),
408 "recv(new_fd)",
409 "ev.data.u32:%u nread:%zd sizeof(rcv_cmd):%zu errno:%d\n",
410 ev.data.u32, nread, sizeof(rcv_cmd), errno);
411
412 close(new_fd);
413 } else {
414 nread = recv(srv_fd, &rcv_cmd, sizeof(rcv_cmd), MSG_DONTWAIT);
415 CHECK(nread != sizeof(rcv_cmd),
416 "recv(sk_fds)",
417 "ev.data.u32:%u nread:%zd sizeof(rcv_cmd):%zu errno:%d\n",
418 ev.data.u32, nread, sizeof(rcv_cmd), errno);
419 }
420
421 close(cli_fd);
422}
423
424static void test_err_inner_map(int type, sa_family_t family)
425{
426 struct cmd cmd = {
427 .reuseport_index = 0,
428 .pass_on_failure = 0,
429 };
430
431 printf("%s: ", __func__);
432 expected_results[DROP_ERR_INNER_MAP]++;
433 do_test(type, family, &cmd, DROP_ERR_INNER_MAP);
434 printf("OK\n");
435}
436
437static void test_err_skb_data(int type, sa_family_t family)
438{
439 printf("%s: ", __func__);
440 expected_results[DROP_ERR_SKB_DATA]++;
441 do_test(type, family, NULL, DROP_ERR_SKB_DATA);
442 printf("OK\n");
443}
444
445static void test_err_sk_select_port(int type, sa_family_t family)
446{
447 struct cmd cmd = {
448 .reuseport_index = REUSEPORT_ARRAY_SIZE,
449 .pass_on_failure = 0,
450 };
451
452 printf("%s: ", __func__);
453 expected_results[DROP_ERR_SK_SELECT_REUSEPORT]++;
454 do_test(type, family, &cmd, DROP_ERR_SK_SELECT_REUSEPORT);
455 printf("OK\n");
456}
457
458static void test_pass(int type, sa_family_t family)
459{
460 struct cmd cmd;
461 int i;
462
463 printf("%s: ", __func__);
464 cmd.pass_on_failure = 0;
465 for (i = 0; i < REUSEPORT_ARRAY_SIZE; i++) {
466 expected_results[PASS]++;
467 cmd.reuseport_index = i;
468 do_test(type, family, &cmd, PASS);
469 }
470 printf("OK\n");
471}
472
473static void test_syncookie(int type, sa_family_t family)
474{
475 int err, tmp_index = 1;
476 struct cmd cmd = {
477 .reuseport_index = 0,
478 .pass_on_failure = 0,
479 };
480
481 if (type != SOCK_STREAM)
482 return;
483
484 printf("%s: ", __func__);
485 /*
486 * +1 for TCP-SYN and
487 * +1 for the TCP-ACK (ack the syncookie)
488 */
489 expected_results[PASS] += 2;
490 enable_syncookie();
491 /*
492 * Simulate TCP-SYN and TCP-ACK are handled by two different sk:
493 * TCP-SYN: select sk_fds[tmp_index = 1] tmp_index is from the
494 * tmp_index_ovr_map
495 * TCP-ACK: select sk_fds[reuseport_index = 0] reuseport_index
496 * is from the cmd.reuseport_index
497 */
498 err = bpf_map_update_elem(tmp_index_ovr_map, &index_zero,
499 &tmp_index, BPF_ANY);
500 CHECK(err == -1, "update_elem(tmp_index_ovr_map, 0, 1)",
501 "err:%d errno:%d\n", err, errno);
502 do_test(type, family, &cmd, PASS);
503 err = bpf_map_lookup_elem(tmp_index_ovr_map, &index_zero,
504 &tmp_index);
505 CHECK(err == -1 || tmp_index != -1,
506 "lookup_elem(tmp_index_ovr_map)",
507 "err:%d errno:%d tmp_index:%d\n",
508 err, errno, tmp_index);
509 disable_syncookie();
510 printf("OK\n");
511}
512
513static void test_pass_on_err(int type, sa_family_t family)
514{
515 struct cmd cmd = {
516 .reuseport_index = REUSEPORT_ARRAY_SIZE,
517 .pass_on_failure = 1,
518 };
519
520 printf("%s: ", __func__);
521 expected_results[PASS_ERR_SK_SELECT_REUSEPORT] += 1;
522 do_test(type, family, &cmd, PASS_ERR_SK_SELECT_REUSEPORT);
523 printf("OK\n");
524}
525
526static void prepare_sk_fds(int type, sa_family_t family, bool inany)
527{
528 const int first = REUSEPORT_ARRAY_SIZE - 1;
529 int i, err, optval = 1;
530 struct epoll_event ev;
531 socklen_t addrlen;
532
533 if (inany)
534 sa46_init_inany(&srv_sa, family);
535 else
536 sa46_init_loopback(&srv_sa, family);
537 addrlen = sizeof(srv_sa);
538
539 /*
540 * The sk_fds[] is filled from the back such that the order
541 * is exactly opposite to the (struct sock_reuseport *)reuse->socks[].
542 */
543 for (i = first; i >= 0; i--) {
544 sk_fds[i] = socket(family, type, 0);
545 CHECK(sk_fds[i] == -1, "socket()", "sk_fds[%d]:%d errno:%d\n",
546 i, sk_fds[i], errno);
547 err = setsockopt(sk_fds[i], SOL_SOCKET, SO_REUSEPORT,
548 &optval, sizeof(optval));
549 CHECK(err == -1, "setsockopt(SO_REUSEPORT)",
550 "sk_fds[%d] err:%d errno:%d\n",
551 i, err, errno);
552
553 if (i == first) {
554 err = setsockopt(sk_fds[i], SOL_SOCKET,
555 SO_ATTACH_REUSEPORT_EBPF,
556 &select_by_skb_data_prog,
557 sizeof(select_by_skb_data_prog));
558 CHECK(err == -1, "setsockopt(SO_ATTACH_REUEPORT_EBPF)",
559 "err:%d errno:%d\n", err, errno);
560 }
561
562 err = bind(sk_fds[i], (struct sockaddr *)&srv_sa, addrlen);
563 CHECK(err == -1, "bind()", "sk_fds[%d] err:%d errno:%d\n",
564 i, err, errno);
565
566 if (type == SOCK_STREAM) {
567 err = listen(sk_fds[i], 10);
568 CHECK(err == -1, "listen()",
569 "sk_fds[%d] err:%d errno:%d\n",
570 i, err, errno);
571 }
572
573 err = bpf_map_update_elem(reuseport_array, &i, &sk_fds[i],
574 BPF_NOEXIST);
575 CHECK(err == -1, "update_elem(reuseport_array)",
576 "sk_fds[%d] err:%d errno:%d\n", i, err, errno);
577
578 if (i == first) {
579 socklen_t addrlen = sizeof(srv_sa);
580
581 err = getsockname(sk_fds[i], (struct sockaddr *)&srv_sa,
582 &addrlen);
583 CHECK(err == -1, "getsockname()",
584 "sk_fds[%d] err:%d errno:%d\n", i, err, errno);
585 }
586 }
587
588 epfd = epoll_create(1);
589 CHECK(epfd == -1, "epoll_create(1)",
590 "epfd:%d errno:%d\n", epfd, errno);
591
592 ev.events = EPOLLIN;
593 for (i = 0; i < REUSEPORT_ARRAY_SIZE; i++) {
594 ev.data.u32 = i;
595 err = epoll_ctl(epfd, EPOLL_CTL_ADD, sk_fds[i], &ev);
596 CHECK(err, "epoll_ctl(EPOLL_CTL_ADD)", "sk_fds[%d]\n", i);
597 }
598}
599
600static void setup_per_test(int type, unsigned short family, bool inany)
601{
602 int ovr = -1, err;
603
604 prepare_sk_fds(type, family, inany);
605 err = bpf_map_update_elem(tmp_index_ovr_map, &index_zero, &ovr,
606 BPF_ANY);
607 CHECK(err == -1, "update_elem(tmp_index_ovr_map, 0, -1)",
608 "err:%d errno:%d\n", err, errno);
609}
610
611static void cleanup_per_test(void)
612{
613 int i, err;
614
615 for (i = 0; i < REUSEPORT_ARRAY_SIZE; i++)
616 close(sk_fds[i]);
617 close(epfd);
618
619 err = bpf_map_delete_elem(outer_map, &index_zero);
620 CHECK(err == -1, "delete_elem(outer_map)",
621 "err:%d errno:%d\n", err, errno);
622}
623
624static void cleanup(void)
625{
626 close(outer_map);
627 close(reuseport_array);
628 bpf_object__close(obj);
629}
630
631static void test_all(void)
632{
633 /* Extra SOCK_STREAM to test bind_inany==true */
634 const int types[] = { SOCK_STREAM, SOCK_DGRAM, SOCK_STREAM };
635 const char * const type_strings[] = { "TCP", "UDP", "TCP" };
636 const char * const family_strings[] = { "IPv6", "IPv4" };
637 const unsigned short families[] = { AF_INET6, AF_INET };
638 const bool bind_inany[] = { false, false, true };
639 int t, f, err;
640
641 for (f = 0; f < ARRAY_SIZE(families); f++) {
642 unsigned short family = families[f];
643
644 for (t = 0; t < ARRAY_SIZE(types); t++) {
645 bool inany = bind_inany[t];
646 int type = types[t];
647
648 printf("######## %s/%s %s ########\n",
649 family_strings[f], type_strings[t],
650 inany ? " INANY " : "LOOPBACK");
651
652 setup_per_test(type, family, inany);
653
654 test_err_inner_map(type, family);
655
656 /* Install reuseport_array to the outer_map */
657 err = bpf_map_update_elem(outer_map, &index_zero,
658 &reuseport_array, BPF_ANY);
659 CHECK(err == -1, "update_elem(outer_map)",
660 "err:%d errno:%d\n", err, errno);
661
662 test_err_skb_data(type, family);
663 test_err_sk_select_port(type, family);
664 test_pass(type, family);
665 test_syncookie(type, family);
666 test_pass_on_err(type, family);
667
668 cleanup_per_test();
669 printf("\n");
670 }
671 }
672}
673
674int main(int argc, const char **argv)
675{
676 create_maps();
677 prepare_bpf_obj();
678 saved_tcp_fo = read_int_sysctl(TCP_FO_SYSCTL);
679 saved_tcp_syncookie = read_int_sysctl(TCP_SYNCOOKIE_SYSCTL);
680 enable_fastopen();
681 disable_syncookie();
682 atexit(restore_sysctls);
683
684 test_all();
685
686 cleanup();
687 return 0;
688}
diff --git a/tools/testing/selftests/bpf/test_select_reuseport_common.h b/tools/testing/selftests/bpf/test_select_reuseport_common.h
new file mode 100644
index 000000000000..08eb2a9f145f
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_select_reuseport_common.h
@@ -0,0 +1,36 @@
1/* SPDX-License-Identifier: GPL-2.0 */
2/* Copyright (c) 2018 Facebook */
3
4#ifndef __TEST_SELECT_REUSEPORT_COMMON_H
5#define __TEST_SELECT_REUSEPORT_COMMON_H
6
7#include <linux/types.h>
8
/*
 * Outcome recorded by the bpf prog for each packet; also the index into
 * result_map.  Values below PASS make the prog return SK_DROP, PASS and
 * above make it return SK_PASS.
 */
enum result {
	DROP_ERR_INNER_MAP = 0,		/* outer_map lookup returned NULL */
	DROP_ERR_SKB_DATA,		/* packet too short to hold a cmd */
	DROP_ERR_SK_SELECT_REUSEPORT,	/* select failed, pass_on_failure == 0 */
	DROP_MISC,			/* any other failure in the prog */
	PASS,				/* socket selected successfully */
	PASS_ERR_SK_SELECT_REUSEPORT,	/* select failed, pass_on_failure != 0 */
	NR_RESULTS,			/* number of results, not a result */
};
18
/* Payload sent by the test client and parsed by the bpf prog. */
struct cmd {
	/* index passed to bpf_sk_select_reuseport() unless overridden */
	__u32 reuseport_index;
	/* non-zero: record a PASS_* result even when the selection fails */
	__u32 pass_on_failure;
};
23
/*
 * Snapshot of what the bpf prog saw for the last packet, stored in
 * data_check_map.  Userspace compares everything before equal_check_end
 * with a single memcmp(); len and hash are checked separately.
 */
struct data_check {
	__u32 ip_protocol;
	__u32 skb_addrs[8];	/* source address first, then destination */
	__u16 skb_ports[2];	/* [0] source port, [1] destination port */
	__u16 eth_protocol;
	__u8 bind_inany;
	__u8 equal_check_end[0];	/* marker only, occupies no storage */

	__u32 len;
	__u32 hash;
};
35
36#endif
diff --git a/tools/testing/selftests/bpf/test_select_reuseport_kern.c b/tools/testing/selftests/bpf/test_select_reuseport_kern.c
new file mode 100644
index 000000000000..5b54ec637ada
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_select_reuseport_kern.c
@@ -0,0 +1,180 @@
1// SPDX-License-Identifier: GPL-2.0
2/* Copyright (c) 2018 Facebook */
3
4#include <stdlib.h>
5#include <linux/in.h>
6#include <linux/ip.h>
7#include <linux/ipv6.h>
8#include <linux/tcp.h>
9#include <linux/udp.h>
10#include <linux/bpf.h>
11#include <linux/types.h>
12#include <linux/if_ether.h>
13
14#include "bpf_endian.h"
15#include "bpf_helpers.h"
16#include "test_select_reuseport_common.h"
17
18int _version SEC("version") = 1;
19
/* Fallback for when no included header has already defined offsetof(). */
#ifndef offsetof
#define offsetof(TYPE, MEMBER) __builtin_offsetof(TYPE, MEMBER)
#endif
23
24struct bpf_map_def SEC("maps") outer_map = {
25 .type = BPF_MAP_TYPE_ARRAY_OF_MAPS,
26 .key_size = sizeof(__u32),
27 .value_size = sizeof(__u32),
28 .max_entries = 1,
29};
30
31struct bpf_map_def SEC("maps") result_map = {
32 .type = BPF_MAP_TYPE_ARRAY,
33 .key_size = sizeof(__u32),
34 .value_size = sizeof(__u32),
35 .max_entries = NR_RESULTS,
36};
37
38struct bpf_map_def SEC("maps") tmp_index_ovr_map = {
39 .type = BPF_MAP_TYPE_ARRAY,
40 .key_size = sizeof(__u32),
41 .value_size = sizeof(int),
42 .max_entries = 1,
43};
44
45struct bpf_map_def SEC("maps") linum_map = {
46 .type = BPF_MAP_TYPE_ARRAY,
47 .key_size = sizeof(__u32),
48 .value_size = sizeof(__u32),
49 .max_entries = 1,
50};
51
52struct bpf_map_def SEC("maps") data_check_map = {
53 .type = BPF_MAP_TYPE_ARRAY,
54 .key_size = sizeof(__u32),
55 .value_size = sizeof(struct data_check),
56 .max_entries = 1,
57};
58
/*
 * Finish the prog with @_result: remember the source line of the call
 * site in "linum", store the result in "result" and jump to the "done"
 * label.  The enclosing function must provide all three.
 */
#define GOTO_DONE(_result) ({			\
	linum = __LINE__;			\
	result = (_result);			\
	goto done;				\
})
64
65SEC("select_by_skb_data")
66int _select_by_skb_data(struct sk_reuseport_md *reuse_md)
67{
68 __u32 linum, index = 0, flags = 0, index_zero = 0;
69 __u32 *result_cnt, *linum_value;
70 struct data_check data_check = {};
71 struct cmd *cmd, cmd_copy;
72 void *data, *data_end;
73 void *reuseport_array;
74 enum result result;
75 int *index_ovr;
76 int err;
77
78 data = reuse_md->data;
79 data_end = reuse_md->data_end;
80 data_check.len = reuse_md->len;
81 data_check.eth_protocol = reuse_md->eth_protocol;
82 data_check.ip_protocol = reuse_md->ip_protocol;
83 data_check.hash = reuse_md->hash;
84 data_check.bind_inany = reuse_md->bind_inany;
85 if (data_check.eth_protocol == bpf_htons(ETH_P_IP)) {
86 if (bpf_skb_load_bytes_relative(reuse_md,
87 offsetof(struct iphdr, saddr),
88 data_check.skb_addrs, 8,
89 BPF_HDR_START_NET))
90 GOTO_DONE(DROP_MISC);
91 } else {
92 if (bpf_skb_load_bytes_relative(reuse_md,
93 offsetof(struct ipv6hdr, saddr),
94 data_check.skb_addrs, 32,
95 BPF_HDR_START_NET))
96 GOTO_DONE(DROP_MISC);
97 }
98
99 /*
100 * The ip_protocol could be a compile time decision
101 * if the bpf_prog.o is dedicated to either TCP or
102 * UDP.
103 *
104 * Otherwise, reuse_md->ip_protocol or
105 * the protocol field in the iphdr can be used.
106 */
107 if (data_check.ip_protocol == IPPROTO_TCP) {
108 struct tcphdr *th = data;
109
110 if (th + 1 > data_end)
111 GOTO_DONE(DROP_MISC);
112
113 data_check.skb_ports[0] = th->source;
114 data_check.skb_ports[1] = th->dest;
115
116 if ((th->doff << 2) + sizeof(*cmd) > data_check.len)
117 GOTO_DONE(DROP_ERR_SKB_DATA);
118 if (bpf_skb_load_bytes(reuse_md, th->doff << 2, &cmd_copy,
119 sizeof(cmd_copy)))
120 GOTO_DONE(DROP_MISC);
121 cmd = &cmd_copy;
122 } else if (data_check.ip_protocol == IPPROTO_UDP) {
123 struct udphdr *uh = data;
124
125 if (uh + 1 > data_end)
126 GOTO_DONE(DROP_MISC);
127
128 data_check.skb_ports[0] = uh->source;
129 data_check.skb_ports[1] = uh->dest;
130
131 if (sizeof(struct udphdr) + sizeof(*cmd) > data_check.len)
132 GOTO_DONE(DROP_ERR_SKB_DATA);
133 if (data + sizeof(struct udphdr) + sizeof(*cmd) > data_end) {
134 if (bpf_skb_load_bytes(reuse_md, sizeof(struct udphdr),
135 &cmd_copy, sizeof(cmd_copy)))
136 GOTO_DONE(DROP_MISC);
137 cmd = &cmd_copy;
138 } else {
139 cmd = data + sizeof(struct udphdr);
140 }
141 } else {
142 GOTO_DONE(DROP_MISC);
143 }
144
145 reuseport_array = bpf_map_lookup_elem(&outer_map, &index_zero);
146 if (!reuseport_array)
147 GOTO_DONE(DROP_ERR_INNER_MAP);
148
149 index = cmd->reuseport_index;
150 index_ovr = bpf_map_lookup_elem(&tmp_index_ovr_map, &index_zero);
151 if (!index_ovr)
152 GOTO_DONE(DROP_MISC);
153
154 if (*index_ovr != -1) {
155 index = *index_ovr;
156 *index_ovr = -1;
157 }
158 err = bpf_sk_select_reuseport(reuse_md, reuseport_array, &index,
159 flags);
160 if (!err)
161 GOTO_DONE(PASS);
162
163 if (cmd->pass_on_failure)
164 GOTO_DONE(PASS_ERR_SK_SELECT_REUSEPORT);
165 else
166 GOTO_DONE(DROP_ERR_SK_SELECT_REUSEPORT);
167
168done:
169 result_cnt = bpf_map_lookup_elem(&result_map, &result);
170 if (!result_cnt)
171 return SK_DROP;
172
173 bpf_map_update_elem(&linum_map, &index_zero, &linum, BPF_ANY);
174 bpf_map_update_elem(&data_check_map, &index_zero, &data_check, BPF_ANY);
175
176 (*result_cnt)++;
177 return result < PASS ? SK_DROP : SK_PASS;
178}
179
180char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_sock.c b/tools/testing/selftests/bpf/test_sock.c
index f4d99fabc56d..b8ebe2f58074 100644
--- a/tools/testing/selftests/bpf/test_sock.c
+++ b/tools/testing/selftests/bpf/test_sock.c
@@ -14,10 +14,7 @@
14 14
15#include "cgroup_helpers.h" 15#include "cgroup_helpers.h"
16#include "bpf_rlimit.h" 16#include "bpf_rlimit.h"
17 17#include "bpf_util.h"
18#ifndef ARRAY_SIZE
19# define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
20#endif
21 18
22#define CG_PATH "/foo" 19#define CG_PATH "/foo"
23#define MAX_INSNS 512 20#define MAX_INSNS 512
diff --git a/tools/testing/selftests/bpf/test_sock_addr.c b/tools/testing/selftests/bpf/test_sock_addr.c
index 2e45c92d1111..aeeb76a54d63 100644
--- a/tools/testing/selftests/bpf/test_sock_addr.c
+++ b/tools/testing/selftests/bpf/test_sock_addr.c
@@ -20,15 +20,12 @@
20 20
21#include "cgroup_helpers.h" 21#include "cgroup_helpers.h"
22#include "bpf_rlimit.h" 22#include "bpf_rlimit.h"
23#include "bpf_util.h"
23 24
24#ifndef ENOTSUPP 25#ifndef ENOTSUPP
25# define ENOTSUPP 524 26# define ENOTSUPP 524
26#endif 27#endif
27 28
28#ifndef ARRAY_SIZE
29# define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
30#endif
31
32#define CG_PATH "/foo" 29#define CG_PATH "/foo"
33#define CONNECT4_PROG_PATH "./connect4_prog.o" 30#define CONNECT4_PROG_PATH "./connect4_prog.o"
34#define CONNECT6_PROG_PATH "./connect6_prog.o" 31#define CONNECT6_PROG_PATH "./connect6_prog.o"
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 452cf5c6c784..67c412d19c09 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -42,12 +42,9 @@
42#endif 42#endif
43#include "bpf_rlimit.h" 43#include "bpf_rlimit.h"
44#include "bpf_rand.h" 44#include "bpf_rand.h"
45#include "bpf_util.h"
45#include "../../../include/linux/filter.h" 46#include "../../../include/linux/filter.h"
46 47
47#ifndef ARRAY_SIZE
48# define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
49#endif
50
51#define MAX_INSNS BPF_MAXINSNS 48#define MAX_INSNS BPF_MAXINSNS
52#define MAX_FIXUPS 8 49#define MAX_FIXUPS 8
53#define MAX_NR_MAPS 8 50#define MAX_NR_MAPS 8