aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2019-06-20 00:06:27 -0400
committerDavid S. Miller <davem@davemloft.net>2019-06-20 00:06:27 -0400
commitdca73a65a68329ee386d3ff473152bac66eaab39 (patch)
tree97c41afb932bdd6cbe67e7ffc38bfe5952c97798
parent497ad9f5b2dc86b733761b9afa44ecfa2f17be65 (diff)
parent94079b64255fe40b9b53fd2e4081f68b9b14f54a (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Alexei Starovoitov says: ==================== pull-request: bpf-next 2019-06-19 The following pull-request contains BPF updates for your *net-next* tree. The main changes are: 1) new SO_DETACH_REUSEPORT_BPF setsockopt, from Martin. 2) BTF based map definition, from Andrii. 3) support bpf_map_lookup_elem for xskmap, from Jonathan. 4) bounded loops and scalar precision logic in the verifier, from Alexei. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--arch/alpha/include/uapi/asm/socket.h2
-rw-r--r--arch/mips/include/uapi/asm/socket.h2
-rw-r--r--arch/parisc/include/uapi/asm/socket.h2
-rw-r--r--arch/sparc/include/uapi/asm/socket.h2
-rw-r--r--include/linux/bpf.h25
-rw-r--r--include/linux/bpf_verifier.h69
-rw-r--r--include/net/sock_reuseport.h2
-rw-r--r--include/net/xdp_sock.h4
-rw-r--r--include/uapi/asm-generic/socket.h2
-rw-r--r--include/uapi/linux/bpf.h6
-rw-r--r--kernel/bpf/Makefile1
-rw-r--r--kernel/bpf/devmap.c2
-rw-r--r--kernel/bpf/verifier.c793
-rw-r--r--kernel/bpf/xskmap.c9
-rw-r--r--net/core/filter.c86
-rw-r--r--net/core/sock.c4
-rw-r--r--net/core/sock_reuseport.c24
-rw-r--r--samples/bpf/Makefile23
-rw-r--r--samples/bpf/fds_example.c2
-rw-r--r--samples/bpf/hbm.c6
-rw-r--r--samples/bpf/ibumad_user.c2
-rw-r--r--samples/bpf/sockex1_user.c2
-rw-r--r--samples/bpf/sockex2_user.c2
-rw-r--r--samples/bpf/xdp1_user.c4
-rw-r--r--samples/bpf/xdp_adjust_tail_user.c4
-rw-r--r--samples/bpf/xdp_fwd_user.c2
-rw-r--r--samples/bpf/xdp_redirect_cpu_user.c2
-rw-r--r--samples/bpf/xdp_redirect_map_user.c2
-rw-r--r--samples/bpf/xdp_redirect_user.c2
-rw-r--r--samples/bpf/xdp_router_ipv4_user.c2
-rw-r--r--samples/bpf/xdp_rxq_info_user.c4
-rw-r--r--samples/bpf/xdp_tx_iptunnel_user.c2
-rw-r--r--samples/bpf/xdpsock_user.c4
-rw-r--r--tools/bpf/bpftool/common.c53
-rw-r--r--tools/include/uapi/asm-generic/socket.h147
-rw-r--r--tools/include/uapi/linux/bpf.h6
-rw-r--r--tools/lib/bpf/bpf.c7
-rw-r--r--tools/lib/bpf/bpf_prog_linfo.c5
-rw-r--r--tools/lib/bpf/btf.c3
-rw-r--r--tools/lib/bpf/btf.h1
-rw-r--r--tools/lib/bpf/btf_dump.c3
-rw-r--r--tools/lib/bpf/libbpf.c927
-rw-r--r--tools/lib/bpf/libbpf.h78
-rw-r--r--tools/lib/bpf/libbpf.map1
-rw-r--r--tools/lib/bpf/libbpf_internal.h7
-rw-r--r--tools/lib/bpf/xsk.c103
-rw-r--r--tools/testing/selftests/bpf/Makefile3
-rw-r--r--tools/testing/selftests/bpf/bpf_endian.h1
-rw-r--r--tools/testing/selftests/bpf/bpf_helpers.h4
-rw-r--r--tools/testing/selftests/bpf/bpf_util.h37
-rw-r--r--tools/testing/selftests/bpf/cgroup_helpers.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c67
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_flow.c18
-rw-r--r--tools/testing/selftests/bpf/progs/loop1.c28
-rw-r--r--tools/testing/selftests/bpf/progs/loop2.c28
-rw-r--r--tools/testing/selftests/bpf/progs/loop3.c22
-rw-r--r--tools/testing/selftests/bpf/progs/netcnt_prog.c22
-rw-r--r--tools/testing/selftests/bpf/progs/pyperf.h6
-rw-r--r--tools/testing/selftests/bpf/progs/pyperf600.c9
-rw-r--r--tools/testing/selftests/bpf/progs/pyperf600_nounroll.c8
-rw-r--r--tools/testing/selftests/bpf/progs/socket_cookie_prog.c49
-rw-r--r--tools/testing/selftests/bpf/progs/sockmap_parse_prog.c1
-rw-r--r--tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c2
-rw-r--r--tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c1
-rw-r--r--tools/testing/selftests/bpf/progs/strobemeta.c10
-rw-r--r--tools/testing/selftests/bpf/progs/strobemeta.h528
-rw-r--r--tools/testing/selftests/bpf/progs/strobemeta_nounroll1.c9
-rw-r--r--tools/testing/selftests/bpf/progs/strobemeta_nounroll2.c9
-rw-r--r--tools/testing/selftests/bpf/progs/test_btf_newkv.c73
-rw-r--r--tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c27
-rw-r--r--tools/testing/selftests/bpf/progs/test_global_data.c27
-rw-r--r--tools/testing/selftests/bpf/progs/test_l4lb.c45
-rw-r--r--tools/testing/selftests/bpf/progs/test_l4lb_noinline.c45
-rw-r--r--tools/testing/selftests/bpf/progs/test_map_lock.c22
-rw-r--r--tools/testing/selftests/bpf/progs/test_seg6_loop.c261
-rw-r--r--tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c45
-rw-r--r--tools/testing/selftests/bpf/progs/test_send_signal_kern.c22
-rw-r--r--tools/testing/selftests/bpf/progs/test_sock_fields_kern.c60
-rw-r--r--tools/testing/selftests/bpf/progs/test_spin_lock.c33
-rw-r--r--tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c44
-rw-r--r--tools/testing/selftests/bpf/progs/test_stacktrace_map.c40
-rw-r--r--tools/testing/selftests/bpf/progs/test_sysctl_loop1.c71
-rw-r--r--tools/testing/selftests/bpf/progs/test_sysctl_loop2.c72
-rw-r--r--tools/testing/selftests/bpf/progs/test_sysctl_prog.c5
-rw-r--r--tools/testing/selftests/bpf/progs/test_tcp_estats.c9
-rw-r--r--tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c18
-rw-r--r--tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c18
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp.c18
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_loop.c231
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_noinline.c60
-rw-r--r--tools/testing/selftests/bpf/test_btf.c10
-rw-r--r--tools/testing/selftests/bpf/test_select_reuseport.c54
-rw-r--r--tools/testing/selftests/bpf/test_socket_cookie.c24
-rw-r--r--tools/testing/selftests/bpf/test_verifier.c11
-rw-r--r--tools/testing/selftests/bpf/verifier/calls.c22
-rw-r--r--tools/testing/selftests/bpf/verifier/cfg.c11
-rw-r--r--tools/testing/selftests/bpf/verifier/direct_packet_access.c3
-rw-r--r--tools/testing/selftests/bpf/verifier/helper_access_var_len.c28
-rw-r--r--tools/testing/selftests/bpf/verifier/loops1.c161
-rw-r--r--tools/testing/selftests/bpf/verifier/prevent_map_lookup.c15
-rw-r--r--tools/testing/selftests/bpf/verifier/sock.c18
101 files changed, 4048 insertions, 860 deletions
diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h
index 976e89b116e5..de6c4df61082 100644
--- a/arch/alpha/include/uapi/asm/socket.h
+++ b/arch/alpha/include/uapi/asm/socket.h
@@ -122,6 +122,8 @@
122#define SO_RCVTIMEO_NEW 66 122#define SO_RCVTIMEO_NEW 66
123#define SO_SNDTIMEO_NEW 67 123#define SO_SNDTIMEO_NEW 67
124 124
125#define SO_DETACH_REUSEPORT_BPF 68
126
125#if !defined(__KERNEL__) 127#if !defined(__KERNEL__)
126 128
127#if __BITS_PER_LONG == 64 129#if __BITS_PER_LONG == 64
diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h
index d41765cfbc6e..d0a9ed2ca2d6 100644
--- a/arch/mips/include/uapi/asm/socket.h
+++ b/arch/mips/include/uapi/asm/socket.h
@@ -133,6 +133,8 @@
133#define SO_RCVTIMEO_NEW 66 133#define SO_RCVTIMEO_NEW 66
134#define SO_SNDTIMEO_NEW 67 134#define SO_SNDTIMEO_NEW 67
135 135
136#define SO_DETACH_REUSEPORT_BPF 68
137
136#if !defined(__KERNEL__) 138#if !defined(__KERNEL__)
137 139
138#if __BITS_PER_LONG == 64 140#if __BITS_PER_LONG == 64
diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h
index 66c5dd245ac7..10173c32195e 100644
--- a/arch/parisc/include/uapi/asm/socket.h
+++ b/arch/parisc/include/uapi/asm/socket.h
@@ -114,6 +114,8 @@
114#define SO_RCVTIMEO_NEW 0x4040 114#define SO_RCVTIMEO_NEW 0x4040
115#define SO_SNDTIMEO_NEW 0x4041 115#define SO_SNDTIMEO_NEW 0x4041
116 116
117#define SO_DETACH_REUSEPORT_BPF 0x4042
118
117#if !defined(__KERNEL__) 119#if !defined(__KERNEL__)
118 120
119#if __BITS_PER_LONG == 64 121#if __BITS_PER_LONG == 64
diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h
index 9265a9eece15..8029b681fc7c 100644
--- a/arch/sparc/include/uapi/asm/socket.h
+++ b/arch/sparc/include/uapi/asm/socket.h
@@ -115,6 +115,8 @@
115#define SO_RCVTIMEO_NEW 0x0044 115#define SO_RCVTIMEO_NEW 0x0044
116#define SO_SNDTIMEO_NEW 0x0045 116#define SO_SNDTIMEO_NEW 0x0045
117 117
118#define SO_DETACH_REUSEPORT_BPF 0x0047
119
118#if !defined(__KERNEL__) 120#if !defined(__KERNEL__)
119 121
120 122
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 9f7c453db70c..a62e7889b0b6 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -277,6 +277,7 @@ enum bpf_reg_type {
277 PTR_TO_TCP_SOCK, /* reg points to struct tcp_sock */ 277 PTR_TO_TCP_SOCK, /* reg points to struct tcp_sock */
278 PTR_TO_TCP_SOCK_OR_NULL, /* reg points to struct tcp_sock or NULL */ 278 PTR_TO_TCP_SOCK_OR_NULL, /* reg points to struct tcp_sock or NULL */
279 PTR_TO_TP_BUFFER, /* reg points to a writable raw tp's buffer */ 279 PTR_TO_TP_BUFFER, /* reg points to a writable raw tp's buffer */
280 PTR_TO_XDP_SOCK, /* reg points to struct xdp_sock */
280}; 281};
281 282
282/* The information passed from prog-specific *_is_valid_access 283/* The information passed from prog-specific *_is_valid_access
@@ -1098,6 +1099,15 @@ u32 bpf_tcp_sock_convert_ctx_access(enum bpf_access_type type,
1098 struct bpf_insn *insn_buf, 1099 struct bpf_insn *insn_buf,
1099 struct bpf_prog *prog, 1100 struct bpf_prog *prog,
1100 u32 *target_size); 1101 u32 *target_size);
1102
1103bool bpf_xdp_sock_is_valid_access(int off, int size, enum bpf_access_type type,
1104 struct bpf_insn_access_aux *info);
1105
1106u32 bpf_xdp_sock_convert_ctx_access(enum bpf_access_type type,
1107 const struct bpf_insn *si,
1108 struct bpf_insn *insn_buf,
1109 struct bpf_prog *prog,
1110 u32 *target_size);
1101#else 1111#else
1102static inline bool bpf_tcp_sock_is_valid_access(int off, int size, 1112static inline bool bpf_tcp_sock_is_valid_access(int off, int size,
1103 enum bpf_access_type type, 1113 enum bpf_access_type type,
@@ -1114,6 +1124,21 @@ static inline u32 bpf_tcp_sock_convert_ctx_access(enum bpf_access_type type,
1114{ 1124{
1115 return 0; 1125 return 0;
1116} 1126}
1127static inline bool bpf_xdp_sock_is_valid_access(int off, int size,
1128 enum bpf_access_type type,
1129 struct bpf_insn_access_aux *info)
1130{
1131 return false;
1132}
1133
1134static inline u32 bpf_xdp_sock_convert_ctx_access(enum bpf_access_type type,
1135 const struct bpf_insn *si,
1136 struct bpf_insn *insn_buf,
1137 struct bpf_prog *prog,
1138 u32 *target_size)
1139{
1140 return 0;
1141}
1117#endif /* CONFIG_INET */ 1142#endif /* CONFIG_INET */
1118 1143
1119#endif /* _LINUX_BPF_H */ 1144#endif /* _LINUX_BPF_H */
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 026ba8b81e88..5fe99f322b1c 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -136,6 +136,8 @@ struct bpf_reg_state {
136 */ 136 */
137 s32 subreg_def; 137 s32 subreg_def;
138 enum bpf_reg_liveness live; 138 enum bpf_reg_liveness live;
139 /* if (!precise && SCALAR_VALUE) min/max/tnum don't affect safety */
140 bool precise;
139}; 141};
140 142
141enum bpf_stack_slot_type { 143enum bpf_stack_slot_type {
@@ -187,14 +189,77 @@ struct bpf_func_state {
187 struct bpf_stack_state *stack; 189 struct bpf_stack_state *stack;
188}; 190};
189 191
192struct bpf_idx_pair {
193 u32 prev_idx;
194 u32 idx;
195};
196
190#define MAX_CALL_FRAMES 8 197#define MAX_CALL_FRAMES 8
191struct bpf_verifier_state { 198struct bpf_verifier_state {
192 /* call stack tracking */ 199 /* call stack tracking */
193 struct bpf_func_state *frame[MAX_CALL_FRAMES]; 200 struct bpf_func_state *frame[MAX_CALL_FRAMES];
201 struct bpf_verifier_state *parent;
202 /*
203 * 'branches' field is the number of branches left to explore:
204 * 0 - all possible paths from this state reached bpf_exit or
205 * were safely pruned
206 * 1 - at least one path is being explored.
207 * This state hasn't reached bpf_exit
208 * 2 - at least two paths are being explored.
209 * This state is an immediate parent of two children.
210 * One is fallthrough branch with branches==1 and another
211 * state is pushed into stack (to be explored later) also with
212 * branches==1. The parent of this state has branches==1.
213 * The verifier state tree connected via 'parent' pointer looks like:
214 * 1
215 * 1
216 * 2 -> 1 (first 'if' pushed into stack)
217 * 1
218 * 2 -> 1 (second 'if' pushed into stack)
219 * 1
220 * 1
221 * 1 bpf_exit.
222 *
223 * Once do_check() reaches bpf_exit, it calls update_branch_counts()
224 * and the verifier state tree will look like:
225 * 1
226 * 1
227 * 2 -> 1 (first 'if' pushed into stack)
228 * 1
229 * 1 -> 1 (second 'if' pushed into stack)
230 * 0
231 * 0
232 * 0 bpf_exit.
233 * After pop_stack() the do_check() will resume at second 'if'.
234 *
235 * If is_state_visited() sees a state with branches > 0 it means
236 * there is a loop. If such state is exactly equal to the current state
237 * it's an infinite loop. Note states_equal() checks for states
238 * equvalency, so two states being 'states_equal' does not mean
239 * infinite loop. The exact comparison is provided by
240 * states_maybe_looping() function. It's a stronger pre-check and
241 * much faster than states_equal().
242 *
243 * This algorithm may not find all possible infinite loops or
244 * loop iteration count may be too high.
245 * In such cases BPF_COMPLEXITY_LIMIT_INSNS limit kicks in.
246 */
247 u32 branches;
194 u32 insn_idx; 248 u32 insn_idx;
195 u32 curframe; 249 u32 curframe;
196 u32 active_spin_lock; 250 u32 active_spin_lock;
197 bool speculative; 251 bool speculative;
252
253 /* first and last insn idx of this verifier state */
254 u32 first_insn_idx;
255 u32 last_insn_idx;
256 /* jmp history recorded from first to last.
257 * backtracking is using it to go from last to first.
258 * For most states jmp_history_cnt is [0-3].
259 * For loops can go up to ~40.
260 */
261 struct bpf_idx_pair *jmp_history;
262 u32 jmp_history_cnt;
198}; 263};
199 264
200#define bpf_get_spilled_reg(slot, frame) \ 265#define bpf_get_spilled_reg(slot, frame) \
@@ -309,7 +374,9 @@ struct bpf_verifier_env {
309 } cfg; 374 } cfg;
310 u32 subprog_cnt; 375 u32 subprog_cnt;
311 /* number of instructions analyzed by the verifier */ 376 /* number of instructions analyzed by the verifier */
312 u32 insn_processed; 377 u32 prev_insn_processed, insn_processed;
378 /* number of jmps, calls, exits analyzed so far */
379 u32 prev_jmps_processed, jmps_processed;
313 /* total verification time */ 380 /* total verification time */
314 u64 verification_time; 381 u64 verification_time;
315 /* maximum number of verifier states kept in 'branching' instructions */ 382 /* maximum number of verifier states kept in 'branching' instructions */
diff --git a/include/net/sock_reuseport.h b/include/net/sock_reuseport.h
index 8a5f70c7cdf2..d9112de85261 100644
--- a/include/net/sock_reuseport.h
+++ b/include/net/sock_reuseport.h
@@ -35,6 +35,8 @@ extern struct sock *reuseport_select_sock(struct sock *sk,
35 struct sk_buff *skb, 35 struct sk_buff *skb,
36 int hdr_len); 36 int hdr_len);
37extern int reuseport_attach_prog(struct sock *sk, struct bpf_prog *prog); 37extern int reuseport_attach_prog(struct sock *sk, struct bpf_prog *prog);
38extern int reuseport_detach_prog(struct sock *sk);
39
38int reuseport_get_id(struct sock_reuseport *reuse); 40int reuseport_get_id(struct sock_reuseport *reuse);
39 41
40#endif /* _SOCK_REUSEPORT_H */ 42#endif /* _SOCK_REUSEPORT_H */
diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
index d074b6d60f8a..ae0f368a62bb 100644
--- a/include/net/xdp_sock.h
+++ b/include/net/xdp_sock.h
@@ -58,11 +58,11 @@ struct xdp_sock {
58 struct xdp_umem *umem; 58 struct xdp_umem *umem;
59 struct list_head flush_node; 59 struct list_head flush_node;
60 u16 queue_id; 60 u16 queue_id;
61 struct xsk_queue *tx ____cacheline_aligned_in_smp;
62 struct list_head list;
63 bool zc; 61 bool zc;
64 /* Protects multiple processes in the control path */ 62 /* Protects multiple processes in the control path */
65 struct mutex mutex; 63 struct mutex mutex;
64 struct xsk_queue *tx ____cacheline_aligned_in_smp;
65 struct list_head list;
66 /* Mutual exclusion of NAPI TX thread and sendmsg error paths 66 /* Mutual exclusion of NAPI TX thread and sendmsg error paths
67 * in the SKB destructor callback. 67 * in the SKB destructor callback.
68 */ 68 */
diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h
index 8c1391c89171..77f7c1638eb1 100644
--- a/include/uapi/asm-generic/socket.h
+++ b/include/uapi/asm-generic/socket.h
@@ -117,6 +117,8 @@
117#define SO_RCVTIMEO_NEW 66 117#define SO_RCVTIMEO_NEW 66
118#define SO_SNDTIMEO_NEW 67 118#define SO_SNDTIMEO_NEW 67
119 119
120#define SO_DETACH_REUSEPORT_BPF 68
121
120#if !defined(__KERNEL__) 122#if !defined(__KERNEL__)
121 123
122#if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__)) 124#if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__))
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 0e879721f75a..b077507efa3f 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -3085,6 +3085,10 @@ struct bpf_sock_tuple {
3085 }; 3085 };
3086}; 3086};
3087 3087
3088struct bpf_xdp_sock {
3089 __u32 queue_id;
3090};
3091
3088#define XDP_PACKET_HEADROOM 256 3092#define XDP_PACKET_HEADROOM 256
3089 3093
3090/* User return codes for XDP prog type. 3094/* User return codes for XDP prog type.
@@ -3245,6 +3249,7 @@ struct bpf_sock_addr {
3245 __u32 msg_src_ip6[4]; /* Allows 1,2,4-byte read and 4-byte write. 3249 __u32 msg_src_ip6[4]; /* Allows 1,2,4-byte read and 4-byte write.
3246 * Stored in network byte order. 3250 * Stored in network byte order.
3247 */ 3251 */
3252 __bpf_md_ptr(struct bpf_sock *, sk);
3248}; 3253};
3249 3254
3250/* User bpf_sock_ops struct to access socket values and specify request ops 3255/* User bpf_sock_ops struct to access socket values and specify request ops
@@ -3296,6 +3301,7 @@ struct bpf_sock_ops {
3296 __u32 sk_txhash; 3301 __u32 sk_txhash;
3297 __u64 bytes_received; 3302 __u64 bytes_received;
3298 __u64 bytes_acked; 3303 __u64 bytes_acked;
3304 __bpf_md_ptr(struct bpf_sock *, sk);
3299}; 3305};
3300 3306
3301/* Definitions for bpf_sock_ops_cb_flags */ 3307/* Definitions for bpf_sock_ops_cb_flags */
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index 4c2fa3ac56f6..29d781061cd5 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -1,5 +1,6 @@
1# SPDX-License-Identifier: GPL-2.0 1# SPDX-License-Identifier: GPL-2.0
2obj-y := core.o 2obj-y := core.o
3CFLAGS_core.o += $(call cc-disable-warning, override-init)
3 4
4obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o 5obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o
5obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o 6obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index b84c44505e06..40e86a7e0ef0 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -80,8 +80,8 @@ static u64 dev_map_bitmap_size(const union bpf_attr *attr)
80static struct bpf_map *dev_map_alloc(union bpf_attr *attr) 80static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
81{ 81{
82 struct bpf_dtab *dtab; 82 struct bpf_dtab *dtab;
83 int err = -EINVAL;
84 u64 cost; 83 u64 cost;
84 int err;
85 85
86 if (!capable(CAP_NET_ADMIN)) 86 if (!capable(CAP_NET_ADMIN))
87 return ERR_PTR(-EPERM); 87 return ERR_PTR(-EPERM);
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 1e9d10b32984..0e079b2298f8 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -326,7 +326,8 @@ static bool type_is_sk_pointer(enum bpf_reg_type type)
326{ 326{
327 return type == PTR_TO_SOCKET || 327 return type == PTR_TO_SOCKET ||
328 type == PTR_TO_SOCK_COMMON || 328 type == PTR_TO_SOCK_COMMON ||
329 type == PTR_TO_TCP_SOCK; 329 type == PTR_TO_TCP_SOCK ||
330 type == PTR_TO_XDP_SOCK;
330} 331}
331 332
332static bool reg_type_may_be_null(enum bpf_reg_type type) 333static bool reg_type_may_be_null(enum bpf_reg_type type)
@@ -398,6 +399,7 @@ static const char * const reg_type_str[] = {
398 [PTR_TO_TCP_SOCK] = "tcp_sock", 399 [PTR_TO_TCP_SOCK] = "tcp_sock",
399 [PTR_TO_TCP_SOCK_OR_NULL] = "tcp_sock_or_null", 400 [PTR_TO_TCP_SOCK_OR_NULL] = "tcp_sock_or_null",
400 [PTR_TO_TP_BUFFER] = "tp_buffer", 401 [PTR_TO_TP_BUFFER] = "tp_buffer",
402 [PTR_TO_XDP_SOCK] = "xdp_sock",
401}; 403};
402 404
403static char slot_type_char[] = { 405static char slot_type_char[] = {
@@ -445,12 +447,12 @@ static void print_verifier_state(struct bpf_verifier_env *env,
445 verbose(env, " R%d", i); 447 verbose(env, " R%d", i);
446 print_liveness(env, reg->live); 448 print_liveness(env, reg->live);
447 verbose(env, "=%s", reg_type_str[t]); 449 verbose(env, "=%s", reg_type_str[t]);
450 if (t == SCALAR_VALUE && reg->precise)
451 verbose(env, "P");
448 if ((t == SCALAR_VALUE || t == PTR_TO_STACK) && 452 if ((t == SCALAR_VALUE || t == PTR_TO_STACK) &&
449 tnum_is_const(reg->var_off)) { 453 tnum_is_const(reg->var_off)) {
450 /* reg->off should be 0 for SCALAR_VALUE */ 454 /* reg->off should be 0 for SCALAR_VALUE */
451 verbose(env, "%lld", reg->var_off.value + reg->off); 455 verbose(env, "%lld", reg->var_off.value + reg->off);
452 if (t == PTR_TO_STACK)
453 verbose(env, ",call_%d", func(env, reg)->callsite);
454 } else { 456 } else {
455 verbose(env, "(id=%d", reg->id); 457 verbose(env, "(id=%d", reg->id);
456 if (reg_type_may_be_refcounted_or_null(t)) 458 if (reg_type_may_be_refcounted_or_null(t))
@@ -512,11 +514,17 @@ static void print_verifier_state(struct bpf_verifier_env *env,
512 continue; 514 continue;
513 verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE); 515 verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE);
514 print_liveness(env, state->stack[i].spilled_ptr.live); 516 print_liveness(env, state->stack[i].spilled_ptr.live);
515 if (state->stack[i].slot_type[0] == STACK_SPILL) 517 if (state->stack[i].slot_type[0] == STACK_SPILL) {
516 verbose(env, "=%s", 518 reg = &state->stack[i].spilled_ptr;
517 reg_type_str[state->stack[i].spilled_ptr.type]); 519 t = reg->type;
518 else 520 verbose(env, "=%s", reg_type_str[t]);
521 if (t == SCALAR_VALUE && reg->precise)
522 verbose(env, "P");
523 if (t == SCALAR_VALUE && tnum_is_const(reg->var_off))
524 verbose(env, "%lld", reg->var_off.value + reg->off);
525 } else {
519 verbose(env, "=%s", types_buf); 526 verbose(env, "=%s", types_buf);
527 }
520 } 528 }
521 if (state->acquired_refs && state->refs[0].id) { 529 if (state->acquired_refs && state->refs[0].id) {
522 verbose(env, " refs=%d", state->refs[0].id); 530 verbose(env, " refs=%d", state->refs[0].id);
@@ -665,6 +673,13 @@ static void free_func_state(struct bpf_func_state *state)
665 kfree(state); 673 kfree(state);
666} 674}
667 675
676static void clear_jmp_history(struct bpf_verifier_state *state)
677{
678 kfree(state->jmp_history);
679 state->jmp_history = NULL;
680 state->jmp_history_cnt = 0;
681}
682
668static void free_verifier_state(struct bpf_verifier_state *state, 683static void free_verifier_state(struct bpf_verifier_state *state,
669 bool free_self) 684 bool free_self)
670{ 685{
@@ -674,6 +689,7 @@ static void free_verifier_state(struct bpf_verifier_state *state,
674 free_func_state(state->frame[i]); 689 free_func_state(state->frame[i]);
675 state->frame[i] = NULL; 690 state->frame[i] = NULL;
676 } 691 }
692 clear_jmp_history(state);
677 if (free_self) 693 if (free_self)
678 kfree(state); 694 kfree(state);
679} 695}
@@ -701,8 +717,18 @@ static int copy_verifier_state(struct bpf_verifier_state *dst_state,
701 const struct bpf_verifier_state *src) 717 const struct bpf_verifier_state *src)
702{ 718{
703 struct bpf_func_state *dst; 719 struct bpf_func_state *dst;
720 u32 jmp_sz = sizeof(struct bpf_idx_pair) * src->jmp_history_cnt;
704 int i, err; 721 int i, err;
705 722
723 if (dst_state->jmp_history_cnt < src->jmp_history_cnt) {
724 kfree(dst_state->jmp_history);
725 dst_state->jmp_history = kmalloc(jmp_sz, GFP_USER);
726 if (!dst_state->jmp_history)
727 return -ENOMEM;
728 }
729 memcpy(dst_state->jmp_history, src->jmp_history, jmp_sz);
730 dst_state->jmp_history_cnt = src->jmp_history_cnt;
731
706 /* if dst has more stack frames than src frame, free them */ 732 /* if dst has more stack frames than src frame, free them */
707 for (i = src->curframe + 1; i <= dst_state->curframe; i++) { 733 for (i = src->curframe + 1; i <= dst_state->curframe; i++) {
708 free_func_state(dst_state->frame[i]); 734 free_func_state(dst_state->frame[i]);
@@ -711,6 +737,10 @@ static int copy_verifier_state(struct bpf_verifier_state *dst_state,
711 dst_state->speculative = src->speculative; 737 dst_state->speculative = src->speculative;
712 dst_state->curframe = src->curframe; 738 dst_state->curframe = src->curframe;
713 dst_state->active_spin_lock = src->active_spin_lock; 739 dst_state->active_spin_lock = src->active_spin_lock;
740 dst_state->branches = src->branches;
741 dst_state->parent = src->parent;
742 dst_state->first_insn_idx = src->first_insn_idx;
743 dst_state->last_insn_idx = src->last_insn_idx;
714 for (i = 0; i <= src->curframe; i++) { 744 for (i = 0; i <= src->curframe; i++) {
715 dst = dst_state->frame[i]; 745 dst = dst_state->frame[i];
716 if (!dst) { 746 if (!dst) {
@@ -726,6 +756,23 @@ static int copy_verifier_state(struct bpf_verifier_state *dst_state,
726 return 0; 756 return 0;
727} 757}
728 758
759static void update_branch_counts(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
760{
761 while (st) {
762 u32 br = --st->branches;
763
764 /* WARN_ON(br > 1) technically makes sense here,
765 * but see comment in push_stack(), hence:
766 */
767 WARN_ONCE((int)br < 0,
768 "BUG update_branch_counts:branches_to_explore=%d\n",
769 br);
770 if (br)
771 break;
772 st = st->parent;
773 }
774}
775
729static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx, 776static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx,
730 int *insn_idx) 777 int *insn_idx)
731{ 778{
@@ -779,6 +826,18 @@ static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env,
779 env->stack_size); 826 env->stack_size);
780 goto err; 827 goto err;
781 } 828 }
829 if (elem->st.parent) {
830 ++elem->st.parent->branches;
831 /* WARN_ON(branches > 2) technically makes sense here,
832 * but
833 * 1. speculative states will bump 'branches' for non-branch
834 * instructions
835 * 2. is_state_visited() heuristics may decide not to create
836 * a new state for a sequence of branches and all such current
837 * and cloned states will be pointing to a single parent state
838 * which might have large 'branches' count.
839 */
840 }
782 return &elem->st; 841 return &elem->st;
783err: 842err:
784 free_verifier_state(env->cur_state, true); 843 free_verifier_state(env->cur_state, true);
@@ -926,6 +985,9 @@ static void __mark_reg_unbounded(struct bpf_reg_state *reg)
926 reg->smax_value = S64_MAX; 985 reg->smax_value = S64_MAX;
927 reg->umin_value = 0; 986 reg->umin_value = 0;
928 reg->umax_value = U64_MAX; 987 reg->umax_value = U64_MAX;
988
989 /* constant backtracking is enabled for root only for now */
990 reg->precise = capable(CAP_SYS_ADMIN) ? false : true;
929} 991}
930 992
931/* Mark a register as having a completely unknown (scalar) value. */ 993/* Mark a register as having a completely unknown (scalar) value. */
@@ -1337,6 +1399,389 @@ static int check_reg_arg(struct bpf_verifier_env *env, u32 regno,
1337 return 0; 1399 return 0;
1338} 1400}
1339 1401
1402/* for any branch, call, exit record the history of jmps in the given state */
1403static int push_jmp_history(struct bpf_verifier_env *env,
1404 struct bpf_verifier_state *cur)
1405{
1406 u32 cnt = cur->jmp_history_cnt;
1407 struct bpf_idx_pair *p;
1408
1409 cnt++;
1410 p = krealloc(cur->jmp_history, cnt * sizeof(*p), GFP_USER);
1411 if (!p)
1412 return -ENOMEM;
1413 p[cnt - 1].idx = env->insn_idx;
1414 p[cnt - 1].prev_idx = env->prev_insn_idx;
1415 cur->jmp_history = p;
1416 cur->jmp_history_cnt = cnt;
1417 return 0;
1418}
1419
1420/* Backtrack one insn at a time. If idx is not at the top of recorded
1421 * history then previous instruction came from straight line execution.
1422 */
1423static int get_prev_insn_idx(struct bpf_verifier_state *st, int i,
1424 u32 *history)
1425{
1426 u32 cnt = *history;
1427
1428 if (cnt && st->jmp_history[cnt - 1].idx == i) {
1429 i = st->jmp_history[cnt - 1].prev_idx;
1430 (*history)--;
1431 } else {
1432 i--;
1433 }
1434 return i;
1435}
1436
1437/* For given verifier state backtrack_insn() is called from the last insn to
1438 * the first insn. Its purpose is to compute a bitmask of registers and
1439 * stack slots that needs precision in the parent verifier state.
1440 */
1441static int backtrack_insn(struct bpf_verifier_env *env, int idx,
1442 u32 *reg_mask, u64 *stack_mask)
1443{
1444 const struct bpf_insn_cbs cbs = {
1445 .cb_print = verbose,
1446 .private_data = env,
1447 };
1448 struct bpf_insn *insn = env->prog->insnsi + idx;
1449 u8 class = BPF_CLASS(insn->code);
1450 u8 opcode = BPF_OP(insn->code);
1451 u8 mode = BPF_MODE(insn->code);
1452 u32 dreg = 1u << insn->dst_reg;
1453 u32 sreg = 1u << insn->src_reg;
1454 u32 spi;
1455
1456 if (insn->code == 0)
1457 return 0;
1458 if (env->log.level & BPF_LOG_LEVEL) {
1459 verbose(env, "regs=%x stack=%llx before ", *reg_mask, *stack_mask);
1460 verbose(env, "%d: ", idx);
1461 print_bpf_insn(&cbs, insn, env->allow_ptr_leaks);
1462 }
1463
1464 if (class == BPF_ALU || class == BPF_ALU64) {
1465 if (!(*reg_mask & dreg))
1466 return 0;
1467 if (opcode == BPF_MOV) {
1468 if (BPF_SRC(insn->code) == BPF_X) {
1469 /* dreg = sreg
1470 * dreg needs precision after this insn
1471 * sreg needs precision before this insn
1472 */
1473 *reg_mask &= ~dreg;
1474 *reg_mask |= sreg;
1475 } else {
1476 /* dreg = K
1477 * dreg needs precision after this insn.
1478 * Corresponding register is already marked
1479 * as precise=true in this verifier state.
1480 * No further markings in parent are necessary
1481 */
1482 *reg_mask &= ~dreg;
1483 }
1484 } else {
1485 if (BPF_SRC(insn->code) == BPF_X) {
1486 /* dreg += sreg
1487 * both dreg and sreg need precision
1488 * before this insn
1489 */
1490 *reg_mask |= sreg;
1491 } /* else dreg += K
1492 * dreg still needs precision before this insn
1493 */
1494 }
1495 } else if (class == BPF_LDX) {
1496 if (!(*reg_mask & dreg))
1497 return 0;
1498 *reg_mask &= ~dreg;
1499
1500 /* scalars can only be spilled into stack w/o losing precision.
1501 * Load from any other memory can be zero extended.
1502 * The desire to keep that precision is already indicated
1503 * by 'precise' mark in corresponding register of this state.
1504 * No further tracking necessary.
1505 */
1506 if (insn->src_reg != BPF_REG_FP)
1507 return 0;
1508 if (BPF_SIZE(insn->code) != BPF_DW)
1509 return 0;
1510
1511 /* dreg = *(u64 *)[fp - off] was a fill from the stack.
1512 * that [fp - off] slot contains scalar that needs to be
1513 * tracked with precision
1514 */
1515 spi = (-insn->off - 1) / BPF_REG_SIZE;
1516 if (spi >= 64) {
1517 verbose(env, "BUG spi %d\n", spi);
1518 WARN_ONCE(1, "verifier backtracking bug");
1519 return -EFAULT;
1520 }
1521 *stack_mask |= 1ull << spi;
1522 } else if (class == BPF_STX) {
1523 if (*reg_mask & dreg)
1524 /* stx shouldn't be using _scalar_ dst_reg
1525 * to access memory. It means backtracking
1526 * encountered a case of pointer subtraction.
1527 */
1528 return -ENOTSUPP;
1529 /* scalars can only be spilled into stack */
1530 if (insn->dst_reg != BPF_REG_FP)
1531 return 0;
1532 if (BPF_SIZE(insn->code) != BPF_DW)
1533 return 0;
1534 spi = (-insn->off - 1) / BPF_REG_SIZE;
1535 if (spi >= 64) {
1536 verbose(env, "BUG spi %d\n", spi);
1537 WARN_ONCE(1, "verifier backtracking bug");
1538 return -EFAULT;
1539 }
1540 if (!(*stack_mask & (1ull << spi)))
1541 return 0;
1542 *stack_mask &= ~(1ull << spi);
1543 *reg_mask |= sreg;
1544 } else if (class == BPF_JMP || class == BPF_JMP32) {
1545 if (opcode == BPF_CALL) {
1546 if (insn->src_reg == BPF_PSEUDO_CALL)
1547 return -ENOTSUPP;
1548 /* regular helper call sets R0 */
1549 *reg_mask &= ~1;
1550 if (*reg_mask & 0x3f) {
1551 /* if backtracing was looking for registers R1-R5
1552 * they should have been found already.
1553 */
1554 verbose(env, "BUG regs %x\n", *reg_mask);
1555 WARN_ONCE(1, "verifier backtracking bug");
1556 return -EFAULT;
1557 }
1558 } else if (opcode == BPF_EXIT) {
1559 return -ENOTSUPP;
1560 }
1561 } else if (class == BPF_LD) {
1562 if (!(*reg_mask & dreg))
1563 return 0;
1564 *reg_mask &= ~dreg;
1565 /* It's ld_imm64 or ld_abs or ld_ind.
1566 * For ld_imm64 no further tracking of precision
1567 * into parent is necessary
1568 */
1569 if (mode == BPF_IND || mode == BPF_ABS)
1570 /* to be analyzed */
1571 return -ENOTSUPP;
1572 } else if (class == BPF_ST) {
1573 if (*reg_mask & dreg)
1574 /* likely pointer subtraction */
1575 return -ENOTSUPP;
1576 }
1577 return 0;
1578}
1579
1580/* the scalar precision tracking algorithm:
1581 * . at the start all registers have precise=false.
1582 * . scalar ranges are tracked as normal through alu and jmp insns.
1583 * . once precise value of the scalar register is used in:
1584 * . ptr + scalar alu
1585 * . if (scalar cond K|scalar)
1586 * . helper_call(.., scalar, ...) where ARG_CONST is expected
1587 * backtrack through the verifier states and mark all registers and
1588 * stack slots with spilled constants that these scalar registers
1589 * should be precise.
1590 * . during state pruning two registers (or spilled stack slots)
1591 * are equivalent if both are not precise.
1592 *
1593 * Note the verifier cannot simply walk register parentage chain,
1594 * since many different registers and stack slots could have been
1595 * used to compute single precise scalar.
1596 *
1597 * The approach of starting with precise=true for all registers and then
1598 * backtrack to mark a register as not precise when the verifier detects
1599 * that program doesn't care about specific value (e.g., when helper
1600 * takes register as ARG_ANYTHING parameter) is not safe.
1601 *
1602 * It's ok to walk single parentage chain of the verifier states.
1603 * It's possible that this backtracking will go all the way till 1st insn.
1604 * All other branches will be explored for needing precision later.
1605 *
1606 * The backtracking needs to deal with cases like:
1607 * R8=map_value(id=0,off=0,ks=4,vs=1952,imm=0) R9_w=map_value(id=0,off=40,ks=4,vs=1952,imm=0)
1608 * r9 -= r8
1609 * r5 = r9
1610 * if r5 > 0x79f goto pc+7
1611 * R5_w=inv(id=0,umax_value=1951,var_off=(0x0; 0x7ff))
1612 * r5 += 1
1613 * ...
1614 * call bpf_perf_event_output#25
1615 * where .arg5_type = ARG_CONST_SIZE_OR_ZERO
1616 *
1617 * and this case:
1618 * r6 = 1
1619 * call foo // uses callee's r6 inside to compute r0
1620 * r0 += r6
1621 * if r0 == 0 goto
1622 *
1623 * to track above reg_mask/stack_mask needs to be independent for each frame.
1624 *
1625 * Also if parent's curframe > frame where backtracking started,
1626 * the verifier needs to mark registers in both frames, otherwise callees
1627 * may incorrectly prune callers. This is similar to
1628 * commit 7640ead93924 ("bpf: verifier: make sure callees don't prune with caller differences")
1629 *
1630 * For now backtracking falls back into conservative marking.
1631 */
1632static void mark_all_scalars_precise(struct bpf_verifier_env *env,
1633 struct bpf_verifier_state *st)
1634{
1635 struct bpf_func_state *func;
1636 struct bpf_reg_state *reg;
1637 int i, j;
1638
1639 /* big hammer: mark all scalars precise in this path.
1640 * pop_stack may still get !precise scalars.
1641 */
1642 for (; st; st = st->parent)
1643 for (i = 0; i <= st->curframe; i++) {
1644 func = st->frame[i];
1645 for (j = 0; j < BPF_REG_FP; j++) {
1646 reg = &func->regs[j];
1647 if (reg->type != SCALAR_VALUE)
1648 continue;
1649 reg->precise = true;
1650 }
1651 for (j = 0; j < func->allocated_stack / BPF_REG_SIZE; j++) {
1652 if (func->stack[j].slot_type[0] != STACK_SPILL)
1653 continue;
1654 reg = &func->stack[j].spilled_ptr;
1655 if (reg->type != SCALAR_VALUE)
1656 continue;
1657 reg->precise = true;
1658 }
1659 }
1660}
1661
1662static int mark_chain_precision(struct bpf_verifier_env *env, int regno)
1663{
1664 struct bpf_verifier_state *st = env->cur_state;
1665 int first_idx = st->first_insn_idx;
1666 int last_idx = env->insn_idx;
1667 struct bpf_func_state *func;
1668 struct bpf_reg_state *reg;
1669 u32 reg_mask = 1u << regno;
1670 u64 stack_mask = 0;
1671 bool skip_first = true;
1672 int i, err;
1673
1674 if (!env->allow_ptr_leaks)
1675 /* backtracking is root only for now */
1676 return 0;
1677
1678 func = st->frame[st->curframe];
1679 reg = &func->regs[regno];
1680 if (reg->type != SCALAR_VALUE) {
1681 WARN_ONCE(1, "backtracing misuse");
1682 return -EFAULT;
1683 }
1684 if (reg->precise)
1685 return 0;
1686 func->regs[regno].precise = true;
1687
1688 for (;;) {
1689 DECLARE_BITMAP(mask, 64);
1690 bool new_marks = false;
1691 u32 history = st->jmp_history_cnt;
1692
1693 if (env->log.level & BPF_LOG_LEVEL)
1694 verbose(env, "last_idx %d first_idx %d\n", last_idx, first_idx);
1695 for (i = last_idx;;) {
1696 if (skip_first) {
1697 err = 0;
1698 skip_first = false;
1699 } else {
1700 err = backtrack_insn(env, i, &reg_mask, &stack_mask);
1701 }
1702 if (err == -ENOTSUPP) {
1703 mark_all_scalars_precise(env, st);
1704 return 0;
1705 } else if (err) {
1706 return err;
1707 }
1708 if (!reg_mask && !stack_mask)
1709 /* Found assignment(s) into tracked register in this state.
1710 * Since this state is already marked, just return.
1711 * Nothing to be tracked further in the parent state.
1712 */
1713 return 0;
1714 if (i == first_idx)
1715 break;
1716 i = get_prev_insn_idx(st, i, &history);
1717 if (i >= env->prog->len) {
1718 /* This can happen if backtracking reached insn 0
1719 * and there are still reg_mask or stack_mask
1720 * to backtrack.
1721 * It means the backtracking missed the spot where
1722 * particular register was initialized with a constant.
1723 */
1724 verbose(env, "BUG backtracking idx %d\n", i);
1725 WARN_ONCE(1, "verifier backtracking bug");
1726 return -EFAULT;
1727 }
1728 }
1729 st = st->parent;
1730 if (!st)
1731 break;
1732
1733 func = st->frame[st->curframe];
1734 bitmap_from_u64(mask, reg_mask);
1735 for_each_set_bit(i, mask, 32) {
1736 reg = &func->regs[i];
1737 if (reg->type != SCALAR_VALUE)
1738 continue;
1739 if (!reg->precise)
1740 new_marks = true;
1741 reg->precise = true;
1742 }
1743
1744 bitmap_from_u64(mask, stack_mask);
1745 for_each_set_bit(i, mask, 64) {
1746 if (i >= func->allocated_stack / BPF_REG_SIZE) {
1747 /* This can happen if backtracking
1748 * is propagating stack precision where
1749 * caller has larger stack frame
1750 * than callee, but backtrack_insn() should
1751 * have returned -ENOTSUPP.
1752 */
1753 verbose(env, "BUG spi %d stack_size %d\n",
1754 i, func->allocated_stack);
1755 WARN_ONCE(1, "verifier backtracking bug");
1756 return -EFAULT;
1757 }
1758
1759 if (func->stack[i].slot_type[0] != STACK_SPILL)
1760 continue;
1761 reg = &func->stack[i].spilled_ptr;
1762 if (reg->type != SCALAR_VALUE)
1763 continue;
1764 if (!reg->precise)
1765 new_marks = true;
1766 reg->precise = true;
1767 }
1768 if (env->log.level & BPF_LOG_LEVEL) {
1769 print_verifier_state(env, func);
1770 verbose(env, "parent %s regs=%x stack=%llx marks\n",
1771 new_marks ? "didn't have" : "already had",
1772 reg_mask, stack_mask);
1773 }
1774
1775 if (!new_marks)
1776 break;
1777
1778 last_idx = st->last_insn_idx;
1779 first_idx = st->first_insn_idx;
1780 }
1781 return 0;
1782}
1783
1784
1340static bool is_spillable_regtype(enum bpf_reg_type type) 1785static bool is_spillable_regtype(enum bpf_reg_type type)
1341{ 1786{
1342 switch (type) { 1787 switch (type) {
@@ -1355,6 +1800,7 @@ static bool is_spillable_regtype(enum bpf_reg_type type)
1355 case PTR_TO_SOCK_COMMON_OR_NULL: 1800 case PTR_TO_SOCK_COMMON_OR_NULL:
1356 case PTR_TO_TCP_SOCK: 1801 case PTR_TO_TCP_SOCK:
1357 case PTR_TO_TCP_SOCK_OR_NULL: 1802 case PTR_TO_TCP_SOCK_OR_NULL:
1803 case PTR_TO_XDP_SOCK:
1358 return true; 1804 return true;
1359 default: 1805 default:
1360 return false; 1806 return false;
@@ -1367,6 +1813,23 @@ static bool register_is_null(struct bpf_reg_state *reg)
1367 return reg->type == SCALAR_VALUE && tnum_equals_const(reg->var_off, 0); 1813 return reg->type == SCALAR_VALUE && tnum_equals_const(reg->var_off, 0);
1368} 1814}
1369 1815
1816static bool register_is_const(struct bpf_reg_state *reg)
1817{
1818 return reg->type == SCALAR_VALUE && tnum_is_const(reg->var_off);
1819}
1820
1821static void save_register_state(struct bpf_func_state *state,
1822 int spi, struct bpf_reg_state *reg)
1823{
1824 int i;
1825
1826 state->stack[spi].spilled_ptr = *reg;
1827 state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
1828
1829 for (i = 0; i < BPF_REG_SIZE; i++)
1830 state->stack[spi].slot_type[i] = STACK_SPILL;
1831}
1832
1370/* check_stack_read/write functions track spill/fill of registers, 1833/* check_stack_read/write functions track spill/fill of registers,
1371 * stack boundary and alignment are checked in check_mem_access() 1834 * stack boundary and alignment are checked in check_mem_access()
1372 */ 1835 */
@@ -1376,7 +1839,8 @@ static int check_stack_write(struct bpf_verifier_env *env,
1376{ 1839{
1377 struct bpf_func_state *cur; /* state of the current function */ 1840 struct bpf_func_state *cur; /* state of the current function */
1378 int i, slot = -off - 1, spi = slot / BPF_REG_SIZE, err; 1841 int i, slot = -off - 1, spi = slot / BPF_REG_SIZE, err;
1379 enum bpf_reg_type type; 1842 u32 dst_reg = env->prog->insnsi[insn_idx].dst_reg;
1843 struct bpf_reg_state *reg = NULL;
1380 1844
1381 err = realloc_func_state(state, round_up(slot + 1, BPF_REG_SIZE), 1845 err = realloc_func_state(state, round_up(slot + 1, BPF_REG_SIZE),
1382 state->acquired_refs, true); 1846 state->acquired_refs, true);
@@ -1393,27 +1857,48 @@ static int check_stack_write(struct bpf_verifier_env *env,
1393 } 1857 }
1394 1858
1395 cur = env->cur_state->frame[env->cur_state->curframe]; 1859 cur = env->cur_state->frame[env->cur_state->curframe];
1396 if (value_regno >= 0 && 1860 if (value_regno >= 0)
1397 is_spillable_regtype((type = cur->regs[value_regno].type))) { 1861 reg = &cur->regs[value_regno];
1398 1862
1863 if (reg && size == BPF_REG_SIZE && register_is_const(reg) &&
1864 !register_is_null(reg) && env->allow_ptr_leaks) {
1865 if (dst_reg != BPF_REG_FP) {
1866 /* The backtracking logic can only recognize explicit
1867 * stack slot address like [fp - 8]. Other spill of
1868 * scalar via different register has to be conservative.
1869 * Backtrack from here and mark all registers as precise
1870 * that contributed into 'reg' being a constant.
1871 */
1872 err = mark_chain_precision(env, value_regno);
1873 if (err)
1874 return err;
1875 }
1876 save_register_state(state, spi, reg);
1877 } else if (reg && is_spillable_regtype(reg->type)) {
1399 /* register containing pointer is being spilled into stack */ 1878 /* register containing pointer is being spilled into stack */
1400 if (size != BPF_REG_SIZE) { 1879 if (size != BPF_REG_SIZE) {
1880 verbose_linfo(env, insn_idx, "; ");
1401 verbose(env, "invalid size of register spill\n"); 1881 verbose(env, "invalid size of register spill\n");
1402 return -EACCES; 1882 return -EACCES;
1403 } 1883 }
1404 1884
1405 if (state != cur && type == PTR_TO_STACK) { 1885 if (state != cur && reg->type == PTR_TO_STACK) {
1406 verbose(env, "cannot spill pointers to stack into stack frame of the caller\n"); 1886 verbose(env, "cannot spill pointers to stack into stack frame of the caller\n");
1407 return -EINVAL; 1887 return -EINVAL;
1408 } 1888 }
1409 1889
1410 /* save register state */ 1890 if (!env->allow_ptr_leaks) {
1411 state->stack[spi].spilled_ptr = cur->regs[value_regno]; 1891 bool sanitize = false;
1412 state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
1413 1892
1414 for (i = 0; i < BPF_REG_SIZE; i++) { 1893 if (state->stack[spi].slot_type[0] == STACK_SPILL &&
1415 if (state->stack[spi].slot_type[i] == STACK_MISC && 1894 register_is_const(&state->stack[spi].spilled_ptr))
1416 !env->allow_ptr_leaks) { 1895 sanitize = true;
1896 for (i = 0; i < BPF_REG_SIZE; i++)
1897 if (state->stack[spi].slot_type[i] == STACK_MISC) {
1898 sanitize = true;
1899 break;
1900 }
1901 if (sanitize) {
1417 int *poff = &env->insn_aux_data[insn_idx].sanitize_stack_off; 1902 int *poff = &env->insn_aux_data[insn_idx].sanitize_stack_off;
1418 int soff = (-spi - 1) * BPF_REG_SIZE; 1903 int soff = (-spi - 1) * BPF_REG_SIZE;
1419 1904
@@ -1436,8 +1921,8 @@ static int check_stack_write(struct bpf_verifier_env *env,
1436 } 1921 }
1437 *poff = soff; 1922 *poff = soff;
1438 } 1923 }
1439 state->stack[spi].slot_type[i] = STACK_SPILL;
1440 } 1924 }
1925 save_register_state(state, spi, reg);
1441 } else { 1926 } else {
1442 u8 type = STACK_MISC; 1927 u8 type = STACK_MISC;
1443 1928
@@ -1460,9 +1945,13 @@ static int check_stack_write(struct bpf_verifier_env *env,
1460 state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN; 1945 state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
1461 1946
1462 /* when we zero initialize stack slots mark them as such */ 1947 /* when we zero initialize stack slots mark them as such */
1463 if (value_regno >= 0 && 1948 if (reg && register_is_null(reg)) {
1464 register_is_null(&cur->regs[value_regno])) 1949 /* backtracking doesn't work for STACK_ZERO yet. */
1950 err = mark_chain_precision(env, value_regno);
1951 if (err)
1952 return err;
1465 type = STACK_ZERO; 1953 type = STACK_ZERO;
1954 }
1466 1955
1467 /* Mark slots affected by this stack write. */ 1956 /* Mark slots affected by this stack write. */
1468 for (i = 0; i < size; i++) 1957 for (i = 0; i < size; i++)
@@ -1479,6 +1968,7 @@ static int check_stack_read(struct bpf_verifier_env *env,
1479 struct bpf_verifier_state *vstate = env->cur_state; 1968 struct bpf_verifier_state *vstate = env->cur_state;
1480 struct bpf_func_state *state = vstate->frame[vstate->curframe]; 1969 struct bpf_func_state *state = vstate->frame[vstate->curframe];
1481 int i, slot = -off - 1, spi = slot / BPF_REG_SIZE; 1970 int i, slot = -off - 1, spi = slot / BPF_REG_SIZE;
1971 struct bpf_reg_state *reg;
1482 u8 *stype; 1972 u8 *stype;
1483 1973
1484 if (reg_state->allocated_stack <= slot) { 1974 if (reg_state->allocated_stack <= slot) {
@@ -1487,11 +1977,21 @@ static int check_stack_read(struct bpf_verifier_env *env,
1487 return -EACCES; 1977 return -EACCES;
1488 } 1978 }
1489 stype = reg_state->stack[spi].slot_type; 1979 stype = reg_state->stack[spi].slot_type;
1980 reg = &reg_state->stack[spi].spilled_ptr;
1490 1981
1491 if (stype[0] == STACK_SPILL) { 1982 if (stype[0] == STACK_SPILL) {
1492 if (size != BPF_REG_SIZE) { 1983 if (size != BPF_REG_SIZE) {
1493 verbose(env, "invalid size of register spill\n"); 1984 if (reg->type != SCALAR_VALUE) {
1494 return -EACCES; 1985 verbose_linfo(env, env->insn_idx, "; ");
1986 verbose(env, "invalid size of register fill\n");
1987 return -EACCES;
1988 }
1989 if (value_regno >= 0) {
1990 mark_reg_unknown(env, state->regs, value_regno);
1991 state->regs[value_regno].live |= REG_LIVE_WRITTEN;
1992 }
1993 mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
1994 return 0;
1495 } 1995 }
1496 for (i = 1; i < BPF_REG_SIZE; i++) { 1996 for (i = 1; i < BPF_REG_SIZE; i++) {
1497 if (stype[(slot - i) % BPF_REG_SIZE] != STACK_SPILL) { 1997 if (stype[(slot - i) % BPF_REG_SIZE] != STACK_SPILL) {
@@ -1502,17 +2002,14 @@ static int check_stack_read(struct bpf_verifier_env *env,
1502 2002
1503 if (value_regno >= 0) { 2003 if (value_regno >= 0) {
1504 /* restore register state from stack */ 2004 /* restore register state from stack */
1505 state->regs[value_regno] = reg_state->stack[spi].spilled_ptr; 2005 state->regs[value_regno] = *reg;
1506 /* mark reg as written since spilled pointer state likely 2006 /* mark reg as written since spilled pointer state likely
1507 * has its liveness marks cleared by is_state_visited() 2007 * has its liveness marks cleared by is_state_visited()
1508 * which resets stack/reg liveness for state transitions 2008 * which resets stack/reg liveness for state transitions
1509 */ 2009 */
1510 state->regs[value_regno].live |= REG_LIVE_WRITTEN; 2010 state->regs[value_regno].live |= REG_LIVE_WRITTEN;
1511 } 2011 }
1512 mark_reg_read(env, &reg_state->stack[spi].spilled_ptr, 2012 mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
1513 reg_state->stack[spi].spilled_ptr.parent,
1514 REG_LIVE_READ64);
1515 return 0;
1516 } else { 2013 } else {
1517 int zeros = 0; 2014 int zeros = 0;
1518 2015
@@ -1527,23 +2024,32 @@ static int check_stack_read(struct bpf_verifier_env *env,
1527 off, i, size); 2024 off, i, size);
1528 return -EACCES; 2025 return -EACCES;
1529 } 2026 }
1530 mark_reg_read(env, &reg_state->stack[spi].spilled_ptr, 2027 mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
1531 reg_state->stack[spi].spilled_ptr.parent,
1532 REG_LIVE_READ64);
1533 if (value_regno >= 0) { 2028 if (value_regno >= 0) {
1534 if (zeros == size) { 2029 if (zeros == size) {
1535 /* any size read into register is zero extended, 2030 /* any size read into register is zero extended,
1536 * so the whole register == const_zero 2031 * so the whole register == const_zero
1537 */ 2032 */
1538 __mark_reg_const_zero(&state->regs[value_regno]); 2033 __mark_reg_const_zero(&state->regs[value_regno]);
2034 /* backtracking doesn't support STACK_ZERO yet,
2035 * so mark it precise here, so that later
2036 * backtracking can stop here.
2037 * Backtracking may not need this if this register
2038 * doesn't participate in pointer adjustment.
2039 * Forward propagation of precise flag is not
2040 * necessary either. This mark is only to stop
2041 * backtracking. Any register that contributed
2042 * to const 0 was marked precise before spill.
2043 */
2044 state->regs[value_regno].precise = true;
1539 } else { 2045 } else {
1540 /* have read misc data from the stack */ 2046 /* have read misc data from the stack */
1541 mark_reg_unknown(env, state->regs, value_regno); 2047 mark_reg_unknown(env, state->regs, value_regno);
1542 } 2048 }
1543 state->regs[value_regno].live |= REG_LIVE_WRITTEN; 2049 state->regs[value_regno].live |= REG_LIVE_WRITTEN;
1544 } 2050 }
1545 return 0;
1546 } 2051 }
2052 return 0;
1547} 2053}
1548 2054
1549static int check_stack_access(struct bpf_verifier_env *env, 2055static int check_stack_access(struct bpf_verifier_env *env,
@@ -1835,6 +2341,9 @@ static int check_sock_access(struct bpf_verifier_env *env, int insn_idx,
1835 case PTR_TO_TCP_SOCK: 2341 case PTR_TO_TCP_SOCK:
1836 valid = bpf_tcp_sock_is_valid_access(off, size, t, &info); 2342 valid = bpf_tcp_sock_is_valid_access(off, size, t, &info);
1837 break; 2343 break;
2344 case PTR_TO_XDP_SOCK:
2345 valid = bpf_xdp_sock_is_valid_access(off, size, t, &info);
2346 break;
1838 default: 2347 default:
1839 valid = false; 2348 valid = false;
1840 } 2349 }
@@ -1999,6 +2508,9 @@ static int check_ptr_alignment(struct bpf_verifier_env *env,
1999 case PTR_TO_TCP_SOCK: 2508 case PTR_TO_TCP_SOCK:
2000 pointer_desc = "tcp_sock "; 2509 pointer_desc = "tcp_sock ";
2001 break; 2510 break;
2511 case PTR_TO_XDP_SOCK:
2512 pointer_desc = "xdp_sock ";
2513 break;
2002 default: 2514 default:
2003 break; 2515 break;
2004 } 2516 }
@@ -2398,7 +2910,7 @@ static int check_stack_boundary(struct bpf_verifier_env *env, int regno,
2398{ 2910{
2399 struct bpf_reg_state *reg = reg_state(env, regno); 2911 struct bpf_reg_state *reg = reg_state(env, regno);
2400 struct bpf_func_state *state = func(env, reg); 2912 struct bpf_func_state *state = func(env, reg);
2401 int err, min_off, max_off, i, slot, spi; 2913 int err, min_off, max_off, i, j, slot, spi;
2402 2914
2403 if (reg->type != PTR_TO_STACK) { 2915 if (reg->type != PTR_TO_STACK) {
2404 /* Allow zero-byte read from NULL, regardless of pointer type */ 2916 /* Allow zero-byte read from NULL, regardless of pointer type */
@@ -2486,6 +2998,14 @@ static int check_stack_boundary(struct bpf_verifier_env *env, int regno,
2486 *stype = STACK_MISC; 2998 *stype = STACK_MISC;
2487 goto mark; 2999 goto mark;
2488 } 3000 }
3001 if (state->stack[spi].slot_type[0] == STACK_SPILL &&
3002 state->stack[spi].spilled_ptr.type == SCALAR_VALUE) {
3003 __mark_reg_unknown(&state->stack[spi].spilled_ptr);
3004 for (j = 0; j < BPF_REG_SIZE; j++)
3005 state->stack[spi].slot_type[j] = STACK_MISC;
3006 goto mark;
3007 }
3008
2489err: 3009err:
2490 if (tnum_is_const(reg->var_off)) { 3010 if (tnum_is_const(reg->var_off)) {
2491 verbose(env, "invalid indirect read from stack off %d+%d size %d\n", 3011 verbose(env, "invalid indirect read from stack off %d+%d size %d\n",
@@ -2837,6 +3357,8 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
2837 err = check_helper_mem_access(env, regno - 1, 3357 err = check_helper_mem_access(env, regno - 1,
2838 reg->umax_value, 3358 reg->umax_value,
2839 zero_size_allowed, meta); 3359 zero_size_allowed, meta);
3360 if (!err)
3361 err = mark_chain_precision(env, regno);
2840 } else if (arg_type_is_int_ptr(arg_type)) { 3362 } else if (arg_type_is_int_ptr(arg_type)) {
2841 int size = int_ptr_type_to_size(arg_type); 3363 int size = int_ptr_type_to_size(arg_type);
2842 3364
@@ -2897,10 +3419,14 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
2897 * appear. 3419 * appear.
2898 */ 3420 */
2899 case BPF_MAP_TYPE_CPUMAP: 3421 case BPF_MAP_TYPE_CPUMAP:
2900 case BPF_MAP_TYPE_XSKMAP:
2901 if (func_id != BPF_FUNC_redirect_map) 3422 if (func_id != BPF_FUNC_redirect_map)
2902 goto error; 3423 goto error;
2903 break; 3424 break;
3425 case BPF_MAP_TYPE_XSKMAP:
3426 if (func_id != BPF_FUNC_redirect_map &&
3427 func_id != BPF_FUNC_map_lookup_elem)
3428 goto error;
3429 break;
2904 case BPF_MAP_TYPE_ARRAY_OF_MAPS: 3430 case BPF_MAP_TYPE_ARRAY_OF_MAPS:
2905 case BPF_MAP_TYPE_HASH_OF_MAPS: 3431 case BPF_MAP_TYPE_HASH_OF_MAPS:
2906 if (func_id != BPF_FUNC_map_lookup_elem) 3432 if (func_id != BPF_FUNC_map_lookup_elem)
@@ -3791,6 +4317,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
3791 case PTR_TO_SOCK_COMMON_OR_NULL: 4317 case PTR_TO_SOCK_COMMON_OR_NULL:
3792 case PTR_TO_TCP_SOCK: 4318 case PTR_TO_TCP_SOCK:
3793 case PTR_TO_TCP_SOCK_OR_NULL: 4319 case PTR_TO_TCP_SOCK_OR_NULL:
4320 case PTR_TO_XDP_SOCK:
3794 verbose(env, "R%d pointer arithmetic on %s prohibited\n", 4321 verbose(env, "R%d pointer arithmetic on %s prohibited\n",
3795 dst, reg_type_str[ptr_reg->type]); 4322 dst, reg_type_str[ptr_reg->type]);
3796 return -EACCES; 4323 return -EACCES;
@@ -4268,6 +4795,7 @@ static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
4268 struct bpf_reg_state *regs = state->regs, *dst_reg, *src_reg; 4795 struct bpf_reg_state *regs = state->regs, *dst_reg, *src_reg;
4269 struct bpf_reg_state *ptr_reg = NULL, off_reg = {0}; 4796 struct bpf_reg_state *ptr_reg = NULL, off_reg = {0};
4270 u8 opcode = BPF_OP(insn->code); 4797 u8 opcode = BPF_OP(insn->code);
4798 int err;
4271 4799
4272 dst_reg = &regs[insn->dst_reg]; 4800 dst_reg = &regs[insn->dst_reg];
4273 src_reg = NULL; 4801 src_reg = NULL;
@@ -4294,11 +4822,17 @@ static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
4294 * This is legal, but we have to reverse our 4822 * This is legal, but we have to reverse our
4295 * src/dest handling in computing the range 4823 * src/dest handling in computing the range
4296 */ 4824 */
4825 err = mark_chain_precision(env, insn->dst_reg);
4826 if (err)
4827 return err;
4297 return adjust_ptr_min_max_vals(env, insn, 4828 return adjust_ptr_min_max_vals(env, insn,
4298 src_reg, dst_reg); 4829 src_reg, dst_reg);
4299 } 4830 }
4300 } else if (ptr_reg) { 4831 } else if (ptr_reg) {
4301 /* pointer += scalar */ 4832 /* pointer += scalar */
4833 err = mark_chain_precision(env, insn->src_reg);
4834 if (err)
4835 return err;
4302 return adjust_ptr_min_max_vals(env, insn, 4836 return adjust_ptr_min_max_vals(env, insn,
4303 dst_reg, src_reg); 4837 dst_reg, src_reg);
4304 } 4838 }
@@ -5030,6 +5564,9 @@ static void mark_ptr_or_null_reg(struct bpf_func_state *state,
5030 if (reg->map_ptr->inner_map_meta) { 5564 if (reg->map_ptr->inner_map_meta) {
5031 reg->type = CONST_PTR_TO_MAP; 5565 reg->type = CONST_PTR_TO_MAP;
5032 reg->map_ptr = reg->map_ptr->inner_map_meta; 5566 reg->map_ptr = reg->map_ptr->inner_map_meta;
5567 } else if (reg->map_ptr->map_type ==
5568 BPF_MAP_TYPE_XSKMAP) {
5569 reg->type = PTR_TO_XDP_SOCK;
5033 } else { 5570 } else {
5034 reg->type = PTR_TO_MAP_VALUE; 5571 reg->type = PTR_TO_MAP_VALUE;
5035 } 5572 }
@@ -5201,9 +5738,10 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
5201 struct bpf_verifier_state *this_branch = env->cur_state; 5738 struct bpf_verifier_state *this_branch = env->cur_state;
5202 struct bpf_verifier_state *other_branch; 5739 struct bpf_verifier_state *other_branch;
5203 struct bpf_reg_state *regs = this_branch->frame[this_branch->curframe]->regs; 5740 struct bpf_reg_state *regs = this_branch->frame[this_branch->curframe]->regs;
5204 struct bpf_reg_state *dst_reg, *other_branch_regs; 5741 struct bpf_reg_state *dst_reg, *other_branch_regs, *src_reg = NULL;
5205 u8 opcode = BPF_OP(insn->code); 5742 u8 opcode = BPF_OP(insn->code);
5206 bool is_jmp32; 5743 bool is_jmp32;
5744 int pred = -1;
5207 int err; 5745 int err;
5208 5746
5209 /* Only conditional jumps are expected to reach here. */ 5747 /* Only conditional jumps are expected to reach here. */
@@ -5228,6 +5766,7 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
5228 insn->src_reg); 5766 insn->src_reg);
5229 return -EACCES; 5767 return -EACCES;
5230 } 5768 }
5769 src_reg = &regs[insn->src_reg];
5231 } else { 5770 } else {
5232 if (insn->src_reg != BPF_REG_0) { 5771 if (insn->src_reg != BPF_REG_0) {
5233 verbose(env, "BPF_JMP/JMP32 uses reserved fields\n"); 5772 verbose(env, "BPF_JMP/JMP32 uses reserved fields\n");
@@ -5243,20 +5782,29 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
5243 dst_reg = &regs[insn->dst_reg]; 5782 dst_reg = &regs[insn->dst_reg];
5244 is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32; 5783 is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
5245 5784
5246 if (BPF_SRC(insn->code) == BPF_K) { 5785 if (BPF_SRC(insn->code) == BPF_K)
5247 int pred = is_branch_taken(dst_reg, insn->imm, opcode, 5786 pred = is_branch_taken(dst_reg, insn->imm,
5248 is_jmp32); 5787 opcode, is_jmp32);
5249 5788 else if (src_reg->type == SCALAR_VALUE &&
5250 if (pred == 1) { 5789 tnum_is_const(src_reg->var_off))
5251 /* only follow the goto, ignore fall-through */ 5790 pred = is_branch_taken(dst_reg, src_reg->var_off.value,
5252 *insn_idx += insn->off; 5791 opcode, is_jmp32);
5253 return 0; 5792 if (pred >= 0) {
5254 } else if (pred == 0) { 5793 err = mark_chain_precision(env, insn->dst_reg);
5255 /* only follow fall-through branch, since 5794 if (BPF_SRC(insn->code) == BPF_X && !err)
5256 * that's where the program will go 5795 err = mark_chain_precision(env, insn->src_reg);
5257 */ 5796 if (err)
5258 return 0; 5797 return err;
5259 } 5798 }
5799 if (pred == 1) {
5800 /* only follow the goto, ignore fall-through */
5801 *insn_idx += insn->off;
5802 return 0;
5803 } else if (pred == 0) {
5804 /* only follow fall-through branch, since
5805 * that's where the program will go
5806 */
5807 return 0;
5260 } 5808 }
5261 5809
5262 other_branch = push_stack(env, *insn_idx + insn->off + 1, *insn_idx, 5810 other_branch = push_stack(env, *insn_idx + insn->off + 1, *insn_idx,
@@ -5616,7 +6164,8 @@ static void init_explored_state(struct bpf_verifier_env *env, int idx)
5616 * w - next instruction 6164 * w - next instruction
5617 * e - edge 6165 * e - edge
5618 */ 6166 */
5619static int push_insn(int t, int w, int e, struct bpf_verifier_env *env) 6167static int push_insn(int t, int w, int e, struct bpf_verifier_env *env,
6168 bool loop_ok)
5620{ 6169{
5621 int *insn_stack = env->cfg.insn_stack; 6170 int *insn_stack = env->cfg.insn_stack;
5622 int *insn_state = env->cfg.insn_state; 6171 int *insn_state = env->cfg.insn_state;
@@ -5646,6 +6195,8 @@ static int push_insn(int t, int w, int e, struct bpf_verifier_env *env)
5646 insn_stack[env->cfg.cur_stack++] = w; 6195 insn_stack[env->cfg.cur_stack++] = w;
5647 return 1; 6196 return 1;
5648 } else if ((insn_state[w] & 0xF0) == DISCOVERED) { 6197 } else if ((insn_state[w] & 0xF0) == DISCOVERED) {
6198 if (loop_ok && env->allow_ptr_leaks)
6199 return 0;
5649 verbose_linfo(env, t, "%d: ", t); 6200 verbose_linfo(env, t, "%d: ", t);
5650 verbose_linfo(env, w, "%d: ", w); 6201 verbose_linfo(env, w, "%d: ", w);
5651 verbose(env, "back-edge from insn %d to %d\n", t, w); 6202 verbose(env, "back-edge from insn %d to %d\n", t, w);
@@ -5697,7 +6248,7 @@ peek_stack:
5697 if (opcode == BPF_EXIT) { 6248 if (opcode == BPF_EXIT) {
5698 goto mark_explored; 6249 goto mark_explored;
5699 } else if (opcode == BPF_CALL) { 6250 } else if (opcode == BPF_CALL) {
5700 ret = push_insn(t, t + 1, FALLTHROUGH, env); 6251 ret = push_insn(t, t + 1, FALLTHROUGH, env, false);
5701 if (ret == 1) 6252 if (ret == 1)
5702 goto peek_stack; 6253 goto peek_stack;
5703 else if (ret < 0) 6254 else if (ret < 0)
@@ -5706,7 +6257,8 @@ peek_stack:
5706 init_explored_state(env, t + 1); 6257 init_explored_state(env, t + 1);
5707 if (insns[t].src_reg == BPF_PSEUDO_CALL) { 6258 if (insns[t].src_reg == BPF_PSEUDO_CALL) {
5708 init_explored_state(env, t); 6259 init_explored_state(env, t);
5709 ret = push_insn(t, t + insns[t].imm + 1, BRANCH, env); 6260 ret = push_insn(t, t + insns[t].imm + 1, BRANCH,
6261 env, false);
5710 if (ret == 1) 6262 if (ret == 1)
5711 goto peek_stack; 6263 goto peek_stack;
5712 else if (ret < 0) 6264 else if (ret < 0)
@@ -5719,11 +6271,16 @@ peek_stack:
5719 } 6271 }
5720 /* unconditional jump with single edge */ 6272 /* unconditional jump with single edge */
5721 ret = push_insn(t, t + insns[t].off + 1, 6273 ret = push_insn(t, t + insns[t].off + 1,
5722 FALLTHROUGH, env); 6274 FALLTHROUGH, env, true);
5723 if (ret == 1) 6275 if (ret == 1)
5724 goto peek_stack; 6276 goto peek_stack;
5725 else if (ret < 0) 6277 else if (ret < 0)
5726 goto err_free; 6278 goto err_free;
6279 /* unconditional jmp is not a good pruning point,
6280 * but it's marked, since backtracking needs
6281 * to record jmp history in is_state_visited().
6282 */
6283 init_explored_state(env, t + insns[t].off + 1);
5727 /* tell verifier to check for equivalent states 6284 /* tell verifier to check for equivalent states
5728 * after every call and jump 6285 * after every call and jump
5729 */ 6286 */
@@ -5732,13 +6289,13 @@ peek_stack:
5732 } else { 6289 } else {
5733 /* conditional jump with two edges */ 6290 /* conditional jump with two edges */
5734 init_explored_state(env, t); 6291 init_explored_state(env, t);
5735 ret = push_insn(t, t + 1, FALLTHROUGH, env); 6292 ret = push_insn(t, t + 1, FALLTHROUGH, env, true);
5736 if (ret == 1) 6293 if (ret == 1)
5737 goto peek_stack; 6294 goto peek_stack;
5738 else if (ret < 0) 6295 else if (ret < 0)
5739 goto err_free; 6296 goto err_free;
5740 6297
5741 ret = push_insn(t, t + insns[t].off + 1, BRANCH, env); 6298 ret = push_insn(t, t + insns[t].off + 1, BRANCH, env, true);
5742 if (ret == 1) 6299 if (ret == 1)
5743 goto peek_stack; 6300 goto peek_stack;
5744 else if (ret < 0) 6301 else if (ret < 0)
@@ -5748,7 +6305,7 @@ peek_stack:
5748 /* all other non-branch instructions with single 6305 /* all other non-branch instructions with single
5749 * fall-through edge 6306 * fall-through edge
5750 */ 6307 */
5751 ret = push_insn(t, t + 1, FALLTHROUGH, env); 6308 ret = push_insn(t, t + 1, FALLTHROUGH, env, false);
5752 if (ret == 1) 6309 if (ret == 1)
5753 goto peek_stack; 6310 goto peek_stack;
5754 else if (ret < 0) 6311 else if (ret < 0)
@@ -6181,6 +6738,8 @@ static void clean_live_states(struct bpf_verifier_env *env, int insn,
6181 6738
6182 sl = *explored_state(env, insn); 6739 sl = *explored_state(env, insn);
6183 while (sl) { 6740 while (sl) {
6741 if (sl->state.branches)
6742 goto next;
6184 if (sl->state.insn_idx != insn || 6743 if (sl->state.insn_idx != insn ||
6185 sl->state.curframe != cur->curframe) 6744 sl->state.curframe != cur->curframe)
6186 goto next; 6745 goto next;
@@ -6222,6 +6781,8 @@ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
6222 switch (rold->type) { 6781 switch (rold->type) {
6223 case SCALAR_VALUE: 6782 case SCALAR_VALUE:
6224 if (rcur->type == SCALAR_VALUE) { 6783 if (rcur->type == SCALAR_VALUE) {
6784 if (!rold->precise && !rcur->precise)
6785 return true;
6225 /* new val must satisfy old val knowledge */ 6786 /* new val must satisfy old val knowledge */
6226 return range_within(rold, rcur) && 6787 return range_within(rold, rcur) &&
6227 tnum_in(rold->var_off, rcur->var_off); 6788 tnum_in(rold->var_off, rcur->var_off);
@@ -6294,6 +6855,7 @@ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
6294 case PTR_TO_SOCK_COMMON_OR_NULL: 6855 case PTR_TO_SOCK_COMMON_OR_NULL:
6295 case PTR_TO_TCP_SOCK: 6856 case PTR_TO_TCP_SOCK:
6296 case PTR_TO_TCP_SOCK_OR_NULL: 6857 case PTR_TO_TCP_SOCK_OR_NULL:
6858 case PTR_TO_XDP_SOCK:
6297 /* Only valid matches are exact, which memcmp() above 6859 /* Only valid matches are exact, which memcmp() above
6298 * would have accepted 6860 * would have accepted
6299 */ 6861 */
@@ -6544,19 +7106,52 @@ static int propagate_liveness(struct bpf_verifier_env *env,
6544 return 0; 7106 return 0;
6545} 7107}
6546 7108
7109static bool states_maybe_looping(struct bpf_verifier_state *old,
7110 struct bpf_verifier_state *cur)
7111{
7112 struct bpf_func_state *fold, *fcur;
7113 int i, fr = cur->curframe;
7114
7115 if (old->curframe != fr)
7116 return false;
7117
7118 fold = old->frame[fr];
7119 fcur = cur->frame[fr];
7120 for (i = 0; i < MAX_BPF_REG; i++)
7121 if (memcmp(&fold->regs[i], &fcur->regs[i],
7122 offsetof(struct bpf_reg_state, parent)))
7123 return false;
7124 return true;
7125}
7126
7127
6547static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) 7128static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
6548{ 7129{
6549 struct bpf_verifier_state_list *new_sl; 7130 struct bpf_verifier_state_list *new_sl;
6550 struct bpf_verifier_state_list *sl, **pprev; 7131 struct bpf_verifier_state_list *sl, **pprev;
6551 struct bpf_verifier_state *cur = env->cur_state, *new; 7132 struct bpf_verifier_state *cur = env->cur_state, *new;
6552 int i, j, err, states_cnt = 0; 7133 int i, j, err, states_cnt = 0;
7134 bool add_new_state = false;
6553 7135
7136 cur->last_insn_idx = env->prev_insn_idx;
6554 if (!env->insn_aux_data[insn_idx].prune_point) 7137 if (!env->insn_aux_data[insn_idx].prune_point)
6555 /* this 'insn_idx' instruction wasn't marked, so we will not 7138 /* this 'insn_idx' instruction wasn't marked, so we will not
6556 * be doing state search here 7139 * be doing state search here
6557 */ 7140 */
6558 return 0; 7141 return 0;
6559 7142
7143 /* bpf progs typically have pruning point every 4 instructions
7144 * http://vger.kernel.org/bpfconf2019.html#session-1
7145 * Do not add new state for future pruning if the verifier hasn't seen
7146 * at least 2 jumps and at least 8 instructions.
7147 * This heuristic helps decrease the 'total_states' and 'peak_states' metrics.
7148 * In tests that amounts to up to 50% reduction in total verifier
7149 * memory consumption and 20% verifier time speedup.
7150 */
7151 if (env->jmps_processed - env->prev_jmps_processed >= 2 &&
7152 env->insn_processed - env->prev_insn_processed >= 8)
7153 add_new_state = true;
7154
6560 pprev = explored_state(env, insn_idx); 7155 pprev = explored_state(env, insn_idx);
6561 sl = *pprev; 7156 sl = *pprev;
6562 7157
@@ -6566,6 +7161,30 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
6566 states_cnt++; 7161 states_cnt++;
6567 if (sl->state.insn_idx != insn_idx) 7162 if (sl->state.insn_idx != insn_idx)
6568 goto next; 7163 goto next;
7164 if (sl->state.branches) {
7165 if (states_maybe_looping(&sl->state, cur) &&
7166 states_equal(env, &sl->state, cur)) {
7167 verbose_linfo(env, insn_idx, "; ");
7168 verbose(env, "infinite loop detected at insn %d\n", insn_idx);
7169 return -EINVAL;
7170 }
7171 /* if the verifier is processing a loop, avoid adding new state
7172 * too often, since different loop iterations have distinct
7173 * states and may not help future pruning.
7174 * This threshold shouldn't be too low to make sure that
7175 * a loop with large bound will be rejected quickly.
7176 * The most abusive loop will be:
7177 * r1 += 1
7178 * if r1 < 1000000 goto pc-2
7179 * 1M insn_procssed limit / 100 == 10k peak states.
7180 * This threshold shouldn't be too high either, since states
7181 * at the end of the loop are likely to be useful in pruning.
7182 */
7183 if (env->jmps_processed - env->prev_jmps_processed < 20 &&
7184 env->insn_processed - env->prev_insn_processed < 100)
7185 add_new_state = false;
7186 goto miss;
7187 }
6569 if (states_equal(env, &sl->state, cur)) { 7188 if (states_equal(env, &sl->state, cur)) {
6570 sl->hit_cnt++; 7189 sl->hit_cnt++;
6571 /* reached equivalent register/stack state, 7190 /* reached equivalent register/stack state,
@@ -6583,7 +7202,15 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
6583 return err; 7202 return err;
6584 return 1; 7203 return 1;
6585 } 7204 }
6586 sl->miss_cnt++; 7205miss:
7206 /* when new state is not going to be added do not increase miss count.
7207 * Otherwise several loop iterations will remove the state
7208 * recorded earlier. The goal of these heuristics is to have
7209 * states from some iterations of the loop (some in the beginning
7210 * and some at the end) to help pruning.
7211 */
7212 if (add_new_state)
7213 sl->miss_cnt++;
6587 /* heuristic to determine whether this state is beneficial 7214 /* heuristic to determine whether this state is beneficial
6588 * to keep checking from state equivalence point of view. 7215 * to keep checking from state equivalence point of view.
6589 * Higher numbers increase max_states_per_insn and verification time, 7216 * Higher numbers increase max_states_per_insn and verification time,
@@ -6595,6 +7222,11 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
6595 */ 7222 */
6596 *pprev = sl->next; 7223 *pprev = sl->next;
6597 if (sl->state.frame[0]->regs[0].live & REG_LIVE_DONE) { 7224 if (sl->state.frame[0]->regs[0].live & REG_LIVE_DONE) {
7225 u32 br = sl->state.branches;
7226
7227 WARN_ONCE(br,
7228 "BUG live_done but branches_to_explore %d\n",
7229 br);
6598 free_verifier_state(&sl->state, false); 7230 free_verifier_state(&sl->state, false);
6599 kfree(sl); 7231 kfree(sl);
6600 env->peak_states--; 7232 env->peak_states--;
@@ -6618,20 +7250,27 @@ next:
6618 env->max_states_per_insn = states_cnt; 7250 env->max_states_per_insn = states_cnt;
6619 7251
6620 if (!env->allow_ptr_leaks && states_cnt > BPF_COMPLEXITY_LIMIT_STATES) 7252 if (!env->allow_ptr_leaks && states_cnt > BPF_COMPLEXITY_LIMIT_STATES)
6621 return 0; 7253 return push_jmp_history(env, cur);
7254
7255 if (!add_new_state)
7256 return push_jmp_history(env, cur);
6622 7257
6623 /* there were no equivalent states, remember current one. 7258 /* There were no equivalent states, remember the current one.
6624 * technically the current state is not proven to be safe yet, 7259 * Technically the current state is not proven to be safe yet,
6625 * but it will either reach outer most bpf_exit (which means it's safe) 7260 * but it will either reach outer most bpf_exit (which means it's safe)
6626 * or it will be rejected. Since there are no loops, we won't be 7261 * or it will be rejected. When there are no loops the verifier won't be
6627 * seeing this tuple (frame[0].callsite, frame[1].callsite, .. insn_idx) 7262 * seeing this tuple (frame[0].callsite, frame[1].callsite, .. insn_idx)
6628 * again on the way to bpf_exit 7263 * again on the way to bpf_exit.
7264 * When looping the sl->state.branches will be > 0 and this state
7265 * will not be considered for equivalence until branches == 0.
6629 */ 7266 */
6630 new_sl = kzalloc(sizeof(struct bpf_verifier_state_list), GFP_KERNEL); 7267 new_sl = kzalloc(sizeof(struct bpf_verifier_state_list), GFP_KERNEL);
6631 if (!new_sl) 7268 if (!new_sl)
6632 return -ENOMEM; 7269 return -ENOMEM;
6633 env->total_states++; 7270 env->total_states++;
6634 env->peak_states++; 7271 env->peak_states++;
7272 env->prev_jmps_processed = env->jmps_processed;
7273 env->prev_insn_processed = env->insn_processed;
6635 7274
6636 /* add new state to the head of linked list */ 7275 /* add new state to the head of linked list */
6637 new = &new_sl->state; 7276 new = &new_sl->state;
@@ -6642,6 +7281,12 @@ next:
6642 return err; 7281 return err;
6643 } 7282 }
6644 new->insn_idx = insn_idx; 7283 new->insn_idx = insn_idx;
7284 WARN_ONCE(new->branches != 1,
7285 "BUG is_state_visited:branches_to_explore=%d insn %d\n", new->branches, insn_idx);
7286
7287 cur->parent = new;
7288 cur->first_insn_idx = insn_idx;
7289 clear_jmp_history(cur);
6645 new_sl->next = *explored_state(env, insn_idx); 7290 new_sl->next = *explored_state(env, insn_idx);
6646 *explored_state(env, insn_idx) = new_sl; 7291 *explored_state(env, insn_idx) = new_sl;
6647 /* connect new state to parentage chain. Current frame needs all 7292 /* connect new state to parentage chain. Current frame needs all
@@ -6651,17 +7296,18 @@ next:
6651 * the state of the call instruction (with WRITTEN set), and r0 comes 7296 * the state of the call instruction (with WRITTEN set), and r0 comes
6652 * from callee with its full parentage chain, anyway. 7297 * from callee with its full parentage chain, anyway.
6653 */ 7298 */
6654 for (j = 0; j <= cur->curframe; j++)
6655 for (i = j < cur->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++)
6656 cur->frame[j]->regs[i].parent = &new->frame[j]->regs[i];
6657 /* clear write marks in current state: the writes we did are not writes 7299 /* clear write marks in current state: the writes we did are not writes
6658 * our child did, so they don't screen off its reads from us. 7300 * our child did, so they don't screen off its reads from us.
6659 * (There are no read marks in current state, because reads always mark 7301 * (There are no read marks in current state, because reads always mark
6660 * their parent and current state never has children yet. Only 7302 * their parent and current state never has children yet. Only
6661 * explored_states can get read marks.) 7303 * explored_states can get read marks.)
6662 */ 7304 */
6663 for (i = 0; i < BPF_REG_FP; i++) 7305 for (j = 0; j <= cur->curframe; j++) {
6664 cur->frame[cur->curframe]->regs[i].live = REG_LIVE_NONE; 7306 for (i = j < cur->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++)
7307 cur->frame[j]->regs[i].parent = &new->frame[j]->regs[i];
7308 for (i = 0; i < BPF_REG_FP; i++)
7309 cur->frame[j]->regs[i].live = REG_LIVE_NONE;
7310 }
6665 7311
6666 /* all stack frames are accessible from callee, clear them all */ 7312 /* all stack frames are accessible from callee, clear them all */
6667 for (j = 0; j <= cur->curframe; j++) { 7313 for (j = 0; j <= cur->curframe; j++) {
@@ -6688,6 +7334,7 @@ static bool reg_type_mismatch_ok(enum bpf_reg_type type)
6688 case PTR_TO_SOCK_COMMON_OR_NULL: 7334 case PTR_TO_SOCK_COMMON_OR_NULL:
6689 case PTR_TO_TCP_SOCK: 7335 case PTR_TO_TCP_SOCK:
6690 case PTR_TO_TCP_SOCK_OR_NULL: 7336 case PTR_TO_TCP_SOCK_OR_NULL:
7337 case PTR_TO_XDP_SOCK:
6691 return false; 7338 return false;
6692 default: 7339 default:
6693 return true; 7340 return true;
@@ -6719,6 +7366,7 @@ static int do_check(struct bpf_verifier_env *env)
6719 struct bpf_reg_state *regs; 7366 struct bpf_reg_state *regs;
6720 int insn_cnt = env->prog->len; 7367 int insn_cnt = env->prog->len;
6721 bool do_print_state = false; 7368 bool do_print_state = false;
7369 int prev_insn_idx = -1;
6722 7370
6723 env->prev_linfo = NULL; 7371 env->prev_linfo = NULL;
6724 7372
@@ -6727,6 +7375,7 @@ static int do_check(struct bpf_verifier_env *env)
6727 return -ENOMEM; 7375 return -ENOMEM;
6728 state->curframe = 0; 7376 state->curframe = 0;
6729 state->speculative = false; 7377 state->speculative = false;
7378 state->branches = 1;
6730 state->frame[0] = kzalloc(sizeof(struct bpf_func_state), GFP_KERNEL); 7379 state->frame[0] = kzalloc(sizeof(struct bpf_func_state), GFP_KERNEL);
6731 if (!state->frame[0]) { 7380 if (!state->frame[0]) {
6732 kfree(state); 7381 kfree(state);
@@ -6743,6 +7392,7 @@ static int do_check(struct bpf_verifier_env *env)
6743 u8 class; 7392 u8 class;
6744 int err; 7393 int err;
6745 7394
7395 env->prev_insn_idx = prev_insn_idx;
6746 if (env->insn_idx >= insn_cnt) { 7396 if (env->insn_idx >= insn_cnt) {
6747 verbose(env, "invalid insn idx %d insn_cnt %d\n", 7397 verbose(env, "invalid insn idx %d insn_cnt %d\n",
6748 env->insn_idx, insn_cnt); 7398 env->insn_idx, insn_cnt);
@@ -6815,6 +7465,7 @@ static int do_check(struct bpf_verifier_env *env)
6815 7465
6816 regs = cur_regs(env); 7466 regs = cur_regs(env);
6817 env->insn_aux_data[env->insn_idx].seen = true; 7467 env->insn_aux_data[env->insn_idx].seen = true;
7468 prev_insn_idx = env->insn_idx;
6818 7469
6819 if (class == BPF_ALU || class == BPF_ALU64) { 7470 if (class == BPF_ALU || class == BPF_ALU64) {
6820 err = check_alu_op(env, insn); 7471 err = check_alu_op(env, insn);
@@ -6933,6 +7584,7 @@ static int do_check(struct bpf_verifier_env *env)
6933 } else if (class == BPF_JMP || class == BPF_JMP32) { 7584 } else if (class == BPF_JMP || class == BPF_JMP32) {
6934 u8 opcode = BPF_OP(insn->code); 7585 u8 opcode = BPF_OP(insn->code);
6935 7586
7587 env->jmps_processed++;
6936 if (opcode == BPF_CALL) { 7588 if (opcode == BPF_CALL) {
6937 if (BPF_SRC(insn->code) != BPF_K || 7589 if (BPF_SRC(insn->code) != BPF_K ||
6938 insn->off != 0 || 7590 insn->off != 0 ||
@@ -6987,7 +7639,6 @@ static int do_check(struct bpf_verifier_env *env)
6987 7639
6988 if (state->curframe) { 7640 if (state->curframe) {
6989 /* exit from nested function */ 7641 /* exit from nested function */
6990 env->prev_insn_idx = env->insn_idx;
6991 err = prepare_func_exit(env, &env->insn_idx); 7642 err = prepare_func_exit(env, &env->insn_idx);
6992 if (err) 7643 if (err)
6993 return err; 7644 return err;
@@ -7018,7 +7669,8 @@ static int do_check(struct bpf_verifier_env *env)
7018 if (err) 7669 if (err)
7019 return err; 7670 return err;
7020process_bpf_exit: 7671process_bpf_exit:
7021 err = pop_stack(env, &env->prev_insn_idx, 7672 update_branch_counts(env, env->cur_state);
7673 err = pop_stack(env, &prev_insn_idx,
7022 &env->insn_idx); 7674 &env->insn_idx);
7023 if (err < 0) { 7675 if (err < 0) {
7024 if (err != -ENOENT) 7676 if (err != -ENOENT)
@@ -7821,6 +8473,9 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
7821 case PTR_TO_TCP_SOCK: 8473 case PTR_TO_TCP_SOCK:
7822 convert_ctx_access = bpf_tcp_sock_convert_ctx_access; 8474 convert_ctx_access = bpf_tcp_sock_convert_ctx_access;
7823 break; 8475 break;
8476 case PTR_TO_XDP_SOCK:
8477 convert_ctx_access = bpf_xdp_sock_convert_ctx_access;
8478 break;
7824 default: 8479 default:
7825 continue; 8480 continue;
7826 } 8481 }
diff --git a/kernel/bpf/xskmap.c b/kernel/bpf/xskmap.c
index 22066c28ba61..ef7338cebd18 100644
--- a/kernel/bpf/xskmap.c
+++ b/kernel/bpf/xskmap.c
@@ -17,8 +17,8 @@ struct xsk_map {
17 17
18static struct bpf_map *xsk_map_alloc(union bpf_attr *attr) 18static struct bpf_map *xsk_map_alloc(union bpf_attr *attr)
19{ 19{
20 int cpu, err = -EINVAL;
21 struct xsk_map *m; 20 struct xsk_map *m;
21 int cpu, err;
22 u64 cost; 22 u64 cost;
23 23
24 if (!capable(CAP_NET_ADMIN)) 24 if (!capable(CAP_NET_ADMIN))
@@ -152,6 +152,12 @@ void __xsk_map_flush(struct bpf_map *map)
152 152
153static void *xsk_map_lookup_elem(struct bpf_map *map, void *key) 153static void *xsk_map_lookup_elem(struct bpf_map *map, void *key)
154{ 154{
155 WARN_ON_ONCE(!rcu_read_lock_held());
156 return __xsk_map_lookup_elem(map, *(u32 *)key);
157}
158
159static void *xsk_map_lookup_elem_sys_only(struct bpf_map *map, void *key)
160{
155 return ERR_PTR(-EOPNOTSUPP); 161 return ERR_PTR(-EOPNOTSUPP);
156} 162}
157 163
@@ -218,6 +224,7 @@ const struct bpf_map_ops xsk_map_ops = {
218 .map_free = xsk_map_free, 224 .map_free = xsk_map_free,
219 .map_get_next_key = xsk_map_get_next_key, 225 .map_get_next_key = xsk_map_get_next_key,
220 .map_lookup_elem = xsk_map_lookup_elem, 226 .map_lookup_elem = xsk_map_lookup_elem,
227 .map_lookup_elem_sys_only = xsk_map_lookup_elem_sys_only,
221 .map_update_elem = xsk_map_update_elem, 228 .map_update_elem = xsk_map_update_elem,
222 .map_delete_elem = xsk_map_delete_elem, 229 .map_delete_elem = xsk_map_delete_elem,
223 .map_check_btf = map_check_no_btf, 230 .map_check_btf = map_check_no_btf,
diff --git a/net/core/filter.c b/net/core/filter.c
index 949adc3d9abb..2014d76e0d2a 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -5695,6 +5695,46 @@ BPF_CALL_1(bpf_skb_ecn_set_ce, struct sk_buff *, skb)
5695 return INET_ECN_set_ce(skb); 5695 return INET_ECN_set_ce(skb);
5696} 5696}
5697 5697
5698bool bpf_xdp_sock_is_valid_access(int off, int size, enum bpf_access_type type,
5699 struct bpf_insn_access_aux *info)
5700{
5701 if (off < 0 || off >= offsetofend(struct bpf_xdp_sock, queue_id))
5702 return false;
5703
5704 if (off % size != 0)
5705 return false;
5706
5707 switch (off) {
5708 default:
5709 return size == sizeof(__u32);
5710 }
5711}
5712
5713u32 bpf_xdp_sock_convert_ctx_access(enum bpf_access_type type,
5714 const struct bpf_insn *si,
5715 struct bpf_insn *insn_buf,
5716 struct bpf_prog *prog, u32 *target_size)
5717{
5718 struct bpf_insn *insn = insn_buf;
5719
5720#define BPF_XDP_SOCK_GET(FIELD) \
5721 do { \
5722 BUILD_BUG_ON(FIELD_SIZEOF(struct xdp_sock, FIELD) > \
5723 FIELD_SIZEOF(struct bpf_xdp_sock, FIELD)); \
5724 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_sock, FIELD),\
5725 si->dst_reg, si->src_reg, \
5726 offsetof(struct xdp_sock, FIELD)); \
5727 } while (0)
5728
5729 switch (si->off) {
5730 case offsetof(struct bpf_xdp_sock, queue_id):
5731 BPF_XDP_SOCK_GET(queue_id);
5732 break;
5733 }
5734
5735 return insn - insn_buf;
5736}
5737
5698static const struct bpf_func_proto bpf_skb_ecn_set_ce_proto = { 5738static const struct bpf_func_proto bpf_skb_ecn_set_ce_proto = {
5699 .func = bpf_skb_ecn_set_ce, 5739 .func = bpf_skb_ecn_set_ce,
5700 .gpl_only = false, 5740 .gpl_only = false,
@@ -5897,6 +5937,10 @@ sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
5897 case BPF_FUNC_skc_lookup_tcp: 5937 case BPF_FUNC_skc_lookup_tcp:
5898 return &bpf_sock_addr_skc_lookup_tcp_proto; 5938 return &bpf_sock_addr_skc_lookup_tcp_proto;
5899#endif /* CONFIG_INET */ 5939#endif /* CONFIG_INET */
5940 case BPF_FUNC_sk_storage_get:
5941 return &bpf_sk_storage_get_proto;
5942 case BPF_FUNC_sk_storage_delete:
5943 return &bpf_sk_storage_delete_proto;
5900 default: 5944 default:
5901 return bpf_base_func_proto(func_id); 5945 return bpf_base_func_proto(func_id);
5902 } 5946 }
@@ -5934,6 +5978,10 @@ cg_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
5934 return &bpf_sk_storage_get_proto; 5978 return &bpf_sk_storage_get_proto;
5935 case BPF_FUNC_sk_storage_delete: 5979 case BPF_FUNC_sk_storage_delete:
5936 return &bpf_sk_storage_delete_proto; 5980 return &bpf_sk_storage_delete_proto;
5981#ifdef CONFIG_SOCK_CGROUP_DATA
5982 case BPF_FUNC_skb_cgroup_id:
5983 return &bpf_skb_cgroup_id_proto;
5984#endif
5937#ifdef CONFIG_INET 5985#ifdef CONFIG_INET
5938 case BPF_FUNC_tcp_sock: 5986 case BPF_FUNC_tcp_sock:
5939 return &bpf_tcp_sock_proto; 5987 return &bpf_tcp_sock_proto;
@@ -6114,6 +6162,14 @@ sock_ops_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
6114 return &bpf_get_local_storage_proto; 6162 return &bpf_get_local_storage_proto;
6115 case BPF_FUNC_perf_event_output: 6163 case BPF_FUNC_perf_event_output:
6116 return &bpf_sockopt_event_output_proto; 6164 return &bpf_sockopt_event_output_proto;
6165 case BPF_FUNC_sk_storage_get:
6166 return &bpf_sk_storage_get_proto;
6167 case BPF_FUNC_sk_storage_delete:
6168 return &bpf_sk_storage_delete_proto;
6169#ifdef CONFIG_INET
6170 case BPF_FUNC_tcp_sock:
6171 return &bpf_tcp_sock_proto;
6172#endif /* CONFIG_INET */
6117 default: 6173 default:
6118 return bpf_base_func_proto(func_id); 6174 return bpf_base_func_proto(func_id);
6119 } 6175 }
@@ -6801,6 +6857,13 @@ static bool sock_addr_is_valid_access(int off, int size,
6801 if (size != size_default) 6857 if (size != size_default)
6802 return false; 6858 return false;
6803 break; 6859 break;
6860 case offsetof(struct bpf_sock_addr, sk):
6861 if (type != BPF_READ)
6862 return false;
6863 if (size != sizeof(__u64))
6864 return false;
6865 info->reg_type = PTR_TO_SOCKET;
6866 break;
6804 default: 6867 default:
6805 if (type == BPF_READ) { 6868 if (type == BPF_READ) {
6806 if (size != size_default) 6869 if (size != size_default)
@@ -6844,6 +6907,11 @@ static bool sock_ops_is_valid_access(int off, int size,
6844 if (size != sizeof(__u64)) 6907 if (size != sizeof(__u64))
6845 return false; 6908 return false;
6846 break; 6909 break;
6910 case offsetof(struct bpf_sock_ops, sk):
6911 if (size != sizeof(__u64))
6912 return false;
6913 info->reg_type = PTR_TO_SOCKET_OR_NULL;
6914 break;
6847 default: 6915 default:
6848 if (size != size_default) 6916 if (size != size_default)
6849 return false; 6917 return false;
@@ -7751,6 +7819,11 @@ static u32 sock_addr_convert_ctx_access(enum bpf_access_type type,
7751 struct bpf_sock_addr_kern, struct in6_addr, t_ctx, 7819 struct bpf_sock_addr_kern, struct in6_addr, t_ctx,
7752 s6_addr32[0], BPF_SIZE(si->code), off, tmp_reg); 7820 s6_addr32[0], BPF_SIZE(si->code), off, tmp_reg);
7753 break; 7821 break;
7822 case offsetof(struct bpf_sock_addr, sk):
7823 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sock_addr_kern, sk),
7824 si->dst_reg, si->src_reg,
7825 offsetof(struct bpf_sock_addr_kern, sk));
7826 break;
7754 } 7827 }
7755 7828
7756 return insn - insn_buf; 7829 return insn - insn_buf;
@@ -8010,6 +8083,19 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
8010 SOCK_OPS_GET_OR_SET_FIELD(sk_txhash, sk_txhash, 8083 SOCK_OPS_GET_OR_SET_FIELD(sk_txhash, sk_txhash,
8011 struct sock, type); 8084 struct sock, type);
8012 break; 8085 break;
8086 case offsetof(struct bpf_sock_ops, sk):
8087 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
8088 struct bpf_sock_ops_kern,
8089 is_fullsock),
8090 si->dst_reg, si->src_reg,
8091 offsetof(struct bpf_sock_ops_kern,
8092 is_fullsock));
8093 *insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 1);
8094 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
8095 struct bpf_sock_ops_kern, sk),
8096 si->dst_reg, si->src_reg,
8097 offsetof(struct bpf_sock_ops_kern, sk));
8098 break;
8013 } 8099 }
8014 return insn - insn_buf; 8100 return insn - insn_buf;
8015} 8101}
diff --git a/net/core/sock.c b/net/core/sock.c
index af09a23e4822..ef471f643c95 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1039,6 +1039,10 @@ set_rcvbuf:
1039 } 1039 }
1040 break; 1040 break;
1041 1041
1042 case SO_DETACH_REUSEPORT_BPF:
1043 ret = reuseport_detach_prog(sk);
1044 break;
1045
1042 case SO_DETACH_FILTER: 1046 case SO_DETACH_FILTER:
1043 ret = sk_detach_filter(sk); 1047 ret = sk_detach_filter(sk);
1044 break; 1048 break;
diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c
index dc4aefdf2a08..9408f9264d05 100644
--- a/net/core/sock_reuseport.c
+++ b/net/core/sock_reuseport.c
@@ -332,3 +332,27 @@ int reuseport_attach_prog(struct sock *sk, struct bpf_prog *prog)
332 return 0; 332 return 0;
333} 333}
334EXPORT_SYMBOL(reuseport_attach_prog); 334EXPORT_SYMBOL(reuseport_attach_prog);
335
336int reuseport_detach_prog(struct sock *sk)
337{
338 struct sock_reuseport *reuse;
339 struct bpf_prog *old_prog;
340
341 if (!rcu_access_pointer(sk->sk_reuseport_cb))
342 return sk->sk_reuseport ? -ENOENT : -EINVAL;
343
344 old_prog = NULL;
345 spin_lock_bh(&reuseport_lock);
346 reuse = rcu_dereference_protected(sk->sk_reuseport_cb,
347 lockdep_is_held(&reuseport_lock));
348 rcu_swap_protected(reuse->prog, old_prog,
349 lockdep_is_held(&reuseport_lock));
350 spin_unlock_bh(&reuseport_lock);
351
352 if (!old_prog)
353 return -ENOENT;
354
355 sk_reuseport_prog_free(old_prog);
356 return 0;
357}
358EXPORT_SYMBOL(reuseport_detach_prog);
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index 253e5a2856be..0917f8cf4fab 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -170,21 +170,12 @@ always += ibumad_kern.o
170always += hbm_out_kern.o 170always += hbm_out_kern.o
171 171
172KBUILD_HOSTCFLAGS += -I$(objtree)/usr/include 172KBUILD_HOSTCFLAGS += -I$(objtree)/usr/include
173KBUILD_HOSTCFLAGS += -I$(srctree)/tools/lib/ 173KBUILD_HOSTCFLAGS += -I$(srctree)/tools/lib/bpf/
174KBUILD_HOSTCFLAGS += -I$(srctree)/tools/testing/selftests/bpf/ 174KBUILD_HOSTCFLAGS += -I$(srctree)/tools/testing/selftests/bpf/
175KBUILD_HOSTCFLAGS += -I$(srctree)/tools/lib/ -I$(srctree)/tools/include 175KBUILD_HOSTCFLAGS += -I$(srctree)/tools/lib/ -I$(srctree)/tools/include
176KBUILD_HOSTCFLAGS += -I$(srctree)/tools/perf 176KBUILD_HOSTCFLAGS += -I$(srctree)/tools/perf
177 177
178HOSTCFLAGS_bpf_load.o += -I$(objtree)/usr/include -Wno-unused-variable 178HOSTCFLAGS_bpf_load.o += -I$(objtree)/usr/include -Wno-unused-variable
179HOSTCFLAGS_trace_helpers.o += -I$(srctree)/tools/lib/bpf/
180
181HOSTCFLAGS_trace_output_user.o += -I$(srctree)/tools/lib/bpf/
182HOSTCFLAGS_offwaketime_user.o += -I$(srctree)/tools/lib/bpf/
183HOSTCFLAGS_spintest_user.o += -I$(srctree)/tools/lib/bpf/
184HOSTCFLAGS_trace_event_user.o += -I$(srctree)/tools/lib/bpf/
185HOSTCFLAGS_sampleip_user.o += -I$(srctree)/tools/lib/bpf/
186HOSTCFLAGS_task_fd_query_user.o += -I$(srctree)/tools/lib/bpf/
187HOSTCFLAGS_xdp_sample_pkts_user.o += -I$(srctree)/tools/lib/bpf/
188 179
189KBUILD_HOSTLDLIBS += $(LIBBPF) -lelf 180KBUILD_HOSTLDLIBS += $(LIBBPF) -lelf
190HOSTLDLIBS_tracex4 += -lrt 181HOSTLDLIBS_tracex4 += -lrt
@@ -206,6 +197,17 @@ HOSTCC = $(CROSS_COMPILE)gcc
206CLANG_ARCH_ARGS = -target $(ARCH) 197CLANG_ARCH_ARGS = -target $(ARCH)
207endif 198endif
208 199
200# Don't evaluate probes and warnings if we need to run make recursively
201ifneq ($(src),)
202HDR_PROBE := $(shell echo "\#include <linux/types.h>\n struct list_head { int a; }; int main() { return 0; }" | \
203 $(HOSTCC) $(KBUILD_HOSTCFLAGS) -x c - -o /dev/null 2>/dev/null && \
204 echo okay)
205
206ifeq ($(HDR_PROBE),)
207$(warning WARNING: Detected possible issues with include path.)
208$(warning WARNING: Please install kernel headers locally (make headers_install).)
209endif
210
209BTF_LLC_PROBE := $(shell $(LLC) -march=bpf -mattr=help 2>&1 | grep dwarfris) 211BTF_LLC_PROBE := $(shell $(LLC) -march=bpf -mattr=help 2>&1 | grep dwarfris)
210BTF_PAHOLE_PROBE := $(shell $(BTF_PAHOLE) --help 2>&1 | grep BTF) 212BTF_PAHOLE_PROBE := $(shell $(BTF_PAHOLE) --help 2>&1 | grep BTF)
211BTF_OBJCOPY_PROBE := $(shell $(LLVM_OBJCOPY) --help 2>&1 | grep -i 'usage.*llvm') 213BTF_OBJCOPY_PROBE := $(shell $(LLVM_OBJCOPY) --help 2>&1 | grep -i 'usage.*llvm')
@@ -223,6 +225,7 @@ ifneq ($(and $(BTF_LLC_PROBE),$(BTF_PAHOLE_PROBE),$(BTF_OBJCOPY_PROBE)),)
223 DWARF2BTF = y 225 DWARF2BTF = y
224endif 226endif
225endif 227endif
228endif
226 229
227# Trick to allow make to be run from this directory 230# Trick to allow make to be run from this directory
228all: 231all:
diff --git a/samples/bpf/fds_example.c b/samples/bpf/fds_example.c
index e51eb060244e..2d4b717726b6 100644
--- a/samples/bpf/fds_example.c
+++ b/samples/bpf/fds_example.c
@@ -14,7 +14,7 @@
14 14
15#include <bpf/bpf.h> 15#include <bpf/bpf.h>
16 16
17#include "bpf/libbpf.h" 17#include "libbpf.h"
18#include "bpf_insn.h" 18#include "bpf_insn.h"
19#include "sock_example.h" 19#include "sock_example.h"
20 20
diff --git a/samples/bpf/hbm.c b/samples/bpf/hbm.c
index 480b7ad6a1f2..b905b32ff185 100644
--- a/samples/bpf/hbm.c
+++ b/samples/bpf/hbm.c
@@ -50,8 +50,8 @@
50#include "cgroup_helpers.h" 50#include "cgroup_helpers.h"
51#include "hbm.h" 51#include "hbm.h"
52#include "bpf_util.h" 52#include "bpf_util.h"
53#include "bpf/bpf.h" 53#include "bpf.h"
54#include "bpf/libbpf.h" 54#include "libbpf.h"
55 55
56bool outFlag = true; 56bool outFlag = true;
57int minRate = 1000; /* cgroup rate limit in Mbps */ 57int minRate = 1000; /* cgroup rate limit in Mbps */
@@ -411,7 +411,7 @@ static void Usage(void)
411 " -l also limit flows using loopback\n" 411 " -l also limit flows using loopback\n"
412 " -n <#> to create cgroup \"/hbm#\" and attach prog\n" 412 " -n <#> to create cgroup \"/hbm#\" and attach prog\n"
413 " Default is /hbm1\n" 413 " Default is /hbm1\n"
414 " --no_cn disable CN notifcations\n" 414 " --no_cn disable CN notifications\n"
415 " -r <rate> Rate in Mbps\n" 415 " -r <rate> Rate in Mbps\n"
416 " -s Update HBM stats\n" 416 " -s Update HBM stats\n"
417 " -t <time> Exit after specified seconds (default is 0)\n" 417 " -t <time> Exit after specified seconds (default is 0)\n"
diff --git a/samples/bpf/ibumad_user.c b/samples/bpf/ibumad_user.c
index 097d76143363..cb5a8f994849 100644
--- a/samples/bpf/ibumad_user.c
+++ b/samples/bpf/ibumad_user.c
@@ -25,7 +25,7 @@
25 25
26#include "bpf_load.h" 26#include "bpf_load.h"
27#include "bpf_util.h" 27#include "bpf_util.h"
28#include "bpf/libbpf.h" 28#include "libbpf.h"
29 29
30static void dump_counts(int fd) 30static void dump_counts(int fd)
31{ 31{
diff --git a/samples/bpf/sockex1_user.c b/samples/bpf/sockex1_user.c
index 7f90796ae15a..a219442afbee 100644
--- a/samples/bpf/sockex1_user.c
+++ b/samples/bpf/sockex1_user.c
@@ -3,7 +3,7 @@
3#include <assert.h> 3#include <assert.h>
4#include <linux/bpf.h> 4#include <linux/bpf.h>
5#include <bpf/bpf.h> 5#include <bpf/bpf.h>
6#include "bpf/libbpf.h" 6#include "libbpf.h"
7#include "sock_example.h" 7#include "sock_example.h"
8#include <unistd.h> 8#include <unistd.h>
9#include <arpa/inet.h> 9#include <arpa/inet.h>
diff --git a/samples/bpf/sockex2_user.c b/samples/bpf/sockex2_user.c
index bc257333ad92..6de383ddd08b 100644
--- a/samples/bpf/sockex2_user.c
+++ b/samples/bpf/sockex2_user.c
@@ -3,7 +3,7 @@
3#include <assert.h> 3#include <assert.h>
4#include <linux/bpf.h> 4#include <linux/bpf.h>
5#include <bpf/bpf.h> 5#include <bpf/bpf.h>
6#include "bpf/libbpf.h" 6#include "libbpf.h"
7#include "sock_example.h" 7#include "sock_example.h"
8#include <unistd.h> 8#include <unistd.h>
9#include <arpa/inet.h> 9#include <arpa/inet.h>
diff --git a/samples/bpf/xdp1_user.c b/samples/bpf/xdp1_user.c
index 5b39421adb44..a8e5fa02e8a8 100644
--- a/samples/bpf/xdp1_user.c
+++ b/samples/bpf/xdp1_user.c
@@ -15,8 +15,8 @@
15#include <net/if.h> 15#include <net/if.h>
16 16
17#include "bpf_util.h" 17#include "bpf_util.h"
18#include "bpf/bpf.h" 18#include "bpf.h"
19#include "bpf/libbpf.h" 19#include "libbpf.h"
20 20
21static int ifindex; 21static int ifindex;
22static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST; 22static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
diff --git a/samples/bpf/xdp_adjust_tail_user.c b/samples/bpf/xdp_adjust_tail_user.c
index 07e1b9269e49..586ff751aba9 100644
--- a/samples/bpf/xdp_adjust_tail_user.c
+++ b/samples/bpf/xdp_adjust_tail_user.c
@@ -18,8 +18,8 @@
18#include <netinet/ether.h> 18#include <netinet/ether.h>
19#include <unistd.h> 19#include <unistd.h>
20#include <time.h> 20#include <time.h>
21#include "bpf/bpf.h" 21#include "bpf.h"
22#include "bpf/libbpf.h" 22#include "libbpf.h"
23 23
24#define STATS_INTERVAL_S 2U 24#define STATS_INTERVAL_S 2U
25 25
diff --git a/samples/bpf/xdp_fwd_user.c b/samples/bpf/xdp_fwd_user.c
index f88e1d7093d6..5b46ee12c696 100644
--- a/samples/bpf/xdp_fwd_user.c
+++ b/samples/bpf/xdp_fwd_user.c
@@ -24,7 +24,7 @@
24#include <fcntl.h> 24#include <fcntl.h>
25#include <libgen.h> 25#include <libgen.h>
26 26
27#include "bpf/libbpf.h" 27#include "libbpf.h"
28#include <bpf/bpf.h> 28#include <bpf/bpf.h>
29 29
30 30
diff --git a/samples/bpf/xdp_redirect_cpu_user.c b/samples/bpf/xdp_redirect_cpu_user.c
index 586b294d72d3..f5dc7e1f8bc6 100644
--- a/samples/bpf/xdp_redirect_cpu_user.c
+++ b/samples/bpf/xdp_redirect_cpu_user.c
@@ -25,7 +25,7 @@ static const char *__doc__ =
25#define MAX_PROG 6 25#define MAX_PROG 6
26 26
27#include <bpf/bpf.h> 27#include <bpf/bpf.h>
28#include "bpf/libbpf.h" 28#include "libbpf.h"
29 29
30#include "bpf_util.h" 30#include "bpf_util.h"
31 31
diff --git a/samples/bpf/xdp_redirect_map_user.c b/samples/bpf/xdp_redirect_map_user.c
index be317f5f058f..15bb6f67f9c3 100644
--- a/samples/bpf/xdp_redirect_map_user.c
+++ b/samples/bpf/xdp_redirect_map_user.c
@@ -16,7 +16,7 @@
16 16
17#include "bpf_util.h" 17#include "bpf_util.h"
18#include <bpf/bpf.h> 18#include <bpf/bpf.h>
19#include "bpf/libbpf.h" 19#include "libbpf.h"
20 20
21static int ifindex_in; 21static int ifindex_in;
22static int ifindex_out; 22static int ifindex_out;
diff --git a/samples/bpf/xdp_redirect_user.c b/samples/bpf/xdp_redirect_user.c
index 09747bee6668..ce71be187205 100644
--- a/samples/bpf/xdp_redirect_user.c
+++ b/samples/bpf/xdp_redirect_user.c
@@ -16,7 +16,7 @@
16 16
17#include "bpf_util.h" 17#include "bpf_util.h"
18#include <bpf/bpf.h> 18#include <bpf/bpf.h>
19#include "bpf/libbpf.h" 19#include "libbpf.h"
20 20
21static int ifindex_in; 21static int ifindex_in;
22static int ifindex_out; 22static int ifindex_out;
diff --git a/samples/bpf/xdp_router_ipv4_user.c b/samples/bpf/xdp_router_ipv4_user.c
index 1f66419631c3..1469b66ebad1 100644
--- a/samples/bpf/xdp_router_ipv4_user.c
+++ b/samples/bpf/xdp_router_ipv4_user.c
@@ -21,7 +21,7 @@
21#include <sys/ioctl.h> 21#include <sys/ioctl.h>
22#include <sys/syscall.h> 22#include <sys/syscall.h>
23#include "bpf_util.h" 23#include "bpf_util.h"
24#include "bpf/libbpf.h" 24#include "libbpf.h"
25#include <sys/resource.h> 25#include <sys/resource.h>
26#include <libgen.h> 26#include <libgen.h>
27 27
diff --git a/samples/bpf/xdp_rxq_info_user.c b/samples/bpf/xdp_rxq_info_user.c
index 1210f3b170f0..c7e4e45d824a 100644
--- a/samples/bpf/xdp_rxq_info_user.c
+++ b/samples/bpf/xdp_rxq_info_user.c
@@ -22,8 +22,8 @@ static const char *__doc__ = " XDP RX-queue info extract example\n\n"
22#include <arpa/inet.h> 22#include <arpa/inet.h>
23#include <linux/if_link.h> 23#include <linux/if_link.h>
24 24
25#include "bpf/bpf.h" 25#include "bpf.h"
26#include "bpf/libbpf.h" 26#include "libbpf.h"
27#include "bpf_util.h" 27#include "bpf_util.h"
28 28
29static int ifindex = -1; 29static int ifindex = -1;
diff --git a/samples/bpf/xdp_tx_iptunnel_user.c b/samples/bpf/xdp_tx_iptunnel_user.c
index e746a00d122e..394896430712 100644
--- a/samples/bpf/xdp_tx_iptunnel_user.c
+++ b/samples/bpf/xdp_tx_iptunnel_user.c
@@ -14,7 +14,7 @@
14#include <netinet/ether.h> 14#include <netinet/ether.h>
15#include <unistd.h> 15#include <unistd.h>
16#include <time.h> 16#include <time.h>
17#include "bpf/libbpf.h" 17#include "libbpf.h"
18#include <bpf/bpf.h> 18#include <bpf/bpf.h>
19#include "bpf_util.h" 19#include "bpf_util.h"
20#include "xdp_tx_iptunnel_common.h" 20#include "xdp_tx_iptunnel_common.h"
diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c
index d08ee1ab7bb4..0f5eb0d7f2df 100644
--- a/samples/bpf/xdpsock_user.c
+++ b/samples/bpf/xdpsock_user.c
@@ -27,8 +27,8 @@
27#include <time.h> 27#include <time.h>
28#include <unistd.h> 28#include <unistd.h>
29 29
30#include "bpf/libbpf.h" 30#include "libbpf.h"
31#include "bpf/xsk.h" 31#include "xsk.h"
32#include <bpf/bpf.h> 32#include <bpf/bpf.h>
33 33
34#ifndef SOL_XDP 34#ifndef SOL_XDP
diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c
index f7261fad45c1..5215e0870bcb 100644
--- a/tools/bpf/bpftool/common.c
+++ b/tools/bpf/bpftool/common.c
@@ -21,6 +21,7 @@
21#include <sys/vfs.h> 21#include <sys/vfs.h>
22 22
23#include <bpf.h> 23#include <bpf.h>
24#include <libbpf.h> /* libbpf_num_possible_cpus */
24 25
25#include "main.h" 26#include "main.h"
26 27
@@ -439,57 +440,13 @@ unsigned int get_page_size(void)
439 440
440unsigned int get_possible_cpus(void) 441unsigned int get_possible_cpus(void)
441{ 442{
442 static unsigned int result; 443 int cpus = libbpf_num_possible_cpus();
443 char buf[128];
444 long int n;
445 char *ptr;
446 int fd;
447
448 if (result)
449 return result;
450
451 fd = open("/sys/devices/system/cpu/possible", O_RDONLY);
452 if (fd < 0) {
453 p_err("can't open sysfs possible cpus");
454 exit(-1);
455 }
456
457 n = read(fd, buf, sizeof(buf));
458 if (n < 2) {
459 p_err("can't read sysfs possible cpus");
460 exit(-1);
461 }
462 close(fd);
463 444
464 if (n == sizeof(buf)) { 445 if (cpus < 0) {
465 p_err("read sysfs possible cpus overflow"); 446 p_err("Can't get # of possible cpus: %s", strerror(-cpus));
466 exit(-1); 447 exit(-1);
467 } 448 }
468 449 return cpus;
469 ptr = buf;
470 n = 0;
471 while (*ptr && *ptr != '\n') {
472 unsigned int a, b;
473
474 if (sscanf(ptr, "%u-%u", &a, &b) == 2) {
475 n += b - a + 1;
476
477 ptr = strchr(ptr, '-') + 1;
478 } else if (sscanf(ptr, "%u", &a) == 1) {
479 n++;
480 } else {
481 assert(0);
482 }
483
484 while (isdigit(*ptr))
485 ptr++;
486 if (*ptr == ',')
487 ptr++;
488 }
489
490 result = n;
491
492 return result;
493} 450}
494 451
495static char * 452static char *
diff --git a/tools/include/uapi/asm-generic/socket.h b/tools/include/uapi/asm-generic/socket.h
new file mode 100644
index 000000000000..77f7c1638eb1
--- /dev/null
+++ b/tools/include/uapi/asm-generic/socket.h
@@ -0,0 +1,147 @@
1/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
2#ifndef __ASM_GENERIC_SOCKET_H
3#define __ASM_GENERIC_SOCKET_H
4
5#include <linux/posix_types.h>
6#include <asm/sockios.h>
7
8/* For setsockopt(2) */
9#define SOL_SOCKET 1
10
11#define SO_DEBUG 1
12#define SO_REUSEADDR 2
13#define SO_TYPE 3
14#define SO_ERROR 4
15#define SO_DONTROUTE 5
16#define SO_BROADCAST 6
17#define SO_SNDBUF 7
18#define SO_RCVBUF 8
19#define SO_SNDBUFFORCE 32
20#define SO_RCVBUFFORCE 33
21#define SO_KEEPALIVE 9
22#define SO_OOBINLINE 10
23#define SO_NO_CHECK 11
24#define SO_PRIORITY 12
25#define SO_LINGER 13
26#define SO_BSDCOMPAT 14
27#define SO_REUSEPORT 15
28#ifndef SO_PASSCRED /* powerpc only differs in these */
29#define SO_PASSCRED 16
30#define SO_PEERCRED 17
31#define SO_RCVLOWAT 18
32#define SO_SNDLOWAT 19
33#define SO_RCVTIMEO_OLD 20
34#define SO_SNDTIMEO_OLD 21
35#endif
36
37/* Security levels - as per NRL IPv6 - don't actually do anything */
38#define SO_SECURITY_AUTHENTICATION 22
39#define SO_SECURITY_ENCRYPTION_TRANSPORT 23
40#define SO_SECURITY_ENCRYPTION_NETWORK 24
41
42#define SO_BINDTODEVICE 25
43
44/* Socket filtering */
45#define SO_ATTACH_FILTER 26
46#define SO_DETACH_FILTER 27
47#define SO_GET_FILTER SO_ATTACH_FILTER
48
49#define SO_PEERNAME 28
50
51#define SO_ACCEPTCONN 30
52
53#define SO_PEERSEC 31
54#define SO_PASSSEC 34
55
56#define SO_MARK 36
57
58#define SO_PROTOCOL 38
59#define SO_DOMAIN 39
60
61#define SO_RXQ_OVFL 40
62
63#define SO_WIFI_STATUS 41
64#define SCM_WIFI_STATUS SO_WIFI_STATUS
65#define SO_PEEK_OFF 42
66
67/* Instruct lower device to use last 4-bytes of skb data as FCS */
68#define SO_NOFCS 43
69
70#define SO_LOCK_FILTER 44
71
72#define SO_SELECT_ERR_QUEUE 45
73
74#define SO_BUSY_POLL 46
75
76#define SO_MAX_PACING_RATE 47
77
78#define SO_BPF_EXTENSIONS 48
79
80#define SO_INCOMING_CPU 49
81
82#define SO_ATTACH_BPF 50
83#define SO_DETACH_BPF SO_DETACH_FILTER
84
85#define SO_ATTACH_REUSEPORT_CBPF 51
86#define SO_ATTACH_REUSEPORT_EBPF 52
87
88#define SO_CNX_ADVICE 53
89
90#define SCM_TIMESTAMPING_OPT_STATS 54
91
92#define SO_MEMINFO 55
93
94#define SO_INCOMING_NAPI_ID 56
95
96#define SO_COOKIE 57
97
98#define SCM_TIMESTAMPING_PKTINFO 58
99
100#define SO_PEERGROUPS 59
101
102#define SO_ZEROCOPY 60
103
104#define SO_TXTIME 61
105#define SCM_TXTIME SO_TXTIME
106
107#define SO_BINDTOIFINDEX 62
108
109#define SO_TIMESTAMP_OLD 29
110#define SO_TIMESTAMPNS_OLD 35
111#define SO_TIMESTAMPING_OLD 37
112
113#define SO_TIMESTAMP_NEW 63
114#define SO_TIMESTAMPNS_NEW 64
115#define SO_TIMESTAMPING_NEW 65
116
117#define SO_RCVTIMEO_NEW 66
118#define SO_SNDTIMEO_NEW 67
119
120#define SO_DETACH_REUSEPORT_BPF 68
121
122#if !defined(__KERNEL__)
123
124#if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__))
125/* on 64-bit and x32, avoid the ?: operator */
126#define SO_TIMESTAMP SO_TIMESTAMP_OLD
127#define SO_TIMESTAMPNS SO_TIMESTAMPNS_OLD
128#define SO_TIMESTAMPING SO_TIMESTAMPING_OLD
129
130#define SO_RCVTIMEO SO_RCVTIMEO_OLD
131#define SO_SNDTIMEO SO_SNDTIMEO_OLD
132#else
133#define SO_TIMESTAMP (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_TIMESTAMP_OLD : SO_TIMESTAMP_NEW)
134#define SO_TIMESTAMPNS (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_TIMESTAMPNS_OLD : SO_TIMESTAMPNS_NEW)
135#define SO_TIMESTAMPING (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_TIMESTAMPING_OLD : SO_TIMESTAMPING_NEW)
136
137#define SO_RCVTIMEO (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_RCVTIMEO_OLD : SO_RCVTIMEO_NEW)
138#define SO_SNDTIMEO (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_SNDTIMEO_OLD : SO_SNDTIMEO_NEW)
139#endif
140
141#define SCM_TIMESTAMP SO_TIMESTAMP
142#define SCM_TIMESTAMPNS SO_TIMESTAMPNS
143#define SCM_TIMESTAMPING SO_TIMESTAMPING
144
145#endif
146
147#endif /* __ASM_GENERIC_SOCKET_H */
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 0e879721f75a..b077507efa3f 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -3085,6 +3085,10 @@ struct bpf_sock_tuple {
3085 }; 3085 };
3086}; 3086};
3087 3087
3088struct bpf_xdp_sock {
3089 __u32 queue_id;
3090};
3091
3088#define XDP_PACKET_HEADROOM 256 3092#define XDP_PACKET_HEADROOM 256
3089 3093
3090/* User return codes for XDP prog type. 3094/* User return codes for XDP prog type.
@@ -3245,6 +3249,7 @@ struct bpf_sock_addr {
3245 __u32 msg_src_ip6[4]; /* Allows 1,2,4-byte read an 4-byte write. 3249 __u32 msg_src_ip6[4]; /* Allows 1,2,4-byte read an 4-byte write.
3246 * Stored in network byte order. 3250 * Stored in network byte order.
3247 */ 3251 */
3252 __bpf_md_ptr(struct bpf_sock *, sk);
3248}; 3253};
3249 3254
3250/* User bpf_sock_ops struct to access socket values and specify request ops 3255/* User bpf_sock_ops struct to access socket values and specify request ops
@@ -3296,6 +3301,7 @@ struct bpf_sock_ops {
3296 __u32 sk_txhash; 3301 __u32 sk_txhash;
3297 __u64 bytes_received; 3302 __u64 bytes_received;
3298 __u64 bytes_acked; 3303 __u64 bytes_acked;
3304 __bpf_md_ptr(struct bpf_sock *, sk);
3299}; 3305};
3300 3306
3301/* Definitions for bpf_sock_ops_cb_flags */ 3307/* Definitions for bpf_sock_ops_cb_flags */
diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index 0d4b4fe10a84..c7d7993c44bb 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -26,10 +26,11 @@
26#include <memory.h> 26#include <memory.h>
27#include <unistd.h> 27#include <unistd.h>
28#include <asm/unistd.h> 28#include <asm/unistd.h>
29#include <errno.h>
29#include <linux/bpf.h> 30#include <linux/bpf.h>
30#include "bpf.h" 31#include "bpf.h"
31#include "libbpf.h" 32#include "libbpf.h"
32#include <errno.h> 33#include "libbpf_internal.h"
33 34
34/* 35/*
35 * When building perf, unistd.h is overridden. __NR_bpf is 36 * When building perf, unistd.h is overridden. __NR_bpf is
@@ -53,10 +54,6 @@
53# endif 54# endif
54#endif 55#endif
55 56
56#ifndef min
57#define min(x, y) ((x) < (y) ? (x) : (y))
58#endif
59
60static inline __u64 ptr_to_u64(const void *ptr) 57static inline __u64 ptr_to_u64(const void *ptr)
61{ 58{
62 return (__u64) (unsigned long) ptr; 59 return (__u64) (unsigned long) ptr;
diff --git a/tools/lib/bpf/bpf_prog_linfo.c b/tools/lib/bpf/bpf_prog_linfo.c
index 6978314ea7f6..8c67561c93b0 100644
--- a/tools/lib/bpf/bpf_prog_linfo.c
+++ b/tools/lib/bpf/bpf_prog_linfo.c
@@ -6,10 +6,7 @@
6#include <linux/err.h> 6#include <linux/err.h>
7#include <linux/bpf.h> 7#include <linux/bpf.h>
8#include "libbpf.h" 8#include "libbpf.h"
9 9#include "libbpf_internal.h"
10#ifndef min
11#define min(x, y) ((x) < (y) ? (x) : (y))
12#endif
13 10
14struct bpf_prog_linfo { 11struct bpf_prog_linfo {
15 void *raw_linfo; 12 void *raw_linfo;
diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index b2478e98c367..467224feb43b 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -16,9 +16,6 @@
16#include "libbpf_internal.h" 16#include "libbpf_internal.h"
17#include "hashmap.h" 17#include "hashmap.h"
18 18
19#define max(a, b) ((a) > (b) ? (a) : (b))
20#define min(a, b) ((a) < (b) ? (a) : (b))
21
22#define BTF_MAX_NR_TYPES 0x7fffffff 19#define BTF_MAX_NR_TYPES 0x7fffffff
23#define BTF_MAX_STR_OFFSET 0x7fffffff 20#define BTF_MAX_STR_OFFSET 0x7fffffff
24 21
diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h
index ba4ffa831aa4..88a52ae56fc6 100644
--- a/tools/lib/bpf/btf.h
+++ b/tools/lib/bpf/btf.h
@@ -17,6 +17,7 @@ extern "C" {
17 17
18#define BTF_ELF_SEC ".BTF" 18#define BTF_ELF_SEC ".BTF"
19#define BTF_EXT_ELF_SEC ".BTF.ext" 19#define BTF_EXT_ELF_SEC ".BTF.ext"
20#define MAPS_ELF_SEC ".maps"
20 21
21struct btf; 22struct btf;
22struct btf_ext; 23struct btf_ext;
diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c
index 4b22db77e2cc..7065bb5b2752 100644
--- a/tools/lib/bpf/btf_dump.c
+++ b/tools/lib/bpf/btf_dump.c
@@ -18,9 +18,6 @@
18#include "libbpf.h" 18#include "libbpf.h"
19#include "libbpf_internal.h" 19#include "libbpf_internal.h"
20 20
21#define min(x, y) ((x) < (y) ? (x) : (y))
22#define max(x, y) ((x) < (y) ? (y) : (x))
23
24static const char PREFIXES[] = "\t\t\t\t\t\t\t\t\t\t\t\t\t"; 21static const char PREFIXES[] = "\t\t\t\t\t\t\t\t\t\t\t\t\t";
25static const size_t PREFIX_CNT = sizeof(PREFIXES) - 1; 22static const size_t PREFIX_CNT = sizeof(PREFIXES) - 1;
26 23
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index c8fbc050fd78..4259c9f0cfe7 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -207,7 +207,8 @@ static const char * const libbpf_type_to_btf_name[] = {
207struct bpf_map { 207struct bpf_map {
208 int fd; 208 int fd;
209 char *name; 209 char *name;
210 size_t offset; 210 int sec_idx;
211 size_t sec_offset;
211 int map_ifindex; 212 int map_ifindex;
212 int inner_map_fd; 213 int inner_map_fd;
213 struct bpf_map_def def; 214 struct bpf_map_def def;
@@ -234,6 +235,7 @@ struct bpf_object {
234 size_t nr_programs; 235 size_t nr_programs;
235 struct bpf_map *maps; 236 struct bpf_map *maps;
236 size_t nr_maps; 237 size_t nr_maps;
238 size_t maps_cap;
237 struct bpf_secdata sections; 239 struct bpf_secdata sections;
238 240
239 bool loaded; 241 bool loaded;
@@ -260,6 +262,7 @@ struct bpf_object {
260 } *reloc; 262 } *reloc;
261 int nr_reloc; 263 int nr_reloc;
262 int maps_shndx; 264 int maps_shndx;
265 int btf_maps_shndx;
263 int text_shndx; 266 int text_shndx;
264 int data_shndx; 267 int data_shndx;
265 int rodata_shndx; 268 int rodata_shndx;
@@ -512,6 +515,7 @@ static struct bpf_object *bpf_object__new(const char *path,
512 obj->efile.obj_buf = obj_buf; 515 obj->efile.obj_buf = obj_buf;
513 obj->efile.obj_buf_sz = obj_buf_sz; 516 obj->efile.obj_buf_sz = obj_buf_sz;
514 obj->efile.maps_shndx = -1; 517 obj->efile.maps_shndx = -1;
518 obj->efile.btf_maps_shndx = -1;
515 obj->efile.data_shndx = -1; 519 obj->efile.data_shndx = -1;
516 obj->efile.rodata_shndx = -1; 520 obj->efile.rodata_shndx = -1;
517 obj->efile.bss_shndx = -1; 521 obj->efile.bss_shndx = -1;
@@ -646,7 +650,9 @@ static int compare_bpf_map(const void *_a, const void *_b)
646 const struct bpf_map *a = _a; 650 const struct bpf_map *a = _a;
647 const struct bpf_map *b = _b; 651 const struct bpf_map *b = _b;
648 652
649 return a->offset - b->offset; 653 if (a->sec_idx != b->sec_idx)
654 return a->sec_idx - b->sec_idx;
655 return a->sec_offset - b->sec_offset;
650} 656}
651 657
652static bool bpf_map_type__is_map_in_map(enum bpf_map_type type) 658static bool bpf_map_type__is_map_in_map(enum bpf_map_type type)
@@ -763,24 +769,55 @@ int bpf_object__variable_offset(const struct bpf_object *obj, const char *name,
763 return -ENOENT; 769 return -ENOENT;
764} 770}
765 771
766static bool bpf_object__has_maps(const struct bpf_object *obj) 772static struct bpf_map *bpf_object__add_map(struct bpf_object *obj)
767{ 773{
768 return obj->efile.maps_shndx >= 0 || 774 struct bpf_map *new_maps;
769 obj->efile.data_shndx >= 0 || 775 size_t new_cap;
770 obj->efile.rodata_shndx >= 0 || 776 int i;
771 obj->efile.bss_shndx >= 0; 777
778 if (obj->nr_maps < obj->maps_cap)
779 return &obj->maps[obj->nr_maps++];
780
781 new_cap = max(4ul, obj->maps_cap * 3 / 2);
782 new_maps = realloc(obj->maps, new_cap * sizeof(*obj->maps));
783 if (!new_maps) {
784 pr_warning("alloc maps for object failed\n");
785 return ERR_PTR(-ENOMEM);
786 }
787
788 obj->maps_cap = new_cap;
789 obj->maps = new_maps;
790
791 /* zero out new maps */
792 memset(obj->maps + obj->nr_maps, 0,
793 (obj->maps_cap - obj->nr_maps) * sizeof(*obj->maps));
794 /*
795 * fill all fd with -1 so won't close incorrect fd (fd=0 is stdin)
796 * when failure (zclose won't close negative fd)).
797 */
798 for (i = obj->nr_maps; i < obj->maps_cap; i++) {
799 obj->maps[i].fd = -1;
800 obj->maps[i].inner_map_fd = -1;
801 }
802
803 return &obj->maps[obj->nr_maps++];
772} 804}
773 805
774static int 806static int
775bpf_object__init_internal_map(struct bpf_object *obj, struct bpf_map *map, 807bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type,
776 enum libbpf_map_type type, Elf_Data *data, 808 int sec_idx, Elf_Data *data, void **data_buff)
777 void **data_buff)
778{ 809{
779 struct bpf_map_def *def = &map->def;
780 char map_name[BPF_OBJ_NAME_LEN]; 810 char map_name[BPF_OBJ_NAME_LEN];
811 struct bpf_map_def *def;
812 struct bpf_map *map;
813
814 map = bpf_object__add_map(obj);
815 if (IS_ERR(map))
816 return PTR_ERR(map);
781 817
782 map->libbpf_type = type; 818 map->libbpf_type = type;
783 map->offset = ~(typeof(map->offset))0; 819 map->sec_idx = sec_idx;
820 map->sec_offset = 0;
784 snprintf(map_name, sizeof(map_name), "%.8s%.7s", obj->name, 821 snprintf(map_name, sizeof(map_name), "%.8s%.7s", obj->name,
785 libbpf_type_to_btf_name[type]); 822 libbpf_type_to_btf_name[type]);
786 map->name = strdup(map_name); 823 map->name = strdup(map_name);
@@ -788,7 +825,10 @@ bpf_object__init_internal_map(struct bpf_object *obj, struct bpf_map *map,
788 pr_warning("failed to alloc map name\n"); 825 pr_warning("failed to alloc map name\n");
789 return -ENOMEM; 826 return -ENOMEM;
790 } 827 }
828 pr_debug("map '%s' (global data): at sec_idx %d, offset %zu.\n",
829 map_name, map->sec_idx, map->sec_offset);
791 830
831 def = &map->def;
792 def->type = BPF_MAP_TYPE_ARRAY; 832 def->type = BPF_MAP_TYPE_ARRAY;
793 def->key_size = sizeof(int); 833 def->key_size = sizeof(int);
794 def->value_size = data->d_size; 834 def->value_size = data->d_size;
@@ -808,29 +848,61 @@ bpf_object__init_internal_map(struct bpf_object *obj, struct bpf_map *map,
808 return 0; 848 return 0;
809} 849}
810 850
811static int bpf_object__init_maps(struct bpf_object *obj, int flags) 851static int bpf_object__init_global_data_maps(struct bpf_object *obj)
852{
853 int err;
854
855 if (!obj->caps.global_data)
856 return 0;
857 /*
858 * Populate obj->maps with libbpf internal maps.
859 */
860 if (obj->efile.data_shndx >= 0) {
861 err = bpf_object__init_internal_map(obj, LIBBPF_MAP_DATA,
862 obj->efile.data_shndx,
863 obj->efile.data,
864 &obj->sections.data);
865 if (err)
866 return err;
867 }
868 if (obj->efile.rodata_shndx >= 0) {
869 err = bpf_object__init_internal_map(obj, LIBBPF_MAP_RODATA,
870 obj->efile.rodata_shndx,
871 obj->efile.rodata,
872 &obj->sections.rodata);
873 if (err)
874 return err;
875 }
876 if (obj->efile.bss_shndx >= 0) {
877 err = bpf_object__init_internal_map(obj, LIBBPF_MAP_BSS,
878 obj->efile.bss_shndx,
879 obj->efile.bss, NULL);
880 if (err)
881 return err;
882 }
883 return 0;
884}
885
886static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict)
812{ 887{
813 int i, map_idx, map_def_sz = 0, nr_syms, nr_maps = 0, nr_maps_glob = 0;
814 bool strict = !(flags & MAPS_RELAX_COMPAT);
815 Elf_Data *symbols = obj->efile.symbols; 888 Elf_Data *symbols = obj->efile.symbols;
889 int i, map_def_sz = 0, nr_maps = 0, nr_syms;
816 Elf_Data *data = NULL; 890 Elf_Data *data = NULL;
817 int ret = 0; 891 Elf_Scn *scn;
892
893 if (obj->efile.maps_shndx < 0)
894 return 0;
818 895
819 if (!symbols) 896 if (!symbols)
820 return -EINVAL; 897 return -EINVAL;
821 nr_syms = symbols->d_size / sizeof(GElf_Sym);
822 898
823 if (obj->efile.maps_shndx >= 0) { 899 scn = elf_getscn(obj->efile.elf, obj->efile.maps_shndx);
824 Elf_Scn *scn = elf_getscn(obj->efile.elf, 900 if (scn)
825 obj->efile.maps_shndx); 901 data = elf_getdata(scn, NULL);
826 902 if (!scn || !data) {
827 if (scn) 903 pr_warning("failed to get Elf_Data from map section %d\n",
828 data = elf_getdata(scn, NULL); 904 obj->efile.maps_shndx);
829 if (!scn || !data) { 905 return -EINVAL;
830 pr_warning("failed to get Elf_Data from map section %d\n",
831 obj->efile.maps_shndx);
832 return -EINVAL;
833 }
834 } 906 }
835 907
836 /* 908 /*
@@ -840,16 +912,8 @@ static int bpf_object__init_maps(struct bpf_object *obj, int flags)
840 * 912 *
841 * TODO: Detect array of map and report error. 913 * TODO: Detect array of map and report error.
842 */ 914 */
843 if (obj->caps.global_data) { 915 nr_syms = symbols->d_size / sizeof(GElf_Sym);
844 if (obj->efile.data_shndx >= 0) 916 for (i = 0; i < nr_syms; i++) {
845 nr_maps_glob++;
846 if (obj->efile.rodata_shndx >= 0)
847 nr_maps_glob++;
848 if (obj->efile.bss_shndx >= 0)
849 nr_maps_glob++;
850 }
851
852 for (i = 0; data && i < nr_syms; i++) {
853 GElf_Sym sym; 917 GElf_Sym sym;
854 918
855 if (!gelf_getsym(symbols, i, &sym)) 919 if (!gelf_getsym(symbols, i, &sym))
@@ -858,79 +922,59 @@ static int bpf_object__init_maps(struct bpf_object *obj, int flags)
858 continue; 922 continue;
859 nr_maps++; 923 nr_maps++;
860 } 924 }
861
862 if (!nr_maps && !nr_maps_glob)
863 return 0;
864
865 /* Assume equally sized map definitions */ 925 /* Assume equally sized map definitions */
866 if (data) { 926 pr_debug("maps in %s: %d maps in %zd bytes\n",
867 pr_debug("maps in %s: %d maps in %zd bytes\n", obj->path, 927 obj->path, nr_maps, data->d_size);
868 nr_maps, data->d_size); 928
869 929 map_def_sz = data->d_size / nr_maps;
870 map_def_sz = data->d_size / nr_maps; 930 if (!data->d_size || (data->d_size % nr_maps) != 0) {
871 if (!data->d_size || (data->d_size % nr_maps) != 0) { 931 pr_warning("unable to determine map definition size "
872 pr_warning("unable to determine map definition size " 932 "section %s, %d maps in %zd bytes\n",
873 "section %s, %d maps in %zd bytes\n", 933 obj->path, nr_maps, data->d_size);
874 obj->path, nr_maps, data->d_size); 934 return -EINVAL;
875 return -EINVAL;
876 }
877 }
878
879 nr_maps += nr_maps_glob;
880 obj->maps = calloc(nr_maps, sizeof(obj->maps[0]));
881 if (!obj->maps) {
882 pr_warning("alloc maps for object failed\n");
883 return -ENOMEM;
884 }
885 obj->nr_maps = nr_maps;
886
887 for (i = 0; i < nr_maps; i++) {
888 /*
889 * fill all fd with -1 so won't close incorrect
890 * fd (fd=0 is stdin) when failure (zclose won't close
891 * negative fd)).
892 */
893 obj->maps[i].fd = -1;
894 obj->maps[i].inner_map_fd = -1;
895 } 935 }
896 936
897 /* 937 /* Fill obj->maps using data in "maps" section. */
898 * Fill obj->maps using data in "maps" section. 938 for (i = 0; i < nr_syms; i++) {
899 */
900 for (i = 0, map_idx = 0; data && i < nr_syms; i++) {
901 GElf_Sym sym; 939 GElf_Sym sym;
902 const char *map_name; 940 const char *map_name;
903 struct bpf_map_def *def; 941 struct bpf_map_def *def;
942 struct bpf_map *map;
904 943
905 if (!gelf_getsym(symbols, i, &sym)) 944 if (!gelf_getsym(symbols, i, &sym))
906 continue; 945 continue;
907 if (sym.st_shndx != obj->efile.maps_shndx) 946 if (sym.st_shndx != obj->efile.maps_shndx)
908 continue; 947 continue;
909 948
910 map_name = elf_strptr(obj->efile.elf, 949 map = bpf_object__add_map(obj);
911 obj->efile.strtabidx, 950 if (IS_ERR(map))
951 return PTR_ERR(map);
952
953 map_name = elf_strptr(obj->efile.elf, obj->efile.strtabidx,
912 sym.st_name); 954 sym.st_name);
913 if (!map_name) { 955 if (!map_name) {
914 pr_warning("failed to get map #%d name sym string for obj %s\n", 956 pr_warning("failed to get map #%d name sym string for obj %s\n",
915 map_idx, obj->path); 957 i, obj->path);
916 return -LIBBPF_ERRNO__FORMAT; 958 return -LIBBPF_ERRNO__FORMAT;
917 } 959 }
918 960
919 obj->maps[map_idx].libbpf_type = LIBBPF_MAP_UNSPEC; 961 map->libbpf_type = LIBBPF_MAP_UNSPEC;
920 obj->maps[map_idx].offset = sym.st_value; 962 map->sec_idx = sym.st_shndx;
963 map->sec_offset = sym.st_value;
964 pr_debug("map '%s' (legacy): at sec_idx %d, offset %zu.\n",
965 map_name, map->sec_idx, map->sec_offset);
921 if (sym.st_value + map_def_sz > data->d_size) { 966 if (sym.st_value + map_def_sz > data->d_size) {
922 pr_warning("corrupted maps section in %s: last map \"%s\" too small\n", 967 pr_warning("corrupted maps section in %s: last map \"%s\" too small\n",
923 obj->path, map_name); 968 obj->path, map_name);
924 return -EINVAL; 969 return -EINVAL;
925 } 970 }
926 971
927 obj->maps[map_idx].name = strdup(map_name); 972 map->name = strdup(map_name);
928 if (!obj->maps[map_idx].name) { 973 if (!map->name) {
929 pr_warning("failed to alloc map name\n"); 974 pr_warning("failed to alloc map name\n");
930 return -ENOMEM; 975 return -ENOMEM;
931 } 976 }
932 pr_debug("map %d is \"%s\"\n", map_idx, 977 pr_debug("map %d is \"%s\"\n", i, map->name);
933 obj->maps[map_idx].name);
934 def = (struct bpf_map_def *)(data->d_buf + sym.st_value); 978 def = (struct bpf_map_def *)(data->d_buf + sym.st_value);
935 /* 979 /*
936 * If the definition of the map in the object file fits in 980 * If the definition of the map in the object file fits in
@@ -939,7 +983,7 @@ static int bpf_object__init_maps(struct bpf_object *obj, int flags)
939 * calloc above. 983 * calloc above.
940 */ 984 */
941 if (map_def_sz <= sizeof(struct bpf_map_def)) { 985 if (map_def_sz <= sizeof(struct bpf_map_def)) {
942 memcpy(&obj->maps[map_idx].def, def, map_def_sz); 986 memcpy(&map->def, def, map_def_sz);
943 } else { 987 } else {
944 /* 988 /*
945 * Here the map structure being read is bigger than what 989 * Here the map structure being read is bigger than what
@@ -959,37 +1003,340 @@ static int bpf_object__init_maps(struct bpf_object *obj, int flags)
959 return -EINVAL; 1003 return -EINVAL;
960 } 1004 }
961 } 1005 }
962 memcpy(&obj->maps[map_idx].def, def, 1006 memcpy(&map->def, def, sizeof(struct bpf_map_def));
963 sizeof(struct bpf_map_def));
964 } 1007 }
965 map_idx++;
966 } 1008 }
1009 return 0;
1010}
967 1011
968 if (!obj->caps.global_data) 1012static const struct btf_type *skip_mods_and_typedefs(const struct btf *btf,
969 goto finalize; 1013 __u32 id)
1014{
1015 const struct btf_type *t = btf__type_by_id(btf, id);
970 1016
971 /* 1017 while (true) {
972 * Populate rest of obj->maps with libbpf internal maps. 1018 switch (BTF_INFO_KIND(t->info)) {
973 */ 1019 case BTF_KIND_VOLATILE:
974 if (obj->efile.data_shndx >= 0) 1020 case BTF_KIND_CONST:
975 ret = bpf_object__init_internal_map(obj, &obj->maps[map_idx++], 1021 case BTF_KIND_RESTRICT:
976 LIBBPF_MAP_DATA, 1022 case BTF_KIND_TYPEDEF:
977 obj->efile.data, 1023 t = btf__type_by_id(btf, t->type);
978 &obj->sections.data); 1024 break;
979 if (!ret && obj->efile.rodata_shndx >= 0) 1025 default:
980 ret = bpf_object__init_internal_map(obj, &obj->maps[map_idx++], 1026 return t;
981 LIBBPF_MAP_RODATA, 1027 }
982 obj->efile.rodata, 1028 }
983 &obj->sections.rodata); 1029}
984 if (!ret && obj->efile.bss_shndx >= 0) 1030
985 ret = bpf_object__init_internal_map(obj, &obj->maps[map_idx++], 1031static bool get_map_field_int(const char *map_name,
986 LIBBPF_MAP_BSS, 1032 const struct btf *btf,
987 obj->efile.bss, NULL); 1033 const struct btf_type *def,
988finalize: 1034 const struct btf_member *m,
989 if (!ret) 1035 const void *data, __u32 *res) {
1036 const struct btf_type *t = skip_mods_and_typedefs(btf, m->type);
1037 const char *name = btf__name_by_offset(btf, m->name_off);
1038 __u32 int_info = *(const __u32 *)(const void *)(t + 1);
1039
1040 if (BTF_INFO_KIND(t->info) != BTF_KIND_INT) {
1041 pr_warning("map '%s': attr '%s': expected INT, got %u.\n",
1042 map_name, name, BTF_INFO_KIND(t->info));
1043 return false;
1044 }
1045 if (t->size != 4 || BTF_INT_BITS(int_info) != 32 ||
1046 BTF_INT_OFFSET(int_info)) {
1047 pr_warning("map '%s': attr '%s': expected 32-bit non-bitfield integer, "
1048 "got %u-byte (%d-bit) one with bit offset %d.\n",
1049 map_name, name, t->size, BTF_INT_BITS(int_info),
1050 BTF_INT_OFFSET(int_info));
1051 return false;
1052 }
1053 if (BTF_INFO_KFLAG(def->info) && BTF_MEMBER_BITFIELD_SIZE(m->offset)) {
1054 pr_warning("map '%s': attr '%s': bitfield is not supported.\n",
1055 map_name, name);
1056 return false;
1057 }
1058 if (m->offset % 32) {
1059 pr_warning("map '%s': attr '%s': unaligned fields are not supported.\n",
1060 map_name, name);
1061 return false;
1062 }
1063
1064 *res = *(const __u32 *)(data + m->offset / 8);
1065 return true;
1066}
1067
1068static int bpf_object__init_user_btf_map(struct bpf_object *obj,
1069 const struct btf_type *sec,
1070 int var_idx, int sec_idx,
1071 const Elf_Data *data, bool strict)
1072{
1073 const struct btf_type *var, *def, *t;
1074 const struct btf_var_secinfo *vi;
1075 const struct btf_var *var_extra;
1076 const struct btf_member *m;
1077 const void *def_data;
1078 const char *map_name;
1079 struct bpf_map *map;
1080 int vlen, i;
1081
1082 vi = (const struct btf_var_secinfo *)(const void *)(sec + 1) + var_idx;
1083 var = btf__type_by_id(obj->btf, vi->type);
1084 var_extra = (const void *)(var + 1);
1085 map_name = btf__name_by_offset(obj->btf, var->name_off);
1086 vlen = BTF_INFO_VLEN(var->info);
1087
1088 if (map_name == NULL || map_name[0] == '\0') {
1089 pr_warning("map #%d: empty name.\n", var_idx);
1090 return -EINVAL;
1091 }
1092 if ((__u64)vi->offset + vi->size > data->d_size) {
1093 pr_warning("map '%s' BTF data is corrupted.\n", map_name);
1094 return -EINVAL;
1095 }
1096 if (BTF_INFO_KIND(var->info) != BTF_KIND_VAR) {
1097 pr_warning("map '%s': unexpected var kind %u.\n",
1098 map_name, BTF_INFO_KIND(var->info));
1099 return -EINVAL;
1100 }
1101 if (var_extra->linkage != BTF_VAR_GLOBAL_ALLOCATED &&
1102 var_extra->linkage != BTF_VAR_STATIC) {
1103 pr_warning("map '%s': unsupported var linkage %u.\n",
1104 map_name, var_extra->linkage);
1105 return -EOPNOTSUPP;
1106 }
1107
1108 def = skip_mods_and_typedefs(obj->btf, var->type);
1109 if (BTF_INFO_KIND(def->info) != BTF_KIND_STRUCT) {
1110 pr_warning("map '%s': unexpected def kind %u.\n",
1111 map_name, BTF_INFO_KIND(var->info));
1112 return -EINVAL;
1113 }
1114 if (def->size > vi->size) {
1115 pr_warning("map '%s': invalid def size.\n", map_name);
1116 return -EINVAL;
1117 }
1118
1119 map = bpf_object__add_map(obj);
1120 if (IS_ERR(map))
1121 return PTR_ERR(map);
1122 map->name = strdup(map_name);
1123 if (!map->name) {
1124 pr_warning("map '%s': failed to alloc map name.\n", map_name);
1125 return -ENOMEM;
1126 }
1127 map->libbpf_type = LIBBPF_MAP_UNSPEC;
1128 map->def.type = BPF_MAP_TYPE_UNSPEC;
1129 map->sec_idx = sec_idx;
1130 map->sec_offset = vi->offset;
1131 pr_debug("map '%s': at sec_idx %d, offset %zu.\n",
1132 map_name, map->sec_idx, map->sec_offset);
1133
1134 def_data = data->d_buf + vi->offset;
1135 vlen = BTF_INFO_VLEN(def->info);
1136 m = (const void *)(def + 1);
1137 for (i = 0; i < vlen; i++, m++) {
1138 const char *name = btf__name_by_offset(obj->btf, m->name_off);
1139
1140 if (!name) {
1141 pr_warning("map '%s': invalid field #%d.\n",
1142 map_name, i);
1143 return -EINVAL;
1144 }
1145 if (strcmp(name, "type") == 0) {
1146 if (!get_map_field_int(map_name, obj->btf, def, m,
1147 def_data, &map->def.type))
1148 return -EINVAL;
1149 pr_debug("map '%s': found type = %u.\n",
1150 map_name, map->def.type);
1151 } else if (strcmp(name, "max_entries") == 0) {
1152 if (!get_map_field_int(map_name, obj->btf, def, m,
1153 def_data, &map->def.max_entries))
1154 return -EINVAL;
1155 pr_debug("map '%s': found max_entries = %u.\n",
1156 map_name, map->def.max_entries);
1157 } else if (strcmp(name, "map_flags") == 0) {
1158 if (!get_map_field_int(map_name, obj->btf, def, m,
1159 def_data, &map->def.map_flags))
1160 return -EINVAL;
1161 pr_debug("map '%s': found map_flags = %u.\n",
1162 map_name, map->def.map_flags);
1163 } else if (strcmp(name, "key_size") == 0) {
1164 __u32 sz;
1165
1166 if (!get_map_field_int(map_name, obj->btf, def, m,
1167 def_data, &sz))
1168 return -EINVAL;
1169 pr_debug("map '%s': found key_size = %u.\n",
1170 map_name, sz);
1171 if (map->def.key_size && map->def.key_size != sz) {
1172 pr_warning("map '%s': conflictling key size %u != %u.\n",
1173 map_name, map->def.key_size, sz);
1174 return -EINVAL;
1175 }
1176 map->def.key_size = sz;
1177 } else if (strcmp(name, "key") == 0) {
1178 __s64 sz;
1179
1180 t = btf__type_by_id(obj->btf, m->type);
1181 if (!t) {
1182 pr_warning("map '%s': key type [%d] not found.\n",
1183 map_name, m->type);
1184 return -EINVAL;
1185 }
1186 if (BTF_INFO_KIND(t->info) != BTF_KIND_PTR) {
1187 pr_warning("map '%s': key spec is not PTR: %u.\n",
1188 map_name, BTF_INFO_KIND(t->info));
1189 return -EINVAL;
1190 }
1191 sz = btf__resolve_size(obj->btf, t->type);
1192 if (sz < 0) {
1193 pr_warning("map '%s': can't determine key size for type [%u]: %lld.\n",
1194 map_name, t->type, sz);
1195 return sz;
1196 }
1197 pr_debug("map '%s': found key [%u], sz = %lld.\n",
1198 map_name, t->type, sz);
1199 if (map->def.key_size && map->def.key_size != sz) {
1200 pr_warning("map '%s': conflictling key size %u != %lld.\n",
1201 map_name, map->def.key_size, sz);
1202 return -EINVAL;
1203 }
1204 map->def.key_size = sz;
1205 map->btf_key_type_id = t->type;
1206 } else if (strcmp(name, "value_size") == 0) {
1207 __u32 sz;
1208
1209 if (!get_map_field_int(map_name, obj->btf, def, m,
1210 def_data, &sz))
1211 return -EINVAL;
1212 pr_debug("map '%s': found value_size = %u.\n",
1213 map_name, sz);
1214 if (map->def.value_size && map->def.value_size != sz) {
1215 pr_warning("map '%s': conflictling value size %u != %u.\n",
1216 map_name, map->def.value_size, sz);
1217 return -EINVAL;
1218 }
1219 map->def.value_size = sz;
1220 } else if (strcmp(name, "value") == 0) {
1221 __s64 sz;
1222
1223 t = btf__type_by_id(obj->btf, m->type);
1224 if (!t) {
1225 pr_warning("map '%s': value type [%d] not found.\n",
1226 map_name, m->type);
1227 return -EINVAL;
1228 }
1229 if (BTF_INFO_KIND(t->info) != BTF_KIND_PTR) {
1230 pr_warning("map '%s': value spec is not PTR: %u.\n",
1231 map_name, BTF_INFO_KIND(t->info));
1232 return -EINVAL;
1233 }
1234 sz = btf__resolve_size(obj->btf, t->type);
1235 if (sz < 0) {
1236 pr_warning("map '%s': can't determine value size for type [%u]: %lld.\n",
1237 map_name, t->type, sz);
1238 return sz;
1239 }
1240 pr_debug("map '%s': found value [%u], sz = %lld.\n",
1241 map_name, t->type, sz);
1242 if (map->def.value_size && map->def.value_size != sz) {
1243 pr_warning("map '%s': conflictling value size %u != %lld.\n",
1244 map_name, map->def.value_size, sz);
1245 return -EINVAL;
1246 }
1247 map->def.value_size = sz;
1248 map->btf_value_type_id = t->type;
1249 } else {
1250 if (strict) {
1251 pr_warning("map '%s': unknown field '%s'.\n",
1252 map_name, name);
1253 return -ENOTSUP;
1254 }
1255 pr_debug("map '%s': ignoring unknown field '%s'.\n",
1256 map_name, name);
1257 }
1258 }
1259
1260 if (map->def.type == BPF_MAP_TYPE_UNSPEC) {
1261 pr_warning("map '%s': map type isn't specified.\n", map_name);
1262 return -EINVAL;
1263 }
1264
1265 return 0;
1266}
1267
1268static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict)
1269{
1270 const struct btf_type *sec = NULL;
1271 int nr_types, i, vlen, err;
1272 const struct btf_type *t;
1273 const char *name;
1274 Elf_Data *data;
1275 Elf_Scn *scn;
1276
1277 if (obj->efile.btf_maps_shndx < 0)
1278 return 0;
1279
1280 scn = elf_getscn(obj->efile.elf, obj->efile.btf_maps_shndx);
1281 if (scn)
1282 data = elf_getdata(scn, NULL);
1283 if (!scn || !data) {
1284 pr_warning("failed to get Elf_Data from map section %d (%s)\n",
1285 obj->efile.maps_shndx, MAPS_ELF_SEC);
1286 return -EINVAL;
1287 }
1288
1289 nr_types = btf__get_nr_types(obj->btf);
1290 for (i = 1; i <= nr_types; i++) {
1291 t = btf__type_by_id(obj->btf, i);
1292 if (BTF_INFO_KIND(t->info) != BTF_KIND_DATASEC)
1293 continue;
1294 name = btf__name_by_offset(obj->btf, t->name_off);
1295 if (strcmp(name, MAPS_ELF_SEC) == 0) {
1296 sec = t;
1297 break;
1298 }
1299 }
1300
1301 if (!sec) {
1302 pr_warning("DATASEC '%s' not found.\n", MAPS_ELF_SEC);
1303 return -ENOENT;
1304 }
1305
1306 vlen = BTF_INFO_VLEN(sec->info);
1307 for (i = 0; i < vlen; i++) {
1308 err = bpf_object__init_user_btf_map(obj, sec, i,
1309 obj->efile.btf_maps_shndx,
1310 data, strict);
1311 if (err)
1312 return err;
1313 }
1314
1315 return 0;
1316}
1317
1318static int bpf_object__init_maps(struct bpf_object *obj, int flags)
1319{
1320 bool strict = !(flags & MAPS_RELAX_COMPAT);
1321 int err;
1322
1323 err = bpf_object__init_user_maps(obj, strict);
1324 if (err)
1325 return err;
1326
1327 err = bpf_object__init_user_btf_maps(obj, strict);
1328 if (err)
1329 return err;
1330
1331 err = bpf_object__init_global_data_maps(obj);
1332 if (err)
1333 return err;
1334
1335 if (obj->nr_maps) {
990 qsort(obj->maps, obj->nr_maps, sizeof(obj->maps[0]), 1336 qsort(obj->maps, obj->nr_maps, sizeof(obj->maps[0]),
991 compare_bpf_map); 1337 compare_bpf_map);
992 return ret; 1338 }
1339 return 0;
993} 1340}
994 1341
995static bool section_have_execinstr(struct bpf_object *obj, int idx) 1342static bool section_have_execinstr(struct bpf_object *obj, int idx)
@@ -1078,6 +1425,86 @@ static void bpf_object__sanitize_btf_ext(struct bpf_object *obj)
1078 } 1425 }
1079} 1426}
1080 1427
1428static bool bpf_object__is_btf_mandatory(const struct bpf_object *obj)
1429{
1430 return obj->efile.btf_maps_shndx >= 0;
1431}
1432
1433static int bpf_object__init_btf(struct bpf_object *obj,
1434 Elf_Data *btf_data,
1435 Elf_Data *btf_ext_data)
1436{
1437 bool btf_required = bpf_object__is_btf_mandatory(obj);
1438 int err = 0;
1439
1440 if (btf_data) {
1441 obj->btf = btf__new(btf_data->d_buf, btf_data->d_size);
1442 if (IS_ERR(obj->btf)) {
1443 pr_warning("Error loading ELF section %s: %d.\n",
1444 BTF_ELF_SEC, err);
1445 goto out;
1446 }
1447 err = btf__finalize_data(obj, obj->btf);
1448 if (err) {
1449 pr_warning("Error finalizing %s: %d.\n",
1450 BTF_ELF_SEC, err);
1451 goto out;
1452 }
1453 }
1454 if (btf_ext_data) {
1455 if (!obj->btf) {
1456 pr_debug("Ignore ELF section %s because its depending ELF section %s is not found.\n",
1457 BTF_EXT_ELF_SEC, BTF_ELF_SEC);
1458 goto out;
1459 }
1460 obj->btf_ext = btf_ext__new(btf_ext_data->d_buf,
1461 btf_ext_data->d_size);
1462 if (IS_ERR(obj->btf_ext)) {
1463 pr_warning("Error loading ELF section %s: %ld. Ignored and continue.\n",
1464 BTF_EXT_ELF_SEC, PTR_ERR(obj->btf_ext));
1465 obj->btf_ext = NULL;
1466 goto out;
1467 }
1468 }
1469out:
1470 if (err || IS_ERR(obj->btf)) {
1471 if (btf_required)
1472 err = err ? : PTR_ERR(obj->btf);
1473 else
1474 err = 0;
1475 if (!IS_ERR_OR_NULL(obj->btf))
1476 btf__free(obj->btf);
1477 obj->btf = NULL;
1478 }
1479 if (btf_required && !obj->btf) {
1480 pr_warning("BTF is required, but is missing or corrupted.\n");
1481 return err == 0 ? -ENOENT : err;
1482 }
1483 return 0;
1484}
1485
1486static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj)
1487{
1488 int err = 0;
1489
1490 if (!obj->btf)
1491 return 0;
1492
1493 bpf_object__sanitize_btf(obj);
1494 bpf_object__sanitize_btf_ext(obj);
1495
1496 err = btf__load(obj->btf);
1497 if (err) {
1498 pr_warning("Error loading %s into kernel: %d.\n",
1499 BTF_ELF_SEC, err);
1500 btf__free(obj->btf);
1501 obj->btf = NULL;
1502 if (bpf_object__is_btf_mandatory(obj))
1503 return err;
1504 }
1505 return 0;
1506}
1507
1081static int bpf_object__elf_collect(struct bpf_object *obj, int flags) 1508static int bpf_object__elf_collect(struct bpf_object *obj, int flags)
1082{ 1509{
1083 Elf *elf = obj->efile.elf; 1510 Elf *elf = obj->efile.elf;
@@ -1102,24 +1529,21 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags)
1102 if (gelf_getshdr(scn, &sh) != &sh) { 1529 if (gelf_getshdr(scn, &sh) != &sh) {
1103 pr_warning("failed to get section(%d) header from %s\n", 1530 pr_warning("failed to get section(%d) header from %s\n",
1104 idx, obj->path); 1531 idx, obj->path);
1105 err = -LIBBPF_ERRNO__FORMAT; 1532 return -LIBBPF_ERRNO__FORMAT;
1106 goto out;
1107 } 1533 }
1108 1534
1109 name = elf_strptr(elf, ep->e_shstrndx, sh.sh_name); 1535 name = elf_strptr(elf, ep->e_shstrndx, sh.sh_name);
1110 if (!name) { 1536 if (!name) {
1111 pr_warning("failed to get section(%d) name from %s\n", 1537 pr_warning("failed to get section(%d) name from %s\n",
1112 idx, obj->path); 1538 idx, obj->path);
1113 err = -LIBBPF_ERRNO__FORMAT; 1539 return -LIBBPF_ERRNO__FORMAT;
1114 goto out;
1115 } 1540 }
1116 1541
1117 data = elf_getdata(scn, 0); 1542 data = elf_getdata(scn, 0);
1118 if (!data) { 1543 if (!data) {
1119 pr_warning("failed to get section(%d) data from %s(%s)\n", 1544 pr_warning("failed to get section(%d) data from %s(%s)\n",
1120 idx, name, obj->path); 1545 idx, name, obj->path);
1121 err = -LIBBPF_ERRNO__FORMAT; 1546 return -LIBBPF_ERRNO__FORMAT;
1122 goto out;
1123 } 1547 }
1124 pr_debug("section(%d) %s, size %ld, link %d, flags %lx, type=%d\n", 1548 pr_debug("section(%d) %s, size %ld, link %d, flags %lx, type=%d\n",
1125 idx, name, (unsigned long)data->d_size, 1549 idx, name, (unsigned long)data->d_size,
@@ -1130,12 +1554,18 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags)
1130 err = bpf_object__init_license(obj, 1554 err = bpf_object__init_license(obj,
1131 data->d_buf, 1555 data->d_buf,
1132 data->d_size); 1556 data->d_size);
1557 if (err)
1558 return err;
1133 } else if (strcmp(name, "version") == 0) { 1559 } else if (strcmp(name, "version") == 0) {
1134 err = bpf_object__init_kversion(obj, 1560 err = bpf_object__init_kversion(obj,
1135 data->d_buf, 1561 data->d_buf,
1136 data->d_size); 1562 data->d_size);
1563 if (err)
1564 return err;
1137 } else if (strcmp(name, "maps") == 0) { 1565 } else if (strcmp(name, "maps") == 0) {
1138 obj->efile.maps_shndx = idx; 1566 obj->efile.maps_shndx = idx;
1567 } else if (strcmp(name, MAPS_ELF_SEC) == 0) {
1568 obj->efile.btf_maps_shndx = idx;
1139 } else if (strcmp(name, BTF_ELF_SEC) == 0) { 1569 } else if (strcmp(name, BTF_ELF_SEC) == 0) {
1140 btf_data = data; 1570 btf_data = data;
1141 } else if (strcmp(name, BTF_EXT_ELF_SEC) == 0) { 1571 } else if (strcmp(name, BTF_EXT_ELF_SEC) == 0) {
@@ -1144,11 +1574,10 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags)
1144 if (obj->efile.symbols) { 1574 if (obj->efile.symbols) {
1145 pr_warning("bpf: multiple SYMTAB in %s\n", 1575 pr_warning("bpf: multiple SYMTAB in %s\n",
1146 obj->path); 1576 obj->path);
1147 err = -LIBBPF_ERRNO__FORMAT; 1577 return -LIBBPF_ERRNO__FORMAT;
1148 } else {
1149 obj->efile.symbols = data;
1150 obj->efile.strtabidx = sh.sh_link;
1151 } 1578 }
1579 obj->efile.symbols = data;
1580 obj->efile.strtabidx = sh.sh_link;
1152 } else if (sh.sh_type == SHT_PROGBITS && data->d_size > 0) { 1581 } else if (sh.sh_type == SHT_PROGBITS && data->d_size > 0) {
1153 if (sh.sh_flags & SHF_EXECINSTR) { 1582 if (sh.sh_flags & SHF_EXECINSTR) {
1154 if (strcmp(name, ".text") == 0) 1583 if (strcmp(name, ".text") == 0)
@@ -1162,6 +1591,7 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags)
1162 1591
1163 pr_warning("failed to alloc program %s (%s): %s", 1592 pr_warning("failed to alloc program %s (%s): %s",
1164 name, obj->path, cp); 1593 name, obj->path, cp);
1594 return err;
1165 } 1595 }
1166 } else if (strcmp(name, ".data") == 0) { 1596 } else if (strcmp(name, ".data") == 0) {
1167 obj->efile.data = data; 1597 obj->efile.data = data;
@@ -1173,8 +1603,8 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags)
1173 pr_debug("skip section(%d) %s\n", idx, name); 1603 pr_debug("skip section(%d) %s\n", idx, name);
1174 } 1604 }
1175 } else if (sh.sh_type == SHT_REL) { 1605 } else if (sh.sh_type == SHT_REL) {
1606 int nr_reloc = obj->efile.nr_reloc;
1176 void *reloc = obj->efile.reloc; 1607 void *reloc = obj->efile.reloc;
1177 int nr_reloc = obj->efile.nr_reloc + 1;
1178 int sec = sh.sh_info; /* points to other section */ 1608 int sec = sh.sh_info; /* points to other section */
1179 1609
1180 /* Only do relo for section with exec instructions */ 1610 /* Only do relo for section with exec instructions */
@@ -1184,79 +1614,37 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags)
1184 continue; 1614 continue;
1185 } 1615 }
1186 1616
1187 reloc = reallocarray(reloc, nr_reloc, 1617 reloc = reallocarray(reloc, nr_reloc + 1,
1188 sizeof(*obj->efile.reloc)); 1618 sizeof(*obj->efile.reloc));
1189 if (!reloc) { 1619 if (!reloc) {
1190 pr_warning("realloc failed\n"); 1620 pr_warning("realloc failed\n");
1191 err = -ENOMEM; 1621 return -ENOMEM;
1192 } else { 1622 }
1193 int n = nr_reloc - 1;
1194 1623
1195 obj->efile.reloc = reloc; 1624 obj->efile.reloc = reloc;
1196 obj->efile.nr_reloc = nr_reloc; 1625 obj->efile.nr_reloc++;
1197 1626
1198 obj->efile.reloc[n].shdr = sh; 1627 obj->efile.reloc[nr_reloc].shdr = sh;
1199 obj->efile.reloc[n].data = data; 1628 obj->efile.reloc[nr_reloc].data = data;
1200 }
1201 } else if (sh.sh_type == SHT_NOBITS && strcmp(name, ".bss") == 0) { 1629 } else if (sh.sh_type == SHT_NOBITS && strcmp(name, ".bss") == 0) {
1202 obj->efile.bss = data; 1630 obj->efile.bss = data;
1203 obj->efile.bss_shndx = idx; 1631 obj->efile.bss_shndx = idx;
1204 } else { 1632 } else {
1205 pr_debug("skip section(%d) %s\n", idx, name); 1633 pr_debug("skip section(%d) %s\n", idx, name);
1206 } 1634 }
1207 if (err)
1208 goto out;
1209 } 1635 }
1210 1636
1211 if (!obj->efile.strtabidx || obj->efile.strtabidx >= idx) { 1637 if (!obj->efile.strtabidx || obj->efile.strtabidx >= idx) {
1212 pr_warning("Corrupted ELF file: index of strtab invalid\n"); 1638 pr_warning("Corrupted ELF file: index of strtab invalid\n");
1213 return -LIBBPF_ERRNO__FORMAT; 1639 return -LIBBPF_ERRNO__FORMAT;
1214 } 1640 }
1215 if (btf_data) { 1641 err = bpf_object__init_btf(obj, btf_data, btf_ext_data);
1216 obj->btf = btf__new(btf_data->d_buf, btf_data->d_size); 1642 if (!err)
1217 if (IS_ERR(obj->btf)) {
1218 pr_warning("Error loading ELF section %s: %ld. Ignored and continue.\n",
1219 BTF_ELF_SEC, PTR_ERR(obj->btf));
1220 obj->btf = NULL;
1221 } else {
1222 err = btf__finalize_data(obj, obj->btf);
1223 if (!err) {
1224 bpf_object__sanitize_btf(obj);
1225 err = btf__load(obj->btf);
1226 }
1227 if (err) {
1228 pr_warning("Error finalizing and loading %s into kernel: %d. Ignored and continue.\n",
1229 BTF_ELF_SEC, err);
1230 btf__free(obj->btf);
1231 obj->btf = NULL;
1232 err = 0;
1233 }
1234 }
1235 }
1236 if (btf_ext_data) {
1237 if (!obj->btf) {
1238 pr_debug("Ignore ELF section %s because its depending ELF section %s is not found.\n",
1239 BTF_EXT_ELF_SEC, BTF_ELF_SEC);
1240 } else {
1241 obj->btf_ext = btf_ext__new(btf_ext_data->d_buf,
1242 btf_ext_data->d_size);
1243 if (IS_ERR(obj->btf_ext)) {
1244 pr_warning("Error loading ELF section %s: %ld. Ignored and continue.\n",
1245 BTF_EXT_ELF_SEC,
1246 PTR_ERR(obj->btf_ext));
1247 obj->btf_ext = NULL;
1248 } else {
1249 bpf_object__sanitize_btf_ext(obj);
1250 }
1251 }
1252 }
1253 if (bpf_object__has_maps(obj)) {
1254 err = bpf_object__init_maps(obj, flags); 1643 err = bpf_object__init_maps(obj, flags);
1255 if (err) 1644 if (!err)
1256 goto out; 1645 err = bpf_object__sanitize_and_load_btf(obj);
1257 } 1646 if (!err)
1258 err = bpf_object__init_prog_names(obj); 1647 err = bpf_object__init_prog_names(obj);
1259out:
1260 return err; 1648 return err;
1261} 1649}
1262 1650
@@ -1275,7 +1663,8 @@ bpf_object__find_prog_by_idx(struct bpf_object *obj, int idx)
1275} 1663}
1276 1664
1277struct bpf_program * 1665struct bpf_program *
1278bpf_object__find_program_by_title(struct bpf_object *obj, const char *title) 1666bpf_object__find_program_by_title(const struct bpf_object *obj,
1667 const char *title)
1279{ 1668{
1280 struct bpf_program *pos; 1669 struct bpf_program *pos;
1281 1670
@@ -1297,7 +1686,8 @@ static bool bpf_object__shndx_is_data(const struct bpf_object *obj,
1297static bool bpf_object__shndx_is_maps(const struct bpf_object *obj, 1686static bool bpf_object__shndx_is_maps(const struct bpf_object *obj,
1298 int shndx) 1687 int shndx)
1299{ 1688{
1300 return shndx == obj->efile.maps_shndx; 1689 return shndx == obj->efile.maps_shndx ||
1690 shndx == obj->efile.btf_maps_shndx;
1301} 1691}
1302 1692
1303static bool bpf_object__relo_in_known_section(const struct bpf_object *obj, 1693static bool bpf_object__relo_in_known_section(const struct bpf_object *obj,
@@ -1341,14 +1731,14 @@ bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr,
1341 prog->nr_reloc = nrels; 1731 prog->nr_reloc = nrels;
1342 1732
1343 for (i = 0; i < nrels; i++) { 1733 for (i = 0; i < nrels; i++) {
1344 GElf_Sym sym;
1345 GElf_Rel rel;
1346 unsigned int insn_idx;
1347 unsigned int shdr_idx;
1348 struct bpf_insn *insns = prog->insns; 1734 struct bpf_insn *insns = prog->insns;
1349 enum libbpf_map_type type; 1735 enum libbpf_map_type type;
1736 unsigned int insn_idx;
1737 unsigned int shdr_idx;
1350 const char *name; 1738 const char *name;
1351 size_t map_idx; 1739 size_t map_idx;
1740 GElf_Sym sym;
1741 GElf_Rel rel;
1352 1742
1353 if (!gelf_getrel(data, i, &rel)) { 1743 if (!gelf_getrel(data, i, &rel)) {
1354 pr_warning("relocation: failed to get %d reloc\n", i); 1744 pr_warning("relocation: failed to get %d reloc\n", i);
@@ -1416,9 +1806,13 @@ bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr,
1416 if (maps[map_idx].libbpf_type != type) 1806 if (maps[map_idx].libbpf_type != type)
1417 continue; 1807 continue;
1418 if (type != LIBBPF_MAP_UNSPEC || 1808 if (type != LIBBPF_MAP_UNSPEC ||
1419 maps[map_idx].offset == sym.st_value) { 1809 (maps[map_idx].sec_idx == sym.st_shndx &&
1420 pr_debug("relocation: find map %zd (%s) for insn %u\n", 1810 maps[map_idx].sec_offset == sym.st_value)) {
1421 map_idx, maps[map_idx].name, insn_idx); 1811 pr_debug("relocation: found map %zd (%s, sec_idx %d, offset %zu) for insn %u\n",
1812 map_idx, maps[map_idx].name,
1813 maps[map_idx].sec_idx,
1814 maps[map_idx].sec_offset,
1815 insn_idx);
1422 break; 1816 break;
1423 } 1817 }
1424 } 1818 }
@@ -1438,14 +1832,18 @@ bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr,
1438 return 0; 1832 return 0;
1439} 1833}
1440 1834
1441static int bpf_map_find_btf_info(struct bpf_map *map, const struct btf *btf) 1835static int bpf_map_find_btf_info(struct bpf_object *obj, struct bpf_map *map)
1442{ 1836{
1443 struct bpf_map_def *def = &map->def; 1837 struct bpf_map_def *def = &map->def;
1444 __u32 key_type_id = 0, value_type_id = 0; 1838 __u32 key_type_id = 0, value_type_id = 0;
1445 int ret; 1839 int ret;
1446 1840
1841 /* if it's BTF-defined map, we don't need to search for type IDs */
1842 if (map->sec_idx == obj->efile.btf_maps_shndx)
1843 return 0;
1844
1447 if (!bpf_map__is_internal(map)) { 1845 if (!bpf_map__is_internal(map)) {
1448 ret = btf__get_map_kv_tids(btf, map->name, def->key_size, 1846 ret = btf__get_map_kv_tids(obj->btf, map->name, def->key_size,
1449 def->value_size, &key_type_id, 1847 def->value_size, &key_type_id,
1450 &value_type_id); 1848 &value_type_id);
1451 } else { 1849 } else {
@@ -1453,7 +1851,7 @@ static int bpf_map_find_btf_info(struct bpf_map *map, const struct btf *btf)
1453 * LLVM annotates global data differently in BTF, that is, 1851 * LLVM annotates global data differently in BTF, that is,
1454 * only as '.data', '.bss' or '.rodata'. 1852 * only as '.data', '.bss' or '.rodata'.
1455 */ 1853 */
1456 ret = btf__find_by_name(btf, 1854 ret = btf__find_by_name(obj->btf,
1457 libbpf_type_to_btf_name[map->libbpf_type]); 1855 libbpf_type_to_btf_name[map->libbpf_type]);
1458 } 1856 }
1459 if (ret < 0) 1857 if (ret < 0)
@@ -1740,25 +2138,26 @@ bpf_object__create_maps(struct bpf_object *obj)
1740 create_attr.key_size = def->key_size; 2138 create_attr.key_size = def->key_size;
1741 create_attr.value_size = def->value_size; 2139 create_attr.value_size = def->value_size;
1742 create_attr.max_entries = def->max_entries; 2140 create_attr.max_entries = def->max_entries;
1743 create_attr.btf_fd = -1; 2141 create_attr.btf_fd = 0;
1744 create_attr.btf_key_type_id = 0; 2142 create_attr.btf_key_type_id = 0;
1745 create_attr.btf_value_type_id = 0; 2143 create_attr.btf_value_type_id = 0;
1746 if (bpf_map_type__is_map_in_map(def->type) && 2144 if (bpf_map_type__is_map_in_map(def->type) &&
1747 map->inner_map_fd >= 0) 2145 map->inner_map_fd >= 0)
1748 create_attr.inner_map_fd = map->inner_map_fd; 2146 create_attr.inner_map_fd = map->inner_map_fd;
1749 2147
1750 if (obj->btf && !bpf_map_find_btf_info(map, obj->btf)) { 2148 if (obj->btf && !bpf_map_find_btf_info(obj, map)) {
1751 create_attr.btf_fd = btf__fd(obj->btf); 2149 create_attr.btf_fd = btf__fd(obj->btf);
1752 create_attr.btf_key_type_id = map->btf_key_type_id; 2150 create_attr.btf_key_type_id = map->btf_key_type_id;
1753 create_attr.btf_value_type_id = map->btf_value_type_id; 2151 create_attr.btf_value_type_id = map->btf_value_type_id;
1754 } 2152 }
1755 2153
1756 *pfd = bpf_create_map_xattr(&create_attr); 2154 *pfd = bpf_create_map_xattr(&create_attr);
1757 if (*pfd < 0 && create_attr.btf_fd >= 0) { 2155 if (*pfd < 0 && (create_attr.btf_key_type_id ||
2156 create_attr.btf_value_type_id)) {
1758 cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); 2157 cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
1759 pr_warning("Error in bpf_create_map_xattr(%s):%s(%d). Retrying without BTF.\n", 2158 pr_warning("Error in bpf_create_map_xattr(%s):%s(%d). Retrying without BTF.\n",
1760 map->name, cp, errno); 2159 map->name, cp, errno);
1761 create_attr.btf_fd = -1; 2160 create_attr.btf_fd = 0;
1762 create_attr.btf_key_type_id = 0; 2161 create_attr.btf_key_type_id = 0;
1763 create_attr.btf_value_type_id = 0; 2162 create_attr.btf_value_type_id = 0;
1764 map->btf_key_type_id = 0; 2163 map->btf_key_type_id = 0;
@@ -2049,7 +2448,7 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt,
2049 load_attr.license = license; 2448 load_attr.license = license;
2050 load_attr.kern_version = kern_version; 2449 load_attr.kern_version = kern_version;
2051 load_attr.prog_ifindex = prog->prog_ifindex; 2450 load_attr.prog_ifindex = prog->prog_ifindex;
2052 load_attr.prog_btf_fd = prog->btf_fd; 2451 load_attr.prog_btf_fd = prog->btf_fd >= 0 ? prog->btf_fd : 0;
2053 load_attr.func_info = prog->func_info; 2452 load_attr.func_info = prog->func_info;
2054 load_attr.func_info_rec_size = prog->func_info_rec_size; 2453 load_attr.func_info_rec_size = prog->func_info_rec_size;
2055 load_attr.func_info_cnt = prog->func_info_cnt; 2454 load_attr.func_info_cnt = prog->func_info_cnt;
@@ -2195,8 +2594,8 @@ out:
2195 return err; 2594 return err;
2196} 2595}
2197 2596
2198static bool bpf_program__is_function_storage(struct bpf_program *prog, 2597static bool bpf_program__is_function_storage(const struct bpf_program *prog,
2199 struct bpf_object *obj) 2598 const struct bpf_object *obj)
2200{ 2599{
2201 return prog->idx == obj->efile.text_shndx && obj->has_pseudo_calls; 2600 return prog->idx == obj->efile.text_shndx && obj->has_pseudo_calls;
2202} 2601}
@@ -2902,17 +3301,17 @@ bpf_object__next(struct bpf_object *prev)
2902 return next; 3301 return next;
2903} 3302}
2904 3303
2905const char *bpf_object__name(struct bpf_object *obj) 3304const char *bpf_object__name(const struct bpf_object *obj)
2906{ 3305{
2907 return obj ? obj->path : ERR_PTR(-EINVAL); 3306 return obj ? obj->path : ERR_PTR(-EINVAL);
2908} 3307}
2909 3308
2910unsigned int bpf_object__kversion(struct bpf_object *obj) 3309unsigned int bpf_object__kversion(const struct bpf_object *obj)
2911{ 3310{
2912 return obj ? obj->kern_version : 0; 3311 return obj ? obj->kern_version : 0;
2913} 3312}
2914 3313
2915struct btf *bpf_object__btf(struct bpf_object *obj) 3314struct btf *bpf_object__btf(const struct bpf_object *obj)
2916{ 3315{
2917 return obj ? obj->btf : NULL; 3316 return obj ? obj->btf : NULL;
2918} 3317}
@@ -2933,13 +3332,14 @@ int bpf_object__set_priv(struct bpf_object *obj, void *priv,
2933 return 0; 3332 return 0;
2934} 3333}
2935 3334
2936void *bpf_object__priv(struct bpf_object *obj) 3335void *bpf_object__priv(const struct bpf_object *obj)
2937{ 3336{
2938 return obj ? obj->priv : ERR_PTR(-EINVAL); 3337 return obj ? obj->priv : ERR_PTR(-EINVAL);
2939} 3338}
2940 3339
2941static struct bpf_program * 3340static struct bpf_program *
2942__bpf_program__iter(struct bpf_program *p, struct bpf_object *obj, bool forward) 3341__bpf_program__iter(const struct bpf_program *p, const struct bpf_object *obj,
3342 bool forward)
2943{ 3343{
2944 size_t nr_programs = obj->nr_programs; 3344 size_t nr_programs = obj->nr_programs;
2945 ssize_t idx; 3345 ssize_t idx;
@@ -2964,7 +3364,7 @@ __bpf_program__iter(struct bpf_program *p, struct bpf_object *obj, bool forward)
2964} 3364}
2965 3365
2966struct bpf_program * 3366struct bpf_program *
2967bpf_program__next(struct bpf_program *prev, struct bpf_object *obj) 3367bpf_program__next(struct bpf_program *prev, const struct bpf_object *obj)
2968{ 3368{
2969 struct bpf_program *prog = prev; 3369 struct bpf_program *prog = prev;
2970 3370
@@ -2976,7 +3376,7 @@ bpf_program__next(struct bpf_program *prev, struct bpf_object *obj)
2976} 3376}
2977 3377
2978struct bpf_program * 3378struct bpf_program *
2979bpf_program__prev(struct bpf_program *next, struct bpf_object *obj) 3379bpf_program__prev(struct bpf_program *next, const struct bpf_object *obj)
2980{ 3380{
2981 struct bpf_program *prog = next; 3381 struct bpf_program *prog = next;
2982 3382
@@ -2998,7 +3398,7 @@ int bpf_program__set_priv(struct bpf_program *prog, void *priv,
2998 return 0; 3398 return 0;
2999} 3399}
3000 3400
3001void *bpf_program__priv(struct bpf_program *prog) 3401void *bpf_program__priv(const struct bpf_program *prog)
3002{ 3402{
3003 return prog ? prog->priv : ERR_PTR(-EINVAL); 3403 return prog ? prog->priv : ERR_PTR(-EINVAL);
3004} 3404}
@@ -3008,7 +3408,7 @@ void bpf_program__set_ifindex(struct bpf_program *prog, __u32 ifindex)
3008 prog->prog_ifindex = ifindex; 3408 prog->prog_ifindex = ifindex;
3009} 3409}
3010 3410
3011const char *bpf_program__title(struct bpf_program *prog, bool needs_copy) 3411const char *bpf_program__title(const struct bpf_program *prog, bool needs_copy)
3012{ 3412{
3013 const char *title; 3413 const char *title;
3014 3414
@@ -3024,7 +3424,7 @@ const char *bpf_program__title(struct bpf_program *prog, bool needs_copy)
3024 return title; 3424 return title;
3025} 3425}
3026 3426
3027int bpf_program__fd(struct bpf_program *prog) 3427int bpf_program__fd(const struct bpf_program *prog)
3028{ 3428{
3029 return bpf_program__nth_fd(prog, 0); 3429 return bpf_program__nth_fd(prog, 0);
3030} 3430}
@@ -3057,7 +3457,7 @@ int bpf_program__set_prep(struct bpf_program *prog, int nr_instances,
3057 return 0; 3457 return 0;
3058} 3458}
3059 3459
3060int bpf_program__nth_fd(struct bpf_program *prog, int n) 3460int bpf_program__nth_fd(const struct bpf_program *prog, int n)
3061{ 3461{
3062 int fd; 3462 int fd;
3063 3463
@@ -3085,25 +3485,25 @@ void bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type)
3085 prog->type = type; 3485 prog->type = type;
3086} 3486}
3087 3487
3088static bool bpf_program__is_type(struct bpf_program *prog, 3488static bool bpf_program__is_type(const struct bpf_program *prog,
3089 enum bpf_prog_type type) 3489 enum bpf_prog_type type)
3090{ 3490{
3091 return prog ? (prog->type == type) : false; 3491 return prog ? (prog->type == type) : false;
3092} 3492}
3093 3493
3094#define BPF_PROG_TYPE_FNS(NAME, TYPE) \ 3494#define BPF_PROG_TYPE_FNS(NAME, TYPE) \
3095int bpf_program__set_##NAME(struct bpf_program *prog) \ 3495int bpf_program__set_##NAME(struct bpf_program *prog) \
3096{ \ 3496{ \
3097 if (!prog) \ 3497 if (!prog) \
3098 return -EINVAL; \ 3498 return -EINVAL; \
3099 bpf_program__set_type(prog, TYPE); \ 3499 bpf_program__set_type(prog, TYPE); \
3100 return 0; \ 3500 return 0; \
3101} \ 3501} \
3102 \ 3502 \
3103bool bpf_program__is_##NAME(struct bpf_program *prog) \ 3503bool bpf_program__is_##NAME(const struct bpf_program *prog) \
3104{ \ 3504{ \
3105 return bpf_program__is_type(prog, TYPE); \ 3505 return bpf_program__is_type(prog, TYPE); \
3106} \ 3506} \
3107 3507
3108BPF_PROG_TYPE_FNS(socket_filter, BPF_PROG_TYPE_SOCKET_FILTER); 3508BPF_PROG_TYPE_FNS(socket_filter, BPF_PROG_TYPE_SOCKET_FILTER);
3109BPF_PROG_TYPE_FNS(kprobe, BPF_PROG_TYPE_KPROBE); 3509BPF_PROG_TYPE_FNS(kprobe, BPF_PROG_TYPE_KPROBE);
@@ -3302,17 +3702,17 @@ bpf_program__identify_section(struct bpf_program *prog,
3302 expected_attach_type); 3702 expected_attach_type);
3303} 3703}
3304 3704
3305int bpf_map__fd(struct bpf_map *map) 3705int bpf_map__fd(const struct bpf_map *map)
3306{ 3706{
3307 return map ? map->fd : -EINVAL; 3707 return map ? map->fd : -EINVAL;
3308} 3708}
3309 3709
3310const struct bpf_map_def *bpf_map__def(struct bpf_map *map) 3710const struct bpf_map_def *bpf_map__def(const struct bpf_map *map)
3311{ 3711{
3312 return map ? &map->def : ERR_PTR(-EINVAL); 3712 return map ? &map->def : ERR_PTR(-EINVAL);
3313} 3713}
3314 3714
3315const char *bpf_map__name(struct bpf_map *map) 3715const char *bpf_map__name(const struct bpf_map *map)
3316{ 3716{
3317 return map ? map->name : NULL; 3717 return map ? map->name : NULL;
3318} 3718}
@@ -3343,17 +3743,17 @@ int bpf_map__set_priv(struct bpf_map *map, void *priv,
3343 return 0; 3743 return 0;
3344} 3744}
3345 3745
3346void *bpf_map__priv(struct bpf_map *map) 3746void *bpf_map__priv(const struct bpf_map *map)
3347{ 3747{
3348 return map ? map->priv : ERR_PTR(-EINVAL); 3748 return map ? map->priv : ERR_PTR(-EINVAL);
3349} 3749}
3350 3750
3351bool bpf_map__is_offload_neutral(struct bpf_map *map) 3751bool bpf_map__is_offload_neutral(const struct bpf_map *map)
3352{ 3752{
3353 return map->def.type == BPF_MAP_TYPE_PERF_EVENT_ARRAY; 3753 return map->def.type == BPF_MAP_TYPE_PERF_EVENT_ARRAY;
3354} 3754}
3355 3755
3356bool bpf_map__is_internal(struct bpf_map *map) 3756bool bpf_map__is_internal(const struct bpf_map *map)
3357{ 3757{
3358 return map->libbpf_type != LIBBPF_MAP_UNSPEC; 3758 return map->libbpf_type != LIBBPF_MAP_UNSPEC;
3359} 3759}
@@ -3378,7 +3778,7 @@ int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd)
3378} 3778}
3379 3779
3380static struct bpf_map * 3780static struct bpf_map *
3381__bpf_map__iter(struct bpf_map *m, struct bpf_object *obj, int i) 3781__bpf_map__iter(const struct bpf_map *m, const struct bpf_object *obj, int i)
3382{ 3782{
3383 ssize_t idx; 3783 ssize_t idx;
3384 struct bpf_map *s, *e; 3784 struct bpf_map *s, *e;
@@ -3402,7 +3802,7 @@ __bpf_map__iter(struct bpf_map *m, struct bpf_object *obj, int i)
3402} 3802}
3403 3803
3404struct bpf_map * 3804struct bpf_map *
3405bpf_map__next(struct bpf_map *prev, struct bpf_object *obj) 3805bpf_map__next(const struct bpf_map *prev, const struct bpf_object *obj)
3406{ 3806{
3407 if (prev == NULL) 3807 if (prev == NULL)
3408 return obj->maps; 3808 return obj->maps;
@@ -3411,7 +3811,7 @@ bpf_map__next(struct bpf_map *prev, struct bpf_object *obj)
3411} 3811}
3412 3812
3413struct bpf_map * 3813struct bpf_map *
3414bpf_map__prev(struct bpf_map *next, struct bpf_object *obj) 3814bpf_map__prev(const struct bpf_map *next, const struct bpf_object *obj)
3415{ 3815{
3416 if (next == NULL) { 3816 if (next == NULL) {
3417 if (!obj->nr_maps) 3817 if (!obj->nr_maps)
@@ -3423,7 +3823,7 @@ bpf_map__prev(struct bpf_map *next, struct bpf_object *obj)
3423} 3823}
3424 3824
3425struct bpf_map * 3825struct bpf_map *
3426bpf_object__find_map_by_name(struct bpf_object *obj, const char *name) 3826bpf_object__find_map_by_name(const struct bpf_object *obj, const char *name)
3427{ 3827{
3428 struct bpf_map *pos; 3828 struct bpf_map *pos;
3429 3829
@@ -3435,7 +3835,7 @@ bpf_object__find_map_by_name(struct bpf_object *obj, const char *name)
3435} 3835}
3436 3836
3437int 3837int
3438bpf_object__find_map_fd_by_name(struct bpf_object *obj, const char *name) 3838bpf_object__find_map_fd_by_name(const struct bpf_object *obj, const char *name)
3439{ 3839{
3440 return bpf_map__fd(bpf_object__find_map_by_name(obj, name)); 3840 return bpf_map__fd(bpf_object__find_map_by_name(obj, name));
3441} 3841}
@@ -3443,13 +3843,7 @@ bpf_object__find_map_fd_by_name(struct bpf_object *obj, const char *name)
3443struct bpf_map * 3843struct bpf_map *
3444bpf_object__find_map_by_offset(struct bpf_object *obj, size_t offset) 3844bpf_object__find_map_by_offset(struct bpf_object *obj, size_t offset)
3445{ 3845{
3446 int i; 3846 return ERR_PTR(-ENOTSUP);
3447
3448 for (i = 0; i < obj->nr_maps; i++) {
3449 if (obj->maps[i].offset == offset)
3450 return &obj->maps[i];
3451 }
3452 return ERR_PTR(-ENOENT);
3453} 3847}
3454 3848
3455long libbpf_get_error(const void *ptr) 3849long libbpf_get_error(const void *ptr)
@@ -3835,3 +4229,60 @@ void bpf_program__bpil_offs_to_addr(struct bpf_prog_info_linear *info_linear)
3835 desc->array_offset, addr); 4229 desc->array_offset, addr);
3836 } 4230 }
3837} 4231}
4232
4233int libbpf_num_possible_cpus(void)
4234{
4235 static const char *fcpu = "/sys/devices/system/cpu/possible";
4236 int len = 0, n = 0, il = 0, ir = 0;
4237 unsigned int start = 0, end = 0;
4238 static int cpus;
4239 char buf[128];
4240 int error = 0;
4241 int fd = -1;
4242
4243 if (cpus > 0)
4244 return cpus;
4245
4246 fd = open(fcpu, O_RDONLY);
4247 if (fd < 0) {
4248 error = errno;
4249 pr_warning("Failed to open file %s: %s\n",
4250 fcpu, strerror(error));
4251 return -error;
4252 }
4253 len = read(fd, buf, sizeof(buf));
4254 close(fd);
4255 if (len <= 0) {
4256 error = len ? errno : EINVAL;
4257 pr_warning("Failed to read # of possible cpus from %s: %s\n",
4258 fcpu, strerror(error));
4259 return -error;
4260 }
4261 if (len == sizeof(buf)) {
4262 pr_warning("File %s size overflow\n", fcpu);
4263 return -EOVERFLOW;
4264 }
4265 buf[len] = '\0';
4266
4267 for (ir = 0, cpus = 0; ir <= len; ir++) {
4268 /* Each sub string separated by ',' has format \d+-\d+ or \d+ */
4269 if (buf[ir] == ',' || buf[ir] == '\0') {
4270 buf[ir] = '\0';
4271 n = sscanf(&buf[il], "%u-%u", &start, &end);
4272 if (n <= 0) {
4273 pr_warning("Failed to get # CPUs from %s\n",
4274 &buf[il]);
4275 return -EINVAL;
4276 } else if (n == 1) {
4277 end = start;
4278 }
4279 cpus += end - start + 1;
4280 il = ir + 1;
4281 }
4282 }
4283 if (cpus <= 0) {
4284 pr_warning("Invalid #CPUs %d from %s\n", cpus, fcpu);
4285 return -EINVAL;
4286 }
4287 return cpus;
4288}
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 1af0d48178c8..d639f47e3110 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -98,15 +98,16 @@ struct bpf_object_load_attr {
98LIBBPF_API int bpf_object__load(struct bpf_object *obj); 98LIBBPF_API int bpf_object__load(struct bpf_object *obj);
99LIBBPF_API int bpf_object__load_xattr(struct bpf_object_load_attr *attr); 99LIBBPF_API int bpf_object__load_xattr(struct bpf_object_load_attr *attr);
100LIBBPF_API int bpf_object__unload(struct bpf_object *obj); 100LIBBPF_API int bpf_object__unload(struct bpf_object *obj);
101LIBBPF_API const char *bpf_object__name(struct bpf_object *obj); 101LIBBPF_API const char *bpf_object__name(const struct bpf_object *obj);
102LIBBPF_API unsigned int bpf_object__kversion(struct bpf_object *obj); 102LIBBPF_API unsigned int bpf_object__kversion(const struct bpf_object *obj);
103 103
104struct btf; 104struct btf;
105LIBBPF_API struct btf *bpf_object__btf(struct bpf_object *obj); 105LIBBPF_API struct btf *bpf_object__btf(const struct bpf_object *obj);
106LIBBPF_API int bpf_object__btf_fd(const struct bpf_object *obj); 106LIBBPF_API int bpf_object__btf_fd(const struct bpf_object *obj);
107 107
108LIBBPF_API struct bpf_program * 108LIBBPF_API struct bpf_program *
109bpf_object__find_program_by_title(struct bpf_object *obj, const char *title); 109bpf_object__find_program_by_title(const struct bpf_object *obj,
110 const char *title);
110 111
111LIBBPF_API struct bpf_object *bpf_object__next(struct bpf_object *prev); 112LIBBPF_API struct bpf_object *bpf_object__next(struct bpf_object *prev);
112#define bpf_object__for_each_safe(pos, tmp) \ 113#define bpf_object__for_each_safe(pos, tmp) \
@@ -118,7 +119,7 @@ LIBBPF_API struct bpf_object *bpf_object__next(struct bpf_object *prev);
118typedef void (*bpf_object_clear_priv_t)(struct bpf_object *, void *); 119typedef void (*bpf_object_clear_priv_t)(struct bpf_object *, void *);
119LIBBPF_API int bpf_object__set_priv(struct bpf_object *obj, void *priv, 120LIBBPF_API int bpf_object__set_priv(struct bpf_object *obj, void *priv,
120 bpf_object_clear_priv_t clear_priv); 121 bpf_object_clear_priv_t clear_priv);
121LIBBPF_API void *bpf_object__priv(struct bpf_object *prog); 122LIBBPF_API void *bpf_object__priv(const struct bpf_object *prog);
122 123
123LIBBPF_API int 124LIBBPF_API int
124libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type, 125libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type,
@@ -129,7 +130,7 @@ LIBBPF_API int libbpf_attach_type_by_name(const char *name,
129/* Accessors of bpf_program */ 130/* Accessors of bpf_program */
130struct bpf_program; 131struct bpf_program;
131LIBBPF_API struct bpf_program *bpf_program__next(struct bpf_program *prog, 132LIBBPF_API struct bpf_program *bpf_program__next(struct bpf_program *prog,
132 struct bpf_object *obj); 133 const struct bpf_object *obj);
133 134
134#define bpf_object__for_each_program(pos, obj) \ 135#define bpf_object__for_each_program(pos, obj) \
135 for ((pos) = bpf_program__next(NULL, (obj)); \ 136 for ((pos) = bpf_program__next(NULL, (obj)); \
@@ -137,24 +138,23 @@ LIBBPF_API struct bpf_program *bpf_program__next(struct bpf_program *prog,
137 (pos) = bpf_program__next((pos), (obj))) 138 (pos) = bpf_program__next((pos), (obj)))
138 139
139LIBBPF_API struct bpf_program *bpf_program__prev(struct bpf_program *prog, 140LIBBPF_API struct bpf_program *bpf_program__prev(struct bpf_program *prog,
140 struct bpf_object *obj); 141 const struct bpf_object *obj);
141 142
142typedef void (*bpf_program_clear_priv_t)(struct bpf_program *, 143typedef void (*bpf_program_clear_priv_t)(struct bpf_program *, void *);
143 void *);
144 144
145LIBBPF_API int bpf_program__set_priv(struct bpf_program *prog, void *priv, 145LIBBPF_API int bpf_program__set_priv(struct bpf_program *prog, void *priv,
146 bpf_program_clear_priv_t clear_priv); 146 bpf_program_clear_priv_t clear_priv);
147 147
148LIBBPF_API void *bpf_program__priv(struct bpf_program *prog); 148LIBBPF_API void *bpf_program__priv(const struct bpf_program *prog);
149LIBBPF_API void bpf_program__set_ifindex(struct bpf_program *prog, 149LIBBPF_API void bpf_program__set_ifindex(struct bpf_program *prog,
150 __u32 ifindex); 150 __u32 ifindex);
151 151
152LIBBPF_API const char *bpf_program__title(struct bpf_program *prog, 152LIBBPF_API const char *bpf_program__title(const struct bpf_program *prog,
153 bool needs_copy); 153 bool needs_copy);
154 154
155LIBBPF_API int bpf_program__load(struct bpf_program *prog, char *license, 155LIBBPF_API int bpf_program__load(struct bpf_program *prog, char *license,
156 __u32 kern_version); 156 __u32 kern_version);
157LIBBPF_API int bpf_program__fd(struct bpf_program *prog); 157LIBBPF_API int bpf_program__fd(const struct bpf_program *prog);
158LIBBPF_API int bpf_program__pin_instance(struct bpf_program *prog, 158LIBBPF_API int bpf_program__pin_instance(struct bpf_program *prog,
159 const char *path, 159 const char *path,
160 int instance); 160 int instance);
@@ -227,7 +227,7 @@ typedef int (*bpf_program_prep_t)(struct bpf_program *prog, int n,
227LIBBPF_API int bpf_program__set_prep(struct bpf_program *prog, int nr_instance, 227LIBBPF_API int bpf_program__set_prep(struct bpf_program *prog, int nr_instance,
228 bpf_program_prep_t prep); 228 bpf_program_prep_t prep);
229 229
230LIBBPF_API int bpf_program__nth_fd(struct bpf_program *prog, int n); 230LIBBPF_API int bpf_program__nth_fd(const struct bpf_program *prog, int n);
231 231
232/* 232/*
233 * Adjust type of BPF program. Default is kprobe. 233 * Adjust type of BPF program. Default is kprobe.
@@ -246,14 +246,14 @@ LIBBPF_API void
246bpf_program__set_expected_attach_type(struct bpf_program *prog, 246bpf_program__set_expected_attach_type(struct bpf_program *prog,
247 enum bpf_attach_type type); 247 enum bpf_attach_type type);
248 248
249LIBBPF_API bool bpf_program__is_socket_filter(struct bpf_program *prog); 249LIBBPF_API bool bpf_program__is_socket_filter(const struct bpf_program *prog);
250LIBBPF_API bool bpf_program__is_tracepoint(struct bpf_program *prog); 250LIBBPF_API bool bpf_program__is_tracepoint(const struct bpf_program *prog);
251LIBBPF_API bool bpf_program__is_raw_tracepoint(struct bpf_program *prog); 251LIBBPF_API bool bpf_program__is_raw_tracepoint(const struct bpf_program *prog);
252LIBBPF_API bool bpf_program__is_kprobe(struct bpf_program *prog); 252LIBBPF_API bool bpf_program__is_kprobe(const struct bpf_program *prog);
253LIBBPF_API bool bpf_program__is_sched_cls(struct bpf_program *prog); 253LIBBPF_API bool bpf_program__is_sched_cls(const struct bpf_program *prog);
254LIBBPF_API bool bpf_program__is_sched_act(struct bpf_program *prog); 254LIBBPF_API bool bpf_program__is_sched_act(const struct bpf_program *prog);
255LIBBPF_API bool bpf_program__is_xdp(struct bpf_program *prog); 255LIBBPF_API bool bpf_program__is_xdp(const struct bpf_program *prog);
256LIBBPF_API bool bpf_program__is_perf_event(struct bpf_program *prog); 256LIBBPF_API bool bpf_program__is_perf_event(const struct bpf_program *prog);
257 257
258/* 258/*
259 * No need for __attribute__((packed)), all members of 'bpf_map_def' 259 * No need for __attribute__((packed)), all members of 'bpf_map_def'
@@ -275,10 +275,10 @@ struct bpf_map_def {
275 */ 275 */
276struct bpf_map; 276struct bpf_map;
277LIBBPF_API struct bpf_map * 277LIBBPF_API struct bpf_map *
278bpf_object__find_map_by_name(struct bpf_object *obj, const char *name); 278bpf_object__find_map_by_name(const struct bpf_object *obj, const char *name);
279 279
280LIBBPF_API int 280LIBBPF_API int
281bpf_object__find_map_fd_by_name(struct bpf_object *obj, const char *name); 281bpf_object__find_map_fd_by_name(const struct bpf_object *obj, const char *name);
282 282
283/* 283/*
284 * Get bpf_map through the offset of corresponding struct bpf_map_def 284 * Get bpf_map through the offset of corresponding struct bpf_map_def
@@ -288,7 +288,7 @@ LIBBPF_API struct bpf_map *
288bpf_object__find_map_by_offset(struct bpf_object *obj, size_t offset); 288bpf_object__find_map_by_offset(struct bpf_object *obj, size_t offset);
289 289
290LIBBPF_API struct bpf_map * 290LIBBPF_API struct bpf_map *
291bpf_map__next(struct bpf_map *map, struct bpf_object *obj); 291bpf_map__next(const struct bpf_map *map, const struct bpf_object *obj);
292#define bpf_object__for_each_map(pos, obj) \ 292#define bpf_object__for_each_map(pos, obj) \
293 for ((pos) = bpf_map__next(NULL, (obj)); \ 293 for ((pos) = bpf_map__next(NULL, (obj)); \
294 (pos) != NULL; \ 294 (pos) != NULL; \
@@ -296,22 +296,22 @@ bpf_map__next(struct bpf_map *map, struct bpf_object *obj);
296#define bpf_map__for_each bpf_object__for_each_map 296#define bpf_map__for_each bpf_object__for_each_map
297 297
298LIBBPF_API struct bpf_map * 298LIBBPF_API struct bpf_map *
299bpf_map__prev(struct bpf_map *map, struct bpf_object *obj); 299bpf_map__prev(const struct bpf_map *map, const struct bpf_object *obj);
300 300
301LIBBPF_API int bpf_map__fd(struct bpf_map *map); 301LIBBPF_API int bpf_map__fd(const struct bpf_map *map);
302LIBBPF_API const struct bpf_map_def *bpf_map__def(struct bpf_map *map); 302LIBBPF_API const struct bpf_map_def *bpf_map__def(const struct bpf_map *map);
303LIBBPF_API const char *bpf_map__name(struct bpf_map *map); 303LIBBPF_API const char *bpf_map__name(const struct bpf_map *map);
304LIBBPF_API __u32 bpf_map__btf_key_type_id(const struct bpf_map *map); 304LIBBPF_API __u32 bpf_map__btf_key_type_id(const struct bpf_map *map);
305LIBBPF_API __u32 bpf_map__btf_value_type_id(const struct bpf_map *map); 305LIBBPF_API __u32 bpf_map__btf_value_type_id(const struct bpf_map *map);
306 306
307typedef void (*bpf_map_clear_priv_t)(struct bpf_map *, void *); 307typedef void (*bpf_map_clear_priv_t)(struct bpf_map *, void *);
308LIBBPF_API int bpf_map__set_priv(struct bpf_map *map, void *priv, 308LIBBPF_API int bpf_map__set_priv(struct bpf_map *map, void *priv,
309 bpf_map_clear_priv_t clear_priv); 309 bpf_map_clear_priv_t clear_priv);
310LIBBPF_API void *bpf_map__priv(struct bpf_map *map); 310LIBBPF_API void *bpf_map__priv(const struct bpf_map *map);
311LIBBPF_API int bpf_map__reuse_fd(struct bpf_map *map, int fd); 311LIBBPF_API int bpf_map__reuse_fd(struct bpf_map *map, int fd);
312LIBBPF_API int bpf_map__resize(struct bpf_map *map, __u32 max_entries); 312LIBBPF_API int bpf_map__resize(struct bpf_map *map, __u32 max_entries);
313LIBBPF_API bool bpf_map__is_offload_neutral(struct bpf_map *map); 313LIBBPF_API bool bpf_map__is_offload_neutral(const struct bpf_map *map);
314LIBBPF_API bool bpf_map__is_internal(struct bpf_map *map); 314LIBBPF_API bool bpf_map__is_internal(const struct bpf_map *map);
315LIBBPF_API void bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex); 315LIBBPF_API void bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex);
316LIBBPF_API int bpf_map__pin(struct bpf_map *map, const char *path); 316LIBBPF_API int bpf_map__pin(struct bpf_map *map, const char *path);
317LIBBPF_API int bpf_map__unpin(struct bpf_map *map, const char *path); 317LIBBPF_API int bpf_map__unpin(struct bpf_map *map, const char *path);
@@ -454,6 +454,22 @@ bpf_program__bpil_addr_to_offs(struct bpf_prog_info_linear *info_linear);
454LIBBPF_API void 454LIBBPF_API void
455bpf_program__bpil_offs_to_addr(struct bpf_prog_info_linear *info_linear); 455bpf_program__bpil_offs_to_addr(struct bpf_prog_info_linear *info_linear);
456 456
457/*
458 * A helper function to get the number of possible CPUs before looking up
459 * per-CPU maps. Negative errno is returned on failure.
460 *
461 * Example usage:
462 *
463 * int ncpus = libbpf_num_possible_cpus();
464 * if (ncpus < 0) {
465 * // error handling
466 * }
467 * long values[ncpus];
468 * bpf_map_lookup_elem(per_cpu_map_fd, key, values);
469 *
470 */
471LIBBPF_API int libbpf_num_possible_cpus(void);
472
457#ifdef __cplusplus 473#ifdef __cplusplus
458} /* extern "C" */ 474} /* extern "C" */
459#endif 475#endif
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index 46dcda89df21..2c6d835620d2 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -172,4 +172,5 @@ LIBBPF_0.0.4 {
172 btf_dump__new; 172 btf_dump__new;
173 btf__parse_elf; 173 btf__parse_elf;
174 bpf_object__load_xattr; 174 bpf_object__load_xattr;
175 libbpf_num_possible_cpus;
175} LIBBPF_0.0.3; 176} LIBBPF_0.0.3;
diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h
index 61d90eb82ee6..2ac29bd36226 100644
--- a/tools/lib/bpf/libbpf_internal.h
+++ b/tools/lib/bpf/libbpf_internal.h
@@ -23,6 +23,13 @@
23#define BTF_PARAM_ENC(name, type) (name), (type) 23#define BTF_PARAM_ENC(name, type) (name), (type)
24#define BTF_VAR_SECINFO_ENC(type, offset, size) (type), (offset), (size) 24#define BTF_VAR_SECINFO_ENC(type, offset, size) (type), (offset), (size)
25 25
26#ifndef min
27# define min(x, y) ((x) < (y) ? (x) : (y))
28#endif
29#ifndef max
30# define max(x, y) ((x) < (y) ? (y) : (x))
31#endif
32
26extern void libbpf_print(enum libbpf_print_level level, 33extern void libbpf_print(enum libbpf_print_level level,
27 const char *format, ...) 34 const char *format, ...)
28 __attribute__((format(printf, 2, 3))); 35 __attribute__((format(printf, 2, 3)));
diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c
index 38667b62f1fe..7ef6293b4fd7 100644
--- a/tools/lib/bpf/xsk.c
+++ b/tools/lib/bpf/xsk.c
@@ -60,10 +60,8 @@ struct xsk_socket {
60 struct xsk_umem *umem; 60 struct xsk_umem *umem;
61 struct xsk_socket_config config; 61 struct xsk_socket_config config;
62 int fd; 62 int fd;
63 int xsks_map;
64 int ifindex; 63 int ifindex;
65 int prog_fd; 64 int prog_fd;
66 int qidconf_map_fd;
67 int xsks_map_fd; 65 int xsks_map_fd;
68 __u32 queue_id; 66 __u32 queue_id;
69 char ifname[IFNAMSIZ]; 67 char ifname[IFNAMSIZ];
@@ -265,15 +263,11 @@ static int xsk_load_xdp_prog(struct xsk_socket *xsk)
265 /* This is the C-program: 263 /* This is the C-program:
266 * SEC("xdp_sock") int xdp_sock_prog(struct xdp_md *ctx) 264 * SEC("xdp_sock") int xdp_sock_prog(struct xdp_md *ctx)
267 * { 265 * {
268 * int *qidconf, index = ctx->rx_queue_index; 266 * int index = ctx->rx_queue_index;
269 * 267 *
270 * // A set entry here means that the corresponding queue_id 268 * // A set entry here means that the corresponding queue_id
271 * // has an active AF_XDP socket bound to it. 269 * // has an active AF_XDP socket bound to it.
272 * qidconf = bpf_map_lookup_elem(&qidconf_map, &index); 270 * if (bpf_map_lookup_elem(&xsks_map, &index))
273 * if (!qidconf)
274 * return XDP_ABORTED;
275 *
276 * if (*qidconf)
277 * return bpf_redirect_map(&xsks_map, index, 0); 271 * return bpf_redirect_map(&xsks_map, index, 0);
278 * 272 *
279 * return XDP_PASS; 273 * return XDP_PASS;
@@ -286,15 +280,10 @@ static int xsk_load_xdp_prog(struct xsk_socket *xsk)
286 BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_1, -4), 280 BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_1, -4),
287 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), 281 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
288 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), 282 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
289 BPF_LD_MAP_FD(BPF_REG_1, xsk->qidconf_map_fd), 283 BPF_LD_MAP_FD(BPF_REG_1, xsk->xsks_map_fd),
290 BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), 284 BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
291 BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), 285 BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
292 BPF_MOV32_IMM(BPF_REG_0, 0),
293 /* if r1 == 0 goto +8 */
294 BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 8),
295 BPF_MOV32_IMM(BPF_REG_0, 2), 286 BPF_MOV32_IMM(BPF_REG_0, 2),
296 /* r1 = *(u32 *)(r1 + 0) */
297 BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, 0),
298 /* if r1 == 0 goto +5 */ 287 /* if r1 == 0 goto +5 */
299 BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 5), 288 BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 5),
300 /* r2 = *(u32 *)(r10 - 4) */ 289 /* r2 = *(u32 *)(r10 - 4) */
@@ -366,18 +355,11 @@ static int xsk_create_bpf_maps(struct xsk_socket *xsk)
366 if (max_queues < 0) 355 if (max_queues < 0)
367 return max_queues; 356 return max_queues;
368 357
369 fd = bpf_create_map_name(BPF_MAP_TYPE_ARRAY, "qidconf_map", 358 fd = bpf_create_map_name(BPF_MAP_TYPE_XSKMAP, "xsks_map",
370 sizeof(int), sizeof(int), max_queues, 0); 359 sizeof(int), sizeof(int), max_queues, 0);
371 if (fd < 0) 360 if (fd < 0)
372 return fd; 361 return fd;
373 xsk->qidconf_map_fd = fd;
374 362
375 fd = bpf_create_map_name(BPF_MAP_TYPE_XSKMAP, "xsks_map",
376 sizeof(int), sizeof(int), max_queues, 0);
377 if (fd < 0) {
378 close(xsk->qidconf_map_fd);
379 return fd;
380 }
381 xsk->xsks_map_fd = fd; 363 xsk->xsks_map_fd = fd;
382 364
383 return 0; 365 return 0;
@@ -385,10 +367,8 @@ static int xsk_create_bpf_maps(struct xsk_socket *xsk)
385 367
386static void xsk_delete_bpf_maps(struct xsk_socket *xsk) 368static void xsk_delete_bpf_maps(struct xsk_socket *xsk)
387{ 369{
388 close(xsk->qidconf_map_fd); 370 bpf_map_delete_elem(xsk->xsks_map_fd, &xsk->queue_id);
389 close(xsk->xsks_map_fd); 371 close(xsk->xsks_map_fd);
390 xsk->qidconf_map_fd = -1;
391 xsk->xsks_map_fd = -1;
392} 372}
393 373
394static int xsk_lookup_bpf_maps(struct xsk_socket *xsk) 374static int xsk_lookup_bpf_maps(struct xsk_socket *xsk)
@@ -417,10 +397,9 @@ static int xsk_lookup_bpf_maps(struct xsk_socket *xsk)
417 if (err) 397 if (err)
418 goto out_map_ids; 398 goto out_map_ids;
419 399
420 for (i = 0; i < prog_info.nr_map_ids; i++) { 400 xsk->xsks_map_fd = -1;
421 if (xsk->qidconf_map_fd != -1 && xsk->xsks_map_fd != -1)
422 break;
423 401
402 for (i = 0; i < prog_info.nr_map_ids; i++) {
424 fd = bpf_map_get_fd_by_id(map_ids[i]); 403 fd = bpf_map_get_fd_by_id(map_ids[i]);
425 if (fd < 0) 404 if (fd < 0)
426 continue; 405 continue;
@@ -431,11 +410,6 @@ static int xsk_lookup_bpf_maps(struct xsk_socket *xsk)
431 continue; 410 continue;
432 } 411 }
433 412
434 if (!strcmp(map_info.name, "qidconf_map")) {
435 xsk->qidconf_map_fd = fd;
436 continue;
437 }
438
439 if (!strcmp(map_info.name, "xsks_map")) { 413 if (!strcmp(map_info.name, "xsks_map")) {
440 xsk->xsks_map_fd = fd; 414 xsk->xsks_map_fd = fd;
441 continue; 415 continue;
@@ -445,40 +419,18 @@ static int xsk_lookup_bpf_maps(struct xsk_socket *xsk)
445 } 419 }
446 420
447 err = 0; 421 err = 0;
448 if (xsk->qidconf_map_fd < 0 || xsk->xsks_map_fd < 0) { 422 if (xsk->xsks_map_fd == -1)
449 err = -ENOENT; 423 err = -ENOENT;
450 xsk_delete_bpf_maps(xsk);
451 }
452 424
453out_map_ids: 425out_map_ids:
454 free(map_ids); 426 free(map_ids);
455 return err; 427 return err;
456} 428}
457 429
458static void xsk_clear_bpf_maps(struct xsk_socket *xsk)
459{
460 int qid = false;
461
462 bpf_map_update_elem(xsk->qidconf_map_fd, &xsk->queue_id, &qid, 0);
463 bpf_map_delete_elem(xsk->xsks_map_fd, &xsk->queue_id);
464}
465
466static int xsk_set_bpf_maps(struct xsk_socket *xsk) 430static int xsk_set_bpf_maps(struct xsk_socket *xsk)
467{ 431{
468 int qid = true, fd = xsk->fd, err; 432 return bpf_map_update_elem(xsk->xsks_map_fd, &xsk->queue_id,
469 433 &xsk->fd, 0);
470 err = bpf_map_update_elem(xsk->qidconf_map_fd, &xsk->queue_id, &qid, 0);
471 if (err)
472 goto out;
473
474 err = bpf_map_update_elem(xsk->xsks_map_fd, &xsk->queue_id, &fd, 0);
475 if (err)
476 goto out;
477
478 return 0;
479out:
480 xsk_clear_bpf_maps(xsk);
481 return err;
482} 434}
483 435
484static int xsk_setup_xdp_prog(struct xsk_socket *xsk) 436static int xsk_setup_xdp_prog(struct xsk_socket *xsk)
@@ -497,26 +449,27 @@ static int xsk_setup_xdp_prog(struct xsk_socket *xsk)
497 return err; 449 return err;
498 450
499 err = xsk_load_xdp_prog(xsk); 451 err = xsk_load_xdp_prog(xsk);
500 if (err) 452 if (err) {
501 goto out_maps; 453 xsk_delete_bpf_maps(xsk);
454 return err;
455 }
502 } else { 456 } else {
503 xsk->prog_fd = bpf_prog_get_fd_by_id(prog_id); 457 xsk->prog_fd = bpf_prog_get_fd_by_id(prog_id);
504 err = xsk_lookup_bpf_maps(xsk); 458 err = xsk_lookup_bpf_maps(xsk);
505 if (err) 459 if (err) {
506 goto out_load; 460 close(xsk->prog_fd);
461 return err;
462 }
507 } 463 }
508 464
509 err = xsk_set_bpf_maps(xsk); 465 err = xsk_set_bpf_maps(xsk);
510 if (err) 466 if (err) {
511 goto out_load; 467 xsk_delete_bpf_maps(xsk);
468 close(xsk->prog_fd);
469 return err;
470 }
512 471
513 return 0; 472 return 0;
514
515out_load:
516 close(xsk->prog_fd);
517out_maps:
518 xsk_delete_bpf_maps(xsk);
519 return err;
520} 473}
521 474
522int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname, 475int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
@@ -643,9 +596,7 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
643 goto out_mmap_tx; 596 goto out_mmap_tx;
644 } 597 }
645 598
646 xsk->qidconf_map_fd = -1; 599 xsk->prog_fd = -1;
647 xsk->xsks_map_fd = -1;
648
649 if (!(xsk->config.libbpf_flags & XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)) { 600 if (!(xsk->config.libbpf_flags & XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)) {
650 err = xsk_setup_xdp_prog(xsk); 601 err = xsk_setup_xdp_prog(xsk);
651 if (err) 602 if (err)
@@ -708,8 +659,10 @@ void xsk_socket__delete(struct xsk_socket *xsk)
708 if (!xsk) 659 if (!xsk)
709 return; 660 return;
710 661
711 xsk_clear_bpf_maps(xsk); 662 if (xsk->prog_fd != -1) {
712 xsk_delete_bpf_maps(xsk); 663 xsk_delete_bpf_maps(xsk);
664 close(xsk->prog_fd);
665 }
713 666
714 optlen = sizeof(off); 667 optlen = sizeof(off);
715 err = getsockopt(xsk->fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen); 668 err = getsockopt(xsk->fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen);
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index d8df5c9b5b2f..fb5ce43e28b3 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -280,4 +280,5 @@ $(OUTPUT)/verifier/tests.h: $(VERIFIER_TESTS_DIR) $(VERIFIER_TEST_FILES)
280 ) > $(VERIFIER_TESTS_H)) 280 ) > $(VERIFIER_TESTS_H))
281 281
282EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) $(ALU32_BUILD_DIR) \ 282EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) $(ALU32_BUILD_DIR) \
283 $(VERIFIER_TESTS_H) $(PROG_TESTS_H) $(MAP_TESTS_H) 283 $(VERIFIER_TESTS_H) $(PROG_TESTS_H) $(MAP_TESTS_H) \
284 feature
diff --git a/tools/testing/selftests/bpf/bpf_endian.h b/tools/testing/selftests/bpf/bpf_endian.h
index b25595ea4a78..05f036df8a4c 100644
--- a/tools/testing/selftests/bpf/bpf_endian.h
+++ b/tools/testing/selftests/bpf/bpf_endian.h
@@ -2,6 +2,7 @@
2#ifndef __BPF_ENDIAN__ 2#ifndef __BPF_ENDIAN__
3#define __BPF_ENDIAN__ 3#define __BPF_ENDIAN__
4 4
5#include <linux/stddef.h>
5#include <linux/swab.h> 6#include <linux/swab.h>
6 7
7/* LLVM's BPF target selects the endianness of the CPU 8/* LLVM's BPF target selects the endianness of the CPU
diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h
index e6d243b7cd74..1a5b1accf091 100644
--- a/tools/testing/selftests/bpf/bpf_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_helpers.h
@@ -31,7 +31,7 @@ static int (*bpf_map_pop_elem)(void *map, void *value) =
31 (void *) BPF_FUNC_map_pop_elem; 31 (void *) BPF_FUNC_map_pop_elem;
32static int (*bpf_map_peek_elem)(void *map, void *value) = 32static int (*bpf_map_peek_elem)(void *map, void *value) =
33 (void *) BPF_FUNC_map_peek_elem; 33 (void *) BPF_FUNC_map_peek_elem;
34static int (*bpf_probe_read)(void *dst, int size, void *unsafe_ptr) = 34static int (*bpf_probe_read)(void *dst, int size, const void *unsafe_ptr) =
35 (void *) BPF_FUNC_probe_read; 35 (void *) BPF_FUNC_probe_read;
36static unsigned long long (*bpf_ktime_get_ns)(void) = 36static unsigned long long (*bpf_ktime_get_ns)(void) =
37 (void *) BPF_FUNC_ktime_get_ns; 37 (void *) BPF_FUNC_ktime_get_ns;
@@ -62,7 +62,7 @@ static int (*bpf_perf_event_output)(void *ctx, void *map,
62 (void *) BPF_FUNC_perf_event_output; 62 (void *) BPF_FUNC_perf_event_output;
63static int (*bpf_get_stackid)(void *ctx, void *map, int flags) = 63static int (*bpf_get_stackid)(void *ctx, void *map, int flags) =
64 (void *) BPF_FUNC_get_stackid; 64 (void *) BPF_FUNC_get_stackid;
65static int (*bpf_probe_write_user)(void *dst, void *src, int size) = 65static int (*bpf_probe_write_user)(void *dst, const void *src, int size) =
66 (void *) BPF_FUNC_probe_write_user; 66 (void *) BPF_FUNC_probe_write_user;
67static int (*bpf_current_task_under_cgroup)(void *map, int index) = 67static int (*bpf_current_task_under_cgroup)(void *map, int index) =
68 (void *) BPF_FUNC_current_task_under_cgroup; 68 (void *) BPF_FUNC_current_task_under_cgroup;
diff --git a/tools/testing/selftests/bpf/bpf_util.h b/tools/testing/selftests/bpf/bpf_util.h
index a29206ebbd13..ec219f84e041 100644
--- a/tools/testing/selftests/bpf/bpf_util.h
+++ b/tools/testing/selftests/bpf/bpf_util.h
@@ -6,44 +6,17 @@
6#include <stdlib.h> 6#include <stdlib.h>
7#include <string.h> 7#include <string.h>
8#include <errno.h> 8#include <errno.h>
9#include <libbpf.h> /* libbpf_num_possible_cpus */
9 10
10static inline unsigned int bpf_num_possible_cpus(void) 11static inline unsigned int bpf_num_possible_cpus(void)
11{ 12{
12 static const char *fcpu = "/sys/devices/system/cpu/possible"; 13 int possible_cpus = libbpf_num_possible_cpus();
13 unsigned int start, end, possible_cpus = 0;
14 char buff[128];
15 FILE *fp;
16 int len, n, i, j = 0;
17 14
18 fp = fopen(fcpu, "r"); 15 if (possible_cpus < 0) {
19 if (!fp) { 16 printf("Failed to get # of possible cpus: '%s'!\n",
20 printf("Failed to open %s: '%s'!\n", fcpu, strerror(errno)); 17 strerror(-possible_cpus));
21 exit(1); 18 exit(1);
22 } 19 }
23
24 if (!fgets(buff, sizeof(buff), fp)) {
25 printf("Failed to read %s!\n", fcpu);
26 exit(1);
27 }
28
29 len = strlen(buff);
30 for (i = 0; i <= len; i++) {
31 if (buff[i] == ',' || buff[i] == '\0') {
32 buff[i] = '\0';
33 n = sscanf(&buff[j], "%u-%u", &start, &end);
34 if (n <= 0) {
35 printf("Failed to retrieve # possible CPUs!\n");
36 exit(1);
37 } else if (n == 1) {
38 end = start;
39 }
40 possible_cpus += end - start + 1;
41 j = i + 1;
42 }
43 }
44
45 fclose(fp);
46
47 return possible_cpus; 20 return possible_cpus;
48} 21}
49 22
diff --git a/tools/testing/selftests/bpf/cgroup_helpers.c b/tools/testing/selftests/bpf/cgroup_helpers.c
index 0d89f0396be4..e95c33e333a4 100644
--- a/tools/testing/selftests/bpf/cgroup_helpers.c
+++ b/tools/testing/selftests/bpf/cgroup_helpers.c
@@ -47,7 +47,7 @@ int enable_all_controllers(char *cgroup_path)
47 char buf[PATH_MAX]; 47 char buf[PATH_MAX];
48 char *c, *c2; 48 char *c, *c2;
49 int fd, cfd; 49 int fd, cfd;
50 size_t len; 50 ssize_t len;
51 51
52 snprintf(path, sizeof(path), "%s/cgroup.controllers", cgroup_path); 52 snprintf(path, sizeof(path), "%s/cgroup.controllers", cgroup_path);
53 fd = open(path, O_RDONLY); 53 fd = open(path, O_RDONLY);
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
index c0091137074b..e1b55261526f 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
@@ -5,7 +5,7 @@ static int libbpf_debug_print(enum libbpf_print_level level,
5 const char *format, va_list args) 5 const char *format, va_list args)
6{ 6{
7 if (level != LIBBPF_DEBUG) 7 if (level != LIBBPF_DEBUG)
8 return 0; 8 return vfprintf(stderr, format, args);
9 9
10 if (!strstr(format, "verifier log")) 10 if (!strstr(format, "verifier log"))
11 return 0; 11 return 0;
@@ -32,24 +32,69 @@ static int check_load(const char *file, enum bpf_prog_type type)
32 32
33void test_bpf_verif_scale(void) 33void test_bpf_verif_scale(void)
34{ 34{
35 const char *scale[] = { 35 const char *sched_cls[] = {
36 "./test_verif_scale1.o", "./test_verif_scale2.o", "./test_verif_scale3.o" 36 "./test_verif_scale1.o", "./test_verif_scale2.o", "./test_verif_scale3.o",
37 }; 37 };
38 const char *pyperf[] = { 38 const char *raw_tp[] = {
39 "./pyperf50.o", "./pyperf100.o", "./pyperf180.o" 39 /* full unroll by llvm */
40 "./pyperf50.o", "./pyperf100.o", "./pyperf180.o",
41
42 /* partial unroll. llvm will unroll loop ~150 times.
43 * C loop count -> 600.
44 * Asm loop count -> 4.
45 * 16k insns in loop body.
46 * Total of 5 such loops. Total program size ~82k insns.
47 */
48 "./pyperf600.o",
49
50 /* no unroll at all.
51 * C loop count -> 600.
52 * ASM loop count -> 600.
53 * ~110 insns in loop body.
54 * Total of 5 such loops. Total program size ~1500 insns.
55 */
56 "./pyperf600_nounroll.o",
57
58 "./loop1.o", "./loop2.o",
59
60 /* partial unroll. 19k insn in a loop.
61 * Total program size 20.8k insn.
62 * ~350k processed_insns
63 */
64 "./strobemeta.o",
65
66 /* no unroll, tiny loops */
67 "./strobemeta_nounroll1.o",
68 "./strobemeta_nounroll2.o",
69 };
70 const char *cg_sysctl[] = {
71 "./test_sysctl_loop1.o", "./test_sysctl_loop2.o",
40 }; 72 };
41 int err, i; 73 int err, i;
42 74
43 if (verifier_stats) 75 if (verifier_stats)
44 libbpf_set_print(libbpf_debug_print); 76 libbpf_set_print(libbpf_debug_print);
45 77
46 for (i = 0; i < ARRAY_SIZE(scale); i++) { 78 err = check_load("./loop3.o", BPF_PROG_TYPE_RAW_TRACEPOINT);
47 err = check_load(scale[i], BPF_PROG_TYPE_SCHED_CLS); 79 printf("test_scale:loop3:%s\n", err ? (error_cnt--, "OK") : "FAIL");
48 printf("test_scale:%s:%s\n", scale[i], err ? "FAIL" : "OK"); 80
81 for (i = 0; i < ARRAY_SIZE(sched_cls); i++) {
82 err = check_load(sched_cls[i], BPF_PROG_TYPE_SCHED_CLS);
83 printf("test_scale:%s:%s\n", sched_cls[i], err ? "FAIL" : "OK");
49 } 84 }
50 85
51 for (i = 0; i < ARRAY_SIZE(pyperf); i++) { 86 for (i = 0; i < ARRAY_SIZE(raw_tp); i++) {
52 err = check_load(pyperf[i], BPF_PROG_TYPE_RAW_TRACEPOINT); 87 err = check_load(raw_tp[i], BPF_PROG_TYPE_RAW_TRACEPOINT);
53 printf("test_scale:%s:%s\n", pyperf[i], err ? "FAIL" : "OK"); 88 printf("test_scale:%s:%s\n", raw_tp[i], err ? "FAIL" : "OK");
54 } 89 }
90
91 for (i = 0; i < ARRAY_SIZE(cg_sysctl); i++) {
92 err = check_load(cg_sysctl[i], BPF_PROG_TYPE_CGROUP_SYSCTL);
93 printf("test_scale:%s:%s\n", cg_sysctl[i], err ? "FAIL" : "OK");
94 }
95 err = check_load("./test_xdp_loop.o", BPF_PROG_TYPE_XDP);
96 printf("test_scale:test_xdp_loop:%s\n", err ? "FAIL" : "OK");
97
98 err = check_load("./test_seg6_loop.o", BPF_PROG_TYPE_LWT_SEG6LOCAL);
99 printf("test_scale:test_seg6_loop:%s\n", err ? "FAIL" : "OK");
55} 100}
diff --git a/tools/testing/selftests/bpf/progs/bpf_flow.c b/tools/testing/selftests/bpf/progs/bpf_flow.c
index 81ad9a0b29d0..849f42e548b5 100644
--- a/tools/testing/selftests/bpf/progs/bpf_flow.c
+++ b/tools/testing/selftests/bpf/progs/bpf_flow.c
@@ -57,17 +57,25 @@ struct frag_hdr {
57 __be32 identification; 57 __be32 identification;
58}; 58};
59 59
60struct bpf_map_def SEC("maps") jmp_table = { 60struct {
61 __u32 type;
62 __u32 max_entries;
63 __u32 key_size;
64 __u32 value_size;
65} jmp_table SEC(".maps") = {
61 .type = BPF_MAP_TYPE_PROG_ARRAY, 66 .type = BPF_MAP_TYPE_PROG_ARRAY,
67 .max_entries = 8,
62 .key_size = sizeof(__u32), 68 .key_size = sizeof(__u32),
63 .value_size = sizeof(__u32), 69 .value_size = sizeof(__u32),
64 .max_entries = 8
65}; 70};
66 71
67struct bpf_map_def SEC("maps") last_dissection = { 72struct {
73 __u32 type;
74 __u32 max_entries;
75 __u32 *key;
76 struct bpf_flow_keys *value;
77} last_dissection SEC(".maps") = {
68 .type = BPF_MAP_TYPE_ARRAY, 78 .type = BPF_MAP_TYPE_ARRAY,
69 .key_size = sizeof(__u32),
70 .value_size = sizeof(struct bpf_flow_keys),
71 .max_entries = 1, 79 .max_entries = 1,
72}; 80};
73 81
diff --git a/tools/testing/selftests/bpf/progs/loop1.c b/tools/testing/selftests/bpf/progs/loop1.c
new file mode 100644
index 000000000000..dea395af9ea9
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/loop1.c
@@ -0,0 +1,28 @@
1// SPDX-License-Identifier: GPL-2.0
2// Copyright (c) 2019 Facebook
3#include <linux/sched.h>
4#include <linux/ptrace.h>
5#include <stdint.h>
6#include <stddef.h>
7#include <stdbool.h>
8#include <linux/bpf.h>
9#include "bpf_helpers.h"
10
11char _license[] SEC("license") = "GPL";
12
13SEC("raw_tracepoint/kfree_skb")
14int nested_loops(volatile struct pt_regs* ctx)
15{
16 int i, j, sum = 0, m;
17
18 for (j = 0; j < 300; j++)
19 for (i = 0; i < j; i++) {
20 if (j & 1)
21 m = ctx->rax;
22 else
23 m = j;
24 sum += i * m;
25 }
26
27 return sum;
28}
diff --git a/tools/testing/selftests/bpf/progs/loop2.c b/tools/testing/selftests/bpf/progs/loop2.c
new file mode 100644
index 000000000000..0637bd8e8bcf
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/loop2.c
@@ -0,0 +1,28 @@
1// SPDX-License-Identifier: GPL-2.0
2// Copyright (c) 2019 Facebook
3#include <linux/sched.h>
4#include <linux/ptrace.h>
5#include <stdint.h>
6#include <stddef.h>
7#include <stdbool.h>
8#include <linux/bpf.h>
9#include "bpf_helpers.h"
10
11char _license[] SEC("license") = "GPL";
12
13SEC("raw_tracepoint/consume_skb")
14int while_true(volatile struct pt_regs* ctx)
15{
16 int i = 0;
17
18 while (true) {
19 if (ctx->rax & 1)
20 i += 3;
21 else
22 i += 7;
23 if (i > 40)
24 break;
25 }
26
27 return i;
28}
diff --git a/tools/testing/selftests/bpf/progs/loop3.c b/tools/testing/selftests/bpf/progs/loop3.c
new file mode 100644
index 000000000000..30a0f6cba080
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/loop3.c
@@ -0,0 +1,22 @@
1// SPDX-License-Identifier: GPL-2.0
2// Copyright (c) 2019 Facebook
3#include <linux/sched.h>
4#include <linux/ptrace.h>
5#include <stdint.h>
6#include <stddef.h>
7#include <stdbool.h>
8#include <linux/bpf.h>
9#include "bpf_helpers.h"
10
11char _license[] SEC("license") = "GPL";
12
13SEC("raw_tracepoint/consume_skb")
14int while_true(volatile struct pt_regs* ctx)
15{
16 __u64 i = 0, sum = 0;
17 do {
18 i++;
19 sum += ctx->rax;
20 } while (i < 0x100000000ULL);
21 return sum;
22}
diff --git a/tools/testing/selftests/bpf/progs/netcnt_prog.c b/tools/testing/selftests/bpf/progs/netcnt_prog.c
index 9f741e69cebe..a25c82a5b7c8 100644
--- a/tools/testing/selftests/bpf/progs/netcnt_prog.c
+++ b/tools/testing/selftests/bpf/progs/netcnt_prog.c
@@ -10,24 +10,22 @@
10#define REFRESH_TIME_NS 100000000 10#define REFRESH_TIME_NS 100000000
11#define NS_PER_SEC 1000000000 11#define NS_PER_SEC 1000000000
12 12
13struct bpf_map_def SEC("maps") percpu_netcnt = { 13struct {
14 __u32 type;
15 struct bpf_cgroup_storage_key *key;
16 struct percpu_net_cnt *value;
17} percpu_netcnt SEC(".maps") = {
14 .type = BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE, 18 .type = BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE,
15 .key_size = sizeof(struct bpf_cgroup_storage_key),
16 .value_size = sizeof(struct percpu_net_cnt),
17}; 19};
18 20
19BPF_ANNOTATE_KV_PAIR(percpu_netcnt, struct bpf_cgroup_storage_key, 21struct {
20 struct percpu_net_cnt); 22 __u32 type;
21 23 struct bpf_cgroup_storage_key *key;
22struct bpf_map_def SEC("maps") netcnt = { 24 struct net_cnt *value;
25} netcnt SEC(".maps") = {
23 .type = BPF_MAP_TYPE_CGROUP_STORAGE, 26 .type = BPF_MAP_TYPE_CGROUP_STORAGE,
24 .key_size = sizeof(struct bpf_cgroup_storage_key),
25 .value_size = sizeof(struct net_cnt),
26}; 27};
27 28
28BPF_ANNOTATE_KV_PAIR(netcnt, struct bpf_cgroup_storage_key,
29 struct net_cnt);
30
31SEC("cgroup/skb") 29SEC("cgroup/skb")
32int bpf_nextcnt(struct __sk_buff *skb) 30int bpf_nextcnt(struct __sk_buff *skb)
33{ 31{
diff --git a/tools/testing/selftests/bpf/progs/pyperf.h b/tools/testing/selftests/bpf/progs/pyperf.h
index 0cc5e4ee90bd..6b0781391be5 100644
--- a/tools/testing/selftests/bpf/progs/pyperf.h
+++ b/tools/testing/selftests/bpf/progs/pyperf.h
@@ -220,7 +220,11 @@ static inline __attribute__((__always_inline__)) int __on_event(struct pt_regs *
220 int32_t* symbol_counter = bpf_map_lookup_elem(&symbolmap, &sym); 220 int32_t* symbol_counter = bpf_map_lookup_elem(&symbolmap, &sym);
221 if (symbol_counter == NULL) 221 if (symbol_counter == NULL)
222 return 0; 222 return 0;
223#pragma unroll 223#ifdef NO_UNROLL
224#pragma clang loop unroll(disable)
225#else
226#pragma clang loop unroll(full)
227#endif
224 /* Unwind python stack */ 228 /* Unwind python stack */
225 for (int i = 0; i < STACK_MAX_LEN; ++i) { 229 for (int i = 0; i < STACK_MAX_LEN; ++i) {
226 if (frame_ptr && get_frame_data(frame_ptr, pidData, &frame, &sym)) { 230 if (frame_ptr && get_frame_data(frame_ptr, pidData, &frame, &sym)) {
diff --git a/tools/testing/selftests/bpf/progs/pyperf600.c b/tools/testing/selftests/bpf/progs/pyperf600.c
new file mode 100644
index 000000000000..cb49b89e37cd
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/pyperf600.c
@@ -0,0 +1,9 @@
1// SPDX-License-Identifier: GPL-2.0
2// Copyright (c) 2019 Facebook
3#define STACK_MAX_LEN 600
4/* clang will not unroll the loop 600 times.
5 * Instead it will unroll it to the amount it deemed
6 * appropriate, but the loop will still execute 600 times.
7 * Total program size is around 90k insns
8 */
9#include "pyperf.h"
diff --git a/tools/testing/selftests/bpf/progs/pyperf600_nounroll.c b/tools/testing/selftests/bpf/progs/pyperf600_nounroll.c
new file mode 100644
index 000000000000..6beff7502f4d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/pyperf600_nounroll.c
@@ -0,0 +1,8 @@
1// SPDX-License-Identifier: GPL-2.0
2// Copyright (c) 2019 Facebook
3#define STACK_MAX_LEN 600
4#define NO_UNROLL
5/* clang will not unroll at all.
6 * Total program size is around 2k insns
7 */
8#include "pyperf.h"
diff --git a/tools/testing/selftests/bpf/progs/socket_cookie_prog.c b/tools/testing/selftests/bpf/progs/socket_cookie_prog.c
index 9ff8ac4b0bf6..6aabb681fb9a 100644
--- a/tools/testing/selftests/bpf/progs/socket_cookie_prog.c
+++ b/tools/testing/selftests/bpf/progs/socket_cookie_prog.c
@@ -7,25 +7,36 @@
7#include "bpf_helpers.h" 7#include "bpf_helpers.h"
8#include "bpf_endian.h" 8#include "bpf_endian.h"
9 9
10struct bpf_map_def SEC("maps") socket_cookies = { 10struct socket_cookie {
11 .type = BPF_MAP_TYPE_HASH, 11 __u64 cookie_key;
12 .key_size = sizeof(__u64), 12 __u32 cookie_value;
13 .value_size = sizeof(__u32), 13};
14 .max_entries = 1 << 8, 14
15struct {
16 __u32 type;
17 __u32 map_flags;
18 int *key;
19 struct socket_cookie *value;
20} socket_cookies SEC(".maps") = {
21 .type = BPF_MAP_TYPE_SK_STORAGE,
22 .map_flags = BPF_F_NO_PREALLOC,
15}; 23};
16 24
17SEC("cgroup/connect6") 25SEC("cgroup/connect6")
18int set_cookie(struct bpf_sock_addr *ctx) 26int set_cookie(struct bpf_sock_addr *ctx)
19{ 27{
20 __u32 cookie_value = 0xFF; 28 struct socket_cookie *p;
21 __u64 cookie_key;
22 29
23 if (ctx->family != AF_INET6 || ctx->user_family != AF_INET6) 30 if (ctx->family != AF_INET6 || ctx->user_family != AF_INET6)
24 return 1; 31 return 1;
25 32
26 cookie_key = bpf_get_socket_cookie(ctx); 33 p = bpf_sk_storage_get(&socket_cookies, ctx->sk, 0,
27 if (bpf_map_update_elem(&socket_cookies, &cookie_key, &cookie_value, 0)) 34 BPF_SK_STORAGE_GET_F_CREATE);
28 return 0; 35 if (!p)
36 return 1;
37
38 p->cookie_value = 0xFF;
39 p->cookie_key = bpf_get_socket_cookie(ctx);
29 40
30 return 1; 41 return 1;
31} 42}
@@ -33,9 +44,8 @@ int set_cookie(struct bpf_sock_addr *ctx)
33SEC("sockops") 44SEC("sockops")
34int update_cookie(struct bpf_sock_ops *ctx) 45int update_cookie(struct bpf_sock_ops *ctx)
35{ 46{
36 __u32 new_cookie_value; 47 struct bpf_sock *sk;
37 __u32 *cookie_value; 48 struct socket_cookie *p;
38 __u64 cookie_key;
39 49
40 if (ctx->family != AF_INET6) 50 if (ctx->family != AF_INET6)
41 return 1; 51 return 1;
@@ -43,14 +53,17 @@ int update_cookie(struct bpf_sock_ops *ctx)
43 if (ctx->op != BPF_SOCK_OPS_TCP_CONNECT_CB) 53 if (ctx->op != BPF_SOCK_OPS_TCP_CONNECT_CB)
44 return 1; 54 return 1;
45 55
46 cookie_key = bpf_get_socket_cookie(ctx); 56 if (!ctx->sk)
57 return 1;
58
59 p = bpf_sk_storage_get(&socket_cookies, ctx->sk, 0, 0);
60 if (!p)
61 return 1;
47 62
48 cookie_value = bpf_map_lookup_elem(&socket_cookies, &cookie_key); 63 if (p->cookie_key != bpf_get_socket_cookie(ctx))
49 if (!cookie_value)
50 return 1; 64 return 1;
51 65
52 new_cookie_value = (ctx->local_port << 8) | *cookie_value; 66 p->cookie_value = (ctx->local_port << 8) | p->cookie_value;
53 bpf_map_update_elem(&socket_cookies, &cookie_key, &new_cookie_value, 0);
54 67
55 return 1; 68 return 1;
56} 69}
diff --git a/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c b/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c
index ed3e4a551c57..9390e0244259 100644
--- a/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c
+++ b/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c
@@ -1,6 +1,5 @@
1#include <linux/bpf.h> 1#include <linux/bpf.h>
2#include "bpf_helpers.h" 2#include "bpf_helpers.h"
3#include "bpf_util.h"
4#include "bpf_endian.h" 3#include "bpf_endian.h"
5 4
6int _version SEC("version") = 1; 5int _version SEC("version") = 1;
diff --git a/tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c b/tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c
index 65fbfdb6cd3a..e80484d98a1a 100644
--- a/tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c
+++ b/tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c
@@ -1,6 +1,6 @@
1#include <linux/bpf.h> 1#include <linux/bpf.h>
2
2#include "bpf_helpers.h" 3#include "bpf_helpers.h"
3#include "bpf_util.h"
4#include "bpf_endian.h" 4#include "bpf_endian.h"
5 5
6int _version SEC("version") = 1; 6int _version SEC("version") = 1;
diff --git a/tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c b/tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c
index bdc22be46f2e..d85c874ef25e 100644
--- a/tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c
+++ b/tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c
@@ -1,6 +1,5 @@
1#include <linux/bpf.h> 1#include <linux/bpf.h>
2#include "bpf_helpers.h" 2#include "bpf_helpers.h"
3#include "bpf_util.h"
4#include "bpf_endian.h" 3#include "bpf_endian.h"
5 4
6int _version SEC("version") = 1; 5int _version SEC("version") = 1;
diff --git a/tools/testing/selftests/bpf/progs/strobemeta.c b/tools/testing/selftests/bpf/progs/strobemeta.c
new file mode 100644
index 000000000000..d3df3d86f092
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/strobemeta.c
@@ -0,0 +1,10 @@
1// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
2// Copyright (c) 2019 Facebook
3
4#define STROBE_MAX_INTS 2
5#define STROBE_MAX_STRS 25
6#define STROBE_MAX_MAPS 100
7#define STROBE_MAX_MAP_ENTRIES 20
8/* full unroll by llvm #undef NO_UNROLL */
9#include "strobemeta.h"
10
diff --git a/tools/testing/selftests/bpf/progs/strobemeta.h b/tools/testing/selftests/bpf/progs/strobemeta.h
new file mode 100644
index 000000000000..1ff73f60a3e4
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/strobemeta.h
@@ -0,0 +1,528 @@
1// SPDX-License-Identifier: GPL-2.0
2// Copyright (c) 2019 Facebook
3
4#include <stdint.h>
5#include <stddef.h>
6#include <stdbool.h>
7#include <linux/bpf.h>
8#include <linux/ptrace.h>
9#include <linux/sched.h>
10#include <linux/types.h>
11#include "bpf_helpers.h"
12
13typedef uint32_t pid_t;
14struct task_struct {};
15
16#define TASK_COMM_LEN 16
17#define PERF_MAX_STACK_DEPTH 127
18
19#define STROBE_TYPE_INVALID 0
20#define STROBE_TYPE_INT 1
21#define STROBE_TYPE_STR 2
22#define STROBE_TYPE_MAP 3
23
24#define STACK_TABLE_EPOCH_SHIFT 20
25#define STROBE_MAX_STR_LEN 1
26#define STROBE_MAX_CFGS 32
27#define STROBE_MAX_PAYLOAD \
28 (STROBE_MAX_STRS * STROBE_MAX_STR_LEN + \
29 STROBE_MAX_MAPS * (1 + STROBE_MAX_MAP_ENTRIES * 2) * STROBE_MAX_STR_LEN)
30
31struct strobe_value_header {
32 /*
33 * meaning depends on type:
34 * 1. int: 0, if value not set, 1 otherwise
35 * 2. str: 1 always, whether value is set or not is determined by ptr
36 * 3. map: 1 always, pointer points to additional struct with number
37 * of entries (up to STROBE_MAX_MAP_ENTRIES)
38 */
39 uint16_t len;
40 /*
41 * _reserved might be used for some future fields/flags, but we always
42 * want to keep strobe_value_header to be 8 bytes, so BPF can read 16
43 * bytes in one go and get both header and value
44 */
45 uint8_t _reserved[6];
46};
47
48/*
49 * strobe_value_generic is used from BPF probe only, but needs to be a union
50 * of strobe_value_int/strobe_value_str/strobe_value_map
51 */
52struct strobe_value_generic {
53 struct strobe_value_header header;
54 union {
55 int64_t val;
56 void *ptr;
57 };
58};
59
60struct strobe_value_int {
61 struct strobe_value_header header;
62 int64_t value;
63};
64
65struct strobe_value_str {
66 struct strobe_value_header header;
67 const char* value;
68};
69
70struct strobe_value_map {
71 struct strobe_value_header header;
72 const struct strobe_map_raw* value;
73};
74
75struct strobe_map_entry {
76 const char* key;
77 const char* val;
78};
79
80/*
81 * Map of C-string key/value pairs with fixed maximum capacity. Each map has
82 * corresponding int64 ID, which application can use (or ignore) in whatever
83 * way appropriate. Map is "write-only", there is no way to get data out of
84 * map. Map is intended to be used to provide metadata for profilers and is
85 * not to be used for internal in-app communication. All methods are
86 * thread-safe.
87 */
88struct strobe_map_raw {
89 /*
90 * general purpose unique ID that's up to application to decide
91 * whether and how to use; for request metadata use case id is unique
92 * request ID that's used to match metadata with stack traces on
93 * Strobelight backend side
94 */
95 int64_t id;
96 /* number of used entries in map */
97 int64_t cnt;
98 /*
99 * having volatile doesn't change anything on BPF side, but clang
100 * emits warnings for passing `volatile const char *` into
101 * bpf_probe_read_str that expects just `const char *`
102 */
103 const char* tag;
104 /*
105 * key/value entries, each consisting of 2 pointers to key and value
106 * C strings
107 */
108 struct strobe_map_entry entries[STROBE_MAX_MAP_ENTRIES];
109};
110
111/* Following values define supported values of TLS mode */
112#define TLS_NOT_SET -1
113#define TLS_LOCAL_EXEC 0
114#define TLS_IMM_EXEC 1
115#define TLS_GENERAL_DYN 2
116
117/*
118 * structure that universally represents TLS location (both for static
119 * executables and shared libraries)
120 */
121struct strobe_value_loc {
122 /*
123 * tls_mode defines what TLS mode was used for particular metavariable:
124 * - -1 (TLS_NOT_SET) - no metavariable;
125 * - 0 (TLS_LOCAL_EXEC) - Local Executable mode;
126 * - 1 (TLS_IMM_EXEC) - Immediate Executable mode;
127 * - 2 (TLS_GENERAL_DYN) - General Dynamic mode;
128 * Local Dynamic mode is not yet supported, because never seen in
129 * practice. Mode defines how offset field is interpreted. See
130 * calc_location() below for details.
131 */
132 int64_t tls_mode;
133 /*
134 * TLS_LOCAL_EXEC: offset from thread pointer (fs:0 for x86-64,
135 * tpidr_el0 for aarch64).
136 * TLS_IMM_EXEC: absolute address of GOT entry containing offset
137 * from thread pointer;
138 * TLS_GENERAL_DYN: absolute address of double GOT entry
139 * containing tls_index_t struct;
140 */
141 int64_t offset;
142};
143
144struct strobemeta_cfg {
145 int64_t req_meta_idx;
146 struct strobe_value_loc int_locs[STROBE_MAX_INTS];
147 struct strobe_value_loc str_locs[STROBE_MAX_STRS];
148 struct strobe_value_loc map_locs[STROBE_MAX_MAPS];
149};
150
151struct strobe_map_descr {
152 uint64_t id;
153 int16_t tag_len;
154 /*
155 * cnt <0 - map value isn't set;
156 * 0 - map has id set, but no key/value entries
157 */
158 int16_t cnt;
159 /*
160 * both key_lens[i] and val_lens[i] should be >0 for present key/value
161 * entry
162 */
163 uint16_t key_lens[STROBE_MAX_MAP_ENTRIES];
164 uint16_t val_lens[STROBE_MAX_MAP_ENTRIES];
165};
166
167struct strobemeta_payload {
168 /* req_id has valid request ID, if req_meta_valid == 1 */
169 int64_t req_id;
170 uint8_t req_meta_valid;
171 /*
172 * mask has Nth bit set to 1, if Nth metavar was present and
173 * successfully read
174 */
175 uint64_t int_vals_set_mask;
176 int64_t int_vals[STROBE_MAX_INTS];
177 /* len is >0 for present values */
178 uint16_t str_lens[STROBE_MAX_STRS];
179 /* if map_descrs[i].cnt == -1, metavar is not present/set */
180 struct strobe_map_descr map_descrs[STROBE_MAX_MAPS];
181 /*
182 * payload has compactly packed values of str and map variables in the
183 * form: strval1\0strval2\0map1key1\0map1val1\0map2key1\0map2val1\0
184 * (and so on); str_lens[i], key_lens[i] and val_lens[i] determines
185 * value length
186 */
187 char payload[STROBE_MAX_PAYLOAD];
188};
189
190struct strobelight_bpf_sample {
191 uint64_t ktime;
192 char comm[TASK_COMM_LEN];
193 pid_t pid;
194 int user_stack_id;
195 int kernel_stack_id;
196 int has_meta;
197 struct strobemeta_payload metadata;
198 /*
199 * makes it possible to pass (<real payload size> + 1) as data size to
200 * perf_submit() to avoid perf_submit's paranoia about passing zero as
201 * size, as it deduces that <real payload size> might be
202 * **theoretically** zero
203 */
204 char dummy_safeguard;
205};
206
207struct bpf_map_def SEC("maps") samples = {
208 .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
209 .key_size = sizeof(int),
210 .value_size = sizeof(int),
211 .max_entries = 32,
212};
213
214struct bpf_map_def SEC("maps") stacks_0 = {
215 .type = BPF_MAP_TYPE_STACK_TRACE,
216 .key_size = sizeof(uint32_t),
217 .value_size = sizeof(uint64_t) * PERF_MAX_STACK_DEPTH,
218 .max_entries = 16,
219};
220
221struct bpf_map_def SEC("maps") stacks_1 = {
222 .type = BPF_MAP_TYPE_STACK_TRACE,
223 .key_size = sizeof(uint32_t),
224 .value_size = sizeof(uint64_t) * PERF_MAX_STACK_DEPTH,
225 .max_entries = 16,
226};
227
228struct bpf_map_def SEC("maps") sample_heap = {
229 .type = BPF_MAP_TYPE_PERCPU_ARRAY,
230 .key_size = sizeof(uint32_t),
231 .value_size = sizeof(struct strobelight_bpf_sample),
232 .max_entries = 1,
233};
234
235struct bpf_map_def SEC("maps") strobemeta_cfgs = {
236 .type = BPF_MAP_TYPE_PERCPU_ARRAY,
237 .key_size = sizeof(pid_t),
238 .value_size = sizeof(struct strobemeta_cfg),
239 .max_entries = STROBE_MAX_CFGS,
240};
241
242/* Type for the dtv. */
243/* https://github.com/lattera/glibc/blob/master/nptl/sysdeps/x86_64/tls.h#L34 */
244typedef union dtv {
245 size_t counter;
246 struct {
247 void* val;
248 bool is_static;
249 } pointer;
250} dtv_t;
251
252/* Partial definition for tcbhead_t */
253/* https://github.com/bminor/glibc/blob/master/sysdeps/x86_64/nptl/tls.h#L42 */
254struct tcbhead {
255 void* tcb;
256 dtv_t* dtv;
257};
258
259/*
260 * TLS module/offset information for shared library case.
261 * For x86-64, this is mapped onto two entries in GOT.
262 * For aarch64, this is pointed to by second GOT entry.
263 */
264struct tls_index {
265 uint64_t module;
266 uint64_t offset;
267};
268
269static inline __attribute__((always_inline))
270void *calc_location(struct strobe_value_loc *loc, void *tls_base)
271{
272 /*
273 * tls_mode value is:
274 * - -1 (TLS_NOT_SET), if no metavar is present;
275 * - 0 (TLS_LOCAL_EXEC), if metavar uses Local Executable mode of TLS
276 * (offset from fs:0 for x86-64 or tpidr_el0 for aarch64);
277 * - 1 (TLS_IMM_EXEC), if metavar uses Immediate Executable mode of TLS;
278 * - 2 (TLS_GENERAL_DYN), if metavar uses General Dynamic mode of TLS;
279 * This schema allows to use something like:
280 * (tls_mode + 1) * (tls_base + offset)
281 * to get NULL for "no metavar" location, or correct pointer for local
282 * executable mode without doing extra ifs.
283 */
284 if (loc->tls_mode <= TLS_LOCAL_EXEC) {
285 /* static executable is simple, we just have offset from
286 * tls_base */
287 void *addr = tls_base + loc->offset;
288 /* multiply by (tls_mode + 1) to get NULL, if we have no
289 * metavar in this slot */
290 return (void *)((loc->tls_mode + 1) * (int64_t)addr);
291 }
292 /*
293 * Other modes are more complicated, we need to jump through few hoops.
294 *
295 * For immediate executable mode (currently supported only for aarch64):
296 * - loc->offset is pointing to a GOT entry containing fixed offset
297 * relative to tls_base;
298 *
299 * For general dynamic mode:
300 * - loc->offset is pointing to a beginning of double GOT entries;
301 * - (for aarch64 only) second entry points to tls_index_t struct;
302 * - (for x86-64 only) two GOT entries are already tls_index_t;
303 * - tls_index_t->module is used to find start of TLS section in
304 * which variable resides;
305 * - tls_index_t->offset provides offset within that TLS section,
306 * pointing to value of variable.
307 */
308 struct tls_index tls_index;
309 dtv_t *dtv;
310 void *tls_ptr;
311
312 bpf_probe_read(&tls_index, sizeof(struct tls_index),
313 (void *)loc->offset);
314 /* valid module index is always positive */
315 if (tls_index.module > 0) {
316 /* dtv = ((struct tcbhead *)tls_base)->dtv[tls_index.module] */
317 bpf_probe_read(&dtv, sizeof(dtv),
318 &((struct tcbhead *)tls_base)->dtv);
319 dtv += tls_index.module;
320 } else {
321 dtv = NULL;
322 }
323 bpf_probe_read(&tls_ptr, sizeof(void *), dtv);
324 /* if pointer has (void *)-1 value, then TLS wasn't initialized yet */
325 return tls_ptr && tls_ptr != (void *)-1
326 ? tls_ptr + tls_index.offset
327 : NULL;
328}
329
330static inline __attribute__((always_inline))
331void read_int_var(struct strobemeta_cfg *cfg, size_t idx, void *tls_base,
332 struct strobe_value_generic *value,
333 struct strobemeta_payload *data)
334{
335 void *location = calc_location(&cfg->int_locs[idx], tls_base);
336 if (!location)
337 return;
338
339 bpf_probe_read(value, sizeof(struct strobe_value_generic), location);
340 data->int_vals[idx] = value->val;
341 if (value->header.len)
342 data->int_vals_set_mask |= (1 << idx);
343}
344
345static inline __attribute__((always_inline))
346uint64_t read_str_var(struct strobemeta_cfg* cfg, size_t idx, void *tls_base,
347 struct strobe_value_generic *value,
348 struct strobemeta_payload *data, void *payload)
349{
350 void *location;
351 uint32_t len;
352
353 data->str_lens[idx] = 0;
354 location = calc_location(&cfg->str_locs[idx], tls_base);
355 if (!location)
356 return 0;
357
358 bpf_probe_read(value, sizeof(struct strobe_value_generic), location);
359 len = bpf_probe_read_str(payload, STROBE_MAX_STR_LEN, value->ptr);
360 /*
361 * if bpf_probe_read_str returns error (<0), due to casting to
362 * unsigned int, it will become big number, so next check is
363 * sufficient to check for errors AND prove to BPF verifier, that
364 * bpf_probe_read_str won't return anything bigger than
365 * STROBE_MAX_STR_LEN
366 */
367 if (len > STROBE_MAX_STR_LEN)
368 return 0;
369
370 data->str_lens[idx] = len;
371 return len;
372}
373
374static inline __attribute__((always_inline))
375void *read_map_var(struct strobemeta_cfg *cfg, size_t idx, void *tls_base,
376 struct strobe_value_generic *value,
377 struct strobemeta_payload* data, void *payload)
378{
379 struct strobe_map_descr* descr = &data->map_descrs[idx];
380 struct strobe_map_raw map;
381 void *location;
382 uint32_t len;
383 int i;
384
385 descr->tag_len = 0; /* presume no tag is set */
386 descr->cnt = -1; /* presume no value is set */
387
388 location = calc_location(&cfg->map_locs[idx], tls_base);
389 if (!location)
390 return payload;
391
392 bpf_probe_read(value, sizeof(struct strobe_value_generic), location);
393 if (bpf_probe_read(&map, sizeof(struct strobe_map_raw), value->ptr))
394 return payload;
395
396 descr->id = map.id;
397 descr->cnt = map.cnt;
398 if (cfg->req_meta_idx == idx) {
399 data->req_id = map.id;
400 data->req_meta_valid = 1;
401 }
402
403 len = bpf_probe_read_str(payload, STROBE_MAX_STR_LEN, map.tag);
404 if (len <= STROBE_MAX_STR_LEN) {
405 descr->tag_len = len;
406 payload += len;
407 }
408
409#ifdef NO_UNROLL
410#pragma clang loop unroll(disable)
411#else
412#pragma unroll
413#endif
414 for (int i = 0; i < STROBE_MAX_MAP_ENTRIES && i < map.cnt; ++i) {
415 descr->key_lens[i] = 0;
416 len = bpf_probe_read_str(payload, STROBE_MAX_STR_LEN,
417 map.entries[i].key);
418 if (len <= STROBE_MAX_STR_LEN) {
419 descr->key_lens[i] = len;
420 payload += len;
421 }
422 descr->val_lens[i] = 0;
423 len = bpf_probe_read_str(payload, STROBE_MAX_STR_LEN,
424 map.entries[i].val);
425 if (len <= STROBE_MAX_STR_LEN) {
426 descr->val_lens[i] = len;
427 payload += len;
428 }
429 }
430
431 return payload;
432}
433
434/*
435 * read_strobe_meta returns NULL, if no metadata was read; otherwise returns
436 * pointer to *right after* payload ends
437 */
438static inline __attribute__((always_inline))
439void *read_strobe_meta(struct task_struct* task,
440 struct strobemeta_payload* data) {
441 pid_t pid = bpf_get_current_pid_tgid() >> 32;
442 struct strobe_value_generic value = {0};
443 struct strobemeta_cfg *cfg;
444 void *tls_base, *payload;
445
446 cfg = bpf_map_lookup_elem(&strobemeta_cfgs, &pid);
447 if (!cfg)
448 return NULL;
449
450 data->int_vals_set_mask = 0;
451 data->req_meta_valid = 0;
452 payload = data->payload;
453 /*
454 * we don't have struct task_struct definition, it should be:
455 * tls_base = (void *)task->thread.fsbase;
456 */
457 tls_base = (void *)task;
458
459#ifdef NO_UNROLL
460#pragma clang loop unroll(disable)
461#else
462#pragma unroll
463#endif
464 for (int i = 0; i < STROBE_MAX_INTS; ++i) {
465 read_int_var(cfg, i, tls_base, &value, data);
466 }
467#ifdef NO_UNROLL
468#pragma clang loop unroll(disable)
469#else
470#pragma unroll
471#endif
472 for (int i = 0; i < STROBE_MAX_STRS; ++i) {
473 payload += read_str_var(cfg, i, tls_base, &value, data, payload);
474 }
475#ifdef NO_UNROLL
476#pragma clang loop unroll(disable)
477#else
478#pragma unroll
479#endif
480 for (int i = 0; i < STROBE_MAX_MAPS; ++i) {
481 payload = read_map_var(cfg, i, tls_base, &value, data, payload);
482 }
483 /*
484 * return pointer right after end of payload, so it's possible to
485 * calculate exact amount of useful data that needs to be sent
486 */
487 return payload;
488}
489
490SEC("raw_tracepoint/kfree_skb")
491int on_event(struct pt_regs *ctx) {
492 pid_t pid = bpf_get_current_pid_tgid() >> 32;
493 struct strobelight_bpf_sample* sample;
494 struct task_struct *task;
495 uint32_t zero = 0;
496 uint64_t ktime_ns;
497 void *sample_end;
498
499 sample = bpf_map_lookup_elem(&sample_heap, &zero);
500 if (!sample)
501 return 0; /* this will never happen */
502
503 sample->pid = pid;
504 bpf_get_current_comm(&sample->comm, TASK_COMM_LEN);
505 ktime_ns = bpf_ktime_get_ns();
506 sample->ktime = ktime_ns;
507
508 task = (struct task_struct *)bpf_get_current_task();
509 sample_end = read_strobe_meta(task, &sample->metadata);
510 sample->has_meta = sample_end != NULL;
511 sample_end = sample_end ? : &sample->metadata;
512
513 if ((ktime_ns >> STACK_TABLE_EPOCH_SHIFT) & 1) {
514 sample->kernel_stack_id = bpf_get_stackid(ctx, &stacks_1, 0);
515 sample->user_stack_id = bpf_get_stackid(ctx, &stacks_1, BPF_F_USER_STACK);
516 } else {
517 sample->kernel_stack_id = bpf_get_stackid(ctx, &stacks_0, 0);
518 sample->user_stack_id = bpf_get_stackid(ctx, &stacks_0, BPF_F_USER_STACK);
519 }
520
521 uint64_t sample_size = sample_end - (void *)sample;
522 /* should always be true */
523 if (sample_size < sizeof(struct strobelight_bpf_sample))
524 bpf_perf_event_output(ctx, &samples, 0, sample, 1 + sample_size);
525 return 0;
526}
527
528char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/strobemeta_nounroll1.c b/tools/testing/selftests/bpf/progs/strobemeta_nounroll1.c
new file mode 100644
index 000000000000..f0a1669e11d6
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/strobemeta_nounroll1.c
@@ -0,0 +1,9 @@
1// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
2// Copyright (c) 2019 Facebook
3
4#define STROBE_MAX_INTS 2
5#define STROBE_MAX_STRS 25
6#define STROBE_MAX_MAPS 13
7#define STROBE_MAX_MAP_ENTRIES 20
8#define NO_UNROLL
9#include "strobemeta.h"
diff --git a/tools/testing/selftests/bpf/progs/strobemeta_nounroll2.c b/tools/testing/selftests/bpf/progs/strobemeta_nounroll2.c
new file mode 100644
index 000000000000..4291a7d642e7
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/strobemeta_nounroll2.c
@@ -0,0 +1,9 @@
1// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
2// Copyright (c) 2019 Facebook
3
4#define STROBE_MAX_INTS 2
5#define STROBE_MAX_STRS 25
6#define STROBE_MAX_MAPS 30
7#define STROBE_MAX_MAP_ENTRIES 20
8#define NO_UNROLL
9#include "strobemeta.h"
diff --git a/tools/testing/selftests/bpf/progs/test_btf_newkv.c b/tools/testing/selftests/bpf/progs/test_btf_newkv.c
new file mode 100644
index 000000000000..28c16bb583b6
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_btf_newkv.c
@@ -0,0 +1,73 @@
1/* SPDX-License-Identifier: GPL-2.0 */
2/* Copyright (c) 2018 Facebook */
3#include <linux/bpf.h>
4#include "bpf_helpers.h"
5
6int _version SEC("version") = 1;
7
/* Two unsigned counters; used as the value type of btf_map in this file. */
8struct ipv_counts {
9 unsigned int v4;
10 unsigned int v6;
11};
12
13/* just to validate we can handle maps in multiple sections */
/*
 * Array map declared with struct bpf_map_def in the "maps" section; its
 * key/value types are supplied separately through BPF_ANNOTATE_KV_PAIR.
 * NOTE(review): value_size is sizeof(long long) while the annotated value
 * type is struct ipv_counts (two unsigned ints) - the two agree only if
 * those sizes are equal on the target; confirm.
 */
14struct bpf_map_def SEC("maps") btf_map_legacy = {
15 .type = BPF_MAP_TYPE_ARRAY,
16 .key_size = sizeof(int),
17 .value_size = sizeof(long long),
18 .max_entries = 4,
19};
20
21BPF_ANNOTATE_KV_PAIR(btf_map_legacy, int, struct ipv_counts);
22
/*
 * Array map declared as an anonymous struct in the ".maps" section. Key and
 * value are described by pointer-typed members (int *, struct ipv_counts *)
 * instead of explicit sizes; only type and max_entries are initialized.
 */
23struct {
24 int *key;
25 struct ipv_counts *value;
26 unsigned int type;
27 unsigned int max_entries;
28} btf_map SEC(".maps") = {
29 .type = BPF_MAP_TYPE_ARRAY,
30 .max_entries = 4,
31};
32
/*
 * Argument passed to the "dummy_tracepoint" program and its helpers:
 * an unsigned long long pad followed by a struct sock pointer.
 */
33struct dummy_tracepoint_args {
34 unsigned long long pad;
35 struct sock *sock;
36};
37
/*
 * Non-inlined helper: if arg->sock is set, looks up key 0 in btf_map and
 * increments its v6 counter, then performs a lookup in btf_map_legacy so
 * that both maps are referenced. Returns 0 on every path.
 */
38__attribute__((noinline))
39static int test_long_fname_2(struct dummy_tracepoint_args *arg)
40{
41 struct ipv_counts *counts;
42 int key = 0;
43
44 if (!arg->sock)
45 return 0;
46
47 counts = bpf_map_lookup_elem(&btf_map, &key);
48 if (!counts)
49 return 0;
50
51 counts->v6++;
52
53 /* just verify we can reference both maps */
54 counts = bpf_map_lookup_elem(&btf_map_legacy, &key);
/* the result is only NULL-checked; both outcomes return 0 */
55 if (!counts)
56 return 0;
57
58 return 0;
59}
60
/* Non-inlined pass-through: forwards arg to test_long_fname_2() and returns its result. */
61__attribute__((noinline))
62static int test_long_fname_1(struct dummy_tracepoint_args *arg)
63{
64 return test_long_fname_2(arg);
65}
66
/*
 * Program entry point placed in section "dummy_tracepoint"; delegates to
 * test_long_fname_1() and returns its result.
 */
67SEC("dummy_tracepoint")
68int _dummy_tracepoint(struct dummy_tracepoint_args *arg)
69{
70 return test_long_fname_1(arg);
71}
72
73char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c b/tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c
index f6d9f238e00a..aaa6ec250e15 100644
--- a/tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c
+++ b/tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c
@@ -15,17 +15,25 @@ struct stack_trace_t {
15 struct bpf_stack_build_id user_stack_buildid[MAX_STACK_RAWTP]; 15 struct bpf_stack_build_id user_stack_buildid[MAX_STACK_RAWTP];
16}; 16};
17 17
18struct bpf_map_def SEC("maps") perfmap = { 18struct {
19 __u32 type;
20 __u32 max_entries;
21 __u32 key_size;
22 __u32 value_size;
23} perfmap SEC(".maps") = {
19 .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, 24 .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
25 .max_entries = 2,
20 .key_size = sizeof(int), 26 .key_size = sizeof(int),
21 .value_size = sizeof(__u32), 27 .value_size = sizeof(__u32),
22 .max_entries = 2,
23}; 28};
24 29
25struct bpf_map_def SEC("maps") stackdata_map = { 30struct {
31 __u32 type;
32 __u32 max_entries;
33 __u32 *key;
34 struct stack_trace_t *value;
35} stackdata_map SEC(".maps") = {
26 .type = BPF_MAP_TYPE_PERCPU_ARRAY, 36 .type = BPF_MAP_TYPE_PERCPU_ARRAY,
27 .key_size = sizeof(__u32),
28 .value_size = sizeof(struct stack_trace_t),
29 .max_entries = 1, 37 .max_entries = 1,
30}; 38};
31 39
@@ -47,10 +55,13 @@ struct bpf_map_def SEC("maps") stackdata_map = {
47 * issue and avoid complicated C programming massaging. 55 * issue and avoid complicated C programming massaging.
48 * This is an acceptable workaround since there is one entry here. 56 * This is an acceptable workaround since there is one entry here.
49 */ 57 */
50struct bpf_map_def SEC("maps") rawdata_map = { 58struct {
59 __u32 type;
60 __u32 max_entries;
61 __u32 *key;
62 __u64 (*value)[2 * MAX_STACK_RAWTP];
63} rawdata_map SEC(".maps") = {
51 .type = BPF_MAP_TYPE_PERCPU_ARRAY, 64 .type = BPF_MAP_TYPE_PERCPU_ARRAY,
52 .key_size = sizeof(__u32),
53 .value_size = MAX_STACK_RAWTP * sizeof(__u64) * 2,
54 .max_entries = 1, 65 .max_entries = 1,
55}; 66};
56 67
diff --git a/tools/testing/selftests/bpf/progs/test_global_data.c b/tools/testing/selftests/bpf/progs/test_global_data.c
index 5ab14e941980..866cc7ddbe43 100644
--- a/tools/testing/selftests/bpf/progs/test_global_data.c
+++ b/tools/testing/selftests/bpf/progs/test_global_data.c
@@ -7,17 +7,23 @@
7 7
8#include "bpf_helpers.h" 8#include "bpf_helpers.h"
9 9
10struct bpf_map_def SEC("maps") result_number = { 10struct {
11 __u32 type;
12 __u32 max_entries;
13 __u32 *key;
14 __u64 *value;
15} result_number SEC(".maps") = {
11 .type = BPF_MAP_TYPE_ARRAY, 16 .type = BPF_MAP_TYPE_ARRAY,
12 .key_size = sizeof(__u32),
13 .value_size = sizeof(__u64),
14 .max_entries = 11, 17 .max_entries = 11,
15}; 18};
16 19
17struct bpf_map_def SEC("maps") result_string = { 20struct {
21 __u32 type;
22 __u32 max_entries;
23 __u32 *key;
24 const char (*value)[32];
25} result_string SEC(".maps") = {
18 .type = BPF_MAP_TYPE_ARRAY, 26 .type = BPF_MAP_TYPE_ARRAY,
19 .key_size = sizeof(__u32),
20 .value_size = 32,
21 .max_entries = 5, 27 .max_entries = 5,
22}; 28};
23 29
@@ -27,10 +33,13 @@ struct foo {
27 __u64 c; 33 __u64 c;
28}; 34};
29 35
30struct bpf_map_def SEC("maps") result_struct = { 36struct {
37 __u32 type;
38 __u32 max_entries;
39 __u32 *key;
40 struct foo *value;
41} result_struct SEC(".maps") = {
31 .type = BPF_MAP_TYPE_ARRAY, 42 .type = BPF_MAP_TYPE_ARRAY,
32 .key_size = sizeof(__u32),
33 .value_size = sizeof(struct foo),
34 .max_entries = 5, 43 .max_entries = 5,
35}; 44};
36 45
diff --git a/tools/testing/selftests/bpf/progs/test_l4lb.c b/tools/testing/selftests/bpf/progs/test_l4lb.c
index 1e10c9590991..848cbb90f581 100644
--- a/tools/testing/selftests/bpf/progs/test_l4lb.c
+++ b/tools/testing/selftests/bpf/progs/test_l4lb.c
@@ -169,38 +169,53 @@ struct eth_hdr {
169 unsigned short eth_proto; 169 unsigned short eth_proto;
170}; 170};
171 171
172struct bpf_map_def SEC("maps") vip_map = { 172struct {
173 __u32 type;
174 __u32 max_entries;
175 struct vip *key;
176 struct vip_meta *value;
177} vip_map SEC(".maps") = {
173 .type = BPF_MAP_TYPE_HASH, 178 .type = BPF_MAP_TYPE_HASH,
174 .key_size = sizeof(struct vip),
175 .value_size = sizeof(struct vip_meta),
176 .max_entries = MAX_VIPS, 179 .max_entries = MAX_VIPS,
177}; 180};
178 181
179struct bpf_map_def SEC("maps") ch_rings = { 182struct {
183 __u32 type;
184 __u32 max_entries;
185 __u32 *key;
186 __u32 *value;
187} ch_rings SEC(".maps") = {
180 .type = BPF_MAP_TYPE_ARRAY, 188 .type = BPF_MAP_TYPE_ARRAY,
181 .key_size = sizeof(__u32),
182 .value_size = sizeof(__u32),
183 .max_entries = CH_RINGS_SIZE, 189 .max_entries = CH_RINGS_SIZE,
184}; 190};
185 191
186struct bpf_map_def SEC("maps") reals = { 192struct {
193 __u32 type;
194 __u32 max_entries;
195 __u32 *key;
196 struct real_definition *value;
197} reals SEC(".maps") = {
187 .type = BPF_MAP_TYPE_ARRAY, 198 .type = BPF_MAP_TYPE_ARRAY,
188 .key_size = sizeof(__u32),
189 .value_size = sizeof(struct real_definition),
190 .max_entries = MAX_REALS, 199 .max_entries = MAX_REALS,
191}; 200};
192 201
193struct bpf_map_def SEC("maps") stats = { 202struct {
203 __u32 type;
204 __u32 max_entries;
205 __u32 *key;
206 struct vip_stats *value;
207} stats SEC(".maps") = {
194 .type = BPF_MAP_TYPE_PERCPU_ARRAY, 208 .type = BPF_MAP_TYPE_PERCPU_ARRAY,
195 .key_size = sizeof(__u32),
196 .value_size = sizeof(struct vip_stats),
197 .max_entries = MAX_VIPS, 209 .max_entries = MAX_VIPS,
198}; 210};
199 211
200struct bpf_map_def SEC("maps") ctl_array = { 212struct {
213 __u32 type;
214 __u32 max_entries;
215 __u32 *key;
216 struct ctl_value *value;
217} ctl_array SEC(".maps") = {
201 .type = BPF_MAP_TYPE_ARRAY, 218 .type = BPF_MAP_TYPE_ARRAY,
202 .key_size = sizeof(__u32),
203 .value_size = sizeof(struct ctl_value),
204 .max_entries = CTL_MAP_SIZE, 219 .max_entries = CTL_MAP_SIZE,
205}; 220};
206 221
diff --git a/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c b/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c
index ba44a14e6dc4..c63ecf3ca573 100644
--- a/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c
+++ b/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c
@@ -165,38 +165,53 @@ struct eth_hdr {
165 unsigned short eth_proto; 165 unsigned short eth_proto;
166}; 166};
167 167
168struct bpf_map_def SEC("maps") vip_map = { 168struct {
169 __u32 type;
170 __u32 max_entries;
171 struct vip *key;
172 struct vip_meta *value;
173} vip_map SEC(".maps") = {
169 .type = BPF_MAP_TYPE_HASH, 174 .type = BPF_MAP_TYPE_HASH,
170 .key_size = sizeof(struct vip),
171 .value_size = sizeof(struct vip_meta),
172 .max_entries = MAX_VIPS, 175 .max_entries = MAX_VIPS,
173}; 176};
174 177
175struct bpf_map_def SEC("maps") ch_rings = { 178struct {
179 __u32 type;
180 __u32 max_entries;
181 __u32 *key;
182 __u32 *value;
183} ch_rings SEC(".maps") = {
176 .type = BPF_MAP_TYPE_ARRAY, 184 .type = BPF_MAP_TYPE_ARRAY,
177 .key_size = sizeof(__u32),
178 .value_size = sizeof(__u32),
179 .max_entries = CH_RINGS_SIZE, 185 .max_entries = CH_RINGS_SIZE,
180}; 186};
181 187
182struct bpf_map_def SEC("maps") reals = { 188struct {
189 __u32 type;
190 __u32 max_entries;
191 __u32 *key;
192 struct real_definition *value;
193} reals SEC(".maps") = {
183 .type = BPF_MAP_TYPE_ARRAY, 194 .type = BPF_MAP_TYPE_ARRAY,
184 .key_size = sizeof(__u32),
185 .value_size = sizeof(struct real_definition),
186 .max_entries = MAX_REALS, 195 .max_entries = MAX_REALS,
187}; 196};
188 197
189struct bpf_map_def SEC("maps") stats = { 198struct {
199 __u32 type;
200 __u32 max_entries;
201 __u32 *key;
202 struct vip_stats *value;
203} stats SEC(".maps") = {
190 .type = BPF_MAP_TYPE_PERCPU_ARRAY, 204 .type = BPF_MAP_TYPE_PERCPU_ARRAY,
191 .key_size = sizeof(__u32),
192 .value_size = sizeof(struct vip_stats),
193 .max_entries = MAX_VIPS, 205 .max_entries = MAX_VIPS,
194}; 206};
195 207
196struct bpf_map_def SEC("maps") ctl_array = { 208struct {
209 __u32 type;
210 __u32 max_entries;
211 __u32 *key;
212 struct ctl_value *value;
213} ctl_array SEC(".maps") = {
197 .type = BPF_MAP_TYPE_ARRAY, 214 .type = BPF_MAP_TYPE_ARRAY,
198 .key_size = sizeof(__u32),
199 .value_size = sizeof(struct ctl_value),
200 .max_entries = CTL_MAP_SIZE, 215 .max_entries = CTL_MAP_SIZE,
201}; 216};
202 217
diff --git a/tools/testing/selftests/bpf/progs/test_map_lock.c b/tools/testing/selftests/bpf/progs/test_map_lock.c
index af8cc68ed2f9..40d9c2853393 100644
--- a/tools/testing/selftests/bpf/progs/test_map_lock.c
+++ b/tools/testing/selftests/bpf/progs/test_map_lock.c
@@ -11,29 +11,31 @@ struct hmap_elem {
11 int var[VAR_NUM]; 11 int var[VAR_NUM];
12}; 12};
13 13
14struct bpf_map_def SEC("maps") hash_map = { 14struct {
15 __u32 type;
16 __u32 max_entries;
17 __u32 *key;
18 struct hmap_elem *value;
19} hash_map SEC(".maps") = {
15 .type = BPF_MAP_TYPE_HASH, 20 .type = BPF_MAP_TYPE_HASH,
16 .key_size = sizeof(int),
17 .value_size = sizeof(struct hmap_elem),
18 .max_entries = 1, 21 .max_entries = 1,
19}; 22};
20 23
21BPF_ANNOTATE_KV_PAIR(hash_map, int, struct hmap_elem);
22
23struct array_elem { 24struct array_elem {
24 struct bpf_spin_lock lock; 25 struct bpf_spin_lock lock;
25 int var[VAR_NUM]; 26 int var[VAR_NUM];
26}; 27};
27 28
28struct bpf_map_def SEC("maps") array_map = { 29struct {
30 __u32 type;
31 __u32 max_entries;
32 int *key;
33 struct array_elem *value;
34} array_map SEC(".maps") = {
29 .type = BPF_MAP_TYPE_ARRAY, 35 .type = BPF_MAP_TYPE_ARRAY,
30 .key_size = sizeof(int),
31 .value_size = sizeof(struct array_elem),
32 .max_entries = 1, 36 .max_entries = 1,
33}; 37};
34 38
35BPF_ANNOTATE_KV_PAIR(array_map, int, struct array_elem);
36
37SEC("map_lock_demo") 39SEC("map_lock_demo")
38int bpf_map_lock_test(struct __sk_buff *skb) 40int bpf_map_lock_test(struct __sk_buff *skb)
39{ 41{
diff --git a/tools/testing/selftests/bpf/progs/test_seg6_loop.c b/tools/testing/selftests/bpf/progs/test_seg6_loop.c
new file mode 100644
index 000000000000..463964d79f73
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_seg6_loop.c
@@ -0,0 +1,261 @@
1#include <stddef.h>
2#include <inttypes.h>
3#include <errno.h>
4#include <linux/seg6_local.h>
5#include <linux/bpf.h>
6#include "bpf_helpers.h"
7#include "bpf_endian.h"
8
9/* Packet parsing state machine helpers. */
10#define cursor_advance(_cursor, _len) \
11 ({ void *_tmp = _cursor; _cursor += _len; _tmp; })
12
13#define SR6_FLAG_ALERT (1 << 4)
14
15#define htonll(x) ((bpf_htonl(1)) == 1 ? (x) : ((uint64_t)bpf_htonl((x) & \
16 0xFFFFFFFF) << 32) | bpf_htonl((x) >> 32))
17#define ntohll(x) ((bpf_ntohl(1)) == 1 ? (x) : ((uint64_t)bpf_ntohl((x) & \
18 0xFFFFFFFF) << 32) | bpf_ntohl((x) >> 32))
19#define BPF_PACKET_HEADER __attribute__((packed))
20
/*
 * Packed header overlay that get_srh() places at the start of the packet
 * data; get_srh() reads its next_header field. Source and destination are
 * each stored as a hi/lo pair of unsigned long long.
 */
21struct ip6_t {
22 unsigned int ver:4;
23 unsigned int priority:8;
24 unsigned int flow_label:20;
25 unsigned short payload_len;
26 unsigned char next_header;
27 unsigned char hop_limit;
28 unsigned long long src_hi;
29 unsigned long long src_lo;
30 unsigned long long dst_hi;
31 unsigned long long dst_lo;
32} BPF_PACKET_HEADER;
33
/*
 * Packed pair of unsigned long long halves (hi, lo); element type of
 * ip6_srh_t.segments and the argument built in __add_egr_x().
 */
34struct ip6_addr_t {
35 unsigned long long hi;
36 unsigned long long lo;
37} BPF_PACKET_HEADER;
38
/*
 * Packed fixed part of the header returned by get_srh(), followed by a
 * zero-length array of segments. In this file hdrlen is used as
 * (hdrlen + 1) << 3 bytes of total header length and first_segment + 1 as
 * the number of ip6_addr_t entries preceding any TLVs.
 */
39struct ip6_srh_t {
40 unsigned char nexthdr;
41 unsigned char hdrlen;
42 unsigned char type;
43 unsigned char segments_left;
44 unsigned char first_segment;
45 unsigned char flags;
46 unsigned short tag;
47
48 struct ip6_addr_t segments[0];
49} BPF_PACKET_HEADER;
50
/*
 * Packed TLV header: type and len bytes followed by a zero-length value
 * array. Callers in this file use sizeof() of this struct as the header
 * size and add len to step to the next TLV.
 */
51struct sr6_tlv_t {
52 unsigned char type;
53 unsigned char len;
54 unsigned char value[0];
55} BPF_PACKET_HEADER;
56
/*
 * Locate the ip6_srh_t that directly follows the ip6_t header at the start
 * of the packet data.
 *
 * Returns a pointer into the packet, or NULL when any bounds check against
 * skb->data_end fails, the upper nibble of the first byte is not 6,
 * ip->next_header is not 43, or srh->type is not 4.
 */
57static __attribute__((always_inline)) struct ip6_srh_t *get_srh(struct __sk_buff *skb)
58{
59 void *cursor, *data_end;
60 struct ip6_srh_t *srh;
61 struct ip6_t *ip;
62 uint8_t *ipver;
63
64 data_end = (void *)(long)skb->data_end;
65 cursor = (void *)(long)skb->data;
66 ipver = (uint8_t *)cursor;
67
/* the first byte must be readable before its upper nibble is inspected */
68 if ((void *)ipver + sizeof(*ipver) > data_end)
69 return NULL;
70
71 if ((*ipver >> 4) != 6)
72 return NULL;
73
/* cursor_advance() yields the old cursor and moves it past the header */
74 ip = cursor_advance(cursor, sizeof(*ip));
75 if ((void *)ip + sizeof(*ip) > data_end)
76 return NULL;
77
/* NOTE(review): 43 is presumably the IPv6 Routing header next-header value - confirm */
78 if (ip->next_header != 43)
79 return NULL;
80
81 srh = cursor_advance(cursor, sizeof(*srh));
82 if ((void *)srh + sizeof(*srh) > data_end)
83 return NULL;
84
/* NOTE(review): routing type 4 is presumably the Segment Routing Header - confirm */
85 if (srh->type != 4)
86 return NULL;
87
88 return srh;
89}
90
/*
 * Resize and rewrite the padding TLV located at pad_off.
 *
 * @new_pad: desired total padding size in bytes (0 means no padding TLV)
 * @old_pad: current total padding size in bytes
 * @pad_off: offset of the padding TLV
 *
 * Returns 0 on success or the error returned by bpf_lwt_seg6_adjust_srh()
 * or bpf_lwt_seg6_store_bytes().
 */
91static __attribute__((always_inline))
92int update_tlv_pad(struct __sk_buff *skb, uint32_t new_pad,
93 uint32_t old_pad, uint32_t pad_off)
94{
95 int err;
96
/* grow or shrink the header at pad_off by the signed size difference */
97 if (new_pad != old_pad) {
98 err = bpf_lwt_seg6_adjust_srh(skb, pad_off,
99 (int) new_pad - (int) old_pad);
100 if (err)
101 return err;
102 }
103
104 if (new_pad > 0) {
/*
 * Zeroed scratch TLV; new_pad bytes of it are written, so new_pad must
 * not exceed 16. The caller in this file (add_tlv) passes 0, 2..7 or 9.
 */
105 char pad_tlv_buf[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
106 0, 0, 0};
107 struct sr6_tlv_t *pad_tlv = (struct sr6_tlv_t *) pad_tlv_buf;
108
109 pad_tlv->type = SR6_TLV_PADDING;
/* len excludes the two header bytes (type, len) */
110 pad_tlv->len = new_pad - 2;
111
112 err = bpf_lwt_seg6_store_bytes(skb, pad_off,
113 (void *)pad_tlv_buf, new_pad);
114 if (err)
115 return err;
116 }
117
118 return 0;
119}
120
/*
 * Walk the TLVs that follow the segment list of srh and check that *tlv_off
 * lies on a TLV boundary. All offsets are relative to skb->data.
 *
 * @tlv_off:  in/out. If it equals (uint32_t)-1 it is replaced by the offset
 *            at which the walk stopped. If it equals the SRH offset and a
 *            padding TLV is found, it is replaced by that TLV's offset.
 * @pad_size: out. Written only when a padding TLV is found
 *            (tlv.len plus the TLV header size); otherwise left untouched.
 * @pad_off:  out. Offset of the padding TLV, or the offset at which the
 *            walk stopped when no padding TLV was found.
 *
 * Returns 0 on success, the error from bpf_skb_load_bytes(), or -EINVAL
 * when *tlv_off matched none of the visited boundaries.
 */
121static __attribute__((always_inline))
122int is_valid_tlv_boundary(struct __sk_buff *skb, struct ip6_srh_t *srh,
123 uint32_t *tlv_off, uint32_t *pad_size,
124 uint32_t *pad_off)
125{
126 uint32_t srh_off, cur_off;
127 int offset_valid = 0;
128 int err;
129
130 srh_off = (char *)srh - (char *)(long)skb->data;
131 // cur_off = end of segments, start of possible TLVs
132 cur_off = srh_off + sizeof(*srh) +
133 sizeof(struct ip6_addr_t) * (srh->first_segment + 1);
134
/* 0 doubles as "no padding TLV found yet" (checked after the loop) */
135 *pad_off = 0;
136
137 // bounded walk: at most 100 TLVs are examined and unrolling is disabled
138 #pragma clang loop unroll(disable)
139 for (int i = 0; i < 100; i++) {
140 struct sr6_tlv_t tlv;
141
142 if (cur_off == *tlv_off)
143 offset_valid = 1;
144
/* stop once the end of the header, (hdrlen + 1) * 8 bytes, is reached */
145 if (cur_off >= srh_off + ((srh->hdrlen + 1) << 3))
146 break;
147
148 err = bpf_skb_load_bytes(skb, cur_off, &tlv, sizeof(tlv));
149 if (err)
150 return err;
151
152 if (tlv.type == SR6_TLV_PADDING) {
153 *pad_size = tlv.len + sizeof(tlv);
154 *pad_off = cur_off;
155
/* a requested offset equal to the SRH start is redirected to the padding TLV */
156 if (*tlv_off == srh_off) {
157 *tlv_off = cur_off;
158 offset_valid = 1;
159 }
160 break;
161
162 } else if (tlv.type == SR6_TLV_HMAC) {
163 break;
164 }
165
166 cur_off += sizeof(tlv) + tlv.len;
167 } // we reached the padding or HMAC TLVs, or the end of the SRH
168
169 if (*pad_off == 0)
170 *pad_off = cur_off;
171
/* -1 converts to the all-ones uint32_t value used by callers as "unspecified" */
172 if (*tlv_off == -1)
173 *tlv_off = cur_off;
174 else if (!offset_valid)
175 return -EINVAL;
176
177 return 0;
178}
179
/*
 * Insert the TLV itlv into the header srh and fix up the trailing padding.
 *
 * @tlv_off:  insertion offset relative to the start of srh, or -1 to let
 *            is_valid_tlv_boundary() choose the offset where its walk stops
 * @itlv:     TLV to insert; padding and HMAC types are rejected with -EINVAL
 * @tlv_size: number of bytes of itlv written into the packet
 *
 * Returns 0 on success, -EINVAL, or the error of the failing helper call.
 */
180static __attribute__((always_inline))
181int add_tlv(struct __sk_buff *skb, struct ip6_srh_t *srh, uint32_t tlv_off,
182 struct sr6_tlv_t *itlv, uint8_t tlv_size)
183{
184 uint32_t srh_off = (char *)srh - (char *)(long)skb->data;
185 uint8_t len_remaining, new_pad;
186 uint32_t pad_off = 0;
187 uint32_t pad_size = 0;
188 uint32_t partial_srh_len;
189 int err;
190
/* convert an SRH-relative offset into one relative to skb->data */
191 if (tlv_off != -1)
192 tlv_off += srh_off;
193
194 if (itlv->type == SR6_TLV_PADDING || itlv->type == SR6_TLV_HMAC)
195 return -EINVAL;
196
197 err = is_valid_tlv_boundary(skb, srh, &tlv_off, &pad_size, &pad_off);
198 if (err)
199 return err;
200
/*
 * NOTE(review): the header grows by sizeof(*itlv) + itlv->len while
 * tlv_size bytes are stored below; the only caller in this file passes
 * tlv_size 20 with len 18, so the two agree there.
 */
201 err = bpf_lwt_seg6_adjust_srh(skb, tlv_off, sizeof(*itlv) + itlv->len);
202 if (err)
203 return err;
204
205 err = bpf_lwt_seg6_store_bytes(skb, tlv_off, (void *)itlv, tlv_size);
206 if (err)
207 return err;
208
209 // the following can't be moved inside update_tlv_pad because the
210 // bpf verifier has some issues with it
/* the padding TLV moved back by the size of the inserted TLV */
211 pad_off += sizeof(*itlv) + itlv->len;
212 partial_srh_len = pad_off - srh_off;
/* padding needed to bring the length up to a multiple of 8 bytes */
213 len_remaining = partial_srh_len % 8;
214 new_pad = 8 - len_remaining;
215
216 if (new_pad == 1) // cannot pad for 1 byte only
217 new_pad = 9;
/* already a multiple of 8: no padding TLV needed */
218 else if (new_pad == 8)
219 new_pad = 0;
220
221 return update_tlv_pad(skb, new_pad, pad_size, pad_off);
222}
223
224// Add an Egress TLV fc00::4, add the flag A,
225// and apply End.X action to fc42::1
// NOTE(review): the 16 address bytes in tlv[] below start with 0xfd, not
// 0xfc as the comment above states - confirm which one is intended.
//
// Program in section "lwt_seg6local". Returns BPF_DROP when the SRH cannot
// be found or any helper call fails, BPF_REDIRECT otherwise.
226SEC("lwt_seg6local")
227int __add_egr_x(struct __sk_buff *skb)
228{
229 unsigned long long hi = 0xfc42000000000000;
230 unsigned long long lo = 0x1;
231 struct ip6_srh_t *srh = get_srh(skb);
232 uint8_t new_flags = SR6_FLAG_ALERT;
233 struct ip6_addr_t addr;
234 int err, offset;
235
236 if (srh == NULL)
237 return BPF_DROP;
238
/* 20-byte TLV: type 2, len 18, two zero bytes, then 16 address bytes */
239 uint8_t tlv[20] = {2, 18, 0, 0, 0xfd, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
240 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x4};
241
/* insert at the SRH-relative offset equal to the current header length */
242 err = add_tlv(skb, srh, (srh->hdrlen+1) << 3,
243 (struct sr6_tlv_t *)&tlv, 20);
244 if (err)
245 return BPF_DROP;
246
/* offset of the flags byte, with the SRH directly after the ip6_t header as in get_srh() */
247 offset = sizeof(struct ip6_t) + offsetof(struct ip6_srh_t, flags);
248 err = bpf_lwt_seg6_store_bytes(skb, offset,
249 (void *)&new_flags, sizeof(new_flags));
250 if (err)
251 return BPF_DROP;
252
253 addr.lo = htonll(lo);
254 addr.hi = htonll(hi);
255 err = bpf_lwt_seg6_action(skb, SEG6_LOCAL_ACTION_END_X,
256 (void *)&addr, sizeof(addr));
257 if (err)
258 return BPF_DROP;
259 return BPF_REDIRECT;
260}
261char __license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c b/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c
index 5b54ec637ada..435a9527733e 100644
--- a/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c
@@ -21,38 +21,55 @@ int _version SEC("version") = 1;
21#define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER) 21#define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER)
22#endif 22#endif
23 23
24struct bpf_map_def SEC("maps") outer_map = { 24struct {
25 __u32 type;
26 __u32 max_entries;
27 __u32 key_size;
28 __u32 value_size;
29} outer_map SEC(".maps") = {
25 .type = BPF_MAP_TYPE_ARRAY_OF_MAPS, 30 .type = BPF_MAP_TYPE_ARRAY_OF_MAPS,
31 .max_entries = 1,
26 .key_size = sizeof(__u32), 32 .key_size = sizeof(__u32),
27 .value_size = sizeof(__u32), 33 .value_size = sizeof(__u32),
28 .max_entries = 1,
29}; 34};
30 35
31struct bpf_map_def SEC("maps") result_map = { 36struct {
37 __u32 type;
38 __u32 max_entries;
39 __u32 *key;
40 __u32 *value;
41} result_map SEC(".maps") = {
32 .type = BPF_MAP_TYPE_ARRAY, 42 .type = BPF_MAP_TYPE_ARRAY,
33 .key_size = sizeof(__u32),
34 .value_size = sizeof(__u32),
35 .max_entries = NR_RESULTS, 43 .max_entries = NR_RESULTS,
36}; 44};
37 45
38struct bpf_map_def SEC("maps") tmp_index_ovr_map = { 46struct {
47 __u32 type;
48 __u32 max_entries;
49 __u32 *key;
50 int *value;
51} tmp_index_ovr_map SEC(".maps") = {
39 .type = BPF_MAP_TYPE_ARRAY, 52 .type = BPF_MAP_TYPE_ARRAY,
40 .key_size = sizeof(__u32),
41 .value_size = sizeof(int),
42 .max_entries = 1, 53 .max_entries = 1,
43}; 54};
44 55
45struct bpf_map_def SEC("maps") linum_map = { 56struct {
57 __u32 type;
58 __u32 max_entries;
59 __u32 *key;
60 __u32 *value;
61} linum_map SEC(".maps") = {
46 .type = BPF_MAP_TYPE_ARRAY, 62 .type = BPF_MAP_TYPE_ARRAY,
47 .key_size = sizeof(__u32),
48 .value_size = sizeof(__u32),
49 .max_entries = 1, 63 .max_entries = 1,
50}; 64};
51 65
52struct bpf_map_def SEC("maps") data_check_map = { 66struct {
67 __u32 type;
68 __u32 max_entries;
69 __u32 *key;
70 struct data_check *value;
71} data_check_map SEC(".maps") = {
53 .type = BPF_MAP_TYPE_ARRAY, 72 .type = BPF_MAP_TYPE_ARRAY,
54 .key_size = sizeof(__u32),
55 .value_size = sizeof(struct data_check),
56 .max_entries = 1, 73 .max_entries = 1,
57}; 74};
58 75
diff --git a/tools/testing/selftests/bpf/progs/test_send_signal_kern.c b/tools/testing/selftests/bpf/progs/test_send_signal_kern.c
index 45a1a1a2c345..6ac68be5d68b 100644
--- a/tools/testing/selftests/bpf/progs/test_send_signal_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_send_signal_kern.c
@@ -4,24 +4,26 @@
4#include <linux/version.h> 4#include <linux/version.h>
5#include "bpf_helpers.h" 5#include "bpf_helpers.h"
6 6
7struct bpf_map_def SEC("maps") info_map = { 7struct {
8 __u32 type;
9 __u32 max_entries;
10 __u32 *key;
11 __u64 *value;
12} info_map SEC(".maps") = {
8 .type = BPF_MAP_TYPE_ARRAY, 13 .type = BPF_MAP_TYPE_ARRAY,
9 .key_size = sizeof(__u32),
10 .value_size = sizeof(__u64),
11 .max_entries = 1, 14 .max_entries = 1,
12}; 15};
13 16
14BPF_ANNOTATE_KV_PAIR(info_map, __u32, __u64); 17struct {
15 18 __u32 type;
16struct bpf_map_def SEC("maps") status_map = { 19 __u32 max_entries;
20 __u32 *key;
21 __u64 *value;
22} status_map SEC(".maps") = {
17 .type = BPF_MAP_TYPE_ARRAY, 23 .type = BPF_MAP_TYPE_ARRAY,
18 .key_size = sizeof(__u32),
19 .value_size = sizeof(__u64),
20 .max_entries = 1, 24 .max_entries = 1,
21}; 25};
22 26
23BPF_ANNOTATE_KV_PAIR(status_map, __u32, __u64);
24
25SEC("send_signal_demo") 27SEC("send_signal_demo")
26int bpf_send_signal_test(void *ctx) 28int bpf_send_signal_test(void *ctx)
27{ 29{
diff --git a/tools/testing/selftests/bpf/progs/test_sock_fields_kern.c b/tools/testing/selftests/bpf/progs/test_sock_fields_kern.c
index 1c39e4ccb7f1..c3d383d650cb 100644
--- a/tools/testing/selftests/bpf/progs/test_sock_fields_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_sock_fields_kern.c
@@ -27,31 +27,43 @@ enum bpf_linum_array_idx {
27 __NR_BPF_LINUM_ARRAY_IDX, 27 __NR_BPF_LINUM_ARRAY_IDX,
28}; 28};
29 29
30struct bpf_map_def SEC("maps") addr_map = { 30struct {
31 __u32 type;
32 __u32 max_entries;
33 __u32 *key;
34 struct sockaddr_in6 *value;
35} addr_map SEC(".maps") = {
31 .type = BPF_MAP_TYPE_ARRAY, 36 .type = BPF_MAP_TYPE_ARRAY,
32 .key_size = sizeof(__u32),
33 .value_size = sizeof(struct sockaddr_in6),
34 .max_entries = __NR_BPF_ADDR_ARRAY_IDX, 37 .max_entries = __NR_BPF_ADDR_ARRAY_IDX,
35}; 38};
36 39
37struct bpf_map_def SEC("maps") sock_result_map = { 40struct {
41 __u32 type;
42 __u32 max_entries;
43 __u32 *key;
44 struct bpf_sock *value;
45} sock_result_map SEC(".maps") = {
38 .type = BPF_MAP_TYPE_ARRAY, 46 .type = BPF_MAP_TYPE_ARRAY,
39 .key_size = sizeof(__u32),
40 .value_size = sizeof(struct bpf_sock),
41 .max_entries = __NR_BPF_RESULT_ARRAY_IDX, 47 .max_entries = __NR_BPF_RESULT_ARRAY_IDX,
42}; 48};
43 49
44struct bpf_map_def SEC("maps") tcp_sock_result_map = { 50struct {
51 __u32 type;
52 __u32 max_entries;
53 __u32 *key;
54 struct bpf_tcp_sock *value;
55} tcp_sock_result_map SEC(".maps") = {
45 .type = BPF_MAP_TYPE_ARRAY, 56 .type = BPF_MAP_TYPE_ARRAY,
46 .key_size = sizeof(__u32),
47 .value_size = sizeof(struct bpf_tcp_sock),
48 .max_entries = __NR_BPF_RESULT_ARRAY_IDX, 57 .max_entries = __NR_BPF_RESULT_ARRAY_IDX,
49}; 58};
50 59
51struct bpf_map_def SEC("maps") linum_map = { 60struct {
61 __u32 type;
62 __u32 max_entries;
63 __u32 *key;
64 __u32 *value;
65} linum_map SEC(".maps") = {
52 .type = BPF_MAP_TYPE_ARRAY, 66 .type = BPF_MAP_TYPE_ARRAY,
53 .key_size = sizeof(__u32),
54 .value_size = sizeof(__u32),
55 .max_entries = __NR_BPF_LINUM_ARRAY_IDX, 67 .max_entries = __NR_BPF_LINUM_ARRAY_IDX,
56}; 68};
57 69
@@ -60,26 +72,26 @@ struct bpf_spinlock_cnt {
60 __u32 cnt; 72 __u32 cnt;
61}; 73};
62 74
63struct bpf_map_def SEC("maps") sk_pkt_out_cnt = { 75struct {
76 __u32 type;
77 __u32 map_flags;
78 int *key;
79 struct bpf_spinlock_cnt *value;
80} sk_pkt_out_cnt SEC(".maps") = {
64 .type = BPF_MAP_TYPE_SK_STORAGE, 81 .type = BPF_MAP_TYPE_SK_STORAGE,
65 .key_size = sizeof(int),
66 .value_size = sizeof(struct bpf_spinlock_cnt),
67 .max_entries = 0,
68 .map_flags = BPF_F_NO_PREALLOC, 82 .map_flags = BPF_F_NO_PREALLOC,
69}; 83};
70 84
71BPF_ANNOTATE_KV_PAIR(sk_pkt_out_cnt, int, struct bpf_spinlock_cnt); 85struct {
72 86 __u32 type;
73struct bpf_map_def SEC("maps") sk_pkt_out_cnt10 = { 87 __u32 map_flags;
88 int *key;
89 struct bpf_spinlock_cnt *value;
90} sk_pkt_out_cnt10 SEC(".maps") = {
74 .type = BPF_MAP_TYPE_SK_STORAGE, 91 .type = BPF_MAP_TYPE_SK_STORAGE,
75 .key_size = sizeof(int),
76 .value_size = sizeof(struct bpf_spinlock_cnt),
77 .max_entries = 0,
78 .map_flags = BPF_F_NO_PREALLOC, 92 .map_flags = BPF_F_NO_PREALLOC,
79}; 93};
80 94
81BPF_ANNOTATE_KV_PAIR(sk_pkt_out_cnt10, int, struct bpf_spinlock_cnt);
82
83static bool is_loopback6(__u32 *a6) 95static bool is_loopback6(__u32 *a6)
84{ 96{
85 return !a6[0] && !a6[1] && !a6[2] && a6[3] == bpf_htonl(1); 97 return !a6[0] && !a6[1] && !a6[2] && a6[3] == bpf_htonl(1);
diff --git a/tools/testing/selftests/bpf/progs/test_spin_lock.c b/tools/testing/selftests/bpf/progs/test_spin_lock.c
index 40f904312090..0a77ae36d981 100644
--- a/tools/testing/selftests/bpf/progs/test_spin_lock.c
+++ b/tools/testing/selftests/bpf/progs/test_spin_lock.c
@@ -10,30 +10,29 @@ struct hmap_elem {
10 int test_padding; 10 int test_padding;
11}; 11};
12 12
13struct bpf_map_def SEC("maps") hmap = { 13struct {
14 __u32 type;
15 __u32 max_entries;
16 int *key;
17 struct hmap_elem *value;
18} hmap SEC(".maps") = {
14 .type = BPF_MAP_TYPE_HASH, 19 .type = BPF_MAP_TYPE_HASH,
15 .key_size = sizeof(int),
16 .value_size = sizeof(struct hmap_elem),
17 .max_entries = 1, 20 .max_entries = 1,
18}; 21};
19 22
20BPF_ANNOTATE_KV_PAIR(hmap, int, struct hmap_elem);
21
22
23struct cls_elem { 23struct cls_elem {
24 struct bpf_spin_lock lock; 24 struct bpf_spin_lock lock;
25 volatile int cnt; 25 volatile int cnt;
26}; 26};
27 27
28struct bpf_map_def SEC("maps") cls_map = { 28struct {
29 __u32 type;
30 struct bpf_cgroup_storage_key *key;
31 struct cls_elem *value;
32} cls_map SEC(".maps") = {
29 .type = BPF_MAP_TYPE_CGROUP_STORAGE, 33 .type = BPF_MAP_TYPE_CGROUP_STORAGE,
30 .key_size = sizeof(struct bpf_cgroup_storage_key),
31 .value_size = sizeof(struct cls_elem),
32}; 34};
33 35
34BPF_ANNOTATE_KV_PAIR(cls_map, struct bpf_cgroup_storage_key,
35 struct cls_elem);
36
37struct bpf_vqueue { 36struct bpf_vqueue {
38 struct bpf_spin_lock lock; 37 struct bpf_spin_lock lock;
39 /* 4 byte hole */ 38 /* 4 byte hole */
@@ -42,14 +41,16 @@ struct bpf_vqueue {
42 unsigned int rate; 41 unsigned int rate;
43}; 42};
44 43
45struct bpf_map_def SEC("maps") vqueue = { 44struct {
45 __u32 type;
46 __u32 max_entries;
47 int *key;
48 struct bpf_vqueue *value;
49} vqueue SEC(".maps") = {
46 .type = BPF_MAP_TYPE_ARRAY, 50 .type = BPF_MAP_TYPE_ARRAY,
47 .key_size = sizeof(int),
48 .value_size = sizeof(struct bpf_vqueue),
49 .max_entries = 1, 51 .max_entries = 1,
50}; 52};
51 53
52BPF_ANNOTATE_KV_PAIR(vqueue, int, struct bpf_vqueue);
53#define CREDIT_PER_NS(delta, rate) (((delta) * rate) >> 20) 54#define CREDIT_PER_NS(delta, rate) (((delta) * rate) >> 20)
54 55
55SEC("spin_lock_demo") 56SEC("spin_lock_demo")
diff --git a/tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c b/tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c
index d86c281e957f..fcf2280bb60c 100644
--- a/tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c
+++ b/tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c
@@ -8,34 +8,50 @@
8#define PERF_MAX_STACK_DEPTH 127 8#define PERF_MAX_STACK_DEPTH 127
9#endif 9#endif
10 10
11struct bpf_map_def SEC("maps") control_map = { 11struct {
12 __u32 type;
13 __u32 max_entries;
14 __u32 *key;
15 __u32 *value;
16} control_map SEC(".maps") = {
12 .type = BPF_MAP_TYPE_ARRAY, 17 .type = BPF_MAP_TYPE_ARRAY,
13 .key_size = sizeof(__u32),
14 .value_size = sizeof(__u32),
15 .max_entries = 1, 18 .max_entries = 1,
16}; 19};
17 20
18struct bpf_map_def SEC("maps") stackid_hmap = { 21struct {
22 __u32 type;
23 __u32 max_entries;
24 __u32 *key;
25 __u32 *value;
26} stackid_hmap SEC(".maps") = {
19 .type = BPF_MAP_TYPE_HASH, 27 .type = BPF_MAP_TYPE_HASH,
20 .key_size = sizeof(__u32),
21 .value_size = sizeof(__u32),
22 .max_entries = 16384, 28 .max_entries = 16384,
23}; 29};
24 30
25struct bpf_map_def SEC("maps") stackmap = { 31typedef struct bpf_stack_build_id stack_trace_t[PERF_MAX_STACK_DEPTH];
32
33struct {
34 __u32 type;
35 __u32 max_entries;
36 __u32 map_flags;
37 __u32 key_size;
38 __u32 value_size;
39} stackmap SEC(".maps") = {
26 .type = BPF_MAP_TYPE_STACK_TRACE, 40 .type = BPF_MAP_TYPE_STACK_TRACE,
27 .key_size = sizeof(__u32),
28 .value_size = sizeof(struct bpf_stack_build_id)
29 * PERF_MAX_STACK_DEPTH,
30 .max_entries = 128, 41 .max_entries = 128,
31 .map_flags = BPF_F_STACK_BUILD_ID, 42 .map_flags = BPF_F_STACK_BUILD_ID,
43 .key_size = sizeof(__u32),
44 .value_size = sizeof(stack_trace_t),
32}; 45};
33 46
34struct bpf_map_def SEC("maps") stack_amap = { 47struct {
48 __u32 type;
49 __u32 max_entries;
50 __u32 *key;
51 /* there seems to be a bug in kernel not handling typedef properly */
52 struct bpf_stack_build_id (*value)[PERF_MAX_STACK_DEPTH];
53} stack_amap SEC(".maps") = {
35 .type = BPF_MAP_TYPE_ARRAY, 54 .type = BPF_MAP_TYPE_ARRAY,
36 .key_size = sizeof(__u32),
37 .value_size = sizeof(struct bpf_stack_build_id)
38 * PERF_MAX_STACK_DEPTH,
39 .max_entries = 128, 55 .max_entries = 128,
40}; 56};
41 57
diff --git a/tools/testing/selftests/bpf/progs/test_stacktrace_map.c b/tools/testing/selftests/bpf/progs/test_stacktrace_map.c
index af111af7ca1a..7ad09adbf648 100644
--- a/tools/testing/selftests/bpf/progs/test_stacktrace_map.c
+++ b/tools/testing/selftests/bpf/progs/test_stacktrace_map.c
@@ -8,31 +8,47 @@
8#define PERF_MAX_STACK_DEPTH 127 8#define PERF_MAX_STACK_DEPTH 127
9#endif 9#endif
10 10
11struct bpf_map_def SEC("maps") control_map = { 11struct {
12 __u32 type;
13 __u32 max_entries;
14 __u32 *key;
15 __u32 *value;
16} control_map SEC(".maps") = {
12 .type = BPF_MAP_TYPE_ARRAY, 17 .type = BPF_MAP_TYPE_ARRAY,
13 .key_size = sizeof(__u32),
14 .value_size = sizeof(__u32),
15 .max_entries = 1, 18 .max_entries = 1,
16}; 19};
17 20
18struct bpf_map_def SEC("maps") stackid_hmap = { 21struct {
22 __u32 type;
23 __u32 max_entries;
24 __u32 *key;
25 __u32 *value;
26} stackid_hmap SEC(".maps") = {
19 .type = BPF_MAP_TYPE_HASH, 27 .type = BPF_MAP_TYPE_HASH,
20 .key_size = sizeof(__u32),
21 .value_size = sizeof(__u32),
22 .max_entries = 16384, 28 .max_entries = 16384,
23}; 29};
24 30
25struct bpf_map_def SEC("maps") stackmap = { 31typedef __u64 stack_trace_t[PERF_MAX_STACK_DEPTH];
32
33struct {
34 __u32 type;
35 __u32 max_entries;
36 __u32 key_size;
37 __u32 value_size;
38} stackmap SEC(".maps") = {
26 .type = BPF_MAP_TYPE_STACK_TRACE, 39 .type = BPF_MAP_TYPE_STACK_TRACE,
27 .key_size = sizeof(__u32),
28 .value_size = sizeof(__u64) * PERF_MAX_STACK_DEPTH,
29 .max_entries = 16384, 40 .max_entries = 16384,
41 .key_size = sizeof(__u32),
42 .value_size = sizeof(stack_trace_t),
30}; 43};
31 44
32struct bpf_map_def SEC("maps") stack_amap = { 45struct {
46 __u32 type;
47 __u32 max_entries;
48 __u32 *key;
49 __u64 (*value)[PERF_MAX_STACK_DEPTH];
50} stack_amap SEC(".maps") = {
33 .type = BPF_MAP_TYPE_ARRAY, 51 .type = BPF_MAP_TYPE_ARRAY,
34 .key_size = sizeof(__u32),
35 .value_size = sizeof(__u64) * PERF_MAX_STACK_DEPTH,
36 .max_entries = 16384, 52 .max_entries = 16384,
37}; 53};
38 54
diff --git a/tools/testing/selftests/bpf/progs/test_sysctl_loop1.c b/tools/testing/selftests/bpf/progs/test_sysctl_loop1.c
new file mode 100644
index 000000000000..608a06871572
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_sysctl_loop1.c
@@ -0,0 +1,71 @@
1// SPDX-License-Identifier: GPL-2.0
2// Copyright (c) 2019 Facebook
3
4#include <stdint.h>
5#include <string.h>
6
7#include <linux/stddef.h>
8#include <linux/bpf.h>
9
10#include "bpf_helpers.h"
11
12#ifndef ARRAY_SIZE
13#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
14#endif
15
16/* tcp_mem sysctl has only 3 ints, but this test is doing TCP_MEM_LOOPS */
17#define TCP_MEM_LOOPS 28 /* because 30 doesn't fit into 512 bytes of stack */
18#define MAX_ULONG_STR_LEN 7
19#define MAX_VALUE_STR_LEN (TCP_MEM_LOOPS * MAX_ULONG_STR_LEN)
20
21static __always_inline int is_tcp_mem(struct bpf_sysctl *ctx)
22{
23 volatile char tcp_mem_name[] = "net/ipv4/tcp_mem/very_very_very_very_long_pointless_string";
24 unsigned char i;
25 char name[64];
26 int ret;
27
28 memset(name, 0, sizeof(name));
29 ret = bpf_sysctl_get_name(ctx, name, sizeof(name), 0);
30 if (ret < 0 || ret != sizeof(tcp_mem_name) - 1)
31 return 0;
32
33#pragma clang loop unroll(disable)
34 for (i = 0; i < sizeof(tcp_mem_name); ++i)
35 if (name[i] != tcp_mem_name[i])
36 return 0;
37
38 return 1;
39}
40
41SEC("cgroup/sysctl")
42int sysctl_tcp_mem(struct bpf_sysctl *ctx)
43{
44 unsigned long tcp_mem[TCP_MEM_LOOPS] = {};
45 char value[MAX_VALUE_STR_LEN];
46 unsigned char i, off = 0;
47 int ret;
48
49 if (ctx->write)
50 return 0;
51
52 if (!is_tcp_mem(ctx))
53 return 0;
54
55 ret = bpf_sysctl_get_current_value(ctx, value, MAX_VALUE_STR_LEN);
56 if (ret < 0 || ret >= MAX_VALUE_STR_LEN)
57 return 0;
58
59#pragma clang loop unroll(disable)
60 for (i = 0; i < ARRAY_SIZE(tcp_mem); ++i) {
61 ret = bpf_strtoul(value + off, MAX_ULONG_STR_LEN, 0,
62 tcp_mem + i);
63 if (ret <= 0 || ret > MAX_ULONG_STR_LEN)
64 return 0;
65 off += ret & MAX_ULONG_STR_LEN;
66 }
67
68 return tcp_mem[0] < tcp_mem[1] && tcp_mem[1] < tcp_mem[2];
69}
70
71char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_sysctl_loop2.c b/tools/testing/selftests/bpf/progs/test_sysctl_loop2.c
new file mode 100644
index 000000000000..cb201cbe11e7
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_sysctl_loop2.c
@@ -0,0 +1,72 @@
1// SPDX-License-Identifier: GPL-2.0
2// Copyright (c) 2019 Facebook
3
4#include <stdint.h>
5#include <string.h>
6
7#include <linux/stddef.h>
8#include <linux/bpf.h>
9
10#include "bpf_helpers.h"
11
12#ifndef ARRAY_SIZE
13#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
14#endif
15
16/* tcp_mem sysctl has only 3 ints, but this test is doing TCP_MEM_LOOPS */
17#define TCP_MEM_LOOPS 20 /* because 30 doesn't fit into 512 bytes of stack */
18#define MAX_ULONG_STR_LEN 7
19#define MAX_VALUE_STR_LEN (TCP_MEM_LOOPS * MAX_ULONG_STR_LEN)
20
21static __attribute__((noinline)) int is_tcp_mem(struct bpf_sysctl *ctx)
22{
23 volatile char tcp_mem_name[] = "net/ipv4/tcp_mem/very_very_very_very_long_pointless_string_to_stress_byte_loop";
24 unsigned char i;
25 char name[64];
26 int ret;
27
28 memset(name, 0, sizeof(name));
29 ret = bpf_sysctl_get_name(ctx, name, sizeof(name), 0);
30 if (ret < 0 || ret != sizeof(tcp_mem_name) - 1)
31 return 0;
32
33#pragma clang loop unroll(disable)
34 for (i = 0; i < sizeof(tcp_mem_name); ++i)
35 if (name[i] != tcp_mem_name[i])
36 return 0;
37
38 return 1;
39}
40
41
42SEC("cgroup/sysctl")
43int sysctl_tcp_mem(struct bpf_sysctl *ctx)
44{
45 unsigned long tcp_mem[TCP_MEM_LOOPS] = {};
46 char value[MAX_VALUE_STR_LEN];
47 unsigned char i, off = 0;
48 int ret;
49
50 if (ctx->write)
51 return 0;
52
53 if (!is_tcp_mem(ctx))
54 return 0;
55
56 ret = bpf_sysctl_get_current_value(ctx, value, MAX_VALUE_STR_LEN);
57 if (ret < 0 || ret >= MAX_VALUE_STR_LEN)
58 return 0;
59
60#pragma clang loop unroll(disable)
61 for (i = 0; i < ARRAY_SIZE(tcp_mem); ++i) {
62 ret = bpf_strtoul(value + off, MAX_ULONG_STR_LEN, 0,
63 tcp_mem + i);
64 if (ret <= 0 || ret > MAX_ULONG_STR_LEN)
65 return 0;
66 off += ret & MAX_ULONG_STR_LEN;
67 }
68
69 return tcp_mem[0] < tcp_mem[1] && tcp_mem[1] < tcp_mem[2];
70}
71
72char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_sysctl_prog.c b/tools/testing/selftests/bpf/progs/test_sysctl_prog.c
index a295cad805d7..5cbbff416998 100644
--- a/tools/testing/selftests/bpf/progs/test_sysctl_prog.c
+++ b/tools/testing/selftests/bpf/progs/test_sysctl_prog.c
@@ -8,7 +8,6 @@
8#include <linux/bpf.h> 8#include <linux/bpf.h>
9 9
10#include "bpf_helpers.h" 10#include "bpf_helpers.h"
11#include "bpf_util.h"
12 11
13/* Max supported length of a string with unsigned long in base 10 (pow2 - 1). */ 12/* Max supported length of a string with unsigned long in base 10 (pow2 - 1). */
14#define MAX_ULONG_STR_LEN 0xF 13#define MAX_ULONG_STR_LEN 0xF
@@ -16,6 +15,10 @@
16/* Max supported length of sysctl value string (pow2). */ 15/* Max supported length of sysctl value string (pow2). */
17#define MAX_VALUE_STR_LEN 0x40 16#define MAX_VALUE_STR_LEN 0x40
18 17
18#ifndef ARRAY_SIZE
19#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
20#endif
21
19static __always_inline int is_tcp_mem(struct bpf_sysctl *ctx) 22static __always_inline int is_tcp_mem(struct bpf_sysctl *ctx)
20{ 23{
21 char tcp_mem_name[] = "net/ipv4/tcp_mem"; 24 char tcp_mem_name[] = "net/ipv4/tcp_mem";
diff --git a/tools/testing/selftests/bpf/progs/test_tcp_estats.c b/tools/testing/selftests/bpf/progs/test_tcp_estats.c
index bee3bbecc0c4..df98f7e32832 100644
--- a/tools/testing/selftests/bpf/progs/test_tcp_estats.c
+++ b/tools/testing/selftests/bpf/progs/test_tcp_estats.c
@@ -148,10 +148,13 @@ struct tcp_estats_basic_event {
148 struct tcp_estats_conn_id conn_id; 148 struct tcp_estats_conn_id conn_id;
149}; 149};
150 150
151struct bpf_map_def SEC("maps") ev_record_map = { 151struct {
152 __u32 type;
153 __u32 max_entries;
154 __u32 *key;
155 struct tcp_estats_basic_event *value;
156} ev_record_map SEC(".maps") = {
152 .type = BPF_MAP_TYPE_HASH, 157 .type = BPF_MAP_TYPE_HASH,
153 .key_size = sizeof(__u32),
154 .value_size = sizeof(struct tcp_estats_basic_event),
155 .max_entries = 1024, 158 .max_entries = 1024,
156}; 159};
157 160
diff --git a/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c b/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c
index c7c3240e0dd4..38e10c9fd996 100644
--- a/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c
@@ -14,17 +14,23 @@
14#include "bpf_endian.h" 14#include "bpf_endian.h"
15#include "test_tcpbpf.h" 15#include "test_tcpbpf.h"
16 16
17struct bpf_map_def SEC("maps") global_map = { 17struct {
18 __u32 type;
19 __u32 max_entries;
20 __u32 *key;
21 struct tcpbpf_globals *value;
22} global_map SEC(".maps") = {
18 .type = BPF_MAP_TYPE_ARRAY, 23 .type = BPF_MAP_TYPE_ARRAY,
19 .key_size = sizeof(__u32),
20 .value_size = sizeof(struct tcpbpf_globals),
21 .max_entries = 4, 24 .max_entries = 4,
22}; 25};
23 26
24struct bpf_map_def SEC("maps") sockopt_results = { 27struct {
28 __u32 type;
29 __u32 max_entries;
30 __u32 *key;
31 int *value;
32} sockopt_results SEC(".maps") = {
25 .type = BPF_MAP_TYPE_ARRAY, 33 .type = BPF_MAP_TYPE_ARRAY,
26 .key_size = sizeof(__u32),
27 .value_size = sizeof(int),
28 .max_entries = 2, 34 .max_entries = 2,
29}; 35};
30 36
diff --git a/tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c b/tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c
index ec6db6e64c41..d073d37d4e27 100644
--- a/tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c
@@ -14,18 +14,26 @@
14#include "bpf_endian.h" 14#include "bpf_endian.h"
15#include "test_tcpnotify.h" 15#include "test_tcpnotify.h"
16 16
17struct bpf_map_def SEC("maps") global_map = { 17struct {
18 __u32 type;
19 __u32 max_entries;
20 __u32 *key;
21 struct tcpnotify_globals *value;
22} global_map SEC(".maps") = {
18 .type = BPF_MAP_TYPE_ARRAY, 23 .type = BPF_MAP_TYPE_ARRAY,
19 .key_size = sizeof(__u32),
20 .value_size = sizeof(struct tcpnotify_globals),
21 .max_entries = 4, 24 .max_entries = 4,
22}; 25};
23 26
24struct bpf_map_def SEC("maps") perf_event_map = { 27struct {
28 __u32 type;
29 __u32 max_entries;
30 __u32 key_size;
31 __u32 value_size;
32} perf_event_map SEC(".maps") = {
25 .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, 33 .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
34 .max_entries = 2,
26 .key_size = sizeof(int), 35 .key_size = sizeof(int),
27 .value_size = sizeof(__u32), 36 .value_size = sizeof(__u32),
28 .max_entries = 2,
29}; 37};
30 38
31int _version SEC("version") = 1; 39int _version SEC("version") = 1;
diff --git a/tools/testing/selftests/bpf/progs/test_xdp.c b/tools/testing/selftests/bpf/progs/test_xdp.c
index 5e7df8bb5b5d..ec3d2c1c8cf9 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp.c
@@ -22,17 +22,23 @@
22 22
23int _version SEC("version") = 1; 23int _version SEC("version") = 1;
24 24
25struct bpf_map_def SEC("maps") rxcnt = { 25struct {
26 __u32 type;
27 __u32 max_entries;
28 __u32 *key;
29 __u64 *value;
30} rxcnt SEC(".maps") = {
26 .type = BPF_MAP_TYPE_PERCPU_ARRAY, 31 .type = BPF_MAP_TYPE_PERCPU_ARRAY,
27 .key_size = sizeof(__u32),
28 .value_size = sizeof(__u64),
29 .max_entries = 256, 32 .max_entries = 256,
30}; 33};
31 34
32struct bpf_map_def SEC("maps") vip2tnl = { 35struct {
36 __u32 type;
37 __u32 max_entries;
38 struct vip *key;
39 struct iptnl_info *value;
40} vip2tnl SEC(".maps") = {
33 .type = BPF_MAP_TYPE_HASH, 41 .type = BPF_MAP_TYPE_HASH,
34 .key_size = sizeof(struct vip),
35 .value_size = sizeof(struct iptnl_info),
36 .max_entries = MAX_IPTNL_ENTRIES, 42 .max_entries = MAX_IPTNL_ENTRIES,
37}; 43};
38 44
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_loop.c b/tools/testing/selftests/bpf/progs/test_xdp_loop.c
new file mode 100644
index 000000000000..7fa4677df22e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_xdp_loop.c
@@ -0,0 +1,231 @@
1// SPDX-License-Identifier: GPL-2.0
2// Copyright (c) 2019 Facebook
3#include <stddef.h>
4#include <string.h>
5#include <linux/bpf.h>
6#include <linux/if_ether.h>
7#include <linux/if_packet.h>
8#include <linux/ip.h>
9#include <linux/ipv6.h>
10#include <linux/in.h>
11#include <linux/udp.h>
12#include <linux/tcp.h>
13#include <linux/pkt_cls.h>
14#include <sys/socket.h>
15#include "bpf_helpers.h"
16#include "bpf_endian.h"
17#include "test_iptunnel_common.h"
18
19int _version SEC("version") = 1;
20
21struct bpf_map_def SEC("maps") rxcnt = {
22 .type = BPF_MAP_TYPE_PERCPU_ARRAY,
23 .key_size = sizeof(__u32),
24 .value_size = sizeof(__u64),
25 .max_entries = 256,
26};
27
28struct bpf_map_def SEC("maps") vip2tnl = {
29 .type = BPF_MAP_TYPE_HASH,
30 .key_size = sizeof(struct vip),
31 .value_size = sizeof(struct iptnl_info),
32 .max_entries = MAX_IPTNL_ENTRIES,
33};
34
35static __always_inline void count_tx(__u32 protocol)
36{
37 __u64 *rxcnt_count;
38
39 rxcnt_count = bpf_map_lookup_elem(&rxcnt, &protocol);
40 if (rxcnt_count)
41 *rxcnt_count += 1;
42}
43
44static __always_inline int get_dport(void *trans_data, void *data_end,
45 __u8 protocol)
46{
47 struct tcphdr *th;
48 struct udphdr *uh;
49
50 switch (protocol) {
51 case IPPROTO_TCP:
52 th = (struct tcphdr *)trans_data;
53 if (th + 1 > data_end)
54 return -1;
55 return th->dest;
56 case IPPROTO_UDP:
57 uh = (struct udphdr *)trans_data;
58 if (uh + 1 > data_end)
59 return -1;
60 return uh->dest;
61 default:
62 return 0;
63 }
64}
65
66static __always_inline void set_ethhdr(struct ethhdr *new_eth,
67 const struct ethhdr *old_eth,
68 const struct iptnl_info *tnl,
69 __be16 h_proto)
70{
71 memcpy(new_eth->h_source, old_eth->h_dest, sizeof(new_eth->h_source));
72 memcpy(new_eth->h_dest, tnl->dmac, sizeof(new_eth->h_dest));
73 new_eth->h_proto = h_proto;
74}
75
76static __always_inline int handle_ipv4(struct xdp_md *xdp)
77{
78 void *data_end = (void *)(long)xdp->data_end;
79 void *data = (void *)(long)xdp->data;
80 struct iptnl_info *tnl;
81 struct ethhdr *new_eth;
82 struct ethhdr *old_eth;
83 struct iphdr *iph = data + sizeof(struct ethhdr);
84 __u16 *next_iph;
85 __u16 payload_len;
86 struct vip vip = {};
87 int dport;
88 __u32 csum = 0;
89 int i;
90
91 if (iph + 1 > data_end)
92 return XDP_DROP;
93
94 dport = get_dport(iph + 1, data_end, iph->protocol);
95 if (dport == -1)
96 return XDP_DROP;
97
98 vip.protocol = iph->protocol;
99 vip.family = AF_INET;
100 vip.daddr.v4 = iph->daddr;
101 vip.dport = dport;
102 payload_len = bpf_ntohs(iph->tot_len);
103
104 tnl = bpf_map_lookup_elem(&vip2tnl, &vip);
105 /* It only does v4-in-v4 */
106 if (!tnl || tnl->family != AF_INET)
107 return XDP_PASS;
108
109 if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct iphdr)))
110 return XDP_DROP;
111
112 data = (void *)(long)xdp->data;
113 data_end = (void *)(long)xdp->data_end;
114
115 new_eth = data;
116 iph = data + sizeof(*new_eth);
117 old_eth = data + sizeof(*iph);
118
119 if (new_eth + 1 > data_end ||
120 old_eth + 1 > data_end ||
121 iph + 1 > data_end)
122 return XDP_DROP;
123
124 set_ethhdr(new_eth, old_eth, tnl, bpf_htons(ETH_P_IP));
125
126 iph->version = 4;
127 iph->ihl = sizeof(*iph) >> 2;
128 iph->frag_off = 0;
129 iph->protocol = IPPROTO_IPIP;
130 iph->check = 0;
131 iph->tos = 0;
132 iph->tot_len = bpf_htons(payload_len + sizeof(*iph));
133 iph->daddr = tnl->daddr.v4;
134 iph->saddr = tnl->saddr.v4;
135 iph->ttl = 8;
136
137 next_iph = (__u16 *)iph;
138#pragma clang loop unroll(disable)
139 for (i = 0; i < sizeof(*iph) >> 1; i++)
140 csum += *next_iph++;
141
142 iph->check = ~((csum & 0xffff) + (csum >> 16));
143
144 count_tx(vip.protocol);
145
146 return XDP_TX;
147}
148
149static __always_inline int handle_ipv6(struct xdp_md *xdp)
150{
151 void *data_end = (void *)(long)xdp->data_end;
152 void *data = (void *)(long)xdp->data;
153 struct iptnl_info *tnl;
154 struct ethhdr *new_eth;
155 struct ethhdr *old_eth;
156 struct ipv6hdr *ip6h = data + sizeof(struct ethhdr);
157 __u16 payload_len;
158 struct vip vip = {};
159 int dport;
160
161 if (ip6h + 1 > data_end)
162 return XDP_DROP;
163
164 dport = get_dport(ip6h + 1, data_end, ip6h->nexthdr);
165 if (dport == -1)
166 return XDP_DROP;
167
168 vip.protocol = ip6h->nexthdr;
169 vip.family = AF_INET6;
170 memcpy(vip.daddr.v6, ip6h->daddr.s6_addr32, sizeof(vip.daddr));
171 vip.dport = dport;
172 payload_len = ip6h->payload_len;
173
174 tnl = bpf_map_lookup_elem(&vip2tnl, &vip);
175 /* It only does v6-in-v6 */
176 if (!tnl || tnl->family != AF_INET6)
177 return XDP_PASS;
178
179 if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct ipv6hdr)))
180 return XDP_DROP;
181
182 data = (void *)(long)xdp->data;
183 data_end = (void *)(long)xdp->data_end;
184
185 new_eth = data;
186 ip6h = data + sizeof(*new_eth);
187 old_eth = data + sizeof(*ip6h);
188
189 if (new_eth + 1 > data_end || old_eth + 1 > data_end ||
190 ip6h + 1 > data_end)
191 return XDP_DROP;
192
193 set_ethhdr(new_eth, old_eth, tnl, bpf_htons(ETH_P_IPV6));
194
195 ip6h->version = 6;
196 ip6h->priority = 0;
197 memset(ip6h->flow_lbl, 0, sizeof(ip6h->flow_lbl));
198 ip6h->payload_len = bpf_htons(bpf_ntohs(payload_len) + sizeof(*ip6h));
199 ip6h->nexthdr = IPPROTO_IPV6;
200 ip6h->hop_limit = 8;
201 memcpy(ip6h->saddr.s6_addr32, tnl->saddr.v6, sizeof(tnl->saddr.v6));
202 memcpy(ip6h->daddr.s6_addr32, tnl->daddr.v6, sizeof(tnl->daddr.v6));
203
204 count_tx(vip.protocol);
205
206 return XDP_TX;
207}
208
209SEC("xdp_tx_iptunnel")
210int _xdp_tx_iptunnel(struct xdp_md *xdp)
211{
212 void *data_end = (void *)(long)xdp->data_end;
213 void *data = (void *)(long)xdp->data;
214 struct ethhdr *eth = data;
215 __u16 h_proto;
216
217 if (eth + 1 > data_end)
218 return XDP_DROP;
219
220 h_proto = eth->h_proto;
221
222 if (h_proto == bpf_htons(ETH_P_IP))
223 return handle_ipv4(xdp);
224 else if (h_proto == bpf_htons(ETH_P_IPV6))
225
226 return handle_ipv6(xdp);
227 else
228 return XDP_DROP;
229}
230
231char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_noinline.c b/tools/testing/selftests/bpf/progs/test_xdp_noinline.c
index 4fe6aaad22a4..d2eddb5553d1 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_noinline.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_noinline.c
@@ -163,52 +163,66 @@ struct lb_stats {
163 __u64 v1; 163 __u64 v1;
164}; 164};
165 165
166struct bpf_map_def __attribute__ ((section("maps"), used)) vip_map = { 166struct {
167 __u32 type;
168 __u32 max_entries;
169 struct vip_definition *key;
170 struct vip_meta *value;
171} vip_map SEC(".maps") = {
167 .type = BPF_MAP_TYPE_HASH, 172 .type = BPF_MAP_TYPE_HASH,
168 .key_size = sizeof(struct vip_definition),
169 .value_size = sizeof(struct vip_meta),
170 .max_entries = 512, 173 .max_entries = 512,
171 .map_flags = 0,
172}; 174};
173 175
174struct bpf_map_def __attribute__ ((section("maps"), used)) lru_cache = { 176struct {
177 __u32 type;
178 __u32 max_entries;
179 __u32 map_flags;
180 struct flow_key *key;
181 struct real_pos_lru *value;
182} lru_cache SEC(".maps") = {
175 .type = BPF_MAP_TYPE_LRU_HASH, 183 .type = BPF_MAP_TYPE_LRU_HASH,
176 .key_size = sizeof(struct flow_key),
177 .value_size = sizeof(struct real_pos_lru),
178 .max_entries = 300, 184 .max_entries = 300,
179 .map_flags = 1U << 1, 185 .map_flags = 1U << 1,
180}; 186};
181 187
182struct bpf_map_def __attribute__ ((section("maps"), used)) ch_rings = { 188struct {
189 __u32 type;
190 __u32 max_entries;
191 __u32 *key;
192 __u32 *value;
193} ch_rings SEC(".maps") = {
183 .type = BPF_MAP_TYPE_ARRAY, 194 .type = BPF_MAP_TYPE_ARRAY,
184 .key_size = sizeof(__u32),
185 .value_size = sizeof(__u32),
186 .max_entries = 12 * 655, 195 .max_entries = 12 * 655,
187 .map_flags = 0,
188}; 196};
189 197
190struct bpf_map_def __attribute__ ((section("maps"), used)) reals = { 198struct {
199 __u32 type;
200 __u32 max_entries;
201 __u32 *key;
202 struct real_definition *value;
203} reals SEC(".maps") = {
191 .type = BPF_MAP_TYPE_ARRAY, 204 .type = BPF_MAP_TYPE_ARRAY,
192 .key_size = sizeof(__u32),
193 .value_size = sizeof(struct real_definition),
194 .max_entries = 40, 205 .max_entries = 40,
195 .map_flags = 0,
196}; 206};
197 207
198struct bpf_map_def __attribute__ ((section("maps"), used)) stats = { 208struct {
209 __u32 type;
210 __u32 max_entries;
211 __u32 *key;
212 struct lb_stats *value;
213} stats SEC(".maps") = {
199 .type = BPF_MAP_TYPE_PERCPU_ARRAY, 214 .type = BPF_MAP_TYPE_PERCPU_ARRAY,
200 .key_size = sizeof(__u32),
201 .value_size = sizeof(struct lb_stats),
202 .max_entries = 515, 215 .max_entries = 515,
203 .map_flags = 0,
204}; 216};
205 217
206struct bpf_map_def __attribute__ ((section("maps"), used)) ctl_array = { 218struct {
219 __u32 type;
220 __u32 max_entries;
221 __u32 *key;
222 struct ctl_value *value;
223} ctl_array SEC(".maps") = {
207 .type = BPF_MAP_TYPE_ARRAY, 224 .type = BPF_MAP_TYPE_ARRAY,
208 .key_size = sizeof(__u32),
209 .value_size = sizeof(struct ctl_value),
210 .max_entries = 16, 225 .max_entries = 16,
211 .map_flags = 0,
212}; 226};
213 227
214struct eth_hdr { 228struct eth_hdr {
diff --git a/tools/testing/selftests/bpf/test_btf.c b/tools/testing/selftests/bpf/test_btf.c
index 289daf54dec4..8351cb5f4a20 100644
--- a/tools/testing/selftests/bpf/test_btf.c
+++ b/tools/testing/selftests/bpf/test_btf.c
@@ -4016,13 +4016,9 @@ struct btf_file_test {
4016}; 4016};
4017 4017
4018static struct btf_file_test file_tests[] = { 4018static struct btf_file_test file_tests[] = {
4019{ 4019 { .file = "test_btf_haskv.o", },
4020 .file = "test_btf_haskv.o", 4020 { .file = "test_btf_newkv.o", },
4021}, 4021 { .file = "test_btf_nokv.o", .btf_kv_notfound = true, },
4022{
4023 .file = "test_btf_nokv.o",
4024 .btf_kv_notfound = true,
4025},
4026}; 4022};
4027 4023
4028static int do_test_file(unsigned int test_num) 4024static int do_test_file(unsigned int test_num)
diff --git a/tools/testing/selftests/bpf/test_select_reuseport.c b/tools/testing/selftests/bpf/test_select_reuseport.c
index 75646d9b34aa..7566c13eb51a 100644
--- a/tools/testing/selftests/bpf/test_select_reuseport.c
+++ b/tools/testing/selftests/bpf/test_select_reuseport.c
@@ -523,6 +523,58 @@ static void test_pass_on_err(int type, sa_family_t family)
523 printf("OK\n"); 523 printf("OK\n");
524} 524}
525 525
526static void test_detach_bpf(int type, sa_family_t family)
527{
528#ifdef SO_DETACH_REUSEPORT_BPF
529 __u32 nr_run_before = 0, nr_run_after = 0, tmp, i;
530 struct epoll_event ev;
531 int cli_fd, err, nev;
532 struct cmd cmd = {};
533 int optvalue = 0;
534
535 printf("%s: ", __func__);
536 err = setsockopt(sk_fds[0], SOL_SOCKET, SO_DETACH_REUSEPORT_BPF,
537 &optvalue, sizeof(optvalue));
538 CHECK(err == -1, "setsockopt(SO_DETACH_REUSEPORT_BPF)",
539 "err:%d errno:%d\n", err, errno);
540
541 err = setsockopt(sk_fds[1], SOL_SOCKET, SO_DETACH_REUSEPORT_BPF,
542 &optvalue, sizeof(optvalue));
543 CHECK(err == 0 || errno != ENOENT, "setsockopt(SO_DETACH_REUSEPORT_BPF)",
544 "err:%d errno:%d\n", err, errno);
545
546 for (i = 0; i < NR_RESULTS; i++) {
547 err = bpf_map_lookup_elem(result_map, &i, &tmp);
548 CHECK(err == -1, "lookup_elem(result_map)",
549 "i:%u err:%d errno:%d\n", i, err, errno);
550 nr_run_before += tmp;
551 }
552
553 cli_fd = send_data(type, family, &cmd, sizeof(cmd), PASS);
554 nev = epoll_wait(epfd, &ev, 1, 5);
555 CHECK(nev <= 0, "nev <= 0",
556 "nev:%d expected:1 type:%d family:%d data:(0, 0)\n",
557 nev, type, family);
558
559 for (i = 0; i < NR_RESULTS; i++) {
560 err = bpf_map_lookup_elem(result_map, &i, &tmp);
561 CHECK(err == -1, "lookup_elem(result_map)",
562 "i:%u err:%d errno:%d\n", i, err, errno);
563 nr_run_after += tmp;
564 }
565
566 CHECK(nr_run_before != nr_run_after,
567 "nr_run_before != nr_run_after",
568 "nr_run_before:%u nr_run_after:%u\n",
569 nr_run_before, nr_run_after);
570
571 printf("OK\n");
572 close(cli_fd);
573#else
574 printf("%s: SKIP\n", __func__);
575#endif
576}
577
526static void prepare_sk_fds(int type, sa_family_t family, bool inany) 578static void prepare_sk_fds(int type, sa_family_t family, bool inany)
527{ 579{
528 const int first = REUSEPORT_ARRAY_SIZE - 1; 580 const int first = REUSEPORT_ARRAY_SIZE - 1;
@@ -664,6 +716,8 @@ static void test_all(void)
664 test_pass(type, family); 716 test_pass(type, family);
665 test_syncookie(type, family); 717 test_syncookie(type, family);
666 test_pass_on_err(type, family); 718 test_pass_on_err(type, family);
719 /* Must be the last test */
720 test_detach_bpf(type, family);
667 721
668 cleanup_per_test(); 722 cleanup_per_test();
669 printf("\n"); 723 printf("\n");
diff --git a/tools/testing/selftests/bpf/test_socket_cookie.c b/tools/testing/selftests/bpf/test_socket_cookie.c
index cac8ee57a013..15653b0e26eb 100644
--- a/tools/testing/selftests/bpf/test_socket_cookie.c
+++ b/tools/testing/selftests/bpf/test_socket_cookie.c
@@ -18,6 +18,11 @@
18#define CG_PATH "/foo" 18#define CG_PATH "/foo"
19#define SOCKET_COOKIE_PROG "./socket_cookie_prog.o" 19#define SOCKET_COOKIE_PROG "./socket_cookie_prog.o"
20 20
21struct socket_cookie {
22 __u64 cookie_key;
23 __u32 cookie_value;
24};
25
21static int start_server(void) 26static int start_server(void)
22{ 27{
23 struct sockaddr_in6 addr; 28 struct sockaddr_in6 addr;
@@ -89,8 +94,7 @@ static int validate_map(struct bpf_map *map, int client_fd)
89 __u32 cookie_expected_value; 94 __u32 cookie_expected_value;
90 struct sockaddr_in6 addr; 95 struct sockaddr_in6 addr;
91 socklen_t len = sizeof(addr); 96 socklen_t len = sizeof(addr);
92 __u32 cookie_value; 97 struct socket_cookie val;
93 __u64 cookie_key;
94 int err = 0; 98 int err = 0;
95 int map_fd; 99 int map_fd;
96 100
@@ -101,17 +105,7 @@ static int validate_map(struct bpf_map *map, int client_fd)
101 105
102 map_fd = bpf_map__fd(map); 106 map_fd = bpf_map__fd(map);
103 107
104 err = bpf_map_get_next_key(map_fd, NULL, &cookie_key); 108 err = bpf_map_lookup_elem(map_fd, &client_fd, &val);
105 if (err) {
106 log_err("Can't get cookie key from map");
107 goto out;
108 }
109
110 err = bpf_map_lookup_elem(map_fd, &cookie_key, &cookie_value);
111 if (err) {
112 log_err("Can't get cookie value from map");
113 goto out;
114 }
115 109
116 err = getsockname(client_fd, (struct sockaddr *)&addr, &len); 110 err = getsockname(client_fd, (struct sockaddr *)&addr, &len);
117 if (err) { 111 if (err) {
@@ -120,8 +114,8 @@ static int validate_map(struct bpf_map *map, int client_fd)
120 } 114 }
121 115
122 cookie_expected_value = (ntohs(addr.sin6_port) << 8) | 0xFF; 116 cookie_expected_value = (ntohs(addr.sin6_port) << 8) | 0xFF;
123 if (cookie_value != cookie_expected_value) { 117 if (val.cookie_value != cookie_expected_value) {
124 log_err("Unexpected value in map: %x != %x", cookie_value, 118 log_err("Unexpected value in map: %x != %x", val.cookie_value,
125 cookie_expected_value); 119 cookie_expected_value);
126 goto err; 120 goto err;
127 } 121 }
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 6cb307201958..c5514daf8865 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -234,10 +234,10 @@ static void bpf_fill_scale1(struct bpf_test *self)
234 insn[i++] = BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 234 insn[i++] = BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6,
235 -8 * (k % 64 + 1)); 235 -8 * (k % 64 + 1));
236 } 236 }
237 /* every jump adds 1 step to insn_processed, so to stay exactly 237 /* is_state_visited() doesn't allocate state for pruning for every jump.
238 * within 1m limit add MAX_TEST_INSNS - MAX_JMP_SEQ - 1 MOVs and 1 EXIT 238 * Hence multiply jmps by 4 to accommodate that heuristic
239 */ 239 */
240 while (i < MAX_TEST_INSNS - MAX_JMP_SEQ - 1) 240 while (i < MAX_TEST_INSNS - MAX_JMP_SEQ * 4)
241 insn[i++] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 42); 241 insn[i++] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 42);
242 insn[i] = BPF_EXIT_INSN(); 242 insn[i] = BPF_EXIT_INSN();
243 self->prog_len = i + 1; 243 self->prog_len = i + 1;
@@ -266,10 +266,7 @@ static void bpf_fill_scale2(struct bpf_test *self)
266 insn[i++] = BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 266 insn[i++] = BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6,
267 -8 * (k % (64 - 4 * FUNC_NEST) + 1)); 267 -8 * (k % (64 - 4 * FUNC_NEST) + 1));
268 } 268 }
269 /* every jump adds 1 step to insn_processed, so to stay exactly 269 while (i < MAX_TEST_INSNS - MAX_JMP_SEQ * 4)
270 * within 1m limit add MAX_TEST_INSNS - MAX_JMP_SEQ - 1 MOVs and 1 EXIT
271 */
272 while (i < MAX_TEST_INSNS - MAX_JMP_SEQ - 1)
273 insn[i++] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 42); 270 insn[i++] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 42);
274 insn[i] = BPF_EXIT_INSN(); 271 insn[i] = BPF_EXIT_INSN();
275 self->prog_len = i + 1; 272 self->prog_len = i + 1;
diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c
index 9093a8f64dc6..2d752c4f8d9d 100644
--- a/tools/testing/selftests/bpf/verifier/calls.c
+++ b/tools/testing/selftests/bpf/verifier/calls.c
@@ -215,9 +215,11 @@
215 BPF_MOV64_IMM(BPF_REG_0, 3), 215 BPF_MOV64_IMM(BPF_REG_0, 3),
216 BPF_JMP_IMM(BPF_JA, 0, 0, -6), 216 BPF_JMP_IMM(BPF_JA, 0, 0, -6),
217 }, 217 },
218 .prog_type = BPF_PROG_TYPE_TRACEPOINT, 218 .prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
219 .errstr = "back-edge from insn", 219 .errstr_unpriv = "back-edge from insn",
220 .result = REJECT, 220 .result_unpriv = REJECT,
221 .result = ACCEPT,
222 .retval = 1,
221}, 223},
222{ 224{
223 "calls: conditional call 4", 225 "calls: conditional call 4",
@@ -250,22 +252,24 @@
250 BPF_MOV64_IMM(BPF_REG_0, 3), 252 BPF_MOV64_IMM(BPF_REG_0, 3),
251 BPF_EXIT_INSN(), 253 BPF_EXIT_INSN(),
252 }, 254 },
253 .prog_type = BPF_PROG_TYPE_TRACEPOINT, 255 .prog_type = BPF_PROG_TYPE_SCHED_CLS,
254 .errstr = "back-edge from insn", 256 .result = ACCEPT,
255 .result = REJECT, 257 .retval = 1,
256}, 258},
257{ 259{
258 "calls: conditional call 6", 260 "calls: conditional call 6",
259 .insns = { 261 .insns = {
262 BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
263 BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
260 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), 264 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
261 BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, -2), 265 BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, -3),
262 BPF_EXIT_INSN(), 266 BPF_EXIT_INSN(),
263 BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 267 BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
264 offsetof(struct __sk_buff, mark)), 268 offsetof(struct __sk_buff, mark)),
265 BPF_EXIT_INSN(), 269 BPF_EXIT_INSN(),
266 }, 270 },
267 .prog_type = BPF_PROG_TYPE_TRACEPOINT, 271 .prog_type = BPF_PROG_TYPE_SCHED_CLS,
268 .errstr = "back-edge from insn", 272 .errstr = "infinite loop detected",
269 .result = REJECT, 273 .result = REJECT,
270}, 274},
271{ 275{
diff --git a/tools/testing/selftests/bpf/verifier/cfg.c b/tools/testing/selftests/bpf/verifier/cfg.c
index 349c0862fb4c..4eb76ed739ce 100644
--- a/tools/testing/selftests/bpf/verifier/cfg.c
+++ b/tools/testing/selftests/bpf/verifier/cfg.c
@@ -41,7 +41,8 @@
41 BPF_JMP_IMM(BPF_JA, 0, 0, -1), 41 BPF_JMP_IMM(BPF_JA, 0, 0, -1),
42 BPF_EXIT_INSN(), 42 BPF_EXIT_INSN(),
43 }, 43 },
44 .errstr = "back-edge", 44 .errstr = "unreachable insn 1",
45 .errstr_unpriv = "back-edge",
45 .result = REJECT, 46 .result = REJECT,
46}, 47},
47{ 48{
@@ -53,18 +54,20 @@
53 BPF_JMP_IMM(BPF_JA, 0, 0, -4), 54 BPF_JMP_IMM(BPF_JA, 0, 0, -4),
54 BPF_EXIT_INSN(), 55 BPF_EXIT_INSN(),
55 }, 56 },
56 .errstr = "back-edge", 57 .errstr = "unreachable insn 4",
58 .errstr_unpriv = "back-edge",
57 .result = REJECT, 59 .result = REJECT,
58}, 60},
59{ 61{
60 "conditional loop", 62 "conditional loop",
61 .insns = { 63 .insns = {
62 BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), 64 BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
63 BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), 65 BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
64 BPF_MOV64_REG(BPF_REG_3, BPF_REG_0), 66 BPF_MOV64_REG(BPF_REG_3, BPF_REG_0),
65 BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, -3), 67 BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, -3),
66 BPF_EXIT_INSN(), 68 BPF_EXIT_INSN(),
67 }, 69 },
68 .errstr = "back-edge", 70 .errstr = "infinite loop detected",
71 .errstr_unpriv = "back-edge",
69 .result = REJECT, 72 .result = REJECT,
70}, 73},
diff --git a/tools/testing/selftests/bpf/verifier/direct_packet_access.c b/tools/testing/selftests/bpf/verifier/direct_packet_access.c
index d5c596fdc4b9..2c5fbe7bcd27 100644
--- a/tools/testing/selftests/bpf/verifier/direct_packet_access.c
+++ b/tools/testing/selftests/bpf/verifier/direct_packet_access.c
@@ -511,7 +511,8 @@
511 offsetof(struct __sk_buff, data)), 511 offsetof(struct __sk_buff, data)),
512 BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, 512 BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
513 offsetof(struct __sk_buff, data_end)), 513 offsetof(struct __sk_buff, data_end)),
514 BPF_MOV64_IMM(BPF_REG_0, 0xffffffff), 514 BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
515 offsetof(struct __sk_buff, mark)),
515 BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8), 516 BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
516 BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8), 517 BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8),
517 BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xffff), 518 BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xffff),
diff --git a/tools/testing/selftests/bpf/verifier/helper_access_var_len.c b/tools/testing/selftests/bpf/verifier/helper_access_var_len.c
index 1f39d845c64f..67ab12410050 100644
--- a/tools/testing/selftests/bpf/verifier/helper_access_var_len.c
+++ b/tools/testing/selftests/bpf/verifier/helper_access_var_len.c
@@ -29,9 +29,9 @@
29{ 29{
30 "helper access to variable memory: stack, bitwise AND, zero included", 30 "helper access to variable memory: stack, bitwise AND, zero included",
31 .insns = { 31 .insns = {
32 BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 8),
32 BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), 33 BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
33 BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64), 34 BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
34 BPF_MOV64_IMM(BPF_REG_2, 16),
35 BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128), 35 BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128),
36 BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128), 36 BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
37 BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 64), 37 BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 64),
@@ -46,9 +46,9 @@
46{ 46{
47 "helper access to variable memory: stack, bitwise AND + JMP, wrong max", 47 "helper access to variable memory: stack, bitwise AND + JMP, wrong max",
48 .insns = { 48 .insns = {
49 BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 8),
49 BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), 50 BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
50 BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64), 51 BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
51 BPF_MOV64_IMM(BPF_REG_2, 16),
52 BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128), 52 BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128),
53 BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128), 53 BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
54 BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 65), 54 BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 65),
@@ -122,9 +122,9 @@
122{ 122{
123 "helper access to variable memory: stack, JMP, bounds + offset", 123 "helper access to variable memory: stack, JMP, bounds + offset",
124 .insns = { 124 .insns = {
125 BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 8),
125 BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), 126 BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
126 BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64), 127 BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
127 BPF_MOV64_IMM(BPF_REG_2, 16),
128 BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128), 128 BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128),
129 BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128), 129 BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
130 BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 64, 5), 130 BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 64, 5),
@@ -143,9 +143,9 @@
143{ 143{
144 "helper access to variable memory: stack, JMP, wrong max", 144 "helper access to variable memory: stack, JMP, wrong max",
145 .insns = { 145 .insns = {
146 BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 8),
146 BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), 147 BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
147 BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64), 148 BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
148 BPF_MOV64_IMM(BPF_REG_2, 16),
149 BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128), 149 BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128),
150 BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128), 150 BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
151 BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 65, 4), 151 BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 65, 4),
@@ -163,9 +163,9 @@
163{ 163{
164 "helper access to variable memory: stack, JMP, no max check", 164 "helper access to variable memory: stack, JMP, no max check",
165 .insns = { 165 .insns = {
166 BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 8),
166 BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), 167 BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
167 BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64), 168 BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
168 BPF_MOV64_IMM(BPF_REG_2, 16),
169 BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128), 169 BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128),
170 BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128), 170 BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
171 BPF_MOV64_IMM(BPF_REG_4, 0), 171 BPF_MOV64_IMM(BPF_REG_4, 0),
@@ -183,9 +183,9 @@
183{ 183{
184 "helper access to variable memory: stack, JMP, no min check", 184 "helper access to variable memory: stack, JMP, no min check",
185 .insns = { 185 .insns = {
186 BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 8),
186 BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), 187 BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
187 BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64), 188 BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
188 BPF_MOV64_IMM(BPF_REG_2, 16),
189 BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128), 189 BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128),
190 BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128), 190 BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
191 BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 64, 3), 191 BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 64, 3),
@@ -201,9 +201,9 @@
201{ 201{
202 "helper access to variable memory: stack, JMP (signed), no min check", 202 "helper access to variable memory: stack, JMP (signed), no min check",
203 .insns = { 203 .insns = {
204 BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 8),
204 BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), 205 BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
205 BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64), 206 BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
206 BPF_MOV64_IMM(BPF_REG_2, 16),
207 BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128), 207 BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128),
208 BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128), 208 BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
209 BPF_JMP_IMM(BPF_JSGT, BPF_REG_2, 64, 3), 209 BPF_JMP_IMM(BPF_JSGT, BPF_REG_2, 64, 3),
@@ -244,6 +244,7 @@
244{ 244{
245 "helper access to variable memory: map, JMP, wrong max", 245 "helper access to variable memory: map, JMP, wrong max",
246 .insns = { 246 .insns = {
247 BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 8),
247 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), 248 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
248 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), 249 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
249 BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), 250 BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
@@ -251,7 +252,7 @@
251 BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), 252 BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
252 BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 10), 253 BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 10),
253 BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), 254 BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
254 BPF_MOV64_IMM(BPF_REG_2, sizeof(struct test_val)), 255 BPF_MOV64_REG(BPF_REG_2, BPF_REG_6),
255 BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128), 256 BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128),
256 BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128), 257 BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128),
257 BPF_JMP_IMM(BPF_JSGT, BPF_REG_2, sizeof(struct test_val) + 1, 4), 258 BPF_JMP_IMM(BPF_JSGT, BPF_REG_2, sizeof(struct test_val) + 1, 4),
@@ -262,7 +263,7 @@
262 BPF_MOV64_IMM(BPF_REG_0, 0), 263 BPF_MOV64_IMM(BPF_REG_0, 0),
263 BPF_EXIT_INSN(), 264 BPF_EXIT_INSN(),
264 }, 265 },
265 .fixup_map_hash_48b = { 3 }, 266 .fixup_map_hash_48b = { 4 },
266 .errstr = "invalid access to map value, value_size=48 off=0 size=49", 267 .errstr = "invalid access to map value, value_size=48 off=0 size=49",
267 .result = REJECT, 268 .result = REJECT,
268 .prog_type = BPF_PROG_TYPE_TRACEPOINT, 269 .prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -296,6 +297,7 @@
296{ 297{
297 "helper access to variable memory: map adjusted, JMP, wrong max", 298 "helper access to variable memory: map adjusted, JMP, wrong max",
298 .insns = { 299 .insns = {
300 BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 8),
299 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), 301 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
300 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), 302 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
301 BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), 303 BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
@@ -304,7 +306,7 @@
304 BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11), 306 BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11),
305 BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), 307 BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
306 BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 20), 308 BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 20),
307 BPF_MOV64_IMM(BPF_REG_2, sizeof(struct test_val)), 309 BPF_MOV64_REG(BPF_REG_2, BPF_REG_6),
308 BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128), 310 BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128),
309 BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128), 311 BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128),
310 BPF_JMP_IMM(BPF_JSGT, BPF_REG_2, sizeof(struct test_val) - 19, 4), 312 BPF_JMP_IMM(BPF_JSGT, BPF_REG_2, sizeof(struct test_val) - 19, 4),
@@ -315,7 +317,7 @@
315 BPF_MOV64_IMM(BPF_REG_0, 0), 317 BPF_MOV64_IMM(BPF_REG_0, 0),
316 BPF_EXIT_INSN(), 318 BPF_EXIT_INSN(),
317 }, 319 },
318 .fixup_map_hash_48b = { 3 }, 320 .fixup_map_hash_48b = { 4 },
319 .errstr = "R1 min value is outside of the array range", 321 .errstr = "R1 min value is outside of the array range",
320 .result = REJECT, 322 .result = REJECT,
321 .prog_type = BPF_PROG_TYPE_TRACEPOINT, 323 .prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -337,8 +339,8 @@
337{ 339{
338 "helper access to variable memory: size > 0 not allowed on NULL (ARG_PTR_TO_MEM_OR_NULL)", 340 "helper access to variable memory: size > 0 not allowed on NULL (ARG_PTR_TO_MEM_OR_NULL)",
339 .insns = { 341 .insns = {
342 BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
340 BPF_MOV64_IMM(BPF_REG_1, 0), 343 BPF_MOV64_IMM(BPF_REG_1, 0),
341 BPF_MOV64_IMM(BPF_REG_2, 1),
342 BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128), 344 BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128),
343 BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128), 345 BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128),
344 BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 64), 346 BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 64),
@@ -562,6 +564,7 @@
562{ 564{
563 "helper access to variable memory: 8 bytes leak", 565 "helper access to variable memory: 8 bytes leak",
564 .insns = { 566 .insns = {
567 BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 8),
565 BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), 568 BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
566 BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64), 569 BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
567 BPF_MOV64_IMM(BPF_REG_0, 0), 570 BPF_MOV64_IMM(BPF_REG_0, 0),
@@ -572,7 +575,6 @@
572 BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -24), 575 BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -24),
573 BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16), 576 BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16),
574 BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8), 577 BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
575 BPF_MOV64_IMM(BPF_REG_2, 1),
576 BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128), 578 BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128),
577 BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128), 579 BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128),
578 BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 63), 580 BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 63),
diff --git a/tools/testing/selftests/bpf/verifier/loops1.c b/tools/testing/selftests/bpf/verifier/loops1.c
new file mode 100644
index 000000000000..5e980a5ab69d
--- /dev/null
+++ b/tools/testing/selftests/bpf/verifier/loops1.c
@@ -0,0 +1,161 @@
1{
2 "bounded loop, count to 4",
3 .insns = {
4 BPF_MOV64_IMM(BPF_REG_0, 0),
5 BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1),
6 BPF_JMP_IMM(BPF_JLT, BPF_REG_0, 4, -2),
7 BPF_EXIT_INSN(),
8 },
9 .result = ACCEPT,
10 .prog_type = BPF_PROG_TYPE_TRACEPOINT,
11 .retval = 4,
12},
13{
14 "bounded loop, count to 20",
15 .insns = {
16 BPF_MOV64_IMM(BPF_REG_0, 0),
17 BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 3),
18 BPF_JMP_IMM(BPF_JLT, BPF_REG_0, 20, -2),
19 BPF_EXIT_INSN(),
20 },
21 .result = ACCEPT,
22 .prog_type = BPF_PROG_TYPE_TRACEPOINT,
23},
24{
25 "bounded loop, count from positive unknown to 4",
26 .insns = {
27 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
28 BPF_JMP_IMM(BPF_JSLT, BPF_REG_0, 0, 2),
29 BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1),
30 BPF_JMP_IMM(BPF_JLT, BPF_REG_0, 4, -2),
31 BPF_EXIT_INSN(),
32 },
33 .result = ACCEPT,
34 .prog_type = BPF_PROG_TYPE_TRACEPOINT,
35 .retval = 4,
36},
37{
38 "bounded loop, count from totally unknown to 4",
39 .insns = {
40 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
41 BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1),
42 BPF_JMP_IMM(BPF_JLT, BPF_REG_0, 4, -2),
43 BPF_EXIT_INSN(),
44 },
45 .result = ACCEPT,
46 .prog_type = BPF_PROG_TYPE_TRACEPOINT,
47},
48{
49 "bounded loop, count to 4 with equality",
50 .insns = {
51 BPF_MOV64_IMM(BPF_REG_0, 0),
52 BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1),
53 BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 4, -2),
54 BPF_EXIT_INSN(),
55 },
56 .result = ACCEPT,
57 .prog_type = BPF_PROG_TYPE_TRACEPOINT,
58},
59{
60 "bounded loop, start in the middle",
61 .insns = {
62 BPF_MOV64_IMM(BPF_REG_0, 0),
63 BPF_JMP_A(1),
64 BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1),
65 BPF_JMP_IMM(BPF_JLT, BPF_REG_0, 4, -2),
66 BPF_EXIT_INSN(),
67 },
68 .result = REJECT,
69 .errstr = "back-edge",
70 .prog_type = BPF_PROG_TYPE_TRACEPOINT,
71 .retval = 4,
72},
73{
74 "bounded loop containing a forward jump",
75 .insns = {
76 BPF_MOV64_IMM(BPF_REG_0, 0),
77 BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1),
78 BPF_JMP_REG(BPF_JEQ, BPF_REG_0, BPF_REG_0, 0),
79 BPF_JMP_IMM(BPF_JLT, BPF_REG_0, 4, -3),
80 BPF_EXIT_INSN(),
81 },
82 .result = ACCEPT,
83 .prog_type = BPF_PROG_TYPE_TRACEPOINT,
84 .retval = 4,
85},
86{
87 "bounded loop that jumps out rather than in",
88 .insns = {
89 BPF_MOV64_IMM(BPF_REG_6, 0),
90 BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1),
91 BPF_JMP_IMM(BPF_JGT, BPF_REG_6, 10000, 2),
92 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
93 BPF_JMP_A(-4),
94 BPF_EXIT_INSN(),
95 },
96 .result = ACCEPT,
97 .prog_type = BPF_PROG_TYPE_TRACEPOINT,
98},
99{
100 "infinite loop after a conditional jump",
101 .insns = {
102 BPF_MOV64_IMM(BPF_REG_0, 5),
103 BPF_JMP_IMM(BPF_JLT, BPF_REG_0, 4, 2),
104 BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1),
105 BPF_JMP_A(-2),
106 BPF_EXIT_INSN(),
107 },
108 .result = REJECT,
109 .errstr = "program is too large",
110 .prog_type = BPF_PROG_TYPE_TRACEPOINT,
111},
112{
113 "bounded recursion",
114 .insns = {
115 BPF_MOV64_IMM(BPF_REG_1, 0),
116 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
117 BPF_EXIT_INSN(),
118 BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 1),
119 BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
120 BPF_JMP_IMM(BPF_JLT, BPF_REG_1, 4, 1),
121 BPF_EXIT_INSN(),
122 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -5),
123 BPF_EXIT_INSN(),
124 },
125 .result = REJECT,
126 .errstr = "back-edge",
127 .prog_type = BPF_PROG_TYPE_TRACEPOINT,
128},
129{
130 "infinite loop in two jumps",
131 .insns = {
132 BPF_MOV64_IMM(BPF_REG_0, 0),
133 BPF_JMP_A(0),
134 BPF_JMP_IMM(BPF_JLT, BPF_REG_0, 4, -2),
135 BPF_EXIT_INSN(),
136 },
137 .result = REJECT,
138 .errstr = "loop detected",
139 .prog_type = BPF_PROG_TYPE_TRACEPOINT,
140},
141{
142 "infinite loop: three-jump trick",
143 .insns = {
144 BPF_MOV64_IMM(BPF_REG_0, 0),
145 BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1),
146 BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
147 BPF_JMP_IMM(BPF_JLT, BPF_REG_0, 2, 1),
148 BPF_EXIT_INSN(),
149 BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1),
150 BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
151 BPF_JMP_IMM(BPF_JLT, BPF_REG_0, 2, 1),
152 BPF_EXIT_INSN(),
153 BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1),
154 BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
155 BPF_JMP_IMM(BPF_JLT, BPF_REG_0, 2, -11),
156 BPF_EXIT_INSN(),
157 },
158 .result = REJECT,
159 .errstr = "loop detected",
160 .prog_type = BPF_PROG_TYPE_TRACEPOINT,
161},
diff --git a/tools/testing/selftests/bpf/verifier/prevent_map_lookup.c b/tools/testing/selftests/bpf/verifier/prevent_map_lookup.c
index bbdba990fefb..da7a4b37cb98 100644
--- a/tools/testing/selftests/bpf/verifier/prevent_map_lookup.c
+++ b/tools/testing/selftests/bpf/verifier/prevent_map_lookup.c
@@ -29,21 +29,6 @@
29 .prog_type = BPF_PROG_TYPE_SOCK_OPS, 29 .prog_type = BPF_PROG_TYPE_SOCK_OPS,
30}, 30},
31{ 31{
32 "prevent map lookup in xskmap",
33 .insns = {
34 BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
35 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
36 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
37 BPF_LD_MAP_FD(BPF_REG_1, 0),
38 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
39 BPF_EXIT_INSN(),
40 },
41 .fixup_map_xskmap = { 3 },
42 .result = REJECT,
43 .errstr = "cannot pass map_type 17 into func bpf_map_lookup_elem",
44 .prog_type = BPF_PROG_TYPE_XDP,
45},
46{
47 "prevent map lookup in stack trace", 32 "prevent map lookup in stack trace",
48 .insns = { 33 .insns = {
49 BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), 34 BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
diff --git a/tools/testing/selftests/bpf/verifier/sock.c b/tools/testing/selftests/bpf/verifier/sock.c
index b31cd2cf50d0..9ed192e14f5f 100644
--- a/tools/testing/selftests/bpf/verifier/sock.c
+++ b/tools/testing/selftests/bpf/verifier/sock.c
@@ -498,3 +498,21 @@
498 .result = REJECT, 498 .result = REJECT,
499 .errstr = "cannot pass map_type 24 into func bpf_map_lookup_elem", 499 .errstr = "cannot pass map_type 24 into func bpf_map_lookup_elem",
500}, 500},
501{
502 "bpf_map_lookup_elem(xskmap, &key); xs->queue_id",
503 .insns = {
504 BPF_ST_MEM(BPF_W, BPF_REG_10, -8, 0),
505 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
506 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
507 BPF_LD_MAP_FD(BPF_REG_1, 0),
508 BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
509 BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
510 BPF_EXIT_INSN(),
511 BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_xdp_sock, queue_id)),
512 BPF_MOV64_IMM(BPF_REG_0, 0),
513 BPF_EXIT_INSN(),
514 },
515 .fixup_map_xskmap = { 3 },
516 .prog_type = BPF_PROG_TYPE_XDP,
517 .result = ACCEPT,
518},