aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2019-06-01 00:21:18 -0400
committerDavid S. Miller <davem@davemloft.net>2019-06-01 00:21:18 -0400
commit0462eaacee493f7e2d87551a35d38be93ca723f8 (patch)
treec2d454ff64156281c9b4ce071194cb9a47e5dd1a
parent33aae28285b73e013f7f697a61f569c5b48c6650 (diff)
parentcd5385029f1d2e6879b78fff1a7b15514004af17 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Alexei Starovoitov says: ==================== pull-request: bpf-next 2019-05-31 The following pull-request contains BPF updates for your *net-next* tree. Lots of exciting new features in the first PR of this developement cycle! The main changes are: 1) misc verifier improvements, from Alexei. 2) bpftool can now convert btf to valid C, from Andrii. 3) verifier can insert explicit ZEXT insn when requested by 32-bit JITs. This feature greatly improves BPF speed on 32-bit architectures. From Jiong. 4) cgroups will now auto-detach bpf programs. This fixes issue of thousands bpf programs got stuck in dying cgroups. From Roman. 5) new bpf_send_signal() helper, from Yonghong. 6) cgroup inet skb programs can signal CN to the stack, from Lawrence. 7) miscellaneous cleanups, from many developers. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--Documentation/bpf/bpf_design_QA.rst30
-rw-r--r--arch/arm/net/bpf_jit_32.c42
-rw-r--r--arch/powerpc/net/bpf_jit_comp64.c36
-rw-r--r--arch/riscv/net/bpf_jit_comp.c43
-rw-r--r--arch/s390/net/bpf_jit_comp.c41
-rw-r--r--arch/sparc/net/bpf_jit_comp_64.c29
-rw-r--r--arch/x86/net/bpf_jit_comp32.c83
-rw-r--r--drivers/media/rc/bpf-lirc.c30
-rw-r--r--drivers/net/ethernet/netronome/nfp/bpf/jit.c115
-rw-r--r--drivers/net/ethernet/netronome/nfp/bpf/main.h2
-rw-r--r--drivers/net/ethernet/netronome/nfp/bpf/verifier.c12
-rw-r--r--include/linux/bpf-cgroup.h13
-rw-r--r--include/linux/bpf.h78
-rw-r--r--include/linux/bpf_verifier.h16
-rw-r--r--include/linux/cgroup.h18
-rw-r--r--include/linux/filter.h18
-rw-r--r--include/uapi/linux/bpf.h35
-rw-r--r--kernel/bpf/arraymap.c18
-rw-r--r--kernel/bpf/cgroup.c94
-rw-r--r--kernel/bpf/core.c46
-rw-r--r--kernel/bpf/cpumap.c9
-rw-r--r--kernel/bpf/devmap.c14
-rw-r--r--kernel/bpf/hashtab.c14
-rw-r--r--kernel/bpf/local_storage.c13
-rw-r--r--kernel/bpf/lpm_trie.c8
-rw-r--r--kernel/bpf/queue_stack_maps.c13
-rw-r--r--kernel/bpf/reuseport_array.c17
-rw-r--r--kernel/bpf/stackmap.c28
-rw-r--r--kernel/bpf/syscall.c103
-rw-r--r--kernel/bpf/verifier.c397
-rw-r--r--kernel/bpf/xskmap.c10
-rw-r--r--kernel/cgroup/cgroup.c11
-rw-r--r--kernel/trace/bpf_trace.c96
-rw-r--r--net/core/bpf_sk_storage.c12
-rw-r--r--net/core/sock_map.c9
-rw-r--r--net/ipv4/ip_output.c34
-rw-r--r--net/ipv6/ip6_output.c26
-rw-r--r--samples/bpf/.gitignore1
-rw-r--r--samples/bpf/Makefile2
-rw-r--r--samples/bpf/bpf_load.c8
-rwxr-xr-xsamples/bpf/do_hbm_test.sh10
-rw-r--r--samples/bpf/hbm.c51
-rw-r--r--samples/bpf/hbm.h9
-rw-r--r--samples/bpf/hbm_kern.h77
-rw-r--r--samples/bpf/hbm_out_kern.c48
-rw-r--r--samples/bpf/tcp_basertt_kern.c7
-rw-r--r--samples/bpf/tcp_bufs_kern.c7
-rw-r--r--samples/bpf/tcp_clamp_kern.c7
-rw-r--r--samples/bpf/tcp_cong_kern.c7
-rw-r--r--samples/bpf/tcp_iw_kern.c7
-rw-r--r--samples/bpf/tcp_rwnd_kern.c7
-rw-r--r--samples/bpf/tcp_synrto_kern.c7
-rw-r--r--samples/bpf/tcp_tos_reflect_kern.c7
-rw-r--r--samples/bpf/xdp_sample_pkts_kern.c7
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-btf.rst39
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-cgroup.rst4
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-feature.rst4
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-map.rst4
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-net.rst4
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-perf.rst4
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-prog.rst5
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool.rst4
-rw-r--r--tools/bpf/bpftool/bash-completion/bpftool32
-rw-r--r--tools/bpf/bpftool/btf.c162
-rw-r--r--tools/bpf/bpftool/main.c16
-rw-r--r--tools/bpf/bpftool/main.h1
-rw-r--r--tools/bpf/bpftool/prog.c27
-rw-r--r--tools/bpf/bpftool/xlated_dumper.c4
-rw-r--r--tools/include/uapi/linux/bpf.h35
-rw-r--r--tools/include/uapi/linux/if_tun.h114
-rw-r--r--tools/lib/bpf/Build4
-rw-r--r--tools/lib/bpf/Makefile12
-rw-r--r--tools/lib/bpf/bpf.c1
-rw-r--r--tools/lib/bpf/bpf.h1
-rw-r--r--tools/lib/bpf/btf.c329
-rw-r--r--tools/lib/bpf/btf.h19
-rw-r--r--tools/lib/bpf/btf_dump.c1336
-rw-r--r--tools/lib/bpf/hashmap.c229
-rw-r--r--tools/lib/bpf/hashmap.h173
-rw-r--r--tools/lib/bpf/libbpf.c175
-rw-r--r--tools/lib/bpf/libbpf.h7
-rw-r--r--tools/lib/bpf/libbpf.map9
-rw-r--r--tools/lib/bpf/libbpf_internal.h2
-rw-r--r--tools/testing/selftests/bpf/.gitignore4
-rw-r--r--tools/testing/selftests/bpf/Makefile17
-rw-r--r--tools/testing/selftests/bpf/bpf_helpers.h9
-rw-r--r--tools/testing/selftests/bpf/cgroup_helpers.c57
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c32
-rw-r--r--tools/testing/selftests/bpf/prog_tests/send_signal.c198
-rw-r--r--tools/testing/selftests/bpf/progs/btf_dump_test_case_bitfields.c92
-rw-r--r--tools/testing/selftests/bpf/progs/btf_dump_test_case_multidim.c35
-rw-r--r--tools/testing/selftests/bpf/progs/btf_dump_test_case_namespacing.c73
-rw-r--r--tools/testing/selftests/bpf/progs/btf_dump_test_case_ordering.c63
-rw-r--r--tools/testing/selftests/bpf/progs/btf_dump_test_case_packing.c75
-rw-r--r--tools/testing/selftests/bpf/progs/btf_dump_test_case_padding.c111
-rw-r--r--tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c229
-rw-r--r--tools/testing/selftests/bpf/progs/pyperf.h268
-rw-r--r--tools/testing/selftests/bpf/progs/pyperf100.c4
-rw-r--r--tools/testing/selftests/bpf/progs/pyperf180.c4
-rw-r--r--tools/testing/selftests/bpf/progs/pyperf50.c4
-rw-r--r--tools/testing/selftests/bpf/progs/sockmap_parse_prog.c7
-rw-r--r--tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c7
-rw-r--r--tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c7
-rw-r--r--tools/testing/selftests/bpf/progs/test_lwt_seg6local.c7
-rw-r--r--tools/testing/selftests/bpf/progs/test_send_signal_kern.c51
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_noinline.c7
-rw-r--r--tools/testing/selftests/bpf/progs/xdping_kern.c184
-rw-r--r--tools/testing/selftests/bpf/test_btf.c71
-rw-r--r--tools/testing/selftests/bpf/test_btf_dump.c143
-rw-r--r--tools/testing/selftests/bpf/test_cgroup_attach.c (renamed from samples/bpf/test_cgrp2_attach2.c)146
-rw-r--r--tools/testing/selftests/bpf/test_hashmap.c382
-rw-r--r--tools/testing/selftests/bpf/test_sock_addr.c1
-rw-r--r--tools/testing/selftests/bpf/test_sock_fields.c1
-rw-r--r--tools/testing/selftests/bpf/test_socket_cookie.c1
-rw-r--r--tools/testing/selftests/bpf/test_sockmap_kern.h7
-rw-r--r--tools/testing/selftests/bpf/test_stub.c40
-rwxr-xr-xtools/testing/selftests/bpf/test_tunnel.sh32
-rw-r--r--tools/testing/selftests/bpf/test_verifier.c62
-rwxr-xr-xtools/testing/selftests/bpf/test_xdping.sh99
-rw-r--r--tools/testing/selftests/bpf/trace_helpers.c4
-rw-r--r--tools/testing/selftests/bpf/xdping.c258
-rw-r--r--tools/testing/selftests/bpf/xdping.h13
122 files changed, 6430 insertions, 1013 deletions
diff --git a/Documentation/bpf/bpf_design_QA.rst b/Documentation/bpf/bpf_design_QA.rst
index cb402c59eca5..12a246fcf6cb 100644
--- a/Documentation/bpf/bpf_design_QA.rst
+++ b/Documentation/bpf/bpf_design_QA.rst
@@ -172,11 +172,31 @@ registers which makes BPF inefficient virtual machine for 32-bit
172CPU architectures and 32-bit HW accelerators. Can true 32-bit registers 172CPU architectures and 32-bit HW accelerators. Can true 32-bit registers
173be added to BPF in the future? 173be added to BPF in the future?
174 174
175A: NO. The first thing to improve performance on 32-bit archs is to teach 175A: NO.
176LLVM to generate code that uses 32-bit subregisters. Then second step 176
177is to teach verifier to mark operations where zero-ing upper bits 177But some optimizations on zero-ing the upper 32 bits for BPF registers are
178is unnecessary. Then JITs can take advantage of those markings and 178available, and can be leveraged to improve the performance of JITed BPF
179drastically reduce size of generated code and improve performance. 179programs for 32-bit architectures.
180
181Starting with version 7, LLVM is able to generate instructions that operate
182on 32-bit subregisters, provided the option -mattr=+alu32 is passed for
183compiling a program. Furthermore, the verifier can now mark the
184instructions for which zero-ing the upper bits of the destination register
185is required, and insert an explicit zero-extension (zext) instruction
186(a mov32 variant). This means that for architectures without zext hardware
187support, the JIT back-ends do not need to clear the upper bits for
188subregisters written by alu32 instructions or narrow loads. Instead, the
189back-ends simply need to support code generation for that mov32 variant,
190and to overwrite bpf_jit_needs_zext() to make it return "true" (in order to
191enable zext insertion in the verifier).
192
193Note that it is possible for a JIT back-end to have partial hardware
194support for zext. In that case, if verifier zext insertion is enabled,
195it could lead to the insertion of unnecessary zext instructions. Such
196instructions could be removed by creating a simple peephole inside the JIT
197back-end: if one instruction has hardware support for zext and if the next
198instruction is an explicit zext, then the latter can be skipped when doing
199the code generation.
180 200
181Q: Does BPF have a stable ABI? 201Q: Does BPF have a stable ABI?
182------------------------------ 202------------------------------
diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c
index c8bfbbfdfcc3..97a6b4b2a115 100644
--- a/arch/arm/net/bpf_jit_32.c
+++ b/arch/arm/net/bpf_jit_32.c
@@ -736,7 +736,8 @@ static inline void emit_a32_alu_r64(const bool is64, const s8 dst[],
736 736
737 /* ALU operation */ 737 /* ALU operation */
738 emit_alu_r(rd[1], rs, true, false, op, ctx); 738 emit_alu_r(rd[1], rs, true, false, op, ctx);
739 emit_a32_mov_i(rd[0], 0, ctx); 739 if (!ctx->prog->aux->verifier_zext)
740 emit_a32_mov_i(rd[0], 0, ctx);
740 } 741 }
741 742
742 arm_bpf_put_reg64(dst, rd, ctx); 743 arm_bpf_put_reg64(dst, rd, ctx);
@@ -758,8 +759,9 @@ static inline void emit_a32_mov_r64(const bool is64, const s8 dst[],
758 struct jit_ctx *ctx) { 759 struct jit_ctx *ctx) {
759 if (!is64) { 760 if (!is64) {
760 emit_a32_mov_r(dst_lo, src_lo, ctx); 761 emit_a32_mov_r(dst_lo, src_lo, ctx);
761 /* Zero out high 4 bytes */ 762 if (!ctx->prog->aux->verifier_zext)
762 emit_a32_mov_i(dst_hi, 0, ctx); 763 /* Zero out high 4 bytes */
764 emit_a32_mov_i(dst_hi, 0, ctx);
763 } else if (__LINUX_ARM_ARCH__ < 6 && 765 } else if (__LINUX_ARM_ARCH__ < 6 &&
764 ctx->cpu_architecture < CPU_ARCH_ARMv5TE) { 766 ctx->cpu_architecture < CPU_ARCH_ARMv5TE) {
765 /* complete 8 byte move */ 767 /* complete 8 byte move */
@@ -1060,17 +1062,20 @@ static inline void emit_ldx_r(const s8 dst[], const s8 src,
1060 case BPF_B: 1062 case BPF_B:
1061 /* Load a Byte */ 1063 /* Load a Byte */
1062 emit(ARM_LDRB_I(rd[1], rm, off), ctx); 1064 emit(ARM_LDRB_I(rd[1], rm, off), ctx);
1063 emit_a32_mov_i(rd[0], 0, ctx); 1065 if (!ctx->prog->aux->verifier_zext)
1066 emit_a32_mov_i(rd[0], 0, ctx);
1064 break; 1067 break;
1065 case BPF_H: 1068 case BPF_H:
1066 /* Load a HalfWord */ 1069 /* Load a HalfWord */
1067 emit(ARM_LDRH_I(rd[1], rm, off), ctx); 1070 emit(ARM_LDRH_I(rd[1], rm, off), ctx);
1068 emit_a32_mov_i(rd[0], 0, ctx); 1071 if (!ctx->prog->aux->verifier_zext)
1072 emit_a32_mov_i(rd[0], 0, ctx);
1069 break; 1073 break;
1070 case BPF_W: 1074 case BPF_W:
1071 /* Load a Word */ 1075 /* Load a Word */
1072 emit(ARM_LDR_I(rd[1], rm, off), ctx); 1076 emit(ARM_LDR_I(rd[1], rm, off), ctx);
1073 emit_a32_mov_i(rd[0], 0, ctx); 1077 if (!ctx->prog->aux->verifier_zext)
1078 emit_a32_mov_i(rd[0], 0, ctx);
1074 break; 1079 break;
1075 case BPF_DW: 1080 case BPF_DW:
1076 /* Load a Double Word */ 1081 /* Load a Double Word */
@@ -1359,6 +1364,11 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
1359 case BPF_ALU64 | BPF_MOV | BPF_X: 1364 case BPF_ALU64 | BPF_MOV | BPF_X:
1360 switch (BPF_SRC(code)) { 1365 switch (BPF_SRC(code)) {
1361 case BPF_X: 1366 case BPF_X:
1367 if (imm == 1) {
1368 /* Special mov32 for zext */
1369 emit_a32_mov_i(dst_hi, 0, ctx);
1370 break;
1371 }
1362 emit_a32_mov_r64(is64, dst, src, ctx); 1372 emit_a32_mov_r64(is64, dst, src, ctx);
1363 break; 1373 break;
1364 case BPF_K: 1374 case BPF_K:
@@ -1438,7 +1448,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
1438 } 1448 }
1439 emit_udivmod(rd_lo, rd_lo, rt, ctx, BPF_OP(code)); 1449 emit_udivmod(rd_lo, rd_lo, rt, ctx, BPF_OP(code));
1440 arm_bpf_put_reg32(dst_lo, rd_lo, ctx); 1450 arm_bpf_put_reg32(dst_lo, rd_lo, ctx);
1441 emit_a32_mov_i(dst_hi, 0, ctx); 1451 if (!ctx->prog->aux->verifier_zext)
1452 emit_a32_mov_i(dst_hi, 0, ctx);
1442 break; 1453 break;
1443 case BPF_ALU64 | BPF_DIV | BPF_K: 1454 case BPF_ALU64 | BPF_DIV | BPF_K:
1444 case BPF_ALU64 | BPF_DIV | BPF_X: 1455 case BPF_ALU64 | BPF_DIV | BPF_X:
@@ -1453,7 +1464,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
1453 return -EINVAL; 1464 return -EINVAL;
1454 if (imm) 1465 if (imm)
1455 emit_a32_alu_i(dst_lo, imm, ctx, BPF_OP(code)); 1466 emit_a32_alu_i(dst_lo, imm, ctx, BPF_OP(code));
1456 emit_a32_mov_i(dst_hi, 0, ctx); 1467 if (!ctx->prog->aux->verifier_zext)
1468 emit_a32_mov_i(dst_hi, 0, ctx);
1457 break; 1469 break;
1458 /* dst = dst << imm */ 1470 /* dst = dst << imm */
1459 case BPF_ALU64 | BPF_LSH | BPF_K: 1471 case BPF_ALU64 | BPF_LSH | BPF_K:
@@ -1488,7 +1500,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
1488 /* dst = ~dst */ 1500 /* dst = ~dst */
1489 case BPF_ALU | BPF_NEG: 1501 case BPF_ALU | BPF_NEG:
1490 emit_a32_alu_i(dst_lo, 0, ctx, BPF_OP(code)); 1502 emit_a32_alu_i(dst_lo, 0, ctx, BPF_OP(code));
1491 emit_a32_mov_i(dst_hi, 0, ctx); 1503 if (!ctx->prog->aux->verifier_zext)
1504 emit_a32_mov_i(dst_hi, 0, ctx);
1492 break; 1505 break;
1493 /* dst = ~dst (64 bit) */ 1506 /* dst = ~dst (64 bit) */
1494 case BPF_ALU64 | BPF_NEG: 1507 case BPF_ALU64 | BPF_NEG:
@@ -1544,11 +1557,13 @@ emit_bswap_uxt:
1544#else /* ARMv6+ */ 1557#else /* ARMv6+ */
1545 emit(ARM_UXTH(rd[1], rd[1]), ctx); 1558 emit(ARM_UXTH(rd[1], rd[1]), ctx);
1546#endif 1559#endif
1547 emit(ARM_EOR_R(rd[0], rd[0], rd[0]), ctx); 1560 if (!ctx->prog->aux->verifier_zext)
1561 emit(ARM_EOR_R(rd[0], rd[0], rd[0]), ctx);
1548 break; 1562 break;
1549 case 32: 1563 case 32:
1550 /* zero-extend 32 bits into 64 bits */ 1564 /* zero-extend 32 bits into 64 bits */
1551 emit(ARM_EOR_R(rd[0], rd[0], rd[0]), ctx); 1565 if (!ctx->prog->aux->verifier_zext)
1566 emit(ARM_EOR_R(rd[0], rd[0], rd[0]), ctx);
1552 break; 1567 break;
1553 case 64: 1568 case 64:
1554 /* nop */ 1569 /* nop */
@@ -1838,6 +1853,11 @@ void bpf_jit_compile(struct bpf_prog *prog)
1838 /* Nothing to do here. We support Internal BPF. */ 1853 /* Nothing to do here. We support Internal BPF. */
1839} 1854}
1840 1855
1856bool bpf_jit_needs_zext(void)
1857{
1858 return true;
1859}
1860
1841struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) 1861struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
1842{ 1862{
1843 struct bpf_prog *tmp, *orig_prog = prog; 1863 struct bpf_prog *tmp, *orig_prog = prog;
diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
index 21a1dcd4b156..0ebd946f178b 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -504,6 +504,9 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
504 case BPF_ALU | BPF_LSH | BPF_X: /* (u32) dst <<= (u32) src */ 504 case BPF_ALU | BPF_LSH | BPF_X: /* (u32) dst <<= (u32) src */
505 /* slw clears top 32 bits */ 505 /* slw clears top 32 bits */
506 PPC_SLW(dst_reg, dst_reg, src_reg); 506 PPC_SLW(dst_reg, dst_reg, src_reg);
507 /* skip zero extension move, but set address map. */
508 if (insn_is_zext(&insn[i + 1]))
509 addrs[++i] = ctx->idx * 4;
507 break; 510 break;
508 case BPF_ALU64 | BPF_LSH | BPF_X: /* dst <<= src; */ 511 case BPF_ALU64 | BPF_LSH | BPF_X: /* dst <<= src; */
509 PPC_SLD(dst_reg, dst_reg, src_reg); 512 PPC_SLD(dst_reg, dst_reg, src_reg);
@@ -511,6 +514,8 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
511 case BPF_ALU | BPF_LSH | BPF_K: /* (u32) dst <<== (u32) imm */ 514 case BPF_ALU | BPF_LSH | BPF_K: /* (u32) dst <<== (u32) imm */
512 /* with imm 0, we still need to clear top 32 bits */ 515 /* with imm 0, we still need to clear top 32 bits */
513 PPC_SLWI(dst_reg, dst_reg, imm); 516 PPC_SLWI(dst_reg, dst_reg, imm);
517 if (insn_is_zext(&insn[i + 1]))
518 addrs[++i] = ctx->idx * 4;
514 break; 519 break;
515 case BPF_ALU64 | BPF_LSH | BPF_K: /* dst <<== imm */ 520 case BPF_ALU64 | BPF_LSH | BPF_K: /* dst <<== imm */
516 if (imm != 0) 521 if (imm != 0)
@@ -518,12 +523,16 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
518 break; 523 break;
519 case BPF_ALU | BPF_RSH | BPF_X: /* (u32) dst >>= (u32) src */ 524 case BPF_ALU | BPF_RSH | BPF_X: /* (u32) dst >>= (u32) src */
520 PPC_SRW(dst_reg, dst_reg, src_reg); 525 PPC_SRW(dst_reg, dst_reg, src_reg);
526 if (insn_is_zext(&insn[i + 1]))
527 addrs[++i] = ctx->idx * 4;
521 break; 528 break;
522 case BPF_ALU64 | BPF_RSH | BPF_X: /* dst >>= src */ 529 case BPF_ALU64 | BPF_RSH | BPF_X: /* dst >>= src */
523 PPC_SRD(dst_reg, dst_reg, src_reg); 530 PPC_SRD(dst_reg, dst_reg, src_reg);
524 break; 531 break;
525 case BPF_ALU | BPF_RSH | BPF_K: /* (u32) dst >>= (u32) imm */ 532 case BPF_ALU | BPF_RSH | BPF_K: /* (u32) dst >>= (u32) imm */
526 PPC_SRWI(dst_reg, dst_reg, imm); 533 PPC_SRWI(dst_reg, dst_reg, imm);
534 if (insn_is_zext(&insn[i + 1]))
535 addrs[++i] = ctx->idx * 4;
527 break; 536 break;
528 case BPF_ALU64 | BPF_RSH | BPF_K: /* dst >>= imm */ 537 case BPF_ALU64 | BPF_RSH | BPF_K: /* dst >>= imm */
529 if (imm != 0) 538 if (imm != 0)
@@ -548,6 +557,11 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
548 */ 557 */
549 case BPF_ALU | BPF_MOV | BPF_X: /* (u32) dst = src */ 558 case BPF_ALU | BPF_MOV | BPF_X: /* (u32) dst = src */
550 case BPF_ALU64 | BPF_MOV | BPF_X: /* dst = src */ 559 case BPF_ALU64 | BPF_MOV | BPF_X: /* dst = src */
560 if (imm == 1) {
561 /* special mov32 for zext */
562 PPC_RLWINM(dst_reg, dst_reg, 0, 0, 31);
563 break;
564 }
551 PPC_MR(dst_reg, src_reg); 565 PPC_MR(dst_reg, src_reg);
552 goto bpf_alu32_trunc; 566 goto bpf_alu32_trunc;
553 case BPF_ALU | BPF_MOV | BPF_K: /* (u32) dst = imm */ 567 case BPF_ALU | BPF_MOV | BPF_K: /* (u32) dst = imm */
@@ -555,11 +569,13 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
555 PPC_LI32(dst_reg, imm); 569 PPC_LI32(dst_reg, imm);
556 if (imm < 0) 570 if (imm < 0)
557 goto bpf_alu32_trunc; 571 goto bpf_alu32_trunc;
572 else if (insn_is_zext(&insn[i + 1]))
573 addrs[++i] = ctx->idx * 4;
558 break; 574 break;
559 575
560bpf_alu32_trunc: 576bpf_alu32_trunc:
561 /* Truncate to 32-bits */ 577 /* Truncate to 32-bits */
562 if (BPF_CLASS(code) == BPF_ALU) 578 if (BPF_CLASS(code) == BPF_ALU && !fp->aux->verifier_zext)
563 PPC_RLWINM(dst_reg, dst_reg, 0, 0, 31); 579 PPC_RLWINM(dst_reg, dst_reg, 0, 0, 31);
564 break; 580 break;
565 581
@@ -618,10 +634,13 @@ emit_clear:
618 case 16: 634 case 16:
619 /* zero-extend 16 bits into 64 bits */ 635 /* zero-extend 16 bits into 64 bits */
620 PPC_RLDICL(dst_reg, dst_reg, 0, 48); 636 PPC_RLDICL(dst_reg, dst_reg, 0, 48);
637 if (insn_is_zext(&insn[i + 1]))
638 addrs[++i] = ctx->idx * 4;
621 break; 639 break;
622 case 32: 640 case 32:
623 /* zero-extend 32 bits into 64 bits */ 641 if (!fp->aux->verifier_zext)
624 PPC_RLDICL(dst_reg, dst_reg, 0, 32); 642 /* zero-extend 32 bits into 64 bits */
643 PPC_RLDICL(dst_reg, dst_reg, 0, 32);
625 break; 644 break;
626 case 64: 645 case 64:
627 /* nop */ 646 /* nop */
@@ -698,14 +717,20 @@ emit_clear:
698 /* dst = *(u8 *)(ul) (src + off) */ 717 /* dst = *(u8 *)(ul) (src + off) */
699 case BPF_LDX | BPF_MEM | BPF_B: 718 case BPF_LDX | BPF_MEM | BPF_B:
700 PPC_LBZ(dst_reg, src_reg, off); 719 PPC_LBZ(dst_reg, src_reg, off);
720 if (insn_is_zext(&insn[i + 1]))
721 addrs[++i] = ctx->idx * 4;
701 break; 722 break;
702 /* dst = *(u16 *)(ul) (src + off) */ 723 /* dst = *(u16 *)(ul) (src + off) */
703 case BPF_LDX | BPF_MEM | BPF_H: 724 case BPF_LDX | BPF_MEM | BPF_H:
704 PPC_LHZ(dst_reg, src_reg, off); 725 PPC_LHZ(dst_reg, src_reg, off);
726 if (insn_is_zext(&insn[i + 1]))
727 addrs[++i] = ctx->idx * 4;
705 break; 728 break;
706 /* dst = *(u32 *)(ul) (src + off) */ 729 /* dst = *(u32 *)(ul) (src + off) */
707 case BPF_LDX | BPF_MEM | BPF_W: 730 case BPF_LDX | BPF_MEM | BPF_W:
708 PPC_LWZ(dst_reg, src_reg, off); 731 PPC_LWZ(dst_reg, src_reg, off);
732 if (insn_is_zext(&insn[i + 1]))
733 addrs[++i] = ctx->idx * 4;
709 break; 734 break;
710 /* dst = *(u64 *)(ul) (src + off) */ 735 /* dst = *(u64 *)(ul) (src + off) */
711 case BPF_LDX | BPF_MEM | BPF_DW: 736 case BPF_LDX | BPF_MEM | BPF_DW:
@@ -1046,6 +1071,11 @@ struct powerpc64_jit_data {
1046 struct codegen_context ctx; 1071 struct codegen_context ctx;
1047}; 1072};
1048 1073
1074bool bpf_jit_needs_zext(void)
1075{
1076 return true;
1077}
1078
1049struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) 1079struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
1050{ 1080{
1051 u32 proglen; 1081 u32 proglen;
diff --git a/arch/riscv/net/bpf_jit_comp.c b/arch/riscv/net/bpf_jit_comp.c
index 80b12aa5e10d..c4c836e3d318 100644
--- a/arch/riscv/net/bpf_jit_comp.c
+++ b/arch/riscv/net/bpf_jit_comp.c
@@ -731,6 +731,7 @@ static int emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
731{ 731{
732 bool is64 = BPF_CLASS(insn->code) == BPF_ALU64 || 732 bool is64 = BPF_CLASS(insn->code) == BPF_ALU64 ||
733 BPF_CLASS(insn->code) == BPF_JMP; 733 BPF_CLASS(insn->code) == BPF_JMP;
734 struct bpf_prog_aux *aux = ctx->prog->aux;
734 int rvoff, i = insn - ctx->prog->insnsi; 735 int rvoff, i = insn - ctx->prog->insnsi;
735 u8 rd = -1, rs = -1, code = insn->code; 736 u8 rd = -1, rs = -1, code = insn->code;
736 s16 off = insn->off; 737 s16 off = insn->off;
@@ -742,8 +743,13 @@ static int emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
742 /* dst = src */ 743 /* dst = src */
743 case BPF_ALU | BPF_MOV | BPF_X: 744 case BPF_ALU | BPF_MOV | BPF_X:
744 case BPF_ALU64 | BPF_MOV | BPF_X: 745 case BPF_ALU64 | BPF_MOV | BPF_X:
746 if (imm == 1) {
747 /* Special mov32 for zext */
748 emit_zext_32(rd, ctx);
749 break;
750 }
745 emit(is64 ? rv_addi(rd, rs, 0) : rv_addiw(rd, rs, 0), ctx); 751 emit(is64 ? rv_addi(rd, rs, 0) : rv_addiw(rd, rs, 0), ctx);
746 if (!is64) 752 if (!is64 && !aux->verifier_zext)
747 emit_zext_32(rd, ctx); 753 emit_zext_32(rd, ctx);
748 break; 754 break;
749 755
@@ -771,19 +777,19 @@ static int emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
771 case BPF_ALU | BPF_MUL | BPF_X: 777 case BPF_ALU | BPF_MUL | BPF_X:
772 case BPF_ALU64 | BPF_MUL | BPF_X: 778 case BPF_ALU64 | BPF_MUL | BPF_X:
773 emit(is64 ? rv_mul(rd, rd, rs) : rv_mulw(rd, rd, rs), ctx); 779 emit(is64 ? rv_mul(rd, rd, rs) : rv_mulw(rd, rd, rs), ctx);
774 if (!is64) 780 if (!is64 && !aux->verifier_zext)
775 emit_zext_32(rd, ctx); 781 emit_zext_32(rd, ctx);
776 break; 782 break;
777 case BPF_ALU | BPF_DIV | BPF_X: 783 case BPF_ALU | BPF_DIV | BPF_X:
778 case BPF_ALU64 | BPF_DIV | BPF_X: 784 case BPF_ALU64 | BPF_DIV | BPF_X:
779 emit(is64 ? rv_divu(rd, rd, rs) : rv_divuw(rd, rd, rs), ctx); 785 emit(is64 ? rv_divu(rd, rd, rs) : rv_divuw(rd, rd, rs), ctx);
780 if (!is64) 786 if (!is64 && !aux->verifier_zext)
781 emit_zext_32(rd, ctx); 787 emit_zext_32(rd, ctx);
782 break; 788 break;
783 case BPF_ALU | BPF_MOD | BPF_X: 789 case BPF_ALU | BPF_MOD | BPF_X:
784 case BPF_ALU64 | BPF_MOD | BPF_X: 790 case BPF_ALU64 | BPF_MOD | BPF_X:
785 emit(is64 ? rv_remu(rd, rd, rs) : rv_remuw(rd, rd, rs), ctx); 791 emit(is64 ? rv_remu(rd, rd, rs) : rv_remuw(rd, rd, rs), ctx);
786 if (!is64) 792 if (!is64 && !aux->verifier_zext)
787 emit_zext_32(rd, ctx); 793 emit_zext_32(rd, ctx);
788 break; 794 break;
789 case BPF_ALU | BPF_LSH | BPF_X: 795 case BPF_ALU | BPF_LSH | BPF_X:
@@ -867,7 +873,7 @@ out_be:
867 case BPF_ALU | BPF_MOV | BPF_K: 873 case BPF_ALU | BPF_MOV | BPF_K:
868 case BPF_ALU64 | BPF_MOV | BPF_K: 874 case BPF_ALU64 | BPF_MOV | BPF_K:
869 emit_imm(rd, imm, ctx); 875 emit_imm(rd, imm, ctx);
870 if (!is64) 876 if (!is64 && !aux->verifier_zext)
871 emit_zext_32(rd, ctx); 877 emit_zext_32(rd, ctx);
872 break; 878 break;
873 879
@@ -882,7 +888,7 @@ out_be:
882 emit(is64 ? rv_add(rd, rd, RV_REG_T1) : 888 emit(is64 ? rv_add(rd, rd, RV_REG_T1) :
883 rv_addw(rd, rd, RV_REG_T1), ctx); 889 rv_addw(rd, rd, RV_REG_T1), ctx);
884 } 890 }
885 if (!is64) 891 if (!is64 && !aux->verifier_zext)
886 emit_zext_32(rd, ctx); 892 emit_zext_32(rd, ctx);
887 break; 893 break;
888 case BPF_ALU | BPF_SUB | BPF_K: 894 case BPF_ALU | BPF_SUB | BPF_K:
@@ -895,7 +901,7 @@ out_be:
895 emit(is64 ? rv_sub(rd, rd, RV_REG_T1) : 901 emit(is64 ? rv_sub(rd, rd, RV_REG_T1) :
896 rv_subw(rd, rd, RV_REG_T1), ctx); 902 rv_subw(rd, rd, RV_REG_T1), ctx);
897 } 903 }
898 if (!is64) 904 if (!is64 && !aux->verifier_zext)
899 emit_zext_32(rd, ctx); 905 emit_zext_32(rd, ctx);
900 break; 906 break;
901 case BPF_ALU | BPF_AND | BPF_K: 907 case BPF_ALU | BPF_AND | BPF_K:
@@ -906,7 +912,7 @@ out_be:
906 emit_imm(RV_REG_T1, imm, ctx); 912 emit_imm(RV_REG_T1, imm, ctx);
907 emit(rv_and(rd, rd, RV_REG_T1), ctx); 913 emit(rv_and(rd, rd, RV_REG_T1), ctx);
908 } 914 }
909 if (!is64) 915 if (!is64 && !aux->verifier_zext)
910 emit_zext_32(rd, ctx); 916 emit_zext_32(rd, ctx);
911 break; 917 break;
912 case BPF_ALU | BPF_OR | BPF_K: 918 case BPF_ALU | BPF_OR | BPF_K:
@@ -917,7 +923,7 @@ out_be:
917 emit_imm(RV_REG_T1, imm, ctx); 923 emit_imm(RV_REG_T1, imm, ctx);
918 emit(rv_or(rd, rd, RV_REG_T1), ctx); 924 emit(rv_or(rd, rd, RV_REG_T1), ctx);
919 } 925 }
920 if (!is64) 926 if (!is64 && !aux->verifier_zext)
921 emit_zext_32(rd, ctx); 927 emit_zext_32(rd, ctx);
922 break; 928 break;
923 case BPF_ALU | BPF_XOR | BPF_K: 929 case BPF_ALU | BPF_XOR | BPF_K:
@@ -928,7 +934,7 @@ out_be:
928 emit_imm(RV_REG_T1, imm, ctx); 934 emit_imm(RV_REG_T1, imm, ctx);
929 emit(rv_xor(rd, rd, RV_REG_T1), ctx); 935 emit(rv_xor(rd, rd, RV_REG_T1), ctx);
930 } 936 }
931 if (!is64) 937 if (!is64 && !aux->verifier_zext)
932 emit_zext_32(rd, ctx); 938 emit_zext_32(rd, ctx);
933 break; 939 break;
934 case BPF_ALU | BPF_MUL | BPF_K: 940 case BPF_ALU | BPF_MUL | BPF_K:
@@ -936,7 +942,7 @@ out_be:
936 emit_imm(RV_REG_T1, imm, ctx); 942 emit_imm(RV_REG_T1, imm, ctx);
937 emit(is64 ? rv_mul(rd, rd, RV_REG_T1) : 943 emit(is64 ? rv_mul(rd, rd, RV_REG_T1) :
938 rv_mulw(rd, rd, RV_REG_T1), ctx); 944 rv_mulw(rd, rd, RV_REG_T1), ctx);
939 if (!is64) 945 if (!is64 && !aux->verifier_zext)
940 emit_zext_32(rd, ctx); 946 emit_zext_32(rd, ctx);
941 break; 947 break;
942 case BPF_ALU | BPF_DIV | BPF_K: 948 case BPF_ALU | BPF_DIV | BPF_K:
@@ -944,7 +950,7 @@ out_be:
944 emit_imm(RV_REG_T1, imm, ctx); 950 emit_imm(RV_REG_T1, imm, ctx);
945 emit(is64 ? rv_divu(rd, rd, RV_REG_T1) : 951 emit(is64 ? rv_divu(rd, rd, RV_REG_T1) :
946 rv_divuw(rd, rd, RV_REG_T1), ctx); 952 rv_divuw(rd, rd, RV_REG_T1), ctx);
947 if (!is64) 953 if (!is64 && !aux->verifier_zext)
948 emit_zext_32(rd, ctx); 954 emit_zext_32(rd, ctx);
949 break; 955 break;
950 case BPF_ALU | BPF_MOD | BPF_K: 956 case BPF_ALU | BPF_MOD | BPF_K:
@@ -952,7 +958,7 @@ out_be:
952 emit_imm(RV_REG_T1, imm, ctx); 958 emit_imm(RV_REG_T1, imm, ctx);
953 emit(is64 ? rv_remu(rd, rd, RV_REG_T1) : 959 emit(is64 ? rv_remu(rd, rd, RV_REG_T1) :
954 rv_remuw(rd, rd, RV_REG_T1), ctx); 960 rv_remuw(rd, rd, RV_REG_T1), ctx);
955 if (!is64) 961 if (!is64 && !aux->verifier_zext)
956 emit_zext_32(rd, ctx); 962 emit_zext_32(rd, ctx);
957 break; 963 break;
958 case BPF_ALU | BPF_LSH | BPF_K: 964 case BPF_ALU | BPF_LSH | BPF_K:
@@ -1239,6 +1245,8 @@ out_be:
1239 emit_imm(RV_REG_T1, off, ctx); 1245 emit_imm(RV_REG_T1, off, ctx);
1240 emit(rv_add(RV_REG_T1, RV_REG_T1, rs), ctx); 1246 emit(rv_add(RV_REG_T1, RV_REG_T1, rs), ctx);
1241 emit(rv_lbu(rd, 0, RV_REG_T1), ctx); 1247 emit(rv_lbu(rd, 0, RV_REG_T1), ctx);
1248 if (insn_is_zext(&insn[1]))
1249 return 1;
1242 break; 1250 break;
1243 case BPF_LDX | BPF_MEM | BPF_H: 1251 case BPF_LDX | BPF_MEM | BPF_H:
1244 if (is_12b_int(off)) { 1252 if (is_12b_int(off)) {
@@ -1249,6 +1257,8 @@ out_be:
1249 emit_imm(RV_REG_T1, off, ctx); 1257 emit_imm(RV_REG_T1, off, ctx);
1250 emit(rv_add(RV_REG_T1, RV_REG_T1, rs), ctx); 1258 emit(rv_add(RV_REG_T1, RV_REG_T1, rs), ctx);
1251 emit(rv_lhu(rd, 0, RV_REG_T1), ctx); 1259 emit(rv_lhu(rd, 0, RV_REG_T1), ctx);
1260 if (insn_is_zext(&insn[1]))
1261 return 1;
1252 break; 1262 break;
1253 case BPF_LDX | BPF_MEM | BPF_W: 1263 case BPF_LDX | BPF_MEM | BPF_W:
1254 if (is_12b_int(off)) { 1264 if (is_12b_int(off)) {
@@ -1259,6 +1269,8 @@ out_be:
1259 emit_imm(RV_REG_T1, off, ctx); 1269 emit_imm(RV_REG_T1, off, ctx);
1260 emit(rv_add(RV_REG_T1, RV_REG_T1, rs), ctx); 1270 emit(rv_add(RV_REG_T1, RV_REG_T1, rs), ctx);
1261 emit(rv_lwu(rd, 0, RV_REG_T1), ctx); 1271 emit(rv_lwu(rd, 0, RV_REG_T1), ctx);
1272 if (insn_is_zext(&insn[1]))
1273 return 1;
1262 break; 1274 break;
1263 case BPF_LDX | BPF_MEM | BPF_DW: 1275 case BPF_LDX | BPF_MEM | BPF_DW:
1264 if (is_12b_int(off)) { 1276 if (is_12b_int(off)) {
@@ -1503,6 +1515,11 @@ static void bpf_flush_icache(void *start, void *end)
1503 flush_icache_range((unsigned long)start, (unsigned long)end); 1515 flush_icache_range((unsigned long)start, (unsigned long)end);
1504} 1516}
1505 1517
1518bool bpf_jit_needs_zext(void)
1519{
1520 return true;
1521}
1522
1506struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) 1523struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
1507{ 1524{
1508 bool tmp_blinded = false, extra_pass = false; 1525 bool tmp_blinded = false, extra_pass = false;
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index 5e7c63033159..e636728ab452 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -299,9 +299,11 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1)
299 299
300#define EMIT_ZERO(b1) \ 300#define EMIT_ZERO(b1) \
301({ \ 301({ \
302 /* llgfr %dst,%dst (zero extend to 64 bit) */ \ 302 if (!fp->aux->verifier_zext) { \
303 EMIT4(0xb9160000, b1, b1); \ 303 /* llgfr %dst,%dst (zero extend to 64 bit) */ \
304 REG_SET_SEEN(b1); \ 304 EMIT4(0xb9160000, b1, b1); \
305 REG_SET_SEEN(b1); \
306 } \
305}) 307})
306 308
307/* 309/*
@@ -520,6 +522,8 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i
520 case BPF_ALU | BPF_MOV | BPF_X: /* dst = (u32) src */ 522 case BPF_ALU | BPF_MOV | BPF_X: /* dst = (u32) src */
521 /* llgfr %dst,%src */ 523 /* llgfr %dst,%src */
522 EMIT4(0xb9160000, dst_reg, src_reg); 524 EMIT4(0xb9160000, dst_reg, src_reg);
525 if (insn_is_zext(&insn[1]))
526 insn_count = 2;
523 break; 527 break;
524 case BPF_ALU64 | BPF_MOV | BPF_X: /* dst = src */ 528 case BPF_ALU64 | BPF_MOV | BPF_X: /* dst = src */
525 /* lgr %dst,%src */ 529 /* lgr %dst,%src */
@@ -528,6 +532,8 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i
528 case BPF_ALU | BPF_MOV | BPF_K: /* dst = (u32) imm */ 532 case BPF_ALU | BPF_MOV | BPF_K: /* dst = (u32) imm */
529 /* llilf %dst,imm */ 533 /* llilf %dst,imm */
530 EMIT6_IMM(0xc00f0000, dst_reg, imm); 534 EMIT6_IMM(0xc00f0000, dst_reg, imm);
535 if (insn_is_zext(&insn[1]))
536 insn_count = 2;
531 break; 537 break;
532 case BPF_ALU64 | BPF_MOV | BPF_K: /* dst = imm */ 538 case BPF_ALU64 | BPF_MOV | BPF_K: /* dst = imm */
533 /* lgfi %dst,imm */ 539 /* lgfi %dst,imm */
@@ -639,6 +645,8 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i
639 EMIT4(0xb9970000, REG_W0, src_reg); 645 EMIT4(0xb9970000, REG_W0, src_reg);
640 /* llgfr %dst,%rc */ 646 /* llgfr %dst,%rc */
641 EMIT4(0xb9160000, dst_reg, rc_reg); 647 EMIT4(0xb9160000, dst_reg, rc_reg);
648 if (insn_is_zext(&insn[1]))
649 insn_count = 2;
642 break; 650 break;
643 } 651 }
644 case BPF_ALU64 | BPF_DIV | BPF_X: /* dst = dst / src */ 652 case BPF_ALU64 | BPF_DIV | BPF_X: /* dst = dst / src */
@@ -676,6 +684,8 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i
676 EMIT_CONST_U32(imm)); 684 EMIT_CONST_U32(imm));
677 /* llgfr %dst,%rc */ 685 /* llgfr %dst,%rc */
678 EMIT4(0xb9160000, dst_reg, rc_reg); 686 EMIT4(0xb9160000, dst_reg, rc_reg);
687 if (insn_is_zext(&insn[1]))
688 insn_count = 2;
679 break; 689 break;
680 } 690 }
681 case BPF_ALU64 | BPF_DIV | BPF_K: /* dst = dst / imm */ 691 case BPF_ALU64 | BPF_DIV | BPF_K: /* dst = dst / imm */
@@ -864,10 +874,13 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i
864 case 16: /* dst = (u16) cpu_to_be16(dst) */ 874 case 16: /* dst = (u16) cpu_to_be16(dst) */
865 /* llghr %dst,%dst */ 875 /* llghr %dst,%dst */
866 EMIT4(0xb9850000, dst_reg, dst_reg); 876 EMIT4(0xb9850000, dst_reg, dst_reg);
877 if (insn_is_zext(&insn[1]))
878 insn_count = 2;
867 break; 879 break;
868 case 32: /* dst = (u32) cpu_to_be32(dst) */ 880 case 32: /* dst = (u32) cpu_to_be32(dst) */
869 /* llgfr %dst,%dst */ 881 if (!fp->aux->verifier_zext)
870 EMIT4(0xb9160000, dst_reg, dst_reg); 882 /* llgfr %dst,%dst */
883 EMIT4(0xb9160000, dst_reg, dst_reg);
871 break; 884 break;
872 case 64: /* dst = (u64) cpu_to_be64(dst) */ 885 case 64: /* dst = (u64) cpu_to_be64(dst) */
873 break; 886 break;
@@ -882,12 +895,15 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i
882 EMIT4_DISP(0x88000000, dst_reg, REG_0, 16); 895 EMIT4_DISP(0x88000000, dst_reg, REG_0, 16);
883 /* llghr %dst,%dst */ 896 /* llghr %dst,%dst */
884 EMIT4(0xb9850000, dst_reg, dst_reg); 897 EMIT4(0xb9850000, dst_reg, dst_reg);
898 if (insn_is_zext(&insn[1]))
899 insn_count = 2;
885 break; 900 break;
886 case 32: /* dst = (u32) cpu_to_le32(dst) */ 901 case 32: /* dst = (u32) cpu_to_le32(dst) */
887 /* lrvr %dst,%dst */ 902 /* lrvr %dst,%dst */
888 EMIT4(0xb91f0000, dst_reg, dst_reg); 903 EMIT4(0xb91f0000, dst_reg, dst_reg);
889 /* llgfr %dst,%dst */ 904 if (!fp->aux->verifier_zext)
890 EMIT4(0xb9160000, dst_reg, dst_reg); 905 /* llgfr %dst,%dst */
906 EMIT4(0xb9160000, dst_reg, dst_reg);
891 break; 907 break;
892 case 64: /* dst = (u64) cpu_to_le64(dst) */ 908 case 64: /* dst = (u64) cpu_to_le64(dst) */
893 /* lrvgr %dst,%dst */ 909 /* lrvgr %dst,%dst */
@@ -968,16 +984,22 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i
968 /* llgc %dst,0(off,%src) */ 984 /* llgc %dst,0(off,%src) */
969 EMIT6_DISP_LH(0xe3000000, 0x0090, dst_reg, src_reg, REG_0, off); 985 EMIT6_DISP_LH(0xe3000000, 0x0090, dst_reg, src_reg, REG_0, off);
970 jit->seen |= SEEN_MEM; 986 jit->seen |= SEEN_MEM;
987 if (insn_is_zext(&insn[1]))
988 insn_count = 2;
971 break; 989 break;
972 case BPF_LDX | BPF_MEM | BPF_H: /* dst = *(u16 *)(ul) (src + off) */ 990 case BPF_LDX | BPF_MEM | BPF_H: /* dst = *(u16 *)(ul) (src + off) */
973 /* llgh %dst,0(off,%src) */ 991 /* llgh %dst,0(off,%src) */
974 EMIT6_DISP_LH(0xe3000000, 0x0091, dst_reg, src_reg, REG_0, off); 992 EMIT6_DISP_LH(0xe3000000, 0x0091, dst_reg, src_reg, REG_0, off);
975 jit->seen |= SEEN_MEM; 993 jit->seen |= SEEN_MEM;
994 if (insn_is_zext(&insn[1]))
995 insn_count = 2;
976 break; 996 break;
977 case BPF_LDX | BPF_MEM | BPF_W: /* dst = *(u32 *)(ul) (src + off) */ 997 case BPF_LDX | BPF_MEM | BPF_W: /* dst = *(u32 *)(ul) (src + off) */
978 /* llgf %dst,off(%src) */ 998 /* llgf %dst,off(%src) */
979 jit->seen |= SEEN_MEM; 999 jit->seen |= SEEN_MEM;
980 EMIT6_DISP_LH(0xe3000000, 0x0016, dst_reg, src_reg, REG_0, off); 1000 EMIT6_DISP_LH(0xe3000000, 0x0016, dst_reg, src_reg, REG_0, off);
1001 if (insn_is_zext(&insn[1]))
1002 insn_count = 2;
981 break; 1003 break;
982 case BPF_LDX | BPF_MEM | BPF_DW: /* dst = *(u64 *)(ul) (src + off) */ 1004 case BPF_LDX | BPF_MEM | BPF_DW: /* dst = *(u64 *)(ul) (src + off) */
983 /* lg %dst,0(off,%src) */ 1005 /* lg %dst,0(off,%src) */
@@ -1282,6 +1304,11 @@ static int bpf_jit_prog(struct bpf_jit *jit, struct bpf_prog *fp)
1282 return 0; 1304 return 0;
1283} 1305}
1284 1306
1307bool bpf_jit_needs_zext(void)
1308{
1309 return true;
1310}
1311
1285/* 1312/*
1286 * Compile eBPF program "fp" 1313 * Compile eBPF program "fp"
1287 */ 1314 */
diff --git a/arch/sparc/net/bpf_jit_comp_64.c b/arch/sparc/net/bpf_jit_comp_64.c
index 65428e79b2f3..3364e2a00989 100644
--- a/arch/sparc/net/bpf_jit_comp_64.c
+++ b/arch/sparc/net/bpf_jit_comp_64.c
@@ -908,6 +908,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
908 /* dst = src */ 908 /* dst = src */
909 case BPF_ALU | BPF_MOV | BPF_X: 909 case BPF_ALU | BPF_MOV | BPF_X:
910 emit_alu3_K(SRL, src, 0, dst, ctx); 910 emit_alu3_K(SRL, src, 0, dst, ctx);
911 if (insn_is_zext(&insn[1]))
912 return 1;
911 break; 913 break;
912 case BPF_ALU64 | BPF_MOV | BPF_X: 914 case BPF_ALU64 | BPF_MOV | BPF_X:
913 emit_reg_move(src, dst, ctx); 915 emit_reg_move(src, dst, ctx);
@@ -942,6 +944,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
942 case BPF_ALU | BPF_DIV | BPF_X: 944 case BPF_ALU | BPF_DIV | BPF_X:
943 emit_write_y(G0, ctx); 945 emit_write_y(G0, ctx);
944 emit_alu(DIV, src, dst, ctx); 946 emit_alu(DIV, src, dst, ctx);
947 if (insn_is_zext(&insn[1]))
948 return 1;
945 break; 949 break;
946 case BPF_ALU64 | BPF_DIV | BPF_X: 950 case BPF_ALU64 | BPF_DIV | BPF_X:
947 emit_alu(UDIVX, src, dst, ctx); 951 emit_alu(UDIVX, src, dst, ctx);
@@ -975,6 +979,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
975 break; 979 break;
976 case BPF_ALU | BPF_RSH | BPF_X: 980 case BPF_ALU | BPF_RSH | BPF_X:
977 emit_alu(SRL, src, dst, ctx); 981 emit_alu(SRL, src, dst, ctx);
982 if (insn_is_zext(&insn[1]))
983 return 1;
978 break; 984 break;
979 case BPF_ALU64 | BPF_RSH | BPF_X: 985 case BPF_ALU64 | BPF_RSH | BPF_X:
980 emit_alu(SRLX, src, dst, ctx); 986 emit_alu(SRLX, src, dst, ctx);
@@ -997,9 +1003,12 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
997 case 16: 1003 case 16:
998 emit_alu_K(SLL, dst, 16, ctx); 1004 emit_alu_K(SLL, dst, 16, ctx);
999 emit_alu_K(SRL, dst, 16, ctx); 1005 emit_alu_K(SRL, dst, 16, ctx);
1006 if (insn_is_zext(&insn[1]))
1007 return 1;
1000 break; 1008 break;
1001 case 32: 1009 case 32:
1002 emit_alu_K(SRL, dst, 0, ctx); 1010 if (!ctx->prog->aux->verifier_zext)
1011 emit_alu_K(SRL, dst, 0, ctx);
1003 break; 1012 break;
1004 case 64: 1013 case 64:
1005 /* nop */ 1014 /* nop */
@@ -1021,6 +1030,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
1021 emit_alu3_K(AND, dst, 0xff, dst, ctx); 1030 emit_alu3_K(AND, dst, 0xff, dst, ctx);
1022 emit_alu3_K(SLL, tmp, 8, tmp, ctx); 1031 emit_alu3_K(SLL, tmp, 8, tmp, ctx);
1023 emit_alu(OR, tmp, dst, ctx); 1032 emit_alu(OR, tmp, dst, ctx);
1033 if (insn_is_zext(&insn[1]))
1034 return 1;
1024 break; 1035 break;
1025 1036
1026 case 32: 1037 case 32:
@@ -1037,6 +1048,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
1037 emit_alu3_K(AND, dst, 0xff, dst, ctx); /* dst = dst & 0xff */ 1048 emit_alu3_K(AND, dst, 0xff, dst, ctx); /* dst = dst & 0xff */
1038 emit_alu3_K(SLL, dst, 24, dst, ctx); /* dst = dst << 24 */ 1049 emit_alu3_K(SLL, dst, 24, dst, ctx); /* dst = dst << 24 */
1039 emit_alu(OR, tmp, dst, ctx); /* dst = dst | tmp */ 1050 emit_alu(OR, tmp, dst, ctx); /* dst = dst | tmp */
1051 if (insn_is_zext(&insn[1]))
1052 return 1;
1040 break; 1053 break;
1041 1054
1042 case 64: 1055 case 64:
@@ -1050,6 +1063,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
1050 /* dst = imm */ 1063 /* dst = imm */
1051 case BPF_ALU | BPF_MOV | BPF_K: 1064 case BPF_ALU | BPF_MOV | BPF_K:
1052 emit_loadimm32(imm, dst, ctx); 1065 emit_loadimm32(imm, dst, ctx);
1066 if (insn_is_zext(&insn[1]))
1067 return 1;
1053 break; 1068 break;
1054 case BPF_ALU64 | BPF_MOV | BPF_K: 1069 case BPF_ALU64 | BPF_MOV | BPF_K:
1055 emit_loadimm_sext(imm, dst, ctx); 1070 emit_loadimm_sext(imm, dst, ctx);
@@ -1132,6 +1147,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
1132 break; 1147 break;
1133 case BPF_ALU | BPF_RSH | BPF_K: 1148 case BPF_ALU | BPF_RSH | BPF_K:
1134 emit_alu_K(SRL, dst, imm, ctx); 1149 emit_alu_K(SRL, dst, imm, ctx);
1150 if (insn_is_zext(&insn[1]))
1151 return 1;
1135 break; 1152 break;
1136 case BPF_ALU64 | BPF_RSH | BPF_K: 1153 case BPF_ALU64 | BPF_RSH | BPF_K:
1137 emit_alu_K(SRLX, dst, imm, ctx); 1154 emit_alu_K(SRLX, dst, imm, ctx);
@@ -1144,7 +1161,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
1144 break; 1161 break;
1145 1162
1146 do_alu32_trunc: 1163 do_alu32_trunc:
1147 if (BPF_CLASS(code) == BPF_ALU) 1164 if (BPF_CLASS(code) == BPF_ALU &&
1165 !ctx->prog->aux->verifier_zext)
1148 emit_alu_K(SRL, dst, 0, ctx); 1166 emit_alu_K(SRL, dst, 0, ctx);
1149 break; 1167 break;
1150 1168
@@ -1265,6 +1283,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
1265 rs2 = RS2(tmp); 1283 rs2 = RS2(tmp);
1266 } 1284 }
1267 emit(opcode | RS1(src) | rs2 | RD(dst), ctx); 1285 emit(opcode | RS1(src) | rs2 | RD(dst), ctx);
1286 if (opcode != LD64 && insn_is_zext(&insn[1]))
1287 return 1;
1268 break; 1288 break;
1269 } 1289 }
1270 /* ST: *(size *)(dst + off) = imm */ 1290 /* ST: *(size *)(dst + off) = imm */
@@ -1432,6 +1452,11 @@ static void jit_fill_hole(void *area, unsigned int size)
1432 *ptr++ = 0x91d02005; /* ta 5 */ 1452 *ptr++ = 0x91d02005; /* ta 5 */
1433} 1453}
1434 1454
1455bool bpf_jit_needs_zext(void)
1456{
1457 return true;
1458}
1459
1435struct sparc64_jit_data { 1460struct sparc64_jit_data {
1436 struct bpf_binary_header *header; 1461 struct bpf_binary_header *header;
1437 u8 *image; 1462 u8 *image;
diff --git a/arch/x86/net/bpf_jit_comp32.c b/arch/x86/net/bpf_jit_comp32.c
index b29e82f190c7..133433d181ba 100644
--- a/arch/x86/net/bpf_jit_comp32.c
+++ b/arch/x86/net/bpf_jit_comp32.c
@@ -253,13 +253,14 @@ static inline void emit_ia32_mov_r(const u8 dst, const u8 src, bool dstk,
253/* dst = src */ 253/* dst = src */
254static inline void emit_ia32_mov_r64(const bool is64, const u8 dst[], 254static inline void emit_ia32_mov_r64(const bool is64, const u8 dst[],
255 const u8 src[], bool dstk, 255 const u8 src[], bool dstk,
256 bool sstk, u8 **pprog) 256 bool sstk, u8 **pprog,
257 const struct bpf_prog_aux *aux)
257{ 258{
258 emit_ia32_mov_r(dst_lo, src_lo, dstk, sstk, pprog); 259 emit_ia32_mov_r(dst_lo, src_lo, dstk, sstk, pprog);
259 if (is64) 260 if (is64)
260 /* complete 8 byte move */ 261 /* complete 8 byte move */
261 emit_ia32_mov_r(dst_hi, src_hi, dstk, sstk, pprog); 262 emit_ia32_mov_r(dst_hi, src_hi, dstk, sstk, pprog);
262 else 263 else if (!aux->verifier_zext)
263 /* zero out high 4 bytes */ 264 /* zero out high 4 bytes */
264 emit_ia32_mov_i(dst_hi, 0, dstk, pprog); 265 emit_ia32_mov_i(dst_hi, 0, dstk, pprog);
265} 266}
@@ -313,7 +314,8 @@ static inline void emit_ia32_mul_r(const u8 dst, const u8 src, bool dstk,
313} 314}
314 315
315static inline void emit_ia32_to_le_r64(const u8 dst[], s32 val, 316static inline void emit_ia32_to_le_r64(const u8 dst[], s32 val,
316 bool dstk, u8 **pprog) 317 bool dstk, u8 **pprog,
318 const struct bpf_prog_aux *aux)
317{ 319{
318 u8 *prog = *pprog; 320 u8 *prog = *pprog;
319 int cnt = 0; 321 int cnt = 0;
@@ -334,12 +336,14 @@ static inline void emit_ia32_to_le_r64(const u8 dst[], s32 val,
334 */ 336 */
335 EMIT2(0x0F, 0xB7); 337 EMIT2(0x0F, 0xB7);
336 EMIT1(add_2reg(0xC0, dreg_lo, dreg_lo)); 338 EMIT1(add_2reg(0xC0, dreg_lo, dreg_lo));
337 /* xor dreg_hi,dreg_hi */ 339 if (!aux->verifier_zext)
338 EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi)); 340 /* xor dreg_hi,dreg_hi */
341 EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
339 break; 342 break;
340 case 32: 343 case 32:
341 /* xor dreg_hi,dreg_hi */ 344 if (!aux->verifier_zext)
342 EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi)); 345 /* xor dreg_hi,dreg_hi */
346 EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
343 break; 347 break;
344 case 64: 348 case 64:
345 /* nop */ 349 /* nop */
@@ -358,7 +362,8 @@ static inline void emit_ia32_to_le_r64(const u8 dst[], s32 val,
358} 362}
359 363
360static inline void emit_ia32_to_be_r64(const u8 dst[], s32 val, 364static inline void emit_ia32_to_be_r64(const u8 dst[], s32 val,
361 bool dstk, u8 **pprog) 365 bool dstk, u8 **pprog,
366 const struct bpf_prog_aux *aux)
362{ 367{
363 u8 *prog = *pprog; 368 u8 *prog = *pprog;
364 int cnt = 0; 369 int cnt = 0;
@@ -380,16 +385,18 @@ static inline void emit_ia32_to_be_r64(const u8 dst[], s32 val,
380 EMIT2(0x0F, 0xB7); 385 EMIT2(0x0F, 0xB7);
381 EMIT1(add_2reg(0xC0, dreg_lo, dreg_lo)); 386 EMIT1(add_2reg(0xC0, dreg_lo, dreg_lo));
382 387
383 /* xor dreg_hi,dreg_hi */ 388 if (!aux->verifier_zext)
384 EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi)); 389 /* xor dreg_hi,dreg_hi */
390 EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
385 break; 391 break;
386 case 32: 392 case 32:
387 /* Emit 'bswap eax' to swap lower 4 bytes */ 393 /* Emit 'bswap eax' to swap lower 4 bytes */
388 EMIT1(0x0F); 394 EMIT1(0x0F);
389 EMIT1(add_1reg(0xC8, dreg_lo)); 395 EMIT1(add_1reg(0xC8, dreg_lo));
390 396
391 /* xor dreg_hi,dreg_hi */ 397 if (!aux->verifier_zext)
392 EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi)); 398 /* xor dreg_hi,dreg_hi */
399 EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
393 break; 400 break;
394 case 64: 401 case 64:
395 /* Emit 'bswap eax' to swap lower 4 bytes */ 402 /* Emit 'bswap eax' to swap lower 4 bytes */
@@ -569,7 +576,7 @@ static inline void emit_ia32_alu_r(const bool is64, const bool hi, const u8 op,
569static inline void emit_ia32_alu_r64(const bool is64, const u8 op, 576static inline void emit_ia32_alu_r64(const bool is64, const u8 op,
570 const u8 dst[], const u8 src[], 577 const u8 dst[], const u8 src[],
571 bool dstk, bool sstk, 578 bool dstk, bool sstk,
572 u8 **pprog) 579 u8 **pprog, const struct bpf_prog_aux *aux)
573{ 580{
574 u8 *prog = *pprog; 581 u8 *prog = *pprog;
575 582
@@ -577,7 +584,7 @@ static inline void emit_ia32_alu_r64(const bool is64, const u8 op,
577 if (is64) 584 if (is64)
578 emit_ia32_alu_r(is64, true, op, dst_hi, src_hi, dstk, sstk, 585 emit_ia32_alu_r(is64, true, op, dst_hi, src_hi, dstk, sstk,
579 &prog); 586 &prog);
580 else 587 else if (!aux->verifier_zext)
581 emit_ia32_mov_i(dst_hi, 0, dstk, &prog); 588 emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
582 *pprog = prog; 589 *pprog = prog;
583} 590}
@@ -668,7 +675,8 @@ static inline void emit_ia32_alu_i(const bool is64, const bool hi, const u8 op,
668/* ALU operation (64 bit) */ 675/* ALU operation (64 bit) */
669static inline void emit_ia32_alu_i64(const bool is64, const u8 op, 676static inline void emit_ia32_alu_i64(const bool is64, const u8 op,
670 const u8 dst[], const u32 val, 677 const u8 dst[], const u32 val,
671 bool dstk, u8 **pprog) 678 bool dstk, u8 **pprog,
679 const struct bpf_prog_aux *aux)
672{ 680{
673 u8 *prog = *pprog; 681 u8 *prog = *pprog;
674 u32 hi = 0; 682 u32 hi = 0;
@@ -679,7 +687,7 @@ static inline void emit_ia32_alu_i64(const bool is64, const u8 op,
679 emit_ia32_alu_i(is64, false, op, dst_lo, val, dstk, &prog); 687 emit_ia32_alu_i(is64, false, op, dst_lo, val, dstk, &prog);
680 if (is64) 688 if (is64)
681 emit_ia32_alu_i(is64, true, op, dst_hi, hi, dstk, &prog); 689 emit_ia32_alu_i(is64, true, op, dst_hi, hi, dstk, &prog);
682 else 690 else if (!aux->verifier_zext)
683 emit_ia32_mov_i(dst_hi, 0, dstk, &prog); 691 emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
684 692
685 *pprog = prog; 693 *pprog = prog;
@@ -1713,8 +1721,13 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
1713 case BPF_ALU64 | BPF_MOV | BPF_X: 1721 case BPF_ALU64 | BPF_MOV | BPF_X:
1714 switch (BPF_SRC(code)) { 1722 switch (BPF_SRC(code)) {
1715 case BPF_X: 1723 case BPF_X:
1716 emit_ia32_mov_r64(is64, dst, src, dstk, 1724 if (imm32 == 1) {
1717 sstk, &prog); 1725 /* Special mov32 for zext. */
1726 emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
1727 break;
1728 }
1729 emit_ia32_mov_r64(is64, dst, src, dstk, sstk,
1730 &prog, bpf_prog->aux);
1718 break; 1731 break;
1719 case BPF_K: 1732 case BPF_K:
1720 /* Sign-extend immediate value to dst reg */ 1733 /* Sign-extend immediate value to dst reg */
@@ -1754,11 +1767,13 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
1754 switch (BPF_SRC(code)) { 1767 switch (BPF_SRC(code)) {
1755 case BPF_X: 1768 case BPF_X:
1756 emit_ia32_alu_r64(is64, BPF_OP(code), dst, 1769 emit_ia32_alu_r64(is64, BPF_OP(code), dst,
1757 src, dstk, sstk, &prog); 1770 src, dstk, sstk, &prog,
1771 bpf_prog->aux);
1758 break; 1772 break;
1759 case BPF_K: 1773 case BPF_K:
1760 emit_ia32_alu_i64(is64, BPF_OP(code), dst, 1774 emit_ia32_alu_i64(is64, BPF_OP(code), dst,
1761 imm32, dstk, &prog); 1775 imm32, dstk, &prog,
1776 bpf_prog->aux);
1762 break; 1777 break;
1763 } 1778 }
1764 break; 1779 break;
@@ -1777,7 +1792,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
1777 false, &prog); 1792 false, &prog);
1778 break; 1793 break;
1779 } 1794 }
1780 emit_ia32_mov_i(dst_hi, 0, dstk, &prog); 1795 if (!bpf_prog->aux->verifier_zext)
1796 emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
1781 break; 1797 break;
1782 case BPF_ALU | BPF_LSH | BPF_X: 1798 case BPF_ALU | BPF_LSH | BPF_X:
1783 case BPF_ALU | BPF_RSH | BPF_X: 1799 case BPF_ALU | BPF_RSH | BPF_X:
@@ -1797,7 +1813,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
1797 &prog); 1813 &prog);
1798 break; 1814 break;
1799 } 1815 }
1800 emit_ia32_mov_i(dst_hi, 0, dstk, &prog); 1816 if (!bpf_prog->aux->verifier_zext)
1817 emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
1801 break; 1818 break;
1802 /* dst = dst / src(imm) */ 1819 /* dst = dst / src(imm) */
1803 /* dst = dst % src(imm) */ 1820 /* dst = dst % src(imm) */
@@ -1819,7 +1836,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
1819 &prog); 1836 &prog);
1820 break; 1837 break;
1821 } 1838 }
1822 emit_ia32_mov_i(dst_hi, 0, dstk, &prog); 1839 if (!bpf_prog->aux->verifier_zext)
1840 emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
1823 break; 1841 break;
1824 case BPF_ALU64 | BPF_DIV | BPF_K: 1842 case BPF_ALU64 | BPF_DIV | BPF_K:
1825 case BPF_ALU64 | BPF_DIV | BPF_X: 1843 case BPF_ALU64 | BPF_DIV | BPF_X:
@@ -1836,7 +1854,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
1836 EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX), imm32); 1854 EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX), imm32);
1837 emit_ia32_shift_r(BPF_OP(code), dst_lo, IA32_ECX, dstk, 1855 emit_ia32_shift_r(BPF_OP(code), dst_lo, IA32_ECX, dstk,
1838 false, &prog); 1856 false, &prog);
1839 emit_ia32_mov_i(dst_hi, 0, dstk, &prog); 1857 if (!bpf_prog->aux->verifier_zext)
1858 emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
1840 break; 1859 break;
1841 /* dst = dst << imm */ 1860 /* dst = dst << imm */
1842 case BPF_ALU64 | BPF_LSH | BPF_K: 1861 case BPF_ALU64 | BPF_LSH | BPF_K:
@@ -1872,7 +1891,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
1872 case BPF_ALU | BPF_NEG: 1891 case BPF_ALU | BPF_NEG:
1873 emit_ia32_alu_i(is64, false, BPF_OP(code), 1892 emit_ia32_alu_i(is64, false, BPF_OP(code),
1874 dst_lo, 0, dstk, &prog); 1893 dst_lo, 0, dstk, &prog);
1875 emit_ia32_mov_i(dst_hi, 0, dstk, &prog); 1894 if (!bpf_prog->aux->verifier_zext)
1895 emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
1876 break; 1896 break;
1877 /* dst = ~dst (64 bit) */ 1897 /* dst = ~dst (64 bit) */
1878 case BPF_ALU64 | BPF_NEG: 1898 case BPF_ALU64 | BPF_NEG:
@@ -1892,11 +1912,13 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
1892 break; 1912 break;
1893 /* dst = htole(dst) */ 1913 /* dst = htole(dst) */
1894 case BPF_ALU | BPF_END | BPF_FROM_LE: 1914 case BPF_ALU | BPF_END | BPF_FROM_LE:
1895 emit_ia32_to_le_r64(dst, imm32, dstk, &prog); 1915 emit_ia32_to_le_r64(dst, imm32, dstk, &prog,
1916 bpf_prog->aux);
1896 break; 1917 break;
1897 /* dst = htobe(dst) */ 1918 /* dst = htobe(dst) */
1898 case BPF_ALU | BPF_END | BPF_FROM_BE: 1919 case BPF_ALU | BPF_END | BPF_FROM_BE:
1899 emit_ia32_to_be_r64(dst, imm32, dstk, &prog); 1920 emit_ia32_to_be_r64(dst, imm32, dstk, &prog,
1921 bpf_prog->aux);
1900 break; 1922 break;
1901 /* dst = imm64 */ 1923 /* dst = imm64 */
1902 case BPF_LD | BPF_IMM | BPF_DW: { 1924 case BPF_LD | BPF_IMM | BPF_DW: {
@@ -2051,6 +2073,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
2051 case BPF_B: 2073 case BPF_B:
2052 case BPF_H: 2074 case BPF_H:
2053 case BPF_W: 2075 case BPF_W:
2076 if (!bpf_prog->aux->verifier_zext)
2077 break;
2054 if (dstk) { 2078 if (dstk) {
2055 EMIT3(0xC7, add_1reg(0x40, IA32_EBP), 2079 EMIT3(0xC7, add_1reg(0x40, IA32_EBP),
2056 STACK_VAR(dst_hi)); 2080 STACK_VAR(dst_hi));
@@ -2475,6 +2499,11 @@ notyet:
2475 return proglen; 2499 return proglen;
2476} 2500}
2477 2501
2502bool bpf_jit_needs_zext(void)
2503{
2504 return true;
2505}
2506
2478struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) 2507struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
2479{ 2508{
2480 struct bpf_binary_header *header = NULL; 2509 struct bpf_binary_header *header = NULL;
diff --git a/drivers/media/rc/bpf-lirc.c b/drivers/media/rc/bpf-lirc.c
index ee657003c1a1..0a0ce620e4a2 100644
--- a/drivers/media/rc/bpf-lirc.c
+++ b/drivers/media/rc/bpf-lirc.c
@@ -8,6 +8,9 @@
8#include <linux/bpf_lirc.h> 8#include <linux/bpf_lirc.h>
9#include "rc-core-priv.h" 9#include "rc-core-priv.h"
10 10
11#define lirc_rcu_dereference(p) \
12 rcu_dereference_protected(p, lockdep_is_held(&ir_raw_handler_lock))
13
11/* 14/*
12 * BPF interface for raw IR 15 * BPF interface for raw IR
13 */ 16 */
@@ -136,7 +139,7 @@ const struct bpf_verifier_ops lirc_mode2_verifier_ops = {
136 139
137static int lirc_bpf_attach(struct rc_dev *rcdev, struct bpf_prog *prog) 140static int lirc_bpf_attach(struct rc_dev *rcdev, struct bpf_prog *prog)
138{ 141{
139 struct bpf_prog_array __rcu *old_array; 142 struct bpf_prog_array *old_array;
140 struct bpf_prog_array *new_array; 143 struct bpf_prog_array *new_array;
141 struct ir_raw_event_ctrl *raw; 144 struct ir_raw_event_ctrl *raw;
142 int ret; 145 int ret;
@@ -154,12 +157,12 @@ static int lirc_bpf_attach(struct rc_dev *rcdev, struct bpf_prog *prog)
154 goto unlock; 157 goto unlock;
155 } 158 }
156 159
157 if (raw->progs && bpf_prog_array_length(raw->progs) >= BPF_MAX_PROGS) { 160 old_array = lirc_rcu_dereference(raw->progs);
161 if (old_array && bpf_prog_array_length(old_array) >= BPF_MAX_PROGS) {
158 ret = -E2BIG; 162 ret = -E2BIG;
159 goto unlock; 163 goto unlock;
160 } 164 }
161 165
162 old_array = raw->progs;
163 ret = bpf_prog_array_copy(old_array, NULL, prog, &new_array); 166 ret = bpf_prog_array_copy(old_array, NULL, prog, &new_array);
164 if (ret < 0) 167 if (ret < 0)
165 goto unlock; 168 goto unlock;
@@ -174,7 +177,7 @@ unlock:
174 177
175static int lirc_bpf_detach(struct rc_dev *rcdev, struct bpf_prog *prog) 178static int lirc_bpf_detach(struct rc_dev *rcdev, struct bpf_prog *prog)
176{ 179{
177 struct bpf_prog_array __rcu *old_array; 180 struct bpf_prog_array *old_array;
178 struct bpf_prog_array *new_array; 181 struct bpf_prog_array *new_array;
179 struct ir_raw_event_ctrl *raw; 182 struct ir_raw_event_ctrl *raw;
180 int ret; 183 int ret;
@@ -192,7 +195,7 @@ static int lirc_bpf_detach(struct rc_dev *rcdev, struct bpf_prog *prog)
192 goto unlock; 195 goto unlock;
193 } 196 }
194 197
195 old_array = raw->progs; 198 old_array = lirc_rcu_dereference(raw->progs);
196 ret = bpf_prog_array_copy(old_array, prog, NULL, &new_array); 199 ret = bpf_prog_array_copy(old_array, prog, NULL, &new_array);
197 /* 200 /*
198 * Do not use bpf_prog_array_delete_safe() as we would end up 201 * Do not use bpf_prog_array_delete_safe() as we would end up
@@ -223,21 +226,22 @@ void lirc_bpf_run(struct rc_dev *rcdev, u32 sample)
223/* 226/*
224 * This should be called once the rc thread has been stopped, so there can be 227 * This should be called once the rc thread has been stopped, so there can be
225 * no concurrent bpf execution. 228 * no concurrent bpf execution.
229 *
230 * Should be called with the ir_raw_handler_lock held.
226 */ 231 */
227void lirc_bpf_free(struct rc_dev *rcdev) 232void lirc_bpf_free(struct rc_dev *rcdev)
228{ 233{
229 struct bpf_prog_array_item *item; 234 struct bpf_prog_array_item *item;
235 struct bpf_prog_array *array;
230 236
231 if (!rcdev->raw->progs) 237 array = lirc_rcu_dereference(rcdev->raw->progs);
238 if (!array)
232 return; 239 return;
233 240
234 item = rcu_dereference(rcdev->raw->progs)->items; 241 for (item = array->items; item->prog; item++)
235 while (item->prog) {
236 bpf_prog_put(item->prog); 242 bpf_prog_put(item->prog);
237 item++;
238 }
239 243
240 bpf_prog_array_free(rcdev->raw->progs); 244 bpf_prog_array_free(array);
241} 245}
242 246
243int lirc_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog) 247int lirc_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog)
@@ -290,7 +294,7 @@ int lirc_prog_detach(const union bpf_attr *attr)
290int lirc_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr) 294int lirc_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr)
291{ 295{
292 __u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids); 296 __u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids);
293 struct bpf_prog_array __rcu *progs; 297 struct bpf_prog_array *progs;
294 struct rc_dev *rcdev; 298 struct rc_dev *rcdev;
295 u32 cnt, flags = 0; 299 u32 cnt, flags = 0;
296 int ret; 300 int ret;
@@ -311,7 +315,7 @@ int lirc_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr)
311 if (ret) 315 if (ret)
312 goto put; 316 goto put;
313 317
314 progs = rcdev->raw->progs; 318 progs = lirc_rcu_dereference(rcdev->raw->progs);
315 cnt = progs ? bpf_prog_array_length(progs) : 0; 319 cnt = progs ? bpf_prog_array_length(progs) : 0;
316 320
317 if (copy_to_user(&uattr->query.prog_cnt, &cnt, sizeof(cnt))) { 321 if (copy_to_user(&uattr->query.prog_cnt, &cnt, sizeof(cnt))) {
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
index d4bf0e694541..4054b70d7719 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
@@ -623,6 +623,13 @@ static void wrp_immed(struct nfp_prog *nfp_prog, swreg dst, u32 imm)
623} 623}
624 624
625static void 625static void
626wrp_zext(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, u8 dst)
627{
628 if (meta->flags & FLAG_INSN_DO_ZEXT)
629 wrp_immed(nfp_prog, reg_both(dst + 1), 0);
630}
631
632static void
626wrp_immed_relo(struct nfp_prog *nfp_prog, swreg dst, u32 imm, 633wrp_immed_relo(struct nfp_prog *nfp_prog, swreg dst, u32 imm,
627 enum nfp_relo_type relo) 634 enum nfp_relo_type relo)
628{ 635{
@@ -858,7 +865,8 @@ static int nfp_cpp_memcpy(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
858} 865}
859 866
860static int 867static int
861data_ld(struct nfp_prog *nfp_prog, swreg offset, u8 dst_gpr, int size) 868data_ld(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, swreg offset,
869 u8 dst_gpr, int size)
862{ 870{
863 unsigned int i; 871 unsigned int i;
864 u16 shift, sz; 872 u16 shift, sz;
@@ -881,14 +889,15 @@ data_ld(struct nfp_prog *nfp_prog, swreg offset, u8 dst_gpr, int size)
881 wrp_mov(nfp_prog, reg_both(dst_gpr + i), reg_xfer(i)); 889 wrp_mov(nfp_prog, reg_both(dst_gpr + i), reg_xfer(i));
882 890
883 if (i < 2) 891 if (i < 2)
884 wrp_immed(nfp_prog, reg_both(dst_gpr + 1), 0); 892 wrp_zext(nfp_prog, meta, dst_gpr);
885 893
886 return 0; 894 return 0;
887} 895}
888 896
889static int 897static int
890data_ld_host_order(struct nfp_prog *nfp_prog, u8 dst_gpr, 898data_ld_host_order(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
891 swreg lreg, swreg rreg, int size, enum cmd_mode mode) 899 u8 dst_gpr, swreg lreg, swreg rreg, int size,
900 enum cmd_mode mode)
892{ 901{
893 unsigned int i; 902 unsigned int i;
894 u8 mask, sz; 903 u8 mask, sz;
@@ -911,33 +920,34 @@ data_ld_host_order(struct nfp_prog *nfp_prog, u8 dst_gpr,
911 wrp_mov(nfp_prog, reg_both(dst_gpr + i), reg_xfer(i)); 920 wrp_mov(nfp_prog, reg_both(dst_gpr + i), reg_xfer(i));
912 921
913 if (i < 2) 922 if (i < 2)
914 wrp_immed(nfp_prog, reg_both(dst_gpr + 1), 0); 923 wrp_zext(nfp_prog, meta, dst_gpr);
915 924
916 return 0; 925 return 0;
917} 926}
918 927
919static int 928static int
920data_ld_host_order_addr32(struct nfp_prog *nfp_prog, u8 src_gpr, swreg offset, 929data_ld_host_order_addr32(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
921 u8 dst_gpr, u8 size) 930 u8 src_gpr, swreg offset, u8 dst_gpr, u8 size)
922{ 931{
923 return data_ld_host_order(nfp_prog, dst_gpr, reg_a(src_gpr), offset, 932 return data_ld_host_order(nfp_prog, meta, dst_gpr, reg_a(src_gpr),
924 size, CMD_MODE_32b); 933 offset, size, CMD_MODE_32b);
925} 934}
926 935
927static int 936static int
928data_ld_host_order_addr40(struct nfp_prog *nfp_prog, u8 src_gpr, swreg offset, 937data_ld_host_order_addr40(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
929 u8 dst_gpr, u8 size) 938 u8 src_gpr, swreg offset, u8 dst_gpr, u8 size)
930{ 939{
931 swreg rega, regb; 940 swreg rega, regb;
932 941
933 addr40_offset(nfp_prog, src_gpr, offset, &rega, &regb); 942 addr40_offset(nfp_prog, src_gpr, offset, &rega, &regb);
934 943
935 return data_ld_host_order(nfp_prog, dst_gpr, rega, regb, 944 return data_ld_host_order(nfp_prog, meta, dst_gpr, rega, regb,
936 size, CMD_MODE_40b_BA); 945 size, CMD_MODE_40b_BA);
937} 946}
938 947
939static int 948static int
940construct_data_ind_ld(struct nfp_prog *nfp_prog, u16 offset, u16 src, u8 size) 949construct_data_ind_ld(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
950 u16 offset, u16 src, u8 size)
941{ 951{
942 swreg tmp_reg; 952 swreg tmp_reg;
943 953
@@ -953,10 +963,12 @@ construct_data_ind_ld(struct nfp_prog *nfp_prog, u16 offset, u16 src, u8 size)
953 emit_br_relo(nfp_prog, BR_BLO, BR_OFF_RELO, 0, RELO_BR_GO_ABORT); 963 emit_br_relo(nfp_prog, BR_BLO, BR_OFF_RELO, 0, RELO_BR_GO_ABORT);
954 964
955 /* Load data */ 965 /* Load data */
956 return data_ld(nfp_prog, imm_b(nfp_prog), 0, size); 966 return data_ld(nfp_prog, meta, imm_b(nfp_prog), 0, size);
957} 967}
958 968
959static int construct_data_ld(struct nfp_prog *nfp_prog, u16 offset, u8 size) 969static int
970construct_data_ld(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
971 u16 offset, u8 size)
960{ 972{
961 swreg tmp_reg; 973 swreg tmp_reg;
962 974
@@ -967,7 +979,7 @@ static int construct_data_ld(struct nfp_prog *nfp_prog, u16 offset, u8 size)
967 979
968 /* Load data */ 980 /* Load data */
969 tmp_reg = re_load_imm_any(nfp_prog, offset, imm_b(nfp_prog)); 981 tmp_reg = re_load_imm_any(nfp_prog, offset, imm_b(nfp_prog));
970 return data_ld(nfp_prog, tmp_reg, 0, size); 982 return data_ld(nfp_prog, meta, tmp_reg, 0, size);
971} 983}
972 984
973static int 985static int
@@ -1204,7 +1216,7 @@ mem_op_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
1204 } 1216 }
1205 1217
1206 if (clr_gpr && size < 8) 1218 if (clr_gpr && size < 8)
1207 wrp_immed(nfp_prog, reg_both(gpr + 1), 0); 1219 wrp_zext(nfp_prog, meta, gpr);
1208 1220
1209 while (size) { 1221 while (size) {
1210 u32 slice_end; 1222 u32 slice_end;
@@ -1305,9 +1317,10 @@ wrp_alu32_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
1305 enum alu_op alu_op) 1317 enum alu_op alu_op)
1306{ 1318{
1307 const struct bpf_insn *insn = &meta->insn; 1319 const struct bpf_insn *insn = &meta->insn;
1320 u8 dst = insn->dst_reg * 2;
1308 1321
1309 wrp_alu_imm(nfp_prog, insn->dst_reg * 2, alu_op, insn->imm); 1322 wrp_alu_imm(nfp_prog, dst, alu_op, insn->imm);
1310 wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0); 1323 wrp_zext(nfp_prog, meta, dst);
1311 1324
1312 return 0; 1325 return 0;
1313} 1326}
@@ -1319,7 +1332,7 @@ wrp_alu32_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
1319 u8 dst = meta->insn.dst_reg * 2, src = meta->insn.src_reg * 2; 1332 u8 dst = meta->insn.dst_reg * 2, src = meta->insn.src_reg * 2;
1320 1333
1321 emit_alu(nfp_prog, reg_both(dst), reg_a(dst), alu_op, reg_b(src)); 1334 emit_alu(nfp_prog, reg_both(dst), reg_a(dst), alu_op, reg_b(src));
1322 wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0); 1335 wrp_zext(nfp_prog, meta, dst);
1323 1336
1324 return 0; 1337 return 0;
1325} 1338}
@@ -2396,12 +2409,14 @@ static int neg_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2396 u8 dst = meta->insn.dst_reg * 2; 2409 u8 dst = meta->insn.dst_reg * 2;
2397 2410
2398 emit_alu(nfp_prog, reg_both(dst), reg_imm(0), ALU_OP_SUB, reg_b(dst)); 2411 emit_alu(nfp_prog, reg_both(dst), reg_imm(0), ALU_OP_SUB, reg_b(dst));
2399 wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0); 2412 wrp_zext(nfp_prog, meta, dst);
2400 2413
2401 return 0; 2414 return 0;
2402} 2415}
2403 2416
2404static int __ashr_imm(struct nfp_prog *nfp_prog, u8 dst, u8 shift_amt) 2417static int
2418__ashr_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, u8 dst,
2419 u8 shift_amt)
2405{ 2420{
2406 if (shift_amt) { 2421 if (shift_amt) {
2407 /* Set signedness bit (MSB of result). */ 2422 /* Set signedness bit (MSB of result). */
@@ -2410,7 +2425,7 @@ static int __ashr_imm(struct nfp_prog *nfp_prog, u8 dst, u8 shift_amt)
2410 emit_shf(nfp_prog, reg_both(dst), reg_none(), SHF_OP_ASHR, 2425 emit_shf(nfp_prog, reg_both(dst), reg_none(), SHF_OP_ASHR,
2411 reg_b(dst), SHF_SC_R_SHF, shift_amt); 2426 reg_b(dst), SHF_SC_R_SHF, shift_amt);
2412 } 2427 }
2413 wrp_immed(nfp_prog, reg_both(dst + 1), 0); 2428 wrp_zext(nfp_prog, meta, dst);
2414 2429
2415 return 0; 2430 return 0;
2416} 2431}
@@ -2425,7 +2440,7 @@ static int ashr_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2425 umin = meta->umin_src; 2440 umin = meta->umin_src;
2426 umax = meta->umax_src; 2441 umax = meta->umax_src;
2427 if (umin == umax) 2442 if (umin == umax)
2428 return __ashr_imm(nfp_prog, dst, umin); 2443 return __ashr_imm(nfp_prog, meta, dst, umin);
2429 2444
2430 src = insn->src_reg * 2; 2445 src = insn->src_reg * 2;
2431 /* NOTE: the first insn will set both indirect shift amount (source A) 2446 /* NOTE: the first insn will set both indirect shift amount (source A)
@@ -2434,7 +2449,7 @@ static int ashr_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2434 emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_b(dst)); 2449 emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_b(dst));
2435 emit_shf_indir(nfp_prog, reg_both(dst), reg_none(), SHF_OP_ASHR, 2450 emit_shf_indir(nfp_prog, reg_both(dst), reg_none(), SHF_OP_ASHR,
2436 reg_b(dst), SHF_SC_R_SHF); 2451 reg_b(dst), SHF_SC_R_SHF);
2437 wrp_immed(nfp_prog, reg_both(dst + 1), 0); 2452 wrp_zext(nfp_prog, meta, dst);
2438 2453
2439 return 0; 2454 return 0;
2440} 2455}
@@ -2444,15 +2459,17 @@ static int ashr_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2444 const struct bpf_insn *insn = &meta->insn; 2459 const struct bpf_insn *insn = &meta->insn;
2445 u8 dst = insn->dst_reg * 2; 2460 u8 dst = insn->dst_reg * 2;
2446 2461
2447 return __ashr_imm(nfp_prog, dst, insn->imm); 2462 return __ashr_imm(nfp_prog, meta, dst, insn->imm);
2448} 2463}
2449 2464
2450static int __shr_imm(struct nfp_prog *nfp_prog, u8 dst, u8 shift_amt) 2465static int
2466__shr_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, u8 dst,
2467 u8 shift_amt)
2451{ 2468{
2452 if (shift_amt) 2469 if (shift_amt)
2453 emit_shf(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE, 2470 emit_shf(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE,
2454 reg_b(dst), SHF_SC_R_SHF, shift_amt); 2471 reg_b(dst), SHF_SC_R_SHF, shift_amt);
2455 wrp_immed(nfp_prog, reg_both(dst + 1), 0); 2472 wrp_zext(nfp_prog, meta, dst);
2456 return 0; 2473 return 0;
2457} 2474}
2458 2475
@@ -2461,7 +2478,7 @@ static int shr_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2461 const struct bpf_insn *insn = &meta->insn; 2478 const struct bpf_insn *insn = &meta->insn;
2462 u8 dst = insn->dst_reg * 2; 2479 u8 dst = insn->dst_reg * 2;
2463 2480
2464 return __shr_imm(nfp_prog, dst, insn->imm); 2481 return __shr_imm(nfp_prog, meta, dst, insn->imm);
2465} 2482}
2466 2483
2467static int shr_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 2484static int shr_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
@@ -2474,22 +2491,24 @@ static int shr_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2474 umin = meta->umin_src; 2491 umin = meta->umin_src;
2475 umax = meta->umax_src; 2492 umax = meta->umax_src;
2476 if (umin == umax) 2493 if (umin == umax)
2477 return __shr_imm(nfp_prog, dst, umin); 2494 return __shr_imm(nfp_prog, meta, dst, umin);
2478 2495
2479 src = insn->src_reg * 2; 2496 src = insn->src_reg * 2;
2480 emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_imm(0)); 2497 emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_imm(0));
2481 emit_shf_indir(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE, 2498 emit_shf_indir(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE,
2482 reg_b(dst), SHF_SC_R_SHF); 2499 reg_b(dst), SHF_SC_R_SHF);
2483 wrp_immed(nfp_prog, reg_both(dst + 1), 0); 2500 wrp_zext(nfp_prog, meta, dst);
2484 return 0; 2501 return 0;
2485} 2502}
2486 2503
2487static int __shl_imm(struct nfp_prog *nfp_prog, u8 dst, u8 shift_amt) 2504static int
2505__shl_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, u8 dst,
2506 u8 shift_amt)
2488{ 2507{
2489 if (shift_amt) 2508 if (shift_amt)
2490 emit_shf(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE, 2509 emit_shf(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE,
2491 reg_b(dst), SHF_SC_L_SHF, shift_amt); 2510 reg_b(dst), SHF_SC_L_SHF, shift_amt);
2492 wrp_immed(nfp_prog, reg_both(dst + 1), 0); 2511 wrp_zext(nfp_prog, meta, dst);
2493 return 0; 2512 return 0;
2494} 2513}
2495 2514
@@ -2498,7 +2517,7 @@ static int shl_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2498 const struct bpf_insn *insn = &meta->insn; 2517 const struct bpf_insn *insn = &meta->insn;
2499 u8 dst = insn->dst_reg * 2; 2518 u8 dst = insn->dst_reg * 2;
2500 2519
2501 return __shl_imm(nfp_prog, dst, insn->imm); 2520 return __shl_imm(nfp_prog, meta, dst, insn->imm);
2502} 2521}
2503 2522
2504static int shl_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 2523static int shl_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
@@ -2511,11 +2530,11 @@ static int shl_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2511 umin = meta->umin_src; 2530 umin = meta->umin_src;
2512 umax = meta->umax_src; 2531 umax = meta->umax_src;
2513 if (umin == umax) 2532 if (umin == umax)
2514 return __shl_imm(nfp_prog, dst, umin); 2533 return __shl_imm(nfp_prog, meta, dst, umin);
2515 2534
2516 src = insn->src_reg * 2; 2535 src = insn->src_reg * 2;
2517 shl_reg64_lt32_low(nfp_prog, dst, src); 2536 shl_reg64_lt32_low(nfp_prog, dst, src);
2518 wrp_immed(nfp_prog, reg_both(dst + 1), 0); 2537 wrp_zext(nfp_prog, meta, dst);
2519 return 0; 2538 return 0;
2520} 2539}
2521 2540
@@ -2577,34 +2596,34 @@ static int imm_ld8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2577 2596
2578static int data_ld1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 2597static int data_ld1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2579{ 2598{
2580 return construct_data_ld(nfp_prog, meta->insn.imm, 1); 2599 return construct_data_ld(nfp_prog, meta, meta->insn.imm, 1);
2581} 2600}
2582 2601
2583static int data_ld2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 2602static int data_ld2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2584{ 2603{
2585 return construct_data_ld(nfp_prog, meta->insn.imm, 2); 2604 return construct_data_ld(nfp_prog, meta, meta->insn.imm, 2);
2586} 2605}
2587 2606
2588static int data_ld4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 2607static int data_ld4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2589{ 2608{
2590 return construct_data_ld(nfp_prog, meta->insn.imm, 4); 2609 return construct_data_ld(nfp_prog, meta, meta->insn.imm, 4);
2591} 2610}
2592 2611
2593static int data_ind_ld1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 2612static int data_ind_ld1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2594{ 2613{
2595 return construct_data_ind_ld(nfp_prog, meta->insn.imm, 2614 return construct_data_ind_ld(nfp_prog, meta, meta->insn.imm,
2596 meta->insn.src_reg * 2, 1); 2615 meta->insn.src_reg * 2, 1);
2597} 2616}
2598 2617
2599static int data_ind_ld2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 2618static int data_ind_ld2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2600{ 2619{
2601 return construct_data_ind_ld(nfp_prog, meta->insn.imm, 2620 return construct_data_ind_ld(nfp_prog, meta, meta->insn.imm,
2602 meta->insn.src_reg * 2, 2); 2621 meta->insn.src_reg * 2, 2);
2603} 2622}
2604 2623
2605static int data_ind_ld4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 2624static int data_ind_ld4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2606{ 2625{
2607 return construct_data_ind_ld(nfp_prog, meta->insn.imm, 2626 return construct_data_ind_ld(nfp_prog, meta, meta->insn.imm,
2608 meta->insn.src_reg * 2, 4); 2627 meta->insn.src_reg * 2, 4);
2609} 2628}
2610 2629
@@ -2682,7 +2701,7 @@ mem_ldx_data(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
2682 2701
2683 tmp_reg = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog)); 2702 tmp_reg = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog));
2684 2703
2685 return data_ld_host_order_addr32(nfp_prog, meta->insn.src_reg * 2, 2704 return data_ld_host_order_addr32(nfp_prog, meta, meta->insn.src_reg * 2,
2686 tmp_reg, meta->insn.dst_reg * 2, size); 2705 tmp_reg, meta->insn.dst_reg * 2, size);
2687} 2706}
2688 2707
@@ -2694,7 +2713,7 @@ mem_ldx_emem(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
2694 2713
2695 tmp_reg = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog)); 2714 tmp_reg = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog));
2696 2715
2697 return data_ld_host_order_addr40(nfp_prog, meta->insn.src_reg * 2, 2716 return data_ld_host_order_addr40(nfp_prog, meta, meta->insn.src_reg * 2,
2698 tmp_reg, meta->insn.dst_reg * 2, size); 2717 tmp_reg, meta->insn.dst_reg * 2, size);
2699} 2718}
2700 2719
@@ -2755,7 +2774,7 @@ mem_ldx_data_from_pktcache_unaligned(struct nfp_prog *nfp_prog,
2755 wrp_reg_subpart(nfp_prog, dst_lo, src_lo, len_lo, off); 2774 wrp_reg_subpart(nfp_prog, dst_lo, src_lo, len_lo, off);
2756 2775
2757 if (!len_mid) { 2776 if (!len_mid) {
2758 wrp_immed(nfp_prog, dst_hi, 0); 2777 wrp_zext(nfp_prog, meta, dst_gpr);
2759 return 0; 2778 return 0;
2760 } 2779 }
2761 2780
@@ -2763,7 +2782,7 @@ mem_ldx_data_from_pktcache_unaligned(struct nfp_prog *nfp_prog,
2763 2782
2764 if (size <= REG_WIDTH) { 2783 if (size <= REG_WIDTH) {
2765 wrp_reg_or_subpart(nfp_prog, dst_lo, src_mid, len_mid, len_lo); 2784 wrp_reg_or_subpart(nfp_prog, dst_lo, src_mid, len_mid, len_lo);
2766 wrp_immed(nfp_prog, dst_hi, 0); 2785 wrp_zext(nfp_prog, meta, dst_gpr);
2767 } else { 2786 } else {
2768 swreg src_hi = reg_xfer(idx + 2); 2787 swreg src_hi = reg_xfer(idx + 2);
2769 2788
@@ -2794,10 +2813,10 @@ mem_ldx_data_from_pktcache_aligned(struct nfp_prog *nfp_prog,
2794 2813
2795 if (size < REG_WIDTH) { 2814 if (size < REG_WIDTH) {
2796 wrp_reg_subpart(nfp_prog, dst_lo, src_lo, size, 0); 2815 wrp_reg_subpart(nfp_prog, dst_lo, src_lo, size, 0);
2797 wrp_immed(nfp_prog, dst_hi, 0); 2816 wrp_zext(nfp_prog, meta, dst_gpr);
2798 } else if (size == REG_WIDTH) { 2817 } else if (size == REG_WIDTH) {
2799 wrp_mov(nfp_prog, dst_lo, src_lo); 2818 wrp_mov(nfp_prog, dst_lo, src_lo);
2800 wrp_immed(nfp_prog, dst_hi, 0); 2819 wrp_zext(nfp_prog, meta, dst_gpr);
2801 } else { 2820 } else {
2802 swreg src_hi = reg_xfer(idx + 1); 2821 swreg src_hi = reg_xfer(idx + 1);
2803 2822
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h
index e54d1ac84df2..57d6ff51e980 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/main.h
+++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h
@@ -238,6 +238,8 @@ struct nfp_bpf_reg_state {
238#define FLAG_INSN_SKIP_PREC_DEPENDENT BIT(4) 238#define FLAG_INSN_SKIP_PREC_DEPENDENT BIT(4)
239/* Instruction is optimized by the verifier */ 239/* Instruction is optimized by the verifier */
240#define FLAG_INSN_SKIP_VERIFIER_OPT BIT(5) 240#define FLAG_INSN_SKIP_VERIFIER_OPT BIT(5)
241/* Instruction needs to zero extend to high 32-bit */
242#define FLAG_INSN_DO_ZEXT BIT(6)
241 243
242#define FLAG_INSN_SKIP_MASK (FLAG_INSN_SKIP_NOOP | \ 244#define FLAG_INSN_SKIP_MASK (FLAG_INSN_SKIP_NOOP | \
243 FLAG_INSN_SKIP_PREC_DEPENDENT | \ 245 FLAG_INSN_SKIP_PREC_DEPENDENT | \
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
index 36f56eb4cbe2..e92ee510fd52 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
@@ -744,6 +744,17 @@ continue_subprog:
744 goto continue_subprog; 744 goto continue_subprog;
745} 745}
746 746
747static void nfp_bpf_insn_flag_zext(struct nfp_prog *nfp_prog,
748 struct bpf_insn_aux_data *aux)
749{
750 struct nfp_insn_meta *meta;
751
752 list_for_each_entry(meta, &nfp_prog->insns, l) {
753 if (aux[meta->n].zext_dst)
754 meta->flags |= FLAG_INSN_DO_ZEXT;
755 }
756}
757
747int nfp_bpf_finalize(struct bpf_verifier_env *env) 758int nfp_bpf_finalize(struct bpf_verifier_env *env)
748{ 759{
749 struct bpf_subprog_info *info; 760 struct bpf_subprog_info *info;
@@ -784,6 +795,7 @@ int nfp_bpf_finalize(struct bpf_verifier_env *env)
784 return -EOPNOTSUPP; 795 return -EOPNOTSUPP;
785 } 796 }
786 797
798 nfp_bpf_insn_flag_zext(nfp_prog, env->insn_aux_data);
787 return 0; 799 return 0;
788} 800}
789 801
diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index cb3c6b3b89c8..b631ee75762d 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -6,6 +6,7 @@
6#include <linux/errno.h> 6#include <linux/errno.h>
7#include <linux/jump_label.h> 7#include <linux/jump_label.h>
8#include <linux/percpu.h> 8#include <linux/percpu.h>
9#include <linux/percpu-refcount.h>
9#include <linux/rbtree.h> 10#include <linux/rbtree.h>
10#include <uapi/linux/bpf.h> 11#include <uapi/linux/bpf.h>
11 12
@@ -71,11 +72,17 @@ struct cgroup_bpf {
71 u32 flags[MAX_BPF_ATTACH_TYPE]; 72 u32 flags[MAX_BPF_ATTACH_TYPE];
72 73
73 /* temp storage for effective prog array used by prog_attach/detach */ 74 /* temp storage for effective prog array used by prog_attach/detach */
74 struct bpf_prog_array __rcu *inactive; 75 struct bpf_prog_array *inactive;
76
77 /* reference counter used to detach bpf programs after cgroup removal */
78 struct percpu_ref refcnt;
79
80 /* cgroup_bpf is released using a work queue */
81 struct work_struct release_work;
75}; 82};
76 83
77void cgroup_bpf_put(struct cgroup *cgrp);
78int cgroup_bpf_inherit(struct cgroup *cgrp); 84int cgroup_bpf_inherit(struct cgroup *cgrp);
85void cgroup_bpf_offline(struct cgroup *cgrp);
79 86
80int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog, 87int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
81 enum bpf_attach_type type, u32 flags); 88 enum bpf_attach_type type, u32 flags);
@@ -283,8 +290,8 @@ int cgroup_bpf_prog_query(const union bpf_attr *attr,
283 290
284struct bpf_prog; 291struct bpf_prog;
285struct cgroup_bpf {}; 292struct cgroup_bpf {};
286static inline void cgroup_bpf_put(struct cgroup *cgrp) {}
287static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; } 293static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; }
294static inline void cgroup_bpf_offline(struct cgroup *cgrp) {}
288 295
289static inline int cgroup_bpf_prog_attach(const union bpf_attr *attr, 296static inline int cgroup_bpf_prog_attach(const union bpf_attr *attr,
290 enum bpf_prog_type ptype, 297 enum bpf_prog_type ptype,
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 4fb3aa2dc975..e5a309e6a400 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -66,6 +66,11 @@ struct bpf_map_ops {
66 u64 imm, u32 *off); 66 u64 imm, u32 *off);
67}; 67};
68 68
69struct bpf_map_memory {
70 u32 pages;
71 struct user_struct *user;
72};
73
69struct bpf_map { 74struct bpf_map {
70 /* The first two cachelines with read-mostly members of which some 75 /* The first two cachelines with read-mostly members of which some
71 * are also accessed in fast-path (e.g. ops, max_entries). 76 * are also accessed in fast-path (e.g. ops, max_entries).
@@ -86,7 +91,7 @@ struct bpf_map {
86 u32 btf_key_type_id; 91 u32 btf_key_type_id;
87 u32 btf_value_type_id; 92 u32 btf_value_type_id;
88 struct btf *btf; 93 struct btf *btf;
89 u32 pages; 94 struct bpf_map_memory memory;
90 bool unpriv_array; 95 bool unpriv_array;
91 bool frozen; /* write-once */ 96 bool frozen; /* write-once */
92 /* 48 bytes hole */ 97 /* 48 bytes hole */
@@ -94,8 +99,7 @@ struct bpf_map {
94 /* The 3rd and 4th cacheline with misc members to avoid false sharing 99 /* The 3rd and 4th cacheline with misc members to avoid false sharing
95 * particularly with refcounting. 100 * particularly with refcounting.
96 */ 101 */
97 struct user_struct *user ____cacheline_aligned; 102 atomic_t refcnt ____cacheline_aligned;
98 atomic_t refcnt;
99 atomic_t usercnt; 103 atomic_t usercnt;
100 struct work_struct work; 104 struct work_struct work;
101 char name[BPF_OBJ_NAME_LEN]; 105 char name[BPF_OBJ_NAME_LEN];
@@ -370,6 +374,7 @@ struct bpf_prog_aux {
370 u32 id; 374 u32 id;
371 u32 func_cnt; /* used by non-func prog as the number of func progs */ 375 u32 func_cnt; /* used by non-func prog as the number of func progs */
372 u32 func_idx; /* 0 for non-func prog, the index in func array for func prog */ 376 u32 func_idx; /* 0 for non-func prog, the index in func array for func prog */
377 bool verifier_zext; /* Zero extensions has been inserted by verifier. */
373 bool offload_requested; 378 bool offload_requested;
374 struct bpf_prog **func; 379 struct bpf_prog **func;
375 void *jit_data; /* JIT specific data. arch dependent */ 380 void *jit_data; /* JIT specific data. arch dependent */
@@ -513,17 +518,17 @@ struct bpf_prog_array {
513}; 518};
514 519
515struct bpf_prog_array *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags); 520struct bpf_prog_array *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags);
516void bpf_prog_array_free(struct bpf_prog_array __rcu *progs); 521void bpf_prog_array_free(struct bpf_prog_array *progs);
517int bpf_prog_array_length(struct bpf_prog_array __rcu *progs); 522int bpf_prog_array_length(struct bpf_prog_array *progs);
518int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *progs, 523int bpf_prog_array_copy_to_user(struct bpf_prog_array *progs,
519 __u32 __user *prog_ids, u32 cnt); 524 __u32 __user *prog_ids, u32 cnt);
520 525
521void bpf_prog_array_delete_safe(struct bpf_prog_array __rcu *progs, 526void bpf_prog_array_delete_safe(struct bpf_prog_array *progs,
522 struct bpf_prog *old_prog); 527 struct bpf_prog *old_prog);
523int bpf_prog_array_copy_info(struct bpf_prog_array __rcu *array, 528int bpf_prog_array_copy_info(struct bpf_prog_array *array,
524 u32 *prog_ids, u32 request_cnt, 529 u32 *prog_ids, u32 request_cnt,
525 u32 *prog_cnt); 530 u32 *prog_cnt);
526int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array, 531int bpf_prog_array_copy(struct bpf_prog_array *old_array,
527 struct bpf_prog *exclude_prog, 532 struct bpf_prog *exclude_prog,
528 struct bpf_prog *include_prog, 533 struct bpf_prog *include_prog,
529 struct bpf_prog_array **new_array); 534 struct bpf_prog_array **new_array);
@@ -551,6 +556,56 @@ _out: \
551 _ret; \ 556 _ret; \
552 }) 557 })
553 558
559/* To be used by __cgroup_bpf_run_filter_skb for EGRESS BPF progs
560 * so BPF programs can request cwr for TCP packets.
561 *
562 * Current cgroup skb programs can only return 0 or 1 (0 to drop the
563 * packet. This macro changes the behavior so the low order bit
564 * indicates whether the packet should be dropped (0) or not (1)
565 * and the next bit is a congestion notification bit. This could be
566 * used by TCP to call tcp_enter_cwr()
567 *
568 * Hence, new allowed return values of CGROUP EGRESS BPF programs are:
569 * 0: drop packet
570 * 1: keep packet
571 * 2: drop packet and cn
572 * 3: keep packet and cn
573 *
574 * This macro then converts it to one of the NET_XMIT or an error
575 * code that is then interpreted as drop packet (and no cn):
576 * 0: NET_XMIT_SUCCESS skb should be transmitted
577 * 1: NET_XMIT_DROP skb should be dropped and cn
578 * 2: NET_XMIT_CN skb should be transmitted and cn
579 * 3: -EPERM skb should be dropped
580 */
581#define BPF_PROG_CGROUP_INET_EGRESS_RUN_ARRAY(array, ctx, func) \
582 ({ \
583 struct bpf_prog_array_item *_item; \
584 struct bpf_prog *_prog; \
585 struct bpf_prog_array *_array; \
586 u32 ret; \
587 u32 _ret = 1; \
588 u32 _cn = 0; \
589 preempt_disable(); \
590 rcu_read_lock(); \
591 _array = rcu_dereference(array); \
592 _item = &_array->items[0]; \
593 while ((_prog = READ_ONCE(_item->prog))) { \
594 bpf_cgroup_storage_set(_item->cgroup_storage); \
595 ret = func(_prog, ctx); \
596 _ret &= (ret & 1); \
597 _cn |= (ret & 2); \
598 _item++; \
599 } \
600 rcu_read_unlock(); \
601 preempt_enable(); \
602 if (_ret) \
603 _ret = (_cn ? NET_XMIT_CN : NET_XMIT_SUCCESS); \
604 else \
605 _ret = (_cn ? NET_XMIT_DROP : -EPERM); \
606 _ret; \
607 })
608
554#define BPF_PROG_RUN_ARRAY(array, ctx, func) \ 609#define BPF_PROG_RUN_ARRAY(array, ctx, func) \
555 __BPF_PROG_RUN_ARRAY(array, ctx, func, false) 610 __BPF_PROG_RUN_ARRAY(array, ctx, func, false)
556 611
@@ -595,9 +650,12 @@ struct bpf_map *__bpf_map_get(struct fd f);
595struct bpf_map * __must_check bpf_map_inc(struct bpf_map *map, bool uref); 650struct bpf_map * __must_check bpf_map_inc(struct bpf_map *map, bool uref);
596void bpf_map_put_with_uref(struct bpf_map *map); 651void bpf_map_put_with_uref(struct bpf_map *map);
597void bpf_map_put(struct bpf_map *map); 652void bpf_map_put(struct bpf_map *map);
598int bpf_map_precharge_memlock(u32 pages);
599int bpf_map_charge_memlock(struct bpf_map *map, u32 pages); 653int bpf_map_charge_memlock(struct bpf_map *map, u32 pages);
600void bpf_map_uncharge_memlock(struct bpf_map *map, u32 pages); 654void bpf_map_uncharge_memlock(struct bpf_map *map, u32 pages);
655int bpf_map_charge_init(struct bpf_map_memory *mem, size_t size);
656void bpf_map_charge_finish(struct bpf_map_memory *mem);
657void bpf_map_charge_move(struct bpf_map_memory *dst,
658 struct bpf_map_memory *src);
601void *bpf_map_area_alloc(size_t size, int numa_node); 659void *bpf_map_area_alloc(size_t size, int numa_node);
602void bpf_map_area_free(void *base); 660void bpf_map_area_free(void *base);
603void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr); 661void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr);
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 1305ccbd8fe6..704ed7971472 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -36,9 +36,11 @@
36 */ 36 */
37enum bpf_reg_liveness { 37enum bpf_reg_liveness {
38 REG_LIVE_NONE = 0, /* reg hasn't been read or written this branch */ 38 REG_LIVE_NONE = 0, /* reg hasn't been read or written this branch */
39 REG_LIVE_READ, /* reg was read, so we're sensitive to initial value */ 39 REG_LIVE_READ32 = 0x1, /* reg was read, so we're sensitive to initial value */
40 REG_LIVE_WRITTEN, /* reg was written first, screening off later reads */ 40 REG_LIVE_READ64 = 0x2, /* likewise, but full 64-bit content matters */
41 REG_LIVE_DONE = 4, /* liveness won't be updating this register anymore */ 41 REG_LIVE_READ = REG_LIVE_READ32 | REG_LIVE_READ64,
42 REG_LIVE_WRITTEN = 0x4, /* reg was written first, screening off later reads */
43 REG_LIVE_DONE = 0x8, /* liveness won't be updating this register anymore */
42}; 44};
43 45
44struct bpf_reg_state { 46struct bpf_reg_state {
@@ -131,6 +133,11 @@ struct bpf_reg_state {
131 * pointing to bpf_func_state. 133 * pointing to bpf_func_state.
132 */ 134 */
133 u32 frameno; 135 u32 frameno;
136 /* Tracks subreg definition. The stored value is the insn_idx of the
137 * writing insn. This is safe because subreg_def is used before any insn
138 * patching which only happens after main verification finished.
139 */
140 s32 subreg_def;
134 enum bpf_reg_liveness live; 141 enum bpf_reg_liveness live;
135}; 142};
136 143
@@ -187,6 +194,7 @@ struct bpf_func_state {
187struct bpf_verifier_state { 194struct bpf_verifier_state {
188 /* call stack tracking */ 195 /* call stack tracking */
189 struct bpf_func_state *frame[MAX_CALL_FRAMES]; 196 struct bpf_func_state *frame[MAX_CALL_FRAMES];
197 u32 insn_idx;
190 u32 curframe; 198 u32 curframe;
191 u32 active_spin_lock; 199 u32 active_spin_lock;
192 bool speculative; 200 bool speculative;
@@ -232,7 +240,9 @@ struct bpf_insn_aux_data {
232 int ctx_field_size; /* the ctx field size for load insn, maybe 0 */ 240 int ctx_field_size; /* the ctx field size for load insn, maybe 0 */
233 int sanitize_stack_off; /* stack slot to be cleared */ 241 int sanitize_stack_off; /* stack slot to be cleared */
234 bool seen; /* this insn was processed by the verifier */ 242 bool seen; /* this insn was processed by the verifier */
243 bool zext_dst; /* this insn zero extends dst reg */
235 u8 alu_state; /* used in combination with alu_limit */ 244 u8 alu_state; /* used in combination with alu_limit */
245 bool prune_point;
236 unsigned int orig_idx; /* original instruction index */ 246 unsigned int orig_idx; /* original instruction index */
237}; 247};
238 248
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index c0077adeea83..49e8facf7c4a 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -924,4 +924,22 @@ static inline bool cgroup_task_frozen(struct task_struct *task)
924 924
925#endif /* !CONFIG_CGROUPS */ 925#endif /* !CONFIG_CGROUPS */
926 926
927#ifdef CONFIG_CGROUP_BPF
928static inline void cgroup_bpf_get(struct cgroup *cgrp)
929{
930 percpu_ref_get(&cgrp->bpf.refcnt);
931}
932
933static inline void cgroup_bpf_put(struct cgroup *cgrp)
934{
935 percpu_ref_put(&cgrp->bpf.refcnt);
936}
937
938#else /* CONFIG_CGROUP_BPF */
939
940static inline void cgroup_bpf_get(struct cgroup *cgrp) {}
941static inline void cgroup_bpf_put(struct cgroup *cgrp) {}
942
943#endif /* CONFIG_CGROUP_BPF */
944
927#endif /* _LINUX_CGROUP_H */ 945#endif /* _LINUX_CGROUP_H */
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 7148bab96943..43b45d6db36d 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -160,6 +160,20 @@ struct ctl_table_header;
160 .off = 0, \ 160 .off = 0, \
161 .imm = IMM }) 161 .imm = IMM })
162 162
163/* Special form of mov32, used for doing explicit zero extension on dst. */
164#define BPF_ZEXT_REG(DST) \
165 ((struct bpf_insn) { \
166 .code = BPF_ALU | BPF_MOV | BPF_X, \
167 .dst_reg = DST, \
168 .src_reg = DST, \
169 .off = 0, \
170 .imm = 1 })
171
172static inline bool insn_is_zext(const struct bpf_insn *insn)
173{
174 return insn->code == (BPF_ALU | BPF_MOV | BPF_X) && insn->imm == 1;
175}
176
163/* BPF_LD_IMM64 macro encodes single 'load 64-bit immediate' insn */ 177/* BPF_LD_IMM64 macro encodes single 'load 64-bit immediate' insn */
164#define BPF_LD_IMM64(DST, IMM) \ 178#define BPF_LD_IMM64(DST, IMM) \
165 BPF_LD_IMM64_RAW(DST, 0, IMM) 179 BPF_LD_IMM64_RAW(DST, 0, IMM)
@@ -512,7 +526,8 @@ struct bpf_prog {
512 blinded:1, /* Was blinded */ 526 blinded:1, /* Was blinded */
513 is_func:1, /* program is a bpf function */ 527 is_func:1, /* program is a bpf function */
514 kprobe_override:1, /* Do we override a kprobe? */ 528 kprobe_override:1, /* Do we override a kprobe? */
515 has_callchain_buf:1; /* callchain buffer allocated? */ 529 has_callchain_buf:1, /* callchain buffer allocated? */
530 enforce_expected_attach_type:1; /* Enforce expected_attach_type checking at attach time */
516 enum bpf_prog_type type; /* Type of BPF program */ 531 enum bpf_prog_type type; /* Type of BPF program */
517 enum bpf_attach_type expected_attach_type; /* For some prog types */ 532 enum bpf_attach_type expected_attach_type; /* For some prog types */
518 u32 len; /* Number of filter blocks */ 533 u32 len; /* Number of filter blocks */
@@ -811,6 +826,7 @@ u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
811 826
812struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog); 827struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog);
813void bpf_jit_compile(struct bpf_prog *prog); 828void bpf_jit_compile(struct bpf_prog *prog);
829bool bpf_jit_needs_zext(void);
814bool bpf_helper_changes_pkt_data(void *func); 830bool bpf_helper_changes_pkt_data(void *func);
815 831
816static inline bool bpf_dump_raw_ok(void) 832static inline bool bpf_dump_raw_ok(void)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 63e0cf66f01a..7c6aef253173 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -260,6 +260,24 @@ enum bpf_attach_type {
260 */ 260 */
261#define BPF_F_ANY_ALIGNMENT (1U << 1) 261#define BPF_F_ANY_ALIGNMENT (1U << 1)
262 262
263/* BPF_F_TEST_RND_HI32 is used in BPF_PROG_LOAD command for testing purpose.
264 * Verifier does sub-register def/use analysis and identifies instructions whose
265 * def only matters for low 32-bit, high 32-bit is never referenced later
266 * through implicit zero extension. Therefore verifier notifies JIT back-ends
267 * that it is safe to ignore clearing high 32-bit for these instructions. This
268 * saves some back-ends a lot of code-gen. However such optimization is not
269 * necessary on some arches, for example x86_64, arm64 etc, whose JIT back-ends
270 * hence hasn't used verifier's analysis result. But, we really want to have a
271 * way to be able to verify the correctness of the described optimization on
272 * x86_64 on which testsuites are frequently exercised.
273 *
274 * So, this flag is introduced. Once it is set, verifier will randomize high
275 * 32-bit for those instructions who has been identified as safe to ignore them.
276 * Then, if verifier is not doing correct analysis, such randomization will
277 * regress tests to expose bugs.
278 */
279#define BPF_F_TEST_RND_HI32 (1U << 2)
280
263/* When BPF ldimm64's insn[0].src_reg != 0 then this can have 281/* When BPF ldimm64's insn[0].src_reg != 0 then this can have
264 * two extensions: 282 * two extensions:
265 * 283 *
@@ -2672,6 +2690,20 @@ union bpf_attr {
2672 * 0 on success. 2690 * 0 on success.
2673 * 2691 *
2674 * **-ENOENT** if the bpf-local-storage cannot be found. 2692 * **-ENOENT** if the bpf-local-storage cannot be found.
2693 *
2694 * int bpf_send_signal(u32 sig)
2695 * Description
2696 * Send signal *sig* to the current task.
2697 * Return
2698 * 0 on success or successfully queued.
2699 *
2700 * **-EBUSY** if work queue under nmi is full.
2701 *
2702 * **-EINVAL** if *sig* is invalid.
2703 *
2704 * **-EPERM** if no permission to send the *sig*.
2705 *
2706 * **-EAGAIN** if bpf program can try again.
2675 */ 2707 */
2676#define __BPF_FUNC_MAPPER(FN) \ 2708#define __BPF_FUNC_MAPPER(FN) \
2677 FN(unspec), \ 2709 FN(unspec), \
@@ -2782,7 +2814,8 @@ union bpf_attr {
2782 FN(strtol), \ 2814 FN(strtol), \
2783 FN(strtoul), \ 2815 FN(strtoul), \
2784 FN(sk_storage_get), \ 2816 FN(sk_storage_get), \
2785 FN(sk_storage_delete), 2817 FN(sk_storage_delete), \
2818 FN(send_signal),
2786 2819
2787/* integer value in 'imm' field of BPF_CALL instruction selects which helper 2820/* integer value in 'imm' field of BPF_CALL instruction selects which helper
2788 * function eBPF program intends to call 2821 * function eBPF program intends to call
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 584636c9e2eb..0349cbf23cdb 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -83,6 +83,7 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
83 u32 elem_size, index_mask, max_entries; 83 u32 elem_size, index_mask, max_entries;
84 bool unpriv = !capable(CAP_SYS_ADMIN); 84 bool unpriv = !capable(CAP_SYS_ADMIN);
85 u64 cost, array_size, mask64; 85 u64 cost, array_size, mask64;
86 struct bpf_map_memory mem;
86 struct bpf_array *array; 87 struct bpf_array *array;
87 88
88 elem_size = round_up(attr->value_size, 8); 89 elem_size = round_up(attr->value_size, 8);
@@ -116,32 +117,29 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
116 117
117 /* make sure there is no u32 overflow later in round_up() */ 118 /* make sure there is no u32 overflow later in round_up() */
118 cost = array_size; 119 cost = array_size;
119 if (cost >= U32_MAX - PAGE_SIZE) 120 if (percpu)
120 return ERR_PTR(-ENOMEM);
121 if (percpu) {
122 cost += (u64)attr->max_entries * elem_size * num_possible_cpus(); 121 cost += (u64)attr->max_entries * elem_size * num_possible_cpus();
123 if (cost >= U32_MAX - PAGE_SIZE)
124 return ERR_PTR(-ENOMEM);
125 }
126 cost = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
127 122
128 ret = bpf_map_precharge_memlock(cost); 123 ret = bpf_map_charge_init(&mem, cost);
129 if (ret < 0) 124 if (ret < 0)
130 return ERR_PTR(ret); 125 return ERR_PTR(ret);
131 126
132 /* allocate all map elements and zero-initialize them */ 127 /* allocate all map elements and zero-initialize them */
133 array = bpf_map_area_alloc(array_size, numa_node); 128 array = bpf_map_area_alloc(array_size, numa_node);
134 if (!array) 129 if (!array) {
130 bpf_map_charge_finish(&mem);
135 return ERR_PTR(-ENOMEM); 131 return ERR_PTR(-ENOMEM);
132 }
136 array->index_mask = index_mask; 133 array->index_mask = index_mask;
137 array->map.unpriv_array = unpriv; 134 array->map.unpriv_array = unpriv;
138 135
139 /* copy mandatory map attributes */ 136 /* copy mandatory map attributes */
140 bpf_map_init_from_attr(&array->map, attr); 137 bpf_map_init_from_attr(&array->map, attr);
141 array->map.pages = cost; 138 bpf_map_charge_move(&array->map.memory, &mem);
142 array->elem_size = elem_size; 139 array->elem_size = elem_size;
143 140
144 if (percpu && bpf_array_alloc_percpu(array)) { 141 if (percpu && bpf_array_alloc_percpu(array)) {
142 bpf_map_charge_finish(&array->map.memory);
145 bpf_map_area_free(array); 143 bpf_map_area_free(array);
146 return ERR_PTR(-ENOMEM); 144 return ERR_PTR(-ENOMEM);
147 } 145 }
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index fcde0f7b2585..1b65ab0df457 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -22,13 +22,23 @@
22DEFINE_STATIC_KEY_FALSE(cgroup_bpf_enabled_key); 22DEFINE_STATIC_KEY_FALSE(cgroup_bpf_enabled_key);
23EXPORT_SYMBOL(cgroup_bpf_enabled_key); 23EXPORT_SYMBOL(cgroup_bpf_enabled_key);
24 24
25void cgroup_bpf_offline(struct cgroup *cgrp)
26{
27 cgroup_get(cgrp);
28 percpu_ref_kill(&cgrp->bpf.refcnt);
29}
30
25/** 31/**
26 * cgroup_bpf_put() - put references of all bpf programs 32 * cgroup_bpf_release() - put references of all bpf programs and
27 * @cgrp: the cgroup to modify 33 * release all cgroup bpf data
34 * @work: work structure embedded into the cgroup to modify
28 */ 35 */
29void cgroup_bpf_put(struct cgroup *cgrp) 36static void cgroup_bpf_release(struct work_struct *work)
30{ 37{
38 struct cgroup *cgrp = container_of(work, struct cgroup,
39 bpf.release_work);
31 enum bpf_cgroup_storage_type stype; 40 enum bpf_cgroup_storage_type stype;
41 struct bpf_prog_array *old_array;
32 unsigned int type; 42 unsigned int type;
33 43
34 for (type = 0; type < ARRAY_SIZE(cgrp->bpf.progs); type++) { 44 for (type = 0; type < ARRAY_SIZE(cgrp->bpf.progs); type++) {
@@ -45,8 +55,27 @@ void cgroup_bpf_put(struct cgroup *cgrp)
45 kfree(pl); 55 kfree(pl);
46 static_branch_dec(&cgroup_bpf_enabled_key); 56 static_branch_dec(&cgroup_bpf_enabled_key);
47 } 57 }
48 bpf_prog_array_free(cgrp->bpf.effective[type]); 58 old_array = rcu_dereference_protected(
59 cgrp->bpf.effective[type],
60 percpu_ref_is_dying(&cgrp->bpf.refcnt));
61 bpf_prog_array_free(old_array);
49 } 62 }
63
64 percpu_ref_exit(&cgrp->bpf.refcnt);
65 cgroup_put(cgrp);
66}
67
68/**
69 * cgroup_bpf_release_fn() - callback used to schedule releasing
70 * of bpf cgroup data
71 * @ref: percpu ref counter structure
72 */
73static void cgroup_bpf_release_fn(struct percpu_ref *ref)
74{
75 struct cgroup *cgrp = container_of(ref, struct cgroup, bpf.refcnt);
76
77 INIT_WORK(&cgrp->bpf.release_work, cgroup_bpf_release);
78 queue_work(system_wq, &cgrp->bpf.release_work);
50} 79}
51 80
52/* count number of elements in the list. 81/* count number of elements in the list.
@@ -101,7 +130,7 @@ static bool hierarchy_allows_attach(struct cgroup *cgrp,
101 */ 130 */
102static int compute_effective_progs(struct cgroup *cgrp, 131static int compute_effective_progs(struct cgroup *cgrp,
103 enum bpf_attach_type type, 132 enum bpf_attach_type type,
104 struct bpf_prog_array __rcu **array) 133 struct bpf_prog_array **array)
105{ 134{
106 enum bpf_cgroup_storage_type stype; 135 enum bpf_cgroup_storage_type stype;
107 struct bpf_prog_array *progs; 136 struct bpf_prog_array *progs;
@@ -139,17 +168,16 @@ static int compute_effective_progs(struct cgroup *cgrp,
139 } 168 }
140 } while ((p = cgroup_parent(p))); 169 } while ((p = cgroup_parent(p)));
141 170
142 rcu_assign_pointer(*array, progs); 171 *array = progs;
143 return 0; 172 return 0;
144} 173}
145 174
146static void activate_effective_progs(struct cgroup *cgrp, 175static void activate_effective_progs(struct cgroup *cgrp,
147 enum bpf_attach_type type, 176 enum bpf_attach_type type,
148 struct bpf_prog_array __rcu *array) 177 struct bpf_prog_array *old_array)
149{ 178{
150 struct bpf_prog_array __rcu *old_array; 179 rcu_swap_protected(cgrp->bpf.effective[type], old_array,
151 180 lockdep_is_held(&cgroup_mutex));
152 old_array = xchg(&cgrp->bpf.effective[type], array);
153 /* free prog array after grace period, since __cgroup_bpf_run_*() 181 /* free prog array after grace period, since __cgroup_bpf_run_*()
154 * might be still walking the array 182 * might be still walking the array
155 */ 183 */
@@ -166,8 +194,13 @@ int cgroup_bpf_inherit(struct cgroup *cgrp)
166 * that array below is variable length 194 * that array below is variable length
167 */ 195 */
168#define NR ARRAY_SIZE(cgrp->bpf.effective) 196#define NR ARRAY_SIZE(cgrp->bpf.effective)
169 struct bpf_prog_array __rcu *arrays[NR] = {}; 197 struct bpf_prog_array *arrays[NR] = {};
170 int i; 198 int ret, i;
199
200 ret = percpu_ref_init(&cgrp->bpf.refcnt, cgroup_bpf_release_fn, 0,
201 GFP_KERNEL);
202 if (ret)
203 return ret;
171 204
172 for (i = 0; i < NR; i++) 205 for (i = 0; i < NR; i++)
173 INIT_LIST_HEAD(&cgrp->bpf.progs[i]); 206 INIT_LIST_HEAD(&cgrp->bpf.progs[i]);
@@ -183,6 +216,9 @@ int cgroup_bpf_inherit(struct cgroup *cgrp)
183cleanup: 216cleanup:
184 for (i = 0; i < NR; i++) 217 for (i = 0; i < NR; i++)
185 bpf_prog_array_free(arrays[i]); 218 bpf_prog_array_free(arrays[i]);
219
220 percpu_ref_exit(&cgrp->bpf.refcnt);
221
186 return -ENOMEM; 222 return -ENOMEM;
187} 223}
188 224
@@ -444,10 +480,14 @@ int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
444 enum bpf_attach_type type = attr->query.attach_type; 480 enum bpf_attach_type type = attr->query.attach_type;
445 struct list_head *progs = &cgrp->bpf.progs[type]; 481 struct list_head *progs = &cgrp->bpf.progs[type];
446 u32 flags = cgrp->bpf.flags[type]; 482 u32 flags = cgrp->bpf.flags[type];
483 struct bpf_prog_array *effective;
447 int cnt, ret = 0, i; 484 int cnt, ret = 0, i;
448 485
486 effective = rcu_dereference_protected(cgrp->bpf.effective[type],
487 lockdep_is_held(&cgroup_mutex));
488
449 if (attr->query.query_flags & BPF_F_QUERY_EFFECTIVE) 489 if (attr->query.query_flags & BPF_F_QUERY_EFFECTIVE)
450 cnt = bpf_prog_array_length(cgrp->bpf.effective[type]); 490 cnt = bpf_prog_array_length(effective);
451 else 491 else
452 cnt = prog_list_length(progs); 492 cnt = prog_list_length(progs);
453 493
@@ -464,8 +504,7 @@ int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
464 } 504 }
465 505
466 if (attr->query.query_flags & BPF_F_QUERY_EFFECTIVE) { 506 if (attr->query.query_flags & BPF_F_QUERY_EFFECTIVE) {
467 return bpf_prog_array_copy_to_user(cgrp->bpf.effective[type], 507 return bpf_prog_array_copy_to_user(effective, prog_ids, cnt);
468 prog_ids, cnt);
469 } else { 508 } else {
470 struct bpf_prog_list *pl; 509 struct bpf_prog_list *pl;
471 u32 id; 510 u32 id;
@@ -548,8 +587,16 @@ int cgroup_bpf_prog_query(const union bpf_attr *attr,
548 * The program type passed in via @type must be suitable for network 587 * The program type passed in via @type must be suitable for network
549 * filtering. No further check is performed to assert that. 588 * filtering. No further check is performed to assert that.
550 * 589 *
551 * This function will return %-EPERM if any if an attached program was found 590 * For egress packets, this function can return:
552 * and if it returned != 1 during execution. In all other cases, 0 is returned. 591 * NET_XMIT_SUCCESS (0) - continue with packet output
592 * NET_XMIT_DROP (1) - drop packet and notify TCP to call cwr
593 * NET_XMIT_CN (2) - continue with packet output and notify TCP
594 * to call cwr
595 * -EPERM - drop packet
596 *
597 * For ingress packets, this function will return -EPERM if any
598 * attached program was found and if it returned != 1 during execution.
599 * Otherwise 0 is returned.
553 */ 600 */
554int __cgroup_bpf_run_filter_skb(struct sock *sk, 601int __cgroup_bpf_run_filter_skb(struct sock *sk,
555 struct sk_buff *skb, 602 struct sk_buff *skb,
@@ -575,12 +622,19 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk,
575 /* compute pointers for the bpf prog */ 622 /* compute pointers for the bpf prog */
576 bpf_compute_and_save_data_end(skb, &saved_data_end); 623 bpf_compute_and_save_data_end(skb, &saved_data_end);
577 624
578 ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], skb, 625 if (type == BPF_CGROUP_INET_EGRESS) {
579 __bpf_prog_run_save_cb); 626 ret = BPF_PROG_CGROUP_INET_EGRESS_RUN_ARRAY(
627 cgrp->bpf.effective[type], skb, __bpf_prog_run_save_cb);
628 } else {
629 ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], skb,
630 __bpf_prog_run_save_cb);
631 ret = (ret == 1 ? 0 : -EPERM);
632 }
580 bpf_restore_data_end(skb, saved_data_end); 633 bpf_restore_data_end(skb, saved_data_end);
581 __skb_pull(skb, offset); 634 __skb_pull(skb, offset);
582 skb->sk = save_sk; 635 skb->sk = save_sk;
583 return ret == 1 ? 0 : -EPERM; 636
637 return ret;
584} 638}
585EXPORT_SYMBOL(__cgroup_bpf_run_filter_skb); 639EXPORT_SYMBOL(__cgroup_bpf_run_filter_skb);
586 640
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 242a643af82f..33fb292f2e30 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1795,38 +1795,33 @@ struct bpf_prog_array *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags)
1795 return &empty_prog_array.hdr; 1795 return &empty_prog_array.hdr;
1796} 1796}
1797 1797
1798void bpf_prog_array_free(struct bpf_prog_array __rcu *progs) 1798void bpf_prog_array_free(struct bpf_prog_array *progs)
1799{ 1799{
1800 if (!progs || 1800 if (!progs || progs == &empty_prog_array.hdr)
1801 progs == (struct bpf_prog_array __rcu *)&empty_prog_array.hdr)
1802 return; 1801 return;
1803 kfree_rcu(progs, rcu); 1802 kfree_rcu(progs, rcu);
1804} 1803}
1805 1804
1806int bpf_prog_array_length(struct bpf_prog_array __rcu *array) 1805int bpf_prog_array_length(struct bpf_prog_array *array)
1807{ 1806{
1808 struct bpf_prog_array_item *item; 1807 struct bpf_prog_array_item *item;
1809 u32 cnt = 0; 1808 u32 cnt = 0;
1810 1809
1811 rcu_read_lock(); 1810 for (item = array->items; item->prog; item++)
1812 item = rcu_dereference(array)->items;
1813 for (; item->prog; item++)
1814 if (item->prog != &dummy_bpf_prog.prog) 1811 if (item->prog != &dummy_bpf_prog.prog)
1815 cnt++; 1812 cnt++;
1816 rcu_read_unlock();
1817 return cnt; 1813 return cnt;
1818} 1814}
1819 1815
1820 1816
1821static bool bpf_prog_array_copy_core(struct bpf_prog_array __rcu *array, 1817static bool bpf_prog_array_copy_core(struct bpf_prog_array *array,
1822 u32 *prog_ids, 1818 u32 *prog_ids,
1823 u32 request_cnt) 1819 u32 request_cnt)
1824{ 1820{
1825 struct bpf_prog_array_item *item; 1821 struct bpf_prog_array_item *item;
1826 int i = 0; 1822 int i = 0;
1827 1823
1828 item = rcu_dereference_check(array, 1)->items; 1824 for (item = array->items; item->prog; item++) {
1829 for (; item->prog; item++) {
1830 if (item->prog == &dummy_bpf_prog.prog) 1825 if (item->prog == &dummy_bpf_prog.prog)
1831 continue; 1826 continue;
1832 prog_ids[i] = item->prog->aux->id; 1827 prog_ids[i] = item->prog->aux->id;
@@ -1839,7 +1834,7 @@ static bool bpf_prog_array_copy_core(struct bpf_prog_array __rcu *array,
1839 return !!(item->prog); 1834 return !!(item->prog);
1840} 1835}
1841 1836
1842int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *array, 1837int bpf_prog_array_copy_to_user(struct bpf_prog_array *array,
1843 __u32 __user *prog_ids, u32 cnt) 1838 __u32 __user *prog_ids, u32 cnt)
1844{ 1839{
1845 unsigned long err = 0; 1840 unsigned long err = 0;
@@ -1850,18 +1845,12 @@ int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *array,
1850 * cnt = bpf_prog_array_length(); 1845 * cnt = bpf_prog_array_length();
1851 * if (cnt > 0) 1846 * if (cnt > 0)
1852 * bpf_prog_array_copy_to_user(..., cnt); 1847 * bpf_prog_array_copy_to_user(..., cnt);
1853 * so below kcalloc doesn't need extra cnt > 0 check, but 1848 * so below kcalloc doesn't need extra cnt > 0 check.
1854 * bpf_prog_array_length() releases rcu lock and
1855 * prog array could have been swapped with empty or larger array,
1856 * so always copy 'cnt' prog_ids to the user.
1857 * In a rare race the user will see zero prog_ids
1858 */ 1849 */
1859 ids = kcalloc(cnt, sizeof(u32), GFP_USER | __GFP_NOWARN); 1850 ids = kcalloc(cnt, sizeof(u32), GFP_USER | __GFP_NOWARN);
1860 if (!ids) 1851 if (!ids)
1861 return -ENOMEM; 1852 return -ENOMEM;
1862 rcu_read_lock();
1863 nospc = bpf_prog_array_copy_core(array, ids, cnt); 1853 nospc = bpf_prog_array_copy_core(array, ids, cnt);
1864 rcu_read_unlock();
1865 err = copy_to_user(prog_ids, ids, cnt * sizeof(u32)); 1854 err = copy_to_user(prog_ids, ids, cnt * sizeof(u32));
1866 kfree(ids); 1855 kfree(ids);
1867 if (err) 1856 if (err)
@@ -1871,19 +1860,19 @@ int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *array,
1871 return 0; 1860 return 0;
1872} 1861}
1873 1862
1874void bpf_prog_array_delete_safe(struct bpf_prog_array __rcu *array, 1863void bpf_prog_array_delete_safe(struct bpf_prog_array *array,
1875 struct bpf_prog *old_prog) 1864 struct bpf_prog *old_prog)
1876{ 1865{
1877 struct bpf_prog_array_item *item = array->items; 1866 struct bpf_prog_array_item *item;
1878 1867
1879 for (; item->prog; item++) 1868 for (item = array->items; item->prog; item++)
1880 if (item->prog == old_prog) { 1869 if (item->prog == old_prog) {
1881 WRITE_ONCE(item->prog, &dummy_bpf_prog.prog); 1870 WRITE_ONCE(item->prog, &dummy_bpf_prog.prog);
1882 break; 1871 break;
1883 } 1872 }
1884} 1873}
1885 1874
1886int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array, 1875int bpf_prog_array_copy(struct bpf_prog_array *old_array,
1887 struct bpf_prog *exclude_prog, 1876 struct bpf_prog *exclude_prog,
1888 struct bpf_prog *include_prog, 1877 struct bpf_prog *include_prog,
1889 struct bpf_prog_array **new_array) 1878 struct bpf_prog_array **new_array)
@@ -1947,7 +1936,7 @@ int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array,
1947 return 0; 1936 return 0;
1948} 1937}
1949 1938
1950int bpf_prog_array_copy_info(struct bpf_prog_array __rcu *array, 1939int bpf_prog_array_copy_info(struct bpf_prog_array *array,
1951 u32 *prog_ids, u32 request_cnt, 1940 u32 *prog_ids, u32 request_cnt,
1952 u32 *prog_cnt) 1941 u32 *prog_cnt)
1953{ 1942{
@@ -2090,6 +2079,15 @@ bool __weak bpf_helper_changes_pkt_data(void *func)
2090 return false; 2079 return false;
2091} 2080}
2092 2081
2082/* Return TRUE if the JIT backend wants verifier to enable sub-register usage
2083 * analysis code and wants explicit zero extension inserted by verifier.
2084 * Otherwise, return FALSE.
2085 */
2086bool __weak bpf_jit_needs_zext(void)
2087{
2088 return false;
2089}
2090
2093/* To execute LD_ABS/LD_IND instructions __bpf_prog_run() may call 2091/* To execute LD_ABS/LD_IND instructions __bpf_prog_run() may call
2094 * skb_copy_bits(), so provide a weak definition of it for NET-less config. 2092 * skb_copy_bits(), so provide a weak definition of it for NET-less config.
2095 */ 2093 */
diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index cf727d77c6c6..b31a71909307 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -106,12 +106,9 @@ static struct bpf_map *cpu_map_alloc(union bpf_attr *attr)
106 /* make sure page count doesn't overflow */ 106 /* make sure page count doesn't overflow */
107 cost = (u64) cmap->map.max_entries * sizeof(struct bpf_cpu_map_entry *); 107 cost = (u64) cmap->map.max_entries * sizeof(struct bpf_cpu_map_entry *);
108 cost += cpu_map_bitmap_size(attr) * num_possible_cpus(); 108 cost += cpu_map_bitmap_size(attr) * num_possible_cpus();
109 if (cost >= U32_MAX - PAGE_SIZE)
110 goto free_cmap;
111 cmap->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
112 109
113 /* Notice returns -EPERM on if map size is larger than memlock limit */ 110 /* Notice returns -EPERM on if map size is larger than memlock limit */
114 ret = bpf_map_precharge_memlock(cmap->map.pages); 111 ret = bpf_map_charge_init(&cmap->map.memory, cost);
115 if (ret) { 112 if (ret) {
116 err = ret; 113 err = ret;
117 goto free_cmap; 114 goto free_cmap;
@@ -121,7 +118,7 @@ static struct bpf_map *cpu_map_alloc(union bpf_attr *attr)
121 cmap->flush_needed = __alloc_percpu(cpu_map_bitmap_size(attr), 118 cmap->flush_needed = __alloc_percpu(cpu_map_bitmap_size(attr),
122 __alignof__(unsigned long)); 119 __alignof__(unsigned long));
123 if (!cmap->flush_needed) 120 if (!cmap->flush_needed)
124 goto free_cmap; 121 goto free_charge;
125 122
126 /* Alloc array for possible remote "destination" CPUs */ 123 /* Alloc array for possible remote "destination" CPUs */
127 cmap->cpu_map = bpf_map_area_alloc(cmap->map.max_entries * 124 cmap->cpu_map = bpf_map_area_alloc(cmap->map.max_entries *
@@ -133,6 +130,8 @@ static struct bpf_map *cpu_map_alloc(union bpf_attr *attr)
133 return &cmap->map; 130 return &cmap->map;
134free_percpu: 131free_percpu:
135 free_percpu(cmap->flush_needed); 132 free_percpu(cmap->flush_needed);
133free_charge:
134 bpf_map_charge_finish(&cmap->map.memory);
136free_cmap: 135free_cmap:
137 kfree(cmap); 136 kfree(cmap);
138 return ERR_PTR(err); 137 return ERR_PTR(err);
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index 1e525d70f833..5ae7cce5ef16 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -108,13 +108,9 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
108 /* make sure page count doesn't overflow */ 108 /* make sure page count doesn't overflow */
109 cost = (u64) dtab->map.max_entries * sizeof(struct bpf_dtab_netdev *); 109 cost = (u64) dtab->map.max_entries * sizeof(struct bpf_dtab_netdev *);
110 cost += dev_map_bitmap_size(attr) * num_possible_cpus(); 110 cost += dev_map_bitmap_size(attr) * num_possible_cpus();
111 if (cost >= U32_MAX - PAGE_SIZE)
112 goto free_dtab;
113
114 dtab->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
115 111
116 /* if map size is larger than memlock limit, reject it early */ 112 /* if map size is larger than memlock limit, reject it */
117 err = bpf_map_precharge_memlock(dtab->map.pages); 113 err = bpf_map_charge_init(&dtab->map.memory, cost);
118 if (err) 114 if (err)
119 goto free_dtab; 115 goto free_dtab;
120 116
@@ -125,19 +121,21 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
125 __alignof__(unsigned long), 121 __alignof__(unsigned long),
126 GFP_KERNEL | __GFP_NOWARN); 122 GFP_KERNEL | __GFP_NOWARN);
127 if (!dtab->flush_needed) 123 if (!dtab->flush_needed)
128 goto free_dtab; 124 goto free_charge;
129 125
130 dtab->netdev_map = bpf_map_area_alloc(dtab->map.max_entries * 126 dtab->netdev_map = bpf_map_area_alloc(dtab->map.max_entries *
131 sizeof(struct bpf_dtab_netdev *), 127 sizeof(struct bpf_dtab_netdev *),
132 dtab->map.numa_node); 128 dtab->map.numa_node);
133 if (!dtab->netdev_map) 129 if (!dtab->netdev_map)
134 goto free_dtab; 130 goto free_charge;
135 131
136 spin_lock(&dev_map_lock); 132 spin_lock(&dev_map_lock);
137 list_add_tail_rcu(&dtab->list, &dev_map_list); 133 list_add_tail_rcu(&dtab->list, &dev_map_list);
138 spin_unlock(&dev_map_lock); 134 spin_unlock(&dev_map_lock);
139 135
140 return &dtab->map; 136 return &dtab->map;
137free_charge:
138 bpf_map_charge_finish(&dtab->map.memory);
141free_dtab: 139free_dtab:
142 free_percpu(dtab->flush_needed); 140 free_percpu(dtab->flush_needed);
143 kfree(dtab); 141 kfree(dtab);
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 0f2708fde5f7..d92e05d9979b 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -360,14 +360,8 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
360 else 360 else
361 cost += (u64) htab->elem_size * num_possible_cpus(); 361 cost += (u64) htab->elem_size * num_possible_cpus();
362 362
363 if (cost >= U32_MAX - PAGE_SIZE) 363 /* if map size is larger than memlock limit, reject it */
364 /* make sure page count doesn't overflow */ 364 err = bpf_map_charge_init(&htab->map.memory, cost);
365 goto free_htab;
366
367 htab->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
368
369 /* if map size is larger than memlock limit, reject it early */
370 err = bpf_map_precharge_memlock(htab->map.pages);
371 if (err) 365 if (err)
372 goto free_htab; 366 goto free_htab;
373 367
@@ -376,7 +370,7 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
376 sizeof(struct bucket), 370 sizeof(struct bucket),
377 htab->map.numa_node); 371 htab->map.numa_node);
378 if (!htab->buckets) 372 if (!htab->buckets)
379 goto free_htab; 373 goto free_charge;
380 374
381 if (htab->map.map_flags & BPF_F_ZERO_SEED) 375 if (htab->map.map_flags & BPF_F_ZERO_SEED)
382 htab->hashrnd = 0; 376 htab->hashrnd = 0;
@@ -409,6 +403,8 @@ free_prealloc:
409 prealloc_destroy(htab); 403 prealloc_destroy(htab);
410free_buckets: 404free_buckets:
411 bpf_map_area_free(htab->buckets); 405 bpf_map_area_free(htab->buckets);
406free_charge:
407 bpf_map_charge_finish(&htab->map.memory);
412free_htab: 408free_htab:
413 kfree(htab); 409 kfree(htab);
414 return ERR_PTR(err); 410 return ERR_PTR(err);
diff --git a/kernel/bpf/local_storage.c b/kernel/bpf/local_storage.c
index 980e8f1f6cb5..addd6fdceec8 100644
--- a/kernel/bpf/local_storage.c
+++ b/kernel/bpf/local_storage.c
@@ -272,6 +272,8 @@ static struct bpf_map *cgroup_storage_map_alloc(union bpf_attr *attr)
272{ 272{
273 int numa_node = bpf_map_attr_numa_node(attr); 273 int numa_node = bpf_map_attr_numa_node(attr);
274 struct bpf_cgroup_storage_map *map; 274 struct bpf_cgroup_storage_map *map;
275 struct bpf_map_memory mem;
276 int ret;
275 277
276 if (attr->key_size != sizeof(struct bpf_cgroup_storage_key)) 278 if (attr->key_size != sizeof(struct bpf_cgroup_storage_key))
277 return ERR_PTR(-EINVAL); 279 return ERR_PTR(-EINVAL);
@@ -290,13 +292,18 @@ static struct bpf_map *cgroup_storage_map_alloc(union bpf_attr *attr)
290 /* max_entries is not used and enforced to be 0 */ 292 /* max_entries is not used and enforced to be 0 */
291 return ERR_PTR(-EINVAL); 293 return ERR_PTR(-EINVAL);
292 294
295 ret = bpf_map_charge_init(&mem, sizeof(struct bpf_cgroup_storage_map));
296 if (ret < 0)
297 return ERR_PTR(ret);
298
293 map = kmalloc_node(sizeof(struct bpf_cgroup_storage_map), 299 map = kmalloc_node(sizeof(struct bpf_cgroup_storage_map),
294 __GFP_ZERO | GFP_USER, numa_node); 300 __GFP_ZERO | GFP_USER, numa_node);
295 if (!map) 301 if (!map) {
302 bpf_map_charge_finish(&mem);
296 return ERR_PTR(-ENOMEM); 303 return ERR_PTR(-ENOMEM);
304 }
297 305
298 map->map.pages = round_up(sizeof(struct bpf_cgroup_storage_map), 306 bpf_map_charge_move(&map->map.memory, &mem);
299 PAGE_SIZE) >> PAGE_SHIFT;
300 307
301 /* copy mandatory map attributes */ 308 /* copy mandatory map attributes */
302 bpf_map_init_from_attr(&map->map, attr); 309 bpf_map_init_from_attr(&map->map, attr);
diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c
index e61630c2e50b..09334f13a8a0 100644
--- a/kernel/bpf/lpm_trie.c
+++ b/kernel/bpf/lpm_trie.c
@@ -573,14 +573,8 @@ static struct bpf_map *trie_alloc(union bpf_attr *attr)
573 cost_per_node = sizeof(struct lpm_trie_node) + 573 cost_per_node = sizeof(struct lpm_trie_node) +
574 attr->value_size + trie->data_size; 574 attr->value_size + trie->data_size;
575 cost += (u64) attr->max_entries * cost_per_node; 575 cost += (u64) attr->max_entries * cost_per_node;
576 if (cost >= U32_MAX - PAGE_SIZE) {
577 ret = -E2BIG;
578 goto out_err;
579 }
580
581 trie->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
582 576
583 ret = bpf_map_precharge_memlock(trie->map.pages); 577 ret = bpf_map_charge_init(&trie->map.memory, cost);
584 if (ret) 578 if (ret)
585 goto out_err; 579 goto out_err;
586 580
diff --git a/kernel/bpf/queue_stack_maps.c b/kernel/bpf/queue_stack_maps.c
index 0b140d236889..f697647ceb54 100644
--- a/kernel/bpf/queue_stack_maps.c
+++ b/kernel/bpf/queue_stack_maps.c
@@ -67,29 +67,28 @@ static int queue_stack_map_alloc_check(union bpf_attr *attr)
67static struct bpf_map *queue_stack_map_alloc(union bpf_attr *attr) 67static struct bpf_map *queue_stack_map_alloc(union bpf_attr *attr)
68{ 68{
69 int ret, numa_node = bpf_map_attr_numa_node(attr); 69 int ret, numa_node = bpf_map_attr_numa_node(attr);
70 struct bpf_map_memory mem = {0};
70 struct bpf_queue_stack *qs; 71 struct bpf_queue_stack *qs;
71 u64 size, queue_size, cost; 72 u64 size, queue_size, cost;
72 73
73 size = (u64) attr->max_entries + 1; 74 size = (u64) attr->max_entries + 1;
74 cost = queue_size = sizeof(*qs) + size * attr->value_size; 75 cost = queue_size = sizeof(*qs) + size * attr->value_size;
75 if (cost >= U32_MAX - PAGE_SIZE)
76 return ERR_PTR(-E2BIG);
77 76
78 cost = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT; 77 ret = bpf_map_charge_init(&mem, cost);
79
80 ret = bpf_map_precharge_memlock(cost);
81 if (ret < 0) 78 if (ret < 0)
82 return ERR_PTR(ret); 79 return ERR_PTR(ret);
83 80
84 qs = bpf_map_area_alloc(queue_size, numa_node); 81 qs = bpf_map_area_alloc(queue_size, numa_node);
85 if (!qs) 82 if (!qs) {
83 bpf_map_charge_finish(&mem);
86 return ERR_PTR(-ENOMEM); 84 return ERR_PTR(-ENOMEM);
85 }
87 86
88 memset(qs, 0, sizeof(*qs)); 87 memset(qs, 0, sizeof(*qs));
89 88
90 bpf_map_init_from_attr(&qs->map, attr); 89 bpf_map_init_from_attr(&qs->map, attr);
91 90
92 qs->map.pages = cost; 91 bpf_map_charge_move(&qs->map.memory, &mem);
93 qs->size = size; 92 qs->size = size;
94 93
95 raw_spin_lock_init(&qs->lock); 94 raw_spin_lock_init(&qs->lock);
diff --git a/kernel/bpf/reuseport_array.c b/kernel/bpf/reuseport_array.c
index 18e225de80ff..50c083ba978c 100644
--- a/kernel/bpf/reuseport_array.c
+++ b/kernel/bpf/reuseport_array.c
@@ -151,7 +151,8 @@ static struct bpf_map *reuseport_array_alloc(union bpf_attr *attr)
151{ 151{
152 int err, numa_node = bpf_map_attr_numa_node(attr); 152 int err, numa_node = bpf_map_attr_numa_node(attr);
153 struct reuseport_array *array; 153 struct reuseport_array *array;
154 u64 cost, array_size; 154 struct bpf_map_memory mem;
155 u64 array_size;
155 156
156 if (!capable(CAP_SYS_ADMIN)) 157 if (!capable(CAP_SYS_ADMIN))
157 return ERR_PTR(-EPERM); 158 return ERR_PTR(-EPERM);
@@ -159,24 +160,20 @@ static struct bpf_map *reuseport_array_alloc(union bpf_attr *attr)
159 array_size = sizeof(*array); 160 array_size = sizeof(*array);
160 array_size += (u64)attr->max_entries * sizeof(struct sock *); 161 array_size += (u64)attr->max_entries * sizeof(struct sock *);
161 162
162 /* make sure there is no u32 overflow later in round_up() */ 163 err = bpf_map_charge_init(&mem, array_size);
163 cost = array_size;
164 if (cost >= U32_MAX - PAGE_SIZE)
165 return ERR_PTR(-ENOMEM);
166 cost = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
167
168 err = bpf_map_precharge_memlock(cost);
169 if (err) 164 if (err)
170 return ERR_PTR(err); 165 return ERR_PTR(err);
171 166
172 /* allocate all map elements and zero-initialize them */ 167 /* allocate all map elements and zero-initialize them */
173 array = bpf_map_area_alloc(array_size, numa_node); 168 array = bpf_map_area_alloc(array_size, numa_node);
174 if (!array) 169 if (!array) {
170 bpf_map_charge_finish(&mem);
175 return ERR_PTR(-ENOMEM); 171 return ERR_PTR(-ENOMEM);
172 }
176 173
177 /* copy mandatory map attributes */ 174 /* copy mandatory map attributes */
178 bpf_map_init_from_attr(&array->map, attr); 175 bpf_map_init_from_attr(&array->map, attr);
179 array->map.pages = cost; 176 bpf_map_charge_move(&array->map.memory, &mem);
180 177
181 return &array->map; 178 return &array->map;
182} 179}
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index 950ab2f28922..3d86072d8e32 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -89,6 +89,7 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr)
89{ 89{
90 u32 value_size = attr->value_size; 90 u32 value_size = attr->value_size;
91 struct bpf_stack_map *smap; 91 struct bpf_stack_map *smap;
92 struct bpf_map_memory mem;
92 u64 cost, n_buckets; 93 u64 cost, n_buckets;
93 int err; 94 int err;
94 95
@@ -116,40 +117,37 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr)
116 n_buckets = roundup_pow_of_two(attr->max_entries); 117 n_buckets = roundup_pow_of_two(attr->max_entries);
117 118
118 cost = n_buckets * sizeof(struct stack_map_bucket *) + sizeof(*smap); 119 cost = n_buckets * sizeof(struct stack_map_bucket *) + sizeof(*smap);
119 if (cost >= U32_MAX - PAGE_SIZE) 120 cost += n_buckets * (value_size + sizeof(struct stack_map_bucket));
120 return ERR_PTR(-E2BIG); 121 err = bpf_map_charge_init(&mem, cost);
122 if (err)
123 return ERR_PTR(err);
121 124
122 smap = bpf_map_area_alloc(cost, bpf_map_attr_numa_node(attr)); 125 smap = bpf_map_area_alloc(cost, bpf_map_attr_numa_node(attr));
123 if (!smap) 126 if (!smap) {
127 bpf_map_charge_finish(&mem);
124 return ERR_PTR(-ENOMEM); 128 return ERR_PTR(-ENOMEM);
125 129 }
126 err = -E2BIG;
127 cost += n_buckets * (value_size + sizeof(struct stack_map_bucket));
128 if (cost >= U32_MAX - PAGE_SIZE)
129 goto free_smap;
130 130
131 bpf_map_init_from_attr(&smap->map, attr); 131 bpf_map_init_from_attr(&smap->map, attr);
132 smap->map.value_size = value_size; 132 smap->map.value_size = value_size;
133 smap->n_buckets = n_buckets; 133 smap->n_buckets = n_buckets;
134 smap->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
135
136 err = bpf_map_precharge_memlock(smap->map.pages);
137 if (err)
138 goto free_smap;
139 134
140 err = get_callchain_buffers(sysctl_perf_event_max_stack); 135 err = get_callchain_buffers(sysctl_perf_event_max_stack);
141 if (err) 136 if (err)
142 goto free_smap; 137 goto free_charge;
143 138
144 err = prealloc_elems_and_freelist(smap); 139 err = prealloc_elems_and_freelist(smap);
145 if (err) 140 if (err)
146 goto put_buffers; 141 goto put_buffers;
147 142
143 bpf_map_charge_move(&smap->map.memory, &mem);
144
148 return &smap->map; 145 return &smap->map;
149 146
150put_buffers: 147put_buffers:
151 put_callchain_buffers(); 148 put_callchain_buffers();
152free_smap: 149free_charge:
150 bpf_map_charge_finish(&mem);
153 bpf_map_area_free(smap); 151 bpf_map_area_free(smap);
154 return ERR_PTR(err); 152 return ERR_PTR(err);
155} 153}
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index cb5440b02e82..4c53cbd3329d 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -188,19 +188,6 @@ void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr)
188 map->numa_node = bpf_map_attr_numa_node(attr); 188 map->numa_node = bpf_map_attr_numa_node(attr);
189} 189}
190 190
191int bpf_map_precharge_memlock(u32 pages)
192{
193 struct user_struct *user = get_current_user();
194 unsigned long memlock_limit, cur;
195
196 memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
197 cur = atomic_long_read(&user->locked_vm);
198 free_uid(user);
199 if (cur + pages > memlock_limit)
200 return -EPERM;
201 return 0;
202}
203
204static int bpf_charge_memlock(struct user_struct *user, u32 pages) 191static int bpf_charge_memlock(struct user_struct *user, u32 pages)
205{ 192{
206 unsigned long memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; 193 unsigned long memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
@@ -214,45 +201,62 @@ static int bpf_charge_memlock(struct user_struct *user, u32 pages)
214 201
215static void bpf_uncharge_memlock(struct user_struct *user, u32 pages) 202static void bpf_uncharge_memlock(struct user_struct *user, u32 pages)
216{ 203{
217 atomic_long_sub(pages, &user->locked_vm); 204 if (user)
205 atomic_long_sub(pages, &user->locked_vm);
218} 206}
219 207
220static int bpf_map_init_memlock(struct bpf_map *map) 208int bpf_map_charge_init(struct bpf_map_memory *mem, size_t size)
221{ 209{
222 struct user_struct *user = get_current_user(); 210 u32 pages = round_up(size, PAGE_SIZE) >> PAGE_SHIFT;
211 struct user_struct *user;
223 int ret; 212 int ret;
224 213
225 ret = bpf_charge_memlock(user, map->pages); 214 if (size >= U32_MAX - PAGE_SIZE)
215 return -E2BIG;
216
217 user = get_current_user();
218 ret = bpf_charge_memlock(user, pages);
226 if (ret) { 219 if (ret) {
227 free_uid(user); 220 free_uid(user);
228 return ret; 221 return ret;
229 } 222 }
230 map->user = user; 223
231 return ret; 224 mem->pages = pages;
225 mem->user = user;
226
227 return 0;
232} 228}
233 229
234static void bpf_map_release_memlock(struct bpf_map *map) 230void bpf_map_charge_finish(struct bpf_map_memory *mem)
235{ 231{
236 struct user_struct *user = map->user; 232 bpf_uncharge_memlock(mem->user, mem->pages);
237 bpf_uncharge_memlock(user, map->pages); 233 free_uid(mem->user);
238 free_uid(user); 234}
235
236void bpf_map_charge_move(struct bpf_map_memory *dst,
237 struct bpf_map_memory *src)
238{
239 *dst = *src;
240
241 /* Make sure src will not be used for the redundant uncharging. */
242 memset(src, 0, sizeof(struct bpf_map_memory));
239} 243}
240 244
241int bpf_map_charge_memlock(struct bpf_map *map, u32 pages) 245int bpf_map_charge_memlock(struct bpf_map *map, u32 pages)
242{ 246{
243 int ret; 247 int ret;
244 248
245 ret = bpf_charge_memlock(map->user, pages); 249 ret = bpf_charge_memlock(map->memory.user, pages);
246 if (ret) 250 if (ret)
247 return ret; 251 return ret;
248 map->pages += pages; 252 map->memory.pages += pages;
249 return ret; 253 return ret;
250} 254}
251 255
252void bpf_map_uncharge_memlock(struct bpf_map *map, u32 pages) 256void bpf_map_uncharge_memlock(struct bpf_map *map, u32 pages)
253{ 257{
254 bpf_uncharge_memlock(map->user, pages); 258 bpf_uncharge_memlock(map->memory.user, pages);
255 map->pages -= pages; 259 map->memory.pages -= pages;
256} 260}
257 261
258static int bpf_map_alloc_id(struct bpf_map *map) 262static int bpf_map_alloc_id(struct bpf_map *map)
@@ -303,11 +307,13 @@ void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock)
303static void bpf_map_free_deferred(struct work_struct *work) 307static void bpf_map_free_deferred(struct work_struct *work)
304{ 308{
305 struct bpf_map *map = container_of(work, struct bpf_map, work); 309 struct bpf_map *map = container_of(work, struct bpf_map, work);
310 struct bpf_map_memory mem;
306 311
307 bpf_map_release_memlock(map); 312 bpf_map_charge_move(&mem, &map->memory);
308 security_bpf_map_free(map); 313 security_bpf_map_free(map);
309 /* implementation dependent freeing */ 314 /* implementation dependent freeing */
310 map->ops->map_free(map); 315 map->ops->map_free(map);
316 bpf_map_charge_finish(&mem);
311} 317}
312 318
313static void bpf_map_put_uref(struct bpf_map *map) 319static void bpf_map_put_uref(struct bpf_map *map)
@@ -395,7 +401,7 @@ static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
395 map->value_size, 401 map->value_size,
396 map->max_entries, 402 map->max_entries,
397 map->map_flags, 403 map->map_flags,
398 map->pages * 1ULL << PAGE_SHIFT, 404 map->memory.pages * 1ULL << PAGE_SHIFT,
399 map->id, 405 map->id,
400 READ_ONCE(map->frozen)); 406 READ_ONCE(map->frozen));
401 407
@@ -549,6 +555,7 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf,
549static int map_create(union bpf_attr *attr) 555static int map_create(union bpf_attr *attr)
550{ 556{
551 int numa_node = bpf_map_attr_numa_node(attr); 557 int numa_node = bpf_map_attr_numa_node(attr);
558 struct bpf_map_memory mem;
552 struct bpf_map *map; 559 struct bpf_map *map;
553 int f_flags; 560 int f_flags;
554 int err; 561 int err;
@@ -573,7 +580,7 @@ static int map_create(union bpf_attr *attr)
573 580
574 err = bpf_obj_name_cpy(map->name, attr->map_name); 581 err = bpf_obj_name_cpy(map->name, attr->map_name);
575 if (err) 582 if (err)
576 goto free_map_nouncharge; 583 goto free_map;
577 584
578 atomic_set(&map->refcnt, 1); 585 atomic_set(&map->refcnt, 1);
579 atomic_set(&map->usercnt, 1); 586 atomic_set(&map->usercnt, 1);
@@ -583,20 +590,20 @@ static int map_create(union bpf_attr *attr)
583 590
584 if (!attr->btf_value_type_id) { 591 if (!attr->btf_value_type_id) {
585 err = -EINVAL; 592 err = -EINVAL;
586 goto free_map_nouncharge; 593 goto free_map;
587 } 594 }
588 595
589 btf = btf_get_by_fd(attr->btf_fd); 596 btf = btf_get_by_fd(attr->btf_fd);
590 if (IS_ERR(btf)) { 597 if (IS_ERR(btf)) {
591 err = PTR_ERR(btf); 598 err = PTR_ERR(btf);
592 goto free_map_nouncharge; 599 goto free_map;
593 } 600 }
594 601
595 err = map_check_btf(map, btf, attr->btf_key_type_id, 602 err = map_check_btf(map, btf, attr->btf_key_type_id,
596 attr->btf_value_type_id); 603 attr->btf_value_type_id);
597 if (err) { 604 if (err) {
598 btf_put(btf); 605 btf_put(btf);
599 goto free_map_nouncharge; 606 goto free_map;
600 } 607 }
601 608
602 map->btf = btf; 609 map->btf = btf;
@@ -608,15 +615,11 @@ static int map_create(union bpf_attr *attr)
608 615
609 err = security_bpf_map_alloc(map); 616 err = security_bpf_map_alloc(map);
610 if (err) 617 if (err)
611 goto free_map_nouncharge; 618 goto free_map;
612
613 err = bpf_map_init_memlock(map);
614 if (err)
615 goto free_map_sec;
616 619
617 err = bpf_map_alloc_id(map); 620 err = bpf_map_alloc_id(map);
618 if (err) 621 if (err)
619 goto free_map; 622 goto free_map_sec;
620 623
621 err = bpf_map_new_fd(map, f_flags); 624 err = bpf_map_new_fd(map, f_flags);
622 if (err < 0) { 625 if (err < 0) {
@@ -632,13 +635,13 @@ static int map_create(union bpf_attr *attr)
632 635
633 return err; 636 return err;
634 637
635free_map:
636 bpf_map_release_memlock(map);
637free_map_sec: 638free_map_sec:
638 security_bpf_map_free(map); 639 security_bpf_map_free(map);
639free_map_nouncharge: 640free_map:
640 btf_put(map->btf); 641 btf_put(map->btf);
642 bpf_map_charge_move(&mem, &map->memory);
641 map->ops->map_free(map); 643 map->ops->map_free(map);
644 bpf_map_charge_finish(&mem);
642 return err; 645 return err;
643} 646}
644 647
@@ -1585,6 +1588,14 @@ bpf_prog_load_check_attach_type(enum bpf_prog_type prog_type,
1585 default: 1588 default:
1586 return -EINVAL; 1589 return -EINVAL;
1587 } 1590 }
1591 case BPF_PROG_TYPE_CGROUP_SKB:
1592 switch (expected_attach_type) {
1593 case BPF_CGROUP_INET_INGRESS:
1594 case BPF_CGROUP_INET_EGRESS:
1595 return 0;
1596 default:
1597 return -EINVAL;
1598 }
1588 default: 1599 default:
1589 return 0; 1600 return 0;
1590 } 1601 }
@@ -1604,7 +1615,9 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
1604 if (CHECK_ATTR(BPF_PROG_LOAD)) 1615 if (CHECK_ATTR(BPF_PROG_LOAD))
1605 return -EINVAL; 1616 return -EINVAL;
1606 1617
1607 if (attr->prog_flags & ~(BPF_F_STRICT_ALIGNMENT | BPF_F_ANY_ALIGNMENT)) 1618 if (attr->prog_flags & ~(BPF_F_STRICT_ALIGNMENT |
1619 BPF_F_ANY_ALIGNMENT |
1620 BPF_F_TEST_RND_HI32))
1608 return -EINVAL; 1621 return -EINVAL;
1609 1622
1610 if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && 1623 if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) &&
@@ -1834,6 +1847,10 @@ static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog,
1834 case BPF_PROG_TYPE_CGROUP_SOCK: 1847 case BPF_PROG_TYPE_CGROUP_SOCK:
1835 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: 1848 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
1836 return attach_type == prog->expected_attach_type ? 0 : -EINVAL; 1849 return attach_type == prog->expected_attach_type ? 0 : -EINVAL;
1850 case BPF_PROG_TYPE_CGROUP_SKB:
1851 return prog->enforce_expected_attach_type &&
1852 prog->expected_attach_type != attach_type ?
1853 -EINVAL : 0;
1837 default: 1854 default:
1838 return 0; 1855 return 0;
1839 } 1856 }
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 95f9354495ad..5c2cb5bd84ce 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -176,7 +176,7 @@ struct bpf_verifier_stack_elem {
176 struct bpf_verifier_stack_elem *next; 176 struct bpf_verifier_stack_elem *next;
177}; 177};
178 178
179#define BPF_COMPLEXITY_LIMIT_STACK 1024 179#define BPF_COMPLEXITY_LIMIT_JMP_SEQ 8192
180#define BPF_COMPLEXITY_LIMIT_STATES 64 180#define BPF_COMPLEXITY_LIMIT_STATES 64
181 181
182#define BPF_MAP_PTR_UNPRIV 1UL 182#define BPF_MAP_PTR_UNPRIV 1UL
@@ -782,8 +782,9 @@ static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env,
782 if (err) 782 if (err)
783 goto err; 783 goto err;
784 elem->st.speculative |= speculative; 784 elem->st.speculative |= speculative;
785 if (env->stack_size > BPF_COMPLEXITY_LIMIT_STACK) { 785 if (env->stack_size > BPF_COMPLEXITY_LIMIT_JMP_SEQ) {
786 verbose(env, "BPF program is too complex\n"); 786 verbose(env, "The sequence of %d jumps is too complex.\n",
787 env->stack_size);
787 goto err; 788 goto err;
788 } 789 }
789 return &elem->st; 790 return &elem->st;
@@ -981,6 +982,7 @@ static void mark_reg_not_init(struct bpf_verifier_env *env,
981 __mark_reg_not_init(regs + regno); 982 __mark_reg_not_init(regs + regno);
982} 983}
983 984
985#define DEF_NOT_SUBREG (0)
984static void init_reg_state(struct bpf_verifier_env *env, 986static void init_reg_state(struct bpf_verifier_env *env,
985 struct bpf_func_state *state) 987 struct bpf_func_state *state)
986{ 988{
@@ -991,6 +993,7 @@ static void init_reg_state(struct bpf_verifier_env *env,
991 mark_reg_not_init(env, regs, i); 993 mark_reg_not_init(env, regs, i);
992 regs[i].live = REG_LIVE_NONE; 994 regs[i].live = REG_LIVE_NONE;
993 regs[i].parent = NULL; 995 regs[i].parent = NULL;
996 regs[i].subreg_def = DEF_NOT_SUBREG;
994 } 997 }
995 998
996 /* frame pointer */ 999 /* frame pointer */
@@ -1136,7 +1139,7 @@ next:
1136 */ 1139 */
1137static int mark_reg_read(struct bpf_verifier_env *env, 1140static int mark_reg_read(struct bpf_verifier_env *env,
1138 const struct bpf_reg_state *state, 1141 const struct bpf_reg_state *state,
1139 struct bpf_reg_state *parent) 1142 struct bpf_reg_state *parent, u8 flag)
1140{ 1143{
1141 bool writes = parent == state->parent; /* Observe write marks */ 1144 bool writes = parent == state->parent; /* Observe write marks */
1142 int cnt = 0; 1145 int cnt = 0;
@@ -1151,17 +1154,26 @@ static int mark_reg_read(struct bpf_verifier_env *env,
1151 parent->var_off.value, parent->off); 1154 parent->var_off.value, parent->off);
1152 return -EFAULT; 1155 return -EFAULT;
1153 } 1156 }
1154 if (parent->live & REG_LIVE_READ) 1157 /* The first condition is more likely to be true than the
1158 * second, checked it first.
1159 */
1160 if ((parent->live & REG_LIVE_READ) == flag ||
1161 parent->live & REG_LIVE_READ64)
1155 /* The parentage chain never changes and 1162 /* The parentage chain never changes and
1156 * this parent was already marked as LIVE_READ. 1163 * this parent was already marked as LIVE_READ.
1157 * There is no need to keep walking the chain again and 1164 * There is no need to keep walking the chain again and
1158 * keep re-marking all parents as LIVE_READ. 1165 * keep re-marking all parents as LIVE_READ.
1159 * This case happens when the same register is read 1166 * This case happens when the same register is read
1160 * multiple times without writes into it in-between. 1167 * multiple times without writes into it in-between.
1168 * Also, if parent has the stronger REG_LIVE_READ64 set,
1169 * then no need to set the weak REG_LIVE_READ32.
1161 */ 1170 */
1162 break; 1171 break;
1163 /* ... then we depend on parent's value */ 1172 /* ... then we depend on parent's value */
1164 parent->live |= REG_LIVE_READ; 1173 parent->live |= flag;
1174 /* REG_LIVE_READ64 overrides REG_LIVE_READ32. */
1175 if (flag == REG_LIVE_READ64)
1176 parent->live &= ~REG_LIVE_READ32;
1165 state = parent; 1177 state = parent;
1166 parent = state->parent; 1178 parent = state->parent;
1167 writes = true; 1179 writes = true;
@@ -1173,12 +1185,129 @@ static int mark_reg_read(struct bpf_verifier_env *env,
1173 return 0; 1185 return 0;
1174} 1186}
1175 1187
1188/* This function is supposed to be used by the following 32-bit optimization
1189 * code only. It returns TRUE if the source or destination register operates
1190 * on 64-bit, otherwise return FALSE.
1191 */
1192static bool is_reg64(struct bpf_verifier_env *env, struct bpf_insn *insn,
1193 u32 regno, struct bpf_reg_state *reg, enum reg_arg_type t)
1194{
1195 u8 code, class, op;
1196
1197 code = insn->code;
1198 class = BPF_CLASS(code);
1199 op = BPF_OP(code);
1200 if (class == BPF_JMP) {
1201 /* BPF_EXIT for "main" will reach here. Return TRUE
1202 * conservatively.
1203 */
1204 if (op == BPF_EXIT)
1205 return true;
1206 if (op == BPF_CALL) {
1207 /* BPF to BPF call will reach here because of marking
1208 * caller saved clobber with DST_OP_NO_MARK for which we
1209 * don't care the register def because they are anyway
1210 * marked as NOT_INIT already.
1211 */
1212 if (insn->src_reg == BPF_PSEUDO_CALL)
1213 return false;
1214 /* Helper call will reach here because of arg type
1215 * check, conservatively return TRUE.
1216 */
1217 if (t == SRC_OP)
1218 return true;
1219
1220 return false;
1221 }
1222 }
1223
1224 if (class == BPF_ALU64 || class == BPF_JMP ||
1225 /* BPF_END always use BPF_ALU class. */
1226 (class == BPF_ALU && op == BPF_END && insn->imm == 64))
1227 return true;
1228
1229 if (class == BPF_ALU || class == BPF_JMP32)
1230 return false;
1231
1232 if (class == BPF_LDX) {
1233 if (t != SRC_OP)
1234 return BPF_SIZE(code) == BPF_DW;
1235 /* LDX source must be ptr. */
1236 return true;
1237 }
1238
1239 if (class == BPF_STX) {
1240 if (reg->type != SCALAR_VALUE)
1241 return true;
1242 return BPF_SIZE(code) == BPF_DW;
1243 }
1244
1245 if (class == BPF_LD) {
1246 u8 mode = BPF_MODE(code);
1247
1248 /* LD_IMM64 */
1249 if (mode == BPF_IMM)
1250 return true;
1251
1252 /* Both LD_IND and LD_ABS return 32-bit data. */
1253 if (t != SRC_OP)
1254 return false;
1255
1256 /* Implicit ctx ptr. */
1257 if (regno == BPF_REG_6)
1258 return true;
1259
1260 /* Explicit source could be any width. */
1261 return true;
1262 }
1263
1264 if (class == BPF_ST)
1265 /* The only source register for BPF_ST is a ptr. */
1266 return true;
1267
1268 /* Conservatively return true at default. */
1269 return true;
1270}
1271
1272/* Return TRUE if INSN doesn't have explicit value define. */
1273static bool insn_no_def(struct bpf_insn *insn)
1274{
1275 u8 class = BPF_CLASS(insn->code);
1276
1277 return (class == BPF_JMP || class == BPF_JMP32 ||
1278 class == BPF_STX || class == BPF_ST);
1279}
1280
1281/* Return TRUE if INSN has defined any 32-bit value explicitly. */
1282static bool insn_has_def32(struct bpf_verifier_env *env, struct bpf_insn *insn)
1283{
1284 if (insn_no_def(insn))
1285 return false;
1286
1287 return !is_reg64(env, insn, insn->dst_reg, NULL, DST_OP);
1288}
1289
1290static void mark_insn_zext(struct bpf_verifier_env *env,
1291 struct bpf_reg_state *reg)
1292{
1293 s32 def_idx = reg->subreg_def;
1294
1295 if (def_idx == DEF_NOT_SUBREG)
1296 return;
1297
1298 env->insn_aux_data[def_idx - 1].zext_dst = true;
1299 /* The dst will be zero extended, so won't be sub-register anymore. */
1300 reg->subreg_def = DEF_NOT_SUBREG;
1301}
1302
1176static int check_reg_arg(struct bpf_verifier_env *env, u32 regno, 1303static int check_reg_arg(struct bpf_verifier_env *env, u32 regno,
1177 enum reg_arg_type t) 1304 enum reg_arg_type t)
1178{ 1305{
1179 struct bpf_verifier_state *vstate = env->cur_state; 1306 struct bpf_verifier_state *vstate = env->cur_state;
1180 struct bpf_func_state *state = vstate->frame[vstate->curframe]; 1307 struct bpf_func_state *state = vstate->frame[vstate->curframe];
1308 struct bpf_insn *insn = env->prog->insnsi + env->insn_idx;
1181 struct bpf_reg_state *reg, *regs = state->regs; 1309 struct bpf_reg_state *reg, *regs = state->regs;
1310 bool rw64;
1182 1311
1183 if (regno >= MAX_BPF_REG) { 1312 if (regno >= MAX_BPF_REG) {
1184 verbose(env, "R%d is invalid\n", regno); 1313 verbose(env, "R%d is invalid\n", regno);
@@ -1186,6 +1315,7 @@ static int check_reg_arg(struct bpf_verifier_env *env, u32 regno,
1186 } 1315 }
1187 1316
1188 reg = &regs[regno]; 1317 reg = &regs[regno];
1318 rw64 = is_reg64(env, insn, regno, reg, t);
1189 if (t == SRC_OP) { 1319 if (t == SRC_OP) {
1190 /* check whether register used as source operand can be read */ 1320 /* check whether register used as source operand can be read */
1191 if (reg->type == NOT_INIT) { 1321 if (reg->type == NOT_INIT) {
@@ -1196,7 +1326,11 @@ static int check_reg_arg(struct bpf_verifier_env *env, u32 regno,
1196 if (regno == BPF_REG_FP) 1326 if (regno == BPF_REG_FP)
1197 return 0; 1327 return 0;
1198 1328
1199 return mark_reg_read(env, reg, reg->parent); 1329 if (rw64)
1330 mark_insn_zext(env, reg);
1331
1332 return mark_reg_read(env, reg, reg->parent,
1333 rw64 ? REG_LIVE_READ64 : REG_LIVE_READ32);
1200 } else { 1334 } else {
1201 /* check whether register used as dest operand can be written to */ 1335 /* check whether register used as dest operand can be written to */
1202 if (regno == BPF_REG_FP) { 1336 if (regno == BPF_REG_FP) {
@@ -1204,6 +1338,7 @@ static int check_reg_arg(struct bpf_verifier_env *env, u32 regno,
1204 return -EACCES; 1338 return -EACCES;
1205 } 1339 }
1206 reg->live |= REG_LIVE_WRITTEN; 1340 reg->live |= REG_LIVE_WRITTEN;
1341 reg->subreg_def = rw64 ? DEF_NOT_SUBREG : env->insn_idx + 1;
1207 if (t == DST_OP) 1342 if (t == DST_OP)
1208 mark_reg_unknown(env, regs, regno); 1343 mark_reg_unknown(env, regs, regno);
1209 } 1344 }
@@ -1383,7 +1518,8 @@ static int check_stack_read(struct bpf_verifier_env *env,
1383 state->regs[value_regno].live |= REG_LIVE_WRITTEN; 1518 state->regs[value_regno].live |= REG_LIVE_WRITTEN;
1384 } 1519 }
1385 mark_reg_read(env, &reg_state->stack[spi].spilled_ptr, 1520 mark_reg_read(env, &reg_state->stack[spi].spilled_ptr,
1386 reg_state->stack[spi].spilled_ptr.parent); 1521 reg_state->stack[spi].spilled_ptr.parent,
1522 REG_LIVE_READ64);
1387 return 0; 1523 return 0;
1388 } else { 1524 } else {
1389 int zeros = 0; 1525 int zeros = 0;
@@ -1400,7 +1536,8 @@ static int check_stack_read(struct bpf_verifier_env *env,
1400 return -EACCES; 1536 return -EACCES;
1401 } 1537 }
1402 mark_reg_read(env, &reg_state->stack[spi].spilled_ptr, 1538 mark_reg_read(env, &reg_state->stack[spi].spilled_ptr,
1403 reg_state->stack[spi].spilled_ptr.parent); 1539 reg_state->stack[spi].spilled_ptr.parent,
1540 REG_LIVE_READ64);
1404 if (value_regno >= 0) { 1541 if (value_regno >= 0) {
1405 if (zeros == size) { 1542 if (zeros == size) {
1406 /* any size read into register is zero extended, 1543 /* any size read into register is zero extended,
@@ -2109,6 +2246,12 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
2109 value_regno); 2246 value_regno);
2110 if (reg_type_may_be_null(reg_type)) 2247 if (reg_type_may_be_null(reg_type))
2111 regs[value_regno].id = ++env->id_gen; 2248 regs[value_regno].id = ++env->id_gen;
2249 /* A load of ctx field could have different
2250 * actual load size with the one encoded in the
2251 * insn. When the dst is PTR, it is for sure not
2252 * a sub-register.
2253 */
2254 regs[value_regno].subreg_def = DEF_NOT_SUBREG;
2112 } 2255 }
2113 regs[value_regno].type = reg_type; 2256 regs[value_regno].type = reg_type;
2114 } 2257 }
@@ -2368,7 +2511,8 @@ mark:
2368 * the whole slot to be marked as 'read' 2511 * the whole slot to be marked as 'read'
2369 */ 2512 */
2370 mark_reg_read(env, &state->stack[spi].spilled_ptr, 2513 mark_reg_read(env, &state->stack[spi].spilled_ptr,
2371 state->stack[spi].spilled_ptr.parent); 2514 state->stack[spi].spilled_ptr.parent,
2515 REG_LIVE_READ64);
2372 } 2516 }
2373 return update_stack_depth(env, state, min_off); 2517 return update_stack_depth(env, state, min_off);
2374} 2518}
@@ -3332,6 +3476,9 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
3332 check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK); 3476 check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
3333 } 3477 }
3334 3478
3479 /* helper call returns 64-bit value. */
3480 regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG;
3481
3335 /* update return register (already marked as written above) */ 3482 /* update return register (already marked as written above) */
3336 if (fn->ret_type == RET_INTEGER) { 3483 if (fn->ret_type == RET_INTEGER) {
3337 /* sets type to SCALAR_VALUE */ 3484 /* sets type to SCALAR_VALUE */
@@ -4263,6 +4410,7 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
4263 */ 4410 */
4264 *dst_reg = *src_reg; 4411 *dst_reg = *src_reg;
4265 dst_reg->live |= REG_LIVE_WRITTEN; 4412 dst_reg->live |= REG_LIVE_WRITTEN;
4413 dst_reg->subreg_def = DEF_NOT_SUBREG;
4266 } else { 4414 } else {
4267 /* R1 = (u32) R2 */ 4415 /* R1 = (u32) R2 */
4268 if (is_pointer_value(env, insn->src_reg)) { 4416 if (is_pointer_value(env, insn->src_reg)) {
@@ -4273,6 +4421,7 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
4273 } else if (src_reg->type == SCALAR_VALUE) { 4421 } else if (src_reg->type == SCALAR_VALUE) {
4274 *dst_reg = *src_reg; 4422 *dst_reg = *src_reg;
4275 dst_reg->live |= REG_LIVE_WRITTEN; 4423 dst_reg->live |= REG_LIVE_WRITTEN;
4424 dst_reg->subreg_def = env->insn_idx + 1;
4276 } else { 4425 } else {
4277 mark_reg_unknown(env, regs, 4426 mark_reg_unknown(env, regs,
4278 insn->dst_reg); 4427 insn->dst_reg);
@@ -5352,16 +5501,23 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
5352 * Already marked as written above. 5501 * Already marked as written above.
5353 */ 5502 */
5354 mark_reg_unknown(env, regs, BPF_REG_0); 5503 mark_reg_unknown(env, regs, BPF_REG_0);
5504 /* ld_abs load up to 32-bit skb data. */
5505 regs[BPF_REG_0].subreg_def = env->insn_idx + 1;
5355 return 0; 5506 return 0;
5356} 5507}
5357 5508
5358static int check_return_code(struct bpf_verifier_env *env) 5509static int check_return_code(struct bpf_verifier_env *env)
5359{ 5510{
5511 struct tnum enforce_attach_type_range = tnum_unknown;
5360 struct bpf_reg_state *reg; 5512 struct bpf_reg_state *reg;
5361 struct tnum range = tnum_range(0, 1); 5513 struct tnum range = tnum_range(0, 1);
5362 5514
5363 switch (env->prog->type) { 5515 switch (env->prog->type) {
5364 case BPF_PROG_TYPE_CGROUP_SKB: 5516 case BPF_PROG_TYPE_CGROUP_SKB:
5517 if (env->prog->expected_attach_type == BPF_CGROUP_INET_EGRESS) {
5518 range = tnum_range(0, 3);
5519 enforce_attach_type_range = tnum_range(2, 3);
5520 }
5365 case BPF_PROG_TYPE_CGROUP_SOCK: 5521 case BPF_PROG_TYPE_CGROUP_SOCK:
5366 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: 5522 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
5367 case BPF_PROG_TYPE_SOCK_OPS: 5523 case BPF_PROG_TYPE_SOCK_OPS:
@@ -5380,18 +5536,23 @@ static int check_return_code(struct bpf_verifier_env *env)
5380 } 5536 }
5381 5537
5382 if (!tnum_in(range, reg->var_off)) { 5538 if (!tnum_in(range, reg->var_off)) {
5539 char tn_buf[48];
5540
5383 verbose(env, "At program exit the register R0 "); 5541 verbose(env, "At program exit the register R0 ");
5384 if (!tnum_is_unknown(reg->var_off)) { 5542 if (!tnum_is_unknown(reg->var_off)) {
5385 char tn_buf[48];
5386
5387 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); 5543 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
5388 verbose(env, "has value %s", tn_buf); 5544 verbose(env, "has value %s", tn_buf);
5389 } else { 5545 } else {
5390 verbose(env, "has unknown scalar value"); 5546 verbose(env, "has unknown scalar value");
5391 } 5547 }
5392 verbose(env, " should have been 0 or 1\n"); 5548 tnum_strn(tn_buf, sizeof(tn_buf), range);
5549 verbose(env, " should have been %s\n", tn_buf);
5393 return -EINVAL; 5550 return -EINVAL;
5394 } 5551 }
5552
5553 if (!tnum_is_unknown(enforce_attach_type_range) &&
5554 tnum_in(enforce_attach_type_range, reg->var_off))
5555 env->prog->enforce_expected_attach_type = 1;
5395 return 0; 5556 return 0;
5396} 5557}
5397 5558
@@ -5435,7 +5596,25 @@ enum {
5435 BRANCH = 2, 5596 BRANCH = 2,
5436}; 5597};
5437 5598
5438#define STATE_LIST_MARK ((struct bpf_verifier_state_list *) -1L) 5599static u32 state_htab_size(struct bpf_verifier_env *env)
5600{
5601 return env->prog->len;
5602}
5603
5604static struct bpf_verifier_state_list **explored_state(
5605 struct bpf_verifier_env *env,
5606 int idx)
5607{
5608 struct bpf_verifier_state *cur = env->cur_state;
5609 struct bpf_func_state *state = cur->frame[cur->curframe];
5610
5611 return &env->explored_states[(idx ^ state->callsite) % state_htab_size(env)];
5612}
5613
5614static void init_explored_state(struct bpf_verifier_env *env, int idx)
5615{
5616 env->insn_aux_data[idx].prune_point = true;
5617}
5439 5618
5440/* t, w, e - match pseudo-code above: 5619/* t, w, e - match pseudo-code above:
5441 * t - index of current instruction 5620 * t - index of current instruction
@@ -5461,7 +5640,7 @@ static int push_insn(int t, int w, int e, struct bpf_verifier_env *env)
5461 5640
5462 if (e == BRANCH) 5641 if (e == BRANCH)
5463 /* mark branch target for state pruning */ 5642 /* mark branch target for state pruning */
5464 env->explored_states[w] = STATE_LIST_MARK; 5643 init_explored_state(env, w);
5465 5644
5466 if (insn_state[w] == 0) { 5645 if (insn_state[w] == 0) {
5467 /* tree-edge */ 5646 /* tree-edge */
@@ -5529,9 +5708,9 @@ peek_stack:
5529 else if (ret < 0) 5708 else if (ret < 0)
5530 goto err_free; 5709 goto err_free;
5531 if (t + 1 < insn_cnt) 5710 if (t + 1 < insn_cnt)
5532 env->explored_states[t + 1] = STATE_LIST_MARK; 5711 init_explored_state(env, t + 1);
5533 if (insns[t].src_reg == BPF_PSEUDO_CALL) { 5712 if (insns[t].src_reg == BPF_PSEUDO_CALL) {
5534 env->explored_states[t] = STATE_LIST_MARK; 5713 init_explored_state(env, t);
5535 ret = push_insn(t, t + insns[t].imm + 1, BRANCH, env); 5714 ret = push_insn(t, t + insns[t].imm + 1, BRANCH, env);
5536 if (ret == 1) 5715 if (ret == 1)
5537 goto peek_stack; 5716 goto peek_stack;
@@ -5554,10 +5733,10 @@ peek_stack:
5554 * after every call and jump 5733 * after every call and jump
5555 */ 5734 */
5556 if (t + 1 < insn_cnt) 5735 if (t + 1 < insn_cnt)
5557 env->explored_states[t + 1] = STATE_LIST_MARK; 5736 init_explored_state(env, t + 1);
5558 } else { 5737 } else {
5559 /* conditional jump with two edges */ 5738 /* conditional jump with two edges */
5560 env->explored_states[t] = STATE_LIST_MARK; 5739 init_explored_state(env, t);
5561 ret = push_insn(t, t + 1, FALLTHROUGH, env); 5740 ret = push_insn(t, t + 1, FALLTHROUGH, env);
5562 if (ret == 1) 5741 if (ret == 1)
5563 goto peek_stack; 5742 goto peek_stack;
@@ -6005,12 +6184,10 @@ static void clean_live_states(struct bpf_verifier_env *env, int insn,
6005 struct bpf_verifier_state_list *sl; 6184 struct bpf_verifier_state_list *sl;
6006 int i; 6185 int i;
6007 6186
6008 sl = env->explored_states[insn]; 6187 sl = *explored_state(env, insn);
6009 if (!sl) 6188 while (sl) {
6010 return; 6189 if (sl->state.insn_idx != insn ||
6011 6190 sl->state.curframe != cur->curframe)
6012 while (sl != STATE_LIST_MARK) {
6013 if (sl->state.curframe != cur->curframe)
6014 goto next; 6191 goto next;
6015 for (i = 0; i <= cur->curframe; i++) 6192 for (i = 0; i <= cur->curframe; i++)
6016 if (sl->state.frame[i]->callsite != cur->frame[i]->callsite) 6193 if (sl->state.frame[i]->callsite != cur->frame[i]->callsite)
@@ -6292,20 +6469,33 @@ static bool states_equal(struct bpf_verifier_env *env,
6292 return true; 6469 return true;
6293} 6470}
6294 6471
6472/* Return 0 if no propagation happened. Return negative error code if error
6473 * happened. Otherwise, return the propagated bit.
6474 */
6295static int propagate_liveness_reg(struct bpf_verifier_env *env, 6475static int propagate_liveness_reg(struct bpf_verifier_env *env,
6296 struct bpf_reg_state *reg, 6476 struct bpf_reg_state *reg,
6297 struct bpf_reg_state *parent_reg) 6477 struct bpf_reg_state *parent_reg)
6298{ 6478{
6479 u8 parent_flag = parent_reg->live & REG_LIVE_READ;
6480 u8 flag = reg->live & REG_LIVE_READ;
6299 int err; 6481 int err;
6300 6482
6301 if (parent_reg->live & REG_LIVE_READ || !(reg->live & REG_LIVE_READ)) 6483 /* When comes here, read flags of PARENT_REG or REG could be any of
6484 * REG_LIVE_READ64, REG_LIVE_READ32, REG_LIVE_NONE. There is no need
6485 * of propagation if PARENT_REG has strongest REG_LIVE_READ64.
6486 */
6487 if (parent_flag == REG_LIVE_READ64 ||
6488 /* Or if there is no read flag from REG. */
6489 !flag ||
6490 /* Or if the read flag from REG is the same as PARENT_REG. */
6491 parent_flag == flag)
6302 return 0; 6492 return 0;
6303 6493
6304 err = mark_reg_read(env, reg, parent_reg); 6494 err = mark_reg_read(env, reg, parent_reg, flag);
6305 if (err) 6495 if (err)
6306 return err; 6496 return err;
6307 6497
6308 return 0; 6498 return flag;
6309} 6499}
6310 6500
6311/* A write screens off any subsequent reads; but write marks come from the 6501/* A write screens off any subsequent reads; but write marks come from the
@@ -6339,8 +6529,10 @@ static int propagate_liveness(struct bpf_verifier_env *env,
6339 for (i = frame < vstate->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++) { 6529 for (i = frame < vstate->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++) {
6340 err = propagate_liveness_reg(env, &state_reg[i], 6530 err = propagate_liveness_reg(env, &state_reg[i],
6341 &parent_reg[i]); 6531 &parent_reg[i]);
6342 if (err) 6532 if (err < 0)
6343 return err; 6533 return err;
6534 if (err == REG_LIVE_READ64)
6535 mark_insn_zext(env, &parent_reg[i]);
6344 } 6536 }
6345 6537
6346 /* Propagate stack slots. */ 6538 /* Propagate stack slots. */
@@ -6350,11 +6542,11 @@ static int propagate_liveness(struct bpf_verifier_env *env,
6350 state_reg = &state->stack[i].spilled_ptr; 6542 state_reg = &state->stack[i].spilled_ptr;
6351 err = propagate_liveness_reg(env, state_reg, 6543 err = propagate_liveness_reg(env, state_reg,
6352 parent_reg); 6544 parent_reg);
6353 if (err) 6545 if (err < 0)
6354 return err; 6546 return err;
6355 } 6547 }
6356 } 6548 }
6357 return err; 6549 return 0;
6358} 6550}
6359 6551
6360static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) 6552static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
@@ -6364,18 +6556,21 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
6364 struct bpf_verifier_state *cur = env->cur_state, *new; 6556 struct bpf_verifier_state *cur = env->cur_state, *new;
6365 int i, j, err, states_cnt = 0; 6557 int i, j, err, states_cnt = 0;
6366 6558
6367 pprev = &env->explored_states[insn_idx]; 6559 if (!env->insn_aux_data[insn_idx].prune_point)
6368 sl = *pprev;
6369
6370 if (!sl)
6371 /* this 'insn_idx' instruction wasn't marked, so we will not 6560 /* this 'insn_idx' instruction wasn't marked, so we will not
6372 * be doing state search here 6561 * be doing state search here
6373 */ 6562 */
6374 return 0; 6563 return 0;
6375 6564
6565 pprev = explored_state(env, insn_idx);
6566 sl = *pprev;
6567
6376 clean_live_states(env, insn_idx, cur); 6568 clean_live_states(env, insn_idx, cur);
6377 6569
6378 while (sl != STATE_LIST_MARK) { 6570 while (sl) {
6571 states_cnt++;
6572 if (sl->state.insn_idx != insn_idx)
6573 goto next;
6379 if (states_equal(env, &sl->state, cur)) { 6574 if (states_equal(env, &sl->state, cur)) {
6380 sl->hit_cnt++; 6575 sl->hit_cnt++;
6381 /* reached equivalent register/stack state, 6576 /* reached equivalent register/stack state,
@@ -6393,7 +6588,6 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
6393 return err; 6588 return err;
6394 return 1; 6589 return 1;
6395 } 6590 }
6396 states_cnt++;
6397 sl->miss_cnt++; 6591 sl->miss_cnt++;
6398 /* heuristic to determine whether this state is beneficial 6592 /* heuristic to determine whether this state is beneficial
6399 * to keep checking from state equivalence point of view. 6593 * to keep checking from state equivalence point of view.
@@ -6420,6 +6614,7 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
6420 sl = *pprev; 6614 sl = *pprev;
6421 continue; 6615 continue;
6422 } 6616 }
6617next:
6423 pprev = &sl->next; 6618 pprev = &sl->next;
6424 sl = *pprev; 6619 sl = *pprev;
6425 } 6620 }
@@ -6451,8 +6646,9 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
6451 kfree(new_sl); 6646 kfree(new_sl);
6452 return err; 6647 return err;
6453 } 6648 }
6454 new_sl->next = env->explored_states[insn_idx]; 6649 new->insn_idx = insn_idx;
6455 env->explored_states[insn_idx] = new_sl; 6650 new_sl->next = *explored_state(env, insn_idx);
6651 *explored_state(env, insn_idx) = new_sl;
6456 /* connect new state to parentage chain. Current frame needs all 6652 /* connect new state to parentage chain. Current frame needs all
6457 * registers connected. Only r6 - r9 of the callers are alive (pushed 6653 * registers connected. Only r6 - r9 of the callers are alive (pushed
6458 * to the stack implicitly by JITs) so in callers' frames connect just 6654 * to the stack implicitly by JITs) so in callers' frames connect just
@@ -7130,14 +7326,23 @@ static void convert_pseudo_ld_imm64(struct bpf_verifier_env *env)
7130 * insni[off, off + cnt). Adjust corresponding insn_aux_data by copying 7326 * insni[off, off + cnt). Adjust corresponding insn_aux_data by copying
7131 * [0, off) and [off, end) to new locations, so the patched range stays zero 7327 * [0, off) and [off, end) to new locations, so the patched range stays zero
7132 */ 7328 */
7133static int adjust_insn_aux_data(struct bpf_verifier_env *env, u32 prog_len, 7329static int adjust_insn_aux_data(struct bpf_verifier_env *env,
7134 u32 off, u32 cnt) 7330 struct bpf_prog *new_prog, u32 off, u32 cnt)
7135{ 7331{
7136 struct bpf_insn_aux_data *new_data, *old_data = env->insn_aux_data; 7332 struct bpf_insn_aux_data *new_data, *old_data = env->insn_aux_data;
7333 struct bpf_insn *insn = new_prog->insnsi;
7334 u32 prog_len;
7137 int i; 7335 int i;
7138 7336
7337 /* aux info at OFF always needs adjustment, no matter fast path
7338 * (cnt == 1) is taken or not. There is no guarantee INSN at OFF is the
7339 * original insn at old prog.
7340 */
7341 old_data[off].zext_dst = insn_has_def32(env, insn + off + cnt - 1);
7342
7139 if (cnt == 1) 7343 if (cnt == 1)
7140 return 0; 7344 return 0;
7345 prog_len = new_prog->len;
7141 new_data = vzalloc(array_size(prog_len, 7346 new_data = vzalloc(array_size(prog_len,
7142 sizeof(struct bpf_insn_aux_data))); 7347 sizeof(struct bpf_insn_aux_data)));
7143 if (!new_data) 7348 if (!new_data)
@@ -7145,8 +7350,10 @@ static int adjust_insn_aux_data(struct bpf_verifier_env *env, u32 prog_len,
7145 memcpy(new_data, old_data, sizeof(struct bpf_insn_aux_data) * off); 7350 memcpy(new_data, old_data, sizeof(struct bpf_insn_aux_data) * off);
7146 memcpy(new_data + off + cnt - 1, old_data + off, 7351 memcpy(new_data + off + cnt - 1, old_data + off,
7147 sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1)); 7352 sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1));
7148 for (i = off; i < off + cnt - 1; i++) 7353 for (i = off; i < off + cnt - 1; i++) {
7149 new_data[i].seen = true; 7354 new_data[i].seen = true;
7355 new_data[i].zext_dst = insn_has_def32(env, insn + i);
7356 }
7150 env->insn_aux_data = new_data; 7357 env->insn_aux_data = new_data;
7151 vfree(old_data); 7358 vfree(old_data);
7152 return 0; 7359 return 0;
@@ -7179,7 +7386,7 @@ static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 of
7179 env->insn_aux_data[off].orig_idx); 7386 env->insn_aux_data[off].orig_idx);
7180 return NULL; 7387 return NULL;
7181 } 7388 }
7182 if (adjust_insn_aux_data(env, new_prog->len, off, len)) 7389 if (adjust_insn_aux_data(env, new_prog, off, len))
7183 return NULL; 7390 return NULL;
7184 adjust_subprog_starts(env, off, len); 7391 adjust_subprog_starts(env, off, len);
7185 return new_prog; 7392 return new_prog;
@@ -7443,6 +7650,84 @@ static int opt_remove_nops(struct bpf_verifier_env *env)
7443 return 0; 7650 return 0;
7444} 7651}
7445 7652
7653static int opt_subreg_zext_lo32_rnd_hi32(struct bpf_verifier_env *env,
7654 const union bpf_attr *attr)
7655{
7656 struct bpf_insn *patch, zext_patch[2], rnd_hi32_patch[4];
7657 struct bpf_insn_aux_data *aux = env->insn_aux_data;
7658 int i, patch_len, delta = 0, len = env->prog->len;
7659 struct bpf_insn *insns = env->prog->insnsi;
7660 struct bpf_prog *new_prog;
7661 bool rnd_hi32;
7662
7663 rnd_hi32 = attr->prog_flags & BPF_F_TEST_RND_HI32;
7664 zext_patch[1] = BPF_ZEXT_REG(0);
7665 rnd_hi32_patch[1] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, 0);
7666 rnd_hi32_patch[2] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_AX, 32);
7667 rnd_hi32_patch[3] = BPF_ALU64_REG(BPF_OR, 0, BPF_REG_AX);
7668 for (i = 0; i < len; i++) {
7669 int adj_idx = i + delta;
7670 struct bpf_insn insn;
7671
7672 insn = insns[adj_idx];
7673 if (!aux[adj_idx].zext_dst) {
7674 u8 code, class;
7675 u32 imm_rnd;
7676
7677 if (!rnd_hi32)
7678 continue;
7679
7680 code = insn.code;
7681 class = BPF_CLASS(code);
7682 if (insn_no_def(&insn))
7683 continue;
7684
7685 /* NOTE: arg "reg" (the fourth one) is only used for
7686 * BPF_STX which has been ruled out in above
7687 * check, it is safe to pass NULL here.
7688 */
7689 if (is_reg64(env, &insn, insn.dst_reg, NULL, DST_OP)) {
7690 if (class == BPF_LD &&
7691 BPF_MODE(code) == BPF_IMM)
7692 i++;
7693 continue;
7694 }
7695
7696 /* ctx load could be transformed into wider load. */
7697 if (class == BPF_LDX &&
7698 aux[adj_idx].ptr_type == PTR_TO_CTX)
7699 continue;
7700
7701 imm_rnd = get_random_int();
7702 rnd_hi32_patch[0] = insn;
7703 rnd_hi32_patch[1].imm = imm_rnd;
7704 rnd_hi32_patch[3].dst_reg = insn.dst_reg;
7705 patch = rnd_hi32_patch;
7706 patch_len = 4;
7707 goto apply_patch_buffer;
7708 }
7709
7710 if (!bpf_jit_needs_zext())
7711 continue;
7712
7713 zext_patch[0] = insn;
7714 zext_patch[1].dst_reg = insn.dst_reg;
7715 zext_patch[1].src_reg = insn.dst_reg;
7716 patch = zext_patch;
7717 patch_len = 2;
7718apply_patch_buffer:
7719 new_prog = bpf_patch_insn_data(env, adj_idx, patch, patch_len);
7720 if (!new_prog)
7721 return -ENOMEM;
7722 env->prog = new_prog;
7723 insns = new_prog->insnsi;
7724 aux = env->insn_aux_data;
7725 delta += patch_len - 1;
7726 }
7727
7728 return 0;
7729}
7730
7446/* convert load instructions that access fields of a context type into a 7731/* convert load instructions that access fields of a context type into a
7447 * sequence of instructions that access fields of the underlying structure: 7732 * sequence of instructions that access fields of the underlying structure:
7448 * struct __sk_buff -> struct sk_buff 7733 * struct __sk_buff -> struct sk_buff
@@ -8130,16 +8415,15 @@ static void free_states(struct bpf_verifier_env *env)
8130 if (!env->explored_states) 8415 if (!env->explored_states)
8131 return; 8416 return;
8132 8417
8133 for (i = 0; i < env->prog->len; i++) { 8418 for (i = 0; i < state_htab_size(env); i++) {
8134 sl = env->explored_states[i]; 8419 sl = env->explored_states[i];
8135 8420
8136 if (sl) 8421 while (sl) {
8137 while (sl != STATE_LIST_MARK) { 8422 sln = sl->next;
8138 sln = sl->next; 8423 free_verifier_state(&sl->state, false);
8139 free_verifier_state(&sl->state, false); 8424 kfree(sl);
8140 kfree(sl); 8425 sl = sln;
8141 sl = sln; 8426 }
8142 }
8143 } 8427 }
8144 8428
8145 kvfree(env->explored_states); 8429 kvfree(env->explored_states);
@@ -8239,7 +8523,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr,
8239 goto skip_full_check; 8523 goto skip_full_check;
8240 } 8524 }
8241 8525
8242 env->explored_states = kvcalloc(env->prog->len, 8526 env->explored_states = kvcalloc(state_htab_size(env),
8243 sizeof(struct bpf_verifier_state_list *), 8527 sizeof(struct bpf_verifier_state_list *),
8244 GFP_USER); 8528 GFP_USER);
8245 ret = -ENOMEM; 8529 ret = -ENOMEM;
@@ -8294,6 +8578,15 @@ skip_full_check:
8294 if (ret == 0) 8578 if (ret == 0)
8295 ret = fixup_bpf_calls(env); 8579 ret = fixup_bpf_calls(env);
8296 8580
8581 /* do 32-bit optimization after insn patching has done so those patched
8582 * insns could be handled correctly.
8583 */
8584 if (ret == 0 && !bpf_prog_is_dev_bound(env->prog->aux)) {
8585 ret = opt_subreg_zext_lo32_rnd_hi32(env, attr);
8586 env->prog->aux->verifier_zext = bpf_jit_needs_zext() ? !ret
8587 : false;
8588 }
8589
8297 if (ret == 0) 8590 if (ret == 0)
8298 ret = fixup_call_args(env); 8591 ret = fixup_call_args(env);
8299 8592
diff --git a/kernel/bpf/xskmap.c b/kernel/bpf/xskmap.c
index 686d244e798d..22066c28ba61 100644
--- a/kernel/bpf/xskmap.c
+++ b/kernel/bpf/xskmap.c
@@ -37,13 +37,9 @@ static struct bpf_map *xsk_map_alloc(union bpf_attr *attr)
37 37
38 cost = (u64)m->map.max_entries * sizeof(struct xdp_sock *); 38 cost = (u64)m->map.max_entries * sizeof(struct xdp_sock *);
39 cost += sizeof(struct list_head) * num_possible_cpus(); 39 cost += sizeof(struct list_head) * num_possible_cpus();
40 if (cost >= U32_MAX - PAGE_SIZE)
41 goto free_m;
42
43 m->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
44 40
45 /* Notice returns -EPERM on if map size is larger than memlock limit */ 41 /* Notice returns -EPERM on if map size is larger than memlock limit */
46 err = bpf_map_precharge_memlock(m->map.pages); 42 err = bpf_map_charge_init(&m->map.memory, cost);
47 if (err) 43 if (err)
48 goto free_m; 44 goto free_m;
49 45
@@ -51,7 +47,7 @@ static struct bpf_map *xsk_map_alloc(union bpf_attr *attr)
51 47
52 m->flush_list = alloc_percpu(struct list_head); 48 m->flush_list = alloc_percpu(struct list_head);
53 if (!m->flush_list) 49 if (!m->flush_list)
54 goto free_m; 50 goto free_charge;
55 51
56 for_each_possible_cpu(cpu) 52 for_each_possible_cpu(cpu)
57 INIT_LIST_HEAD(per_cpu_ptr(m->flush_list, cpu)); 53 INIT_LIST_HEAD(per_cpu_ptr(m->flush_list, cpu));
@@ -65,6 +61,8 @@ static struct bpf_map *xsk_map_alloc(union bpf_attr *attr)
65 61
66free_percpu: 62free_percpu:
67 free_percpu(m->flush_list); 63 free_percpu(m->flush_list);
64free_charge:
65 bpf_map_charge_finish(&m->map.memory);
68free_m: 66free_m:
69 kfree(m); 67 kfree(m);
70 return ERR_PTR(err); 68 return ERR_PTR(err);
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 217cec4e22c6..ef9cfbfc82a9 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -4955,8 +4955,6 @@ static void css_release_work_fn(struct work_struct *work)
4955 if (cgrp->kn) 4955 if (cgrp->kn)
4956 RCU_INIT_POINTER(*(void __rcu __force **)&cgrp->kn->priv, 4956 RCU_INIT_POINTER(*(void __rcu __force **)&cgrp->kn->priv,
4957 NULL); 4957 NULL);
4958
4959 cgroup_bpf_put(cgrp);
4960 } 4958 }
4961 4959
4962 mutex_unlock(&cgroup_mutex); 4960 mutex_unlock(&cgroup_mutex);
@@ -5482,6 +5480,8 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
5482 5480
5483 cgroup1_check_for_release(parent); 5481 cgroup1_check_for_release(parent);
5484 5482
5483 cgroup_bpf_offline(cgrp);
5484
5485 /* put the base reference */ 5485 /* put the base reference */
5486 percpu_ref_kill(&cgrp->self.refcnt); 5486 percpu_ref_kill(&cgrp->self.refcnt);
5487 5487
@@ -6221,6 +6221,7 @@ void cgroup_sk_alloc(struct sock_cgroup_data *skcd)
6221 * Don't use cgroup_get_live(). 6221 * Don't use cgroup_get_live().
6222 */ 6222 */
6223 cgroup_get(sock_cgroup_ptr(skcd)); 6223 cgroup_get(sock_cgroup_ptr(skcd));
6224 cgroup_bpf_get(sock_cgroup_ptr(skcd));
6224 return; 6225 return;
6225 } 6226 }
6226 6227
@@ -6232,6 +6233,7 @@ void cgroup_sk_alloc(struct sock_cgroup_data *skcd)
6232 cset = task_css_set(current); 6233 cset = task_css_set(current);
6233 if (likely(cgroup_tryget(cset->dfl_cgrp))) { 6234 if (likely(cgroup_tryget(cset->dfl_cgrp))) {
6234 skcd->val = (unsigned long)cset->dfl_cgrp; 6235 skcd->val = (unsigned long)cset->dfl_cgrp;
6236 cgroup_bpf_get(cset->dfl_cgrp);
6235 break; 6237 break;
6236 } 6238 }
6237 cpu_relax(); 6239 cpu_relax();
@@ -6242,7 +6244,10 @@ void cgroup_sk_alloc(struct sock_cgroup_data *skcd)
6242 6244
6243void cgroup_sk_free(struct sock_cgroup_data *skcd) 6245void cgroup_sk_free(struct sock_cgroup_data *skcd)
6244{ 6246{
6245 cgroup_put(sock_cgroup_ptr(skcd)); 6247 struct cgroup *cgrp = sock_cgroup_ptr(skcd);
6248
6249 cgroup_bpf_put(cgrp);
6250 cgroup_put(cgrp);
6246} 6251}
6247 6252
6248#endif /* CONFIG_SOCK_CGROUP_DATA */ 6253#endif /* CONFIG_SOCK_CGROUP_DATA */
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index f92d6ad5e080..3994a231eb92 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -19,6 +19,9 @@
19#include "trace_probe.h" 19#include "trace_probe.h"
20#include "trace.h" 20#include "trace.h"
21 21
22#define bpf_event_rcu_dereference(p) \
23 rcu_dereference_protected(p, lockdep_is_held(&bpf_event_mutex))
24
22#ifdef CONFIG_MODULES 25#ifdef CONFIG_MODULES
23struct bpf_trace_module { 26struct bpf_trace_module {
24 struct module *module; 27 struct module *module;
@@ -567,6 +570,69 @@ static const struct bpf_func_proto bpf_probe_read_str_proto = {
567 .arg3_type = ARG_ANYTHING, 570 .arg3_type = ARG_ANYTHING,
568}; 571};
569 572
573struct send_signal_irq_work {
574 struct irq_work irq_work;
575 struct task_struct *task;
576 u32 sig;
577};
578
579static DEFINE_PER_CPU(struct send_signal_irq_work, send_signal_work);
580
581static void do_bpf_send_signal(struct irq_work *entry)
582{
583 struct send_signal_irq_work *work;
584
585 work = container_of(entry, struct send_signal_irq_work, irq_work);
586 group_send_sig_info(work->sig, SEND_SIG_PRIV, work->task, PIDTYPE_TGID);
587}
588
589BPF_CALL_1(bpf_send_signal, u32, sig)
590{
591 struct send_signal_irq_work *work = NULL;
592
593 /* Similar to bpf_probe_write_user, task needs to be
594 * in a sound condition and kernel memory access be
595 * permitted in order to send signal to the current
596 * task.
597 */
598 if (unlikely(current->flags & (PF_KTHREAD | PF_EXITING)))
599 return -EPERM;
600 if (unlikely(uaccess_kernel()))
601 return -EPERM;
602 if (unlikely(!nmi_uaccess_okay()))
603 return -EPERM;
604
605 if (in_nmi()) {
606 /* Do an early check on signal validity. Otherwise,
607 * the error is lost in deferred irq_work.
608 */
609 if (unlikely(!valid_signal(sig)))
610 return -EINVAL;
611
612 work = this_cpu_ptr(&send_signal_work);
613 if (work->irq_work.flags & IRQ_WORK_BUSY)
614 return -EBUSY;
615
616 /* Add the current task, which is the target of sending signal,
617 * to the irq_work. The current task may change when queued
618 * irq works get executed.
619 */
620 work->task = current;
621 work->sig = sig;
622 irq_work_queue(&work->irq_work);
623 return 0;
624 }
625
626 return group_send_sig_info(sig, SEND_SIG_PRIV, current, PIDTYPE_TGID);
627}
628
629static const struct bpf_func_proto bpf_send_signal_proto = {
630 .func = bpf_send_signal,
631 .gpl_only = false,
632 .ret_type = RET_INTEGER,
633 .arg1_type = ARG_ANYTHING,
634};
635
570static const struct bpf_func_proto * 636static const struct bpf_func_proto *
571tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) 637tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
572{ 638{
@@ -617,6 +683,8 @@ tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
617 case BPF_FUNC_get_current_cgroup_id: 683 case BPF_FUNC_get_current_cgroup_id:
618 return &bpf_get_current_cgroup_id_proto; 684 return &bpf_get_current_cgroup_id_proto;
619#endif 685#endif
686 case BPF_FUNC_send_signal:
687 return &bpf_send_signal_proto;
620 default: 688 default:
621 return NULL; 689 return NULL;
622 } 690 }
@@ -1034,7 +1102,7 @@ static DEFINE_MUTEX(bpf_event_mutex);
1034int perf_event_attach_bpf_prog(struct perf_event *event, 1102int perf_event_attach_bpf_prog(struct perf_event *event,
1035 struct bpf_prog *prog) 1103 struct bpf_prog *prog)
1036{ 1104{
1037 struct bpf_prog_array __rcu *old_array; 1105 struct bpf_prog_array *old_array;
1038 struct bpf_prog_array *new_array; 1106 struct bpf_prog_array *new_array;
1039 int ret = -EEXIST; 1107 int ret = -EEXIST;
1040 1108
@@ -1052,7 +1120,7 @@ int perf_event_attach_bpf_prog(struct perf_event *event,
1052 if (event->prog) 1120 if (event->prog)
1053 goto unlock; 1121 goto unlock;
1054 1122
1055 old_array = event->tp_event->prog_array; 1123 old_array = bpf_event_rcu_dereference(event->tp_event->prog_array);
1056 if (old_array && 1124 if (old_array &&
1057 bpf_prog_array_length(old_array) >= BPF_TRACE_MAX_PROGS) { 1125 bpf_prog_array_length(old_array) >= BPF_TRACE_MAX_PROGS) {
1058 ret = -E2BIG; 1126 ret = -E2BIG;
@@ -1075,7 +1143,7 @@ unlock:
1075 1143
1076void perf_event_detach_bpf_prog(struct perf_event *event) 1144void perf_event_detach_bpf_prog(struct perf_event *event)
1077{ 1145{
1078 struct bpf_prog_array __rcu *old_array; 1146 struct bpf_prog_array *old_array;
1079 struct bpf_prog_array *new_array; 1147 struct bpf_prog_array *new_array;
1080 int ret; 1148 int ret;
1081 1149
@@ -1084,7 +1152,7 @@ void perf_event_detach_bpf_prog(struct perf_event *event)
1084 if (!event->prog) 1152 if (!event->prog)
1085 goto unlock; 1153 goto unlock;
1086 1154
1087 old_array = event->tp_event->prog_array; 1155 old_array = bpf_event_rcu_dereference(event->tp_event->prog_array);
1088 ret = bpf_prog_array_copy(old_array, event->prog, NULL, &new_array); 1156 ret = bpf_prog_array_copy(old_array, event->prog, NULL, &new_array);
1089 if (ret == -ENOENT) 1157 if (ret == -ENOENT)
1090 goto unlock; 1158 goto unlock;
@@ -1106,6 +1174,7 @@ int perf_event_query_prog_array(struct perf_event *event, void __user *info)
1106{ 1174{
1107 struct perf_event_query_bpf __user *uquery = info; 1175 struct perf_event_query_bpf __user *uquery = info;
1108 struct perf_event_query_bpf query = {}; 1176 struct perf_event_query_bpf query = {};
1177 struct bpf_prog_array *progs;
1109 u32 *ids, prog_cnt, ids_len; 1178 u32 *ids, prog_cnt, ids_len;
1110 int ret; 1179 int ret;
1111 1180
@@ -1130,10 +1199,8 @@ int perf_event_query_prog_array(struct perf_event *event, void __user *info)
1130 */ 1199 */
1131 1200
1132 mutex_lock(&bpf_event_mutex); 1201 mutex_lock(&bpf_event_mutex);
1133 ret = bpf_prog_array_copy_info(event->tp_event->prog_array, 1202 progs = bpf_event_rcu_dereference(event->tp_event->prog_array);
1134 ids, 1203 ret = bpf_prog_array_copy_info(progs, ids, ids_len, &prog_cnt);
1135 ids_len,
1136 &prog_cnt);
1137 mutex_unlock(&bpf_event_mutex); 1204 mutex_unlock(&bpf_event_mutex);
1138 1205
1139 if (copy_to_user(&uquery->prog_cnt, &prog_cnt, sizeof(prog_cnt)) || 1206 if (copy_to_user(&uquery->prog_cnt, &prog_cnt, sizeof(prog_cnt)) ||
@@ -1343,5 +1410,18 @@ static int __init bpf_event_init(void)
1343 return 0; 1410 return 0;
1344} 1411}
1345 1412
1413static int __init send_signal_irq_work_init(void)
1414{
1415 int cpu;
1416 struct send_signal_irq_work *work;
1417
1418 for_each_possible_cpu(cpu) {
1419 work = per_cpu_ptr(&send_signal_work, cpu);
1420 init_irq_work(&work->irq_work, do_bpf_send_signal);
1421 }
1422 return 0;
1423}
1424
1346fs_initcall(bpf_event_init); 1425fs_initcall(bpf_event_init);
1426subsys_initcall(send_signal_irq_work_init);
1347#endif /* CONFIG_MODULES */ 1427#endif /* CONFIG_MODULES */
diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c
index cc9597a87770..f40e3d35fd9c 100644
--- a/net/core/bpf_sk_storage.c
+++ b/net/core/bpf_sk_storage.c
@@ -627,6 +627,7 @@ static struct bpf_map *bpf_sk_storage_map_alloc(union bpf_attr *attr)
627 unsigned int i; 627 unsigned int i;
628 u32 nbuckets; 628 u32 nbuckets;
629 u64 cost; 629 u64 cost;
630 int ret;
630 631
631 smap = kzalloc(sizeof(*smap), GFP_USER | __GFP_NOWARN); 632 smap = kzalloc(sizeof(*smap), GFP_USER | __GFP_NOWARN);
632 if (!smap) 633 if (!smap)
@@ -635,13 +636,21 @@ static struct bpf_map *bpf_sk_storage_map_alloc(union bpf_attr *attr)
635 636
636 smap->bucket_log = ilog2(roundup_pow_of_two(num_possible_cpus())); 637 smap->bucket_log = ilog2(roundup_pow_of_two(num_possible_cpus()));
637 nbuckets = 1U << smap->bucket_log; 638 nbuckets = 1U << smap->bucket_log;
639 cost = sizeof(*smap->buckets) * nbuckets + sizeof(*smap);
640
641 ret = bpf_map_charge_init(&smap->map.memory, cost);
642 if (ret < 0) {
643 kfree(smap);
644 return ERR_PTR(ret);
645 }
646
638 smap->buckets = kvcalloc(sizeof(*smap->buckets), nbuckets, 647 smap->buckets = kvcalloc(sizeof(*smap->buckets), nbuckets,
639 GFP_USER | __GFP_NOWARN); 648 GFP_USER | __GFP_NOWARN);
640 if (!smap->buckets) { 649 if (!smap->buckets) {
650 bpf_map_charge_finish(&smap->map.memory);
641 kfree(smap); 651 kfree(smap);
642 return ERR_PTR(-ENOMEM); 652 return ERR_PTR(-ENOMEM);
643 } 653 }
644 cost = sizeof(*smap->buckets) * nbuckets + sizeof(*smap);
645 654
646 for (i = 0; i < nbuckets; i++) { 655 for (i = 0; i < nbuckets; i++) {
647 INIT_HLIST_HEAD(&smap->buckets[i].list); 656 INIT_HLIST_HEAD(&smap->buckets[i].list);
@@ -651,7 +660,6 @@ static struct bpf_map *bpf_sk_storage_map_alloc(union bpf_attr *attr)
651 smap->elem_size = sizeof(struct bpf_sk_storage_elem) + attr->value_size; 660 smap->elem_size = sizeof(struct bpf_sk_storage_elem) + attr->value_size;
652 smap->cache_idx = (unsigned int)atomic_inc_return(&cache_idx) % 661 smap->cache_idx = (unsigned int)atomic_inc_return(&cache_idx) %
653 BPF_SK_STORAGE_CACHE_SIZE; 662 BPF_SK_STORAGE_CACHE_SIZE;
654 smap->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
655 663
656 return &smap->map; 664 return &smap->map;
657} 665}
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index be6092ac69f8..52d4faeee18b 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -44,13 +44,7 @@ static struct bpf_map *sock_map_alloc(union bpf_attr *attr)
44 44
45 /* Make sure page count doesn't overflow. */ 45 /* Make sure page count doesn't overflow. */
46 cost = (u64) stab->map.max_entries * sizeof(struct sock *); 46 cost = (u64) stab->map.max_entries * sizeof(struct sock *);
47 if (cost >= U32_MAX - PAGE_SIZE) { 47 err = bpf_map_charge_init(&stab->map.memory, cost);
48 err = -EINVAL;
49 goto free_stab;
50 }
51
52 stab->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
53 err = bpf_map_precharge_memlock(stab->map.pages);
54 if (err) 48 if (err)
55 goto free_stab; 49 goto free_stab;
56 50
@@ -60,6 +54,7 @@ static struct bpf_map *sock_map_alloc(union bpf_attr *attr)
60 if (stab->sks) 54 if (stab->sks)
61 return &stab->map; 55 return &stab->map;
62 err = -ENOMEM; 56 err = -ENOMEM;
57 bpf_map_charge_finish(&stab->map.memory);
63free_stab: 58free_stab:
64 kfree(stab); 59 kfree(stab);
65 return ERR_PTR(err); 60 return ERR_PTR(err);
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index d8f2a22c6ff7..ceca5285d9b4 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -287,16 +287,9 @@ static int ip_finish_output_gso(struct net *net, struct sock *sk,
287 return ret; 287 return ret;
288} 288}
289 289
290static int ip_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb) 290static int __ip_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
291{ 291{
292 unsigned int mtu; 292 unsigned int mtu;
293 int ret;
294
295 ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
296 if (ret) {
297 kfree_skb(skb);
298 return ret;
299 }
300 293
301#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM) 294#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
302 /* Policy lookup after SNAT yielded a new policy */ 295 /* Policy lookup after SNAT yielded a new policy */
@@ -315,18 +308,37 @@ static int ip_finish_output(struct net *net, struct sock *sk, struct sk_buff *sk
315 return ip_finish_output2(net, sk, skb); 308 return ip_finish_output2(net, sk, skb);
316} 309}
317 310
311static int ip_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
312{
313 int ret;
314
315 ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
316 switch (ret) {
317 case NET_XMIT_SUCCESS:
318 return __ip_finish_output(net, sk, skb);
319 case NET_XMIT_CN:
320 return __ip_finish_output(net, sk, skb) ? : ret;
321 default:
322 kfree_skb(skb);
323 return ret;
324 }
325}
326
318static int ip_mc_finish_output(struct net *net, struct sock *sk, 327static int ip_mc_finish_output(struct net *net, struct sock *sk,
319 struct sk_buff *skb) 328 struct sk_buff *skb)
320{ 329{
321 int ret; 330 int ret;
322 331
323 ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb); 332 ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
324 if (ret) { 333 switch (ret) {
334 case NET_XMIT_SUCCESS:
335 return dev_loopback_xmit(net, sk, skb);
336 case NET_XMIT_CN:
337 return dev_loopback_xmit(net, sk, skb) ? : ret;
338 default:
325 kfree_skb(skb); 339 kfree_skb(skb);
326 return ret; 340 return ret;
327 } 341 }
328
329 return dev_loopback_xmit(net, sk, skb);
330} 342}
331 343
332int ip_mc_output(struct net *net, struct sock *sk, struct sk_buff *skb) 344int ip_mc_output(struct net *net, struct sock *sk, struct sk_buff *skb)
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 0bb6b6de7962..8fa83b78f81a 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -128,16 +128,8 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
128 return -EINVAL; 128 return -EINVAL;
129} 129}
130 130
131static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb) 131static int __ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
132{ 132{
133 int ret;
134
135 ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
136 if (ret) {
137 kfree_skb(skb);
138 return ret;
139 }
140
141#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM) 133#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
142 /* Policy lookup after SNAT yielded a new policy */ 134 /* Policy lookup after SNAT yielded a new policy */
143 if (skb_dst(skb)->xfrm) { 135 if (skb_dst(skb)->xfrm) {
@@ -154,6 +146,22 @@ static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *s
154 return ip6_finish_output2(net, sk, skb); 146 return ip6_finish_output2(net, sk, skb);
155} 147}
156 148
149static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
150{
151 int ret;
152
153 ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
154 switch (ret) {
155 case NET_XMIT_SUCCESS:
156 return __ip6_finish_output(net, sk, skb);
157 case NET_XMIT_CN:
158 return __ip6_finish_output(net, sk, skb) ? : ret;
159 default:
160 kfree_skb(skb);
161 return ret;
162 }
163}
164
157int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb) 165int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
158{ 166{
159 struct net_device *dev = skb_dst(skb)->dev; 167 struct net_device *dev = skb_dst(skb)->dev;
diff --git a/samples/bpf/.gitignore b/samples/bpf/.gitignore
index c7498457595a..74d31fd3c99c 100644
--- a/samples/bpf/.gitignore
+++ b/samples/bpf/.gitignore
@@ -1,6 +1,7 @@
1cpustat 1cpustat
2fds_example 2fds_example
3hbm 3hbm
4ibumad
4lathist 5lathist
5lwt_len_hist 6lwt_len_hist
6map_perf_test 7map_perf_test
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index 4f0a1cdbfe7c..253e5a2856be 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -26,7 +26,6 @@ hostprogs-y += map_perf_test
26hostprogs-y += test_overhead 26hostprogs-y += test_overhead
27hostprogs-y += test_cgrp2_array_pin 27hostprogs-y += test_cgrp2_array_pin
28hostprogs-y += test_cgrp2_attach 28hostprogs-y += test_cgrp2_attach
29hostprogs-y += test_cgrp2_attach2
30hostprogs-y += test_cgrp2_sock 29hostprogs-y += test_cgrp2_sock
31hostprogs-y += test_cgrp2_sock2 30hostprogs-y += test_cgrp2_sock2
32hostprogs-y += xdp1 31hostprogs-y += xdp1
@@ -81,7 +80,6 @@ map_perf_test-objs := bpf_load.o map_perf_test_user.o
81test_overhead-objs := bpf_load.o test_overhead_user.o 80test_overhead-objs := bpf_load.o test_overhead_user.o
82test_cgrp2_array_pin-objs := test_cgrp2_array_pin.o 81test_cgrp2_array_pin-objs := test_cgrp2_array_pin.o
83test_cgrp2_attach-objs := test_cgrp2_attach.o 82test_cgrp2_attach-objs := test_cgrp2_attach.o
84test_cgrp2_attach2-objs := test_cgrp2_attach2.o $(CGROUP_HELPERS)
85test_cgrp2_sock-objs := test_cgrp2_sock.o 83test_cgrp2_sock-objs := test_cgrp2_sock.o
86test_cgrp2_sock2-objs := bpf_load.o test_cgrp2_sock2.o 84test_cgrp2_sock2-objs := bpf_load.o test_cgrp2_sock2.o
87xdp1-objs := xdp1_user.o 85xdp1-objs := xdp1_user.o
diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c
index eae7b635343d..1734ade04f7f 100644
--- a/samples/bpf/bpf_load.c
+++ b/samples/bpf/bpf_load.c
@@ -40,7 +40,7 @@ int prog_cnt;
40int prog_array_fd = -1; 40int prog_array_fd = -1;
41 41
42struct bpf_map_data map_data[MAX_MAPS]; 42struct bpf_map_data map_data[MAX_MAPS];
43int map_data_count = 0; 43int map_data_count;
44 44
45static int populate_prog_array(const char *event, int prog_fd) 45static int populate_prog_array(const char *event, int prog_fd)
46{ 46{
@@ -65,7 +65,7 @@ static int write_kprobe_events(const char *val)
65 else 65 else
66 flags = O_WRONLY | O_APPEND; 66 flags = O_WRONLY | O_APPEND;
67 67
68 fd = open("/sys/kernel/debug/tracing/kprobe_events", flags); 68 fd = open(DEBUGFS "kprobe_events", flags);
69 69
70 ret = write(fd, val, strlen(val)); 70 ret = write(fd, val, strlen(val));
71 close(fd); 71 close(fd);
@@ -490,8 +490,8 @@ static int load_elf_maps_section(struct bpf_map_data *maps, int maps_shndx,
490 490
491 /* Verify no newer features were requested */ 491 /* Verify no newer features were requested */
492 if (validate_zero) { 492 if (validate_zero) {
493 addr = (unsigned char*) def + map_sz_copy; 493 addr = (unsigned char *) def + map_sz_copy;
494 end = (unsigned char*) def + map_sz_elf; 494 end = (unsigned char *) def + map_sz_elf;
495 for (; addr < end; addr++) { 495 for (; addr < end; addr++) {
496 if (*addr != 0) { 496 if (*addr != 0) {
497 free(sym); 497 free(sym);
diff --git a/samples/bpf/do_hbm_test.sh b/samples/bpf/do_hbm_test.sh
index 56c8b4115c95..e48b047d4646 100755
--- a/samples/bpf/do_hbm_test.sh
+++ b/samples/bpf/do_hbm_test.sh
@@ -13,10 +13,10 @@ Usage() {
13 echo "egress or ingress bandwidht. It then uses iperf3 or netperf to create" 13 echo "egress or ingress bandwidht. It then uses iperf3 or netperf to create"
14 echo "loads. The output is the goodput in Mbps (unless -D was used)." 14 echo "loads. The output is the goodput in Mbps (unless -D was used)."
15 echo "" 15 echo ""
16 echo "USAGE: $name [out] [-b=<prog>|--bpf=<prog>] [-c=<cc>|--cc=<cc>] [-D]" 16 echo "USAGE: $name [out] [-b=<prog>|--bpf=<prog>] [-c=<cc>|--cc=<cc>]"
17 echo " [-d=<delay>|--delay=<delay>] [--debug] [-E]" 17 echo " [-D] [-d=<delay>|--delay=<delay>] [--debug] [-E]"
18 echo " [-f=<#flows>|--flows=<#flows>] [-h] [-i=<id>|--id=<id >]" 18 echo " [-f=<#flows>|--flows=<#flows>] [-h] [-i=<id>|--id=<id >]"
19 echo " [-l] [-N] [-p=<port>|--port=<port>] [-P]" 19 echo " [-l] [-N] [--no_cn] [-p=<port>|--port=<port>] [-P]"
20 echo " [-q=<qdisc>] [-R] [-s=<server>|--server=<server]" 20 echo " [-q=<qdisc>] [-R] [-s=<server>|--server=<server]"
21 echo " [-S|--stats] -t=<time>|--time=<time>] [-w] [cubic|dctcp]" 21 echo " [-S|--stats] -t=<time>|--time=<time>] [-w] [cubic|dctcp]"
22 echo " Where:" 22 echo " Where:"
@@ -33,6 +33,7 @@ Usage() {
33 echo " -f or --flows number of concurrent flows (default=1)" 33 echo " -f or --flows number of concurrent flows (default=1)"
34 echo " -i or --id cgroup id (an integer, default is 1)" 34 echo " -i or --id cgroup id (an integer, default is 1)"
35 echo " -N use netperf instead of iperf3" 35 echo " -N use netperf instead of iperf3"
36 echo " --no_cn Do not return CN notifications"
36 echo " -l do not limit flows using loopback" 37 echo " -l do not limit flows using loopback"
37 echo " -h Help" 38 echo " -h Help"
38 echo " -p or --port iperf3 port (default is 5201)" 39 echo " -p or --port iperf3 port (default is 5201)"
@@ -115,6 +116,9 @@ processArgs () {
115 -c=*|--cc=*) 116 -c=*|--cc=*)
116 cc="${i#*=}" 117 cc="${i#*=}"
117 ;; 118 ;;
119 --no_cn)
120 flags="$flags --no_cn"
121 ;;
118 --debug) 122 --debug)
119 flags="$flags -d" 123 flags="$flags -d"
120 debug_flag=1 124 debug_flag=1
diff --git a/samples/bpf/hbm.c b/samples/bpf/hbm.c
index a79828ab273f..480b7ad6a1f2 100644
--- a/samples/bpf/hbm.c
+++ b/samples/bpf/hbm.c
@@ -16,6 +16,7 @@
16 * -l Also limit flows doing loopback 16 * -l Also limit flows doing loopback
17 * -n <#> To create cgroup \"/hbm#\" and attach prog 17 * -n <#> To create cgroup \"/hbm#\" and attach prog
18 * Default is /hbm1 18 * Default is /hbm1
19 * --no_cn Do not return cn notifications
19 * -r <rate> Rate limit in Mbps 20 * -r <rate> Rate limit in Mbps
20 * -s Get HBM stats (marked, dropped, etc.) 21 * -s Get HBM stats (marked, dropped, etc.)
21 * -t <time> Exit after specified seconds (default is 0) 22 * -t <time> Exit after specified seconds (default is 0)
@@ -42,6 +43,7 @@
42 43
43#include <linux/bpf.h> 44#include <linux/bpf.h>
44#include <bpf/bpf.h> 45#include <bpf/bpf.h>
46#include <getopt.h>
45 47
46#include "bpf_load.h" 48#include "bpf_load.h"
47#include "bpf_rlimit.h" 49#include "bpf_rlimit.h"
@@ -59,6 +61,7 @@ bool stats_flag;
59bool loopback_flag; 61bool loopback_flag;
60bool debugFlag; 62bool debugFlag;
61bool work_conserving_flag; 63bool work_conserving_flag;
64bool no_cn_flag;
62 65
63static void Usage(void); 66static void Usage(void);
64static void read_trace_pipe2(void); 67static void read_trace_pipe2(void);
@@ -185,6 +188,7 @@ static int run_bpf_prog(char *prog, int cg_id)
185 qstats.rate = rate; 188 qstats.rate = rate;
186 qstats.stats = stats_flag ? 1 : 0; 189 qstats.stats = stats_flag ? 1 : 0;
187 qstats.loopback = loopback_flag ? 1 : 0; 190 qstats.loopback = loopback_flag ? 1 : 0;
191 qstats.no_cn = no_cn_flag ? 1 : 0;
188 if (bpf_map_update_elem(map_fd, &key, &qstats, BPF_ANY)) { 192 if (bpf_map_update_elem(map_fd, &key, &qstats, BPF_ANY)) {
189 printf("ERROR: Could not update map element\n"); 193 printf("ERROR: Could not update map element\n");
190 goto err; 194 goto err;
@@ -312,6 +316,14 @@ static int run_bpf_prog(char *prog, int cg_id)
312 double percent_pkts, percent_bytes; 316 double percent_pkts, percent_bytes;
313 char fname[100]; 317 char fname[100];
314 FILE *fout; 318 FILE *fout;
319 int k;
320 static const char *returnValNames[] = {
321 "DROP_PKT",
322 "ALLOW_PKT",
323 "DROP_PKT_CWR",
324 "ALLOW_PKT_CWR"
325 };
326#define RET_VAL_COUNT 4
315 327
316// Future support of ingress 328// Future support of ingress
317// if (!outFlag) 329// if (!outFlag)
@@ -346,6 +358,31 @@ static int run_bpf_prog(char *prog, int cg_id)
346 (qstats.bytes_total + 1); 358 (qstats.bytes_total + 1);
347 fprintf(fout, "pkts_dropped_percent:%6.2f\n", percent_pkts); 359 fprintf(fout, "pkts_dropped_percent:%6.2f\n", percent_pkts);
348 fprintf(fout, "bytes_dropped_percent:%6.2f\n", percent_bytes); 360 fprintf(fout, "bytes_dropped_percent:%6.2f\n", percent_bytes);
361
362 // ECN CE markings
363 percent_pkts = (qstats.pkts_ecn_ce * 100.0) /
364 (qstats.pkts_total + 1);
365 fprintf(fout, "pkts_ecn_ce:%6.2f (%d)\n", percent_pkts,
366 (int)qstats.pkts_ecn_ce);
367
368 // Average cwnd
369 fprintf(fout, "avg cwnd:%d\n",
370 (int)(qstats.sum_cwnd / (qstats.sum_cwnd_cnt + 1)));
371 // Average rtt
372 fprintf(fout, "avg rtt:%d\n",
373 (int)(qstats.sum_rtt / (qstats.pkts_total + 1)));
374 // Average credit
375 fprintf(fout, "avg credit:%d\n",
376 (int)(qstats.sum_credit /
377 (1500 * ((int)qstats.pkts_total) + 1)));
378
379 // Return values stats
380 for (k = 0; k < RET_VAL_COUNT; k++) {
381 percent_pkts = (qstats.returnValCount[k] * 100.0) /
382 (qstats.pkts_total + 1);
383 fprintf(fout, "%s:%6.2f (%d)\n", returnValNames[k],
384 percent_pkts, (int)qstats.returnValCount[k]);
385 }
349 fclose(fout); 386 fclose(fout);
350 } 387 }
351 388
@@ -366,14 +403,15 @@ static void Usage(void)
366{ 403{
367 printf("This program loads a cgroup skb BPF program to enforce\n" 404 printf("This program loads a cgroup skb BPF program to enforce\n"
368 "cgroup output (egress) bandwidth limits.\n\n" 405 "cgroup output (egress) bandwidth limits.\n\n"
369 "USAGE: hbm [-o] [-d] [-l] [-n <id>] [-r <rate>] [-s]\n" 406 "USAGE: hbm [-o] [-d] [-l] [-n <id>] [--no_cn] [-r <rate>]\n"
370 " [-t <secs>] [-w] [-h] [prog]\n" 407 " [-s] [-t <secs>] [-w] [-h] [prog]\n"
371 " Where:\n" 408 " Where:\n"
372 " -o indicates egress direction (default)\n" 409 " -o indicates egress direction (default)\n"
373 " -d print BPF trace debug buffer\n" 410 " -d print BPF trace debug buffer\n"
374 " -l also limit flows using loopback\n" 411 " -l also limit flows using loopback\n"
375 " -n <#> to create cgroup \"/hbm#\" and attach prog\n" 412 " -n <#> to create cgroup \"/hbm#\" and attach prog\n"
376 " Default is /hbm1\n" 413 " Default is /hbm1\n"
414 " --no_cn disable CN notifcations\n"
377 " -r <rate> Rate in Mbps\n" 415 " -r <rate> Rate in Mbps\n"
378 " -s Update HBM stats\n" 416 " -s Update HBM stats\n"
379 " -t <time> Exit after specified seconds (default is 0)\n" 417 " -t <time> Exit after specified seconds (default is 0)\n"
@@ -393,9 +431,16 @@ int main(int argc, char **argv)
393 int k; 431 int k;
394 int cg_id = 1; 432 int cg_id = 1;
395 char *optstring = "iodln:r:st:wh"; 433 char *optstring = "iodln:r:st:wh";
434 struct option loptions[] = {
435 {"no_cn", 0, NULL, 1},
436 {NULL, 0, NULL, 0}
437 };
396 438
397 while ((k = getopt(argc, argv, optstring)) != -1) { 439 while ((k = getopt_long(argc, argv, optstring, loptions, NULL)) != -1) {
398 switch (k) { 440 switch (k) {
441 case 1:
442 no_cn_flag = true;
443 break;
399 case'o': 444 case'o':
400 break; 445 break;
401 case 'd': 446 case 'd':
diff --git a/samples/bpf/hbm.h b/samples/bpf/hbm.h
index 518e8147d084..f0963ed6a562 100644
--- a/samples/bpf/hbm.h
+++ b/samples/bpf/hbm.h
@@ -19,7 +19,8 @@ struct hbm_vqueue {
19struct hbm_queue_stats { 19struct hbm_queue_stats {
20 unsigned long rate; /* in Mbps*/ 20 unsigned long rate; /* in Mbps*/
21 unsigned long stats:1, /* get HBM stats (marked, dropped,..) */ 21 unsigned long stats:1, /* get HBM stats (marked, dropped,..) */
22 loopback:1; /* also limit flows using loopback */ 22 loopback:1, /* also limit flows using loopback */
23 no_cn:1; /* do not use cn flags */
23 unsigned long long pkts_marked; 24 unsigned long long pkts_marked;
24 unsigned long long bytes_marked; 25 unsigned long long bytes_marked;
25 unsigned long long pkts_dropped; 26 unsigned long long pkts_dropped;
@@ -28,4 +29,10 @@ struct hbm_queue_stats {
28 unsigned long long bytes_total; 29 unsigned long long bytes_total;
29 unsigned long long firstPacketTime; 30 unsigned long long firstPacketTime;
30 unsigned long long lastPacketTime; 31 unsigned long long lastPacketTime;
32 unsigned long long pkts_ecn_ce;
33 unsigned long long returnValCount[4];
34 unsigned long long sum_cwnd;
35 unsigned long long sum_rtt;
36 unsigned long long sum_cwnd_cnt;
37 long long sum_credit;
31}; 38};
diff --git a/samples/bpf/hbm_kern.h b/samples/bpf/hbm_kern.h
index c5635d924193..be19cf1d5cd5 100644
--- a/samples/bpf/hbm_kern.h
+++ b/samples/bpf/hbm_kern.h
@@ -30,15 +30,8 @@
30#define ALLOW_PKT 1 30#define ALLOW_PKT 1
31#define TCP_ECN_OK 1 31#define TCP_ECN_OK 1
32 32
33#define HBM_DEBUG 0 // Set to 1 to enable debugging 33#ifndef HBM_DEBUG // Define HBM_DEBUG to enable debugging
34#if HBM_DEBUG 34#undef bpf_printk
35#define bpf_printk(fmt, ...) \
36({ \
37 char ____fmt[] = fmt; \
38 bpf_trace_printk(____fmt, sizeof(____fmt), \
39 ##__VA_ARGS__); \
40})
41#else
42#define bpf_printk(fmt, ...) 35#define bpf_printk(fmt, ...)
43#endif 36#endif
44 37
@@ -72,17 +65,43 @@ struct bpf_map_def SEC("maps") queue_stats = {
72BPF_ANNOTATE_KV_PAIR(queue_stats, int, struct hbm_queue_stats); 65BPF_ANNOTATE_KV_PAIR(queue_stats, int, struct hbm_queue_stats);
73 66
74struct hbm_pkt_info { 67struct hbm_pkt_info {
68 int cwnd;
69 int rtt;
75 bool is_ip; 70 bool is_ip;
76 bool is_tcp; 71 bool is_tcp;
77 short ecn; 72 short ecn;
78}; 73};
79 74
75static int get_tcp_info(struct __sk_buff *skb, struct hbm_pkt_info *pkti)
76{
77 struct bpf_sock *sk;
78 struct bpf_tcp_sock *tp;
79
80 sk = skb->sk;
81 if (sk) {
82 sk = bpf_sk_fullsock(sk);
83 if (sk) {
84 if (sk->protocol == IPPROTO_TCP) {
85 tp = bpf_tcp_sock(sk);
86 if (tp) {
87 pkti->cwnd = tp->snd_cwnd;
88 pkti->rtt = tp->srtt_us >> 3;
89 return 0;
90 }
91 }
92 }
93 }
94 return 1;
95}
96
80static __always_inline void hbm_get_pkt_info(struct __sk_buff *skb, 97static __always_inline void hbm_get_pkt_info(struct __sk_buff *skb,
81 struct hbm_pkt_info *pkti) 98 struct hbm_pkt_info *pkti)
82{ 99{
83 struct iphdr iph; 100 struct iphdr iph;
84 struct ipv6hdr *ip6h; 101 struct ipv6hdr *ip6h;
85 102
103 pkti->cwnd = 0;
104 pkti->rtt = 0;
86 bpf_skb_load_bytes(skb, 0, &iph, 12); 105 bpf_skb_load_bytes(skb, 0, &iph, 12);
87 if (iph.version == 6) { 106 if (iph.version == 6) {
88 ip6h = (struct ipv6hdr *)&iph; 107 ip6h = (struct ipv6hdr *)&iph;
@@ -98,6 +117,8 @@ static __always_inline void hbm_get_pkt_info(struct __sk_buff *skb,
98 pkti->is_tcp = false; 117 pkti->is_tcp = false;
99 pkti->ecn = 0; 118 pkti->ecn = 0;
100 } 119 }
120 if (pkti->is_tcp)
121 get_tcp_info(skb, pkti);
101} 122}
102 123
103static __always_inline void hbm_init_vqueue(struct hbm_vqueue *qdp, int rate) 124static __always_inline void hbm_init_vqueue(struct hbm_vqueue *qdp, int rate)
@@ -112,8 +133,14 @@ static __always_inline void hbm_update_stats(struct hbm_queue_stats *qsp,
112 int len, 133 int len,
113 unsigned long long curtime, 134 unsigned long long curtime,
114 bool congestion_flag, 135 bool congestion_flag,
115 bool drop_flag) 136 bool drop_flag,
137 bool cwr_flag,
138 bool ecn_ce_flag,
139 struct hbm_pkt_info *pkti,
140 int credit)
116{ 141{
142 int rv = ALLOW_PKT;
143
117 if (qsp != NULL) { 144 if (qsp != NULL) {
118 // Following is needed for work conserving 145 // Following is needed for work conserving
119 __sync_add_and_fetch(&(qsp->bytes_total), len); 146 __sync_add_and_fetch(&(qsp->bytes_total), len);
@@ -123,7 +150,7 @@ static __always_inline void hbm_update_stats(struct hbm_queue_stats *qsp,
123 qsp->firstPacketTime = curtime; 150 qsp->firstPacketTime = curtime;
124 qsp->lastPacketTime = curtime; 151 qsp->lastPacketTime = curtime;
125 __sync_add_and_fetch(&(qsp->pkts_total), 1); 152 __sync_add_and_fetch(&(qsp->pkts_total), 1);
126 if (congestion_flag || drop_flag) { 153 if (congestion_flag) {
127 __sync_add_and_fetch(&(qsp->pkts_marked), 1); 154 __sync_add_and_fetch(&(qsp->pkts_marked), 1);
128 __sync_add_and_fetch(&(qsp->bytes_marked), len); 155 __sync_add_and_fetch(&(qsp->bytes_marked), len);
129 } 156 }
@@ -132,6 +159,34 @@ static __always_inline void hbm_update_stats(struct hbm_queue_stats *qsp,
132 __sync_add_and_fetch(&(qsp->bytes_dropped), 159 __sync_add_and_fetch(&(qsp->bytes_dropped),
133 len); 160 len);
134 } 161 }
162 if (ecn_ce_flag)
163 __sync_add_and_fetch(&(qsp->pkts_ecn_ce), 1);
164 if (pkti->cwnd) {
165 __sync_add_and_fetch(&(qsp->sum_cwnd),
166 pkti->cwnd);
167 __sync_add_and_fetch(&(qsp->sum_cwnd_cnt), 1);
168 }
169 if (pkti->rtt)
170 __sync_add_and_fetch(&(qsp->sum_rtt),
171 pkti->rtt);
172 __sync_add_and_fetch(&(qsp->sum_credit), credit);
173
174 if (drop_flag)
175 rv = DROP_PKT;
176 if (cwr_flag)
177 rv |= 2;
178 if (rv == DROP_PKT)
179 __sync_add_and_fetch(&(qsp->returnValCount[0]),
180 1);
181 else if (rv == ALLOW_PKT)
182 __sync_add_and_fetch(&(qsp->returnValCount[1]),
183 1);
184 else if (rv == 2)
185 __sync_add_and_fetch(&(qsp->returnValCount[2]),
186 1);
187 else if (rv == 3)
188 __sync_add_and_fetch(&(qsp->returnValCount[3]),
189 1);
135 } 190 }
136 } 191 }
137} 192}
diff --git a/samples/bpf/hbm_out_kern.c b/samples/bpf/hbm_out_kern.c
index f806863d0b79..829934bd43cb 100644
--- a/samples/bpf/hbm_out_kern.c
+++ b/samples/bpf/hbm_out_kern.c
@@ -62,11 +62,12 @@ int _hbm_out_cg(struct __sk_buff *skb)
62 unsigned int queue_index = 0; 62 unsigned int queue_index = 0;
63 unsigned long long curtime; 63 unsigned long long curtime;
64 int credit; 64 int credit;
65 signed long long delta = 0, zero = 0; 65 signed long long delta = 0, new_credit;
66 int max_credit = MAX_CREDIT; 66 int max_credit = MAX_CREDIT;
67 bool congestion_flag = false; 67 bool congestion_flag = false;
68 bool drop_flag = false; 68 bool drop_flag = false;
69 bool cwr_flag = false; 69 bool cwr_flag = false;
70 bool ecn_ce_flag = false;
70 struct hbm_vqueue *qdp; 71 struct hbm_vqueue *qdp;
71 struct hbm_queue_stats *qsp = NULL; 72 struct hbm_queue_stats *qsp = NULL;
72 int rv = ALLOW_PKT; 73 int rv = ALLOW_PKT;
@@ -99,9 +100,11 @@ int _hbm_out_cg(struct __sk_buff *skb)
99 */ 100 */
100 if (delta > 0) { 101 if (delta > 0) {
101 qdp->lasttime = curtime; 102 qdp->lasttime = curtime;
102 credit += CREDIT_PER_NS(delta, qdp->rate); 103 new_credit = credit + CREDIT_PER_NS(delta, qdp->rate);
103 if (credit > MAX_CREDIT) 104 if (new_credit > MAX_CREDIT)
104 credit = MAX_CREDIT; 105 credit = MAX_CREDIT;
106 else
107 credit = new_credit;
105 } 108 }
106 credit -= len; 109 credit -= len;
107 qdp->credit = credit; 110 qdp->credit = credit;
@@ -119,13 +122,16 @@ int _hbm_out_cg(struct __sk_buff *skb)
119 // Set flags (drop, congestion, cwr) 122 // Set flags (drop, congestion, cwr)
120 // Dropping => we are congested, so ignore congestion flag 123 // Dropping => we are congested, so ignore congestion flag
121 if (credit < -DROP_THRESH || 124 if (credit < -DROP_THRESH ||
122 (len > LARGE_PKT_THRESH && 125 (len > LARGE_PKT_THRESH && credit < -LARGE_PKT_DROP_THRESH)) {
123 credit < -LARGE_PKT_DROP_THRESH)) { 126 // Very congested, set drop packet
124 // Very congested, set drop flag
125 drop_flag = true; 127 drop_flag = true;
128 if (pkti.ecn)
129 congestion_flag = true;
130 else if (pkti.is_tcp)
131 cwr_flag = true;
126 } else if (credit < 0) { 132 } else if (credit < 0) {
127 // Congested, set congestion flag 133 // Congested, set congestion flag
128 if (pkti.ecn) { 134 if (pkti.ecn || pkti.is_tcp) {
129 if (credit < -MARK_THRESH) 135 if (credit < -MARK_THRESH)
130 congestion_flag = true; 136 congestion_flag = true;
131 else 137 else
@@ -136,22 +142,38 @@ int _hbm_out_cg(struct __sk_buff *skb)
136 } 142 }
137 143
138 if (congestion_flag) { 144 if (congestion_flag) {
139 if (!bpf_skb_ecn_set_ce(skb)) { 145 if (bpf_skb_ecn_set_ce(skb)) {
140 if (len > LARGE_PKT_THRESH) { 146 ecn_ce_flag = true;
147 } else {
148 if (pkti.is_tcp) {
149 unsigned int rand = bpf_get_prandom_u32();
150
151 if (-credit >= MARK_THRESH +
152 (rand % MARK_REGION_SIZE)) {
153 // Do congestion control
154 cwr_flag = true;
155 }
156 } else if (len > LARGE_PKT_THRESH) {
141 // Problem if too many small packets? 157 // Problem if too many small packets?
142 drop_flag = true; 158 drop_flag = true;
143 } 159 }
144 } 160 }
145 } 161 }
146 162
147 if (drop_flag) 163 if (qsp != NULL)
148 rv = DROP_PKT; 164 if (qsp->no_cn)
165 cwr_flag = false;
149 166
150 hbm_update_stats(qsp, len, curtime, congestion_flag, drop_flag); 167 hbm_update_stats(qsp, len, curtime, congestion_flag, drop_flag,
168 cwr_flag, ecn_ce_flag, &pkti, credit);
151 169
152 if (rv == DROP_PKT) 170 if (drop_flag) {
153 __sync_add_and_fetch(&(qdp->credit), len); 171 __sync_add_and_fetch(&(qdp->credit), len);
172 rv = DROP_PKT;
173 }
154 174
175 if (cwr_flag)
176 rv |= 2;
155 return rv; 177 return rv;
156} 178}
157char _license[] SEC("license") = "GPL"; 179char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/tcp_basertt_kern.c b/samples/bpf/tcp_basertt_kern.c
index 6ef1625e8b2c..9dba48c2b920 100644
--- a/samples/bpf/tcp_basertt_kern.c
+++ b/samples/bpf/tcp_basertt_kern.c
@@ -21,13 +21,6 @@
21 21
22#define DEBUG 1 22#define DEBUG 1
23 23
24#define bpf_printk(fmt, ...) \
25({ \
26 char ____fmt[] = fmt; \
27 bpf_trace_printk(____fmt, sizeof(____fmt), \
28 ##__VA_ARGS__); \
29})
30
31SEC("sockops") 24SEC("sockops")
32int bpf_basertt(struct bpf_sock_ops *skops) 25int bpf_basertt(struct bpf_sock_ops *skops)
33{ 26{
diff --git a/samples/bpf/tcp_bufs_kern.c b/samples/bpf/tcp_bufs_kern.c
index e03e204739fa..af8486f33771 100644
--- a/samples/bpf/tcp_bufs_kern.c
+++ b/samples/bpf/tcp_bufs_kern.c
@@ -22,13 +22,6 @@
22 22
23#define DEBUG 1 23#define DEBUG 1
24 24
25#define bpf_printk(fmt, ...) \
26({ \
27 char ____fmt[] = fmt; \
28 bpf_trace_printk(____fmt, sizeof(____fmt), \
29 ##__VA_ARGS__); \
30})
31
32SEC("sockops") 25SEC("sockops")
33int bpf_bufs(struct bpf_sock_ops *skops) 26int bpf_bufs(struct bpf_sock_ops *skops)
34{ 27{
diff --git a/samples/bpf/tcp_clamp_kern.c b/samples/bpf/tcp_clamp_kern.c
index a0dc2d254aca..26c0fd091f3c 100644
--- a/samples/bpf/tcp_clamp_kern.c
+++ b/samples/bpf/tcp_clamp_kern.c
@@ -22,13 +22,6 @@
22 22
23#define DEBUG 1 23#define DEBUG 1
24 24
25#define bpf_printk(fmt, ...) \
26({ \
27 char ____fmt[] = fmt; \
28 bpf_trace_printk(____fmt, sizeof(____fmt), \
29 ##__VA_ARGS__); \
30})
31
32SEC("sockops") 25SEC("sockops")
33int bpf_clamp(struct bpf_sock_ops *skops) 26int bpf_clamp(struct bpf_sock_ops *skops)
34{ 27{
diff --git a/samples/bpf/tcp_cong_kern.c b/samples/bpf/tcp_cong_kern.c
index 4fd3ca979a06..6d4dc4c7dd1e 100644
--- a/samples/bpf/tcp_cong_kern.c
+++ b/samples/bpf/tcp_cong_kern.c
@@ -21,13 +21,6 @@
21 21
22#define DEBUG 1 22#define DEBUG 1
23 23
24#define bpf_printk(fmt, ...) \
25({ \
26 char ____fmt[] = fmt; \
27 bpf_trace_printk(____fmt, sizeof(____fmt), \
28 ##__VA_ARGS__); \
29})
30
31SEC("sockops") 24SEC("sockops")
32int bpf_cong(struct bpf_sock_ops *skops) 25int bpf_cong(struct bpf_sock_ops *skops)
33{ 26{
diff --git a/samples/bpf/tcp_iw_kern.c b/samples/bpf/tcp_iw_kern.c
index 9b139ec69560..da61d53378b3 100644
--- a/samples/bpf/tcp_iw_kern.c
+++ b/samples/bpf/tcp_iw_kern.c
@@ -22,13 +22,6 @@
22 22
23#define DEBUG 1 23#define DEBUG 1
24 24
25#define bpf_printk(fmt, ...) \
26({ \
27 char ____fmt[] = fmt; \
28 bpf_trace_printk(____fmt, sizeof(____fmt), \
29 ##__VA_ARGS__); \
30})
31
32SEC("sockops") 25SEC("sockops")
33int bpf_iw(struct bpf_sock_ops *skops) 26int bpf_iw(struct bpf_sock_ops *skops)
34{ 27{
diff --git a/samples/bpf/tcp_rwnd_kern.c b/samples/bpf/tcp_rwnd_kern.c
index cc71ee96e044..d011e38b80d2 100644
--- a/samples/bpf/tcp_rwnd_kern.c
+++ b/samples/bpf/tcp_rwnd_kern.c
@@ -21,13 +21,6 @@
21 21
22#define DEBUG 1 22#define DEBUG 1
23 23
24#define bpf_printk(fmt, ...) \
25({ \
26 char ____fmt[] = fmt; \
27 bpf_trace_printk(____fmt, sizeof(____fmt), \
28 ##__VA_ARGS__); \
29})
30
31SEC("sockops") 24SEC("sockops")
32int bpf_rwnd(struct bpf_sock_ops *skops) 25int bpf_rwnd(struct bpf_sock_ops *skops)
33{ 26{
diff --git a/samples/bpf/tcp_synrto_kern.c b/samples/bpf/tcp_synrto_kern.c
index ca87ed34f896..720d1950322d 100644
--- a/samples/bpf/tcp_synrto_kern.c
+++ b/samples/bpf/tcp_synrto_kern.c
@@ -21,13 +21,6 @@
21 21
22#define DEBUG 1 22#define DEBUG 1
23 23
24#define bpf_printk(fmt, ...) \
25({ \
26 char ____fmt[] = fmt; \
27 bpf_trace_printk(____fmt, sizeof(____fmt), \
28 ##__VA_ARGS__); \
29})
30
31SEC("sockops") 24SEC("sockops")
32int bpf_synrto(struct bpf_sock_ops *skops) 25int bpf_synrto(struct bpf_sock_ops *skops)
33{ 26{
diff --git a/samples/bpf/tcp_tos_reflect_kern.c b/samples/bpf/tcp_tos_reflect_kern.c
index de788be6f862..369faca70a15 100644
--- a/samples/bpf/tcp_tos_reflect_kern.c
+++ b/samples/bpf/tcp_tos_reflect_kern.c
@@ -20,13 +20,6 @@
20 20
21#define DEBUG 1 21#define DEBUG 1
22 22
23#define bpf_printk(fmt, ...) \
24({ \
25 char ____fmt[] = fmt; \
26 bpf_trace_printk(____fmt, sizeof(____fmt), \
27 ##__VA_ARGS__); \
28})
29
30SEC("sockops") 23SEC("sockops")
31int bpf_basertt(struct bpf_sock_ops *skops) 24int bpf_basertt(struct bpf_sock_ops *skops)
32{ 25{
diff --git a/samples/bpf/xdp_sample_pkts_kern.c b/samples/bpf/xdp_sample_pkts_kern.c
index f7ca8b850978..6c7c7e0aaeda 100644
--- a/samples/bpf/xdp_sample_pkts_kern.c
+++ b/samples/bpf/xdp_sample_pkts_kern.c
@@ -7,13 +7,6 @@
7#define SAMPLE_SIZE 64ul 7#define SAMPLE_SIZE 64ul
8#define MAX_CPUS 128 8#define MAX_CPUS 128
9 9
10#define bpf_printk(fmt, ...) \
11({ \
12 char ____fmt[] = fmt; \
13 bpf_trace_printk(____fmt, sizeof(____fmt), \
14 ##__VA_ARGS__); \
15})
16
17struct bpf_map_def SEC("maps") my_map = { 10struct bpf_map_def SEC("maps") my_map = {
18 .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, 11 .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
19 .key_size = sizeof(int), 12 .key_size = sizeof(int),
diff --git a/tools/bpf/bpftool/Documentation/bpftool-btf.rst b/tools/bpf/bpftool/Documentation/bpftool-btf.rst
index 2dbc1413fabd..6694a0fc8f99 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-btf.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-btf.rst
@@ -19,10 +19,11 @@ SYNOPSIS
19BTF COMMANDS 19BTF COMMANDS
20============= 20=============
21 21
22| **bpftool** **btf dump** *BTF_SRC* 22| **bpftool** **btf dump** *BTF_SRC* [**format** *FORMAT*]
23| **bpftool** **btf help** 23| **bpftool** **btf help**
24| 24|
25| *BTF_SRC* := { **id** *BTF_ID* | **prog** *PROG* | **map** *MAP* [{**key** | **value** | **kv** | **all**}] | **file** *FILE* } 25| *BTF_SRC* := { **id** *BTF_ID* | **prog** *PROG* | **map** *MAP* [{**key** | **value** | **kv** | **all**}] | **file** *FILE* }
26| *FORMAT* := { **raw** | **c** }
26| *MAP* := { **id** *MAP_ID* | **pinned** *FILE* } 27| *MAP* := { **id** *MAP_ID* | **pinned** *FILE* }
27| *PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* } 28| *PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* }
28 29
@@ -31,23 +32,27 @@ DESCRIPTION
31 **bpftool btf dump** *BTF_SRC* 32 **bpftool btf dump** *BTF_SRC*
32 Dump BTF entries from a given *BTF_SRC*. 33 Dump BTF entries from a given *BTF_SRC*.
33 34
34 When **id** is specified, BTF object with that ID will be 35 When **id** is specified, BTF object with that ID will be
35 loaded and all its BTF types emitted. 36 loaded and all its BTF types emitted.
36 37
37 When **map** is provided, it's expected that map has 38 When **map** is provided, it's expected that map has
38 associated BTF object with BTF types describing key and 39 associated BTF object with BTF types describing key and
39 value. It's possible to select whether to dump only BTF 40 value. It's possible to select whether to dump only BTF
40 type(s) associated with key (**key**), value (**value**), 41 type(s) associated with key (**key**), value (**value**),
41 both key and value (**kv**), or all BTF types present in 42 both key and value (**kv**), or all BTF types present in
42 associated BTF object (**all**). If not specified, **kv** 43 associated BTF object (**all**). If not specified, **kv**
43 is assumed. 44 is assumed.
44 45
45 When **prog** is provided, it's expected that program has 46 When **prog** is provided, it's expected that program has
46 associated BTF object with BTF types. 47 associated BTF object with BTF types.
47 48
48 When specifying *FILE*, an ELF file is expected, containing 49 When specifying *FILE*, an ELF file is expected, containing
49 .BTF section with well-defined BTF binary format data, 50 .BTF section with well-defined BTF binary format data,
50 typically produced by clang or pahole. 51 typically produced by clang or pahole.
52
53 **format** option can be used to override default (raw)
54 output format. Raw (**raw**) or C-syntax (**c**) output
55 formats are supported.
51 56
52 **bpftool btf help** 57 **bpftool btf help**
53 Print short help message. 58 Print short help message.
@@ -67,6 +72,10 @@ OPTIONS
67 -p, --pretty 72 -p, --pretty
68 Generate human-readable JSON output. Implies **-j**. 73 Generate human-readable JSON output. Implies **-j**.
69 74
75 -d, --debug
76 Print all logs available from libbpf, including debug-level
77 information.
78
70EXAMPLES 79EXAMPLES
71======== 80========
72**# bpftool btf dump id 1226** 81**# bpftool btf dump id 1226**
diff --git a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
index ac26876389c2..36807735e2a5 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
@@ -113,6 +113,10 @@ OPTIONS
113 -f, --bpffs 113 -f, --bpffs
114 Show file names of pinned programs. 114 Show file names of pinned programs.
115 115
116 -d, --debug
117 Print all logs available from libbpf, including debug-level
118 information.
119
116EXAMPLES 120EXAMPLES
117======== 121========
118| 122|
diff --git a/tools/bpf/bpftool/Documentation/bpftool-feature.rst b/tools/bpf/bpftool/Documentation/bpftool-feature.rst
index 14180e887082..4d08f35034a2 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-feature.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-feature.rst
@@ -73,6 +73,10 @@ OPTIONS
73 -p, --pretty 73 -p, --pretty
74 Generate human-readable JSON output. Implies **-j**. 74 Generate human-readable JSON output. Implies **-j**.
75 75
76 -d, --debug
77 Print all logs available from libbpf, including debug-level
78 information.
79
76SEE ALSO 80SEE ALSO
77======== 81========
78 **bpf**\ (2), 82 **bpf**\ (2),
diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst
index 13ef27b39f20..490b4501cb6e 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-map.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst
@@ -152,6 +152,10 @@ OPTIONS
152 Do not automatically attempt to mount any virtual file system 152 Do not automatically attempt to mount any virtual file system
153 (such as tracefs or BPF virtual file system) when necessary. 153 (such as tracefs or BPF virtual file system) when necessary.
154 154
155 -d, --debug
156 Print all logs available from libbpf, including debug-level
157 information.
158
155EXAMPLES 159EXAMPLES
156======== 160========
157**# bpftool map show** 161**# bpftool map show**
diff --git a/tools/bpf/bpftool/Documentation/bpftool-net.rst b/tools/bpf/bpftool/Documentation/bpftool-net.rst
index 934580850f42..d8e5237a2085 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-net.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-net.rst
@@ -65,6 +65,10 @@ OPTIONS
65 -p, --pretty 65 -p, --pretty
66 Generate human-readable JSON output. Implies **-j**. 66 Generate human-readable JSON output. Implies **-j**.
67 67
68 -d, --debug
69 Print all logs available from libbpf, including debug-level
70 information.
71
68EXAMPLES 72EXAMPLES
69======== 73========
70 74
diff --git a/tools/bpf/bpftool/Documentation/bpftool-perf.rst b/tools/bpf/bpftool/Documentation/bpftool-perf.rst
index 0c7576523a21..e252bd0bc434 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-perf.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-perf.rst
@@ -53,6 +53,10 @@ OPTIONS
53 -p, --pretty 53 -p, --pretty
54 Generate human-readable JSON output. Implies **-j**. 54 Generate human-readable JSON output. Implies **-j**.
55 55
56 -d, --debug
57 Print all logs available from libbpf, including debug-level
58 information.
59
56EXAMPLES 60EXAMPLES
57======== 61========
58 62
diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
index e8118544d118..228a5c863cc7 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
@@ -174,6 +174,11 @@ OPTIONS
174 Do not automatically attempt to mount any virtual file system 174 Do not automatically attempt to mount any virtual file system
175 (such as tracefs or BPF virtual file system) when necessary. 175 (such as tracefs or BPF virtual file system) when necessary.
176 176
177 -d, --debug
178 Print all logs available, even debug-level information. This
179 includes logs from libbpf as well as from the verifier, when
180 attempting to load programs.
181
177EXAMPLES 182EXAMPLES
178======== 183========
179**# bpftool prog show** 184**# bpftool prog show**
diff --git a/tools/bpf/bpftool/Documentation/bpftool.rst b/tools/bpf/bpftool/Documentation/bpftool.rst
index 3e562d7fd56f..6a9c52ef84a9 100644
--- a/tools/bpf/bpftool/Documentation/bpftool.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool.rst
@@ -66,6 +66,10 @@ OPTIONS
66 Do not automatically attempt to mount any virtual file system 66 Do not automatically attempt to mount any virtual file system
67 (such as tracefs or BPF virtual file system) when necessary. 67 (such as tracefs or BPF virtual file system) when necessary.
68 68
69 -d, --debug
70 Print all logs available, even debug-level information. This
71 includes logs from libbpf as well as from the verifier, when
72 attempting to load programs.
69 73
70SEE ALSO 74SEE ALSO
71======== 75========
diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool
index 50e402a5a9c8..2725e27dfa42 100644
--- a/tools/bpf/bpftool/bash-completion/bpftool
+++ b/tools/bpf/bpftool/bash-completion/bpftool
@@ -71,6 +71,12 @@ _bpftool_get_prog_tags()
71 command sed -n 's/.*"tag": "\(.*\)",$/\1/p' )" -- "$cur" ) ) 71 command sed -n 's/.*"tag": "\(.*\)",$/\1/p' )" -- "$cur" ) )
72} 72}
73 73
74_bpftool_get_btf_ids()
75{
76 COMPREPLY+=( $( compgen -W "$( bpftool -jp prog 2>&1 | \
77 command sed -n 's/.*"btf_id": \(.*\),\?$/\1/p' )" -- "$cur" ) )
78}
79
74_bpftool_get_obj_map_names() 80_bpftool_get_obj_map_names()
75{ 81{
76 local obj 82 local obj
@@ -181,7 +187,7 @@ _bpftool()
181 187
182 # Deal with options 188 # Deal with options
183 if [[ ${words[cword]} == -* ]]; then 189 if [[ ${words[cword]} == -* ]]; then
184 local c='--version --json --pretty --bpffs --mapcompat' 190 local c='--version --json --pretty --bpffs --mapcompat --debug'
185 COMPREPLY=( $( compgen -W "$c" -- "$cur" ) ) 191 COMPREPLY=( $( compgen -W "$c" -- "$cur" ) )
186 return 0 192 return 0
187 fi 193 fi
@@ -635,14 +641,30 @@ _bpftool()
635 map) 641 map)
636 _bpftool_get_map_ids 642 _bpftool_get_map_ids
637 ;; 643 ;;
644 dump)
645 _bpftool_get_btf_ids
646 ;;
638 esac 647 esac
639 return 0 648 return 0
640 ;; 649 ;;
650 format)
651 COMPREPLY=( $( compgen -W "c raw" -- "$cur" ) )
652 ;;
641 *) 653 *)
642 if [[ $cword == 6 ]] && [[ ${words[3]} == "map" ]]; then 654 # emit extra options
643 COMPREPLY+=( $( compgen -W 'key value kv all' -- \ 655 case ${words[3]} in
644 "$cur" ) ) 656 id|file)
645 fi 657 _bpftool_once_attr 'format'
658 ;;
659 map|prog)
660 if [[ ${words[3]} == "map" ]] && [[ $cword == 6 ]]; then
661 COMPREPLY+=( $( compgen -W "key value kv all" -- "$cur" ) )
662 fi
663 _bpftool_once_attr 'format'
664 ;;
665 *)
666 ;;
667 esac
646 return 0 668 return 0
647 ;; 669 ;;
648 esac 670 esac
diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c
index 7317438ecd9e..1b8ec91899e6 100644
--- a/tools/bpf/bpftool/btf.c
+++ b/tools/bpf/bpftool/btf.c
@@ -8,8 +8,8 @@
8#include <stdio.h> 8#include <stdio.h>
9#include <string.h> 9#include <string.h>
10#include <unistd.h> 10#include <unistd.h>
11#include <gelf.h>
12#include <bpf.h> 11#include <bpf.h>
12#include <libbpf.h>
13#include <linux/btf.h> 13#include <linux/btf.h>
14 14
15#include "btf.h" 15#include "btf.h"
@@ -340,109 +340,40 @@ static int dump_btf_raw(const struct btf *btf,
340 return 0; 340 return 0;
341} 341}
342 342
343static bool check_btf_endianness(GElf_Ehdr *ehdr) 343static void __printf(2, 0) btf_dump_printf(void *ctx,
344 const char *fmt, va_list args)
344{ 345{
345 static unsigned int const endian = 1; 346 vfprintf(stdout, fmt, args);
346
347 switch (ehdr->e_ident[EI_DATA]) {
348 case ELFDATA2LSB:
349 return *(unsigned char const *)&endian == 1;
350 case ELFDATA2MSB:
351 return *(unsigned char const *)&endian == 0;
352 default:
353 return 0;
354 }
355} 347}
356 348
357static int btf_load_from_elf(const char *path, struct btf **btf) 349static int dump_btf_c(const struct btf *btf,
350 __u32 *root_type_ids, int root_type_cnt)
358{ 351{
359 int err = -1, fd = -1, idx = 0; 352 struct btf_dump *d;
360 Elf_Data *btf_data = NULL; 353 int err = 0, i;
361 Elf_Scn *scn = NULL;
362 Elf *elf = NULL;
363 GElf_Ehdr ehdr;
364
365 if (elf_version(EV_CURRENT) == EV_NONE) {
366 p_err("failed to init libelf for %s", path);
367 return -1;
368 }
369
370 fd = open(path, O_RDONLY);
371 if (fd < 0) {
372 p_err("failed to open %s: %s", path, strerror(errno));
373 return -1;
374 }
375
376 elf = elf_begin(fd, ELF_C_READ, NULL);
377 if (!elf) {
378 p_err("failed to open %s as ELF file", path);
379 goto done;
380 }
381 if (!gelf_getehdr(elf, &ehdr)) {
382 p_err("failed to get EHDR from %s", path);
383 goto done;
384 }
385 if (!check_btf_endianness(&ehdr)) {
386 p_err("non-native ELF endianness is not supported");
387 goto done;
388 }
389 if (!elf_rawdata(elf_getscn(elf, ehdr.e_shstrndx), NULL)) {
390 p_err("failed to get e_shstrndx from %s\n", path);
391 goto done;
392 }
393 354
394 while ((scn = elf_nextscn(elf, scn)) != NULL) { 355 d = btf_dump__new(btf, NULL, NULL, btf_dump_printf);
395 GElf_Shdr sh; 356 if (IS_ERR(d))
396 char *name; 357 return PTR_ERR(d);
397 358
398 idx++; 359 if (root_type_cnt) {
399 if (gelf_getshdr(scn, &sh) != &sh) { 360 for (i = 0; i < root_type_cnt; i++) {
400 p_err("failed to get section(%d) header from %s", 361 err = btf_dump__dump_type(d, root_type_ids[i]);
401 idx, path); 362 if (err)
402 goto done;
403 }
404 name = elf_strptr(elf, ehdr.e_shstrndx, sh.sh_name);
405 if (!name) {
406 p_err("failed to get section(%d) name from %s",
407 idx, path);
408 goto done;
409 }
410 if (strcmp(name, BTF_ELF_SEC) == 0) {
411 btf_data = elf_getdata(scn, 0);
412 if (!btf_data) {
413 p_err("failed to get section(%d, %s) data from %s",
414 idx, name, path);
415 goto done; 363 goto done;
416 }
417 break;
418 } 364 }
419 } 365 } else {
420 366 int cnt = btf__get_nr_types(btf);
421 if (!btf_data) {
422 p_err("%s ELF section not found in %s", BTF_ELF_SEC, path);
423 goto done;
424 }
425 367
426 *btf = btf__new(btf_data->d_buf, btf_data->d_size); 368 for (i = 1; i <= cnt; i++) {
427 if (IS_ERR(*btf)) { 369 err = btf_dump__dump_type(d, i);
428 err = PTR_ERR(*btf); 370 if (err)
429 *btf = NULL; 371 goto done;
430 p_err("failed to load BTF data from %s: %s", 372 }
431 path, strerror(err));
432 goto done;
433 } 373 }
434 374
435 err = 0;
436done: 375done:
437 if (err) { 376 btf_dump__free(d);
438 if (*btf) {
439 btf__free(*btf);
440 *btf = NULL;
441 }
442 }
443 if (elf)
444 elf_end(elf);
445 close(fd);
446 return err; 377 return err;
447} 378}
448 379
@@ -451,6 +382,7 @@ static int do_dump(int argc, char **argv)
451 struct btf *btf = NULL; 382 struct btf *btf = NULL;
452 __u32 root_type_ids[2]; 383 __u32 root_type_ids[2];
453 int root_type_cnt = 0; 384 int root_type_cnt = 0;
385 bool dump_c = false;
454 __u32 btf_id = -1; 386 __u32 btf_id = -1;
455 const char *src; 387 const char *src;
456 int fd = -1; 388 int fd = -1;
@@ -522,9 +454,14 @@ static int do_dump(int argc, char **argv)
522 } 454 }
523 NEXT_ARG(); 455 NEXT_ARG();
524 } else if (is_prefix(src, "file")) { 456 } else if (is_prefix(src, "file")) {
525 err = btf_load_from_elf(*argv, &btf); 457 btf = btf__parse_elf(*argv, NULL);
526 if (err) 458 if (IS_ERR(btf)) {
459 err = PTR_ERR(btf);
460 btf = NULL;
461 p_err("failed to load BTF from %s: %s",
462 *argv, strerror(err));
527 goto done; 463 goto done;
464 }
528 NEXT_ARG(); 465 NEXT_ARG();
529 } else { 466 } else {
530 err = -1; 467 err = -1;
@@ -532,6 +469,29 @@ static int do_dump(int argc, char **argv)
532 goto done; 469 goto done;
533 } 470 }
534 471
472 while (argc) {
473 if (is_prefix(*argv, "format")) {
474 NEXT_ARG();
475 if (argc < 1) {
476 p_err("expecting value for 'format' option\n");
477 goto done;
478 }
479 if (strcmp(*argv, "c") == 0) {
480 dump_c = true;
481 } else if (strcmp(*argv, "raw") == 0) {
482 dump_c = false;
483 } else {
484 p_err("unrecognized format specifier: '%s', possible values: raw, c",
485 *argv);
486 goto done;
487 }
488 NEXT_ARG();
489 } else {
490 p_err("unrecognized option: '%s'", *argv);
491 goto done;
492 }
493 }
494
535 if (!btf) { 495 if (!btf) {
536 err = btf__get_from_id(btf_id, &btf); 496 err = btf__get_from_id(btf_id, &btf);
537 if (err) { 497 if (err) {
@@ -545,7 +505,16 @@ static int do_dump(int argc, char **argv)
545 } 505 }
546 } 506 }
547 507
548 dump_btf_raw(btf, root_type_ids, root_type_cnt); 508 if (dump_c) {
509 if (json_output) {
510 p_err("JSON output for C-syntax dump is not supported");
511 err = -ENOTSUP;
512 goto done;
513 }
514 err = dump_btf_c(btf, root_type_ids, root_type_cnt);
515 } else {
516 err = dump_btf_raw(btf, root_type_ids, root_type_cnt);
517 }
549 518
550done: 519done:
551 close(fd); 520 close(fd);
@@ -561,10 +530,11 @@ static int do_help(int argc, char **argv)
561 } 530 }
562 531
563 fprintf(stderr, 532 fprintf(stderr,
564 "Usage: %s btf dump BTF_SRC\n" 533 "Usage: %s btf dump BTF_SRC [format FORMAT]\n"
565 " %s btf help\n" 534 " %s btf help\n"
566 "\n" 535 "\n"
567 " BTF_SRC := { id BTF_ID | prog PROG | map MAP [{key | value | kv | all}] | file FILE }\n" 536 " BTF_SRC := { id BTF_ID | prog PROG | map MAP [{key | value | kv | all}] | file FILE }\n"
537 " FORMAT := { raw | c }\n"
568 " " HELP_SPEC_MAP "\n" 538 " " HELP_SPEC_MAP "\n"
569 " " HELP_SPEC_PROGRAM "\n" 539 " " HELP_SPEC_PROGRAM "\n"
570 " " HELP_SPEC_OPTIONS "\n" 540 " " HELP_SPEC_OPTIONS "\n"
diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c
index 1ac1fc520e6a..4879f6395c7e 100644
--- a/tools/bpf/bpftool/main.c
+++ b/tools/bpf/bpftool/main.c
@@ -10,6 +10,7 @@
10#include <string.h> 10#include <string.h>
11 11
12#include <bpf.h> 12#include <bpf.h>
13#include <libbpf.h>
13 14
14#include "main.h" 15#include "main.h"
15 16
@@ -25,6 +26,7 @@ bool pretty_output;
25bool json_output; 26bool json_output;
26bool show_pinned; 27bool show_pinned;
27bool block_mount; 28bool block_mount;
29bool verifier_logs;
28int bpf_flags; 30int bpf_flags;
29struct pinned_obj_table prog_table; 31struct pinned_obj_table prog_table;
30struct pinned_obj_table map_table; 32struct pinned_obj_table map_table;
@@ -77,6 +79,13 @@ static int do_version(int argc, char **argv)
77 return 0; 79 return 0;
78} 80}
79 81
82static int __printf(2, 0)
83print_all_levels(__maybe_unused enum libbpf_print_level level,
84 const char *format, va_list args)
85{
86 return vfprintf(stderr, format, args);
87}
88
80int cmd_select(const struct cmd *cmds, int argc, char **argv, 89int cmd_select(const struct cmd *cmds, int argc, char **argv,
81 int (*help)(int argc, char **argv)) 90 int (*help)(int argc, char **argv))
82{ 91{
@@ -317,6 +326,7 @@ int main(int argc, char **argv)
317 { "bpffs", no_argument, NULL, 'f' }, 326 { "bpffs", no_argument, NULL, 'f' },
318 { "mapcompat", no_argument, NULL, 'm' }, 327 { "mapcompat", no_argument, NULL, 'm' },
319 { "nomount", no_argument, NULL, 'n' }, 328 { "nomount", no_argument, NULL, 'n' },
329 { "debug", no_argument, NULL, 'd' },
320 { 0 } 330 { 0 }
321 }; 331 };
322 int opt, ret; 332 int opt, ret;
@@ -332,7 +342,7 @@ int main(int argc, char **argv)
332 hash_init(map_table.table); 342 hash_init(map_table.table);
333 343
334 opterr = 0; 344 opterr = 0;
335 while ((opt = getopt_long(argc, argv, "Vhpjfmn", 345 while ((opt = getopt_long(argc, argv, "Vhpjfmnd",
336 options, NULL)) >= 0) { 346 options, NULL)) >= 0) {
337 switch (opt) { 347 switch (opt) {
338 case 'V': 348 case 'V':
@@ -362,6 +372,10 @@ int main(int argc, char **argv)
362 case 'n': 372 case 'n':
363 block_mount = true; 373 block_mount = true;
364 break; 374 break;
375 case 'd':
376 libbpf_set_print(print_all_levels);
377 verifier_logs = true;
378 break;
365 default: 379 default:
366 p_err("unrecognized option '%s'", argv[optind - 1]); 380 p_err("unrecognized option '%s'", argv[optind - 1]);
367 if (json_output) 381 if (json_output)
diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h
index 3d63feb7f852..28a2a5857e14 100644
--- a/tools/bpf/bpftool/main.h
+++ b/tools/bpf/bpftool/main.h
@@ -91,6 +91,7 @@ extern json_writer_t *json_wtr;
91extern bool json_output; 91extern bool json_output;
92extern bool show_pinned; 92extern bool show_pinned;
93extern bool block_mount; 93extern bool block_mount;
94extern bool verifier_logs;
94extern int bpf_flags; 95extern int bpf_flags;
95extern struct pinned_obj_table prog_table; 96extern struct pinned_obj_table prog_table;
96extern struct pinned_obj_table map_table; 97extern struct pinned_obj_table map_table;
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index 26336bad0442..1f209c80d906 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -750,10 +750,11 @@ static int do_detach(int argc, char **argv)
750 750
751static int load_with_options(int argc, char **argv, bool first_prog_only) 751static int load_with_options(int argc, char **argv, bool first_prog_only)
752{ 752{
753 enum bpf_attach_type expected_attach_type; 753 struct bpf_object_load_attr load_attr = { 0 };
754 struct bpf_object_open_attr attr = { 754 struct bpf_object_open_attr open_attr = {
755 .prog_type = BPF_PROG_TYPE_UNSPEC, 755 .prog_type = BPF_PROG_TYPE_UNSPEC,
756 }; 756 };
757 enum bpf_attach_type expected_attach_type;
757 struct map_replace *map_replace = NULL; 758 struct map_replace *map_replace = NULL;
758 struct bpf_program *prog = NULL, *pos; 759 struct bpf_program *prog = NULL, *pos;
759 unsigned int old_map_fds = 0; 760 unsigned int old_map_fds = 0;
@@ -767,7 +768,7 @@ static int load_with_options(int argc, char **argv, bool first_prog_only)
767 768
768 if (!REQ_ARGS(2)) 769 if (!REQ_ARGS(2))
769 return -1; 770 return -1;
770 attr.file = GET_ARG(); 771 open_attr.file = GET_ARG();
771 pinfile = GET_ARG(); 772 pinfile = GET_ARG();
772 773
773 while (argc) { 774 while (argc) {
@@ -776,7 +777,7 @@ static int load_with_options(int argc, char **argv, bool first_prog_only)
776 777
777 NEXT_ARG(); 778 NEXT_ARG();
778 779
779 if (attr.prog_type != BPF_PROG_TYPE_UNSPEC) { 780 if (open_attr.prog_type != BPF_PROG_TYPE_UNSPEC) {
780 p_err("program type already specified"); 781 p_err("program type already specified");
781 goto err_free_reuse_maps; 782 goto err_free_reuse_maps;
782 } 783 }
@@ -793,7 +794,8 @@ static int load_with_options(int argc, char **argv, bool first_prog_only)
793 strcat(type, *argv); 794 strcat(type, *argv);
794 strcat(type, "/"); 795 strcat(type, "/");
795 796
796 err = libbpf_prog_type_by_name(type, &attr.prog_type, 797 err = libbpf_prog_type_by_name(type,
798 &open_attr.prog_type,
797 &expected_attach_type); 799 &expected_attach_type);
798 free(type); 800 free(type);
799 if (err < 0) 801 if (err < 0)
@@ -881,16 +883,16 @@ static int load_with_options(int argc, char **argv, bool first_prog_only)
881 883
882 set_max_rlimit(); 884 set_max_rlimit();
883 885
884 obj = __bpf_object__open_xattr(&attr, bpf_flags); 886 obj = __bpf_object__open_xattr(&open_attr, bpf_flags);
885 if (IS_ERR_OR_NULL(obj)) { 887 if (IS_ERR_OR_NULL(obj)) {
886 p_err("failed to open object file"); 888 p_err("failed to open object file");
887 goto err_free_reuse_maps; 889 goto err_free_reuse_maps;
888 } 890 }
889 891
890 bpf_object__for_each_program(pos, obj) { 892 bpf_object__for_each_program(pos, obj) {
891 enum bpf_prog_type prog_type = attr.prog_type; 893 enum bpf_prog_type prog_type = open_attr.prog_type;
892 894
893 if (attr.prog_type == BPF_PROG_TYPE_UNSPEC) { 895 if (open_attr.prog_type == BPF_PROG_TYPE_UNSPEC) {
894 const char *sec_name = bpf_program__title(pos, false); 896 const char *sec_name = bpf_program__title(pos, false);
895 897
896 err = libbpf_prog_type_by_name(sec_name, &prog_type, 898 err = libbpf_prog_type_by_name(sec_name, &prog_type,
@@ -960,7 +962,12 @@ static int load_with_options(int argc, char **argv, bool first_prog_only)
960 goto err_close_obj; 962 goto err_close_obj;
961 } 963 }
962 964
963 err = bpf_object__load(obj); 965 load_attr.obj = obj;
966 if (verifier_logs)
967 /* log_level1 + log_level2 + stats, but not stable UAPI */
968 load_attr.log_level = 1 + 2 + 4;
969
970 err = bpf_object__load_xattr(&load_attr);
964 if (err) { 971 if (err) {
965 p_err("failed to load object file"); 972 p_err("failed to load object file");
966 goto err_close_obj; 973 goto err_close_obj;
diff --git a/tools/bpf/bpftool/xlated_dumper.c b/tools/bpf/bpftool/xlated_dumper.c
index 0bb17bf88b18..494d7ae3614d 100644
--- a/tools/bpf/bpftool/xlated_dumper.c
+++ b/tools/bpf/bpftool/xlated_dumper.c
@@ -31,9 +31,7 @@ void kernel_syms_load(struct dump_data *dd)
31 if (!fp) 31 if (!fp)
32 return; 32 return;
33 33
34 while (!feof(fp)) { 34 while (fgets(buff, sizeof(buff), fp)) {
35 if (!fgets(buff, sizeof(buff), fp))
36 break;
37 tmp = reallocarray(dd->sym_mapping, dd->sym_count + 1, 35 tmp = reallocarray(dd->sym_mapping, dd->sym_count + 1,
38 sizeof(*dd->sym_mapping)); 36 sizeof(*dd->sym_mapping));
39 if (!tmp) { 37 if (!tmp) {
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 63e0cf66f01a..7c6aef253173 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -260,6 +260,24 @@ enum bpf_attach_type {
260 */ 260 */
261#define BPF_F_ANY_ALIGNMENT (1U << 1) 261#define BPF_F_ANY_ALIGNMENT (1U << 1)
262 262
263/* BPF_F_TEST_RND_HI32 is used in BPF_PROG_LOAD command for testing purpose.
264 * Verifier does sub-register def/use analysis and identifies instructions whose
265 * def only matters for low 32-bit, high 32-bit is never referenced later
266 * through implicit zero extension. Therefore verifier notifies JIT back-ends
267 * that it is safe to ignore clearing high 32-bit for these instructions. This
268 * saves some back-ends a lot of code-gen. However such optimization is not
269 * necessary on some arches, for example x86_64, arm64 etc, whose JIT back-ends
270 * hence hasn't used verifier's analysis result. But, we really want to have a
271 * way to be able to verify the correctness of the described optimization on
272 * x86_64 on which testsuites are frequently exercised.
273 *
274 * So, this flag is introduced. Once it is set, verifier will randomize high
275 * 32-bit for those instructions who has been identified as safe to ignore them.
276 * Then, if verifier is not doing correct analysis, such randomization will
277 * regress tests to expose bugs.
278 */
279#define BPF_F_TEST_RND_HI32 (1U << 2)
280
263/* When BPF ldimm64's insn[0].src_reg != 0 then this can have 281/* When BPF ldimm64's insn[0].src_reg != 0 then this can have
264 * two extensions: 282 * two extensions:
265 * 283 *
@@ -2672,6 +2690,20 @@ union bpf_attr {
2672 * 0 on success. 2690 * 0 on success.
2673 * 2691 *
2674 * **-ENOENT** if the bpf-local-storage cannot be found. 2692 * **-ENOENT** if the bpf-local-storage cannot be found.
2693 *
2694 * int bpf_send_signal(u32 sig)
2695 * Description
2696 * Send signal *sig* to the current task.
2697 * Return
2698 * 0 on success or successfully queued.
2699 *
2700 * **-EBUSY** if work queue under nmi is full.
2701 *
2702 * **-EINVAL** if *sig* is invalid.
2703 *
2704 * **-EPERM** if no permission to send the *sig*.
2705 *
2706 * **-EAGAIN** if bpf program can try again.
2675 */ 2707 */
2676#define __BPF_FUNC_MAPPER(FN) \ 2708#define __BPF_FUNC_MAPPER(FN) \
2677 FN(unspec), \ 2709 FN(unspec), \
@@ -2782,7 +2814,8 @@ union bpf_attr {
2782 FN(strtol), \ 2814 FN(strtol), \
2783 FN(strtoul), \ 2815 FN(strtoul), \
2784 FN(sk_storage_get), \ 2816 FN(sk_storage_get), \
2785 FN(sk_storage_delete), 2817 FN(sk_storage_delete), \
2818 FN(send_signal),
2786 2819
2787/* integer value in 'imm' field of BPF_CALL instruction selects which helper 2820/* integer value in 'imm' field of BPF_CALL instruction selects which helper
2788 * function eBPF program intends to call 2821 * function eBPF program intends to call
diff --git a/tools/include/uapi/linux/if_tun.h b/tools/include/uapi/linux/if_tun.h
new file mode 100644
index 000000000000..454ae31b93c7
--- /dev/null
+++ b/tools/include/uapi/linux/if_tun.h
@@ -0,0 +1,114 @@
1/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
2/*
3 * Universal TUN/TAP device driver.
4 * Copyright (C) 1999-2000 Maxim Krasnyansky <max_mk@yahoo.com>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 */
16
17#ifndef _UAPI__IF_TUN_H
18#define _UAPI__IF_TUN_H
19
20#include <linux/types.h>
21#include <linux/if_ether.h>
22#include <linux/filter.h>
23
24/* Read queue size */
25#define TUN_READQ_SIZE 500
26/* TUN device type flags: deprecated. Use IFF_TUN/IFF_TAP instead. */
27#define TUN_TUN_DEV IFF_TUN
28#define TUN_TAP_DEV IFF_TAP
29#define TUN_TYPE_MASK 0x000f
30
31/* Ioctl defines */
32#define TUNSETNOCSUM _IOW('T', 200, int)
33#define TUNSETDEBUG _IOW('T', 201, int)
34#define TUNSETIFF _IOW('T', 202, int)
35#define TUNSETPERSIST _IOW('T', 203, int)
36#define TUNSETOWNER _IOW('T', 204, int)
37#define TUNSETLINK _IOW('T', 205, int)
38#define TUNSETGROUP _IOW('T', 206, int)
39#define TUNGETFEATURES _IOR('T', 207, unsigned int)
40#define TUNSETOFFLOAD _IOW('T', 208, unsigned int)
41#define TUNSETTXFILTER _IOW('T', 209, unsigned int)
42#define TUNGETIFF _IOR('T', 210, unsigned int)
43#define TUNGETSNDBUF _IOR('T', 211, int)
44#define TUNSETSNDBUF _IOW('T', 212, int)
45#define TUNATTACHFILTER _IOW('T', 213, struct sock_fprog)
46#define TUNDETACHFILTER _IOW('T', 214, struct sock_fprog)
47#define TUNGETVNETHDRSZ _IOR('T', 215, int)
48#define TUNSETVNETHDRSZ _IOW('T', 216, int)
49#define TUNSETQUEUE _IOW('T', 217, int)
50#define TUNSETIFINDEX _IOW('T', 218, unsigned int)
51#define TUNGETFILTER _IOR('T', 219, struct sock_fprog)
52#define TUNSETVNETLE _IOW('T', 220, int)
53#define TUNGETVNETLE _IOR('T', 221, int)
54/* The TUNSETVNETBE and TUNGETVNETBE ioctls are for cross-endian support on
55 * little-endian hosts. Not all kernel configurations support them, but all
56 * configurations that support SET also support GET.
57 */
58#define TUNSETVNETBE _IOW('T', 222, int)
59#define TUNGETVNETBE _IOR('T', 223, int)
60#define TUNSETSTEERINGEBPF _IOR('T', 224, int)
61#define TUNSETFILTEREBPF _IOR('T', 225, int)
62#define TUNSETCARRIER _IOW('T', 226, int)
63#define TUNGETDEVNETNS _IO('T', 227)
64
65/* TUNSETIFF ifr flags */
66#define IFF_TUN 0x0001
67#define IFF_TAP 0x0002
68#define IFF_NAPI 0x0010
69#define IFF_NAPI_FRAGS 0x0020
70#define IFF_NO_PI 0x1000
71/* This flag has no real effect */
72#define IFF_ONE_QUEUE 0x2000
73#define IFF_VNET_HDR 0x4000
74#define IFF_TUN_EXCL 0x8000
75#define IFF_MULTI_QUEUE 0x0100
76#define IFF_ATTACH_QUEUE 0x0200
77#define IFF_DETACH_QUEUE 0x0400
78/* read-only flag */
79#define IFF_PERSIST 0x0800
80#define IFF_NOFILTER 0x1000
81
82/* Socket options */
83#define TUN_TX_TIMESTAMP 1
84
85/* Features for GSO (TUNSETOFFLOAD). */
86#define TUN_F_CSUM 0x01 /* You can hand me unchecksummed packets. */
87#define TUN_F_TSO4 0x02 /* I can handle TSO for IPv4 packets */
88#define TUN_F_TSO6 0x04 /* I can handle TSO for IPv6 packets */
89#define TUN_F_TSO_ECN 0x08 /* I can handle TSO with ECN bits. */
90#define TUN_F_UFO 0x10 /* I can handle UFO packets */
91
92/* Protocol info prepended to the packets (when IFF_NO_PI is not set) */
93#define TUN_PKT_STRIP 0x0001
94struct tun_pi {
95 __u16 flags;
96 __be16 proto;
97};
98
99/*
100 * Filter spec (used for SETXXFILTER ioctls)
101 * This stuff is applicable only to the TAP (Ethernet) devices.
102 * If the count is zero the filter is disabled and the driver accepts
103 * all packets (promisc mode).
104 * If the filter is enabled in order to accept broadcast packets
105 * broadcast addr must be explicitly included in the addr list.
106 */
107#define TUN_FLT_ALLMULTI 0x0001 /* Accept all multicast packets */
108struct tun_filter {
109 __u16 flags; /* TUN_FLT_ flags see above */
110 __u16 count; /* Number of addresses */
111 __u8 addr[0][ETH_ALEN];
112};
113
114#endif /* _UAPI__IF_TUN_H */
diff --git a/tools/lib/bpf/Build b/tools/lib/bpf/Build
index ee9d5362f35b..e3962cfbc9a6 100644
--- a/tools/lib/bpf/Build
+++ b/tools/lib/bpf/Build
@@ -1 +1,3 @@
1libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o netlink.o bpf_prog_linfo.o libbpf_probes.o xsk.o 1libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o \
2 netlink.o bpf_prog_linfo.o libbpf_probes.o xsk.o hashmap.o \
3 btf_dump.o
diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile
index f91639bf5650..9312066a1ae3 100644
--- a/tools/lib/bpf/Makefile
+++ b/tools/lib/bpf/Makefile
@@ -3,7 +3,7 @@
3 3
4BPF_VERSION = 0 4BPF_VERSION = 0
5BPF_PATCHLEVEL = 0 5BPF_PATCHLEVEL = 0
6BPF_EXTRAVERSION = 3 6BPF_EXTRAVERSION = 4
7 7
8MAKEFLAGS += --no-print-directory 8MAKEFLAGS += --no-print-directory
9 9
@@ -204,6 +204,16 @@ check_abi: $(OUTPUT)libbpf.so
204 "versioned symbols in $^ ($(VERSIONED_SYM_COUNT))." \ 204 "versioned symbols in $^ ($(VERSIONED_SYM_COUNT))." \
205 "Please make sure all LIBBPF_API symbols are" \ 205 "Please make sure all LIBBPF_API symbols are" \
206 "versioned in $(VERSION_SCRIPT)." >&2; \ 206 "versioned in $(VERSION_SCRIPT)." >&2; \
207 readelf -s --wide $(OUTPUT)libbpf-in.o | \
208 awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$8}'| \
209 sort -u > $(OUTPUT)libbpf_global_syms.tmp; \
210 readelf -s --wide $(OUTPUT)libbpf.so | \
211 grep -Eo '[^ ]+@LIBBPF_' | cut -d@ -f1 | \
212 sort -u > $(OUTPUT)libbpf_versioned_syms.tmp; \
213 diff -u $(OUTPUT)libbpf_global_syms.tmp \
214 $(OUTPUT)libbpf_versioned_syms.tmp; \
215 rm $(OUTPUT)libbpf_global_syms.tmp \
216 $(OUTPUT)libbpf_versioned_syms.tmp; \
207 exit 1; \ 217 exit 1; \
208 fi 218 fi
209 219
diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index c4a48086dc9a..0d4b4fe10a84 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -256,6 +256,7 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
256 if (load_attr->name) 256 if (load_attr->name)
257 memcpy(attr.prog_name, load_attr->name, 257 memcpy(attr.prog_name, load_attr->name,
258 min(strlen(load_attr->name), BPF_OBJ_NAME_LEN - 1)); 258 min(strlen(load_attr->name), BPF_OBJ_NAME_LEN - 1));
259 attr.prog_flags = load_attr->prog_flags;
259 260
260 fd = sys_bpf_prog_load(&attr, sizeof(attr)); 261 fd = sys_bpf_prog_load(&attr, sizeof(attr));
261 if (fd >= 0) 262 if (fd >= 0)
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index 9593fec75652..ff42ca043dc8 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -87,6 +87,7 @@ struct bpf_load_program_attr {
87 const void *line_info; 87 const void *line_info;
88 __u32 line_info_cnt; 88 __u32 line_info_cnt;
89 __u32 log_level; 89 __u32 log_level;
90 __u32 prog_flags;
90}; 91};
91 92
92/* Flags to direct loading requirements */ 93/* Flags to direct loading requirements */
diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index 03348c4d6bd4..b2478e98c367 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -4,14 +4,17 @@
4#include <stdio.h> 4#include <stdio.h>
5#include <stdlib.h> 5#include <stdlib.h>
6#include <string.h> 6#include <string.h>
7#include <fcntl.h>
7#include <unistd.h> 8#include <unistd.h>
8#include <errno.h> 9#include <errno.h>
9#include <linux/err.h> 10#include <linux/err.h>
10#include <linux/btf.h> 11#include <linux/btf.h>
12#include <gelf.h>
11#include "btf.h" 13#include "btf.h"
12#include "bpf.h" 14#include "bpf.h"
13#include "libbpf.h" 15#include "libbpf.h"
14#include "libbpf_internal.h" 16#include "libbpf_internal.h"
17#include "hashmap.h"
15 18
16#define max(a, b) ((a) > (b) ? (a) : (b)) 19#define max(a, b) ((a) > (b) ? (a) : (b))
17#define min(a, b) ((a) < (b) ? (a) : (b)) 20#define min(a, b) ((a) < (b) ? (a) : (b))
@@ -417,6 +420,132 @@ done:
417 return btf; 420 return btf;
418} 421}
419 422
423static bool btf_check_endianness(const GElf_Ehdr *ehdr)
424{
425#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
426 return ehdr->e_ident[EI_DATA] == ELFDATA2LSB;
427#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
428 return ehdr->e_ident[EI_DATA] == ELFDATA2MSB;
429#else
430# error "Unrecognized __BYTE_ORDER__"
431#endif
432}
433
434struct btf *btf__parse_elf(const char *path, struct btf_ext **btf_ext)
435{
436 Elf_Data *btf_data = NULL, *btf_ext_data = NULL;
437 int err = 0, fd = -1, idx = 0;
438 struct btf *btf = NULL;
439 Elf_Scn *scn = NULL;
440 Elf *elf = NULL;
441 GElf_Ehdr ehdr;
442
443 if (elf_version(EV_CURRENT) == EV_NONE) {
444 pr_warning("failed to init libelf for %s\n", path);
445 return ERR_PTR(-LIBBPF_ERRNO__LIBELF);
446 }
447
448 fd = open(path, O_RDONLY);
449 if (fd < 0) {
450 err = -errno;
451 pr_warning("failed to open %s: %s\n", path, strerror(errno));
452 return ERR_PTR(err);
453 }
454
455 err = -LIBBPF_ERRNO__FORMAT;
456
457 elf = elf_begin(fd, ELF_C_READ, NULL);
458 if (!elf) {
459 pr_warning("failed to open %s as ELF file\n", path);
460 goto done;
461 }
462 if (!gelf_getehdr(elf, &ehdr)) {
463 pr_warning("failed to get EHDR from %s\n", path);
464 goto done;
465 }
466 if (!btf_check_endianness(&ehdr)) {
467 pr_warning("non-native ELF endianness is not supported\n");
468 goto done;
469 }
470 if (!elf_rawdata(elf_getscn(elf, ehdr.e_shstrndx), NULL)) {
471 pr_warning("failed to get e_shstrndx from %s\n", path);
472 goto done;
473 }
474
475 while ((scn = elf_nextscn(elf, scn)) != NULL) {
476 GElf_Shdr sh;
477 char *name;
478
479 idx++;
480 if (gelf_getshdr(scn, &sh) != &sh) {
481 pr_warning("failed to get section(%d) header from %s\n",
482 idx, path);
483 goto done;
484 }
485 name = elf_strptr(elf, ehdr.e_shstrndx, sh.sh_name);
486 if (!name) {
487 pr_warning("failed to get section(%d) name from %s\n",
488 idx, path);
489 goto done;
490 }
491 if (strcmp(name, BTF_ELF_SEC) == 0) {
492 btf_data = elf_getdata(scn, 0);
493 if (!btf_data) {
494 pr_warning("failed to get section(%d, %s) data from %s\n",
495 idx, name, path);
496 goto done;
497 }
498 continue;
499 } else if (btf_ext && strcmp(name, BTF_EXT_ELF_SEC) == 0) {
500 btf_ext_data = elf_getdata(scn, 0);
501 if (!btf_ext_data) {
502 pr_warning("failed to get section(%d, %s) data from %s\n",
503 idx, name, path);
504 goto done;
505 }
506 continue;
507 }
508 }
509
510 err = 0;
511
512 if (!btf_data) {
513 err = -ENOENT;
514 goto done;
515 }
516 btf = btf__new(btf_data->d_buf, btf_data->d_size);
517 if (IS_ERR(btf))
518 goto done;
519
520 if (btf_ext && btf_ext_data) {
521 *btf_ext = btf_ext__new(btf_ext_data->d_buf,
522 btf_ext_data->d_size);
523 if (IS_ERR(*btf_ext))
524 goto done;
525 } else if (btf_ext) {
526 *btf_ext = NULL;
527 }
528done:
529 if (elf)
530 elf_end(elf);
531 close(fd);
532
533 if (err)
534 return ERR_PTR(err);
535 /*
536 * btf is always parsed before btf_ext, so no need to clean up
537 * btf_ext, if btf loading failed
538 */
539 if (IS_ERR(btf))
540 return btf;
541 if (btf_ext && IS_ERR(*btf_ext)) {
542 btf__free(btf);
543 err = PTR_ERR(*btf_ext);
544 return ERR_PTR(err);
545 }
546 return btf;
547}
548
420static int compare_vsi_off(const void *_a, const void *_b) 549static int compare_vsi_off(const void *_a, const void *_b)
421{ 550{
422 const struct btf_var_secinfo *a = _a; 551 const struct btf_var_secinfo *a = _a;
@@ -1165,16 +1294,9 @@ done:
1165 return err; 1294 return err;
1166} 1295}
1167 1296
1168#define BTF_DEDUP_TABLE_DEFAULT_SIZE (1 << 14)
1169#define BTF_DEDUP_TABLE_MAX_SIZE_LOG 31
1170#define BTF_UNPROCESSED_ID ((__u32)-1) 1297#define BTF_UNPROCESSED_ID ((__u32)-1)
1171#define BTF_IN_PROGRESS_ID ((__u32)-2) 1298#define BTF_IN_PROGRESS_ID ((__u32)-2)
1172 1299
1173struct btf_dedup_node {
1174 struct btf_dedup_node *next;
1175 __u32 type_id;
1176};
1177
1178struct btf_dedup { 1300struct btf_dedup {
1179 /* .BTF section to be deduped in-place */ 1301 /* .BTF section to be deduped in-place */
1180 struct btf *btf; 1302 struct btf *btf;
@@ -1190,7 +1312,7 @@ struct btf_dedup {
1190 * candidates, which is fine because we rely on subsequent 1312 * candidates, which is fine because we rely on subsequent
1191 * btf_xxx_equal() checks to authoritatively verify type equality. 1313 * btf_xxx_equal() checks to authoritatively verify type equality.
1192 */ 1314 */
1193 struct btf_dedup_node **dedup_table; 1315 struct hashmap *dedup_table;
1194 /* Canonical types map */ 1316 /* Canonical types map */
1195 __u32 *map; 1317 __u32 *map;
1196 /* Hypothetical mapping, used during type graph equivalence checks */ 1318 /* Hypothetical mapping, used during type graph equivalence checks */
@@ -1215,30 +1337,18 @@ struct btf_str_ptrs {
1215 __u32 cap; 1337 __u32 cap;
1216}; 1338};
1217 1339
1218static inline __u32 hash_combine(__u32 h, __u32 value) 1340static long hash_combine(long h, long value)
1219{ 1341{
1220/* 2^31 + 2^29 - 2^25 + 2^22 - 2^19 - 2^16 + 1 */ 1342 return h * 31 + value;
1221#define GOLDEN_RATIO_PRIME 0x9e370001UL
1222 return h * 37 + value * GOLDEN_RATIO_PRIME;
1223#undef GOLDEN_RATIO_PRIME
1224} 1343}
1225 1344
1226#define for_each_dedup_cand(d, hash, node) \ 1345#define for_each_dedup_cand(d, node, hash) \
1227 for (node = d->dedup_table[hash & (d->opts.dedup_table_size - 1)]; \ 1346 hashmap__for_each_key_entry(d->dedup_table, node, (void *)hash)
1228 node; \
1229 node = node->next)
1230 1347
1231static int btf_dedup_table_add(struct btf_dedup *d, __u32 hash, __u32 type_id) 1348static int btf_dedup_table_add(struct btf_dedup *d, long hash, __u32 type_id)
1232{ 1349{
1233 struct btf_dedup_node *node = malloc(sizeof(struct btf_dedup_node)); 1350 return hashmap__append(d->dedup_table,
1234 int bucket = hash & (d->opts.dedup_table_size - 1); 1351 (void *)hash, (void *)(long)type_id);
1235
1236 if (!node)
1237 return -ENOMEM;
1238 node->type_id = type_id;
1239 node->next = d->dedup_table[bucket];
1240 d->dedup_table[bucket] = node;
1241 return 0;
1242} 1352}
1243 1353
1244static int btf_dedup_hypot_map_add(struct btf_dedup *d, 1354static int btf_dedup_hypot_map_add(struct btf_dedup *d,
@@ -1267,36 +1377,10 @@ static void btf_dedup_clear_hypot_map(struct btf_dedup *d)
1267 d->hypot_cnt = 0; 1377 d->hypot_cnt = 0;
1268} 1378}
1269 1379
1270static void btf_dedup_table_free(struct btf_dedup *d)
1271{
1272 struct btf_dedup_node *head, *tmp;
1273 int i;
1274
1275 if (!d->dedup_table)
1276 return;
1277
1278 for (i = 0; i < d->opts.dedup_table_size; i++) {
1279 while (d->dedup_table[i]) {
1280 tmp = d->dedup_table[i];
1281 d->dedup_table[i] = tmp->next;
1282 free(tmp);
1283 }
1284
1285 head = d->dedup_table[i];
1286 while (head) {
1287 tmp = head;
1288 head = head->next;
1289 free(tmp);
1290 }
1291 }
1292
1293 free(d->dedup_table);
1294 d->dedup_table = NULL;
1295}
1296
1297static void btf_dedup_free(struct btf_dedup *d) 1380static void btf_dedup_free(struct btf_dedup *d)
1298{ 1381{
1299 btf_dedup_table_free(d); 1382 hashmap__free(d->dedup_table);
1383 d->dedup_table = NULL;
1300 1384
1301 free(d->map); 1385 free(d->map);
1302 d->map = NULL; 1386 d->map = NULL;
@@ -1310,40 +1394,43 @@ static void btf_dedup_free(struct btf_dedup *d)
1310 free(d); 1394 free(d);
1311} 1395}
1312 1396
1313/* Find closest power of two >= to size, capped at 2^max_size_log */ 1397static size_t btf_dedup_identity_hash_fn(const void *key, void *ctx)
1314static __u32 roundup_pow2_max(__u32 size, int max_size_log)
1315{ 1398{
1316 int i; 1399 return (size_t)key;
1400}
1317 1401
1318 for (i = 0; i < max_size_log && (1U << i) < size; i++) 1402static size_t btf_dedup_collision_hash_fn(const void *key, void *ctx)
1319 ; 1403{
1320 return 1U << i; 1404 return 0;
1321} 1405}
1322 1406
1407static bool btf_dedup_equal_fn(const void *k1, const void *k2, void *ctx)
1408{
1409 return k1 == k2;
1410}
1323 1411
1324static struct btf_dedup *btf_dedup_new(struct btf *btf, struct btf_ext *btf_ext, 1412static struct btf_dedup *btf_dedup_new(struct btf *btf, struct btf_ext *btf_ext,
1325 const struct btf_dedup_opts *opts) 1413 const struct btf_dedup_opts *opts)
1326{ 1414{
1327 struct btf_dedup *d = calloc(1, sizeof(struct btf_dedup)); 1415 struct btf_dedup *d = calloc(1, sizeof(struct btf_dedup));
1416 hashmap_hash_fn hash_fn = btf_dedup_identity_hash_fn;
1328 int i, err = 0; 1417 int i, err = 0;
1329 __u32 sz;
1330 1418
1331 if (!d) 1419 if (!d)
1332 return ERR_PTR(-ENOMEM); 1420 return ERR_PTR(-ENOMEM);
1333 1421
1334 d->opts.dont_resolve_fwds = opts && opts->dont_resolve_fwds; 1422 d->opts.dont_resolve_fwds = opts && opts->dont_resolve_fwds;
1335 sz = opts && opts->dedup_table_size ? opts->dedup_table_size 1423 /* dedup_table_size is now used only to force collisions in tests */
1336 : BTF_DEDUP_TABLE_DEFAULT_SIZE; 1424 if (opts && opts->dedup_table_size == 1)
1337 sz = roundup_pow2_max(sz, BTF_DEDUP_TABLE_MAX_SIZE_LOG); 1425 hash_fn = btf_dedup_collision_hash_fn;
1338 d->opts.dedup_table_size = sz;
1339 1426
1340 d->btf = btf; 1427 d->btf = btf;
1341 d->btf_ext = btf_ext; 1428 d->btf_ext = btf_ext;
1342 1429
1343 d->dedup_table = calloc(d->opts.dedup_table_size, 1430 d->dedup_table = hashmap__new(hash_fn, btf_dedup_equal_fn, NULL);
1344 sizeof(struct btf_dedup_node *)); 1431 if (IS_ERR(d->dedup_table)) {
1345 if (!d->dedup_table) { 1432 err = PTR_ERR(d->dedup_table);
1346 err = -ENOMEM; 1433 d->dedup_table = NULL;
1347 goto done; 1434 goto done;
1348 } 1435 }
1349 1436
@@ -1662,9 +1749,9 @@ done:
1662 return err; 1749 return err;
1663} 1750}
1664 1751
1665static __u32 btf_hash_common(struct btf_type *t) 1752static long btf_hash_common(struct btf_type *t)
1666{ 1753{
1667 __u32 h; 1754 long h;
1668 1755
1669 h = hash_combine(0, t->name_off); 1756 h = hash_combine(0, t->name_off);
1670 h = hash_combine(h, t->info); 1757 h = hash_combine(h, t->info);
@@ -1680,10 +1767,10 @@ static bool btf_equal_common(struct btf_type *t1, struct btf_type *t2)
1680} 1767}
1681 1768
1682/* Calculate type signature hash of INT. */ 1769/* Calculate type signature hash of INT. */
1683static __u32 btf_hash_int(struct btf_type *t) 1770static long btf_hash_int(struct btf_type *t)
1684{ 1771{
1685 __u32 info = *(__u32 *)(t + 1); 1772 __u32 info = *(__u32 *)(t + 1);
1686 __u32 h; 1773 long h;
1687 1774
1688 h = btf_hash_common(t); 1775 h = btf_hash_common(t);
1689 h = hash_combine(h, info); 1776 h = hash_combine(h, info);
@@ -1703,9 +1790,9 @@ static bool btf_equal_int(struct btf_type *t1, struct btf_type *t2)
1703} 1790}
1704 1791
1705/* Calculate type signature hash of ENUM. */ 1792/* Calculate type signature hash of ENUM. */
1706static __u32 btf_hash_enum(struct btf_type *t) 1793static long btf_hash_enum(struct btf_type *t)
1707{ 1794{
1708 __u32 h; 1795 long h;
1709 1796
1710 /* don't hash vlen and enum members to support enum fwd resolving */ 1797 /* don't hash vlen and enum members to support enum fwd resolving */
1711 h = hash_combine(0, t->name_off); 1798 h = hash_combine(0, t->name_off);
@@ -1757,11 +1844,11 @@ static bool btf_compat_enum(struct btf_type *t1, struct btf_type *t2)
1757 * as referenced type IDs equivalence is established separately during type 1844 * as referenced type IDs equivalence is established separately during type
1758 * graph equivalence check algorithm. 1845 * graph equivalence check algorithm.
1759 */ 1846 */
1760static __u32 btf_hash_struct(struct btf_type *t) 1847static long btf_hash_struct(struct btf_type *t)
1761{ 1848{
1762 struct btf_member *member = (struct btf_member *)(t + 1); 1849 struct btf_member *member = (struct btf_member *)(t + 1);
1763 __u32 vlen = BTF_INFO_VLEN(t->info); 1850 __u32 vlen = BTF_INFO_VLEN(t->info);
1764 __u32 h = btf_hash_common(t); 1851 long h = btf_hash_common(t);
1765 int i; 1852 int i;
1766 1853
1767 for (i = 0; i < vlen; i++) { 1854 for (i = 0; i < vlen; i++) {
@@ -1804,10 +1891,10 @@ static bool btf_shallow_equal_struct(struct btf_type *t1, struct btf_type *t2)
1804 * under assumption that they were already resolved to canonical type IDs and 1891 * under assumption that they were already resolved to canonical type IDs and
1805 * are not going to change. 1892 * are not going to change.
1806 */ 1893 */
1807static __u32 btf_hash_array(struct btf_type *t) 1894static long btf_hash_array(struct btf_type *t)
1808{ 1895{
1809 struct btf_array *info = (struct btf_array *)(t + 1); 1896 struct btf_array *info = (struct btf_array *)(t + 1);
1810 __u32 h = btf_hash_common(t); 1897 long h = btf_hash_common(t);
1811 1898
1812 h = hash_combine(h, info->type); 1899 h = hash_combine(h, info->type);
1813 h = hash_combine(h, info->index_type); 1900 h = hash_combine(h, info->index_type);
@@ -1858,11 +1945,11 @@ static bool btf_compat_array(struct btf_type *t1, struct btf_type *t2)
1858 * under assumption that they were already resolved to canonical type IDs and 1945 * under assumption that they were already resolved to canonical type IDs and
1859 * are not going to change. 1946 * are not going to change.
1860 */ 1947 */
1861static inline __u32 btf_hash_fnproto(struct btf_type *t) 1948static long btf_hash_fnproto(struct btf_type *t)
1862{ 1949{
1863 struct btf_param *member = (struct btf_param *)(t + 1); 1950 struct btf_param *member = (struct btf_param *)(t + 1);
1864 __u16 vlen = BTF_INFO_VLEN(t->info); 1951 __u16 vlen = BTF_INFO_VLEN(t->info);
1865 __u32 h = btf_hash_common(t); 1952 long h = btf_hash_common(t);
1866 int i; 1953 int i;
1867 1954
1868 for (i = 0; i < vlen; i++) { 1955 for (i = 0; i < vlen; i++) {
@@ -1880,7 +1967,7 @@ static inline __u32 btf_hash_fnproto(struct btf_type *t)
1880 * This function is called during reference types deduplication to compare 1967 * This function is called during reference types deduplication to compare
1881 * FUNC_PROTO to potential canonical representative. 1968 * FUNC_PROTO to potential canonical representative.
1882 */ 1969 */
1883static inline bool btf_equal_fnproto(struct btf_type *t1, struct btf_type *t2) 1970static bool btf_equal_fnproto(struct btf_type *t1, struct btf_type *t2)
1884{ 1971{
1885 struct btf_param *m1, *m2; 1972 struct btf_param *m1, *m2;
1886 __u16 vlen; 1973 __u16 vlen;
@@ -1906,7 +1993,7 @@ static inline bool btf_equal_fnproto(struct btf_type *t1, struct btf_type *t2)
1906 * IDs. This check is performed during type graph equivalence check and 1993 * IDs. This check is performed during type graph equivalence check and
1907 * referenced types equivalence is checked separately. 1994 * referenced types equivalence is checked separately.
1908 */ 1995 */
1909static inline bool btf_compat_fnproto(struct btf_type *t1, struct btf_type *t2) 1996static bool btf_compat_fnproto(struct btf_type *t1, struct btf_type *t2)
1910{ 1997{
1911 struct btf_param *m1, *m2; 1998 struct btf_param *m1, *m2;
1912 __u16 vlen; 1999 __u16 vlen;
@@ -1937,11 +2024,12 @@ static inline bool btf_compat_fnproto(struct btf_type *t1, struct btf_type *t2)
1937static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id) 2024static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id)
1938{ 2025{
1939 struct btf_type *t = d->btf->types[type_id]; 2026 struct btf_type *t = d->btf->types[type_id];
2027 struct hashmap_entry *hash_entry;
1940 struct btf_type *cand; 2028 struct btf_type *cand;
1941 struct btf_dedup_node *cand_node;
1942 /* if we don't find equivalent type, then we are canonical */ 2029 /* if we don't find equivalent type, then we are canonical */
1943 __u32 new_id = type_id; 2030 __u32 new_id = type_id;
1944 __u32 h; 2031 __u32 cand_id;
2032 long h;
1945 2033
1946 switch (BTF_INFO_KIND(t->info)) { 2034 switch (BTF_INFO_KIND(t->info)) {
1947 case BTF_KIND_CONST: 2035 case BTF_KIND_CONST:
@@ -1960,10 +2048,11 @@ static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id)
1960 2048
1961 case BTF_KIND_INT: 2049 case BTF_KIND_INT:
1962 h = btf_hash_int(t); 2050 h = btf_hash_int(t);
1963 for_each_dedup_cand(d, h, cand_node) { 2051 for_each_dedup_cand(d, hash_entry, h) {
1964 cand = d->btf->types[cand_node->type_id]; 2052 cand_id = (__u32)(long)hash_entry->value;
2053 cand = d->btf->types[cand_id];
1965 if (btf_equal_int(t, cand)) { 2054 if (btf_equal_int(t, cand)) {
1966 new_id = cand_node->type_id; 2055 new_id = cand_id;
1967 break; 2056 break;
1968 } 2057 }
1969 } 2058 }
@@ -1971,10 +2060,11 @@ static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id)
1971 2060
1972 case BTF_KIND_ENUM: 2061 case BTF_KIND_ENUM:
1973 h = btf_hash_enum(t); 2062 h = btf_hash_enum(t);
1974 for_each_dedup_cand(d, h, cand_node) { 2063 for_each_dedup_cand(d, hash_entry, h) {
1975 cand = d->btf->types[cand_node->type_id]; 2064 cand_id = (__u32)(long)hash_entry->value;
2065 cand = d->btf->types[cand_id];
1976 if (btf_equal_enum(t, cand)) { 2066 if (btf_equal_enum(t, cand)) {
1977 new_id = cand_node->type_id; 2067 new_id = cand_id;
1978 break; 2068 break;
1979 } 2069 }
1980 if (d->opts.dont_resolve_fwds) 2070 if (d->opts.dont_resolve_fwds)
@@ -1982,21 +2072,22 @@ static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id)
1982 if (btf_compat_enum(t, cand)) { 2072 if (btf_compat_enum(t, cand)) {
1983 if (btf_is_enum_fwd(t)) { 2073 if (btf_is_enum_fwd(t)) {
1984 /* resolve fwd to full enum */ 2074 /* resolve fwd to full enum */
1985 new_id = cand_node->type_id; 2075 new_id = cand_id;
1986 break; 2076 break;
1987 } 2077 }
1988 /* resolve canonical enum fwd to full enum */ 2078 /* resolve canonical enum fwd to full enum */
1989 d->map[cand_node->type_id] = type_id; 2079 d->map[cand_id] = type_id;
1990 } 2080 }
1991 } 2081 }
1992 break; 2082 break;
1993 2083
1994 case BTF_KIND_FWD: 2084 case BTF_KIND_FWD:
1995 h = btf_hash_common(t); 2085 h = btf_hash_common(t);
1996 for_each_dedup_cand(d, h, cand_node) { 2086 for_each_dedup_cand(d, hash_entry, h) {
1997 cand = d->btf->types[cand_node->type_id]; 2087 cand_id = (__u32)(long)hash_entry->value;
2088 cand = d->btf->types[cand_id];
1998 if (btf_equal_common(t, cand)) { 2089 if (btf_equal_common(t, cand)) {
1999 new_id = cand_node->type_id; 2090 new_id = cand_id;
2000 break; 2091 break;
2001 } 2092 }
2002 } 2093 }
@@ -2397,12 +2488,12 @@ static void btf_dedup_merge_hypot_map(struct btf_dedup *d)
2397 */ 2488 */
2398static int btf_dedup_struct_type(struct btf_dedup *d, __u32 type_id) 2489static int btf_dedup_struct_type(struct btf_dedup *d, __u32 type_id)
2399{ 2490{
2400 struct btf_dedup_node *cand_node;
2401 struct btf_type *cand_type, *t; 2491 struct btf_type *cand_type, *t;
2492 struct hashmap_entry *hash_entry;
2402 /* if we don't find equivalent type, then we are canonical */ 2493 /* if we don't find equivalent type, then we are canonical */
2403 __u32 new_id = type_id; 2494 __u32 new_id = type_id;
2404 __u16 kind; 2495 __u16 kind;
2405 __u32 h; 2496 long h;
2406 2497
2407 /* already deduped or is in process of deduping (loop detected) */ 2498 /* already deduped or is in process of deduping (loop detected) */
2408 if (d->map[type_id] <= BTF_MAX_NR_TYPES) 2499 if (d->map[type_id] <= BTF_MAX_NR_TYPES)
@@ -2415,7 +2506,8 @@ static int btf_dedup_struct_type(struct btf_dedup *d, __u32 type_id)
2415 return 0; 2506 return 0;
2416 2507
2417 h = btf_hash_struct(t); 2508 h = btf_hash_struct(t);
2418 for_each_dedup_cand(d, h, cand_node) { 2509 for_each_dedup_cand(d, hash_entry, h) {
2510 __u32 cand_id = (__u32)(long)hash_entry->value;
2419 int eq; 2511 int eq;
2420 2512
2421 /* 2513 /*
@@ -2428,17 +2520,17 @@ static int btf_dedup_struct_type(struct btf_dedup *d, __u32 type_id)
2428 * creating a loop (FWD -> STRUCT and STRUCT -> FWD), because 2520 * creating a loop (FWD -> STRUCT and STRUCT -> FWD), because
2429 * FWD and compatible STRUCT/UNION are considered equivalent. 2521 * FWD and compatible STRUCT/UNION are considered equivalent.
2430 */ 2522 */
2431 cand_type = d->btf->types[cand_node->type_id]; 2523 cand_type = d->btf->types[cand_id];
2432 if (!btf_shallow_equal_struct(t, cand_type)) 2524 if (!btf_shallow_equal_struct(t, cand_type))
2433 continue; 2525 continue;
2434 2526
2435 btf_dedup_clear_hypot_map(d); 2527 btf_dedup_clear_hypot_map(d);
2436 eq = btf_dedup_is_equiv(d, type_id, cand_node->type_id); 2528 eq = btf_dedup_is_equiv(d, type_id, cand_id);
2437 if (eq < 0) 2529 if (eq < 0)
2438 return eq; 2530 return eq;
2439 if (!eq) 2531 if (!eq)
2440 continue; 2532 continue;
2441 new_id = cand_node->type_id; 2533 new_id = cand_id;
2442 btf_dedup_merge_hypot_map(d); 2534 btf_dedup_merge_hypot_map(d);
2443 break; 2535 break;
2444 } 2536 }
@@ -2488,12 +2580,12 @@ static int btf_dedup_struct_types(struct btf_dedup *d)
2488 */ 2580 */
2489static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id) 2581static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id)
2490{ 2582{
2491 struct btf_dedup_node *cand_node; 2583 struct hashmap_entry *hash_entry;
2584 __u32 new_id = type_id, cand_id;
2492 struct btf_type *t, *cand; 2585 struct btf_type *t, *cand;
2493 /* if we don't find equivalent type, then we are representative type */ 2586 /* if we don't find equivalent type, then we are representative type */
2494 __u32 new_id = type_id;
2495 int ref_type_id; 2587 int ref_type_id;
2496 __u32 h; 2588 long h;
2497 2589
2498 if (d->map[type_id] == BTF_IN_PROGRESS_ID) 2590 if (d->map[type_id] == BTF_IN_PROGRESS_ID)
2499 return -ELOOP; 2591 return -ELOOP;
@@ -2516,10 +2608,11 @@ static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id)
2516 t->type = ref_type_id; 2608 t->type = ref_type_id;
2517 2609
2518 h = btf_hash_common(t); 2610 h = btf_hash_common(t);
2519 for_each_dedup_cand(d, h, cand_node) { 2611 for_each_dedup_cand(d, hash_entry, h) {
2520 cand = d->btf->types[cand_node->type_id]; 2612 cand_id = (__u32)(long)hash_entry->value;
2613 cand = d->btf->types[cand_id];
2521 if (btf_equal_common(t, cand)) { 2614 if (btf_equal_common(t, cand)) {
2522 new_id = cand_node->type_id; 2615 new_id = cand_id;
2523 break; 2616 break;
2524 } 2617 }
2525 } 2618 }
@@ -2539,10 +2632,11 @@ static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id)
2539 info->index_type = ref_type_id; 2632 info->index_type = ref_type_id;
2540 2633
2541 h = btf_hash_array(t); 2634 h = btf_hash_array(t);
2542 for_each_dedup_cand(d, h, cand_node) { 2635 for_each_dedup_cand(d, hash_entry, h) {
2543 cand = d->btf->types[cand_node->type_id]; 2636 cand_id = (__u32)(long)hash_entry->value;
2637 cand = d->btf->types[cand_id];
2544 if (btf_equal_array(t, cand)) { 2638 if (btf_equal_array(t, cand)) {
2545 new_id = cand_node->type_id; 2639 new_id = cand_id;
2546 break; 2640 break;
2547 } 2641 }
2548 } 2642 }
@@ -2570,10 +2664,11 @@ static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id)
2570 } 2664 }
2571 2665
2572 h = btf_hash_fnproto(t); 2666 h = btf_hash_fnproto(t);
2573 for_each_dedup_cand(d, h, cand_node) { 2667 for_each_dedup_cand(d, hash_entry, h) {
2574 cand = d->btf->types[cand_node->type_id]; 2668 cand_id = (__u32)(long)hash_entry->value;
2669 cand = d->btf->types[cand_id];
2575 if (btf_equal_fnproto(t, cand)) { 2670 if (btf_equal_fnproto(t, cand)) {
2576 new_id = cand_node->type_id; 2671 new_id = cand_id;
2577 break; 2672 break;
2578 } 2673 }
2579 } 2674 }
@@ -2600,7 +2695,9 @@ static int btf_dedup_ref_types(struct btf_dedup *d)
2600 if (err < 0) 2695 if (err < 0)
2601 return err; 2696 return err;
2602 } 2697 }
2603 btf_dedup_table_free(d); 2698 /* we won't need d->dedup_table anymore */
2699 hashmap__free(d->dedup_table);
2700 d->dedup_table = NULL;
2604 return 0; 2701 return 0;
2605} 2702}
2606 2703
diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h
index c7b399e81fce..ba4ffa831aa4 100644
--- a/tools/lib/bpf/btf.h
+++ b/tools/lib/bpf/btf.h
@@ -4,6 +4,7 @@
4#ifndef __LIBBPF_BTF_H 4#ifndef __LIBBPF_BTF_H
5#define __LIBBPF_BTF_H 5#define __LIBBPF_BTF_H
6 6
7#include <stdarg.h>
7#include <linux/types.h> 8#include <linux/types.h>
8 9
9#ifdef __cplusplus 10#ifdef __cplusplus
@@ -59,6 +60,8 @@ struct btf_ext_header {
59 60
60LIBBPF_API void btf__free(struct btf *btf); 61LIBBPF_API void btf__free(struct btf *btf);
61LIBBPF_API struct btf *btf__new(__u8 *data, __u32 size); 62LIBBPF_API struct btf *btf__new(__u8 *data, __u32 size);
63LIBBPF_API struct btf *btf__parse_elf(const char *path,
64 struct btf_ext **btf_ext);
62LIBBPF_API int btf__finalize_data(struct bpf_object *obj, struct btf *btf); 65LIBBPF_API int btf__finalize_data(struct bpf_object *obj, struct btf *btf);
63LIBBPF_API int btf__load(struct btf *btf); 66LIBBPF_API int btf__load(struct btf *btf);
64LIBBPF_API __s32 btf__find_by_name(const struct btf *btf, 67LIBBPF_API __s32 btf__find_by_name(const struct btf *btf,
@@ -100,6 +103,22 @@ struct btf_dedup_opts {
100LIBBPF_API int btf__dedup(struct btf *btf, struct btf_ext *btf_ext, 103LIBBPF_API int btf__dedup(struct btf *btf, struct btf_ext *btf_ext,
101 const struct btf_dedup_opts *opts); 104 const struct btf_dedup_opts *opts);
102 105
106struct btf_dump;
107
108struct btf_dump_opts {
109 void *ctx;
110};
111
112typedef void (*btf_dump_printf_fn_t)(void *ctx, const char *fmt, va_list args);
113
114LIBBPF_API struct btf_dump *btf_dump__new(const struct btf *btf,
115 const struct btf_ext *btf_ext,
116 const struct btf_dump_opts *opts,
117 btf_dump_printf_fn_t printf_fn);
118LIBBPF_API void btf_dump__free(struct btf_dump *d);
119
120LIBBPF_API int btf_dump__dump_type(struct btf_dump *d, __u32 id);
121
103#ifdef __cplusplus 122#ifdef __cplusplus
104} /* extern "C" */ 123} /* extern "C" */
105#endif 124#endif
diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c
new file mode 100644
index 000000000000..4b22db77e2cc
--- /dev/null
+++ b/tools/lib/bpf/btf_dump.c
@@ -0,0 +1,1336 @@
1// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
2
3/*
4 * BTF-to-C type converter.
5 *
6 * Copyright (c) 2019 Facebook
7 */
8
9#include <stdbool.h>
10#include <stddef.h>
11#include <stdlib.h>
12#include <string.h>
13#include <errno.h>
14#include <linux/err.h>
15#include <linux/btf.h>
16#include "btf.h"
17#include "hashmap.h"
18#include "libbpf.h"
19#include "libbpf_internal.h"
20
21#define min(x, y) ((x) < (y) ? (x) : (y))
22#define max(x, y) ((x) < (y) ? (y) : (x))
23
24static const char PREFIXES[] = "\t\t\t\t\t\t\t\t\t\t\t\t\t";
25static const size_t PREFIX_CNT = sizeof(PREFIXES) - 1;
26
27static const char *pfx(int lvl)
28{
29 return lvl >= PREFIX_CNT ? PREFIXES : &PREFIXES[PREFIX_CNT - lvl];
30}
31
32enum btf_dump_type_order_state {
33 NOT_ORDERED,
34 ORDERING,
35 ORDERED,
36};
37
38enum btf_dump_type_emit_state {
39 NOT_EMITTED,
40 EMITTING,
41 EMITTED,
42};
43
44/* per-type auxiliary state */
45struct btf_dump_type_aux_state {
46 /* topological sorting state */
47 enum btf_dump_type_order_state order_state: 2;
48 /* emitting state used to determine the need for forward declaration */
49 enum btf_dump_type_emit_state emit_state: 2;
50 /* whether forward declaration was already emitted */
51 __u8 fwd_emitted: 1;
52 /* whether unique non-duplicate name was already assigned */
53 __u8 name_resolved: 1;
54};
55
56struct btf_dump {
57 const struct btf *btf;
58 const struct btf_ext *btf_ext;
59 btf_dump_printf_fn_t printf_fn;
60 struct btf_dump_opts opts;
61
62 /* per-type auxiliary state */
63 struct btf_dump_type_aux_state *type_states;
64 /* per-type optional cached unique name, must be freed, if present */
65 const char **cached_names;
66
67 /* topo-sorted list of dependent type definitions */
68 __u32 *emit_queue;
69 int emit_queue_cap;
70 int emit_queue_cnt;
71
72 /*
73 * stack of type declarations (e.g., chain of modifiers, arrays,
74 * funcs, etc)
75 */
76 __u32 *decl_stack;
77 int decl_stack_cap;
78 int decl_stack_cnt;
79
80 /* maps struct/union/enum name to a number of name occurrences */
81 struct hashmap *type_names;
82 /*
83 * maps typedef identifiers and enum value names to a number of such
84 * name occurrences
85 */
86 struct hashmap *ident_names;
87};
88
89static size_t str_hash_fn(const void *key, void *ctx)
90{
91 const char *s = key;
92 size_t h = 0;
93
94 while (*s) {
95 h = h * 31 + *s;
96 s++;
97 }
98 return h;
99}
100
101static bool str_equal_fn(const void *a, const void *b, void *ctx)
102{
103 return strcmp(a, b) == 0;
104}
105
106static __u16 btf_kind_of(const struct btf_type *t)
107{
108 return BTF_INFO_KIND(t->info);
109}
110
111static __u16 btf_vlen_of(const struct btf_type *t)
112{
113 return BTF_INFO_VLEN(t->info);
114}
115
116static bool btf_kflag_of(const struct btf_type *t)
117{
118 return BTF_INFO_KFLAG(t->info);
119}
120
121static const char *btf_name_of(const struct btf_dump *d, __u32 name_off)
122{
123 return btf__name_by_offset(d->btf, name_off);
124}
125
126static void btf_dump_printf(const struct btf_dump *d, const char *fmt, ...)
127{
128 va_list args;
129
130 va_start(args, fmt);
131 d->printf_fn(d->opts.ctx, fmt, args);
132 va_end(args);
133}
134
135struct btf_dump *btf_dump__new(const struct btf *btf,
136 const struct btf_ext *btf_ext,
137 const struct btf_dump_opts *opts,
138 btf_dump_printf_fn_t printf_fn)
139{
140 struct btf_dump *d;
141 int err;
142
143 d = calloc(1, sizeof(struct btf_dump));
144 if (!d)
145 return ERR_PTR(-ENOMEM);
146
147 d->btf = btf;
148 d->btf_ext = btf_ext;
149 d->printf_fn = printf_fn;
150 d->opts.ctx = opts ? opts->ctx : NULL;
151
152 d->type_names = hashmap__new(str_hash_fn, str_equal_fn, NULL);
153 if (IS_ERR(d->type_names)) {
154 err = PTR_ERR(d->type_names);
155 d->type_names = NULL;
156 btf_dump__free(d);
157 return ERR_PTR(err);
158 }
159 d->ident_names = hashmap__new(str_hash_fn, str_equal_fn, NULL);
160 if (IS_ERR(d->ident_names)) {
161 err = PTR_ERR(d->ident_names);
162 d->ident_names = NULL;
163 btf_dump__free(d);
164 return ERR_PTR(err);
165 }
166
167 return d;
168}
169
170void btf_dump__free(struct btf_dump *d)
171{
172 int i, cnt;
173
174 if (!d)
175 return;
176
177 free(d->type_states);
178 if (d->cached_names) {
179 /* any set cached name is owned by us and should be freed */
180 for (i = 0, cnt = btf__get_nr_types(d->btf); i <= cnt; i++) {
181 if (d->cached_names[i])
182 free((void *)d->cached_names[i]);
183 }
184 }
185 free(d->cached_names);
186 free(d->emit_queue);
187 free(d->decl_stack);
188 hashmap__free(d->type_names);
189 hashmap__free(d->ident_names);
190
191 free(d);
192}
193
194static int btf_dump_order_type(struct btf_dump *d, __u32 id, bool through_ptr);
195static void btf_dump_emit_type(struct btf_dump *d, __u32 id, __u32 cont_id);
196
197/*
198 * Dump BTF type in a compilable C syntax, including all the necessary
199 * dependent types, necessary for compilation. If some of the dependent types
200 * were already emitted as part of previous btf_dump__dump_type() invocation
201 * for another type, they won't be emitted again. This API allows callers to
202 * filter out BTF types according to user-defined criterias and emitted only
203 * minimal subset of types, necessary to compile everything. Full struct/union
204 * definitions will still be emitted, even if the only usage is through
205 * pointer and could be satisfied with just a forward declaration.
206 *
207 * Dumping is done in two high-level passes:
208 * 1. Topologically sort type definitions to satisfy C rules of compilation.
209 * 2. Emit type definitions in C syntax.
210 *
211 * Returns 0 on success; <0, otherwise.
212 */
213int btf_dump__dump_type(struct btf_dump *d, __u32 id)
214{
215 int err, i;
216
217 if (id > btf__get_nr_types(d->btf))
218 return -EINVAL;
219
220 /* type states are lazily allocated, as they might not be needed */
221 if (!d->type_states) {
222 d->type_states = calloc(1 + btf__get_nr_types(d->btf),
223 sizeof(d->type_states[0]));
224 if (!d->type_states)
225 return -ENOMEM;
226 d->cached_names = calloc(1 + btf__get_nr_types(d->btf),
227 sizeof(d->cached_names[0]));
228 if (!d->cached_names)
229 return -ENOMEM;
230
231 /* VOID is special */
232 d->type_states[0].order_state = ORDERED;
233 d->type_states[0].emit_state = EMITTED;
234 }
235
236 d->emit_queue_cnt = 0;
237 err = btf_dump_order_type(d, id, false);
238 if (err < 0)
239 return err;
240
241 for (i = 0; i < d->emit_queue_cnt; i++)
242 btf_dump_emit_type(d, d->emit_queue[i], 0 /*top-level*/);
243
244 return 0;
245}
246
247static int btf_dump_add_emit_queue_id(struct btf_dump *d, __u32 id)
248{
249 __u32 *new_queue;
250 size_t new_cap;
251
252 if (d->emit_queue_cnt >= d->emit_queue_cap) {
253 new_cap = max(16, d->emit_queue_cap * 3 / 2);
254 new_queue = realloc(d->emit_queue,
255 new_cap * sizeof(new_queue[0]));
256 if (!new_queue)
257 return -ENOMEM;
258 d->emit_queue = new_queue;
259 d->emit_queue_cap = new_cap;
260 }
261
262 d->emit_queue[d->emit_queue_cnt++] = id;
263 return 0;
264}
265
266/*
267 * Determine order of emitting dependent types and specified type to satisfy
268 * C compilation rules. This is done through topological sorting with an
269 * additional complication which comes from C rules. The main idea for C is
270 * that if some type is "embedded" into a struct/union, it's size needs to be
271 * known at the time of definition of containing type. E.g., for:
272 *
273 * struct A {};
274 * struct B { struct A x; }
275 *
276 * struct A *HAS* to be defined before struct B, because it's "embedded",
277 * i.e., it is part of struct B layout. But in the following case:
278 *
279 * struct A;
280 * struct B { struct A *x; }
281 * struct A {};
282 *
283 * it's enough to just have a forward declaration of struct A at the time of
284 * struct B definition, as struct B has a pointer to struct A, so the size of
285 * field x is known without knowing struct A size: it's sizeof(void *).
286 *
287 * Unfortunately, there are some trickier cases we need to handle, e.g.:
288 *
289 * struct A {}; // if this was forward-declaration: compilation error
290 * struct B {
291 * struct { // anonymous struct
292 * struct A y;
293 * } *x;
294 * };
295 *
296 * In this case, struct B's field x is a pointer, so it's size is known
297 * regardless of the size of (anonymous) struct it points to. But because this
298 * struct is anonymous and thus defined inline inside struct B, *and* it
299 * embeds struct A, compiler requires full definition of struct A to be known
300 * before struct B can be defined. This creates a transitive dependency
301 * between struct A and struct B. If struct A was forward-declared before
302 * struct B definition and fully defined after struct B definition, that would
303 * trigger compilation error.
304 *
305 * All this means that while we are doing topological sorting on BTF type
306 * graph, we need to determine relationships between different types (graph
307 * nodes):
308 * - weak link (relationship) between X and Y, if Y *CAN* be
309 * forward-declared at the point of X definition;
310 * - strong link, if Y *HAS* to be fully-defined before X can be defined.
311 *
312 * The rule is as follows. Given a chain of BTF types from X to Y, if there is
313 * BTF_KIND_PTR type in the chain and at least one non-anonymous type
314 * Z (excluding X, including Y), then link is weak. Otherwise, it's strong.
315 * Weak/strong relationship is determined recursively during DFS traversal and
316 * is returned as a result from btf_dump_order_type().
317 *
318 * btf_dump_order_type() is trying to avoid unnecessary forward declarations,
319 * but it is not guaranteeing that no extraneous forward declarations will be
320 * emitted.
321 *
322 * To avoid extra work, algorithm marks some of BTF types as ORDERED, when
323 * it's done with them, but not for all (e.g., VOLATILE, CONST, RESTRICT,
324 * ARRAY, FUNC_PROTO), as weak/strong semantics for those depends on the
325 * entire graph path, so depending where from one came to that BTF type, it
326 * might cause weak or strong ordering. For types like STRUCT/UNION/INT/ENUM,
327 * once they are processed, there is no need to do it again, so they are
328 * marked as ORDERED. We can mark PTR as ORDERED as well, as it semi-forces
329 * weak link, unless subsequent referenced STRUCT/UNION/ENUM is anonymous. But
330 * in any case, once those are processed, no need to do it again, as the
331 * result won't change.
332 *
333 * Returns:
334 * - 1, if type is part of strong link (so there is strong topological
335 * ordering requirements);
336 * - 0, if type is part of weak link (so can be satisfied through forward
337 * declaration);
338 * - <0, on error (e.g., unsatisfiable type loop detected).
339 */
340static int btf_dump_order_type(struct btf_dump *d, __u32 id, bool through_ptr)
341{
342 /*
343 * Order state is used to detect strong link cycles, but only for BTF
344 * kinds that are or could be an independent definition (i.e.,
345 * stand-alone fwd decl, enum, typedef, struct, union). Ptrs, arrays,
346 * func_protos, modifiers are just means to get to these definitions.
347 * Int/void don't need definitions, they are assumed to be always
348 * properly defined. We also ignore datasec, var, and funcs for now.
349 * So for all non-defining kinds, we never even set ordering state,
350 * for defining kinds we set ORDERING and subsequently ORDERED if it
351 * forms a strong link.
352 */
353 struct btf_dump_type_aux_state *tstate = &d->type_states[id];
354 const struct btf_type *t;
355 __u16 kind, vlen;
356 int err, i;
357
358 /* return true, letting typedefs know that it's ok to be emitted */
359 if (tstate->order_state == ORDERED)
360 return 1;
361
362 t = btf__type_by_id(d->btf, id);
363 kind = btf_kind_of(t);
364
365 if (tstate->order_state == ORDERING) {
366 /* type loop, but resolvable through fwd declaration */
367 if ((kind == BTF_KIND_STRUCT || kind == BTF_KIND_UNION) &&
368 through_ptr && t->name_off != 0)
369 return 0;
370 pr_warning("unsatisfiable type cycle, id:[%u]\n", id);
371 return -ELOOP;
372 }
373
374 switch (kind) {
375 case BTF_KIND_INT:
376 tstate->order_state = ORDERED;
377 return 0;
378
379 case BTF_KIND_PTR:
380 err = btf_dump_order_type(d, t->type, true);
381 tstate->order_state = ORDERED;
382 return err;
383
384 case BTF_KIND_ARRAY: {
385 const struct btf_array *a = (void *)(t + 1);
386
387 return btf_dump_order_type(d, a->type, through_ptr);
388 }
389 case BTF_KIND_STRUCT:
390 case BTF_KIND_UNION: {
391 const struct btf_member *m = (void *)(t + 1);
392 /*
393 * struct/union is part of strong link, only if it's embedded
394 * (so no ptr in a path) or it's anonymous (so has to be
395 * defined inline, even if declared through ptr)
396 */
397 if (through_ptr && t->name_off != 0)
398 return 0;
399
400 tstate->order_state = ORDERING;
401
402 vlen = btf_vlen_of(t);
403 for (i = 0; i < vlen; i++, m++) {
404 err = btf_dump_order_type(d, m->type, false);
405 if (err < 0)
406 return err;
407 }
408
409 if (t->name_off != 0) {
410 err = btf_dump_add_emit_queue_id(d, id);
411 if (err < 0)
412 return err;
413 }
414
415 tstate->order_state = ORDERED;
416 return 1;
417 }
418 case BTF_KIND_ENUM:
419 case BTF_KIND_FWD:
420 if (t->name_off != 0) {
421 err = btf_dump_add_emit_queue_id(d, id);
422 if (err)
423 return err;
424 }
425 tstate->order_state = ORDERED;
426 return 1;
427
428 case BTF_KIND_TYPEDEF: {
429 int is_strong;
430
431 is_strong = btf_dump_order_type(d, t->type, through_ptr);
432 if (is_strong < 0)
433 return is_strong;
434
435 /* typedef is similar to struct/union w.r.t. fwd-decls */
436 if (through_ptr && !is_strong)
437 return 0;
438
439 /* typedef is always a named definition */
440 err = btf_dump_add_emit_queue_id(d, id);
441 if (err)
442 return err;
443
444 d->type_states[id].order_state = ORDERED;
445 return 1;
446 }
447 case BTF_KIND_VOLATILE:
448 case BTF_KIND_CONST:
449 case BTF_KIND_RESTRICT:
450 return btf_dump_order_type(d, t->type, through_ptr);
451
452 case BTF_KIND_FUNC_PROTO: {
453 const struct btf_param *p = (void *)(t + 1);
454 bool is_strong;
455
456 err = btf_dump_order_type(d, t->type, through_ptr);
457 if (err < 0)
458 return err;
459 is_strong = err > 0;
460
461 vlen = btf_vlen_of(t);
462 for (i = 0; i < vlen; i++, p++) {
463 err = btf_dump_order_type(d, p->type, through_ptr);
464 if (err < 0)
465 return err;
466 if (err > 0)
467 is_strong = true;
468 }
469 return is_strong;
470 }
471 case BTF_KIND_FUNC:
472 case BTF_KIND_VAR:
473 case BTF_KIND_DATASEC:
474 d->type_states[id].order_state = ORDERED;
475 return 0;
476
477 default:
478 return -EINVAL;
479 }
480}
481
482static void btf_dump_emit_struct_fwd(struct btf_dump *d, __u32 id,
483 const struct btf_type *t);
484static void btf_dump_emit_struct_def(struct btf_dump *d, __u32 id,
485 const struct btf_type *t, int lvl);
486
487static void btf_dump_emit_enum_fwd(struct btf_dump *d, __u32 id,
488 const struct btf_type *t);
489static void btf_dump_emit_enum_def(struct btf_dump *d, __u32 id,
490 const struct btf_type *t, int lvl);
491
492static void btf_dump_emit_fwd_def(struct btf_dump *d, __u32 id,
493 const struct btf_type *t);
494
495static void btf_dump_emit_typedef_def(struct btf_dump *d, __u32 id,
496 const struct btf_type *t, int lvl);
497
/*
 * A local view into a shared stack: ids points into a non-overlapping
 * portion of the shared btf_dump->decl_stack array, cnt is the number of
 * type IDs belonging to this "stack frame".
 */
struct id_stack {
	const __u32 *ids;
	int cnt;
};
503
504static void btf_dump_emit_type_decl(struct btf_dump *d, __u32 id,
505 const char *fname, int lvl);
506static void btf_dump_emit_type_chain(struct btf_dump *d,
507 struct id_stack *decl_stack,
508 const char *fname, int lvl);
509
510static const char *btf_dump_type_name(struct btf_dump *d, __u32 id);
511static const char *btf_dump_ident_name(struct btf_dump *d, __u32 id);
512static size_t btf_dump_name_dups(struct btf_dump *d, struct hashmap *name_map,
513 const char *orig_name);
514
515static bool btf_dump_is_blacklisted(struct btf_dump *d, __u32 id)
516{
517 const struct btf_type *t = btf__type_by_id(d->btf, id);
518
519 /* __builtin_va_list is a compiler built-in, which causes compilation
520 * errors, when compiling w/ different compiler, then used to compile
521 * original code (e.g., GCC to compile kernel, Clang to use generated
522 * C header from BTF). As it is built-in, it should be already defined
523 * properly internally in compiler.
524 */
525 if (t->name_off == 0)
526 return false;
527 return strcmp(btf_name_of(d, t->name_off), "__builtin_va_list") == 0;
528}
529
530/*
531 * Emit C-syntax definitions of types from chains of BTF types.
532 *
 * Determining which forward declarations are necessary is handled by
 * btf_dump_emit_type() itself, but all nitty-gritty details of emitting type
535 * declarations/definitions in C syntax are handled by a combo of
536 * btf_dump_emit_type_decl()/btf_dump_emit_type_chain() w/ delegation to
537 * corresponding btf_dump_emit_*_{def,fwd}() functions.
538 *
539 * We also keep track of "containing struct/union type ID" to determine when
540 * we reference it from inside and thus can avoid emitting unnecessary forward
541 * declaration.
542 *
543 * This algorithm is designed in such a way, that even if some error occurs
544 * (either technical, e.g., out of memory, or logical, i.e., malformed BTF
545 * that doesn't comply to C rules completely), algorithm will try to proceed
546 * and produce as much meaningful output as possible.
547 */
548static void btf_dump_emit_type(struct btf_dump *d, __u32 id, __u32 cont_id)
549{
550 struct btf_dump_type_aux_state *tstate = &d->type_states[id];
551 bool top_level_def = cont_id == 0;
552 const struct btf_type *t;
553 __u16 kind;
554
555 if (tstate->emit_state == EMITTED)
556 return;
557
558 t = btf__type_by_id(d->btf, id);
559 kind = btf_kind_of(t);
560
561 if (top_level_def && t->name_off == 0) {
562 pr_warning("unexpected nameless definition, id:[%u]\n", id);
563 return;
564 }
565
566 if (tstate->emit_state == EMITTING) {
567 if (tstate->fwd_emitted)
568 return;
569
570 switch (kind) {
571 case BTF_KIND_STRUCT:
572 case BTF_KIND_UNION:
573 /*
574 * if we are referencing a struct/union that we are
575 * part of - then no need for fwd declaration
576 */
577 if (id == cont_id)
578 return;
579 if (t->name_off == 0) {
580 pr_warning("anonymous struct/union loop, id:[%u]\n",
581 id);
582 return;
583 }
584 btf_dump_emit_struct_fwd(d, id, t);
585 btf_dump_printf(d, ";\n\n");
586 tstate->fwd_emitted = 1;
587 break;
588 case BTF_KIND_TYPEDEF:
589 /*
590 * for typedef fwd_emitted means typedef definition
591 * was emitted, but it can be used only for "weak"
592 * references through pointer only, not for embedding
593 */
594 if (!btf_dump_is_blacklisted(d, id)) {
595 btf_dump_emit_typedef_def(d, id, t, 0);
596 btf_dump_printf(d, ";\n\n");
597 };
598 tstate->fwd_emitted = 1;
599 break;
600 default:
601 break;
602 }
603
604 return;
605 }
606
607 switch (kind) {
608 case BTF_KIND_INT:
609 tstate->emit_state = EMITTED;
610 break;
611 case BTF_KIND_ENUM:
612 if (top_level_def) {
613 btf_dump_emit_enum_def(d, id, t, 0);
614 btf_dump_printf(d, ";\n\n");
615 }
616 tstate->emit_state = EMITTED;
617 break;
618 case BTF_KIND_PTR:
619 case BTF_KIND_VOLATILE:
620 case BTF_KIND_CONST:
621 case BTF_KIND_RESTRICT:
622 btf_dump_emit_type(d, t->type, cont_id);
623 break;
624 case BTF_KIND_ARRAY: {
625 const struct btf_array *a = (void *)(t + 1);
626
627 btf_dump_emit_type(d, a->type, cont_id);
628 break;
629 }
630 case BTF_KIND_FWD:
631 btf_dump_emit_fwd_def(d, id, t);
632 btf_dump_printf(d, ";\n\n");
633 tstate->emit_state = EMITTED;
634 break;
635 case BTF_KIND_TYPEDEF:
636 tstate->emit_state = EMITTING;
637 btf_dump_emit_type(d, t->type, id);
638 /*
639 * typedef can server as both definition and forward
640 * declaration; at this stage someone depends on
641 * typedef as a forward declaration (refers to it
642 * through pointer), so unless we already did it,
643 * emit typedef as a forward declaration
644 */
645 if (!tstate->fwd_emitted && !btf_dump_is_blacklisted(d, id)) {
646 btf_dump_emit_typedef_def(d, id, t, 0);
647 btf_dump_printf(d, ";\n\n");
648 }
649 tstate->emit_state = EMITTED;
650 break;
651 case BTF_KIND_STRUCT:
652 case BTF_KIND_UNION:
653 tstate->emit_state = EMITTING;
654 /* if it's a top-level struct/union definition or struct/union
655 * is anonymous, then in C we'll be emitting all fields and
656 * their types (as opposed to just `struct X`), so we need to
657 * make sure that all types, referenced from struct/union
658 * members have necessary forward-declarations, where
659 * applicable
660 */
661 if (top_level_def || t->name_off == 0) {
662 const struct btf_member *m = (void *)(t + 1);
663 __u16 vlen = btf_vlen_of(t);
664 int i, new_cont_id;
665
666 new_cont_id = t->name_off == 0 ? cont_id : id;
667 for (i = 0; i < vlen; i++, m++)
668 btf_dump_emit_type(d, m->type, new_cont_id);
669 } else if (!tstate->fwd_emitted && id != cont_id) {
670 btf_dump_emit_struct_fwd(d, id, t);
671 btf_dump_printf(d, ";\n\n");
672 tstate->fwd_emitted = 1;
673 }
674
675 if (top_level_def) {
676 btf_dump_emit_struct_def(d, id, t, 0);
677 btf_dump_printf(d, ";\n\n");
678 tstate->emit_state = EMITTED;
679 } else {
680 tstate->emit_state = NOT_EMITTED;
681 }
682 break;
683 case BTF_KIND_FUNC_PROTO: {
684 const struct btf_param *p = (void *)(t + 1);
685 __u16 vlen = btf_vlen_of(t);
686 int i;
687
688 btf_dump_emit_type(d, t->type, cont_id);
689 for (i = 0; i < vlen; i++, p++)
690 btf_dump_emit_type(d, p->type, cont_id);
691
692 break;
693 }
694 default:
695 break;
696 }
697}
698
699static int btf_align_of(const struct btf *btf, __u32 id)
700{
701 const struct btf_type *t = btf__type_by_id(btf, id);
702 __u16 kind = btf_kind_of(t);
703
704 switch (kind) {
705 case BTF_KIND_INT:
706 case BTF_KIND_ENUM:
707 return min(sizeof(void *), t->size);
708 case BTF_KIND_PTR:
709 return sizeof(void *);
710 case BTF_KIND_TYPEDEF:
711 case BTF_KIND_VOLATILE:
712 case BTF_KIND_CONST:
713 case BTF_KIND_RESTRICT:
714 return btf_align_of(btf, t->type);
715 case BTF_KIND_ARRAY: {
716 const struct btf_array *a = (void *)(t + 1);
717
718 return btf_align_of(btf, a->type);
719 }
720 case BTF_KIND_STRUCT:
721 case BTF_KIND_UNION: {
722 const struct btf_member *m = (void *)(t + 1);
723 __u16 vlen = btf_vlen_of(t);
724 int i, align = 1;
725
726 for (i = 0; i < vlen; i++, m++)
727 align = max(align, btf_align_of(btf, m->type));
728
729 return align;
730 }
731 default:
732 pr_warning("unsupported BTF_KIND:%u\n", btf_kind_of(t));
733 return 1;
734 }
735}
736
737static bool btf_is_struct_packed(const struct btf *btf, __u32 id,
738 const struct btf_type *t)
739{
740 const struct btf_member *m;
741 int align, i, bit_sz;
742 __u16 vlen;
743 bool kflag;
744
745 align = btf_align_of(btf, id);
746 /* size of a non-packed struct has to be a multiple of its alignment*/
747 if (t->size % align)
748 return true;
749
750 m = (void *)(t + 1);
751 kflag = btf_kflag_of(t);
752 vlen = btf_vlen_of(t);
753 /* all non-bitfield fields have to be naturally aligned */
754 for (i = 0; i < vlen; i++, m++) {
755 align = btf_align_of(btf, m->type);
756 bit_sz = kflag ? BTF_MEMBER_BITFIELD_SIZE(m->offset) : 0;
757 if (bit_sz == 0 && m->offset % (8 * align) != 0)
758 return true;
759 }
760
761 /*
762 * if original struct was marked as packed, but its layout is
763 * naturally aligned, we'll detect that it's not packed
764 */
765 return false;
766}
767
/*
 * Return how many bits to emit as the next padding field: the remainder of
 * total modulo at_most, or a full at_most-sized chunk if total divides
 * evenly. (Portable equivalent of the original GNU `?:` extension form.)
 */
static int chip_away_bits(int total, int at_most)
{
	int rem = total % at_most;

	return rem ? rem : at_most;
}
772
/*
 * Emit explicit bitfield padding to advance current bit offset (cur_off)
 * up to the bit offset of the next member (m_off), chipping away at the
 * gap with the widest suitable padding fields.
 */
static void btf_dump_emit_bit_padding(const struct btf_dump *d,
				      int cur_off, int m_off, int m_bit_sz,
				      int align, int lvl)
{
	const int ptr_bits = sizeof(void *) * 8;
	int gap = m_off - cur_off;

	/* nothing to do if there's no gap */
	if (gap <= 0)
		return;
	/* for a non-bitfield member, natural alignment absorbs small gaps */
	if (m_bit_sz == 0 && gap < align * 8)
		return;

	do {
		const char *pad_type;
		int pad_bits;

		if (ptr_bits > 32 && gap > 32) {
			pad_type = "long";
			pad_bits = chip_away_bits(gap, ptr_bits);
		} else if (gap > 16) {
			pad_type = "int";
			pad_bits = chip_away_bits(gap, 32);
		} else if (gap > 8) {
			pad_type = "short";
			pad_bits = chip_away_bits(gap, 16);
		} else {
			pad_type = "char";
			pad_bits = chip_away_bits(gap, 8);
		}
		btf_dump_printf(d, "\n%s%s: %d;", pfx(lvl), pad_type, pad_bits);
		gap -= pad_bits;
	} while (gap > 0);
}
808
809static void btf_dump_emit_struct_fwd(struct btf_dump *d, __u32 id,
810 const struct btf_type *t)
811{
812 btf_dump_printf(d, "%s %s",
813 btf_kind_of(t) == BTF_KIND_STRUCT ? "struct" : "union",
814 btf_dump_type_name(d, id));
815}
816
/*
 * Emit full struct/union definition with all members, including explicit
 * bit padding between members, at given indentation level.
 */
static void btf_dump_emit_struct_def(struct btf_dump *d,
				     __u32 id,
				     const struct btf_type *t,
				     int lvl)
{
	const struct btf_member *m = (void *)(t + 1);
	bool kflag = btf_kflag_of(t), is_struct;
	int align, i, packed, off = 0;
	__u16 vlen = btf_vlen_of(t);

	is_struct = btf_kind_of(t) == BTF_KIND_STRUCT;
	/* packed-ness is only detected for structs */
	packed = is_struct ? btf_is_struct_packed(d->btf, id, t) : 0;
	align = packed ? 1 : btf_align_of(d->btf, id);

	btf_dump_printf(d, "%s%s%s {",
			is_struct ? "struct" : "union",
			t->name_off ? " " : "",
			btf_dump_type_name(d, id));

	for (i = 0; i < vlen; i++, m++) {
		const char *fname;
		int m_off, m_sz;

		fname = btf_name_of(d, m->name_off);
		/* m_sz is non-zero only for bitfield members */
		m_sz = kflag ? BTF_MEMBER_BITFIELD_SIZE(m->offset) : 0;
		m_off = kflag ? BTF_MEMBER_BIT_OFFSET(m->offset) : m->offset;
		align = packed ? 1 : btf_align_of(d->btf, m->type);

		/* emit explicit padding if there's a gap before this member */
		btf_dump_emit_bit_padding(d, off, m_off, m_sz, align, lvl + 1);
		btf_dump_printf(d, "\n%s", pfx(lvl + 1));
		btf_dump_emit_type_decl(d, m->type, fname, lvl + 1);

		if (m_sz) {
			btf_dump_printf(d, ": %d", m_sz);
			off = m_off + m_sz;
		} else {
			/* off tracks current bit offset past this member */
			m_sz = max(0, btf__resolve_size(d->btf, m->type));
			off = m_off + m_sz * 8;
		}
		btf_dump_printf(d, ";");
	}

	if (vlen)
		btf_dump_printf(d, "\n");
	btf_dump_printf(d, "%s}", pfx(lvl));
	if (packed)
		btf_dump_printf(d, " __attribute__((packed))");
}
865
866static void btf_dump_emit_enum_fwd(struct btf_dump *d, __u32 id,
867 const struct btf_type *t)
868{
869 btf_dump_printf(d, "enum %s", btf_dump_type_name(d, id));
870}
871
872static void btf_dump_emit_enum_def(struct btf_dump *d, __u32 id,
873 const struct btf_type *t,
874 int lvl)
875{
876 const struct btf_enum *v = (void *)(t+1);
877 __u16 vlen = btf_vlen_of(t);
878 const char *name;
879 size_t dup_cnt;
880 int i;
881
882 btf_dump_printf(d, "enum%s%s",
883 t->name_off ? " " : "",
884 btf_dump_type_name(d, id));
885
886 if (vlen) {
887 btf_dump_printf(d, " {");
888 for (i = 0; i < vlen; i++, v++) {
889 name = btf_name_of(d, v->name_off);
890 /* enumerators share namespace with typedef idents */
891 dup_cnt = btf_dump_name_dups(d, d->ident_names, name);
892 if (dup_cnt > 1) {
893 btf_dump_printf(d, "\n%s%s___%zu = %d,",
894 pfx(lvl + 1), name, dup_cnt,
895 (__s32)v->val);
896 } else {
897 btf_dump_printf(d, "\n%s%s = %d,",
898 pfx(lvl + 1), name,
899 (__s32)v->val);
900 }
901 }
902 btf_dump_printf(d, "\n%s}", pfx(lvl));
903 }
904}
905
906static void btf_dump_emit_fwd_def(struct btf_dump *d, __u32 id,
907 const struct btf_type *t)
908{
909 const char *name = btf_dump_type_name(d, id);
910
911 if (btf_kflag_of(t))
912 btf_dump_printf(d, "union %s", name);
913 else
914 btf_dump_printf(d, "struct %s", name);
915}
916
917static void btf_dump_emit_typedef_def(struct btf_dump *d, __u32 id,
918 const struct btf_type *t, int lvl)
919{
920 const char *name = btf_dump_ident_name(d, id);
921
922 btf_dump_printf(d, "typedef ");
923 btf_dump_emit_type_decl(d, t->type, name, lvl);
924}
925
926static int btf_dump_push_decl_stack_id(struct btf_dump *d, __u32 id)
927{
928 __u32 *new_stack;
929 size_t new_cap;
930
931 if (d->decl_stack_cnt >= d->decl_stack_cap) {
932 new_cap = max(16, d->decl_stack_cap * 3 / 2);
933 new_stack = realloc(d->decl_stack,
934 new_cap * sizeof(new_stack[0]));
935 if (!new_stack)
936 return -ENOMEM;
937 d->decl_stack = new_stack;
938 d->decl_stack_cap = new_cap;
939 }
940
941 d->decl_stack[d->decl_stack_cnt++] = id;
942
943 return 0;
944}
945
946/*
947 * Emit type declaration (e.g., field type declaration in a struct or argument
948 * declaration in function prototype) in correct C syntax.
949 *
950 * For most types it's trivial, but there are few quirky type declaration
951 * cases worth mentioning:
952 * - function prototypes (especially nesting of function prototypes);
953 * - arrays;
954 * - const/volatile/restrict for pointers vs other types.
955 *
956 * For a good discussion of *PARSING* C syntax (as a human), see
957 * Peter van der Linden's "Expert C Programming: Deep C Secrets",
958 * Ch.3 "Unscrambling Declarations in C".
959 *
960 * It won't help with BTF to C conversion much, though, as it's an opposite
961 * problem. So we came up with this algorithm in reverse to van der Linden's
962 * parsing algorithm. It goes from structured BTF representation of type
963 * declaration to a valid compilable C syntax.
964 *
965 * For instance, consider this C typedef:
 *	typedef const int * const *arr_t[10];
967 * It will be represented in BTF with this chain of BTF types:
968 * [typedef] -> [array] -> [ptr] -> [const] -> [ptr] -> [const] -> [int]
969 *
970 * Notice how [const] modifier always goes before type it modifies in BTF type
971 * graph, but in C syntax, const/volatile/restrict modifiers are written to
972 * the right of pointers, but to the left of other types. There are also other
973 * quirks, like function pointers, arrays of them, functions returning other
974 * functions, etc.
975 *
976 * We handle that by pushing all the types to a stack, until we hit "terminal"
977 * type (int/enum/struct/union/fwd). Then depending on the kind of a type on
978 * top of a stack, modifiers are handled differently. Array/function pointers
979 * have also wildly different syntax and how nesting of them are done. See
980 * code for authoritative definition.
981 *
982 * To avoid allocating new stack for each independent chain of BTF types, we
983 * share one bigger stack, with each chain working only on its own local view
984 * of a stack frame. Some care is required to "pop" stack frames after
985 * processing type declaration chain.
986 */
987static void btf_dump_emit_type_decl(struct btf_dump *d, __u32 id,
988 const char *fname, int lvl)
989{
990 struct id_stack decl_stack;
991 const struct btf_type *t;
992 int err, stack_start;
993 __u16 kind;
994
995 stack_start = d->decl_stack_cnt;
996 for (;;) {
997 err = btf_dump_push_decl_stack_id(d, id);
998 if (err < 0) {
999 /*
1000 * if we don't have enough memory for entire type decl
1001 * chain, restore stack, emit warning, and try to
1002 * proceed nevertheless
1003 */
1004 pr_warning("not enough memory for decl stack:%d", err);
1005 d->decl_stack_cnt = stack_start;
1006 return;
1007 }
1008
1009 /* VOID */
1010 if (id == 0)
1011 break;
1012
1013 t = btf__type_by_id(d->btf, id);
1014 kind = btf_kind_of(t);
1015 switch (kind) {
1016 case BTF_KIND_PTR:
1017 case BTF_KIND_VOLATILE:
1018 case BTF_KIND_CONST:
1019 case BTF_KIND_RESTRICT:
1020 case BTF_KIND_FUNC_PROTO:
1021 id = t->type;
1022 break;
1023 case BTF_KIND_ARRAY: {
1024 const struct btf_array *a = (void *)(t + 1);
1025
1026 id = a->type;
1027 break;
1028 }
1029 case BTF_KIND_INT:
1030 case BTF_KIND_ENUM:
1031 case BTF_KIND_FWD:
1032 case BTF_KIND_STRUCT:
1033 case BTF_KIND_UNION:
1034 case BTF_KIND_TYPEDEF:
1035 goto done;
1036 default:
1037 pr_warning("unexpected type in decl chain, kind:%u, id:[%u]\n",
1038 kind, id);
1039 goto done;
1040 }
1041 }
1042done:
1043 /*
1044 * We might be inside a chain of declarations (e.g., array of function
1045 * pointers returning anonymous (so inlined) structs, having another
1046 * array field). Each of those needs its own "stack frame" to handle
1047 * emitting of declarations. Those stack frames are non-overlapping
1048 * portions of shared btf_dump->decl_stack. To make it a bit nicer to
1049 * handle this set of nested stacks, we create a view corresponding to
1050 * our own "stack frame" and work with it as an independent stack.
1051 * We'll need to clean up after emit_type_chain() returns, though.
1052 */
1053 decl_stack.ids = d->decl_stack + stack_start;
1054 decl_stack.cnt = d->decl_stack_cnt - stack_start;
1055 btf_dump_emit_type_chain(d, &decl_stack, fname, lvl);
1056 /*
1057 * emit_type_chain() guarantees that it will pop its entire decl_stack
1058 * frame before returning. But it works with a read-only view into
1059 * decl_stack, so it doesn't actually pop anything from the
1060 * perspective of shared btf_dump->decl_stack, per se. We need to
1061 * reset decl_stack state to how it was before us to avoid it growing
1062 * all the time.
1063 */
1064 d->decl_stack_cnt = stack_start;
1065}
1066
1067static void btf_dump_emit_mods(struct btf_dump *d, struct id_stack *decl_stack)
1068{
1069 const struct btf_type *t;
1070 __u32 id;
1071
1072 while (decl_stack->cnt) {
1073 id = decl_stack->ids[decl_stack->cnt - 1];
1074 t = btf__type_by_id(d->btf, id);
1075
1076 switch (btf_kind_of(t)) {
1077 case BTF_KIND_VOLATILE:
1078 btf_dump_printf(d, "volatile ");
1079 break;
1080 case BTF_KIND_CONST:
1081 btf_dump_printf(d, "const ");
1082 break;
1083 case BTF_KIND_RESTRICT:
1084 btf_dump_printf(d, "restrict ");
1085 break;
1086 default:
1087 return;
1088 }
1089 decl_stack->cnt--;
1090 }
1091}
1092
1093static bool btf_is_mod_kind(const struct btf *btf, __u32 id)
1094{
1095 const struct btf_type *t = btf__type_by_id(btf, id);
1096
1097 switch (btf_kind_of(t)) {
1098 case BTF_KIND_VOLATILE:
1099 case BTF_KIND_CONST:
1100 case BTF_KIND_RESTRICT:
1101 return true;
1102 default:
1103 return false;
1104 }
1105}
1106
/*
 * Emit declared identifier, separating it with a space from the preceding
 * type text unless the name is empty or directly follows a pointer '*'.
 */
static void btf_dump_emit_name(const struct btf_dump *d,
			       const char *name, bool last_was_ptr)
{
	if (name[0] && !last_was_ptr)
		btf_dump_printf(d, " %s", name);
	else
		btf_dump_printf(d, "%s", name);
}
1114
/*
 * Emit the declarator chain accumulated on the decl stack (see comment
 * above btf_dump_emit_type_decl()), combining modifiers, pointers, arrays
 * and function prototypes into valid C declaration syntax around fname.
 * The chain is consumed top-down (innermost type first).
 */
static void btf_dump_emit_type_chain(struct btf_dump *d,
				     struct id_stack *decls,
				     const char *fname, int lvl)
{
	/*
	 * last_was_ptr is used to determine if we need to separate pointer
	 * asterisk (*) from previous part of type signature with space, so
	 * that we get `int ***`, instead of `int * * *`. We default to true
	 * for cases where we have single pointer in a chain. E.g., in ptr ->
	 * func_proto case. func_proto will start a new emit_type_chain call
	 * with just ptr, which should be emitted as (*) or (*<fname>), so we
	 * don't want to prepend space for that last pointer.
	 */
	bool last_was_ptr = true;
	const struct btf_type *t;
	const char *name;
	__u16 kind;
	__u32 id;

	while (decls->cnt) {
		id = decls->ids[--decls->cnt];
		if (id == 0) {
			/* VOID is a special snowflake */
			btf_dump_emit_mods(d, decls);
			btf_dump_printf(d, "void");
			last_was_ptr = false;
			continue;
		}

		t = btf__type_by_id(d->btf, id);
		kind = btf_kind_of(t);

		switch (kind) {
		case BTF_KIND_INT:
			btf_dump_emit_mods(d, decls);
			name = btf_name_of(d, t->name_off);
			btf_dump_printf(d, "%s", name);
			break;
		case BTF_KIND_STRUCT:
		case BTF_KIND_UNION:
			btf_dump_emit_mods(d, decls);
			/* inline anonymous struct/union */
			if (t->name_off == 0)
				btf_dump_emit_struct_def(d, id, t, lvl);
			else
				btf_dump_emit_struct_fwd(d, id, t);
			break;
		case BTF_KIND_ENUM:
			btf_dump_emit_mods(d, decls);
			/* inline anonymous enum */
			if (t->name_off == 0)
				btf_dump_emit_enum_def(d, id, t, lvl);
			else
				btf_dump_emit_enum_fwd(d, id, t);
			break;
		case BTF_KIND_FWD:
			btf_dump_emit_mods(d, decls);
			btf_dump_emit_fwd_def(d, id, t);
			break;
		case BTF_KIND_TYPEDEF:
			btf_dump_emit_mods(d, decls);
			btf_dump_printf(d, "%s", btf_dump_ident_name(d, id));
			break;
		case BTF_KIND_PTR:
			btf_dump_printf(d, "%s", last_was_ptr ? "*" : " *");
			break;
		case BTF_KIND_VOLATILE:
			btf_dump_printf(d, " volatile");
			break;
		case BTF_KIND_CONST:
			btf_dump_printf(d, " const");
			break;
		case BTF_KIND_RESTRICT:
			btf_dump_printf(d, " restrict");
			break;
		case BTF_KIND_ARRAY: {
			const struct btf_array *a = (void *)(t + 1);
			const struct btf_type *next_t;
			__u32 next_id;
			bool multidim;
			/*
			 * GCC has a bug
			 * (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=8354)
			 * which causes it to emit extra const/volatile
			 * modifiers for an array, if array's element type has
			 * const/volatile modifiers. Clang doesn't do that.
			 * In general, it doesn't seem very meaningful to have
			 * a const/volatile modifier for array, so we are
			 * going to silently skip them here.
			 */
			while (decls->cnt) {
				next_id = decls->ids[decls->cnt - 1];
				if (btf_is_mod_kind(d->btf, next_id))
					decls->cnt--;
				else
					break;
			}

			if (decls->cnt == 0) {
				/* array is the outermost declarator */
				btf_dump_emit_name(d, fname, last_was_ptr);
				btf_dump_printf(d, "[%u]", a->nelems);
				return;
			}

			next_t = btf__type_by_id(d->btf, next_id);
			multidim = btf_kind_of(next_t) == BTF_KIND_ARRAY;
			/* we need space if we have named non-pointer */
			if (fname[0] && !last_was_ptr)
				btf_dump_printf(d, " ");
			/* no parentheses for multi-dimensional array */
			if (!multidim)
				btf_dump_printf(d, "(");
			btf_dump_emit_type_chain(d, decls, fname, lvl);
			if (!multidim)
				btf_dump_printf(d, ")");
			btf_dump_printf(d, "[%u]", a->nelems);
			return;
		}
		case BTF_KIND_FUNC_PROTO: {
			const struct btf_param *p = (void *)(t + 1);
			__u16 vlen = btf_vlen_of(t);
			int i;

			btf_dump_emit_mods(d, decls);
			if (decls->cnt) {
				btf_dump_printf(d, " (");
				btf_dump_emit_type_chain(d, decls, fname, lvl);
				btf_dump_printf(d, ")");
			} else {
				btf_dump_emit_name(d, fname, last_was_ptr);
			}
			btf_dump_printf(d, "(");
			/*
			 * Clang for BPF target generates func_proto with no
			 * args as a func_proto with a single void arg (e.g.,
			 * `int (*f)(void)` vs just `int (*f)()`). We are
			 * going to pretend there are no args for such case.
			 */
			if (vlen == 1 && p->type == 0) {
				btf_dump_printf(d, ")");
				return;
			}

			for (i = 0; i < vlen; i++, p++) {
				if (i > 0)
					btf_dump_printf(d, ", ");

				/* last arg of type void is vararg */
				if (i == vlen - 1 && p->type == 0) {
					btf_dump_printf(d, "...");
					break;
				}

				name = btf_name_of(d, p->name_off);
				btf_dump_emit_type_decl(d, p->type, name, lvl);
			}

			btf_dump_printf(d, ")");
			return;
		}
		default:
			pr_warning("unexpected type in decl chain, kind:%u, id:[%u]\n",
				   kind, id);
			return;
		}

		last_was_ptr = kind == BTF_KIND_PTR;
	}

	btf_dump_emit_name(d, fname, last_was_ptr);
}
1286
/* return number of duplicates (occurrences) of a given name */
static size_t btf_dump_name_dups(struct btf_dump *d, struct hashmap *name_map,
				 const char *orig_name)
{
	size_t dup_cnt = 0;

	/* occurrence counter is stashed directly in the map's value slot,
	 * cast to/from a pointer-sized integer; missing key leaves it 0 */
	hashmap__find(name_map, orig_name, (void **)&dup_cnt);
	dup_cnt++;
	hashmap__set(name_map, orig_name, (void *)dup_cnt, NULL, NULL);

	return dup_cnt;
}
1299
1300static const char *btf_dump_resolve_name(struct btf_dump *d, __u32 id,
1301 struct hashmap *name_map)
1302{
1303 struct btf_dump_type_aux_state *s = &d->type_states[id];
1304 const struct btf_type *t = btf__type_by_id(d->btf, id);
1305 const char *orig_name = btf_name_of(d, t->name_off);
1306 const char **cached_name = &d->cached_names[id];
1307 size_t dup_cnt;
1308
1309 if (t->name_off == 0)
1310 return "";
1311
1312 if (s->name_resolved)
1313 return *cached_name ? *cached_name : orig_name;
1314
1315 dup_cnt = btf_dump_name_dups(d, name_map, orig_name);
1316 if (dup_cnt > 1) {
1317 const size_t max_len = 256;
1318 char new_name[max_len];
1319
1320 snprintf(new_name, max_len, "%s___%zu", orig_name, dup_cnt);
1321 *cached_name = strdup(new_name);
1322 }
1323
1324 s->name_resolved = 1;
1325 return *cached_name ? *cached_name : orig_name;
1326}
1327
/* Resolve unique name within struct/union/enum type-name namespace. */
static const char *btf_dump_type_name(struct btf_dump *d, __u32 id)
{
	return btf_dump_resolve_name(d, id, d->type_names);
}
1332
/* Resolve unique name within identifier (typedef/enumerator) namespace. */
static const char *btf_dump_ident_name(struct btf_dump *d, __u32 id)
{
	return btf_dump_resolve_name(d, id, d->ident_names);
}
diff --git a/tools/lib/bpf/hashmap.c b/tools/lib/bpf/hashmap.c
new file mode 100644
index 000000000000..6122272943e6
--- /dev/null
+++ b/tools/lib/bpf/hashmap.c
@@ -0,0 +1,229 @@
1// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
2
3/*
4 * Generic non-thread safe hash map implementation.
5 *
6 * Copyright (c) 2019 Facebook
7 */
8#include <stdint.h>
9#include <stdlib.h>
10#include <stdio.h>
11#include <errno.h>
12#include <linux/err.h>
13#include "hashmap.h"
14
15/* start with 4 buckets */
16#define HASHMAP_MIN_CAP_BITS 2
17
/* Prepend entry to the singly-linked bucket chain headed at *pprev. */
static void hashmap_add_entry(struct hashmap_entry **pprev,
			      struct hashmap_entry *entry)
{
	entry->next = *pprev;
	*pprev = entry;
}
24
/* Unlink entry from its bucket chain; *pprev is the link pointing at it. */
static void hashmap_del_entry(struct hashmap_entry **pprev,
			      struct hashmap_entry *entry)
{
	*pprev = entry->next;
	entry->next = NULL;
}
31
32void hashmap__init(struct hashmap *map, hashmap_hash_fn hash_fn,
33 hashmap_equal_fn equal_fn, void *ctx)
34{
35 map->hash_fn = hash_fn;
36 map->equal_fn = equal_fn;
37 map->ctx = ctx;
38
39 map->buckets = NULL;
40 map->cap = 0;
41 map->cap_bits = 0;
42 map->sz = 0;
43}
44
45struct hashmap *hashmap__new(hashmap_hash_fn hash_fn,
46 hashmap_equal_fn equal_fn,
47 void *ctx)
48{
49 struct hashmap *map = malloc(sizeof(struct hashmap));
50
51 if (!map)
52 return ERR_PTR(-ENOMEM);
53 hashmap__init(map, hash_fn, equal_fn, ctx);
54 return map;
55}
56
57void hashmap__clear(struct hashmap *map)
58{
59 free(map->buckets);
60 map->cap = map->cap_bits = map->sz = 0;
61}
62
/* Destroy a map allocated with hashmap__new(); NULL is a no-op. */
void hashmap__free(struct hashmap *map)
{
	if (map) {
		hashmap__clear(map);
		free(map);
	}
}
71
/* Number of key/value entries currently stored in the map. */
size_t hashmap__size(const struct hashmap *map)
{
	return map->sz;
}
76
/* Current number of allocated buckets. */
size_t hashmap__capacity(const struct hashmap *map)
{
	return map->cap;
}
81
82static bool hashmap_needs_to_grow(struct hashmap *map)
83{
84 /* grow if empty or more than 75% filled */
85 return (map->cap == 0) || ((map->sz + 1) * 4 / 3 > map->cap);
86}
87
/*
 * Double the number of buckets (at least 2^HASHMAP_MIN_CAP_BITS) and rehash
 * all existing entries into the new bucket array. Returns 0 on success,
 * -ENOMEM on allocation failure (map is left unchanged in that case).
 */
static int hashmap_grow(struct hashmap *map)
{
	struct hashmap_entry **new_buckets;
	struct hashmap_entry *cur, *tmp;
	size_t new_cap_bits, new_cap;
	size_t h;
	int bkt;

	new_cap_bits = map->cap_bits + 1;
	if (new_cap_bits < HASHMAP_MIN_CAP_BITS)
		new_cap_bits = HASHMAP_MIN_CAP_BITS;

	new_cap = 1UL << new_cap_bits;
	new_buckets = calloc(new_cap, sizeof(new_buckets[0]));
	if (!new_buckets)
		return -ENOMEM;

	/* relink every existing entry into its new bucket; entries
	 * themselves are reused, only the bucket-head array is replaced */
	hashmap__for_each_entry_safe(map, cur, tmp, bkt) {
		h = hash_bits(map->hash_fn(cur->key, map->ctx), new_cap_bits);
		hashmap_add_entry(&new_buckets[h], cur);
	}

	map->cap = new_cap;
	map->cap_bits = new_cap_bits;
	free(map->buckets);
	map->buckets = new_buckets;

	return 0;
}
117
118static bool hashmap_find_entry(const struct hashmap *map,
119 const void *key, size_t hash,
120 struct hashmap_entry ***pprev,
121 struct hashmap_entry **entry)
122{
123 struct hashmap_entry *cur, **prev_ptr;
124
125 if (!map->buckets)
126 return false;
127
128 for (prev_ptr = &map->buckets[hash], cur = *prev_ptr;
129 cur;
130 prev_ptr = &cur->next, cur = cur->next) {
131 if (map->equal_fn(cur->key, key, map->ctx)) {
132 if (pprev)
133 *pprev = prev_ptr;
134 *entry = cur;
135 return true;
136 }
137 }
138
139 return false;
140}
141
142int hashmap__insert(struct hashmap *map, const void *key, void *value,
143 enum hashmap_insert_strategy strategy,
144 const void **old_key, void **old_value)
145{
146 struct hashmap_entry *entry;
147 size_t h;
148 int err;
149
150 if (old_key)
151 *old_key = NULL;
152 if (old_value)
153 *old_value = NULL;
154
155 h = hash_bits(map->hash_fn(key, map->ctx), map->cap_bits);
156 if (strategy != HASHMAP_APPEND &&
157 hashmap_find_entry(map, key, h, NULL, &entry)) {
158 if (old_key)
159 *old_key = entry->key;
160 if (old_value)
161 *old_value = entry->value;
162
163 if (strategy == HASHMAP_SET || strategy == HASHMAP_UPDATE) {
164 entry->key = key;
165 entry->value = value;
166 return 0;
167 } else if (strategy == HASHMAP_ADD) {
168 return -EEXIST;
169 }
170 }
171
172 if (strategy == HASHMAP_UPDATE)
173 return -ENOENT;
174
175 if (hashmap_needs_to_grow(map)) {
176 err = hashmap_grow(map);
177 if (err)
178 return err;
179 h = hash_bits(map->hash_fn(key, map->ctx), map->cap_bits);
180 }
181
182 entry = malloc(sizeof(struct hashmap_entry));
183 if (!entry)
184 return -ENOMEM;
185
186 entry->key = key;
187 entry->value = value;
188 hashmap_add_entry(&map->buckets[h], entry);
189 map->sz++;
190
191 return 0;
192}
193
194bool hashmap__find(const struct hashmap *map, const void *key, void **value)
195{
196 struct hashmap_entry *entry;
197 size_t h;
198
199 h = hash_bits(map->hash_fn(key, map->ctx), map->cap_bits);
200 if (!hashmap_find_entry(map, key, h, NULL, &entry))
201 return false;
202
203 if (value)
204 *value = entry->value;
205 return true;
206}
207
208bool hashmap__delete(struct hashmap *map, const void *key,
209 const void **old_key, void **old_value)
210{
211 struct hashmap_entry **pprev, *entry;
212 size_t h;
213
214 h = hash_bits(map->hash_fn(key, map->ctx), map->cap_bits);
215 if (!hashmap_find_entry(map, key, h, &pprev, &entry))
216 return false;
217
218 if (old_key)
219 *old_key = entry->key;
220 if (old_value)
221 *old_value = entry->value;
222
223 hashmap_del_entry(pprev, entry);
224 free(entry);
225 map->sz--;
226
227 return true;
228}
229
diff --git a/tools/lib/bpf/hashmap.h b/tools/lib/bpf/hashmap.h
new file mode 100644
index 000000000000..03748a742146
--- /dev/null
+++ b/tools/lib/bpf/hashmap.h
@@ -0,0 +1,173 @@
1/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
2
3/*
4 * Generic non-thread safe hash map implementation.
5 *
6 * Copyright (c) 2019 Facebook
7 */
8#ifndef __LIBBPF_HASHMAP_H
9#define __LIBBPF_HASHMAP_H
10
11#include <stdbool.h>
12#include <stddef.h>
13#include "libbpf_internal.h"
14
/* Portability fallback: __WORDSIZE is a glibc-ism; derive it from the
 * size of size_t where it is not provided.
 */
#ifndef __WORDSIZE
#define __WORDSIZE (__SIZEOF_SIZE_T__ * 8)
#endif

/*
 * Fibonacci hashing: multiply by 2^64 / golden-ratio to shuffle the
 * input bits, then keep the requested number of upper bits as the
 * bucket index.
 *
 * The bits == 0 guard is required for correctness: a freshly
 * initialized map has cap_bits == 0 (see HASHMAP_INIT/hashmap__init),
 * and hashmap__find/__delete/__insert all pass map->cap_bits here.
 * Without the guard the expression shifts by __WORDSIZE — shifting by
 * the full width of the type is undefined behavior in C.
 */
static inline size_t hash_bits(size_t h, int bits)
{
	/* shuffle bits and return requested number of upper bits */
	if (bits == 0)
		return 0;
	return (h * 11400714819323198485llu) >> (__WORDSIZE - bits);
}
20
/* User-supplied hash function: maps @key (plus the map's @ctx pointer)
 * to a size_t hash value.
 */
typedef size_t (*hashmap_hash_fn)(const void *key, void *ctx);
/* User-supplied key equality predicate; @ctx is the map's ctx pointer. */
typedef bool (*hashmap_equal_fn)(const void *key1, const void *key2, void *ctx);

/* One bucket-chain node; colliding entries are linked through ->next. */
struct hashmap_entry {
	const void *key;
	void *value;
	struct hashmap_entry *next;
};

struct hashmap {
	hashmap_hash_fn hash_fn;
	hashmap_equal_fn equal_fn;
	void *ctx;	/* opaque context passed to hash_fn/equal_fn */

	struct hashmap_entry **buckets;	/* array of 'cap' bucket chains */
	size_t cap;	/* bucket count; presumably 2^cap_bits — confirm in hashmap_grow() */
	size_t cap_bits;	/* bits of hash used as bucket index; 0 when freshly initialized */
	size_t sz;	/* number of stored entries */
};

/* Static initializer for a struct hashmap (alternative to hashmap__init()). */
#define HASHMAP_INIT(hash_fn, equal_fn, ctx) {	    \
	.hash_fn = (hash_fn),			    \
	.equal_fn = (equal_fn),			    \
	.ctx = (ctx),				    \
	.buckets = NULL,			    \
	.cap = 0,				    \
	.cap_bits = 0,				    \
	.sz = 0,				    \
}
50
/* Initialize a caller-allocated map in place. */
void hashmap__init(struct hashmap *map, hashmap_hash_fn hash_fn,
		   hashmap_equal_fn equal_fn, void *ctx);
/* Allocate and initialize a new map; pair with hashmap__free(). */
struct hashmap *hashmap__new(hashmap_hash_fn hash_fn,
			     hashmap_equal_fn equal_fn,
			     void *ctx);
/* NOTE(review): presumably drops all entries but keeps the map usable —
 * confirm against hashmap.c.
 */
void hashmap__clear(struct hashmap *map);
/* Destroy a map obtained from hashmap__new(). */
void hashmap__free(struct hashmap *map);

/* Number of stored entries. */
size_t hashmap__size(const struct hashmap *map);
/* Current bucket count. */
size_t hashmap__capacity(const struct hashmap *map);

/*
 * Hashmap insertion strategy:
 * - HASHMAP_ADD - only add key/value if key doesn't exist yet;
 * - HASHMAP_SET - add key/value pair if key doesn't exist yet; otherwise,
 *   update value;
 * - HASHMAP_UPDATE - update value, if key already exists; otherwise, do
 *   nothing and return -ENOENT;
 * - HASHMAP_APPEND - always add key/value pair, even if key already exists.
 *   This turns hashmap into a multimap by allowing multiple values to be
 *   associated with the same key. Most useful read API for such hashmap is
 *   hashmap__for_each_key_entry() iteration. If hashmap__find() is still
 *   used, it will return last inserted key/value entry (first in a bucket
 *   chain).
 */
enum hashmap_insert_strategy {
	HASHMAP_ADD,
	HASHMAP_SET,
	HASHMAP_UPDATE,
	HASHMAP_APPEND,
};

/*
 * hashmap__insert() adds key/value entry w/ various semantics, depending on
 * provided strategy value. If a given key/value pair replaced already
 * existing key/value pair, both old key and old value will be returned
 * through old_key and old_value to allow calling code do proper memory
 * management.
 */
int hashmap__insert(struct hashmap *map, const void *key, void *value,
		    enum hashmap_insert_strategy strategy,
		    const void **old_key, void **old_value);
93
/* Insert @key/@value only if @key is not present yet; returns -EEXIST
 * if it already is.
 */
static inline int hashmap__add(struct hashmap *map,
			       const void *key, void *value)
{
	return hashmap__insert(map, key, value, HASHMAP_ADD, NULL, NULL);
}
99
/* Insert @key/@value, replacing an existing entry if present; replaced
 * key/value are returned via @old_key/@old_value so the caller can free
 * them.
 */
static inline int hashmap__set(struct hashmap *map,
			       const void *key, void *value,
			       const void **old_key, void **old_value)
{
	return hashmap__insert(map, key, value, HASHMAP_SET,
			       old_key, old_value);
}
107
/* Update the value of an existing @key only; returns -ENOENT if the key
 * is absent. Old key/value are reported via @old_key/@old_value.
 */
static inline int hashmap__update(struct hashmap *map,
				  const void *key, void *value,
				  const void **old_key, void **old_value)
{
	return hashmap__insert(map, key, value, HASHMAP_UPDATE,
			       old_key, old_value);
}
115
/* Always insert @key/@value, even if @key already exists (multimap
 * semantics: multiple values per key).
 */
static inline int hashmap__append(struct hashmap *map,
				  const void *key, void *value)
{
	return hashmap__insert(map, key, value, HASHMAP_APPEND, NULL, NULL);
}
121
/* Delete the entry matching @key; returns false if not found. Removed
 * key/value are returned via @old_key/@old_value for caller cleanup.
 */
bool hashmap__delete(struct hashmap *map, const void *key,
		     const void **old_key, void **old_value);

/* Find entry by @key; on success stores its value into *value (if non-NULL). */
bool hashmap__find(const struct hashmap *map, const void *key, void **value);

/*
 * hashmap__for_each_entry - iterate over all entries in hashmap
 * @map: hashmap to iterate
 * @cur: struct hashmap_entry * used as a loop cursor
 * @bkt: integer used as a bucket loop cursor
 */
#define hashmap__for_each_entry(map, cur, bkt)				    \
	for (bkt = 0; bkt < map->cap; bkt++)				    \
		for (cur = map->buckets[bkt]; cur; cur = cur->next)

/*
 * hashmap__for_each_entry_safe - iterate over all entries in hashmap, safe
 * against removals
 * @map: hashmap to iterate
 * @cur: struct hashmap_entry * used as a loop cursor
 * @tmp: struct hashmap_entry * used as a temporary next cursor storage
 * @bkt: integer used as a bucket loop cursor
 */
#define hashmap__for_each_entry_safe(map, cur, tmp, bkt)		    \
	for (bkt = 0; bkt < map->cap; bkt++)				    \
		for (cur = map->buckets[bkt];				    \
		     cur && ({tmp = cur->next; true; });		    \
		     cur = tmp)

/*
 * hashmap__for_each_key_entry - iterate over entries associated with given key
 * @map: hashmap to iterate
 * @cur: struct hashmap_entry * used as a loop cursor
 * @key: key to iterate entries for
 */
#define hashmap__for_each_key_entry(map, cur, _key)			    \
	for (cur = ({ size_t bkt = hash_bits(map->hash_fn((_key), map->ctx),\
					     map->cap_bits);		    \
		      map->buckets ? map->buckets[bkt] : NULL; });	    \
	     cur;							    \
	     cur = cur->next)						    \
		if (map->equal_fn(cur->key, (_key), map->ctx))

/*
 * hashmap__for_each_key_entry_safe - iterate over entries associated with
 * given key, safe against removal of the current entry
 * @map: hashmap to iterate
 * @cur: struct hashmap_entry * used as a loop cursor
 * @tmp: struct hashmap_entry * used as a temporary next cursor storage
 * @key: key to iterate entries for
 */
#define hashmap__for_each_key_entry_safe(map, cur, tmp, _key)		    \
	for (cur = ({ size_t bkt = hash_bits(map->hash_fn((_key), map->ctx),\
					     map->cap_bits);		    \
		      cur = map->buckets ? map->buckets[bkt] : NULL; });    \
	     cur && ({ tmp = cur->next; true; });			    \
	     cur = tmp)							    \
		if (map->equal_fn(cur->key, (_key), map->ctx))
172
173#endif /* __LIBBPF_HASHMAP_H */
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 197b574406b3..ba89d9727137 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -188,6 +188,7 @@ struct bpf_program {
188 void *line_info; 188 void *line_info;
189 __u32 line_info_rec_size; 189 __u32 line_info_rec_size;
190 __u32 line_info_cnt; 190 __u32 line_info_cnt;
191 __u32 prog_flags;
191}; 192};
192 193
193enum libbpf_map_type { 194enum libbpf_map_type {
@@ -348,8 +349,11 @@ static int
348bpf_program__init(void *data, size_t size, char *section_name, int idx, 349bpf_program__init(void *data, size_t size, char *section_name, int idx,
349 struct bpf_program *prog) 350 struct bpf_program *prog)
350{ 351{
351 if (size < sizeof(struct bpf_insn)) { 352 const size_t bpf_insn_sz = sizeof(struct bpf_insn);
352 pr_warning("corrupted section '%s'\n", section_name); 353
354 if (size == 0 || size % bpf_insn_sz) {
355 pr_warning("corrupted section '%s', size: %zu\n",
356 section_name, size);
353 return -EINVAL; 357 return -EINVAL;
354 } 358 }
355 359
@@ -375,9 +379,8 @@ bpf_program__init(void *data, size_t size, char *section_name, int idx,
375 section_name); 379 section_name);
376 goto errout; 380 goto errout;
377 } 381 }
378 prog->insns_cnt = size / sizeof(struct bpf_insn); 382 prog->insns_cnt = size / bpf_insn_sz;
379 memcpy(prog->insns, data, 383 memcpy(prog->insns, data, size);
380 prog->insns_cnt * sizeof(struct bpf_insn));
381 prog->idx = idx; 384 prog->idx = idx;
382 prog->instances.fds = NULL; 385 prog->instances.fds = NULL;
383 prog->instances.nr = -1; 386 prog->instances.nr = -1;
@@ -494,15 +497,14 @@ static struct bpf_object *bpf_object__new(const char *path,
494 497
495 strcpy(obj->path, path); 498 strcpy(obj->path, path);
496 /* Using basename() GNU version which doesn't modify arg. */ 499 /* Using basename() GNU version which doesn't modify arg. */
497 strncpy(obj->name, basename((void *)path), 500 strncpy(obj->name, basename((void *)path), sizeof(obj->name) - 1);
498 sizeof(obj->name) - 1);
499 end = strchr(obj->name, '.'); 501 end = strchr(obj->name, '.');
500 if (end) 502 if (end)
501 *end = 0; 503 *end = 0;
502 504
503 obj->efile.fd = -1; 505 obj->efile.fd = -1;
504 /* 506 /*
505 * Caller of this function should also calls 507 * Caller of this function should also call
506 * bpf_object__elf_finish() after data collection to return 508 * bpf_object__elf_finish() after data collection to return
507 * obj_buf to user. If not, we should duplicate the buffer to 509 * obj_buf to user. If not, we should duplicate the buffer to
508 * avoid user freeing them before elf finish. 510 * avoid user freeing them before elf finish.
@@ -562,38 +564,35 @@ static int bpf_object__elf_init(struct bpf_object *obj)
562 } else { 564 } else {
563 obj->efile.fd = open(obj->path, O_RDONLY); 565 obj->efile.fd = open(obj->path, O_RDONLY);
564 if (obj->efile.fd < 0) { 566 if (obj->efile.fd < 0) {
565 char errmsg[STRERR_BUFSIZE]; 567 char errmsg[STRERR_BUFSIZE], *cp;
566 char *cp = libbpf_strerror_r(errno, errmsg,
567 sizeof(errmsg));
568 568
569 err = -errno;
570 cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
569 pr_warning("failed to open %s: %s\n", obj->path, cp); 571 pr_warning("failed to open %s: %s\n", obj->path, cp);
570 return -errno; 572 return err;
571 } 573 }
572 574
573 obj->efile.elf = elf_begin(obj->efile.fd, 575 obj->efile.elf = elf_begin(obj->efile.fd,
574 LIBBPF_ELF_C_READ_MMAP, 576 LIBBPF_ELF_C_READ_MMAP, NULL);
575 NULL);
576 } 577 }
577 578
578 if (!obj->efile.elf) { 579 if (!obj->efile.elf) {
579 pr_warning("failed to open %s as ELF file\n", 580 pr_warning("failed to open %s as ELF file\n", obj->path);
580 obj->path);
581 err = -LIBBPF_ERRNO__LIBELF; 581 err = -LIBBPF_ERRNO__LIBELF;
582 goto errout; 582 goto errout;
583 } 583 }
584 584
585 if (!gelf_getehdr(obj->efile.elf, &obj->efile.ehdr)) { 585 if (!gelf_getehdr(obj->efile.elf, &obj->efile.ehdr)) {
586 pr_warning("failed to get EHDR from %s\n", 586 pr_warning("failed to get EHDR from %s\n", obj->path);
587 obj->path);
588 err = -LIBBPF_ERRNO__FORMAT; 587 err = -LIBBPF_ERRNO__FORMAT;
589 goto errout; 588 goto errout;
590 } 589 }
591 ep = &obj->efile.ehdr; 590 ep = &obj->efile.ehdr;
592 591
593 /* Old LLVM set e_machine to EM_NONE */ 592 /* Old LLVM set e_machine to EM_NONE */
594 if ((ep->e_type != ET_REL) || (ep->e_machine && (ep->e_machine != EM_BPF))) { 593 if (ep->e_type != ET_REL ||
595 pr_warning("%s is not an eBPF object file\n", 594 (ep->e_machine && ep->e_machine != EM_BPF)) {
596 obj->path); 595 pr_warning("%s is not an eBPF object file\n", obj->path);
597 err = -LIBBPF_ERRNO__FORMAT; 596 err = -LIBBPF_ERRNO__FORMAT;
598 goto errout; 597 goto errout;
599 } 598 }
@@ -604,47 +603,31 @@ errout:
604 return err; 603 return err;
605} 604}
606 605
607static int 606static int bpf_object__check_endianness(struct bpf_object *obj)
608bpf_object__check_endianness(struct bpf_object *obj) 607{
609{ 608#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
610 static unsigned int const endian = 1; 609 if (obj->efile.ehdr.e_ident[EI_DATA] == ELFDATA2LSB)
611 610 return 0;
612 switch (obj->efile.ehdr.e_ident[EI_DATA]) { 611#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
613 case ELFDATA2LSB: 612 if (obj->efile.ehdr.e_ident[EI_DATA] == ELFDATA2MSB)
614 /* We are big endian, BPF obj is little endian. */ 613 return 0;
615 if (*(unsigned char const *)&endian != 1) 614#else
616 goto mismatch; 615# error "Unrecognized __BYTE_ORDER__"
617 break; 616#endif
618 617 pr_warning("endianness mismatch.\n");
619 case ELFDATA2MSB:
620 /* We are little endian, BPF obj is big endian. */
621 if (*(unsigned char const *)&endian != 0)
622 goto mismatch;
623 break;
624 default:
625 return -LIBBPF_ERRNO__ENDIAN;
626 }
627
628 return 0;
629
630mismatch:
631 pr_warning("Error: endianness mismatch.\n");
632 return -LIBBPF_ERRNO__ENDIAN; 618 return -LIBBPF_ERRNO__ENDIAN;
633} 619}
634 620
635static int 621static int
636bpf_object__init_license(struct bpf_object *obj, 622bpf_object__init_license(struct bpf_object *obj, void *data, size_t size)
637 void *data, size_t size)
638{ 623{
639 memcpy(obj->license, data, 624 memcpy(obj->license, data, min(size, sizeof(obj->license) - 1));
640 min(size, sizeof(obj->license) - 1));
641 pr_debug("license of %s is %s\n", obj->path, obj->license); 625 pr_debug("license of %s is %s\n", obj->path, obj->license);
642 return 0; 626 return 0;
643} 627}
644 628
645static int 629static int
646bpf_object__init_kversion(struct bpf_object *obj, 630bpf_object__init_kversion(struct bpf_object *obj, void *data, size_t size)
647 void *data, size_t size)
648{ 631{
649 __u32 kver; 632 __u32 kver;
650 633
@@ -654,8 +637,7 @@ bpf_object__init_kversion(struct bpf_object *obj,
654 } 637 }
655 memcpy(&kver, data, sizeof(kver)); 638 memcpy(&kver, data, sizeof(kver));
656 obj->kern_version = kver; 639 obj->kern_version = kver;
657 pr_debug("kernel version of %s is %x\n", obj->path, 640 pr_debug("kernel version of %s is %x\n", obj->path, obj->kern_version);
658 obj->kern_version);
659 return 0; 641 return 0;
660} 642}
661 643
@@ -811,8 +793,7 @@ bpf_object__init_internal_map(struct bpf_object *obj, struct bpf_map *map,
811 def->key_size = sizeof(int); 793 def->key_size = sizeof(int);
812 def->value_size = data->d_size; 794 def->value_size = data->d_size;
813 def->max_entries = 1; 795 def->max_entries = 1;
814 def->map_flags = type == LIBBPF_MAP_RODATA ? 796 def->map_flags = type == LIBBPF_MAP_RODATA ? BPF_F_RDONLY_PROG : 0;
815 BPF_F_RDONLY_PROG : 0;
816 if (data_buff) { 797 if (data_buff) {
817 *data_buff = malloc(data->d_size); 798 *data_buff = malloc(data->d_size);
818 if (!*data_buff) { 799 if (!*data_buff) {
@@ -827,8 +808,7 @@ bpf_object__init_internal_map(struct bpf_object *obj, struct bpf_map *map,
827 return 0; 808 return 0;
828} 809}
829 810
830static int 811static int bpf_object__init_maps(struct bpf_object *obj, int flags)
831bpf_object__init_maps(struct bpf_object *obj, int flags)
832{ 812{
833 int i, map_idx, map_def_sz = 0, nr_syms, nr_maps = 0, nr_maps_glob = 0; 813 int i, map_idx, map_def_sz = 0, nr_syms, nr_maps = 0, nr_maps_glob = 0;
834 bool strict = !(flags & MAPS_RELAX_COMPAT); 814 bool strict = !(flags & MAPS_RELAX_COMPAT);
@@ -930,6 +910,11 @@ bpf_object__init_maps(struct bpf_object *obj, int flags)
930 map_name = elf_strptr(obj->efile.elf, 910 map_name = elf_strptr(obj->efile.elf,
931 obj->efile.strtabidx, 911 obj->efile.strtabidx,
932 sym.st_name); 912 sym.st_name);
913 if (!map_name) {
914 pr_warning("failed to get map #%d name sym string for obj %s\n",
915 map_idx, obj->path);
916 return -LIBBPF_ERRNO__FORMAT;
917 }
933 918
934 obj->maps[map_idx].libbpf_type = LIBBPF_MAP_UNSPEC; 919 obj->maps[map_idx].libbpf_type = LIBBPF_MAP_UNSPEC;
935 obj->maps[map_idx].offset = sym.st_value; 920 obj->maps[map_idx].offset = sym.st_value;
@@ -1104,8 +1089,7 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags)
1104 1089
1105 /* Elf is corrupted/truncated, avoid calling elf_strptr. */ 1090 /* Elf is corrupted/truncated, avoid calling elf_strptr. */
1106 if (!elf_rawdata(elf_getscn(elf, ep->e_shstrndx), NULL)) { 1091 if (!elf_rawdata(elf_getscn(elf, ep->e_shstrndx), NULL)) {
1107 pr_warning("failed to get e_shstrndx from %s\n", 1092 pr_warning("failed to get e_shstrndx from %s\n", obj->path);
1108 obj->path);
1109 return -LIBBPF_ERRNO__FORMAT; 1093 return -LIBBPF_ERRNO__FORMAT;
1110 } 1094 }
1111 1095
@@ -1226,7 +1210,7 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags)
1226 1210
1227 if (!obj->efile.strtabidx || obj->efile.strtabidx >= idx) { 1211 if (!obj->efile.strtabidx || obj->efile.strtabidx >= idx) {
1228 pr_warning("Corrupted ELF file: index of strtab invalid\n"); 1212 pr_warning("Corrupted ELF file: index of strtab invalid\n");
1229 return LIBBPF_ERRNO__FORMAT; 1213 return -LIBBPF_ERRNO__FORMAT;
1230 } 1214 }
1231 if (btf_data) { 1215 if (btf_data) {
1232 obj->btf = btf__new(btf_data->d_buf, btf_data->d_size); 1216 obj->btf = btf__new(btf_data->d_buf, btf_data->d_size);
@@ -1346,8 +1330,7 @@ bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr,
1346 size_t nr_maps = obj->nr_maps; 1330 size_t nr_maps = obj->nr_maps;
1347 int i, nrels; 1331 int i, nrels;
1348 1332
1349 pr_debug("collecting relocating info for: '%s'\n", 1333 pr_debug("collecting relocating info for: '%s'\n", prog->section_name);
1350 prog->section_name);
1351 nrels = shdr->sh_size / shdr->sh_entsize; 1334 nrels = shdr->sh_size / shdr->sh_entsize;
1352 1335
1353 prog->reloc_desc = malloc(sizeof(*prog->reloc_desc) * nrels); 1336 prog->reloc_desc = malloc(sizeof(*prog->reloc_desc) * nrels);
@@ -1372,9 +1355,7 @@ bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr,
1372 return -LIBBPF_ERRNO__FORMAT; 1355 return -LIBBPF_ERRNO__FORMAT;
1373 } 1356 }
1374 1357
1375 if (!gelf_getsym(symbols, 1358 if (!gelf_getsym(symbols, GELF_R_SYM(rel.r_info), &sym)) {
1376 GELF_R_SYM(rel.r_info),
1377 &sym)) {
1378 pr_warning("relocation: symbol %"PRIx64" not found\n", 1359 pr_warning("relocation: symbol %"PRIx64" not found\n",
1379 GELF_R_SYM(rel.r_info)); 1360 GELF_R_SYM(rel.r_info));
1380 return -LIBBPF_ERRNO__FORMAT; 1361 return -LIBBPF_ERRNO__FORMAT;
@@ -1435,8 +1416,7 @@ bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr,
1435 if (maps[map_idx].libbpf_type != type) 1416 if (maps[map_idx].libbpf_type != type)
1436 continue; 1417 continue;
1437 if (type != LIBBPF_MAP_UNSPEC || 1418 if (type != LIBBPF_MAP_UNSPEC ||
1438 (type == LIBBPF_MAP_UNSPEC && 1419 maps[map_idx].offset == sym.st_value) {
1439 maps[map_idx].offset == sym.st_value)) {
1440 pr_debug("relocation: find map %zd (%s) for insn %u\n", 1420 pr_debug("relocation: find map %zd (%s) for insn %u\n",
1441 map_idx, maps[map_idx].name, insn_idx); 1421 map_idx, maps[map_idx].name, insn_idx);
1442 break; 1422 break;
@@ -1444,7 +1424,7 @@ bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr,
1444 } 1424 }
1445 1425
1446 if (map_idx >= nr_maps) { 1426 if (map_idx >= nr_maps) {
1447 pr_warning("bpf relocation: map_idx %d large than %d\n", 1427 pr_warning("bpf relocation: map_idx %d larger than %d\n",
1448 (int)map_idx, (int)nr_maps - 1); 1428 (int)map_idx, (int)nr_maps - 1);
1449 return -LIBBPF_ERRNO__RELOC; 1429 return -LIBBPF_ERRNO__RELOC;
1450 } 1430 }
@@ -1756,7 +1736,7 @@ bpf_object__create_maps(struct bpf_object *obj)
1756 create_attr.key_size = def->key_size; 1736 create_attr.key_size = def->key_size;
1757 create_attr.value_size = def->value_size; 1737 create_attr.value_size = def->value_size;
1758 create_attr.max_entries = def->max_entries; 1738 create_attr.max_entries = def->max_entries;
1759 create_attr.btf_fd = 0; 1739 create_attr.btf_fd = -1;
1760 create_attr.btf_key_type_id = 0; 1740 create_attr.btf_key_type_id = 0;
1761 create_attr.btf_value_type_id = 0; 1741 create_attr.btf_value_type_id = 0;
1762 if (bpf_map_type__is_map_in_map(def->type) && 1742 if (bpf_map_type__is_map_in_map(def->type) &&
@@ -1770,11 +1750,11 @@ bpf_object__create_maps(struct bpf_object *obj)
1770 } 1750 }
1771 1751
1772 *pfd = bpf_create_map_xattr(&create_attr); 1752 *pfd = bpf_create_map_xattr(&create_attr);
1773 if (*pfd < 0 && create_attr.btf_key_type_id) { 1753 if (*pfd < 0 && create_attr.btf_fd >= 0) {
1774 cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); 1754 cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
1775 pr_warning("Error in bpf_create_map_xattr(%s):%s(%d). Retrying without BTF.\n", 1755 pr_warning("Error in bpf_create_map_xattr(%s):%s(%d). Retrying without BTF.\n",
1776 map->name, cp, errno); 1756 map->name, cp, errno);
1777 create_attr.btf_fd = 0; 1757 create_attr.btf_fd = -1;
1778 create_attr.btf_key_type_id = 0; 1758 create_attr.btf_key_type_id = 0;
1779 create_attr.btf_value_type_id = 0; 1759 create_attr.btf_value_type_id = 0;
1780 map->btf_key_type_id = 0; 1760 map->btf_key_type_id = 0;
@@ -1803,7 +1783,7 @@ err_out:
1803 } 1783 }
1804 } 1784 }
1805 1785
1806 pr_debug("create map %s: fd=%d\n", map->name, *pfd); 1786 pr_debug("created map %s: fd=%d\n", map->name, *pfd);
1807 } 1787 }
1808 1788
1809 return 0; 1789 return 0;
@@ -1824,18 +1804,14 @@ check_btf_ext_reloc_err(struct bpf_program *prog, int err,
1824 if (btf_prog_info) { 1804 if (btf_prog_info) {
1825 /* 1805 /*
1826 * Some info has already been found but has problem 1806 * Some info has already been found but has problem
1827 * in the last btf_ext reloc. Must have to error 1807 * in the last btf_ext reloc. Must have to error out.
1828 * out.
1829 */ 1808 */
1830 pr_warning("Error in relocating %s for sec %s.\n", 1809 pr_warning("Error in relocating %s for sec %s.\n",
1831 info_name, prog->section_name); 1810 info_name, prog->section_name);
1832 return err; 1811 return err;
1833 } 1812 }
1834 1813
1835 /* 1814 /* Have problem loading the very first info. Ignore the rest. */
1836 * Have problem loading the very first info. Ignore
1837 * the rest.
1838 */
1839 pr_warning("Cannot find %s for main program sec %s. Ignore all %s.\n", 1815 pr_warning("Cannot find %s for main program sec %s. Ignore all %s.\n",
1840 info_name, prog->section_name, info_name); 1816 info_name, prog->section_name, info_name);
1841 return 0; 1817 return 0;
@@ -2039,9 +2015,7 @@ static int bpf_object__collect_reloc(struct bpf_object *obj)
2039 return -LIBBPF_ERRNO__RELOC; 2015 return -LIBBPF_ERRNO__RELOC;
2040 } 2016 }
2041 2017
2042 err = bpf_program__collect_reloc(prog, 2018 err = bpf_program__collect_reloc(prog, shdr, data, obj);
2043 shdr, data,
2044 obj);
2045 if (err) 2019 if (err)
2046 return err; 2020 return err;
2047 } 2021 }
@@ -2058,6 +2032,9 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt,
2058 char *log_buf; 2032 char *log_buf;
2059 int ret; 2033 int ret;
2060 2034
2035 if (!insns || !insns_cnt)
2036 return -EINVAL;
2037
2061 memset(&load_attr, 0, sizeof(struct bpf_load_program_attr)); 2038 memset(&load_attr, 0, sizeof(struct bpf_load_program_attr));
2062 load_attr.prog_type = prog->type; 2039 load_attr.prog_type = prog->type;
2063 load_attr.expected_attach_type = prog->expected_attach_type; 2040 load_attr.expected_attach_type = prog->expected_attach_type;
@@ -2068,7 +2045,7 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt,
2068 load_attr.license = license; 2045 load_attr.license = license;
2069 load_attr.kern_version = kern_version; 2046 load_attr.kern_version = kern_version;
2070 load_attr.prog_ifindex = prog->prog_ifindex; 2047 load_attr.prog_ifindex = prog->prog_ifindex;
2071 load_attr.prog_btf_fd = prog->btf_fd >= 0 ? prog->btf_fd : 0; 2048 load_attr.prog_btf_fd = prog->btf_fd;
2072 load_attr.func_info = prog->func_info; 2049 load_attr.func_info = prog->func_info;
2073 load_attr.func_info_rec_size = prog->func_info_rec_size; 2050 load_attr.func_info_rec_size = prog->func_info_rec_size;
2074 load_attr.func_info_cnt = prog->func_info_cnt; 2051 load_attr.func_info_cnt = prog->func_info_cnt;
@@ -2076,8 +2053,7 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt,
2076 load_attr.line_info_rec_size = prog->line_info_rec_size; 2053 load_attr.line_info_rec_size = prog->line_info_rec_size;
2077 load_attr.line_info_cnt = prog->line_info_cnt; 2054 load_attr.line_info_cnt = prog->line_info_cnt;
2078 load_attr.log_level = prog->log_level; 2055 load_attr.log_level = prog->log_level;
2079 if (!load_attr.insns || !load_attr.insns_cnt) 2056 load_attr.prog_flags = prog->prog_flags;
2080 return -EINVAL;
2081 2057
2082retry_load: 2058retry_load:
2083 log_buf = malloc(log_buf_size); 2059 log_buf = malloc(log_buf_size);
@@ -2222,7 +2198,7 @@ static bool bpf_program__is_function_storage(struct bpf_program *prog,
2222} 2198}
2223 2199
2224static int 2200static int
2225bpf_object__load_progs(struct bpf_object *obj) 2201bpf_object__load_progs(struct bpf_object *obj, int log_level)
2226{ 2202{
2227 size_t i; 2203 size_t i;
2228 int err; 2204 int err;
@@ -2230,6 +2206,7 @@ bpf_object__load_progs(struct bpf_object *obj)
2230 for (i = 0; i < obj->nr_programs; i++) { 2206 for (i = 0; i < obj->nr_programs; i++) {
2231 if (bpf_program__is_function_storage(&obj->programs[i], obj)) 2207 if (bpf_program__is_function_storage(&obj->programs[i], obj))
2232 continue; 2208 continue;
2209 obj->programs[i].log_level |= log_level;
2233 err = bpf_program__load(&obj->programs[i], 2210 err = bpf_program__load(&obj->programs[i],
2234 obj->license, 2211 obj->license,
2235 obj->kern_version); 2212 obj->kern_version);
@@ -2356,11 +2333,9 @@ struct bpf_object *bpf_object__open_buffer(void *obj_buf,
2356 snprintf(tmp_name, sizeof(tmp_name), "%lx-%lx", 2333 snprintf(tmp_name, sizeof(tmp_name), "%lx-%lx",
2357 (unsigned long)obj_buf, 2334 (unsigned long)obj_buf,
2358 (unsigned long)obj_buf_sz); 2335 (unsigned long)obj_buf_sz);
2359 tmp_name[sizeof(tmp_name) - 1] = '\0';
2360 name = tmp_name; 2336 name = tmp_name;
2361 } 2337 }
2362 pr_debug("loading object '%s' from buffer\n", 2338 pr_debug("loading object '%s' from buffer\n", name);
2363 name);
2364 2339
2365 return __bpf_object__open(name, obj_buf, obj_buf_sz, true, true); 2340 return __bpf_object__open(name, obj_buf, obj_buf_sz, true, true);
2366} 2341}
@@ -2381,10 +2356,14 @@ int bpf_object__unload(struct bpf_object *obj)
2381 return 0; 2356 return 0;
2382} 2357}
2383 2358
2384int bpf_object__load(struct bpf_object *obj) 2359int bpf_object__load_xattr(struct bpf_object_load_attr *attr)
2385{ 2360{
2361 struct bpf_object *obj;
2386 int err; 2362 int err;
2387 2363
2364 if (!attr)
2365 return -EINVAL;
2366 obj = attr->obj;
2388 if (!obj) 2367 if (!obj)
2389 return -EINVAL; 2368 return -EINVAL;
2390 2369
@@ -2397,7 +2376,7 @@ int bpf_object__load(struct bpf_object *obj)
2397 2376
2398 CHECK_ERR(bpf_object__create_maps(obj), err, out); 2377 CHECK_ERR(bpf_object__create_maps(obj), err, out);
2399 CHECK_ERR(bpf_object__relocate(obj), err, out); 2378 CHECK_ERR(bpf_object__relocate(obj), err, out);
2400 CHECK_ERR(bpf_object__load_progs(obj), err, out); 2379 CHECK_ERR(bpf_object__load_progs(obj, attr->log_level), err, out);
2401 2380
2402 return 0; 2381 return 0;
2403out: 2382out:
@@ -2406,6 +2385,15 @@ out:
2406 return err; 2385 return err;
2407} 2386}
2408 2387
2388int bpf_object__load(struct bpf_object *obj)
2389{
2390 struct bpf_object_load_attr attr = {
2391 .obj = obj,
2392 };
2393
2394 return bpf_object__load_xattr(&attr);
2395}
2396
2409static int check_path(const char *path) 2397static int check_path(const char *path)
2410{ 2398{
2411 char *cp, errmsg[STRERR_BUFSIZE]; 2399 char *cp, errmsg[STRERR_BUFSIZE];
@@ -3458,9 +3446,7 @@ bpf_object__find_map_by_offset(struct bpf_object *obj, size_t offset)
3458 3446
3459long libbpf_get_error(const void *ptr) 3447long libbpf_get_error(const void *ptr)
3460{ 3448{
3461 if (IS_ERR(ptr)) 3449 return PTR_ERR_OR_ZERO(ptr);
3462 return PTR_ERR(ptr);
3463 return 0;
3464} 3450}
3465 3451
3466int bpf_prog_load(const char *file, enum bpf_prog_type type, 3452int bpf_prog_load(const char *file, enum bpf_prog_type type,
@@ -3521,6 +3507,7 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
3521 expected_attach_type); 3507 expected_attach_type);
3522 3508
3523 prog->log_level = attr->log_level; 3509 prog->log_level = attr->log_level;
3510 prog->prog_flags = attr->prog_flags;
3524 if (!first_prog) 3511 if (!first_prog)
3525 first_prog = prog; 3512 first_prog = prog;
3526 } 3513 }
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index c5ff00515ce7..1af0d48178c8 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -89,8 +89,14 @@ LIBBPF_API int bpf_object__unpin_programs(struct bpf_object *obj,
89LIBBPF_API int bpf_object__pin(struct bpf_object *object, const char *path); 89LIBBPF_API int bpf_object__pin(struct bpf_object *object, const char *path);
90LIBBPF_API void bpf_object__close(struct bpf_object *object); 90LIBBPF_API void bpf_object__close(struct bpf_object *object);
91 91
92struct bpf_object_load_attr {
93 struct bpf_object *obj;
94 int log_level;
95};
96
92/* Load/unload object into/from kernel */ 97/* Load/unload object into/from kernel */
93LIBBPF_API int bpf_object__load(struct bpf_object *obj); 98LIBBPF_API int bpf_object__load(struct bpf_object *obj);
99LIBBPF_API int bpf_object__load_xattr(struct bpf_object_load_attr *attr);
94LIBBPF_API int bpf_object__unload(struct bpf_object *obj); 100LIBBPF_API int bpf_object__unload(struct bpf_object *obj);
95LIBBPF_API const char *bpf_object__name(struct bpf_object *obj); 101LIBBPF_API const char *bpf_object__name(struct bpf_object *obj);
96LIBBPF_API unsigned int bpf_object__kversion(struct bpf_object *obj); 102LIBBPF_API unsigned int bpf_object__kversion(struct bpf_object *obj);
@@ -320,6 +326,7 @@ struct bpf_prog_load_attr {
320 enum bpf_attach_type expected_attach_type; 326 enum bpf_attach_type expected_attach_type;
321 int ifindex; 327 int ifindex;
322 int log_level; 328 int log_level;
329 int prog_flags;
323}; 330};
324 331
325LIBBPF_API int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr, 332LIBBPF_API int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index 673001787cba..46dcda89df21 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -164,3 +164,12 @@ LIBBPF_0.0.3 {
164 bpf_map_freeze; 164 bpf_map_freeze;
165 btf__finalize_data; 165 btf__finalize_data;
166} LIBBPF_0.0.2; 166} LIBBPF_0.0.2;
167
168LIBBPF_0.0.4 {
169 global:
170 btf_dump__dump_type;
171 btf_dump__free;
172 btf_dump__new;
173 btf__parse_elf;
174 bpf_object__load_xattr;
175} LIBBPF_0.0.3;
diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h
index f3025b4d90e1..850f7bdec5cb 100644
--- a/tools/lib/bpf/libbpf_internal.h
+++ b/tools/lib/bpf/libbpf_internal.h
@@ -9,6 +9,8 @@
9#ifndef __LIBBPF_LIBBPF_INTERNAL_H 9#ifndef __LIBBPF_LIBBPF_INTERNAL_H
10#define __LIBBPF_LIBBPF_INTERNAL_H 10#define __LIBBPF_LIBBPF_INTERNAL_H
11 11
12#include "libbpf.h"
13
12#define BTF_INFO_ENC(kind, kind_flag, vlen) \ 14#define BTF_INFO_ENC(kind, kind_flag, vlen) \
13 ((!!(kind_flag) << 31) | ((kind) << 24) | ((vlen) & BTF_MAX_VLEN)) 15 ((!!(kind_flag) << 31) | ((kind) << 24) | ((vlen) & BTF_MAX_VLEN))
14#define BTF_TYPE_ENC(name, info, size_or_type) (name), (info), (size_or_type) 16#define BTF_TYPE_ENC(name, info, size_or_type) (name), (info), (size_or_type)
diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore
index dd5d69529382..7470327edcfe 100644
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -22,6 +22,7 @@ test_lirc_mode2_user
22get_cgroup_id_user 22get_cgroup_id_user
23test_skb_cgroup_id_user 23test_skb_cgroup_id_user
24test_socket_cookie 24test_socket_cookie
25test_cgroup_attach
25test_cgroup_storage 26test_cgroup_storage
26test_select_reuseport 27test_select_reuseport
27test_flow_dissector 28test_flow_dissector
@@ -35,3 +36,6 @@ test_sysctl
35alu32 36alu32
36libbpf.pc 37libbpf.pc
37libbpf.so.* 38libbpf.so.*
39test_hashmap
40test_btf_dump
41xdping
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 66f2dca1dee1..2b426ae1cdc9 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -15,7 +15,9 @@ LLC ?= llc
15LLVM_OBJCOPY ?= llvm-objcopy 15LLVM_OBJCOPY ?= llvm-objcopy
16LLVM_READELF ?= llvm-readelf 16LLVM_READELF ?= llvm-readelf
17BTF_PAHOLE ?= pahole 17BTF_PAHOLE ?= pahole
18CFLAGS += -Wall -O2 -I$(APIDIR) -I$(LIBDIR) -I$(BPFDIR) -I$(GENDIR) $(GENFLAGS) -I../../../include 18CFLAGS += -Wall -O2 -I$(APIDIR) -I$(LIBDIR) -I$(BPFDIR) -I$(GENDIR) $(GENFLAGS) -I../../../include \
19 -Dbpf_prog_load=bpf_prog_test_load \
20 -Dbpf_load_program=bpf_test_load_program
19LDLIBS += -lcap -lelf -lrt -lpthread 21LDLIBS += -lcap -lelf -lrt -lpthread
20 22
21# Order correspond to 'make run_tests' order 23# Order correspond to 'make run_tests' order
@@ -23,7 +25,8 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test
23 test_align test_verifier_log test_dev_cgroup test_tcpbpf_user \ 25 test_align test_verifier_log test_dev_cgroup test_tcpbpf_user \
24 test_sock test_btf test_sockmap test_lirc_mode2_user get_cgroup_id_user \ 26 test_sock test_btf test_sockmap test_lirc_mode2_user get_cgroup_id_user \
25 test_socket_cookie test_cgroup_storage test_select_reuseport test_section_names \ 27 test_socket_cookie test_cgroup_storage test_select_reuseport test_section_names \
26 test_netcnt test_tcpnotify_user test_sock_fields test_sysctl 28 test_netcnt test_tcpnotify_user test_sock_fields test_sysctl test_hashmap \
29 test_btf_dump test_cgroup_attach xdping
27 30
28BPF_OBJ_FILES = $(patsubst %.c,%.o, $(notdir $(wildcard progs/*.c))) 31BPF_OBJ_FILES = $(patsubst %.c,%.o, $(notdir $(wildcard progs/*.c)))
29TEST_GEN_FILES = $(BPF_OBJ_FILES) 32TEST_GEN_FILES = $(BPF_OBJ_FILES)
@@ -54,7 +57,8 @@ TEST_PROGS := test_kmod.sh \
54 test_lwt_ip_encap.sh \ 57 test_lwt_ip_encap.sh \
55 test_tcp_check_syncookie.sh \ 58 test_tcp_check_syncookie.sh \
56 test_tc_tunnel.sh \ 59 test_tc_tunnel.sh \
57 test_tc_edt.sh 60 test_tc_edt.sh \
61 test_xdping.sh
58 62
59TEST_PROGS_EXTENDED := with_addr.sh \ 63TEST_PROGS_EXTENDED := with_addr.sh \
60 with_tunnels.sh \ 64 with_tunnels.sh \
@@ -78,9 +82,9 @@ $(OUTPUT)/test_maps: map_tests/*.c
78 82
79BPFOBJ := $(OUTPUT)/libbpf.a 83BPFOBJ := $(OUTPUT)/libbpf.a
80 84
81$(TEST_GEN_PROGS): $(BPFOBJ) 85$(TEST_GEN_PROGS): test_stub.o $(BPFOBJ)
82 86
83$(TEST_GEN_PROGS_EXTENDED): $(OUTPUT)/libbpf.a 87$(TEST_GEN_PROGS_EXTENDED): test_stub.o $(OUTPUT)/libbpf.a
84 88
85$(OUTPUT)/test_dev_cgroup: cgroup_helpers.c 89$(OUTPUT)/test_dev_cgroup: cgroup_helpers.c
86$(OUTPUT)/test_skb_cgroup_id_user: cgroup_helpers.c 90$(OUTPUT)/test_skb_cgroup_id_user: cgroup_helpers.c
@@ -96,6 +100,7 @@ $(OUTPUT)/test_cgroup_storage: cgroup_helpers.c
96$(OUTPUT)/test_netcnt: cgroup_helpers.c 100$(OUTPUT)/test_netcnt: cgroup_helpers.c
97$(OUTPUT)/test_sock_fields: cgroup_helpers.c 101$(OUTPUT)/test_sock_fields: cgroup_helpers.c
98$(OUTPUT)/test_sysctl: cgroup_helpers.c 102$(OUTPUT)/test_sysctl: cgroup_helpers.c
103$(OUTPUT)/test_cgroup_attach: cgroup_helpers.c
99 104
100.PHONY: force 105.PHONY: force
101 106
@@ -176,7 +181,7 @@ $(ALU32_BUILD_DIR)/test_progs_32: test_progs.c $(OUTPUT)/libbpf.a\
176 $(ALU32_BUILD_DIR)/urandom_read 181 $(ALU32_BUILD_DIR)/urandom_read
177 $(CC) $(TEST_PROGS_CFLAGS) $(CFLAGS) \ 182 $(CC) $(TEST_PROGS_CFLAGS) $(CFLAGS) \
178 -o $(ALU32_BUILD_DIR)/test_progs_32 \ 183 -o $(ALU32_BUILD_DIR)/test_progs_32 \
179 test_progs.c trace_helpers.c prog_tests/*.c \ 184 test_progs.c test_stub.c trace_helpers.c prog_tests/*.c \
180 $(OUTPUT)/libbpf.a $(LDLIBS) 185 $(OUTPUT)/libbpf.a $(LDLIBS)
181 186
182$(ALU32_BUILD_DIR)/test_progs_32: $(PROG_TESTS_H) 187$(ALU32_BUILD_DIR)/test_progs_32: $(PROG_TESTS_H)
diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h
index 5f6f9e7aba2a..e6d243b7cd74 100644
--- a/tools/testing/selftests/bpf/bpf_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_helpers.h
@@ -8,6 +8,14 @@
8 */ 8 */
9#define SEC(NAME) __attribute__((section(NAME), used)) 9#define SEC(NAME) __attribute__((section(NAME), used))
10 10
11/* helper macro to print out debug messages */
12#define bpf_printk(fmt, ...) \
13({ \
14 char ____fmt[] = fmt; \
15 bpf_trace_printk(____fmt, sizeof(____fmt), \
16 ##__VA_ARGS__); \
17})
18
11/* helper functions called from eBPF programs written in C */ 19/* helper functions called from eBPF programs written in C */
12static void *(*bpf_map_lookup_elem)(void *map, const void *key) = 20static void *(*bpf_map_lookup_elem)(void *map, const void *key) =
13 (void *) BPF_FUNC_map_lookup_elem; 21 (void *) BPF_FUNC_map_lookup_elem;
@@ -216,6 +224,7 @@ static void *(*bpf_sk_storage_get)(void *map, struct bpf_sock *sk,
216 (void *) BPF_FUNC_sk_storage_get; 224 (void *) BPF_FUNC_sk_storage_get;
217static int (*bpf_sk_storage_delete)(void *map, struct bpf_sock *sk) = 225static int (*bpf_sk_storage_delete)(void *map, struct bpf_sock *sk) =
218 (void *)BPF_FUNC_sk_storage_delete; 226 (void *)BPF_FUNC_sk_storage_delete;
227static int (*bpf_send_signal)(unsigned sig) = (void *)BPF_FUNC_send_signal;
219 228
220/* llvm builtin functions that eBPF C program may use to 229/* llvm builtin functions that eBPF C program may use to
221 * emit BPF_LD_ABS and BPF_LD_IND instructions 230 * emit BPF_LD_ABS and BPF_LD_IND instructions
diff --git a/tools/testing/selftests/bpf/cgroup_helpers.c b/tools/testing/selftests/bpf/cgroup_helpers.c
index 6692a40a6979..0d89f0396be4 100644
--- a/tools/testing/selftests/bpf/cgroup_helpers.c
+++ b/tools/testing/selftests/bpf/cgroup_helpers.c
@@ -34,6 +34,60 @@
34 CGROUP_WORK_DIR, path) 34 CGROUP_WORK_DIR, path)
35 35
36/** 36/**
37 * enable_all_controllers() - Enable all available cgroup v2 controllers
38 *
39 * Enable all available cgroup v2 controllers in order to increase
40 * the code coverage.
41 *
42 * If successful, 0 is returned.
43 */
44int enable_all_controllers(char *cgroup_path)
45{
46 char path[PATH_MAX + 1];
47 char buf[PATH_MAX];
48 char *c, *c2;
49 int fd, cfd;
50 size_t len;
51
52 snprintf(path, sizeof(path), "%s/cgroup.controllers", cgroup_path);
53 fd = open(path, O_RDONLY);
54 if (fd < 0) {
55 log_err("Opening cgroup.controllers: %s", path);
56 return 1;
57 }
58
59 len = read(fd, buf, sizeof(buf) - 1);
60 if (len < 0) {
61 close(fd);
62 log_err("Reading cgroup.controllers: %s", path);
63 return 1;
64 }
65 buf[len] = 0;
66 close(fd);
67
68 /* No controllers available? We're probably on cgroup v1. */
69 if (len == 0)
70 return 0;
71
72 snprintf(path, sizeof(path), "%s/cgroup.subtree_control", cgroup_path);
73 cfd = open(path, O_RDWR);
74 if (cfd < 0) {
75 log_err("Opening cgroup.subtree_control: %s", path);
76 return 1;
77 }
78
79 for (c = strtok_r(buf, " ", &c2); c; c = strtok_r(NULL, " ", &c2)) {
80 if (dprintf(cfd, "+%s\n", c) <= 0) {
81 log_err("Enabling controller %s: %s", c, path);
82 close(cfd);
83 return 1;
84 }
85 }
86 close(cfd);
87 return 0;
88}
89
90/**
37 * setup_cgroup_environment() - Setup the cgroup environment 91 * setup_cgroup_environment() - Setup the cgroup environment
38 * 92 *
39 * After calling this function, cleanup_cgroup_environment should be called 93 * After calling this function, cleanup_cgroup_environment should be called
@@ -71,6 +125,9 @@ int setup_cgroup_environment(void)
71 return 1; 125 return 1;
72 } 126 }
73 127
128 if (enable_all_controllers(cgroup_workdir))
129 return 1;
130
74 return 0; 131 return 0;
75} 132}
76 133
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
index b74e2f6e96d0..c0091137074b 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
@@ -12,7 +12,7 @@ static int libbpf_debug_print(enum libbpf_print_level level,
12 return vfprintf(stderr, "%s", args); 12 return vfprintf(stderr, "%s", args);
13} 13}
14 14
15static int check_load(const char *file) 15static int check_load(const char *file, enum bpf_prog_type type)
16{ 16{
17 struct bpf_prog_load_attr attr; 17 struct bpf_prog_load_attr attr;
18 struct bpf_object *obj = NULL; 18 struct bpf_object *obj = NULL;
@@ -20,8 +20,9 @@ static int check_load(const char *file)
20 20
21 memset(&attr, 0, sizeof(struct bpf_prog_load_attr)); 21 memset(&attr, 0, sizeof(struct bpf_prog_load_attr));
22 attr.file = file; 22 attr.file = file;
23 attr.prog_type = BPF_PROG_TYPE_SCHED_CLS; 23 attr.prog_type = type;
24 attr.log_level = 4; 24 attr.log_level = 4;
25 attr.prog_flags = BPF_F_TEST_RND_HI32;
25 err = bpf_prog_load_xattr(&attr, &obj, &prog_fd); 26 err = bpf_prog_load_xattr(&attr, &obj, &prog_fd);
26 bpf_object__close(obj); 27 bpf_object__close(obj);
27 if (err) 28 if (err)
@@ -31,19 +32,24 @@ static int check_load(const char *file)
31 32
32void test_bpf_verif_scale(void) 33void test_bpf_verif_scale(void)
33{ 34{
34 const char *file1 = "./test_verif_scale1.o"; 35 const char *scale[] = {
35 const char *file2 = "./test_verif_scale2.o"; 36 "./test_verif_scale1.o", "./test_verif_scale2.o", "./test_verif_scale3.o"
36 const char *file3 = "./test_verif_scale3.o"; 37 };
37 int err; 38 const char *pyperf[] = {
39 "./pyperf50.o", "./pyperf100.o", "./pyperf180.o"
40 };
41 int err, i;
38 42
39 if (verifier_stats) 43 if (verifier_stats)
40 libbpf_set_print(libbpf_debug_print); 44 libbpf_set_print(libbpf_debug_print);
41 45
42 err = check_load(file1); 46 for (i = 0; i < ARRAY_SIZE(scale); i++) {
43 err |= check_load(file2); 47 err = check_load(scale[i], BPF_PROG_TYPE_SCHED_CLS);
44 err |= check_load(file3); 48 printf("test_scale:%s:%s\n", scale[i], err ? "FAIL" : "OK");
45 if (!err) 49 }
46 printf("test_verif_scale:OK\n"); 50
47 else 51 for (i = 0; i < ARRAY_SIZE(pyperf); i++) {
48 printf("test_verif_scale:FAIL\n"); 52 err = check_load(pyperf[i], BPF_PROG_TYPE_RAW_TRACEPOINT);
53 printf("test_scale:%s:%s\n", pyperf[i], err ? "FAIL" : "OK");
54 }
49} 55}
diff --git a/tools/testing/selftests/bpf/prog_tests/send_signal.c b/tools/testing/selftests/bpf/prog_tests/send_signal.c
new file mode 100644
index 000000000000..67cea1686305
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/send_signal.c
@@ -0,0 +1,198 @@
1// SPDX-License-Identifier: GPL-2.0
2#include <test_progs.h>
3
4static volatile int sigusr1_received = 0;
5
6static void sigusr1_handler(int signum)
7{
8 sigusr1_received++;
9}
10
11static int test_send_signal_common(struct perf_event_attr *attr,
12 int prog_type,
13 const char *test_name)
14{
15 int err = -1, pmu_fd, prog_fd, info_map_fd, status_map_fd;
16 const char *file = "./test_send_signal_kern.o";
17 struct bpf_object *obj = NULL;
18 int pipe_c2p[2], pipe_p2c[2];
19 __u32 key = 0, duration = 0;
20 char buf[256];
21 pid_t pid;
22 __u64 val;
23
24 if (CHECK(pipe(pipe_c2p), test_name,
25 "pipe pipe_c2p error: %s\n", strerror(errno)))
26 goto no_fork_done;
27
28 if (CHECK(pipe(pipe_p2c), test_name,
29 "pipe pipe_p2c error: %s\n", strerror(errno))) {
30 close(pipe_c2p[0]);
31 close(pipe_c2p[1]);
32 goto no_fork_done;
33 }
34
35 pid = fork();
36 if (CHECK(pid < 0, test_name, "fork error: %s\n", strerror(errno))) {
37 close(pipe_c2p[0]);
38 close(pipe_c2p[1]);
39 close(pipe_p2c[0]);
40 close(pipe_p2c[1]);
41 goto no_fork_done;
42 }
43
44 if (pid == 0) {
45 /* install signal handler and notify parent */
46 signal(SIGUSR1, sigusr1_handler);
47
48 close(pipe_c2p[0]); /* close read */
49 close(pipe_p2c[1]); /* close write */
50
51 /* notify parent signal handler is installed */
52 write(pipe_c2p[1], buf, 1);
53
54 /* make sure parent enabled bpf program to send_signal */
55 read(pipe_p2c[0], buf, 1);
56
57 /* wait a little for signal handler */
58 sleep(1);
59
60 if (sigusr1_received)
61 write(pipe_c2p[1], "2", 1);
62 else
63 write(pipe_c2p[1], "0", 1);
64
65 /* wait for parent notification and exit */
66 read(pipe_p2c[0], buf, 1);
67
68 close(pipe_c2p[1]);
69 close(pipe_p2c[0]);
70 exit(0);
71 }
72
73 close(pipe_c2p[1]); /* close write */
74 close(pipe_p2c[0]); /* close read */
75
76 err = bpf_prog_load(file, prog_type, &obj, &prog_fd);
77 if (CHECK(err < 0, test_name, "bpf_prog_load error: %s\n",
78 strerror(errno)))
79 goto prog_load_failure;
80
81 pmu_fd = syscall(__NR_perf_event_open, attr, pid, -1,
82 -1 /* group id */, 0 /* flags */);
83 if (CHECK(pmu_fd < 0, test_name, "perf_event_open error: %s\n",
84 strerror(errno))) {
85 err = -1;
86 goto close_prog;
87 }
88
89 err = ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0);
90 if (CHECK(err < 0, test_name, "ioctl perf_event_ioc_enable error: %s\n",
91 strerror(errno)))
92 goto disable_pmu;
93
94 err = ioctl(pmu_fd, PERF_EVENT_IOC_SET_BPF, prog_fd);
95 if (CHECK(err < 0, test_name, "ioctl perf_event_ioc_set_bpf error: %s\n",
96 strerror(errno)))
97 goto disable_pmu;
98
99 err = -1;
100 info_map_fd = bpf_object__find_map_fd_by_name(obj, "info_map");
101 if (CHECK(info_map_fd < 0, test_name, "find map %s error\n", "info_map"))
102 goto disable_pmu;
103
104 status_map_fd = bpf_object__find_map_fd_by_name(obj, "status_map");
105 if (CHECK(status_map_fd < 0, test_name, "find map %s error\n", "status_map"))
106 goto disable_pmu;
107
108 /* wait until child signal handler installed */
109 read(pipe_c2p[0], buf, 1);
110
111 /* trigger the bpf send_signal */
112 key = 0;
113 val = (((__u64)(SIGUSR1)) << 32) | pid;
114 bpf_map_update_elem(info_map_fd, &key, &val, 0);
115
116 /* notify child that bpf program can send_signal now */
117 write(pipe_p2c[1], buf, 1);
118
119 /* wait for result */
120 err = read(pipe_c2p[0], buf, 1);
121 if (CHECK(err < 0, test_name, "reading pipe error: %s\n", strerror(errno)))
122 goto disable_pmu;
123 if (CHECK(err == 0, test_name, "reading pipe error: size 0\n")) {
124 err = -1;
125 goto disable_pmu;
126 }
127
128 err = CHECK(buf[0] != '2', test_name, "incorrect result\n");
129
130 /* notify child safe to exit */
131 write(pipe_p2c[1], buf, 1);
132
133disable_pmu:
134 close(pmu_fd);
135close_prog:
136 bpf_object__close(obj);
137prog_load_failure:
138 close(pipe_c2p[0]);
139 close(pipe_p2c[1]);
140 wait(NULL);
141no_fork_done:
142 return err;
143}
144
145static int test_send_signal_tracepoint(void)
146{
147 const char *id_path = "/sys/kernel/debug/tracing/events/syscalls/sys_enter_nanosleep/id";
148 struct perf_event_attr attr = {
149 .type = PERF_TYPE_TRACEPOINT,
150 .sample_type = PERF_SAMPLE_RAW | PERF_SAMPLE_CALLCHAIN,
151 .sample_period = 1,
152 .wakeup_events = 1,
153 };
154 __u32 duration = 0;
155 int bytes, efd;
156 char buf[256];
157
158 efd = open(id_path, O_RDONLY, 0);
159 if (CHECK(efd < 0, "tracepoint",
160 "open syscalls/sys_enter_nanosleep/id failure: %s\n",
161 strerror(errno)))
162 return -1;
163
164 bytes = read(efd, buf, sizeof(buf));
165 close(efd);
166 if (CHECK(bytes <= 0 || bytes >= sizeof(buf), "tracepoint",
167 "read syscalls/sys_enter_nanosleep/id failure: %s\n",
168 strerror(errno)))
169 return -1;
170
171 attr.config = strtol(buf, NULL, 0);
172
173 return test_send_signal_common(&attr, BPF_PROG_TYPE_TRACEPOINT, "tracepoint");
174}
175
176static int test_send_signal_nmi(void)
177{
178 struct perf_event_attr attr = {
179 .sample_freq = 50,
180 .freq = 1,
181 .type = PERF_TYPE_HARDWARE,
182 .config = PERF_COUNT_HW_CPU_CYCLES,
183 };
184
185 return test_send_signal_common(&attr, BPF_PROG_TYPE_PERF_EVENT, "perf_event");
186}
187
188void test_send_signal(void)
189{
190 int ret = 0;
191
192 ret |= test_send_signal_tracepoint();
193 ret |= test_send_signal_nmi();
194 if (!ret)
195 printf("test_send_signal:OK\n");
196 else
197 printf("test_send_signal:FAIL\n");
198}
diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_bitfields.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_bitfields.c
new file mode 100644
index 000000000000..8f44767a75fa
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_bitfields.c
@@ -0,0 +1,92 @@
1// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
2
3/*
4 * BTF-to-C dumper tests for bitfield.
5 *
6 * Copyright (c) 2019 Facebook
7 */
8#include <stdbool.h>
9
10/* ----- START-EXPECTED-OUTPUT ----- */
11/*
12 *struct bitfields_only_mixed_types {
13 * int a: 3;
14 * long int b: 2;
15 * _Bool c: 1;
16 * enum {
17 * A = 0,
18 * B = 1,
19 * } d: 1;
20 * short e: 5;
21 * int: 20;
22 * unsigned int f: 30;
23 *};
24 *
25 */
26/* ------ END-EXPECTED-OUTPUT ------ */
27
28struct bitfields_only_mixed_types {
29 int a: 3;
30 long int b: 2;
31 bool c: 1; /* it's really a _Bool type */
32 enum {
33 A, /* A = 0, dumper is very explicit */
34 B, /* B = 1, same */
35 } d: 1;
36 short e: 5;
37 /* 20-bit padding here */
38 unsigned f: 30; /* this gets aligned on 4-byte boundary */
39};
40
41/* ----- START-EXPECTED-OUTPUT ----- */
42/*
43 *struct bitfield_mixed_with_others {
44 * char: 4;
45 * int a: 4;
46 * short b;
47 * long int c;
48 * long int d: 8;
49 * int e;
50 * int f;
51 *};
52 *
53 */
54/* ------ END-EXPECTED-OUTPUT ------ */
55struct bitfield_mixed_with_others {
56 long: 4; /* char is enough as a backing field */
57 int a: 4;
58 /* 8-bit implicit padding */
59 short b; /* combined with previous bitfield */
60 /* 4 more bytes of implicit padding */
61 long c;
62 long d: 8;
63 /* 24 bits implicit padding */
64 int e; /* combined with previous bitfield */
65 int f;
66 /* 4 bytes of padding */
67};
68
69/* ----- START-EXPECTED-OUTPUT ----- */
70/*
71 *struct bitfield_flushed {
72 * int a: 4;
73 * long: 60;
74 * long int b: 16;
75 *};
76 *
77 */
78/* ------ END-EXPECTED-OUTPUT ------ */
79struct bitfield_flushed {
80 int a: 4;
81 long: 0; /* flush until next natural alignment boundary */
82 long b: 16;
83};
84
85int f(struct {
86 struct bitfields_only_mixed_types _1;
87 struct bitfield_mixed_with_others _2;
88 struct bitfield_flushed _3;
89} *_)
90{
91 return 0;
92}
diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_multidim.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_multidim.c
new file mode 100644
index 000000000000..ba97165bdb28
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_multidim.c
@@ -0,0 +1,35 @@
1// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
2
3/*
4 * BTF-to-C dumper test for multi-dimensional array output.
5 *
6 * Copyright (c) 2019 Facebook
7 */
8/* ----- START-EXPECTED-OUTPUT ----- */
9typedef int arr_t[2];
10
11typedef int multiarr_t[3][4][5];
12
13typedef int *ptr_arr_t[6];
14
15typedef int *ptr_multiarr_t[7][8][9][10];
16
17typedef int * (*fn_ptr_arr_t[11])();
18
19typedef int * (*fn_ptr_multiarr_t[12][13])();
20
21struct root_struct {
22 arr_t _1;
23 multiarr_t _2;
24 ptr_arr_t _3;
25 ptr_multiarr_t _4;
26 fn_ptr_arr_t _5;
27 fn_ptr_multiarr_t _6;
28};
29
30/* ------ END-EXPECTED-OUTPUT ------ */
31
32int f(struct root_struct *s)
33{
34 return 0;
35}
diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_namespacing.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_namespacing.c
new file mode 100644
index 000000000000..92a4ad428710
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_namespacing.c
@@ -0,0 +1,73 @@
1// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
2
3/*
4 * BTF-to-C dumper test validating no name versioning happens between
5 * independent C namespaces (struct/union/enum vs typedef/enum values).
6 *
7 * Copyright (c) 2019 Facebook
8 */
9/* ----- START-EXPECTED-OUTPUT ----- */
10struct S {
11 int S;
12 int U;
13};
14
15typedef struct S S;
16
17union U {
18 int S;
19 int U;
20};
21
22typedef union U U;
23
24enum E {
25 V = 0,
26};
27
28typedef enum E E;
29
30struct A {};
31
32union B {};
33
34enum C {
35 A = 1,
36 B = 2,
37 C = 3,
38};
39
40struct X {};
41
42union Y {};
43
44enum Z;
45
46typedef int X;
47
48typedef int Y;
49
50typedef int Z;
51
52/*------ END-EXPECTED-OUTPUT ------ */
53
54int f(struct {
55 struct S _1;
56 S _2;
57 union U _3;
58 U _4;
59 enum E _5;
60 E _6;
61 struct A a;
62 union B b;
63 enum C c;
64 struct X x;
65 union Y y;
66 enum Z *z;
67 X xx;
68 Y yy;
69 Z zz;
70} *_)
71{
72 return 0;
73}
diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_ordering.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_ordering.c
new file mode 100644
index 000000000000..7c95702ee4cb
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_ordering.c
@@ -0,0 +1,63 @@
1// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
2
3/*
4 * BTF-to-C dumper test for topological sorting of dependent structs.
5 *
6 * Copyright (c) 2019 Facebook
7 */
8/* ----- START-EXPECTED-OUTPUT ----- */
9struct s1 {};
10
11struct s3;
12
13struct s4;
14
15struct s2 {
16 struct s2 *s2;
17 struct s3 *s3;
18 struct s4 *s4;
19};
20
21struct s3 {
22 struct s1 s1;
23 struct s2 s2;
24};
25
26struct s4 {
27 struct s1 s1;
28 struct s3 s3;
29};
30
31struct list_head {
32 struct list_head *next;
33 struct list_head *prev;
34};
35
36struct hlist_node {
37 struct hlist_node *next;
38 struct hlist_node **pprev;
39};
40
41struct hlist_head {
42 struct hlist_node *first;
43};
44
45struct callback_head {
46 struct callback_head *next;
47 void (*func)(struct callback_head *);
48};
49
50struct root_struct {
51 struct s4 s4;
52 struct list_head l;
53 struct hlist_node n;
54 struct hlist_head h;
55 struct callback_head cb;
56};
57
58/*------ END-EXPECTED-OUTPUT ------ */
59
60int f(struct root_struct *root)
61{
62 return 0;
63}
diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_packing.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_packing.c
new file mode 100644
index 000000000000..1cef3bec1dc7
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_packing.c
@@ -0,0 +1,75 @@
1// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
2
3/*
4 * BTF-to-C dumper tests for struct packing determination.
5 *
6 * Copyright (c) 2019 Facebook
7 */
8/* ----- START-EXPECTED-OUTPUT ----- */
9struct packed_trailing_space {
10 int a;
11 short b;
12} __attribute__((packed));
13
14struct non_packed_trailing_space {
15 int a;
16 short b;
17};
18
19struct packed_fields {
20 short a;
21 int b;
22} __attribute__((packed));
23
24struct non_packed_fields {
25 short a;
26 int b;
27};
28
29struct nested_packed {
30 char: 4;
31 int a: 4;
32 long int b;
33 struct {
34 char c;
35 int d;
36 } __attribute__((packed)) e;
37} __attribute__((packed));
38
39union union_is_never_packed {
40 int a: 4;
41 char b;
42 char c: 1;
43};
44
45union union_does_not_need_packing {
46 struct {
47 long int a;
48 int b;
49 } __attribute__((packed));
50 int c;
51};
52
53union jump_code_union {
54 char code[5];
55 struct {
56 char jump;
57 int offset;
58 } __attribute__((packed));
59};
60
61/*------ END-EXPECTED-OUTPUT ------ */
62
63int f(struct {
64 struct packed_trailing_space _1;
65 struct non_packed_trailing_space _2;
66 struct packed_fields _3;
67 struct non_packed_fields _4;
68 struct nested_packed _5;
69 union union_is_never_packed _6;
70 union union_does_not_need_packing _7;
71 union jump_code_union _8;
72} *_)
73{
74 return 0;
75}
diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_padding.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_padding.c
new file mode 100644
index 000000000000..3a62119c7498
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_padding.c
@@ -0,0 +1,111 @@
1// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
2
3/*
4 * BTF-to-C dumper tests for implicit and explicit padding between fields and
5 * at the end of a struct.
6 *
7 * Copyright (c) 2019 Facebook
8 */
9/* ----- START-EXPECTED-OUTPUT ----- */
10struct padded_implicitly {
11 int a;
12 long int b;
13 char c;
14};
15
16/* ------ END-EXPECTED-OUTPUT ------ */
17
18/* ----- START-EXPECTED-OUTPUT ----- */
19/*
20 *struct padded_explicitly {
21 * int a;
22 * int: 32;
23 * int b;
24 *};
25 *
26 */
27/* ------ END-EXPECTED-OUTPUT ------ */
28
29struct padded_explicitly {
30 int a;
31 int: 1; /* algo will explicitly pad with full 32 bits here */
32 int b;
33};
34
35/* ----- START-EXPECTED-OUTPUT ----- */
36/*
37 *struct padded_a_lot {
38 * int a;
39 * long: 32;
40 * long: 64;
41 * long: 64;
42 * int b;
43 *};
44 *
45 */
46/* ------ END-EXPECTED-OUTPUT ------ */
47
48struct padded_a_lot {
49 int a;
50 /* 32 bit of implicit padding here, which algo will make explicit */
51 long: 64;
52 long: 64;
53 int b;
54};
55
56/* ----- START-EXPECTED-OUTPUT ----- */
57/*
58 *struct padded_cache_line {
59 * int a;
60 * long: 32;
61 * long: 64;
62 * long: 64;
63 * long: 64;
64 * int b;
65 *};
66 *
67 */
68/* ------ END-EXPECTED-OUTPUT ------ */
69
70struct padded_cache_line {
71 int a;
72 int b __attribute__((aligned(32)));
73};
74
75/* ----- START-EXPECTED-OUTPUT ----- */
76/*
77 *struct zone_padding {
78 * char x[0];
79 *};
80 *
81 *struct zone {
82 * int a;
83 * short b;
84 * short: 16;
85 * struct zone_padding __pad__;
86 *};
87 *
88 */
89/* ------ END-EXPECTED-OUTPUT ------ */
90
91struct zone_padding {
92 char x[0];
93} __attribute__((__aligned__(8)));
94
95struct zone {
96 int a;
97 short b;
98 short: 16;
99 struct zone_padding __pad__;
100};
101
102int f(struct {
103 struct padded_implicitly _1;
104 struct padded_explicitly _2;
105 struct padded_a_lot _3;
106 struct padded_cache_line _4;
107 struct zone _5;
108} *_)
109{
110 return 0;
111}
diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c
new file mode 100644
index 000000000000..d4a02fe44a12
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c
@@ -0,0 +1,229 @@
1// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
2
3/*
4 * BTF-to-C dumper test for majority of C syntax quirks.
5 *
6 * Copyright (c) 2019 Facebook
7 */
8/* ----- START-EXPECTED-OUTPUT ----- */
9enum e1 {
10 A = 0,
11 B = 1,
12};
13
14enum e2 {
15 C = 100,
16 D = -100,
17 E = 0,
18};
19
20typedef enum e2 e2_t;
21
22typedef enum {
23 F = 0,
24 G = 1,
25 H = 2,
26} e3_t;
27
28typedef int int_t;
29
30typedef volatile const int * volatile const crazy_ptr_t;
31
32typedef int *****we_need_to_go_deeper_ptr_t;
33
34typedef volatile const we_need_to_go_deeper_ptr_t * restrict * volatile * const * restrict volatile * restrict const * volatile const * restrict volatile const how_about_this_ptr_t;
35
36typedef int *ptr_arr_t[10];
37
38typedef void (*fn_ptr1_t)(int);
39
40typedef void (*printf_fn_t)(const char *, ...);
41
42/* ------ END-EXPECTED-OUTPUT ------ */
43/*
44 * While previous function pointers are pretty trivial (C-syntax-level
45 * trivial), the following are deciphered here for future generations:
46 *
47 * - `fn_ptr2_t`: function, taking anonymous struct as a first arg and pointer
48 * to a function, that takes int and returns int, as a second arg; returning
49 * a pointer to a const pointer to a char. Equivalent to:
50 * typedef struct { int a; } s_t;
51 * typedef int (*fn_t)(int);
52 * typedef char * const * (*fn_ptr2_t)(s_t, fn_t);
53 *
54 * - `fn_complext_t`: pointer to a function returning struct and accepting
55 * union and struct. All structs and enum are anonymous and defined inline.
56 *
57 * - `signal_t: pointer to a function accepting a pointer to a function as an
58 * argument and returning pointer to a function as a result. Sane equivalent:
59 * typedef void (*signal_handler_t)(int);
60 * typedef signal_handler_t (*signal_ptr_t)(int, signal_handler_t);
61 *
62 * - fn_ptr_arr1_t: array of pointers to a function accepting pointer to
63 * a pointer to an int and returning pointer to a char. Easy.
64 *
65 * - fn_ptr_arr2_t: array of const pointers to a function taking no arguments
66 * and returning a const pointer to a function, that takes pointer to a
67 * `int -> char *` function and returns pointer to a char. Equivalent:
68 * typedef char * (*fn_input_t)(int);
69 * typedef char * (*fn_output_outer_t)(fn_input_t);
70 * typedef const fn_output_outer_t (* fn_output_inner_t)();
71 * typedef const fn_output_inner_t fn_ptr_arr2_t[5];
72 */
73/* ----- START-EXPECTED-OUTPUT ----- */
74typedef char * const * (*fn_ptr2_t)(struct {
75 int a;
76}, int (*)(int));
77
78typedef struct {
79 int a;
80 void (*b)(int, struct {
81 int c;
82 }, union {
83 char d;
84 int e[5];
85 });
86} (*fn_complex_t)(union {
87 void *f;
88 char g[16];
89}, struct {
90 int h;
91});
92
93typedef void (* (*signal_t)(int, void (*)(int)))(int);
94
95typedef char * (*fn_ptr_arr1_t[10])(int **);
96
97typedef char * (* const (* const fn_ptr_arr2_t[5])())(char * (*)(int));
98
99struct struct_w_typedefs {
100 int_t a;
101 crazy_ptr_t b;
102 we_need_to_go_deeper_ptr_t c;
103 how_about_this_ptr_t d;
104 ptr_arr_t e;
105 fn_ptr1_t f;
106 printf_fn_t g;
107 fn_ptr2_t h;
108 fn_complex_t i;
109 signal_t j;
110 fn_ptr_arr1_t k;
111 fn_ptr_arr2_t l;
112};
113
114typedef struct {
115 int x;
116 int y;
117 int z;
118} anon_struct_t;
119
120struct struct_fwd;
121
122typedef struct struct_fwd struct_fwd_t;
123
124typedef struct struct_fwd *struct_fwd_ptr_t;
125
126union union_fwd;
127
128typedef union union_fwd union_fwd_t;
129
130typedef union union_fwd *union_fwd_ptr_t;
131
132struct struct_empty {};
133
134struct struct_simple {
135 int a;
136 char b;
137 const int_t *p;
138 struct struct_empty s;
139 enum e2 e;
140 enum {
141 ANON_VAL1 = 1,
142 ANON_VAL2 = 2,
143 } f;
144 int arr1[13];
145 enum e2 arr2[5];
146};
147
148union union_empty {};
149
150union union_simple {
151 void *ptr;
152 int num;
153 int_t num2;
154 union union_empty u;
155};
156
157struct struct_in_struct {
158 struct struct_simple simple;
159 union union_simple also_simple;
160 struct {
161 int a;
162 } not_so_hard_as_well;
163 union {
164 int b;
165 int c;
166 } anon_union_is_good;
167 struct {
168 int d;
169 int e;
170 };
171 union {
172 int f;
173 int g;
174 };
175};
176
177struct struct_with_embedded_stuff {
178 int a;
179 struct {
180 int b;
181 struct {
182 struct struct_with_embedded_stuff *c;
183 const char *d;
184 } e;
185 union {
186 volatile long int f;
187 void * restrict g;
188 };
189 };
190 union {
191 const int_t *h;
192 void (*i)(char, int, void *);
193 } j;
194 enum {
195 K = 100,
196 L = 200,
197 } m;
198 char n[16];
199 struct {
200 char o;
201 int p;
202 void (*q)(int);
203 } r[5];
204 struct struct_in_struct s[10];
205 int t[11];
206};
207
208struct root_struct {
209 enum e1 _1;
210 enum e2 _2;
211 e2_t _2_1;
212 e3_t _2_2;
213 struct struct_w_typedefs _3;
214 anon_struct_t _7;
215 struct struct_fwd *_8;
216 struct_fwd_t *_9;
217 struct_fwd_ptr_t _10;
218 union union_fwd *_11;
219 union_fwd_t *_12;
220 union_fwd_ptr_t _13;
221 struct struct_with_embedded_stuff _14;
222};
223
224/* ------ END-EXPECTED-OUTPUT ------ */
225
226int f(struct root_struct *s)
227{
228 return 0;
229}
diff --git a/tools/testing/selftests/bpf/progs/pyperf.h b/tools/testing/selftests/bpf/progs/pyperf.h
new file mode 100644
index 000000000000..0cc5e4ee90bd
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/pyperf.h
@@ -0,0 +1,268 @@
1// SPDX-License-Identifier: GPL-2.0
2// Copyright (c) 2019 Facebook
3#include <linux/sched.h>
4#include <linux/ptrace.h>
5#include <stdint.h>
6#include <stddef.h>
7#include <stdbool.h>
8#include <linux/bpf.h>
9#include "bpf_helpers.h"
10
11#define FUNCTION_NAME_LEN 64
12#define FILE_NAME_LEN 128
13#define TASK_COMM_LEN 16
14
15typedef struct {
16 int PyThreadState_frame;
17 int PyThreadState_thread;
18 int PyFrameObject_back;
19 int PyFrameObject_code;
20 int PyFrameObject_lineno;
21 int PyCodeObject_filename;
22 int PyCodeObject_name;
23 int String_data;
24 int String_size;
25} OffsetConfig;
26
27typedef struct {
28 uintptr_t current_state_addr;
29 uintptr_t tls_key_addr;
30 OffsetConfig offsets;
31 bool use_tls;
32} PidData;
33
34typedef struct {
35 uint32_t success;
36} Stats;
37
38typedef struct {
39 char name[FUNCTION_NAME_LEN];
40 char file[FILE_NAME_LEN];
41} Symbol;
42
43typedef struct {
44 uint32_t pid;
45 uint32_t tid;
46 char comm[TASK_COMM_LEN];
47 int32_t kernel_stack_id;
48 int32_t user_stack_id;
49 bool thread_current;
50 bool pthread_match;
51 bool stack_complete;
52 int16_t stack_len;
53 int32_t stack[STACK_MAX_LEN];
54
55 int has_meta;
56 int metadata;
57 char dummy_safeguard;
58} Event;
59
60
61struct bpf_elf_map {
62 __u32 type;
63 __u32 size_key;
64 __u32 size_value;
65 __u32 max_elem;
66 __u32 flags;
67};
68
69typedef int pid_t;
70
71typedef struct {
72 void* f_back; // PyFrameObject.f_back, previous frame
73 void* f_code; // PyFrameObject.f_code, pointer to PyCodeObject
74 void* co_filename; // PyCodeObject.co_filename
75 void* co_name; // PyCodeObject.co_name
76} FrameData;
77
78static inline __attribute__((__always_inline__)) void*
79get_thread_state(void* tls_base, PidData* pidData)
80{
81 void* thread_state;
82 int key;
83
84 bpf_probe_read(&key, sizeof(key), (void*)(long)pidData->tls_key_addr);
85 bpf_probe_read(&thread_state, sizeof(thread_state),
86 tls_base + 0x310 + key * 0x10 + 0x08);
87 return thread_state;
88}
89
90static inline __attribute__((__always_inline__)) bool
91get_frame_data(void* frame_ptr, PidData* pidData, FrameData* frame, Symbol* symbol)
92{
93 // read data from PyFrameObject
94 bpf_probe_read(&frame->f_back,
95 sizeof(frame->f_back),
96 frame_ptr + pidData->offsets.PyFrameObject_back);
97 bpf_probe_read(&frame->f_code,
98 sizeof(frame->f_code),
99 frame_ptr + pidData->offsets.PyFrameObject_code);
100
101 // read data from PyCodeObject
102 if (!frame->f_code)
103 return false;
104 bpf_probe_read(&frame->co_filename,
105 sizeof(frame->co_filename),
106 frame->f_code + pidData->offsets.PyCodeObject_filename);
107 bpf_probe_read(&frame->co_name,
108 sizeof(frame->co_name),
109 frame->f_code + pidData->offsets.PyCodeObject_name);
110 // read actual names into symbol
111 if (frame->co_filename)
112 bpf_probe_read_str(&symbol->file,
113 sizeof(symbol->file),
114 frame->co_filename + pidData->offsets.String_data);
115 if (frame->co_name)
116 bpf_probe_read_str(&symbol->name,
117 sizeof(symbol->name),
118 frame->co_name + pidData->offsets.String_data);
119 return true;
120}
121
122struct bpf_elf_map SEC("maps") pidmap = {
123 .type = BPF_MAP_TYPE_HASH,
124 .size_key = sizeof(int),
125 .size_value = sizeof(PidData),
126 .max_elem = 1,
127};
128
129struct bpf_elf_map SEC("maps") eventmap = {
130 .type = BPF_MAP_TYPE_HASH,
131 .size_key = sizeof(int),
132 .size_value = sizeof(Event),
133 .max_elem = 1,
134};
135
136struct bpf_elf_map SEC("maps") symbolmap = {
137 .type = BPF_MAP_TYPE_HASH,
138 .size_key = sizeof(Symbol),
139 .size_value = sizeof(int),
140 .max_elem = 1,
141};
142
143struct bpf_elf_map SEC("maps") statsmap = {
144 .type = BPF_MAP_TYPE_ARRAY,
145 .size_key = sizeof(Stats),
146 .size_value = sizeof(int),
147 .max_elem = 1,
148};
149
150struct bpf_elf_map SEC("maps") perfmap = {
151 .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
152 .size_key = sizeof(int),
153 .size_value = sizeof(int),
154 .max_elem = 32,
155};
156
157struct bpf_elf_map SEC("maps") stackmap = {
158 .type = BPF_MAP_TYPE_STACK_TRACE,
159 .size_key = sizeof(int),
160 .size_value = sizeof(long long) * 127,
161 .max_elem = 1000,
162};
163
/*
 * Sample the Python call stack of the current task.
 *
 * Looks up per-process layout info (pidmap), fills the Event scratch
 * buffer (eventmap) with pid/tid/comm and kernel+user stack ids,
 * locates the Python thread state (via pthread TLS or the process's
 * global current-state pointer), walks up to STACK_MAX_LEN frames
 * interning each symbol through symbolmap, and emits the event
 * through perfmap.  Always returns 0.
 */
164static inline __attribute__((__always_inline__)) int __on_event(struct pt_regs *ctx)
165{
166	uint64_t pid_tgid = bpf_get_current_pid_tgid();
167	pid_t pid = (pid_t)(pid_tgid >> 32);
168	PidData* pidData = bpf_map_lookup_elem(&pidmap, &pid);
169	if (!pidData)
170		return 0;
171
	/* Event lives in map storage; it can exceed the BPF stack limit. */
172	int zero = 0;
173	Event* event = bpf_map_lookup_elem(&eventmap, &zero);
174	if (!event)
175		return 0;
176
177	event->pid = pid;
178
179	event->tid = (pid_t)pid_tgid;
180	bpf_get_current_comm(&event->comm, sizeof(event->comm));
181
182	event->user_stack_id = bpf_get_stackid(ctx, &stackmap, BPF_F_USER_STACK);
183	event->kernel_stack_id = bpf_get_stackid(ctx, &stackmap, 0);
184
	/* Interpreter's "current thread state" pointer, read from the
	 * address configured in pidData.
	 */
185	void* thread_state_current = (void*)0;
186	bpf_probe_read(&thread_state_current,
187		       sizeof(thread_state_current),
188		       (void*)(long)pidData->current_state_addr);
189
	/* The task struct pointer is used as the TLS base for the
	 * offset math inside get_thread_state().
	 */
190	struct task_struct* task = (struct task_struct*)bpf_get_current_task();
191	void* tls_base = (void*)task;
192
193	void* thread_state = pidData->use_tls ? get_thread_state(tls_base, pidData)
194		: thread_state_current;
195	event->thread_current = thread_state == thread_state_current;
196
	/* With TLS, verify the thread state really belongs to this pthread. */
197	if (pidData->use_tls) {
198		uint64_t pthread_created;
199		uint64_t pthread_self;
200		bpf_probe_read(&pthread_self, sizeof(pthread_self), tls_base + 0x10);
201
202		bpf_probe_read(&pthread_created,
203			       sizeof(pthread_created),
204			       thread_state + pidData->offsets.PyThreadState_thread);
205		event->pthread_match = pthread_created == pthread_self;
206	} else {
207		event->pthread_match = 1;
208	}
209
210	if (event->pthread_match || !pidData->use_tls) {
211		void* frame_ptr;
212		FrameData frame;
213		Symbol sym = {};
214		int cur_cpu = bpf_get_smp_processor_id();
215
216		bpf_probe_read(&frame_ptr,
217		       sizeof(frame_ptr),
218		       thread_state + pidData->offsets.PyThreadState_frame);
219
220		int32_t* symbol_counter = bpf_map_lookup_elem(&symbolmap, &sym);
221		if (symbol_counter == NULL)
222			return 0;
223#pragma unroll
224	/* Unwind python stack */
225		for (int i = 0; i < STACK_MAX_LEN; ++i) {
226			if (frame_ptr && get_frame_data(frame_ptr, pidData, &frame, &sym)) {
				/* Intern sym: candidate ids are counter * 64 + cpu
				 * (presumably unique for < 64 cpus -- TODO confirm).
				 */
227				int32_t new_symbol_id = *symbol_counter * 64 + cur_cpu;
228				int32_t *symbol_id = bpf_map_lookup_elem(&symbolmap, &sym);
229				if (!symbol_id) {
230					bpf_map_update_elem(&symbolmap, &sym, &zero, 0);
231					symbol_id = bpf_map_lookup_elem(&symbolmap, &sym);
232					if (!symbol_id)
233						return 0;
234				}
235				if (*symbol_id == new_symbol_id)
236					(*symbol_counter)++;
237				event->stack[i] = *symbol_id;
238				event->stack_len = i + 1;
239				frame_ptr = frame.f_back;
240			}
241		}
		/* Complete only if we walked off the end of the frame list. */
242		event->stack_complete = frame_ptr == NULL;
243	} else {
244		event->stack_complete = 1;
245	}
246
247	Stats* stats = bpf_map_lookup_elem(&statsmap, &zero);
248	if (stats)
249		stats->success++;
250
	/* Emit everything up to (not including) the metadata field. */
251	event->has_meta = 0;
252	bpf_perf_event_output(ctx, &perfmap, 0, event, offsetof(Event, metadata));
253	return 0;
254}
255
256SEC("raw_tracepoint/kfree_skb")
257int on_event(struct pt_regs* ctx)
258{
259 int i, ret = 0;
260 ret |= __on_event(ctx);
261 ret |= __on_event(ctx);
262 ret |= __on_event(ctx);
263 ret |= __on_event(ctx);
264 ret |= __on_event(ctx);
265 return ret;
266}
267
268char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/pyperf100.c b/tools/testing/selftests/bpf/progs/pyperf100.c
new file mode 100644
index 000000000000..29786325db54
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/pyperf100.c
@@ -0,0 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
2// Copyright (c) 2019 Facebook
3#define STACK_MAX_LEN 100
4#include "pyperf.h"
diff --git a/tools/testing/selftests/bpf/progs/pyperf180.c b/tools/testing/selftests/bpf/progs/pyperf180.c
new file mode 100644
index 000000000000..c39f559d3100
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/pyperf180.c
@@ -0,0 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
2// Copyright (c) 2019 Facebook
3#define STACK_MAX_LEN 180
4#include "pyperf.h"
diff --git a/tools/testing/selftests/bpf/progs/pyperf50.c b/tools/testing/selftests/bpf/progs/pyperf50.c
new file mode 100644
index 000000000000..ef7ce340a292
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/pyperf50.c
@@ -0,0 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
2// Copyright (c) 2019 Facebook
3#define STACK_MAX_LEN 50
4#include "pyperf.h"
diff --git a/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c b/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c
index 0f92858f6226..ed3e4a551c57 100644
--- a/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c
+++ b/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c
@@ -5,13 +5,6 @@
5 5
6int _version SEC("version") = 1; 6int _version SEC("version") = 1;
7 7
8#define bpf_printk(fmt, ...) \
9({ \
10 char ____fmt[] = fmt; \
11 bpf_trace_printk(____fmt, sizeof(____fmt), \
12 ##__VA_ARGS__); \
13})
14
15SEC("sk_skb1") 8SEC("sk_skb1")
16int bpf_prog1(struct __sk_buff *skb) 9int bpf_prog1(struct __sk_buff *skb)
17{ 10{
diff --git a/tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c b/tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c
index 12a7b5c82ed6..65fbfdb6cd3a 100644
--- a/tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c
+++ b/tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c
@@ -5,13 +5,6 @@
5 5
6int _version SEC("version") = 1; 6int _version SEC("version") = 1;
7 7
8#define bpf_printk(fmt, ...) \
9({ \
10 char ____fmt[] = fmt; \
11 bpf_trace_printk(____fmt, sizeof(____fmt), \
12 ##__VA_ARGS__); \
13})
14
15SEC("sk_msg1") 8SEC("sk_msg1")
16int bpf_prog1(struct sk_msg_md *msg) 9int bpf_prog1(struct sk_msg_md *msg)
17{ 10{
diff --git a/tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c b/tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c
index 2ce7634a4012..bdc22be46f2e 100644
--- a/tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c
+++ b/tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c
@@ -5,13 +5,6 @@
5 5
6int _version SEC("version") = 1; 6int _version SEC("version") = 1;
7 7
8#define bpf_printk(fmt, ...) \
9({ \
10 char ____fmt[] = fmt; \
11 bpf_trace_printk(____fmt, sizeof(____fmt), \
12 ##__VA_ARGS__); \
13})
14
15struct bpf_map_def SEC("maps") sock_map_rx = { 8struct bpf_map_def SEC("maps") sock_map_rx = {
16 .type = BPF_MAP_TYPE_SOCKMAP, 9 .type = BPF_MAP_TYPE_SOCKMAP,
17 .key_size = sizeof(int), 10 .key_size = sizeof(int),
diff --git a/tools/testing/selftests/bpf/progs/test_lwt_seg6local.c b/tools/testing/selftests/bpf/progs/test_lwt_seg6local.c
index 0575751bc1bc..7c7cb3177463 100644
--- a/tools/testing/selftests/bpf/progs/test_lwt_seg6local.c
+++ b/tools/testing/selftests/bpf/progs/test_lwt_seg6local.c
@@ -6,13 +6,6 @@
6#include "bpf_helpers.h" 6#include "bpf_helpers.h"
7#include "bpf_endian.h" 7#include "bpf_endian.h"
8 8
9#define bpf_printk(fmt, ...) \
10({ \
11 char ____fmt[] = fmt; \
12 bpf_trace_printk(____fmt, sizeof(____fmt), \
13 ##__VA_ARGS__); \
14})
15
16/* Packet parsing state machine helpers. */ 9/* Packet parsing state machine helpers. */
17#define cursor_advance(_cursor, _len) \ 10#define cursor_advance(_cursor, _len) \
18 ({ void *_tmp = _cursor; _cursor += _len; _tmp; }) 11 ({ void *_tmp = _cursor; _cursor += _len; _tmp; })
diff --git a/tools/testing/selftests/bpf/progs/test_send_signal_kern.c b/tools/testing/selftests/bpf/progs/test_send_signal_kern.c
new file mode 100644
index 000000000000..45a1a1a2c345
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_send_signal_kern.c
@@ -0,0 +1,51 @@
1// SPDX-License-Identifier: GPL-2.0
2// Copyright (c) 2019 Facebook
3#include <linux/bpf.h>
4#include <linux/version.h>
5#include "bpf_helpers.h"
6
7struct bpf_map_def SEC("maps") info_map = {
8 .type = BPF_MAP_TYPE_ARRAY,
9 .key_size = sizeof(__u32),
10 .value_size = sizeof(__u64),
11 .max_entries = 1,
12};
13
14BPF_ANNOTATE_KV_PAIR(info_map, __u32, __u64);
15
16struct bpf_map_def SEC("maps") status_map = {
17 .type = BPF_MAP_TYPE_ARRAY,
18 .key_size = sizeof(__u32),
19 .value_size = sizeof(__u64),
20 .max_entries = 1,
21};
22
23BPF_ANNOTATE_KV_PAIR(status_map, __u32, __u64);
24
25SEC("send_signal_demo")
26int bpf_send_signal_test(void *ctx)
27{
28 __u64 *info_val, *status_val;
29 __u32 key = 0, pid, sig;
30 int ret;
31
32 status_val = bpf_map_lookup_elem(&status_map, &key);
33 if (!status_val || *status_val != 0)
34 return 0;
35
36 info_val = bpf_map_lookup_elem(&info_map, &key);
37 if (!info_val || *info_val == 0)
38 return 0;
39
40 sig = *info_val >> 32;
41 pid = *info_val & 0xffffFFFF;
42
43 if ((bpf_get_current_pid_tgid() >> 32) == pid) {
44 ret = bpf_send_signal(sig);
45 if (ret == 0)
46 *status_val = 1;
47 }
48
49 return 0;
50}
51char __license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_noinline.c b/tools/testing/selftests/bpf/progs/test_xdp_noinline.c
index 5e4aac74f9d0..4fe6aaad22a4 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_noinline.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_noinline.c
@@ -15,13 +15,6 @@
15#include <linux/udp.h> 15#include <linux/udp.h>
16#include "bpf_helpers.h" 16#include "bpf_helpers.h"
17 17
18#define bpf_printk(fmt, ...) \
19({ \
20 char ____fmt[] = fmt; \
21 bpf_trace_printk(____fmt, sizeof(____fmt), \
22 ##__VA_ARGS__); \
23})
24
25static __u32 rol32(__u32 word, unsigned int shift) 18static __u32 rol32(__u32 word, unsigned int shift)
26{ 19{
27 return (word << shift) | (word >> ((-shift) & 31)); 20 return (word << shift) | (word >> ((-shift) & 31));
diff --git a/tools/testing/selftests/bpf/progs/xdping_kern.c b/tools/testing/selftests/bpf/progs/xdping_kern.c
new file mode 100644
index 000000000000..87393e7c667c
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/xdping_kern.c
@@ -0,0 +1,184 @@
1// SPDX-License-Identifier: GPL-2.0
2/* Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved. */
3
4#define KBUILD_MODNAME "foo"
5#include <stddef.h>
6#include <string.h>
7#include <linux/bpf.h>
8#include <linux/icmp.h>
9#include <linux/in.h>
10#include <linux/if_ether.h>
11#include <linux/if_packet.h>
12#include <linux/if_vlan.h>
13#include <linux/ip.h>
14
15#include "bpf_helpers.h"
16#include "bpf_endian.h"
17
18#include "xdping.h"
19
20struct bpf_map_def SEC("maps") ping_map = {
21 .type = BPF_MAP_TYPE_HASH,
22 .key_size = sizeof(__u32),
23 .value_size = sizeof(struct pinginfo),
24 .max_entries = 256,
25};
26
/*
 * Swap the source and destination MAC addresses of an Ethernet
 * header in place.  data points at the start of the frame; the two
 * 6-byte addresses are handled as three 16-bit words each.
 */
static __always_inline void swap_src_dst_mac(void *data)
{
	unsigned short *w = data;
	unsigned short tmp0 = w[0], tmp1 = w[1], tmp2 = w[2];

	w[0] = w[3];
	w[1] = w[4];
	w[2] = w[5];
	w[3] = tmp0;
	w[4] = tmp1;
	w[5] = tmp2;
}
42
/*
 * Fold a 32-bit one's-complement accumulator down to a 16-bit
 * checksum: add the two halves twice (the first add can carry into
 * the top half) and return the bitwise complement.
 */
static __always_inline __u16 csum_fold_helper(__wsum sum)
{
	__u32 folded = (sum & 0xffff) + (sum >> 16);

	folded = (folded & 0xffff) + (folded >> 16);
	return ~folded;
}
48
49static __always_inline __u16 ipv4_csum(void *data_start, int data_size)
50{
51 __wsum sum;
52
53 sum = bpf_csum_diff(0, 0, data_start, data_size, 0);
54 return csum_fold_helper(sum);
55}
56
57#define ICMP_ECHO_LEN 64
58
59static __always_inline int icmp_check(struct xdp_md *ctx, int type)
60{
61 void *data_end = (void *)(long)ctx->data_end;
62 void *data = (void *)(long)ctx->data;
63 struct ethhdr *eth = data;
64 struct icmphdr *icmph;
65 struct iphdr *iph;
66
67 if (data + sizeof(*eth) + sizeof(*iph) + ICMP_ECHO_LEN > data_end)
68 return XDP_PASS;
69
70 if (eth->h_proto != bpf_htons(ETH_P_IP))
71 return XDP_PASS;
72
73 iph = data + sizeof(*eth);
74
75 if (iph->protocol != IPPROTO_ICMP)
76 return XDP_PASS;
77
78 if (bpf_ntohs(iph->tot_len) - sizeof(*iph) != ICMP_ECHO_LEN)
79 return XDP_PASS;
80
81 icmph = data + sizeof(*eth) + sizeof(*iph);
82
83 if (icmph->type != type)
84 return XDP_PASS;
85
86 return XDP_TX;
87}
88
/*
 * XDP echo client: consume an inbound ICMP echo reply matching our
 * outstanding sequence number, record the round-trip time in
 * ping_map, and -- until 'count' samples are collected -- rewrite
 * the reply into the next echo request and retransmit it (XDP_TX).
 */
89SEC("xdpclient")
90int xdping_client(struct xdp_md *ctx)
91{
	/* NOTE(review): data_end is unused here; all bounds checking
	 * happens inside icmp_check().
	 */
92	void *data_end = (void *)(long)ctx->data_end;
93	void *data = (void *)(long)ctx->data;
94	struct pinginfo *pinginfo = NULL;
95	struct ethhdr *eth = data;
96	struct icmphdr *icmph;
97	struct iphdr *iph;
98	__u64 recvtime;
99	__be32 raddr;
100	__be16 seq;
101	int ret;
102	__u8 i;
103
104	ret = icmp_check(ctx, ICMP_ECHOREPLY);
105
106	if (ret != XDP_TX)
107		return ret;
108
109	iph = data + sizeof(*eth);
110	icmph = data + sizeof(*eth) + sizeof(*iph);
111	raddr = iph->saddr;
112
113	/* Record time reply received. */
114	recvtime = bpf_ktime_get_ns();
	/* Both sides of the seq comparison are network byte order. */
115	pinginfo = bpf_map_lookup_elem(&ping_map, &raddr);
116	if (!pinginfo || pinginfo->seq != icmph->un.echo.sequence)
117		return XDP_PASS;
118
	/* A request is in flight (start != 0): store its RTT in the
	 * first free slot of times[].
	 */
119	if (pinginfo->start) {
120#pragma clang loop unroll(full)
121		for (i = 0; i < XDPING_MAX_COUNT; i++) {
122			if (pinginfo->times[i] == 0)
123				break;
124		}
125		/* verifier is fussy here... */
126		if (i < XDPING_MAX_COUNT) {
127			pinginfo->times[i] = recvtime -
128					     pinginfo->start;
129			pinginfo->start = 0;
130			i++;
131		}
132		/* No more space for values? */
133		if (i == pinginfo->count || i == XDPING_MAX_COUNT)
134			return XDP_PASS;
135	}
136
137	/* Now convert reply back into echo request. */
138	swap_src_dst_mac(data);
139	iph->saddr = iph->daddr;
140	iph->daddr = raddr;
141	icmph->type = ICMP_ECHO;
142	seq = bpf_htons(bpf_ntohs(icmph->un.echo.sequence) + 1);
143	icmph->un.echo.sequence = seq;
144	icmph->checksum = 0;
145	icmph->checksum = ipv4_csum(icmph, ICMP_ECHO_LEN);
146
	/* Remember the new expected sequence and restart the timer. */
147	pinginfo->seq = seq;
148	pinginfo->start = bpf_ktime_get_ns();
149
150	return XDP_TX;
151}
152
153SEC("xdpserver")
154int xdping_server(struct xdp_md *ctx)
155{
156 void *data_end = (void *)(long)ctx->data_end;
157 void *data = (void *)(long)ctx->data;
158 struct ethhdr *eth = data;
159 struct icmphdr *icmph;
160 struct iphdr *iph;
161 __be32 raddr;
162 int ret;
163
164 ret = icmp_check(ctx, ICMP_ECHO);
165
166 if (ret != XDP_TX)
167 return ret;
168
169 iph = data + sizeof(*eth);
170 icmph = data + sizeof(*eth) + sizeof(*iph);
171 raddr = iph->saddr;
172
173 /* Now convert request into echo reply. */
174 swap_src_dst_mac(data);
175 iph->saddr = iph->daddr;
176 iph->daddr = raddr;
177 icmph->type = ICMP_ECHOREPLY;
178 icmph->checksum = 0;
179 icmph->checksum = ipv4_csum(icmph, ICMP_ECHO_LEN);
180
181 return XDP_TX;
182}
183
184char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_btf.c b/tools/testing/selftests/bpf/test_btf.c
index 42c1ce988945..289daf54dec4 100644
--- a/tools/testing/selftests/bpf/test_btf.c
+++ b/tools/testing/selftests/bpf/test_btf.c
@@ -4025,62 +4025,13 @@ static struct btf_file_test file_tests[] = {
4025}, 4025},
4026}; 4026};
4027 4027
4028static int file_has_btf_elf(const char *fn, bool *has_btf_ext)
4029{
4030 Elf_Scn *scn = NULL;
4031 GElf_Ehdr ehdr;
4032 int ret = 0;
4033 int elf_fd;
4034 Elf *elf;
4035
4036 if (CHECK(elf_version(EV_CURRENT) == EV_NONE,
4037 "elf_version(EV_CURRENT) == EV_NONE"))
4038 return -1;
4039
4040 elf_fd = open(fn, O_RDONLY);
4041 if (CHECK(elf_fd == -1, "open(%s): errno:%d", fn, errno))
4042 return -1;
4043
4044 elf = elf_begin(elf_fd, ELF_C_READ, NULL);
4045 if (CHECK(!elf, "elf_begin(%s): %s", fn, elf_errmsg(elf_errno()))) {
4046 ret = -1;
4047 goto done;
4048 }
4049
4050 if (CHECK(!gelf_getehdr(elf, &ehdr), "!gelf_getehdr(%s)", fn)) {
4051 ret = -1;
4052 goto done;
4053 }
4054
4055 while ((scn = elf_nextscn(elf, scn))) {
4056 const char *sh_name;
4057 GElf_Shdr sh;
4058
4059 if (CHECK(gelf_getshdr(scn, &sh) != &sh,
4060 "file:%s gelf_getshdr != &sh", fn)) {
4061 ret = -1;
4062 goto done;
4063 }
4064
4065 sh_name = elf_strptr(elf, ehdr.e_shstrndx, sh.sh_name);
4066 if (!strcmp(sh_name, BTF_ELF_SEC))
4067 ret = 1;
4068 if (!strcmp(sh_name, BTF_EXT_ELF_SEC))
4069 *has_btf_ext = true;
4070 }
4071
4072done:
4073 close(elf_fd);
4074 elf_end(elf);
4075 return ret;
4076}
4077
4078static int do_test_file(unsigned int test_num) 4028static int do_test_file(unsigned int test_num)
4079{ 4029{
4080 const struct btf_file_test *test = &file_tests[test_num - 1]; 4030 const struct btf_file_test *test = &file_tests[test_num - 1];
4081 const char *expected_fnames[] = {"_dummy_tracepoint", 4031 const char *expected_fnames[] = {"_dummy_tracepoint",
4082 "test_long_fname_1", 4032 "test_long_fname_1",
4083 "test_long_fname_2"}; 4033 "test_long_fname_2"};
4034 struct btf_ext *btf_ext = NULL;
4084 struct bpf_prog_info info = {}; 4035 struct bpf_prog_info info = {};
4085 struct bpf_object *obj = NULL; 4036 struct bpf_object *obj = NULL;
4086 struct bpf_func_info *finfo; 4037 struct bpf_func_info *finfo;
@@ -4095,15 +4046,19 @@ static int do_test_file(unsigned int test_num)
4095 fprintf(stderr, "BTF libbpf test[%u] (%s): ", test_num, 4046 fprintf(stderr, "BTF libbpf test[%u] (%s): ", test_num,
4096 test->file); 4047 test->file);
4097 4048
4098 err = file_has_btf_elf(test->file, &has_btf_ext); 4049 btf = btf__parse_elf(test->file, &btf_ext);
4099 if (err == -1) 4050 if (IS_ERR(btf)) {
4100 return err; 4051 if (PTR_ERR(btf) == -ENOENT) {
4101 4052 fprintf(stderr, "SKIP. No ELF %s found", BTF_ELF_SEC);
4102 if (err == 0) { 4053 skip_cnt++;
4103 fprintf(stderr, "SKIP. No ELF %s found", BTF_ELF_SEC); 4054 return 0;
4104 skip_cnt++; 4055 }
4105 return 0; 4056 return PTR_ERR(btf);
4106 } 4057 }
4058 btf__free(btf);
4059
4060 has_btf_ext = btf_ext != NULL;
4061 btf_ext__free(btf_ext);
4107 4062
4108 obj = bpf_object__open(test->file); 4063 obj = bpf_object__open(test->file);
4109 if (CHECK(IS_ERR(obj), "obj: %ld", PTR_ERR(obj))) 4064 if (CHECK(IS_ERR(obj), "obj: %ld", PTR_ERR(obj)))
diff --git a/tools/testing/selftests/bpf/test_btf_dump.c b/tools/testing/selftests/bpf/test_btf_dump.c
new file mode 100644
index 000000000000..8f850823d35f
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_btf_dump.c
@@ -0,0 +1,143 @@
1#include <stdio.h>
2#include <stdlib.h>
3#include <string.h>
4#include <unistd.h>
5#include <errno.h>
6#include <linux/err.h>
7#include <btf.h>
8
9#define CHECK(condition, format...) ({ \
10 int __ret = !!(condition); \
11 if (__ret) { \
12 fprintf(stderr, "%s:%d:FAIL ", __func__, __LINE__); \
13 fprintf(stderr, format); \
14 } \
15 __ret; \
16})
17
/*
 * btf_dump printing callback: ctx is the output FILE *, so forward
 * the formatted text straight to it.
 */
void btf_dump_printf(void *ctx, const char *fmt, va_list args)
{
	FILE *out = ctx;

	vfprintf(out, fmt, args);
}
22
23struct btf_dump_test_case {
24 const char *name;
25 struct btf_dump_opts opts;
26} btf_dump_test_cases[] = {
27 {.name = "btf_dump_test_case_syntax", .opts = {}},
28 {.name = "btf_dump_test_case_ordering", .opts = {}},
29 {.name = "btf_dump_test_case_padding", .opts = {}},
30 {.name = "btf_dump_test_case_packing", .opts = {}},
31 {.name = "btf_dump_test_case_bitfields", .opts = {}},
32 {.name = "btf_dump_test_case_multidim", .opts = {}},
33 {.name = "btf_dump_test_case_namespacing", .opts = {}},
34};
35
36static int btf_dump_all_types(const struct btf *btf,
37 const struct btf_dump_opts *opts)
38{
39 size_t type_cnt = btf__get_nr_types(btf);
40 struct btf_dump *d;
41 int err = 0, id;
42
43 d = btf_dump__new(btf, NULL, opts, btf_dump_printf);
44 if (IS_ERR(d))
45 return PTR_ERR(d);
46
47 for (id = 1; id <= type_cnt; id++) {
48 err = btf_dump__dump_type(d, id);
49 if (err)
50 goto done;
51 }
52
53done:
54 btf_dump__free(d);
55 return err;
56}
57
58int test_btf_dump_case(int n, struct btf_dump_test_case *test_case)
59{
60 char test_file[256], out_file[256], diff_cmd[1024];
61 struct btf *btf = NULL;
62 int err = 0, fd = -1;
63 FILE *f = NULL;
64
65 fprintf(stderr, "Test case #%d (%s): ", n, test_case->name);
66
67 snprintf(test_file, sizeof(test_file), "%s.o", test_case->name);
68
69 btf = btf__parse_elf(test_file, NULL);
70 if (CHECK(IS_ERR(btf),
71 "failed to load test BTF: %ld\n", PTR_ERR(btf))) {
72 err = -PTR_ERR(btf);
73 btf = NULL;
74 goto done;
75 }
76
77 snprintf(out_file, sizeof(out_file),
78 "/tmp/%s.output.XXXXXX", test_case->name);
79 fd = mkstemp(out_file);
80 if (CHECK(fd < 0, "failed to create temp output file: %d\n", fd)) {
81 err = fd;
82 goto done;
83 }
84 f = fdopen(fd, "w");
85 if (CHECK(f == NULL, "failed to open temp output file: %s(%d)\n",
86 strerror(errno), errno)) {
87 close(fd);
88 goto done;
89 }
90
91 test_case->opts.ctx = f;
92 err = btf_dump_all_types(btf, &test_case->opts);
93 fclose(f);
94 close(fd);
95 if (CHECK(err, "failure during C dumping: %d\n", err)) {
96 goto done;
97 }
98
99 snprintf(test_file, sizeof(test_file), "progs/%s.c", test_case->name);
100 /*
101 * Diff test output and expected test output, contained between
102 * START-EXPECTED-OUTPUT and END-EXPECTED-OUTPUT lines in test case.
103 * For expected output lines, everything before '*' is stripped out.
104 * Also lines containing comment start and comment end markers are
105 * ignored.
106 */
107 snprintf(diff_cmd, sizeof(diff_cmd),
108 "awk '/START-EXPECTED-OUTPUT/{out=1;next} "
109 "/END-EXPECTED-OUTPUT/{out=0} "
110 "/\\/\\*|\\*\\//{next} " /* ignore comment start/end lines */
111 "out {sub(/^[ \\t]*\\*/, \"\"); print}' '%s' | diff -u - '%s'",
112 test_file, out_file);
113 err = system(diff_cmd);
114 if (CHECK(err,
115 "differing test output, output=%s, err=%d, diff cmd:\n%s\n",
116 out_file, err, diff_cmd))
117 goto done;
118
119 remove(out_file);
120 fprintf(stderr, "OK\n");
121
122done:
123 btf__free(btf);
124 return err;
125}
126
127int main() {
128 int test_case_cnt, i, err, failed = 0;
129
130 test_case_cnt = sizeof(btf_dump_test_cases) /
131 sizeof(btf_dump_test_cases[0]);
132
133 for (i = 0; i < test_case_cnt; i++) {
134 err = test_btf_dump_case(i, &btf_dump_test_cases[i]);
135 if (err)
136 failed++;
137 }
138
139 fprintf(stderr, "%d tests succeeded, %d tests failed.\n",
140 test_case_cnt - failed, failed);
141
142 return failed;
143}
diff --git a/samples/bpf/test_cgrp2_attach2.c b/tools/testing/selftests/bpf/test_cgroup_attach.c
index 0bb6507256b7..7671909ee1cb 100644
--- a/samples/bpf/test_cgrp2_attach2.c
+++ b/tools/testing/selftests/bpf/test_cgroup_attach.c
@@ -1,3 +1,5 @@
1// SPDX-License-Identifier: GPL-2.0
2
1/* eBPF example program: 3/* eBPF example program:
2 * 4 *
3 * - Creates arraymap in kernel with 4 bytes keys and 8 byte values 5 * - Creates arraymap in kernel with 4 bytes keys and 8 byte values
@@ -25,20 +27,27 @@
25#include <sys/resource.h> 27#include <sys/resource.h>
26#include <sys/time.h> 28#include <sys/time.h>
27#include <unistd.h> 29#include <unistd.h>
30#include <linux/filter.h>
28 31
29#include <linux/bpf.h> 32#include <linux/bpf.h>
30#include <bpf/bpf.h> 33#include <bpf/bpf.h>
31 34
32#include "bpf_insn.h" 35#include "bpf_util.h"
33#include "bpf_rlimit.h" 36#include "bpf_rlimit.h"
34#include "cgroup_helpers.h" 37#include "cgroup_helpers.h"
35 38
36#define FOO "/foo" 39#define FOO "/foo"
37#define BAR "/foo/bar/" 40#define BAR "/foo/bar/"
38#define PING_CMD "ping -c1 -w1 127.0.0.1 > /dev/null" 41#define PING_CMD "ping -q -c1 -w1 127.0.0.1 > /dev/null"
39 42
40char bpf_log_buf[BPF_LOG_BUF_SIZE]; 43char bpf_log_buf[BPF_LOG_BUF_SIZE];
41 44
45#ifdef DEBUG
46#define debug(args...) printf(args)
47#else
48#define debug(args...)
49#endif
50
42static int prog_load(int verdict) 51static int prog_load(int verdict)
43{ 52{
44 int ret; 53 int ret;
@@ -89,7 +98,7 @@ static int test_foo_bar(void)
89 goto err; 98 goto err;
90 } 99 }
91 100
92 printf("Attached DROP prog. This ping in cgroup /foo should fail...\n"); 101 debug("Attached DROP prog. This ping in cgroup /foo should fail...\n");
93 assert(system(PING_CMD) != 0); 102 assert(system(PING_CMD) != 0);
94 103
95 /* Create cgroup /foo/bar, get fd, and join it */ 104 /* Create cgroup /foo/bar, get fd, and join it */
@@ -100,7 +109,7 @@ static int test_foo_bar(void)
100 if (join_cgroup(BAR)) 109 if (join_cgroup(BAR))
101 goto err; 110 goto err;
102 111
103 printf("Attached DROP prog. This ping in cgroup /foo/bar should fail...\n"); 112 debug("Attached DROP prog. This ping in cgroup /foo/bar should fail...\n");
104 assert(system(PING_CMD) != 0); 113 assert(system(PING_CMD) != 0);
105 114
106 if (bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, 115 if (bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS,
@@ -109,7 +118,7 @@ static int test_foo_bar(void)
109 goto err; 118 goto err;
110 } 119 }
111 120
112 printf("Attached PASS prog. This ping in cgroup /foo/bar should pass...\n"); 121 debug("Attached PASS prog. This ping in cgroup /foo/bar should pass...\n");
113 assert(system(PING_CMD) == 0); 122 assert(system(PING_CMD) == 0);
114 123
115 if (bpf_prog_detach(bar, BPF_CGROUP_INET_EGRESS)) { 124 if (bpf_prog_detach(bar, BPF_CGROUP_INET_EGRESS)) {
@@ -117,7 +126,7 @@ static int test_foo_bar(void)
117 goto err; 126 goto err;
118 } 127 }
119 128
120 printf("Detached PASS from /foo/bar while DROP is attached to /foo.\n" 129 debug("Detached PASS from /foo/bar while DROP is attached to /foo.\n"
121 "This ping in cgroup /foo/bar should fail...\n"); 130 "This ping in cgroup /foo/bar should fail...\n");
122 assert(system(PING_CMD) != 0); 131 assert(system(PING_CMD) != 0);
123 132
@@ -132,7 +141,7 @@ static int test_foo_bar(void)
132 goto err; 141 goto err;
133 } 142 }
134 143
135 printf("Attached PASS from /foo/bar and detached DROP from /foo.\n" 144 debug("Attached PASS from /foo/bar and detached DROP from /foo.\n"
136 "This ping in cgroup /foo/bar should pass...\n"); 145 "This ping in cgroup /foo/bar should pass...\n");
137 assert(system(PING_CMD) == 0); 146 assert(system(PING_CMD) == 0);
138 147
@@ -199,9 +208,9 @@ out:
199 close(bar); 208 close(bar);
200 cleanup_cgroup_environment(); 209 cleanup_cgroup_environment();
201 if (!rc) 210 if (!rc)
202 printf("### override:PASS\n"); 211 printf("#override:PASS\n");
203 else 212 else
204 printf("### override:FAIL\n"); 213 printf("#override:FAIL\n");
205 return rc; 214 return rc;
206} 215}
207 216
@@ -441,19 +450,122 @@ out:
441 close(cg5); 450 close(cg5);
442 cleanup_cgroup_environment(); 451 cleanup_cgroup_environment();
443 if (!rc) 452 if (!rc)
444 printf("### multi:PASS\n"); 453 printf("#multi:PASS\n");
445 else 454 else
446 printf("### multi:FAIL\n"); 455 printf("#multi:FAIL\n");
447 return rc; 456 return rc;
448} 457}
449 458
450int main(int argc, char **argv) 459static int test_autodetach(void)
451{ 460{
452 int rc = 0; 461 __u32 prog_cnt = 4, attach_flags;
462 int allow_prog[2] = {0};
463 __u32 prog_ids[2] = {0};
464 int cg = 0, i, rc = -1;
465 void *ptr = NULL;
466 int attempts;
467
468 for (i = 0; i < ARRAY_SIZE(allow_prog); i++) {
469 allow_prog[i] = prog_load_cnt(1, 1 << i);
470 if (!allow_prog[i])
471 goto err;
472 }
473
474 if (setup_cgroup_environment())
475 goto err;
476
477 /* create a cgroup, attach two programs and remember their ids */
478 cg = create_and_get_cgroup("/cg_autodetach");
479 if (cg < 0)
480 goto err;
481
482 if (join_cgroup("/cg_autodetach"))
483 goto err;
484
485 for (i = 0; i < ARRAY_SIZE(allow_prog); i++) {
486 if (bpf_prog_attach(allow_prog[i], cg, BPF_CGROUP_INET_EGRESS,
487 BPF_F_ALLOW_MULTI)) {
488 log_err("Attaching prog[%d] to cg:egress", i);
489 goto err;
490 }
491 }
492
493 /* make sure that programs are attached and run some traffic */
494 assert(bpf_prog_query(cg, BPF_CGROUP_INET_EGRESS, 0, &attach_flags,
495 prog_ids, &prog_cnt) == 0);
496 assert(system(PING_CMD) == 0);
497
498 /* allocate some memory (4Mb) to pin the original cgroup */
499 ptr = malloc(4 * (1 << 20));
500 if (!ptr)
501 goto err;
502
503 /* close programs and cgroup fd */
504 for (i = 0; i < ARRAY_SIZE(allow_prog); i++) {
505 close(allow_prog[i]);
506 allow_prog[i] = 0;
507 }
508
509 close(cg);
510 cg = 0;
453 511
454 rc = test_foo_bar(); 512 /* leave the cgroup and remove it. don't detach programs */
455 if (rc) 513 cleanup_cgroup_environment();
456 return rc; 514
515 /* wait for the asynchronous auto-detachment.
516 * wait for no more than 5 sec and give up.
517 */
518 for (i = 0; i < ARRAY_SIZE(prog_ids); i++) {
519 for (attempts = 5; attempts >= 0; attempts--) {
520 int fd = bpf_prog_get_fd_by_id(prog_ids[i]);
521
522 if (fd < 0)
523 break;
524
525 /* don't leave the fd open */
526 close(fd);
527
528 if (!attempts)
529 goto err;
530
531 sleep(1);
532 }
533 }
534
535 rc = 0;
536err:
537 for (i = 0; i < ARRAY_SIZE(allow_prog); i++)
538 if (allow_prog[i] > 0)
539 close(allow_prog[i]);
540 if (cg)
541 close(cg);
542 free(ptr);
543 cleanup_cgroup_environment();
544 if (!rc)
545 printf("#autodetach:PASS\n");
546 else
547 printf("#autodetach:FAIL\n");
548 return rc;
549}
550
551int main(void)
552{
553 int (*tests[])(void) = {
554 test_foo_bar,
555 test_multiprog,
556 test_autodetach,
557 };
558 int errors = 0;
559 int i;
560
561 for (i = 0; i < ARRAY_SIZE(tests); i++)
562 if (tests[i]())
563 errors++;
564
565 if (errors)
566 printf("test_cgroup_attach:FAIL\n");
567 else
568 printf("test_cgroup_attach:PASS\n");
457 569
458 return test_multiprog(); 570 return errors ? EXIT_FAILURE : EXIT_SUCCESS;
459} 571}
diff --git a/tools/testing/selftests/bpf/test_hashmap.c b/tools/testing/selftests/bpf/test_hashmap.c
new file mode 100644
index 000000000000..b64094c981e3
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_hashmap.c
@@ -0,0 +1,382 @@
1// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
2
3/*
4 * Tests for libbpf's hashmap.
5 *
6 * Copyright (c) 2019 Facebook
7 */
8#include <stdio.h>
9#include <errno.h>
10#include <linux/err.h>
11#include "hashmap.h"
12
13#define CHECK(condition, format...) ({ \
14 int __ret = !!(condition); \
15 if (__ret) { \
16 fprintf(stderr, "%s:%d:FAIL ", __func__, __LINE__); \
17 fprintf(stderr, format); \
18 } \
19 __ret; \
20})
21
22size_t hash_fn(const void *k, void *ctx)
23{
24 return (long)k;
25}
26
27bool equal_fn(const void *a, const void *b, void *ctx)
28{
29 return (long)a == (long)b;
30}
31
32static inline size_t next_pow_2(size_t n)
33{
34 size_t r = 1;
35
36 while (r < n)
37 r <<= 1;
38 return r;
39}
40
41static inline size_t exp_cap(size_t sz)
42{
43 size_t r = next_pow_2(sz);
44
45 if (sz * 4 / 3 > r)
46 r <<= 1;
47 return r;
48}
49
50#define ELEM_CNT 62
51
52int test_hashmap_generic(void)
53{
54 struct hashmap_entry *entry, *tmp;
55 int err, bkt, found_cnt, i;
56 long long found_msk;
57 struct hashmap *map;
58
59 fprintf(stderr, "%s: ", __func__);
60
61 map = hashmap__new(hash_fn, equal_fn, NULL);
62 if (CHECK(IS_ERR(map), "failed to create map: %ld\n", PTR_ERR(map)))
63 return 1;
64
65 for (i = 0; i < ELEM_CNT; i++) {
66 const void *oldk, *k = (const void *)(long)i;
67 void *oldv, *v = (void *)(long)(1024 + i);
68
69 err = hashmap__update(map, k, v, &oldk, &oldv);
70 if (CHECK(err != -ENOENT, "unexpected result: %d\n", err))
71 return 1;
72
73 if (i % 2) {
74 err = hashmap__add(map, k, v);
75 } else {
76 err = hashmap__set(map, k, v, &oldk, &oldv);
77 if (CHECK(oldk != NULL || oldv != NULL,
78 "unexpected k/v: %p=%p\n", oldk, oldv))
79 return 1;
80 }
81
82 if (CHECK(err, "failed to add k/v %ld = %ld: %d\n",
83 (long)k, (long)v, err))
84 return 1;
85
86 if (CHECK(!hashmap__find(map, k, &oldv),
87 "failed to find key %ld\n", (long)k))
88 return 1;
89 if (CHECK(oldv != v, "found value is wrong: %ld\n", (long)oldv))
90 return 1;
91 }
92
93 if (CHECK(hashmap__size(map) != ELEM_CNT,
94 "invalid map size: %zu\n", hashmap__size(map)))
95 return 1;
96 if (CHECK(hashmap__capacity(map) != exp_cap(hashmap__size(map)),
97 "unexpected map capacity: %zu\n", hashmap__capacity(map)))
98 return 1;
99
100 found_msk = 0;
101 hashmap__for_each_entry(map, entry, bkt) {
102 long k = (long)entry->key;
103 long v = (long)entry->value;
104
105 found_msk |= 1ULL << k;
106 if (CHECK(v - k != 1024, "invalid k/v pair: %ld = %ld\n", k, v))
107 return 1;
108 }
109 if (CHECK(found_msk != (1ULL << ELEM_CNT) - 1,
110 "not all keys iterated: %llx\n", found_msk))
111 return 1;
112
113 for (i = 0; i < ELEM_CNT; i++) {
114 const void *oldk, *k = (const void *)(long)i;
115 void *oldv, *v = (void *)(long)(256 + i);
116
117 err = hashmap__add(map, k, v);
118 if (CHECK(err != -EEXIST, "unexpected add result: %d\n", err))
119 return 1;
120
121 if (i % 2)
122 err = hashmap__update(map, k, v, &oldk, &oldv);
123 else
124 err = hashmap__set(map, k, v, &oldk, &oldv);
125
126 if (CHECK(err, "failed to update k/v %ld = %ld: %d\n",
127 (long)k, (long)v, err))
128 return 1;
129 if (CHECK(!hashmap__find(map, k, &oldv),
130 "failed to find key %ld\n", (long)k))
131 return 1;
132 if (CHECK(oldv != v, "found value is wrong: %ld\n", (long)oldv))
133 return 1;
134 }
135
136 if (CHECK(hashmap__size(map) != ELEM_CNT,
137 "invalid updated map size: %zu\n", hashmap__size(map)))
138 return 1;
139 if (CHECK(hashmap__capacity(map) != exp_cap(hashmap__size(map)),
140 "unexpected map capacity: %zu\n", hashmap__capacity(map)))
141 return 1;
142
143 found_msk = 0;
144 hashmap__for_each_entry_safe(map, entry, tmp, bkt) {
145 long k = (long)entry->key;
146 long v = (long)entry->value;
147
148 found_msk |= 1ULL << k;
149 if (CHECK(v - k != 256,
150 "invalid updated k/v pair: %ld = %ld\n", k, v))
151 return 1;
152 }
153 if (CHECK(found_msk != (1ULL << ELEM_CNT) - 1,
154 "not all keys iterated after update: %llx\n", found_msk))
155 return 1;
156
157 found_cnt = 0;
158 hashmap__for_each_key_entry(map, entry, (void *)0) {
159 found_cnt++;
160 }
161 if (CHECK(!found_cnt, "didn't find any entries for key 0\n"))
162 return 1;
163
164 found_msk = 0;
165 found_cnt = 0;
166 hashmap__for_each_key_entry_safe(map, entry, tmp, (void *)0) {
167 const void *oldk, *k;
168 void *oldv, *v;
169
170 k = entry->key;
171 v = entry->value;
172
173 found_cnt++;
174 found_msk |= 1ULL << (long)k;
175
176 if (CHECK(!hashmap__delete(map, k, &oldk, &oldv),
177 "failed to delete k/v %ld = %ld\n",
178 (long)k, (long)v))
179 return 1;
180 if (CHECK(oldk != k || oldv != v,
181 "invalid deleted k/v: expected %ld = %ld, got %ld = %ld\n",
182 (long)k, (long)v, (long)oldk, (long)oldv))
183 return 1;
184 if (CHECK(hashmap__delete(map, k, &oldk, &oldv),
185 "unexpectedly deleted k/v %ld = %ld\n",
186 (long)oldk, (long)oldv))
187 return 1;
188 }
189
190 if (CHECK(!found_cnt || !found_msk,
191 "didn't delete any key entries\n"))
192 return 1;
193 if (CHECK(hashmap__size(map) != ELEM_CNT - found_cnt,
194 "invalid updated map size (already deleted: %d): %zu\n",
195 found_cnt, hashmap__size(map)))
196 return 1;
197 if (CHECK(hashmap__capacity(map) != exp_cap(hashmap__size(map)),
198 "unexpected map capacity: %zu\n", hashmap__capacity(map)))
199 return 1;
200
201 hashmap__for_each_entry_safe(map, entry, tmp, bkt) {
202 const void *oldk, *k;
203 void *oldv, *v;
204
205 k = entry->key;
206 v = entry->value;
207
208 found_cnt++;
209 found_msk |= 1ULL << (long)k;
210
211 if (CHECK(!hashmap__delete(map, k, &oldk, &oldv),
212 "failed to delete k/v %ld = %ld\n",
213 (long)k, (long)v))
214 return 1;
215 if (CHECK(oldk != k || oldv != v,
216 "invalid old k/v: expect %ld = %ld, got %ld = %ld\n",
217 (long)k, (long)v, (long)oldk, (long)oldv))
218 return 1;
219 if (CHECK(hashmap__delete(map, k, &oldk, &oldv),
220 "unexpectedly deleted k/v %ld = %ld\n",
221 (long)k, (long)v))
222 return 1;
223 }
224
225 if (CHECK(found_cnt != ELEM_CNT || found_msk != (1ULL << ELEM_CNT) - 1,
226 "not all keys were deleted: found_cnt:%d, found_msk:%llx\n",
227 found_cnt, found_msk))
228 return 1;
229 if (CHECK(hashmap__size(map) != 0,
230 "invalid updated map size (already deleted: %d): %zu\n",
231 found_cnt, hashmap__size(map)))
232 return 1;
233
234 found_cnt = 0;
235 hashmap__for_each_entry(map, entry, bkt) {
236 CHECK(false, "unexpected map entries left: %ld = %ld\n",
237 (long)entry->key, (long)entry->value);
238 return 1;
239 }
240
241 hashmap__free(map);
242 hashmap__for_each_entry(map, entry, bkt) {
243 CHECK(false, "unexpected map entries left: %ld = %ld\n",
244 (long)entry->key, (long)entry->value);
245 return 1;
246 }
247
248 fprintf(stderr, "OK\n");
249 return 0;
250}
251
252size_t collision_hash_fn(const void *k, void *ctx)
253{
254 return 0;
255}
256
257int test_hashmap_multimap(void)
258{
259 void *k1 = (void *)0, *k2 = (void *)1;
260 struct hashmap_entry *entry;
261 struct hashmap *map;
262 long found_msk;
263 int err, bkt;
264
265 fprintf(stderr, "%s: ", __func__);
266
267 /* force collisions */
268 map = hashmap__new(collision_hash_fn, equal_fn, NULL);
269 if (CHECK(IS_ERR(map), "failed to create map: %ld\n", PTR_ERR(map)))
270 return 1;
271
272
273 /* set up multimap:
274 * [0] -> 1, 2, 4;
275 * [1] -> 8, 16, 32;
276 */
277 err = hashmap__append(map, k1, (void *)1);
278 if (CHECK(err, "failed to add k/v: %d\n", err))
279 return 1;
280 err = hashmap__append(map, k1, (void *)2);
281 if (CHECK(err, "failed to add k/v: %d\n", err))
282 return 1;
283 err = hashmap__append(map, k1, (void *)4);
284 if (CHECK(err, "failed to add k/v: %d\n", err))
285 return 1;
286
287 err = hashmap__append(map, k2, (void *)8);
288 if (CHECK(err, "failed to add k/v: %d\n", err))
289 return 1;
290 err = hashmap__append(map, k2, (void *)16);
291 if (CHECK(err, "failed to add k/v: %d\n", err))
292 return 1;
293 err = hashmap__append(map, k2, (void *)32);
294 if (CHECK(err, "failed to add k/v: %d\n", err))
295 return 1;
296
297 if (CHECK(hashmap__size(map) != 6,
298 "invalid map size: %zu\n", hashmap__size(map)))
299 return 1;
300
301 /* verify global iteration still works and sees all values */
302 found_msk = 0;
303 hashmap__for_each_entry(map, entry, bkt) {
304 found_msk |= (long)entry->value;
305 }
306 if (CHECK(found_msk != (1 << 6) - 1,
307 "not all keys iterated: %lx\n", found_msk))
308 return 1;
309
310 /* iterate values for key 1 */
311 found_msk = 0;
312 hashmap__for_each_key_entry(map, entry, k1) {
313 found_msk |= (long)entry->value;
314 }
315 if (CHECK(found_msk != (1 | 2 | 4),
316 "invalid k1 values: %lx\n", found_msk))
317 return 1;
318
319 /* iterate values for key 2 */
320 found_msk = 0;
321 hashmap__for_each_key_entry(map, entry, k2) {
322 found_msk |= (long)entry->value;
323 }
324 if (CHECK(found_msk != (8 | 16 | 32),
325 "invalid k2 values: %lx\n", found_msk))
326 return 1;
327
328 fprintf(stderr, "OK\n");
329 return 0;
330}
331
332int test_hashmap_empty()
333{
334 struct hashmap_entry *entry;
335 int bkt;
336 struct hashmap *map;
337 void *k = (void *)0;
338
339 fprintf(stderr, "%s: ", __func__);
340
341 /* force collisions */
342 map = hashmap__new(hash_fn, equal_fn, NULL);
343 if (CHECK(IS_ERR(map), "failed to create map: %ld\n", PTR_ERR(map)))
344 return 1;
345
346 if (CHECK(hashmap__size(map) != 0,
347 "invalid map size: %zu\n", hashmap__size(map)))
348 return 1;
349 if (CHECK(hashmap__capacity(map) != 0,
350 "invalid map capacity: %zu\n", hashmap__capacity(map)))
351 return 1;
352 if (CHECK(hashmap__find(map, k, NULL), "unexpected find\n"))
353 return 1;
354 if (CHECK(hashmap__delete(map, k, NULL, NULL), "unexpected delete\n"))
355 return 1;
356
357 hashmap__for_each_entry(map, entry, bkt) {
358 CHECK(false, "unexpected iterated entry\n");
359 return 1;
360 }
361 hashmap__for_each_key_entry(map, entry, k) {
362 CHECK(false, "unexpected key entry\n");
363 return 1;
364 }
365
366 fprintf(stderr, "OK\n");
367 return 0;
368}
369
370int main(int argc, char **argv)
371{
372 bool failed = false;
373
374 if (test_hashmap_generic())
375 failed = true;
376 if (test_hashmap_multimap())
377 failed = true;
378 if (test_hashmap_empty())
379 failed = true;
380
381 return failed;
382}
diff --git a/tools/testing/selftests/bpf/test_sock_addr.c b/tools/testing/selftests/bpf/test_sock_addr.c
index 3f110eaaf29c..5d0c4f0baeff 100644
--- a/tools/testing/selftests/bpf/test_sock_addr.c
+++ b/tools/testing/selftests/bpf/test_sock_addr.c
@@ -745,6 +745,7 @@ static int load_path(const struct sock_addr_test *test, const char *path)
745 attr.file = path; 745 attr.file = path;
746 attr.prog_type = BPF_PROG_TYPE_CGROUP_SOCK_ADDR; 746 attr.prog_type = BPF_PROG_TYPE_CGROUP_SOCK_ADDR;
747 attr.expected_attach_type = test->expected_attach_type; 747 attr.expected_attach_type = test->expected_attach_type;
748 attr.prog_flags = BPF_F_TEST_RND_HI32;
748 749
749 if (bpf_prog_load_xattr(&attr, &obj, &prog_fd)) { 750 if (bpf_prog_load_xattr(&attr, &obj, &prog_fd)) {
750 if (test->expected_result != LOAD_REJECT) 751 if (test->expected_result != LOAD_REJECT)
diff --git a/tools/testing/selftests/bpf/test_sock_fields.c b/tools/testing/selftests/bpf/test_sock_fields.c
index e089477fa0a3..f0fc103261a4 100644
--- a/tools/testing/selftests/bpf/test_sock_fields.c
+++ b/tools/testing/selftests/bpf/test_sock_fields.c
@@ -414,6 +414,7 @@ int main(int argc, char **argv)
414 struct bpf_prog_load_attr attr = { 414 struct bpf_prog_load_attr attr = {
415 .file = "test_sock_fields_kern.o", 415 .file = "test_sock_fields_kern.o",
416 .prog_type = BPF_PROG_TYPE_CGROUP_SKB, 416 .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
417 .prog_flags = BPF_F_TEST_RND_HI32,
417 }; 418 };
418 int cgroup_fd, egress_fd, ingress_fd, err; 419 int cgroup_fd, egress_fd, ingress_fd, err;
419 struct bpf_program *ingress_prog; 420 struct bpf_program *ingress_prog;
diff --git a/tools/testing/selftests/bpf/test_socket_cookie.c b/tools/testing/selftests/bpf/test_socket_cookie.c
index e51d63786ff8..cac8ee57a013 100644
--- a/tools/testing/selftests/bpf/test_socket_cookie.c
+++ b/tools/testing/selftests/bpf/test_socket_cookie.c
@@ -148,6 +148,7 @@ static int run_test(int cgfd)
148 memset(&attr, 0, sizeof(attr)); 148 memset(&attr, 0, sizeof(attr));
149 attr.file = SOCKET_COOKIE_PROG; 149 attr.file = SOCKET_COOKIE_PROG;
150 attr.prog_type = BPF_PROG_TYPE_UNSPEC; 150 attr.prog_type = BPF_PROG_TYPE_UNSPEC;
151 attr.prog_flags = BPF_F_TEST_RND_HI32;
151 152
152 err = bpf_prog_load_xattr(&attr, &pobj, &prog_fd); 153 err = bpf_prog_load_xattr(&attr, &pobj, &prog_fd);
153 if (err) { 154 if (err) {
diff --git a/tools/testing/selftests/bpf/test_sockmap_kern.h b/tools/testing/selftests/bpf/test_sockmap_kern.h
index e7639f66a941..4e7d3da21357 100644
--- a/tools/testing/selftests/bpf/test_sockmap_kern.h
+++ b/tools/testing/selftests/bpf/test_sockmap_kern.h
@@ -28,13 +28,6 @@
28 * are established and verdicts are decided. 28 * are established and verdicts are decided.
29 */ 29 */
30 30
31#define bpf_printk(fmt, ...) \
32({ \
33 char ____fmt[] = fmt; \
34 bpf_trace_printk(____fmt, sizeof(____fmt), \
35 ##__VA_ARGS__); \
36})
37
38struct bpf_map_def SEC("maps") sock_map = { 31struct bpf_map_def SEC("maps") sock_map = {
39 .type = TEST_MAP_TYPE, 32 .type = TEST_MAP_TYPE,
40 .key_size = sizeof(int), 33 .key_size = sizeof(int),
diff --git a/tools/testing/selftests/bpf/test_stub.c b/tools/testing/selftests/bpf/test_stub.c
new file mode 100644
index 000000000000..84e81a89e2f9
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_stub.c
@@ -0,0 +1,40 @@
1// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
2/* Copyright (C) 2019 Netronome Systems, Inc. */
3
4#include <bpf/bpf.h>
5#include <bpf/libbpf.h>
6#include <string.h>
7
8int bpf_prog_test_load(const char *file, enum bpf_prog_type type,
9 struct bpf_object **pobj, int *prog_fd)
10{
11 struct bpf_prog_load_attr attr;
12
13 memset(&attr, 0, sizeof(struct bpf_prog_load_attr));
14 attr.file = file;
15 attr.prog_type = type;
16 attr.expected_attach_type = 0;
17 attr.prog_flags = BPF_F_TEST_RND_HI32;
18
19 return bpf_prog_load_xattr(&attr, pobj, prog_fd);
20}
21
22int bpf_test_load_program(enum bpf_prog_type type, const struct bpf_insn *insns,
23 size_t insns_cnt, const char *license,
24 __u32 kern_version, char *log_buf,
25 size_t log_buf_sz)
26{
27 struct bpf_load_program_attr load_attr;
28
29 memset(&load_attr, 0, sizeof(struct bpf_load_program_attr));
30 load_attr.prog_type = type;
31 load_attr.expected_attach_type = 0;
32 load_attr.name = NULL;
33 load_attr.insns = insns;
34 load_attr.insns_cnt = insns_cnt;
35 load_attr.license = license;
36 load_attr.kern_version = kern_version;
37 load_attr.prog_flags = BPF_F_TEST_RND_HI32;
38
39 return bpf_load_program_xattr(&load_attr, log_buf, log_buf_sz);
40}
diff --git a/tools/testing/selftests/bpf/test_tunnel.sh b/tools/testing/selftests/bpf/test_tunnel.sh
index 546aee3e9fb4..bd12ec97a44d 100755
--- a/tools/testing/selftests/bpf/test_tunnel.sh
+++ b/tools/testing/selftests/bpf/test_tunnel.sh
@@ -696,30 +696,57 @@ check_err()
696 696
697bpf_tunnel_test() 697bpf_tunnel_test()
698{ 698{
699 local errors=0
700
699 echo "Testing GRE tunnel..." 701 echo "Testing GRE tunnel..."
700 test_gre 702 test_gre
703 errors=$(( $errors + $? ))
704
701 echo "Testing IP6GRE tunnel..." 705 echo "Testing IP6GRE tunnel..."
702 test_ip6gre 706 test_ip6gre
707 errors=$(( $errors + $? ))
708
703 echo "Testing IP6GRETAP tunnel..." 709 echo "Testing IP6GRETAP tunnel..."
704 test_ip6gretap 710 test_ip6gretap
711 errors=$(( $errors + $? ))
712
705 echo "Testing ERSPAN tunnel..." 713 echo "Testing ERSPAN tunnel..."
706 test_erspan v2 714 test_erspan v2
715 errors=$(( $errors + $? ))
716
707 echo "Testing IP6ERSPAN tunnel..." 717 echo "Testing IP6ERSPAN tunnel..."
708 test_ip6erspan v2 718 test_ip6erspan v2
719 errors=$(( $errors + $? ))
720
709 echo "Testing VXLAN tunnel..." 721 echo "Testing VXLAN tunnel..."
710 test_vxlan 722 test_vxlan
723 errors=$(( $errors + $? ))
724
711 echo "Testing IP6VXLAN tunnel..." 725 echo "Testing IP6VXLAN tunnel..."
712 test_ip6vxlan 726 test_ip6vxlan
727 errors=$(( $errors + $? ))
728
713 echo "Testing GENEVE tunnel..." 729 echo "Testing GENEVE tunnel..."
714 test_geneve 730 test_geneve
731 errors=$(( $errors + $? ))
732
715 echo "Testing IP6GENEVE tunnel..." 733 echo "Testing IP6GENEVE tunnel..."
716 test_ip6geneve 734 test_ip6geneve
735 errors=$(( $errors + $? ))
736
717 echo "Testing IPIP tunnel..." 737 echo "Testing IPIP tunnel..."
718 test_ipip 738 test_ipip
739 errors=$(( $errors + $? ))
740
719 echo "Testing IPIP6 tunnel..." 741 echo "Testing IPIP6 tunnel..."
720 test_ipip6 742 test_ipip6
743 errors=$(( $errors + $? ))
744
721 echo "Testing IPSec tunnel..." 745 echo "Testing IPSec tunnel..."
722 test_xfrm_tunnel 746 test_xfrm_tunnel
747 errors=$(( $errors + $? ))
748
749 return $errors
723} 750}
724 751
725trap cleanup 0 3 6 752trap cleanup 0 3 6
@@ -728,4 +755,9 @@ trap cleanup_exit 2 9
728cleanup 755cleanup
729bpf_tunnel_test 756bpf_tunnel_test
730 757
758if [ $? -ne 0 ]; then
759 echo -e "$(basename $0): ${RED}FAIL${NC}"
760 exit 1
761fi
762echo -e "$(basename $0): ${GREEN}PASS${NC}"
731exit 0 763exit 0
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index ccd896b98cac..cd0248c54e25 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -138,32 +138,36 @@ static void bpf_fill_ld_abs_vlan_push_pop(struct bpf_test *self)
138loop: 138loop:
139 for (j = 0; j < PUSH_CNT; j++) { 139 for (j = 0; j < PUSH_CNT; j++) {
140 insn[i++] = BPF_LD_ABS(BPF_B, 0); 140 insn[i++] = BPF_LD_ABS(BPF_B, 0);
141 insn[i] = BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0x34, len - i - 2); 141 /* jump to error label */
142 insn[i] = BPF_JMP32_IMM(BPF_JNE, BPF_REG_0, 0x34, len - i - 3);
142 i++; 143 i++;
143 insn[i++] = BPF_MOV64_REG(BPF_REG_1, BPF_REG_6); 144 insn[i++] = BPF_MOV64_REG(BPF_REG_1, BPF_REG_6);
144 insn[i++] = BPF_MOV64_IMM(BPF_REG_2, 1); 145 insn[i++] = BPF_MOV64_IMM(BPF_REG_2, 1);
145 insn[i++] = BPF_MOV64_IMM(BPF_REG_3, 2); 146 insn[i++] = BPF_MOV64_IMM(BPF_REG_3, 2);
146 insn[i++] = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, 147 insn[i++] = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
147 BPF_FUNC_skb_vlan_push), 148 BPF_FUNC_skb_vlan_push),
148 insn[i] = BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, len - i - 2); 149 insn[i] = BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, len - i - 3);
149 i++; 150 i++;
150 } 151 }
151 152
152 for (j = 0; j < PUSH_CNT; j++) { 153 for (j = 0; j < PUSH_CNT; j++) {
153 insn[i++] = BPF_LD_ABS(BPF_B, 0); 154 insn[i++] = BPF_LD_ABS(BPF_B, 0);
154 insn[i] = BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0x34, len - i - 2); 155 insn[i] = BPF_JMP32_IMM(BPF_JNE, BPF_REG_0, 0x34, len - i - 3);
155 i++; 156 i++;
156 insn[i++] = BPF_MOV64_REG(BPF_REG_1, BPF_REG_6); 157 insn[i++] = BPF_MOV64_REG(BPF_REG_1, BPF_REG_6);
157 insn[i++] = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, 158 insn[i++] = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
158 BPF_FUNC_skb_vlan_pop), 159 BPF_FUNC_skb_vlan_pop),
159 insn[i] = BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, len - i - 2); 160 insn[i] = BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, len - i - 3);
160 i++; 161 i++;
161 } 162 }
162 if (++k < 5) 163 if (++k < 5)
163 goto loop; 164 goto loop;
164 165
165 for (; i < len - 1; i++) 166 for (; i < len - 3; i++)
166 insn[i] = BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, 0xbef); 167 insn[i] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 0xbef);
168 insn[len - 3] = BPF_JMP_A(1);
169 /* error label */
170 insn[len - 2] = BPF_MOV32_IMM(BPF_REG_0, 0);
167 insn[len - 1] = BPF_EXIT_INSN(); 171 insn[len - 1] = BPF_EXIT_INSN();
168 self->prog_len = len; 172 self->prog_len = len;
169} 173}
@@ -171,8 +175,13 @@ loop:
171static void bpf_fill_jump_around_ld_abs(struct bpf_test *self) 175static void bpf_fill_jump_around_ld_abs(struct bpf_test *self)
172{ 176{
173 struct bpf_insn *insn = self->fill_insns; 177 struct bpf_insn *insn = self->fill_insns;
174 /* jump range is limited to 16 bit. every ld_abs is replaced by 6 insns */ 178 /* jump range is limited to 16 bit. every ld_abs is replaced by 6 insns,
175 unsigned int len = (1 << 15) / 6; 179 * but on arches like arm, ppc etc, there will be one BPF_ZEXT inserted
180 * to extend the error value of the inlined ld_abs sequence which then
181 * contains 7 insns. so, set the dividend to 7 so the testcase could
182 * work on all arches.
183 */
184 unsigned int len = (1 << 15) / 7;
176 int i = 0; 185 int i = 0;
177 186
178 insn[i++] = BPF_MOV64_REG(BPF_REG_6, BPF_REG_1); 187 insn[i++] = BPF_MOV64_REG(BPF_REG_6, BPF_REG_1);
@@ -210,33 +219,35 @@ static void bpf_fill_rand_ld_dw(struct bpf_test *self)
210 self->retval = (uint32_t)res; 219 self->retval = (uint32_t)res;
211} 220}
212 221
213/* test the sequence of 1k jumps */ 222#define MAX_JMP_SEQ 8192
223
224/* test the sequence of 8k jumps */
214static void bpf_fill_scale1(struct bpf_test *self) 225static void bpf_fill_scale1(struct bpf_test *self)
215{ 226{
216 struct bpf_insn *insn = self->fill_insns; 227 struct bpf_insn *insn = self->fill_insns;
217 int i = 0, k = 0; 228 int i = 0, k = 0;
218 229
219 insn[i++] = BPF_MOV64_REG(BPF_REG_6, BPF_REG_1); 230 insn[i++] = BPF_MOV64_REG(BPF_REG_6, BPF_REG_1);
220 /* test to check that the sequence of 1024 jumps is acceptable */ 231 /* test to check that the long sequence of jumps is acceptable */
221 while (k++ < 1024) { 232 while (k++ < MAX_JMP_SEQ) {
222 insn[i++] = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, 233 insn[i++] = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
223 BPF_FUNC_get_prandom_u32); 234 BPF_FUNC_get_prandom_u32);
224 insn[i++] = BPF_JMP_IMM(BPF_JGT, BPF_REG_0, bpf_semi_rand_get(), 2); 235 insn[i++] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, bpf_semi_rand_get(), 2);
225 insn[i++] = BPF_MOV64_REG(BPF_REG_1, BPF_REG_10); 236 insn[i++] = BPF_MOV64_REG(BPF_REG_1, BPF_REG_10);
226 insn[i++] = BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 237 insn[i++] = BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6,
227 -8 * (k % 64 + 1)); 238 -8 * (k % 64 + 1));
228 } 239 }
229 /* every jump adds 1024 steps to insn_processed, so to stay exactly 240 /* every jump adds 1 step to insn_processed, so to stay exactly
230 * within 1m limit add MAX_TEST_INSNS - 1025 MOVs and 1 EXIT 241 * within 1m limit add MAX_TEST_INSNS - MAX_JMP_SEQ - 1 MOVs and 1 EXIT
231 */ 242 */
232 while (i < MAX_TEST_INSNS - 1025) 243 while (i < MAX_TEST_INSNS - MAX_JMP_SEQ - 1)
233 insn[i++] = BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, 42); 244 insn[i++] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 42);
234 insn[i] = BPF_EXIT_INSN(); 245 insn[i] = BPF_EXIT_INSN();
235 self->prog_len = i + 1; 246 self->prog_len = i + 1;
236 self->retval = 42; 247 self->retval = 42;
237} 248}
238 249
239/* test the sequence of 1k jumps in inner most function (function depth 8)*/ 250/* test the sequence of 8k jumps in inner most function (function depth 8)*/
240static void bpf_fill_scale2(struct bpf_test *self) 251static void bpf_fill_scale2(struct bpf_test *self)
241{ 252{
242 struct bpf_insn *insn = self->fill_insns; 253 struct bpf_insn *insn = self->fill_insns;
@@ -248,20 +259,21 @@ static void bpf_fill_scale2(struct bpf_test *self)
248 insn[i++] = BPF_EXIT_INSN(); 259 insn[i++] = BPF_EXIT_INSN();
249 } 260 }
250 insn[i++] = BPF_MOV64_REG(BPF_REG_6, BPF_REG_1); 261 insn[i++] = BPF_MOV64_REG(BPF_REG_6, BPF_REG_1);
251 /* test to check that the sequence of 1024 jumps is acceptable */ 262 /* test to check that the long sequence of jumps is acceptable */
252 while (k++ < 1024) { 263 k = 0;
264 while (k++ < MAX_JMP_SEQ) {
253 insn[i++] = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, 265 insn[i++] = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
254 BPF_FUNC_get_prandom_u32); 266 BPF_FUNC_get_prandom_u32);
255 insn[i++] = BPF_JMP_IMM(BPF_JGT, BPF_REG_0, bpf_semi_rand_get(), 2); 267 insn[i++] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, bpf_semi_rand_get(), 2);
256 insn[i++] = BPF_MOV64_REG(BPF_REG_1, BPF_REG_10); 268 insn[i++] = BPF_MOV64_REG(BPF_REG_1, BPF_REG_10);
257 insn[i++] = BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 269 insn[i++] = BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6,
258 -8 * (k % (64 - 4 * FUNC_NEST) + 1)); 270 -8 * (k % (64 - 4 * FUNC_NEST) + 1));
259 } 271 }
260 /* every jump adds 1024 steps to insn_processed, so to stay exactly 272 /* every jump adds 1 step to insn_processed, so to stay exactly
261 * within 1m limit add MAX_TEST_INSNS - 1025 MOVs and 1 EXIT 273 * within 1m limit add MAX_TEST_INSNS - MAX_JMP_SEQ - 1 MOVs and 1 EXIT
262 */ 274 */
263 while (i < MAX_TEST_INSNS - 1025) 275 while (i < MAX_TEST_INSNS - MAX_JMP_SEQ - 1)
264 insn[i++] = BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, 42); 276 insn[i++] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 42);
265 insn[i] = BPF_EXIT_INSN(); 277 insn[i] = BPF_EXIT_INSN();
266 self->prog_len = i + 1; 278 self->prog_len = i + 1;
267 self->retval = 42; 279 self->retval = 42;
@@ -870,7 +882,7 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
870 if (fixup_skips != skips) 882 if (fixup_skips != skips)
871 return; 883 return;
872 884
873 pflags = 0; 885 pflags = BPF_F_TEST_RND_HI32;
874 if (test->flags & F_LOAD_WITH_STRICT_ALIGNMENT) 886 if (test->flags & F_LOAD_WITH_STRICT_ALIGNMENT)
875 pflags |= BPF_F_STRICT_ALIGNMENT; 887 pflags |= BPF_F_STRICT_ALIGNMENT;
876 if (test->flags & F_NEEDS_EFFICIENT_UNALIGNED_ACCESS) 888 if (test->flags & F_NEEDS_EFFICIENT_UNALIGNED_ACCESS)
diff --git a/tools/testing/selftests/bpf/test_xdping.sh b/tools/testing/selftests/bpf/test_xdping.sh
new file mode 100755
index 000000000000..c2f0ddb45531
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_xdping.sh
@@ -0,0 +1,99 @@
1#!/bin/bash
2# SPDX-License-Identifier: GPL-2.0
3
4# xdping tests
5# Here we setup and teardown configuration required to run
6# xdping, exercising its options.
7#
8# Setup is similar to test_tunnel tests but without the tunnel.
9#
10# Topology:
11# ---------
12# root namespace | tc_ns0 namespace
13# |
14# ---------- | ----------
15# | veth1 | --------- | veth0 |
16# ---------- peer ----------
17#
18# Device Configuration
19# --------------------
20# Root namespace with BPF
21# Device names and addresses:
22# veth1 IP: 10.1.1.200
23# xdp added to veth1, xdpings originate from here.
24#
25# Namespace tc_ns0 with BPF
26# Device names and addresses:
27# veth0 IPv4: 10.1.1.100
28# For some tests xdping run in server mode here.
29#
30
31readonly TARGET_IP="10.1.1.100"
32readonly TARGET_NS="xdp_ns0"
33
34readonly LOCAL_IP="10.1.1.200"
35
36setup()
37{
38 ip netns add $TARGET_NS
39 ip link add veth0 type veth peer name veth1
40 ip link set veth0 netns $TARGET_NS
41 ip netns exec $TARGET_NS ip addr add ${TARGET_IP}/24 dev veth0
42 ip addr add ${LOCAL_IP}/24 dev veth1
43 ip netns exec $TARGET_NS ip link set veth0 up
44 ip link set veth1 up
45}
46
47cleanup()
48{
49 set +e
50 ip netns delete $TARGET_NS 2>/dev/null
51 ip link del veth1 2>/dev/null
52 if [[ $server_pid -ne 0 ]]; then
53 kill -TERM $server_pid
54 fi
55}
56
57test()
58{
59 client_args="$1"
60 server_args="$2"
61
62 echo "Test client args '$client_args'; server args '$server_args'"
63
64 server_pid=0
65 if [[ -n "$server_args" ]]; then
66 ip netns exec $TARGET_NS ./xdping $server_args &
67 server_pid=$!
68 sleep 10
69 fi
70 ./xdping $client_args $TARGET_IP
71
72 if [[ $server_pid -ne 0 ]]; then
73 kill -TERM $server_pid
74 server_pid=0
75 fi
76
77 echo "Test client args '$client_args'; server args '$server_args': PASS"
78}
79
80set -e
81
82server_pid=0
83
84trap cleanup EXIT
85
86setup
87
88for server_args in "" "-I veth0 -s -S" ; do
89 # client in skb mode
90 client_args="-I veth1 -S"
91 test "$client_args" "$server_args"
92
93 # client with count of 10 RTT measurements.
94 client_args="-I veth1 -S -c 10"
95 test "$client_args" "$server_args"
96done
97
98echo "OK. All tests passed"
99exit 0
diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c
index 9a9fc6c9b70b..b47f205f0310 100644
--- a/tools/testing/selftests/bpf/trace_helpers.c
+++ b/tools/testing/selftests/bpf/trace_helpers.c
@@ -30,9 +30,7 @@ int load_kallsyms(void)
30 if (!f) 30 if (!f)
31 return -ENOENT; 31 return -ENOENT;
32 32
33 while (!feof(f)) { 33 while (fgets(buf, sizeof(buf), f)) {
34 if (!fgets(buf, sizeof(buf), f))
35 break;
36 if (sscanf(buf, "%p %c %s", &addr, &symbol, func) != 3) 34 if (sscanf(buf, "%p %c %s", &addr, &symbol, func) != 3)
37 break; 35 break;
38 if (!addr) 36 if (!addr)
diff --git a/tools/testing/selftests/bpf/xdping.c b/tools/testing/selftests/bpf/xdping.c
new file mode 100644
index 000000000000..d60a343b1371
--- /dev/null
+++ b/tools/testing/selftests/bpf/xdping.c
@@ -0,0 +1,258 @@
1// SPDX-License-Identifier: GPL-2.0
2/* Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved. */
3
4#include <linux/bpf.h>
5#include <linux/if_link.h>
6#include <arpa/inet.h>
7#include <assert.h>
8#include <errno.h>
9#include <signal.h>
10#include <stdio.h>
11#include <stdlib.h>
12#include <string.h>
13#include <unistd.h>
14#include <libgen.h>
15#include <sys/resource.h>
16#include <net/if.h>
17#include <sys/types.h>
18#include <sys/socket.h>
19#include <netdb.h>
20
21#include "bpf/bpf.h"
22#include "bpf/libbpf.h"
23
24#include "xdping.h"
25
26static int ifindex;
27static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
28
29static void cleanup(int sig)
30{
31 bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);
32 if (sig)
33 exit(1);
34}
35
36static int get_stats(int fd, __u16 count, __u32 raddr)
37{
38 struct pinginfo pinginfo = { 0 };
39 char inaddrbuf[INET_ADDRSTRLEN];
40 struct in_addr inaddr;
41 __u16 i;
42
43 inaddr.s_addr = raddr;
44
45 printf("\nXDP RTT data:\n");
46
47 if (bpf_map_lookup_elem(fd, &raddr, &pinginfo)) {
48 perror("bpf_map_lookup elem: ");
49 return 1;
50 }
51
52 for (i = 0; i < count; i++) {
53 if (pinginfo.times[i] == 0)
54 break;
55
56 printf("64 bytes from %s: icmp_seq=%d ttl=64 time=%#.5f ms\n",
57 inet_ntop(AF_INET, &inaddr, inaddrbuf,
58 sizeof(inaddrbuf)),
59 count + i + 1,
60 (double)pinginfo.times[i]/1000000);
61 }
62
63 if (i < count) {
64 fprintf(stderr, "Expected %d samples, got %d.\n", count, i);
65 return 1;
66 }
67
68 bpf_map_delete_elem(fd, &raddr);
69
70 return 0;
71}
72
73static void show_usage(const char *prog)
74{
75 fprintf(stderr,
76 "usage: %s [OPTS] -I interface destination\n\n"
77 "OPTS:\n"
78 " -c count Stop after sending count requests\n"
79 " (default %d, max %d)\n"
80 " -I interface interface name\n"
81 " -N Run in driver mode\n"
82 " -s Server mode\n"
83 " -S Run in skb mode\n",
84 prog, XDPING_DEFAULT_COUNT, XDPING_MAX_COUNT);
85}
86
87int main(int argc, char **argv)
88{
89 __u32 mode_flags = XDP_FLAGS_DRV_MODE | XDP_FLAGS_SKB_MODE;
90 struct addrinfo *a, hints = { .ai_family = AF_INET };
91 struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
92 __u16 count = XDPING_DEFAULT_COUNT;
93 struct pinginfo pinginfo = { 0 };
94 const char *optstr = "c:I:NsS";
95 struct bpf_program *main_prog;
96 int prog_fd = -1, map_fd = -1;
97 struct sockaddr_in rin;
98 struct bpf_object *obj;
99 struct bpf_map *map;
100 char *ifname = NULL;
101 char filename[256];
102 int opt, ret = 1;
103 __u32 raddr = 0;
104 int server = 0;
105 char cmd[256];
106
107 while ((opt = getopt(argc, argv, optstr)) != -1) {
108 switch (opt) {
109 case 'c':
110 count = atoi(optarg);
111 if (count < 1 || count > XDPING_MAX_COUNT) {
112 fprintf(stderr,
113 "min count is 1, max count is %d\n",
114 XDPING_MAX_COUNT);
115 return 1;
116 }
117 break;
118 case 'I':
119 ifname = optarg;
120 ifindex = if_nametoindex(ifname);
121 if (!ifindex) {
122 fprintf(stderr, "Could not get interface %s\n",
123 ifname);
124 return 1;
125 }
126 break;
127 case 'N':
128 xdp_flags |= XDP_FLAGS_DRV_MODE;
129 break;
130 case 's':
131 /* use server program */
132 server = 1;
133 break;
134 case 'S':
135 xdp_flags |= XDP_FLAGS_SKB_MODE;
136 break;
137 default:
138 show_usage(basename(argv[0]));
139 return 1;
140 }
141 }
142
143 if (!ifname) {
144 show_usage(basename(argv[0]));
145 return 1;
146 }
147 if (!server && optind == argc) {
148 show_usage(basename(argv[0]));
149 return 1;
150 }
151
152 if ((xdp_flags & mode_flags) == mode_flags) {
153 fprintf(stderr, "-N or -S can be specified, not both.\n");
154 show_usage(basename(argv[0]));
155 return 1;
156 }
157
158 if (!server) {
159 /* Only supports IPv4; see hints initiailization above. */
160 if (getaddrinfo(argv[optind], NULL, &hints, &a) || !a) {
161 fprintf(stderr, "Could not resolve %s\n", argv[optind]);
162 return 1;
163 }
164 memcpy(&rin, a->ai_addr, sizeof(rin));
165 raddr = rin.sin_addr.s_addr;
166 freeaddrinfo(a);
167 }
168
169 if (setrlimit(RLIMIT_MEMLOCK, &r)) {
170 perror("setrlimit(RLIMIT_MEMLOCK)");
171 return 1;
172 }
173
174 snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
175
176 if (bpf_prog_load(filename, BPF_PROG_TYPE_XDP, &obj, &prog_fd)) {
177 fprintf(stderr, "load of %s failed\n", filename);
178 return 1;
179 }
180
181 main_prog = bpf_object__find_program_by_title(obj,
182 server ? "xdpserver" :
183 "xdpclient");
184 if (main_prog)
185 prog_fd = bpf_program__fd(main_prog);
186 if (!main_prog || prog_fd < 0) {
187 fprintf(stderr, "could not find xdping program");
188 return 1;
189 }
190
191 map = bpf_map__next(NULL, obj);
192 if (map)
193 map_fd = bpf_map__fd(map);
194 if (!map || map_fd < 0) {
195 fprintf(stderr, "Could not find ping map");
196 goto done;
197 }
198
199 signal(SIGINT, cleanup);
200 signal(SIGTERM, cleanup);
201
202 printf("Setting up XDP for %s, please wait...\n", ifname);
203
204 printf("XDP setup disrupts network connectivity, hit Ctrl+C to quit\n");
205
206 if (bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags) < 0) {
207 fprintf(stderr, "Link set xdp fd failed for %s\n", ifname);
208 goto done;
209 }
210
211 if (server) {
212 close(prog_fd);
213 close(map_fd);
214 printf("Running server on %s; press Ctrl+C to exit...\n",
215 ifname);
216 do { } while (1);
217 }
218
219 /* Start xdping-ing from last regular ping reply, e.g. for a count
220 * of 10 ICMP requests, we start xdping-ing using reply with seq number
221 * 10. The reason the last "real" ping RTT is much higher is that
222 * the ping program sees the ICMP reply associated with the last
223 * XDP-generated packet, so ping doesn't get a reply until XDP is done.
224 */
225 pinginfo.seq = htons(count);
226 pinginfo.count = count;
227
228 if (bpf_map_update_elem(map_fd, &raddr, &pinginfo, BPF_ANY)) {
229 fprintf(stderr, "could not communicate with BPF map: %s\n",
230 strerror(errno));
231 cleanup(0);
232 goto done;
233 }
234
235 /* We need to wait for XDP setup to complete. */
236 sleep(10);
237
238 snprintf(cmd, sizeof(cmd), "ping -c %d -I %s %s",
239 count, ifname, argv[optind]);
240
241 printf("\nNormal ping RTT data\n");
242 printf("[Ignore final RTT; it is distorted by XDP using the reply]\n");
243
244 ret = system(cmd);
245
246 if (!ret)
247 ret = get_stats(map_fd, count, raddr);
248
249 cleanup(0);
250
251done:
252 if (prog_fd > 0)
253 close(prog_fd);
254 if (map_fd > 0)
255 close(map_fd);
256
257 return ret;
258}
diff --git a/tools/testing/selftests/bpf/xdping.h b/tools/testing/selftests/bpf/xdping.h
new file mode 100644
index 000000000000..afc578df77be
--- /dev/null
+++ b/tools/testing/selftests/bpf/xdping.h
@@ -0,0 +1,13 @@
1/* SPDX-License-Identifier: GPL-2.0 */
2/* Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved. */
3
4#define XDPING_MAX_COUNT 10
5#define XDPING_DEFAULT_COUNT 4
6
7struct pinginfo {
8 __u64 start;
9 __be16 seq;
10 __u16 count;
11 __u32 pad;
12 __u64 times[XDPING_MAX_COUNT];
13};