aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: David S. Miller <davem@davemloft.net> 2019-03-16 15:20:08 -0400
committer: David S. Miller <davem@davemloft.net> 2019-03-16 15:20:08 -0400
commit: 0aedadcf6b4863a0d6eaad05a26425cc52944027 (patch)
tree: d64f583138091cfe886c45dabdfa5cb8efb41eb4
parent: 4589e28db46ee4961edfd794c5bb43887d38c8e5 (diff)
parent: 86be36f6502c52ddb4b85938145324fd07332da1 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf
Daniel Borkmann says: ==================== pull-request: bpf 2019-03-16 The following pull-request contains BPF updates for your *net* tree. The main changes are: 1) Fix a umem memory leak on cleanup in AF_XDP, from Björn. 2) Fix BTF to properly resolve forward-declared enums into their corresponding full enum definition types during deduplication, from Andrii. 3) Fix libbpf to reject invalid flags in xsk_socket__create(), from Magnus. 4) Fix accessing invalid pointer returned from bpf_tcp_sock() and bpf_sk_fullsock() after bpf_sk_release() was called, from Martin. 5) Fix generation of load/store DW instructions in PPC JIT, from Naveen. 6) Various fixes in BPF helper function documentation in bpf.h UAPI header used to generate the bpf-helpers(7) man page, from Quentin. 7) Fix segfault in BPF test_progs when prog loading failed, from Yonghong. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- arch/powerpc/include/asm/ppc-opcode.h 2
-rw-r--r-- arch/powerpc/net/bpf_jit.h 17
-rw-r--r-- arch/powerpc/net/bpf_jit32.h 4
-rw-r--r-- arch/powerpc/net/bpf_jit64.h 20
-rw-r--r-- arch/powerpc/net/bpf_jit_comp64.c 12
-rw-r--r-- include/linux/bpf.h 1
-rw-r--r-- include/linux/bpf_verifier.h 40
-rw-r--r-- include/net/xdp_sock.h 1
-rw-r--r-- include/uapi/linux/bpf.h 188
-rw-r--r-- kernel/bpf/verifier.c 131
-rw-r--r-- net/core/filter.c 27
-rw-r--r-- net/xdp/xdp_umem.c 19
-rw-r--r-- tools/include/uapi/linux/bpf.h 188
-rw-r--r-- tools/lib/bpf/btf.c 51
-rw-r--r-- tools/lib/bpf/libbpf.c 13
-rw-r--r-- tools/lib/bpf/xsk.c 15
-rw-r--r-- tools/testing/selftests/bpf/bpf_helpers.h 2
-rw-r--r-- tools/testing/selftests/bpf/prog_tests/map_lock.c 2
-rw-r--r-- tools/testing/selftests/bpf/prog_tests/spinlock.c 2
-rw-r--r-- tools/testing/selftests/bpf/progs/test_sock_fields_kern.c 88
-rw-r--r-- tools/testing/selftests/bpf/test_btf.c 44
-rw-r--r-- tools/testing/selftests/bpf/test_sock_fields.c 134
-rw-r--r-- tools/testing/selftests/bpf/verifier/ref_tracking.c 168
-rw-r--r-- tools/testing/selftests/bpf/verifier/sock.c 4
24 files changed, 883 insertions, 290 deletions
diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
index c5698a523bb1..23f7ed796f38 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -302,6 +302,7 @@
302/* Misc instructions for BPF compiler */ 302/* Misc instructions for BPF compiler */
303#define PPC_INST_LBZ 0x88000000 303#define PPC_INST_LBZ 0x88000000
304#define PPC_INST_LD 0xe8000000 304#define PPC_INST_LD 0xe8000000
305#define PPC_INST_LDX 0x7c00002a
305#define PPC_INST_LHZ 0xa0000000 306#define PPC_INST_LHZ 0xa0000000
306#define PPC_INST_LWZ 0x80000000 307#define PPC_INST_LWZ 0x80000000
307#define PPC_INST_LHBRX 0x7c00062c 308#define PPC_INST_LHBRX 0x7c00062c
@@ -309,6 +310,7 @@
309#define PPC_INST_STB 0x98000000 310#define PPC_INST_STB 0x98000000
310#define PPC_INST_STH 0xb0000000 311#define PPC_INST_STH 0xb0000000
311#define PPC_INST_STD 0xf8000000 312#define PPC_INST_STD 0xf8000000
313#define PPC_INST_STDX 0x7c00012a
312#define PPC_INST_STDU 0xf8000001 314#define PPC_INST_STDU 0xf8000001
313#define PPC_INST_STW 0x90000000 315#define PPC_INST_STW 0x90000000
314#define PPC_INST_STWU 0x94000000 316#define PPC_INST_STWU 0x94000000
diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h
index 549e9490ff2a..dcac37745b05 100644
--- a/arch/powerpc/net/bpf_jit.h
+++ b/arch/powerpc/net/bpf_jit.h
@@ -51,6 +51,8 @@
51#define PPC_LIS(r, i) PPC_ADDIS(r, 0, i) 51#define PPC_LIS(r, i) PPC_ADDIS(r, 0, i)
52#define PPC_STD(r, base, i) EMIT(PPC_INST_STD | ___PPC_RS(r) | \ 52#define PPC_STD(r, base, i) EMIT(PPC_INST_STD | ___PPC_RS(r) | \
53 ___PPC_RA(base) | ((i) & 0xfffc)) 53 ___PPC_RA(base) | ((i) & 0xfffc))
54#define PPC_STDX(r, base, b) EMIT(PPC_INST_STDX | ___PPC_RS(r) | \
55 ___PPC_RA(base) | ___PPC_RB(b))
54#define PPC_STDU(r, base, i) EMIT(PPC_INST_STDU | ___PPC_RS(r) | \ 56#define PPC_STDU(r, base, i) EMIT(PPC_INST_STDU | ___PPC_RS(r) | \
55 ___PPC_RA(base) | ((i) & 0xfffc)) 57 ___PPC_RA(base) | ((i) & 0xfffc))
56#define PPC_STW(r, base, i) EMIT(PPC_INST_STW | ___PPC_RS(r) | \ 58#define PPC_STW(r, base, i) EMIT(PPC_INST_STW | ___PPC_RS(r) | \
@@ -65,7 +67,9 @@
65#define PPC_LBZ(r, base, i) EMIT(PPC_INST_LBZ | ___PPC_RT(r) | \ 67#define PPC_LBZ(r, base, i) EMIT(PPC_INST_LBZ | ___PPC_RT(r) | \
66 ___PPC_RA(base) | IMM_L(i)) 68 ___PPC_RA(base) | IMM_L(i))
67#define PPC_LD(r, base, i) EMIT(PPC_INST_LD | ___PPC_RT(r) | \ 69#define PPC_LD(r, base, i) EMIT(PPC_INST_LD | ___PPC_RT(r) | \
68 ___PPC_RA(base) | IMM_L(i)) 70 ___PPC_RA(base) | ((i) & 0xfffc))
71#define PPC_LDX(r, base, b) EMIT(PPC_INST_LDX | ___PPC_RT(r) | \
72 ___PPC_RA(base) | ___PPC_RB(b))
69#define PPC_LWZ(r, base, i) EMIT(PPC_INST_LWZ | ___PPC_RT(r) | \ 73#define PPC_LWZ(r, base, i) EMIT(PPC_INST_LWZ | ___PPC_RT(r) | \
70 ___PPC_RA(base) | IMM_L(i)) 74 ___PPC_RA(base) | IMM_L(i))
71#define PPC_LHZ(r, base, i) EMIT(PPC_INST_LHZ | ___PPC_RT(r) | \ 75#define PPC_LHZ(r, base, i) EMIT(PPC_INST_LHZ | ___PPC_RT(r) | \
@@ -85,17 +89,6 @@
85 ___PPC_RA(a) | ___PPC_RB(b)) 89 ___PPC_RA(a) | ___PPC_RB(b))
86#define PPC_BPF_STDCX(s, a, b) EMIT(PPC_INST_STDCX | ___PPC_RS(s) | \ 90#define PPC_BPF_STDCX(s, a, b) EMIT(PPC_INST_STDCX | ___PPC_RS(s) | \
87 ___PPC_RA(a) | ___PPC_RB(b)) 91 ___PPC_RA(a) | ___PPC_RB(b))
88
89#ifdef CONFIG_PPC64
90#define PPC_BPF_LL(r, base, i) do { PPC_LD(r, base, i); } while(0)
91#define PPC_BPF_STL(r, base, i) do { PPC_STD(r, base, i); } while(0)
92#define PPC_BPF_STLU(r, base, i) do { PPC_STDU(r, base, i); } while(0)
93#else
94#define PPC_BPF_LL(r, base, i) do { PPC_LWZ(r, base, i); } while(0)
95#define PPC_BPF_STL(r, base, i) do { PPC_STW(r, base, i); } while(0)
96#define PPC_BPF_STLU(r, base, i) do { PPC_STWU(r, base, i); } while(0)
97#endif
98
99#define PPC_CMPWI(a, i) EMIT(PPC_INST_CMPWI | ___PPC_RA(a) | IMM_L(i)) 92#define PPC_CMPWI(a, i) EMIT(PPC_INST_CMPWI | ___PPC_RA(a) | IMM_L(i))
100#define PPC_CMPDI(a, i) EMIT(PPC_INST_CMPDI | ___PPC_RA(a) | IMM_L(i)) 93#define PPC_CMPDI(a, i) EMIT(PPC_INST_CMPDI | ___PPC_RA(a) | IMM_L(i))
101#define PPC_CMPW(a, b) EMIT(PPC_INST_CMPW | ___PPC_RA(a) | \ 94#define PPC_CMPW(a, b) EMIT(PPC_INST_CMPW | ___PPC_RA(a) | \
diff --git a/arch/powerpc/net/bpf_jit32.h b/arch/powerpc/net/bpf_jit32.h
index dc50a8d4b3b9..21744d8aa053 100644
--- a/arch/powerpc/net/bpf_jit32.h
+++ b/arch/powerpc/net/bpf_jit32.h
@@ -122,6 +122,10 @@ DECLARE_LOAD_FUNC(sk_load_byte_msh);
122#define PPC_NTOHS_OFFS(r, base, i) PPC_LHZ_OFFS(r, base, i) 122#define PPC_NTOHS_OFFS(r, base, i) PPC_LHZ_OFFS(r, base, i)
123#endif 123#endif
124 124
125#define PPC_BPF_LL(r, base, i) do { PPC_LWZ(r, base, i); } while(0)
126#define PPC_BPF_STL(r, base, i) do { PPC_STW(r, base, i); } while(0)
127#define PPC_BPF_STLU(r, base, i) do { PPC_STWU(r, base, i); } while(0)
128
125#define SEEN_DATAREF 0x10000 /* might call external helpers */ 129#define SEEN_DATAREF 0x10000 /* might call external helpers */
126#define SEEN_XREG 0x20000 /* X reg is used */ 130#define SEEN_XREG 0x20000 /* X reg is used */
127#define SEEN_MEM 0x40000 /* SEEN_MEM+(1<<n) = use mem[n] for temporary 131#define SEEN_MEM 0x40000 /* SEEN_MEM+(1<<n) = use mem[n] for temporary
diff --git a/arch/powerpc/net/bpf_jit64.h b/arch/powerpc/net/bpf_jit64.h
index 3609be4692b3..47f441f351a6 100644
--- a/arch/powerpc/net/bpf_jit64.h
+++ b/arch/powerpc/net/bpf_jit64.h
@@ -68,6 +68,26 @@ static const int b2p[] = {
68/* PPC NVR range -- update this if we ever use NVRs below r27 */ 68/* PPC NVR range -- update this if we ever use NVRs below r27 */
69#define BPF_PPC_NVR_MIN 27 69#define BPF_PPC_NVR_MIN 27
70 70
71/*
72 * WARNING: These can use TMP_REG_2 if the offset is not at word boundary,
73 * so ensure that it isn't in use already.
74 */
75#define PPC_BPF_LL(r, base, i) do { \
76 if ((i) % 4) { \
77 PPC_LI(b2p[TMP_REG_2], (i)); \
78 PPC_LDX(r, base, b2p[TMP_REG_2]); \
79 } else \
80 PPC_LD(r, base, i); \
81 } while(0)
82#define PPC_BPF_STL(r, base, i) do { \
83 if ((i) % 4) { \
84 PPC_LI(b2p[TMP_REG_2], (i)); \
85 PPC_STDX(r, base, b2p[TMP_REG_2]); \
86 } else \
87 PPC_STD(r, base, i); \
88 } while(0)
89#define PPC_BPF_STLU(r, base, i) do { PPC_STDU(r, base, i); } while(0)
90
71#define SEEN_FUNC 0x1000 /* might call external helpers */ 91#define SEEN_FUNC 0x1000 /* might call external helpers */
72#define SEEN_STACK 0x2000 /* uses BPF stack */ 92#define SEEN_STACK 0x2000 /* uses BPF stack */
73#define SEEN_TAILCALL 0x4000 /* uses tail calls */ 93#define SEEN_TAILCALL 0x4000 /* uses tail calls */
diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
index 4194d3cfb60c..21a1dcd4b156 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -252,7 +252,7 @@ static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32
252 * if (tail_call_cnt > MAX_TAIL_CALL_CNT) 252 * if (tail_call_cnt > MAX_TAIL_CALL_CNT)
253 * goto out; 253 * goto out;
254 */ 254 */
255 PPC_LD(b2p[TMP_REG_1], 1, bpf_jit_stack_tailcallcnt(ctx)); 255 PPC_BPF_LL(b2p[TMP_REG_1], 1, bpf_jit_stack_tailcallcnt(ctx));
256 PPC_CMPLWI(b2p[TMP_REG_1], MAX_TAIL_CALL_CNT); 256 PPC_CMPLWI(b2p[TMP_REG_1], MAX_TAIL_CALL_CNT);
257 PPC_BCC(COND_GT, out); 257 PPC_BCC(COND_GT, out);
258 258
@@ -265,7 +265,7 @@ static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32
265 /* prog = array->ptrs[index]; */ 265 /* prog = array->ptrs[index]; */
266 PPC_MULI(b2p[TMP_REG_1], b2p_index, 8); 266 PPC_MULI(b2p[TMP_REG_1], b2p_index, 8);
267 PPC_ADD(b2p[TMP_REG_1], b2p[TMP_REG_1], b2p_bpf_array); 267 PPC_ADD(b2p[TMP_REG_1], b2p[TMP_REG_1], b2p_bpf_array);
268 PPC_LD(b2p[TMP_REG_1], b2p[TMP_REG_1], offsetof(struct bpf_array, ptrs)); 268 PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_1], offsetof(struct bpf_array, ptrs));
269 269
270 /* 270 /*
271 * if (prog == NULL) 271 * if (prog == NULL)
@@ -275,7 +275,7 @@ static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32
275 PPC_BCC(COND_EQ, out); 275 PPC_BCC(COND_EQ, out);
276 276
277 /* goto *(prog->bpf_func + prologue_size); */ 277 /* goto *(prog->bpf_func + prologue_size); */
278 PPC_LD(b2p[TMP_REG_1], b2p[TMP_REG_1], offsetof(struct bpf_prog, bpf_func)); 278 PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_1], offsetof(struct bpf_prog, bpf_func));
279#ifdef PPC64_ELF_ABI_v1 279#ifdef PPC64_ELF_ABI_v1
280 /* skip past the function descriptor */ 280 /* skip past the function descriptor */
281 PPC_ADDI(b2p[TMP_REG_1], b2p[TMP_REG_1], 281 PPC_ADDI(b2p[TMP_REG_1], b2p[TMP_REG_1],
@@ -606,7 +606,7 @@ bpf_alu32_trunc:
606 * the instructions generated will remain the 606 * the instructions generated will remain the
607 * same across all passes 607 * same across all passes
608 */ 608 */
609 PPC_STD(dst_reg, 1, bpf_jit_stack_local(ctx)); 609 PPC_BPF_STL(dst_reg, 1, bpf_jit_stack_local(ctx));
610 PPC_ADDI(b2p[TMP_REG_1], 1, bpf_jit_stack_local(ctx)); 610 PPC_ADDI(b2p[TMP_REG_1], 1, bpf_jit_stack_local(ctx));
611 PPC_LDBRX(dst_reg, 0, b2p[TMP_REG_1]); 611 PPC_LDBRX(dst_reg, 0, b2p[TMP_REG_1]);
612 break; 612 break;
@@ -662,7 +662,7 @@ emit_clear:
662 PPC_LI32(b2p[TMP_REG_1], imm); 662 PPC_LI32(b2p[TMP_REG_1], imm);
663 src_reg = b2p[TMP_REG_1]; 663 src_reg = b2p[TMP_REG_1];
664 } 664 }
665 PPC_STD(src_reg, dst_reg, off); 665 PPC_BPF_STL(src_reg, dst_reg, off);
666 break; 666 break;
667 667
668 /* 668 /*
@@ -709,7 +709,7 @@ emit_clear:
709 break; 709 break;
710 /* dst = *(u64 *)(ul) (src + off) */ 710 /* dst = *(u64 *)(ul) (src + off) */
711 case BPF_LDX | BPF_MEM | BPF_DW: 711 case BPF_LDX | BPF_MEM | BPF_DW:
712 PPC_LD(dst_reg, src_reg, off); 712 PPC_BPF_LL(dst_reg, src_reg, off);
713 break; 713 break;
714 714
715 /* 715 /*
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index a2132e09dc1c..f02367faa58d 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -193,7 +193,6 @@ enum bpf_arg_type {
193 193
194 ARG_PTR_TO_CTX, /* pointer to context */ 194 ARG_PTR_TO_CTX, /* pointer to context */
195 ARG_ANYTHING, /* any (initialized) argument is ok */ 195 ARG_ANYTHING, /* any (initialized) argument is ok */
196 ARG_PTR_TO_SOCKET, /* pointer to bpf_sock */
197 ARG_PTR_TO_SPIN_LOCK, /* pointer to bpf_spin_lock */ 196 ARG_PTR_TO_SPIN_LOCK, /* pointer to bpf_spin_lock */
198 ARG_PTR_TO_SOCK_COMMON, /* pointer to sock_common */ 197 ARG_PTR_TO_SOCK_COMMON, /* pointer to sock_common */
199}; 198};
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 69f7a3449eda..7d8228d1c898 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -66,6 +66,46 @@ struct bpf_reg_state {
66 * same reference to the socket, to determine proper reference freeing. 66 * same reference to the socket, to determine proper reference freeing.
67 */ 67 */
68 u32 id; 68 u32 id;
69 /* PTR_TO_SOCKET and PTR_TO_TCP_SOCK could be a ptr returned
70 * from a pointer-cast helper, bpf_sk_fullsock() and
71 * bpf_tcp_sock().
72 *
73 * Consider the following where "sk" is a reference counted
74 * pointer returned from "sk = bpf_sk_lookup_tcp();":
75 *
76 * 1: sk = bpf_sk_lookup_tcp();
77 * 2: if (!sk) { return 0; }
78 * 3: fullsock = bpf_sk_fullsock(sk);
79 * 4: if (!fullsock) { bpf_sk_release(sk); return 0; }
80 * 5: tp = bpf_tcp_sock(fullsock);
81 * 6: if (!tp) { bpf_sk_release(sk); return 0; }
82 * 7: bpf_sk_release(sk);
83 * 8: snd_cwnd = tp->snd_cwnd; // verifier will complain
84 *
85 * After bpf_sk_release(sk) at line 7, both "fullsock" ptr and
86 * "tp" ptr should be invalidated also. In order to do that,
87 * the reg holding "fullsock" and "sk" need to remember
88 * the original refcounted ptr id (i.e. sk_reg->id) in ref_obj_id
89 * such that the verifier can reset all regs which have
90 * ref_obj_id matching the sk_reg->id.
91 *
92 * sk_reg->ref_obj_id is set to sk_reg->id at line 1.
93 * sk_reg->id will stay as NULL-marking purpose only.
94 * After NULL-marking is done, sk_reg->id can be reset to 0.
95 *
96 * After "fullsock = bpf_sk_fullsock(sk);" at line 3,
97 * fullsock_reg->ref_obj_id is set to sk_reg->ref_obj_id.
98 *
99 * After "tp = bpf_tcp_sock(fullsock);" at line 5,
100 * tp_reg->ref_obj_id is set to fullsock_reg->ref_obj_id
101 * which is the same as sk_reg->ref_obj_id.
102 *
103 * From the verifier perspective, if sk, fullsock and tp
104 * are not NULL, they are the same ptr with different
105 * reg->type. In particular, bpf_sk_release(tp) is also
106 * allowed and has the same effect as bpf_sk_release(sk).
107 */
108 u32 ref_obj_id;
69 /* For scalar types (SCALAR_VALUE), this represents our knowledge of 109 /* For scalar types (SCALAR_VALUE), this represents our knowledge of
70 * the actual value. 110 * the actual value.
71 * For pointer types, this represents the variable part of the offset 111 * For pointer types, this represents the variable part of the offset
diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
index 61cf7dbb6782..d074b6d60f8a 100644
--- a/include/net/xdp_sock.h
+++ b/include/net/xdp_sock.h
@@ -36,7 +36,6 @@ struct xdp_umem {
36 u32 headroom; 36 u32 headroom;
37 u32 chunk_size_nohr; 37 u32 chunk_size_nohr;
38 struct user_struct *user; 38 struct user_struct *user;
39 struct pid *pid;
40 unsigned long address; 39 unsigned long address;
41 refcount_t users; 40 refcount_t users;
42 struct work_struct work; 41 struct work_struct work;
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 3c38ac9a92a7..929c8e537a14 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -502,16 +502,6 @@ union bpf_attr {
502 * Return 502 * Return
503 * 0 on success, or a negative error in case of failure. 503 * 0 on success, or a negative error in case of failure.
504 * 504 *
505 * int bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags)
506 * Description
507 * Push an element *value* in *map*. *flags* is one of:
508 *
509 * **BPF_EXIST**
510 * If the queue/stack is full, the oldest element is removed to
511 * make room for this.
512 * Return
513 * 0 on success, or a negative error in case of failure.
514 *
515 * int bpf_probe_read(void *dst, u32 size, const void *src) 505 * int bpf_probe_read(void *dst, u32 size, const void *src)
516 * Description 506 * Description
517 * For tracing programs, safely attempt to read *size* bytes from 507 * For tracing programs, safely attempt to read *size* bytes from
@@ -1435,14 +1425,14 @@ union bpf_attr {
1435 * u64 bpf_get_socket_cookie(struct bpf_sock_addr *ctx) 1425 * u64 bpf_get_socket_cookie(struct bpf_sock_addr *ctx)
1436 * Description 1426 * Description
1437 * Equivalent to bpf_get_socket_cookie() helper that accepts 1427 * Equivalent to bpf_get_socket_cookie() helper that accepts
1438 * *skb*, but gets socket from **struct bpf_sock_addr** contex. 1428 * *skb*, but gets socket from **struct bpf_sock_addr** context.
1439 * Return 1429 * Return
1440 * A 8-byte long non-decreasing number. 1430 * A 8-byte long non-decreasing number.
1441 * 1431 *
1442 * u64 bpf_get_socket_cookie(struct bpf_sock_ops *ctx) 1432 * u64 bpf_get_socket_cookie(struct bpf_sock_ops *ctx)
1443 * Description 1433 * Description
1444 * Equivalent to bpf_get_socket_cookie() helper that accepts 1434 * Equivalent to bpf_get_socket_cookie() helper that accepts
1445 * *skb*, but gets socket from **struct bpf_sock_ops** contex. 1435 * *skb*, but gets socket from **struct bpf_sock_ops** context.
1446 * Return 1436 * Return
1447 * A 8-byte long non-decreasing number. 1437 * A 8-byte long non-decreasing number.
1448 * 1438 *
@@ -2098,52 +2088,52 @@ union bpf_attr {
2098 * Return 2088 * Return
2099 * 0 on success, or a negative error in case of failure. 2089 * 0 on success, or a negative error in case of failure.
2100 * 2090 *
2101 * int bpf_rc_keydown(void *ctx, u32 protocol, u64 scancode, u32 toggle) 2091 * int bpf_rc_repeat(void *ctx)
2102 * Description 2092 * Description
2103 * This helper is used in programs implementing IR decoding, to 2093 * This helper is used in programs implementing IR decoding, to
2104 * report a successfully decoded key press with *scancode*, 2094 * report a successfully decoded repeat key message. This delays
2105 * *toggle* value in the given *protocol*. The scancode will be 2095 * the generation of a key up event for previously generated
2106 * translated to a keycode using the rc keymap, and reported as 2096 * key down event.
2107 * an input key down event. After a period a key up event is
2108 * generated. This period can be extended by calling either
2109 * **bpf_rc_keydown**\ () again with the same values, or calling
2110 * **bpf_rc_repeat**\ ().
2111 * 2097 *
2112 * Some protocols include a toggle bit, in case the button was 2098 * Some IR protocols like NEC have a special IR message for
2113 * released and pressed again between consecutive scancodes. 2099 * repeating last button, for when a button is held down.
2114 * 2100 *
2115 * The *ctx* should point to the lirc sample as passed into 2101 * The *ctx* should point to the lirc sample as passed into
2116 * the program. 2102 * the program.
2117 * 2103 *
2118 * The *protocol* is the decoded protocol number (see
2119 * **enum rc_proto** for some predefined values).
2120 *
2121 * This helper is only available if the kernel was compiled with 2104 * This helper is only available if the kernel was compiled with
2122 * the **CONFIG_BPF_LIRC_MODE2** configuration option set to 2105 * the **CONFIG_BPF_LIRC_MODE2** configuration option set to
2123 * "**y**". 2106 * "**y**".
2124 * Return 2107 * Return
2125 * 0 2108 * 0
2126 * 2109 *
2127 * int bpf_rc_repeat(void *ctx) 2110 * int bpf_rc_keydown(void *ctx, u32 protocol, u64 scancode, u32 toggle)
2128 * Description 2111 * Description
2129 * This helper is used in programs implementing IR decoding, to 2112 * This helper is used in programs implementing IR decoding, to
2130 * report a successfully decoded repeat key message. This delays 2113 * report a successfully decoded key press with *scancode*,
2131 * the generation of a key up event for previously generated 2114 * *toggle* value in the given *protocol*. The scancode will be
2132 * key down event. 2115 * translated to a keycode using the rc keymap, and reported as
2116 * an input key down event. After a period a key up event is
2117 * generated. This period can be extended by calling either
2118 * **bpf_rc_keydown**\ () again with the same values, or calling
2119 * **bpf_rc_repeat**\ ().
2133 * 2120 *
2134 * Some IR protocols like NEC have a special IR message for 2121 * Some protocols include a toggle bit, in case the button was
2135 * repeating last button, for when a button is held down. 2122 * released and pressed again between consecutive scancodes.
2136 * 2123 *
2137 * The *ctx* should point to the lirc sample as passed into 2124 * The *ctx* should point to the lirc sample as passed into
2138 * the program. 2125 * the program.
2139 * 2126 *
2127 * The *protocol* is the decoded protocol number (see
2128 * **enum rc_proto** for some predefined values).
2129 *
2140 * This helper is only available if the kernel was compiled with 2130 * This helper is only available if the kernel was compiled with
2141 * the **CONFIG_BPF_LIRC_MODE2** configuration option set to 2131 * the **CONFIG_BPF_LIRC_MODE2** configuration option set to
2142 * "**y**". 2132 * "**y**".
2143 * Return 2133 * Return
2144 * 0 2134 * 0
2145 * 2135 *
2146 * uint64_t bpf_skb_cgroup_id(struct sk_buff *skb) 2136 * u64 bpf_skb_cgroup_id(struct sk_buff *skb)
2147 * Description 2137 * Description
2148 * Return the cgroup v2 id of the socket associated with the *skb*. 2138 * Return the cgroup v2 id of the socket associated with the *skb*.
2149 * This is roughly similar to the **bpf_get_cgroup_classid**\ () 2139 * This is roughly similar to the **bpf_get_cgroup_classid**\ ()
@@ -2159,30 +2149,12 @@ union bpf_attr {
2159 * Return 2149 * Return
2160 * The id is returned or 0 in case the id could not be retrieved. 2150 * The id is returned or 0 in case the id could not be retrieved.
2161 * 2151 *
2162 * u64 bpf_skb_ancestor_cgroup_id(struct sk_buff *skb, int ancestor_level)
2163 * Description
2164 * Return id of cgroup v2 that is ancestor of cgroup associated
2165 * with the *skb* at the *ancestor_level*. The root cgroup is at
2166 * *ancestor_level* zero and each step down the hierarchy
2167 * increments the level. If *ancestor_level* == level of cgroup
2168 * associated with *skb*, then return value will be same as that
2169 * of **bpf_skb_cgroup_id**\ ().
2170 *
2171 * The helper is useful to implement policies based on cgroups
2172 * that are upper in hierarchy than immediate cgroup associated
2173 * with *skb*.
2174 *
2175 * The format of returned id and helper limitations are same as in
2176 * **bpf_skb_cgroup_id**\ ().
2177 * Return
2178 * The id is returned or 0 in case the id could not be retrieved.
2179 *
2180 * u64 bpf_get_current_cgroup_id(void) 2152 * u64 bpf_get_current_cgroup_id(void)
2181 * Return 2153 * Return
2182 * A 64-bit integer containing the current cgroup id based 2154 * A 64-bit integer containing the current cgroup id based
2183 * on the cgroup within which the current task is running. 2155 * on the cgroup within which the current task is running.
2184 * 2156 *
2185 * void* get_local_storage(void *map, u64 flags) 2157 * void *bpf_get_local_storage(void *map, u64 flags)
2186 * Description 2158 * Description
2187 * Get the pointer to the local storage area. 2159 * Get the pointer to the local storage area.
2188 * The type and the size of the local storage is defined 2160 * The type and the size of the local storage is defined
@@ -2209,6 +2181,24 @@ union bpf_attr {
2209 * Return 2181 * Return
2210 * 0 on success, or a negative error in case of failure. 2182 * 0 on success, or a negative error in case of failure.
2211 * 2183 *
2184 * u64 bpf_skb_ancestor_cgroup_id(struct sk_buff *skb, int ancestor_level)
2185 * Description
2186 * Return id of cgroup v2 that is ancestor of cgroup associated
2187 * with the *skb* at the *ancestor_level*. The root cgroup is at
2188 * *ancestor_level* zero and each step down the hierarchy
2189 * increments the level. If *ancestor_level* == level of cgroup
2190 * associated with *skb*, then return value will be same as that
2191 * of **bpf_skb_cgroup_id**\ ().
2192 *
2193 * The helper is useful to implement policies based on cgroups
2194 * that are upper in hierarchy than immediate cgroup associated
2195 * with *skb*.
2196 *
2197 * The format of returned id and helper limitations are same as in
2198 * **bpf_skb_cgroup_id**\ ().
2199 * Return
2200 * The id is returned or 0 in case the id could not be retrieved.
2201 *
2212 * struct bpf_sock *bpf_sk_lookup_tcp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags) 2202 * struct bpf_sock *bpf_sk_lookup_tcp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags)
2213 * Description 2203 * Description
2214 * Look for TCP socket matching *tuple*, optionally in a child 2204 * Look for TCP socket matching *tuple*, optionally in a child
@@ -2289,6 +2279,16 @@ union bpf_attr {
2289 * Return 2279 * Return
2290 * 0 on success, or a negative error in case of failure. 2280 * 0 on success, or a negative error in case of failure.
2291 * 2281 *
2282 * int bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags)
2283 * Description
2284 * Push an element *value* in *map*. *flags* is one of:
2285 *
2286 * **BPF_EXIST**
2287 * If the queue/stack is full, the oldest element is
2288 * removed to make room for this.
2289 * Return
2290 * 0 on success, or a negative error in case of failure.
2291 *
2292 * int bpf_map_pop_elem(struct bpf_map *map, void *value) 2292 * int bpf_map_pop_elem(struct bpf_map *map, void *value)
2293 * Description 2293 * Description
2294 * Pop an element from *map*. 2294 * Pop an element from *map*.
@@ -2343,29 +2343,94 @@ union bpf_attr {
2343 * Return 2343 * Return
2344 * 0 2344 * 0
2345 * 2345 *
2346 * int bpf_spin_lock(struct bpf_spin_lock *lock)
2347 * Description
2348 * Acquire a spinlock represented by the pointer *lock*, which is
2349 * stored as part of a value of a map. Taking the lock allows to
2350 * safely update the rest of the fields in that value. The
2351 * spinlock can (and must) later be released with a call to
2352 * **bpf_spin_unlock**\ (\ *lock*\ ).
2353 *
2354 * Spinlocks in BPF programs come with a number of restrictions
2355 * and constraints:
2356 *
2357 * * **bpf_spin_lock** objects are only allowed inside maps of
2358 * types **BPF_MAP_TYPE_HASH** and **BPF_MAP_TYPE_ARRAY** (this
2359 * list could be extended in the future).
2360 * * BTF description of the map is mandatory.
2361 * * The BPF program can take ONE lock at a time, since taking two
2362 * or more could cause dead locks.
2363 * * Only one **struct bpf_spin_lock** is allowed per map element.
2364 * * When the lock is taken, calls (either BPF to BPF or helpers)
2365 * are not allowed.
2366 * * The **BPF_LD_ABS** and **BPF_LD_IND** instructions are not
2367 * allowed inside a spinlock-ed region.
2368 * * The BPF program MUST call **bpf_spin_unlock**\ () to release
2369 * the lock, on all execution paths, before it returns.
2370 * * The BPF program can access **struct bpf_spin_lock** only via
2371 * the **bpf_spin_lock**\ () and **bpf_spin_unlock**\ ()
2372 * helpers. Loading or storing data into the **struct
2373 * bpf_spin_lock** *lock*\ **;** field of a map is not allowed.
2374 * * To use the **bpf_spin_lock**\ () helper, the BTF description
2375 * of the map value must be a struct and have **struct
2376 * bpf_spin_lock** *anyname*\ **;** field at the top level.
2377 * Nested lock inside another struct is not allowed.
2378 * * The **struct bpf_spin_lock** *lock* field in a map value must
2379 * be aligned on a multiple of 4 bytes in that value.
2380 * * Syscall with command **BPF_MAP_LOOKUP_ELEM** does not copy
2381 * the **bpf_spin_lock** field to user space.
2382 * * Syscall with command **BPF_MAP_UPDATE_ELEM**, or update from
2383 * a BPF program, do not update the **bpf_spin_lock** field.
2384 * * **bpf_spin_lock** cannot be on the stack or inside a
2385 * networking packet (it can only be inside of a map values).
2386 * * **bpf_spin_lock** is available to root only.
2387 * * Tracing programs and socket filter programs cannot use
2388 * **bpf_spin_lock**\ () due to insufficient preemption checks
2389 * (but this may change in the future).
2390 * * **bpf_spin_lock** is not allowed in inner maps of map-in-map.
2391 * Return
2392 * 0
2393 *
2394 * int bpf_spin_unlock(struct bpf_spin_lock *lock)
2395 * Description
2396 * Release the *lock* previously locked by a call to
2397 * **bpf_spin_lock**\ (\ *lock*\ ).
2398 * Return
2399 * 0
2400 *
2346 * struct bpf_sock *bpf_sk_fullsock(struct bpf_sock *sk) 2401 * struct bpf_sock *bpf_sk_fullsock(struct bpf_sock *sk)
2347 * Description 2402 * Description
2348 * This helper gets a **struct bpf_sock** pointer such 2403 * This helper gets a **struct bpf_sock** pointer such
2349 * that all the fields in bpf_sock can be accessed. 2404 * that all the fields in this **bpf_sock** can be accessed.
2350 * Return 2405 * Return
2351 * A **struct bpf_sock** pointer on success, or NULL in 2406 * A **struct bpf_sock** pointer on success, or **NULL** in
2352 * case of failure. 2407 * case of failure.
2353 * 2408 *
2354 * struct bpf_tcp_sock *bpf_tcp_sock(struct bpf_sock *sk) 2409 * struct bpf_tcp_sock *bpf_tcp_sock(struct bpf_sock *sk)
2355 * Description 2410 * Description
2356 * This helper gets a **struct bpf_tcp_sock** pointer from a 2411 * This helper gets a **struct bpf_tcp_sock** pointer from a
2357 * **struct bpf_sock** pointer. 2412 * **struct bpf_sock** pointer.
2358 *
2359 * Return 2413 * Return
2360 * A **struct bpf_tcp_sock** pointer on success, or NULL in 2414 * A **struct bpf_tcp_sock** pointer on success, or **NULL** in
2361 * case of failure. 2415 * case of failure.
2362 * 2416 *
2363 * int bpf_skb_ecn_set_ce(struct sk_buff *skb) 2417 * int bpf_skb_ecn_set_ce(struct sk_buff *skb)
2364 * Description 2418 * Description
2365 * Sets ECN of IP header to ce (congestion encountered) if 2419 * Set ECN (Explicit Congestion Notification) field of IP header
2366 * current value is ect (ECN capable). Works with IPv6 and IPv4. 2420 * to **CE** (Congestion Encountered) if current value is **ECT**
2367 * Return 2421 * (ECN Capable Transport). Otherwise, do nothing. Works with IPv6
2368 * 1 if set, 0 if not set. 2422 * and IPv4.
2423 * Return
2424 * 1 if the **CE** flag is set (either by the current helper call
2425 * or because it was already present), 0 if it is not set.
2426 *
2427 * struct bpf_sock *bpf_get_listener_sock(struct bpf_sock *sk)
2428 * Description
2429 * Return a **struct bpf_sock** pointer in **TCP_LISTEN** state.
2430 * **bpf_sk_release**\ () is unnecessary and not allowed.
2431 * Return
2432 * A **struct bpf_sock** pointer on success, or **NULL** in
2433 * case of failure.
2369 */ 2434 */
2370#define __BPF_FUNC_MAPPER(FN) \ 2435#define __BPF_FUNC_MAPPER(FN) \
2371 FN(unspec), \ 2436 FN(unspec), \
@@ -2465,7 +2530,8 @@ union bpf_attr {
2465 FN(spin_unlock), \ 2530 FN(spin_unlock), \
2466 FN(sk_fullsock), \ 2531 FN(sk_fullsock), \
2467 FN(tcp_sock), \ 2532 FN(tcp_sock), \
2468 FN(skb_ecn_set_ce), 2533 FN(skb_ecn_set_ce), \
2534 FN(get_listener_sock),
2469 2535
2470/* integer value in 'imm' field of BPF_CALL instruction selects which helper 2536/* integer value in 'imm' field of BPF_CALL instruction selects which helper
2471 * function eBPF program intends to call 2537 * function eBPF program intends to call
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index ce166a002d16..86f9cd5d1c4e 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -212,7 +212,7 @@ struct bpf_call_arg_meta {
212 int access_size; 212 int access_size;
213 s64 msize_smax_value; 213 s64 msize_smax_value;
214 u64 msize_umax_value; 214 u64 msize_umax_value;
215 int ptr_id; 215 int ref_obj_id;
216 int func_id; 216 int func_id;
217}; 217};
218 218
@@ -346,35 +346,15 @@ static bool reg_type_may_be_null(enum bpf_reg_type type)
346 type == PTR_TO_TCP_SOCK_OR_NULL; 346 type == PTR_TO_TCP_SOCK_OR_NULL;
347} 347}
348 348
349static bool type_is_refcounted(enum bpf_reg_type type)
350{
351 return type == PTR_TO_SOCKET;
352}
353
354static bool type_is_refcounted_or_null(enum bpf_reg_type type)
355{
356 return type == PTR_TO_SOCKET || type == PTR_TO_SOCKET_OR_NULL;
357}
358
359static bool reg_is_refcounted(const struct bpf_reg_state *reg)
360{
361 return type_is_refcounted(reg->type);
362}
363
364static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg) 349static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg)
365{ 350{
366 return reg->type == PTR_TO_MAP_VALUE && 351 return reg->type == PTR_TO_MAP_VALUE &&
367 map_value_has_spin_lock(reg->map_ptr); 352 map_value_has_spin_lock(reg->map_ptr);
368} 353}
369 354
370static bool reg_is_refcounted_or_null(const struct bpf_reg_state *reg) 355static bool arg_type_may_be_refcounted(enum bpf_arg_type type)
371{ 356{
372 return type_is_refcounted_or_null(reg->type); 357 return type == ARG_PTR_TO_SOCK_COMMON;
373}
374
375static bool arg_type_is_refcounted(enum bpf_arg_type type)
376{
377 return type == ARG_PTR_TO_SOCKET;
378} 358}
379 359
380/* Determine whether the function releases some resources allocated by another 360/* Determine whether the function releases some resources allocated by another
@@ -392,6 +372,12 @@ static bool is_acquire_function(enum bpf_func_id func_id)
392 func_id == BPF_FUNC_sk_lookup_udp; 372 func_id == BPF_FUNC_sk_lookup_udp;
393} 373}
394 374
375static bool is_ptr_cast_function(enum bpf_func_id func_id)
376{
377 return func_id == BPF_FUNC_tcp_sock ||
378 func_id == BPF_FUNC_sk_fullsock;
379}
380
395/* string representation of 'enum bpf_reg_type' */ 381/* string representation of 'enum bpf_reg_type' */
396static const char * const reg_type_str[] = { 382static const char * const reg_type_str[] = {
397 [NOT_INIT] = "?", 383 [NOT_INIT] = "?",
@@ -465,7 +451,8 @@ static void print_verifier_state(struct bpf_verifier_env *env,
465 if (t == PTR_TO_STACK) 451 if (t == PTR_TO_STACK)
466 verbose(env, ",call_%d", func(env, reg)->callsite); 452 verbose(env, ",call_%d", func(env, reg)->callsite);
467 } else { 453 } else {
468 verbose(env, "(id=%d", reg->id); 454 verbose(env, "(id=%d ref_obj_id=%d", reg->id,
455 reg->ref_obj_id);
469 if (t != SCALAR_VALUE) 456 if (t != SCALAR_VALUE)
470 verbose(env, ",off=%d", reg->off); 457 verbose(env, ",off=%d", reg->off);
471 if (type_is_pkt_pointer(t)) 458 if (type_is_pkt_pointer(t))
@@ -2414,16 +2401,15 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
2414 /* Any sk pointer can be ARG_PTR_TO_SOCK_COMMON */ 2401 /* Any sk pointer can be ARG_PTR_TO_SOCK_COMMON */
2415 if (!type_is_sk_pointer(type)) 2402 if (!type_is_sk_pointer(type))
2416 goto err_type; 2403 goto err_type;
2417 } else if (arg_type == ARG_PTR_TO_SOCKET) { 2404 if (reg->ref_obj_id) {
2418 expected_type = PTR_TO_SOCKET; 2405 if (meta->ref_obj_id) {
2419 if (type != expected_type) 2406 verbose(env, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n",
2420 goto err_type; 2407 regno, reg->ref_obj_id,
2421 if (meta->ptr_id || !reg->id) { 2408 meta->ref_obj_id);
2422 verbose(env, "verifier internal error: mismatched references meta=%d, reg=%d\n", 2409 return -EFAULT;
2423 meta->ptr_id, reg->id); 2410 }
2424 return -EFAULT; 2411 meta->ref_obj_id = reg->ref_obj_id;
2425 } 2412 }
2426 meta->ptr_id = reg->id;
2427 } else if (arg_type == ARG_PTR_TO_SPIN_LOCK) { 2413 } else if (arg_type == ARG_PTR_TO_SPIN_LOCK) {
2428 if (meta->func_id == BPF_FUNC_spin_lock) { 2414 if (meta->func_id == BPF_FUNC_spin_lock) {
2429 if (process_spin_lock(env, regno, true)) 2415 if (process_spin_lock(env, regno, true))
@@ -2740,32 +2726,38 @@ static bool check_arg_pair_ok(const struct bpf_func_proto *fn)
2740 return true; 2726 return true;
2741} 2727}
2742 2728
2743static bool check_refcount_ok(const struct bpf_func_proto *fn) 2729static bool check_refcount_ok(const struct bpf_func_proto *fn, int func_id)
2744{ 2730{
2745 int count = 0; 2731 int count = 0;
2746 2732
2747 if (arg_type_is_refcounted(fn->arg1_type)) 2733 if (arg_type_may_be_refcounted(fn->arg1_type))
2748 count++; 2734 count++;
2749 if (arg_type_is_refcounted(fn->arg2_type)) 2735 if (arg_type_may_be_refcounted(fn->arg2_type))
2750 count++; 2736 count++;
2751 if (arg_type_is_refcounted(fn->arg3_type)) 2737 if (arg_type_may_be_refcounted(fn->arg3_type))
2752 count++; 2738 count++;
2753 if (arg_type_is_refcounted(fn->arg4_type)) 2739 if (arg_type_may_be_refcounted(fn->arg4_type))
2754 count++; 2740 count++;
2755 if (arg_type_is_refcounted(fn->arg5_type)) 2741 if (arg_type_may_be_refcounted(fn->arg5_type))
2756 count++; 2742 count++;
2757 2743
2744 /* A reference acquiring function cannot acquire
2745 * another refcounted ptr.
2746 */
2747 if (is_acquire_function(func_id) && count)
2748 return false;
2749
2758 /* We only support one arg being unreferenced at the moment, 2750 /* We only support one arg being unreferenced at the moment,
2759 * which is sufficient for the helper functions we have right now. 2751 * which is sufficient for the helper functions we have right now.
2760 */ 2752 */
2761 return count <= 1; 2753 return count <= 1;
2762} 2754}
2763 2755
2764static int check_func_proto(const struct bpf_func_proto *fn) 2756static int check_func_proto(const struct bpf_func_proto *fn, int func_id)
2765{ 2757{
2766 return check_raw_mode_ok(fn) && 2758 return check_raw_mode_ok(fn) &&
2767 check_arg_pair_ok(fn) && 2759 check_arg_pair_ok(fn) &&
2768 check_refcount_ok(fn) ? 0 : -EINVAL; 2760 check_refcount_ok(fn, func_id) ? 0 : -EINVAL;
2769} 2761}
2770 2762
2771/* Packet data might have moved, any old PTR_TO_PACKET[_META,_END] 2763/* Packet data might have moved, any old PTR_TO_PACKET[_META,_END]
@@ -2799,19 +2791,20 @@ static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
2799} 2791}
2800 2792
2801static void release_reg_references(struct bpf_verifier_env *env, 2793static void release_reg_references(struct bpf_verifier_env *env,
2802 struct bpf_func_state *state, int id) 2794 struct bpf_func_state *state,
2795 int ref_obj_id)
2803{ 2796{
2804 struct bpf_reg_state *regs = state->regs, *reg; 2797 struct bpf_reg_state *regs = state->regs, *reg;
2805 int i; 2798 int i;
2806 2799
2807 for (i = 0; i < MAX_BPF_REG; i++) 2800 for (i = 0; i < MAX_BPF_REG; i++)
2808 if (regs[i].id == id) 2801 if (regs[i].ref_obj_id == ref_obj_id)
2809 mark_reg_unknown(env, regs, i); 2802 mark_reg_unknown(env, regs, i);
2810 2803
2811 bpf_for_each_spilled_reg(i, state, reg) { 2804 bpf_for_each_spilled_reg(i, state, reg) {
2812 if (!reg) 2805 if (!reg)
2813 continue; 2806 continue;
2814 if (reg_is_refcounted(reg) && reg->id == id) 2807 if (reg->ref_obj_id == ref_obj_id)
2815 __mark_reg_unknown(reg); 2808 __mark_reg_unknown(reg);
2816 } 2809 }
2817} 2810}
@@ -2820,15 +2813,20 @@ static void release_reg_references(struct bpf_verifier_env *env,
2820 * resources. Identify all copies of the same pointer and clear the reference. 2813 * resources. Identify all copies of the same pointer and clear the reference.
2821 */ 2814 */
2822static int release_reference(struct bpf_verifier_env *env, 2815static int release_reference(struct bpf_verifier_env *env,
2823 struct bpf_call_arg_meta *meta) 2816 int ref_obj_id)
2824{ 2817{
2825 struct bpf_verifier_state *vstate = env->cur_state; 2818 struct bpf_verifier_state *vstate = env->cur_state;
2819 int err;
2826 int i; 2820 int i;
2827 2821
2822 err = release_reference_state(cur_func(env), ref_obj_id);
2823 if (err)
2824 return err;
2825
2828 for (i = 0; i <= vstate->curframe; i++) 2826 for (i = 0; i <= vstate->curframe; i++)
2829 release_reg_references(env, vstate->frame[i], meta->ptr_id); 2827 release_reg_references(env, vstate->frame[i], ref_obj_id);
2830 2828
2831 return release_reference_state(cur_func(env), meta->ptr_id); 2829 return 0;
2832} 2830}
2833 2831
2834static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn, 2832static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
@@ -3047,7 +3045,7 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
3047 memset(&meta, 0, sizeof(meta)); 3045 memset(&meta, 0, sizeof(meta));
3048 meta.pkt_access = fn->pkt_access; 3046 meta.pkt_access = fn->pkt_access;
3049 3047
3050 err = check_func_proto(fn); 3048 err = check_func_proto(fn, func_id);
3051 if (err) { 3049 if (err) {
3052 verbose(env, "kernel subsystem misconfigured func %s#%d\n", 3050 verbose(env, "kernel subsystem misconfigured func %s#%d\n",
3053 func_id_name(func_id), func_id); 3051 func_id_name(func_id), func_id);
@@ -3093,7 +3091,7 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
3093 return err; 3091 return err;
3094 } 3092 }
3095 } else if (is_release_function(func_id)) { 3093 } else if (is_release_function(func_id)) {
3096 err = release_reference(env, &meta); 3094 err = release_reference(env, meta.ref_obj_id);
3097 if (err) { 3095 if (err) {
3098 verbose(env, "func %s#%d reference has not been acquired before\n", 3096 verbose(env, "func %s#%d reference has not been acquired before\n",
3099 func_id_name(func_id), func_id); 3097 func_id_name(func_id), func_id);
@@ -3154,8 +3152,10 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
3154 3152
3155 if (id < 0) 3153 if (id < 0)
3156 return id; 3154 return id;
3157 /* For release_reference() */ 3155 /* For mark_ptr_or_null_reg() */
3158 regs[BPF_REG_0].id = id; 3156 regs[BPF_REG_0].id = id;
3157 /* For release_reference() */
3158 regs[BPF_REG_0].ref_obj_id = id;
3159 } else { 3159 } else {
3160 /* For mark_ptr_or_null_reg() */ 3160 /* For mark_ptr_or_null_reg() */
3161 regs[BPF_REG_0].id = ++env->id_gen; 3161 regs[BPF_REG_0].id = ++env->id_gen;
@@ -3170,6 +3170,10 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
3170 return -EINVAL; 3170 return -EINVAL;
3171 } 3171 }
3172 3172
3173 if (is_ptr_cast_function(func_id))
3174 /* For release_reference() */
3175 regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id;
3176
3173 do_refine_retval_range(regs, fn->ret_type, func_id, &meta); 3177 do_refine_retval_range(regs, fn->ret_type, func_id, &meta);
3174 3178
3175 err = check_map_func_compatibility(env, meta.map_ptr, func_id); 3179 err = check_map_func_compatibility(env, meta.map_ptr, func_id);
@@ -4665,11 +4669,19 @@ static void mark_ptr_or_null_reg(struct bpf_func_state *state,
4665 } else if (reg->type == PTR_TO_TCP_SOCK_OR_NULL) { 4669 } else if (reg->type == PTR_TO_TCP_SOCK_OR_NULL) {
4666 reg->type = PTR_TO_TCP_SOCK; 4670 reg->type = PTR_TO_TCP_SOCK;
4667 } 4671 }
4668 if (is_null || !(reg_is_refcounted(reg) || 4672 if (is_null) {
4669 reg_may_point_to_spin_lock(reg))) { 4673 /* We don't need id and ref_obj_id from this point
4670 /* We don't need id from this point onwards anymore, 4674 * onwards anymore, thus we should better reset it,
4671 * thus we should better reset it, so that state 4675 * so that state pruning has chances to take effect.
4672 * pruning has chances to take effect. 4676 */
4677 reg->id = 0;
4678 reg->ref_obj_id = 0;
4679 } else if (!reg_may_point_to_spin_lock(reg)) {
4680 /* For not-NULL ptr, reg->ref_obj_id will be reset
4681 * in release_reg_references().
4682 *
4683 * reg->id is still used by spin_lock ptr. Other
4684 * than spin_lock ptr type, reg->id can be reset.
4673 */ 4685 */
4674 reg->id = 0; 4686 reg->id = 0;
4675 } 4687 }
@@ -4684,11 +4696,16 @@ static void mark_ptr_or_null_regs(struct bpf_verifier_state *vstate, u32 regno,
4684{ 4696{
4685 struct bpf_func_state *state = vstate->frame[vstate->curframe]; 4697 struct bpf_func_state *state = vstate->frame[vstate->curframe];
4686 struct bpf_reg_state *reg, *regs = state->regs; 4698 struct bpf_reg_state *reg, *regs = state->regs;
4699 u32 ref_obj_id = regs[regno].ref_obj_id;
4687 u32 id = regs[regno].id; 4700 u32 id = regs[regno].id;
4688 int i, j; 4701 int i, j;
4689 4702
4690 if (reg_is_refcounted_or_null(&regs[regno]) && is_null) 4703 if (ref_obj_id && ref_obj_id == id && is_null)
4691 release_reference_state(state, id); 4704 /* regs[regno] is in the " == NULL" branch.
4705 * No one could have freed the reference state before
4706 * doing the NULL check.
4707 */
4708 WARN_ON_ONCE(release_reference_state(state, id));
4692 4709
4693 for (i = 0; i < MAX_BPF_REG; i++) 4710 for (i = 0; i < MAX_BPF_REG; i++)
4694 mark_ptr_or_null_reg(state, &regs[i], id, is_null); 4711 mark_ptr_or_null_reg(state, &regs[i], id, is_null);
diff --git a/net/core/filter.c b/net/core/filter.c
index f274620945ff..647c63a7b25b 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1796,8 +1796,6 @@ static const struct bpf_func_proto bpf_skb_pull_data_proto = {
1796 1796
1797BPF_CALL_1(bpf_sk_fullsock, struct sock *, sk) 1797BPF_CALL_1(bpf_sk_fullsock, struct sock *, sk)
1798{ 1798{
1799 sk = sk_to_full_sk(sk);
1800
1801 return sk_fullsock(sk) ? (unsigned long)sk : (unsigned long)NULL; 1799 return sk_fullsock(sk) ? (unsigned long)sk : (unsigned long)NULL;
1802} 1800}
1803 1801
@@ -5266,7 +5264,7 @@ static const struct bpf_func_proto bpf_sk_release_proto = {
5266 .func = bpf_sk_release, 5264 .func = bpf_sk_release,
5267 .gpl_only = false, 5265 .gpl_only = false,
5268 .ret_type = RET_INTEGER, 5266 .ret_type = RET_INTEGER,
5269 .arg1_type = ARG_PTR_TO_SOCKET, 5267 .arg1_type = ARG_PTR_TO_SOCK_COMMON,
5270}; 5268};
5271 5269
5272BPF_CALL_5(bpf_xdp_sk_lookup_udp, struct xdp_buff *, ctx, 5270BPF_CALL_5(bpf_xdp_sk_lookup_udp, struct xdp_buff *, ctx,
@@ -5407,8 +5405,6 @@ u32 bpf_tcp_sock_convert_ctx_access(enum bpf_access_type type,
5407 5405
5408BPF_CALL_1(bpf_tcp_sock, struct sock *, sk) 5406BPF_CALL_1(bpf_tcp_sock, struct sock *, sk)
5409{ 5407{
5410 sk = sk_to_full_sk(sk);
5411
5412 if (sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP) 5408 if (sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP)
5413 return (unsigned long)sk; 5409 return (unsigned long)sk;
5414 5410
@@ -5422,6 +5418,23 @@ static const struct bpf_func_proto bpf_tcp_sock_proto = {
5422 .arg1_type = ARG_PTR_TO_SOCK_COMMON, 5418 .arg1_type = ARG_PTR_TO_SOCK_COMMON,
5423}; 5419};
5424 5420
5421BPF_CALL_1(bpf_get_listener_sock, struct sock *, sk)
5422{
5423 sk = sk_to_full_sk(sk);
5424
5425 if (sk->sk_state == TCP_LISTEN && sock_flag(sk, SOCK_RCU_FREE))
5426 return (unsigned long)sk;
5427
5428 return (unsigned long)NULL;
5429}
5430
5431static const struct bpf_func_proto bpf_get_listener_sock_proto = {
5432 .func = bpf_get_listener_sock,
5433 .gpl_only = false,
5434 .ret_type = RET_PTR_TO_SOCKET_OR_NULL,
5435 .arg1_type = ARG_PTR_TO_SOCK_COMMON,
5436};
5437
5425BPF_CALL_1(bpf_skb_ecn_set_ce, struct sk_buff *, skb) 5438BPF_CALL_1(bpf_skb_ecn_set_ce, struct sk_buff *, skb)
5426{ 5439{
5427 unsigned int iphdr_len; 5440 unsigned int iphdr_len;
@@ -5607,6 +5620,8 @@ cg_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
5607#ifdef CONFIG_INET 5620#ifdef CONFIG_INET
5608 case BPF_FUNC_tcp_sock: 5621 case BPF_FUNC_tcp_sock:
5609 return &bpf_tcp_sock_proto; 5622 return &bpf_tcp_sock_proto;
5623 case BPF_FUNC_get_listener_sock:
5624 return &bpf_get_listener_sock_proto;
5610 case BPF_FUNC_skb_ecn_set_ce: 5625 case BPF_FUNC_skb_ecn_set_ce:
5611 return &bpf_skb_ecn_set_ce_proto; 5626 return &bpf_skb_ecn_set_ce_proto;
5612#endif 5627#endif
@@ -5702,6 +5717,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
5702 return &bpf_sk_release_proto; 5717 return &bpf_sk_release_proto;
5703 case BPF_FUNC_tcp_sock: 5718 case BPF_FUNC_tcp_sock:
5704 return &bpf_tcp_sock_proto; 5719 return &bpf_tcp_sock_proto;
5720 case BPF_FUNC_get_listener_sock:
5721 return &bpf_get_listener_sock_proto;
5705#endif 5722#endif
5706 default: 5723 default:
5707 return bpf_base_func_proto(func_id); 5724 return bpf_base_func_proto(func_id);
diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c
index 77520eacee8f..989e52386c35 100644
--- a/net/xdp/xdp_umem.c
+++ b/net/xdp/xdp_umem.c
@@ -193,9 +193,6 @@ static void xdp_umem_unaccount_pages(struct xdp_umem *umem)
193 193
194static void xdp_umem_release(struct xdp_umem *umem) 194static void xdp_umem_release(struct xdp_umem *umem)
195{ 195{
196 struct task_struct *task;
197 struct mm_struct *mm;
198
199 xdp_umem_clear_dev(umem); 196 xdp_umem_clear_dev(umem);
200 197
201 ida_simple_remove(&umem_ida, umem->id); 198 ida_simple_remove(&umem_ida, umem->id);
@@ -214,21 +211,10 @@ static void xdp_umem_release(struct xdp_umem *umem)
214 211
215 xdp_umem_unpin_pages(umem); 212 xdp_umem_unpin_pages(umem);
216 213
217 task = get_pid_task(umem->pid, PIDTYPE_PID);
218 put_pid(umem->pid);
219 if (!task)
220 goto out;
221 mm = get_task_mm(task);
222 put_task_struct(task);
223 if (!mm)
224 goto out;
225
226 mmput(mm);
227 kfree(umem->pages); 214 kfree(umem->pages);
228 umem->pages = NULL; 215 umem->pages = NULL;
229 216
230 xdp_umem_unaccount_pages(umem); 217 xdp_umem_unaccount_pages(umem);
231out:
232 kfree(umem); 218 kfree(umem);
233} 219}
234 220
@@ -357,7 +343,6 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
357 if (size_chk < 0) 343 if (size_chk < 0)
358 return -EINVAL; 344 return -EINVAL;
359 345
360 umem->pid = get_task_pid(current, PIDTYPE_PID);
361 umem->address = (unsigned long)addr; 346 umem->address = (unsigned long)addr;
362 umem->chunk_mask = ~((u64)chunk_size - 1); 347 umem->chunk_mask = ~((u64)chunk_size - 1);
363 umem->size = size; 348 umem->size = size;
@@ -373,7 +358,7 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
373 358
374 err = xdp_umem_account_pages(umem); 359 err = xdp_umem_account_pages(umem);
375 if (err) 360 if (err)
376 goto out; 361 return err;
377 362
378 err = xdp_umem_pin_pages(umem); 363 err = xdp_umem_pin_pages(umem);
379 if (err) 364 if (err)
@@ -392,8 +377,6 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
392 377
393out_account: 378out_account:
394 xdp_umem_unaccount_pages(umem); 379 xdp_umem_unaccount_pages(umem);
395out:
396 put_pid(umem->pid);
397 return err; 380 return err;
398} 381}
399 382
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 3c38ac9a92a7..929c8e537a14 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -502,16 +502,6 @@ union bpf_attr {
502 * Return 502 * Return
503 * 0 on success, or a negative error in case of failure. 503 * 0 on success, or a negative error in case of failure.
504 * 504 *
505 * int bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags)
506 * Description
507 * Push an element *value* in *map*. *flags* is one of:
508 *
509 * **BPF_EXIST**
510 * If the queue/stack is full, the oldest element is removed to
511 * make room for this.
512 * Return
513 * 0 on success, or a negative error in case of failure.
514 *
515 * int bpf_probe_read(void *dst, u32 size, const void *src) 505 * int bpf_probe_read(void *dst, u32 size, const void *src)
516 * Description 506 * Description
517 * For tracing programs, safely attempt to read *size* bytes from 507 * For tracing programs, safely attempt to read *size* bytes from
@@ -1435,14 +1425,14 @@ union bpf_attr {
1435 * u64 bpf_get_socket_cookie(struct bpf_sock_addr *ctx) 1425 * u64 bpf_get_socket_cookie(struct bpf_sock_addr *ctx)
1436 * Description 1426 * Description
1437 * Equivalent to bpf_get_socket_cookie() helper that accepts 1427 * Equivalent to bpf_get_socket_cookie() helper that accepts
1438 * *skb*, but gets socket from **struct bpf_sock_addr** contex. 1428 * *skb*, but gets socket from **struct bpf_sock_addr** context.
1439 * Return 1429 * Return
1440 * An 8-byte long non-decreasing number. 1430 * An 8-byte long non-decreasing number.
1441 * 1431 *
1442 * u64 bpf_get_socket_cookie(struct bpf_sock_ops *ctx) 1432 * u64 bpf_get_socket_cookie(struct bpf_sock_ops *ctx)
1443 * Description 1433 * Description
1444 * Equivalent to bpf_get_socket_cookie() helper that accepts 1434 * Equivalent to bpf_get_socket_cookie() helper that accepts
1445 * *skb*, but gets socket from **struct bpf_sock_ops** contex. 1435 * *skb*, but gets socket from **struct bpf_sock_ops** context.
1446 * Return 1436 * Return
1447 * An 8-byte long non-decreasing number. 1437 * An 8-byte long non-decreasing number.
1448 * 1438 *
@@ -2098,52 +2088,52 @@ union bpf_attr {
2098 * Return 2088 * Return
2099 * 0 on success, or a negative error in case of failure. 2089 * 0 on success, or a negative error in case of failure.
2100 * 2090 *
2101 * int bpf_rc_keydown(void *ctx, u32 protocol, u64 scancode, u32 toggle) 2091 * int bpf_rc_repeat(void *ctx)
2102 * Description 2092 * Description
2103 * This helper is used in programs implementing IR decoding, to 2093 * This helper is used in programs implementing IR decoding, to
2104 * report a successfully decoded key press with *scancode*, 2094 * report a successfully decoded repeat key message. This delays
2105 * *toggle* value in the given *protocol*. The scancode will be 2095 * the generation of a key up event for previously generated
2106 * translated to a keycode using the rc keymap, and reported as 2096 * key down event.
2107 * an input key down event. After a period a key up event is
2108 * generated. This period can be extended by calling either
2109 * **bpf_rc_keydown**\ () again with the same values, or calling
2110 * **bpf_rc_repeat**\ ().
2111 * 2097 *
2112 * Some protocols include a toggle bit, in case the button was 2098 * Some IR protocols like NEC have a special IR message for
2113 * released and pressed again between consecutive scancodes. 2099 * repeating last button, for when a button is held down.
2114 * 2100 *
2115 * The *ctx* should point to the lirc sample as passed into 2101 * The *ctx* should point to the lirc sample as passed into
2116 * the program. 2102 * the program.
2117 * 2103 *
2118 * The *protocol* is the decoded protocol number (see
2119 * **enum rc_proto** for some predefined values).
2120 *
2121 * This helper is only available if the kernel was compiled with 2104 * This helper is only available if the kernel was compiled with
2122 * the **CONFIG_BPF_LIRC_MODE2** configuration option set to 2105 * the **CONFIG_BPF_LIRC_MODE2** configuration option set to
2123 * "**y**". 2106 * "**y**".
2124 * Return 2107 * Return
2125 * 0 2108 * 0
2126 * 2109 *
2127 * int bpf_rc_repeat(void *ctx) 2110 * int bpf_rc_keydown(void *ctx, u32 protocol, u64 scancode, u32 toggle)
2128 * Description 2111 * Description
2129 * This helper is used in programs implementing IR decoding, to 2112 * This helper is used in programs implementing IR decoding, to
2130 * report a successfully decoded repeat key message. This delays 2113 * report a successfully decoded key press with *scancode*,
2131 * the generation of a key up event for previously generated 2114 * *toggle* value in the given *protocol*. The scancode will be
2132 * key down event. 2115 * translated to a keycode using the rc keymap, and reported as
2116 * an input key down event. After a period a key up event is
2117 * generated. This period can be extended by calling either
2118 * **bpf_rc_keydown**\ () again with the same values, or calling
2119 * **bpf_rc_repeat**\ ().
2133 * 2120 *
2134 * Some IR protocols like NEC have a special IR message for 2121 * Some protocols include a toggle bit, in case the button was
2135 * repeating last button, for when a button is held down. 2122 * released and pressed again between consecutive scancodes.
2136 * 2123 *
2137 * The *ctx* should point to the lirc sample as passed into 2124 * The *ctx* should point to the lirc sample as passed into
2138 * the program. 2125 * the program.
2139 * 2126 *
2127 * The *protocol* is the decoded protocol number (see
2128 * **enum rc_proto** for some predefined values).
2129 *
2140 * This helper is only available if the kernel was compiled with 2130 * This helper is only available if the kernel was compiled with
2141 * the **CONFIG_BPF_LIRC_MODE2** configuration option set to 2131 * the **CONFIG_BPF_LIRC_MODE2** configuration option set to
2142 * "**y**". 2132 * "**y**".
2143 * Return 2133 * Return
2144 * 0 2134 * 0
2145 * 2135 *
2146 * uint64_t bpf_skb_cgroup_id(struct sk_buff *skb) 2136 * u64 bpf_skb_cgroup_id(struct sk_buff *skb)
2147 * Description 2137 * Description
2148 * Return the cgroup v2 id of the socket associated with the *skb*. 2138 * Return the cgroup v2 id of the socket associated with the *skb*.
2149 * This is roughly similar to the **bpf_get_cgroup_classid**\ () 2139 * This is roughly similar to the **bpf_get_cgroup_classid**\ ()
@@ -2159,30 +2149,12 @@ union bpf_attr {
2159 * Return 2149 * Return
2160 * The id is returned or 0 in case the id could not be retrieved. 2150 * The id is returned or 0 in case the id could not be retrieved.
2161 * 2151 *
2162 * u64 bpf_skb_ancestor_cgroup_id(struct sk_buff *skb, int ancestor_level)
2163 * Description
2164 * Return id of cgroup v2 that is ancestor of cgroup associated
2165 * with the *skb* at the *ancestor_level*. The root cgroup is at
2166 * *ancestor_level* zero and each step down the hierarchy
2167 * increments the level. If *ancestor_level* == level of cgroup
2168 * associated with *skb*, then return value will be same as that
2169 * of **bpf_skb_cgroup_id**\ ().
2170 *
2171 * The helper is useful to implement policies based on cgroups
2172 * that are upper in hierarchy than immediate cgroup associated
2173 * with *skb*.
2174 *
2175 * The format of returned id and helper limitations are same as in
2176 * **bpf_skb_cgroup_id**\ ().
2177 * Return
2178 * The id is returned or 0 in case the id could not be retrieved.
2179 *
2180 * u64 bpf_get_current_cgroup_id(void) 2152 * u64 bpf_get_current_cgroup_id(void)
2181 * Return 2153 * Return
2182 * A 64-bit integer containing the current cgroup id based 2154 * A 64-bit integer containing the current cgroup id based
2183 * on the cgroup within which the current task is running. 2155 * on the cgroup within which the current task is running.
2184 * 2156 *
2185 * void* get_local_storage(void *map, u64 flags) 2157 * void *bpf_get_local_storage(void *map, u64 flags)
2186 * Description 2158 * Description
2187 * Get the pointer to the local storage area. 2159 * Get the pointer to the local storage area.
2188 * The type and the size of the local storage is defined 2160 * The type and the size of the local storage is defined
@@ -2209,6 +2181,24 @@ union bpf_attr {
2209 * Return 2181 * Return
2210 * 0 on success, or a negative error in case of failure. 2182 * 0 on success, or a negative error in case of failure.
2211 * 2183 *
2184 * u64 bpf_skb_ancestor_cgroup_id(struct sk_buff *skb, int ancestor_level)
2185 * Description
2186 * Return id of cgroup v2 that is ancestor of cgroup associated
2187 * with the *skb* at the *ancestor_level*. The root cgroup is at
2188 * *ancestor_level* zero and each step down the hierarchy
2189 * increments the level. If *ancestor_level* == level of cgroup
2190 * associated with *skb*, then return value will be same as that
2191 * of **bpf_skb_cgroup_id**\ ().
2192 *
2193 * The helper is useful to implement policies based on cgroups
2194 * that are upper in hierarchy than immediate cgroup associated
2195 * with *skb*.
2196 *
2197 * The format of returned id and helper limitations are same as in
2198 * **bpf_skb_cgroup_id**\ ().
2199 * Return
2200 * The id is returned or 0 in case the id could not be retrieved.
2201 *
2212 * struct bpf_sock *bpf_sk_lookup_tcp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags) 2202 * struct bpf_sock *bpf_sk_lookup_tcp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags)
2213 * Description 2203 * Description
2214 * Look for TCP socket matching *tuple*, optionally in a child 2204 * Look for TCP socket matching *tuple*, optionally in a child
@@ -2289,6 +2279,16 @@ union bpf_attr {
2289 * Return 2279 * Return
2290 * 0 on success, or a negative error in case of failure. 2280 * 0 on success, or a negative error in case of failure.
2291 * 2281 *
2282 * int bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags)
2283 * Description
2284 * Push an element *value* in *map*. *flags* is one of:
2285 *
2286 * **BPF_EXIST**
2287 * If the queue/stack is full, the oldest element is
2288 * removed to make room for this.
2289 * Return
2290 * 0 on success, or a negative error in case of failure.
2291 *
2292 * int bpf_map_pop_elem(struct bpf_map *map, void *value) 2292 * int bpf_map_pop_elem(struct bpf_map *map, void *value)
2293 * Description 2293 * Description
2294 * Pop an element from *map*. 2294 * Pop an element from *map*.
@@ -2343,29 +2343,94 @@ union bpf_attr {
2343 * Return 2343 * Return
2344 * 0 2344 * 0
2345 * 2345 *
2346 * int bpf_spin_lock(struct bpf_spin_lock *lock)
2347 * Description
2348 * Acquire a spinlock represented by the pointer *lock*, which is
2349 * stored as part of a value of a map. Taking the lock allows to
2350 * safely update the rest of the fields in that value. The
2351 * spinlock can (and must) later be released with a call to
2352 * **bpf_spin_unlock**\ (\ *lock*\ ).
2353 *
2354 * Spinlocks in BPF programs come with a number of restrictions
2355 * and constraints:
2356 *
2357 * * **bpf_spin_lock** objects are only allowed inside maps of
2358 * types **BPF_MAP_TYPE_HASH** and **BPF_MAP_TYPE_ARRAY** (this
2359 * list could be extended in the future).
2360 * * BTF description of the map is mandatory.
2361 * * The BPF program can take ONE lock at a time, since taking two
2362 * or more could cause deadlocks.
2363 * * Only one **struct bpf_spin_lock** is allowed per map element.
2364 * * When the lock is taken, calls (either BPF to BPF or helpers)
2365 * are not allowed.
2366 * * The **BPF_LD_ABS** and **BPF_LD_IND** instructions are not
2367 * allowed inside a spinlock-ed region.
2368 * * The BPF program MUST call **bpf_spin_unlock**\ () to release
2369 * the lock, on all execution paths, before it returns.
2370 * * The BPF program can access **struct bpf_spin_lock** only via
2371 * the **bpf_spin_lock**\ () and **bpf_spin_unlock**\ ()
2372 * helpers. Loading or storing data into the **struct
2373 * bpf_spin_lock** *lock*\ **;** field of a map is not allowed.
2374 * * To use the **bpf_spin_lock**\ () helper, the BTF description
2375 * of the map value must be a struct and have **struct
2376 * bpf_spin_lock** *anyname*\ **;** field at the top level.
2377 * Nested lock inside another struct is not allowed.
2378 * * The **struct bpf_spin_lock** *lock* field in a map value must
2379 * be aligned on a multiple of 4 bytes in that value.
2380 * * Syscall with command **BPF_MAP_LOOKUP_ELEM** does not copy
2381 * the **bpf_spin_lock** field to user space.
2382 * * Syscall with command **BPF_MAP_UPDATE_ELEM**, or update from
2383 * a BPF program, do not update the **bpf_spin_lock** field.
2384 * * **bpf_spin_lock** cannot be on the stack or inside a
2385 * networking packet (it can only be inside of a map value).
2386 * * **bpf_spin_lock** is available to root only.
2387 * * Tracing programs and socket filter programs cannot use
2388 * **bpf_spin_lock**\ () due to insufficient preemption checks
2389 * (but this may change in the future).
2390 * * **bpf_spin_lock** is not allowed in inner maps of map-in-map.
2391 * Return
2392 * 0
2393 *
2394 * int bpf_spin_unlock(struct bpf_spin_lock *lock)
2395 * Description
2396 * Release the *lock* previously locked by a call to
2397 * **bpf_spin_lock**\ (\ *lock*\ ).
2398 * Return
2399 * 0
2400 *
2346 * struct bpf_sock *bpf_sk_fullsock(struct bpf_sock *sk) 2401 * struct bpf_sock *bpf_sk_fullsock(struct bpf_sock *sk)
2347 * Description 2402 * Description
2348 * This helper gets a **struct bpf_sock** pointer such 2403 * This helper gets a **struct bpf_sock** pointer such
2349 * that all the fields in bpf_sock can be accessed. 2404 * that all the fields in this **bpf_sock** can be accessed.
2350 * Return 2405 * Return
2351 * A **struct bpf_sock** pointer on success, or NULL in 2406 * A **struct bpf_sock** pointer on success, or **NULL** in
2352 * case of failure. 2407 * case of failure.
2353 * 2408 *
2354 * struct bpf_tcp_sock *bpf_tcp_sock(struct bpf_sock *sk) 2409 * struct bpf_tcp_sock *bpf_tcp_sock(struct bpf_sock *sk)
2355 * Description 2410 * Description
2356 * This helper gets a **struct bpf_tcp_sock** pointer from a 2411 * This helper gets a **struct bpf_tcp_sock** pointer from a
2357 * **struct bpf_sock** pointer. 2412 * **struct bpf_sock** pointer.
2358 *
2359 * Return 2413 * Return
2360 * A **struct bpf_tcp_sock** pointer on success, or NULL in 2414 * A **struct bpf_tcp_sock** pointer on success, or **NULL** in
2361 * case of failure. 2415 * case of failure.
2362 * 2416 *
2363 * int bpf_skb_ecn_set_ce(struct sk_buff *skb) 2417 * int bpf_skb_ecn_set_ce(struct sk_buff *skb)
2364 * Description 2418 * Description
2365 * Sets ECN of IP header to ce (congestion encountered) if 2419 * Set ECN (Explicit Congestion Notification) field of IP header
2366 * current value is ect (ECN capable). Works with IPv6 and IPv4. 2420 * to **CE** (Congestion Encountered) if current value is **ECT**
2367 * Return 2421 * (ECN Capable Transport). Otherwise, do nothing. Works with IPv6
2368 * 1 if set, 0 if not set. 2422 * and IPv4.
2423 * Return
2424 * 1 if the **CE** flag is set (either by the current helper call
2425 * or because it was already present), 0 if it is not set.
2426 *
2427 * struct bpf_sock *bpf_get_listener_sock(struct bpf_sock *sk)
2428 * Description
2429 * Return a **struct bpf_sock** pointer in **TCP_LISTEN** state.
2430 * **bpf_sk_release**\ () is unnecessary and not allowed.
2431 * Return
2432 * A **struct bpf_sock** pointer on success, or **NULL** in
2433 * case of failure.
2369 */ 2434 */
2370#define __BPF_FUNC_MAPPER(FN) \ 2435#define __BPF_FUNC_MAPPER(FN) \
2371 FN(unspec), \ 2436 FN(unspec), \
@@ -2465,7 +2530,8 @@ union bpf_attr {
2465 FN(spin_unlock), \ 2530 FN(spin_unlock), \
2466 FN(sk_fullsock), \ 2531 FN(sk_fullsock), \
2467 FN(tcp_sock), \ 2532 FN(tcp_sock), \
2468 FN(skb_ecn_set_ce), 2533 FN(skb_ecn_set_ce), \
2534 FN(get_listener_sock),
2469 2535
2470/* integer value in 'imm' field of BPF_CALL instruction selects which helper 2536/* integer value in 'imm' field of BPF_CALL instruction selects which helper
2471 * function eBPF program intends to call 2537 * function eBPF program intends to call
diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index 1b8d8cdd3575..87e3020ac1bc 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -1602,16 +1602,12 @@ static bool btf_equal_int(struct btf_type *t1, struct btf_type *t2)
1602/* Calculate type signature hash of ENUM. */ 1602/* Calculate type signature hash of ENUM. */
1603static __u32 btf_hash_enum(struct btf_type *t) 1603static __u32 btf_hash_enum(struct btf_type *t)
1604{ 1604{
1605 struct btf_enum *member = (struct btf_enum *)(t + 1); 1605 __u32 h;
1606 __u32 vlen = BTF_INFO_VLEN(t->info);
1607 __u32 h = btf_hash_common(t);
1608 int i;
1609 1606
1610 for (i = 0; i < vlen; i++) { 1607 /* don't hash vlen and enum members to support enum fwd resolving */
1611 h = hash_combine(h, member->name_off); 1608 h = hash_combine(0, t->name_off);
1612 h = hash_combine(h, member->val); 1609 h = hash_combine(h, t->info & ~0xffff);
1613 member++; 1610 h = hash_combine(h, t->size);
1614 }
1615 return h; 1611 return h;
1616} 1612}
1617 1613
@@ -1637,6 +1633,22 @@ static bool btf_equal_enum(struct btf_type *t1, struct btf_type *t2)
1637 return true; 1633 return true;
1638} 1634}
1639 1635
1636static inline bool btf_is_enum_fwd(struct btf_type *t)
1637{
1638 return BTF_INFO_KIND(t->info) == BTF_KIND_ENUM &&
1639 BTF_INFO_VLEN(t->info) == 0;
1640}
1641
1642static bool btf_compat_enum(struct btf_type *t1, struct btf_type *t2)
1643{
1644 if (!btf_is_enum_fwd(t1) && !btf_is_enum_fwd(t2))
1645 return btf_equal_enum(t1, t2);
1646 /* ignore vlen when comparing */
1647 return t1->name_off == t2->name_off &&
1648 (t1->info & ~0xffff) == (t2->info & ~0xffff) &&
1649 t1->size == t2->size;
1650}
1651
1640/* 1652/*
1641 * Calculate type signature hash of STRUCT/UNION, ignoring referenced type IDs, 1653 * Calculate type signature hash of STRUCT/UNION, ignoring referenced type IDs,
1642 * as referenced type IDs equivalence is established separately during type 1654 * as referenced type IDs equivalence is established separately during type
@@ -1860,6 +1872,17 @@ static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id)
1860 new_id = cand_node->type_id; 1872 new_id = cand_node->type_id;
1861 break; 1873 break;
1862 } 1874 }
1875 if (d->opts.dont_resolve_fwds)
1876 continue;
1877 if (btf_compat_enum(t, cand)) {
1878 if (btf_is_enum_fwd(t)) {
1879 /* resolve fwd to full enum */
1880 new_id = cand_node->type_id;
1881 break;
1882 }
1883 /* resolve canonical enum fwd to full enum */
1884 d->map[cand_node->type_id] = type_id;
1885 }
1863 } 1886 }
1864 break; 1887 break;
1865 1888
@@ -2084,15 +2107,15 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id,
2084 return fwd_kind == real_kind; 2107 return fwd_kind == real_kind;
2085 } 2108 }
2086 2109
2087 if (cand_type->info != canon_type->info)
2088 return 0;
2089
2090 switch (cand_kind) { 2110 switch (cand_kind) {
2091 case BTF_KIND_INT: 2111 case BTF_KIND_INT:
2092 return btf_equal_int(cand_type, canon_type); 2112 return btf_equal_int(cand_type, canon_type);
2093 2113
2094 case BTF_KIND_ENUM: 2114 case BTF_KIND_ENUM:
2095 return btf_equal_enum(cand_type, canon_type); 2115 if (d->opts.dont_resolve_fwds)
2116 return btf_equal_enum(cand_type, canon_type);
2117 else
2118 return btf_compat_enum(cand_type, canon_type);
2096 2119
2097 case BTF_KIND_FWD: 2120 case BTF_KIND_FWD:
2098 return btf_equal_common(cand_type, canon_type); 2121 return btf_equal_common(cand_type, canon_type);
@@ -2103,6 +2126,8 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id,
2103 case BTF_KIND_PTR: 2126 case BTF_KIND_PTR:
2104 case BTF_KIND_TYPEDEF: 2127 case BTF_KIND_TYPEDEF:
2105 case BTF_KIND_FUNC: 2128 case BTF_KIND_FUNC:
2129 if (cand_type->info != canon_type->info)
2130 return 0;
2106 return btf_dedup_is_equiv(d, cand_type->type, canon_type->type); 2131 return btf_dedup_is_equiv(d, cand_type->type, canon_type->type);
2107 2132
2108 case BTF_KIND_ARRAY: { 2133 case BTF_KIND_ARRAY: {
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index d5b830d60601..5e977d2688da 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -835,12 +835,19 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags)
835 obj->efile.maps_shndx = idx; 835 obj->efile.maps_shndx = idx;
836 else if (strcmp(name, BTF_ELF_SEC) == 0) { 836 else if (strcmp(name, BTF_ELF_SEC) == 0) {
837 obj->btf = btf__new(data->d_buf, data->d_size); 837 obj->btf = btf__new(data->d_buf, data->d_size);
838 if (IS_ERR(obj->btf) || btf__load(obj->btf)) { 838 if (IS_ERR(obj->btf)) {
839 pr_warning("Error loading ELF section %s: %ld. Ignored and continue.\n", 839 pr_warning("Error loading ELF section %s: %ld. Ignored and continue.\n",
840 BTF_ELF_SEC, PTR_ERR(obj->btf)); 840 BTF_ELF_SEC, PTR_ERR(obj->btf));
841 if (!IS_ERR(obj->btf))
842 btf__free(obj->btf);
843 obj->btf = NULL; 841 obj->btf = NULL;
842 continue;
843 }
844 err = btf__load(obj->btf);
845 if (err) {
846 pr_warning("Error loading %s into kernel: %d. Ignored and continue.\n",
847 BTF_ELF_SEC, err);
848 btf__free(obj->btf);
849 obj->btf = NULL;
850 err = 0;
844 } 851 }
845 } else if (strcmp(name, BTF_EXT_ELF_SEC) == 0) { 852 } else if (strcmp(name, BTF_EXT_ELF_SEC) == 0) {
846 btf_ext_data = data; 853 btf_ext_data = data;
diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c
index f98ac82c9aea..8d0078b65486 100644
--- a/tools/lib/bpf/xsk.c
+++ b/tools/lib/bpf/xsk.c
@@ -126,8 +126,8 @@ static void xsk_set_umem_config(struct xsk_umem_config *cfg,
126 cfg->frame_headroom = usr_cfg->frame_headroom; 126 cfg->frame_headroom = usr_cfg->frame_headroom;
127} 127}
128 128
129static void xsk_set_xdp_socket_config(struct xsk_socket_config *cfg, 129static int xsk_set_xdp_socket_config(struct xsk_socket_config *cfg,
130 const struct xsk_socket_config *usr_cfg) 130 const struct xsk_socket_config *usr_cfg)
131{ 131{
132 if (!usr_cfg) { 132 if (!usr_cfg) {
133 cfg->rx_size = XSK_RING_CONS__DEFAULT_NUM_DESCS; 133 cfg->rx_size = XSK_RING_CONS__DEFAULT_NUM_DESCS;
@@ -135,14 +135,19 @@ static void xsk_set_xdp_socket_config(struct xsk_socket_config *cfg,
135 cfg->libbpf_flags = 0; 135 cfg->libbpf_flags = 0;
136 cfg->xdp_flags = 0; 136 cfg->xdp_flags = 0;
137 cfg->bind_flags = 0; 137 cfg->bind_flags = 0;
138 return; 138 return 0;
139 } 139 }
140 140
141 if (usr_cfg->libbpf_flags & ~XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)
142 return -EINVAL;
143
141 cfg->rx_size = usr_cfg->rx_size; 144 cfg->rx_size = usr_cfg->rx_size;
142 cfg->tx_size = usr_cfg->tx_size; 145 cfg->tx_size = usr_cfg->tx_size;
143 cfg->libbpf_flags = usr_cfg->libbpf_flags; 146 cfg->libbpf_flags = usr_cfg->libbpf_flags;
144 cfg->xdp_flags = usr_cfg->xdp_flags; 147 cfg->xdp_flags = usr_cfg->xdp_flags;
145 cfg->bind_flags = usr_cfg->bind_flags; 148 cfg->bind_flags = usr_cfg->bind_flags;
149
150 return 0;
146} 151}
147 152
148int xsk_umem__create(struct xsk_umem **umem_ptr, void *umem_area, __u64 size, 153int xsk_umem__create(struct xsk_umem **umem_ptr, void *umem_area, __u64 size,
@@ -557,7 +562,9 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
557 } 562 }
558 strncpy(xsk->ifname, ifname, IFNAMSIZ); 563 strncpy(xsk->ifname, ifname, IFNAMSIZ);
559 564
560 xsk_set_xdp_socket_config(&xsk->config, usr_config); 565 err = xsk_set_xdp_socket_config(&xsk->config, usr_config);
566 if (err)
567 goto out_socket;
561 568
562 if (rx) { 569 if (rx) {
563 err = setsockopt(xsk->fd, SOL_XDP, XDP_RX_RING, 570 err = setsockopt(xsk->fd, SOL_XDP, XDP_RX_RING,
diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h
index c9433a496d54..c81fc350f7ad 100644
--- a/tools/testing/selftests/bpf/bpf_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_helpers.h
@@ -180,6 +180,8 @@ static struct bpf_sock *(*bpf_sk_fullsock)(struct bpf_sock *sk) =
180 (void *) BPF_FUNC_sk_fullsock; 180 (void *) BPF_FUNC_sk_fullsock;
181static struct bpf_tcp_sock *(*bpf_tcp_sock)(struct bpf_sock *sk) = 181static struct bpf_tcp_sock *(*bpf_tcp_sock)(struct bpf_sock *sk) =
182 (void *) BPF_FUNC_tcp_sock; 182 (void *) BPF_FUNC_tcp_sock;
183static struct bpf_sock *(*bpf_get_listener_sock)(struct bpf_sock *sk) =
184 (void *) BPF_FUNC_get_listener_sock;
183static int (*bpf_skb_ecn_set_ce)(void *ctx) = 185static int (*bpf_skb_ecn_set_ce)(void *ctx) =
184 (void *) BPF_FUNC_skb_ecn_set_ce; 186 (void *) BPF_FUNC_skb_ecn_set_ce;
185 187
diff --git a/tools/testing/selftests/bpf/prog_tests/map_lock.c b/tools/testing/selftests/bpf/prog_tests/map_lock.c
index 90f8a206340a..ee99368c595c 100644
--- a/tools/testing/selftests/bpf/prog_tests/map_lock.c
+++ b/tools/testing/selftests/bpf/prog_tests/map_lock.c
@@ -37,7 +37,7 @@ void test_map_lock(void)
37 const char *file = "./test_map_lock.o"; 37 const char *file = "./test_map_lock.o";
38 int prog_fd, map_fd[2], vars[17] = {}; 38 int prog_fd, map_fd[2], vars[17] = {};
39 pthread_t thread_id[6]; 39 pthread_t thread_id[6];
40 struct bpf_object *obj; 40 struct bpf_object *obj = NULL;
41 int err = 0, key = 0, i; 41 int err = 0, key = 0, i;
42 void *ret; 42 void *ret;
43 43
diff --git a/tools/testing/selftests/bpf/prog_tests/spinlock.c b/tools/testing/selftests/bpf/prog_tests/spinlock.c
index 9a573a9675d7..114ebe6a438e 100644
--- a/tools/testing/selftests/bpf/prog_tests/spinlock.c
+++ b/tools/testing/selftests/bpf/prog_tests/spinlock.c
@@ -5,7 +5,7 @@ void test_spinlock(void)
5{ 5{
6 const char *file = "./test_spin_lock.o"; 6 const char *file = "./test_spin_lock.o";
7 pthread_t thread_id[4]; 7 pthread_t thread_id[4];
8 struct bpf_object *obj; 8 struct bpf_object *obj = NULL;
9 int prog_fd; 9 int prog_fd;
10 int err = 0, i; 10 int err = 0, i;
11 void *ret; 11 void *ret;
diff --git a/tools/testing/selftests/bpf/progs/test_sock_fields_kern.c b/tools/testing/selftests/bpf/progs/test_sock_fields_kern.c
index de1a43e8f610..37328f148538 100644
--- a/tools/testing/selftests/bpf/progs/test_sock_fields_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_sock_fields_kern.c
@@ -8,38 +8,51 @@
8#include "bpf_helpers.h" 8#include "bpf_helpers.h"
9#include "bpf_endian.h" 9#include "bpf_endian.h"
10 10
11enum bpf_array_idx { 11enum bpf_addr_array_idx {
12 SRV_IDX, 12 ADDR_SRV_IDX,
13 CLI_IDX, 13 ADDR_CLI_IDX,
14 __NR_BPF_ARRAY_IDX, 14 __NR_BPF_ADDR_ARRAY_IDX,
15};
16
17enum bpf_result_array_idx {
18 EGRESS_SRV_IDX,
19 EGRESS_CLI_IDX,
20 INGRESS_LISTEN_IDX,
21 __NR_BPF_RESULT_ARRAY_IDX,
22};
23
24enum bpf_linum_array_idx {
25 EGRESS_LINUM_IDX,
26 INGRESS_LINUM_IDX,
27 __NR_BPF_LINUM_ARRAY_IDX,
15}; 28};
16 29
17struct bpf_map_def SEC("maps") addr_map = { 30struct bpf_map_def SEC("maps") addr_map = {
18 .type = BPF_MAP_TYPE_ARRAY, 31 .type = BPF_MAP_TYPE_ARRAY,
19 .key_size = sizeof(__u32), 32 .key_size = sizeof(__u32),
20 .value_size = sizeof(struct sockaddr_in6), 33 .value_size = sizeof(struct sockaddr_in6),
21 .max_entries = __NR_BPF_ARRAY_IDX, 34 .max_entries = __NR_BPF_ADDR_ARRAY_IDX,
22}; 35};
23 36
24struct bpf_map_def SEC("maps") sock_result_map = { 37struct bpf_map_def SEC("maps") sock_result_map = {
25 .type = BPF_MAP_TYPE_ARRAY, 38 .type = BPF_MAP_TYPE_ARRAY,
26 .key_size = sizeof(__u32), 39 .key_size = sizeof(__u32),
27 .value_size = sizeof(struct bpf_sock), 40 .value_size = sizeof(struct bpf_sock),
28 .max_entries = __NR_BPF_ARRAY_IDX, 41 .max_entries = __NR_BPF_RESULT_ARRAY_IDX,
29}; 42};
30 43
31struct bpf_map_def SEC("maps") tcp_sock_result_map = { 44struct bpf_map_def SEC("maps") tcp_sock_result_map = {
32 .type = BPF_MAP_TYPE_ARRAY, 45 .type = BPF_MAP_TYPE_ARRAY,
33 .key_size = sizeof(__u32), 46 .key_size = sizeof(__u32),
34 .value_size = sizeof(struct bpf_tcp_sock), 47 .value_size = sizeof(struct bpf_tcp_sock),
35 .max_entries = __NR_BPF_ARRAY_IDX, 48 .max_entries = __NR_BPF_RESULT_ARRAY_IDX,
36}; 49};
37 50
38struct bpf_map_def SEC("maps") linum_map = { 51struct bpf_map_def SEC("maps") linum_map = {
39 .type = BPF_MAP_TYPE_ARRAY, 52 .type = BPF_MAP_TYPE_ARRAY,
40 .key_size = sizeof(__u32), 53 .key_size = sizeof(__u32),
41 .value_size = sizeof(__u32), 54 .value_size = sizeof(__u32),
42 .max_entries = 1, 55 .max_entries = __NR_BPF_LINUM_ARRAY_IDX,
43}; 56};
44 57
45static bool is_loopback6(__u32 *a6) 58static bool is_loopback6(__u32 *a6)
@@ -100,18 +113,20 @@ static void tpcpy(struct bpf_tcp_sock *dst,
100 113
101#define RETURN { \ 114#define RETURN { \
102 linum = __LINE__; \ 115 linum = __LINE__; \
103 bpf_map_update_elem(&linum_map, &idx0, &linum, 0); \ 116 bpf_map_update_elem(&linum_map, &linum_idx, &linum, 0); \
104 return 1; \ 117 return 1; \
105} 118}
106 119
107SEC("cgroup_skb/egress") 120SEC("cgroup_skb/egress")
108int read_sock_fields(struct __sk_buff *skb) 121int egress_read_sock_fields(struct __sk_buff *skb)
109{ 122{
110 __u32 srv_idx = SRV_IDX, cli_idx = CLI_IDX, idx; 123 __u32 srv_idx = ADDR_SRV_IDX, cli_idx = ADDR_CLI_IDX, result_idx;
111 struct sockaddr_in6 *srv_sa6, *cli_sa6; 124 struct sockaddr_in6 *srv_sa6, *cli_sa6;
112 struct bpf_tcp_sock *tp, *tp_ret; 125 struct bpf_tcp_sock *tp, *tp_ret;
113 struct bpf_sock *sk, *sk_ret; 126 struct bpf_sock *sk, *sk_ret;
114 __u32 linum, idx0 = 0; 127 __u32 linum, linum_idx;
128
129 linum_idx = EGRESS_LINUM_IDX;
115 130
116 sk = skb->sk; 131 sk = skb->sk;
117 if (!sk || sk->state == 10) 132 if (!sk || sk->state == 10)
@@ -132,14 +147,55 @@ int read_sock_fields(struct __sk_buff *skb)
132 RETURN; 147 RETURN;
133 148
134 if (sk->src_port == bpf_ntohs(srv_sa6->sin6_port)) 149 if (sk->src_port == bpf_ntohs(srv_sa6->sin6_port))
135 idx = srv_idx; 150 result_idx = EGRESS_SRV_IDX;
136 else if (sk->src_port == bpf_ntohs(cli_sa6->sin6_port)) 151 else if (sk->src_port == bpf_ntohs(cli_sa6->sin6_port))
137 idx = cli_idx; 152 result_idx = EGRESS_CLI_IDX;
138 else 153 else
139 RETURN; 154 RETURN;
140 155
141 sk_ret = bpf_map_lookup_elem(&sock_result_map, &idx); 156 sk_ret = bpf_map_lookup_elem(&sock_result_map, &result_idx);
142 tp_ret = bpf_map_lookup_elem(&tcp_sock_result_map, &idx); 157 tp_ret = bpf_map_lookup_elem(&tcp_sock_result_map, &result_idx);
158 if (!sk_ret || !tp_ret)
159 RETURN;
160
161 skcpy(sk_ret, sk);
162 tpcpy(tp_ret, tp);
163
164 RETURN;
165}
166
167SEC("cgroup_skb/ingress")
168int ingress_read_sock_fields(struct __sk_buff *skb)
169{
170 __u32 srv_idx = ADDR_SRV_IDX, result_idx = INGRESS_LISTEN_IDX;
171 struct bpf_tcp_sock *tp, *tp_ret;
172 struct bpf_sock *sk, *sk_ret;
173 struct sockaddr_in6 *srv_sa6;
174 __u32 linum, linum_idx;
175
176 linum_idx = INGRESS_LINUM_IDX;
177
178 sk = skb->sk;
179 if (!sk || sk->family != AF_INET6 || !is_loopback6(sk->src_ip6))
180 RETURN;
181
182 srv_sa6 = bpf_map_lookup_elem(&addr_map, &srv_idx);
183 if (!srv_sa6 || sk->src_port != bpf_ntohs(srv_sa6->sin6_port))
184 RETURN;
185
186 if (sk->state != 10 && sk->state != 12)
187 RETURN;
188
189 sk = bpf_get_listener_sock(sk);
190 if (!sk)
191 RETURN;
192
193 tp = bpf_tcp_sock(sk);
194 if (!tp)
195 RETURN;
196
197 sk_ret = bpf_map_lookup_elem(&sock_result_map, &result_idx);
198 tp_ret = bpf_map_lookup_elem(&tcp_sock_result_map, &result_idx);
143 if (!sk_ret || !tp_ret) 199 if (!sk_ret || !tp_ret)
144 RETURN; 200 RETURN;
145 201
diff --git a/tools/testing/selftests/bpf/test_btf.c b/tools/testing/selftests/bpf/test_btf.c
index 38797aa627a7..23e3b314ca60 100644
--- a/tools/testing/selftests/bpf/test_btf.c
+++ b/tools/testing/selftests/bpf/test_btf.c
@@ -5874,6 +5874,50 @@ const struct btf_dedup_test dedup_tests[] = {
5874 .dont_resolve_fwds = false, 5874 .dont_resolve_fwds = false,
5875 }, 5875 },
5876}, 5876},
5877{
5878 .descr = "dedup: enum fwd resolution",
5879 .input = {
5880 .raw_types = {
5881 /* [1] fwd enum 'e1' before full enum */
5882 BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 0), 4),
5883 /* [2] full enum 'e1' after fwd */
5884 BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4),
5885 BTF_ENUM_ENC(NAME_NTH(2), 123),
5886 /* [3] full enum 'e2' before fwd */
5887 BTF_TYPE_ENC(NAME_NTH(3), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4),
5888 BTF_ENUM_ENC(NAME_NTH(4), 456),
5889 /* [4] fwd enum 'e2' after full enum */
5890 BTF_TYPE_ENC(NAME_NTH(3), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 0), 4),
5891 /* [5] incompatible fwd enum with different size */
5892 BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 0), 1),
5893 /* [6] incompatible full enum with different value */
5894 BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4),
5895 BTF_ENUM_ENC(NAME_NTH(2), 321),
5896 BTF_END_RAW,
5897 },
5898 BTF_STR_SEC("\0e1\0e1_val\0e2\0e2_val"),
5899 },
5900 .expect = {
5901 .raw_types = {
5902 /* [1] full enum 'e1' */
5903 BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4),
5904 BTF_ENUM_ENC(NAME_NTH(2), 123),
5905 /* [2] full enum 'e2' */
5906 BTF_TYPE_ENC(NAME_NTH(3), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4),
5907 BTF_ENUM_ENC(NAME_NTH(4), 456),
5908 /* [3] incompatible fwd enum with different size */
5909 BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 0), 1),
5910 /* [4] incompatible full enum with different value */
5911 BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4),
5912 BTF_ENUM_ENC(NAME_NTH(2), 321),
5913 BTF_END_RAW,
5914 },
5915 BTF_STR_SEC("\0e1\0e1_val\0e2\0e2_val"),
5916 },
5917 .opts = {
5918 .dont_resolve_fwds = false,
5919 },
5920},
5877 5921
5878}; 5922};
5879 5923
diff --git a/tools/testing/selftests/bpf/test_sock_fields.c b/tools/testing/selftests/bpf/test_sock_fields.c
index bc8943938bf5..dcae7f664dce 100644
--- a/tools/testing/selftests/bpf/test_sock_fields.c
+++ b/tools/testing/selftests/bpf/test_sock_fields.c
@@ -16,10 +16,23 @@
16#include "cgroup_helpers.h" 16#include "cgroup_helpers.h"
17#include "bpf_rlimit.h" 17#include "bpf_rlimit.h"
18 18
19enum bpf_array_idx { 19enum bpf_addr_array_idx {
20 SRV_IDX, 20 ADDR_SRV_IDX,
21 CLI_IDX, 21 ADDR_CLI_IDX,
22 __NR_BPF_ARRAY_IDX, 22 __NR_BPF_ADDR_ARRAY_IDX,
23};
24
25enum bpf_result_array_idx {
26 EGRESS_SRV_IDX,
27 EGRESS_CLI_IDX,
28 INGRESS_LISTEN_IDX,
29 __NR_BPF_RESULT_ARRAY_IDX,
30};
31
32enum bpf_linum_array_idx {
33 EGRESS_LINUM_IDX,
34 INGRESS_LINUM_IDX,
35 __NR_BPF_LINUM_ARRAY_IDX,
23}; 36};
24 37
25#define CHECK(condition, tag, format...) ({ \ 38#define CHECK(condition, tag, format...) ({ \
@@ -41,8 +54,16 @@ static int linum_map_fd;
41static int addr_map_fd; 54static int addr_map_fd;
42static int tp_map_fd; 55static int tp_map_fd;
43static int sk_map_fd; 56static int sk_map_fd;
44static __u32 srv_idx = SRV_IDX; 57
45static __u32 cli_idx = CLI_IDX; 58static __u32 addr_srv_idx = ADDR_SRV_IDX;
59static __u32 addr_cli_idx = ADDR_CLI_IDX;
60
61static __u32 egress_srv_idx = EGRESS_SRV_IDX;
62static __u32 egress_cli_idx = EGRESS_CLI_IDX;
63static __u32 ingress_listen_idx = INGRESS_LISTEN_IDX;
64
65static __u32 egress_linum_idx = EGRESS_LINUM_IDX;
66static __u32 ingress_linum_idx = INGRESS_LINUM_IDX;
46 67
47static void init_loopback6(struct sockaddr_in6 *sa6) 68static void init_loopback6(struct sockaddr_in6 *sa6)
48{ 69{
@@ -93,29 +114,46 @@ static void print_tp(const struct bpf_tcp_sock *tp)
93 114
94static void check_result(void) 115static void check_result(void)
95{ 116{
96 struct bpf_tcp_sock srv_tp, cli_tp; 117 struct bpf_tcp_sock srv_tp, cli_tp, listen_tp;
97 struct bpf_sock srv_sk, cli_sk; 118 struct bpf_sock srv_sk, cli_sk, listen_sk;
98 __u32 linum, idx0 = 0; 119 __u32 ingress_linum, egress_linum;
99 int err; 120 int err;
100 121
101 err = bpf_map_lookup_elem(linum_map_fd, &idx0, &linum); 122 err = bpf_map_lookup_elem(linum_map_fd, &egress_linum_idx,
123 &egress_linum);
102 CHECK(err == -1, "bpf_map_lookup_elem(linum_map_fd)", 124 CHECK(err == -1, "bpf_map_lookup_elem(linum_map_fd)",
103 "err:%d errno:%d", err, errno); 125 "err:%d errno:%d", err, errno);
104 126
105 err = bpf_map_lookup_elem(sk_map_fd, &srv_idx, &srv_sk); 127 err = bpf_map_lookup_elem(linum_map_fd, &ingress_linum_idx,
106 CHECK(err == -1, "bpf_map_lookup_elem(sk_map_fd, &srv_idx)", 128 &ingress_linum);
129 CHECK(err == -1, "bpf_map_lookup_elem(linum_map_fd)",
130 "err:%d errno:%d", err, errno);
131
132 err = bpf_map_lookup_elem(sk_map_fd, &egress_srv_idx, &srv_sk);
133 CHECK(err == -1, "bpf_map_lookup_elem(sk_map_fd, &egress_srv_idx)",
134 "err:%d errno:%d", err, errno);
135 err = bpf_map_lookup_elem(tp_map_fd, &egress_srv_idx, &srv_tp);
136 CHECK(err == -1, "bpf_map_lookup_elem(tp_map_fd, &egress_srv_idx)",
137 "err:%d errno:%d", err, errno);
138
139 err = bpf_map_lookup_elem(sk_map_fd, &egress_cli_idx, &cli_sk);
140 CHECK(err == -1, "bpf_map_lookup_elem(sk_map_fd, &egress_cli_idx)",
107 "err:%d errno:%d", err, errno); 141 "err:%d errno:%d", err, errno);
108 err = bpf_map_lookup_elem(tp_map_fd, &srv_idx, &srv_tp); 142 err = bpf_map_lookup_elem(tp_map_fd, &egress_cli_idx, &cli_tp);
109 CHECK(err == -1, "bpf_map_lookup_elem(tp_map_fd, &srv_idx)", 143 CHECK(err == -1, "bpf_map_lookup_elem(tp_map_fd, &egress_cli_idx)",
110 "err:%d errno:%d", err, errno); 144 "err:%d errno:%d", err, errno);
111 145
112 err = bpf_map_lookup_elem(sk_map_fd, &cli_idx, &cli_sk); 146 err = bpf_map_lookup_elem(sk_map_fd, &ingress_listen_idx, &listen_sk);
113 CHECK(err == -1, "bpf_map_lookup_elem(sk_map_fd, &cli_idx)", 147 CHECK(err == -1, "bpf_map_lookup_elem(sk_map_fd, &ingress_listen_idx)",
114 "err:%d errno:%d", err, errno); 148 "err:%d errno:%d", err, errno);
115 err = bpf_map_lookup_elem(tp_map_fd, &cli_idx, &cli_tp); 149 err = bpf_map_lookup_elem(tp_map_fd, &ingress_listen_idx, &listen_tp);
116 CHECK(err == -1, "bpf_map_lookup_elem(tp_map_fd, &cli_idx)", 150 CHECK(err == -1, "bpf_map_lookup_elem(tp_map_fd, &ingress_listen_idx)",
117 "err:%d errno:%d", err, errno); 151 "err:%d errno:%d", err, errno);
118 152
153 printf("listen_sk: ");
154 print_sk(&listen_sk);
155 printf("\n");
156
119 printf("srv_sk: "); 157 printf("srv_sk: ");
120 print_sk(&srv_sk); 158 print_sk(&srv_sk);
121 printf("\n"); 159 printf("\n");
@@ -124,6 +162,10 @@ static void check_result(void)
124 print_sk(&cli_sk); 162 print_sk(&cli_sk);
125 printf("\n"); 163 printf("\n");
126 164
165 printf("listen_tp: ");
166 print_tp(&listen_tp);
167 printf("\n");
168
127 printf("srv_tp: "); 169 printf("srv_tp: ");
128 print_tp(&srv_tp); 170 print_tp(&srv_tp);
129 printf("\n"); 171 printf("\n");
@@ -132,6 +174,19 @@ static void check_result(void)
132 print_tp(&cli_tp); 174 print_tp(&cli_tp);
133 printf("\n"); 175 printf("\n");
134 176
177 CHECK(listen_sk.state != 10 ||
178 listen_sk.family != AF_INET6 ||
179 listen_sk.protocol != IPPROTO_TCP ||
180 memcmp(listen_sk.src_ip6, &in6addr_loopback,
181 sizeof(listen_sk.src_ip6)) ||
182 listen_sk.dst_ip6[0] || listen_sk.dst_ip6[1] ||
183 listen_sk.dst_ip6[2] || listen_sk.dst_ip6[3] ||
184 listen_sk.src_port != ntohs(srv_sa6.sin6_port) ||
185 listen_sk.dst_port,
186 "Unexpected listen_sk",
187 "Check listen_sk output. ingress_linum:%u",
188 ingress_linum);
189
135 CHECK(srv_sk.state == 10 || 190 CHECK(srv_sk.state == 10 ||
136 !srv_sk.state || 191 !srv_sk.state ||
137 srv_sk.family != AF_INET6 || 192 srv_sk.family != AF_INET6 ||
@@ -142,7 +197,8 @@ static void check_result(void)
142 sizeof(srv_sk.dst_ip6)) || 197 sizeof(srv_sk.dst_ip6)) ||
143 srv_sk.src_port != ntohs(srv_sa6.sin6_port) || 198 srv_sk.src_port != ntohs(srv_sa6.sin6_port) ||
144 srv_sk.dst_port != cli_sa6.sin6_port, 199 srv_sk.dst_port != cli_sa6.sin6_port,
145 "Unexpected srv_sk", "Check srv_sk output. linum:%u", linum); 200 "Unexpected srv_sk", "Check srv_sk output. egress_linum:%u",
201 egress_linum);
146 202
147 CHECK(cli_sk.state == 10 || 203 CHECK(cli_sk.state == 10 ||
148 !cli_sk.state || 204 !cli_sk.state ||
@@ -154,21 +210,31 @@ static void check_result(void)
154 sizeof(cli_sk.dst_ip6)) || 210 sizeof(cli_sk.dst_ip6)) ||
155 cli_sk.src_port != ntohs(cli_sa6.sin6_port) || 211 cli_sk.src_port != ntohs(cli_sa6.sin6_port) ||
156 cli_sk.dst_port != srv_sa6.sin6_port, 212 cli_sk.dst_port != srv_sa6.sin6_port,
157 "Unexpected cli_sk", "Check cli_sk output. linum:%u", linum); 213 "Unexpected cli_sk", "Check cli_sk output. egress_linum:%u",
214 egress_linum);
215
216 CHECK(listen_tp.data_segs_out ||
217 listen_tp.data_segs_in ||
218 listen_tp.total_retrans ||
219 listen_tp.bytes_acked,
220 "Unexpected listen_tp", "Check listen_tp output. ingress_linum:%u",
221 ingress_linum);
158 222
159 CHECK(srv_tp.data_segs_out != 1 || 223 CHECK(srv_tp.data_segs_out != 1 ||
160 srv_tp.data_segs_in || 224 srv_tp.data_segs_in ||
161 srv_tp.snd_cwnd != 10 || 225 srv_tp.snd_cwnd != 10 ||
162 srv_tp.total_retrans || 226 srv_tp.total_retrans ||
163 srv_tp.bytes_acked != DATA_LEN, 227 srv_tp.bytes_acked != DATA_LEN,
164 "Unexpected srv_tp", "Check srv_tp output. linum:%u", linum); 228 "Unexpected srv_tp", "Check srv_tp output. egress_linum:%u",
229 egress_linum);
165 230
166 CHECK(cli_tp.data_segs_out || 231 CHECK(cli_tp.data_segs_out ||
167 cli_tp.data_segs_in != 1 || 232 cli_tp.data_segs_in != 1 ||
168 cli_tp.snd_cwnd != 10 || 233 cli_tp.snd_cwnd != 10 ||
169 cli_tp.total_retrans || 234 cli_tp.total_retrans ||
170 cli_tp.bytes_received != DATA_LEN, 235 cli_tp.bytes_received != DATA_LEN,
171 "Unexpected cli_tp", "Check cli_tp output. linum:%u", linum); 236 "Unexpected cli_tp", "Check cli_tp output. egress_linum:%u",
237 egress_linum);
172} 238}
173 239
174static void test(void) 240static void test(void)
@@ -211,10 +277,10 @@ static void test(void)
211 err, errno); 277 err, errno);
212 278
213 /* Update addr_map with srv_sa6 and cli_sa6 */ 279 /* Update addr_map with srv_sa6 and cli_sa6 */
214 err = bpf_map_update_elem(addr_map_fd, &srv_idx, &srv_sa6, 0); 280 err = bpf_map_update_elem(addr_map_fd, &addr_srv_idx, &srv_sa6, 0);
215 CHECK(err, "map_update", "err:%d errno:%d", err, errno); 281 CHECK(err, "map_update", "err:%d errno:%d", err, errno);
216 282
217 err = bpf_map_update_elem(addr_map_fd, &cli_idx, &cli_sa6, 0); 283 err = bpf_map_update_elem(addr_map_fd, &addr_cli_idx, &cli_sa6, 0);
218 CHECK(err, "map_update", "err:%d errno:%d", err, errno); 284 CHECK(err, "map_update", "err:%d errno:%d", err, errno);
219 285
220 /* Connect from cli_sa6 to srv_sa6 */ 286 /* Connect from cli_sa6 to srv_sa6 */
@@ -273,9 +339,9 @@ int main(int argc, char **argv)
273 struct bpf_prog_load_attr attr = { 339 struct bpf_prog_load_attr attr = {
274 .file = "test_sock_fields_kern.o", 340 .file = "test_sock_fields_kern.o",
275 .prog_type = BPF_PROG_TYPE_CGROUP_SKB, 341 .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
276 .expected_attach_type = BPF_CGROUP_INET_EGRESS,
277 }; 342 };
278 int cgroup_fd, prog_fd, err; 343 int cgroup_fd, egress_fd, ingress_fd, err;
344 struct bpf_program *ingress_prog;
279 struct bpf_object *obj; 345 struct bpf_object *obj;
280 struct bpf_map *map; 346 struct bpf_map *map;
281 347
@@ -293,12 +359,24 @@ int main(int argc, char **argv)
293 err = join_cgroup(TEST_CGROUP); 359 err = join_cgroup(TEST_CGROUP);
294 CHECK(err, "join_cgroup", "err:%d errno:%d", err, errno); 360 CHECK(err, "join_cgroup", "err:%d errno:%d", err, errno);
295 361
296 err = bpf_prog_load_xattr(&attr, &obj, &prog_fd); 362 err = bpf_prog_load_xattr(&attr, &obj, &egress_fd);
297 CHECK(err, "bpf_prog_load_xattr()", "err:%d", err); 363 CHECK(err, "bpf_prog_load_xattr()", "err:%d", err);
298 364
299 err = bpf_prog_attach(prog_fd, cgroup_fd, BPF_CGROUP_INET_EGRESS, 0); 365 ingress_prog = bpf_object__find_program_by_title(obj,
366 "cgroup_skb/ingress");
367 CHECK(!ingress_prog,
368 "bpf_object__find_program_by_title(cgroup_skb/ingress)",
369 "not found");
370 ingress_fd = bpf_program__fd(ingress_prog);
371
372 err = bpf_prog_attach(egress_fd, cgroup_fd, BPF_CGROUP_INET_EGRESS, 0);
300 CHECK(err == -1, "bpf_prog_attach(CPF_CGROUP_INET_EGRESS)", 373 CHECK(err == -1, "bpf_prog_attach(CPF_CGROUP_INET_EGRESS)",
301 "err:%d errno%d", err, errno); 374 "err:%d errno%d", err, errno);
375
376 err = bpf_prog_attach(ingress_fd, cgroup_fd,
377 BPF_CGROUP_INET_INGRESS, 0);
378 CHECK(err == -1, "bpf_prog_attach(CPF_CGROUP_INET_INGRESS)",
379 "err:%d errno%d", err, errno);
302 close(cgroup_fd); 380 close(cgroup_fd);
303 381
304 map = bpf_object__find_map_by_name(obj, "addr_map"); 382 map = bpf_object__find_map_by_name(obj, "addr_map");
diff --git a/tools/testing/selftests/bpf/verifier/ref_tracking.c b/tools/testing/selftests/bpf/verifier/ref_tracking.c
index 3ed3593bd8b6..923f2110072d 100644
--- a/tools/testing/selftests/bpf/verifier/ref_tracking.c
+++ b/tools/testing/selftests/bpf/verifier/ref_tracking.c
@@ -605,3 +605,171 @@
605 .prog_type = BPF_PROG_TYPE_SCHED_CLS, 605 .prog_type = BPF_PROG_TYPE_SCHED_CLS,
606 .result = ACCEPT, 606 .result = ACCEPT,
607}, 607},
608{
609 "reference tracking: use ptr from bpf_tcp_sock() after release",
610 .insns = {
611 BPF_SK_LOOKUP,
612 BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
613 BPF_EXIT_INSN(),
614 BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
615 BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
616 BPF_EMIT_CALL(BPF_FUNC_tcp_sock),
617 BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 3),
618 BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
619 BPF_EMIT_CALL(BPF_FUNC_sk_release),
620 BPF_EXIT_INSN(),
621 BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
622 BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
623 BPF_EMIT_CALL(BPF_FUNC_sk_release),
624 BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_7, offsetof(struct bpf_tcp_sock, snd_cwnd)),
625 BPF_EXIT_INSN(),
626 },
627 .prog_type = BPF_PROG_TYPE_SCHED_CLS,
628 .result = REJECT,
629 .errstr = "invalid mem access",
630},
631{
632 "reference tracking: use ptr from bpf_sk_fullsock() after release",
633 .insns = {
634 BPF_SK_LOOKUP,
635 BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
636 BPF_EXIT_INSN(),
637 BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
638 BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
639 BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
640 BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 3),
641 BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
642 BPF_EMIT_CALL(BPF_FUNC_sk_release),
643 BPF_EXIT_INSN(),
644 BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
645 BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
646 BPF_EMIT_CALL(BPF_FUNC_sk_release),
647 BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_7, offsetof(struct bpf_sock, type)),
648 BPF_EXIT_INSN(),
649 },
650 .prog_type = BPF_PROG_TYPE_SCHED_CLS,
651 .result = REJECT,
652 .errstr = "invalid mem access",
653},
654{
655 "reference tracking: use ptr from bpf_sk_fullsock(tp) after release",
656 .insns = {
657 BPF_SK_LOOKUP,
658 BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
659 BPF_EXIT_INSN(),
660 BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
661 BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
662 BPF_EMIT_CALL(BPF_FUNC_tcp_sock),
663 BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 3),
664 BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
665 BPF_EMIT_CALL(BPF_FUNC_sk_release),
666 BPF_EXIT_INSN(),
667 BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
668 BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
669 BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
670 BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
671 BPF_EMIT_CALL(BPF_FUNC_sk_release),
672 BPF_JMP_IMM(BPF_JNE, BPF_REG_6, 0, 1),
673 BPF_EXIT_INSN(),
674 BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_6, offsetof(struct bpf_sock, type)),
675 BPF_EXIT_INSN(),
676 },
677 .prog_type = BPF_PROG_TYPE_SCHED_CLS,
678 .result = REJECT,
679 .errstr = "invalid mem access",
680},
681{
682 "reference tracking: use sk after bpf_sk_release(tp)",
683 .insns = {
684 BPF_SK_LOOKUP,
685 BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
686 BPF_EXIT_INSN(),
687 BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
688 BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
689 BPF_EMIT_CALL(BPF_FUNC_tcp_sock),
690 BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 3),
691 BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
692 BPF_EMIT_CALL(BPF_FUNC_sk_release),
693 BPF_EXIT_INSN(),
694 BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
695 BPF_EMIT_CALL(BPF_FUNC_sk_release),
696 BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_6, offsetof(struct bpf_sock, type)),
697 BPF_EXIT_INSN(),
698 },
699 .prog_type = BPF_PROG_TYPE_SCHED_CLS,
700 .result = REJECT,
701 .errstr = "invalid mem access",
702},
703{
704 "reference tracking: use ptr from bpf_get_listener_sock() after bpf_sk_release(sk)",
705 .insns = {
706 BPF_SK_LOOKUP,
707 BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
708 BPF_EXIT_INSN(),
709 BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
710 BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
711 BPF_EMIT_CALL(BPF_FUNC_get_listener_sock),
712 BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 3),
713 BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
714 BPF_EMIT_CALL(BPF_FUNC_sk_release),
715 BPF_EXIT_INSN(),
716 BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
717 BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
718 BPF_EMIT_CALL(BPF_FUNC_sk_release),
719 BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_6, offsetof(struct bpf_sock, src_port)),
720 BPF_EXIT_INSN(),
721 },
722 .prog_type = BPF_PROG_TYPE_SCHED_CLS,
723 .result = ACCEPT,
724},
725{
726 "reference tracking: bpf_sk_release(listen_sk)",
727 .insns = {
728 BPF_SK_LOOKUP,
729 BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
730 BPF_EXIT_INSN(),
731 BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
732 BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
733 BPF_EMIT_CALL(BPF_FUNC_get_listener_sock),
734 BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 3),
735 BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
736 BPF_EMIT_CALL(BPF_FUNC_sk_release),
737 BPF_EXIT_INSN(),
738 BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
739 BPF_EMIT_CALL(BPF_FUNC_sk_release),
740 BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_6, offsetof(struct bpf_sock, type)),
741 BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
742 BPF_EMIT_CALL(BPF_FUNC_sk_release),
743 BPF_EXIT_INSN(),
744 },
745 .prog_type = BPF_PROG_TYPE_SCHED_CLS,
746 .result = REJECT,
747 .errstr = "reference has not been acquired before",
748},
749{
750 /* !bpf_sk_fullsock(sk) is checked but !bpf_tcp_sock(sk) is not checked */
751 "reference tracking: tp->snd_cwnd after bpf_sk_fullsock(sk) and bpf_tcp_sock(sk)",
752 .insns = {
753 BPF_SK_LOOKUP,
754 BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
755 BPF_EXIT_INSN(),
756 BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
757 BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
758 BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
759 BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
760 BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
761 BPF_EMIT_CALL(BPF_FUNC_tcp_sock),
762 BPF_MOV64_REG(BPF_REG_8, BPF_REG_0),
763 BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0, 3),
764 BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
765 BPF_EMIT_CALL(BPF_FUNC_sk_release),
766 BPF_EXIT_INSN(),
767 BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_8, offsetof(struct bpf_tcp_sock, snd_cwnd)),
768 BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
769 BPF_EMIT_CALL(BPF_FUNC_sk_release),
770 BPF_EXIT_INSN(),
771 },
772 .prog_type = BPF_PROG_TYPE_SCHED_CLS,
773 .result = REJECT,
774 .errstr = "invalid mem access",
775},
diff --git a/tools/testing/selftests/bpf/verifier/sock.c b/tools/testing/selftests/bpf/verifier/sock.c
index 0ddfdf76aba5..416436231fab 100644
--- a/tools/testing/selftests/bpf/verifier/sock.c
+++ b/tools/testing/selftests/bpf/verifier/sock.c
@@ -342,7 +342,7 @@
342 }, 342 },
343 .prog_type = BPF_PROG_TYPE_SCHED_CLS, 343 .prog_type = BPF_PROG_TYPE_SCHED_CLS,
344 .result = REJECT, 344 .result = REJECT,
345 .errstr = "type=sock_common expected=sock", 345 .errstr = "reference has not been acquired before",
346}, 346},
347{ 347{
348 "bpf_sk_release(bpf_sk_fullsock(skb->sk))", 348 "bpf_sk_release(bpf_sk_fullsock(skb->sk))",
@@ -380,5 +380,5 @@
380 }, 380 },
381 .prog_type = BPF_PROG_TYPE_SCHED_CLS, 381 .prog_type = BPF_PROG_TYPE_SCHED_CLS,
382 .result = REJECT, 382 .result = REJECT,
383 .errstr = "type=tcp_sock expected=sock", 383 .errstr = "reference has not been acquired before",
384}, 384},