diff options
author | David S. Miller <davem@davemloft.net> | 2019-06-15 21:19:47 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2019-06-15 21:19:47 -0400 |
commit | 1eb4169c1e6b3c95f3a99c2c7f91b10e6c98e848 (patch) | |
tree | bc9dd11610389aadebdc4e016d5035dff7aefaeb | |
parent | 5db2e7c7917f40236a021959893121c4e496f609 (diff) | |
parent | 9594dc3c7e71b9f52bee1d7852eb3d4e3aea9e99 (diff) |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf
Alexei Starovoitov says:
====================
pull-request: bpf 2019-06-15
The following pull-request contains BPF updates for your *net* tree.
The main changes are:
1) fix stack layout of JITed x64 bpf code, from Alexei.
2) fix out of bounds memory access in bpf_sk_storage, from Arthur.
3) fix lpm trie walk, from Jonathan.
4) fix nested bpf_perf_event_output, from Matt.
5) and several other fixes.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | arch/powerpc/include/asm/ppc-opcode.h | 1 | ||||
-rw-r--r-- | arch/powerpc/net/bpf_jit.h | 2 | ||||
-rw-r--r-- | arch/powerpc/net/bpf_jit_comp64.c | 8 | ||||
-rw-r--r-- | arch/x86/net/bpf_jit_comp.c | 74 | ||||
-rw-r--r-- | include/uapi/linux/bpf.h | 4 | ||||
-rw-r--r-- | kernel/bpf/devmap.c | 9 | ||||
-rw-r--r-- | kernel/bpf/lpm_trie.c | 9 | ||||
-rw-r--r-- | kernel/trace/bpf_trace.c | 100 | ||||
-rw-r--r-- | net/core/bpf_sk_storage.c | 3 | ||||
-rw-r--r-- | net/core/sock.c | 3 | ||||
-rw-r--r-- | net/xdp/xdp_umem.c | 11 | ||||
-rw-r--r-- | tools/include/uapi/linux/bpf.h | 4 | ||||
-rw-r--r-- | tools/testing/selftests/bpf/test_lpm_map.c | 41 | ||||
-rw-r--r-- | tools/testing/selftests/bpf/verifier/div_overflow.c | 14 |
14 files changed, 188 insertions, 95 deletions
diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index 493c5c943acd..2291daf39cd1 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h | |||
@@ -338,6 +338,7 @@ | |||
338 | #define PPC_INST_MADDLD 0x10000033 | 338 | #define PPC_INST_MADDLD 0x10000033 |
339 | #define PPC_INST_DIVWU 0x7c000396 | 339 | #define PPC_INST_DIVWU 0x7c000396 |
340 | #define PPC_INST_DIVD 0x7c0003d2 | 340 | #define PPC_INST_DIVD 0x7c0003d2 |
341 | #define PPC_INST_DIVDU 0x7c000392 | ||
341 | #define PPC_INST_RLWINM 0x54000000 | 342 | #define PPC_INST_RLWINM 0x54000000 |
342 | #define PPC_INST_RLWINM_DOT 0x54000001 | 343 | #define PPC_INST_RLWINM_DOT 0x54000001 |
343 | #define PPC_INST_RLWIMI 0x50000000 | 344 | #define PPC_INST_RLWIMI 0x50000000 |
diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h index dcac37745b05..1e932898d430 100644 --- a/arch/powerpc/net/bpf_jit.h +++ b/arch/powerpc/net/bpf_jit.h | |||
@@ -116,7 +116,7 @@ | |||
116 | ___PPC_RA(a) | IMM_L(i)) | 116 | ___PPC_RA(a) | IMM_L(i)) |
117 | #define PPC_DIVWU(d, a, b) EMIT(PPC_INST_DIVWU | ___PPC_RT(d) | \ | 117 | #define PPC_DIVWU(d, a, b) EMIT(PPC_INST_DIVWU | ___PPC_RT(d) | \ |
118 | ___PPC_RA(a) | ___PPC_RB(b)) | 118 | ___PPC_RA(a) | ___PPC_RB(b)) |
119 | #define PPC_DIVD(d, a, b) EMIT(PPC_INST_DIVD | ___PPC_RT(d) | \ | 119 | #define PPC_DIVDU(d, a, b) EMIT(PPC_INST_DIVDU | ___PPC_RT(d) | \ |
120 | ___PPC_RA(a) | ___PPC_RB(b)) | 120 | ___PPC_RA(a) | ___PPC_RB(b)) |
121 | #define PPC_AND(d, a, b) EMIT(PPC_INST_AND | ___PPC_RA(d) | \ | 121 | #define PPC_AND(d, a, b) EMIT(PPC_INST_AND | ___PPC_RA(d) | \ |
122 | ___PPC_RS(a) | ___PPC_RB(b)) | 122 | ___PPC_RS(a) | ___PPC_RB(b)) |
diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index 21a1dcd4b156..e3fedeffe40f 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c | |||
@@ -399,12 +399,12 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, | |||
399 | case BPF_ALU64 | BPF_DIV | BPF_X: /* dst /= src */ | 399 | case BPF_ALU64 | BPF_DIV | BPF_X: /* dst /= src */ |
400 | case BPF_ALU64 | BPF_MOD | BPF_X: /* dst %= src */ | 400 | case BPF_ALU64 | BPF_MOD | BPF_X: /* dst %= src */ |
401 | if (BPF_OP(code) == BPF_MOD) { | 401 | if (BPF_OP(code) == BPF_MOD) { |
402 | PPC_DIVD(b2p[TMP_REG_1], dst_reg, src_reg); | 402 | PPC_DIVDU(b2p[TMP_REG_1], dst_reg, src_reg); |
403 | PPC_MULD(b2p[TMP_REG_1], src_reg, | 403 | PPC_MULD(b2p[TMP_REG_1], src_reg, |
404 | b2p[TMP_REG_1]); | 404 | b2p[TMP_REG_1]); |
405 | PPC_SUB(dst_reg, dst_reg, b2p[TMP_REG_1]); | 405 | PPC_SUB(dst_reg, dst_reg, b2p[TMP_REG_1]); |
406 | } else | 406 | } else |
407 | PPC_DIVD(dst_reg, dst_reg, src_reg); | 407 | PPC_DIVDU(dst_reg, dst_reg, src_reg); |
408 | break; | 408 | break; |
409 | case BPF_ALU | BPF_MOD | BPF_K: /* (u32) dst %= (u32) imm */ | 409 | case BPF_ALU | BPF_MOD | BPF_K: /* (u32) dst %= (u32) imm */ |
410 | case BPF_ALU | BPF_DIV | BPF_K: /* (u32) dst /= (u32) imm */ | 410 | case BPF_ALU | BPF_DIV | BPF_K: /* (u32) dst /= (u32) imm */ |
@@ -432,7 +432,7 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, | |||
432 | break; | 432 | break; |
433 | case BPF_ALU64: | 433 | case BPF_ALU64: |
434 | if (BPF_OP(code) == BPF_MOD) { | 434 | if (BPF_OP(code) == BPF_MOD) { |
435 | PPC_DIVD(b2p[TMP_REG_2], dst_reg, | 435 | PPC_DIVDU(b2p[TMP_REG_2], dst_reg, |
436 | b2p[TMP_REG_1]); | 436 | b2p[TMP_REG_1]); |
437 | PPC_MULD(b2p[TMP_REG_1], | 437 | PPC_MULD(b2p[TMP_REG_1], |
438 | b2p[TMP_REG_1], | 438 | b2p[TMP_REG_1], |
@@ -440,7 +440,7 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, | |||
440 | PPC_SUB(dst_reg, dst_reg, | 440 | PPC_SUB(dst_reg, dst_reg, |
441 | b2p[TMP_REG_1]); | 441 | b2p[TMP_REG_1]); |
442 | } else | 442 | } else |
443 | PPC_DIVD(dst_reg, dst_reg, | 443 | PPC_DIVDU(dst_reg, dst_reg, |
444 | b2p[TMP_REG_1]); | 444 | b2p[TMP_REG_1]); |
445 | break; | 445 | break; |
446 | } | 446 | } |
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index afabf597c855..d88bc0935886 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c | |||
@@ -190,9 +190,7 @@ struct jit_context { | |||
190 | #define BPF_MAX_INSN_SIZE 128 | 190 | #define BPF_MAX_INSN_SIZE 128 |
191 | #define BPF_INSN_SAFETY 64 | 191 | #define BPF_INSN_SAFETY 64 |
192 | 192 | ||
193 | #define AUX_STACK_SPACE 40 /* Space for RBX, R13, R14, R15, tailcnt */ | 193 | #define PROLOGUE_SIZE 20 |
194 | |||
195 | #define PROLOGUE_SIZE 37 | ||
196 | 194 | ||
197 | /* | 195 | /* |
198 | * Emit x86-64 prologue code for BPF program and check its size. | 196 | * Emit x86-64 prologue code for BPF program and check its size. |
@@ -203,44 +201,19 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf) | |||
203 | u8 *prog = *pprog; | 201 | u8 *prog = *pprog; |
204 | int cnt = 0; | 202 | int cnt = 0; |
205 | 203 | ||
206 | /* push rbp */ | 204 | EMIT1(0x55); /* push rbp */ |
207 | EMIT1(0x55); | 205 | EMIT3(0x48, 0x89, 0xE5); /* mov rbp, rsp */ |
208 | 206 | /* sub rsp, rounded_stack_depth */ | |
209 | /* mov rbp,rsp */ | 207 | EMIT3_off32(0x48, 0x81, 0xEC, round_up(stack_depth, 8)); |
210 | EMIT3(0x48, 0x89, 0xE5); | 208 | EMIT1(0x53); /* push rbx */ |
211 | 209 | EMIT2(0x41, 0x55); /* push r13 */ | |
212 | /* sub rsp, rounded_stack_depth + AUX_STACK_SPACE */ | 210 | EMIT2(0x41, 0x56); /* push r14 */ |
213 | EMIT3_off32(0x48, 0x81, 0xEC, | 211 | EMIT2(0x41, 0x57); /* push r15 */ |
214 | round_up(stack_depth, 8) + AUX_STACK_SPACE); | ||
215 | |||
216 | /* sub rbp, AUX_STACK_SPACE */ | ||
217 | EMIT4(0x48, 0x83, 0xED, AUX_STACK_SPACE); | ||
218 | |||
219 | /* mov qword ptr [rbp+0],rbx */ | ||
220 | EMIT4(0x48, 0x89, 0x5D, 0); | ||
221 | /* mov qword ptr [rbp+8],r13 */ | ||
222 | EMIT4(0x4C, 0x89, 0x6D, 8); | ||
223 | /* mov qword ptr [rbp+16],r14 */ | ||
224 | EMIT4(0x4C, 0x89, 0x75, 16); | ||
225 | /* mov qword ptr [rbp+24],r15 */ | ||
226 | EMIT4(0x4C, 0x89, 0x7D, 24); | ||
227 | |||
228 | if (!ebpf_from_cbpf) { | 212 | if (!ebpf_from_cbpf) { |
229 | /* | 213 | /* zero init tail_call_cnt */ |
230 | * Clear the tail call counter (tail_call_cnt): for eBPF tail | 214 | EMIT2(0x6a, 0x00); |
231 | * calls we need to reset the counter to 0. It's done in two | ||
232 | * instructions, resetting RAX register to 0, and moving it | ||
233 | * to the counter location. | ||
234 | */ | ||
235 | |||
236 | /* xor eax, eax */ | ||
237 | EMIT2(0x31, 0xc0); | ||
238 | /* mov qword ptr [rbp+32], rax */ | ||
239 | EMIT4(0x48, 0x89, 0x45, 32); | ||
240 | |||
241 | BUILD_BUG_ON(cnt != PROLOGUE_SIZE); | 215 | BUILD_BUG_ON(cnt != PROLOGUE_SIZE); |
242 | } | 216 | } |
243 | |||
244 | *pprog = prog; | 217 | *pprog = prog; |
245 | } | 218 | } |
246 | 219 | ||
@@ -285,13 +258,13 @@ static void emit_bpf_tail_call(u8 **pprog) | |||
285 | * if (tail_call_cnt > MAX_TAIL_CALL_CNT) | 258 | * if (tail_call_cnt > MAX_TAIL_CALL_CNT) |
286 | * goto out; | 259 | * goto out; |
287 | */ | 260 | */ |
288 | EMIT2_off32(0x8B, 0x85, 36); /* mov eax, dword ptr [rbp + 36] */ | 261 | EMIT2_off32(0x8B, 0x85, -36 - MAX_BPF_STACK); /* mov eax, dword ptr [rbp - 548] */ |
289 | EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */ | 262 | EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */ |
290 | #define OFFSET2 (30 + RETPOLINE_RAX_BPF_JIT_SIZE) | 263 | #define OFFSET2 (30 + RETPOLINE_RAX_BPF_JIT_SIZE) |
291 | EMIT2(X86_JA, OFFSET2); /* ja out */ | 264 | EMIT2(X86_JA, OFFSET2); /* ja out */ |
292 | label2 = cnt; | 265 | label2 = cnt; |
293 | EMIT3(0x83, 0xC0, 0x01); /* add eax, 1 */ | 266 | EMIT3(0x83, 0xC0, 0x01); /* add eax, 1 */ |
294 | EMIT2_off32(0x89, 0x85, 36); /* mov dword ptr [rbp + 36], eax */ | 267 | EMIT2_off32(0x89, 0x85, -36 - MAX_BPF_STACK); /* mov dword ptr [rbp -548], eax */ |
295 | 268 | ||
296 | /* prog = array->ptrs[index]; */ | 269 | /* prog = array->ptrs[index]; */ |
297 | EMIT4_off32(0x48, 0x8B, 0x84, 0xD6, /* mov rax, [rsi + rdx * 8 + offsetof(...)] */ | 270 | EMIT4_off32(0x48, 0x8B, 0x84, 0xD6, /* mov rax, [rsi + rdx * 8 + offsetof(...)] */ |
@@ -1040,19 +1013,14 @@ emit_jmp: | |||
1040 | seen_exit = true; | 1013 | seen_exit = true; |
1041 | /* Update cleanup_addr */ | 1014 | /* Update cleanup_addr */ |
1042 | ctx->cleanup_addr = proglen; | 1015 | ctx->cleanup_addr = proglen; |
1043 | /* mov rbx, qword ptr [rbp+0] */ | 1016 | if (!bpf_prog_was_classic(bpf_prog)) |
1044 | EMIT4(0x48, 0x8B, 0x5D, 0); | 1017 | EMIT1(0x5B); /* get rid of tail_call_cnt */ |
1045 | /* mov r13, qword ptr [rbp+8] */ | 1018 | EMIT2(0x41, 0x5F); /* pop r15 */ |
1046 | EMIT4(0x4C, 0x8B, 0x6D, 8); | 1019 | EMIT2(0x41, 0x5E); /* pop r14 */ |
1047 | /* mov r14, qword ptr [rbp+16] */ | 1020 | EMIT2(0x41, 0x5D); /* pop r13 */ |
1048 | EMIT4(0x4C, 0x8B, 0x75, 16); | 1021 | EMIT1(0x5B); /* pop rbx */ |
1049 | /* mov r15, qword ptr [rbp+24] */ | 1022 | EMIT1(0xC9); /* leave */ |
1050 | EMIT4(0x4C, 0x8B, 0x7D, 24); | 1023 | EMIT1(0xC3); /* ret */ |
1051 | |||
1052 | /* add rbp, AUX_STACK_SPACE */ | ||
1053 | EMIT4(0x48, 0x83, 0xC5, AUX_STACK_SPACE); | ||
1054 | EMIT1(0xC9); /* leave */ | ||
1055 | EMIT1(0xC3); /* ret */ | ||
1056 | break; | 1024 | break; |
1057 | 1025 | ||
1058 | default: | 1026 | default: |
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index e4114a7e4451..a8b823c30b43 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h | |||
@@ -3378,8 +3378,8 @@ struct bpf_raw_tracepoint_args { | |||
3378 | /* DIRECT: Skip the FIB rules and go to FIB table associated with device | 3378 | /* DIRECT: Skip the FIB rules and go to FIB table associated with device |
3379 | * OUTPUT: Do lookup from egress perspective; default is ingress | 3379 | * OUTPUT: Do lookup from egress perspective; default is ingress |
3380 | */ | 3380 | */ |
3381 | #define BPF_FIB_LOOKUP_DIRECT BIT(0) | 3381 | #define BPF_FIB_LOOKUP_DIRECT (1U << 0) |
3382 | #define BPF_FIB_LOOKUP_OUTPUT BIT(1) | 3382 | #define BPF_FIB_LOOKUP_OUTPUT (1U << 1) |
3383 | 3383 | ||
3384 | enum { | 3384 | enum { |
3385 | BPF_FIB_LKUP_RET_SUCCESS, /* lookup successful */ | 3385 | BPF_FIB_LKUP_RET_SUCCESS, /* lookup successful */ |
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index 1e525d70f833..1defea4b2755 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c | |||
@@ -186,6 +186,7 @@ static void dev_map_free(struct bpf_map *map) | |||
186 | if (!dev) | 186 | if (!dev) |
187 | continue; | 187 | continue; |
188 | 188 | ||
189 | free_percpu(dev->bulkq); | ||
189 | dev_put(dev->dev); | 190 | dev_put(dev->dev); |
190 | kfree(dev); | 191 | kfree(dev); |
191 | } | 192 | } |
@@ -281,6 +282,7 @@ void __dev_map_flush(struct bpf_map *map) | |||
281 | unsigned long *bitmap = this_cpu_ptr(dtab->flush_needed); | 282 | unsigned long *bitmap = this_cpu_ptr(dtab->flush_needed); |
282 | u32 bit; | 283 | u32 bit; |
283 | 284 | ||
285 | rcu_read_lock(); | ||
284 | for_each_set_bit(bit, bitmap, map->max_entries) { | 286 | for_each_set_bit(bit, bitmap, map->max_entries) { |
285 | struct bpf_dtab_netdev *dev = READ_ONCE(dtab->netdev_map[bit]); | 287 | struct bpf_dtab_netdev *dev = READ_ONCE(dtab->netdev_map[bit]); |
286 | struct xdp_bulk_queue *bq; | 288 | struct xdp_bulk_queue *bq; |
@@ -291,11 +293,12 @@ void __dev_map_flush(struct bpf_map *map) | |||
291 | if (unlikely(!dev)) | 293 | if (unlikely(!dev)) |
292 | continue; | 294 | continue; |
293 | 295 | ||
294 | __clear_bit(bit, bitmap); | ||
295 | |||
296 | bq = this_cpu_ptr(dev->bulkq); | 296 | bq = this_cpu_ptr(dev->bulkq); |
297 | bq_xmit_all(dev, bq, XDP_XMIT_FLUSH, true); | 297 | bq_xmit_all(dev, bq, XDP_XMIT_FLUSH, true); |
298 | |||
299 | __clear_bit(bit, bitmap); | ||
298 | } | 300 | } |
301 | rcu_read_unlock(); | ||
299 | } | 302 | } |
300 | 303 | ||
301 | /* rcu_read_lock (from syscall and BPF contexts) ensures that if a delete and/or | 304 | /* rcu_read_lock (from syscall and BPF contexts) ensures that if a delete and/or |
@@ -388,6 +391,7 @@ static void dev_map_flush_old(struct bpf_dtab_netdev *dev) | |||
388 | 391 | ||
389 | int cpu; | 392 | int cpu; |
390 | 393 | ||
394 | rcu_read_lock(); | ||
391 | for_each_online_cpu(cpu) { | 395 | for_each_online_cpu(cpu) { |
392 | bitmap = per_cpu_ptr(dev->dtab->flush_needed, cpu); | 396 | bitmap = per_cpu_ptr(dev->dtab->flush_needed, cpu); |
393 | __clear_bit(dev->bit, bitmap); | 397 | __clear_bit(dev->bit, bitmap); |
@@ -395,6 +399,7 @@ static void dev_map_flush_old(struct bpf_dtab_netdev *dev) | |||
395 | bq = per_cpu_ptr(dev->bulkq, cpu); | 399 | bq = per_cpu_ptr(dev->bulkq, cpu); |
396 | bq_xmit_all(dev, bq, XDP_XMIT_FLUSH, false); | 400 | bq_xmit_all(dev, bq, XDP_XMIT_FLUSH, false); |
397 | } | 401 | } |
402 | rcu_read_unlock(); | ||
398 | } | 403 | } |
399 | } | 404 | } |
400 | 405 | ||
diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c index e61630c2e50b..864e2a496376 100644 --- a/kernel/bpf/lpm_trie.c +++ b/kernel/bpf/lpm_trie.c | |||
@@ -716,9 +716,14 @@ find_leftmost: | |||
716 | * have exactly two children, so this function will never return NULL. | 716 | * have exactly two children, so this function will never return NULL. |
717 | */ | 717 | */ |
718 | for (node = search_root; node;) { | 718 | for (node = search_root; node;) { |
719 | if (!(node->flags & LPM_TREE_NODE_FLAG_IM)) | 719 | if (node->flags & LPM_TREE_NODE_FLAG_IM) { |
720 | node = rcu_dereference(node->child[0]); | ||
721 | } else { | ||
720 | next_node = node; | 722 | next_node = node; |
721 | node = rcu_dereference(node->child[0]); | 723 | node = rcu_dereference(node->child[0]); |
724 | if (!node) | ||
725 | node = rcu_dereference(next_node->child[1]); | ||
726 | } | ||
722 | } | 727 | } |
723 | do_copy: | 728 | do_copy: |
724 | next_key->prefixlen = next_node->prefixlen; | 729 | next_key->prefixlen = next_node->prefixlen; |
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index f92d6ad5e080..1c9a4745e596 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c | |||
@@ -410,8 +410,6 @@ static const struct bpf_func_proto bpf_perf_event_read_value_proto = { | |||
410 | .arg4_type = ARG_CONST_SIZE, | 410 | .arg4_type = ARG_CONST_SIZE, |
411 | }; | 411 | }; |
412 | 412 | ||
413 | static DEFINE_PER_CPU(struct perf_sample_data, bpf_trace_sd); | ||
414 | |||
415 | static __always_inline u64 | 413 | static __always_inline u64 |
416 | __bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map, | 414 | __bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map, |
417 | u64 flags, struct perf_sample_data *sd) | 415 | u64 flags, struct perf_sample_data *sd) |
@@ -442,24 +440,50 @@ __bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map, | |||
442 | return perf_event_output(event, sd, regs); | 440 | return perf_event_output(event, sd, regs); |
443 | } | 441 | } |
444 | 442 | ||
443 | /* | ||
444 | * Support executing tracepoints in normal, irq, and nmi context that each call | ||
445 | * bpf_perf_event_output | ||
446 | */ | ||
447 | struct bpf_trace_sample_data { | ||
448 | struct perf_sample_data sds[3]; | ||
449 | }; | ||
450 | |||
451 | static DEFINE_PER_CPU(struct bpf_trace_sample_data, bpf_trace_sds); | ||
452 | static DEFINE_PER_CPU(int, bpf_trace_nest_level); | ||
445 | BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map, | 453 | BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map, |
446 | u64, flags, void *, data, u64, size) | 454 | u64, flags, void *, data, u64, size) |
447 | { | 455 | { |
448 | struct perf_sample_data *sd = this_cpu_ptr(&bpf_trace_sd); | 456 | struct bpf_trace_sample_data *sds = this_cpu_ptr(&bpf_trace_sds); |
457 | int nest_level = this_cpu_inc_return(bpf_trace_nest_level); | ||
449 | struct perf_raw_record raw = { | 458 | struct perf_raw_record raw = { |
450 | .frag = { | 459 | .frag = { |
451 | .size = size, | 460 | .size = size, |
452 | .data = data, | 461 | .data = data, |
453 | }, | 462 | }, |
454 | }; | 463 | }; |
464 | struct perf_sample_data *sd; | ||
465 | int err; | ||
455 | 466 | ||
456 | if (unlikely(flags & ~(BPF_F_INDEX_MASK))) | 467 | if (WARN_ON_ONCE(nest_level > ARRAY_SIZE(sds->sds))) { |
457 | return -EINVAL; | 468 | err = -EBUSY; |
469 | goto out; | ||
470 | } | ||
471 | |||
472 | sd = &sds->sds[nest_level - 1]; | ||
473 | |||
474 | if (unlikely(flags & ~(BPF_F_INDEX_MASK))) { | ||
475 | err = -EINVAL; | ||
476 | goto out; | ||
477 | } | ||
458 | 478 | ||
459 | perf_sample_data_init(sd, 0, 0); | 479 | perf_sample_data_init(sd, 0, 0); |
460 | sd->raw = &raw; | 480 | sd->raw = &raw; |
461 | 481 | ||
462 | return __bpf_perf_event_output(regs, map, flags, sd); | 482 | err = __bpf_perf_event_output(regs, map, flags, sd); |
483 | |||
484 | out: | ||
485 | this_cpu_dec(bpf_trace_nest_level); | ||
486 | return err; | ||
463 | } | 487 | } |
464 | 488 | ||
465 | static const struct bpf_func_proto bpf_perf_event_output_proto = { | 489 | static const struct bpf_func_proto bpf_perf_event_output_proto = { |
@@ -822,16 +846,48 @@ pe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) | |||
822 | /* | 846 | /* |
823 | * bpf_raw_tp_regs are separate from bpf_pt_regs used from skb/xdp | 847 | * bpf_raw_tp_regs are separate from bpf_pt_regs used from skb/xdp |
824 | * to avoid potential recursive reuse issue when/if tracepoints are added | 848 | * to avoid potential recursive reuse issue when/if tracepoints are added |
825 | * inside bpf_*_event_output, bpf_get_stackid and/or bpf_get_stack | 849 | * inside bpf_*_event_output, bpf_get_stackid and/or bpf_get_stack. |
850 | * | ||
851 | * Since raw tracepoints run despite bpf_prog_active, support concurrent usage | ||
852 | * in normal, irq, and nmi context. | ||
826 | */ | 853 | */ |
827 | static DEFINE_PER_CPU(struct pt_regs, bpf_raw_tp_regs); | 854 | struct bpf_raw_tp_regs { |
855 | struct pt_regs regs[3]; | ||
856 | }; | ||
857 | static DEFINE_PER_CPU(struct bpf_raw_tp_regs, bpf_raw_tp_regs); | ||
858 | static DEFINE_PER_CPU(int, bpf_raw_tp_nest_level); | ||
859 | static struct pt_regs *get_bpf_raw_tp_regs(void) | ||
860 | { | ||
861 | struct bpf_raw_tp_regs *tp_regs = this_cpu_ptr(&bpf_raw_tp_regs); | ||
862 | int nest_level = this_cpu_inc_return(bpf_raw_tp_nest_level); | ||
863 | |||
864 | if (WARN_ON_ONCE(nest_level > ARRAY_SIZE(tp_regs->regs))) { | ||
865 | this_cpu_dec(bpf_raw_tp_nest_level); | ||
866 | return ERR_PTR(-EBUSY); | ||
867 | } | ||
868 | |||
869 | return &tp_regs->regs[nest_level - 1]; | ||
870 | } | ||
871 | |||
872 | static void put_bpf_raw_tp_regs(void) | ||
873 | { | ||
874 | this_cpu_dec(bpf_raw_tp_nest_level); | ||
875 | } | ||
876 | |||
828 | BPF_CALL_5(bpf_perf_event_output_raw_tp, struct bpf_raw_tracepoint_args *, args, | 877 | BPF_CALL_5(bpf_perf_event_output_raw_tp, struct bpf_raw_tracepoint_args *, args, |
829 | struct bpf_map *, map, u64, flags, void *, data, u64, size) | 878 | struct bpf_map *, map, u64, flags, void *, data, u64, size) |
830 | { | 879 | { |
831 | struct pt_regs *regs = this_cpu_ptr(&bpf_raw_tp_regs); | 880 | struct pt_regs *regs = get_bpf_raw_tp_regs(); |
881 | int ret; | ||
882 | |||
883 | if (IS_ERR(regs)) | ||
884 | return PTR_ERR(regs); | ||
832 | 885 | ||
833 | perf_fetch_caller_regs(regs); | 886 | perf_fetch_caller_regs(regs); |
834 | return ____bpf_perf_event_output(regs, map, flags, data, size); | 887 | ret = ____bpf_perf_event_output(regs, map, flags, data, size); |
888 | |||
889 | put_bpf_raw_tp_regs(); | ||
890 | return ret; | ||
835 | } | 891 | } |
836 | 892 | ||
837 | static const struct bpf_func_proto bpf_perf_event_output_proto_raw_tp = { | 893 | static const struct bpf_func_proto bpf_perf_event_output_proto_raw_tp = { |
@@ -848,12 +904,18 @@ static const struct bpf_func_proto bpf_perf_event_output_proto_raw_tp = { | |||
848 | BPF_CALL_3(bpf_get_stackid_raw_tp, struct bpf_raw_tracepoint_args *, args, | 904 | BPF_CALL_3(bpf_get_stackid_raw_tp, struct bpf_raw_tracepoint_args *, args, |
849 | struct bpf_map *, map, u64, flags) | 905 | struct bpf_map *, map, u64, flags) |
850 | { | 906 | { |
851 | struct pt_regs *regs = this_cpu_ptr(&bpf_raw_tp_regs); | 907 | struct pt_regs *regs = get_bpf_raw_tp_regs(); |
908 | int ret; | ||
909 | |||
910 | if (IS_ERR(regs)) | ||
911 | return PTR_ERR(regs); | ||
852 | 912 | ||
853 | perf_fetch_caller_regs(regs); | 913 | perf_fetch_caller_regs(regs); |
854 | /* similar to bpf_perf_event_output_tp, but pt_regs fetched differently */ | 914 | /* similar to bpf_perf_event_output_tp, but pt_regs fetched differently */ |
855 | return bpf_get_stackid((unsigned long) regs, (unsigned long) map, | 915 | ret = bpf_get_stackid((unsigned long) regs, (unsigned long) map, |
856 | flags, 0, 0); | 916 | flags, 0, 0); |
917 | put_bpf_raw_tp_regs(); | ||
918 | return ret; | ||
857 | } | 919 | } |
858 | 920 | ||
859 | static const struct bpf_func_proto bpf_get_stackid_proto_raw_tp = { | 921 | static const struct bpf_func_proto bpf_get_stackid_proto_raw_tp = { |
@@ -868,11 +930,17 @@ static const struct bpf_func_proto bpf_get_stackid_proto_raw_tp = { | |||
868 | BPF_CALL_4(bpf_get_stack_raw_tp, struct bpf_raw_tracepoint_args *, args, | 930 | BPF_CALL_4(bpf_get_stack_raw_tp, struct bpf_raw_tracepoint_args *, args, |
869 | void *, buf, u32, size, u64, flags) | 931 | void *, buf, u32, size, u64, flags) |
870 | { | 932 | { |
871 | struct pt_regs *regs = this_cpu_ptr(&bpf_raw_tp_regs); | 933 | struct pt_regs *regs = get_bpf_raw_tp_regs(); |
934 | int ret; | ||
935 | |||
936 | if (IS_ERR(regs)) | ||
937 | return PTR_ERR(regs); | ||
872 | 938 | ||
873 | perf_fetch_caller_regs(regs); | 939 | perf_fetch_caller_regs(regs); |
874 | return bpf_get_stack((unsigned long) regs, (unsigned long) buf, | 940 | ret = bpf_get_stack((unsigned long) regs, (unsigned long) buf, |
875 | (unsigned long) size, flags, 0); | 941 | (unsigned long) size, flags, 0); |
942 | put_bpf_raw_tp_regs(); | ||
943 | return ret; | ||
876 | } | 944 | } |
877 | 945 | ||
878 | static const struct bpf_func_proto bpf_get_stack_proto_raw_tp = { | 946 | static const struct bpf_func_proto bpf_get_stack_proto_raw_tp = { |
diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c index cc9597a87770..d1c4e1f3be2c 100644 --- a/net/core/bpf_sk_storage.c +++ b/net/core/bpf_sk_storage.c | |||
@@ -633,7 +633,8 @@ static struct bpf_map *bpf_sk_storage_map_alloc(union bpf_attr *attr) | |||
633 | return ERR_PTR(-ENOMEM); | 633 | return ERR_PTR(-ENOMEM); |
634 | bpf_map_init_from_attr(&smap->map, attr); | 634 | bpf_map_init_from_attr(&smap->map, attr); |
635 | 635 | ||
636 | smap->bucket_log = ilog2(roundup_pow_of_two(num_possible_cpus())); | 636 | /* Use at least 2 buckets, select_bucket() is undefined behavior with 1 bucket */ |
637 | smap->bucket_log = max_t(u32, 1, ilog2(roundup_pow_of_two(num_possible_cpus()))); | ||
637 | nbuckets = 1U << smap->bucket_log; | 638 | nbuckets = 1U << smap->bucket_log; |
638 | smap->buckets = kvcalloc(sizeof(*smap->buckets), nbuckets, | 639 | smap->buckets = kvcalloc(sizeof(*smap->buckets), nbuckets, |
639 | GFP_USER | __GFP_NOWARN); | 640 | GFP_USER | __GFP_NOWARN); |
diff --git a/net/core/sock.c b/net/core/sock.c index 7f49392579a5..af09a23e4822 100644 --- a/net/core/sock.c +++ b/net/core/sock.c | |||
@@ -1850,6 +1850,9 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) | |||
1850 | goto out; | 1850 | goto out; |
1851 | } | 1851 | } |
1852 | RCU_INIT_POINTER(newsk->sk_reuseport_cb, NULL); | 1852 | RCU_INIT_POINTER(newsk->sk_reuseport_cb, NULL); |
1853 | #ifdef CONFIG_BPF_SYSCALL | ||
1854 | RCU_INIT_POINTER(newsk->sk_bpf_storage, NULL); | ||
1855 | #endif | ||
1853 | 1856 | ||
1854 | newsk->sk_err = 0; | 1857 | newsk->sk_err = 0; |
1855 | newsk->sk_err_soft = 0; | 1858 | newsk->sk_err_soft = 0; |
diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c index 2b18223e7eb8..9c6de4f114f8 100644 --- a/net/xdp/xdp_umem.c +++ b/net/xdp/xdp_umem.c | |||
@@ -143,6 +143,9 @@ static void xdp_umem_clear_dev(struct xdp_umem *umem) | |||
143 | struct netdev_bpf bpf; | 143 | struct netdev_bpf bpf; |
144 | int err; | 144 | int err; |
145 | 145 | ||
146 | if (!umem->dev) | ||
147 | return; | ||
148 | |||
146 | if (umem->zc) { | 149 | if (umem->zc) { |
147 | bpf.command = XDP_SETUP_XSK_UMEM; | 150 | bpf.command = XDP_SETUP_XSK_UMEM; |
148 | bpf.xsk.umem = NULL; | 151 | bpf.xsk.umem = NULL; |
@@ -156,11 +159,9 @@ static void xdp_umem_clear_dev(struct xdp_umem *umem) | |||
156 | WARN(1, "failed to disable umem!\n"); | 159 | WARN(1, "failed to disable umem!\n"); |
157 | } | 160 | } |
158 | 161 | ||
159 | if (umem->dev) { | 162 | rtnl_lock(); |
160 | rtnl_lock(); | 163 | xdp_clear_umem_at_qid(umem->dev, umem->queue_id); |
161 | xdp_clear_umem_at_qid(umem->dev, umem->queue_id); | 164 | rtnl_unlock(); |
162 | rtnl_unlock(); | ||
163 | } | ||
164 | 165 | ||
165 | if (umem->zc) { | 166 | if (umem->zc) { |
166 | dev_put(umem->dev); | 167 | dev_put(umem->dev); |
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index e4114a7e4451..a8b823c30b43 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h | |||
@@ -3378,8 +3378,8 @@ struct bpf_raw_tracepoint_args { | |||
3378 | /* DIRECT: Skip the FIB rules and go to FIB table associated with device | 3378 | /* DIRECT: Skip the FIB rules and go to FIB table associated with device |
3379 | * OUTPUT: Do lookup from egress perspective; default is ingress | 3379 | * OUTPUT: Do lookup from egress perspective; default is ingress |
3380 | */ | 3380 | */ |
3381 | #define BPF_FIB_LOOKUP_DIRECT BIT(0) | 3381 | #define BPF_FIB_LOOKUP_DIRECT (1U << 0) |
3382 | #define BPF_FIB_LOOKUP_OUTPUT BIT(1) | 3382 | #define BPF_FIB_LOOKUP_OUTPUT (1U << 1) |
3383 | 3383 | ||
3384 | enum { | 3384 | enum { |
3385 | BPF_FIB_LKUP_RET_SUCCESS, /* lookup successful */ | 3385 | BPF_FIB_LKUP_RET_SUCCESS, /* lookup successful */ |
diff --git a/tools/testing/selftests/bpf/test_lpm_map.c b/tools/testing/selftests/bpf/test_lpm_map.c index 02d7c871862a..006be3963977 100644 --- a/tools/testing/selftests/bpf/test_lpm_map.c +++ b/tools/testing/selftests/bpf/test_lpm_map.c | |||
@@ -573,13 +573,13 @@ static void test_lpm_get_next_key(void) | |||
573 | 573 | ||
574 | /* add one more element (total two) */ | 574 | /* add one more element (total two) */ |
575 | key_p->prefixlen = 24; | 575 | key_p->prefixlen = 24; |
576 | inet_pton(AF_INET, "192.168.0.0", key_p->data); | 576 | inet_pton(AF_INET, "192.168.128.0", key_p->data); |
577 | assert(bpf_map_update_elem(map_fd, key_p, &value, 0) == 0); | 577 | assert(bpf_map_update_elem(map_fd, key_p, &value, 0) == 0); |
578 | 578 | ||
579 | memset(key_p, 0, key_size); | 579 | memset(key_p, 0, key_size); |
580 | assert(bpf_map_get_next_key(map_fd, NULL, key_p) == 0); | 580 | assert(bpf_map_get_next_key(map_fd, NULL, key_p) == 0); |
581 | assert(key_p->prefixlen == 24 && key_p->data[0] == 192 && | 581 | assert(key_p->prefixlen == 24 && key_p->data[0] == 192 && |
582 | key_p->data[1] == 168 && key_p->data[2] == 0); | 582 | key_p->data[1] == 168 && key_p->data[2] == 128); |
583 | 583 | ||
584 | memset(next_key_p, 0, key_size); | 584 | memset(next_key_p, 0, key_size); |
585 | assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0); | 585 | assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0); |
@@ -592,7 +592,7 @@ static void test_lpm_get_next_key(void) | |||
592 | 592 | ||
593 | /* Add one more element (total three) */ | 593 | /* Add one more element (total three) */ |
594 | key_p->prefixlen = 24; | 594 | key_p->prefixlen = 24; |
595 | inet_pton(AF_INET, "192.168.128.0", key_p->data); | 595 | inet_pton(AF_INET, "192.168.0.0", key_p->data); |
596 | assert(bpf_map_update_elem(map_fd, key_p, &value, 0) == 0); | 596 | assert(bpf_map_update_elem(map_fd, key_p, &value, 0) == 0); |
597 | 597 | ||
598 | memset(key_p, 0, key_size); | 598 | memset(key_p, 0, key_size); |
@@ -643,6 +643,41 @@ static void test_lpm_get_next_key(void) | |||
643 | assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 && | 643 | assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 && |
644 | errno == ENOENT); | 644 | errno == ENOENT); |
645 | 645 | ||
646 | /* Add one more element (total five) */ | ||
647 | key_p->prefixlen = 28; | ||
648 | inet_pton(AF_INET, "192.168.1.128", key_p->data); | ||
649 | assert(bpf_map_update_elem(map_fd, key_p, &value, 0) == 0); | ||
650 | |||
651 | memset(key_p, 0, key_size); | ||
652 | assert(bpf_map_get_next_key(map_fd, NULL, key_p) == 0); | ||
653 | assert(key_p->prefixlen == 24 && key_p->data[0] == 192 && | ||
654 | key_p->data[1] == 168 && key_p->data[2] == 0); | ||
655 | |||
656 | memset(next_key_p, 0, key_size); | ||
657 | assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0); | ||
658 | assert(next_key_p->prefixlen == 28 && next_key_p->data[0] == 192 && | ||
659 | next_key_p->data[1] == 168 && next_key_p->data[2] == 1 && | ||
660 | next_key_p->data[3] == 128); | ||
661 | |||
662 | memcpy(key_p, next_key_p, key_size); | ||
663 | assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0); | ||
664 | assert(next_key_p->prefixlen == 24 && next_key_p->data[0] == 192 && | ||
665 | next_key_p->data[1] == 168 && next_key_p->data[2] == 1); | ||
666 | |||
667 | memcpy(key_p, next_key_p, key_size); | ||
668 | assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0); | ||
669 | assert(next_key_p->prefixlen == 24 && next_key_p->data[0] == 192 && | ||
670 | next_key_p->data[1] == 168 && next_key_p->data[2] == 128); | ||
671 | |||
672 | memcpy(key_p, next_key_p, key_size); | ||
673 | assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0); | ||
674 | assert(next_key_p->prefixlen == 16 && next_key_p->data[0] == 192 && | ||
675 | next_key_p->data[1] == 168); | ||
676 | |||
677 | memcpy(key_p, next_key_p, key_size); | ||
678 | assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 && | ||
679 | errno == ENOENT); | ||
680 | |||
646 | /* no exact matching key should return the first one in post order */ | 681 | /* no exact matching key should return the first one in post order */ |
647 | key_p->prefixlen = 22; | 682 | key_p->prefixlen = 22; |
648 | inet_pton(AF_INET, "192.168.1.0", key_p->data); | 683 | inet_pton(AF_INET, "192.168.1.0", key_p->data); |
diff --git a/tools/testing/selftests/bpf/verifier/div_overflow.c b/tools/testing/selftests/bpf/verifier/div_overflow.c index bd3f38dbe796..acab4f00819f 100644 --- a/tools/testing/selftests/bpf/verifier/div_overflow.c +++ b/tools/testing/selftests/bpf/verifier/div_overflow.c | |||
@@ -29,8 +29,11 @@ | |||
29 | "DIV64 overflow, check 1", | 29 | "DIV64 overflow, check 1", |
30 | .insns = { | 30 | .insns = { |
31 | BPF_MOV64_IMM(BPF_REG_1, -1), | 31 | BPF_MOV64_IMM(BPF_REG_1, -1), |
32 | BPF_LD_IMM64(BPF_REG_0, LLONG_MIN), | 32 | BPF_LD_IMM64(BPF_REG_2, LLONG_MIN), |
33 | BPF_ALU64_REG(BPF_DIV, BPF_REG_0, BPF_REG_1), | 33 | BPF_ALU64_REG(BPF_DIV, BPF_REG_2, BPF_REG_1), |
34 | BPF_MOV32_IMM(BPF_REG_0, 0), | ||
35 | BPF_JMP_REG(BPF_JEQ, BPF_REG_0, BPF_REG_2, 1), | ||
36 | BPF_MOV32_IMM(BPF_REG_0, 1), | ||
34 | BPF_EXIT_INSN(), | 37 | BPF_EXIT_INSN(), |
35 | }, | 38 | }, |
36 | .prog_type = BPF_PROG_TYPE_SCHED_CLS, | 39 | .prog_type = BPF_PROG_TYPE_SCHED_CLS, |
@@ -40,8 +43,11 @@ | |||
40 | { | 43 | { |
41 | "DIV64 overflow, check 2", | 44 | "DIV64 overflow, check 2", |
42 | .insns = { | 45 | .insns = { |
43 | BPF_LD_IMM64(BPF_REG_0, LLONG_MIN), | 46 | BPF_LD_IMM64(BPF_REG_1, LLONG_MIN), |
44 | BPF_ALU64_IMM(BPF_DIV, BPF_REG_0, -1), | 47 | BPF_ALU64_IMM(BPF_DIV, BPF_REG_1, -1), |
48 | BPF_MOV32_IMM(BPF_REG_0, 0), | ||
49 | BPF_JMP_REG(BPF_JEQ, BPF_REG_0, BPF_REG_1, 1), | ||
50 | BPF_MOV32_IMM(BPF_REG_0, 1), | ||
45 | BPF_EXIT_INSN(), | 51 | BPF_EXIT_INSN(), |
46 | }, | 52 | }, |
47 | .prog_type = BPF_PROG_TYPE_SCHED_CLS, | 53 | .prog_type = BPF_PROG_TYPE_SCHED_CLS, |