diff options
Diffstat (limited to 'kernel')
| -rw-r--r-- | kernel/bpf/Makefile | 1 | ||||
| -rw-r--r-- | kernel/bpf/devmap.c | 2 | ||||
| -rw-r--r-- | kernel/bpf/verifier.c | 793 | ||||
| -rw-r--r-- | kernel/bpf/xskmap.c | 9 |
4 files changed, 734 insertions, 71 deletions
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile index 4c2fa3ac56f6..29d781061cd5 100644 --- a/kernel/bpf/Makefile +++ b/kernel/bpf/Makefile | |||
| @@ -1,5 +1,6 @@ | |||
| 1 | # SPDX-License-Identifier: GPL-2.0 | 1 | # SPDX-License-Identifier: GPL-2.0 |
| 2 | obj-y := core.o | 2 | obj-y := core.o |
| 3 | CFLAGS_core.o += $(call cc-disable-warning, override-init) | ||
| 3 | 4 | ||
| 4 | obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o | 5 | obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o |
| 5 | obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o | 6 | obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o |
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index b84c44505e06..40e86a7e0ef0 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c | |||
| @@ -80,8 +80,8 @@ static u64 dev_map_bitmap_size(const union bpf_attr *attr) | |||
| 80 | static struct bpf_map *dev_map_alloc(union bpf_attr *attr) | 80 | static struct bpf_map *dev_map_alloc(union bpf_attr *attr) |
| 81 | { | 81 | { |
| 82 | struct bpf_dtab *dtab; | 82 | struct bpf_dtab *dtab; |
| 83 | int err = -EINVAL; | ||
| 84 | u64 cost; | 83 | u64 cost; |
| 84 | int err; | ||
| 85 | 85 | ||
| 86 | if (!capable(CAP_NET_ADMIN)) | 86 | if (!capable(CAP_NET_ADMIN)) |
| 87 | return ERR_PTR(-EPERM); | 87 | return ERR_PTR(-EPERM); |
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 1e9d10b32984..0e079b2298f8 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c | |||
| @@ -326,7 +326,8 @@ static bool type_is_sk_pointer(enum bpf_reg_type type) | |||
| 326 | { | 326 | { |
| 327 | return type == PTR_TO_SOCKET || | 327 | return type == PTR_TO_SOCKET || |
| 328 | type == PTR_TO_SOCK_COMMON || | 328 | type == PTR_TO_SOCK_COMMON || |
| 329 | type == PTR_TO_TCP_SOCK; | 329 | type == PTR_TO_TCP_SOCK || |
| 330 | type == PTR_TO_XDP_SOCK; | ||
| 330 | } | 331 | } |
| 331 | 332 | ||
| 332 | static bool reg_type_may_be_null(enum bpf_reg_type type) | 333 | static bool reg_type_may_be_null(enum bpf_reg_type type) |
| @@ -398,6 +399,7 @@ static const char * const reg_type_str[] = { | |||
| 398 | [PTR_TO_TCP_SOCK] = "tcp_sock", | 399 | [PTR_TO_TCP_SOCK] = "tcp_sock", |
| 399 | [PTR_TO_TCP_SOCK_OR_NULL] = "tcp_sock_or_null", | 400 | [PTR_TO_TCP_SOCK_OR_NULL] = "tcp_sock_or_null", |
| 400 | [PTR_TO_TP_BUFFER] = "tp_buffer", | 401 | [PTR_TO_TP_BUFFER] = "tp_buffer", |
| 402 | [PTR_TO_XDP_SOCK] = "xdp_sock", | ||
| 401 | }; | 403 | }; |
| 402 | 404 | ||
| 403 | static char slot_type_char[] = { | 405 | static char slot_type_char[] = { |
| @@ -445,12 +447,12 @@ static void print_verifier_state(struct bpf_verifier_env *env, | |||
| 445 | verbose(env, " R%d", i); | 447 | verbose(env, " R%d", i); |
| 446 | print_liveness(env, reg->live); | 448 | print_liveness(env, reg->live); |
| 447 | verbose(env, "=%s", reg_type_str[t]); | 449 | verbose(env, "=%s", reg_type_str[t]); |
| 450 | if (t == SCALAR_VALUE && reg->precise) | ||
| 451 | verbose(env, "P"); | ||
| 448 | if ((t == SCALAR_VALUE || t == PTR_TO_STACK) && | 452 | if ((t == SCALAR_VALUE || t == PTR_TO_STACK) && |
| 449 | tnum_is_const(reg->var_off)) { | 453 | tnum_is_const(reg->var_off)) { |
| 450 | /* reg->off should be 0 for SCALAR_VALUE */ | 454 | /* reg->off should be 0 for SCALAR_VALUE */ |
| 451 | verbose(env, "%lld", reg->var_off.value + reg->off); | 455 | verbose(env, "%lld", reg->var_off.value + reg->off); |
| 452 | if (t == PTR_TO_STACK) | ||
| 453 | verbose(env, ",call_%d", func(env, reg)->callsite); | ||
| 454 | } else { | 456 | } else { |
| 455 | verbose(env, "(id=%d", reg->id); | 457 | verbose(env, "(id=%d", reg->id); |
| 456 | if (reg_type_may_be_refcounted_or_null(t)) | 458 | if (reg_type_may_be_refcounted_or_null(t)) |
| @@ -512,11 +514,17 @@ static void print_verifier_state(struct bpf_verifier_env *env, | |||
| 512 | continue; | 514 | continue; |
| 513 | verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE); | 515 | verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE); |
| 514 | print_liveness(env, state->stack[i].spilled_ptr.live); | 516 | print_liveness(env, state->stack[i].spilled_ptr.live); |
| 515 | if (state->stack[i].slot_type[0] == STACK_SPILL) | 517 | if (state->stack[i].slot_type[0] == STACK_SPILL) { |
| 516 | verbose(env, "=%s", | 518 | reg = &state->stack[i].spilled_ptr; |
| 517 | reg_type_str[state->stack[i].spilled_ptr.type]); | 519 | t = reg->type; |
| 518 | else | 520 | verbose(env, "=%s", reg_type_str[t]); |
| 521 | if (t == SCALAR_VALUE && reg->precise) | ||
| 522 | verbose(env, "P"); | ||
| 523 | if (t == SCALAR_VALUE && tnum_is_const(reg->var_off)) | ||
| 524 | verbose(env, "%lld", reg->var_off.value + reg->off); | ||
| 525 | } else { | ||
| 519 | verbose(env, "=%s", types_buf); | 526 | verbose(env, "=%s", types_buf); |
| 527 | } | ||
| 520 | } | 528 | } |
| 521 | if (state->acquired_refs && state->refs[0].id) { | 529 | if (state->acquired_refs && state->refs[0].id) { |
| 522 | verbose(env, " refs=%d", state->refs[0].id); | 530 | verbose(env, " refs=%d", state->refs[0].id); |
| @@ -665,6 +673,13 @@ static void free_func_state(struct bpf_func_state *state) | |||
| 665 | kfree(state); | 673 | kfree(state); |
| 666 | } | 674 | } |
| 667 | 675 | ||
| 676 | static void clear_jmp_history(struct bpf_verifier_state *state) | ||
| 677 | { | ||
| 678 | kfree(state->jmp_history); | ||
| 679 | state->jmp_history = NULL; | ||
| 680 | state->jmp_history_cnt = 0; | ||
| 681 | } | ||
| 682 | |||
| 668 | static void free_verifier_state(struct bpf_verifier_state *state, | 683 | static void free_verifier_state(struct bpf_verifier_state *state, |
| 669 | bool free_self) | 684 | bool free_self) |
| 670 | { | 685 | { |
| @@ -674,6 +689,7 @@ static void free_verifier_state(struct bpf_verifier_state *state, | |||
| 674 | free_func_state(state->frame[i]); | 689 | free_func_state(state->frame[i]); |
| 675 | state->frame[i] = NULL; | 690 | state->frame[i] = NULL; |
| 676 | } | 691 | } |
| 692 | clear_jmp_history(state); | ||
| 677 | if (free_self) | 693 | if (free_self) |
| 678 | kfree(state); | 694 | kfree(state); |
| 679 | } | 695 | } |
| @@ -701,8 +717,18 @@ static int copy_verifier_state(struct bpf_verifier_state *dst_state, | |||
| 701 | const struct bpf_verifier_state *src) | 717 | const struct bpf_verifier_state *src) |
| 702 | { | 718 | { |
| 703 | struct bpf_func_state *dst; | 719 | struct bpf_func_state *dst; |
| 720 | u32 jmp_sz = sizeof(struct bpf_idx_pair) * src->jmp_history_cnt; | ||
| 704 | int i, err; | 721 | int i, err; |
| 705 | 722 | ||
| 723 | if (dst_state->jmp_history_cnt < src->jmp_history_cnt) { | ||
| 724 | kfree(dst_state->jmp_history); | ||
| 725 | dst_state->jmp_history = kmalloc(jmp_sz, GFP_USER); | ||
| 726 | if (!dst_state->jmp_history) | ||
| 727 | return -ENOMEM; | ||
| 728 | } | ||
| 729 | memcpy(dst_state->jmp_history, src->jmp_history, jmp_sz); | ||
| 730 | dst_state->jmp_history_cnt = src->jmp_history_cnt; | ||
| 731 | |||
| 706 | /* if dst has more stack frames then src frame, free them */ | 732 | /* if dst has more stack frames then src frame, free them */ |
| 707 | for (i = src->curframe + 1; i <= dst_state->curframe; i++) { | 733 | for (i = src->curframe + 1; i <= dst_state->curframe; i++) { |
| 708 | free_func_state(dst_state->frame[i]); | 734 | free_func_state(dst_state->frame[i]); |
| @@ -711,6 +737,10 @@ static int copy_verifier_state(struct bpf_verifier_state *dst_state, | |||
| 711 | dst_state->speculative = src->speculative; | 737 | dst_state->speculative = src->speculative; |
| 712 | dst_state->curframe = src->curframe; | 738 | dst_state->curframe = src->curframe; |
| 713 | dst_state->active_spin_lock = src->active_spin_lock; | 739 | dst_state->active_spin_lock = src->active_spin_lock; |
| 740 | dst_state->branches = src->branches; | ||
| 741 | dst_state->parent = src->parent; | ||
| 742 | dst_state->first_insn_idx = src->first_insn_idx; | ||
| 743 | dst_state->last_insn_idx = src->last_insn_idx; | ||
| 714 | for (i = 0; i <= src->curframe; i++) { | 744 | for (i = 0; i <= src->curframe; i++) { |
| 715 | dst = dst_state->frame[i]; | 745 | dst = dst_state->frame[i]; |
| 716 | if (!dst) { | 746 | if (!dst) { |
| @@ -726,6 +756,23 @@ static int copy_verifier_state(struct bpf_verifier_state *dst_state, | |||
| 726 | return 0; | 756 | return 0; |
| 727 | } | 757 | } |
| 728 | 758 | ||
| 759 | static void update_branch_counts(struct bpf_verifier_env *env, struct bpf_verifier_state *st) | ||
| 760 | { | ||
| 761 | while (st) { | ||
| 762 | u32 br = --st->branches; | ||
| 763 | |||
| 764 | /* WARN_ON(br > 1) technically makes sense here, | ||
| 765 | * but see comment in push_stack(), hence: | ||
| 766 | */ | ||
| 767 | WARN_ONCE((int)br < 0, | ||
| 768 | "BUG update_branch_counts:branches_to_explore=%d\n", | ||
| 769 | br); | ||
| 770 | if (br) | ||
| 771 | break; | ||
| 772 | st = st->parent; | ||
| 773 | } | ||
| 774 | } | ||
| 775 | |||
| 729 | static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx, | 776 | static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx, |
| 730 | int *insn_idx) | 777 | int *insn_idx) |
| 731 | { | 778 | { |
| @@ -779,6 +826,18 @@ static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env, | |||
| 779 | env->stack_size); | 826 | env->stack_size); |
| 780 | goto err; | 827 | goto err; |
| 781 | } | 828 | } |
| 829 | if (elem->st.parent) { | ||
| 830 | ++elem->st.parent->branches; | ||
| 831 | /* WARN_ON(branches > 2) technically makes sense here, | ||
| 832 | * but | ||
| 833 | * 1. speculative states will bump 'branches' for non-branch | ||
| 834 | * instructions | ||
| 835 | * 2. is_state_visited() heuristics may decide not to create | ||
| 836 | * a new state for a sequence of branches and all such current | ||
| 837 | * and cloned states will be pointing to a single parent state | ||
| 838 | * which might have large 'branches' count. | ||
| 839 | */ | ||
| 840 | } | ||
| 782 | return &elem->st; | 841 | return &elem->st; |
| 783 | err: | 842 | err: |
| 784 | free_verifier_state(env->cur_state, true); | 843 | free_verifier_state(env->cur_state, true); |
| @@ -926,6 +985,9 @@ static void __mark_reg_unbounded(struct bpf_reg_state *reg) | |||
| 926 | reg->smax_value = S64_MAX; | 985 | reg->smax_value = S64_MAX; |
| 927 | reg->umin_value = 0; | 986 | reg->umin_value = 0; |
| 928 | reg->umax_value = U64_MAX; | 987 | reg->umax_value = U64_MAX; |
| 988 | |||
| 989 | /* constant backtracking is enabled for root only for now */ | ||
| 990 | reg->precise = capable(CAP_SYS_ADMIN) ? false : true; | ||
| 929 | } | 991 | } |
| 930 | 992 | ||
| 931 | /* Mark a register as having a completely unknown (scalar) value. */ | 993 | /* Mark a register as having a completely unknown (scalar) value. */ |
| @@ -1337,6 +1399,389 @@ static int check_reg_arg(struct bpf_verifier_env *env, u32 regno, | |||
| 1337 | return 0; | 1399 | return 0; |
| 1338 | } | 1400 | } |
| 1339 | 1401 | ||
| 1402 | /* for any branch, call, exit record the history of jmps in the given state */ | ||
| 1403 | static int push_jmp_history(struct bpf_verifier_env *env, | ||
| 1404 | struct bpf_verifier_state *cur) | ||
| 1405 | { | ||
| 1406 | u32 cnt = cur->jmp_history_cnt; | ||
| 1407 | struct bpf_idx_pair *p; | ||
| 1408 | |||
| 1409 | cnt++; | ||
| 1410 | p = krealloc(cur->jmp_history, cnt * sizeof(*p), GFP_USER); | ||
| 1411 | if (!p) | ||
| 1412 | return -ENOMEM; | ||
| 1413 | p[cnt - 1].idx = env->insn_idx; | ||
| 1414 | p[cnt - 1].prev_idx = env->prev_insn_idx; | ||
| 1415 | cur->jmp_history = p; | ||
| 1416 | cur->jmp_history_cnt = cnt; | ||
| 1417 | return 0; | ||
| 1418 | } | ||
| 1419 | |||
| 1420 | /* Backtrack one insn at a time. If idx is not at the top of recorded | ||
| 1421 | * history then previous instruction came from straight line execution. | ||
| 1422 | */ | ||
| 1423 | static int get_prev_insn_idx(struct bpf_verifier_state *st, int i, | ||
| 1424 | u32 *history) | ||
| 1425 | { | ||
| 1426 | u32 cnt = *history; | ||
| 1427 | |||
| 1428 | if (cnt && st->jmp_history[cnt - 1].idx == i) { | ||
| 1429 | i = st->jmp_history[cnt - 1].prev_idx; | ||
| 1430 | (*history)--; | ||
| 1431 | } else { | ||
| 1432 | i--; | ||
| 1433 | } | ||
| 1434 | return i; | ||
| 1435 | } | ||
| 1436 | |||
| 1437 | /* For given verifier state backtrack_insn() is called from the last insn to | ||
| 1438 | * the first insn. Its purpose is to compute a bitmask of registers and | ||
| 1439 | * stack slots that needs precision in the parent verifier state. | ||
| 1440 | */ | ||
| 1441 | static int backtrack_insn(struct bpf_verifier_env *env, int idx, | ||
| 1442 | u32 *reg_mask, u64 *stack_mask) | ||
| 1443 | { | ||
| 1444 | const struct bpf_insn_cbs cbs = { | ||
| 1445 | .cb_print = verbose, | ||
| 1446 | .private_data = env, | ||
| 1447 | }; | ||
| 1448 | struct bpf_insn *insn = env->prog->insnsi + idx; | ||
| 1449 | u8 class = BPF_CLASS(insn->code); | ||
| 1450 | u8 opcode = BPF_OP(insn->code); | ||
| 1451 | u8 mode = BPF_MODE(insn->code); | ||
| 1452 | u32 dreg = 1u << insn->dst_reg; | ||
| 1453 | u32 sreg = 1u << insn->src_reg; | ||
| 1454 | u32 spi; | ||
| 1455 | |||
| 1456 | if (insn->code == 0) | ||
| 1457 | return 0; | ||
| 1458 | if (env->log.level & BPF_LOG_LEVEL) { | ||
| 1459 | verbose(env, "regs=%x stack=%llx before ", *reg_mask, *stack_mask); | ||
| 1460 | verbose(env, "%d: ", idx); | ||
| 1461 | print_bpf_insn(&cbs, insn, env->allow_ptr_leaks); | ||
| 1462 | } | ||
| 1463 | |||
| 1464 | if (class == BPF_ALU || class == BPF_ALU64) { | ||
| 1465 | if (!(*reg_mask & dreg)) | ||
| 1466 | return 0; | ||
| 1467 | if (opcode == BPF_MOV) { | ||
| 1468 | if (BPF_SRC(insn->code) == BPF_X) { | ||
| 1469 | /* dreg = sreg | ||
| 1470 | * dreg needs precision after this insn | ||
| 1471 | * sreg needs precision before this insn | ||
| 1472 | */ | ||
| 1473 | *reg_mask &= ~dreg; | ||
| 1474 | *reg_mask |= sreg; | ||
| 1475 | } else { | ||
| 1476 | /* dreg = K | ||
| 1477 | * dreg needs precision after this insn. | ||
| 1478 | * Corresponding register is already marked | ||
| 1479 | * as precise=true in this verifier state. | ||
| 1480 | * No further markings in parent are necessary | ||
| 1481 | */ | ||
| 1482 | *reg_mask &= ~dreg; | ||
| 1483 | } | ||
| 1484 | } else { | ||
| 1485 | if (BPF_SRC(insn->code) == BPF_X) { | ||
| 1486 | /* dreg += sreg | ||
| 1487 | * both dreg and sreg need precision | ||
| 1488 | * before this insn | ||
| 1489 | */ | ||
| 1490 | *reg_mask |= sreg; | ||
| 1491 | } /* else dreg += K | ||
| 1492 | * dreg still needs precision before this insn | ||
| 1493 | */ | ||
| 1494 | } | ||
| 1495 | } else if (class == BPF_LDX) { | ||
| 1496 | if (!(*reg_mask & dreg)) | ||
| 1497 | return 0; | ||
| 1498 | *reg_mask &= ~dreg; | ||
| 1499 | |||
| 1500 | /* scalars can only be spilled into stack w/o losing precision. | ||
| 1501 | * Load from any other memory can be zero extended. | ||
| 1502 | * The desire to keep that precision is already indicated | ||
| 1503 | * by 'precise' mark in corresponding register of this state. | ||
| 1504 | * No further tracking necessary. | ||
| 1505 | */ | ||
| 1506 | if (insn->src_reg != BPF_REG_FP) | ||
| 1507 | return 0; | ||
| 1508 | if (BPF_SIZE(insn->code) != BPF_DW) | ||
| 1509 | return 0; | ||
| 1510 | |||
| 1511 | /* dreg = *(u64 *)[fp - off] was a fill from the stack. | ||
| 1512 | * that [fp - off] slot contains scalar that needs to be | ||
| 1513 | * tracked with precision | ||
| 1514 | */ | ||
| 1515 | spi = (-insn->off - 1) / BPF_REG_SIZE; | ||
| 1516 | if (spi >= 64) { | ||
| 1517 | verbose(env, "BUG spi %d\n", spi); | ||
| 1518 | WARN_ONCE(1, "verifier backtracking bug"); | ||
| 1519 | return -EFAULT; | ||
| 1520 | } | ||
| 1521 | *stack_mask |= 1ull << spi; | ||
| 1522 | } else if (class == BPF_STX) { | ||
| 1523 | if (*reg_mask & dreg) | ||
| 1524 | /* stx shouldn't be using _scalar_ dst_reg | ||
| 1525 | * to access memory. It means backtracking | ||
| 1526 | * encountered a case of pointer subtraction. | ||
| 1527 | */ | ||
| 1528 | return -ENOTSUPP; | ||
| 1529 | /* scalars can only be spilled into stack */ | ||
| 1530 | if (insn->dst_reg != BPF_REG_FP) | ||
| 1531 | return 0; | ||
| 1532 | if (BPF_SIZE(insn->code) != BPF_DW) | ||
| 1533 | return 0; | ||
| 1534 | spi = (-insn->off - 1) / BPF_REG_SIZE; | ||
| 1535 | if (spi >= 64) { | ||
| 1536 | verbose(env, "BUG spi %d\n", spi); | ||
| 1537 | WARN_ONCE(1, "verifier backtracking bug"); | ||
| 1538 | return -EFAULT; | ||
| 1539 | } | ||
| 1540 | if (!(*stack_mask & (1ull << spi))) | ||
| 1541 | return 0; | ||
| 1542 | *stack_mask &= ~(1ull << spi); | ||
| 1543 | *reg_mask |= sreg; | ||
| 1544 | } else if (class == BPF_JMP || class == BPF_JMP32) { | ||
| 1545 | if (opcode == BPF_CALL) { | ||
| 1546 | if (insn->src_reg == BPF_PSEUDO_CALL) | ||
| 1547 | return -ENOTSUPP; | ||
| 1548 | /* regular helper call sets R0 */ | ||
| 1549 | *reg_mask &= ~1; | ||
| 1550 | if (*reg_mask & 0x3f) { | ||
| 1551 | /* if backtracing was looking for registers R1-R5 | ||
| 1552 | * they should have been found already. | ||
| 1553 | */ | ||
| 1554 | verbose(env, "BUG regs %x\n", *reg_mask); | ||
| 1555 | WARN_ONCE(1, "verifier backtracking bug"); | ||
| 1556 | return -EFAULT; | ||
| 1557 | } | ||
| 1558 | } else if (opcode == BPF_EXIT) { | ||
| 1559 | return -ENOTSUPP; | ||
| 1560 | } | ||
| 1561 | } else if (class == BPF_LD) { | ||
| 1562 | if (!(*reg_mask & dreg)) | ||
| 1563 | return 0; | ||
| 1564 | *reg_mask &= ~dreg; | ||
| 1565 | /* It's ld_imm64 or ld_abs or ld_ind. | ||
| 1566 | * For ld_imm64 no further tracking of precision | ||
| 1567 | * into parent is necessary | ||
| 1568 | */ | ||
| 1569 | if (mode == BPF_IND || mode == BPF_ABS) | ||
| 1570 | /* to be analyzed */ | ||
| 1571 | return -ENOTSUPP; | ||
| 1572 | } else if (class == BPF_ST) { | ||
| 1573 | if (*reg_mask & dreg) | ||
| 1574 | /* likely pointer subtraction */ | ||
| 1575 | return -ENOTSUPP; | ||
| 1576 | } | ||
| 1577 | return 0; | ||
| 1578 | } | ||
| 1579 | |||
| 1580 | /* the scalar precision tracking algorithm: | ||
| 1581 | * . at the start all registers have precise=false. | ||
| 1582 | * . scalar ranges are tracked as normal through alu and jmp insns. | ||
| 1583 | * . once precise value of the scalar register is used in: | ||
| 1584 | * . ptr + scalar alu | ||
| 1585 | * . if (scalar cond K|scalar) | ||
| 1586 | * . helper_call(.., scalar, ...) where ARG_CONST is expected | ||
| 1587 | * backtrack through the verifier states and mark all registers and | ||
| 1588 | * stack slots with spilled constants that these scalar regisers | ||
| 1589 | * should be precise. | ||
| 1590 | * . during state pruning two registers (or spilled stack slots) | ||
| 1591 | * are equivalent if both are not precise. | ||
| 1592 | * | ||
| 1593 | * Note the verifier cannot simply walk register parentage chain, | ||
| 1594 | * since many different registers and stack slots could have been | ||
| 1595 | * used to compute single precise scalar. | ||
| 1596 | * | ||
| 1597 | * The approach of starting with precise=true for all registers and then | ||
| 1598 | * backtrack to mark a register as not precise when the verifier detects | ||
| 1599 | * that program doesn't care about specific value (e.g., when helper | ||
| 1600 | * takes register as ARG_ANYTHING parameter) is not safe. | ||
| 1601 | * | ||
| 1602 | * It's ok to walk single parentage chain of the verifier states. | ||
| 1603 | * It's possible that this backtracking will go all the way till 1st insn. | ||
| 1604 | * All other branches will be explored for needing precision later. | ||
| 1605 | * | ||
| 1606 | * The backtracking needs to deal with cases like: | ||
| 1607 | * R8=map_value(id=0,off=0,ks=4,vs=1952,imm=0) R9_w=map_value(id=0,off=40,ks=4,vs=1952,imm=0) | ||
| 1608 | * r9 -= r8 | ||
| 1609 | * r5 = r9 | ||
| 1610 | * if r5 > 0x79f goto pc+7 | ||
| 1611 | * R5_w=inv(id=0,umax_value=1951,var_off=(0x0; 0x7ff)) | ||
| 1612 | * r5 += 1 | ||
| 1613 | * ... | ||
| 1614 | * call bpf_perf_event_output#25 | ||
| 1615 | * where .arg5_type = ARG_CONST_SIZE_OR_ZERO | ||
| 1616 | * | ||
| 1617 | * and this case: | ||
| 1618 | * r6 = 1 | ||
| 1619 | * call foo // uses callee's r6 inside to compute r0 | ||
| 1620 | * r0 += r6 | ||
| 1621 | * if r0 == 0 goto | ||
| 1622 | * | ||
| 1623 | * to track above reg_mask/stack_mask needs to be independent for each frame. | ||
| 1624 | * | ||
| 1625 | * Also if parent's curframe > frame where backtracking started, | ||
| 1626 | * the verifier need to mark registers in both frames, otherwise callees | ||
| 1627 | * may incorrectly prune callers. This is similar to | ||
| 1628 | * commit 7640ead93924 ("bpf: verifier: make sure callees don't prune with caller differences") | ||
| 1629 | * | ||
| 1630 | * For now backtracking falls back into conservative marking. | ||
| 1631 | */ | ||
| 1632 | static void mark_all_scalars_precise(struct bpf_verifier_env *env, | ||
| 1633 | struct bpf_verifier_state *st) | ||
| 1634 | { | ||
| 1635 | struct bpf_func_state *func; | ||
| 1636 | struct bpf_reg_state *reg; | ||
| 1637 | int i, j; | ||
| 1638 | |||
| 1639 | /* big hammer: mark all scalars precise in this path. | ||
| 1640 | * pop_stack may still get !precise scalars. | ||
| 1641 | */ | ||
| 1642 | for (; st; st = st->parent) | ||
| 1643 | for (i = 0; i <= st->curframe; i++) { | ||
| 1644 | func = st->frame[i]; | ||
| 1645 | for (j = 0; j < BPF_REG_FP; j++) { | ||
| 1646 | reg = &func->regs[j]; | ||
| 1647 | if (reg->type != SCALAR_VALUE) | ||
| 1648 | continue; | ||
| 1649 | reg->precise = true; | ||
| 1650 | } | ||
| 1651 | for (j = 0; j < func->allocated_stack / BPF_REG_SIZE; j++) { | ||
| 1652 | if (func->stack[j].slot_type[0] != STACK_SPILL) | ||
| 1653 | continue; | ||
| 1654 | reg = &func->stack[j].spilled_ptr; | ||
| 1655 | if (reg->type != SCALAR_VALUE) | ||
| 1656 | continue; | ||
| 1657 | reg->precise = true; | ||
| 1658 | } | ||
| 1659 | } | ||
| 1660 | } | ||
| 1661 | |||
| 1662 | static int mark_chain_precision(struct bpf_verifier_env *env, int regno) | ||
| 1663 | { | ||
| 1664 | struct bpf_verifier_state *st = env->cur_state; | ||
| 1665 | int first_idx = st->first_insn_idx; | ||
| 1666 | int last_idx = env->insn_idx; | ||
| 1667 | struct bpf_func_state *func; | ||
| 1668 | struct bpf_reg_state *reg; | ||
| 1669 | u32 reg_mask = 1u << regno; | ||
| 1670 | u64 stack_mask = 0; | ||
| 1671 | bool skip_first = true; | ||
| 1672 | int i, err; | ||
| 1673 | |||
| 1674 | if (!env->allow_ptr_leaks) | ||
| 1675 | /* backtracking is root only for now */ | ||
| 1676 | return 0; | ||
| 1677 | |||
| 1678 | func = st->frame[st->curframe]; | ||
| 1679 | reg = &func->regs[regno]; | ||
| 1680 | if (reg->type != SCALAR_VALUE) { | ||
| 1681 | WARN_ONCE(1, "backtracing misuse"); | ||
| 1682 | return -EFAULT; | ||
| 1683 | } | ||
| 1684 | if (reg->precise) | ||
| 1685 | return 0; | ||
| 1686 | func->regs[regno].precise = true; | ||
| 1687 | |||
| 1688 | for (;;) { | ||
| 1689 | DECLARE_BITMAP(mask, 64); | ||
| 1690 | bool new_marks = false; | ||
| 1691 | u32 history = st->jmp_history_cnt; | ||
| 1692 | |||
| 1693 | if (env->log.level & BPF_LOG_LEVEL) | ||
| 1694 | verbose(env, "last_idx %d first_idx %d\n", last_idx, first_idx); | ||
| 1695 | for (i = last_idx;;) { | ||
| 1696 | if (skip_first) { | ||
| 1697 | err = 0; | ||
| 1698 | skip_first = false; | ||
| 1699 | } else { | ||
| 1700 | err = backtrack_insn(env, i, ®_mask, &stack_mask); | ||
| 1701 | } | ||
| 1702 | if (err == -ENOTSUPP) { | ||
| 1703 | mark_all_scalars_precise(env, st); | ||
| 1704 | return 0; | ||
| 1705 | } else if (err) { | ||
| 1706 | return err; | ||
| 1707 | } | ||
| 1708 | if (!reg_mask && !stack_mask) | ||
| 1709 | /* Found assignment(s) into tracked register in this state. | ||
| 1710 | * Since this state is already marked, just return. | ||
| 1711 | * Nothing to be tracked further in the parent state. | ||
| 1712 | */ | ||
| 1713 | return 0; | ||
| 1714 | if (i == first_idx) | ||
| 1715 | break; | ||
| 1716 | i = get_prev_insn_idx(st, i, &history); | ||
| 1717 | if (i >= env->prog->len) { | ||
| 1718 | /* This can happen if backtracking reached insn 0 | ||
| 1719 | * and there are still reg_mask or stack_mask | ||
| 1720 | * to backtrack. | ||
| 1721 | * It means the backtracking missed the spot where | ||
| 1722 | * particular register was initialized with a constant. | ||
| 1723 | */ | ||
| 1724 | verbose(env, "BUG backtracking idx %d\n", i); | ||
| 1725 | WARN_ONCE(1, "verifier backtracking bug"); | ||
| 1726 | return -EFAULT; | ||
| 1727 | } | ||
| 1728 | } | ||
| 1729 | st = st->parent; | ||
| 1730 | if (!st) | ||
| 1731 | break; | ||
| 1732 | |||
| 1733 | func = st->frame[st->curframe]; | ||
| 1734 | bitmap_from_u64(mask, reg_mask); | ||
| 1735 | for_each_set_bit(i, mask, 32) { | ||
| 1736 | reg = &func->regs[i]; | ||
| 1737 | if (reg->type != SCALAR_VALUE) | ||
| 1738 | continue; | ||
| 1739 | if (!reg->precise) | ||
| 1740 | new_marks = true; | ||
| 1741 | reg->precise = true; | ||
| 1742 | } | ||
| 1743 | |||
| 1744 | bitmap_from_u64(mask, stack_mask); | ||
| 1745 | for_each_set_bit(i, mask, 64) { | ||
| 1746 | if (i >= func->allocated_stack / BPF_REG_SIZE) { | ||
| 1747 | /* This can happen if backtracking | ||
| 1748 | * is propagating stack precision where | ||
| 1749 | * caller has larger stack frame | ||
| 1750 | * than callee, but backtrack_insn() should | ||
| 1751 | * have returned -ENOTSUPP. | ||
| 1752 | */ | ||
| 1753 | verbose(env, "BUG spi %d stack_size %d\n", | ||
| 1754 | i, func->allocated_stack); | ||
| 1755 | WARN_ONCE(1, "verifier backtracking bug"); | ||
| 1756 | return -EFAULT; | ||
| 1757 | } | ||
| 1758 | |||
| 1759 | if (func->stack[i].slot_type[0] != STACK_SPILL) | ||
| 1760 | continue; | ||
| 1761 | reg = &func->stack[i].spilled_ptr; | ||
| 1762 | if (reg->type != SCALAR_VALUE) | ||
| 1763 | continue; | ||
| 1764 | if (!reg->precise) | ||
| 1765 | new_marks = true; | ||
| 1766 | reg->precise = true; | ||
| 1767 | } | ||
| 1768 | if (env->log.level & BPF_LOG_LEVEL) { | ||
| 1769 | print_verifier_state(env, func); | ||
| 1770 | verbose(env, "parent %s regs=%x stack=%llx marks\n", | ||
| 1771 | new_marks ? "didn't have" : "already had", | ||
| 1772 | reg_mask, stack_mask); | ||
| 1773 | } | ||
| 1774 | |||
| 1775 | if (!new_marks) | ||
| 1776 | break; | ||
| 1777 | |||
| 1778 | last_idx = st->last_insn_idx; | ||
| 1779 | first_idx = st->first_insn_idx; | ||
| 1780 | } | ||
| 1781 | return 0; | ||
| 1782 | } | ||
| 1783 | |||
| 1784 | |||
| 1340 | static bool is_spillable_regtype(enum bpf_reg_type type) | 1785 | static bool is_spillable_regtype(enum bpf_reg_type type) |
| 1341 | { | 1786 | { |
| 1342 | switch (type) { | 1787 | switch (type) { |
| @@ -1355,6 +1800,7 @@ static bool is_spillable_regtype(enum bpf_reg_type type) | |||
| 1355 | case PTR_TO_SOCK_COMMON_OR_NULL: | 1800 | case PTR_TO_SOCK_COMMON_OR_NULL: |
| 1356 | case PTR_TO_TCP_SOCK: | 1801 | case PTR_TO_TCP_SOCK: |
| 1357 | case PTR_TO_TCP_SOCK_OR_NULL: | 1802 | case PTR_TO_TCP_SOCK_OR_NULL: |
| 1803 | case PTR_TO_XDP_SOCK: | ||
| 1358 | return true; | 1804 | return true; |
| 1359 | default: | 1805 | default: |
| 1360 | return false; | 1806 | return false; |
| @@ -1367,6 +1813,23 @@ static bool register_is_null(struct bpf_reg_state *reg) | |||
| 1367 | return reg->type == SCALAR_VALUE && tnum_equals_const(reg->var_off, 0); | 1813 | return reg->type == SCALAR_VALUE && tnum_equals_const(reg->var_off, 0); |
| 1368 | } | 1814 | } |
| 1369 | 1815 | ||
| 1816 | static bool register_is_const(struct bpf_reg_state *reg) | ||
| 1817 | { | ||
| 1818 | return reg->type == SCALAR_VALUE && tnum_is_const(reg->var_off); | ||
| 1819 | } | ||
| 1820 | |||
| 1821 | static void save_register_state(struct bpf_func_state *state, | ||
| 1822 | int spi, struct bpf_reg_state *reg) | ||
| 1823 | { | ||
| 1824 | int i; | ||
| 1825 | |||
| 1826 | state->stack[spi].spilled_ptr = *reg; | ||
| 1827 | state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN; | ||
| 1828 | |||
| 1829 | for (i = 0; i < BPF_REG_SIZE; i++) | ||
| 1830 | state->stack[spi].slot_type[i] = STACK_SPILL; | ||
| 1831 | } | ||
| 1832 | |||
| 1370 | /* check_stack_read/write functions track spill/fill of registers, | 1833 | /* check_stack_read/write functions track spill/fill of registers, |
| 1371 | * stack boundary and alignment are checked in check_mem_access() | 1834 | * stack boundary and alignment are checked in check_mem_access() |
| 1372 | */ | 1835 | */ |
| @@ -1376,7 +1839,8 @@ static int check_stack_write(struct bpf_verifier_env *env, | |||
| 1376 | { | 1839 | { |
| 1377 | struct bpf_func_state *cur; /* state of the current function */ | 1840 | struct bpf_func_state *cur; /* state of the current function */ |
| 1378 | int i, slot = -off - 1, spi = slot / BPF_REG_SIZE, err; | 1841 | int i, slot = -off - 1, spi = slot / BPF_REG_SIZE, err; |
| 1379 | enum bpf_reg_type type; | 1842 | u32 dst_reg = env->prog->insnsi[insn_idx].dst_reg; |
| 1843 | struct bpf_reg_state *reg = NULL; | ||
| 1380 | 1844 | ||
| 1381 | err = realloc_func_state(state, round_up(slot + 1, BPF_REG_SIZE), | 1845 | err = realloc_func_state(state, round_up(slot + 1, BPF_REG_SIZE), |
| 1382 | state->acquired_refs, true); | 1846 | state->acquired_refs, true); |
| @@ -1393,27 +1857,48 @@ static int check_stack_write(struct bpf_verifier_env *env, | |||
| 1393 | } | 1857 | } |
| 1394 | 1858 | ||
| 1395 | cur = env->cur_state->frame[env->cur_state->curframe]; | 1859 | cur = env->cur_state->frame[env->cur_state->curframe]; |
| 1396 | if (value_regno >= 0 && | 1860 | if (value_regno >= 0) |
| 1397 | is_spillable_regtype((type = cur->regs[value_regno].type))) { | 1861 | reg = &cur->regs[value_regno]; |
| 1398 | 1862 | ||
| 1863 | if (reg && size == BPF_REG_SIZE && register_is_const(reg) && | ||
| 1864 | !register_is_null(reg) && env->allow_ptr_leaks) { | ||
| 1865 | if (dst_reg != BPF_REG_FP) { | ||
| 1866 | /* The backtracking logic can only recognize explicit | ||
| 1867 | * stack slot address like [fp - 8]. Other spill of | ||
| 1868 | * scalar via different register has to be conervative. | ||
| 1869 | * Backtrack from here and mark all registers as precise | ||
| 1870 | * that contributed into 'reg' being a constant. | ||
| 1871 | */ | ||
| 1872 | err = mark_chain_precision(env, value_regno); | ||
| 1873 | if (err) | ||
| 1874 | return err; | ||
| 1875 | } | ||
| 1876 | save_register_state(state, spi, reg); | ||
| 1877 | } else if (reg && is_spillable_regtype(reg->type)) { | ||
| 1399 | /* register containing pointer is being spilled into stack */ | 1878 | /* register containing pointer is being spilled into stack */ |
| 1400 | if (size != BPF_REG_SIZE) { | 1879 | if (size != BPF_REG_SIZE) { |
| 1880 | verbose_linfo(env, insn_idx, "; "); | ||
| 1401 | verbose(env, "invalid size of register spill\n"); | 1881 | verbose(env, "invalid size of register spill\n"); |
| 1402 | return -EACCES; | 1882 | return -EACCES; |
| 1403 | } | 1883 | } |
| 1404 | 1884 | ||
| 1405 | if (state != cur && type == PTR_TO_STACK) { | 1885 | if (state != cur && reg->type == PTR_TO_STACK) { |
| 1406 | verbose(env, "cannot spill pointers to stack into stack frame of the caller\n"); | 1886 | verbose(env, "cannot spill pointers to stack into stack frame of the caller\n"); |
| 1407 | return -EINVAL; | 1887 | return -EINVAL; |
| 1408 | } | 1888 | } |
| 1409 | 1889 | ||
| 1410 | /* save register state */ | 1890 | if (!env->allow_ptr_leaks) { |
| 1411 | state->stack[spi].spilled_ptr = cur->regs[value_regno]; | 1891 | bool sanitize = false; |
| 1412 | state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN; | ||
| 1413 | 1892 | ||
| 1414 | for (i = 0; i < BPF_REG_SIZE; i++) { | 1893 | if (state->stack[spi].slot_type[0] == STACK_SPILL && |
| 1415 | if (state->stack[spi].slot_type[i] == STACK_MISC && | 1894 | register_is_const(&state->stack[spi].spilled_ptr)) |
| 1416 | !env->allow_ptr_leaks) { | 1895 | sanitize = true; |
| 1896 | for (i = 0; i < BPF_REG_SIZE; i++) | ||
| 1897 | if (state->stack[spi].slot_type[i] == STACK_MISC) { | ||
| 1898 | sanitize = true; | ||
| 1899 | break; | ||
| 1900 | } | ||
| 1901 | if (sanitize) { | ||
| 1417 | int *poff = &env->insn_aux_data[insn_idx].sanitize_stack_off; | 1902 | int *poff = &env->insn_aux_data[insn_idx].sanitize_stack_off; |
| 1418 | int soff = (-spi - 1) * BPF_REG_SIZE; | 1903 | int soff = (-spi - 1) * BPF_REG_SIZE; |
| 1419 | 1904 | ||
| @@ -1436,8 +1921,8 @@ static int check_stack_write(struct bpf_verifier_env *env, | |||
| 1436 | } | 1921 | } |
| 1437 | *poff = soff; | 1922 | *poff = soff; |
| 1438 | } | 1923 | } |
| 1439 | state->stack[spi].slot_type[i] = STACK_SPILL; | ||
| 1440 | } | 1924 | } |
| 1925 | save_register_state(state, spi, reg); | ||
| 1441 | } else { | 1926 | } else { |
| 1442 | u8 type = STACK_MISC; | 1927 | u8 type = STACK_MISC; |
| 1443 | 1928 | ||
| @@ -1460,9 +1945,13 @@ static int check_stack_write(struct bpf_verifier_env *env, | |||
| 1460 | state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN; | 1945 | state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN; |
| 1461 | 1946 | ||
| 1462 | /* when we zero initialize stack slots mark them as such */ | 1947 | /* when we zero initialize stack slots mark them as such */ |
| 1463 | if (value_regno >= 0 && | 1948 | if (reg && register_is_null(reg)) { |
| 1464 | register_is_null(&cur->regs[value_regno])) | 1949 | /* backtracking doesn't work for STACK_ZERO yet. */ |
| 1950 | err = mark_chain_precision(env, value_regno); | ||
| 1951 | if (err) | ||
| 1952 | return err; | ||
| 1465 | type = STACK_ZERO; | 1953 | type = STACK_ZERO; |
| 1954 | } | ||
| 1466 | 1955 | ||
| 1467 | /* Mark slots affected by this stack write. */ | 1956 | /* Mark slots affected by this stack write. */ |
| 1468 | for (i = 0; i < size; i++) | 1957 | for (i = 0; i < size; i++) |
| @@ -1479,6 +1968,7 @@ static int check_stack_read(struct bpf_verifier_env *env, | |||
| 1479 | struct bpf_verifier_state *vstate = env->cur_state; | 1968 | struct bpf_verifier_state *vstate = env->cur_state; |
| 1480 | struct bpf_func_state *state = vstate->frame[vstate->curframe]; | 1969 | struct bpf_func_state *state = vstate->frame[vstate->curframe]; |
| 1481 | int i, slot = -off - 1, spi = slot / BPF_REG_SIZE; | 1970 | int i, slot = -off - 1, spi = slot / BPF_REG_SIZE; |
| 1971 | struct bpf_reg_state *reg; | ||
| 1482 | u8 *stype; | 1972 | u8 *stype; |
| 1483 | 1973 | ||
| 1484 | if (reg_state->allocated_stack <= slot) { | 1974 | if (reg_state->allocated_stack <= slot) { |
| @@ -1487,11 +1977,21 @@ static int check_stack_read(struct bpf_verifier_env *env, | |||
| 1487 | return -EACCES; | 1977 | return -EACCES; |
| 1488 | } | 1978 | } |
| 1489 | stype = reg_state->stack[spi].slot_type; | 1979 | stype = reg_state->stack[spi].slot_type; |
| 1980 | reg = ®_state->stack[spi].spilled_ptr; | ||
| 1490 | 1981 | ||
| 1491 | if (stype[0] == STACK_SPILL) { | 1982 | if (stype[0] == STACK_SPILL) { |
| 1492 | if (size != BPF_REG_SIZE) { | 1983 | if (size != BPF_REG_SIZE) { |
| 1493 | verbose(env, "invalid size of register spill\n"); | 1984 | if (reg->type != SCALAR_VALUE) { |
| 1494 | return -EACCES; | 1985 | verbose_linfo(env, env->insn_idx, "; "); |
| 1986 | verbose(env, "invalid size of register fill\n"); | ||
| 1987 | return -EACCES; | ||
| 1988 | } | ||
| 1989 | if (value_regno >= 0) { | ||
| 1990 | mark_reg_unknown(env, state->regs, value_regno); | ||
| 1991 | state->regs[value_regno].live |= REG_LIVE_WRITTEN; | ||
| 1992 | } | ||
| 1993 | mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64); | ||
| 1994 | return 0; | ||
| 1495 | } | 1995 | } |
| 1496 | for (i = 1; i < BPF_REG_SIZE; i++) { | 1996 | for (i = 1; i < BPF_REG_SIZE; i++) { |
| 1497 | if (stype[(slot - i) % BPF_REG_SIZE] != STACK_SPILL) { | 1997 | if (stype[(slot - i) % BPF_REG_SIZE] != STACK_SPILL) { |
| @@ -1502,17 +2002,14 @@ static int check_stack_read(struct bpf_verifier_env *env, | |||
| 1502 | 2002 | ||
| 1503 | if (value_regno >= 0) { | 2003 | if (value_regno >= 0) { |
| 1504 | /* restore register state from stack */ | 2004 | /* restore register state from stack */ |
| 1505 | state->regs[value_regno] = reg_state->stack[spi].spilled_ptr; | 2005 | state->regs[value_regno] = *reg; |
| 1506 | /* mark reg as written since spilled pointer state likely | 2006 | /* mark reg as written since spilled pointer state likely |
| 1507 | * has its liveness marks cleared by is_state_visited() | 2007 | * has its liveness marks cleared by is_state_visited() |
| 1508 | * which resets stack/reg liveness for state transitions | 2008 | * which resets stack/reg liveness for state transitions |
| 1509 | */ | 2009 | */ |
| 1510 | state->regs[value_regno].live |= REG_LIVE_WRITTEN; | 2010 | state->regs[value_regno].live |= REG_LIVE_WRITTEN; |
| 1511 | } | 2011 | } |
| 1512 | mark_reg_read(env, ®_state->stack[spi].spilled_ptr, | 2012 | mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64); |
| 1513 | reg_state->stack[spi].spilled_ptr.parent, | ||
| 1514 | REG_LIVE_READ64); | ||
| 1515 | return 0; | ||
| 1516 | } else { | 2013 | } else { |
| 1517 | int zeros = 0; | 2014 | int zeros = 0; |
| 1518 | 2015 | ||
| @@ -1527,23 +2024,32 @@ static int check_stack_read(struct bpf_verifier_env *env, | |||
| 1527 | off, i, size); | 2024 | off, i, size); |
| 1528 | return -EACCES; | 2025 | return -EACCES; |
| 1529 | } | 2026 | } |
| 1530 | mark_reg_read(env, ®_state->stack[spi].spilled_ptr, | 2027 | mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64); |
| 1531 | reg_state->stack[spi].spilled_ptr.parent, | ||
| 1532 | REG_LIVE_READ64); | ||
| 1533 | if (value_regno >= 0) { | 2028 | if (value_regno >= 0) { |
| 1534 | if (zeros == size) { | 2029 | if (zeros == size) { |
| 1535 | /* any size read into register is zero extended, | 2030 | /* any size read into register is zero extended, |
| 1536 | * so the whole register == const_zero | 2031 | * so the whole register == const_zero |
| 1537 | */ | 2032 | */ |
| 1538 | __mark_reg_const_zero(&state->regs[value_regno]); | 2033 | __mark_reg_const_zero(&state->regs[value_regno]); |
| 2034 | /* backtracking doesn't support STACK_ZERO yet, | ||
| 2035 | * so mark it precise here, so that later | ||
| 2036 | * backtracking can stop here. | ||
| 2037 | * Backtracking may not need this if this register | ||
| 2038 | * doesn't participate in pointer adjustment. | ||
| 2039 | * Forward propagation of precise flag is not | ||
| 2040 | * necessary either. This mark is only to stop | ||
| 2041 | * backtracking. Any register that contributed | ||
| 2042 | * to const 0 was marked precise before spill. | ||
| 2043 | */ | ||
| 2044 | state->regs[value_regno].precise = true; | ||
| 1539 | } else { | 2045 | } else { |
| 1540 | /* have read misc data from the stack */ | 2046 | /* have read misc data from the stack */ |
| 1541 | mark_reg_unknown(env, state->regs, value_regno); | 2047 | mark_reg_unknown(env, state->regs, value_regno); |
| 1542 | } | 2048 | } |
| 1543 | state->regs[value_regno].live |= REG_LIVE_WRITTEN; | 2049 | state->regs[value_regno].live |= REG_LIVE_WRITTEN; |
| 1544 | } | 2050 | } |
| 1545 | return 0; | ||
| 1546 | } | 2051 | } |
| 2052 | return 0; | ||
| 1547 | } | 2053 | } |
| 1548 | 2054 | ||
| 1549 | static int check_stack_access(struct bpf_verifier_env *env, | 2055 | static int check_stack_access(struct bpf_verifier_env *env, |
| @@ -1835,6 +2341,9 @@ static int check_sock_access(struct bpf_verifier_env *env, int insn_idx, | |||
| 1835 | case PTR_TO_TCP_SOCK: | 2341 | case PTR_TO_TCP_SOCK: |
| 1836 | valid = bpf_tcp_sock_is_valid_access(off, size, t, &info); | 2342 | valid = bpf_tcp_sock_is_valid_access(off, size, t, &info); |
| 1837 | break; | 2343 | break; |
| 2344 | case PTR_TO_XDP_SOCK: | ||
| 2345 | valid = bpf_xdp_sock_is_valid_access(off, size, t, &info); | ||
| 2346 | break; | ||
| 1838 | default: | 2347 | default: |
| 1839 | valid = false; | 2348 | valid = false; |
| 1840 | } | 2349 | } |
| @@ -1999,6 +2508,9 @@ static int check_ptr_alignment(struct bpf_verifier_env *env, | |||
| 1999 | case PTR_TO_TCP_SOCK: | 2508 | case PTR_TO_TCP_SOCK: |
| 2000 | pointer_desc = "tcp_sock "; | 2509 | pointer_desc = "tcp_sock "; |
| 2001 | break; | 2510 | break; |
| 2511 | case PTR_TO_XDP_SOCK: | ||
| 2512 | pointer_desc = "xdp_sock "; | ||
| 2513 | break; | ||
| 2002 | default: | 2514 | default: |
| 2003 | break; | 2515 | break; |
| 2004 | } | 2516 | } |
| @@ -2398,7 +2910,7 @@ static int check_stack_boundary(struct bpf_verifier_env *env, int regno, | |||
| 2398 | { | 2910 | { |
| 2399 | struct bpf_reg_state *reg = reg_state(env, regno); | 2911 | struct bpf_reg_state *reg = reg_state(env, regno); |
| 2400 | struct bpf_func_state *state = func(env, reg); | 2912 | struct bpf_func_state *state = func(env, reg); |
| 2401 | int err, min_off, max_off, i, slot, spi; | 2913 | int err, min_off, max_off, i, j, slot, spi; |
| 2402 | 2914 | ||
| 2403 | if (reg->type != PTR_TO_STACK) { | 2915 | if (reg->type != PTR_TO_STACK) { |
| 2404 | /* Allow zero-byte read from NULL, regardless of pointer type */ | 2916 | /* Allow zero-byte read from NULL, regardless of pointer type */ |
| @@ -2486,6 +2998,14 @@ static int check_stack_boundary(struct bpf_verifier_env *env, int regno, | |||
| 2486 | *stype = STACK_MISC; | 2998 | *stype = STACK_MISC; |
| 2487 | goto mark; | 2999 | goto mark; |
| 2488 | } | 3000 | } |
| 3001 | if (state->stack[spi].slot_type[0] == STACK_SPILL && | ||
| 3002 | state->stack[spi].spilled_ptr.type == SCALAR_VALUE) { | ||
| 3003 | __mark_reg_unknown(&state->stack[spi].spilled_ptr); | ||
| 3004 | for (j = 0; j < BPF_REG_SIZE; j++) | ||
| 3005 | state->stack[spi].slot_type[j] = STACK_MISC; | ||
| 3006 | goto mark; | ||
| 3007 | } | ||
| 3008 | |||
| 2489 | err: | 3009 | err: |
| 2490 | if (tnum_is_const(reg->var_off)) { | 3010 | if (tnum_is_const(reg->var_off)) { |
| 2491 | verbose(env, "invalid indirect read from stack off %d+%d size %d\n", | 3011 | verbose(env, "invalid indirect read from stack off %d+%d size %d\n", |
| @@ -2837,6 +3357,8 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno, | |||
| 2837 | err = check_helper_mem_access(env, regno - 1, | 3357 | err = check_helper_mem_access(env, regno - 1, |
| 2838 | reg->umax_value, | 3358 | reg->umax_value, |
| 2839 | zero_size_allowed, meta); | 3359 | zero_size_allowed, meta); |
| 3360 | if (!err) | ||
| 3361 | err = mark_chain_precision(env, regno); | ||
| 2840 | } else if (arg_type_is_int_ptr(arg_type)) { | 3362 | } else if (arg_type_is_int_ptr(arg_type)) { |
| 2841 | int size = int_ptr_type_to_size(arg_type); | 3363 | int size = int_ptr_type_to_size(arg_type); |
| 2842 | 3364 | ||
| @@ -2897,10 +3419,14 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, | |||
| 2897 | * appear. | 3419 | * appear. |
| 2898 | */ | 3420 | */ |
| 2899 | case BPF_MAP_TYPE_CPUMAP: | 3421 | case BPF_MAP_TYPE_CPUMAP: |
| 2900 | case BPF_MAP_TYPE_XSKMAP: | ||
| 2901 | if (func_id != BPF_FUNC_redirect_map) | 3422 | if (func_id != BPF_FUNC_redirect_map) |
| 2902 | goto error; | 3423 | goto error; |
| 2903 | break; | 3424 | break; |
| 3425 | case BPF_MAP_TYPE_XSKMAP: | ||
| 3426 | if (func_id != BPF_FUNC_redirect_map && | ||
| 3427 | func_id != BPF_FUNC_map_lookup_elem) | ||
| 3428 | goto error; | ||
| 3429 | break; | ||
| 2904 | case BPF_MAP_TYPE_ARRAY_OF_MAPS: | 3430 | case BPF_MAP_TYPE_ARRAY_OF_MAPS: |
| 2905 | case BPF_MAP_TYPE_HASH_OF_MAPS: | 3431 | case BPF_MAP_TYPE_HASH_OF_MAPS: |
| 2906 | if (func_id != BPF_FUNC_map_lookup_elem) | 3432 | if (func_id != BPF_FUNC_map_lookup_elem) |
| @@ -3791,6 +4317,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, | |||
| 3791 | case PTR_TO_SOCK_COMMON_OR_NULL: | 4317 | case PTR_TO_SOCK_COMMON_OR_NULL: |
| 3792 | case PTR_TO_TCP_SOCK: | 4318 | case PTR_TO_TCP_SOCK: |
| 3793 | case PTR_TO_TCP_SOCK_OR_NULL: | 4319 | case PTR_TO_TCP_SOCK_OR_NULL: |
| 4320 | case PTR_TO_XDP_SOCK: | ||
| 3794 | verbose(env, "R%d pointer arithmetic on %s prohibited\n", | 4321 | verbose(env, "R%d pointer arithmetic on %s prohibited\n", |
| 3795 | dst, reg_type_str[ptr_reg->type]); | 4322 | dst, reg_type_str[ptr_reg->type]); |
| 3796 | return -EACCES; | 4323 | return -EACCES; |
| @@ -4268,6 +4795,7 @@ static int adjust_reg_min_max_vals(struct bpf_verifier_env *env, | |||
| 4268 | struct bpf_reg_state *regs = state->regs, *dst_reg, *src_reg; | 4795 | struct bpf_reg_state *regs = state->regs, *dst_reg, *src_reg; |
| 4269 | struct bpf_reg_state *ptr_reg = NULL, off_reg = {0}; | 4796 | struct bpf_reg_state *ptr_reg = NULL, off_reg = {0}; |
| 4270 | u8 opcode = BPF_OP(insn->code); | 4797 | u8 opcode = BPF_OP(insn->code); |
| 4798 | int err; | ||
| 4271 | 4799 | ||
| 4272 | dst_reg = ®s[insn->dst_reg]; | 4800 | dst_reg = ®s[insn->dst_reg]; |
| 4273 | src_reg = NULL; | 4801 | src_reg = NULL; |
| @@ -4294,11 +4822,17 @@ static int adjust_reg_min_max_vals(struct bpf_verifier_env *env, | |||
| 4294 | * This is legal, but we have to reverse our | 4822 | * This is legal, but we have to reverse our |
| 4295 | * src/dest handling in computing the range | 4823 | * src/dest handling in computing the range |
| 4296 | */ | 4824 | */ |
| 4825 | err = mark_chain_precision(env, insn->dst_reg); | ||
| 4826 | if (err) | ||
| 4827 | return err; | ||
| 4297 | return adjust_ptr_min_max_vals(env, insn, | 4828 | return adjust_ptr_min_max_vals(env, insn, |
| 4298 | src_reg, dst_reg); | 4829 | src_reg, dst_reg); |
| 4299 | } | 4830 | } |
| 4300 | } else if (ptr_reg) { | 4831 | } else if (ptr_reg) { |
| 4301 | /* pointer += scalar */ | 4832 | /* pointer += scalar */ |
| 4833 | err = mark_chain_precision(env, insn->src_reg); | ||
| 4834 | if (err) | ||
| 4835 | return err; | ||
| 4302 | return adjust_ptr_min_max_vals(env, insn, | 4836 | return adjust_ptr_min_max_vals(env, insn, |
| 4303 | dst_reg, src_reg); | 4837 | dst_reg, src_reg); |
| 4304 | } | 4838 | } |
| @@ -5030,6 +5564,9 @@ static void mark_ptr_or_null_reg(struct bpf_func_state *state, | |||
| 5030 | if (reg->map_ptr->inner_map_meta) { | 5564 | if (reg->map_ptr->inner_map_meta) { |
| 5031 | reg->type = CONST_PTR_TO_MAP; | 5565 | reg->type = CONST_PTR_TO_MAP; |
| 5032 | reg->map_ptr = reg->map_ptr->inner_map_meta; | 5566 | reg->map_ptr = reg->map_ptr->inner_map_meta; |
| 5567 | } else if (reg->map_ptr->map_type == | ||
| 5568 | BPF_MAP_TYPE_XSKMAP) { | ||
| 5569 | reg->type = PTR_TO_XDP_SOCK; | ||
| 5033 | } else { | 5570 | } else { |
| 5034 | reg->type = PTR_TO_MAP_VALUE; | 5571 | reg->type = PTR_TO_MAP_VALUE; |
| 5035 | } | 5572 | } |
| @@ -5201,9 +5738,10 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, | |||
| 5201 | struct bpf_verifier_state *this_branch = env->cur_state; | 5738 | struct bpf_verifier_state *this_branch = env->cur_state; |
| 5202 | struct bpf_verifier_state *other_branch; | 5739 | struct bpf_verifier_state *other_branch; |
| 5203 | struct bpf_reg_state *regs = this_branch->frame[this_branch->curframe]->regs; | 5740 | struct bpf_reg_state *regs = this_branch->frame[this_branch->curframe]->regs; |
| 5204 | struct bpf_reg_state *dst_reg, *other_branch_regs; | 5741 | struct bpf_reg_state *dst_reg, *other_branch_regs, *src_reg = NULL; |
| 5205 | u8 opcode = BPF_OP(insn->code); | 5742 | u8 opcode = BPF_OP(insn->code); |
| 5206 | bool is_jmp32; | 5743 | bool is_jmp32; |
| 5744 | int pred = -1; | ||
| 5207 | int err; | 5745 | int err; |
| 5208 | 5746 | ||
| 5209 | /* Only conditional jumps are expected to reach here. */ | 5747 | /* Only conditional jumps are expected to reach here. */ |
| @@ -5228,6 +5766,7 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, | |||
| 5228 | insn->src_reg); | 5766 | insn->src_reg); |
| 5229 | return -EACCES; | 5767 | return -EACCES; |
| 5230 | } | 5768 | } |
| 5769 | src_reg = ®s[insn->src_reg]; | ||
| 5231 | } else { | 5770 | } else { |
| 5232 | if (insn->src_reg != BPF_REG_0) { | 5771 | if (insn->src_reg != BPF_REG_0) { |
| 5233 | verbose(env, "BPF_JMP/JMP32 uses reserved fields\n"); | 5772 | verbose(env, "BPF_JMP/JMP32 uses reserved fields\n"); |
| @@ -5243,20 +5782,29 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, | |||
| 5243 | dst_reg = ®s[insn->dst_reg]; | 5782 | dst_reg = ®s[insn->dst_reg]; |
| 5244 | is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32; | 5783 | is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32; |
| 5245 | 5784 | ||
| 5246 | if (BPF_SRC(insn->code) == BPF_K) { | 5785 | if (BPF_SRC(insn->code) == BPF_K) |
| 5247 | int pred = is_branch_taken(dst_reg, insn->imm, opcode, | 5786 | pred = is_branch_taken(dst_reg, insn->imm, |
| 5248 | is_jmp32); | 5787 | opcode, is_jmp32); |
| 5249 | 5788 | else if (src_reg->type == SCALAR_VALUE && | |
| 5250 | if (pred == 1) { | 5789 | tnum_is_const(src_reg->var_off)) |
| 5251 | /* only follow the goto, ignore fall-through */ | 5790 | pred = is_branch_taken(dst_reg, src_reg->var_off.value, |
| 5252 | *insn_idx += insn->off; | 5791 | opcode, is_jmp32); |
| 5253 | return 0; | 5792 | if (pred >= 0) { |
| 5254 | } else if (pred == 0) { | 5793 | err = mark_chain_precision(env, insn->dst_reg); |
| 5255 | /* only follow fall-through branch, since | 5794 | if (BPF_SRC(insn->code) == BPF_X && !err) |
| 5256 | * that's where the program will go | 5795 | err = mark_chain_precision(env, insn->src_reg); |
| 5257 | */ | 5796 | if (err) |
| 5258 | return 0; | 5797 | return err; |
| 5259 | } | 5798 | } |
| 5799 | if (pred == 1) { | ||
| 5800 | /* only follow the goto, ignore fall-through */ | ||
| 5801 | *insn_idx += insn->off; | ||
| 5802 | return 0; | ||
| 5803 | } else if (pred == 0) { | ||
| 5804 | /* only follow fall-through branch, since | ||
| 5805 | * that's where the program will go | ||
| 5806 | */ | ||
| 5807 | return 0; | ||
| 5260 | } | 5808 | } |
| 5261 | 5809 | ||
| 5262 | other_branch = push_stack(env, *insn_idx + insn->off + 1, *insn_idx, | 5810 | other_branch = push_stack(env, *insn_idx + insn->off + 1, *insn_idx, |
| @@ -5616,7 +6164,8 @@ static void init_explored_state(struct bpf_verifier_env *env, int idx) | |||
| 5616 | * w - next instruction | 6164 | * w - next instruction |
| 5617 | * e - edge | 6165 | * e - edge |
| 5618 | */ | 6166 | */ |
| 5619 | static int push_insn(int t, int w, int e, struct bpf_verifier_env *env) | 6167 | static int push_insn(int t, int w, int e, struct bpf_verifier_env *env, |
| 6168 | bool loop_ok) | ||
| 5620 | { | 6169 | { |
| 5621 | int *insn_stack = env->cfg.insn_stack; | 6170 | int *insn_stack = env->cfg.insn_stack; |
| 5622 | int *insn_state = env->cfg.insn_state; | 6171 | int *insn_state = env->cfg.insn_state; |
| @@ -5646,6 +6195,8 @@ static int push_insn(int t, int w, int e, struct bpf_verifier_env *env) | |||
| 5646 | insn_stack[env->cfg.cur_stack++] = w; | 6195 | insn_stack[env->cfg.cur_stack++] = w; |
| 5647 | return 1; | 6196 | return 1; |
| 5648 | } else if ((insn_state[w] & 0xF0) == DISCOVERED) { | 6197 | } else if ((insn_state[w] & 0xF0) == DISCOVERED) { |
| 6198 | if (loop_ok && env->allow_ptr_leaks) | ||
| 6199 | return 0; | ||
| 5649 | verbose_linfo(env, t, "%d: ", t); | 6200 | verbose_linfo(env, t, "%d: ", t); |
| 5650 | verbose_linfo(env, w, "%d: ", w); | 6201 | verbose_linfo(env, w, "%d: ", w); |
| 5651 | verbose(env, "back-edge from insn %d to %d\n", t, w); | 6202 | verbose(env, "back-edge from insn %d to %d\n", t, w); |
| @@ -5697,7 +6248,7 @@ peek_stack: | |||
| 5697 | if (opcode == BPF_EXIT) { | 6248 | if (opcode == BPF_EXIT) { |
| 5698 | goto mark_explored; | 6249 | goto mark_explored; |
| 5699 | } else if (opcode == BPF_CALL) { | 6250 | } else if (opcode == BPF_CALL) { |
| 5700 | ret = push_insn(t, t + 1, FALLTHROUGH, env); | 6251 | ret = push_insn(t, t + 1, FALLTHROUGH, env, false); |
| 5701 | if (ret == 1) | 6252 | if (ret == 1) |
| 5702 | goto peek_stack; | 6253 | goto peek_stack; |
| 5703 | else if (ret < 0) | 6254 | else if (ret < 0) |
| @@ -5706,7 +6257,8 @@ peek_stack: | |||
| 5706 | init_explored_state(env, t + 1); | 6257 | init_explored_state(env, t + 1); |
| 5707 | if (insns[t].src_reg == BPF_PSEUDO_CALL) { | 6258 | if (insns[t].src_reg == BPF_PSEUDO_CALL) { |
| 5708 | init_explored_state(env, t); | 6259 | init_explored_state(env, t); |
| 5709 | ret = push_insn(t, t + insns[t].imm + 1, BRANCH, env); | 6260 | ret = push_insn(t, t + insns[t].imm + 1, BRANCH, |
| 6261 | env, false); | ||
| 5710 | if (ret == 1) | 6262 | if (ret == 1) |
| 5711 | goto peek_stack; | 6263 | goto peek_stack; |
| 5712 | else if (ret < 0) | 6264 | else if (ret < 0) |
| @@ -5719,11 +6271,16 @@ peek_stack: | |||
| 5719 | } | 6271 | } |
| 5720 | /* unconditional jump with single edge */ | 6272 | /* unconditional jump with single edge */ |
| 5721 | ret = push_insn(t, t + insns[t].off + 1, | 6273 | ret = push_insn(t, t + insns[t].off + 1, |
| 5722 | FALLTHROUGH, env); | 6274 | FALLTHROUGH, env, true); |
| 5723 | if (ret == 1) | 6275 | if (ret == 1) |
| 5724 | goto peek_stack; | 6276 | goto peek_stack; |
| 5725 | else if (ret < 0) | 6277 | else if (ret < 0) |
| 5726 | goto err_free; | 6278 | goto err_free; |
| 6279 | /* unconditional jmp is not a good pruning point, | ||
| 6280 | * but it's marked, since backtracking needs | ||
| 6281 | * to record jmp history in is_state_visited(). | ||
| 6282 | */ | ||
| 6283 | init_explored_state(env, t + insns[t].off + 1); | ||
| 5727 | /* tell verifier to check for equivalent states | 6284 | /* tell verifier to check for equivalent states |
| 5728 | * after every call and jump | 6285 | * after every call and jump |
| 5729 | */ | 6286 | */ |
| @@ -5732,13 +6289,13 @@ peek_stack: | |||
| 5732 | } else { | 6289 | } else { |
| 5733 | /* conditional jump with two edges */ | 6290 | /* conditional jump with two edges */ |
| 5734 | init_explored_state(env, t); | 6291 | init_explored_state(env, t); |
| 5735 | ret = push_insn(t, t + 1, FALLTHROUGH, env); | 6292 | ret = push_insn(t, t + 1, FALLTHROUGH, env, true); |
| 5736 | if (ret == 1) | 6293 | if (ret == 1) |
| 5737 | goto peek_stack; | 6294 | goto peek_stack; |
| 5738 | else if (ret < 0) | 6295 | else if (ret < 0) |
| 5739 | goto err_free; | 6296 | goto err_free; |
| 5740 | 6297 | ||
| 5741 | ret = push_insn(t, t + insns[t].off + 1, BRANCH, env); | 6298 | ret = push_insn(t, t + insns[t].off + 1, BRANCH, env, true); |
| 5742 | if (ret == 1) | 6299 | if (ret == 1) |
| 5743 | goto peek_stack; | 6300 | goto peek_stack; |
| 5744 | else if (ret < 0) | 6301 | else if (ret < 0) |
| @@ -5748,7 +6305,7 @@ peek_stack: | |||
| 5748 | /* all other non-branch instructions with single | 6305 | /* all other non-branch instructions with single |
| 5749 | * fall-through edge | 6306 | * fall-through edge |
| 5750 | */ | 6307 | */ |
| 5751 | ret = push_insn(t, t + 1, FALLTHROUGH, env); | 6308 | ret = push_insn(t, t + 1, FALLTHROUGH, env, false); |
| 5752 | if (ret == 1) | 6309 | if (ret == 1) |
| 5753 | goto peek_stack; | 6310 | goto peek_stack; |
| 5754 | else if (ret < 0) | 6311 | else if (ret < 0) |
| @@ -6181,6 +6738,8 @@ static void clean_live_states(struct bpf_verifier_env *env, int insn, | |||
| 6181 | 6738 | ||
| 6182 | sl = *explored_state(env, insn); | 6739 | sl = *explored_state(env, insn); |
| 6183 | while (sl) { | 6740 | while (sl) { |
| 6741 | if (sl->state.branches) | ||
| 6742 | goto next; | ||
| 6184 | if (sl->state.insn_idx != insn || | 6743 | if (sl->state.insn_idx != insn || |
| 6185 | sl->state.curframe != cur->curframe) | 6744 | sl->state.curframe != cur->curframe) |
| 6186 | goto next; | 6745 | goto next; |
| @@ -6222,6 +6781,8 @@ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur, | |||
| 6222 | switch (rold->type) { | 6781 | switch (rold->type) { |
| 6223 | case SCALAR_VALUE: | 6782 | case SCALAR_VALUE: |
| 6224 | if (rcur->type == SCALAR_VALUE) { | 6783 | if (rcur->type == SCALAR_VALUE) { |
| 6784 | if (!rold->precise && !rcur->precise) | ||
| 6785 | return true; | ||
| 6225 | /* new val must satisfy old val knowledge */ | 6786 | /* new val must satisfy old val knowledge */ |
| 6226 | return range_within(rold, rcur) && | 6787 | return range_within(rold, rcur) && |
| 6227 | tnum_in(rold->var_off, rcur->var_off); | 6788 | tnum_in(rold->var_off, rcur->var_off); |
| @@ -6294,6 +6855,7 @@ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur, | |||
| 6294 | case PTR_TO_SOCK_COMMON_OR_NULL: | 6855 | case PTR_TO_SOCK_COMMON_OR_NULL: |
| 6295 | case PTR_TO_TCP_SOCK: | 6856 | case PTR_TO_TCP_SOCK: |
| 6296 | case PTR_TO_TCP_SOCK_OR_NULL: | 6857 | case PTR_TO_TCP_SOCK_OR_NULL: |
| 6858 | case PTR_TO_XDP_SOCK: | ||
| 6297 | /* Only valid matches are exact, which memcmp() above | 6859 | /* Only valid matches are exact, which memcmp() above |
| 6298 | * would have accepted | 6860 | * would have accepted |
| 6299 | */ | 6861 | */ |
| @@ -6544,19 +7106,52 @@ static int propagate_liveness(struct bpf_verifier_env *env, | |||
| 6544 | return 0; | 7106 | return 0; |
| 6545 | } | 7107 | } |
| 6546 | 7108 | ||
| 7109 | static bool states_maybe_looping(struct bpf_verifier_state *old, | ||
| 7110 | struct bpf_verifier_state *cur) | ||
| 7111 | { | ||
| 7112 | struct bpf_func_state *fold, *fcur; | ||
| 7113 | int i, fr = cur->curframe; | ||
| 7114 | |||
| 7115 | if (old->curframe != fr) | ||
| 7116 | return false; | ||
| 7117 | |||
| 7118 | fold = old->frame[fr]; | ||
| 7119 | fcur = cur->frame[fr]; | ||
| 7120 | for (i = 0; i < MAX_BPF_REG; i++) | ||
| 7121 | if (memcmp(&fold->regs[i], &fcur->regs[i], | ||
| 7122 | offsetof(struct bpf_reg_state, parent))) | ||
| 7123 | return false; | ||
| 7124 | return true; | ||
| 7125 | } | ||
| 7126 | |||
| 7127 | |||
| 6547 | static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) | 7128 | static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) |
| 6548 | { | 7129 | { |
| 6549 | struct bpf_verifier_state_list *new_sl; | 7130 | struct bpf_verifier_state_list *new_sl; |
| 6550 | struct bpf_verifier_state_list *sl, **pprev; | 7131 | struct bpf_verifier_state_list *sl, **pprev; |
| 6551 | struct bpf_verifier_state *cur = env->cur_state, *new; | 7132 | struct bpf_verifier_state *cur = env->cur_state, *new; |
| 6552 | int i, j, err, states_cnt = 0; | 7133 | int i, j, err, states_cnt = 0; |
| 7134 | bool add_new_state = false; | ||
| 6553 | 7135 | ||
| 7136 | cur->last_insn_idx = env->prev_insn_idx; | ||
| 6554 | if (!env->insn_aux_data[insn_idx].prune_point) | 7137 | if (!env->insn_aux_data[insn_idx].prune_point) |
| 6555 | /* this 'insn_idx' instruction wasn't marked, so we will not | 7138 | /* this 'insn_idx' instruction wasn't marked, so we will not |
| 6556 | * be doing state search here | 7139 | * be doing state search here |
| 6557 | */ | 7140 | */ |
| 6558 | return 0; | 7141 | return 0; |
| 6559 | 7142 | ||
| 7143 | /* bpf progs typically have pruning point every 4 instructions | ||
| 7144 | * http://vger.kernel.org/bpfconf2019.html#session-1 | ||
| 7145 | * Do not add new state for future pruning if the verifier hasn't seen | ||
| 7146 | * at least 2 jumps and at least 8 instructions. | ||
| 7147 | * This heuristics helps decrease 'total_states' and 'peak_states' metric. | ||
| 7148 | * In tests that amounts to up to 50% reduction into total verifier | ||
| 7149 | * memory consumption and 20% verifier time speedup. | ||
| 7150 | */ | ||
| 7151 | if (env->jmps_processed - env->prev_jmps_processed >= 2 && | ||
| 7152 | env->insn_processed - env->prev_insn_processed >= 8) | ||
| 7153 | add_new_state = true; | ||
| 7154 | |||
| 6560 | pprev = explored_state(env, insn_idx); | 7155 | pprev = explored_state(env, insn_idx); |
| 6561 | sl = *pprev; | 7156 | sl = *pprev; |
| 6562 | 7157 | ||
| @@ -6566,6 +7161,30 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) | |||
| 6566 | states_cnt++; | 7161 | states_cnt++; |
| 6567 | if (sl->state.insn_idx != insn_idx) | 7162 | if (sl->state.insn_idx != insn_idx) |
| 6568 | goto next; | 7163 | goto next; |
| 7164 | if (sl->state.branches) { | ||
| 7165 | if (states_maybe_looping(&sl->state, cur) && | ||
| 7166 | states_equal(env, &sl->state, cur)) { | ||
| 7167 | verbose_linfo(env, insn_idx, "; "); | ||
| 7168 | verbose(env, "infinite loop detected at insn %d\n", insn_idx); | ||
| 7169 | return -EINVAL; | ||
| 7170 | } | ||
| 7171 | /* if the verifier is processing a loop, avoid adding new state | ||
| 7172 | * too often, since different loop iterations have distinct | ||
| 7173 | * states and may not help future pruning. | ||
| 7174 | * This threshold shouldn't be too low to make sure that | ||
| 7175 | * a loop with large bound will be rejected quickly. | ||
| 7176 | * The most abusive loop will be: | ||
| 7177 | * r1 += 1 | ||
| 7178 | * if r1 < 1000000 goto pc-2 | ||
| 7179 | * 1M insn_procssed limit / 100 == 10k peak states. | ||
| 7180 | * This threshold shouldn't be too high either, since states | ||
| 7181 | * at the end of the loop are likely to be useful in pruning. | ||
| 7182 | */ | ||
| 7183 | if (env->jmps_processed - env->prev_jmps_processed < 20 && | ||
| 7184 | env->insn_processed - env->prev_insn_processed < 100) | ||
| 7185 | add_new_state = false; | ||
| 7186 | goto miss; | ||
| 7187 | } | ||
| 6569 | if (states_equal(env, &sl->state, cur)) { | 7188 | if (states_equal(env, &sl->state, cur)) { |
| 6570 | sl->hit_cnt++; | 7189 | sl->hit_cnt++; |
| 6571 | /* reached equivalent register/stack state, | 7190 | /* reached equivalent register/stack state, |
| @@ -6583,7 +7202,15 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) | |||
| 6583 | return err; | 7202 | return err; |
| 6584 | return 1; | 7203 | return 1; |
| 6585 | } | 7204 | } |
| 6586 | sl->miss_cnt++; | 7205 | miss: |
| 7206 | /* when new state is not going to be added do not increase miss count. | ||
| 7207 | * Otherwise several loop iterations will remove the state | ||
| 7208 | * recorded earlier. The goal of these heuristics is to have | ||
| 7209 | * states from some iterations of the loop (some in the beginning | ||
| 7210 | * and some at the end) to help pruning. | ||
| 7211 | */ | ||
| 7212 | if (add_new_state) | ||
| 7213 | sl->miss_cnt++; | ||
| 6587 | /* heuristic to determine whether this state is beneficial | 7214 | /* heuristic to determine whether this state is beneficial |
| 6588 | * to keep checking from state equivalence point of view. | 7215 | * to keep checking from state equivalence point of view. |
| 6589 | * Higher numbers increase max_states_per_insn and verification time, | 7216 | * Higher numbers increase max_states_per_insn and verification time, |
| @@ -6595,6 +7222,11 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) | |||
| 6595 | */ | 7222 | */ |
| 6596 | *pprev = sl->next; | 7223 | *pprev = sl->next; |
| 6597 | if (sl->state.frame[0]->regs[0].live & REG_LIVE_DONE) { | 7224 | if (sl->state.frame[0]->regs[0].live & REG_LIVE_DONE) { |
| 7225 | u32 br = sl->state.branches; | ||
| 7226 | |||
| 7227 | WARN_ONCE(br, | ||
| 7228 | "BUG live_done but branches_to_explore %d\n", | ||
| 7229 | br); | ||
| 6598 | free_verifier_state(&sl->state, false); | 7230 | free_verifier_state(&sl->state, false); |
| 6599 | kfree(sl); | 7231 | kfree(sl); |
| 6600 | env->peak_states--; | 7232 | env->peak_states--; |
| @@ -6618,20 +7250,27 @@ next: | |||
| 6618 | env->max_states_per_insn = states_cnt; | 7250 | env->max_states_per_insn = states_cnt; |
| 6619 | 7251 | ||
| 6620 | if (!env->allow_ptr_leaks && states_cnt > BPF_COMPLEXITY_LIMIT_STATES) | 7252 | if (!env->allow_ptr_leaks && states_cnt > BPF_COMPLEXITY_LIMIT_STATES) |
| 6621 | return 0; | 7253 | return push_jmp_history(env, cur); |
| 7254 | |||
| 7255 | if (!add_new_state) | ||
| 7256 | return push_jmp_history(env, cur); | ||
| 6622 | 7257 | ||
| 6623 | /* there were no equivalent states, remember current one. | 7258 | /* There were no equivalent states, remember the current one. |
| 6624 | * technically the current state is not proven to be safe yet, | 7259 | * Technically the current state is not proven to be safe yet, |
| 6625 | * but it will either reach outer most bpf_exit (which means it's safe) | 7260 | * but it will either reach outer most bpf_exit (which means it's safe) |
| 6626 | * or it will be rejected. Since there are no loops, we won't be | 7261 | * or it will be rejected. When there are no loops the verifier won't be |
| 6627 | * seeing this tuple (frame[0].callsite, frame[1].callsite, .. insn_idx) | 7262 | * seeing this tuple (frame[0].callsite, frame[1].callsite, .. insn_idx) |
| 6628 | * again on the way to bpf_exit | 7263 | * again on the way to bpf_exit. |
| 7264 | * When looping the sl->state.branches will be > 0 and this state | ||
| 7265 | * will not be considered for equivalence until branches == 0. | ||
| 6629 | */ | 7266 | */ |
| 6630 | new_sl = kzalloc(sizeof(struct bpf_verifier_state_list), GFP_KERNEL); | 7267 | new_sl = kzalloc(sizeof(struct bpf_verifier_state_list), GFP_KERNEL); |
| 6631 | if (!new_sl) | 7268 | if (!new_sl) |
| 6632 | return -ENOMEM; | 7269 | return -ENOMEM; |
| 6633 | env->total_states++; | 7270 | env->total_states++; |
| 6634 | env->peak_states++; | 7271 | env->peak_states++; |
| 7272 | env->prev_jmps_processed = env->jmps_processed; | ||
| 7273 | env->prev_insn_processed = env->insn_processed; | ||
| 6635 | 7274 | ||
| 6636 | /* add new state to the head of linked list */ | 7275 | /* add new state to the head of linked list */ |
| 6637 | new = &new_sl->state; | 7276 | new = &new_sl->state; |
| @@ -6642,6 +7281,12 @@ next: | |||
| 6642 | return err; | 7281 | return err; |
| 6643 | } | 7282 | } |
| 6644 | new->insn_idx = insn_idx; | 7283 | new->insn_idx = insn_idx; |
| 7284 | WARN_ONCE(new->branches != 1, | ||
| 7285 | "BUG is_state_visited:branches_to_explore=%d insn %d\n", new->branches, insn_idx); | ||
| 7286 | |||
| 7287 | cur->parent = new; | ||
| 7288 | cur->first_insn_idx = insn_idx; | ||
| 7289 | clear_jmp_history(cur); | ||
| 6645 | new_sl->next = *explored_state(env, insn_idx); | 7290 | new_sl->next = *explored_state(env, insn_idx); |
| 6646 | *explored_state(env, insn_idx) = new_sl; | 7291 | *explored_state(env, insn_idx) = new_sl; |
| 6647 | /* connect new state to parentage chain. Current frame needs all | 7292 | /* connect new state to parentage chain. Current frame needs all |
| @@ -6651,17 +7296,18 @@ next: | |||
| 6651 | * the state of the call instruction (with WRITTEN set), and r0 comes | 7296 | * the state of the call instruction (with WRITTEN set), and r0 comes |
| 6652 | * from callee with its full parentage chain, anyway. | 7297 | * from callee with its full parentage chain, anyway. |
| 6653 | */ | 7298 | */ |
| 6654 | for (j = 0; j <= cur->curframe; j++) | ||
| 6655 | for (i = j < cur->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++) | ||
| 6656 | cur->frame[j]->regs[i].parent = &new->frame[j]->regs[i]; | ||
| 6657 | /* clear write marks in current state: the writes we did are not writes | 7299 | /* clear write marks in current state: the writes we did are not writes |
| 6658 | * our child did, so they don't screen off its reads from us. | 7300 | * our child did, so they don't screen off its reads from us. |
| 6659 | * (There are no read marks in current state, because reads always mark | 7301 | * (There are no read marks in current state, because reads always mark |
| 6660 | * their parent and current state never has children yet. Only | 7302 | * their parent and current state never has children yet. Only |
| 6661 | * explored_states can get read marks.) | 7303 | * explored_states can get read marks.) |
| 6662 | */ | 7304 | */ |
| 6663 | for (i = 0; i < BPF_REG_FP; i++) | 7305 | for (j = 0; j <= cur->curframe; j++) { |
| 6664 | cur->frame[cur->curframe]->regs[i].live = REG_LIVE_NONE; | 7306 | for (i = j < cur->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++) |
| 7307 | cur->frame[j]->regs[i].parent = &new->frame[j]->regs[i]; | ||
| 7308 | for (i = 0; i < BPF_REG_FP; i++) | ||
| 7309 | cur->frame[j]->regs[i].live = REG_LIVE_NONE; | ||
| 7310 | } | ||
| 6665 | 7311 | ||
| 6666 | /* all stack frames are accessible from callee, clear them all */ | 7312 | /* all stack frames are accessible from callee, clear them all */ |
| 6667 | for (j = 0; j <= cur->curframe; j++) { | 7313 | for (j = 0; j <= cur->curframe; j++) { |
| @@ -6688,6 +7334,7 @@ static bool reg_type_mismatch_ok(enum bpf_reg_type type) | |||
| 6688 | case PTR_TO_SOCK_COMMON_OR_NULL: | 7334 | case PTR_TO_SOCK_COMMON_OR_NULL: |
| 6689 | case PTR_TO_TCP_SOCK: | 7335 | case PTR_TO_TCP_SOCK: |
| 6690 | case PTR_TO_TCP_SOCK_OR_NULL: | 7336 | case PTR_TO_TCP_SOCK_OR_NULL: |
| 7337 | case PTR_TO_XDP_SOCK: | ||
| 6691 | return false; | 7338 | return false; |
| 6692 | default: | 7339 | default: |
| 6693 | return true; | 7340 | return true; |
| @@ -6719,6 +7366,7 @@ static int do_check(struct bpf_verifier_env *env) | |||
| 6719 | struct bpf_reg_state *regs; | 7366 | struct bpf_reg_state *regs; |
| 6720 | int insn_cnt = env->prog->len; | 7367 | int insn_cnt = env->prog->len; |
| 6721 | bool do_print_state = false; | 7368 | bool do_print_state = false; |
| 7369 | int prev_insn_idx = -1; | ||
| 6722 | 7370 | ||
| 6723 | env->prev_linfo = NULL; | 7371 | env->prev_linfo = NULL; |
| 6724 | 7372 | ||
| @@ -6727,6 +7375,7 @@ static int do_check(struct bpf_verifier_env *env) | |||
| 6727 | return -ENOMEM; | 7375 | return -ENOMEM; |
| 6728 | state->curframe = 0; | 7376 | state->curframe = 0; |
| 6729 | state->speculative = false; | 7377 | state->speculative = false; |
| 7378 | state->branches = 1; | ||
| 6730 | state->frame[0] = kzalloc(sizeof(struct bpf_func_state), GFP_KERNEL); | 7379 | state->frame[0] = kzalloc(sizeof(struct bpf_func_state), GFP_KERNEL); |
| 6731 | if (!state->frame[0]) { | 7380 | if (!state->frame[0]) { |
| 6732 | kfree(state); | 7381 | kfree(state); |
| @@ -6743,6 +7392,7 @@ static int do_check(struct bpf_verifier_env *env) | |||
| 6743 | u8 class; | 7392 | u8 class; |
| 6744 | int err; | 7393 | int err; |
| 6745 | 7394 | ||
| 7395 | env->prev_insn_idx = prev_insn_idx; | ||
| 6746 | if (env->insn_idx >= insn_cnt) { | 7396 | if (env->insn_idx >= insn_cnt) { |
| 6747 | verbose(env, "invalid insn idx %d insn_cnt %d\n", | 7397 | verbose(env, "invalid insn idx %d insn_cnt %d\n", |
| 6748 | env->insn_idx, insn_cnt); | 7398 | env->insn_idx, insn_cnt); |
| @@ -6815,6 +7465,7 @@ static int do_check(struct bpf_verifier_env *env) | |||
| 6815 | 7465 | ||
| 6816 | regs = cur_regs(env); | 7466 | regs = cur_regs(env); |
| 6817 | env->insn_aux_data[env->insn_idx].seen = true; | 7467 | env->insn_aux_data[env->insn_idx].seen = true; |
| 7468 | prev_insn_idx = env->insn_idx; | ||
| 6818 | 7469 | ||
| 6819 | if (class == BPF_ALU || class == BPF_ALU64) { | 7470 | if (class == BPF_ALU || class == BPF_ALU64) { |
| 6820 | err = check_alu_op(env, insn); | 7471 | err = check_alu_op(env, insn); |
| @@ -6933,6 +7584,7 @@ static int do_check(struct bpf_verifier_env *env) | |||
| 6933 | } else if (class == BPF_JMP || class == BPF_JMP32) { | 7584 | } else if (class == BPF_JMP || class == BPF_JMP32) { |
| 6934 | u8 opcode = BPF_OP(insn->code); | 7585 | u8 opcode = BPF_OP(insn->code); |
| 6935 | 7586 | ||
| 7587 | env->jmps_processed++; | ||
| 6936 | if (opcode == BPF_CALL) { | 7588 | if (opcode == BPF_CALL) { |
| 6937 | if (BPF_SRC(insn->code) != BPF_K || | 7589 | if (BPF_SRC(insn->code) != BPF_K || |
| 6938 | insn->off != 0 || | 7590 | insn->off != 0 || |
| @@ -6987,7 +7639,6 @@ static int do_check(struct bpf_verifier_env *env) | |||
| 6987 | 7639 | ||
| 6988 | if (state->curframe) { | 7640 | if (state->curframe) { |
| 6989 | /* exit from nested function */ | 7641 | /* exit from nested function */ |
| 6990 | env->prev_insn_idx = env->insn_idx; | ||
| 6991 | err = prepare_func_exit(env, &env->insn_idx); | 7642 | err = prepare_func_exit(env, &env->insn_idx); |
| 6992 | if (err) | 7643 | if (err) |
| 6993 | return err; | 7644 | return err; |
| @@ -7018,7 +7669,8 @@ static int do_check(struct bpf_verifier_env *env) | |||
| 7018 | if (err) | 7669 | if (err) |
| 7019 | return err; | 7670 | return err; |
| 7020 | process_bpf_exit: | 7671 | process_bpf_exit: |
| 7021 | err = pop_stack(env, &env->prev_insn_idx, | 7672 | update_branch_counts(env, env->cur_state); |
| 7673 | err = pop_stack(env, &prev_insn_idx, | ||
| 7022 | &env->insn_idx); | 7674 | &env->insn_idx); |
| 7023 | if (err < 0) { | 7675 | if (err < 0) { |
| 7024 | if (err != -ENOENT) | 7676 | if (err != -ENOENT) |
| @@ -7821,6 +8473,9 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) | |||
| 7821 | case PTR_TO_TCP_SOCK: | 8473 | case PTR_TO_TCP_SOCK: |
| 7822 | convert_ctx_access = bpf_tcp_sock_convert_ctx_access; | 8474 | convert_ctx_access = bpf_tcp_sock_convert_ctx_access; |
| 7823 | break; | 8475 | break; |
| 8476 | case PTR_TO_XDP_SOCK: | ||
| 8477 | convert_ctx_access = bpf_xdp_sock_convert_ctx_access; | ||
| 8478 | break; | ||
| 7824 | default: | 8479 | default: |
| 7825 | continue; | 8480 | continue; |
| 7826 | } | 8481 | } |
diff --git a/kernel/bpf/xskmap.c b/kernel/bpf/xskmap.c index 22066c28ba61..ef7338cebd18 100644 --- a/kernel/bpf/xskmap.c +++ b/kernel/bpf/xskmap.c | |||
| @@ -17,8 +17,8 @@ struct xsk_map { | |||
| 17 | 17 | ||
| 18 | static struct bpf_map *xsk_map_alloc(union bpf_attr *attr) | 18 | static struct bpf_map *xsk_map_alloc(union bpf_attr *attr) |
| 19 | { | 19 | { |
| 20 | int cpu, err = -EINVAL; | ||
| 21 | struct xsk_map *m; | 20 | struct xsk_map *m; |
| 21 | int cpu, err; | ||
| 22 | u64 cost; | 22 | u64 cost; |
| 23 | 23 | ||
| 24 | if (!capable(CAP_NET_ADMIN)) | 24 | if (!capable(CAP_NET_ADMIN)) |
| @@ -152,6 +152,12 @@ void __xsk_map_flush(struct bpf_map *map) | |||
| 152 | 152 | ||
| 153 | static void *xsk_map_lookup_elem(struct bpf_map *map, void *key) | 153 | static void *xsk_map_lookup_elem(struct bpf_map *map, void *key) |
| 154 | { | 154 | { |
| 155 | WARN_ON_ONCE(!rcu_read_lock_held()); | ||
| 156 | return __xsk_map_lookup_elem(map, *(u32 *)key); | ||
| 157 | } | ||
| 158 | |||
| 159 | static void *xsk_map_lookup_elem_sys_only(struct bpf_map *map, void *key) | ||
| 160 | { | ||
| 155 | return ERR_PTR(-EOPNOTSUPP); | 161 | return ERR_PTR(-EOPNOTSUPP); |
| 156 | } | 162 | } |
| 157 | 163 | ||
| @@ -218,6 +224,7 @@ const struct bpf_map_ops xsk_map_ops = { | |||
| 218 | .map_free = xsk_map_free, | 224 | .map_free = xsk_map_free, |
| 219 | .map_get_next_key = xsk_map_get_next_key, | 225 | .map_get_next_key = xsk_map_get_next_key, |
| 220 | .map_lookup_elem = xsk_map_lookup_elem, | 226 | .map_lookup_elem = xsk_map_lookup_elem, |
| 227 | .map_lookup_elem_sys_only = xsk_map_lookup_elem_sys_only, | ||
| 221 | .map_update_elem = xsk_map_update_elem, | 228 | .map_update_elem = xsk_map_update_elem, |
| 222 | .map_delete_elem = xsk_map_delete_elem, | 229 | .map_delete_elem = xsk_map_delete_elem, |
| 223 | .map_check_btf = map_check_no_btf, | 230 | .map_check_btf = map_check_no_btf, |
