aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/bpf/Makefile1
-rw-r--r--kernel/bpf/devmap.c2
-rw-r--r--kernel/bpf/verifier.c793
-rw-r--r--kernel/bpf/xskmap.c9
4 files changed, 734 insertions, 71 deletions
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index 4c2fa3ac56f6..29d781061cd5 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -1,5 +1,6 @@
1# SPDX-License-Identifier: GPL-2.0 1# SPDX-License-Identifier: GPL-2.0
2obj-y := core.o 2obj-y := core.o
3CFLAGS_core.o += $(call cc-disable-warning, override-init)
3 4
4obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o 5obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o
5obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o 6obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index b84c44505e06..40e86a7e0ef0 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -80,8 +80,8 @@ static u64 dev_map_bitmap_size(const union bpf_attr *attr)
80static struct bpf_map *dev_map_alloc(union bpf_attr *attr) 80static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
81{ 81{
82 struct bpf_dtab *dtab; 82 struct bpf_dtab *dtab;
83 int err = -EINVAL;
84 u64 cost; 83 u64 cost;
84 int err;
85 85
86 if (!capable(CAP_NET_ADMIN)) 86 if (!capable(CAP_NET_ADMIN))
87 return ERR_PTR(-EPERM); 87 return ERR_PTR(-EPERM);
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 1e9d10b32984..0e079b2298f8 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -326,7 +326,8 @@ static bool type_is_sk_pointer(enum bpf_reg_type type)
326{ 326{
327 return type == PTR_TO_SOCKET || 327 return type == PTR_TO_SOCKET ||
328 type == PTR_TO_SOCK_COMMON || 328 type == PTR_TO_SOCK_COMMON ||
329 type == PTR_TO_TCP_SOCK; 329 type == PTR_TO_TCP_SOCK ||
330 type == PTR_TO_XDP_SOCK;
330} 331}
331 332
332static bool reg_type_may_be_null(enum bpf_reg_type type) 333static bool reg_type_may_be_null(enum bpf_reg_type type)
@@ -398,6 +399,7 @@ static const char * const reg_type_str[] = {
398 [PTR_TO_TCP_SOCK] = "tcp_sock", 399 [PTR_TO_TCP_SOCK] = "tcp_sock",
399 [PTR_TO_TCP_SOCK_OR_NULL] = "tcp_sock_or_null", 400 [PTR_TO_TCP_SOCK_OR_NULL] = "tcp_sock_or_null",
400 [PTR_TO_TP_BUFFER] = "tp_buffer", 401 [PTR_TO_TP_BUFFER] = "tp_buffer",
402 [PTR_TO_XDP_SOCK] = "xdp_sock",
401}; 403};
402 404
403static char slot_type_char[] = { 405static char slot_type_char[] = {
@@ -445,12 +447,12 @@ static void print_verifier_state(struct bpf_verifier_env *env,
445 verbose(env, " R%d", i); 447 verbose(env, " R%d", i);
446 print_liveness(env, reg->live); 448 print_liveness(env, reg->live);
447 verbose(env, "=%s", reg_type_str[t]); 449 verbose(env, "=%s", reg_type_str[t]);
450 if (t == SCALAR_VALUE && reg->precise)
451 verbose(env, "P");
448 if ((t == SCALAR_VALUE || t == PTR_TO_STACK) && 452 if ((t == SCALAR_VALUE || t == PTR_TO_STACK) &&
449 tnum_is_const(reg->var_off)) { 453 tnum_is_const(reg->var_off)) {
450 /* reg->off should be 0 for SCALAR_VALUE */ 454 /* reg->off should be 0 for SCALAR_VALUE */
451 verbose(env, "%lld", reg->var_off.value + reg->off); 455 verbose(env, "%lld", reg->var_off.value + reg->off);
452 if (t == PTR_TO_STACK)
453 verbose(env, ",call_%d", func(env, reg)->callsite);
454 } else { 456 } else {
455 verbose(env, "(id=%d", reg->id); 457 verbose(env, "(id=%d", reg->id);
456 if (reg_type_may_be_refcounted_or_null(t)) 458 if (reg_type_may_be_refcounted_or_null(t))
@@ -512,11 +514,17 @@ static void print_verifier_state(struct bpf_verifier_env *env,
512 continue; 514 continue;
513 verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE); 515 verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE);
514 print_liveness(env, state->stack[i].spilled_ptr.live); 516 print_liveness(env, state->stack[i].spilled_ptr.live);
515 if (state->stack[i].slot_type[0] == STACK_SPILL) 517 if (state->stack[i].slot_type[0] == STACK_SPILL) {
516 verbose(env, "=%s", 518 reg = &state->stack[i].spilled_ptr;
517 reg_type_str[state->stack[i].spilled_ptr.type]); 519 t = reg->type;
518 else 520 verbose(env, "=%s", reg_type_str[t]);
521 if (t == SCALAR_VALUE && reg->precise)
522 verbose(env, "P");
523 if (t == SCALAR_VALUE && tnum_is_const(reg->var_off))
524 verbose(env, "%lld", reg->var_off.value + reg->off);
525 } else {
519 verbose(env, "=%s", types_buf); 526 verbose(env, "=%s", types_buf);
527 }
520 } 528 }
521 if (state->acquired_refs && state->refs[0].id) { 529 if (state->acquired_refs && state->refs[0].id) {
522 verbose(env, " refs=%d", state->refs[0].id); 530 verbose(env, " refs=%d", state->refs[0].id);
@@ -665,6 +673,13 @@ static void free_func_state(struct bpf_func_state *state)
665 kfree(state); 673 kfree(state);
666} 674}
667 675
676static void clear_jmp_history(struct bpf_verifier_state *state)
677{
678 kfree(state->jmp_history);
679 state->jmp_history = NULL;
680 state->jmp_history_cnt = 0;
681}
682
668static void free_verifier_state(struct bpf_verifier_state *state, 683static void free_verifier_state(struct bpf_verifier_state *state,
669 bool free_self) 684 bool free_self)
670{ 685{
@@ -674,6 +689,7 @@ static void free_verifier_state(struct bpf_verifier_state *state,
674 free_func_state(state->frame[i]); 689 free_func_state(state->frame[i]);
675 state->frame[i] = NULL; 690 state->frame[i] = NULL;
676 } 691 }
692 clear_jmp_history(state);
677 if (free_self) 693 if (free_self)
678 kfree(state); 694 kfree(state);
679} 695}
@@ -701,8 +717,18 @@ static int copy_verifier_state(struct bpf_verifier_state *dst_state,
701 const struct bpf_verifier_state *src) 717 const struct bpf_verifier_state *src)
702{ 718{
703 struct bpf_func_state *dst; 719 struct bpf_func_state *dst;
720 u32 jmp_sz = sizeof(struct bpf_idx_pair) * src->jmp_history_cnt;
704 int i, err; 721 int i, err;
705 722
723 if (dst_state->jmp_history_cnt < src->jmp_history_cnt) {
724 kfree(dst_state->jmp_history);
725 dst_state->jmp_history = kmalloc(jmp_sz, GFP_USER);
726 if (!dst_state->jmp_history)
727 return -ENOMEM;
728 }
729 memcpy(dst_state->jmp_history, src->jmp_history, jmp_sz);
730 dst_state->jmp_history_cnt = src->jmp_history_cnt;
731
706 /* if dst has more stack frames then src frame, free them */ 732 /* if dst has more stack frames then src frame, free them */
707 for (i = src->curframe + 1; i <= dst_state->curframe; i++) { 733 for (i = src->curframe + 1; i <= dst_state->curframe; i++) {
708 free_func_state(dst_state->frame[i]); 734 free_func_state(dst_state->frame[i]);
@@ -711,6 +737,10 @@ static int copy_verifier_state(struct bpf_verifier_state *dst_state,
711 dst_state->speculative = src->speculative; 737 dst_state->speculative = src->speculative;
712 dst_state->curframe = src->curframe; 738 dst_state->curframe = src->curframe;
713 dst_state->active_spin_lock = src->active_spin_lock; 739 dst_state->active_spin_lock = src->active_spin_lock;
740 dst_state->branches = src->branches;
741 dst_state->parent = src->parent;
742 dst_state->first_insn_idx = src->first_insn_idx;
743 dst_state->last_insn_idx = src->last_insn_idx;
714 for (i = 0; i <= src->curframe; i++) { 744 for (i = 0; i <= src->curframe; i++) {
715 dst = dst_state->frame[i]; 745 dst = dst_state->frame[i];
716 if (!dst) { 746 if (!dst) {
@@ -726,6 +756,23 @@ static int copy_verifier_state(struct bpf_verifier_state *dst_state,
726 return 0; 756 return 0;
727} 757}
728 758
759static void update_branch_counts(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
760{
761 while (st) {
762 u32 br = --st->branches;
763
764 /* WARN_ON(br > 1) technically makes sense here,
765 * but see comment in push_stack(), hence:
766 */
767 WARN_ONCE((int)br < 0,
768 "BUG update_branch_counts:branches_to_explore=%d\n",
769 br);
770 if (br)
771 break;
772 st = st->parent;
773 }
774}
775
729static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx, 776static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx,
730 int *insn_idx) 777 int *insn_idx)
731{ 778{
@@ -779,6 +826,18 @@ static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env,
779 env->stack_size); 826 env->stack_size);
780 goto err; 827 goto err;
781 } 828 }
829 if (elem->st.parent) {
830 ++elem->st.parent->branches;
831 /* WARN_ON(branches > 2) technically makes sense here,
832 * but
833 * 1. speculative states will bump 'branches' for non-branch
834 * instructions
835 * 2. is_state_visited() heuristics may decide not to create
836 * a new state for a sequence of branches and all such current
837 * and cloned states will be pointing to a single parent state
838 * which might have large 'branches' count.
839 */
840 }
782 return &elem->st; 841 return &elem->st;
783err: 842err:
784 free_verifier_state(env->cur_state, true); 843 free_verifier_state(env->cur_state, true);
@@ -926,6 +985,9 @@ static void __mark_reg_unbounded(struct bpf_reg_state *reg)
926 reg->smax_value = S64_MAX; 985 reg->smax_value = S64_MAX;
927 reg->umin_value = 0; 986 reg->umin_value = 0;
928 reg->umax_value = U64_MAX; 987 reg->umax_value = U64_MAX;
988
989 /* constant backtracking is enabled for root only for now */
990 reg->precise = capable(CAP_SYS_ADMIN) ? false : true;
929} 991}
930 992
931/* Mark a register as having a completely unknown (scalar) value. */ 993/* Mark a register as having a completely unknown (scalar) value. */
@@ -1337,6 +1399,389 @@ static int check_reg_arg(struct bpf_verifier_env *env, u32 regno,
1337 return 0; 1399 return 0;
1338} 1400}
1339 1401
1402/* for any branch, call, exit record the history of jmps in the given state */
1403static int push_jmp_history(struct bpf_verifier_env *env,
1404 struct bpf_verifier_state *cur)
1405{
1406 u32 cnt = cur->jmp_history_cnt;
1407 struct bpf_idx_pair *p;
1408
1409 cnt++;
1410 p = krealloc(cur->jmp_history, cnt * sizeof(*p), GFP_USER);
1411 if (!p)
1412 return -ENOMEM;
1413 p[cnt - 1].idx = env->insn_idx;
1414 p[cnt - 1].prev_idx = env->prev_insn_idx;
1415 cur->jmp_history = p;
1416 cur->jmp_history_cnt = cnt;
1417 return 0;
1418}
1419
1420/* Backtrack one insn at a time. If idx is not at the top of recorded
1421 * history then previous instruction came from straight line execution.
1422 */
1423static int get_prev_insn_idx(struct bpf_verifier_state *st, int i,
1424 u32 *history)
1425{
1426 u32 cnt = *history;
1427
1428 if (cnt && st->jmp_history[cnt - 1].idx == i) {
1429 i = st->jmp_history[cnt - 1].prev_idx;
1430 (*history)--;
1431 } else {
1432 i--;
1433 }
1434 return i;
1435}
1436
1437/* For given verifier state backtrack_insn() is called from the last insn to
1438 * the first insn. Its purpose is to compute a bitmask of registers and
1439 * stack slots that needs precision in the parent verifier state.
1440 */
1441static int backtrack_insn(struct bpf_verifier_env *env, int idx,
1442 u32 *reg_mask, u64 *stack_mask)
1443{
1444 const struct bpf_insn_cbs cbs = {
1445 .cb_print = verbose,
1446 .private_data = env,
1447 };
1448 struct bpf_insn *insn = env->prog->insnsi + idx;
1449 u8 class = BPF_CLASS(insn->code);
1450 u8 opcode = BPF_OP(insn->code);
1451 u8 mode = BPF_MODE(insn->code);
1452 u32 dreg = 1u << insn->dst_reg;
1453 u32 sreg = 1u << insn->src_reg;
1454 u32 spi;
1455
1456 if (insn->code == 0)
1457 return 0;
1458 if (env->log.level & BPF_LOG_LEVEL) {
1459 verbose(env, "regs=%x stack=%llx before ", *reg_mask, *stack_mask);
1460 verbose(env, "%d: ", idx);
1461 print_bpf_insn(&cbs, insn, env->allow_ptr_leaks);
1462 }
1463
1464 if (class == BPF_ALU || class == BPF_ALU64) {
1465 if (!(*reg_mask & dreg))
1466 return 0;
1467 if (opcode == BPF_MOV) {
1468 if (BPF_SRC(insn->code) == BPF_X) {
1469 /* dreg = sreg
1470 * dreg needs precision after this insn
1471 * sreg needs precision before this insn
1472 */
1473 *reg_mask &= ~dreg;
1474 *reg_mask |= sreg;
1475 } else {
1476 /* dreg = K
1477 * dreg needs precision after this insn.
1478 * Corresponding register is already marked
1479 * as precise=true in this verifier state.
1480 * No further markings in parent are necessary
1481 */
1482 *reg_mask &= ~dreg;
1483 }
1484 } else {
1485 if (BPF_SRC(insn->code) == BPF_X) {
1486 /* dreg += sreg
1487 * both dreg and sreg need precision
1488 * before this insn
1489 */
1490 *reg_mask |= sreg;
1491 } /* else dreg += K
1492 * dreg still needs precision before this insn
1493 */
1494 }
1495 } else if (class == BPF_LDX) {
1496 if (!(*reg_mask & dreg))
1497 return 0;
1498 *reg_mask &= ~dreg;
1499
1500 /* scalars can only be spilled into stack w/o losing precision.
1501 * Load from any other memory can be zero extended.
1502 * The desire to keep that precision is already indicated
1503 * by 'precise' mark in corresponding register of this state.
1504 * No further tracking necessary.
1505 */
1506 if (insn->src_reg != BPF_REG_FP)
1507 return 0;
1508 if (BPF_SIZE(insn->code) != BPF_DW)
1509 return 0;
1510
1511 /* dreg = *(u64 *)[fp - off] was a fill from the stack.
1512 * that [fp - off] slot contains scalar that needs to be
1513 * tracked with precision
1514 */
1515 spi = (-insn->off - 1) / BPF_REG_SIZE;
1516 if (spi >= 64) {
1517 verbose(env, "BUG spi %d\n", spi);
1518 WARN_ONCE(1, "verifier backtracking bug");
1519 return -EFAULT;
1520 }
1521 *stack_mask |= 1ull << spi;
1522 } else if (class == BPF_STX) {
1523 if (*reg_mask & dreg)
1524 /* stx shouldn't be using _scalar_ dst_reg
1525 * to access memory. It means backtracking
1526 * encountered a case of pointer subtraction.
1527 */
1528 return -ENOTSUPP;
1529 /* scalars can only be spilled into stack */
1530 if (insn->dst_reg != BPF_REG_FP)
1531 return 0;
1532 if (BPF_SIZE(insn->code) != BPF_DW)
1533 return 0;
1534 spi = (-insn->off - 1) / BPF_REG_SIZE;
1535 if (spi >= 64) {
1536 verbose(env, "BUG spi %d\n", spi);
1537 WARN_ONCE(1, "verifier backtracking bug");
1538 return -EFAULT;
1539 }
1540 if (!(*stack_mask & (1ull << spi)))
1541 return 0;
1542 *stack_mask &= ~(1ull << spi);
1543 *reg_mask |= sreg;
1544 } else if (class == BPF_JMP || class == BPF_JMP32) {
1545 if (opcode == BPF_CALL) {
1546 if (insn->src_reg == BPF_PSEUDO_CALL)
1547 return -ENOTSUPP;
1548 /* regular helper call sets R0 */
1549 *reg_mask &= ~1;
1550 if (*reg_mask & 0x3f) {
1551 /* if backtracing was looking for registers R1-R5
1552 * they should have been found already.
1553 */
1554 verbose(env, "BUG regs %x\n", *reg_mask);
1555 WARN_ONCE(1, "verifier backtracking bug");
1556 return -EFAULT;
1557 }
1558 } else if (opcode == BPF_EXIT) {
1559 return -ENOTSUPP;
1560 }
1561 } else if (class == BPF_LD) {
1562 if (!(*reg_mask & dreg))
1563 return 0;
1564 *reg_mask &= ~dreg;
1565 /* It's ld_imm64 or ld_abs or ld_ind.
1566 * For ld_imm64 no further tracking of precision
1567 * into parent is necessary
1568 */
1569 if (mode == BPF_IND || mode == BPF_ABS)
1570 /* to be analyzed */
1571 return -ENOTSUPP;
1572 } else if (class == BPF_ST) {
1573 if (*reg_mask & dreg)
1574 /* likely pointer subtraction */
1575 return -ENOTSUPP;
1576 }
1577 return 0;
1578}
1579
1580/* the scalar precision tracking algorithm:
1581 * . at the start all registers have precise=false.
1582 * . scalar ranges are tracked as normal through alu and jmp insns.
1583 * . once precise value of the scalar register is used in:
1584 * . ptr + scalar alu
1585 * . if (scalar cond K|scalar)
1586 * . helper_call(.., scalar, ...) where ARG_CONST is expected
1587 * backtrack through the verifier states and mark all registers and
1588 * stack slots with spilled constants that these scalar regisers
1589 * should be precise.
1590 * . during state pruning two registers (or spilled stack slots)
1591 * are equivalent if both are not precise.
1592 *
1593 * Note the verifier cannot simply walk register parentage chain,
1594 * since many different registers and stack slots could have been
1595 * used to compute single precise scalar.
1596 *
1597 * The approach of starting with precise=true for all registers and then
1598 * backtrack to mark a register as not precise when the verifier detects
1599 * that program doesn't care about specific value (e.g., when helper
1600 * takes register as ARG_ANYTHING parameter) is not safe.
1601 *
1602 * It's ok to walk single parentage chain of the verifier states.
1603 * It's possible that this backtracking will go all the way till 1st insn.
1604 * All other branches will be explored for needing precision later.
1605 *
1606 * The backtracking needs to deal with cases like:
1607 * R8=map_value(id=0,off=0,ks=4,vs=1952,imm=0) R9_w=map_value(id=0,off=40,ks=4,vs=1952,imm=0)
1608 * r9 -= r8
1609 * r5 = r9
1610 * if r5 > 0x79f goto pc+7
1611 * R5_w=inv(id=0,umax_value=1951,var_off=(0x0; 0x7ff))
1612 * r5 += 1
1613 * ...
1614 * call bpf_perf_event_output#25
1615 * where .arg5_type = ARG_CONST_SIZE_OR_ZERO
1616 *
1617 * and this case:
1618 * r6 = 1
1619 * call foo // uses callee's r6 inside to compute r0
1620 * r0 += r6
1621 * if r0 == 0 goto
1622 *
1623 * to track above reg_mask/stack_mask needs to be independent for each frame.
1624 *
1625 * Also if parent's curframe > frame where backtracking started,
1626 * the verifier need to mark registers in both frames, otherwise callees
1627 * may incorrectly prune callers. This is similar to
1628 * commit 7640ead93924 ("bpf: verifier: make sure callees don't prune with caller differences")
1629 *
1630 * For now backtracking falls back into conservative marking.
1631 */
1632static void mark_all_scalars_precise(struct bpf_verifier_env *env,
1633 struct bpf_verifier_state *st)
1634{
1635 struct bpf_func_state *func;
1636 struct bpf_reg_state *reg;
1637 int i, j;
1638
1639 /* big hammer: mark all scalars precise in this path.
1640 * pop_stack may still get !precise scalars.
1641 */
1642 for (; st; st = st->parent)
1643 for (i = 0; i <= st->curframe; i++) {
1644 func = st->frame[i];
1645 for (j = 0; j < BPF_REG_FP; j++) {
1646 reg = &func->regs[j];
1647 if (reg->type != SCALAR_VALUE)
1648 continue;
1649 reg->precise = true;
1650 }
1651 for (j = 0; j < func->allocated_stack / BPF_REG_SIZE; j++) {
1652 if (func->stack[j].slot_type[0] != STACK_SPILL)
1653 continue;
1654 reg = &func->stack[j].spilled_ptr;
1655 if (reg->type != SCALAR_VALUE)
1656 continue;
1657 reg->precise = true;
1658 }
1659 }
1660}
1661
1662static int mark_chain_precision(struct bpf_verifier_env *env, int regno)
1663{
1664 struct bpf_verifier_state *st = env->cur_state;
1665 int first_idx = st->first_insn_idx;
1666 int last_idx = env->insn_idx;
1667 struct bpf_func_state *func;
1668 struct bpf_reg_state *reg;
1669 u32 reg_mask = 1u << regno;
1670 u64 stack_mask = 0;
1671 bool skip_first = true;
1672 int i, err;
1673
1674 if (!env->allow_ptr_leaks)
1675 /* backtracking is root only for now */
1676 return 0;
1677
1678 func = st->frame[st->curframe];
1679 reg = &func->regs[regno];
1680 if (reg->type != SCALAR_VALUE) {
1681 WARN_ONCE(1, "backtracing misuse");
1682 return -EFAULT;
1683 }
1684 if (reg->precise)
1685 return 0;
1686 func->regs[regno].precise = true;
1687
1688 for (;;) {
1689 DECLARE_BITMAP(mask, 64);
1690 bool new_marks = false;
1691 u32 history = st->jmp_history_cnt;
1692
1693 if (env->log.level & BPF_LOG_LEVEL)
1694 verbose(env, "last_idx %d first_idx %d\n", last_idx, first_idx);
1695 for (i = last_idx;;) {
1696 if (skip_first) {
1697 err = 0;
1698 skip_first = false;
1699 } else {
1700 err = backtrack_insn(env, i, &reg_mask, &stack_mask);
1701 }
1702 if (err == -ENOTSUPP) {
1703 mark_all_scalars_precise(env, st);
1704 return 0;
1705 } else if (err) {
1706 return err;
1707 }
1708 if (!reg_mask && !stack_mask)
1709 /* Found assignment(s) into tracked register in this state.
1710 * Since this state is already marked, just return.
1711 * Nothing to be tracked further in the parent state.
1712 */
1713 return 0;
1714 if (i == first_idx)
1715 break;
1716 i = get_prev_insn_idx(st, i, &history);
1717 if (i >= env->prog->len) {
1718 /* This can happen if backtracking reached insn 0
1719 * and there are still reg_mask or stack_mask
1720 * to backtrack.
1721 * It means the backtracking missed the spot where
1722 * particular register was initialized with a constant.
1723 */
1724 verbose(env, "BUG backtracking idx %d\n", i);
1725 WARN_ONCE(1, "verifier backtracking bug");
1726 return -EFAULT;
1727 }
1728 }
1729 st = st->parent;
1730 if (!st)
1731 break;
1732
1733 func = st->frame[st->curframe];
1734 bitmap_from_u64(mask, reg_mask);
1735 for_each_set_bit(i, mask, 32) {
1736 reg = &func->regs[i];
1737 if (reg->type != SCALAR_VALUE)
1738 continue;
1739 if (!reg->precise)
1740 new_marks = true;
1741 reg->precise = true;
1742 }
1743
1744 bitmap_from_u64(mask, stack_mask);
1745 for_each_set_bit(i, mask, 64) {
1746 if (i >= func->allocated_stack / BPF_REG_SIZE) {
1747 /* This can happen if backtracking
1748 * is propagating stack precision where
1749 * caller has larger stack frame
1750 * than callee, but backtrack_insn() should
1751 * have returned -ENOTSUPP.
1752 */
1753 verbose(env, "BUG spi %d stack_size %d\n",
1754 i, func->allocated_stack);
1755 WARN_ONCE(1, "verifier backtracking bug");
1756 return -EFAULT;
1757 }
1758
1759 if (func->stack[i].slot_type[0] != STACK_SPILL)
1760 continue;
1761 reg = &func->stack[i].spilled_ptr;
1762 if (reg->type != SCALAR_VALUE)
1763 continue;
1764 if (!reg->precise)
1765 new_marks = true;
1766 reg->precise = true;
1767 }
1768 if (env->log.level & BPF_LOG_LEVEL) {
1769 print_verifier_state(env, func);
1770 verbose(env, "parent %s regs=%x stack=%llx marks\n",
1771 new_marks ? "didn't have" : "already had",
1772 reg_mask, stack_mask);
1773 }
1774
1775 if (!new_marks)
1776 break;
1777
1778 last_idx = st->last_insn_idx;
1779 first_idx = st->first_insn_idx;
1780 }
1781 return 0;
1782}
1783
1784
1340static bool is_spillable_regtype(enum bpf_reg_type type) 1785static bool is_spillable_regtype(enum bpf_reg_type type)
1341{ 1786{
1342 switch (type) { 1787 switch (type) {
@@ -1355,6 +1800,7 @@ static bool is_spillable_regtype(enum bpf_reg_type type)
1355 case PTR_TO_SOCK_COMMON_OR_NULL: 1800 case PTR_TO_SOCK_COMMON_OR_NULL:
1356 case PTR_TO_TCP_SOCK: 1801 case PTR_TO_TCP_SOCK:
1357 case PTR_TO_TCP_SOCK_OR_NULL: 1802 case PTR_TO_TCP_SOCK_OR_NULL:
1803 case PTR_TO_XDP_SOCK:
1358 return true; 1804 return true;
1359 default: 1805 default:
1360 return false; 1806 return false;
@@ -1367,6 +1813,23 @@ static bool register_is_null(struct bpf_reg_state *reg)
1367 return reg->type == SCALAR_VALUE && tnum_equals_const(reg->var_off, 0); 1813 return reg->type == SCALAR_VALUE && tnum_equals_const(reg->var_off, 0);
1368} 1814}
1369 1815
1816static bool register_is_const(struct bpf_reg_state *reg)
1817{
1818 return reg->type == SCALAR_VALUE && tnum_is_const(reg->var_off);
1819}
1820
1821static void save_register_state(struct bpf_func_state *state,
1822 int spi, struct bpf_reg_state *reg)
1823{
1824 int i;
1825
1826 state->stack[spi].spilled_ptr = *reg;
1827 state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
1828
1829 for (i = 0; i < BPF_REG_SIZE; i++)
1830 state->stack[spi].slot_type[i] = STACK_SPILL;
1831}
1832
1370/* check_stack_read/write functions track spill/fill of registers, 1833/* check_stack_read/write functions track spill/fill of registers,
1371 * stack boundary and alignment are checked in check_mem_access() 1834 * stack boundary and alignment are checked in check_mem_access()
1372 */ 1835 */
@@ -1376,7 +1839,8 @@ static int check_stack_write(struct bpf_verifier_env *env,
1376{ 1839{
1377 struct bpf_func_state *cur; /* state of the current function */ 1840 struct bpf_func_state *cur; /* state of the current function */
1378 int i, slot = -off - 1, spi = slot / BPF_REG_SIZE, err; 1841 int i, slot = -off - 1, spi = slot / BPF_REG_SIZE, err;
1379 enum bpf_reg_type type; 1842 u32 dst_reg = env->prog->insnsi[insn_idx].dst_reg;
1843 struct bpf_reg_state *reg = NULL;
1380 1844
1381 err = realloc_func_state(state, round_up(slot + 1, BPF_REG_SIZE), 1845 err = realloc_func_state(state, round_up(slot + 1, BPF_REG_SIZE),
1382 state->acquired_refs, true); 1846 state->acquired_refs, true);
@@ -1393,27 +1857,48 @@ static int check_stack_write(struct bpf_verifier_env *env,
1393 } 1857 }
1394 1858
1395 cur = env->cur_state->frame[env->cur_state->curframe]; 1859 cur = env->cur_state->frame[env->cur_state->curframe];
1396 if (value_regno >= 0 && 1860 if (value_regno >= 0)
1397 is_spillable_regtype((type = cur->regs[value_regno].type))) { 1861 reg = &cur->regs[value_regno];
1398 1862
1863 if (reg && size == BPF_REG_SIZE && register_is_const(reg) &&
1864 !register_is_null(reg) && env->allow_ptr_leaks) {
1865 if (dst_reg != BPF_REG_FP) {
1866 /* The backtracking logic can only recognize explicit
1867 * stack slot address like [fp - 8]. Other spill of
1868 * scalar via different register has to be conervative.
1869 * Backtrack from here and mark all registers as precise
1870 * that contributed into 'reg' being a constant.
1871 */
1872 err = mark_chain_precision(env, value_regno);
1873 if (err)
1874 return err;
1875 }
1876 save_register_state(state, spi, reg);
1877 } else if (reg && is_spillable_regtype(reg->type)) {
1399 /* register containing pointer is being spilled into stack */ 1878 /* register containing pointer is being spilled into stack */
1400 if (size != BPF_REG_SIZE) { 1879 if (size != BPF_REG_SIZE) {
1880 verbose_linfo(env, insn_idx, "; ");
1401 verbose(env, "invalid size of register spill\n"); 1881 verbose(env, "invalid size of register spill\n");
1402 return -EACCES; 1882 return -EACCES;
1403 } 1883 }
1404 1884
1405 if (state != cur && type == PTR_TO_STACK) { 1885 if (state != cur && reg->type == PTR_TO_STACK) {
1406 verbose(env, "cannot spill pointers to stack into stack frame of the caller\n"); 1886 verbose(env, "cannot spill pointers to stack into stack frame of the caller\n");
1407 return -EINVAL; 1887 return -EINVAL;
1408 } 1888 }
1409 1889
1410 /* save register state */ 1890 if (!env->allow_ptr_leaks) {
1411 state->stack[spi].spilled_ptr = cur->regs[value_regno]; 1891 bool sanitize = false;
1412 state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
1413 1892
1414 for (i = 0; i < BPF_REG_SIZE; i++) { 1893 if (state->stack[spi].slot_type[0] == STACK_SPILL &&
1415 if (state->stack[spi].slot_type[i] == STACK_MISC && 1894 register_is_const(&state->stack[spi].spilled_ptr))
1416 !env->allow_ptr_leaks) { 1895 sanitize = true;
1896 for (i = 0; i < BPF_REG_SIZE; i++)
1897 if (state->stack[spi].slot_type[i] == STACK_MISC) {
1898 sanitize = true;
1899 break;
1900 }
1901 if (sanitize) {
1417 int *poff = &env->insn_aux_data[insn_idx].sanitize_stack_off; 1902 int *poff = &env->insn_aux_data[insn_idx].sanitize_stack_off;
1418 int soff = (-spi - 1) * BPF_REG_SIZE; 1903 int soff = (-spi - 1) * BPF_REG_SIZE;
1419 1904
@@ -1436,8 +1921,8 @@ static int check_stack_write(struct bpf_verifier_env *env,
1436 } 1921 }
1437 *poff = soff; 1922 *poff = soff;
1438 } 1923 }
1439 state->stack[spi].slot_type[i] = STACK_SPILL;
1440 } 1924 }
1925 save_register_state(state, spi, reg);
1441 } else { 1926 } else {
1442 u8 type = STACK_MISC; 1927 u8 type = STACK_MISC;
1443 1928
@@ -1460,9 +1945,13 @@ static int check_stack_write(struct bpf_verifier_env *env,
1460 state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN; 1945 state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
1461 1946
1462 /* when we zero initialize stack slots mark them as such */ 1947 /* when we zero initialize stack slots mark them as such */
1463 if (value_regno >= 0 && 1948 if (reg && register_is_null(reg)) {
1464 register_is_null(&cur->regs[value_regno])) 1949 /* backtracking doesn't work for STACK_ZERO yet. */
1950 err = mark_chain_precision(env, value_regno);
1951 if (err)
1952 return err;
1465 type = STACK_ZERO; 1953 type = STACK_ZERO;
1954 }
1466 1955
1467 /* Mark slots affected by this stack write. */ 1956 /* Mark slots affected by this stack write. */
1468 for (i = 0; i < size; i++) 1957 for (i = 0; i < size; i++)
@@ -1479,6 +1968,7 @@ static int check_stack_read(struct bpf_verifier_env *env,
1479 struct bpf_verifier_state *vstate = env->cur_state; 1968 struct bpf_verifier_state *vstate = env->cur_state;
1480 struct bpf_func_state *state = vstate->frame[vstate->curframe]; 1969 struct bpf_func_state *state = vstate->frame[vstate->curframe];
1481 int i, slot = -off - 1, spi = slot / BPF_REG_SIZE; 1970 int i, slot = -off - 1, spi = slot / BPF_REG_SIZE;
1971 struct bpf_reg_state *reg;
1482 u8 *stype; 1972 u8 *stype;
1483 1973
1484 if (reg_state->allocated_stack <= slot) { 1974 if (reg_state->allocated_stack <= slot) {
@@ -1487,11 +1977,21 @@ static int check_stack_read(struct bpf_verifier_env *env,
1487 return -EACCES; 1977 return -EACCES;
1488 } 1978 }
1489 stype = reg_state->stack[spi].slot_type; 1979 stype = reg_state->stack[spi].slot_type;
1980 reg = &reg_state->stack[spi].spilled_ptr;
1490 1981
1491 if (stype[0] == STACK_SPILL) { 1982 if (stype[0] == STACK_SPILL) {
1492 if (size != BPF_REG_SIZE) { 1983 if (size != BPF_REG_SIZE) {
1493 verbose(env, "invalid size of register spill\n"); 1984 if (reg->type != SCALAR_VALUE) {
1494 return -EACCES; 1985 verbose_linfo(env, env->insn_idx, "; ");
1986 verbose(env, "invalid size of register fill\n");
1987 return -EACCES;
1988 }
1989 if (value_regno >= 0) {
1990 mark_reg_unknown(env, state->regs, value_regno);
1991 state->regs[value_regno].live |= REG_LIVE_WRITTEN;
1992 }
1993 mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
1994 return 0;
1495 } 1995 }
1496 for (i = 1; i < BPF_REG_SIZE; i++) { 1996 for (i = 1; i < BPF_REG_SIZE; i++) {
1497 if (stype[(slot - i) % BPF_REG_SIZE] != STACK_SPILL) { 1997 if (stype[(slot - i) % BPF_REG_SIZE] != STACK_SPILL) {
@@ -1502,17 +2002,14 @@ static int check_stack_read(struct bpf_verifier_env *env,
1502 2002
1503 if (value_regno >= 0) { 2003 if (value_regno >= 0) {
1504 /* restore register state from stack */ 2004 /* restore register state from stack */
1505 state->regs[value_regno] = reg_state->stack[spi].spilled_ptr; 2005 state->regs[value_regno] = *reg;
1506 /* mark reg as written since spilled pointer state likely 2006 /* mark reg as written since spilled pointer state likely
1507 * has its liveness marks cleared by is_state_visited() 2007 * has its liveness marks cleared by is_state_visited()
1508 * which resets stack/reg liveness for state transitions 2008 * which resets stack/reg liveness for state transitions
1509 */ 2009 */
1510 state->regs[value_regno].live |= REG_LIVE_WRITTEN; 2010 state->regs[value_regno].live |= REG_LIVE_WRITTEN;
1511 } 2011 }
1512 mark_reg_read(env, &reg_state->stack[spi].spilled_ptr, 2012 mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
1513 reg_state->stack[spi].spilled_ptr.parent,
1514 REG_LIVE_READ64);
1515 return 0;
1516 } else { 2013 } else {
1517 int zeros = 0; 2014 int zeros = 0;
1518 2015
@@ -1527,23 +2024,32 @@ static int check_stack_read(struct bpf_verifier_env *env,
1527 off, i, size); 2024 off, i, size);
1528 return -EACCES; 2025 return -EACCES;
1529 } 2026 }
1530 mark_reg_read(env, &reg_state->stack[spi].spilled_ptr, 2027 mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
1531 reg_state->stack[spi].spilled_ptr.parent,
1532 REG_LIVE_READ64);
1533 if (value_regno >= 0) { 2028 if (value_regno >= 0) {
1534 if (zeros == size) { 2029 if (zeros == size) {
1535 /* any size read into register is zero extended, 2030 /* any size read into register is zero extended,
1536 * so the whole register == const_zero 2031 * so the whole register == const_zero
1537 */ 2032 */
1538 __mark_reg_const_zero(&state->regs[value_regno]); 2033 __mark_reg_const_zero(&state->regs[value_regno]);
2034 /* backtracking doesn't support STACK_ZERO yet,
2035 * so mark it precise here, so that later
2036 * backtracking can stop here.
2037 * Backtracking may not need this if this register
2038 * doesn't participate in pointer adjustment.
2039 * Forward propagation of precise flag is not
2040 * necessary either. This mark is only to stop
2041 * backtracking. Any register that contributed
2042 * to const 0 was marked precise before spill.
2043 */
2044 state->regs[value_regno].precise = true;
1539 } else { 2045 } else {
1540 /* have read misc data from the stack */ 2046 /* have read misc data from the stack */
1541 mark_reg_unknown(env, state->regs, value_regno); 2047 mark_reg_unknown(env, state->regs, value_regno);
1542 } 2048 }
1543 state->regs[value_regno].live |= REG_LIVE_WRITTEN; 2049 state->regs[value_regno].live |= REG_LIVE_WRITTEN;
1544 } 2050 }
1545 return 0;
1546 } 2051 }
2052 return 0;
1547} 2053}
1548 2054
1549static int check_stack_access(struct bpf_verifier_env *env, 2055static int check_stack_access(struct bpf_verifier_env *env,
@@ -1835,6 +2341,9 @@ static int check_sock_access(struct bpf_verifier_env *env, int insn_idx,
1835 case PTR_TO_TCP_SOCK: 2341 case PTR_TO_TCP_SOCK:
1836 valid = bpf_tcp_sock_is_valid_access(off, size, t, &info); 2342 valid = bpf_tcp_sock_is_valid_access(off, size, t, &info);
1837 break; 2343 break;
2344 case PTR_TO_XDP_SOCK:
2345 valid = bpf_xdp_sock_is_valid_access(off, size, t, &info);
2346 break;
1838 default: 2347 default:
1839 valid = false; 2348 valid = false;
1840 } 2349 }
@@ -1999,6 +2508,9 @@ static int check_ptr_alignment(struct bpf_verifier_env *env,
1999 case PTR_TO_TCP_SOCK: 2508 case PTR_TO_TCP_SOCK:
2000 pointer_desc = "tcp_sock "; 2509 pointer_desc = "tcp_sock ";
2001 break; 2510 break;
2511 case PTR_TO_XDP_SOCK:
2512 pointer_desc = "xdp_sock ";
2513 break;
2002 default: 2514 default:
2003 break; 2515 break;
2004 } 2516 }
@@ -2398,7 +2910,7 @@ static int check_stack_boundary(struct bpf_verifier_env *env, int regno,
2398{ 2910{
2399 struct bpf_reg_state *reg = reg_state(env, regno); 2911 struct bpf_reg_state *reg = reg_state(env, regno);
2400 struct bpf_func_state *state = func(env, reg); 2912 struct bpf_func_state *state = func(env, reg);
2401 int err, min_off, max_off, i, slot, spi; 2913 int err, min_off, max_off, i, j, slot, spi;
2402 2914
2403 if (reg->type != PTR_TO_STACK) { 2915 if (reg->type != PTR_TO_STACK) {
2404 /* Allow zero-byte read from NULL, regardless of pointer type */ 2916 /* Allow zero-byte read from NULL, regardless of pointer type */
@@ -2486,6 +2998,14 @@ static int check_stack_boundary(struct bpf_verifier_env *env, int regno,
2486 *stype = STACK_MISC; 2998 *stype = STACK_MISC;
2487 goto mark; 2999 goto mark;
2488 } 3000 }
3001 if (state->stack[spi].slot_type[0] == STACK_SPILL &&
3002 state->stack[spi].spilled_ptr.type == SCALAR_VALUE) {
3003 __mark_reg_unknown(&state->stack[spi].spilled_ptr);
3004 for (j = 0; j < BPF_REG_SIZE; j++)
3005 state->stack[spi].slot_type[j] = STACK_MISC;
3006 goto mark;
3007 }
3008
2489err: 3009err:
2490 if (tnum_is_const(reg->var_off)) { 3010 if (tnum_is_const(reg->var_off)) {
2491 verbose(env, "invalid indirect read from stack off %d+%d size %d\n", 3011 verbose(env, "invalid indirect read from stack off %d+%d size %d\n",
@@ -2837,6 +3357,8 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
2837 err = check_helper_mem_access(env, regno - 1, 3357 err = check_helper_mem_access(env, regno - 1,
2838 reg->umax_value, 3358 reg->umax_value,
2839 zero_size_allowed, meta); 3359 zero_size_allowed, meta);
3360 if (!err)
3361 err = mark_chain_precision(env, regno);
2840 } else if (arg_type_is_int_ptr(arg_type)) { 3362 } else if (arg_type_is_int_ptr(arg_type)) {
2841 int size = int_ptr_type_to_size(arg_type); 3363 int size = int_ptr_type_to_size(arg_type);
2842 3364
@@ -2897,10 +3419,14 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
2897 * appear. 3419 * appear.
2898 */ 3420 */
2899 case BPF_MAP_TYPE_CPUMAP: 3421 case BPF_MAP_TYPE_CPUMAP:
2900 case BPF_MAP_TYPE_XSKMAP:
2901 if (func_id != BPF_FUNC_redirect_map) 3422 if (func_id != BPF_FUNC_redirect_map)
2902 goto error; 3423 goto error;
2903 break; 3424 break;
3425 case BPF_MAP_TYPE_XSKMAP:
3426 if (func_id != BPF_FUNC_redirect_map &&
3427 func_id != BPF_FUNC_map_lookup_elem)
3428 goto error;
3429 break;
2904 case BPF_MAP_TYPE_ARRAY_OF_MAPS: 3430 case BPF_MAP_TYPE_ARRAY_OF_MAPS:
2905 case BPF_MAP_TYPE_HASH_OF_MAPS: 3431 case BPF_MAP_TYPE_HASH_OF_MAPS:
2906 if (func_id != BPF_FUNC_map_lookup_elem) 3432 if (func_id != BPF_FUNC_map_lookup_elem)
@@ -3791,6 +4317,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
3791 case PTR_TO_SOCK_COMMON_OR_NULL: 4317 case PTR_TO_SOCK_COMMON_OR_NULL:
3792 case PTR_TO_TCP_SOCK: 4318 case PTR_TO_TCP_SOCK:
3793 case PTR_TO_TCP_SOCK_OR_NULL: 4319 case PTR_TO_TCP_SOCK_OR_NULL:
4320 case PTR_TO_XDP_SOCK:
3794 verbose(env, "R%d pointer arithmetic on %s prohibited\n", 4321 verbose(env, "R%d pointer arithmetic on %s prohibited\n",
3795 dst, reg_type_str[ptr_reg->type]); 4322 dst, reg_type_str[ptr_reg->type]);
3796 return -EACCES; 4323 return -EACCES;
@@ -4268,6 +4795,7 @@ static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
4268 struct bpf_reg_state *regs = state->regs, *dst_reg, *src_reg; 4795 struct bpf_reg_state *regs = state->regs, *dst_reg, *src_reg;
4269 struct bpf_reg_state *ptr_reg = NULL, off_reg = {0}; 4796 struct bpf_reg_state *ptr_reg = NULL, off_reg = {0};
4270 u8 opcode = BPF_OP(insn->code); 4797 u8 opcode = BPF_OP(insn->code);
4798 int err;
4271 4799
4272 dst_reg = &regs[insn->dst_reg]; 4800 dst_reg = &regs[insn->dst_reg];
4273 src_reg = NULL; 4801 src_reg = NULL;
@@ -4294,11 +4822,17 @@ static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
4294 * This is legal, but we have to reverse our 4822 * This is legal, but we have to reverse our
4295 * src/dest handling in computing the range 4823 * src/dest handling in computing the range
4296 */ 4824 */
4825 err = mark_chain_precision(env, insn->dst_reg);
4826 if (err)
4827 return err;
4297 return adjust_ptr_min_max_vals(env, insn, 4828 return adjust_ptr_min_max_vals(env, insn,
4298 src_reg, dst_reg); 4829 src_reg, dst_reg);
4299 } 4830 }
4300 } else if (ptr_reg) { 4831 } else if (ptr_reg) {
4301 /* pointer += scalar */ 4832 /* pointer += scalar */
4833 err = mark_chain_precision(env, insn->src_reg);
4834 if (err)
4835 return err;
4302 return adjust_ptr_min_max_vals(env, insn, 4836 return adjust_ptr_min_max_vals(env, insn,
4303 dst_reg, src_reg); 4837 dst_reg, src_reg);
4304 } 4838 }
@@ -5030,6 +5564,9 @@ static void mark_ptr_or_null_reg(struct bpf_func_state *state,
5030 if (reg->map_ptr->inner_map_meta) { 5564 if (reg->map_ptr->inner_map_meta) {
5031 reg->type = CONST_PTR_TO_MAP; 5565 reg->type = CONST_PTR_TO_MAP;
5032 reg->map_ptr = reg->map_ptr->inner_map_meta; 5566 reg->map_ptr = reg->map_ptr->inner_map_meta;
5567 } else if (reg->map_ptr->map_type ==
5568 BPF_MAP_TYPE_XSKMAP) {
5569 reg->type = PTR_TO_XDP_SOCK;
5033 } else { 5570 } else {
5034 reg->type = PTR_TO_MAP_VALUE; 5571 reg->type = PTR_TO_MAP_VALUE;
5035 } 5572 }
@@ -5201,9 +5738,10 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
5201 struct bpf_verifier_state *this_branch = env->cur_state; 5738 struct bpf_verifier_state *this_branch = env->cur_state;
5202 struct bpf_verifier_state *other_branch; 5739 struct bpf_verifier_state *other_branch;
5203 struct bpf_reg_state *regs = this_branch->frame[this_branch->curframe]->regs; 5740 struct bpf_reg_state *regs = this_branch->frame[this_branch->curframe]->regs;
5204 struct bpf_reg_state *dst_reg, *other_branch_regs; 5741 struct bpf_reg_state *dst_reg, *other_branch_regs, *src_reg = NULL;
5205 u8 opcode = BPF_OP(insn->code); 5742 u8 opcode = BPF_OP(insn->code);
5206 bool is_jmp32; 5743 bool is_jmp32;
5744 int pred = -1;
5207 int err; 5745 int err;
5208 5746
5209 /* Only conditional jumps are expected to reach here. */ 5747 /* Only conditional jumps are expected to reach here. */
@@ -5228,6 +5766,7 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
5228 insn->src_reg); 5766 insn->src_reg);
5229 return -EACCES; 5767 return -EACCES;
5230 } 5768 }
5769 src_reg = &regs[insn->src_reg];
5231 } else { 5770 } else {
5232 if (insn->src_reg != BPF_REG_0) { 5771 if (insn->src_reg != BPF_REG_0) {
5233 verbose(env, "BPF_JMP/JMP32 uses reserved fields\n"); 5772 verbose(env, "BPF_JMP/JMP32 uses reserved fields\n");
@@ -5243,20 +5782,29 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
5243 dst_reg = &regs[insn->dst_reg]; 5782 dst_reg = &regs[insn->dst_reg];
5244 is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32; 5783 is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
5245 5784
5246 if (BPF_SRC(insn->code) == BPF_K) { 5785 if (BPF_SRC(insn->code) == BPF_K)
5247 int pred = is_branch_taken(dst_reg, insn->imm, opcode, 5786 pred = is_branch_taken(dst_reg, insn->imm,
5248 is_jmp32); 5787 opcode, is_jmp32);
5249 5788 else if (src_reg->type == SCALAR_VALUE &&
5250 if (pred == 1) { 5789 tnum_is_const(src_reg->var_off))
5251 /* only follow the goto, ignore fall-through */ 5790 pred = is_branch_taken(dst_reg, src_reg->var_off.value,
5252 *insn_idx += insn->off; 5791 opcode, is_jmp32);
5253 return 0; 5792 if (pred >= 0) {
5254 } else if (pred == 0) { 5793 err = mark_chain_precision(env, insn->dst_reg);
5255 /* only follow fall-through branch, since 5794 if (BPF_SRC(insn->code) == BPF_X && !err)
5256 * that's where the program will go 5795 err = mark_chain_precision(env, insn->src_reg);
5257 */ 5796 if (err)
5258 return 0; 5797 return err;
5259 } 5798 }
5799 if (pred == 1) {
5800 /* only follow the goto, ignore fall-through */
5801 *insn_idx += insn->off;
5802 return 0;
5803 } else if (pred == 0) {
5804 /* only follow fall-through branch, since
5805 * that's where the program will go
5806 */
5807 return 0;
5260 } 5808 }
5261 5809
5262 other_branch = push_stack(env, *insn_idx + insn->off + 1, *insn_idx, 5810 other_branch = push_stack(env, *insn_idx + insn->off + 1, *insn_idx,
@@ -5616,7 +6164,8 @@ static void init_explored_state(struct bpf_verifier_env *env, int idx)
5616 * w - next instruction 6164 * w - next instruction
5617 * e - edge 6165 * e - edge
5618 */ 6166 */
5619static int push_insn(int t, int w, int e, struct bpf_verifier_env *env) 6167static int push_insn(int t, int w, int e, struct bpf_verifier_env *env,
6168 bool loop_ok)
5620{ 6169{
5621 int *insn_stack = env->cfg.insn_stack; 6170 int *insn_stack = env->cfg.insn_stack;
5622 int *insn_state = env->cfg.insn_state; 6171 int *insn_state = env->cfg.insn_state;
@@ -5646,6 +6195,8 @@ static int push_insn(int t, int w, int e, struct bpf_verifier_env *env)
5646 insn_stack[env->cfg.cur_stack++] = w; 6195 insn_stack[env->cfg.cur_stack++] = w;
5647 return 1; 6196 return 1;
5648 } else if ((insn_state[w] & 0xF0) == DISCOVERED) { 6197 } else if ((insn_state[w] & 0xF0) == DISCOVERED) {
6198 if (loop_ok && env->allow_ptr_leaks)
6199 return 0;
5649 verbose_linfo(env, t, "%d: ", t); 6200 verbose_linfo(env, t, "%d: ", t);
5650 verbose_linfo(env, w, "%d: ", w); 6201 verbose_linfo(env, w, "%d: ", w);
5651 verbose(env, "back-edge from insn %d to %d\n", t, w); 6202 verbose(env, "back-edge from insn %d to %d\n", t, w);
@@ -5697,7 +6248,7 @@ peek_stack:
5697 if (opcode == BPF_EXIT) { 6248 if (opcode == BPF_EXIT) {
5698 goto mark_explored; 6249 goto mark_explored;
5699 } else if (opcode == BPF_CALL) { 6250 } else if (opcode == BPF_CALL) {
5700 ret = push_insn(t, t + 1, FALLTHROUGH, env); 6251 ret = push_insn(t, t + 1, FALLTHROUGH, env, false);
5701 if (ret == 1) 6252 if (ret == 1)
5702 goto peek_stack; 6253 goto peek_stack;
5703 else if (ret < 0) 6254 else if (ret < 0)
@@ -5706,7 +6257,8 @@ peek_stack:
5706 init_explored_state(env, t + 1); 6257 init_explored_state(env, t + 1);
5707 if (insns[t].src_reg == BPF_PSEUDO_CALL) { 6258 if (insns[t].src_reg == BPF_PSEUDO_CALL) {
5708 init_explored_state(env, t); 6259 init_explored_state(env, t);
5709 ret = push_insn(t, t + insns[t].imm + 1, BRANCH, env); 6260 ret = push_insn(t, t + insns[t].imm + 1, BRANCH,
6261 env, false);
5710 if (ret == 1) 6262 if (ret == 1)
5711 goto peek_stack; 6263 goto peek_stack;
5712 else if (ret < 0) 6264 else if (ret < 0)
@@ -5719,11 +6271,16 @@ peek_stack:
5719 } 6271 }
5720 /* unconditional jump with single edge */ 6272 /* unconditional jump with single edge */
5721 ret = push_insn(t, t + insns[t].off + 1, 6273 ret = push_insn(t, t + insns[t].off + 1,
5722 FALLTHROUGH, env); 6274 FALLTHROUGH, env, true);
5723 if (ret == 1) 6275 if (ret == 1)
5724 goto peek_stack; 6276 goto peek_stack;
5725 else if (ret < 0) 6277 else if (ret < 0)
5726 goto err_free; 6278 goto err_free;
6279 /* unconditional jmp is not a good pruning point,
6280 * but it's marked, since backtracking needs
6281 * to record jmp history in is_state_visited().
6282 */
6283 init_explored_state(env, t + insns[t].off + 1);
5727 /* tell verifier to check for equivalent states 6284 /* tell verifier to check for equivalent states
5728 * after every call and jump 6285 * after every call and jump
5729 */ 6286 */
@@ -5732,13 +6289,13 @@ peek_stack:
5732 } else { 6289 } else {
5733 /* conditional jump with two edges */ 6290 /* conditional jump with two edges */
5734 init_explored_state(env, t); 6291 init_explored_state(env, t);
5735 ret = push_insn(t, t + 1, FALLTHROUGH, env); 6292 ret = push_insn(t, t + 1, FALLTHROUGH, env, true);
5736 if (ret == 1) 6293 if (ret == 1)
5737 goto peek_stack; 6294 goto peek_stack;
5738 else if (ret < 0) 6295 else if (ret < 0)
5739 goto err_free; 6296 goto err_free;
5740 6297
5741 ret = push_insn(t, t + insns[t].off + 1, BRANCH, env); 6298 ret = push_insn(t, t + insns[t].off + 1, BRANCH, env, true);
5742 if (ret == 1) 6299 if (ret == 1)
5743 goto peek_stack; 6300 goto peek_stack;
5744 else if (ret < 0) 6301 else if (ret < 0)
@@ -5748,7 +6305,7 @@ peek_stack:
5748 /* all other non-branch instructions with single 6305 /* all other non-branch instructions with single
5749 * fall-through edge 6306 * fall-through edge
5750 */ 6307 */
5751 ret = push_insn(t, t + 1, FALLTHROUGH, env); 6308 ret = push_insn(t, t + 1, FALLTHROUGH, env, false);
5752 if (ret == 1) 6309 if (ret == 1)
5753 goto peek_stack; 6310 goto peek_stack;
5754 else if (ret < 0) 6311 else if (ret < 0)
@@ -6181,6 +6738,8 @@ static void clean_live_states(struct bpf_verifier_env *env, int insn,
6181 6738
6182 sl = *explored_state(env, insn); 6739 sl = *explored_state(env, insn);
6183 while (sl) { 6740 while (sl) {
6741 if (sl->state.branches)
6742 goto next;
6184 if (sl->state.insn_idx != insn || 6743 if (sl->state.insn_idx != insn ||
6185 sl->state.curframe != cur->curframe) 6744 sl->state.curframe != cur->curframe)
6186 goto next; 6745 goto next;
@@ -6222,6 +6781,8 @@ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
6222 switch (rold->type) { 6781 switch (rold->type) {
6223 case SCALAR_VALUE: 6782 case SCALAR_VALUE:
6224 if (rcur->type == SCALAR_VALUE) { 6783 if (rcur->type == SCALAR_VALUE) {
6784 if (!rold->precise && !rcur->precise)
6785 return true;
6225 /* new val must satisfy old val knowledge */ 6786 /* new val must satisfy old val knowledge */
6226 return range_within(rold, rcur) && 6787 return range_within(rold, rcur) &&
6227 tnum_in(rold->var_off, rcur->var_off); 6788 tnum_in(rold->var_off, rcur->var_off);
@@ -6294,6 +6855,7 @@ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
6294 case PTR_TO_SOCK_COMMON_OR_NULL: 6855 case PTR_TO_SOCK_COMMON_OR_NULL:
6295 case PTR_TO_TCP_SOCK: 6856 case PTR_TO_TCP_SOCK:
6296 case PTR_TO_TCP_SOCK_OR_NULL: 6857 case PTR_TO_TCP_SOCK_OR_NULL:
6858 case PTR_TO_XDP_SOCK:
6297 /* Only valid matches are exact, which memcmp() above 6859 /* Only valid matches are exact, which memcmp() above
6298 * would have accepted 6860 * would have accepted
6299 */ 6861 */
@@ -6544,19 +7106,52 @@ static int propagate_liveness(struct bpf_verifier_env *env,
6544 return 0; 7106 return 0;
6545} 7107}
6546 7108
7109static bool states_maybe_looping(struct bpf_verifier_state *old,
7110 struct bpf_verifier_state *cur)
7111{
7112 struct bpf_func_state *fold, *fcur;
7113 int i, fr = cur->curframe;
7114
7115 if (old->curframe != fr)
7116 return false;
7117
7118 fold = old->frame[fr];
7119 fcur = cur->frame[fr];
7120 for (i = 0; i < MAX_BPF_REG; i++)
7121 if (memcmp(&fold->regs[i], &fcur->regs[i],
7122 offsetof(struct bpf_reg_state, parent)))
7123 return false;
7124 return true;
7125}
7126
7127
6547static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) 7128static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
6548{ 7129{
6549 struct bpf_verifier_state_list *new_sl; 7130 struct bpf_verifier_state_list *new_sl;
6550 struct bpf_verifier_state_list *sl, **pprev; 7131 struct bpf_verifier_state_list *sl, **pprev;
6551 struct bpf_verifier_state *cur = env->cur_state, *new; 7132 struct bpf_verifier_state *cur = env->cur_state, *new;
6552 int i, j, err, states_cnt = 0; 7133 int i, j, err, states_cnt = 0;
7134 bool add_new_state = false;
6553 7135
7136 cur->last_insn_idx = env->prev_insn_idx;
6554 if (!env->insn_aux_data[insn_idx].prune_point) 7137 if (!env->insn_aux_data[insn_idx].prune_point)
6555 /* this 'insn_idx' instruction wasn't marked, so we will not 7138 /* this 'insn_idx' instruction wasn't marked, so we will not
6556 * be doing state search here 7139 * be doing state search here
6557 */ 7140 */
6558 return 0; 7141 return 0;
6559 7142
7143 /* bpf progs typically have pruning point every 4 instructions
7144 * http://vger.kernel.org/bpfconf2019.html#session-1
7145 * Do not add new state for future pruning if the verifier hasn't seen
7146 * at least 2 jumps and at least 8 instructions.
7147 * This heuristics helps decrease 'total_states' and 'peak_states' metric.
7148 * In tests that amounts to up to 50% reduction into total verifier
7149 * memory consumption and 20% verifier time speedup.
7150 */
7151 if (env->jmps_processed - env->prev_jmps_processed >= 2 &&
7152 env->insn_processed - env->prev_insn_processed >= 8)
7153 add_new_state = true;
7154
6560 pprev = explored_state(env, insn_idx); 7155 pprev = explored_state(env, insn_idx);
6561 sl = *pprev; 7156 sl = *pprev;
6562 7157
@@ -6566,6 +7161,30 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
6566 states_cnt++; 7161 states_cnt++;
6567 if (sl->state.insn_idx != insn_idx) 7162 if (sl->state.insn_idx != insn_idx)
6568 goto next; 7163 goto next;
7164 if (sl->state.branches) {
7165 if (states_maybe_looping(&sl->state, cur) &&
7166 states_equal(env, &sl->state, cur)) {
7167 verbose_linfo(env, insn_idx, "; ");
7168 verbose(env, "infinite loop detected at insn %d\n", insn_idx);
7169 return -EINVAL;
7170 }
7171 /* if the verifier is processing a loop, avoid adding new state
7172 * too often, since different loop iterations have distinct
7173 * states and may not help future pruning.
7174 * This threshold shouldn't be too low to make sure that
7175 * a loop with large bound will be rejected quickly.
7176 * The most abusive loop will be:
7177 * r1 += 1
7178 * if r1 < 1000000 goto pc-2
7179 * 1M insn_procssed limit / 100 == 10k peak states.
7180 * This threshold shouldn't be too high either, since states
7181 * at the end of the loop are likely to be useful in pruning.
7182 */
7183 if (env->jmps_processed - env->prev_jmps_processed < 20 &&
7184 env->insn_processed - env->prev_insn_processed < 100)
7185 add_new_state = false;
7186 goto miss;
7187 }
6569 if (states_equal(env, &sl->state, cur)) { 7188 if (states_equal(env, &sl->state, cur)) {
6570 sl->hit_cnt++; 7189 sl->hit_cnt++;
6571 /* reached equivalent register/stack state, 7190 /* reached equivalent register/stack state,
@@ -6583,7 +7202,15 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
6583 return err; 7202 return err;
6584 return 1; 7203 return 1;
6585 } 7204 }
6586 sl->miss_cnt++; 7205miss:
7206 /* when new state is not going to be added do not increase miss count.
7207 * Otherwise several loop iterations will remove the state
7208 * recorded earlier. The goal of these heuristics is to have
7209 * states from some iterations of the loop (some in the beginning
7210 * and some at the end) to help pruning.
7211 */
7212 if (add_new_state)
7213 sl->miss_cnt++;
6587 /* heuristic to determine whether this state is beneficial 7214 /* heuristic to determine whether this state is beneficial
6588 * to keep checking from state equivalence point of view. 7215 * to keep checking from state equivalence point of view.
6589 * Higher numbers increase max_states_per_insn and verification time, 7216 * Higher numbers increase max_states_per_insn and verification time,
@@ -6595,6 +7222,11 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
6595 */ 7222 */
6596 *pprev = sl->next; 7223 *pprev = sl->next;
6597 if (sl->state.frame[0]->regs[0].live & REG_LIVE_DONE) { 7224 if (sl->state.frame[0]->regs[0].live & REG_LIVE_DONE) {
7225 u32 br = sl->state.branches;
7226
7227 WARN_ONCE(br,
7228 "BUG live_done but branches_to_explore %d\n",
7229 br);
6598 free_verifier_state(&sl->state, false); 7230 free_verifier_state(&sl->state, false);
6599 kfree(sl); 7231 kfree(sl);
6600 env->peak_states--; 7232 env->peak_states--;
@@ -6618,20 +7250,27 @@ next:
6618 env->max_states_per_insn = states_cnt; 7250 env->max_states_per_insn = states_cnt;
6619 7251
6620 if (!env->allow_ptr_leaks && states_cnt > BPF_COMPLEXITY_LIMIT_STATES) 7252 if (!env->allow_ptr_leaks && states_cnt > BPF_COMPLEXITY_LIMIT_STATES)
6621 return 0; 7253 return push_jmp_history(env, cur);
7254
7255 if (!add_new_state)
7256 return push_jmp_history(env, cur);
6622 7257
6623 /* there were no equivalent states, remember current one. 7258 /* There were no equivalent states, remember the current one.
6624 * technically the current state is not proven to be safe yet, 7259 * Technically the current state is not proven to be safe yet,
6625 * but it will either reach outer most bpf_exit (which means it's safe) 7260 * but it will either reach outer most bpf_exit (which means it's safe)
6626 * or it will be rejected. Since there are no loops, we won't be 7261 * or it will be rejected. When there are no loops the verifier won't be
6627 * seeing this tuple (frame[0].callsite, frame[1].callsite, .. insn_idx) 7262 * seeing this tuple (frame[0].callsite, frame[1].callsite, .. insn_idx)
6628 * again on the way to bpf_exit 7263 * again on the way to bpf_exit.
7264 * When looping the sl->state.branches will be > 0 and this state
7265 * will not be considered for equivalence until branches == 0.
6629 */ 7266 */
6630 new_sl = kzalloc(sizeof(struct bpf_verifier_state_list), GFP_KERNEL); 7267 new_sl = kzalloc(sizeof(struct bpf_verifier_state_list), GFP_KERNEL);
6631 if (!new_sl) 7268 if (!new_sl)
6632 return -ENOMEM; 7269 return -ENOMEM;
6633 env->total_states++; 7270 env->total_states++;
6634 env->peak_states++; 7271 env->peak_states++;
7272 env->prev_jmps_processed = env->jmps_processed;
7273 env->prev_insn_processed = env->insn_processed;
6635 7274
6636 /* add new state to the head of linked list */ 7275 /* add new state to the head of linked list */
6637 new = &new_sl->state; 7276 new = &new_sl->state;
@@ -6642,6 +7281,12 @@ next:
6642 return err; 7281 return err;
6643 } 7282 }
6644 new->insn_idx = insn_idx; 7283 new->insn_idx = insn_idx;
7284 WARN_ONCE(new->branches != 1,
7285 "BUG is_state_visited:branches_to_explore=%d insn %d\n", new->branches, insn_idx);
7286
7287 cur->parent = new;
7288 cur->first_insn_idx = insn_idx;
7289 clear_jmp_history(cur);
6645 new_sl->next = *explored_state(env, insn_idx); 7290 new_sl->next = *explored_state(env, insn_idx);
6646 *explored_state(env, insn_idx) = new_sl; 7291 *explored_state(env, insn_idx) = new_sl;
6647 /* connect new state to parentage chain. Current frame needs all 7292 /* connect new state to parentage chain. Current frame needs all
@@ -6651,17 +7296,18 @@ next:
6651 * the state of the call instruction (with WRITTEN set), and r0 comes 7296 * the state of the call instruction (with WRITTEN set), and r0 comes
6652 * from callee with its full parentage chain, anyway. 7297 * from callee with its full parentage chain, anyway.
6653 */ 7298 */
6654 for (j = 0; j <= cur->curframe; j++)
6655 for (i = j < cur->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++)
6656 cur->frame[j]->regs[i].parent = &new->frame[j]->regs[i];
6657 /* clear write marks in current state: the writes we did are not writes 7299 /* clear write marks in current state: the writes we did are not writes
6658 * our child did, so they don't screen off its reads from us. 7300 * our child did, so they don't screen off its reads from us.
6659 * (There are no read marks in current state, because reads always mark 7301 * (There are no read marks in current state, because reads always mark
6660 * their parent and current state never has children yet. Only 7302 * their parent and current state never has children yet. Only
6661 * explored_states can get read marks.) 7303 * explored_states can get read marks.)
6662 */ 7304 */
6663 for (i = 0; i < BPF_REG_FP; i++) 7305 for (j = 0; j <= cur->curframe; j++) {
6664 cur->frame[cur->curframe]->regs[i].live = REG_LIVE_NONE; 7306 for (i = j < cur->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++)
7307 cur->frame[j]->regs[i].parent = &new->frame[j]->regs[i];
7308 for (i = 0; i < BPF_REG_FP; i++)
7309 cur->frame[j]->regs[i].live = REG_LIVE_NONE;
7310 }
6665 7311
6666 /* all stack frames are accessible from callee, clear them all */ 7312 /* all stack frames are accessible from callee, clear them all */
6667 for (j = 0; j <= cur->curframe; j++) { 7313 for (j = 0; j <= cur->curframe; j++) {
@@ -6688,6 +7334,7 @@ static bool reg_type_mismatch_ok(enum bpf_reg_type type)
6688 case PTR_TO_SOCK_COMMON_OR_NULL: 7334 case PTR_TO_SOCK_COMMON_OR_NULL:
6689 case PTR_TO_TCP_SOCK: 7335 case PTR_TO_TCP_SOCK:
6690 case PTR_TO_TCP_SOCK_OR_NULL: 7336 case PTR_TO_TCP_SOCK_OR_NULL:
7337 case PTR_TO_XDP_SOCK:
6691 return false; 7338 return false;
6692 default: 7339 default:
6693 return true; 7340 return true;
@@ -6719,6 +7366,7 @@ static int do_check(struct bpf_verifier_env *env)
6719 struct bpf_reg_state *regs; 7366 struct bpf_reg_state *regs;
6720 int insn_cnt = env->prog->len; 7367 int insn_cnt = env->prog->len;
6721 bool do_print_state = false; 7368 bool do_print_state = false;
7369 int prev_insn_idx = -1;
6722 7370
6723 env->prev_linfo = NULL; 7371 env->prev_linfo = NULL;
6724 7372
@@ -6727,6 +7375,7 @@ static int do_check(struct bpf_verifier_env *env)
6727 return -ENOMEM; 7375 return -ENOMEM;
6728 state->curframe = 0; 7376 state->curframe = 0;
6729 state->speculative = false; 7377 state->speculative = false;
7378 state->branches = 1;
6730 state->frame[0] = kzalloc(sizeof(struct bpf_func_state), GFP_KERNEL); 7379 state->frame[0] = kzalloc(sizeof(struct bpf_func_state), GFP_KERNEL);
6731 if (!state->frame[0]) { 7380 if (!state->frame[0]) {
6732 kfree(state); 7381 kfree(state);
@@ -6743,6 +7392,7 @@ static int do_check(struct bpf_verifier_env *env)
6743 u8 class; 7392 u8 class;
6744 int err; 7393 int err;
6745 7394
7395 env->prev_insn_idx = prev_insn_idx;
6746 if (env->insn_idx >= insn_cnt) { 7396 if (env->insn_idx >= insn_cnt) {
6747 verbose(env, "invalid insn idx %d insn_cnt %d\n", 7397 verbose(env, "invalid insn idx %d insn_cnt %d\n",
6748 env->insn_idx, insn_cnt); 7398 env->insn_idx, insn_cnt);
@@ -6815,6 +7465,7 @@ static int do_check(struct bpf_verifier_env *env)
6815 7465
6816 regs = cur_regs(env); 7466 regs = cur_regs(env);
6817 env->insn_aux_data[env->insn_idx].seen = true; 7467 env->insn_aux_data[env->insn_idx].seen = true;
7468 prev_insn_idx = env->insn_idx;
6818 7469
6819 if (class == BPF_ALU || class == BPF_ALU64) { 7470 if (class == BPF_ALU || class == BPF_ALU64) {
6820 err = check_alu_op(env, insn); 7471 err = check_alu_op(env, insn);
@@ -6933,6 +7584,7 @@ static int do_check(struct bpf_verifier_env *env)
6933 } else if (class == BPF_JMP || class == BPF_JMP32) { 7584 } else if (class == BPF_JMP || class == BPF_JMP32) {
6934 u8 opcode = BPF_OP(insn->code); 7585 u8 opcode = BPF_OP(insn->code);
6935 7586
7587 env->jmps_processed++;
6936 if (opcode == BPF_CALL) { 7588 if (opcode == BPF_CALL) {
6937 if (BPF_SRC(insn->code) != BPF_K || 7589 if (BPF_SRC(insn->code) != BPF_K ||
6938 insn->off != 0 || 7590 insn->off != 0 ||
@@ -6987,7 +7639,6 @@ static int do_check(struct bpf_verifier_env *env)
6987 7639
6988 if (state->curframe) { 7640 if (state->curframe) {
6989 /* exit from nested function */ 7641 /* exit from nested function */
6990 env->prev_insn_idx = env->insn_idx;
6991 err = prepare_func_exit(env, &env->insn_idx); 7642 err = prepare_func_exit(env, &env->insn_idx);
6992 if (err) 7643 if (err)
6993 return err; 7644 return err;
@@ -7018,7 +7669,8 @@ static int do_check(struct bpf_verifier_env *env)
7018 if (err) 7669 if (err)
7019 return err; 7670 return err;
7020process_bpf_exit: 7671process_bpf_exit:
7021 err = pop_stack(env, &env->prev_insn_idx, 7672 update_branch_counts(env, env->cur_state);
7673 err = pop_stack(env, &prev_insn_idx,
7022 &env->insn_idx); 7674 &env->insn_idx);
7023 if (err < 0) { 7675 if (err < 0) {
7024 if (err != -ENOENT) 7676 if (err != -ENOENT)
@@ -7821,6 +8473,9 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
7821 case PTR_TO_TCP_SOCK: 8473 case PTR_TO_TCP_SOCK:
7822 convert_ctx_access = bpf_tcp_sock_convert_ctx_access; 8474 convert_ctx_access = bpf_tcp_sock_convert_ctx_access;
7823 break; 8475 break;
8476 case PTR_TO_XDP_SOCK:
8477 convert_ctx_access = bpf_xdp_sock_convert_ctx_access;
8478 break;
7824 default: 8479 default:
7825 continue; 8480 continue;
7826 } 8481 }
diff --git a/kernel/bpf/xskmap.c b/kernel/bpf/xskmap.c
index 22066c28ba61..ef7338cebd18 100644
--- a/kernel/bpf/xskmap.c
+++ b/kernel/bpf/xskmap.c
@@ -17,8 +17,8 @@ struct xsk_map {
17 17
18static struct bpf_map *xsk_map_alloc(union bpf_attr *attr) 18static struct bpf_map *xsk_map_alloc(union bpf_attr *attr)
19{ 19{
20 int cpu, err = -EINVAL;
21 struct xsk_map *m; 20 struct xsk_map *m;
21 int cpu, err;
22 u64 cost; 22 u64 cost;
23 23
24 if (!capable(CAP_NET_ADMIN)) 24 if (!capable(CAP_NET_ADMIN))
@@ -152,6 +152,12 @@ void __xsk_map_flush(struct bpf_map *map)
152 152
153static void *xsk_map_lookup_elem(struct bpf_map *map, void *key) 153static void *xsk_map_lookup_elem(struct bpf_map *map, void *key)
154{ 154{
155 WARN_ON_ONCE(!rcu_read_lock_held());
156 return __xsk_map_lookup_elem(map, *(u32 *)key);
157}
158
159static void *xsk_map_lookup_elem_sys_only(struct bpf_map *map, void *key)
160{
155 return ERR_PTR(-EOPNOTSUPP); 161 return ERR_PTR(-EOPNOTSUPP);
156} 162}
157 163
@@ -218,6 +224,7 @@ const struct bpf_map_ops xsk_map_ops = {
218 .map_free = xsk_map_free, 224 .map_free = xsk_map_free,
219 .map_get_next_key = xsk_map_get_next_key, 225 .map_get_next_key = xsk_map_get_next_key,
220 .map_lookup_elem = xsk_map_lookup_elem, 226 .map_lookup_elem = xsk_map_lookup_elem,
227 .map_lookup_elem_sys_only = xsk_map_lookup_elem_sys_only,
221 .map_update_elem = xsk_map_update_elem, 228 .map_update_elem = xsk_map_update_elem,
222 .map_delete_elem = xsk_map_delete_elem, 229 .map_delete_elem = xsk_map_delete_elem,
223 .map_check_btf = map_check_no_btf, 230 .map_check_btf = map_check_no_btf,