about summary refs log tree commit diff stats
path: root/kernel
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2019-01-03 15:53:47 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2019-01-03 15:53:47 -0500
commit43d86ee8c639df750529b4d8f062b328b61c423e (patch)
tree076161dd7ce3f843b9c965a780ecfbf020f75e8e /kernel
parent645ff1e8e704c4f33ab1fcd3c87f95cb9b6d7144 (diff)
parentc5ee066333ebc322a24a00a743ed941a0c68617e (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
Pull networking fixes from David Miller: "Several fixes here. Basically split down the line between newly introduced regressions and long existing problems: 1) Double free in tipc_enable_bearer(), from Cong Wang. 2) Many fixes to nf_conncount, from Florian Westphal. 3) op->get_regs_len() can throw an error, check it, from Yunsheng Lin. 4) Need to use GFP_ATOMIC in *_add_hash_mac_address() of fsl/fman driver, from Scott Wood. 5) Infinite loop in fib_empty_table(), from Yue Haibing. 6) Use after free in ax25_fillin_cb(), from Cong Wang. 7) Fix socket locking in nr_find_socket(), also from Cong Wang. 8) Fix WoL wakeup enable in r8169, from Heiner Kallweit. 9) On 32-bit sock->sk_stamp is not thread-safe, from Deepa Dinamani. 10) Fix ptr_ring wrap during queue swap, from Cong Wang. 11) Missing shutdown callback in hinic driver, from Xue Chaojing. 12) Need to return NULL on error from ip6_neigh_lookup(), from Stefano Brivio. 13) BPF out of bounds speculation fixes from Daniel Borkmann" * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net: (57 commits) ipv6: Consider sk_bound_dev_if when binding a socket to an address ipv6: Fix dump of specific table with strict checking bpf: add various test cases to selftests bpf: prevent out of bounds speculation on pointer arithmetic bpf: fix check_map_access smin_value test when pointer contains offset bpf: restrict unknown scalars of mixed signed bounds for unprivileged bpf: restrict stack pointer arithmetic for unprivileged bpf: restrict map value pointer arithmetic for unprivileged bpf: enable access to ax register also from verifier rewrite bpf: move tmp variable into ax register in interpreter bpf: move {prev_,}insn_idx into verifier env isdn: fix kernel-infoleak in capi_unlocked_ioctl ipv6: route: Fix return value of ip6_neigh_lookup() on neigh_create() error net/hamradio/6pack: use mod_timer() to rearm timers net-next/hinic:add shutdown callback net: hns3: call hns3_nic_net_open() while doing HNAE3_UP_CLIENT ip: validate header length on virtual device xmit tap: call skb_probe_transport_header after setting skb->dev ptr_ring: wrap back ->producer in __ptr_ring_swap_queue() net: rds: remove unnecessary NULL check ...
Diffstat (limited to 'kernel')
-rw-r--r--kernel/bpf/core.c54
-rw-r--r--kernel/bpf/verifier.c336
2 files changed, 312 insertions, 78 deletions
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 38de580abcc2..f908b9356025 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -54,6 +54,7 @@
54#define DST regs[insn->dst_reg] 54#define DST regs[insn->dst_reg]
55#define SRC regs[insn->src_reg] 55#define SRC regs[insn->src_reg]
56#define FP regs[BPF_REG_FP] 56#define FP regs[BPF_REG_FP]
57#define AX regs[BPF_REG_AX]
57#define ARG1 regs[BPF_REG_ARG1] 58#define ARG1 regs[BPF_REG_ARG1]
58#define CTX regs[BPF_REG_CTX] 59#define CTX regs[BPF_REG_CTX]
59#define IMM insn->imm 60#define IMM insn->imm
@@ -857,6 +858,26 @@ static int bpf_jit_blind_insn(const struct bpf_insn *from,
857 BUILD_BUG_ON(BPF_REG_AX + 1 != MAX_BPF_JIT_REG); 858 BUILD_BUG_ON(BPF_REG_AX + 1 != MAX_BPF_JIT_REG);
858 BUILD_BUG_ON(MAX_BPF_REG + 1 != MAX_BPF_JIT_REG); 859 BUILD_BUG_ON(MAX_BPF_REG + 1 != MAX_BPF_JIT_REG);
859 860
861 /* Constraints on AX register:
862 *
863 * AX register is inaccessible from user space. It is mapped in
864 * all JITs, and used here for constant blinding rewrites. It is
865 * typically "stateless" meaning its contents are only valid within
866 * the executed instruction, but not across several instructions.
867 * There are a few exceptions however which are further detailed
868 * below.
869 *
870 * Constant blinding is only used by JITs, not in the interpreter.
871 * The interpreter uses AX in some occasions as a local temporary
872 * register e.g. in DIV or MOD instructions.
873 *
874 * In restricted circumstances, the verifier can also use the AX
875 * register for rewrites as long as they do not interfere with
876 * the above cases!
877 */
878 if (from->dst_reg == BPF_REG_AX || from->src_reg == BPF_REG_AX)
879 goto out;
880
860 if (from->imm == 0 && 881 if (from->imm == 0 &&
861 (from->code == (BPF_ALU | BPF_MOV | BPF_K) || 882 (from->code == (BPF_ALU | BPF_MOV | BPF_K) ||
862 from->code == (BPF_ALU64 | BPF_MOV | BPF_K))) { 883 from->code == (BPF_ALU64 | BPF_MOV | BPF_K))) {
@@ -1188,7 +1209,6 @@ bool bpf_opcode_in_insntable(u8 code)
1188 */ 1209 */
1189static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn, u64 *stack) 1210static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn, u64 *stack)
1190{ 1211{
1191 u64 tmp;
1192#define BPF_INSN_2_LBL(x, y) [BPF_##x | BPF_##y] = &&x##_##y 1212#define BPF_INSN_2_LBL(x, y) [BPF_##x | BPF_##y] = &&x##_##y
1193#define BPF_INSN_3_LBL(x, y, z) [BPF_##x | BPF_##y | BPF_##z] = &&x##_##y##_##z 1213#define BPF_INSN_3_LBL(x, y, z) [BPF_##x | BPF_##y | BPF_##z] = &&x##_##y##_##z
1194 static const void *jumptable[256] = { 1214 static const void *jumptable[256] = {
@@ -1268,36 +1288,36 @@ select_insn:
1268 (*(s64 *) &DST) >>= IMM; 1288 (*(s64 *) &DST) >>= IMM;
1269 CONT; 1289 CONT;
1270 ALU64_MOD_X: 1290 ALU64_MOD_X:
1271 div64_u64_rem(DST, SRC, &tmp); 1291 div64_u64_rem(DST, SRC, &AX);
1272 DST = tmp; 1292 DST = AX;
1273 CONT; 1293 CONT;
1274 ALU_MOD_X: 1294 ALU_MOD_X:
1275 tmp = (u32) DST; 1295 AX = (u32) DST;
1276 DST = do_div(tmp, (u32) SRC); 1296 DST = do_div(AX, (u32) SRC);
1277 CONT; 1297 CONT;
1278 ALU64_MOD_K: 1298 ALU64_MOD_K:
1279 div64_u64_rem(DST, IMM, &tmp); 1299 div64_u64_rem(DST, IMM, &AX);
1280 DST = tmp; 1300 DST = AX;
1281 CONT; 1301 CONT;
1282 ALU_MOD_K: 1302 ALU_MOD_K:
1283 tmp = (u32) DST; 1303 AX = (u32) DST;
1284 DST = do_div(tmp, (u32) IMM); 1304 DST = do_div(AX, (u32) IMM);
1285 CONT; 1305 CONT;
1286 ALU64_DIV_X: 1306 ALU64_DIV_X:
1287 DST = div64_u64(DST, SRC); 1307 DST = div64_u64(DST, SRC);
1288 CONT; 1308 CONT;
1289 ALU_DIV_X: 1309 ALU_DIV_X:
1290 tmp = (u32) DST; 1310 AX = (u32) DST;
1291 do_div(tmp, (u32) SRC); 1311 do_div(AX, (u32) SRC);
1292 DST = (u32) tmp; 1312 DST = (u32) AX;
1293 CONT; 1313 CONT;
1294 ALU64_DIV_K: 1314 ALU64_DIV_K:
1295 DST = div64_u64(DST, IMM); 1315 DST = div64_u64(DST, IMM);
1296 CONT; 1316 CONT;
1297 ALU_DIV_K: 1317 ALU_DIV_K:
1298 tmp = (u32) DST; 1318 AX = (u32) DST;
1299 do_div(tmp, (u32) IMM); 1319 do_div(AX, (u32) IMM);
1300 DST = (u32) tmp; 1320 DST = (u32) AX;
1301 CONT; 1321 CONT;
1302 ALU_END_TO_BE: 1322 ALU_END_TO_BE:
1303 switch (IMM) { 1323 switch (IMM) {
@@ -1553,7 +1573,7 @@ STACK_FRAME_NON_STANDARD(___bpf_prog_run); /* jump table */
1553static unsigned int PROG_NAME(stack_size)(const void *ctx, const struct bpf_insn *insn) \ 1573static unsigned int PROG_NAME(stack_size)(const void *ctx, const struct bpf_insn *insn) \
1554{ \ 1574{ \
1555 u64 stack[stack_size / sizeof(u64)]; \ 1575 u64 stack[stack_size / sizeof(u64)]; \
1556 u64 regs[MAX_BPF_REG]; \ 1576 u64 regs[MAX_BPF_EXT_REG]; \
1557\ 1577\
1558 FP = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)]; \ 1578 FP = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)]; \
1559 ARG1 = (u64) (unsigned long) ctx; \ 1579 ARG1 = (u64) (unsigned long) ctx; \
@@ -1566,7 +1586,7 @@ static u64 PROG_NAME_ARGS(stack_size)(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5, \
1566 const struct bpf_insn *insn) \ 1586 const struct bpf_insn *insn) \
1567{ \ 1587{ \
1568 u64 stack[stack_size / sizeof(u64)]; \ 1588 u64 stack[stack_size / sizeof(u64)]; \
1569 u64 regs[MAX_BPF_REG]; \ 1589 u64 regs[MAX_BPF_EXT_REG]; \
1570\ 1590\
1571 FP = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)]; \ 1591 FP = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)]; \
1572 BPF_R1 = r1; \ 1592 BPF_R1 = r1; \
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 71d86e3024ae..f6bc62a9ee8e 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -710,6 +710,7 @@ static int copy_verifier_state(struct bpf_verifier_state *dst_state,
710 free_func_state(dst_state->frame[i]); 710 free_func_state(dst_state->frame[i]);
711 dst_state->frame[i] = NULL; 711 dst_state->frame[i] = NULL;
712 } 712 }
713 dst_state->speculative = src->speculative;
713 dst_state->curframe = src->curframe; 714 dst_state->curframe = src->curframe;
714 for (i = 0; i <= src->curframe; i++) { 715 for (i = 0; i <= src->curframe; i++) {
715 dst = dst_state->frame[i]; 716 dst = dst_state->frame[i];
@@ -754,7 +755,8 @@ static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx,
754} 755}
755 756
756static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env, 757static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env,
757 int insn_idx, int prev_insn_idx) 758 int insn_idx, int prev_insn_idx,
759 bool speculative)
758{ 760{
759 struct bpf_verifier_state *cur = env->cur_state; 761 struct bpf_verifier_state *cur = env->cur_state;
760 struct bpf_verifier_stack_elem *elem; 762 struct bpf_verifier_stack_elem *elem;
@@ -772,6 +774,7 @@ static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env,
772 err = copy_verifier_state(&elem->st, cur); 774 err = copy_verifier_state(&elem->st, cur);
773 if (err) 775 if (err)
774 goto err; 776 goto err;
777 elem->st.speculative |= speculative;
775 if (env->stack_size > BPF_COMPLEXITY_LIMIT_STACK) { 778 if (env->stack_size > BPF_COMPLEXITY_LIMIT_STACK) {
776 verbose(env, "BPF program is too complex\n"); 779 verbose(env, "BPF program is too complex\n");
777 goto err; 780 goto err;
@@ -1387,6 +1390,31 @@ static int check_stack_read(struct bpf_verifier_env *env,
1387 } 1390 }
1388} 1391}
1389 1392
1393static int check_stack_access(struct bpf_verifier_env *env,
1394 const struct bpf_reg_state *reg,
1395 int off, int size)
1396{
1397 /* Stack accesses must be at a fixed offset, so that we
1398 * can determine what type of data were returned. See
1399 * check_stack_read().
1400 */
1401 if (!tnum_is_const(reg->var_off)) {
1402 char tn_buf[48];
1403
1404 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
1405 verbose(env, "variable stack access var_off=%s off=%d size=%d",
1406 tn_buf, off, size);
1407 return -EACCES;
1408 }
1409
1410 if (off >= 0 || off < -MAX_BPF_STACK) {
1411 verbose(env, "invalid stack off=%d size=%d\n", off, size);
1412 return -EACCES;
1413 }
1414
1415 return 0;
1416}
1417
1390/* check read/write into map element returned by bpf_map_lookup_elem() */ 1418/* check read/write into map element returned by bpf_map_lookup_elem() */
1391static int __check_map_access(struct bpf_verifier_env *env, u32 regno, int off, 1419static int __check_map_access(struct bpf_verifier_env *env, u32 regno, int off,
1392 int size, bool zero_size_allowed) 1420 int size, bool zero_size_allowed)
@@ -1418,13 +1446,17 @@ static int check_map_access(struct bpf_verifier_env *env, u32 regno,
1418 */ 1446 */
1419 if (env->log.level) 1447 if (env->log.level)
1420 print_verifier_state(env, state); 1448 print_verifier_state(env, state);
1449
1421 /* The minimum value is only important with signed 1450 /* The minimum value is only important with signed
1422 * comparisons where we can't assume the floor of a 1451 * comparisons where we can't assume the floor of a
1423 * value is 0. If we are using signed variables for our 1452 * value is 0. If we are using signed variables for our
1424 * index'es we need to make sure that whatever we use 1453 * index'es we need to make sure that whatever we use
1425 * will have a set floor within our range. 1454 * will have a set floor within our range.
1426 */ 1455 */
1427 if (reg->smin_value < 0) { 1456 if (reg->smin_value < 0 &&
1457 (reg->smin_value == S64_MIN ||
1458 (off + reg->smin_value != (s64)(s32)(off + reg->smin_value)) ||
1459 reg->smin_value + off < 0)) {
1428 verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n", 1460 verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
1429 regno); 1461 regno);
1430 return -EACCES; 1462 return -EACCES;
@@ -1954,24 +1986,10 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
1954 } 1986 }
1955 1987
1956 } else if (reg->type == PTR_TO_STACK) { 1988 } else if (reg->type == PTR_TO_STACK) {
1957 /* stack accesses must be at a fixed offset, so that we can
1958 * determine what type of data were returned.
1959 * See check_stack_read().
1960 */
1961 if (!tnum_is_const(reg->var_off)) {
1962 char tn_buf[48];
1963
1964 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
1965 verbose(env, "variable stack access var_off=%s off=%d size=%d",
1966 tn_buf, off, size);
1967 return -EACCES;
1968 }
1969 off += reg->var_off.value; 1989 off += reg->var_off.value;
1970 if (off >= 0 || off < -MAX_BPF_STACK) { 1990 err = check_stack_access(env, reg, off, size);
1971 verbose(env, "invalid stack off=%d size=%d\n", off, 1991 if (err)
1972 size); 1992 return err;
1973 return -EACCES;
1974 }
1975 1993
1976 state = func(env, reg); 1994 state = func(env, reg);
1977 err = update_stack_depth(env, state, off); 1995 err = update_stack_depth(env, state, off);
@@ -3052,6 +3070,102 @@ static bool check_reg_sane_offset(struct bpf_verifier_env *env,
3052 return true; 3070 return true;
3053} 3071}
3054 3072
3073static struct bpf_insn_aux_data *cur_aux(struct bpf_verifier_env *env)
3074{
3075 return &env->insn_aux_data[env->insn_idx];
3076}
3077
3078static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg,
3079 u32 *ptr_limit, u8 opcode, bool off_is_neg)
3080{
3081 bool mask_to_left = (opcode == BPF_ADD && off_is_neg) ||
3082 (opcode == BPF_SUB && !off_is_neg);
3083 u32 off;
3084
3085 switch (ptr_reg->type) {
3086 case PTR_TO_STACK:
3087 off = ptr_reg->off + ptr_reg->var_off.value;
3088 if (mask_to_left)
3089 *ptr_limit = MAX_BPF_STACK + off;
3090 else
3091 *ptr_limit = -off;
3092 return 0;
3093 case PTR_TO_MAP_VALUE:
3094 if (mask_to_left) {
3095 *ptr_limit = ptr_reg->umax_value + ptr_reg->off;
3096 } else {
3097 off = ptr_reg->smin_value + ptr_reg->off;
3098 *ptr_limit = ptr_reg->map_ptr->value_size - off;
3099 }
3100 return 0;
3101 default:
3102 return -EINVAL;
3103 }
3104}
3105
3106static int sanitize_ptr_alu(struct bpf_verifier_env *env,
3107 struct bpf_insn *insn,
3108 const struct bpf_reg_state *ptr_reg,
3109 struct bpf_reg_state *dst_reg,
3110 bool off_is_neg)
3111{
3112 struct bpf_verifier_state *vstate = env->cur_state;
3113 struct bpf_insn_aux_data *aux = cur_aux(env);
3114 bool ptr_is_dst_reg = ptr_reg == dst_reg;
3115 u8 opcode = BPF_OP(insn->code);
3116 u32 alu_state, alu_limit;
3117 struct bpf_reg_state tmp;
3118 bool ret;
3119
3120 if (env->allow_ptr_leaks || BPF_SRC(insn->code) == BPF_K)
3121 return 0;
3122
3123 /* We already marked aux for masking from non-speculative
3124 * paths, thus we got here in the first place. We only care
3125 * to explore bad access from here.
3126 */
3127 if (vstate->speculative)
3128 goto do_sim;
3129
3130 alu_state = off_is_neg ? BPF_ALU_NEG_VALUE : 0;
3131 alu_state |= ptr_is_dst_reg ?
3132 BPF_ALU_SANITIZE_SRC : BPF_ALU_SANITIZE_DST;
3133
3134 if (retrieve_ptr_limit(ptr_reg, &alu_limit, opcode, off_is_neg))
3135 return 0;
3136
3137 /* If we arrived here from different branches with different
3138 * limits to sanitize, then this won't work.
3139 */
3140 if (aux->alu_state &&
3141 (aux->alu_state != alu_state ||
3142 aux->alu_limit != alu_limit))
3143 return -EACCES;
3144
3145 /* Corresponding fixup done in fixup_bpf_calls(). */
3146 aux->alu_state = alu_state;
3147 aux->alu_limit = alu_limit;
3148
3149do_sim:
3150 /* Simulate and find potential out-of-bounds access under
3151 * speculative execution from truncation as a result of
3152 * masking when off was not within expected range. If off
3153 * sits in dst, then we temporarily need to move ptr there
3154 * to simulate dst (== 0) +/-= ptr. Needed, for example,
3155 * for cases where we use K-based arithmetic in one direction
3156 * and truncated reg-based in the other in order to explore
3157 * bad access.
3158 */
3159 if (!ptr_is_dst_reg) {
3160 tmp = *dst_reg;
3161 *dst_reg = *ptr_reg;
3162 }
3163 ret = push_stack(env, env->insn_idx + 1, env->insn_idx, true);
3164 if (!ptr_is_dst_reg)
3165 *dst_reg = tmp;
3166 return !ret ? -EFAULT : 0;
3167}
3168
3055/* Handles arithmetic on a pointer and a scalar: computes new min/max and var_off. 3169/* Handles arithmetic on a pointer and a scalar: computes new min/max and var_off.
3056 * Caller should also handle BPF_MOV case separately. 3170 * Caller should also handle BPF_MOV case separately.
3057 * If we return -EACCES, caller may want to try again treating pointer as a 3171 * If we return -EACCES, caller may want to try again treating pointer as a
@@ -3070,8 +3184,9 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
3070 smin_ptr = ptr_reg->smin_value, smax_ptr = ptr_reg->smax_value; 3184 smin_ptr = ptr_reg->smin_value, smax_ptr = ptr_reg->smax_value;
3071 u64 umin_val = off_reg->umin_value, umax_val = off_reg->umax_value, 3185 u64 umin_val = off_reg->umin_value, umax_val = off_reg->umax_value,
3072 umin_ptr = ptr_reg->umin_value, umax_ptr = ptr_reg->umax_value; 3186 umin_ptr = ptr_reg->umin_value, umax_ptr = ptr_reg->umax_value;
3187 u32 dst = insn->dst_reg, src = insn->src_reg;
3073 u8 opcode = BPF_OP(insn->code); 3188 u8 opcode = BPF_OP(insn->code);
3074 u32 dst = insn->dst_reg; 3189 int ret;
3075 3190
3076 dst_reg = &regs[dst]; 3191 dst_reg = &regs[dst];
3077 3192
@@ -3104,6 +3219,13 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
3104 verbose(env, "R%d pointer arithmetic on %s prohibited\n", 3219 verbose(env, "R%d pointer arithmetic on %s prohibited\n",
3105 dst, reg_type_str[ptr_reg->type]); 3220 dst, reg_type_str[ptr_reg->type]);
3106 return -EACCES; 3221 return -EACCES;
3222 case PTR_TO_MAP_VALUE:
3223 if (!env->allow_ptr_leaks && !known && (smin_val < 0) != (smax_val < 0)) {
3224 verbose(env, "R%d has unknown scalar with mixed signed bounds, pointer arithmetic with it prohibited for !root\n",
3225 off_reg == dst_reg ? dst : src);
3226 return -EACCES;
3227 }
3228 /* fall-through */
3107 default: 3229 default:
3108 break; 3230 break;
3109 } 3231 }
@@ -3120,6 +3242,11 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
3120 3242
3121 switch (opcode) { 3243 switch (opcode) {
3122 case BPF_ADD: 3244 case BPF_ADD:
3245 ret = sanitize_ptr_alu(env, insn, ptr_reg, dst_reg, smin_val < 0);
3246 if (ret < 0) {
3247 verbose(env, "R%d tried to add from different maps or paths\n", dst);
3248 return ret;
3249 }
3123 /* We can take a fixed offset as long as it doesn't overflow 3250 /* We can take a fixed offset as long as it doesn't overflow
3124 * the s32 'off' field 3251 * the s32 'off' field
3125 */ 3252 */
@@ -3170,6 +3297,11 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
3170 } 3297 }
3171 break; 3298 break;
3172 case BPF_SUB: 3299 case BPF_SUB:
3300 ret = sanitize_ptr_alu(env, insn, ptr_reg, dst_reg, smin_val < 0);
3301 if (ret < 0) {
3302 verbose(env, "R%d tried to sub from different maps or paths\n", dst);
3303 return ret;
3304 }
3173 if (dst_reg == off_reg) { 3305 if (dst_reg == off_reg) {
3174 /* scalar -= pointer. Creates an unknown scalar */ 3306 /* scalar -= pointer. Creates an unknown scalar */
3175 verbose(env, "R%d tried to subtract pointer from scalar\n", 3307 verbose(env, "R%d tried to subtract pointer from scalar\n",
@@ -3249,6 +3381,25 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
3249 __update_reg_bounds(dst_reg); 3381 __update_reg_bounds(dst_reg);
3250 __reg_deduce_bounds(dst_reg); 3382 __reg_deduce_bounds(dst_reg);
3251 __reg_bound_offset(dst_reg); 3383 __reg_bound_offset(dst_reg);
3384
3385 /* For unprivileged we require that resulting offset must be in bounds
3386 * in order to be able to sanitize access later on.
3387 */
3388 if (!env->allow_ptr_leaks) {
3389 if (dst_reg->type == PTR_TO_MAP_VALUE &&
3390 check_map_access(env, dst, dst_reg->off, 1, false)) {
3391 verbose(env, "R%d pointer arithmetic of map value goes out of range, "
3392 "prohibited for !root\n", dst);
3393 return -EACCES;
3394 } else if (dst_reg->type == PTR_TO_STACK &&
3395 check_stack_access(env, dst_reg, dst_reg->off +
3396 dst_reg->var_off.value, 1)) {
3397 verbose(env, "R%d stack pointer arithmetic goes out of range, "
3398 "prohibited for !root\n", dst);
3399 return -EACCES;
3400 }
3401 }
3402
3252 return 0; 3403 return 0;
3253} 3404}
3254 3405
@@ -4348,7 +4499,8 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
4348 } 4499 }
4349 } 4500 }
4350 4501
4351 other_branch = push_stack(env, *insn_idx + insn->off + 1, *insn_idx); 4502 other_branch = push_stack(env, *insn_idx + insn->off + 1, *insn_idx,
4503 false);
4352 if (!other_branch) 4504 if (!other_branch)
4353 return -EFAULT; 4505 return -EFAULT;
4354 other_branch_regs = other_branch->frame[other_branch->curframe]->regs; 4506 other_branch_regs = other_branch->frame[other_branch->curframe]->regs;
@@ -5458,6 +5610,12 @@ static bool states_equal(struct bpf_verifier_env *env,
5458 if (old->curframe != cur->curframe) 5610 if (old->curframe != cur->curframe)
5459 return false; 5611 return false;
5460 5612
5613 /* Verification state from speculative execution simulation
5614 * must never prune a non-speculative execution one.
5615 */
5616 if (old->speculative && !cur->speculative)
5617 return false;
5618
5461 /* for states to be equal callsites have to be the same 5619 /* for states to be equal callsites have to be the same
5462 * and all frame states need to be equivalent 5620 * and all frame states need to be equivalent
5463 */ 5621 */
@@ -5650,7 +5808,6 @@ static int do_check(struct bpf_verifier_env *env)
5650 struct bpf_insn *insns = env->prog->insnsi; 5808 struct bpf_insn *insns = env->prog->insnsi;
5651 struct bpf_reg_state *regs; 5809 struct bpf_reg_state *regs;
5652 int insn_cnt = env->prog->len, i; 5810 int insn_cnt = env->prog->len, i;
5653 int insn_idx, prev_insn_idx = 0;
5654 int insn_processed = 0; 5811 int insn_processed = 0;
5655 bool do_print_state = false; 5812 bool do_print_state = false;
5656 5813
@@ -5660,6 +5817,7 @@ static int do_check(struct bpf_verifier_env *env)
5660 if (!state) 5817 if (!state)
5661 return -ENOMEM; 5818 return -ENOMEM;
5662 state->curframe = 0; 5819 state->curframe = 0;
5820 state->speculative = false;
5663 state->frame[0] = kzalloc(sizeof(struct bpf_func_state), GFP_KERNEL); 5821 state->frame[0] = kzalloc(sizeof(struct bpf_func_state), GFP_KERNEL);
5664 if (!state->frame[0]) { 5822 if (!state->frame[0]) {
5665 kfree(state); 5823 kfree(state);
@@ -5670,19 +5828,19 @@ static int do_check(struct bpf_verifier_env *env)
5670 BPF_MAIN_FUNC /* callsite */, 5828 BPF_MAIN_FUNC /* callsite */,
5671 0 /* frameno */, 5829 0 /* frameno */,
5672 0 /* subprogno, zero == main subprog */); 5830 0 /* subprogno, zero == main subprog */);
5673 insn_idx = 0; 5831
5674 for (;;) { 5832 for (;;) {
5675 struct bpf_insn *insn; 5833 struct bpf_insn *insn;
5676 u8 class; 5834 u8 class;
5677 int err; 5835 int err;
5678 5836
5679 if (insn_idx >= insn_cnt) { 5837 if (env->insn_idx >= insn_cnt) {
5680 verbose(env, "invalid insn idx %d insn_cnt %d\n", 5838 verbose(env, "invalid insn idx %d insn_cnt %d\n",
5681 insn_idx, insn_cnt); 5839 env->insn_idx, insn_cnt);
5682 return -EFAULT; 5840 return -EFAULT;
5683 } 5841 }
5684 5842
5685 insn = &insns[insn_idx]; 5843 insn = &insns[env->insn_idx];
5686 class = BPF_CLASS(insn->code); 5844 class = BPF_CLASS(insn->code);
5687 5845
5688 if (++insn_processed > BPF_COMPLEXITY_LIMIT_INSNS) { 5846 if (++insn_processed > BPF_COMPLEXITY_LIMIT_INSNS) {
@@ -5692,17 +5850,19 @@ static int do_check(struct bpf_verifier_env *env)
5692 return -E2BIG; 5850 return -E2BIG;
5693 } 5851 }
5694 5852
5695 err = is_state_visited(env, insn_idx); 5853 err = is_state_visited(env, env->insn_idx);
5696 if (err < 0) 5854 if (err < 0)
5697 return err; 5855 return err;
5698 if (err == 1) { 5856 if (err == 1) {
5699 /* found equivalent state, can prune the search */ 5857 /* found equivalent state, can prune the search */
5700 if (env->log.level) { 5858 if (env->log.level) {
5701 if (do_print_state) 5859 if (do_print_state)
5702 verbose(env, "\nfrom %d to %d: safe\n", 5860 verbose(env, "\nfrom %d to %d%s: safe\n",
5703 prev_insn_idx, insn_idx); 5861 env->prev_insn_idx, env->insn_idx,
5862 env->cur_state->speculative ?
5863 " (speculative execution)" : "");
5704 else 5864 else
5705 verbose(env, "%d: safe\n", insn_idx); 5865 verbose(env, "%d: safe\n", env->insn_idx);
5706 } 5866 }
5707 goto process_bpf_exit; 5867 goto process_bpf_exit;
5708 } 5868 }
@@ -5715,10 +5875,12 @@ static int do_check(struct bpf_verifier_env *env)
5715 5875
5716 if (env->log.level > 1 || (env->log.level && do_print_state)) { 5876 if (env->log.level > 1 || (env->log.level && do_print_state)) {
5717 if (env->log.level > 1) 5877 if (env->log.level > 1)
5718 verbose(env, "%d:", insn_idx); 5878 verbose(env, "%d:", env->insn_idx);
5719 else 5879 else
5720 verbose(env, "\nfrom %d to %d:", 5880 verbose(env, "\nfrom %d to %d%s:",
5721 prev_insn_idx, insn_idx); 5881 env->prev_insn_idx, env->insn_idx,
5882 env->cur_state->speculative ?
5883 " (speculative execution)" : "");
5722 print_verifier_state(env, state->frame[state->curframe]); 5884 print_verifier_state(env, state->frame[state->curframe]);
5723 do_print_state = false; 5885 do_print_state = false;
5724 } 5886 }
@@ -5729,20 +5891,20 @@ static int do_check(struct bpf_verifier_env *env)
5729 .private_data = env, 5891 .private_data = env,
5730 }; 5892 };
5731 5893
5732 verbose_linfo(env, insn_idx, "; "); 5894 verbose_linfo(env, env->insn_idx, "; ");
5733 verbose(env, "%d: ", insn_idx); 5895 verbose(env, "%d: ", env->insn_idx);
5734 print_bpf_insn(&cbs, insn, env->allow_ptr_leaks); 5896 print_bpf_insn(&cbs, insn, env->allow_ptr_leaks);
5735 } 5897 }
5736 5898
5737 if (bpf_prog_is_dev_bound(env->prog->aux)) { 5899 if (bpf_prog_is_dev_bound(env->prog->aux)) {
5738 err = bpf_prog_offload_verify_insn(env, insn_idx, 5900 err = bpf_prog_offload_verify_insn(env, env->insn_idx,
5739 prev_insn_idx); 5901 env->prev_insn_idx);
5740 if (err) 5902 if (err)
5741 return err; 5903 return err;
5742 } 5904 }
5743 5905
5744 regs = cur_regs(env); 5906 regs = cur_regs(env);
5745 env->insn_aux_data[insn_idx].seen = true; 5907 env->insn_aux_data[env->insn_idx].seen = true;
5746 5908
5747 if (class == BPF_ALU || class == BPF_ALU64) { 5909 if (class == BPF_ALU || class == BPF_ALU64) {
5748 err = check_alu_op(env, insn); 5910 err = check_alu_op(env, insn);
@@ -5768,13 +5930,13 @@ static int do_check(struct bpf_verifier_env *env)
5768 /* check that memory (src_reg + off) is readable, 5930 /* check that memory (src_reg + off) is readable,
5769 * the state of dst_reg will be updated by this func 5931 * the state of dst_reg will be updated by this func
5770 */ 5932 */
5771 err = check_mem_access(env, insn_idx, insn->src_reg, insn->off, 5933 err = check_mem_access(env, env->insn_idx, insn->src_reg,
5772 BPF_SIZE(insn->code), BPF_READ, 5934 insn->off, BPF_SIZE(insn->code),
5773 insn->dst_reg, false); 5935 BPF_READ, insn->dst_reg, false);
5774 if (err) 5936 if (err)
5775 return err; 5937 return err;
5776 5938
5777 prev_src_type = &env->insn_aux_data[insn_idx].ptr_type; 5939 prev_src_type = &env->insn_aux_data[env->insn_idx].ptr_type;
5778 5940
5779 if (*prev_src_type == NOT_INIT) { 5941 if (*prev_src_type == NOT_INIT) {
5780 /* saw a valid insn 5942 /* saw a valid insn
@@ -5799,10 +5961,10 @@ static int do_check(struct bpf_verifier_env *env)
5799 enum bpf_reg_type *prev_dst_type, dst_reg_type; 5961 enum bpf_reg_type *prev_dst_type, dst_reg_type;
5800 5962
5801 if (BPF_MODE(insn->code) == BPF_XADD) { 5963 if (BPF_MODE(insn->code) == BPF_XADD) {
5802 err = check_xadd(env, insn_idx, insn); 5964 err = check_xadd(env, env->insn_idx, insn);
5803 if (err) 5965 if (err)
5804 return err; 5966 return err;
5805 insn_idx++; 5967 env->insn_idx++;
5806 continue; 5968 continue;
5807 } 5969 }
5808 5970
@@ -5818,13 +5980,13 @@ static int do_check(struct bpf_verifier_env *env)
5818 dst_reg_type = regs[insn->dst_reg].type; 5980 dst_reg_type = regs[insn->dst_reg].type;
5819 5981
5820 /* check that memory (dst_reg + off) is writeable */ 5982 /* check that memory (dst_reg + off) is writeable */
5821 err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off, 5983 err = check_mem_access(env, env->insn_idx, insn->dst_reg,
5822 BPF_SIZE(insn->code), BPF_WRITE, 5984 insn->off, BPF_SIZE(insn->code),
5823 insn->src_reg, false); 5985 BPF_WRITE, insn->src_reg, false);
5824 if (err) 5986 if (err)
5825 return err; 5987 return err;
5826 5988
5827 prev_dst_type = &env->insn_aux_data[insn_idx].ptr_type; 5989 prev_dst_type = &env->insn_aux_data[env->insn_idx].ptr_type;
5828 5990
5829 if (*prev_dst_type == NOT_INIT) { 5991 if (*prev_dst_type == NOT_INIT) {
5830 *prev_dst_type = dst_reg_type; 5992 *prev_dst_type = dst_reg_type;
@@ -5852,9 +6014,9 @@ static int do_check(struct bpf_verifier_env *env)
5852 } 6014 }
5853 6015
5854 /* check that memory (dst_reg + off) is writeable */ 6016 /* check that memory (dst_reg + off) is writeable */
5855 err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off, 6017 err = check_mem_access(env, env->insn_idx, insn->dst_reg,
5856 BPF_SIZE(insn->code), BPF_WRITE, 6018 insn->off, BPF_SIZE(insn->code),
5857 -1, false); 6019 BPF_WRITE, -1, false);
5858 if (err) 6020 if (err)
5859 return err; 6021 return err;
5860 6022
@@ -5872,9 +6034,9 @@ static int do_check(struct bpf_verifier_env *env)
5872 } 6034 }
5873 6035
5874 if (insn->src_reg == BPF_PSEUDO_CALL) 6036 if (insn->src_reg == BPF_PSEUDO_CALL)
5875 err = check_func_call(env, insn, &insn_idx); 6037 err = check_func_call(env, insn, &env->insn_idx);
5876 else 6038 else
5877 err = check_helper_call(env, insn->imm, insn_idx); 6039 err = check_helper_call(env, insn->imm, env->insn_idx);
5878 if (err) 6040 if (err)
5879 return err; 6041 return err;
5880 6042
@@ -5887,7 +6049,7 @@ static int do_check(struct bpf_verifier_env *env)
5887 return -EINVAL; 6049 return -EINVAL;
5888 } 6050 }
5889 6051
5890 insn_idx += insn->off + 1; 6052 env->insn_idx += insn->off + 1;
5891 continue; 6053 continue;
5892 6054
5893 } else if (opcode == BPF_EXIT) { 6055 } else if (opcode == BPF_EXIT) {
@@ -5901,8 +6063,8 @@ static int do_check(struct bpf_verifier_env *env)
5901 6063
5902 if (state->curframe) { 6064 if (state->curframe) {
5903 /* exit from nested function */ 6065 /* exit from nested function */
5904 prev_insn_idx = insn_idx; 6066 env->prev_insn_idx = env->insn_idx;
5905 err = prepare_func_exit(env, &insn_idx); 6067 err = prepare_func_exit(env, &env->insn_idx);
5906 if (err) 6068 if (err)
5907 return err; 6069 return err;
5908 do_print_state = true; 6070 do_print_state = true;
@@ -5932,7 +6094,8 @@ static int do_check(struct bpf_verifier_env *env)
5932 if (err) 6094 if (err)
5933 return err; 6095 return err;
5934process_bpf_exit: 6096process_bpf_exit:
5935 err = pop_stack(env, &prev_insn_idx, &insn_idx); 6097 err = pop_stack(env, &env->prev_insn_idx,
6098 &env->insn_idx);
5936 if (err < 0) { 6099 if (err < 0) {
5937 if (err != -ENOENT) 6100 if (err != -ENOENT)
5938 return err; 6101 return err;
@@ -5942,7 +6105,7 @@ process_bpf_exit:
5942 continue; 6105 continue;
5943 } 6106 }
5944 } else { 6107 } else {
5945 err = check_cond_jmp_op(env, insn, &insn_idx); 6108 err = check_cond_jmp_op(env, insn, &env->insn_idx);
5946 if (err) 6109 if (err)
5947 return err; 6110 return err;
5948 } 6111 }
@@ -5959,8 +6122,8 @@ process_bpf_exit:
5959 if (err) 6122 if (err)
5960 return err; 6123 return err;
5961 6124
5962 insn_idx++; 6125 env->insn_idx++;
5963 env->insn_aux_data[insn_idx].seen = true; 6126 env->insn_aux_data[env->insn_idx].seen = true;
5964 } else { 6127 } else {
5965 verbose(env, "invalid BPF_LD mode\n"); 6128 verbose(env, "invalid BPF_LD mode\n");
5966 return -EINVAL; 6129 return -EINVAL;
@@ -5970,7 +6133,7 @@ process_bpf_exit:
5970 return -EINVAL; 6133 return -EINVAL;
5971 } 6134 }
5972 6135
5973 insn_idx++; 6136 env->insn_idx++;
5974 } 6137 }
5975 6138
5976 verbose(env, "processed %d insns (limit %d), stack depth ", 6139 verbose(env, "processed %d insns (limit %d), stack depth ",
@@ -6709,6 +6872,57 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
6709 continue; 6872 continue;
6710 } 6873 }
6711 6874
6875 if (insn->code == (BPF_ALU64 | BPF_ADD | BPF_X) ||
6876 insn->code == (BPF_ALU64 | BPF_SUB | BPF_X)) {
6877 const u8 code_add = BPF_ALU64 | BPF_ADD | BPF_X;
6878 const u8 code_sub = BPF_ALU64 | BPF_SUB | BPF_X;
6879 struct bpf_insn insn_buf[16];
6880 struct bpf_insn *patch = &insn_buf[0];
6881 bool issrc, isneg;
6882 u32 off_reg;
6883
6884 aux = &env->insn_aux_data[i + delta];
6885 if (!aux->alu_state)
6886 continue;
6887
6888 isneg = aux->alu_state & BPF_ALU_NEG_VALUE;
6889 issrc = (aux->alu_state & BPF_ALU_SANITIZE) ==
6890 BPF_ALU_SANITIZE_SRC;
6891
6892 off_reg = issrc ? insn->src_reg : insn->dst_reg;
6893 if (isneg)
6894 *patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1);
6895 *patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit - 1);
6896 *patch++ = BPF_ALU64_REG(BPF_SUB, BPF_REG_AX, off_reg);
6897 *patch++ = BPF_ALU64_REG(BPF_OR, BPF_REG_AX, off_reg);
6898 *patch++ = BPF_ALU64_IMM(BPF_NEG, BPF_REG_AX, 0);
6899 *patch++ = BPF_ALU64_IMM(BPF_ARSH, BPF_REG_AX, 63);
6900 if (issrc) {
6901 *patch++ = BPF_ALU64_REG(BPF_AND, BPF_REG_AX,
6902 off_reg);
6903 insn->src_reg = BPF_REG_AX;
6904 } else {
6905 *patch++ = BPF_ALU64_REG(BPF_AND, off_reg,
6906 BPF_REG_AX);
6907 }
6908 if (isneg)
6909 insn->code = insn->code == code_add ?
6910 code_sub : code_add;
6911 *patch++ = *insn;
6912 if (issrc && isneg)
6913 *patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1);
6914 cnt = patch - insn_buf;
6915
6916 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
6917 if (!new_prog)
6918 return -ENOMEM;
6919
6920 delta += cnt - 1;
6921 env->prog = prog = new_prog;
6922 insn = new_prog->insnsi + i + delta;
6923 continue;
6924 }
6925
6712 if (insn->code != (BPF_JMP | BPF_CALL)) 6926 if (insn->code != (BPF_JMP | BPF_CALL))
6713 continue; 6927 continue;
6714 if (insn->src_reg == BPF_PSEUDO_CALL) 6928 if (insn->src_reg == BPF_PSEUDO_CALL)