author		David S. Miller <davem@davemloft.net>	2019-01-28 22:38:33 -0500
committer	David S. Miller <davem@davemloft.net>	2019-01-28 22:38:33 -0500
commit		ec7146db150082737cbfeacaae0f33e42c95cf18 (patch)
tree		9fd307588cd07ed2f42d9df4554bc83ab19cf382 /kernel
parent		343917b410ba7250dbbe59a8330feffaf36eaab8 (diff)
parent		3d2af27a84a8474e510f5d8362303bfbee946308 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Daniel Borkmann says:

====================
pull-request: bpf-next 2019-01-29

The following pull-request contains BPF updates for your *net-next* tree.

The main changes are:

1) Teach the verifier dead code removal; this also allows for optimizing /
   removing conditional branches around dead code and shrinking the
   resulting image. Code-store constrained architectures like nfp would
   have a hard time doing this at the JIT level, from Jakub.

2) Add JMP32 instructions to the BPF ISA in order to allow for optimizing
   code generation for 32-bit sub-registers. Evaluation shows that this
   can result in code reduction of ~5-20% compared to 64 bit-only code
   generation. Also add implementations for most JITs, from Jiong.

3) Add support for __int128 types in BTF, which is also needed for
   vmlinux's BTF conversion to work, from Yonghong.

4) Add a new command to bpftool in order to dump a list of BPF-related
   parameters from the system or for a specific network device, e.g. in
   terms of available prog/map types or helper functions, from Quentin.

5) Add an AF_XDP sock_diag interface for querying sockets from user space,
   which provides information about the RX/TX/fill/completion rings, umem,
   memory usage, etc., from Björn.

6) Add skb context access for the skb_shared_info->gso_segs field, from Eric.

7) Add support for testing flow dissector BPF programs by extending the
   existing BPF_PROG_TEST_RUN infrastructure, from Stanislav.

8) Split BPF kselftest's test_verifier into various subgroups of tests in
   order to better deal with merge conflicts in this area, from Jakub.

9) Add support for queue/stack manipulations in bpftool, from Stanislav.

10) Document BTF, from Yonghong.

11) Dump supported ELF section names in libbpf on program load failure,
    from Taeung.

12) Silence a false positive compiler warning in the verifier's BTF
    handling, from Peter.

13) Fix a help string in bpftool's feature probing, from Prashant.

14) Remove duplicate includes in BPF kselftests, from Yue.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
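As an illustration of point 2 (not part of the pull request itself): JMP32 lets a program compare 32-bit sub-registers directly instead of first zero-extending into a scratch register and doing a 64-bit compare. A minimal sketch using the kernel's insn macros, assuming the BPF_JMP32_IMM helper this series introduces:

/* Before JMP32: zero-extend, then do a 64-bit compare (two insns). */
struct bpf_insn before[] = {
	BPF_MOV32_REG(BPF_REG_2, BPF_REG_1),	/* w2 = w1 (zero-extends r2) */
	BPF_JMP_IMM(BPF_JEQ, BPF_REG_2, 7, 2),	/* if r2 == 7 goto +2 */
};

/* With JMP32: one instruction, comparing only the low 32 bits. */
struct bpf_insn after[] = {
	BPF_JMP32_IMM(BPF_JEQ, BPF_REG_1, 7, 2),	/* if w1 == 7 goto +2 */
};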
Diffstat (limited to 'kernel')
-rw-r--r--	kernel/bpf/btf.c	104
-rw-r--r--	kernel/bpf/core.c	273
-rw-r--r--	kernel/bpf/disasm.c	34
-rw-r--r--	kernel/bpf/offload.c	35
-rw-r--r--	kernel/bpf/verifier.c	624
5 files changed, 774 insertions(+), 296 deletions(-)
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index befe570be5ba..3d661f0606fe 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -157,7 +157,7 @@
  *
  */
 
-#define BITS_PER_U64 (sizeof(u64) * BITS_PER_BYTE)
+#define BITS_PER_U128 (sizeof(u64) * BITS_PER_BYTE * 2)
 #define BITS_PER_BYTE_MASK (BITS_PER_BYTE - 1)
 #define BITS_PER_BYTE_MASKED(bits) ((bits) & BITS_PER_BYTE_MASK)
 #define BITS_ROUNDDOWN_BYTES(bits) ((bits) >> 3)
@@ -525,7 +525,7 @@ const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id)
 
 /*
  * Regular int is not a bit field and it must be either
- * u8/u16/u32/u64.
+ * u8/u16/u32/u64 or __int128.
  */
 static bool btf_type_int_is_regular(const struct btf_type *t)
 {
@@ -538,7 +538,8 @@ static bool btf_type_int_is_regular(const struct btf_type *t)
 	if (BITS_PER_BYTE_MASKED(nr_bits) ||
 	    BTF_INT_OFFSET(int_data) ||
 	    (nr_bytes != sizeof(u8) && nr_bytes != sizeof(u16) &&
-	     nr_bytes != sizeof(u32) && nr_bytes != sizeof(u64))) {
+	     nr_bytes != sizeof(u32) && nr_bytes != sizeof(u64) &&
+	     nr_bytes != (2 * sizeof(u64)))) {
 		return false;
 	}
 
@@ -1063,9 +1064,9 @@ static int btf_int_check_member(struct btf_verifier_env *env,
 	nr_copy_bits = BTF_INT_BITS(int_data) +
 		BITS_PER_BYTE_MASKED(struct_bits_off);
 
-	if (nr_copy_bits > BITS_PER_U64) {
+	if (nr_copy_bits > BITS_PER_U128) {
 		btf_verifier_log_member(env, struct_type, member,
-					"nr_copy_bits exceeds 64");
+					"nr_copy_bits exceeds 128");
 		return -EINVAL;
 	}
 
@@ -1119,9 +1120,9 @@ static int btf_int_check_kflag_member(struct btf_verifier_env *env,
 
 	bytes_offset = BITS_ROUNDDOWN_BYTES(struct_bits_off);
 	nr_copy_bits = nr_bits + BITS_PER_BYTE_MASKED(struct_bits_off);
-	if (nr_copy_bits > BITS_PER_U64) {
+	if (nr_copy_bits > BITS_PER_U128) {
 		btf_verifier_log_member(env, struct_type, member,
-					"nr_copy_bits exceeds 64");
+					"nr_copy_bits exceeds 128");
 		return -EINVAL;
 	}
 
@@ -1168,9 +1169,9 @@ static s32 btf_int_check_meta(struct btf_verifier_env *env,
 
 	nr_bits = BTF_INT_BITS(int_data) + BTF_INT_OFFSET(int_data);
 
-	if (nr_bits > BITS_PER_U64) {
+	if (nr_bits > BITS_PER_U128) {
 		btf_verifier_log_type(env, t, "nr_bits exceeds %zu",
-				      BITS_PER_U64);
+				      BITS_PER_U128);
 		return -EINVAL;
 	}
 
@@ -1211,31 +1212,93 @@ static void btf_int_log(struct btf_verifier_env *env,
 			    btf_int_encoding_str(BTF_INT_ENCODING(int_data)));
 }
 
+static void btf_int128_print(struct seq_file *m, void *data)
+{
+	/* data points to a __int128 number.
+	 * Suppose
+	 *     int128_num = *(__int128 *)data;
+	 * The below formulas shows what upper_num and lower_num represents:
+	 *     upper_num = int128_num >> 64;
+	 *     lower_num = int128_num & 0xffffffffFFFFFFFFULL;
+	 */
+	u64 upper_num, lower_num;
+
+#ifdef __BIG_ENDIAN_BITFIELD
+	upper_num = *(u64 *)data;
+	lower_num = *(u64 *)(data + 8);
+#else
+	upper_num = *(u64 *)(data + 8);
+	lower_num = *(u64 *)data;
+#endif
+	if (upper_num == 0)
+		seq_printf(m, "0x%llx", lower_num);
+	else
+		seq_printf(m, "0x%llx%016llx", upper_num, lower_num);
+}
+
+static void btf_int128_shift(u64 *print_num, u16 left_shift_bits,
+			     u16 right_shift_bits)
+{
+	u64 upper_num, lower_num;
+
+#ifdef __BIG_ENDIAN_BITFIELD
+	upper_num = print_num[0];
+	lower_num = print_num[1];
+#else
+	upper_num = print_num[1];
+	lower_num = print_num[0];
+#endif
+
+	/* shake out un-needed bits by shift/or operations */
+	if (left_shift_bits >= 64) {
+		upper_num = lower_num << (left_shift_bits - 64);
+		lower_num = 0;
+	} else {
+		upper_num = (upper_num << left_shift_bits) |
+			    (lower_num >> (64 - left_shift_bits));
+		lower_num = lower_num << left_shift_bits;
+	}
+
+	if (right_shift_bits >= 64) {
+		lower_num = upper_num >> (right_shift_bits - 64);
+		upper_num = 0;
+	} else {
+		lower_num = (lower_num >> right_shift_bits) |
+			    (upper_num << (64 - right_shift_bits));
+		upper_num = upper_num >> right_shift_bits;
+	}
+
+#ifdef __BIG_ENDIAN_BITFIELD
+	print_num[0] = upper_num;
+	print_num[1] = lower_num;
+#else
+	print_num[0] = lower_num;
+	print_num[1] = upper_num;
+#endif
+}
+
 static void btf_bitfield_seq_show(void *data, u8 bits_offset,
 				  u8 nr_bits, struct seq_file *m)
 {
 	u16 left_shift_bits, right_shift_bits;
 	u8 nr_copy_bytes;
 	u8 nr_copy_bits;
-	u64 print_num;
+	u64 print_num[2] = {};
 
 	nr_copy_bits = nr_bits + bits_offset;
 	nr_copy_bytes = BITS_ROUNDUP_BYTES(nr_copy_bits);
 
-	print_num = 0;
-	memcpy(&print_num, data, nr_copy_bytes);
+	memcpy(print_num, data, nr_copy_bytes);
 
 #ifdef __BIG_ENDIAN_BITFIELD
 	left_shift_bits = bits_offset;
 #else
-	left_shift_bits = BITS_PER_U64 - nr_copy_bits;
+	left_shift_bits = BITS_PER_U128 - nr_copy_bits;
 #endif
-	right_shift_bits = BITS_PER_U64 - nr_bits;
-
-	print_num <<= left_shift_bits;
-	print_num >>= right_shift_bits;
+	right_shift_bits = BITS_PER_U128 - nr_bits;
 
-	seq_printf(m, "0x%llx", print_num);
+	btf_int128_shift(print_num, left_shift_bits, right_shift_bits);
+	btf_int128_print(m, print_num);
 }
 
 
@@ -1250,7 +1313,7 @@ static void btf_int_bits_seq_show(const struct btf *btf,
 
 	/*
 	 * bits_offset is at most 7.
-	 * BTF_INT_OFFSET() cannot exceed 64 bits.
+	 * BTF_INT_OFFSET() cannot exceed 128 bits.
 	 */
 	total_bits_offset = bits_offset + BTF_INT_OFFSET(int_data);
 	data += BITS_ROUNDDOWN_BYTES(total_bits_offset);
@@ -1274,6 +1337,9 @@ static void btf_int_seq_show(const struct btf *btf, const struct btf_type *t,
 	}
 
 	switch (nr_bits) {
+	case 128:
+		btf_int128_print(m, data);
+		break;
 	case 64:
 		if (sign)
 			seq_printf(m, "%lld", *(s64 *)data);
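The btf_int128_shift()/btf_int128_print() pair above treats a 128-bit value as two u64 halves. A standalone userspace sketch of the same shift technique (illustrative only; unlike the kernel code it explicitly guards zero shifts, since shifting a u64 by 64 is undefined in C):

#include <stdint.h>
#include <stdio.h>

/* Little-endian layout as in btf_int128_shift() above: num[0] holds the
 * low 64 bits, num[1] the high 64 bits.
 */
static void int128_shift(uint64_t num[2], unsigned int left, unsigned int right)
{
	uint64_t hi = num[1], lo = num[0];

	if (left >= 64) {
		hi = lo << (left - 64);
		lo = 0;
	} else if (left) {
		hi = (hi << left) | (lo >> (64 - left));
		lo <<= left;
	}
	if (right >= 64) {
		lo = hi >> (right - 64);
		hi = 0;
	} else if (right) {
		lo = (lo >> right) | (hi << (64 - right));
		hi >>= right;
	}
	num[1] = hi;
	num[0] = lo;
}

int main(void)
{
	/* Extract a 9-bit field at bit offset 60 of a 128-bit value whose
	 * bits 60..64 are set: expect 0b11111 = 0x1f.
	 */
	uint64_t v[2] = { 0xf000000000000000ULL, 0x1ULL };

	int128_shift(v, 128 - (60 + 9), 128 - 9);
	if (v[1] == 0)
		printf("0x%llx\n", (unsigned long long)v[0]); /* 0x1f */
	return 0;
}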
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index f908b9356025..a7bcb23bee84 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -307,15 +307,16 @@ int bpf_prog_calc_tag(struct bpf_prog *fp)
 	return 0;
 }
 
-static int bpf_adj_delta_to_imm(struct bpf_insn *insn, u32 pos, u32 delta,
-				u32 curr, const bool probe_pass)
+static int bpf_adj_delta_to_imm(struct bpf_insn *insn, u32 pos, s32 end_old,
+				s32 end_new, u32 curr, const bool probe_pass)
 {
 	const s64 imm_min = S32_MIN, imm_max = S32_MAX;
+	s32 delta = end_new - end_old;
 	s64 imm = insn->imm;
 
-	if (curr < pos && curr + imm + 1 > pos)
+	if (curr < pos && curr + imm + 1 >= end_old)
 		imm += delta;
-	else if (curr > pos + delta && curr + imm + 1 <= pos + delta)
+	else if (curr >= end_new && curr + imm + 1 < end_new)
 		imm -= delta;
 	if (imm < imm_min || imm > imm_max)
 		return -ERANGE;
@@ -324,15 +325,16 @@ static int bpf_adj_delta_to_imm(struct bpf_insn *insn, u32 pos, u32 delta,
 	return 0;
 }
 
-static int bpf_adj_delta_to_off(struct bpf_insn *insn, u32 pos, u32 delta,
-				u32 curr, const bool probe_pass)
+static int bpf_adj_delta_to_off(struct bpf_insn *insn, u32 pos, s32 end_old,
+				s32 end_new, u32 curr, const bool probe_pass)
 {
 	const s32 off_min = S16_MIN, off_max = S16_MAX;
+	s32 delta = end_new - end_old;
 	s32 off = insn->off;
 
-	if (curr < pos && curr + off + 1 > pos)
+	if (curr < pos && curr + off + 1 >= end_old)
 		off += delta;
-	else if (curr > pos + delta && curr + off + 1 <= pos + delta)
+	else if (curr >= end_new && curr + off + 1 < end_new)
 		off -= delta;
 	if (off < off_min || off > off_max)
 		return -ERANGE;
@@ -341,10 +343,10 @@ static int bpf_adj_delta_to_off(struct bpf_insn *insn, u32 pos, u32 delta,
 	return 0;
 }
 
-static int bpf_adj_branches(struct bpf_prog *prog, u32 pos, u32 delta,
-			    const bool probe_pass)
+static int bpf_adj_branches(struct bpf_prog *prog, u32 pos, s32 end_old,
+			    s32 end_new, const bool probe_pass)
 {
-	u32 i, insn_cnt = prog->len + (probe_pass ? delta : 0);
+	u32 i, insn_cnt = prog->len + (probe_pass ? end_new - end_old : 0);
 	struct bpf_insn *insn = prog->insnsi;
 	int ret = 0;
 
@@ -356,22 +358,23 @@ static int bpf_adj_branches(struct bpf_prog *prog, u32 pos, u32 delta,
 		 * do any other adjustments. Therefore skip the patchlet.
 		 */
 		if (probe_pass && i == pos) {
-			i += delta + 1;
-			insn++;
+			i = end_new;
+			insn = prog->insnsi + end_old;
 		}
 		code = insn->code;
-		if (BPF_CLASS(code) != BPF_JMP ||
+		if ((BPF_CLASS(code) != BPF_JMP &&
+		     BPF_CLASS(code) != BPF_JMP32) ||
 		    BPF_OP(code) == BPF_EXIT)
 			continue;
 		/* Adjust offset of jmps if we cross patch boundaries. */
 		if (BPF_OP(code) == BPF_CALL) {
 			if (insn->src_reg != BPF_PSEUDO_CALL)
 				continue;
-			ret = bpf_adj_delta_to_imm(insn, pos, delta, i,
-						   probe_pass);
+			ret = bpf_adj_delta_to_imm(insn, pos, end_old,
+						   end_new, i, probe_pass);
 		} else {
-			ret = bpf_adj_delta_to_off(insn, pos, delta, i,
-						   probe_pass);
+			ret = bpf_adj_delta_to_off(insn, pos, end_old,
						   end_new, i, probe_pass);
 		}
 		if (ret)
 			break;
@@ -421,7 +424,7 @@ struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
 	 * we afterwards may not fail anymore.
 	 */
 	if (insn_adj_cnt > cnt_max &&
-	    bpf_adj_branches(prog, off, insn_delta, true))
+	    bpf_adj_branches(prog, off, off + 1, off + len, true))
 		return NULL;
 
 	/* Several new instructions need to be inserted. Make room
@@ -453,13 +456,25 @@ struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
 	 * the ship has sailed to reverse to the original state. An
 	 * overflow cannot happen at this point.
 	 */
-	BUG_ON(bpf_adj_branches(prog_adj, off, insn_delta, false));
+	BUG_ON(bpf_adj_branches(prog_adj, off, off + 1, off + len, false));
 
 	bpf_adj_linfo(prog_adj, off, insn_delta);
 
 	return prog_adj;
 }
 
+int bpf_remove_insns(struct bpf_prog *prog, u32 off, u32 cnt)
+{
+	/* Branch offsets can't overflow when program is shrinking, no need
+	 * to call bpf_adj_branches(..., true) here
+	 */
+	memmove(prog->insnsi + off, prog->insnsi + off + cnt,
+		sizeof(struct bpf_insn) * (prog->len - off - cnt));
+	prog->len -= cnt;
+
+	return WARN_ON_ONCE(bpf_adj_branches(prog, off, off + cnt, off, false));
+}
+
 void bpf_prog_kallsyms_del_subprogs(struct bpf_prog *fp)
 {
 	int i;
@@ -934,6 +949,27 @@ static int bpf_jit_blind_insn(const struct bpf_insn *from,
 		*to++ = BPF_JMP_REG(from->code, from->dst_reg, BPF_REG_AX, off);
 		break;
 
+	case BPF_JMP32 | BPF_JEQ  | BPF_K:
+	case BPF_JMP32 | BPF_JNE  | BPF_K:
+	case BPF_JMP32 | BPF_JGT  | BPF_K:
+	case BPF_JMP32 | BPF_JLT  | BPF_K:
+	case BPF_JMP32 | BPF_JGE  | BPF_K:
+	case BPF_JMP32 | BPF_JLE  | BPF_K:
+	case BPF_JMP32 | BPF_JSGT | BPF_K:
+	case BPF_JMP32 | BPF_JSLT | BPF_K:
+	case BPF_JMP32 | BPF_JSGE | BPF_K:
+	case BPF_JMP32 | BPF_JSLE | BPF_K:
+	case BPF_JMP32 | BPF_JSET | BPF_K:
+		/* Accommodate for extra offset in case of a backjump. */
+		off = from->off;
+		if (off < 0)
+			off -= 2;
+		*to++ = BPF_ALU32_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm);
+		*to++ = BPF_ALU32_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
+		*to++ = BPF_JMP32_REG(from->code, from->dst_reg, BPF_REG_AX,
+				      off);
+		break;
+
 	case BPF_LD | BPF_IMM | BPF_DW:
 		*to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ aux[1].imm);
 		*to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
@@ -1130,6 +1166,31 @@ EXPORT_SYMBOL_GPL(__bpf_call_base);
 	INSN_2(JMP, CALL),			\
 	/* Exit instruction. */			\
 	INSN_2(JMP, EXIT),			\
+	/* 32-bit Jump instructions. */		\
+	/*   Register based. */			\
+	INSN_3(JMP32, JEQ,  X),			\
+	INSN_3(JMP32, JNE,  X),			\
+	INSN_3(JMP32, JGT,  X),			\
+	INSN_3(JMP32, JLT,  X),			\
+	INSN_3(JMP32, JGE,  X),			\
+	INSN_3(JMP32, JLE,  X),			\
+	INSN_3(JMP32, JSGT, X),			\
+	INSN_3(JMP32, JSLT, X),			\
+	INSN_3(JMP32, JSGE, X),			\
+	INSN_3(JMP32, JSLE, X),			\
+	INSN_3(JMP32, JSET, X),			\
+	/*   Immediate based. */		\
+	INSN_3(JMP32, JEQ,  K),			\
+	INSN_3(JMP32, JNE,  K),			\
+	INSN_3(JMP32, JGT,  K),			\
+	INSN_3(JMP32, JLT,  K),			\
+	INSN_3(JMP32, JGE,  K),			\
+	INSN_3(JMP32, JLE,  K),			\
+	INSN_3(JMP32, JSGT, K),			\
+	INSN_3(JMP32, JSLT, K),			\
+	INSN_3(JMP32, JSGE, K),			\
+	INSN_3(JMP32, JSLE, K),			\
+	INSN_3(JMP32, JSET, K),			\
 	/* Jump instructions. */		\
 	/*   Register based. */			\
 	INSN_3(JMP, JEQ, X),			\
@@ -1390,145 +1451,49 @@ select_insn:
out:
 		CONT;
 	}
-	/* JMP */
 	JMP_JA:
 		insn += insn->off;
 		CONT;
-	JMP_JEQ_X:
-		if (DST == SRC) {
-			insn += insn->off;
-			CONT_JMP;
-		}
-		CONT;
-	JMP_JEQ_K:
-		if (DST == IMM) {
-			insn += insn->off;
-			CONT_JMP;
-		}
-		CONT;
-	JMP_JNE_X:
-		if (DST != SRC) {
-			insn += insn->off;
-			CONT_JMP;
-		}
-		CONT;
-	JMP_JNE_K:
-		if (DST != IMM) {
-			insn += insn->off;
-			CONT_JMP;
-		}
-		CONT;
-	JMP_JGT_X:
-		if (DST > SRC) {
-			insn += insn->off;
-			CONT_JMP;
-		}
-		CONT;
-	JMP_JGT_K:
-		if (DST > IMM) {
-			insn += insn->off;
-			CONT_JMP;
-		}
-		CONT;
-	JMP_JLT_X:
-		if (DST < SRC) {
-			insn += insn->off;
-			CONT_JMP;
-		}
-		CONT;
-	JMP_JLT_K:
-		if (DST < IMM) {
-			insn += insn->off;
-			CONT_JMP;
-		}
-		CONT;
-	JMP_JGE_X:
-		if (DST >= SRC) {
-			insn += insn->off;
-			CONT_JMP;
-		}
-		CONT;
-	JMP_JGE_K:
-		if (DST >= IMM) {
-			insn += insn->off;
-			CONT_JMP;
-		}
-		CONT;
-	JMP_JLE_X:
-		if (DST <= SRC) {
-			insn += insn->off;
-			CONT_JMP;
-		}
-		CONT;
-	JMP_JLE_K:
-		if (DST <= IMM) {
-			insn += insn->off;
-			CONT_JMP;
-		}
-		CONT;
-	JMP_JSGT_X:
-		if (((s64) DST) > ((s64) SRC)) {
-			insn += insn->off;
-			CONT_JMP;
-		}
-		CONT;
-	JMP_JSGT_K:
-		if (((s64) DST) > ((s64) IMM)) {
-			insn += insn->off;
-			CONT_JMP;
-		}
-		CONT;
-	JMP_JSLT_X:
-		if (((s64) DST) < ((s64) SRC)) {
-			insn += insn->off;
-			CONT_JMP;
-		}
-		CONT;
-	JMP_JSLT_K:
-		if (((s64) DST) < ((s64) IMM)) {
-			insn += insn->off;
-			CONT_JMP;
-		}
-		CONT;
-	JMP_JSGE_X:
-		if (((s64) DST) >= ((s64) SRC)) {
-			insn += insn->off;
-			CONT_JMP;
-		}
-		CONT;
-	JMP_JSGE_K:
-		if (((s64) DST) >= ((s64) IMM)) {
-			insn += insn->off;
-			CONT_JMP;
-		}
-		CONT;
-	JMP_JSLE_X:
-		if (((s64) DST) <= ((s64) SRC)) {
-			insn += insn->off;
-			CONT_JMP;
-		}
-		CONT;
-	JMP_JSLE_K:
-		if (((s64) DST) <= ((s64) IMM)) {
-			insn += insn->off;
-			CONT_JMP;
-		}
-		CONT;
-	JMP_JSET_X:
-		if (DST & SRC) {
-			insn += insn->off;
-			CONT_JMP;
-		}
-		CONT;
-	JMP_JSET_K:
-		if (DST & IMM) {
-			insn += insn->off;
-			CONT_JMP;
-		}
-		CONT;
 	JMP_EXIT:
 		return BPF_R0;
-
+	/* JMP */
+#define COND_JMP(SIGN, OPCODE, CMP_OP)				\
+	JMP_##OPCODE##_X:					\
+		if ((SIGN##64) DST CMP_OP (SIGN##64) SRC) {	\
+			insn += insn->off;			\
+			CONT_JMP;				\
+		}						\
+		CONT;						\
+	JMP32_##OPCODE##_X:					\
+		if ((SIGN##32) DST CMP_OP (SIGN##32) SRC) {	\
+			insn += insn->off;			\
+			CONT_JMP;				\
+		}						\
+		CONT;						\
+	JMP_##OPCODE##_K:					\
+		if ((SIGN##64) DST CMP_OP (SIGN##64) IMM) {	\
+			insn += insn->off;			\
+			CONT_JMP;				\
+		}						\
+		CONT;						\
+	JMP32_##OPCODE##_K:					\
+		if ((SIGN##32) DST CMP_OP (SIGN##32) IMM) {	\
+			insn += insn->off;			\
+			CONT_JMP;				\
+		}						\
+		CONT;
+	COND_JMP(u, JEQ, ==)
+	COND_JMP(u, JNE, !=)
+	COND_JMP(u, JGT, >)
+	COND_JMP(u, JLT, <)
+	COND_JMP(u, JGE, >=)
+	COND_JMP(u, JLE, <=)
+	COND_JMP(u, JSET, &)
+	COND_JMP(s, JSGT, >)
+	COND_JMP(s, JSLT, <)
+	COND_JMP(s, JSGE, >=)
+	COND_JMP(s, JSLE, <=)
+#undef COND_JMP
 	/* STX and ST and LDX*/
 #define LDST(SIZEOP, SIZE)						\
 	STX_MEM_##SIZEOP:						\
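A userspace sketch of the comparison semantics the COND_JMP macro above generates (illustrative, not kernel code): BPF_JMP compares the full 64-bit registers, BPF_JMP32 only the low 32 bits, and the signed variants read the sub-register as s32:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t dst = 0xffffffff00000001ULL, src = 0x1ULL;

	/* BPF_JMP | BPF_JEQ | BPF_X: full 64-bit compare -> not taken */
	printf("jmp   jeq: %d\n", dst == src);				/* 0 */
	/* BPF_JMP32 | BPF_JEQ | BPF_X: low 32 bits only -> taken */
	printf("jmp32 jeq: %d\n", (uint32_t)dst == (uint32_t)src);	/* 1 */
	/* Signed JMP32: 0x80000000 is negative when read as s32 */
	printf("jmp32 jsgt: %d\n", (int32_t)0x80000000u > 0);		/* 0 */
	return 0;
}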
diff --git a/kernel/bpf/disasm.c b/kernel/bpf/disasm.c
index d6b76377cb6e..de73f55e42fd 100644
--- a/kernel/bpf/disasm.c
+++ b/kernel/bpf/disasm.c
@@ -67,7 +67,7 @@ const char *const bpf_class_string[8] = {
 	[BPF_STX]   = "stx",
 	[BPF_ALU]   = "alu",
 	[BPF_JMP]   = "jmp",
-	[BPF_RET]   = "BUG",
+	[BPF_JMP32] = "jmp32",
 	[BPF_ALU64] = "alu64",
 };
 
@@ -136,23 +136,22 @@ void print_bpf_insn(const struct bpf_insn_cbs *cbs,
 		else
 			print_bpf_end_insn(verbose, cbs->private_data, insn);
 	} else if (BPF_OP(insn->code) == BPF_NEG) {
-		verbose(cbs->private_data, "(%02x) r%d = %s-r%d\n",
-			insn->code, insn->dst_reg,
-			class == BPF_ALU ? "(u32) " : "",
-			insn->dst_reg);
+		verbose(cbs->private_data, "(%02x) %c%d = -%c%d\n",
+			insn->code, class == BPF_ALU ? 'w' : 'r',
+			insn->dst_reg, class == BPF_ALU ? 'w' : 'r',
+			insn->dst_reg);
 	} else if (BPF_SRC(insn->code) == BPF_X) {
-		verbose(cbs->private_data, "(%02x) %sr%d %s %sr%d\n",
-			insn->code, class == BPF_ALU ? "(u32) " : "",
+		verbose(cbs->private_data, "(%02x) %c%d %s %c%d\n",
+			insn->code, class == BPF_ALU ? 'w' : 'r',
 			insn->dst_reg,
 			bpf_alu_string[BPF_OP(insn->code) >> 4],
-			class == BPF_ALU ? "(u32) " : "",
+			class == BPF_ALU ? 'w' : 'r',
 			insn->src_reg);
 	} else {
-		verbose(cbs->private_data, "(%02x) %sr%d %s %s%d\n",
-			insn->code, class == BPF_ALU ? "(u32) " : "",
+		verbose(cbs->private_data, "(%02x) %c%d %s %d\n",
+			insn->code, class == BPF_ALU ? 'w' : 'r',
 			insn->dst_reg,
 			bpf_alu_string[BPF_OP(insn->code) >> 4],
-			class == BPF_ALU ? "(u32) " : "",
 			insn->imm);
 	}
 } else if (class == BPF_STX) {
@@ -220,7 +219,7 @@ void print_bpf_insn(const struct bpf_insn_cbs *cbs,
 			verbose(cbs->private_data, "BUG_ld_%02x\n", insn->code);
 			return;
 		}
-	} else if (class == BPF_JMP) {
+	} else if (class == BPF_JMP32 || class == BPF_JMP) {
 		u8 opcode = BPF_OP(insn->code);
 
 		if (opcode == BPF_CALL) {
@@ -244,13 +243,18 @@ void print_bpf_insn(const struct bpf_insn_cbs *cbs,
 		} else if (insn->code == (BPF_JMP | BPF_EXIT)) {
 			verbose(cbs->private_data, "(%02x) exit\n", insn->code);
 		} else if (BPF_SRC(insn->code) == BPF_X) {
-			verbose(cbs->private_data, "(%02x) if r%d %s r%d goto pc%+d\n",
-				insn->code, insn->dst_reg,
+			verbose(cbs->private_data,
+				"(%02x) if %c%d %s %c%d goto pc%+d\n",
+				insn->code, class == BPF_JMP32 ? 'w' : 'r',
+				insn->dst_reg,
 				bpf_jmp_string[BPF_OP(insn->code) >> 4],
+				class == BPF_JMP32 ? 'w' : 'r',
 				insn->src_reg, insn->off);
 		} else {
-			verbose(cbs->private_data, "(%02x) if r%d %s 0x%x goto pc%+d\n",
-				insn->code, insn->dst_reg,
+			verbose(cbs->private_data,
+				"(%02x) if %c%d %s 0x%x goto pc%+d\n",
+				insn->code, class == BPF_JMP32 ? 'w' : 'r',
+				insn->dst_reg,
 				bpf_jmp_string[BPF_OP(insn->code) >> 4],
 				insn->imm, insn->off);
 		}
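For reference, sample output under the new format strings (derived from the code above, not captured from a live verifier):

#include <stdio.h>

int main(void)
{
	/* BPF_ALU | BPF_ADD | BPF_K on r1 (code 0x04), imm 5:
	 * old format: "(04) (u32) r1 += (u32) 5"
	 * new format uses the wN sub-register spelling:
	 */
	printf("(04) w1 += 5\n");
	/* BPF_ALU64 (code 0x07) keeps the rN spelling: */
	printf("(07) r1 += 5\n");
	/* BPF_JMP32 | BPF_JEQ | BPF_K (code 0x16) likewise prints wN: */
	printf("(16) if w1 == 7 goto pc+2\n");
	return 0;
}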
diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c
index 54cf2b9c44a4..39dba8c90331 100644
--- a/kernel/bpf/offload.c
+++ b/kernel/bpf/offload.c
@@ -173,6 +173,41 @@ int bpf_prog_offload_finalize(struct bpf_verifier_env *env)
 	return ret;
 }
 
+void
+bpf_prog_offload_replace_insn(struct bpf_verifier_env *env, u32 off,
+			      struct bpf_insn *insn)
+{
+	const struct bpf_prog_offload_ops *ops;
+	struct bpf_prog_offload *offload;
+	int ret = -EOPNOTSUPP;
+
+	down_read(&bpf_devs_lock);
+	offload = env->prog->aux->offload;
+	if (offload) {
+		ops = offload->offdev->ops;
+		if (!offload->opt_failed && ops->replace_insn)
+			ret = ops->replace_insn(env, off, insn);
+		offload->opt_failed |= ret;
+	}
+	up_read(&bpf_devs_lock);
+}
+
+void
+bpf_prog_offload_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt)
+{
+	struct bpf_prog_offload *offload;
+	int ret = -EOPNOTSUPP;
+
+	down_read(&bpf_devs_lock);
+	offload = env->prog->aux->offload;
+	if (offload) {
+		if (!offload->opt_failed && offload->offdev->ops->remove_insns)
+			ret = offload->offdev->ops->remove_insns(env, off, cnt);
+		offload->opt_failed |= ret;
+	}
+	up_read(&bpf_devs_lock);
+}
+
 static void __bpf_prog_offload_destroy(struct bpf_prog *prog)
 {
 	struct bpf_prog_offload *offload = prog->aux->offload;
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 56674a7c3778..8c1c21cd50b4 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1095,7 +1095,7 @@ static int check_subprogs(struct bpf_verifier_env *env)
 	for (i = 0; i < insn_cnt; i++) {
 		u8 code = insn[i].code;
 
-		if (BPF_CLASS(code) != BPF_JMP)
+		if (BPF_CLASS(code) != BPF_JMP && BPF_CLASS(code) != BPF_JMP32)
 			goto next;
 		if (BPF_OP(code) == BPF_EXIT || BPF_OP(code) == BPF_CALL)
 			goto next;
@@ -4031,11 +4031,50 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *vstate,
  *  0 - branch will not be taken and fall-through to next insn
  * -1 - unknown. Example: "if (reg < 5)" is unknown when register value range [0,10]
  */
-static int is_branch_taken(struct bpf_reg_state *reg, u64 val, u8 opcode)
+static int is_branch_taken(struct bpf_reg_state *reg, u64 val, u8 opcode,
+			   bool is_jmp32)
 {
+	struct bpf_reg_state reg_lo;
+	s64 sval;
+
 	if (__is_pointer_value(false, reg))
 		return -1;
 
+	if (is_jmp32) {
+		reg_lo = *reg;
+		reg = &reg_lo;
+		/* For JMP32, only low 32 bits are compared, coerce_reg_to_size
+		 * could truncate high bits and update umin/umax according to
+		 * information of low bits.
+		 */
+		coerce_reg_to_size(reg, 4);
+		/* smin/smax need special handling. For example, after coerce,
+		 * if smin_value is 0x00000000ffffffffLL, the value is -1 when
+		 * used as operand to JMP32. It is a negative number from s32's
+		 * point of view, while it is a positive number when seen as
+		 * s64. The smin/smax are kept as s64, therefore, when used with
+		 * JMP32, they need to be transformed into s32, then sign
+		 * extended back to s64.
+		 *
+		 * Also, smin/smax were copied from umin/umax. If umin/umax has
+		 * different sign bit, then min/max relationship doesn't
+		 * maintain after casting into s32, for this case, set smin/smax
+		 * to safest range.
+		 */
+		if ((reg->umax_value ^ reg->umin_value) &
+		    (1ULL << 31)) {
+			reg->smin_value = S32_MIN;
+			reg->smax_value = S32_MAX;
+		}
+		reg->smin_value = (s64)(s32)reg->smin_value;
+		reg->smax_value = (s64)(s32)reg->smax_value;
+
+		val = (u32)val;
+		sval = (s64)(s32)val;
+	} else {
+		sval = (s64)val;
+	}
+
 	switch (opcode) {
 	case BPF_JEQ:
 		if (tnum_is_const(reg->var_off))
@@ -4058,9 +4097,9 @@ static int is_branch_taken(struct bpf_reg_state *reg, u64 val, u8 opcode)
 			return 0;
 		break;
 	case BPF_JSGT:
-		if (reg->smin_value > (s64)val)
+		if (reg->smin_value > sval)
 			return 1;
-		else if (reg->smax_value < (s64)val)
+		else if (reg->smax_value < sval)
 			return 0;
 		break;
4066 case BPF_JLT: 4105 case BPF_JLT:
@@ -4070,9 +4109,9 @@ static int is_branch_taken(struct bpf_reg_state *reg, u64 val, u8 opcode)
4070 return 0; 4109 return 0;
4071 break; 4110 break;
4072 case BPF_JSLT: 4111 case BPF_JSLT:
4073 if (reg->smax_value < (s64)val) 4112 if (reg->smax_value < sval)
4074 return 1; 4113 return 1;
4075 else if (reg->smin_value >= (s64)val) 4114 else if (reg->smin_value >= sval)
4076 return 0; 4115 return 0;
4077 break; 4116 break;
4078 case BPF_JGE: 4117 case BPF_JGE:
@@ -4082,9 +4121,9 @@ static int is_branch_taken(struct bpf_reg_state *reg, u64 val, u8 opcode)
4082 return 0; 4121 return 0;
4083 break; 4122 break;
4084 case BPF_JSGE: 4123 case BPF_JSGE:
4085 if (reg->smin_value >= (s64)val) 4124 if (reg->smin_value >= sval)
4086 return 1; 4125 return 1;
4087 else if (reg->smax_value < (s64)val) 4126 else if (reg->smax_value < sval)
4088 return 0; 4127 return 0;
4089 break; 4128 break;
4090 case BPF_JLE: 4129 case BPF_JLE:
@@ -4094,9 +4133,9 @@ static int is_branch_taken(struct bpf_reg_state *reg, u64 val, u8 opcode)
4094 return 0; 4133 return 0;
4095 break; 4134 break;
4096 case BPF_JSLE: 4135 case BPF_JSLE:
4097 if (reg->smax_value <= (s64)val) 4136 if (reg->smax_value <= sval)
4098 return 1; 4137 return 1;
4099 else if (reg->smin_value > (s64)val) 4138 else if (reg->smin_value > sval)
4100 return 0; 4139 return 0;
4101 break; 4140 break;
4102 } 4141 }
@@ -4104,6 +4143,29 @@ static int is_branch_taken(struct bpf_reg_state *reg, u64 val, u8 opcode)
 	return -1;
 }
 
+/* Generate min value of the high 32-bit from TNUM info. */
+static u64 gen_hi_min(struct tnum var)
+{
+	return var.value & ~0xffffffffULL;
+}
+
+/* Generate max value of the high 32-bit from TNUM info. */
+static u64 gen_hi_max(struct tnum var)
+{
+	return (var.value | var.mask) & ~0xffffffffULL;
+}
+
+/* Return true if VAL is compared with a s64 sign extended from s32, and they
+ * are with the same signedness.
+ */
+static bool cmp_val_with_extended_s64(s64 sval, struct bpf_reg_state *reg)
+{
+	return ((s32)sval >= 0 &&
+		reg->smin_value >= 0 && reg->smax_value <= S32_MAX) ||
+	       ((s32)sval < 0 &&
+		reg->smax_value <= 0 && reg->smin_value >= S32_MIN);
+}
+
 /* Adjusts the register min/max values in the case that the dst_reg is the
  * variable register that we are working on, and src_reg is a constant or we're
  * simply doing a BPF_K check.
@@ -4111,8 +4173,10 @@ static int is_branch_taken(struct bpf_reg_state *reg, u64 val, u8 opcode)
  */
 static void reg_set_min_max(struct bpf_reg_state *true_reg,
 			    struct bpf_reg_state *false_reg, u64 val,
-			    u8 opcode)
+			    u8 opcode, bool is_jmp32)
 {
+	s64 sval;
+
 	/* If the dst_reg is a pointer, we can't learn anything about its
 	 * variable offset from the compare (unless src_reg were a pointer into
 	 * the same object, but we don't bother with that.
@@ -4122,19 +4186,31 @@ static void reg_set_min_max(struct bpf_reg_state *true_reg,
 	if (__is_pointer_value(false, false_reg))
 		return;
 
+	val = is_jmp32 ? (u32)val : val;
+	sval = is_jmp32 ? (s64)(s32)val : (s64)val;
+
 	switch (opcode) {
 	case BPF_JEQ:
-		/* If this is false then we know nothing Jon Snow, but if it is
-		 * true then we know for sure.
-		 */
-		__mark_reg_known(true_reg, val);
-		break;
 	case BPF_JNE:
-		/* If this is true we know nothing Jon Snow, but if it is false
-		 * we know the value for sure;
+	{
+		struct bpf_reg_state *reg =
+			opcode == BPF_JEQ ? true_reg : false_reg;
+
+		/* For BPF_JEQ, if this is false we know nothing Jon Snow, but
+		 * if it is true we know the value for sure. Likewise for
+		 * BPF_JNE.
 		 */
-		__mark_reg_known(false_reg, val);
+		if (is_jmp32) {
+			u64 old_v = reg->var_off.value;
+			u64 hi_mask = ~0xffffffffULL;
+
+			reg->var_off.value = (old_v & hi_mask) | val;
+			reg->var_off.mask &= hi_mask;
+		} else {
+			__mark_reg_known(reg, val);
+		}
 		break;
+	}
 	case BPF_JSET:
 		false_reg->var_off = tnum_and(false_reg->var_off,
 					      tnum_const(~val));
@@ -4142,38 +4218,61 @@ static void reg_set_min_max(struct bpf_reg_state *true_reg,
 		true_reg->var_off = tnum_or(true_reg->var_off,
 					    tnum_const(val));
 		break;
-	case BPF_JGT:
-		false_reg->umax_value = min(false_reg->umax_value, val);
-		true_reg->umin_value = max(true_reg->umin_value, val + 1);
-		break;
-	case BPF_JSGT:
-		false_reg->smax_value = min_t(s64, false_reg->smax_value, val);
-		true_reg->smin_value = max_t(s64, true_reg->smin_value, val + 1);
-		break;
-	case BPF_JLT:
-		false_reg->umin_value = max(false_reg->umin_value, val);
-		true_reg->umax_value = min(true_reg->umax_value, val - 1);
-		break;
-	case BPF_JSLT:
-		false_reg->smin_value = max_t(s64, false_reg->smin_value, val);
-		true_reg->smax_value = min_t(s64, true_reg->smax_value, val - 1);
-		break;
 	case BPF_JGE:
-		false_reg->umax_value = min(false_reg->umax_value, val - 1);
-		true_reg->umin_value = max(true_reg->umin_value, val);
+	case BPF_JGT:
+	{
+		u64 false_umax = opcode == BPF_JGT ? val    : val - 1;
+		u64 true_umin = opcode == BPF_JGT ? val + 1 : val;
+
+		if (is_jmp32) {
+			false_umax += gen_hi_max(false_reg->var_off);
+			true_umin += gen_hi_min(true_reg->var_off);
+		}
+		false_reg->umax_value = min(false_reg->umax_value, false_umax);
+		true_reg->umin_value = max(true_reg->umin_value, true_umin);
 		break;
+	}
 	case BPF_JSGE:
-		false_reg->smax_value = min_t(s64, false_reg->smax_value, val - 1);
-		true_reg->smin_value = max_t(s64, true_reg->smin_value, val);
+	case BPF_JSGT:
+	{
+		s64 false_smax = opcode == BPF_JSGT ? sval    : sval - 1;
+		s64 true_smin = opcode == BPF_JSGT ? sval + 1 : sval;
+
+		/* If the full s64 was not sign-extended from s32 then don't
+		 * deduct further info.
+		 */
+		if (is_jmp32 && !cmp_val_with_extended_s64(sval, false_reg))
+			break;
+		false_reg->smax_value = min(false_reg->smax_value, false_smax);
+		true_reg->smin_value = max(true_reg->smin_value, true_smin);
 		break;
+	}
 	case BPF_JLE:
-		false_reg->umin_value = max(false_reg->umin_value, val + 1);
-		true_reg->umax_value = min(true_reg->umax_value, val);
+	case BPF_JLT:
+	{
+		u64 false_umin = opcode == BPF_JLT ? val    : val + 1;
+		u64 true_umax = opcode == BPF_JLT ? val - 1 : val;
+
+		if (is_jmp32) {
+			false_umin += gen_hi_min(false_reg->var_off);
+			true_umax += gen_hi_max(true_reg->var_off);
+		}
+		false_reg->umin_value = max(false_reg->umin_value, false_umin);
+		true_reg->umax_value = min(true_reg->umax_value, true_umax);
 		break;
+	}
 	case BPF_JSLE:
-		false_reg->smin_value = max_t(s64, false_reg->smin_value, val + 1);
-		true_reg->smax_value = min_t(s64, true_reg->smax_value, val);
+	case BPF_JSLT:
+	{
+		s64 false_smin = opcode == BPF_JSLT ? sval    : sval + 1;
+		s64 true_smax = opcode == BPF_JSLT ? sval - 1 : sval;
+
+		if (is_jmp32 && !cmp_val_with_extended_s64(sval, false_reg))
+			break;
+		false_reg->smin_value = max(false_reg->smin_value, false_smin);
+		true_reg->smax_value = min(true_reg->smax_value, true_smax);
 		break;
+	}
 	default:
 		break;
 	}
@@ -4196,24 +4295,34 @@ static void reg_set_min_max(struct bpf_reg_state *true_reg,
  */
 static void reg_set_min_max_inv(struct bpf_reg_state *true_reg,
 				struct bpf_reg_state *false_reg, u64 val,
-				u8 opcode)
+				u8 opcode, bool is_jmp32)
 {
+	s64 sval;
+
 	if (__is_pointer_value(false, false_reg))
 		return;
 
+	val = is_jmp32 ? (u32)val : val;
+	sval = is_jmp32 ? (s64)(s32)val : (s64)val;
+
 	switch (opcode) {
 	case BPF_JEQ:
-		/* If this is false then we know nothing Jon Snow, but if it is
-		 * true then we know for sure.
-		 */
-		__mark_reg_known(true_reg, val);
-		break;
 	case BPF_JNE:
-		/* If this is true we know nothing Jon Snow, but if it is false
-		 * we know the value for sure;
-		 */
-		__mark_reg_known(false_reg, val);
+	{
+		struct bpf_reg_state *reg =
+			opcode == BPF_JEQ ? true_reg : false_reg;
+
+		if (is_jmp32) {
+			u64 old_v = reg->var_off.value;
+			u64 hi_mask = ~0xffffffffULL;
+
+			reg->var_off.value = (old_v & hi_mask) | val;
+			reg->var_off.mask &= hi_mask;
+		} else {
+			__mark_reg_known(reg, val);
+		}
 		break;
+	}
 	case BPF_JSET:
 		false_reg->var_off = tnum_and(false_reg->var_off,
 					      tnum_const(~val));
@@ -4221,38 +4330,58 @@ static void reg_set_min_max_inv(struct bpf_reg_state *true_reg,
 		true_reg->var_off = tnum_or(true_reg->var_off,
 					    tnum_const(val));
 		break;
-	case BPF_JGT:
-		true_reg->umax_value = min(true_reg->umax_value, val - 1);
-		false_reg->umin_value = max(false_reg->umin_value, val);
-		break;
-	case BPF_JSGT:
-		true_reg->smax_value = min_t(s64, true_reg->smax_value, val - 1);
-		false_reg->smin_value = max_t(s64, false_reg->smin_value, val);
-		break;
-	case BPF_JLT:
-		true_reg->umin_value = max(true_reg->umin_value, val + 1);
-		false_reg->umax_value = min(false_reg->umax_value, val);
-		break;
-	case BPF_JSLT:
-		true_reg->smin_value = max_t(s64, true_reg->smin_value, val + 1);
-		false_reg->smax_value = min_t(s64, false_reg->smax_value, val);
-		break;
 	case BPF_JGE:
-		true_reg->umax_value = min(true_reg->umax_value, val);
-		false_reg->umin_value = max(false_reg->umin_value, val + 1);
+	case BPF_JGT:
+	{
+		u64 false_umin = opcode == BPF_JGT ? val    : val + 1;
+		u64 true_umax = opcode == BPF_JGT ? val - 1 : val;
+
+		if (is_jmp32) {
+			false_umin += gen_hi_min(false_reg->var_off);
+			true_umax += gen_hi_max(true_reg->var_off);
+		}
+		false_reg->umin_value = max(false_reg->umin_value, false_umin);
+		true_reg->umax_value = min(true_reg->umax_value, true_umax);
 		break;
+	}
 	case BPF_JSGE:
-		true_reg->smax_value = min_t(s64, true_reg->smax_value, val);
-		false_reg->smin_value = max_t(s64, false_reg->smin_value, val + 1);
+	case BPF_JSGT:
+	{
+		s64 false_smin = opcode == BPF_JSGT ? sval    : sval + 1;
+		s64 true_smax = opcode == BPF_JSGT ? sval - 1 : sval;
+
+		if (is_jmp32 && !cmp_val_with_extended_s64(sval, false_reg))
+			break;
+		false_reg->smin_value = max(false_reg->smin_value, false_smin);
+		true_reg->smax_value = min(true_reg->smax_value, true_smax);
 		break;
+	}
 	case BPF_JLE:
-		true_reg->umin_value = max(true_reg->umin_value, val);
-		false_reg->umax_value = min(false_reg->umax_value, val - 1);
+	case BPF_JLT:
+	{
+		u64 false_umax = opcode == BPF_JLT ? val    : val - 1;
+		u64 true_umin = opcode == BPF_JLT ? val + 1 : val;
+
+		if (is_jmp32) {
+			false_umax += gen_hi_max(false_reg->var_off);
+			true_umin += gen_hi_min(true_reg->var_off);
+		}
+		false_reg->umax_value = min(false_reg->umax_value, false_umax);
+		true_reg->umin_value = max(true_reg->umin_value, true_umin);
 		break;
+	}
 	case BPF_JSLE:
-		true_reg->smin_value = max_t(s64, true_reg->smin_value, val);
-		false_reg->smax_value = min_t(s64, false_reg->smax_value, val - 1);
+	case BPF_JSLT:
+	{
+		s64 false_smax = opcode == BPF_JSLT ? sval    : sval - 1;
+		s64 true_smin = opcode == BPF_JSLT ? sval + 1 : sval;
+
+		if (is_jmp32 && !cmp_val_with_extended_s64(sval, false_reg))
+			break;
+		false_reg->smax_value = min(false_reg->smax_value, false_smax);
+		true_reg->smin_value = max(true_reg->smin_value, true_smin);
 		break;
+	}
 	default:
 		break;
 	}
@@ -4390,6 +4519,10 @@ static bool try_match_pkt_pointers(const struct bpf_insn *insn,
 	if (BPF_SRC(insn->code) != BPF_X)
 		return false;
 
+	/* Pointers are always 64-bit. */
+	if (BPF_CLASS(insn->code) == BPF_JMP32)
+		return false;
+
 	switch (BPF_OP(insn->code)) {
 	case BPF_JGT:
 		if ((dst_reg->type == PTR_TO_PACKET &&
@@ -4482,16 +4615,18 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
 	struct bpf_reg_state *regs = this_branch->frame[this_branch->curframe]->regs;
 	struct bpf_reg_state *dst_reg, *other_branch_regs;
 	u8 opcode = BPF_OP(insn->code);
+	bool is_jmp32;
 	int err;
 
-	if (opcode > BPF_JSLE) {
-		verbose(env, "invalid BPF_JMP opcode %x\n", opcode);
+	/* Only conditional jumps are expected to reach here. */
+	if (opcode == BPF_JA || opcode > BPF_JSLE) {
+		verbose(env, "invalid BPF_JMP/JMP32 opcode %x\n", opcode);
 		return -EINVAL;
 	}
 
 	if (BPF_SRC(insn->code) == BPF_X) {
 		if (insn->imm != 0) {
-			verbose(env, "BPF_JMP uses reserved fields\n");
+			verbose(env, "BPF_JMP/JMP32 uses reserved fields\n");
 			return -EINVAL;
 		}
 
@@ -4507,7 +4642,7 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
 		}
 	} else {
 		if (insn->src_reg != BPF_REG_0) {
-			verbose(env, "BPF_JMP uses reserved fields\n");
+			verbose(env, "BPF_JMP/JMP32 uses reserved fields\n");
 			return -EINVAL;
 		}
 	}
@@ -4518,9 +4653,11 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
 		return err;
 
 	dst_reg = &regs[insn->dst_reg];
+	is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
 
 	if (BPF_SRC(insn->code) == BPF_K) {
-		int pred = is_branch_taken(dst_reg, insn->imm, opcode);
+		int pred = is_branch_taken(dst_reg, insn->imm, opcode,
+					   is_jmp32);
 
 		if (pred == 1) {
 			/* only follow the goto, ignore fall-through */
@@ -4548,30 +4685,51 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
 	 * comparable.
 	 */
 	if (BPF_SRC(insn->code) == BPF_X) {
+		struct bpf_reg_state *src_reg = &regs[insn->src_reg];
+		struct bpf_reg_state lo_reg0 = *dst_reg;
+		struct bpf_reg_state lo_reg1 = *src_reg;
+		struct bpf_reg_state *src_lo, *dst_lo;
+
+		dst_lo = &lo_reg0;
+		src_lo = &lo_reg1;
+		coerce_reg_to_size(dst_lo, 4);
+		coerce_reg_to_size(src_lo, 4);
+
 		if (dst_reg->type == SCALAR_VALUE &&
-		    regs[insn->src_reg].type == SCALAR_VALUE) {
-			if (tnum_is_const(regs[insn->src_reg].var_off))
+		    src_reg->type == SCALAR_VALUE) {
+			if (tnum_is_const(src_reg->var_off) ||
+			    (is_jmp32 && tnum_is_const(src_lo->var_off)))
 				reg_set_min_max(&other_branch_regs[insn->dst_reg],
-						dst_reg, regs[insn->src_reg].var_off.value,
-						opcode);
-			else if (tnum_is_const(dst_reg->var_off))
+						dst_reg,
+						is_jmp32
+						? src_lo->var_off.value
+						: src_reg->var_off.value,
+						opcode, is_jmp32);
+			else if (tnum_is_const(dst_reg->var_off) ||
+				 (is_jmp32 && tnum_is_const(dst_lo->var_off)))
 				reg_set_min_max_inv(&other_branch_regs[insn->src_reg],
-						    &regs[insn->src_reg],
-						    dst_reg->var_off.value, opcode);
-			else if (opcode == BPF_JEQ || opcode == BPF_JNE)
+						    src_reg,
+						    is_jmp32
+						    ? dst_lo->var_off.value
+						    : dst_reg->var_off.value,
+						    opcode, is_jmp32);
+			else if (!is_jmp32 &&
+				 (opcode == BPF_JEQ || opcode == BPF_JNE))
 				/* Comparing for equality, we can combine knowledge */
 				reg_combine_min_max(&other_branch_regs[insn->src_reg],
 						    &other_branch_regs[insn->dst_reg],
-						    &regs[insn->src_reg],
-						    &regs[insn->dst_reg], opcode);
+						    src_reg, dst_reg, opcode);
 		}
 	} else if (dst_reg->type == SCALAR_VALUE) {
 		reg_set_min_max(&other_branch_regs[insn->dst_reg],
-				dst_reg, insn->imm, opcode);
+				dst_reg, insn->imm, opcode, is_jmp32);
 	}
 
-	/* detect if R == 0 where R is returned from bpf_map_lookup_elem() */
-	if (BPF_SRC(insn->code) == BPF_K &&
+	/* detect if R == 0 where R is returned from bpf_map_lookup_elem().
+	 * NOTE: these optimizations below are related with pointer comparison
+	 *       which will never be JMP32.
+	 */
+	if (!is_jmp32 && BPF_SRC(insn->code) == BPF_K &&
 	    insn->imm == 0 && (opcode == BPF_JEQ || opcode == BPF_JNE) &&
 	    reg_type_may_be_null(dst_reg->type)) {
 		/* Mark all identical registers in each branch as either
@@ -4900,7 +5058,8 @@ peek_stack:
 			goto check_state;
 		t = insn_stack[cur_stack - 1];
 
-	if (BPF_CLASS(insns[t].code) == BPF_JMP) {
+	if (BPF_CLASS(insns[t].code) == BPF_JMP ||
+	    BPF_CLASS(insns[t].code) == BPF_JMP32) {
 		u8 opcode = BPF_OP(insns[t].code);
 
 		if (opcode == BPF_EXIT) {
@@ -4997,13 +5156,14 @@ static int check_btf_func(struct bpf_verifier_env *env,
 			  const union bpf_attr *attr,
 			  union bpf_attr __user *uattr)
 {
-	u32 i, nfuncs, urec_size, min_size, prev_offset;
+	u32 i, nfuncs, urec_size, min_size;
 	u32 krec_size = sizeof(struct bpf_func_info);
 	struct bpf_func_info *krecord;
 	const struct btf_type *type;
 	struct bpf_prog *prog;
 	const struct btf *btf;
 	void __user *urecord;
+	u32 prev_offset = 0;
 	int ret = 0;
 
 	nfuncs = attr->func_info_cnt;
@@ -6055,7 +6215,7 @@ static int do_check(struct bpf_verifier_env *env)
 			if (err)
 				return err;
 
-		} else if (class == BPF_JMP) {
+		} else if (class == BPF_JMP || class == BPF_JMP32) {
 			u8 opcode = BPF_OP(insn->code);
 
 			if (opcode == BPF_CALL) {
@@ -6063,7 +6223,8 @@ static int do_check(struct bpf_verifier_env *env)
 				    insn->off != 0 ||
 				    (insn->src_reg != BPF_REG_0 &&
 				     insn->src_reg != BPF_PSEUDO_CALL) ||
-				    insn->dst_reg != BPF_REG_0) {
+				    insn->dst_reg != BPF_REG_0 ||
+				    class == BPF_JMP32) {
 					verbose(env, "BPF_CALL uses reserved fields\n");
 					return -EINVAL;
 				}
@@ -6079,7 +6240,8 @@ static int do_check(struct bpf_verifier_env *env)
 				if (BPF_SRC(insn->code) != BPF_K ||
 				    insn->imm != 0 ||
 				    insn->src_reg != BPF_REG_0 ||
-				    insn->dst_reg != BPF_REG_0) {
+				    insn->dst_reg != BPF_REG_0 ||
+				    class == BPF_JMP32) {
 					verbose(env, "BPF_JA uses reserved fields\n");
 					return -EINVAL;
 				}
@@ -6091,7 +6253,8 @@ static int do_check(struct bpf_verifier_env *env)
 				if (BPF_SRC(insn->code) != BPF_K ||
 				    insn->imm != 0 ||
 				    insn->src_reg != BPF_REG_0 ||
-				    insn->dst_reg != BPF_REG_0) {
+				    insn->dst_reg != BPF_REG_0 ||
+				    class == BPF_JMP32) {
 					verbose(env, "BPF_EXIT uses reserved fields\n");
 					return -EINVAL;
 				}
@@ -6431,6 +6594,153 @@ static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 of
 	return new_prog;
 }
 
+static int adjust_subprog_starts_after_remove(struct bpf_verifier_env *env,
+					      u32 off, u32 cnt)
+{
+	int i, j;
+
+	/* find first prog starting at or after off (first to remove) */
+	for (i = 0; i < env->subprog_cnt; i++)
+		if (env->subprog_info[i].start >= off)
+			break;
+	/* find first prog starting at or after off + cnt (first to stay) */
+	for (j = i; j < env->subprog_cnt; j++)
+		if (env->subprog_info[j].start >= off + cnt)
+			break;
+	/* if j doesn't start exactly at off + cnt, we are just removing
+	 * the front of previous prog
+	 */
+	if (env->subprog_info[j].start != off + cnt)
+		j--;
+
+	if (j > i) {
+		struct bpf_prog_aux *aux = env->prog->aux;
+		int move;
+
+		/* move fake 'exit' subprog as well */
+		move = env->subprog_cnt + 1 - j;
+
+		memmove(env->subprog_info + i,
+			env->subprog_info + j,
+			sizeof(*env->subprog_info) * move);
+		env->subprog_cnt -= j - i;
+
+		/* remove func_info */
+		if (aux->func_info) {
+			move = aux->func_info_cnt - j;
+
+			memmove(aux->func_info + i,
+				aux->func_info + j,
+				sizeof(*aux->func_info) * move);
+			aux->func_info_cnt -= j - i;
+			/* func_info->insn_off is set after all code rewrites,
+			 * in adjust_btf_func() - no need to adjust
+			 */
+		}
+	} else {
+		/* convert i from "first prog to remove" to "first to adjust" */
+		if (env->subprog_info[i].start == off)
+			i++;
+	}
+
+	/* update fake 'exit' subprog as well */
+	for (; i <= env->subprog_cnt; i++)
+		env->subprog_info[i].start -= cnt;
+
+	return 0;
+}
+
+static int bpf_adj_linfo_after_remove(struct bpf_verifier_env *env, u32 off,
+				      u32 cnt)
+{
+	struct bpf_prog *prog = env->prog;
+	u32 i, l_off, l_cnt, nr_linfo;
+	struct bpf_line_info *linfo;
+
+	nr_linfo = prog->aux->nr_linfo;
+	if (!nr_linfo)
+		return 0;
+
+	linfo = prog->aux->linfo;
+
+	/* find first line info to remove, count lines to be removed */
+	for (i = 0; i < nr_linfo; i++)
+		if (linfo[i].insn_off >= off)
+			break;
+
+	l_off = i;
+	l_cnt = 0;
+	for (; i < nr_linfo; i++)
+		if (linfo[i].insn_off < off + cnt)
+			l_cnt++;
+		else
+			break;
+
+	/* First live insn doesn't match first live linfo, it needs to "inherit"
+	 * last removed linfo.  prog is already modified, so prog->len == off
+	 * means no live instructions after (tail of the program was removed).
+	 */
+	if (prog->len != off && l_cnt &&
+	    (i == nr_linfo || linfo[i].insn_off != off + cnt)) {
+		l_cnt--;
+		linfo[--i].insn_off = off + cnt;
+	}
+
+	/* remove the line info which refer to the removed instructions */
+	if (l_cnt) {
+		memmove(linfo + l_off, linfo + i,
+			sizeof(*linfo) * (nr_linfo - i));
+
+		prog->aux->nr_linfo -= l_cnt;
+		nr_linfo = prog->aux->nr_linfo;
+	}
+
+	/* pull all linfo[i].insn_off >= off + cnt in by cnt */
+	for (i = l_off; i < nr_linfo; i++)
+		linfo[i].insn_off -= cnt;
+
+	/* fix up all subprogs (incl. 'exit') which start >= off */
+	for (i = 0; i <= env->subprog_cnt; i++)
+		if (env->subprog_info[i].linfo_idx > l_off) {
+			/* program may have started in the removed region but
+			 * may not be fully removed
+			 */
+			if (env->subprog_info[i].linfo_idx >= l_off + l_cnt)
+				env->subprog_info[i].linfo_idx -= l_cnt;
+			else
+				env->subprog_info[i].linfo_idx = l_off;
+		}
+
+	return 0;
+}
+
+static int verifier_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt)
+{
+	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
+	unsigned int orig_prog_len = env->prog->len;
+	int err;
+
+	if (bpf_prog_is_dev_bound(env->prog->aux))
+		bpf_prog_offload_remove_insns(env, off, cnt);
+
+	err = bpf_remove_insns(env->prog, off, cnt);
+	if (err)
+		return err;
+
+	err = adjust_subprog_starts_after_remove(env, off, cnt);
+	if (err)
+		return err;
+
+	err = bpf_adj_linfo_after_remove(env, off, cnt);
+	if (err)
+		return err;
+
+	memmove(aux_data + off, aux_data + off + cnt,
+		sizeof(*aux_data) * (orig_prog_len - off - cnt));
+
+	return 0;
+}
+
 /* The verifier does more data flow analysis than llvm and will not
  * explore branches that are dead at run time. Malicious programs can
  * have dead code too. Therefore replace all dead at-run-time code
@@ -6457,6 +6767,91 @@ static void sanitize_dead_code(struct bpf_verifier_env *env)
 	}
 }
 
+static bool insn_is_cond_jump(u8 code)
+{
+	u8 op;
+
+	if (BPF_CLASS(code) == BPF_JMP32)
+		return true;
+
+	if (BPF_CLASS(code) != BPF_JMP)
+		return false;
+
+	op = BPF_OP(code);
+	return op != BPF_JA && op != BPF_EXIT && op != BPF_CALL;
+}
+
+static void opt_hard_wire_dead_code_branches(struct bpf_verifier_env *env)
+{
+	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
+	struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, 0, 0, 0);
+	struct bpf_insn *insn = env->prog->insnsi;
+	const int insn_cnt = env->prog->len;
+	int i;
+
+	for (i = 0; i < insn_cnt; i++, insn++) {
+		if (!insn_is_cond_jump(insn->code))
+			continue;
+
+		if (!aux_data[i + 1].seen)
+			ja.off = insn->off;
+		else if (!aux_data[i + 1 + insn->off].seen)
+			ja.off = 0;
+		else
+			continue;
+
+		if (bpf_prog_is_dev_bound(env->prog->aux))
+			bpf_prog_offload_replace_insn(env, i, &ja);
+
+		memcpy(insn, &ja, sizeof(ja));
+	}
+}
+
+static int opt_remove_dead_code(struct bpf_verifier_env *env)
+{
+	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
+	int insn_cnt = env->prog->len;
+	int i, err;
+
+	for (i = 0; i < insn_cnt; i++) {
+		int j;
+
+		j = 0;
+		while (i + j < insn_cnt && !aux_data[i + j].seen)
+			j++;
+		if (!j)
+			continue;
+
+		err = verifier_remove_insns(env, i, j);
+		if (err)
+			return err;
+		insn_cnt = env->prog->len;
+	}
+
+	return 0;
+}
+
+static int opt_remove_nops(struct bpf_verifier_env *env)
+{
+	const struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, 0, 0, 0);
+	struct bpf_insn *insn = env->prog->insnsi;
+	int insn_cnt = env->prog->len;
+	int i, err;
+
+	for (i = 0; i < insn_cnt; i++) {
+		if (memcmp(&insn[i], &ja, sizeof(ja)))
+			continue;
+
+		err = verifier_remove_insns(env, i, 1);
+		if (err)
+			return err;
+		insn_cnt--;
+		i--;
+	}
+
+	return 0;
+}
+
 /* convert load instructions that access fields of a context type into a
  * sequence of instructions that access fields of the underlying structure:
  *	struct __sk_buff -> struct sk_buff
@@ -7147,7 +7542,8 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr,
 {
 	struct bpf_verifier_env *env;
 	struct bpf_verifier_log *log;
-	int ret = -EINVAL;
+	int i, len, ret = -EINVAL;
+	bool is_priv;
 
 	/* no program is valid */
 	if (ARRAY_SIZE(bpf_verifier_ops) == 0)
@@ -7161,12 +7557,14 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr,
 		return -ENOMEM;
 	log = &env->log;
 
+	len = (*prog)->len;
 	env->insn_aux_data =
-		vzalloc(array_size(sizeof(struct bpf_insn_aux_data),
-				   (*prog)->len));
+		vzalloc(array_size(sizeof(struct bpf_insn_aux_data), len));
 	ret = -ENOMEM;
 	if (!env->insn_aux_data)
 		goto err_free_env;
+	for (i = 0; i < len; i++)
+		env->insn_aux_data[i].orig_idx = i;
 	env->prog = *prog;
 	env->ops = bpf_verifier_ops[env->prog->type];
 
@@ -7194,6 +7592,9 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr,
 	if (attr->prog_flags & BPF_F_ANY_ALIGNMENT)
 		env->strict_alignment = false;
 
+	is_priv = capable(CAP_SYS_ADMIN);
+	env->allow_ptr_leaks = is_priv;
+
 	ret = replace_map_fd_with_map_ptr(env);
 	if (ret < 0)
 		goto skip_full_check;
@@ -7211,8 +7612,6 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr,
 	if (!env->explored_states)
 		goto skip_full_check;
 
-	env->allow_ptr_leaks = capable(CAP_SYS_ADMIN);
-
 	ret = check_subprogs(env);
 	if (ret < 0)
 		goto skip_full_check;
@@ -7242,8 +7641,17 @@ skip_full_check:
 	ret = check_max_stack_depth(env);
 
 	/* instruction rewrites happen after this point */
-	if (ret == 0)
-		sanitize_dead_code(env);
+	if (is_priv) {
+		if (ret == 0)
+			opt_hard_wire_dead_code_branches(env);
+		if (ret == 0)
+			ret = opt_remove_dead_code(env);
+		if (ret == 0)
+			ret = opt_remove_nops(env);
+	} else {
+		if (ret == 0)
+			sanitize_dead_code(env);
+	}
 
 	if (ret == 0)
 		/* program is valid, convert *(u32*)(ctx + off) accesses */
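A toy model (simplified types, not the verifier's) of the dead-code compaction done by opt_remove_dead_code() and bpf_remove_insns() above: find maximal runs of instructions the verifier never marked seen and shift the live tail left over them. The real code additionally fixes up branch offsets, subprog starts, and line info at each removal:

#include <stdio.h>
#include <string.h>

struct insn { int op; };

/* Mirrors the bpf_remove_insns() memmove-and-shrink step. */
static void remove_insns(struct insn *prog, int *len, int off, int cnt)
{
	memmove(prog + off, prog + off + cnt,
		sizeof(*prog) * (*len - off - cnt));
	*len -= cnt;
}

int main(void)
{
	struct insn prog[] = { {1}, {2}, {0}, {0}, {3} };
	int seen[]        = {  1,   1,   0,   0,   1  };
	int len = 5, i;

	for (i = 0; i < len; i++) {
		int j = 0;

		/* count a maximal run of never-seen instructions */
		while (i + j < len && !seen[i + j])
			j++;
		if (!j)
			continue;
		remove_insns(prog, &len, i, j);
		/* keep the seen[] bookkeeping in step with the program */
		memmove(seen + i, seen + i + j, sizeof(int) * (len - i));
	}
	for (i = 0; i < len; i++)
		printf("%d ", prog[i].op);	/* prints: 1 2 3 */
	printf("\n");
	return 0;
}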