-rw-r--r--   include/uapi/linux/bpf.h |   2
-rw-r--r--   kernel/bpf/core.c        |   5
-rw-r--r--   kernel/bpf/verifier.c    | 441
3 files changed, 440 insertions, 8 deletions
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index b7b0fb1292e7..406459b935a2 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -370,6 +370,8 @@ struct __sk_buff {
 	__u32 cb[5];
 	__u32 hash;
 	__u32 tc_classid;
+	__u32 data;
+	__u32 data_end;
 };
 
 struct bpf_tunnel_key {
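
The two new fields expose skb->data and skb->data_end to tc programs. A minimal
usage sketch in restricted C is shown below; the section name, the 'return 0'
(TC_ACT_OK) convention and the header layout are illustrative assumptions of the
clang/tc toolchain, not something this patch defines. The verifier only accepts
the final load because of the explicit comparison against data_end.

	#include <linux/bpf.h>
	#include <linux/if_ether.h>

	__attribute__((section("classifier"), used))
	int cls_example(struct __sk_buff *skb)
	{
		void *data     = (void *)(long)skb->data;
		void *data_end = (void *)(long)skb->data_end;
		struct ethhdr *eth = data;

		/* the verifier insists on this check before any packet load */
		if (data + sizeof(*eth) > data_end)
			return 0;

		/* 2-byte read at offset 12, provably inside [data, data_end) */
		return eth->h_proto;
	}
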
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index e4248fe79513..d781b077431f 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -794,6 +794,11 @@ void __weak bpf_int_jit_compile(struct bpf_prog *prog)
 {
 }
 
+bool __weak bpf_helper_changes_skb_data(void *func)
+{
+	return false;
+}
+
 /* To execute LD_ABS/LD_IND instructions __bpf_prog_run() may call
  * skb_copy_bits(), so provide a weak definition of it for NET-less config.
  */
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index afeb62808902..6338c61fc2a1 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1,4 +1,5 @@
 /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
+ * Copyright (c) 2016 Facebook
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of version 2 of the GNU General Public
@@ -136,13 +137,32 @@ enum bpf_reg_type {
 	FRAME_PTR,		/* reg == frame_pointer */
 	PTR_TO_STACK,		/* reg == frame_pointer + imm */
 	CONST_IMM,		/* constant integer value */
+
+	/* PTR_TO_PACKET represents:
+	 * skb->data
+	 * skb->data + imm
+	 * skb->data + (u16) var
+	 * skb->data + (u16) var + imm
+	 * if (range > 0) then [ptr, ptr + range - off) is safe to access
+	 * if (id > 0) means that some 'var' was added
+	 * if (off > 0) means that 'imm' was added
+	 */
+	PTR_TO_PACKET,
+	PTR_TO_PACKET_END,	/* skb->data + headlen */
 };
 
 struct reg_state {
 	enum bpf_reg_type type;
 	union {
-		/* valid when type == CONST_IMM | PTR_TO_STACK */
-		long imm;
+		/* valid when type == CONST_IMM | PTR_TO_STACK | UNKNOWN_VALUE */
+		s64 imm;
+
+		/* valid when type == PTR_TO_PACKET* */
+		struct {
+			u32 id;
+			u16 off;
+			u16 range;
+		};
 
 		/* valid when type == CONST_PTR_TO_MAP | PTR_TO_MAP_VALUE |
 		 *   PTR_TO_MAP_VALUE_OR_NULL
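
To make the id/off/range tracking above concrete, here is a hypothetical
walk-through for a tc program (register numbers and the verifier-state
comments are illustrative, not output of this patch):

	void *data     = (void *)(long)skb->data;	/* R2 = pkt(id=0,off=0,r=0)   */
	void *data_end = (void *)(long)skb->data_end;	/* R3 = pkt_end               */
	void *p        = data + 14;			/* R4 = pkt(id=0,off=14,r=0)  */

	if (p > data_end)				/* on the fall-through path:  */
		return 0;				/* R4 = pkt(id=0,off=14,r=14) */
							/* R2 = pkt(id=0,off=0,r=14), */
							/* i.e. [data, data + 14) is  */
							/* now safe to read           */
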
@@ -247,6 +267,8 @@ static const char * const reg_type_str[] = {
 	[FRAME_PTR]		= "fp",
 	[PTR_TO_STACK]		= "fp",
 	[CONST_IMM]		= "imm",
+	[PTR_TO_PACKET]		= "pkt",
+	[PTR_TO_PACKET_END]	= "pkt_end",
 };
 
 static void print_verifier_state(struct verifier_state *state)
@@ -262,7 +284,12 @@ static void print_verifier_state(struct verifier_state *state)
 			continue;
 		verbose(" R%d=%s", i, reg_type_str[t]);
 		if (t == CONST_IMM || t == PTR_TO_STACK)
-			verbose("%ld", reg->imm);
+			verbose("%lld", reg->imm);
+		else if (t == PTR_TO_PACKET)
+			verbose("(id=%d,off=%d,r=%d)",
+				reg->id, reg->off, reg->range);
+		else if (t == UNKNOWN_VALUE && reg->imm)
+			verbose("%lld", reg->imm);
 		else if (t == CONST_PTR_TO_MAP || t == PTR_TO_MAP_VALUE ||
 			 t == PTR_TO_MAP_VALUE_OR_NULL)
 			verbose("(ks=%d,vs=%d)",
@@ -548,6 +575,8 @@ static bool is_spillable_regtype(enum bpf_reg_type type)
 	case PTR_TO_MAP_VALUE_OR_NULL:
 	case PTR_TO_STACK:
 	case PTR_TO_CTX:
+	case PTR_TO_PACKET:
+	case PTR_TO_PACKET_END:
 	case FRAME_PTR:
 	case CONST_PTR_TO_MAP:
 		return true;
@@ -647,6 +676,27 @@ static int check_map_access(struct verifier_env *env, u32 regno, int off,
 	return 0;
 }
 
+#define MAX_PACKET_OFF 0xffff
+
+static int check_packet_access(struct verifier_env *env, u32 regno, int off,
+			       int size)
+{
+	struct reg_state *regs = env->cur_state.regs;
+	struct reg_state *reg = &regs[regno];
+	int linear_size = (int) reg->range - (int) reg->off;
+
+	if (linear_size < 0 || linear_size >= MAX_PACKET_OFF) {
+		verbose("verifier bug\n");
+		return -EFAULT;
+	}
+	if (off < 0 || off + size > linear_size) {
+		verbose("invalid access to packet, off=%d size=%d, allowed=%d\n",
+			off, size, linear_size);
+		return -EACCES;
+	}
+	return 0;
+}
+
 /* check access to 'struct bpf_context' fields */
 static int check_ctx_access(struct verifier_env *env, int off, int size,
 			    enum bpf_access_type t)
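
A worked example of the bound check above (all values hypothetical):

	/* reg = pkt(off=0, range=14)  ->  linear_size = 14 - 0 = 14
	 *   2-byte load at off=12: 12 + 2 <= 14  -> allowed
	 *   2-byte load at off=13: 13 + 2  > 14  -> "invalid access to packet"
	 * reg = pkt(off=14, range=14) ->  linear_size = 0: nothing is accessible
	 * through this pointer until another 'if (ptr > data_end)' check
	 * enlarges the range.
	 */
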
@@ -677,6 +727,45 @@ static bool is_pointer_value(struct verifier_env *env, int regno)
 	}
 }
 
+static int check_ptr_alignment(struct verifier_env *env, struct reg_state *reg,
+			       int off, int size)
+{
+	if (reg->type != PTR_TO_PACKET) {
+		if (off % size != 0) {
+			verbose("misaligned access off %d size %d\n", off, size);
+			return -EACCES;
+		} else {
+			return 0;
+		}
+	}
+
+	switch (env->prog->type) {
+	case BPF_PROG_TYPE_SCHED_CLS:
+	case BPF_PROG_TYPE_SCHED_ACT:
+		break;
+	default:
+		verbose("verifier is misconfigured\n");
+		return -EACCES;
+	}
+
+	if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
+		/* misaligned access to packet is ok on x86,arm,arm64 */
+		return 0;
+
+	if (reg->id && size != 1) {
+		verbose("Unknown packet alignment. Only byte-sized access allowed\n");
+		return -EACCES;
+	}
+
+	/* skb->data is NET_IP_ALIGN-ed */
+	if ((NET_IP_ALIGN + reg->off + off) % size != 0) {
+		verbose("misaligned packet access off %d+%d+%d size %d\n",
+			NET_IP_ALIGN, reg->off, off, size);
+		return -EACCES;
+	}
+	return 0;
+}
+
 /* check whether memory at (regno + off) is accessible for t = (read | write)
  * if t==write, value_regno is a register which value is stored into memory
  * if t==read, value_regno is a register which will receive the value from memory
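
A worked example of the strict-alignment path (assuming NET_IP_ALIGN == 2, the
usual value on architectures without efficient unaligned access; the other
numbers are hypothetical):

	/* reg = pkt(id=0, off=14), 4-byte load at off=0:
	 *   (2 + 14 + 0) % 4 == 0  -> allowed
	 * reg = pkt(id=0, off=14), 4-byte load at off=2:
	 *   (2 + 14 + 2) % 4 == 2  -> "misaligned packet access"
	 * If id != 0 (a variable offset was added), the pointer's alignment is
	 * unknown, so only size == 1 loads are accepted on such architectures.
	 */
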
@@ -698,10 +787,9 @@ static int check_mem_access(struct verifier_env *env, u32 regno, int off,
 	if (size < 0)
 		return size;
 
-	if (off % size != 0) {
-		verbose("misaligned access off %d size %d\n", off, size);
-		return -EACCES;
-	}
+	err = check_ptr_alignment(env, reg, off, size);
+	if (err)
+		return err;
 
 	if (reg->type == PTR_TO_MAP_VALUE) {
 		if (t == BPF_WRITE && value_regno >= 0 &&
@@ -720,8 +808,16 @@ static int check_mem_access(struct verifier_env *env, u32 regno, int off,
 			return -EACCES;
 		}
 		err = check_ctx_access(env, off, size, t);
-		if (!err && t == BPF_READ && value_regno >= 0)
+		if (!err && t == BPF_READ && value_regno >= 0) {
 			mark_reg_unknown_value(state->regs, value_regno);
+			if (off == offsetof(struct __sk_buff, data) &&
+			    env->allow_ptr_leaks)
+				/* note that reg.[id|off|range] == 0 */
+				state->regs[value_regno].type = PTR_TO_PACKET;
+			else if (off == offsetof(struct __sk_buff, data_end) &&
+				 env->allow_ptr_leaks)
+				state->regs[value_regno].type = PTR_TO_PACKET_END;
+		}
 
 	} else if (reg->type == FRAME_PTR || reg->type == PTR_TO_STACK) {
 		if (off >= 0 || off < -MAX_BPF_STACK) {
@@ -739,11 +835,28 @@ static int check_mem_access(struct verifier_env *env, u32 regno, int off,
 		} else {
 			err = check_stack_read(state, off, size, value_regno);
 		}
+	} else if (state->regs[regno].type == PTR_TO_PACKET) {
+		if (t == BPF_WRITE) {
+			verbose("cannot write into packet\n");
+			return -EACCES;
+		}
+		err = check_packet_access(env, regno, off, size);
+		if (!err && t == BPF_READ && value_regno >= 0)
+			mark_reg_unknown_value(state->regs, value_regno);
 	} else {
 		verbose("R%d invalid mem access '%s'\n",
 			regno, reg_type_str[reg->type]);
 		return -EACCES;
 	}
+
+	if (!err && size <= 2 && value_regno >= 0 && env->allow_ptr_leaks &&
+	    state->regs[value_regno].type == UNKNOWN_VALUE) {
+		/* 1 or 2 byte load zero-extends, determine the number of
+		 * zero upper bits. Not doing it for 4 byte load, since
+		 * such values cannot be added to ptr_to_packet anyway.
+		 */
+		state->regs[value_regno].imm = 64 - size * 8;
+	}
 	return err;
 }
 
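Example of the resulting zero-bit bookkeeping (hypothetical loads):

	/* r2 = *(u8 *)(pkt + 12);   r2 is UNKNOWN_VALUE, imm = 64 - 1*8 = 56
	 * r2 = *(u16 *)(pkt + 12);  r2 is UNKNOWN_VALUE, imm = 64 - 2*8 = 48
	 * 48 is exactly the minimum later required by check_packet_ptr_add(),
	 * so a 1- or 2-byte load may be added to a packet pointer, while a
	 * 4-byte load may not.
	 */
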
@@ -1001,6 +1114,29 @@ static int check_raw_mode(const struct bpf_func_proto *fn)
 	return count > 1 ? -EINVAL : 0;
 }
 
+static void clear_all_pkt_pointers(struct verifier_env *env)
+{
+	struct verifier_state *state = &env->cur_state;
+	struct reg_state *regs = state->regs, *reg;
+	int i;
+
+	for (i = 0; i < MAX_BPF_REG; i++)
+		if (regs[i].type == PTR_TO_PACKET ||
+		    regs[i].type == PTR_TO_PACKET_END)
+			mark_reg_unknown_value(regs, i);
+
+	for (i = 0; i < MAX_BPF_STACK; i += BPF_REG_SIZE) {
+		if (state->stack_slot_type[i] != STACK_SPILL)
+			continue;
+		reg = &state->spilled_regs[i / BPF_REG_SIZE];
+		if (reg->type != PTR_TO_PACKET &&
+		    reg->type != PTR_TO_PACKET_END)
+			continue;
+		reg->type = UNKNOWN_VALUE;
+		reg->imm = 0;
+	}
+}
+
 static int check_call(struct verifier_env *env, int func_id)
 {
 	struct verifier_state *state = &env->cur_state;
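
For program authors this means packet pointers do not survive such helper
calls and must be re-derived afterwards. Continuing the sketch from the bpf.h
hunk above (which helpers actually invalidate pointers is decided by the
net-side implementation of bpf_helper_changes_skb_data(); treating
bpf_skb_store_bytes() as one of them, and the helper stub declaration itself,
are assumptions here):

	bpf_skb_store_bytes(skb, 0, buf, sizeof(buf), 0); /* may move skb->data     */
	data     = (void *)(long)skb->data;               /* re-derive the pointers */
	data_end = (void *)(long)skb->data_end;
	if (data + sizeof(struct ethhdr) > data_end)      /* and re-do the check    */
		return 0;
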
@@ -1008,6 +1144,7 @@ static int check_call(struct verifier_env *env, int func_id)
 	struct reg_state *regs = state->regs;
 	struct reg_state *reg;
 	struct bpf_call_arg_meta meta;
+	bool changes_data;
 	int i, err;
 
 	/* find function prototype */
@@ -1030,6 +1167,8 @@ static int check_call(struct verifier_env *env, int func_id)
 		return -EINVAL;
 	}
 
+	changes_data = bpf_helper_changes_skb_data(fn->func);
+
 	memset(&meta, 0, sizeof(meta));
 
 	/* We only support one arg being in raw mode at the moment, which
@@ -1100,6 +1239,189 @@ static int check_call(struct verifier_env *env, int func_id)
 	if (err)
 		return err;
 
+	if (changes_data)
+		clear_all_pkt_pointers(env);
+	return 0;
+}
+
+static int check_packet_ptr_add(struct verifier_env *env, struct bpf_insn *insn)
+{
+	struct reg_state *regs = env->cur_state.regs;
+	struct reg_state *dst_reg = &regs[insn->dst_reg];
+	struct reg_state *src_reg = &regs[insn->src_reg];
+	s32 imm;
+
+	if (BPF_SRC(insn->code) == BPF_K) {
+		/* pkt_ptr += imm */
+		imm = insn->imm;
+
+add_imm:
+		if (imm <= 0) {
+			verbose("addition of negative constant to packet pointer is not allowed\n");
+			return -EACCES;
+		}
+		if (imm >= MAX_PACKET_OFF ||
+		    imm + dst_reg->off >= MAX_PACKET_OFF) {
+			verbose("constant %d is too large to add to packet pointer\n",
+				imm);
+			return -EACCES;
+		}
+		/* a constant was added to pkt_ptr.
+		 * Remember it while keeping the same 'id'
+		 */
+		dst_reg->off += imm;
+	} else {
+		if (src_reg->type == CONST_IMM) {
+			/* pkt_ptr += reg where reg is known constant */
+			imm = src_reg->imm;
+			goto add_imm;
+		}
+		/* disallow pkt_ptr += reg
+		 * if reg is not unknown_value with guaranteed zero upper bits,
+		 * otherwise pkt_ptr may overflow and addition will become
+		 * subtraction which is not allowed
+		 */
+		if (src_reg->type != UNKNOWN_VALUE) {
+			verbose("cannot add '%s' to ptr_to_packet\n",
+				reg_type_str[src_reg->type]);
+			return -EACCES;
+		}
+		if (src_reg->imm < 48) {
+			verbose("cannot add integer value with %lld upper zero bits to ptr_to_packet\n",
+				src_reg->imm);
+			return -EACCES;
+		}
+		/* dst_reg stays as pkt_ptr type and since some positive
+		 * integer value was added to the pointer, increment its 'id'
+		 */
+		dst_reg->id++;
+
+		/* something was added to pkt_ptr, set range and off to zero */
+		dst_reg->off = 0;
+		dst_reg->range = 0;
+	}
+	return 0;
+}
+
+static int evaluate_reg_alu(struct verifier_env *env, struct bpf_insn *insn)
+{
+	struct reg_state *regs = env->cur_state.regs;
+	struct reg_state *dst_reg = &regs[insn->dst_reg];
+	u8 opcode = BPF_OP(insn->code);
+	s64 imm_log2;
+
+	/* for type == UNKNOWN_VALUE:
+	 * imm > 0 -> number of zero upper bits
+	 * imm == 0 -> don't track which is the same as all bits can be non-zero
+	 */
+
+	if (BPF_SRC(insn->code) == BPF_X) {
+		struct reg_state *src_reg = &regs[insn->src_reg];
+
+		if (src_reg->type == UNKNOWN_VALUE && src_reg->imm > 0 &&
+		    dst_reg->imm && opcode == BPF_ADD) {
+			/* dreg += sreg
+			 * where both have zero upper bits. Adding them
+			 * can only result in making one more bit non-zero
+			 * in the larger value.
+			 * Ex. 0xffff (imm=48) + 1 (imm=63) = 0x10000 (imm=47)
+			 *     0xffff (imm=48) + 0xffff = 0x1fffe (imm=47)
+			 */
+			dst_reg->imm = min(dst_reg->imm, src_reg->imm);
+			dst_reg->imm--;
+			return 0;
+		}
+		if (src_reg->type == CONST_IMM && src_reg->imm > 0 &&
+		    dst_reg->imm && opcode == BPF_ADD) {
+			/* dreg += sreg
+			 * where dreg has zero upper bits and sreg is const.
+			 * Adding them can only result in making one more bit
+			 * non-zero in the larger value.
+			 */
+			imm_log2 = __ilog2_u64((long long)src_reg->imm);
+			dst_reg->imm = min(dst_reg->imm, 63 - imm_log2);
+			dst_reg->imm--;
+			return 0;
+		}
+		/* all other cases are not supported yet, just mark dst_reg */
+		dst_reg->imm = 0;
+		return 0;
+	}
+
+	/* sign extend 32-bit imm into 64-bit to make sure that
+	 * negative values occupy bit 63. Note ilog2() would have
+	 * been incorrect, since sizeof(insn->imm) == 4
+	 */
+	imm_log2 = __ilog2_u64((long long)insn->imm);
+
+	if (dst_reg->imm && opcode == BPF_LSH) {
+		/* reg <<= imm
+		 * if reg was a result of 2 byte load, then its imm == 48
+		 * which means that upper 48 bits are zero and shifting this reg
+		 * left by 4 would mean that upper 44 bits are still zero
+		 */
+		dst_reg->imm -= insn->imm;
+	} else if (dst_reg->imm && opcode == BPF_MUL) {
+		/* reg *= imm
+		 * if multiplying by 14 subtract 4
+		 * This is a conservative calculation of upper zero bits.
+		 * It's not trying to special case insn->imm == 1 or 0 cases
+		 */
+		dst_reg->imm -= imm_log2 + 1;
+	} else if (opcode == BPF_AND) {
+		/* reg &= imm */
+		dst_reg->imm = 63 - imm_log2;
+	} else if (dst_reg->imm && opcode == BPF_ADD) {
+		/* reg += imm */
+		dst_reg->imm = min(dst_reg->imm, 63 - imm_log2);
+		dst_reg->imm--;
+	} else if (opcode == BPF_RSH) {
+		/* reg >>= imm
+		 * which means that after right shift, upper bits will be zero
+		 * note that verifier already checked that
+		 * 0 <= imm < 64 for shift insn
+		 */
+		dst_reg->imm += insn->imm;
+		if (unlikely(dst_reg->imm > 64))
+			/* some dumb code did:
+			 * r2 = *(u32 *)mem;
+			 * r2 >>= 32;
+			 * and all bits are zero now */
+			dst_reg->imm = 64;
+	} else {
+		/* all other alu ops mean that we don't know what will
+		 * happen to the value, mark it with unknown number of zero bits
+		 */
+		dst_reg->imm = 0;
+	}
+
+	if (dst_reg->imm < 0) {
+		/* all 64 bits of the register can contain non-zero bits
+		 * and such value cannot be added to ptr_to_packet, since it
+		 * may overflow, mark it as unknown to avoid further eval
+		 */
+		dst_reg->imm = 0;
+	}
+	return 0;
+}
+
+static int evaluate_reg_imm_alu(struct verifier_env *env, struct bpf_insn *insn)
+{
+	struct reg_state *regs = env->cur_state.regs;
+	struct reg_state *dst_reg = &regs[insn->dst_reg];
+	struct reg_state *src_reg = &regs[insn->src_reg];
+	u8 opcode = BPF_OP(insn->code);
+
+	/* dst_reg->type == CONST_IMM here, simulate execution of 'add' insn.
+	 * Don't care about overflow or negative values, just add them
+	 */
+	if (opcode == BPF_ADD && BPF_SRC(insn->code) == BPF_K)
+		dst_reg->imm += insn->imm;
+	else if (opcode == BPF_ADD && BPF_SRC(insn->code) == BPF_X &&
+		 src_reg->type == CONST_IMM)
+		dst_reg->imm += src_reg->imm;
+	else
+		mark_reg_unknown_value(regs, insn->dst_reg);
 	return 0;
 }
 
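A worked example of the zero-upper-bits arithmetic above ('imm' is the number
of upper bits known to be zero; both sequences are hypothetical):

	/* r5 = *(u16 *)(pkt + 2)     imm = 64 - 16          = 48
	 * r5 <<= 2                   imm = 48 - 2           = 46   (BPF_LSH)
	 * pkt_ptr += r5              rejected: only 46 < 48 upper bits known zero
	 *
	 * r5 = *(u16 *)(pkt + 2)     imm = 48
	 * r5 &= 0x3f                 imm = 63 - ilog2(0x3f) = 58   (BPF_AND)
	 * pkt_ptr += r5              accepted; the pointer gets a new 'id' and
	 *                            its off/range are reset to 0
	 */
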
@@ -1245,6 +1567,21 @@ static int check_alu_op(struct verifier_env *env, struct bpf_insn *insn)
 			dst_reg->type = PTR_TO_STACK;
 			dst_reg->imm = insn->imm;
 			return 0;
+		} else if (opcode == BPF_ADD &&
+			   BPF_CLASS(insn->code) == BPF_ALU64 &&
+			   dst_reg->type == PTR_TO_PACKET) {
+			/* ptr_to_packet += K|X */
+			return check_packet_ptr_add(env, insn);
+		} else if (BPF_CLASS(insn->code) == BPF_ALU64 &&
+			   dst_reg->type == UNKNOWN_VALUE &&
+			   env->allow_ptr_leaks) {
+			/* unknown += K|X */
+			return evaluate_reg_alu(env, insn);
+		} else if (BPF_CLASS(insn->code) == BPF_ALU64 &&
+			   dst_reg->type == CONST_IMM &&
+			   env->allow_ptr_leaks) {
+			/* reg_imm += K|X */
+			return evaluate_reg_imm_alu(env, insn);
 		} else if (is_pointer_value(env, insn->dst_reg)) {
 			verbose("R%d pointer arithmetic prohibited\n",
 				insn->dst_reg);
@@ -1263,6 +1600,34 @@ static int check_alu_op(struct verifier_env *env, struct bpf_insn *insn)
 	return 0;
 }
 
+static void find_good_pkt_pointers(struct verifier_env *env,
+				   struct reg_state *dst_reg)
+{
+	struct verifier_state *state = &env->cur_state;
+	struct reg_state *regs = state->regs, *reg;
+	int i;
+	/* r2 = r3;
+	 * r2 += 8
+	 * if (r2 > pkt_end) goto somewhere
+	 * r2 == dst_reg, pkt_end == src_reg,
+	 * r2=pkt(id=n,off=8,r=0)
+	 * r3=pkt(id=n,off=0,r=0)
+	 * find register r3 and mark its range as r3=pkt(id=n,off=0,r=8)
+	 * so that range of bytes [r3, r3 + 8) is safe to access
+	 */
+	for (i = 0; i < MAX_BPF_REG; i++)
+		if (regs[i].type == PTR_TO_PACKET && regs[i].id == dst_reg->id)
+			regs[i].range = dst_reg->off;
+
+	for (i = 0; i < MAX_BPF_STACK; i += BPF_REG_SIZE) {
+		if (state->stack_slot_type[i] != STACK_SPILL)
+			continue;
+		reg = &state->spilled_regs[i / BPF_REG_SIZE];
+		if (reg->type == PTR_TO_PACKET && reg->id == dst_reg->id)
+			reg->range = dst_reg->off;
+	}
+}
+
 static int check_cond_jmp_op(struct verifier_env *env,
 			     struct bpf_insn *insn, int *insn_idx)
 {
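
The restricted-C source that typically compiles down to the pattern in the
comment above looks like the fragment below (the ethernet/IPv4/UDP layout, the
included uapi headers and the surrounding program from the earlier sketch are
assumptions, not part of this patch):

	struct udphdr *udp = data + sizeof(struct ethhdr) + sizeof(struct iphdr);

	if ((void *)(udp + 1) > data_end)	/* r2 = r3; r2 += 42; if (r2 > pkt_end) */
		return 0;
	return udp->dest;			/* 2-byte load at packet offset 36,
						 * inside the 42 verified bytes */
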
@@ -1346,6 +1711,10 @@ static int check_cond_jmp_op(struct verifier_env *env,
 			regs[insn->dst_reg].type = CONST_IMM;
 			regs[insn->dst_reg].imm = 0;
 		}
+	} else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGT &&
+		   dst_reg->type == PTR_TO_PACKET &&
+		   regs[insn->src_reg].type == PTR_TO_PACKET_END) {
+		find_good_pkt_pointers(env, dst_reg);
 	} else if (is_pointer_value(env, insn->dst_reg)) {
 		verbose("R%d pointer comparison prohibited\n", insn->dst_reg);
 		return -EACCES;
@@ -1685,6 +2054,58 @@ err_free:
 	return ret;
 }
 
+/* the following conditions reduce the number of explored insns
+ * from ~140k to ~80k for ultra large programs that use a lot of ptr_to_packet
+ */
+static bool compare_ptrs_to_packet(struct reg_state *old, struct reg_state *cur)
+{
+	if (old->id != cur->id)
+		return false;
+
+	/* old ptr_to_packet is more conservative, since it allows smaller
+	 * range. Ex:
+	 * old(off=0,r=10) is equal to cur(off=0,r=20), because
+	 * old(off=0,r=10) means that with range=10 the verifier proceeded
+	 * further and found no issues with the program. Now we're in the same
+	 * spot with cur(off=0,r=20), so we're safe too, since anything further
+	 * will only be looking at most 10 bytes after this pointer.
+	 */
+	if (old->off == cur->off && old->range < cur->range)
+		return true;
+
+	/* old(off=20,r=10) is equal to cur(off=22,r=22 or 5 or 0)
+	 * since both cannot be used for packet access and safe(old)
+	 * pointer has smaller off that could be used for further
+	 * 'if (ptr > data_end)' check
+	 * Ex:
+	 * old(off=20,r=10) and cur(off=22,r=22) and cur(off=22,r=0) mean
+	 * that we cannot access the packet.
+	 * The safe range is:
+	 * [ptr, ptr + range - off)
+	 * so whenever off >= range, it means no safe bytes from this pointer.
+	 * When comparing old->off <= cur->off, it means that older code
+	 * went with smaller offset and that offset was later
+	 * used to figure out the safe range after 'if (ptr > data_end)' check
+	 * Say, 'old' state was explored like:
+	 * ... R3(off=0, r=0)
+	 * R4 = R3 + 20
+	 * ... now R4(off=20,r=0)  <-- here
+	 * if (R4 > data_end)
+	 * ... R4(off=20,r=20), R3(off=0,r=20) and R3 can be used to access.
+	 * ... the code further went all the way to bpf_exit.
+	 * Now the 'cur' state at the mark 'here' has R4(off=30,r=0).
+	 * old_R4(off=20,r=0) equal to cur_R4(off=30,r=0), since if the verifier
+	 * goes further, such cur_R4 will give larger safe packet range after
+	 * 'if (R4 > data_end)' and all further insns were already good with r=20,
+	 * so they will be good with r=30 and we can prune the search.
+	 */
+	if (old->off <= cur->off &&
+	    old->off >= old->range && cur->off >= cur->range)
+		return true;
+
+	return false;
+}
+
 /* compare two verifier states
  *
  * all states stored in state_list are known to be valid, since
@@ -1727,6 +2148,10 @@ static bool states_equal(struct verifier_state *old, struct verifier_state *cur)
 		    (rold->type == UNKNOWN_VALUE && rcur->type != NOT_INIT))
 			continue;
 
+		if (rold->type == PTR_TO_PACKET && rcur->type == PTR_TO_PACKET &&
+		    compare_ptrs_to_packet(rold, rcur))
+			continue;
+
 		return false;
 	}
 