path: root/kernel/bpf/verifier.c
author	Alexei Starovoitov <ast@fb.com>	2016-05-05 22:49:10 -0400
committer	David S. Miller <davem@davemloft.net>	2016-05-06 16:01:53 -0400
commit	969bf05eb3cedd5a8d4b7c346a85c2ede87a6d6d (patch)
tree	000a84e285d11b22cc72dead3074c50b325f195c /kernel/bpf/verifier.c
parent	1a0dc1ac1d2928e25739ee82d7e04423b01da563 (diff)
bpf: direct packet access
Extended BPF carried over two instructions from classic BPF to access packet data: LD_ABS and LD_IND. They're highly optimized in JITs, but due to their design they have to do a length check for every access. When BPF is processing 20M packets per second, a single LD_ABS after JIT consumes 3% of CPU. Hence the need to optimize it further by amortizing the cost of the 'off < skb_headlen' check over multiple packet accesses.

One option is to introduce two new eBPF instructions LD_ABS_DW and LD_IND_DW with usage similar to skb_header_pointer(). The kernel part for the interpreter and x64 JIT was implemented in [1], but such new insns behave like the old ld_abs and abort the program with 'return 0' if the access is beyond linear data. Such hidden control flow is hard to work around, plus changing JITs and rolling out a new llvm is inconvenient.

Therefore allow cls_bpf/act_bpf programs to access skb->data directly:

int bpf_prog(struct __sk_buff *skb)
{
        struct iphdr *ip;

        if (skb->data + sizeof(struct iphdr) + ETH_HLEN > skb->data_end)
                /* packet too small */
                return 0;

        ip = skb->data + ETH_HLEN;

        /* access IP header fields with direct loads */
        if (ip->version != 4 || ip->saddr == 0x7f000001)
                return 1;
        [...]
}

This solution avoids the introduction of new instructions. llvm stays the same and all JITs stay the same, but the verifier has to work extra hard to prove safety of the above program.

For XDP the direct store instructions can be allowed as well.

skb->data is NET_IP_ALIGN-ed, so for common cases the verifier can check the alignment. Complex packet parsers, where the packet pointer is adjusted incrementally, cannot be tracked for alignment, so allow byte access in such cases and misaligned access on architectures that define CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS.

[1] https://git.kernel.org/cgit/linux/kernel/git/ast/bpf.git/?h=ld_abs_dw

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
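A minimal sketch of the "packet pointer adjusted incrementally" case that the verifier tracks via known-zero upper bits (this program is not from the patch; bpf_prog2 and the UDP header walk are illustrative, written in the same simplified skb->data pointer style as the example above):

int bpf_prog2(struct __sk_buff *skb)
{
        struct udphdr *udp;
        struct iphdr *ip;
        __u8 ihl;

        if (skb->data + ETH_HLEN + sizeof(struct iphdr) > skb->data_end)
                /* packet too small */
                return 0;

        ip = skb->data + ETH_HLEN;

        /* ihl is a 4-bit field: llvm emits a 1-byte load, which the
         * verifier records as 56 known-zero upper bits, plus an AND mask
         */
        ihl = ip->ihl;

        /* pkt_ptr += reg is only allowed when reg has >= 48 known-zero
         * upper bits; the sum becomes a packet pointer with a fresh 'id'
         * and off = range = 0, so it must be bounds-checked again
         */
        udp = skb->data + ETH_HLEN + ihl * 4;
        if ((void *)udp + sizeof(struct udphdr) > skb->data_end)
                return 0;

        /* [udp, udp + sizeof(struct udphdr)) is now safe for direct loads */
        return udp->dest != 0;
}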
Diffstat (limited to 'kernel/bpf/verifier.c')
-rw-r--r--	kernel/bpf/verifier.c	441
1 file changed, 433 insertions(+), 8 deletions(-)
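A sketch of the register-state bookkeeping behind the bounds check, in the pseudo-instruction notation used by the verifier comments in the diff below (register numbers are illustrative; see find_good_pkt_pointers()):

r3 = skb->data;       /* r3 = pkt(id=0, off=0, r=0) */
r1 = skb->data_end;   /* r1 = pkt_end               */
r2 = r3;
r2 += 8;              /* r2 = pkt(id=0, off=8, r=0) */
if (r2 > r1)          /* compare against pkt_end    */
        goto drop;
/* fall-through: every packet pointer with id=0 gets range = 8,
 * i.e. r3 = pkt(id=0, off=0, r=8), so bytes [r3, r3 + 8) are
 * readable without further checks
 */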
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index afeb62808902..6338c61fc2a1 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1,4 +1,5 @@
 /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
+ * Copyright (c) 2016 Facebook
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of version 2 of the GNU General Public
@@ -136,13 +137,32 @@ enum bpf_reg_type {
 	FRAME_PTR,		/* reg == frame_pointer */
 	PTR_TO_STACK,		/* reg == frame_pointer + imm */
 	CONST_IMM,		/* constant integer value */
+
+	/* PTR_TO_PACKET represents:
+	 * skb->data
+	 * skb->data + imm
+	 * skb->data + (u16) var
+	 * skb->data + (u16) var + imm
+	 * if (range > 0) then [ptr, ptr + range - off) is safe to access
+	 * if (id > 0) means that some 'var' was added
+	 * if (off > 0) means that 'imm' was added
+	 */
+	PTR_TO_PACKET,
+	PTR_TO_PACKET_END,	/* skb->data + headlen */
 };
 
 struct reg_state {
 	enum bpf_reg_type type;
 	union {
-		/* valid when type == CONST_IMM | PTR_TO_STACK */
-		long imm;
+		/* valid when type == CONST_IMM | PTR_TO_STACK | UNKNOWN_VALUE */
+		s64 imm;
+
+		/* valid when type == PTR_TO_PACKET* */
+		struct {
+			u32 id;
+			u16 off;
+			u16 range;
+		};
 
 		/* valid when type == CONST_PTR_TO_MAP | PTR_TO_MAP_VALUE |
 		 * PTR_TO_MAP_VALUE_OR_NULL
@@ -247,6 +267,8 @@ static const char * const reg_type_str[] = {
 	[FRAME_PTR]		= "fp",
 	[PTR_TO_STACK]		= "fp",
 	[CONST_IMM]		= "imm",
+	[PTR_TO_PACKET]		= "pkt",
+	[PTR_TO_PACKET_END]	= "pkt_end",
 };
 
 static void print_verifier_state(struct verifier_state *state)
@@ -262,7 +284,12 @@ static void print_verifier_state(struct verifier_state *state)
 			continue;
 		verbose(" R%d=%s", i, reg_type_str[t]);
 		if (t == CONST_IMM || t == PTR_TO_STACK)
-			verbose("%ld", reg->imm);
+			verbose("%lld", reg->imm);
+		else if (t == PTR_TO_PACKET)
+			verbose("(id=%d,off=%d,r=%d)",
+				reg->id, reg->off, reg->range);
+		else if (t == UNKNOWN_VALUE && reg->imm)
+			verbose("%lld", reg->imm);
 		else if (t == CONST_PTR_TO_MAP || t == PTR_TO_MAP_VALUE ||
 			 t == PTR_TO_MAP_VALUE_OR_NULL)
 			verbose("(ks=%d,vs=%d)",
@@ -548,6 +575,8 @@ static bool is_spillable_regtype(enum bpf_reg_type type)
 	case PTR_TO_MAP_VALUE_OR_NULL:
 	case PTR_TO_STACK:
 	case PTR_TO_CTX:
+	case PTR_TO_PACKET:
+	case PTR_TO_PACKET_END:
 	case FRAME_PTR:
 	case CONST_PTR_TO_MAP:
 		return true;
@@ -647,6 +676,27 @@ static int check_map_access(struct verifier_env *env, u32 regno, int off,
 	return 0;
 }
 
+#define MAX_PACKET_OFF 0xffff
+
+static int check_packet_access(struct verifier_env *env, u32 regno, int off,
+			       int size)
+{
+	struct reg_state *regs = env->cur_state.regs;
+	struct reg_state *reg = &regs[regno];
+	int linear_size = (int) reg->range - (int) reg->off;
+
+	if (linear_size < 0 || linear_size >= MAX_PACKET_OFF) {
+		verbose("verifier bug\n");
+		return -EFAULT;
+	}
+	if (off < 0 || off + size > linear_size) {
+		verbose("invalid access to packet, off=%d size=%d, allowed=%d\n",
+			off, size, linear_size);
+		return -EACCES;
+	}
+	return 0;
+}
+
 /* check access to 'struct bpf_context' fields */
 static int check_ctx_access(struct verifier_env *env, int off, int size,
 			    enum bpf_access_type t)
@@ -677,6 +727,45 @@ static bool is_pointer_value(struct verifier_env *env, int regno)
 	}
 }
 
+static int check_ptr_alignment(struct verifier_env *env, struct reg_state *reg,
+			       int off, int size)
+{
+	if (reg->type != PTR_TO_PACKET) {
+		if (off % size != 0) {
+			verbose("misaligned access off %d size %d\n", off, size);
+			return -EACCES;
+		} else {
+			return 0;
+		}
+	}
+
+	switch (env->prog->type) {
+	case BPF_PROG_TYPE_SCHED_CLS:
+	case BPF_PROG_TYPE_SCHED_ACT:
+		break;
+	default:
+		verbose("verifier is misconfigured\n");
+		return -EACCES;
+	}
+
+	if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
+		/* misaligned access to packet is ok on x86,arm,arm64 */
+		return 0;
+
+	if (reg->id && size != 1) {
+		verbose("Unknown packet alignment. Only byte-sized access allowed\n");
+		return -EACCES;
+	}
+
+	/* skb->data is NET_IP_ALIGN-ed */
+	if ((NET_IP_ALIGN + reg->off + off) % size != 0) {
+		verbose("misaligned packet access off %d+%d+%d size %d\n",
+			NET_IP_ALIGN, reg->off, off, size);
+		return -EACCES;
+	}
+	return 0;
+}
+
 /* check whether memory at (regno + off) is accessible for t = (read | write)
  * if t==write, value_regno is a register which value is stored into memory
  * if t==read, value_regno is a register which will receive the value from memory
@@ -698,10 +787,9 @@ static int check_mem_access(struct verifier_env *env, u32 regno, int off,
 	if (size < 0)
 		return size;
 
-	if (off % size != 0) {
-		verbose("misaligned access off %d size %d\n", off, size);
-		return -EACCES;
-	}
+	err = check_ptr_alignment(env, reg, off, size);
+	if (err)
+		return err;
 
 	if (reg->type == PTR_TO_MAP_VALUE) {
 		if (t == BPF_WRITE && value_regno >= 0 &&
@@ -720,8 +808,16 @@ static int check_mem_access(struct verifier_env *env, u32 regno, int off,
 			return -EACCES;
 		}
 		err = check_ctx_access(env, off, size, t);
-		if (!err && t == BPF_READ && value_regno >= 0)
+		if (!err && t == BPF_READ && value_regno >= 0) {
 			mark_reg_unknown_value(state->regs, value_regno);
+			if (off == offsetof(struct __sk_buff, data) &&
+			    env->allow_ptr_leaks)
+				/* note that reg.[id|off|range] == 0 */
+				state->regs[value_regno].type = PTR_TO_PACKET;
+			else if (off == offsetof(struct __sk_buff, data_end) &&
+				 env->allow_ptr_leaks)
+				state->regs[value_regno].type = PTR_TO_PACKET_END;
+		}
 
 	} else if (reg->type == FRAME_PTR || reg->type == PTR_TO_STACK) {
 		if (off >= 0 || off < -MAX_BPF_STACK) {
@@ -739,11 +835,28 @@ static int check_mem_access(struct verifier_env *env, u32 regno, int off,
 		} else {
 			err = check_stack_read(state, off, size, value_regno);
 		}
+	} else if (state->regs[regno].type == PTR_TO_PACKET) {
+		if (t == BPF_WRITE) {
+			verbose("cannot write into packet\n");
+			return -EACCES;
+		}
+		err = check_packet_access(env, regno, off, size);
+		if (!err && t == BPF_READ && value_regno >= 0)
+			mark_reg_unknown_value(state->regs, value_regno);
 	} else {
 		verbose("R%d invalid mem access '%s'\n",
 			regno, reg_type_str[reg->type]);
 		return -EACCES;
 	}
+
+	if (!err && size <= 2 && value_regno >= 0 && env->allow_ptr_leaks &&
+	    state->regs[value_regno].type == UNKNOWN_VALUE) {
+		/* 1 or 2 byte load zero-extends, determine the number of
+		 * zero upper bits. Not doing it for 4 byte load, since
+		 * such values cannot be added to ptr_to_packet anyway.
+		 */
+		state->regs[value_regno].imm = 64 - size * 8;
+	}
 	return err;
 }
 
@@ -1001,6 +1114,29 @@ static int check_raw_mode(const struct bpf_func_proto *fn)
 	return count > 1 ? -EINVAL : 0;
 }
 
+static void clear_all_pkt_pointers(struct verifier_env *env)
+{
+	struct verifier_state *state = &env->cur_state;
+	struct reg_state *regs = state->regs, *reg;
+	int i;
+
+	for (i = 0; i < MAX_BPF_REG; i++)
+		if (regs[i].type == PTR_TO_PACKET ||
+		    regs[i].type == PTR_TO_PACKET_END)
+			mark_reg_unknown_value(regs, i);
+
+	for (i = 0; i < MAX_BPF_STACK; i += BPF_REG_SIZE) {
+		if (state->stack_slot_type[i] != STACK_SPILL)
+			continue;
+		reg = &state->spilled_regs[i / BPF_REG_SIZE];
+		if (reg->type != PTR_TO_PACKET &&
+		    reg->type != PTR_TO_PACKET_END)
+			continue;
+		reg->type = UNKNOWN_VALUE;
+		reg->imm = 0;
+	}
+}
+
 static int check_call(struct verifier_env *env, int func_id)
 {
 	struct verifier_state *state = &env->cur_state;
@@ -1008,6 +1144,7 @@ static int check_call(struct verifier_env *env, int func_id)
 	struct reg_state *regs = state->regs;
 	struct reg_state *reg;
 	struct bpf_call_arg_meta meta;
+	bool changes_data;
 	int i, err;
 
 	/* find function prototype */
@@ -1030,6 +1167,8 @@ static int check_call(struct verifier_env *env, int func_id)
 		return -EINVAL;
 	}
 
+	changes_data = bpf_helper_changes_skb_data(fn->func);
+
 	memset(&meta, 0, sizeof(meta));
 
 	/* We only support one arg being in raw mode at the moment, which
@@ -1100,6 +1239,189 @@ static int check_call(struct verifier_env *env, int func_id)
 	if (err)
 		return err;
 
+	if (changes_data)
+		clear_all_pkt_pointers(env);
+	return 0;
+}
+
+static int check_packet_ptr_add(struct verifier_env *env, struct bpf_insn *insn)
+{
+	struct reg_state *regs = env->cur_state.regs;
+	struct reg_state *dst_reg = &regs[insn->dst_reg];
+	struct reg_state *src_reg = &regs[insn->src_reg];
+	s32 imm;
+
+	if (BPF_SRC(insn->code) == BPF_K) {
+		/* pkt_ptr += imm */
+		imm = insn->imm;
+
+add_imm:
+		if (imm <= 0) {
+			verbose("addition of negative constant to packet pointer is not allowed\n");
+			return -EACCES;
+		}
+		if (imm >= MAX_PACKET_OFF ||
+		    imm + dst_reg->off >= MAX_PACKET_OFF) {
+			verbose("constant %d is too large to add to packet pointer\n",
+				imm);
+			return -EACCES;
+		}
+		/* a constant was added to pkt_ptr.
+		 * Remember it while keeping the same 'id'
+		 */
+		dst_reg->off += imm;
+	} else {
+		if (src_reg->type == CONST_IMM) {
+			/* pkt_ptr += reg where reg is known constant */
+			imm = src_reg->imm;
+			goto add_imm;
+		}
+		/* disallow pkt_ptr += reg
+		 * if reg is not unknown_value with guaranteed zero upper bits
+		 * otherwise pkt_ptr may overflow and addition will become
+		 * subtraction which is not allowed
+		 */
+		if (src_reg->type != UNKNOWN_VALUE) {
+			verbose("cannot add '%s' to ptr_to_packet\n",
+				reg_type_str[src_reg->type]);
+			return -EACCES;
+		}
+		if (src_reg->imm < 48) {
+			verbose("cannot add integer value with %lld upper zero bits to ptr_to_packet\n",
+				src_reg->imm);
+			return -EACCES;
+		}
+		/* dst_reg stays as pkt_ptr type and since some positive
+		 * integer value was added to the pointer, increment its 'id'
+		 */
+		dst_reg->id++;
+
+		/* something was added to pkt_ptr, set range and off to zero */
+		dst_reg->off = 0;
+		dst_reg->range = 0;
+	}
+	return 0;
+}
+
+static int evaluate_reg_alu(struct verifier_env *env, struct bpf_insn *insn)
+{
+	struct reg_state *regs = env->cur_state.regs;
+	struct reg_state *dst_reg = &regs[insn->dst_reg];
+	u8 opcode = BPF_OP(insn->code);
+	s64 imm_log2;
+
+	/* for type == UNKNOWN_VALUE:
+	 * imm > 0 -> number of zero upper bits
+	 * imm == 0 -> don't track which is the same as all bits can be non-zero
+	 */
+
+	if (BPF_SRC(insn->code) == BPF_X) {
+		struct reg_state *src_reg = &regs[insn->src_reg];
+
+		if (src_reg->type == UNKNOWN_VALUE && src_reg->imm > 0 &&
+		    dst_reg->imm && opcode == BPF_ADD) {
+			/* dreg += sreg
+			 * where both have zero upper bits. Adding them
+			 * can only result in making one more bit non-zero
+			 * in the larger value.
+			 * Ex. 0xffff (imm=48) + 1 (imm=63) = 0x10000 (imm=47)
+			 *     0xffff (imm=48) + 0xffff = 0x1fffe (imm=47)
+			 */
+			dst_reg->imm = min(dst_reg->imm, src_reg->imm);
+			dst_reg->imm--;
+			return 0;
+		}
+		if (src_reg->type == CONST_IMM && src_reg->imm > 0 &&
+		    dst_reg->imm && opcode == BPF_ADD) {
+			/* dreg += sreg
+			 * where dreg has zero upper bits and sreg is const.
+			 * Adding them can only result in making one more bit
+			 * non-zero in the larger value.
+			 */
+			imm_log2 = __ilog2_u64((long long)src_reg->imm);
+			dst_reg->imm = min(dst_reg->imm, 63 - imm_log2);
+			dst_reg->imm--;
+			return 0;
+		}
+		/* all other cases are not supported yet, just mark dst_reg */
+		dst_reg->imm = 0;
+		return 0;
+	}
+
+	/* sign extend 32-bit imm into 64-bit to make sure that
+	 * negative values occupy bit 63. Note ilog2() would have
+	 * been incorrect, since sizeof(insn->imm) == 4
+	 */
+	imm_log2 = __ilog2_u64((long long)insn->imm);
+
+	if (dst_reg->imm && opcode == BPF_LSH) {
+		/* reg <<= imm
+		 * if reg was a result of 2 byte load, then its imm == 48
+		 * which means that upper 48 bits are zero and shifting this reg
+		 * left by 4 would mean that upper 44 bits are still zero
+		 */
+		dst_reg->imm -= insn->imm;
+	} else if (dst_reg->imm && opcode == BPF_MUL) {
+		/* reg *= imm
+		 * if multiplying by 14 subtract 4
+		 * This is conservative calculation of upper zero bits.
+		 * It's not trying to special case insn->imm == 1 or 0 cases
+		 */
+		dst_reg->imm -= imm_log2 + 1;
+	} else if (opcode == BPF_AND) {
+		/* reg &= imm */
+		dst_reg->imm = 63 - imm_log2;
+	} else if (dst_reg->imm && opcode == BPF_ADD) {
+		/* reg += imm */
+		dst_reg->imm = min(dst_reg->imm, 63 - imm_log2);
+		dst_reg->imm--;
+	} else if (opcode == BPF_RSH) {
+		/* reg >>= imm
+		 * which means that after right shift, upper bits will be zero
+		 * note that verifier already checked that
+		 * 0 <= imm < 64 for shift insn
+		 */
+		dst_reg->imm += insn->imm;
+		if (unlikely(dst_reg->imm > 64))
+			/* some dumb code did:
+			 * r2 = *(u32 *)mem;
+			 * r2 >>= 32;
+			 * and all bits are zero now */
+			dst_reg->imm = 64;
+	} else {
+		/* all other alu ops, means that we don't know what will
+		 * happen to the value, mark it with unknown number of zero bits
+		 */
+		dst_reg->imm = 0;
+	}
+
+	if (dst_reg->imm < 0) {
+		/* all 64 bits of the register can contain non-zero bits
+		 * and such value cannot be added to ptr_to_packet, since it
+		 * may overflow, mark it as unknown to avoid further eval
+		 */
+		dst_reg->imm = 0;
+	}
+	return 0;
+}
+
+static int evaluate_reg_imm_alu(struct verifier_env *env, struct bpf_insn *insn)
+{
+	struct reg_state *regs = env->cur_state.regs;
+	struct reg_state *dst_reg = &regs[insn->dst_reg];
+	struct reg_state *src_reg = &regs[insn->src_reg];
+	u8 opcode = BPF_OP(insn->code);
+
+	/* dst_reg->type == CONST_IMM here, simulate execution of 'add' insn.
+	 * Don't care about overflow or negative values, just add them
+	 */
+	if (opcode == BPF_ADD && BPF_SRC(insn->code) == BPF_K)
+		dst_reg->imm += insn->imm;
+	else if (opcode == BPF_ADD && BPF_SRC(insn->code) == BPF_X &&
+		 src_reg->type == CONST_IMM)
+		dst_reg->imm += src_reg->imm;
+	else
+		mark_reg_unknown_value(regs, insn->dst_reg);
 	return 0;
 }
 
@@ -1245,6 +1567,21 @@ static int check_alu_op(struct verifier_env *env, struct bpf_insn *insn)
 		dst_reg->type = PTR_TO_STACK;
 		dst_reg->imm = insn->imm;
 		return 0;
+	} else if (opcode == BPF_ADD &&
+		   BPF_CLASS(insn->code) == BPF_ALU64 &&
+		   dst_reg->type == PTR_TO_PACKET) {
+		/* ptr_to_packet += K|X */
+		return check_packet_ptr_add(env, insn);
+	} else if (BPF_CLASS(insn->code) == BPF_ALU64 &&
+		   dst_reg->type == UNKNOWN_VALUE &&
+		   env->allow_ptr_leaks) {
+		/* unknown += K|X */
+		return evaluate_reg_alu(env, insn);
+	} else if (BPF_CLASS(insn->code) == BPF_ALU64 &&
+		   dst_reg->type == CONST_IMM &&
+		   env->allow_ptr_leaks) {
+		/* reg_imm += K|X */
+		return evaluate_reg_imm_alu(env, insn);
 	} else if (is_pointer_value(env, insn->dst_reg)) {
 		verbose("R%d pointer arithmetic prohibited\n",
 			insn->dst_reg);
@@ -1263,6 +1600,34 @@ static int check_alu_op(struct verifier_env *env, struct bpf_insn *insn)
 	return 0;
 }
 
+static void find_good_pkt_pointers(struct verifier_env *env,
+				   struct reg_state *dst_reg)
+{
+	struct verifier_state *state = &env->cur_state;
+	struct reg_state *regs = state->regs, *reg;
+	int i;
+	/* r2 = r3;
+	 * r2 += 8
+	 * if (r2 > pkt_end) goto somewhere
+	 * r2 == dst_reg, pkt_end == src_reg
+	 * r2=pkt(id=n,off=8,r=0)
+	 * r3=pkt(id=n,off=0,r=0)
+	 * find register r3 and mark its range as r3=pkt(id=n,off=0,r=8)
+	 * so that range of bytes [r3, r3 + 8) is safe to access
+	 */
+	for (i = 0; i < MAX_BPF_REG; i++)
+		if (regs[i].type == PTR_TO_PACKET && regs[i].id == dst_reg->id)
+			regs[i].range = dst_reg->off;
+
+	for (i = 0; i < MAX_BPF_STACK; i += BPF_REG_SIZE) {
+		if (state->stack_slot_type[i] != STACK_SPILL)
+			continue;
+		reg = &state->spilled_regs[i / BPF_REG_SIZE];
+		if (reg->type == PTR_TO_PACKET && reg->id == dst_reg->id)
+			reg->range = dst_reg->off;
+	}
+}
+
 static int check_cond_jmp_op(struct verifier_env *env,
 			     struct bpf_insn *insn, int *insn_idx)
 {
@@ -1346,6 +1711,10 @@ static int check_cond_jmp_op(struct verifier_env *env,
 			regs[insn->dst_reg].type = CONST_IMM;
 			regs[insn->dst_reg].imm = 0;
 		}
+	} else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGT &&
+		   dst_reg->type == PTR_TO_PACKET &&
+		   regs[insn->src_reg].type == PTR_TO_PACKET_END) {
+		find_good_pkt_pointers(env, dst_reg);
 	} else if (is_pointer_value(env, insn->dst_reg)) {
 		verbose("R%d pointer comparison prohibited\n", insn->dst_reg);
 		return -EACCES;
@@ -1685,6 +2054,58 @@ err_free:
 	return ret;
 }
 
+/* the following conditions reduce the number of explored insns
+ * from ~140k to ~80k for ultra large programs that use a lot of ptr_to_packet
+ */
+static bool compare_ptrs_to_packet(struct reg_state *old, struct reg_state *cur)
+{
+	if (old->id != cur->id)
+		return false;
+
+	/* old ptr_to_packet is more conservative, since it allows smaller
+	 * range. Ex:
+	 * old(off=0,r=10) is equal to cur(off=0,r=20), because
+	 * old(off=0,r=10) means that with range=10 the verifier proceeded
+	 * further and found no issues with the program. Now we're in the same
+	 * spot with cur(off=0,r=20), so we're safe too, since anything further
+	 * will only be looking at most 10 bytes after this pointer.
+	 */
+	if (old->off == cur->off && old->range < cur->range)
+		return true;
+
+	/* old(off=20,r=10) is equal to cur(off=22,r=22 or 5 or 0)
+	 * since both cannot be used for packet access and safe(old)
+	 * pointer has smaller off that could be used for further
+	 * 'if (ptr > data_end)' check
+	 * Ex:
+	 * old(off=20,r=10) and cur(off=22,r=22) and cur(off=22,r=0) mean
+	 * that we cannot access the packet.
+	 * The safe range is:
+	 * [ptr, ptr + range - off)
+	 * so whenever off >= range, it means no safe bytes from this pointer.
+	 * When comparing old->off <= cur->off, it means that older code
+	 * went with smaller offset and that offset was later
+	 * used to figure out the safe range after 'if (ptr > data_end)' check
+	 * Say, 'old' state was explored like:
+	 * ... R3(off=0, r=0)
+	 * R4 = R3 + 20
+	 * ... now R4(off=20,r=0)  <-- here
+	 * if (R4 > data_end)
+	 * ... R4(off=20,r=20), R3(off=0,r=20) and R3 can be used to access.
+	 * ... the code further went all the way to bpf_exit.
+	 * Now the 'cur' state at the mark 'here' has R4(off=30,r=0).
+	 * old_R4(off=20,r=0) equal to cur_R4(off=30,r=0), since if the verifier
+	 * goes further, such cur_R4 will give larger safe packet range after
+	 * 'if (R4 > data_end)' and all further insns were already good with r=20,
+	 * so they will be good with r=30 and we can prune the search.
+	 */
+	if (old->off <= cur->off &&
+	    old->off >= old->range && cur->off >= cur->range)
+		return true;
+
+	return false;
+}
+
 /* compare two verifier states
  *
  * all states stored in state_list are known to be valid, since
@@ -1727,6 +2148,10 @@ static bool states_equal(struct verifier_state *old, struct verifier_state *cur)
 		    (rold->type == UNKNOWN_VALUE && rcur->type != NOT_INIT))
 			continue;
 
+		if (rold->type == PTR_TO_PACKET && rcur->type == PTR_TO_PACKET &&
+		    compare_ptrs_to_packet(rold, rcur))
+			continue;
+
 		return false;
 	}
 