path: root/kernel/bpf
author	Alexei Starovoitov <ast@plumgrid.com>	2014-09-26 03:17:06 -0400
committer	David S. Miller <davem@davemloft.net>	2014-09-26 15:05:15 -0400
commit	17a5267067f3c372fec9ffb798d6eaba6b5e6a4c (patch)
tree	62746fb36df3ad84c88ce1421ebe524075493362 /kernel/bpf
parent	475fb78fbf48592ce541627c60a7b331060e31f5 (diff)
bpf: verifier (add verifier core)
This patch adds the verifier core, which simulates execution of every insn
and records the state of registers and program stack. Every branch
instruction seen during simulation is pushed onto the state stack. When the
verifier reaches BPF_EXIT, it pops the state from the stack and continues
until it reaches BPF_EXIT again.

For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
the verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6

This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding that uses reserved fields

A kernel subsystem configures the verifier with two callbacks:

- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
  which tells the verifier which fields of 'ctx' are accessible
  (remember that 'ctx' is the first argument to an eBPF program)

- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
  which returns the argument constraints of the kernel helper functions that
  an eBPF program may call, so that the verifier can check that R1-R5 types
  match the prototype

More details are in Documentation/networking/filter.txt and in
kernel/bpf/verifier.c.

Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
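A minimal user-space sketch of the state-stack walk described above (a simplified model of this patch's push_stack()/pop_stack() with invented names; not the kernel code itself):

	/* Simplified model: each branch pushes a snapshot of the current
	 * state; BPF_EXIT pops one and simulation resumes there. The walk
	 * terminates when the stack is empty.
	 */
	#include <stdlib.h>
	#include <string.h>

	struct state { int regs[11]; };	/* stand-in for struct verifier_state */

	struct stack_elem {
		struct state st;
		int insn_idx;
		struct stack_elem *next;
	};

	static struct stack_elem *head;

	static int push_state(struct state *cur, int insn_idx)
	{
		struct stack_elem *e = malloc(sizeof(*e));

		if (!e)
			return -1;
		memcpy(&e->st, cur, sizeof(*cur));	/* snapshot current state */
		e->insn_idx = insn_idx;
		e->next = head;
		head = e;
		return 0;
	}

	static int pop_state(struct state *cur)
	{
		struct stack_elem *e = head;
		int insn_idx;

		if (!e)
			return -1;	/* all paths explored */
		memcpy(cur, &e->st, sizeof(*cur));	/* resume saved state */
		insn_idx = e->insn_idx;
		head = e->next;
		free(e);
		return insn_idx;
	}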
Diffstat (limited to 'kernel/bpf')
 kernel/bpf/verifier.c | 1075 +++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 1074 insertions(+), 1 deletion(-)
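As a rough illustration of the two callbacks described in the commit message, a subsystem might wire them up like this (hypothetical ops and access policy; only the callback signatures and the arg/ret type constants come from this series):

	#include <linux/bpf.h>	/* kernel-side sketch */

	static bool my_is_valid_access(int off, int size, enum bpf_access_type type)
	{
		/* example policy: aligned 4-byte reads of the first 64 bytes of ctx */
		return type == BPF_READ && size == 4 &&
		       off >= 0 && off + size <= 64 && (off % size) == 0;
	}

	/* hypothetical prototype: R1 must be a map pointer, R2 a stack pointer
	 * holding a key; the return value may be a map value pointer or NULL
	 */
	static const struct bpf_func_proto my_map_lookup_proto = {
		.gpl_only	= false,
		.ret_type	= RET_PTR_TO_MAP_VALUE_OR_NULL,
		.arg1_type	= ARG_CONST_MAP_PTR,
		.arg2_type	= ARG_PTR_TO_MAP_KEY,
	};

	static const struct bpf_func_proto *my_get_func_proto(enum bpf_func_id func_id)
	{
		switch (func_id) {
		case BPF_FUNC_map_lookup_elem:
			return &my_map_lookup_proto;
		default:
			return NULL;	/* unknown helpers are rejected by check_call() */
		}
	}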
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index c689ab8e2713..a086dd3210a8 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -125,6 +125,70 @@
  * are set to NOT_INIT to indicate that they are no longer readable.
  */
 
+/* types of values stored in eBPF registers */
+enum bpf_reg_type {
+	NOT_INIT = 0,		 /* nothing was written into register */
+	UNKNOWN_VALUE,		 /* reg doesn't contain a valid pointer */
+	PTR_TO_CTX,		 /* reg points to bpf_context */
+	CONST_PTR_TO_MAP,	 /* reg points to struct bpf_map */
+	PTR_TO_MAP_VALUE,	 /* reg points to map element value */
+	PTR_TO_MAP_VALUE_OR_NULL,/* points to map elem value or NULL */
+	FRAME_PTR,		 /* reg == frame_pointer */
+	PTR_TO_STACK,		 /* reg == frame_pointer + imm */
+	CONST_IMM,		 /* constant integer value */
+};
+
+struct reg_state {
+	enum bpf_reg_type type;
+	union {
+		/* valid when type == CONST_IMM | PTR_TO_STACK */
+		int imm;
+
+		/* valid when type == CONST_PTR_TO_MAP | PTR_TO_MAP_VALUE |
+		 *   PTR_TO_MAP_VALUE_OR_NULL
+		 */
+		struct bpf_map *map_ptr;
+	};
+};
+
+enum bpf_stack_slot_type {
+	STACK_INVALID,    /* nothing was stored in this stack slot */
+	STACK_SPILL,      /* 1st byte of register spilled into stack */
+	STACK_SPILL_PART, /* other 7 bytes of register spill */
+	STACK_MISC	  /* BPF program wrote some data into this slot */
+};
+
+struct bpf_stack_slot {
+	enum bpf_stack_slot_type stype;
+	struct reg_state reg_st;
+};
+
+/* state of the program:
+ * type of all registers and stack info
+ */
+struct verifier_state {
+	struct reg_state regs[MAX_BPF_REG];
+	struct bpf_stack_slot stack[MAX_BPF_STACK];
+};
+
+/* linked list of verifier states used to prune search */
+struct verifier_state_list {
+	struct verifier_state state;
+	struct verifier_state_list *next;
+};
+
+/* verifier_state + insn_idx are pushed to stack when branch is encountered */
+struct verifier_stack_elem {
+	/* verifier state is 'st'
+	 * before processing instruction 'insn_idx'
+	 * and after processing instruction 'prev_insn_idx'
+	 */
+	struct verifier_state st;
+	int insn_idx;
+	int prev_insn_idx;
+	struct verifier_stack_elem *next;
+};
+
 #define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */
 
 /* single container for all structs
@@ -132,6 +196,9 @@
  */
 struct verifier_env {
 	struct bpf_prog *prog;		/* eBPF program being verified */
+	struct verifier_stack_elem *head; /* stack of verifier states to be processed */
+	int stack_size;			/* number of states to be processed */
+	struct verifier_state cur_state; /* current verifier state */
 	struct bpf_map *used_maps[MAX_USED_MAPS]; /* array of map's used by eBPF program */
 	u32 used_map_cnt;		/* number of used maps */
 };
@@ -160,6 +227,45 @@ static void verbose(const char *fmt, ...)
 	va_end(args);
 }
 
+/* string representation of 'enum bpf_reg_type' */
+static const char * const reg_type_str[] = {
+	[NOT_INIT]		= "?",
+	[UNKNOWN_VALUE]		= "inv",
+	[PTR_TO_CTX]		= "ctx",
+	[CONST_PTR_TO_MAP]	= "map_ptr",
+	[PTR_TO_MAP_VALUE]	= "map_value",
+	[PTR_TO_MAP_VALUE_OR_NULL] = "map_value_or_null",
+	[FRAME_PTR]		= "fp",
+	[PTR_TO_STACK]		= "fp",
+	[CONST_IMM]		= "imm",
+};
+
+static void print_verifier_state(struct verifier_env *env)
+{
+	enum bpf_reg_type t;
+	int i;
+
+	for (i = 0; i < MAX_BPF_REG; i++) {
+		t = env->cur_state.regs[i].type;
+		if (t == NOT_INIT)
+			continue;
+		verbose(" R%d=%s", i, reg_type_str[t]);
+		if (t == CONST_IMM || t == PTR_TO_STACK)
+			verbose("%d", env->cur_state.regs[i].imm);
+		else if (t == CONST_PTR_TO_MAP || t == PTR_TO_MAP_VALUE ||
+			 t == PTR_TO_MAP_VALUE_OR_NULL)
+			verbose("(ks=%d,vs=%d)",
+				env->cur_state.regs[i].map_ptr->key_size,
+				env->cur_state.regs[i].map_ptr->value_size);
+	}
+	for (i = 0; i < MAX_BPF_STACK; i++) {
+		if (env->cur_state.stack[i].stype == STACK_SPILL)
+			verbose(" fp%d=%s", -MAX_BPF_STACK + i,
+				reg_type_str[env->cur_state.stack[i].reg_st.type]);
+	}
+	verbose("\n");
+}
+
 static const char *const bpf_class_string[] = {
 	[BPF_LD]  = "ld",
 	[BPF_LDX] = "ldx",
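For illustration, print_verifier_state() above renders a state on one line; a hypothetical state where R7 holds the constant 42, R2 points 16 bytes below the frame pointer, and a map pointer was spilled to fp-8 would print roughly as:

	R1=ctx R2=fp-16 R6=map_value(ks=4,vs=8) R7=imm42 R10=fp fp-8=map_ptr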
@@ -305,6 +411,735 @@ static void print_bpf_insn(struct bpf_insn *insn)
 	}
 }
 
+static int pop_stack(struct verifier_env *env, int *prev_insn_idx)
+{
+	struct verifier_stack_elem *elem;
+	int insn_idx;
+
+	if (env->head == NULL)
+		return -1;
+
+	memcpy(&env->cur_state, &env->head->st, sizeof(env->cur_state));
+	insn_idx = env->head->insn_idx;
+	if (prev_insn_idx)
+		*prev_insn_idx = env->head->prev_insn_idx;
+	elem = env->head->next;
+	kfree(env->head);
+	env->head = elem;
+	env->stack_size--;
+	return insn_idx;
+}
+
+static struct verifier_state *push_stack(struct verifier_env *env, int insn_idx,
+					 int prev_insn_idx)
+{
+	struct verifier_stack_elem *elem;
+
+	elem = kmalloc(sizeof(struct verifier_stack_elem), GFP_KERNEL);
+	if (!elem)
+		goto err;
+
+	memcpy(&elem->st, &env->cur_state, sizeof(env->cur_state));
+	elem->insn_idx = insn_idx;
+	elem->prev_insn_idx = prev_insn_idx;
+	elem->next = env->head;
+	env->head = elem;
+	env->stack_size++;
+	if (env->stack_size > 1024) {
+		verbose("BPF program is too complex\n");
+		goto err;
+	}
+	return &elem->st;
+err:
+	/* pop all elements and return */
+	while (pop_stack(env, NULL) >= 0);
+	return NULL;
+}
+
+#define CALLER_SAVED_REGS 6
+static const int caller_saved[CALLER_SAVED_REGS] = {
+	BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5
+};
+
+static void init_reg_state(struct reg_state *regs)
+{
+	int i;
+
+	for (i = 0; i < MAX_BPF_REG; i++) {
+		regs[i].type = NOT_INIT;
+		regs[i].imm = 0;
+		regs[i].map_ptr = NULL;
+	}
+
+	/* frame pointer */
+	regs[BPF_REG_FP].type = FRAME_PTR;
+
+	/* 1st arg to a function */
+	regs[BPF_REG_1].type = PTR_TO_CTX;
+}
+
+static void mark_reg_unknown_value(struct reg_state *regs, u32 regno)
+{
+	BUG_ON(regno >= MAX_BPF_REG);
+	regs[regno].type = UNKNOWN_VALUE;
+	regs[regno].imm = 0;
+	regs[regno].map_ptr = NULL;
+}
+
+enum reg_arg_type {
+	SRC_OP,		/* register is used as source operand */
+	DST_OP,		/* register is used as destination operand */
+	DST_OP_NO_MARK	/* same as above, check only, don't mark */
+};
+
+static int check_reg_arg(struct reg_state *regs, u32 regno,
+			 enum reg_arg_type t)
+{
+	if (regno >= MAX_BPF_REG) {
+		verbose("R%d is invalid\n", regno);
+		return -EINVAL;
+	}
+
+	if (t == SRC_OP) {
+		/* check whether register used as source operand can be read */
+		if (regs[regno].type == NOT_INIT) {
+			verbose("R%d !read_ok\n", regno);
+			return -EACCES;
+		}
+	} else {
+		/* check whether register used as dest operand can be written to */
+		if (regno == BPF_REG_FP) {
+			verbose("frame pointer is read only\n");
+			return -EACCES;
+		}
+		if (t == DST_OP)
+			mark_reg_unknown_value(regs, regno);
+	}
+	return 0;
+}
+
+static int bpf_size_to_bytes(int bpf_size)
+{
+	if (bpf_size == BPF_W)
+		return 4;
+	else if (bpf_size == BPF_H)
+		return 2;
+	else if (bpf_size == BPF_B)
+		return 1;
+	else if (bpf_size == BPF_DW)
+		return 8;
+	else
+		return -EINVAL;
+}
+
+/* check_stack_read/write functions track spill/fill of registers,
+ * stack boundary and alignment are checked in check_mem_access()
+ */
+static int check_stack_write(struct verifier_state *state, int off, int size,
+			     int value_regno)
+{
+	struct bpf_stack_slot *slot;
+	int i;
+
+	if (value_regno >= 0 &&
+	    (state->regs[value_regno].type == PTR_TO_MAP_VALUE ||
+	     state->regs[value_regno].type == PTR_TO_STACK ||
+	     state->regs[value_regno].type == PTR_TO_CTX)) {
+
+		/* register containing pointer is being spilled into stack */
+		if (size != 8) {
+			verbose("invalid size of register spill\n");
+			return -EACCES;
+		}
+
+		slot = &state->stack[MAX_BPF_STACK + off];
+		slot->stype = STACK_SPILL;
+		/* save register state */
+		slot->reg_st = state->regs[value_regno];
+		for (i = 1; i < 8; i++) {
+			slot = &state->stack[MAX_BPF_STACK + off + i];
+			slot->stype = STACK_SPILL_PART;
+			slot->reg_st.type = UNKNOWN_VALUE;
+			slot->reg_st.map_ptr = NULL;
+		}
+	} else {
+
+		/* regular write of data into stack */
+		for (i = 0; i < size; i++) {
+			slot = &state->stack[MAX_BPF_STACK + off + i];
+			slot->stype = STACK_MISC;
+			slot->reg_st.type = UNKNOWN_VALUE;
+			slot->reg_st.map_ptr = NULL;
+		}
+	}
+	return 0;
+}
+
+static int check_stack_read(struct verifier_state *state, int off, int size,
+			    int value_regno)
+{
+	int i;
+	struct bpf_stack_slot *slot;
+
+	slot = &state->stack[MAX_BPF_STACK + off];
+
+	if (slot->stype == STACK_SPILL) {
+		if (size != 8) {
+			verbose("invalid size of register spill\n");
+			return -EACCES;
+		}
+		for (i = 1; i < 8; i++) {
+			if (state->stack[MAX_BPF_STACK + off + i].stype !=
+			    STACK_SPILL_PART) {
+				verbose("corrupted spill memory\n");
+				return -EACCES;
+			}
+		}
+
+		if (value_regno >= 0)
+			/* restore register state from stack */
+			state->regs[value_regno] = slot->reg_st;
+		return 0;
+	} else {
+		for (i = 0; i < size; i++) {
+			if (state->stack[MAX_BPF_STACK + off + i].stype !=
+			    STACK_MISC) {
+				verbose("invalid read from stack off %d+%d size %d\n",
+					off, i, size);
+				return -EACCES;
+			}
+		}
+		if (value_regno >= 0)
+			/* have read misc data from the stack */
+			mark_reg_unknown_value(state->regs, value_regno);
+		return 0;
+	}
+}
+
+/* check read/write into map element returned by bpf_map_lookup_elem() */
+static int check_map_access(struct verifier_env *env, u32 regno, int off,
+			    int size)
+{
+	struct bpf_map *map = env->cur_state.regs[regno].map_ptr;
+
+	if (off < 0 || off + size > map->value_size) {
+		verbose("invalid access to map value, value_size=%d off=%d size=%d\n",
+			map->value_size, off, size);
+		return -EACCES;
+	}
+	return 0;
+}
+
+/* check access to 'struct bpf_context' fields */
+static int check_ctx_access(struct verifier_env *env, int off, int size,
+			    enum bpf_access_type t)
+{
+	if (env->prog->aux->ops->is_valid_access &&
+	    env->prog->aux->ops->is_valid_access(off, size, t))
+		return 0;
+
+	verbose("invalid bpf_context access off=%d size=%d\n", off, size);
+	return -EACCES;
+}
+
+/* check whether memory at (regno + off) is accessible for t = (read | write)
+ * if t==write, value_regno is a register whose value is stored into memory
+ * if t==read, value_regno is a register which will receive the value from memory
+ * if t==write && value_regno==-1, some unknown value is stored into memory
+ * if t==read && value_regno==-1, don't care what we read from memory
+ */
+static int check_mem_access(struct verifier_env *env, u32 regno, int off,
+			    int bpf_size, enum bpf_access_type t,
+			    int value_regno)
+{
+	struct verifier_state *state = &env->cur_state;
+	int size, err = 0;
+
+	size = bpf_size_to_bytes(bpf_size);
+	if (size < 0)
+		return size;
+
+	if (off % size != 0) {
+		verbose("misaligned access off %d size %d\n", off, size);
+		return -EACCES;
+	}
+
+	if (state->regs[regno].type == PTR_TO_MAP_VALUE) {
+		err = check_map_access(env, regno, off, size);
+		if (!err && t == BPF_READ && value_regno >= 0)
+			mark_reg_unknown_value(state->regs, value_regno);
+
+	} else if (state->regs[regno].type == PTR_TO_CTX) {
+		err = check_ctx_access(env, off, size, t);
+		if (!err && t == BPF_READ && value_regno >= 0)
+			mark_reg_unknown_value(state->regs, value_regno);
+
+	} else if (state->regs[regno].type == FRAME_PTR) {
+		if (off >= 0 || off < -MAX_BPF_STACK) {
+			verbose("invalid stack off=%d size=%d\n", off, size);
+			return -EACCES;
+		}
+		if (t == BPF_WRITE)
+			err = check_stack_write(state, off, size, value_regno);
+		else
+			err = check_stack_read(state, off, size, value_regno);
+	} else {
+		verbose("R%d invalid mem access '%s'\n",
+			regno, reg_type_str[state->regs[regno].type]);
+		return -EACCES;
+	}
+	return err;
+}
+
+static int check_xadd(struct verifier_env *env, struct bpf_insn *insn)
+{
+	struct reg_state *regs = env->cur_state.regs;
+	int err;
+
+	if ((BPF_SIZE(insn->code) != BPF_W && BPF_SIZE(insn->code) != BPF_DW) ||
+	    insn->imm != 0) {
+		verbose("BPF_XADD uses reserved fields\n");
+		return -EINVAL;
+	}
+
+	/* check src1 operand */
+	err = check_reg_arg(regs, insn->src_reg, SRC_OP);
+	if (err)
+		return err;
+
+	/* check src2 operand */
+	err = check_reg_arg(regs, insn->dst_reg, SRC_OP);
+	if (err)
+		return err;
+
+	/* check whether atomic_add can read the memory */
+	err = check_mem_access(env, insn->dst_reg, insn->off,
+			       BPF_SIZE(insn->code), BPF_READ, -1);
+	if (err)
+		return err;
+
+	/* check whether atomic_add can write into the same memory */
+	return check_mem_access(env, insn->dst_reg, insn->off,
+				BPF_SIZE(insn->code), BPF_WRITE, -1);
+}
+
+/* when register 'regno' is passed into function that will read 'access_size'
+ * bytes from that pointer, make sure that it's within stack boundary
+ * and all elements of stack are initialized
+ */
+static int check_stack_boundary(struct verifier_env *env,
+				int regno, int access_size)
+{
+	struct verifier_state *state = &env->cur_state;
+	struct reg_state *regs = state->regs;
+	int off, i;
+
+	if (regs[regno].type != PTR_TO_STACK)
+		return -EACCES;
+
+	off = regs[regno].imm;
+	if (off >= 0 || off < -MAX_BPF_STACK || off + access_size > 0 ||
+	    access_size <= 0) {
+		verbose("invalid stack type R%d off=%d access_size=%d\n",
+			regno, off, access_size);
+		return -EACCES;
+	}
+
+	for (i = 0; i < access_size; i++) {
+		if (state->stack[MAX_BPF_STACK + off + i].stype != STACK_MISC) {
+			verbose("invalid indirect read from stack off %d+%d size %d\n",
+				off, i, access_size);
+			return -EACCES;
+		}
+	}
+	return 0;
+}
+
+static int check_func_arg(struct verifier_env *env, u32 regno,
+			  enum bpf_arg_type arg_type, struct bpf_map **mapp)
+{
+	struct reg_state *reg = env->cur_state.regs + regno;
+	enum bpf_reg_type expected_type;
+	int err = 0;
+
+	if (arg_type == ARG_ANYTHING)
+		return 0;
+
+	if (reg->type == NOT_INIT) {
+		verbose("R%d !read_ok\n", regno);
+		return -EACCES;
+	}
+
+	if (arg_type == ARG_PTR_TO_STACK || arg_type == ARG_PTR_TO_MAP_KEY ||
+	    arg_type == ARG_PTR_TO_MAP_VALUE) {
+		expected_type = PTR_TO_STACK;
+	} else if (arg_type == ARG_CONST_STACK_SIZE) {
+		expected_type = CONST_IMM;
+	} else if (arg_type == ARG_CONST_MAP_PTR) {
+		expected_type = CONST_PTR_TO_MAP;
+	} else {
+		verbose("unsupported arg_type %d\n", arg_type);
+		return -EFAULT;
+	}
+
+	if (reg->type != expected_type) {
+		verbose("R%d type=%s expected=%s\n", regno,
+			reg_type_str[reg->type], reg_type_str[expected_type]);
+		return -EACCES;
+	}
+
+	if (arg_type == ARG_CONST_MAP_PTR) {
+		/* bpf_map_xxx(map_ptr) call: remember that map_ptr */
+		*mapp = reg->map_ptr;
+
+	} else if (arg_type == ARG_PTR_TO_MAP_KEY) {
+		/* bpf_map_xxx(..., map_ptr, ..., key) call:
+		 * check that [key, key + map->key_size) are within
+		 * stack limits and initialized
+		 */
+		if (!*mapp) {
+			/* in function declaration map_ptr must come before
+			 * map_key, so that it's verified and known before
+			 * we have to check map_key here. Otherwise it means
+			 * that kernel subsystem misconfigured verifier
+			 */
+			verbose("invalid map_ptr to access map->key\n");
+			return -EACCES;
+		}
+		err = check_stack_boundary(env, regno, (*mapp)->key_size);
+
+	} else if (arg_type == ARG_PTR_TO_MAP_VALUE) {
+		/* bpf_map_xxx(..., map_ptr, ..., value) call:
+		 * check [value, value + map->value_size) validity
+		 */
+		if (!*mapp) {
+			/* kernel subsystem misconfigured verifier */
+			verbose("invalid map_ptr to access map->value\n");
+			return -EACCES;
+		}
+		err = check_stack_boundary(env, regno, (*mapp)->value_size);
+
+	} else if (arg_type == ARG_CONST_STACK_SIZE) {
+		/* bpf_xxx(..., buf, len) call will access 'len' bytes
+		 * from stack pointer 'buf'. Check it
+		 * note: regno == len, regno - 1 == buf
+		 */
+		if (regno == 0) {
+			/* kernel subsystem misconfigured verifier */
+			verbose("ARG_CONST_STACK_SIZE cannot be first argument\n");
+			return -EACCES;
+		}
+		err = check_stack_boundary(env, regno - 1, reg->imm);
+	}
+
+	return err;
+}
+
+static int check_call(struct verifier_env *env, int func_id)
+{
+	struct verifier_state *state = &env->cur_state;
+	const struct bpf_func_proto *fn = NULL;
+	struct reg_state *regs = state->regs;
+	struct bpf_map *map = NULL;
+	struct reg_state *reg;
+	int i, err;
+
+	/* find function prototype */
+	if (func_id < 0 || func_id >= __BPF_FUNC_MAX_ID) {
+		verbose("invalid func %d\n", func_id);
+		return -EINVAL;
+	}
+
+	if (env->prog->aux->ops->get_func_proto)
+		fn = env->prog->aux->ops->get_func_proto(func_id);
+
+	if (!fn) {
+		verbose("unknown func %d\n", func_id);
+		return -EINVAL;
+	}
+
+	/* eBPF programs must be GPL compatible to use GPL-ed functions */
+	if (!env->prog->aux->is_gpl_compatible && fn->gpl_only) {
+		verbose("cannot call GPL only function from proprietary program\n");
+		return -EINVAL;
+	}
+
+	/* check args */
+	err = check_func_arg(env, BPF_REG_1, fn->arg1_type, &map);
+	if (err)
+		return err;
+	err = check_func_arg(env, BPF_REG_2, fn->arg2_type, &map);
+	if (err)
+		return err;
+	err = check_func_arg(env, BPF_REG_3, fn->arg3_type, &map);
+	if (err)
+		return err;
+	err = check_func_arg(env, BPF_REG_4, fn->arg4_type, &map);
+	if (err)
+		return err;
+	err = check_func_arg(env, BPF_REG_5, fn->arg5_type, &map);
+	if (err)
+		return err;
+
+	/* reset caller saved regs */
+	for (i = 0; i < CALLER_SAVED_REGS; i++) {
+		reg = regs + caller_saved[i];
+		reg->type = NOT_INIT;
+		reg->imm = 0;
+	}
+
+	/* update return register */
+	if (fn->ret_type == RET_INTEGER) {
+		regs[BPF_REG_0].type = UNKNOWN_VALUE;
+	} else if (fn->ret_type == RET_VOID) {
+		regs[BPF_REG_0].type = NOT_INIT;
+	} else if (fn->ret_type == RET_PTR_TO_MAP_VALUE_OR_NULL) {
+		regs[BPF_REG_0].type = PTR_TO_MAP_VALUE_OR_NULL;
+		/* remember map_ptr, so that check_map_access()
+		 * can check 'value_size' boundary of memory access
+		 * to map element returned from bpf_map_lookup_elem()
+		 */
+		if (map == NULL) {
+			verbose("kernel subsystem misconfigured verifier\n");
+			return -EINVAL;
+		}
+		regs[BPF_REG_0].map_ptr = map;
+	} else {
+		verbose("unknown return type %d of func %d\n",
+			fn->ret_type, func_id);
+		return -EINVAL;
+	}
+	return 0;
+}
+
+/* check validity of 32-bit and 64-bit arithmetic operations */
+static int check_alu_op(struct reg_state *regs, struct bpf_insn *insn)
+{
+	u8 opcode = BPF_OP(insn->code);
+	int err;
+
+	if (opcode == BPF_END || opcode == BPF_NEG) {
+		if (opcode == BPF_NEG) {
+			if (BPF_SRC(insn->code) != 0 ||
+			    insn->src_reg != BPF_REG_0 ||
+			    insn->off != 0 || insn->imm != 0) {
+				verbose("BPF_NEG uses reserved fields\n");
+				return -EINVAL;
+			}
+		} else {
+			if (insn->src_reg != BPF_REG_0 || insn->off != 0 ||
+			    (insn->imm != 16 && insn->imm != 32 && insn->imm != 64)) {
+				verbose("BPF_END uses reserved fields\n");
+				return -EINVAL;
+			}
+		}
+
+		/* check src operand */
+		err = check_reg_arg(regs, insn->dst_reg, SRC_OP);
+		if (err)
+			return err;
+
+		/* check dest operand */
+		err = check_reg_arg(regs, insn->dst_reg, DST_OP);
+		if (err)
+			return err;
+
+	} else if (opcode == BPF_MOV) {
+
+		if (BPF_SRC(insn->code) == BPF_X) {
+			if (insn->imm != 0 || insn->off != 0) {
+				verbose("BPF_MOV uses reserved fields\n");
+				return -EINVAL;
+			}
+
+			/* check src operand */
+			err = check_reg_arg(regs, insn->src_reg, SRC_OP);
+			if (err)
+				return err;
+		} else {
+			if (insn->src_reg != BPF_REG_0 || insn->off != 0) {
+				verbose("BPF_MOV uses reserved fields\n");
+				return -EINVAL;
+			}
+		}
+
+		/* check dest operand */
+		err = check_reg_arg(regs, insn->dst_reg, DST_OP);
+		if (err)
+			return err;
+
+		if (BPF_SRC(insn->code) == BPF_X) {
+			if (BPF_CLASS(insn->code) == BPF_ALU64) {
+				/* case: R1 = R2
+				 * copy register state to dest reg
+				 */
+				regs[insn->dst_reg] = regs[insn->src_reg];
+			} else {
+				regs[insn->dst_reg].type = UNKNOWN_VALUE;
+				regs[insn->dst_reg].map_ptr = NULL;
+			}
+		} else {
+			/* case: R = imm
+			 * remember the value we stored into this reg
+			 */
+			regs[insn->dst_reg].type = CONST_IMM;
+			regs[insn->dst_reg].imm = insn->imm;
+		}
+
+	} else if (opcode > BPF_END) {
+		verbose("invalid BPF_ALU opcode %x\n", opcode);
+		return -EINVAL;
+
+	} else {	/* all other ALU ops: and, sub, xor, add, ... */
+
+		bool stack_relative = false;
+
+		if (BPF_SRC(insn->code) == BPF_X) {
+			if (insn->imm != 0 || insn->off != 0) {
+				verbose("BPF_ALU uses reserved fields\n");
+				return -EINVAL;
+			}
+			/* check src1 operand */
+			err = check_reg_arg(regs, insn->src_reg, SRC_OP);
+			if (err)
+				return err;
+		} else {
+			if (insn->src_reg != BPF_REG_0 || insn->off != 0) {
+				verbose("BPF_ALU uses reserved fields\n");
+				return -EINVAL;
+			}
+		}
+
+		/* check src2 operand */
+		err = check_reg_arg(regs, insn->dst_reg, SRC_OP);
+		if (err)
+			return err;
+
+		if ((opcode == BPF_MOD || opcode == BPF_DIV) &&
+		    BPF_SRC(insn->code) == BPF_K && insn->imm == 0) {
+			verbose("div by zero\n");
+			return -EINVAL;
+		}
+
+		/* pattern match 'bpf_add Rx, imm' instruction */
+		if (opcode == BPF_ADD && BPF_CLASS(insn->code) == BPF_ALU64 &&
+		    regs[insn->dst_reg].type == FRAME_PTR &&
+		    BPF_SRC(insn->code) == BPF_K)
+			stack_relative = true;
+
+		/* check dest operand */
+		err = check_reg_arg(regs, insn->dst_reg, DST_OP);
+		if (err)
+			return err;
+
+		if (stack_relative) {
+			regs[insn->dst_reg].type = PTR_TO_STACK;
+			regs[insn->dst_reg].imm = insn->imm;
+		}
+	}
+
+	return 0;
+}
+
+static int check_cond_jmp_op(struct verifier_env *env,
+			     struct bpf_insn *insn, int *insn_idx)
+{
+	struct reg_state *regs = env->cur_state.regs;
+	struct verifier_state *other_branch;
+	u8 opcode = BPF_OP(insn->code);
+	int err;
+
+	if (opcode > BPF_EXIT) {
+		verbose("invalid BPF_JMP opcode %x\n", opcode);
+		return -EINVAL;
+	}
+
+	if (BPF_SRC(insn->code) == BPF_X) {
+		if (insn->imm != 0) {
+			verbose("BPF_JMP uses reserved fields\n");
+			return -EINVAL;
+		}
+
+		/* check src1 operand */
+		err = check_reg_arg(regs, insn->src_reg, SRC_OP);
+		if (err)
+			return err;
+	} else {
+		if (insn->src_reg != BPF_REG_0) {
+			verbose("BPF_JMP uses reserved fields\n");
+			return -EINVAL;
+		}
+	}
+
+	/* check src2 operand */
+	err = check_reg_arg(regs, insn->dst_reg, SRC_OP);
+	if (err)
+		return err;
+
+	/* detect if R == 0 where R was initialized to zero earlier */
+	if (BPF_SRC(insn->code) == BPF_K &&
+	    (opcode == BPF_JEQ || opcode == BPF_JNE) &&
+	    regs[insn->dst_reg].type == CONST_IMM &&
+	    regs[insn->dst_reg].imm == insn->imm) {
+		if (opcode == BPF_JEQ) {
+			/* if (imm == imm) goto pc+off;
+			 * only follow the goto, ignore fall-through
+			 */
+			*insn_idx += insn->off;
+			return 0;
+		} else {
+			/* if (imm != imm) goto pc+off;
+			 * only follow fall-through branch, since
+			 * that's where the program will go
+			 */
+			return 0;
+		}
+	}
+
+	other_branch = push_stack(env, *insn_idx + insn->off + 1, *insn_idx);
+	if (!other_branch)
+		return -EFAULT;
+
+	/* detect if R == 0 where R is returned value from bpf_map_lookup_elem() */
+	if (BPF_SRC(insn->code) == BPF_K &&
+	    insn->imm == 0 && (opcode == BPF_JEQ ||
+			       opcode == BPF_JNE) &&
+	    regs[insn->dst_reg].type == PTR_TO_MAP_VALUE_OR_NULL) {
+		if (opcode == BPF_JEQ) {
+			/* next fallthrough insn can access memory via
+			 * this register
+			 */
+			regs[insn->dst_reg].type = PTR_TO_MAP_VALUE;
+			/* branch target cannot access it, since reg == 0 */
+			other_branch->regs[insn->dst_reg].type = CONST_IMM;
+			other_branch->regs[insn->dst_reg].imm = 0;
+		} else {
+			other_branch->regs[insn->dst_reg].type = PTR_TO_MAP_VALUE;
+			regs[insn->dst_reg].type = CONST_IMM;
+			regs[insn->dst_reg].imm = 0;
+		}
+	} else if (BPF_SRC(insn->code) == BPF_K &&
+		   (opcode == BPF_JEQ || opcode == BPF_JNE)) {
+
+		if (opcode == BPF_JEQ) {
+			/* detect if (R == imm) goto
+			 * and in the target state recognize that R = imm
+			 */
+			other_branch->regs[insn->dst_reg].type = CONST_IMM;
+			other_branch->regs[insn->dst_reg].imm = insn->imm;
+		} else {
+			/* detect if (R != imm) goto
+			 * and in the fall-through state recognize that R = imm
+			 */
+			regs[insn->dst_reg].type = CONST_IMM;
+			regs[insn->dst_reg].imm = insn->imm;
+		}
+	}
+	if (log_level)
+		print_verifier_state(env);
+	return 0;
+}
+
 /* return the map pointer stored inside BPF_LD_IMM64 instruction */
 static struct bpf_map *ld_imm64_to_map_ptr(struct bpf_insn *insn)
 {
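The PTR_TO_MAP_VALUE_OR_NULL handling in check_cond_jmp_op() above is what forces programs to NULL-check map lookups. In the pseudo-asm style of the commit message, a hypothetical accepted sequence looks like:

	1: call bpf_map_lookup_elem	; R0 becomes map_value_or_null
	2: if (r0 == 0) goto 4		; fall-through: R0=map_value, branch: R0=imm0
	3: r1 = *(u64 *)(r0 + 0)	; allowed, R0 is proven non-NULL on this path
	4: bpf_exit

Moving insn 3 before the check at insn 2 would be rejected by check_mem_access() with "R0 invalid mem access 'map_value_or_null'".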
@@ -313,6 +1148,37 @@ static struct bpf_map *ld_imm64_to_map_ptr(struct bpf_insn *insn)
 	return (struct bpf_map *) (unsigned long) imm64;
 }
 
+/* verify BPF_LD_IMM64 instruction */
+static int check_ld_imm(struct verifier_env *env, struct bpf_insn *insn)
+{
+	struct reg_state *regs = env->cur_state.regs;
+	int err;
+
+	if (BPF_SIZE(insn->code) != BPF_DW) {
+		verbose("invalid BPF_LD_IMM insn\n");
+		return -EINVAL;
+	}
+	if (insn->off != 0) {
+		verbose("BPF_LD_IMM64 uses reserved fields\n");
+		return -EINVAL;
+	}
+
+	err = check_reg_arg(regs, insn->dst_reg, DST_OP);
+	if (err)
+		return err;
+
+	if (insn->src_reg == 0)
+		/* generic move 64-bit immediate into a register */
+		return 0;
+
+	/* replace_map_fd_with_map_ptr() should have caught bad ld_imm64 */
+	BUG_ON(insn->src_reg != BPF_PSEUDO_MAP_FD);
+
+	regs[insn->dst_reg].type = CONST_PTR_TO_MAP;
+	regs[insn->dst_reg].map_ptr = ld_imm64_to_map_ptr(insn);
+	return 0;
+}
+
 /* non-recursive DFS pseudo code
  * 1 procedure DFS-iterative(G,v):
  * 2    label v as discovered
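check_ld_imm() above distinguishes the two encodings of the 16-byte BPF_LD_IMM64 instruction (illustrative values):

	src_reg == 0:			r1 = 0x123456789abcdef0
					; plain 64-bit immediate; dst is marked
					; UNKNOWN_VALUE by check_reg_arg(DST_OP)
	src_reg == BPF_PSEUDO_MAP_FD:	r1 = map_ptr
					; rewritten from a map fd earlier by
					; replace_map_fd_with_map_ptr(); dst is
					; marked CONST_PTR_TO_MAP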
@@ -498,6 +1364,212 @@ err_free:
 	return ret;
 }
 
+static int do_check(struct verifier_env *env)
+{
+	struct verifier_state *state = &env->cur_state;
+	struct bpf_insn *insns = env->prog->insnsi;
+	struct reg_state *regs = state->regs;
+	int insn_cnt = env->prog->len;
+	int insn_idx, prev_insn_idx = 0;
+	int insn_processed = 0;
+	bool do_print_state = false;
+
+	init_reg_state(regs);
+	insn_idx = 0;
+	for (;;) {
+		struct bpf_insn *insn;
+		u8 class;
+		int err;
+
+		if (insn_idx >= insn_cnt) {
+			verbose("invalid insn idx %d insn_cnt %d\n",
+				insn_idx, insn_cnt);
+			return -EFAULT;
+		}
+
+		insn = &insns[insn_idx];
+		class = BPF_CLASS(insn->code);
+
+		if (++insn_processed > 32768) {
+			verbose("BPF program is too large. Processed %d insn\n",
+				insn_processed);
+			return -E2BIG;
+		}
+
+		if (log_level && do_print_state) {
+			verbose("\nfrom %d to %d:", prev_insn_idx, insn_idx);
+			print_verifier_state(env);
+			do_print_state = false;
+		}
+
+		if (log_level) {
+			verbose("%d: ", insn_idx);
+			print_bpf_insn(insn);
+		}
+
+		if (class == BPF_ALU || class == BPF_ALU64) {
+			err = check_alu_op(regs, insn);
+			if (err)
+				return err;
+
+		} else if (class == BPF_LDX) {
+			if (BPF_MODE(insn->code) != BPF_MEM ||
+			    insn->imm != 0) {
+				verbose("BPF_LDX uses reserved fields\n");
+				return -EINVAL;
+			}
+			/* check src operand */
+			err = check_reg_arg(regs, insn->src_reg, SRC_OP);
+			if (err)
+				return err;
+
+			err = check_reg_arg(regs, insn->dst_reg, DST_OP_NO_MARK);
+			if (err)
+				return err;
+
+			/* check that memory (src_reg + off) is readable,
+			 * the state of dst_reg will be updated by this func
+			 */
+			err = check_mem_access(env, insn->src_reg, insn->off,
+					       BPF_SIZE(insn->code), BPF_READ,
+					       insn->dst_reg);
+			if (err)
+				return err;
+
+		} else if (class == BPF_STX) {
+			if (BPF_MODE(insn->code) == BPF_XADD) {
+				err = check_xadd(env, insn);
+				if (err)
+					return err;
+				insn_idx++;
+				continue;
+			}
+
+			if (BPF_MODE(insn->code) != BPF_MEM ||
+			    insn->imm != 0) {
+				verbose("BPF_STX uses reserved fields\n");
+				return -EINVAL;
+			}
+			/* check src1 operand */
+			err = check_reg_arg(regs, insn->src_reg, SRC_OP);
+			if (err)
+				return err;
+			/* check src2 operand */
+			err = check_reg_arg(regs, insn->dst_reg, SRC_OP);
+			if (err)
+				return err;
+
+			/* check that memory (dst_reg + off) is writeable */
+			err = check_mem_access(env, insn->dst_reg, insn->off,
+					       BPF_SIZE(insn->code), BPF_WRITE,
+					       insn->src_reg);
+			if (err)
+				return err;
+
+		} else if (class == BPF_ST) {
+			if (BPF_MODE(insn->code) != BPF_MEM ||
+			    insn->src_reg != BPF_REG_0) {
+				verbose("BPF_ST uses reserved fields\n");
+				return -EINVAL;
+			}
+			/* check src operand */
+			err = check_reg_arg(regs, insn->dst_reg, SRC_OP);
+			if (err)
+				return err;
+
+			/* check that memory (dst_reg + off) is writeable */
+			err = check_mem_access(env, insn->dst_reg, insn->off,
+					       BPF_SIZE(insn->code), BPF_WRITE,
+					       -1);
+			if (err)
+				return err;
+
+		} else if (class == BPF_JMP) {
+			u8 opcode = BPF_OP(insn->code);
+
+			if (opcode == BPF_CALL) {
+				if (BPF_SRC(insn->code) != BPF_K ||
+				    insn->off != 0 ||
+				    insn->src_reg != BPF_REG_0 ||
+				    insn->dst_reg != BPF_REG_0) {
+					verbose("BPF_CALL uses reserved fields\n");
+					return -EINVAL;
+				}
+
+				err = check_call(env, insn->imm);
+				if (err)
+					return err;
+
+			} else if (opcode == BPF_JA) {
+				if (BPF_SRC(insn->code) != BPF_K ||
+				    insn->imm != 0 ||
+				    insn->src_reg != BPF_REG_0 ||
+				    insn->dst_reg != BPF_REG_0) {
+					verbose("BPF_JA uses reserved fields\n");
+					return -EINVAL;
+				}
+
+				insn_idx += insn->off + 1;
+				continue;
+
+			} else if (opcode == BPF_EXIT) {
+				if (BPF_SRC(insn->code) != BPF_K ||
+				    insn->imm != 0 ||
+				    insn->src_reg != BPF_REG_0 ||
+				    insn->dst_reg != BPF_REG_0) {
+					verbose("BPF_EXIT uses reserved fields\n");
+					return -EINVAL;
+				}
+
+				/* eBPF calling convention is such that R0 is used
+				 * to return the value from eBPF program.
+				 * Make sure that it's readable at this time
+				 * of bpf_exit, which means that program wrote
+				 * something into it earlier
+				 */
+				err = check_reg_arg(regs, BPF_REG_0, SRC_OP);
+				if (err)
+					return err;
+
+				insn_idx = pop_stack(env, &prev_insn_idx);
+				if (insn_idx < 0) {
+					break;
+				} else {
+					do_print_state = true;
+					continue;
+				}
+			} else {
+				err = check_cond_jmp_op(env, insn, &insn_idx);
+				if (err)
+					return err;
+			}
+		} else if (class == BPF_LD) {
+			u8 mode = BPF_MODE(insn->code);
+
+			if (mode == BPF_ABS || mode == BPF_IND) {
+				verbose("LD_ABS is not supported yet\n");
+				return -EINVAL;
+			} else if (mode == BPF_IMM) {
+				err = check_ld_imm(env, insn);
+				if (err)
+					return err;
+
+				insn_idx++;
+			} else {
+				verbose("invalid BPF_LD mode\n");
+				return -EINVAL;
+			}
+		} else {
+			verbose("unknown insn class %d\n", class);
+			return -EINVAL;
+		}
+
+		insn_idx++;
+	}
+
+	return 0;
+}
+
 /* look for pseudo eBPF instructions that access map FDs and
  * replace them with actual map pointers
  */
@@ -651,9 +1723,10 @@ int bpf_check(struct bpf_prog *prog, union bpf_attr *attr)
 	if (ret < 0)
 		goto skip_full_check;
 
-	/* ret = do_check(env); */
+	ret = do_check(env);
 
 skip_full_check:
+	while (pop_stack(env, NULL) >= 0);
 
 	if (log_level && log_len >= log_size - 1) {
 		BUG_ON(log_len >= log_size);