authorDaniel Borkmann <daniel@iogearbox.net>2017-12-17 14:34:37 -0500
committerDaniel Borkmann <daniel@iogearbox.net>2017-12-17 14:34:37 -0500
commitef9fde06a259f5da660ada63214addf8cd86a7b9 (patch)
tree8b0d109f49281f68709343f72c5c3c89549ab9af
parent0bce7c9a607f1dbf8d83dd2865e1657096dbce59 (diff)
parent28ab173e96b3971842414bf88eb02eca6ea3f018 (diff)
Merge branch 'bpf-to-bpf-function-calls'
Alexei Starovoitov says:

====================
First of all, a huge thank you to Daniel, John, Jakub, Edward and others who
reviewed multiple iterations of this patch set over the last many months, and
to Dave and others who gave critical feedback during netconf/netdev. The patch
set is solid enough and we thought through numerous corner cases, but it's not
the end. More followups with code reorg and features to follow.

TLDR: Allow arbitrary function calls from one bpf function to another bpf
function.

Since the beginning of bpf, all bpf programs were represented as a single
function and program authors were forced to use always_inline for all
functions in their C code. That was causing llvm to unnecessarily inflate the
code size and forcing developers to move code to header files with little
code reuse.

With a bit of additional complexity, teach the verifier to recognize arbitrary
function calls from one bpf function to another as long as all of the
functions are presented to the verifier as a single bpf program.

Extended program layout:

  ..
  r1 = ..    // arg1
  r2 = ..    // arg2
  call pc+1  // function call pc-relative
  exit
  .. = r1    // access arg1
  .. = r2    // access arg2
  ..
  call pc+20 // second level of function call
  ...

It allows for better optimized code and finally allows introducing the core
bpf libraries that can be reused in different projects, since programs are no
longer limited to a single elf file. With function calls bpf can be compiled
into multiple .o files.

This patch set is the first step. It detects programs that contain multiple
functions and checks that calls between them are valid. It splits the sequence
of bpf instructions (one program) into a set of bpf functions that call each
other. Calls to only known functions are allowed. Since all functions are
presented to the verifier at once, conceptually it is 'static linking'.

Future plans:
- introduce BPF_PROG_TYPE_LIBRARY and allow a set of bpf functions to be
  loaded into the kernel that can be later linked to other programs with
  concrete program types. Aka 'dynamic linking'.
- introduce a function pointer type and indirect calls to allow bpf functions
  to call other dynamically loaded bpf functions while the caller bpf function
  is already executing. Aka 'runtime linking'. This will be a more generic and
  more flexible alternative to bpf_tail_calls.

FAQ:

Q: Do the interpreter and JIT changes mean that a new instruction is
   introduced?
A: No. The call instruction technically stays the same. Now it can call both
   kernel helpers and other bpf functions. The calling convention stays the
   same as well. From the uapi point of view the call insn got a new
   'relocation' BPF_PSEUDO_CALL, similar to the BPF_PSEUDO_MAP_FD 'relocation'
   of the bpf_ldimm64 insn.

Q: What had to change on the LLVM side?
A: A trivial LLVM patch to allow calls was applied to the upcoming 6.0
   release: https://reviews.llvm.org/rL318614, with a few bugfixes as well.
   Make sure to build the latest llvm to have bpf_call support.

More details in the patches.
====================

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
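The selftests added by this series (test_l4lb_noinline.c, test_xdp_noinline.c)
exercise exactly this: helpers written as plain noinline functions instead of
__always_inline ones. A minimal sketch of the same pattern at the C level,
assuming an LLVM 6.0+ toolchain and a selftests-style bpf_helpers.h providing
SEC(); the function and program names below are illustrative, not part of the
series:

#include <linux/bpf.h>
#include "bpf_helpers.h"	/* assumed: selftests-style header providing SEC() */

/* A plain subprogram: no __always_inline needed anymore. LLVM 6.0+
 * emits a pc-relative call with src_reg == BPF_PSEUDO_CALL to it,
 * and the verifier checks it as a separate bpf function.
 */
static __attribute__((noinline)) int first_byte(struct xdp_md *xdp)
{
	void *data = (void *)(long)xdp->data;
	void *data_end = (void *)(long)xdp->data_end;

	/* bounds check done inside the callee */
	if (data + 1 > data_end)
		return -1;
	return *(unsigned char *)data;
}

SEC("xdp")
int xdp_prog(struct xdp_md *xdp)
{
	/* a real bpf-to-bpf call, not an inlined copy */
	if (first_byte(xdp) < 0)
		return XDP_DROP;
	return XDP_PASS;
}

char _license[] SEC("license") = "GPL";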
-rw-r--r--  arch/arm/net/bpf_jit_32.c                         |    2
-rw-r--r--  arch/arm64/net/bpf_jit_comp.c                     |   70
-rw-r--r--  arch/mips/net/ebpf_jit.c                          |    2
-rw-r--r--  arch/powerpc/net/bpf_jit_comp64.c                 |    2
-rw-r--r--  arch/s390/net/bpf_jit_comp.c                      |    2
-rw-r--r--  arch/sparc/net/bpf_jit_comp_64.c                  |    2
-rw-r--r--  arch/x86/net/bpf_jit_comp.c                       |   49
-rw-r--r--  include/linux/bpf.h                               |    4
-rw-r--r--  include/linux/bpf_verifier.h                      |   45
-rw-r--r--  include/linux/filter.h                            |   13
-rw-r--r--  include/uapi/linux/bpf.h                          |    6
-rw-r--r--  kernel/bpf/core.c                                 |  104
-rw-r--r--  kernel/bpf/disasm.c                               |    8
-rw-r--r--  kernel/bpf/syscall.c                              |    3
-rw-r--r--  kernel/bpf/verifier.c                             | 1120
-rw-r--r--  tools/include/uapi/linux/bpf.h                    |    6
-rw-r--r--  tools/lib/bpf/bpf.h                               |    2
-rw-r--r--  tools/lib/bpf/libbpf.c                            |  170
-rw-r--r--  tools/testing/selftests/bpf/Makefile              |   12
-rw-r--r--  tools/testing/selftests/bpf/test_l4lb_noinline.c  |  473
-rw-r--r--  tools/testing/selftests/bpf/test_progs.c          |   95
-rw-r--r--  tools/testing/selftests/bpf/test_verifier.c       | 1624
-rw-r--r--  tools/testing/selftests/bpf/test_xdp_noinline.c   |  833
23 files changed, 4378 insertions, 269 deletions
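Before the per-arch JIT and verifier changes below, a sketch of what the new
call looks like at the instruction level after this series: it is still an
ordinary BPF_JMP | BPF_CALL insn, but src_reg carries the new BPF_PSEUDO_CALL
marker and imm holds the pc-relative offset to the callee, which the verifier
resolves as insn_idx + imm + 1. The offset value here is only illustrative:

#include <linux/bpf.h>	/* uapi: struct bpf_insn, BPF_JMP, BPF_CALL, BPF_PSEUDO_CALL */

/* call the bpf function that starts 3 instructions after this one */
struct bpf_insn pseudo_call = {
	.code    = BPF_JMP | BPF_CALL,
	.dst_reg = 0,
	.src_reg = BPF_PSEUDO_CALL,
	.off     = 0,
	.imm     = 3,	/* callee = this insn's index + imm + 1 */
};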
diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c
index c199990e12b6..4425189bb24c 100644
--- a/arch/arm/net/bpf_jit_32.c
+++ b/arch/arm/net/bpf_jit_32.c
@@ -1824,7 +1824,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
1824 /* If BPF JIT was not enabled then we must fall back to 1824 /* If BPF JIT was not enabled then we must fall back to
1825 * the interpreter. 1825 * the interpreter.
1826 */ 1826 */
1827 if (!bpf_jit_enable) 1827 if (!prog->jit_requested)
1828 return orig_prog; 1828 return orig_prog;
1829 1829
1830 /* If constant blinding was enabled and we failed during blinding 1830 /* If constant blinding was enabled and we failed during blinding
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index ba38d403abb2..396490cf7316 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -99,6 +99,20 @@ static inline void emit_a64_mov_i64(const int reg, const u64 val,
99 } 99 }
100} 100}
101 101
102static inline void emit_addr_mov_i64(const int reg, const u64 val,
103 struct jit_ctx *ctx)
104{
105 u64 tmp = val;
106 int shift = 0;
107
108 emit(A64_MOVZ(1, reg, tmp & 0xffff, shift), ctx);
109 for (;shift < 48;) {
110 tmp >>= 16;
111 shift += 16;
112 emit(A64_MOVK(1, reg, tmp & 0xffff, shift), ctx);
113 }
114}
115
102static inline void emit_a64_mov_i(const int is64, const int reg, 116static inline void emit_a64_mov_i(const int is64, const int reg,
103 const s32 val, struct jit_ctx *ctx) 117 const s32 val, struct jit_ctx *ctx)
104{ 118{
@@ -603,7 +617,10 @@ emit_cond_jmp:
603 const u8 r0 = bpf2a64[BPF_REG_0]; 617 const u8 r0 = bpf2a64[BPF_REG_0];
604 const u64 func = (u64)__bpf_call_base + imm; 618 const u64 func = (u64)__bpf_call_base + imm;
605 619
606 emit_a64_mov_i64(tmp, func, ctx); 620 if (ctx->prog->is_func)
621 emit_addr_mov_i64(tmp, func, ctx);
622 else
623 emit_a64_mov_i64(tmp, func, ctx);
607 emit(A64_BLR(tmp), ctx); 624 emit(A64_BLR(tmp), ctx);
608 emit(A64_MOV(1, r0, A64_R(0)), ctx); 625 emit(A64_MOV(1, r0, A64_R(0)), ctx);
609 break; 626 break;
@@ -835,16 +852,24 @@ static inline void bpf_flush_icache(void *start, void *end)
835 flush_icache_range((unsigned long)start, (unsigned long)end); 852 flush_icache_range((unsigned long)start, (unsigned long)end);
836} 853}
837 854
855struct arm64_jit_data {
856 struct bpf_binary_header *header;
857 u8 *image;
858 struct jit_ctx ctx;
859};
860
838struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) 861struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
839{ 862{
840 struct bpf_prog *tmp, *orig_prog = prog; 863 struct bpf_prog *tmp, *orig_prog = prog;
841 struct bpf_binary_header *header; 864 struct bpf_binary_header *header;
865 struct arm64_jit_data *jit_data;
842 bool tmp_blinded = false; 866 bool tmp_blinded = false;
867 bool extra_pass = false;
843 struct jit_ctx ctx; 868 struct jit_ctx ctx;
844 int image_size; 869 int image_size;
845 u8 *image_ptr; 870 u8 *image_ptr;
846 871
847 if (!bpf_jit_enable) 872 if (!prog->jit_requested)
848 return orig_prog; 873 return orig_prog;
849 874
850 tmp = bpf_jit_blind_constants(prog); 875 tmp = bpf_jit_blind_constants(prog);
@@ -858,13 +883,29 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
858 prog = tmp; 883 prog = tmp;
859 } 884 }
860 885
886 jit_data = prog->aux->jit_data;
887 if (!jit_data) {
888 jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL);
889 if (!jit_data) {
890 prog = orig_prog;
891 goto out;
892 }
893 prog->aux->jit_data = jit_data;
894 }
895 if (jit_data->ctx.offset) {
896 ctx = jit_data->ctx;
897 image_ptr = jit_data->image;
898 header = jit_data->header;
899 extra_pass = true;
900 goto skip_init_ctx;
901 }
861 memset(&ctx, 0, sizeof(ctx)); 902 memset(&ctx, 0, sizeof(ctx));
862 ctx.prog = prog; 903 ctx.prog = prog;
863 904
864 ctx.offset = kcalloc(prog->len, sizeof(int), GFP_KERNEL); 905 ctx.offset = kcalloc(prog->len, sizeof(int), GFP_KERNEL);
865 if (ctx.offset == NULL) { 906 if (ctx.offset == NULL) {
866 prog = orig_prog; 907 prog = orig_prog;
867 goto out; 908 goto out_off;
868 } 909 }
869 910
870 /* 1. Initial fake pass to compute ctx->idx. */ 911 /* 1. Initial fake pass to compute ctx->idx. */
@@ -895,6 +936,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
895 /* 2. Now, the actual pass. */ 936 /* 2. Now, the actual pass. */
896 937
897 ctx.image = (__le32 *)image_ptr; 938 ctx.image = (__le32 *)image_ptr;
939skip_init_ctx:
898 ctx.idx = 0; 940 ctx.idx = 0;
899 941
900 build_prologue(&ctx); 942 build_prologue(&ctx);
@@ -920,13 +962,31 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
920 962
921 bpf_flush_icache(header, ctx.image + ctx.idx); 963 bpf_flush_icache(header, ctx.image + ctx.idx);
922 964
923 bpf_jit_binary_lock_ro(header); 965 if (!prog->is_func || extra_pass) {
966 if (extra_pass && ctx.idx != jit_data->ctx.idx) {
967 pr_err_once("multi-func JIT bug %d != %d\n",
968 ctx.idx, jit_data->ctx.idx);
969 bpf_jit_binary_free(header);
970 prog->bpf_func = NULL;
971 prog->jited = 0;
972 goto out_off;
973 }
974 bpf_jit_binary_lock_ro(header);
975 } else {
976 jit_data->ctx = ctx;
977 jit_data->image = image_ptr;
978 jit_data->header = header;
979 }
924 prog->bpf_func = (void *)ctx.image; 980 prog->bpf_func = (void *)ctx.image;
925 prog->jited = 1; 981 prog->jited = 1;
926 prog->jited_len = image_size; 982 prog->jited_len = image_size;
927 983
984 if (!prog->is_func || extra_pass) {
928out_off: 985out_off:
929 kfree(ctx.offset); 986 kfree(ctx.offset);
987 kfree(jit_data);
988 prog->aux->jit_data = NULL;
989 }
930out: 990out:
931 if (tmp_blinded) 991 if (tmp_blinded)
932 bpf_jit_prog_release_other(prog, prog == orig_prog ? 992 bpf_jit_prog_release_other(prog, prog == orig_prog ?
diff --git a/arch/mips/net/ebpf_jit.c b/arch/mips/net/ebpf_jit.c
index 962b0259b4b6..97069a1b6f43 100644
--- a/arch/mips/net/ebpf_jit.c
+++ b/arch/mips/net/ebpf_jit.c
@@ -1869,7 +1869,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
1869 unsigned int image_size; 1869 unsigned int image_size;
1870 u8 *image_ptr; 1870 u8 *image_ptr;
1871 1871
1872 if (!bpf_jit_enable || !cpu_has_mips64r2) 1872 if (!prog->jit_requested || !cpu_has_mips64r2)
1873 return prog; 1873 return prog;
1874 1874
1875 tmp = bpf_jit_blind_constants(prog); 1875 tmp = bpf_jit_blind_constants(prog);
diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
index 46d74e81aff1..d5a5bc43cf8f 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -993,7 +993,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
993 struct bpf_prog *tmp_fp; 993 struct bpf_prog *tmp_fp;
994 bool bpf_blinded = false; 994 bool bpf_blinded = false;
995 995
996 if (!bpf_jit_enable) 996 if (!fp->jit_requested)
997 return org_fp; 997 return org_fp;
998 998
999 tmp_fp = bpf_jit_blind_constants(org_fp); 999 tmp_fp = bpf_jit_blind_constants(org_fp);
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index e81c16838b90..f4baa8c514d3 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -1300,7 +1300,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
1300 struct bpf_jit jit; 1300 struct bpf_jit jit;
1301 int pass; 1301 int pass;
1302 1302
1303 if (!bpf_jit_enable) 1303 if (!fp->jit_requested)
1304 return orig_fp; 1304 return orig_fp;
1305 1305
1306 tmp = bpf_jit_blind_constants(fp); 1306 tmp = bpf_jit_blind_constants(fp);
diff --git a/arch/sparc/net/bpf_jit_comp_64.c b/arch/sparc/net/bpf_jit_comp_64.c
index 5765e7e711f7..a2f1b5e774a7 100644
--- a/arch/sparc/net/bpf_jit_comp_64.c
+++ b/arch/sparc/net/bpf_jit_comp_64.c
@@ -1517,7 +1517,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
1517 u8 *image_ptr; 1517 u8 *image_ptr;
1518 int pass; 1518 int pass;
1519 1519
1520 if (!bpf_jit_enable) 1520 if (!prog->jit_requested)
1521 return orig_prog; 1521 return orig_prog;
1522 1522
1523 tmp = bpf_jit_blind_constants(prog); 1523 tmp = bpf_jit_blind_constants(prog);
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 0554e8aef4d5..87f214fbe66e 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -1109,19 +1109,29 @@ common_load:
1109 return proglen; 1109 return proglen;
1110} 1110}
1111 1111
1112struct x64_jit_data {
1113 struct bpf_binary_header *header;
1114 int *addrs;
1115 u8 *image;
1116 int proglen;
1117 struct jit_context ctx;
1118};
1119
1112struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) 1120struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
1113{ 1121{
1114 struct bpf_binary_header *header = NULL; 1122 struct bpf_binary_header *header = NULL;
1115 struct bpf_prog *tmp, *orig_prog = prog; 1123 struct bpf_prog *tmp, *orig_prog = prog;
1124 struct x64_jit_data *jit_data;
1116 int proglen, oldproglen = 0; 1125 int proglen, oldproglen = 0;
1117 struct jit_context ctx = {}; 1126 struct jit_context ctx = {};
1118 bool tmp_blinded = false; 1127 bool tmp_blinded = false;
1128 bool extra_pass = false;
1119 u8 *image = NULL; 1129 u8 *image = NULL;
1120 int *addrs; 1130 int *addrs;
1121 int pass; 1131 int pass;
1122 int i; 1132 int i;
1123 1133
1124 if (!bpf_jit_enable) 1134 if (!prog->jit_requested)
1125 return orig_prog; 1135 return orig_prog;
1126 1136
1127 tmp = bpf_jit_blind_constants(prog); 1137 tmp = bpf_jit_blind_constants(prog);
@@ -1135,10 +1145,28 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
1135 prog = tmp; 1145 prog = tmp;
1136 } 1146 }
1137 1147
1148 jit_data = prog->aux->jit_data;
1149 if (!jit_data) {
1150 jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL);
1151 if (!jit_data) {
1152 prog = orig_prog;
1153 goto out;
1154 }
1155 prog->aux->jit_data = jit_data;
1156 }
1157 addrs = jit_data->addrs;
1158 if (addrs) {
1159 ctx = jit_data->ctx;
1160 oldproglen = jit_data->proglen;
1161 image = jit_data->image;
1162 header = jit_data->header;
1163 extra_pass = true;
1164 goto skip_init_addrs;
1165 }
1138 addrs = kmalloc(prog->len * sizeof(*addrs), GFP_KERNEL); 1166 addrs = kmalloc(prog->len * sizeof(*addrs), GFP_KERNEL);
1139 if (!addrs) { 1167 if (!addrs) {
1140 prog = orig_prog; 1168 prog = orig_prog;
1141 goto out; 1169 goto out_addrs;
1142 } 1170 }
1143 1171
1144 /* Before first pass, make a rough estimation of addrs[] 1172 /* Before first pass, make a rough estimation of addrs[]
@@ -1149,6 +1177,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
1149 addrs[i] = proglen; 1177 addrs[i] = proglen;
1150 } 1178 }
1151 ctx.cleanup_addr = proglen; 1179 ctx.cleanup_addr = proglen;
1180skip_init_addrs:
1152 1181
1153 /* JITed image shrinks with every pass and the loop iterates 1182 /* JITed image shrinks with every pass and the loop iterates
1154 * until the image stops shrinking. Very large bpf programs 1183 * until the image stops shrinking. Very large bpf programs
@@ -1189,7 +1218,15 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
1189 1218
1190 if (image) { 1219 if (image) {
1191 bpf_flush_icache(header, image + proglen); 1220 bpf_flush_icache(header, image + proglen);
1192 bpf_jit_binary_lock_ro(header); 1221 if (!prog->is_func || extra_pass) {
1222 bpf_jit_binary_lock_ro(header);
1223 } else {
1224 jit_data->addrs = addrs;
1225 jit_data->ctx = ctx;
1226 jit_data->proglen = proglen;
1227 jit_data->image = image;
1228 jit_data->header = header;
1229 }
1193 prog->bpf_func = (void *)image; 1230 prog->bpf_func = (void *)image;
1194 prog->jited = 1; 1231 prog->jited = 1;
1195 prog->jited_len = proglen; 1232 prog->jited_len = proglen;
@@ -1197,8 +1234,12 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
1197 prog = orig_prog; 1234 prog = orig_prog;
1198 } 1235 }
1199 1236
1237 if (!prog->is_func || extra_pass) {
1200out_addrs: 1238out_addrs:
1201 kfree(addrs); 1239 kfree(addrs);
1240 kfree(jit_data);
1241 prog->aux->jit_data = NULL;
1242 }
1202out: 1243out:
1203 if (tmp_blinded) 1244 if (tmp_blinded)
1204 bpf_jit_prog_release_other(prog, prog == orig_prog ? 1245 bpf_jit_prog_release_other(prog, prog == orig_prog ?
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 54dc7cae2949..da54ef644fcd 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -200,6 +200,9 @@ struct bpf_prog_aux {
200 u32 max_ctx_offset; 200 u32 max_ctx_offset;
201 u32 stack_depth; 201 u32 stack_depth;
202 u32 id; 202 u32 id;
203 u32 func_cnt;
204 struct bpf_prog **func;
205 void *jit_data; /* JIT specific data. arch dependent */
203 struct latch_tree_node ksym_tnode; 206 struct latch_tree_node ksym_tnode;
204 struct list_head ksym_lnode; 207 struct list_head ksym_lnode;
205 const struct bpf_prog_ops *ops; 208 const struct bpf_prog_ops *ops;
@@ -402,6 +405,7 @@ static inline void bpf_long_memcpy(void *dst, const void *src, u32 size)
402 405
403/* verify correctness of eBPF program */ 406/* verify correctness of eBPF program */
404int bpf_check(struct bpf_prog **fp, union bpf_attr *attr); 407int bpf_check(struct bpf_prog **fp, union bpf_attr *attr);
408void bpf_patch_call_args(struct bpf_insn *insn, u32 stack_depth);
405 409
406/* Map specifics */ 410/* Map specifics */
407struct net_device *__dev_map_lookup_elem(struct bpf_map *map, u32 key); 411struct net_device *__dev_map_lookup_elem(struct bpf_map *map, u32 key);
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index c561b986bab0..aaac589e490c 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -76,6 +76,14 @@ struct bpf_reg_state {
76 s64 smax_value; /* maximum possible (s64)value */ 76 s64 smax_value; /* maximum possible (s64)value */
77 u64 umin_value; /* minimum possible (u64)value */ 77 u64 umin_value; /* minimum possible (u64)value */
78 u64 umax_value; /* maximum possible (u64)value */ 78 u64 umax_value; /* maximum possible (u64)value */
79 /* Inside the callee two registers can be both PTR_TO_STACK like
80 * R1=fp-8 and R2=fp-8, but one of them points to this function stack
81 * while another to the caller's stack. To differentiate them 'frameno'
82 * is used which is an index in bpf_verifier_state->frame[] array
83 * pointing to bpf_func_state.
84 * This field must be second to last, for states_equal() reasons.
85 */
86 u32 frameno;
79 /* This field must be last, for states_equal() reasons. */ 87 /* This field must be last, for states_equal() reasons. */
80 enum bpf_reg_liveness live; 88 enum bpf_reg_liveness live;
81}; 89};
@@ -83,7 +91,8 @@ struct bpf_reg_state {
83enum bpf_stack_slot_type { 91enum bpf_stack_slot_type {
84 STACK_INVALID, /* nothing was stored in this stack slot */ 92 STACK_INVALID, /* nothing was stored in this stack slot */
85 STACK_SPILL, /* register spilled into stack */ 93 STACK_SPILL, /* register spilled into stack */
86 STACK_MISC /* BPF program wrote some data into this slot */ 94 STACK_MISC, /* BPF program wrote some data into this slot */
95 STACK_ZERO, /* BPF program wrote constant zero */
87}; 96};
88 97
89#define BPF_REG_SIZE 8 /* size of eBPF register in bytes */ 98#define BPF_REG_SIZE 8 /* size of eBPF register in bytes */
@@ -96,13 +105,34 @@ struct bpf_stack_state {
96/* state of the program: 105/* state of the program:
97 * type of all registers and stack info 106 * type of all registers and stack info
98 */ 107 */
99struct bpf_verifier_state { 108struct bpf_func_state {
100 struct bpf_reg_state regs[MAX_BPF_REG]; 109 struct bpf_reg_state regs[MAX_BPF_REG];
101 struct bpf_verifier_state *parent; 110 struct bpf_verifier_state *parent;
111 /* index of call instruction that called into this func */
112 int callsite;
113 /* stack frame number of this function state from pov of
114 * enclosing bpf_verifier_state.
115 * 0 = main function, 1 = first callee.
116 */
117 u32 frameno;
118 /* subprog number == index within subprog_stack_depth
119 * zero == main subprog
120 */
121 u32 subprogno;
122
123 /* should be second to last. See copy_func_state() */
102 int allocated_stack; 124 int allocated_stack;
103 struct bpf_stack_state *stack; 125 struct bpf_stack_state *stack;
104}; 126};
105 127
128#define MAX_CALL_FRAMES 8
129struct bpf_verifier_state {
130 /* call stack tracking */
131 struct bpf_func_state *frame[MAX_CALL_FRAMES];
132 struct bpf_verifier_state *parent;
133 u32 curframe;
134};
135
106/* linked list of verifier states used to prune search */ 136/* linked list of verifier states used to prune search */
107struct bpf_verifier_state_list { 137struct bpf_verifier_state_list {
108 struct bpf_verifier_state state; 138 struct bpf_verifier_state state;
@@ -113,6 +143,7 @@ struct bpf_insn_aux_data {
113 union { 143 union {
114 enum bpf_reg_type ptr_type; /* pointer type for load/store insns */ 144 enum bpf_reg_type ptr_type; /* pointer type for load/store insns */
115 struct bpf_map *map_ptr; /* pointer for call insn into lookup_elem */ 145 struct bpf_map *map_ptr; /* pointer for call insn into lookup_elem */
146 s32 call_imm; /* saved imm field of call insn */
116 }; 147 };
117 int ctx_field_size; /* the ctx field size for load insn, maybe 0 */ 148 int ctx_field_size; /* the ctx field size for load insn, maybe 0 */
118 bool seen; /* this insn was processed by the verifier */ 149 bool seen; /* this insn was processed by the verifier */
@@ -141,6 +172,8 @@ struct bpf_ext_analyzer_ops {
141 int insn_idx, int prev_insn_idx); 172 int insn_idx, int prev_insn_idx);
142}; 173};
143 174
175#define BPF_MAX_SUBPROGS 256
176
144/* single container for all structs 177/* single container for all structs
145 * one verifier_env per bpf_check() call 178 * one verifier_env per bpf_check() call
146 */ 179 */
@@ -159,13 +192,17 @@ struct bpf_verifier_env {
159 bool allow_ptr_leaks; 192 bool allow_ptr_leaks;
160 bool seen_direct_write; 193 bool seen_direct_write;
161 struct bpf_insn_aux_data *insn_aux_data; /* array of per-insn state */ 194 struct bpf_insn_aux_data *insn_aux_data; /* array of per-insn state */
162
163 struct bpf_verifer_log log; 195 struct bpf_verifer_log log;
196 u32 subprog_starts[BPF_MAX_SUBPROGS];
197 u16 subprog_stack_depth[BPF_MAX_SUBPROGS + 1];
198 u32 subprog_cnt;
164}; 199};
165 200
166static inline struct bpf_reg_state *cur_regs(struct bpf_verifier_env *env) 201static inline struct bpf_reg_state *cur_regs(struct bpf_verifier_env *env)
167{ 202{
168 return env->cur_state->regs; 203 struct bpf_verifier_state *cur = env->cur_state;
204
205 return cur->frame[cur->curframe]->regs;
169} 206}
170 207
171#if defined(CONFIG_NET) && defined(CONFIG_BPF_SYSCALL) 208#if defined(CONFIG_NET) && defined(CONFIG_BPF_SYSCALL)
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 5feb441d3dd9..e872b4ebaa57 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -58,6 +58,9 @@ struct bpf_prog_aux;
58/* unused opcode to mark special call to bpf_tail_call() helper */ 58/* unused opcode to mark special call to bpf_tail_call() helper */
59#define BPF_TAIL_CALL 0xf0 59#define BPF_TAIL_CALL 0xf0
60 60
61/* unused opcode to mark call to interpreter with arguments */
62#define BPF_CALL_ARGS 0xe0
63
61/* As per nm, we expose JITed images as text (code) section for 64/* As per nm, we expose JITed images as text (code) section for
62 * kallsyms. That way, tools like perf can find it to match 65 * kallsyms. That way, tools like perf can find it to match
63 * addresses. 66 * addresses.
@@ -455,10 +458,13 @@ struct bpf_binary_header {
455struct bpf_prog { 458struct bpf_prog {
456 u16 pages; /* Number of allocated pages */ 459 u16 pages; /* Number of allocated pages */
457 u16 jited:1, /* Is our filter JIT'ed? */ 460 u16 jited:1, /* Is our filter JIT'ed? */
461 jit_requested:1,/* archs need to JIT the prog */
458 locked:1, /* Program image locked? */ 462 locked:1, /* Program image locked? */
459 gpl_compatible:1, /* Is filter GPL compatible? */ 463 gpl_compatible:1, /* Is filter GPL compatible? */
460 cb_access:1, /* Is control block accessed? */ 464 cb_access:1, /* Is control block accessed? */
461 dst_needed:1, /* Do we need dst entry? */ 465 dst_needed:1, /* Do we need dst entry? */
466 blinded:1, /* Was blinded */
467 is_func:1, /* program is a bpf function */
462 kprobe_override:1; /* Do we override a kprobe? */ 468 kprobe_override:1; /* Do we override a kprobe? */
463 enum bpf_prog_type type; /* Type of BPF program */ 469 enum bpf_prog_type type; /* Type of BPF program */
464 u32 len; /* Number of filter blocks */ 470 u32 len; /* Number of filter blocks */
@@ -710,6 +716,9 @@ bool sk_filter_charge(struct sock *sk, struct sk_filter *fp);
710void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp); 716void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp);
711 717
712u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); 718u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
719#define __bpf_call_base_args \
720 ((u64 (*)(u64, u64, u64, u64, u64, const struct bpf_insn *)) \
721 __bpf_call_base)
713 722
714struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog); 723struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog);
715void bpf_jit_compile(struct bpf_prog *prog); 724void bpf_jit_compile(struct bpf_prog *prog);
@@ -798,7 +807,7 @@ static inline bool bpf_prog_ebpf_jited(const struct bpf_prog *fp)
798 return fp->jited && bpf_jit_is_ebpf(); 807 return fp->jited && bpf_jit_is_ebpf();
799} 808}
800 809
801static inline bool bpf_jit_blinding_enabled(void) 810static inline bool bpf_jit_blinding_enabled(struct bpf_prog *prog)
802{ 811{
803 /* These are the prerequisites, should someone ever have the 812 /* These are the prerequisites, should someone ever have the
804 * idea to call blinding outside of them, we make sure to 813 * idea to call blinding outside of them, we make sure to
@@ -806,7 +815,7 @@ static inline bool bpf_jit_blinding_enabled(void)
806 */ 815 */
807 if (!bpf_jit_is_ebpf()) 816 if (!bpf_jit_is_ebpf())
808 return false; 817 return false;
809 if (!bpf_jit_enable) 818 if (!prog->jit_requested)
810 return false; 819 return false;
811 if (!bpf_jit_harden) 820 if (!bpf_jit_harden)
812 return false; 821 return false;
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 595bda120cfb..d01f1cb3cfc0 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -197,8 +197,14 @@ enum bpf_attach_type {
197 */ 197 */
198#define BPF_F_STRICT_ALIGNMENT (1U << 0) 198#define BPF_F_STRICT_ALIGNMENT (1U << 0)
199 199
200/* when bpf_ldimm64->src_reg == BPF_PSEUDO_MAP_FD, bpf_ldimm64->imm == fd */
200#define BPF_PSEUDO_MAP_FD 1 201#define BPF_PSEUDO_MAP_FD 1
201 202
203/* when bpf_call->src_reg == BPF_PSEUDO_CALL, bpf_call->imm == pc-relative
204 * offset to another bpf function
205 */
206#define BPF_PSEUDO_CALL 1
207
202/* flags for BPF_MAP_UPDATE_ELEM command */ 208/* flags for BPF_MAP_UPDATE_ELEM command */
203#define BPF_ANY 0 /* create new element or update existing */ 209#define BPF_ANY 0 /* create new element or update existing */
204#define BPF_NOEXIST 1 /* create new element if it didn't exist */ 210#define BPF_NOEXIST 1 /* create new element if it didn't exist */
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index d32bebf4f2de..768e0a02d8c8 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -94,6 +94,7 @@ struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags)
94 fp->pages = size / PAGE_SIZE; 94 fp->pages = size / PAGE_SIZE;
95 fp->aux = aux; 95 fp->aux = aux;
96 fp->aux->prog = fp; 96 fp->aux->prog = fp;
97 fp->jit_requested = ebpf_jit_enabled();
97 98
98 INIT_LIST_HEAD_RCU(&fp->aux->ksym_lnode); 99 INIT_LIST_HEAD_RCU(&fp->aux->ksym_lnode);
99 100
@@ -217,30 +218,40 @@ int bpf_prog_calc_tag(struct bpf_prog *fp)
217 return 0; 218 return 0;
218} 219}
219 220
220static bool bpf_is_jmp_and_has_target(const struct bpf_insn *insn)
221{
222 return BPF_CLASS(insn->code) == BPF_JMP &&
223 /* Call and Exit are both special jumps with no
224 * target inside the BPF instruction image.
225 */
226 BPF_OP(insn->code) != BPF_CALL &&
227 BPF_OP(insn->code) != BPF_EXIT;
228}
229
230static void bpf_adj_branches(struct bpf_prog *prog, u32 pos, u32 delta) 221static void bpf_adj_branches(struct bpf_prog *prog, u32 pos, u32 delta)
231{ 222{
232 struct bpf_insn *insn = prog->insnsi; 223 struct bpf_insn *insn = prog->insnsi;
233 u32 i, insn_cnt = prog->len; 224 u32 i, insn_cnt = prog->len;
225 bool pseudo_call;
226 u8 code;
227 int off;
234 228
235 for (i = 0; i < insn_cnt; i++, insn++) { 229 for (i = 0; i < insn_cnt; i++, insn++) {
236 if (!bpf_is_jmp_and_has_target(insn)) 230 code = insn->code;
231 if (BPF_CLASS(code) != BPF_JMP)
237 continue; 232 continue;
233 if (BPF_OP(code) == BPF_EXIT)
234 continue;
235 if (BPF_OP(code) == BPF_CALL) {
236 if (insn->src_reg == BPF_PSEUDO_CALL)
237 pseudo_call = true;
238 else
239 continue;
240 } else {
241 pseudo_call = false;
242 }
243 off = pseudo_call ? insn->imm : insn->off;
238 244
239 /* Adjust offset of jmps if we cross boundaries. */ 245 /* Adjust offset of jmps if we cross boundaries. */
240 if (i < pos && i + insn->off + 1 > pos) 246 if (i < pos && i + off + 1 > pos)
241 insn->off += delta; 247 off += delta;
242 else if (i > pos + delta && i + insn->off + 1 <= pos + delta) 248 else if (i > pos + delta && i + off + 1 <= pos + delta)
243 insn->off -= delta; 249 off -= delta;
250
251 if (pseudo_call)
252 insn->imm = off;
253 else
254 insn->off = off;
244 } 255 }
245} 256}
246 257
@@ -711,7 +722,7 @@ struct bpf_prog *bpf_jit_blind_constants(struct bpf_prog *prog)
711 struct bpf_insn *insn; 722 struct bpf_insn *insn;
712 int i, rewritten; 723 int i, rewritten;
713 724
714 if (!bpf_jit_blinding_enabled()) 725 if (!bpf_jit_blinding_enabled(prog) || prog->blinded)
715 return prog; 726 return prog;
716 727
717 clone = bpf_prog_clone_create(prog, GFP_USER); 728 clone = bpf_prog_clone_create(prog, GFP_USER);
@@ -753,6 +764,7 @@ struct bpf_prog *bpf_jit_blind_constants(struct bpf_prog *prog)
753 i += insn_delta; 764 i += insn_delta;
754 } 765 }
755 766
767 clone->blinded = 1;
756 return clone; 768 return clone;
757} 769}
758#endif /* CONFIG_BPF_JIT */ 770#endif /* CONFIG_BPF_JIT */
@@ -774,8 +786,7 @@ EXPORT_SYMBOL_GPL(__bpf_call_base);
774 * 786 *
775 * Decode and execute eBPF instructions. 787 * Decode and execute eBPF instructions.
776 */ 788 */
777static unsigned int ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn, 789static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn, u64 *stack)
778 u64 *stack)
779{ 790{
780 u64 tmp; 791 u64 tmp;
781 static const void *jumptable[256] = { 792 static const void *jumptable[256] = {
@@ -835,6 +846,7 @@ static unsigned int ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn,
835 [BPF_ALU64 | BPF_NEG] = &&ALU64_NEG, 846 [BPF_ALU64 | BPF_NEG] = &&ALU64_NEG,
836 /* Call instruction */ 847 /* Call instruction */
837 [BPF_JMP | BPF_CALL] = &&JMP_CALL, 848 [BPF_JMP | BPF_CALL] = &&JMP_CALL,
849 [BPF_JMP | BPF_CALL_ARGS] = &&JMP_CALL_ARGS,
838 [BPF_JMP | BPF_TAIL_CALL] = &&JMP_TAIL_CALL, 850 [BPF_JMP | BPF_TAIL_CALL] = &&JMP_TAIL_CALL,
839 /* Jumps */ 851 /* Jumps */
840 [BPF_JMP | BPF_JA] = &&JMP_JA, 852 [BPF_JMP | BPF_JA] = &&JMP_JA,
@@ -1025,6 +1037,13 @@ select_insn:
1025 BPF_R4, BPF_R5); 1037 BPF_R4, BPF_R5);
1026 CONT; 1038 CONT;
1027 1039
1040 JMP_CALL_ARGS:
1041 BPF_R0 = (__bpf_call_base_args + insn->imm)(BPF_R1, BPF_R2,
1042 BPF_R3, BPF_R4,
1043 BPF_R5,
1044 insn + insn->off + 1);
1045 CONT;
1046
1028 JMP_TAIL_CALL: { 1047 JMP_TAIL_CALL: {
1029 struct bpf_map *map = (struct bpf_map *) (unsigned long) BPF_R2; 1048 struct bpf_map *map = (struct bpf_map *) (unsigned long) BPF_R2;
1030 struct bpf_array *array = container_of(map, struct bpf_array, map); 1049 struct bpf_array *array = container_of(map, struct bpf_array, map);
@@ -1297,6 +1316,23 @@ static unsigned int PROG_NAME(stack_size)(const void *ctx, const struct bpf_insn
1297 return ___bpf_prog_run(regs, insn, stack); \ 1316 return ___bpf_prog_run(regs, insn, stack); \
1298} 1317}
1299 1318
1319#define PROG_NAME_ARGS(stack_size) __bpf_prog_run_args##stack_size
1320#define DEFINE_BPF_PROG_RUN_ARGS(stack_size) \
1321static u64 PROG_NAME_ARGS(stack_size)(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5, \
1322 const struct bpf_insn *insn) \
1323{ \
1324 u64 stack[stack_size / sizeof(u64)]; \
1325 u64 regs[MAX_BPF_REG]; \
1326\
1327 FP = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)]; \
1328 BPF_R1 = r1; \
1329 BPF_R2 = r2; \
1330 BPF_R3 = r3; \
1331 BPF_R4 = r4; \
1332 BPF_R5 = r5; \
1333 return ___bpf_prog_run(regs, insn, stack); \
1334}
1335
1300#define EVAL1(FN, X) FN(X) 1336#define EVAL1(FN, X) FN(X)
1301#define EVAL2(FN, X, Y...) FN(X) EVAL1(FN, Y) 1337#define EVAL2(FN, X, Y...) FN(X) EVAL1(FN, Y)
1302#define EVAL3(FN, X, Y...) FN(X) EVAL2(FN, Y) 1338#define EVAL3(FN, X, Y...) FN(X) EVAL2(FN, Y)
@@ -1308,6 +1344,10 @@ EVAL6(DEFINE_BPF_PROG_RUN, 32, 64, 96, 128, 160, 192);
1308EVAL6(DEFINE_BPF_PROG_RUN, 224, 256, 288, 320, 352, 384); 1344EVAL6(DEFINE_BPF_PROG_RUN, 224, 256, 288, 320, 352, 384);
1309EVAL4(DEFINE_BPF_PROG_RUN, 416, 448, 480, 512); 1345EVAL4(DEFINE_BPF_PROG_RUN, 416, 448, 480, 512);
1310 1346
1347EVAL6(DEFINE_BPF_PROG_RUN_ARGS, 32, 64, 96, 128, 160, 192);
1348EVAL6(DEFINE_BPF_PROG_RUN_ARGS, 224, 256, 288, 320, 352, 384);
1349EVAL4(DEFINE_BPF_PROG_RUN_ARGS, 416, 448, 480, 512);
1350
1311#define PROG_NAME_LIST(stack_size) PROG_NAME(stack_size), 1351#define PROG_NAME_LIST(stack_size) PROG_NAME(stack_size),
1312 1352
1313static unsigned int (*interpreters[])(const void *ctx, 1353static unsigned int (*interpreters[])(const void *ctx,
@@ -1316,6 +1356,24 @@ EVAL6(PROG_NAME_LIST, 32, 64, 96, 128, 160, 192)
1316EVAL6(PROG_NAME_LIST, 224, 256, 288, 320, 352, 384) 1356EVAL6(PROG_NAME_LIST, 224, 256, 288, 320, 352, 384)
1317EVAL4(PROG_NAME_LIST, 416, 448, 480, 512) 1357EVAL4(PROG_NAME_LIST, 416, 448, 480, 512)
1318}; 1358};
1359#undef PROG_NAME_LIST
1360#define PROG_NAME_LIST(stack_size) PROG_NAME_ARGS(stack_size),
1361static u64 (*interpreters_args[])(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5,
1362 const struct bpf_insn *insn) = {
1363EVAL6(PROG_NAME_LIST, 32, 64, 96, 128, 160, 192)
1364EVAL6(PROG_NAME_LIST, 224, 256, 288, 320, 352, 384)
1365EVAL4(PROG_NAME_LIST, 416, 448, 480, 512)
1366};
1367#undef PROG_NAME_LIST
1368
1369void bpf_patch_call_args(struct bpf_insn *insn, u32 stack_depth)
1370{
1371 stack_depth = max_t(u32, stack_depth, 1);
1372 insn->off = (s16) insn->imm;
1373 insn->imm = interpreters_args[(round_up(stack_depth, 32) / 32) - 1] -
1374 __bpf_call_base_args;
1375 insn->code = BPF_JMP | BPF_CALL_ARGS;
1376}
1319 1377
1320bool bpf_prog_array_compatible(struct bpf_array *array, 1378bool bpf_prog_array_compatible(struct bpf_array *array,
1321 const struct bpf_prog *fp) 1379 const struct bpf_prog *fp)
@@ -1572,11 +1630,19 @@ int bpf_prog_array_copy_info(struct bpf_prog_array __rcu *array,
1572static void bpf_prog_free_deferred(struct work_struct *work) 1630static void bpf_prog_free_deferred(struct work_struct *work)
1573{ 1631{
1574 struct bpf_prog_aux *aux; 1632 struct bpf_prog_aux *aux;
1633 int i;
1575 1634
1576 aux = container_of(work, struct bpf_prog_aux, work); 1635 aux = container_of(work, struct bpf_prog_aux, work);
1577 if (bpf_prog_is_dev_bound(aux)) 1636 if (bpf_prog_is_dev_bound(aux))
1578 bpf_prog_offload_destroy(aux->prog); 1637 bpf_prog_offload_destroy(aux->prog);
1579 bpf_jit_free(aux->prog); 1638 for (i = 0; i < aux->func_cnt; i++)
1639 bpf_jit_free(aux->func[i]);
1640 if (aux->func_cnt) {
1641 kfree(aux->func);
1642 bpf_prog_unlock_free(aux->prog);
1643 } else {
1644 bpf_jit_free(aux->prog);
1645 }
1580} 1646}
1581 1647
1582/* Free internal BPF program */ 1648/* Free internal BPF program */
diff --git a/kernel/bpf/disasm.c b/kernel/bpf/disasm.c
index e682850c9715..883f88fa5bfc 100644
--- a/kernel/bpf/disasm.c
+++ b/kernel/bpf/disasm.c
@@ -189,8 +189,12 @@ void print_bpf_insn(bpf_insn_print_cb verbose, struct bpf_verifier_env *env,
189 u8 opcode = BPF_OP(insn->code); 189 u8 opcode = BPF_OP(insn->code);
190 190
191 if (opcode == BPF_CALL) { 191 if (opcode == BPF_CALL) {
192 verbose(env, "(%02x) call %s#%d\n", insn->code, 192 if (insn->src_reg == BPF_PSEUDO_CALL)
193 func_id_name(insn->imm), insn->imm); 193 verbose(env, "(%02x) call pc%+d\n", insn->code,
194 insn->imm);
195 else
196 verbose(env, "(%02x) call %s#%d\n", insn->code,
197 func_id_name(insn->imm), insn->imm);
194 } else if (insn->code == (BPF_JMP | BPF_JA)) { 198 } else if (insn->code == (BPF_JMP | BPF_JA)) {
195 verbose(env, "(%02x) goto pc%+d\n", 199 verbose(env, "(%02x) goto pc%+d\n",
196 insn->code, insn->off); 200 insn->code, insn->off);
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 2c4cfeaa8d5e..e2e1c78ce1dc 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1194,7 +1194,8 @@ static int bpf_prog_load(union bpf_attr *attr)
1194 goto free_used_maps; 1194 goto free_used_maps;
1195 1195
1196 /* eBPF program is ready to be JITed */ 1196 /* eBPF program is ready to be JITed */
1197 prog = bpf_prog_select_runtime(prog, &err); 1197 if (!prog->bpf_func)
1198 prog = bpf_prog_select_runtime(prog, &err);
1198 if (err < 0) 1199 if (err < 0)
1199 goto free_used_maps; 1200 goto free_used_maps;
1200 1201
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index e807bda7fe29..48b2901cf483 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -20,6 +20,8 @@
20#include <linux/file.h> 20#include <linux/file.h>
21#include <linux/vmalloc.h> 21#include <linux/vmalloc.h>
22#include <linux/stringify.h> 22#include <linux/stringify.h>
23#include <linux/bsearch.h>
24#include <linux/sort.h>
23 25
24#include "disasm.h" 26#include "disasm.h"
25 27
@@ -227,13 +229,23 @@ static void print_liveness(struct bpf_verifier_env *env,
227 verbose(env, "w"); 229 verbose(env, "w");
228} 230}
229 231
232static struct bpf_func_state *func(struct bpf_verifier_env *env,
233 const struct bpf_reg_state *reg)
234{
235 struct bpf_verifier_state *cur = env->cur_state;
236
237 return cur->frame[reg->frameno];
238}
239
230static void print_verifier_state(struct bpf_verifier_env *env, 240static void print_verifier_state(struct bpf_verifier_env *env,
231 struct bpf_verifier_state *state) 241 const struct bpf_func_state *state)
232{ 242{
233 struct bpf_reg_state *reg; 243 const struct bpf_reg_state *reg;
234 enum bpf_reg_type t; 244 enum bpf_reg_type t;
235 int i; 245 int i;
236 246
247 if (state->frameno)
248 verbose(env, " frame%d:", state->frameno);
237 for (i = 0; i < MAX_BPF_REG; i++) { 249 for (i = 0; i < MAX_BPF_REG; i++) {
238 reg = &state->regs[i]; 250 reg = &state->regs[i];
239 t = reg->type; 251 t = reg->type;
@@ -246,6 +258,8 @@ static void print_verifier_state(struct bpf_verifier_env *env,
246 tnum_is_const(reg->var_off)) { 258 tnum_is_const(reg->var_off)) {
247 /* reg->off should be 0 for SCALAR_VALUE */ 259 /* reg->off should be 0 for SCALAR_VALUE */
248 verbose(env, "%lld", reg->var_off.value + reg->off); 260 verbose(env, "%lld", reg->var_off.value + reg->off);
261 if (t == PTR_TO_STACK)
262 verbose(env, ",call_%d", func(env, reg)->callsite);
249 } else { 263 } else {
250 verbose(env, "(id=%d", reg->id); 264 verbose(env, "(id=%d", reg->id);
251 if (t != SCALAR_VALUE) 265 if (t != SCALAR_VALUE)
@@ -297,12 +311,14 @@ static void print_verifier_state(struct bpf_verifier_env *env,
297 verbose(env, "=%s", 311 verbose(env, "=%s",
298 reg_type_str[state->stack[i].spilled_ptr.type]); 312 reg_type_str[state->stack[i].spilled_ptr.type]);
299 } 313 }
314 if (state->stack[i].slot_type[0] == STACK_ZERO)
315 verbose(env, " fp%d=0", (-i - 1) * BPF_REG_SIZE);
300 } 316 }
301 verbose(env, "\n"); 317 verbose(env, "\n");
302} 318}
303 319
304static int copy_stack_state(struct bpf_verifier_state *dst, 320static int copy_stack_state(struct bpf_func_state *dst,
305 const struct bpf_verifier_state *src) 321 const struct bpf_func_state *src)
306{ 322{
307 if (!src->stack) 323 if (!src->stack)
308 return 0; 324 return 0;
@@ -318,13 +334,13 @@ static int copy_stack_state(struct bpf_verifier_state *dst,
318 334
319/* do_check() starts with zero-sized stack in struct bpf_verifier_state to 335/* do_check() starts with zero-sized stack in struct bpf_verifier_state to
320 * make it consume minimal amount of memory. check_stack_write() access from 336 * make it consume minimal amount of memory. check_stack_write() access from
321 * the program calls into realloc_verifier_state() to grow the stack size. 337 * the program calls into realloc_func_state() to grow the stack size.
322 * Note there is a non-zero 'parent' pointer inside bpf_verifier_state 338 * Note there is a non-zero 'parent' pointer inside bpf_verifier_state
323 * which this function copies over. It points to previous bpf_verifier_state 339 * which this function copies over. It points to previous bpf_verifier_state
324 * which is never reallocated 340 * which is never reallocated
325 */ 341 */
326static int realloc_verifier_state(struct bpf_verifier_state *state, int size, 342static int realloc_func_state(struct bpf_func_state *state, int size,
327 bool copy_old) 343 bool copy_old)
328{ 344{
329 u32 old_size = state->allocated_stack; 345 u32 old_size = state->allocated_stack;
330 struct bpf_stack_state *new_stack; 346 struct bpf_stack_state *new_stack;
@@ -357,10 +373,21 @@ static int realloc_verifier_state(struct bpf_verifier_state *state, int size,
357 return 0; 373 return 0;
358} 374}
359 375
376static void free_func_state(struct bpf_func_state *state)
377{
378 kfree(state->stack);
379 kfree(state);
380}
381
360static void free_verifier_state(struct bpf_verifier_state *state, 382static void free_verifier_state(struct bpf_verifier_state *state,
361 bool free_self) 383 bool free_self)
362{ 384{
363 kfree(state->stack); 385 int i;
386
387 for (i = 0; i <= state->curframe; i++) {
388 free_func_state(state->frame[i]);
389 state->frame[i] = NULL;
390 }
364 if (free_self) 391 if (free_self)
365 kfree(state); 392 kfree(state);
366} 393}
@@ -368,18 +395,46 @@ static void free_verifier_state(struct bpf_verifier_state *state,
368/* copy verifier state from src to dst growing dst stack space 395/* copy verifier state from src to dst growing dst stack space
369 * when necessary to accommodate larger src stack 396 * when necessary to accommodate larger src stack
370 */ 397 */
371static int copy_verifier_state(struct bpf_verifier_state *dst, 398static int copy_func_state(struct bpf_func_state *dst,
372 const struct bpf_verifier_state *src) 399 const struct bpf_func_state *src)
373{ 400{
374 int err; 401 int err;
375 402
376 err = realloc_verifier_state(dst, src->allocated_stack, false); 403 err = realloc_func_state(dst, src->allocated_stack, false);
377 if (err) 404 if (err)
378 return err; 405 return err;
379 memcpy(dst, src, offsetof(struct bpf_verifier_state, allocated_stack)); 406 memcpy(dst, src, offsetof(struct bpf_func_state, allocated_stack));
380 return copy_stack_state(dst, src); 407 return copy_stack_state(dst, src);
381} 408}
382 409
410static int copy_verifier_state(struct bpf_verifier_state *dst_state,
411 const struct bpf_verifier_state *src)
412{
413 struct bpf_func_state *dst;
414 int i, err;
415
416 /* if dst has more stack frames then src frame, free them */
417 for (i = src->curframe + 1; i <= dst_state->curframe; i++) {
418 free_func_state(dst_state->frame[i]);
419 dst_state->frame[i] = NULL;
420 }
421 dst_state->curframe = src->curframe;
422 dst_state->parent = src->parent;
423 for (i = 0; i <= src->curframe; i++) {
424 dst = dst_state->frame[i];
425 if (!dst) {
426 dst = kzalloc(sizeof(*dst), GFP_KERNEL);
427 if (!dst)
428 return -ENOMEM;
429 dst_state->frame[i] = dst;
430 }
431 err = copy_func_state(dst, src->frame[i]);
432 if (err)
433 return err;
434 }
435 return 0;
436}
437
383static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx, 438static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx,
384 int *insn_idx) 439 int *insn_idx)
385{ 440{
@@ -441,6 +496,10 @@ err:
441static const int caller_saved[CALLER_SAVED_REGS] = { 496static const int caller_saved[CALLER_SAVED_REGS] = {
442 BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5 497 BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5
443}; 498};
499#define CALLEE_SAVED_REGS 5
500static const int callee_saved[CALLEE_SAVED_REGS] = {
501 BPF_REG_6, BPF_REG_7, BPF_REG_8, BPF_REG_9
502};
444 503
445static void __mark_reg_not_init(struct bpf_reg_state *reg); 504static void __mark_reg_not_init(struct bpf_reg_state *reg);
446 505
@@ -465,6 +524,13 @@ static void __mark_reg_known_zero(struct bpf_reg_state *reg)
465 __mark_reg_known(reg, 0); 524 __mark_reg_known(reg, 0);
466} 525}
467 526
527static void __mark_reg_const_zero(struct bpf_reg_state *reg)
528{
529 __mark_reg_known(reg, 0);
530 reg->off = 0;
531 reg->type = SCALAR_VALUE;
532}
533
468static void mark_reg_known_zero(struct bpf_verifier_env *env, 534static void mark_reg_known_zero(struct bpf_verifier_env *env,
469 struct bpf_reg_state *regs, u32 regno) 535 struct bpf_reg_state *regs, u32 regno)
470{ 536{
@@ -576,6 +642,7 @@ static void __mark_reg_unknown(struct bpf_reg_state *reg)
576 reg->id = 0; 642 reg->id = 0;
577 reg->off = 0; 643 reg->off = 0;
578 reg->var_off = tnum_unknown; 644 reg->var_off = tnum_unknown;
645 reg->frameno = 0;
579 __mark_reg_unbounded(reg); 646 __mark_reg_unbounded(reg);
580} 647}
581 648
@@ -612,8 +679,9 @@ static void mark_reg_not_init(struct bpf_verifier_env *env,
612} 679}
613 680
614static void init_reg_state(struct bpf_verifier_env *env, 681static void init_reg_state(struct bpf_verifier_env *env,
615 struct bpf_reg_state *regs) 682 struct bpf_func_state *state)
616{ 683{
684 struct bpf_reg_state *regs = state->regs;
617 int i; 685 int i;
618 686
619 for (i = 0; i < MAX_BPF_REG; i++) { 687 for (i = 0; i < MAX_BPF_REG; i++) {
@@ -624,41 +692,217 @@ static void init_reg_state(struct bpf_verifier_env *env,
624 /* frame pointer */ 692 /* frame pointer */
625 regs[BPF_REG_FP].type = PTR_TO_STACK; 693 regs[BPF_REG_FP].type = PTR_TO_STACK;
626 mark_reg_known_zero(env, regs, BPF_REG_FP); 694 mark_reg_known_zero(env, regs, BPF_REG_FP);
695 regs[BPF_REG_FP].frameno = state->frameno;
627 696
628 /* 1st arg to a function */ 697 /* 1st arg to a function */
629 regs[BPF_REG_1].type = PTR_TO_CTX; 698 regs[BPF_REG_1].type = PTR_TO_CTX;
630 mark_reg_known_zero(env, regs, BPF_REG_1); 699 mark_reg_known_zero(env, regs, BPF_REG_1);
631} 700}
632 701
702#define BPF_MAIN_FUNC (-1)
703static void init_func_state(struct bpf_verifier_env *env,
704 struct bpf_func_state *state,
705 int callsite, int frameno, int subprogno)
706{
707 state->callsite = callsite;
708 state->frameno = frameno;
709 state->subprogno = subprogno;
710 init_reg_state(env, state);
711}
712
633enum reg_arg_type { 713enum reg_arg_type {
634 SRC_OP, /* register is used as source operand */ 714 SRC_OP, /* register is used as source operand */
635 DST_OP, /* register is used as destination operand */ 715 DST_OP, /* register is used as destination operand */
636 DST_OP_NO_MARK /* same as above, check only, don't mark */ 716 DST_OP_NO_MARK /* same as above, check only, don't mark */
637}; 717};
638 718
639static void mark_reg_read(const struct bpf_verifier_state *state, u32 regno) 719static int cmp_subprogs(const void *a, const void *b)
720{
721 return *(int *)a - *(int *)b;
722}
723
724static int find_subprog(struct bpf_verifier_env *env, int off)
640{ 725{
641 struct bpf_verifier_state *parent = state->parent; 726 u32 *p;
727
728 p = bsearch(&off, env->subprog_starts, env->subprog_cnt,
729 sizeof(env->subprog_starts[0]), cmp_subprogs);
730 if (!p)
731 return -ENOENT;
732 return p - env->subprog_starts;
733
734}
735
736static int add_subprog(struct bpf_verifier_env *env, int off)
737{
738 int insn_cnt = env->prog->len;
739 int ret;
740
741 if (off >= insn_cnt || off < 0) {
742 verbose(env, "call to invalid destination\n");
743 return -EINVAL;
744 }
745 ret = find_subprog(env, off);
746 if (ret >= 0)
747 return 0;
748 if (env->subprog_cnt >= BPF_MAX_SUBPROGS) {
749 verbose(env, "too many subprograms\n");
750 return -E2BIG;
751 }
752 env->subprog_starts[env->subprog_cnt++] = off;
753 sort(env->subprog_starts, env->subprog_cnt,
754 sizeof(env->subprog_starts[0]), cmp_subprogs, NULL);
755 return 0;
756}
757
758static int check_subprogs(struct bpf_verifier_env *env)
759{
760 int i, ret, subprog_start, subprog_end, off, cur_subprog = 0;
761 struct bpf_insn *insn = env->prog->insnsi;
762 int insn_cnt = env->prog->len;
763
764 /* determine subprog starts. The end is one before the next starts */
765 for (i = 0; i < insn_cnt; i++) {
766 if (insn[i].code != (BPF_JMP | BPF_CALL))
767 continue;
768 if (insn[i].src_reg != BPF_PSEUDO_CALL)
769 continue;
770 if (!env->allow_ptr_leaks) {
771 verbose(env, "function calls to other bpf functions are allowed for root only\n");
772 return -EPERM;
773 }
774 if (bpf_prog_is_dev_bound(env->prog->aux)) {
775 verbose(env, "funcation calls in offloaded programs are not supported yet\n");
776 return -EINVAL;
777 }
778 ret = add_subprog(env, i + insn[i].imm + 1);
779 if (ret < 0)
780 return ret;
781 }
782
783 if (env->log.level > 1)
784 for (i = 0; i < env->subprog_cnt; i++)
785 verbose(env, "func#%d @%d\n", i, env->subprog_starts[i]);
786
787 /* now check that all jumps are within the same subprog */
788 subprog_start = 0;
789 if (env->subprog_cnt == cur_subprog)
790 subprog_end = insn_cnt;
791 else
792 subprog_end = env->subprog_starts[cur_subprog++];
793 for (i = 0; i < insn_cnt; i++) {
794 u8 code = insn[i].code;
795
796 if (BPF_CLASS(code) != BPF_JMP)
797 goto next;
798 if (BPF_OP(code) == BPF_EXIT || BPF_OP(code) == BPF_CALL)
799 goto next;
800 off = i + insn[i].off + 1;
801 if (off < subprog_start || off >= subprog_end) {
802 verbose(env, "jump out of range from insn %d to %d\n", i, off);
803 return -EINVAL;
804 }
805next:
806 if (i == subprog_end - 1) {
807 /* to avoid fall-through from one subprog into another
808 * the last insn of the subprog should be either exit
809 * or unconditional jump back
810 */
811 if (code != (BPF_JMP | BPF_EXIT) &&
812 code != (BPF_JMP | BPF_JA)) {
813 verbose(env, "last insn is not an exit or jmp\n");
814 return -EINVAL;
815 }
816 subprog_start = subprog_end;
817 if (env->subprog_cnt == cur_subprog)
818 subprog_end = insn_cnt;
819 else
820 subprog_end = env->subprog_starts[cur_subprog++];
821 }
822 }
823 return 0;
824}
825
826struct bpf_verifier_state *skip_callee(struct bpf_verifier_env *env,
827 const struct bpf_verifier_state *state,
828 struct bpf_verifier_state *parent,
829 u32 regno)
830{
831 struct bpf_verifier_state *tmp = NULL;
832
833 /* 'parent' could be a state of caller and
834 * 'state' could be a state of callee. In such case
835 * parent->curframe < state->curframe
836 * and it's ok for r1 - r5 registers
837 *
838 * 'parent' could be a callee's state after it bpf_exit-ed.
839 * In such case parent->curframe > state->curframe
840 * and it's ok for r0 only
841 */
842 if (parent->curframe == state->curframe ||
843 (parent->curframe < state->curframe &&
844 regno >= BPF_REG_1 && regno <= BPF_REG_5) ||
845 (parent->curframe > state->curframe &&
846 regno == BPF_REG_0))
847 return parent;
848
849 if (parent->curframe > state->curframe &&
850 regno >= BPF_REG_6) {
851 /* for callee saved regs we have to skip the whole chain
852 * of states that belong to callee and mark as LIVE_READ
853 * the registers before the call
854 */
855 tmp = parent;
856 while (tmp && tmp->curframe != state->curframe) {
857 tmp = tmp->parent;
858 }
859 if (!tmp)
860 goto bug;
861 parent = tmp;
862 } else {
863 goto bug;
864 }
865 return parent;
866bug:
867 verbose(env, "verifier bug regno %d tmp %p\n", regno, tmp);
868 verbose(env, "regno %d parent frame %d current frame %d\n",
869 regno, parent->curframe, state->curframe);
870 return 0;
871}
872
873static int mark_reg_read(struct bpf_verifier_env *env,
874 const struct bpf_verifier_state *state,
875 struct bpf_verifier_state *parent,
876 u32 regno)
877{
878 bool writes = parent == state->parent; /* Observe write marks */
642 879
643 if (regno == BPF_REG_FP) 880 if (regno == BPF_REG_FP)
644 /* We don't need to worry about FP liveness because it's read-only */ 881 /* We don't need to worry about FP liveness because it's read-only */
645 return; 882 return 0;
646 883
647 while (parent) { 884 while (parent) {
648 /* if read wasn't screened by an earlier write ... */ 885 /* if read wasn't screened by an earlier write ... */
649 if (state->regs[regno].live & REG_LIVE_WRITTEN) 886 if (writes && state->frame[state->curframe]->regs[regno].live & REG_LIVE_WRITTEN)
650 break; 887 break;
888 parent = skip_callee(env, state, parent, regno);
889 if (!parent)
890 return -EFAULT;
651 /* ... then we depend on parent's value */ 891 /* ... then we depend on parent's value */
652 parent->regs[regno].live |= REG_LIVE_READ; 892 parent->frame[parent->curframe]->regs[regno].live |= REG_LIVE_READ;
653 state = parent; 893 state = parent;
654 parent = state->parent; 894 parent = state->parent;
895 writes = true;
655 } 896 }
897 return 0;
656} 898}
657 899
658static int check_reg_arg(struct bpf_verifier_env *env, u32 regno, 900static int check_reg_arg(struct bpf_verifier_env *env, u32 regno,
659 enum reg_arg_type t) 901 enum reg_arg_type t)
660{ 902{
661 struct bpf_reg_state *regs = env->cur_state->regs; 903 struct bpf_verifier_state *vstate = env->cur_state;
904 struct bpf_func_state *state = vstate->frame[vstate->curframe];
905 struct bpf_reg_state *regs = state->regs;
662 906
663 if (regno >= MAX_BPF_REG) { 907 if (regno >= MAX_BPF_REG) {
664 verbose(env, "R%d is invalid\n", regno); 908 verbose(env, "R%d is invalid\n", regno);
@@ -671,7 +915,7 @@ static int check_reg_arg(struct bpf_verifier_env *env, u32 regno,
671 verbose(env, "R%d !read_ok\n", regno); 915 verbose(env, "R%d !read_ok\n", regno);
672 return -EACCES; 916 return -EACCES;
673 } 917 }
674 mark_reg_read(env->cur_state, regno); 918 return mark_reg_read(env, vstate, vstate->parent, regno);
675 } else { 919 } else {
676 /* check whether register used as dest operand can be written to */ 920 /* check whether register used as dest operand can be written to */
677 if (regno == BPF_REG_FP) { 921 if (regno == BPF_REG_FP) {
@@ -702,17 +946,25 @@ static bool is_spillable_regtype(enum bpf_reg_type type)
702 } 946 }
703} 947}
704 948
949/* Does this register contain a constant zero? */
950static bool register_is_null(struct bpf_reg_state *reg)
951{
952 return reg->type == SCALAR_VALUE && tnum_equals_const(reg->var_off, 0);
953}
954
705/* check_stack_read/write functions track spill/fill of registers, 955/* check_stack_read/write functions track spill/fill of registers,
706 * stack boundary and alignment are checked in check_mem_access() 956 * stack boundary and alignment are checked in check_mem_access()
707 */ 957 */
708static int check_stack_write(struct bpf_verifier_env *env, 958static int check_stack_write(struct bpf_verifier_env *env,
709 struct bpf_verifier_state *state, int off, 959 struct bpf_func_state *state, /* func where register points to */
710 int size, int value_regno) 960 int off, int size, int value_regno)
711{ 961{
962 struct bpf_func_state *cur; /* state of the current function */
712 int i, slot = -off - 1, spi = slot / BPF_REG_SIZE, err; 963 int i, slot = -off - 1, spi = slot / BPF_REG_SIZE, err;
964 enum bpf_reg_type type;
713 965
714 err = realloc_verifier_state(state, round_up(slot + 1, BPF_REG_SIZE), 966 err = realloc_func_state(state, round_up(slot + 1, BPF_REG_SIZE),
715 true); 967 true);
716 if (err) 968 if (err)
717 return err; 969 return err;
718 /* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0, 970 /* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0,
@@ -725,8 +977,9 @@ static int check_stack_write(struct bpf_verifier_env *env,
725 return -EACCES; 977 return -EACCES;
726 } 978 }
727 979
980 cur = env->cur_state->frame[env->cur_state->curframe];
728 if (value_regno >= 0 && 981 if (value_regno >= 0 &&
729 is_spillable_regtype(state->regs[value_regno].type)) { 982 is_spillable_regtype((type = cur->regs[value_regno].type))) {
730 983
731 /* register containing pointer is being spilled into stack */ 984 /* register containing pointer is being spilled into stack */
732 if (size != BPF_REG_SIZE) { 985 if (size != BPF_REG_SIZE) {
@@ -734,51 +987,116 @@ static int check_stack_write(struct bpf_verifier_env *env,
734 return -EACCES; 987 return -EACCES;
735 } 988 }
736 989
990 if (state != cur && type == PTR_TO_STACK) {
991 verbose(env, "cannot spill pointers to stack into stack frame of the caller\n");
992 return -EINVAL;
993 }
994
737 /* save register state */ 995 /* save register state */
738 state->stack[spi].spilled_ptr = state->regs[value_regno]; 996 state->stack[spi].spilled_ptr = cur->regs[value_regno];
739 state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN; 997 state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
740 998
741 for (i = 0; i < BPF_REG_SIZE; i++) 999 for (i = 0; i < BPF_REG_SIZE; i++)
742 state->stack[spi].slot_type[i] = STACK_SPILL; 1000 state->stack[spi].slot_type[i] = STACK_SPILL;
743 } else { 1001 } else {
1002 u8 type = STACK_MISC;
1003
744 /* regular write of data into stack */ 1004 /* regular write of data into stack */
745 state->stack[spi].spilled_ptr = (struct bpf_reg_state) {}; 1005 state->stack[spi].spilled_ptr = (struct bpf_reg_state) {};
746 1006
1007 /* only mark the slot as written if all 8 bytes were written
1008 * otherwise read propagation may incorrectly stop too soon
1009 * when stack slots are partially written.
1010 * This heuristic means that read propagation will be
1011 * conservative, since it will add reg_live_read marks
 1012 * to stack slots all the way to the first state when a program
1013 * writes+reads less than 8 bytes
1014 */
1015 if (size == BPF_REG_SIZE)
1016 state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
1017
1018 /* when we zero initialize stack slots mark them as such */
1019 if (value_regno >= 0 &&
1020 register_is_null(&cur->regs[value_regno]))
1021 type = STACK_ZERO;
1022
747 for (i = 0; i < size; i++) 1023 for (i = 0; i < size; i++)
748 state->stack[spi].slot_type[(slot - i) % BPF_REG_SIZE] = 1024 state->stack[spi].slot_type[(slot - i) % BPF_REG_SIZE] =
749 STACK_MISC; 1025 type;
750 } 1026 }
751 return 0; 1027 return 0;
752} 1028}
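
A minimal userspace sketch (illustrative only, simplified from check_stack_write() above) of the new slot-marking policy: only a full 8-byte store screens off earlier reads via REG_LIVE_WRITTEN, and storing a register that is known to be zero marks the written bytes STACK_ZERO instead of STACK_MISC.

#include <stdbool.h>
#include <stdio.h>

#define BPF_REG_SIZE 8

enum slot_type { STACK_INVALID, STACK_MISC, STACK_ZERO };

struct slot {
	enum slot_type type[BPF_REG_SIZE];
	bool live_written;
};

static void model_stack_write(struct slot *s, int size, bool src_is_zero)
{
	enum slot_type t = src_is_zero ? STACK_ZERO : STACK_MISC;
	int i;

	/* a partial write must not screen off reads of the other bytes */
	if (size == BPF_REG_SIZE)
		s->live_written = true;

	for (i = 0; i < size; i++)
		s->type[i] = t;
}

int main(void)
{
	struct slot s = { .live_written = false };

	model_stack_write(&s, 4, true);		/* e.g. *(u32 *)(fp - 8) = 0 */
	printf("4-byte zero store: byte0=%d written=%d\n",
	       s.type[0], s.live_written);	/* STACK_ZERO, not written */

	model_stack_write(&s, 8, false);	/* e.g. *(u64 *)(fp - 8) = r2 */
	printf("8-byte store: byte0=%d written=%d\n",
	       s.type[0], s.live_written);	/* STACK_MISC, written */
	return 0;
}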
753 1029
754static void mark_stack_slot_read(const struct bpf_verifier_state *state, int slot) 1030/* registers of every function are unique and mark_reg_read() propagates
1031 * the liveness in the following cases:
1032 * - from callee into caller for R1 - R5 that were used as arguments
 1033 * - from caller into callee for R0 that is used as the result of the call
1034 * - from caller to the same caller skipping states of the callee for R6 - R9,
1035 * since R6 - R9 are callee saved by implicit function prologue and
1036 * caller's R6 != callee's R6, so when we propagate liveness up to
1037 * parent states we need to skip callee states for R6 - R9.
1038 *
1039 * stack slot marking is different, since stacks of caller and callee are
1040 * accessible in both (since caller can pass a pointer to caller's stack to
1041 * callee which can pass it to another function), hence mark_stack_slot_read()
1042 * has to propagate the stack liveness to all parent states at given frame number.
1043 * Consider code:
1044 * f1() {
1045 * ptr = fp - 8;
1046 * *ptr = ctx;
1047 * call f2 {
1048 * .. = *ptr;
1049 * }
1050 * .. = *ptr;
1051 * }
1052 * First *ptr is reading from f1's stack and mark_stack_slot_read() has
1053 * to mark liveness at the f1's frame and not f2's frame.
1054 * Second *ptr is also reading from f1's stack and mark_stack_slot_read() has
1055 * to propagate liveness to f2 states at f1's frame level and further into
1056 * f1 states at f1's frame level until write into that stack slot
1057 */
1058static void mark_stack_slot_read(struct bpf_verifier_env *env,
1059 const struct bpf_verifier_state *state,
1060 struct bpf_verifier_state *parent,
1061 int slot, int frameno)
755{ 1062{
756 struct bpf_verifier_state *parent = state->parent; 1063 bool writes = parent == state->parent; /* Observe write marks */
757 1064
758 while (parent) { 1065 while (parent) {
1066 if (parent->frame[frameno]->allocated_stack <= slot * BPF_REG_SIZE)
1067 /* since LIVE_WRITTEN mark is only done for full 8-byte
1068 * write the read marks are conservative and parent
 1069 * state may not even have the stack allocated. In such a case
 1070 * end the propagation, since the loop reached the beginning
1071 * of the function
1072 */
1073 break;
759 /* if read wasn't screened by an earlier write ... */ 1074 /* if read wasn't screened by an earlier write ... */
760 if (state->stack[slot].spilled_ptr.live & REG_LIVE_WRITTEN) 1075 if (writes && state->frame[frameno]->stack[slot].spilled_ptr.live & REG_LIVE_WRITTEN)
761 break; 1076 break;
762 /* ... then we depend on parent's value */ 1077 /* ... then we depend on parent's value */
763 parent->stack[slot].spilled_ptr.live |= REG_LIVE_READ; 1078 parent->frame[frameno]->stack[slot].spilled_ptr.live |= REG_LIVE_READ;
764 state = parent; 1079 state = parent;
765 parent = state->parent; 1080 parent = state->parent;
1081 writes = true;
766 } 1082 }
767} 1083}
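
A hypothetical restricted-C version of the f1()/f2() example from the comment above (assumes an LLVM recent enough to emit bpf-to-bpf calls): the callee reads through a pointer into the caller's stack, so the read mark must land on f1's frame rather than f2's.

static __attribute__((noinline)) long f2(long *ptr)
{
	return *ptr;			/* read of f1's stack slot */
}

int f1(void *ctx)
{
	long tmp = (long)ctx;		/* the '*ptr = ctx' store from the comment */

	return (int)(f2(&tmp) + tmp);	/* both reads hit f1's frame */
}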
768 1084
769static int check_stack_read(struct bpf_verifier_env *env, 1085static int check_stack_read(struct bpf_verifier_env *env,
770 struct bpf_verifier_state *state, int off, int size, 1086 struct bpf_func_state *reg_state /* func where register points to */,
771 int value_regno) 1087 int off, int size, int value_regno)
772{ 1088{
1089 struct bpf_verifier_state *vstate = env->cur_state;
1090 struct bpf_func_state *state = vstate->frame[vstate->curframe];
773 int i, slot = -off - 1, spi = slot / BPF_REG_SIZE; 1091 int i, slot = -off - 1, spi = slot / BPF_REG_SIZE;
774 u8 *stype; 1092 u8 *stype;
775 1093
776 if (state->allocated_stack <= slot) { 1094 if (reg_state->allocated_stack <= slot) {
777 verbose(env, "invalid read from stack off %d+0 size %d\n", 1095 verbose(env, "invalid read from stack off %d+0 size %d\n",
778 off, size); 1096 off, size);
779 return -EACCES; 1097 return -EACCES;
780 } 1098 }
781 stype = state->stack[spi].slot_type; 1099 stype = reg_state->stack[spi].slot_type;
782 1100
783 if (stype[0] == STACK_SPILL) { 1101 if (stype[0] == STACK_SPILL) {
784 if (size != BPF_REG_SIZE) { 1102 if (size != BPF_REG_SIZE) {
@@ -794,26 +1112,44 @@ static int check_stack_read(struct bpf_verifier_env *env,
794 1112
795 if (value_regno >= 0) { 1113 if (value_regno >= 0) {
796 /* restore register state from stack */ 1114 /* restore register state from stack */
797 state->regs[value_regno] = state->stack[spi].spilled_ptr; 1115 state->regs[value_regno] = reg_state->stack[spi].spilled_ptr;
798 /* mark reg as written since spilled pointer state likely 1116 /* mark reg as written since spilled pointer state likely
799 * has its liveness marks cleared by is_state_visited() 1117 * has its liveness marks cleared by is_state_visited()
800 * which resets stack/reg liveness for state transitions 1118 * which resets stack/reg liveness for state transitions
801 */ 1119 */
802 state->regs[value_regno].live |= REG_LIVE_WRITTEN; 1120 state->regs[value_regno].live |= REG_LIVE_WRITTEN;
803 mark_stack_slot_read(state, spi);
804 } 1121 }
1122 mark_stack_slot_read(env, vstate, vstate->parent, spi,
1123 reg_state->frameno);
805 return 0; 1124 return 0;
806 } else { 1125 } else {
1126 int zeros = 0;
1127
807 for (i = 0; i < size; i++) { 1128 for (i = 0; i < size; i++) {
808 if (stype[(slot - i) % BPF_REG_SIZE] != STACK_MISC) { 1129 if (stype[(slot - i) % BPF_REG_SIZE] == STACK_MISC)
809 verbose(env, "invalid read from stack off %d+%d size %d\n", 1130 continue;
810 off, i, size); 1131 if (stype[(slot - i) % BPF_REG_SIZE] == STACK_ZERO) {
811 return -EACCES; 1132 zeros++;
1133 continue;
812 } 1134 }
1135 verbose(env, "invalid read from stack off %d+%d size %d\n",
1136 off, i, size);
1137 return -EACCES;
1138 }
1139 mark_stack_slot_read(env, vstate, vstate->parent, spi,
1140 reg_state->frameno);
1141 if (value_regno >= 0) {
1142 if (zeros == size) {
1143 /* any size read into register is zero extended,
1144 * so the whole register == const_zero
1145 */
1146 __mark_reg_const_zero(&state->regs[value_regno]);
1147 } else {
1148 /* have read misc data from the stack */
1149 mark_reg_unknown(env, state->regs, value_regno);
1150 }
1151 state->regs[value_regno].live |= REG_LIVE_WRITTEN;
813 } 1152 }
814 if (value_regno >= 0)
815 /* have read misc data from the stack */
816 mark_reg_unknown(env, state->regs, value_regno);
817 return 0; 1153 return 0;
818 } 1154 }
819} 1155}
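
A hypothetical fragment showing where the STACK_ZERO handling in check_stack_read() pays off (whether 'idx' really lands on the stack depends on codegen, so treat this only as a sketch): a 4-byte zero store followed by a 4-byte read now yields a register known to be constant zero instead of an unknown scalar.

int use_zeroed_slot(void *ctx)
{
	volatile unsigned int idx = 0;	/* 4-byte store of zero: slot bytes become STACK_ZERO */

	return (int)idx;		/* 4-byte read of STACK_ZERO bytes: the
					 * destination register is marked const zero */
}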
@@ -838,7 +1174,8 @@ static int __check_map_access(struct bpf_verifier_env *env, u32 regno, int off,
838static int check_map_access(struct bpf_verifier_env *env, u32 regno, 1174static int check_map_access(struct bpf_verifier_env *env, u32 regno,
839 int off, int size, bool zero_size_allowed) 1175 int off, int size, bool zero_size_allowed)
840{ 1176{
841 struct bpf_verifier_state *state = env->cur_state; 1177 struct bpf_verifier_state *vstate = env->cur_state;
1178 struct bpf_func_state *state = vstate->frame[vstate->curframe];
842 struct bpf_reg_state *reg = &state->regs[regno]; 1179 struct bpf_reg_state *reg = &state->regs[regno];
843 int err; 1180 int err;
844 1181
@@ -1088,6 +1425,54 @@ static int check_ptr_alignment(struct bpf_verifier_env *env,
1088 strict); 1425 strict);
1089} 1426}
1090 1427
1428static int update_stack_depth(struct bpf_verifier_env *env,
1429 const struct bpf_func_state *func,
1430 int off)
1431{
1432 u16 stack = env->subprog_stack_depth[func->subprogno], total = 0;
1433 struct bpf_verifier_state *cur = env->cur_state;
1434 int i;
1435
1436 if (stack >= -off)
1437 return 0;
1438
1439 /* update known max for given subprogram */
1440 env->subprog_stack_depth[func->subprogno] = -off;
1441
1442 /* compute the total for current call chain */
1443 for (i = 0; i <= cur->curframe; i++) {
1444 u32 depth = env->subprog_stack_depth[cur->frame[i]->subprogno];
1445
1446 /* round up to 32-bytes, since this is granularity
1447 * of interpreter stack sizes
1448 */
1449 depth = round_up(depth, 32);
1450 total += depth;
1451 }
1452
1453 if (total > MAX_BPF_STACK) {
1454 verbose(env, "combined stack size of %d calls is %d. Too large\n",
1455 cur->curframe, total);
1456 return -EACCES;
1457 }
1458 return 0;
1459}
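
To make the arithmetic in update_stack_depth() concrete, a standalone sketch (MAX_BPF_STACK assumed to match the kernel's 512): each frame's worst-case depth in the current call chain is rounded up to 32 bytes and the sum must stay within the limit.

#include <stdio.h>

#define MAX_BPF_STACK 512

static unsigned int round_up32(unsigned int x)
{
	return (x + 31) & ~31u;
}

int main(void)
{
	/* e.g. main prog uses 200 bytes, its callee 64, a nested callee 24 */
	unsigned int depth[] = { 200, 64, 24 };
	unsigned int total = 0;
	unsigned int i;

	for (i = 0; i < sizeof(depth) / sizeof(depth[0]); i++)
		total += round_up32(depth[i]);	/* 224 + 64 + 32 */

	printf("combined stack size of the call chain: %u (%s)\n", total,
	       total > MAX_BPF_STACK ? "too large" : "ok");
	return 0;
}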
1460
1461static int get_callee_stack_depth(struct bpf_verifier_env *env,
1462 const struct bpf_insn *insn, int idx)
1463{
1464 int start = idx + insn->imm + 1, subprog;
1465
1466 subprog = find_subprog(env, start);
1467 if (subprog < 0) {
1468 WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
1469 start);
1470 return -EFAULT;
1471 }
1472 subprog++;
1473 return env->subprog_stack_depth[subprog];
1474}
1475
1091/* check whether memory at (regno + off) is accessible for t = (read | write) 1476/* check whether memory at (regno + off) is accessible for t = (read | write)
1092 * if t==write, value_regno is a register which value is stored into memory 1477 * if t==write, value_regno is a register which value is stored into memory
1093 * if t==read, value_regno is a register which will receive the value from memory 1478 * if t==read, value_regno is a register which will receive the value from memory
@@ -1098,9 +1483,9 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
1098 int bpf_size, enum bpf_access_type t, 1483 int bpf_size, enum bpf_access_type t,
1099 int value_regno) 1484 int value_regno)
1100{ 1485{
1101 struct bpf_verifier_state *state = env->cur_state;
1102 struct bpf_reg_state *regs = cur_regs(env); 1486 struct bpf_reg_state *regs = cur_regs(env);
1103 struct bpf_reg_state *reg = regs + regno; 1487 struct bpf_reg_state *reg = regs + regno;
1488 struct bpf_func_state *state;
1104 int size, err = 0; 1489 int size, err = 0;
1105 1490
1106 size = bpf_size_to_bytes(bpf_size); 1491 size = bpf_size_to_bytes(bpf_size);
@@ -1189,8 +1574,10 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
1189 return -EACCES; 1574 return -EACCES;
1190 } 1575 }
1191 1576
1192 if (env->prog->aux->stack_depth < -off) 1577 state = func(env, reg);
1193 env->prog->aux->stack_depth = -off; 1578 err = update_stack_depth(env, state, off);
1579 if (err)
1580 return err;
1194 1581
1195 if (t == BPF_WRITE) 1582 if (t == BPF_WRITE)
1196 err = check_stack_write(env, state, off, size, 1583 err = check_stack_write(env, state, off, size,
@@ -1264,12 +1651,6 @@ static int check_xadd(struct bpf_verifier_env *env, int insn_idx, struct bpf_ins
1264 BPF_SIZE(insn->code), BPF_WRITE, -1); 1651 BPF_SIZE(insn->code), BPF_WRITE, -1);
1265} 1652}
1266 1653
1267/* Does this register contain a constant zero? */
1268static bool register_is_null(struct bpf_reg_state *reg)
1269{
1270 return reg->type == SCALAR_VALUE && tnum_equals_const(reg->var_off, 0);
1271}
1272
1273/* when register 'regno' is passed into function that will read 'access_size' 1654/* when register 'regno' is passed into function that will read 'access_size'
1274 * bytes from that pointer, make sure that it's within stack boundary 1655 * bytes from that pointer, make sure that it's within stack boundary
1275 * and all elements of stack are initialized. 1656 * and all elements of stack are initialized.
@@ -1281,7 +1662,7 @@ static int check_stack_boundary(struct bpf_verifier_env *env, int regno,
1281 struct bpf_call_arg_meta *meta) 1662 struct bpf_call_arg_meta *meta)
1282{ 1663{
1283 struct bpf_reg_state *reg = cur_regs(env) + regno; 1664 struct bpf_reg_state *reg = cur_regs(env) + regno;
1284 struct bpf_verifier_state *state = env->cur_state; 1665 struct bpf_func_state *state = func(env, reg);
1285 int off, i, slot, spi; 1666 int off, i, slot, spi;
1286 1667
1287 if (reg->type != PTR_TO_STACK) { 1668 if (reg->type != PTR_TO_STACK) {
@@ -1312,9 +1693,6 @@ static int check_stack_boundary(struct bpf_verifier_env *env, int regno,
1312 return -EACCES; 1693 return -EACCES;
1313 } 1694 }
1314 1695
1315 if (env->prog->aux->stack_depth < -off)
1316 env->prog->aux->stack_depth = -off;
1317
1318 if (meta && meta->raw_mode) { 1696 if (meta && meta->raw_mode) {
1319 meta->access_size = access_size; 1697 meta->access_size = access_size;
1320 meta->regno = regno; 1698 meta->regno = regno;
@@ -1322,17 +1700,32 @@ static int check_stack_boundary(struct bpf_verifier_env *env, int regno,
1322 } 1700 }
1323 1701
1324 for (i = 0; i < access_size; i++) { 1702 for (i = 0; i < access_size; i++) {
1703 u8 *stype;
1704
1325 slot = -(off + i) - 1; 1705 slot = -(off + i) - 1;
1326 spi = slot / BPF_REG_SIZE; 1706 spi = slot / BPF_REG_SIZE;
1327 if (state->allocated_stack <= slot || 1707 if (state->allocated_stack <= slot)
1328 state->stack[spi].slot_type[slot % BPF_REG_SIZE] != 1708 goto err;
1329 STACK_MISC) { 1709 stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE];
1330 verbose(env, "invalid indirect read from stack off %d+%d size %d\n", 1710 if (*stype == STACK_MISC)
1331 off, i, access_size); 1711 goto mark;
1332 return -EACCES; 1712 if (*stype == STACK_ZERO) {
1713 /* helper can write anything into the stack */
1714 *stype = STACK_MISC;
1715 goto mark;
1333 } 1716 }
1717err:
1718 verbose(env, "invalid indirect read from stack off %d+%d size %d\n",
1719 off, i, access_size);
1720 return -EACCES;
1721mark:
1722 /* reading any byte out of 8-byte 'spill_slot' will cause
1723 * the whole slot to be marked as 'read'
1724 */
1725 mark_stack_slot_read(env, env->cur_state, env->cur_state->parent,
1726 spi, state->frameno);
1334 } 1727 }
1335 return 0; 1728 return update_stack_depth(env, state, off);
1336} 1729}
1337 1730
1338static int check_helper_mem_access(struct bpf_verifier_env *env, int regno, 1731static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
@@ -1585,6 +1978,10 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
1585 case BPF_FUNC_tail_call: 1978 case BPF_FUNC_tail_call:
1586 if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY) 1979 if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY)
1587 goto error; 1980 goto error;
1981 if (env->subprog_cnt) {
1982 verbose(env, "tail_calls are not allowed in programs with bpf-to-bpf calls\n");
1983 return -EINVAL;
1984 }
1588 break; 1985 break;
1589 case BPF_FUNC_perf_event_read: 1986 case BPF_FUNC_perf_event_read:
1590 case BPF_FUNC_perf_event_output: 1987 case BPF_FUNC_perf_event_output:
@@ -1646,9 +2043,9 @@ static int check_raw_mode(const struct bpf_func_proto *fn)
1646/* Packet data might have moved, any old PTR_TO_PACKET[_META,_END] 2043/* Packet data might have moved, any old PTR_TO_PACKET[_META,_END]
1647 * are now invalid, so turn them into unknown SCALAR_VALUE. 2044 * are now invalid, so turn them into unknown SCALAR_VALUE.
1648 */ 2045 */
1649static void clear_all_pkt_pointers(struct bpf_verifier_env *env) 2046static void __clear_all_pkt_pointers(struct bpf_verifier_env *env,
2047 struct bpf_func_state *state)
1650{ 2048{
1651 struct bpf_verifier_state *state = env->cur_state;
1652 struct bpf_reg_state *regs = state->regs, *reg; 2049 struct bpf_reg_state *regs = state->regs, *reg;
1653 int i; 2050 int i;
1654 2051
@@ -1665,7 +2062,121 @@ static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
1665 } 2062 }
1666} 2063}
1667 2064
1668static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx) 2065static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
2066{
2067 struct bpf_verifier_state *vstate = env->cur_state;
2068 int i;
2069
2070 for (i = 0; i <= vstate->curframe; i++)
2071 __clear_all_pkt_pointers(env, vstate->frame[i]);
2072}
2073
2074static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
2075 int *insn_idx)
2076{
2077 struct bpf_verifier_state *state = env->cur_state;
2078 struct bpf_func_state *caller, *callee;
2079 int i, subprog, target_insn;
2080
2081 if (state->curframe >= MAX_CALL_FRAMES) {
2082 verbose(env, "the call stack of %d frames is too deep\n",
2083 state->curframe);
2084 return -E2BIG;
2085 }
2086
2087 target_insn = *insn_idx + insn->imm;
2088 subprog = find_subprog(env, target_insn + 1);
2089 if (subprog < 0) {
2090 verbose(env, "verifier bug. No program starts at insn %d\n",
2091 target_insn + 1);
2092 return -EFAULT;
2093 }
2094
2095 caller = state->frame[state->curframe];
2096 if (state->frame[state->curframe + 1]) {
2097 verbose(env, "verifier bug. Frame %d already allocated\n",
2098 state->curframe + 1);
2099 return -EFAULT;
2100 }
2101
2102 callee = kzalloc(sizeof(*callee), GFP_KERNEL);
2103 if (!callee)
2104 return -ENOMEM;
2105 state->frame[state->curframe + 1] = callee;
2106
2107 /* callee cannot access r0, r6 - r9 for reading and has to write
2108 * into its own stack before reading from it.
2109 * callee can read/write into caller's stack
2110 */
2111 init_func_state(env, callee,
2112 /* remember the callsite, it will be used by bpf_exit */
2113 *insn_idx /* callsite */,
2114 state->curframe + 1 /* frameno within this callchain */,
2115 subprog + 1 /* subprog number within this prog */);
2116
2117 /* copy r1 - r5 args that callee can access */
2118 for (i = BPF_REG_1; i <= BPF_REG_5; i++)
2119 callee->regs[i] = caller->regs[i];
2120
 2121 /* after the call registers r0 - r5 were scratched */
2122 for (i = 0; i < CALLER_SAVED_REGS; i++) {
2123 mark_reg_not_init(env, caller->regs, caller_saved[i]);
2124 check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
2125 }
2126
2127 /* only increment it after check_reg_arg() finished */
2128 state->curframe++;
2129
2130 /* and go analyze first insn of the callee */
2131 *insn_idx = target_insn;
2132
2133 if (env->log.level) {
2134 verbose(env, "caller:\n");
2135 print_verifier_state(env, caller);
2136 verbose(env, "callee:\n");
2137 print_verifier_state(env, callee);
2138 }
2139 return 0;
2140}
2141
2142static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
2143{
2144 struct bpf_verifier_state *state = env->cur_state;
2145 struct bpf_func_state *caller, *callee;
2146 struct bpf_reg_state *r0;
2147
2148 callee = state->frame[state->curframe];
2149 r0 = &callee->regs[BPF_REG_0];
2150 if (r0->type == PTR_TO_STACK) {
2151 /* technically it's ok to return caller's stack pointer
2152 * (or caller's caller's pointer) back to the caller,
2153 * since these pointers are valid. Only current stack
2154 * pointer will be invalid as soon as function exits,
2155 * but let's be conservative
2156 */
2157 verbose(env, "cannot return stack pointer to the caller\n");
2158 return -EINVAL;
2159 }
2160
2161 state->curframe--;
2162 caller = state->frame[state->curframe];
2163 /* return to the caller whatever r0 had in the callee */
2164 caller->regs[BPF_REG_0] = *r0;
2165
2166 *insn_idx = callee->callsite + 1;
2167 if (env->log.level) {
2168 verbose(env, "returning from callee:\n");
2169 print_verifier_state(env, callee);
2170 verbose(env, "to caller at %d:\n", *insn_idx);
2171 print_verifier_state(env, caller);
2172 }
2173 /* clear everything in the callee */
2174 free_func_state(callee);
2175 state->frame[state->curframe + 1] = NULL;
2176 return 0;
2177}
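
A standalone model (simplified, not kernel code) of the register hand-off that check_func_call() and prepare_func_exit() enforce: only r1 - r5 are copied into the new frame, the caller's r0 - r5 are scratched by the call, r6 - r9 stay with the caller untouched, and on exit only r0 flows back.

#include <stdio.h>

#define SCRATCHED -1L	/* stand-in for the verifier's NOT_INIT marking */

struct frame { long regs[11]; };	/* r0 - r10 */

/* model of check_func_call(): copy r1 - r5 into the callee's fresh frame,
 * then treat the caller's r0 - r5 as scratched by the call
 */
static struct frame push_call(struct frame *caller)
{
	struct frame callee = { { 0 } };
	int i;

	for (i = 1; i <= 5; i++)
		callee.regs[i] = caller->regs[i];
	for (i = 0; i <= 5; i++)
		caller->regs[i] = SCRATCHED;
	return callee;		/* callee's r0 and r6 - r9 start out uninitialized */
}

/* model of prepare_func_exit(): only r0 flows back to the caller */
static void pop_exit(struct frame *caller, const struct frame *callee)
{
	caller->regs[0] = callee->regs[0];
}

int main(void)
{
	struct frame caller = { .regs = { [1] = 10, [6] = 77 } };
	struct frame callee = push_call(&caller);

	callee.regs[0] = callee.regs[1] * 2;	/* callee computes its result */
	pop_exit(&caller, &callee);
	printf("caller: r0=%ld r6=%ld\n", caller.regs[0], caller.regs[6]);
	return 0;
}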
2178
2179static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
1669{ 2180{
1670 const struct bpf_func_proto *fn = NULL; 2181 const struct bpf_func_proto *fn = NULL;
1671 struct bpf_reg_state *regs; 2182 struct bpf_reg_state *regs;
@@ -1825,7 +2336,9 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
1825 const struct bpf_reg_state *ptr_reg, 2336 const struct bpf_reg_state *ptr_reg,
1826 const struct bpf_reg_state *off_reg) 2337 const struct bpf_reg_state *off_reg)
1827{ 2338{
1828 struct bpf_reg_state *regs = cur_regs(env), *dst_reg; 2339 struct bpf_verifier_state *vstate = env->cur_state;
2340 struct bpf_func_state *state = vstate->frame[vstate->curframe];
2341 struct bpf_reg_state *regs = state->regs, *dst_reg;
1829 bool known = tnum_is_const(off_reg->var_off); 2342 bool known = tnum_is_const(off_reg->var_off);
1830 s64 smin_val = off_reg->smin_value, smax_val = off_reg->smax_value, 2343 s64 smin_val = off_reg->smin_value, smax_val = off_reg->smax_value,
1831 smin_ptr = ptr_reg->smin_value, smax_ptr = ptr_reg->smax_value; 2344 smin_ptr = ptr_reg->smin_value, smax_ptr = ptr_reg->smax_value;
@@ -1837,13 +2350,13 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
1837 dst_reg = &regs[dst]; 2350 dst_reg = &regs[dst];
1838 2351
1839 if (WARN_ON_ONCE(known && (smin_val != smax_val))) { 2352 if (WARN_ON_ONCE(known && (smin_val != smax_val))) {
1840 print_verifier_state(env, env->cur_state); 2353 print_verifier_state(env, state);
1841 verbose(env, 2354 verbose(env,
1842 "verifier internal error: known but bad sbounds\n"); 2355 "verifier internal error: known but bad sbounds\n");
1843 return -EINVAL; 2356 return -EINVAL;
1844 } 2357 }
1845 if (WARN_ON_ONCE(known && (umin_val != umax_val))) { 2358 if (WARN_ON_ONCE(known && (umin_val != umax_val))) {
1846 print_verifier_state(env, env->cur_state); 2359 print_verifier_state(env, state);
1847 verbose(env, 2360 verbose(env,
1848 "verifier internal error: known but bad ubounds\n"); 2361 "verifier internal error: known but bad ubounds\n");
1849 return -EINVAL; 2362 return -EINVAL;
@@ -2245,7 +2758,9 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
2245static int adjust_reg_min_max_vals(struct bpf_verifier_env *env, 2758static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
2246 struct bpf_insn *insn) 2759 struct bpf_insn *insn)
2247{ 2760{
2248 struct bpf_reg_state *regs = cur_regs(env), *dst_reg, *src_reg; 2761 struct bpf_verifier_state *vstate = env->cur_state;
2762 struct bpf_func_state *state = vstate->frame[vstate->curframe];
2763 struct bpf_reg_state *regs = state->regs, *dst_reg, *src_reg;
2249 struct bpf_reg_state *ptr_reg = NULL, off_reg = {0}; 2764 struct bpf_reg_state *ptr_reg = NULL, off_reg = {0};
2250 u8 opcode = BPF_OP(insn->code); 2765 u8 opcode = BPF_OP(insn->code);
2251 int rc; 2766 int rc;
@@ -2319,12 +2834,12 @@ static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
2319 2834
2320 /* Got here implies adding two SCALAR_VALUEs */ 2835 /* Got here implies adding two SCALAR_VALUEs */
2321 if (WARN_ON_ONCE(ptr_reg)) { 2836 if (WARN_ON_ONCE(ptr_reg)) {
2322 print_verifier_state(env, env->cur_state); 2837 print_verifier_state(env, state);
2323 verbose(env, "verifier internal error: unexpected ptr_reg\n"); 2838 verbose(env, "verifier internal error: unexpected ptr_reg\n");
2324 return -EINVAL; 2839 return -EINVAL;
2325 } 2840 }
2326 if (WARN_ON(!src_reg)) { 2841 if (WARN_ON(!src_reg)) {
2327 print_verifier_state(env, env->cur_state); 2842 print_verifier_state(env, state);
2328 verbose(env, "verifier internal error: no src_reg\n"); 2843 verbose(env, "verifier internal error: no src_reg\n");
2329 return -EINVAL; 2844 return -EINVAL;
2330 } 2845 }
@@ -2478,14 +2993,15 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
2478 return 0; 2993 return 0;
2479} 2994}
2480 2995
2481static void find_good_pkt_pointers(struct bpf_verifier_state *state, 2996static void find_good_pkt_pointers(struct bpf_verifier_state *vstate,
2482 struct bpf_reg_state *dst_reg, 2997 struct bpf_reg_state *dst_reg,
2483 enum bpf_reg_type type, 2998 enum bpf_reg_type type,
2484 bool range_right_open) 2999 bool range_right_open)
2485{ 3000{
3001 struct bpf_func_state *state = vstate->frame[vstate->curframe];
2486 struct bpf_reg_state *regs = state->regs, *reg; 3002 struct bpf_reg_state *regs = state->regs, *reg;
2487 u16 new_range; 3003 u16 new_range;
2488 int i; 3004 int i, j;
2489 3005
2490 if (dst_reg->off < 0 || 3006 if (dst_reg->off < 0 ||
2491 (dst_reg->off == 0 && range_right_open)) 3007 (dst_reg->off == 0 && range_right_open))
@@ -2555,12 +3071,15 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *state,
2555 /* keep the maximum range already checked */ 3071 /* keep the maximum range already checked */
2556 regs[i].range = max(regs[i].range, new_range); 3072 regs[i].range = max(regs[i].range, new_range);
2557 3073
2558 for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) { 3074 for (j = 0; j <= vstate->curframe; j++) {
2559 if (state->stack[i].slot_type[0] != STACK_SPILL) 3075 state = vstate->frame[j];
2560 continue; 3076 for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
2561 reg = &state->stack[i].spilled_ptr; 3077 if (state->stack[i].slot_type[0] != STACK_SPILL)
2562 if (reg->type == type && reg->id == dst_reg->id) 3078 continue;
2563 reg->range = max(reg->range, new_range); 3079 reg = &state->stack[i].spilled_ptr;
3080 if (reg->type == type && reg->id == dst_reg->id)
3081 reg->range = max(reg->range, new_range);
3082 }
2564 } 3083 }
2565} 3084}
2566 3085
@@ -2798,20 +3317,24 @@ static void mark_map_reg(struct bpf_reg_state *regs, u32 regno, u32 id,
2798/* The logic is similar to find_good_pkt_pointers(), both could eventually 3317/* The logic is similar to find_good_pkt_pointers(), both could eventually
2799 * be folded together at some point. 3318 * be folded together at some point.
2800 */ 3319 */
2801static void mark_map_regs(struct bpf_verifier_state *state, u32 regno, 3320static void mark_map_regs(struct bpf_verifier_state *vstate, u32 regno,
2802 bool is_null) 3321 bool is_null)
2803{ 3322{
3323 struct bpf_func_state *state = vstate->frame[vstate->curframe];
2804 struct bpf_reg_state *regs = state->regs; 3324 struct bpf_reg_state *regs = state->regs;
2805 u32 id = regs[regno].id; 3325 u32 id = regs[regno].id;
2806 int i; 3326 int i, j;
2807 3327
2808 for (i = 0; i < MAX_BPF_REG; i++) 3328 for (i = 0; i < MAX_BPF_REG; i++)
2809 mark_map_reg(regs, i, id, is_null); 3329 mark_map_reg(regs, i, id, is_null);
2810 3330
2811 for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) { 3331 for (j = 0; j <= vstate->curframe; j++) {
2812 if (state->stack[i].slot_type[0] != STACK_SPILL) 3332 state = vstate->frame[j];
2813 continue; 3333 for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
2814 mark_map_reg(&state->stack[i].spilled_ptr, 0, id, is_null); 3334 if (state->stack[i].slot_type[0] != STACK_SPILL)
3335 continue;
3336 mark_map_reg(&state->stack[i].spilled_ptr, 0, id, is_null);
3337 }
2815 } 3338 }
2816} 3339}
2817 3340
@@ -2911,8 +3434,10 @@ static bool try_match_pkt_pointers(const struct bpf_insn *insn,
2911static int check_cond_jmp_op(struct bpf_verifier_env *env, 3434static int check_cond_jmp_op(struct bpf_verifier_env *env,
2912 struct bpf_insn *insn, int *insn_idx) 3435 struct bpf_insn *insn, int *insn_idx)
2913{ 3436{
2914 struct bpf_verifier_state *other_branch, *this_branch = env->cur_state; 3437 struct bpf_verifier_state *this_branch = env->cur_state;
2915 struct bpf_reg_state *regs = this_branch->regs, *dst_reg; 3438 struct bpf_verifier_state *other_branch;
3439 struct bpf_reg_state *regs = this_branch->frame[this_branch->curframe]->regs;
3440 struct bpf_reg_state *dst_reg, *other_branch_regs;
2916 u8 opcode = BPF_OP(insn->code); 3441 u8 opcode = BPF_OP(insn->code);
2917 int err; 3442 int err;
2918 3443
@@ -2975,6 +3500,7 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
2975 other_branch = push_stack(env, *insn_idx + insn->off + 1, *insn_idx); 3500 other_branch = push_stack(env, *insn_idx + insn->off + 1, *insn_idx);
2976 if (!other_branch) 3501 if (!other_branch)
2977 return -EFAULT; 3502 return -EFAULT;
3503 other_branch_regs = other_branch->frame[other_branch->curframe]->regs;
2978 3504
2979 /* detect if we are comparing against a constant value so we can adjust 3505 /* detect if we are comparing against a constant value so we can adjust
2980 * our min/max values for our dst register. 3506 * our min/max values for our dst register.
@@ -2987,22 +3513,22 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
2987 if (dst_reg->type == SCALAR_VALUE && 3513 if (dst_reg->type == SCALAR_VALUE &&
2988 regs[insn->src_reg].type == SCALAR_VALUE) { 3514 regs[insn->src_reg].type == SCALAR_VALUE) {
2989 if (tnum_is_const(regs[insn->src_reg].var_off)) 3515 if (tnum_is_const(regs[insn->src_reg].var_off))
2990 reg_set_min_max(&other_branch->regs[insn->dst_reg], 3516 reg_set_min_max(&other_branch_regs[insn->dst_reg],
2991 dst_reg, regs[insn->src_reg].var_off.value, 3517 dst_reg, regs[insn->src_reg].var_off.value,
2992 opcode); 3518 opcode);
2993 else if (tnum_is_const(dst_reg->var_off)) 3519 else if (tnum_is_const(dst_reg->var_off))
2994 reg_set_min_max_inv(&other_branch->regs[insn->src_reg], 3520 reg_set_min_max_inv(&other_branch_regs[insn->src_reg],
2995 &regs[insn->src_reg], 3521 &regs[insn->src_reg],
2996 dst_reg->var_off.value, opcode); 3522 dst_reg->var_off.value, opcode);
2997 else if (opcode == BPF_JEQ || opcode == BPF_JNE) 3523 else if (opcode == BPF_JEQ || opcode == BPF_JNE)
2998 /* Comparing for equality, we can combine knowledge */ 3524 /* Comparing for equality, we can combine knowledge */
2999 reg_combine_min_max(&other_branch->regs[insn->src_reg], 3525 reg_combine_min_max(&other_branch_regs[insn->src_reg],
3000 &other_branch->regs[insn->dst_reg], 3526 &other_branch_regs[insn->dst_reg],
3001 &regs[insn->src_reg], 3527 &regs[insn->src_reg],
3002 &regs[insn->dst_reg], opcode); 3528 &regs[insn->dst_reg], opcode);
3003 } 3529 }
3004 } else if (dst_reg->type == SCALAR_VALUE) { 3530 } else if (dst_reg->type == SCALAR_VALUE) {
3005 reg_set_min_max(&other_branch->regs[insn->dst_reg], 3531 reg_set_min_max(&other_branch_regs[insn->dst_reg],
3006 dst_reg, insn->imm, opcode); 3532 dst_reg, insn->imm, opcode);
3007 } 3533 }
3008 3534
@@ -3023,7 +3549,7 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
3023 return -EACCES; 3549 return -EACCES;
3024 } 3550 }
3025 if (env->log.level) 3551 if (env->log.level)
3026 print_verifier_state(env, this_branch); 3552 print_verifier_state(env, this_branch->frame[this_branch->curframe]);
3027 return 0; 3553 return 0;
3028} 3554}
3029 3555
@@ -3108,6 +3634,18 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
3108 return -EINVAL; 3634 return -EINVAL;
3109 } 3635 }
3110 3636
3637 if (env->subprog_cnt) {
 3638 /* when a program has LD_ABS insns, JITs and the interpreter assume
3639 * that r1 == ctx == skb which is not the case for callees
3640 * that can have arbitrary arguments. It's problematic
3641 * for main prog as well since JITs would need to analyze
3642 * all functions in order to make proper register save/restore
3643 * decisions in the main prog. Hence disallow LD_ABS with calls
3644 */
3645 verbose(env, "BPF_LD_[ABS|IND] instructions cannot be mixed with bpf-to-bpf calls\n");
3646 return -EINVAL;
3647 }
3648
3111 if (insn->dst_reg != BPF_REG_0 || insn->off != 0 || 3649 if (insn->dst_reg != BPF_REG_0 || insn->off != 0 ||
3112 BPF_SIZE(insn->code) == BPF_DW || 3650 BPF_SIZE(insn->code) == BPF_DW ||
3113 (mode == BPF_ABS && insn->src_reg != BPF_REG_0)) { 3651 (mode == BPF_ABS && insn->src_reg != BPF_REG_0)) {
@@ -3284,6 +3822,10 @@ static int check_cfg(struct bpf_verifier_env *env)
3284 int ret = 0; 3822 int ret = 0;
3285 int i, t; 3823 int i, t;
3286 3824
3825 ret = check_subprogs(env);
3826 if (ret < 0)
3827 return ret;
3828
3287 insn_state = kcalloc(insn_cnt, sizeof(int), GFP_KERNEL); 3829 insn_state = kcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
3288 if (!insn_state) 3830 if (!insn_state)
3289 return -ENOMEM; 3831 return -ENOMEM;
@@ -3316,6 +3858,14 @@ peek_stack:
3316 goto err_free; 3858 goto err_free;
3317 if (t + 1 < insn_cnt) 3859 if (t + 1 < insn_cnt)
3318 env->explored_states[t + 1] = STATE_LIST_MARK; 3860 env->explored_states[t + 1] = STATE_LIST_MARK;
3861 if (insns[t].src_reg == BPF_PSEUDO_CALL) {
3862 env->explored_states[t] = STATE_LIST_MARK;
3863 ret = push_insn(t, t + insns[t].imm + 1, BRANCH, env);
3864 if (ret == 1)
3865 goto peek_stack;
3866 else if (ret < 0)
3867 goto err_free;
3868 }
3319 } else if (opcode == BPF_JA) { 3869 } else if (opcode == BPF_JA) {
3320 if (BPF_SRC(insns[t].code) != BPF_K) { 3870 if (BPF_SRC(insns[t].code) != BPF_K) {
3321 ret = -EINVAL; 3871 ret = -EINVAL;
@@ -3434,11 +3984,21 @@ static bool check_ids(u32 old_id, u32 cur_id, struct idpair *idmap)
3434static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur, 3984static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
3435 struct idpair *idmap) 3985 struct idpair *idmap)
3436{ 3986{
3987 bool equal;
3988
3437 if (!(rold->live & REG_LIVE_READ)) 3989 if (!(rold->live & REG_LIVE_READ))
3438 /* explored state didn't use this */ 3990 /* explored state didn't use this */
3439 return true; 3991 return true;
3440 3992
3441 if (memcmp(rold, rcur, offsetof(struct bpf_reg_state, live)) == 0) 3993 equal = memcmp(rold, rcur, offsetof(struct bpf_reg_state, frameno)) == 0;
3994
3995 if (rold->type == PTR_TO_STACK)
3996 /* two stack pointers are equal only if they're pointing to
3997 * the same stack frame, since fp-8 in foo != fp-8 in bar
3998 */
3999 return equal && rold->frameno == rcur->frameno;
4000
4001 if (equal)
3442 return true; 4002 return true;
3443 4003
3444 if (rold->type == NOT_INIT) 4004 if (rold->type == NOT_INIT)
@@ -3511,7 +4071,6 @@ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
3511 tnum_in(rold->var_off, rcur->var_off); 4071 tnum_in(rold->var_off, rcur->var_off);
3512 case PTR_TO_CTX: 4072 case PTR_TO_CTX:
3513 case CONST_PTR_TO_MAP: 4073 case CONST_PTR_TO_MAP:
3514 case PTR_TO_STACK:
3515 case PTR_TO_PACKET_END: 4074 case PTR_TO_PACKET_END:
3516 /* Only valid matches are exact, which memcmp() above 4075 /* Only valid matches are exact, which memcmp() above
3517 * would have accepted 4076 * would have accepted
@@ -3526,8 +4085,8 @@ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
3526 return false; 4085 return false;
3527} 4086}
3528 4087
3529static bool stacksafe(struct bpf_verifier_state *old, 4088static bool stacksafe(struct bpf_func_state *old,
3530 struct bpf_verifier_state *cur, 4089 struct bpf_func_state *cur,
3531 struct idpair *idmap) 4090 struct idpair *idmap)
3532{ 4091{
3533 int i, spi; 4092 int i, spi;
@@ -3545,8 +4104,19 @@ static bool stacksafe(struct bpf_verifier_state *old,
3545 for (i = 0; i < old->allocated_stack; i++) { 4104 for (i = 0; i < old->allocated_stack; i++) {
3546 spi = i / BPF_REG_SIZE; 4105 spi = i / BPF_REG_SIZE;
3547 4106
4107 if (!(old->stack[spi].spilled_ptr.live & REG_LIVE_READ))
4108 /* explored state didn't use this */
4109 return true;
4110
3548 if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_INVALID) 4111 if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_INVALID)
3549 continue; 4112 continue;
4113 /* if old state was safe with misc data in the stack
4114 * it will be safe with zero-initialized stack.
4115 * The opposite is not true
4116 */
4117 if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_MISC &&
4118 cur->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_ZERO)
4119 continue;
3550 if (old->stack[spi].slot_type[i % BPF_REG_SIZE] != 4120 if (old->stack[spi].slot_type[i % BPF_REG_SIZE] !=
3551 cur->stack[spi].slot_type[i % BPF_REG_SIZE]) 4121 cur->stack[spi].slot_type[i % BPF_REG_SIZE])
3552 /* Ex: old explored (safe) state has STACK_SPILL in 4122 /* Ex: old explored (safe) state has STACK_SPILL in
@@ -3603,9 +4173,8 @@ static bool stacksafe(struct bpf_verifier_state *old,
3603 * whereas register type in current state is meaningful, it means that 4173 * whereas register type in current state is meaningful, it means that
3604 * the current state will reach 'bpf_exit' instruction safely 4174 * the current state will reach 'bpf_exit' instruction safely
3605 */ 4175 */
3606static bool states_equal(struct bpf_verifier_env *env, 4176static bool func_states_equal(struct bpf_func_state *old,
3607 struct bpf_verifier_state *old, 4177 struct bpf_func_state *cur)
3608 struct bpf_verifier_state *cur)
3609{ 4178{
3610 struct idpair *idmap; 4179 struct idpair *idmap;
3611 bool ret = false; 4180 bool ret = false;
@@ -3629,71 +4198,72 @@ out_free:
3629 return ret; 4198 return ret;
3630} 4199}
3631 4200
4201static bool states_equal(struct bpf_verifier_env *env,
4202 struct bpf_verifier_state *old,
4203 struct bpf_verifier_state *cur)
4204{
4205 int i;
4206
4207 if (old->curframe != cur->curframe)
4208 return false;
4209
4210 /* for states to be equal callsites have to be the same
4211 * and all frame states need to be equivalent
4212 */
4213 for (i = 0; i <= old->curframe; i++) {
4214 if (old->frame[i]->callsite != cur->frame[i]->callsite)
4215 return false;
4216 if (!func_states_equal(old->frame[i], cur->frame[i]))
4217 return false;
4218 }
4219 return true;
4220}
4221
3632/* A write screens off any subsequent reads; but write marks come from the 4222/* A write screens off any subsequent reads; but write marks come from the
3633 * straight-line code between a state and its parent. When we arrive at a 4223 * straight-line code between a state and its parent. When we arrive at an
3634 * jump target (in the first iteration of the propagate_liveness() loop), 4224 * equivalent state (jump target or such) we didn't arrive by the straight-line
3635 * we didn't arrive by the straight-line code, so read marks in state must 4225 * code, so read marks in the state must propagate to the parent regardless
3636 * propagate to parent regardless of state's write marks. 4226 * of the state's write marks. That's what 'parent == state->parent' comparison
4227 * in mark_reg_read() and mark_stack_slot_read() is for.
3637 */ 4228 */
3638static bool do_propagate_liveness(const struct bpf_verifier_state *state, 4229static int propagate_liveness(struct bpf_verifier_env *env,
3639 struct bpf_verifier_state *parent) 4230 const struct bpf_verifier_state *vstate,
4231 struct bpf_verifier_state *vparent)
3640{ 4232{
3641 bool writes = parent == state->parent; /* Observe write marks */ 4233 int i, frame, err = 0;
3642 bool touched = false; /* any changes made? */ 4234 struct bpf_func_state *state, *parent;
3643 int i;
3644 4235
3645 if (!parent) 4236 if (vparent->curframe != vstate->curframe) {
3646 return touched; 4237 WARN(1, "propagate_live: parent frame %d current frame %d\n",
4238 vparent->curframe, vstate->curframe);
4239 return -EFAULT;
4240 }
3647 /* Propagate read liveness of registers... */ 4241 /* Propagate read liveness of registers... */
3648 BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG); 4242 BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG);
3649 /* We don't need to worry about FP liveness because it's read-only */ 4243 /* We don't need to worry about FP liveness because it's read-only */
3650 for (i = 0; i < BPF_REG_FP; i++) { 4244 for (i = 0; i < BPF_REG_FP; i++) {
3651 if (parent->regs[i].live & REG_LIVE_READ) 4245 if (vparent->frame[vparent->curframe]->regs[i].live & REG_LIVE_READ)
3652 continue; 4246 continue;
3653 if (writes && (state->regs[i].live & REG_LIVE_WRITTEN)) 4247 if (vstate->frame[vstate->curframe]->regs[i].live & REG_LIVE_READ) {
3654 continue; 4248 err = mark_reg_read(env, vstate, vparent, i);
3655 if (state->regs[i].live & REG_LIVE_READ) { 4249 if (err)
3656 parent->regs[i].live |= REG_LIVE_READ; 4250 return err;
3657 touched = true;
3658 } 4251 }
3659 } 4252 }
4253
3660 /* ... and stack slots */ 4254 /* ... and stack slots */
3661 for (i = 0; i < state->allocated_stack / BPF_REG_SIZE && 4255 for (frame = 0; frame <= vstate->curframe; frame++) {
3662 i < parent->allocated_stack / BPF_REG_SIZE; i++) { 4256 state = vstate->frame[frame];
3663 if (parent->stack[i].slot_type[0] != STACK_SPILL) 4257 parent = vparent->frame[frame];
3664 continue; 4258 for (i = 0; i < state->allocated_stack / BPF_REG_SIZE &&
3665 if (state->stack[i].slot_type[0] != STACK_SPILL) 4259 i < parent->allocated_stack / BPF_REG_SIZE; i++) {
3666 continue; 4260 if (parent->stack[i].spilled_ptr.live & REG_LIVE_READ)
3667 if (parent->stack[i].spilled_ptr.live & REG_LIVE_READ) 4261 continue;
3668 continue; 4262 if (state->stack[i].spilled_ptr.live & REG_LIVE_READ)
3669 if (writes && 4263 mark_stack_slot_read(env, vstate, vparent, i, frame);
3670 (state->stack[i].spilled_ptr.live & REG_LIVE_WRITTEN))
3671 continue;
3672 if (state->stack[i].spilled_ptr.live & REG_LIVE_READ) {
3673 parent->stack[i].spilled_ptr.live |= REG_LIVE_READ;
3674 touched = true;
3675 } 4264 }
3676 } 4265 }
3677 return touched; 4266 return err;
3678}
3679
3680/* "parent" is "a state from which we reach the current state", but initially
3681 * it is not the state->parent (i.e. "the state whose straight-line code leads
3682 * to the current state"), instead it is the state that happened to arrive at
3683 * a (prunable) equivalent of the current state. See comment above
3684 * do_propagate_liveness() for consequences of this.
3685 * This function is just a more efficient way of calling mark_reg_read() or
3686 * mark_stack_slot_read() on each reg in "parent" that is read in "state",
3687 * though it requires that parent != state->parent in the call arguments.
3688 */
3689static void propagate_liveness(const struct bpf_verifier_state *state,
3690 struct bpf_verifier_state *parent)
3691{
3692 while (do_propagate_liveness(state, parent)) {
3693 /* Something changed, so we need to feed those changes onward */
3694 state = parent;
3695 parent = state->parent;
3696 }
3697} 4267}
3698 4268
3699static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) 4269static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
@@ -3701,7 +4271,7 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
3701 struct bpf_verifier_state_list *new_sl; 4271 struct bpf_verifier_state_list *new_sl;
3702 struct bpf_verifier_state_list *sl; 4272 struct bpf_verifier_state_list *sl;
3703 struct bpf_verifier_state *cur = env->cur_state; 4273 struct bpf_verifier_state *cur = env->cur_state;
3704 int i, err; 4274 int i, j, err;
3705 4275
3706 sl = env->explored_states[insn_idx]; 4276 sl = env->explored_states[insn_idx];
3707 if (!sl) 4277 if (!sl)
@@ -3722,7 +4292,9 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
3722 * they'll be immediately forgotten as we're pruning 4292 * they'll be immediately forgotten as we're pruning
3723 * this state and will pop a new one. 4293 * this state and will pop a new one.
3724 */ 4294 */
3725 propagate_liveness(&sl->state, cur); 4295 err = propagate_liveness(env, &sl->state, cur);
4296 if (err)
4297 return err;
3726 return 1; 4298 return 1;
3727 } 4299 }
3728 sl = sl->next; 4300 sl = sl->next;
@@ -3730,9 +4302,10 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
3730 4302
3731 /* there were no equivalent states, remember current one. 4303 /* there were no equivalent states, remember current one.
3732 * technically the current state is not proven to be safe yet, 4304 * technically the current state is not proven to be safe yet,
3733 * but it will either reach bpf_exit (which means it's safe) or 4305 * but it will either reach the outermost bpf_exit (which means it's safe)
3734 * it will be rejected. Since there are no loops, we won't be 4306 * or it will be rejected. Since there are no loops, we won't be
3735 * seeing this 'insn_idx' instruction again on the way to bpf_exit 4307 * seeing this tuple (frame[0].callsite, frame[1].callsite, .. insn_idx)
4308 * again on the way to bpf_exit
3736 */ 4309 */
3737 new_sl = kzalloc(sizeof(struct bpf_verifier_state_list), GFP_KERNEL); 4310 new_sl = kzalloc(sizeof(struct bpf_verifier_state_list), GFP_KERNEL);
3738 if (!new_sl) 4311 if (!new_sl)
@@ -3756,10 +4329,15 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
3756 * explored_states can get read marks.) 4329 * explored_states can get read marks.)
3757 */ 4330 */
3758 for (i = 0; i < BPF_REG_FP; i++) 4331 for (i = 0; i < BPF_REG_FP; i++)
3759 cur->regs[i].live = REG_LIVE_NONE; 4332 cur->frame[cur->curframe]->regs[i].live = REG_LIVE_NONE;
3760 for (i = 0; i < cur->allocated_stack / BPF_REG_SIZE; i++) 4333
3761 if (cur->stack[i].slot_type[0] == STACK_SPILL) 4334 /* all stack frames are accessible from callee, clear them all */
3762 cur->stack[i].spilled_ptr.live = REG_LIVE_NONE; 4335 for (j = 0; j <= cur->curframe; j++) {
4336 struct bpf_func_state *frame = cur->frame[j];
4337
4338 for (i = 0; i < frame->allocated_stack / BPF_REG_SIZE; i++)
4339 frame->stack[i].spilled_ptr.live = REG_LIVE_NONE;
4340 }
3763 return 0; 4341 return 0;
3764} 4342}
3765 4343
@@ -3777,7 +4355,7 @@ static int do_check(struct bpf_verifier_env *env)
3777 struct bpf_verifier_state *state; 4355 struct bpf_verifier_state *state;
3778 struct bpf_insn *insns = env->prog->insnsi; 4356 struct bpf_insn *insns = env->prog->insnsi;
3779 struct bpf_reg_state *regs; 4357 struct bpf_reg_state *regs;
3780 int insn_cnt = env->prog->len; 4358 int insn_cnt = env->prog->len, i;
3781 int insn_idx, prev_insn_idx = 0; 4359 int insn_idx, prev_insn_idx = 0;
3782 int insn_processed = 0; 4360 int insn_processed = 0;
3783 bool do_print_state = false; 4361 bool do_print_state = false;
@@ -3785,9 +4363,18 @@ static int do_check(struct bpf_verifier_env *env)
3785 state = kzalloc(sizeof(struct bpf_verifier_state), GFP_KERNEL); 4363 state = kzalloc(sizeof(struct bpf_verifier_state), GFP_KERNEL);
3786 if (!state) 4364 if (!state)
3787 return -ENOMEM; 4365 return -ENOMEM;
3788 env->cur_state = state; 4366 state->curframe = 0;
3789 init_reg_state(env, state->regs);
3790 state->parent = NULL; 4367 state->parent = NULL;
4368 state->frame[0] = kzalloc(sizeof(struct bpf_func_state), GFP_KERNEL);
4369 if (!state->frame[0]) {
4370 kfree(state);
4371 return -ENOMEM;
4372 }
4373 env->cur_state = state;
4374 init_func_state(env, state->frame[0],
4375 BPF_MAIN_FUNC /* callsite */,
4376 0 /* frameno */,
4377 0 /* subprogno, zero == main subprog */);
3791 insn_idx = 0; 4378 insn_idx = 0;
3792 for (;;) { 4379 for (;;) {
3793 struct bpf_insn *insn; 4380 struct bpf_insn *insn;
@@ -3834,7 +4421,7 @@ static int do_check(struct bpf_verifier_env *env)
3834 else 4421 else
3835 verbose(env, "\nfrom %d to %d:", 4422 verbose(env, "\nfrom %d to %d:",
3836 prev_insn_idx, insn_idx); 4423 prev_insn_idx, insn_idx);
3837 print_verifier_state(env, state); 4424 print_verifier_state(env, state->frame[state->curframe]);
3838 do_print_state = false; 4425 do_print_state = false;
3839 } 4426 }
3840 4427
@@ -3967,13 +4554,17 @@ static int do_check(struct bpf_verifier_env *env)
3967 if (opcode == BPF_CALL) { 4554 if (opcode == BPF_CALL) {
3968 if (BPF_SRC(insn->code) != BPF_K || 4555 if (BPF_SRC(insn->code) != BPF_K ||
3969 insn->off != 0 || 4556 insn->off != 0 ||
3970 insn->src_reg != BPF_REG_0 || 4557 (insn->src_reg != BPF_REG_0 &&
4558 insn->src_reg != BPF_PSEUDO_CALL) ||
3971 insn->dst_reg != BPF_REG_0) { 4559 insn->dst_reg != BPF_REG_0) {
3972 verbose(env, "BPF_CALL uses reserved fields\n"); 4560 verbose(env, "BPF_CALL uses reserved fields\n");
3973 return -EINVAL; 4561 return -EINVAL;
3974 } 4562 }
3975 4563
3976 err = check_call(env, insn->imm, insn_idx); 4564 if (insn->src_reg == BPF_PSEUDO_CALL)
4565 err = check_func_call(env, insn, &insn_idx);
4566 else
4567 err = check_helper_call(env, insn->imm, insn_idx);
3977 if (err) 4568 if (err)
3978 return err; 4569 return err;
3979 4570
@@ -3998,6 +4589,16 @@ static int do_check(struct bpf_verifier_env *env)
3998 return -EINVAL; 4589 return -EINVAL;
3999 } 4590 }
4000 4591
4592 if (state->curframe) {
4593 /* exit from nested function */
4594 prev_insn_idx = insn_idx;
4595 err = prepare_func_exit(env, &insn_idx);
4596 if (err)
4597 return err;
4598 do_print_state = true;
4599 continue;
4600 }
4601
4001 /* eBPF calling convention is such that R0 is used 4602 /* eBPF calling convention is such that R0 is used
4002 * to return the value from eBPF program. 4603 * to return the value from eBPF program.
4003 * Make sure that it's readable at this time 4604 * Make sure that it's readable at this time
@@ -4058,8 +4659,16 @@ process_bpf_exit:
4058 insn_idx++; 4659 insn_idx++;
4059 } 4660 }
4060 4661
4061 verbose(env, "processed %d insns, stack depth %d\n", insn_processed, 4662 verbose(env, "processed %d insns, stack depth ", insn_processed);
4062 env->prog->aux->stack_depth); 4663 for (i = 0; i < env->subprog_cnt + 1; i++) {
4664 u32 depth = env->subprog_stack_depth[i];
4665
4666 verbose(env, "%d", depth);
4667 if (i + 1 < env->subprog_cnt + 1)
4668 verbose(env, "+");
4669 }
4670 verbose(env, "\n");
4671 env->prog->aux->stack_depth = env->subprog_stack_depth[0];
4063 return 0; 4672 return 0;
4064} 4673}
4065 4674
@@ -4245,6 +4854,19 @@ static int adjust_insn_aux_data(struct bpf_verifier_env *env, u32 prog_len,
4245 return 0; 4854 return 0;
4246} 4855}
4247 4856
4857static void adjust_subprog_starts(struct bpf_verifier_env *env, u32 off, u32 len)
4858{
4859 int i;
4860
4861 if (len == 1)
4862 return;
4863 for (i = 0; i < env->subprog_cnt; i++) {
4864 if (env->subprog_starts[i] < off)
4865 continue;
4866 env->subprog_starts[i] += len - 1;
4867 }
4868}
4869
4248static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 off, 4870static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 off,
4249 const struct bpf_insn *patch, u32 len) 4871 const struct bpf_insn *patch, u32 len)
4250{ 4872{
@@ -4255,6 +4877,7 @@ static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 of
4255 return NULL; 4877 return NULL;
4256 if (adjust_insn_aux_data(env, new_prog->len, off, len)) 4878 if (adjust_insn_aux_data(env, new_prog->len, off, len))
4257 return NULL; 4879 return NULL;
4880 adjust_subprog_starts(env, off, len);
4258 return new_prog; 4881 return new_prog;
4259} 4882}
4260 4883
@@ -4389,6 +5012,150 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
4389 return 0; 5012 return 0;
4390} 5013}
4391 5014
5015static int jit_subprogs(struct bpf_verifier_env *env)
5016{
5017 struct bpf_prog *prog = env->prog, **func, *tmp;
5018 int i, j, subprog_start, subprog_end = 0, len, subprog;
5019 struct bpf_insn *insn = prog->insnsi;
5020 void *old_bpf_func;
5021 int err = -ENOMEM;
5022
5023 if (env->subprog_cnt == 0)
5024 return 0;
5025
5026 for (i = 0; i < prog->len; i++, insn++) {
5027 if (insn->code != (BPF_JMP | BPF_CALL) ||
5028 insn->src_reg != BPF_PSEUDO_CALL)
5029 continue;
5030 subprog = find_subprog(env, i + insn->imm + 1);
5031 if (subprog < 0) {
5032 WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
5033 i + insn->imm + 1);
5034 return -EFAULT;
5035 }
5036 /* temporarily remember subprog id inside insn instead of
5037 * aux_data, since next loop will split up all insns into funcs
5038 */
5039 insn->off = subprog + 1;
5040 /* remember original imm in case JIT fails and fallback
5041 * to interpreter will be needed
5042 */
5043 env->insn_aux_data[i].call_imm = insn->imm;
5044 /* point imm to __bpf_call_base+1 from JITs point of view */
5045 insn->imm = 1;
5046 }
5047
5048 func = kzalloc(sizeof(prog) * (env->subprog_cnt + 1), GFP_KERNEL);
5049 if (!func)
5050 return -ENOMEM;
5051
5052 for (i = 0; i <= env->subprog_cnt; i++) {
5053 subprog_start = subprog_end;
5054 if (env->subprog_cnt == i)
5055 subprog_end = prog->len;
5056 else
5057 subprog_end = env->subprog_starts[i];
5058
5059 len = subprog_end - subprog_start;
5060 func[i] = bpf_prog_alloc(bpf_prog_size(len), GFP_USER);
5061 if (!func[i])
5062 goto out_free;
5063 memcpy(func[i]->insnsi, &prog->insnsi[subprog_start],
5064 len * sizeof(struct bpf_insn));
5065 func[i]->len = len;
5066 func[i]->is_func = 1;
5067 /* Use bpf_prog_F_tag to indicate functions in stack traces.
5068 * Long term would need debug info to populate names
5069 */
5070 func[i]->aux->name[0] = 'F';
5071 func[i]->aux->stack_depth = env->subprog_stack_depth[i];
5072 func[i]->jit_requested = 1;
5073 func[i] = bpf_int_jit_compile(func[i]);
5074 if (!func[i]->jited) {
5075 err = -ENOTSUPP;
5076 goto out_free;
5077 }
5078 cond_resched();
5079 }
5080 /* at this point all bpf functions were successfully JITed
5081 * now populate all bpf_calls with correct addresses and
5082 * run last pass of JIT
5083 */
5084 for (i = 0; i <= env->subprog_cnt; i++) {
5085 insn = func[i]->insnsi;
5086 for (j = 0; j < func[i]->len; j++, insn++) {
5087 if (insn->code != (BPF_JMP | BPF_CALL) ||
5088 insn->src_reg != BPF_PSEUDO_CALL)
5089 continue;
5090 subprog = insn->off;
5091 insn->off = 0;
5092 insn->imm = (u64 (*)(u64, u64, u64, u64, u64))
5093 func[subprog]->bpf_func -
5094 __bpf_call_base;
5095 }
5096 }
5097 for (i = 0; i <= env->subprog_cnt; i++) {
5098 old_bpf_func = func[i]->bpf_func;
5099 tmp = bpf_int_jit_compile(func[i]);
5100 if (tmp != func[i] || func[i]->bpf_func != old_bpf_func) {
5101 verbose(env, "JIT doesn't support bpf-to-bpf calls\n");
5102 err = -EFAULT;
5103 goto out_free;
5104 }
5105 cond_resched();
5106 }
5107
5108 /* finally lock prog and jit images for all functions and
 5109 * populate kallsyms
5110 */
5111 for (i = 0; i <= env->subprog_cnt; i++) {
5112 bpf_prog_lock_ro(func[i]);
5113 bpf_prog_kallsyms_add(func[i]);
5114 }
5115 prog->jited = 1;
5116 prog->bpf_func = func[0]->bpf_func;
5117 prog->aux->func = func;
5118 prog->aux->func_cnt = env->subprog_cnt + 1;
5119 return 0;
5120out_free:
5121 for (i = 0; i <= env->subprog_cnt; i++)
5122 if (func[i])
5123 bpf_jit_free(func[i]);
5124 kfree(func);
5125 /* cleanup main prog to be interpreted */
5126 prog->jit_requested = 0;
5127 for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
5128 if (insn->code != (BPF_JMP | BPF_CALL) ||
5129 insn->src_reg != BPF_PSEUDO_CALL)
5130 continue;
5131 insn->off = 0;
5132 insn->imm = env->insn_aux_data[i].call_imm;
5133 }
5134 return err;
5135}
5136
5137static int fixup_call_args(struct bpf_verifier_env *env)
5138{
5139 struct bpf_prog *prog = env->prog;
5140 struct bpf_insn *insn = prog->insnsi;
5141 int i, depth;
5142
5143 if (env->prog->jit_requested)
5144 if (jit_subprogs(env) == 0)
5145 return 0;
5146
5147 for (i = 0; i < prog->len; i++, insn++) {
5148 if (insn->code != (BPF_JMP | BPF_CALL) ||
5149 insn->src_reg != BPF_PSEUDO_CALL)
5150 continue;
5151 depth = get_callee_stack_depth(env, insn, i);
5152 if (depth < 0)
5153 return depth;
5154 bpf_patch_call_args(insn, depth);
5155 }
5156 return 0;
5157}
5158
4392/* fixup insn->imm field of bpf_call instructions 5159/* fixup insn->imm field of bpf_call instructions
4393 * and inline eligible helpers as explicit sequence of BPF instructions 5160 * and inline eligible helpers as explicit sequence of BPF instructions
4394 * 5161 *
@@ -4408,6 +5175,8 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
4408 for (i = 0; i < insn_cnt; i++, insn++) { 5175 for (i = 0; i < insn_cnt; i++, insn++) {
4409 if (insn->code != (BPF_JMP | BPF_CALL)) 5176 if (insn->code != (BPF_JMP | BPF_CALL))
4410 continue; 5177 continue;
5178 if (insn->src_reg == BPF_PSEUDO_CALL)
5179 continue;
4411 5180
4412 if (insn->imm == BPF_FUNC_get_route_realm) 5181 if (insn->imm == BPF_FUNC_get_route_realm)
4413 prog->dst_needed = 1; 5182 prog->dst_needed = 1;
@@ -4437,7 +5206,7 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
4437 /* BPF_EMIT_CALL() assumptions in some of the map_gen_lookup 5206 /* BPF_EMIT_CALL() assumptions in some of the map_gen_lookup
4438 * handlers are currently limited to 64 bit only. 5207 * handlers are currently limited to 64 bit only.
4439 */ 5208 */
4440 if (ebpf_jit_enabled() && BITS_PER_LONG == 64 && 5209 if (prog->jit_requested && BITS_PER_LONG == 64 &&
4441 insn->imm == BPF_FUNC_map_lookup_elem) { 5210 insn->imm == BPF_FUNC_map_lookup_elem) {
4442 map_ptr = env->insn_aux_data[i + delta].map_ptr; 5211 map_ptr = env->insn_aux_data[i + delta].map_ptr;
4443 if (map_ptr == BPF_MAP_PTR_POISON || 5212 if (map_ptr == BPF_MAP_PTR_POISON ||
@@ -4589,12 +5358,12 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr)
4589 if (!env->explored_states) 5358 if (!env->explored_states)
4590 goto skip_full_check; 5359 goto skip_full_check;
4591 5360
5361 env->allow_ptr_leaks = capable(CAP_SYS_ADMIN);
5362
4592 ret = check_cfg(env); 5363 ret = check_cfg(env);
4593 if (ret < 0) 5364 if (ret < 0)
4594 goto skip_full_check; 5365 goto skip_full_check;
4595 5366
4596 env->allow_ptr_leaks = capable(CAP_SYS_ADMIN);
4597
4598 ret = do_check(env); 5367 ret = do_check(env);
4599 if (env->cur_state) { 5368 if (env->cur_state) {
4600 free_verifier_state(env->cur_state, true); 5369 free_verifier_state(env->cur_state, true);
@@ -4615,6 +5384,9 @@ skip_full_check:
4615 if (ret == 0) 5384 if (ret == 0)
4616 ret = fixup_bpf_calls(env); 5385 ret = fixup_bpf_calls(env);
4617 5386
5387 if (ret == 0)
5388 ret = fixup_call_args(env);
5389
4618 if (log->level && bpf_verifier_log_full(log)) 5390 if (log->level && bpf_verifier_log_full(log))
4619 ret = -ENOSPC; 5391 ret = -ENOSPC;
4620 if (log->level && !log->ubuf) { 5392 if (log->level && !log->ubuf) {
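
The verifier changes above repeatedly identify a bpf-to-bpf call by the same predicate: insn->code == (BPF_JMP | BPF_CALL) together with insn->src_reg == BPF_PSEUDO_CALL, with the callee addressed pc-relatively through insn->imm in the program as loaded from userspace. A minimal userspace sketch of that check, assuming only the uapi header with the new BPF_PSEUDO_CALL define; callee_index() is a hypothetical helper used purely for illustration:

#include <linux/bpf.h>

/* Return the instruction index of the callee for a bpf-to-bpf call at
 * index i, or -1 if the instruction is not a BPF_PSEUDO_CALL. The callee
 * starts at i + imm + 1, i.e. imm is relative to the insn after the call.
 */
static int callee_index(const struct bpf_insn *insns, int i)
{
	if (insns[i].code != (BPF_JMP | BPF_CALL) ||
	    insns[i].src_reg != BPF_PSEUDO_CALL)
		return -1;
	return i + insns[i].imm + 1;
}
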
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index cf446c25c0ec..db1b0923a308 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -197,8 +197,14 @@ enum bpf_attach_type {
197 */ 197 */
198#define BPF_F_STRICT_ALIGNMENT (1U << 0) 198#define BPF_F_STRICT_ALIGNMENT (1U << 0)
199 199
200/* when bpf_ldimm64->src_reg == BPF_PSEUDO_MAP_FD, bpf_ldimm64->imm == fd */
200#define BPF_PSEUDO_MAP_FD 1 201#define BPF_PSEUDO_MAP_FD 1
201 202
203/* when bpf_call->src_reg == BPF_PSEUDO_CALL, bpf_call->imm == pc-relative
204 * offset to another bpf function
205 */
206#define BPF_PSEUDO_CALL 1
207
202/* flags for BPF_MAP_UPDATE_ELEM command */ 208/* flags for BPF_MAP_UPDATE_ELEM command */
203#define BPF_ANY 0 /* create new element or update existing */ 209#define BPF_ANY 0 /* create new element or update existing */
204#define BPF_NOEXIST 1 /* create new element if it didn't exist */ 210#define BPF_NOEXIST 1 /* create new element if it didn't exist */
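
To make the new relocation concrete, this is how a two-function program looks when spelled out as raw instructions, using only the uapi definitions above (struct bpf_insn, BPF_PSEUDO_CALL and the BPF_* opcode macros). It mirrors the "calls: basic sanity" selftest added later in this series: the call at insn 0 carries imm == 2, so the callee starts at insn 0 + 2 + 1 = 3.

#include <linux/bpf.h>

static const struct bpf_insn two_func_prog[] = {
	/* insn 0: main calls the bpf function starting at insn 3 */
	{ .code = BPF_JMP | BPF_CALL, .src_reg = BPF_PSEUDO_CALL, .imm = 2 },
	/* insn 1: r0 = 1 */
	{ .code = BPF_ALU64 | BPF_MOV | BPF_K, .dst_reg = BPF_REG_0, .imm = 1 },
	/* insn 2: exit from main */
	{ .code = BPF_JMP | BPF_EXIT },
	/* insn 3: callee body: r0 = 2 */
	{ .code = BPF_ALU64 | BPF_MOV | BPF_K, .dst_reg = BPF_REG_0, .imm = 2 },
	/* insn 4: exit from the callee */
	{ .code = BPF_JMP | BPF_EXIT },
};
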
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index 6534889e2b2f..9f44c196931e 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -40,7 +40,7 @@ int bpf_create_map_in_map(enum bpf_map_type map_type, const char *name,
40 __u32 map_flags); 40 __u32 map_flags);
41 41
42/* Recommend log buffer size */ 42/* Recommend log buffer size */
43#define BPF_LOG_BUF_SIZE 65536 43#define BPF_LOG_BUF_SIZE (256 * 1024)
44int bpf_load_program_name(enum bpf_prog_type type, const char *name, 44int bpf_load_program_name(enum bpf_prog_type type, const char *name,
45 const struct bpf_insn *insns, 45 const struct bpf_insn *insns,
46 size_t insns_cnt, const char *license, 46 size_t insns_cnt, const char *license,
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 65d0d0aff4fa..5b83875b3594 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -174,12 +174,19 @@ struct bpf_program {
174 char *name; 174 char *name;
175 char *section_name; 175 char *section_name;
176 struct bpf_insn *insns; 176 struct bpf_insn *insns;
177 size_t insns_cnt; 177 size_t insns_cnt, main_prog_cnt;
178 enum bpf_prog_type type; 178 enum bpf_prog_type type;
179 179
180 struct { 180 struct reloc_desc {
181 enum {
182 RELO_LD64,
183 RELO_CALL,
184 } type;
181 int insn_idx; 185 int insn_idx;
182 int map_idx; 186 union {
187 int map_idx;
188 int text_off;
189 };
183 } *reloc_desc; 190 } *reloc_desc;
184 int nr_reloc; 191 int nr_reloc;
185 192
@@ -234,6 +241,7 @@ struct bpf_object {
234 } *reloc; 241 } *reloc;
235 int nr_reloc; 242 int nr_reloc;
236 int maps_shndx; 243 int maps_shndx;
244 int text_shndx;
237 } efile; 245 } efile;
238 /* 246 /*
239 * All loaded bpf_object is linked in a list, which is 247 * All loaded bpf_object is linked in a list, which is
@@ -375,9 +383,13 @@ bpf_object__init_prog_names(struct bpf_object *obj)
375 size_t pi, si; 383 size_t pi, si;
376 384
377 for (pi = 0; pi < obj->nr_programs; pi++) { 385 for (pi = 0; pi < obj->nr_programs; pi++) {
378 char *name = NULL; 386 const char *name = NULL;
379 387
380 prog = &obj->programs[pi]; 388 prog = &obj->programs[pi];
389 if (prog->idx == obj->efile.text_shndx) {
390 name = ".text";
391 goto skip_search;
392 }
381 393
382 for (si = 0; si < symbols->d_size / sizeof(GElf_Sym) && !name; 394 for (si = 0; si < symbols->d_size / sizeof(GElf_Sym) && !name;
383 si++) { 395 si++) {
@@ -405,7 +417,7 @@ bpf_object__init_prog_names(struct bpf_object *obj)
405 prog->section_name); 417 prog->section_name);
406 return -EINVAL; 418 return -EINVAL;
407 } 419 }
408 420skip_search:
409 prog->name = strdup(name); 421 prog->name = strdup(name);
410 if (!prog->name) { 422 if (!prog->name) {
411 pr_warning("failed to allocate memory for prog sym %s\n", 423 pr_warning("failed to allocate memory for prog sym %s\n",
@@ -795,6 +807,8 @@ static int bpf_object__elf_collect(struct bpf_object *obj)
795 } else if ((sh.sh_type == SHT_PROGBITS) && 807 } else if ((sh.sh_type == SHT_PROGBITS) &&
796 (sh.sh_flags & SHF_EXECINSTR) && 808 (sh.sh_flags & SHF_EXECINSTR) &&
797 (data->d_size > 0)) { 809 (data->d_size > 0)) {
810 if (strcmp(name, ".text") == 0)
811 obj->efile.text_shndx = idx;
798 err = bpf_object__add_program(obj, data->d_buf, 812 err = bpf_object__add_program(obj, data->d_buf,
799 data->d_size, name, idx); 813 data->d_size, name, idx);
800 if (err) { 814 if (err) {
@@ -856,11 +870,14 @@ bpf_object__find_prog_by_idx(struct bpf_object *obj, int idx)
856} 870}
857 871
858static int 872static int
859bpf_program__collect_reloc(struct bpf_program *prog, 873bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr,
860 size_t nr_maps, GElf_Shdr *shdr, 874 Elf_Data *data, struct bpf_object *obj)
861 Elf_Data *data, Elf_Data *symbols,
862 int maps_shndx, struct bpf_map *maps)
863{ 875{
876 Elf_Data *symbols = obj->efile.symbols;
877 int text_shndx = obj->efile.text_shndx;
878 int maps_shndx = obj->efile.maps_shndx;
879 struct bpf_map *maps = obj->maps;
880 size_t nr_maps = obj->nr_maps;
864 int i, nrels; 881 int i, nrels;
865 882
866 pr_debug("collecting relocating info for: '%s'\n", 883 pr_debug("collecting relocating info for: '%s'\n",
@@ -893,8 +910,10 @@ bpf_program__collect_reloc(struct bpf_program *prog,
893 GELF_R_SYM(rel.r_info)); 910 GELF_R_SYM(rel.r_info));
894 return -LIBBPF_ERRNO__FORMAT; 911 return -LIBBPF_ERRNO__FORMAT;
895 } 912 }
913 pr_debug("relo for %ld value %ld name %d\n",
914 rel.r_info >> 32, sym.st_value, sym.st_name);
896 915
897 if (sym.st_shndx != maps_shndx) { 916 if (sym.st_shndx != maps_shndx && sym.st_shndx != text_shndx) {
898 pr_warning("Program '%s' contains non-map related relo data pointing to section %u\n", 917 pr_warning("Program '%s' contains non-map related relo data pointing to section %u\n",
899 prog->section_name, sym.st_shndx); 918 prog->section_name, sym.st_shndx);
900 return -LIBBPF_ERRNO__RELOC; 919 return -LIBBPF_ERRNO__RELOC;
@@ -903,6 +922,17 @@ bpf_program__collect_reloc(struct bpf_program *prog,
903 insn_idx = rel.r_offset / sizeof(struct bpf_insn); 922 insn_idx = rel.r_offset / sizeof(struct bpf_insn);
904 pr_debug("relocation: insn_idx=%u\n", insn_idx); 923 pr_debug("relocation: insn_idx=%u\n", insn_idx);
905 924
925 if (insns[insn_idx].code == (BPF_JMP | BPF_CALL)) {
926 if (insns[insn_idx].src_reg != BPF_PSEUDO_CALL) {
927 pr_warning("incorrect bpf_call opcode\n");
928 return -LIBBPF_ERRNO__RELOC;
929 }
930 prog->reloc_desc[i].type = RELO_CALL;
931 prog->reloc_desc[i].insn_idx = insn_idx;
932 prog->reloc_desc[i].text_off = sym.st_value;
933 continue;
934 }
935
906 if (insns[insn_idx].code != (BPF_LD | BPF_IMM | BPF_DW)) { 936 if (insns[insn_idx].code != (BPF_LD | BPF_IMM | BPF_DW)) {
907 pr_warning("bpf: relocation: invalid relo for insns[%d].code 0x%x\n", 937 pr_warning("bpf: relocation: invalid relo for insns[%d].code 0x%x\n",
908 insn_idx, insns[insn_idx].code); 938 insn_idx, insns[insn_idx].code);
@@ -924,6 +954,7 @@ bpf_program__collect_reloc(struct bpf_program *prog,
924 return -LIBBPF_ERRNO__RELOC; 954 return -LIBBPF_ERRNO__RELOC;
925 } 955 }
926 956
957 prog->reloc_desc[i].type = RELO_LD64;
927 prog->reloc_desc[i].insn_idx = insn_idx; 958 prog->reloc_desc[i].insn_idx = insn_idx;
928 prog->reloc_desc[i].map_idx = map_idx; 959 prog->reloc_desc[i].map_idx = map_idx;
929 } 960 }
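
The collect_reloc changes above now record two kinds of relocations per program: RELO_LD64 for map references carried by ld_imm64, and RELO_CALL for calls into .text, where the symbol value is kept as text_off. A small self-contained sketch of the resulting bookkeeping; the struct is restated here so the snippet compiles on its own, and the index values are made up for illustration:

#include <stdio.h>

struct reloc_desc {
	enum { RELO_LD64, RELO_CALL } type;
	int insn_idx;
	union {
		int map_idx;	/* RELO_LD64: map referenced by the ld_imm64 */
		int text_off;	/* RELO_CALL: symbol value inside .text */
	};
};

int main(void)
{
	struct reloc_desc relos[] = {
		/* ld_imm64 at insn 7 refers to the object's first map */
		{ .type = RELO_LD64, .insn_idx = 7, .map_idx = 0 },
		/* call at insn 12 targets the function at byte 96 of .text */
		{ .type = RELO_CALL, .insn_idx = 12, .text_off = 96 },
	};

	for (unsigned int i = 0; i < sizeof(relos) / sizeof(relos[0]); i++)
		printf("insn %d -> %s\n", relos[i].insn_idx,
		       relos[i].type == RELO_CALL ? "call into .text" : "map fd");
	return 0;
}
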
@@ -963,27 +994,76 @@ bpf_object__create_maps(struct bpf_object *obj)
963} 994}
964 995
965static int 996static int
997bpf_program__reloc_text(struct bpf_program *prog, struct bpf_object *obj,
998 struct reloc_desc *relo)
999{
1000 struct bpf_insn *insn, *new_insn;
1001 struct bpf_program *text;
1002 size_t new_cnt;
1003
1004 if (relo->type != RELO_CALL)
1005 return -LIBBPF_ERRNO__RELOC;
1006
1007 if (prog->idx == obj->efile.text_shndx) {
1008 pr_warning("relo in .text insn %d into off %d\n",
1009 relo->insn_idx, relo->text_off);
1010 return -LIBBPF_ERRNO__RELOC;
1011 }
1012
1013 if (prog->main_prog_cnt == 0) {
1014 text = bpf_object__find_prog_by_idx(obj, obj->efile.text_shndx);
1015 if (!text) {
1016 pr_warning("no .text section found yet relo into text exist\n");
1017 return -LIBBPF_ERRNO__RELOC;
1018 }
1019 new_cnt = prog->insns_cnt + text->insns_cnt;
1020 new_insn = realloc(prog->insns, new_cnt * sizeof(*insn));
1021 if (!new_insn) {
1022 pr_warning("oom in prog realloc\n");
1023 return -ENOMEM;
1024 }
1025 memcpy(new_insn + prog->insns_cnt, text->insns,
1026 text->insns_cnt * sizeof(*insn));
1027 prog->insns = new_insn;
1028 prog->main_prog_cnt = prog->insns_cnt;
1029 prog->insns_cnt = new_cnt;
1030 }
1031 insn = &prog->insns[relo->insn_idx];
1032 insn->imm += prog->main_prog_cnt - relo->insn_idx;
1033 pr_debug("added %zd insn from %s to prog %s\n",
1034 text->insns_cnt, text->section_name, prog->section_name);
1035 return 0;
1036}
1037
1038static int
966bpf_program__relocate(struct bpf_program *prog, struct bpf_object *obj) 1039bpf_program__relocate(struct bpf_program *prog, struct bpf_object *obj)
967{ 1040{
968 int i; 1041 int i, err;
969 1042
970 if (!prog || !prog->reloc_desc) 1043 if (!prog || !prog->reloc_desc)
971 return 0; 1044 return 0;
972 1045
973 for (i = 0; i < prog->nr_reloc; i++) { 1046 for (i = 0; i < prog->nr_reloc; i++) {
974 int insn_idx, map_idx; 1047 if (prog->reloc_desc[i].type == RELO_LD64) {
975 struct bpf_insn *insns = prog->insns; 1048 struct bpf_insn *insns = prog->insns;
1049 int insn_idx, map_idx;
976 1050
977 insn_idx = prog->reloc_desc[i].insn_idx; 1051 insn_idx = prog->reloc_desc[i].insn_idx;
978 map_idx = prog->reloc_desc[i].map_idx; 1052 map_idx = prog->reloc_desc[i].map_idx;
979 1053
980 if (insn_idx >= (int)prog->insns_cnt) { 1054 if (insn_idx >= (int)prog->insns_cnt) {
981 pr_warning("relocation out of range: '%s'\n", 1055 pr_warning("relocation out of range: '%s'\n",
982 prog->section_name); 1056 prog->section_name);
983 return -LIBBPF_ERRNO__RELOC; 1057 return -LIBBPF_ERRNO__RELOC;
1058 }
1059 insns[insn_idx].src_reg = BPF_PSEUDO_MAP_FD;
1060 insns[insn_idx].imm = obj->maps[map_idx].fd;
1061 } else {
1062 err = bpf_program__reloc_text(prog, obj,
1063 &prog->reloc_desc[i]);
1064 if (err)
1065 return err;
984 } 1066 }
985 insns[insn_idx].src_reg = BPF_PSEUDO_MAP_FD;
986 insns[insn_idx].imm = obj->maps[map_idx].fd;
987 } 1067 }
988 1068
989 zfree(&prog->reloc_desc); 1069 zfree(&prog->reloc_desc);
@@ -1026,7 +1106,6 @@ static int bpf_object__collect_reloc(struct bpf_object *obj)
1026 Elf_Data *data = obj->efile.reloc[i].data; 1106 Elf_Data *data = obj->efile.reloc[i].data;
1027 int idx = shdr->sh_info; 1107 int idx = shdr->sh_info;
1028 struct bpf_program *prog; 1108 struct bpf_program *prog;
1029 size_t nr_maps = obj->nr_maps;
1030 1109
1031 if (shdr->sh_type != SHT_REL) { 1110 if (shdr->sh_type != SHT_REL) {
1032 pr_warning("internal error at %d\n", __LINE__); 1111 pr_warning("internal error at %d\n", __LINE__);
@@ -1040,11 +1119,9 @@ static int bpf_object__collect_reloc(struct bpf_object *obj)
1040 return -LIBBPF_ERRNO__RELOC; 1119 return -LIBBPF_ERRNO__RELOC;
1041 } 1120 }
1042 1121
1043 err = bpf_program__collect_reloc(prog, nr_maps, 1122 err = bpf_program__collect_reloc(prog,
1044 shdr, data, 1123 shdr, data,
1045 obj->efile.symbols, 1124 obj);
1046 obj->efile.maps_shndx,
1047 obj->maps);
1048 if (err) 1125 if (err)
1049 return err; 1126 return err;
1050 } 1127 }
@@ -1197,6 +1274,8 @@ bpf_object__load_progs(struct bpf_object *obj)
1197 int err; 1274 int err;
1198 1275
1199 for (i = 0; i < obj->nr_programs; i++) { 1276 for (i = 0; i < obj->nr_programs; i++) {
1277 if (obj->programs[i].idx == obj->efile.text_shndx)
1278 continue;
1200 err = bpf_program__load(&obj->programs[i], 1279 err = bpf_program__load(&obj->programs[i],
1201 obj->license, 1280 obj->license,
1202 obj->kern_version); 1281 obj->kern_version);
@@ -1859,7 +1938,7 @@ long libbpf_get_error(const void *ptr)
1859int bpf_prog_load(const char *file, enum bpf_prog_type type, 1938int bpf_prog_load(const char *file, enum bpf_prog_type type,
1860 struct bpf_object **pobj, int *prog_fd) 1939 struct bpf_object **pobj, int *prog_fd)
1861{ 1940{
1862 struct bpf_program *prog; 1941 struct bpf_program *prog, *first_prog = NULL;
1863 struct bpf_object *obj; 1942 struct bpf_object *obj;
1864 int err; 1943 int err;
1865 1944
@@ -1867,25 +1946,30 @@ int bpf_prog_load(const char *file, enum bpf_prog_type type,
1867 if (IS_ERR(obj)) 1946 if (IS_ERR(obj))
1868 return -ENOENT; 1947 return -ENOENT;
1869 1948
1870 prog = bpf_program__next(NULL, obj); 1949 bpf_object__for_each_program(prog, obj) {
1871 if (!prog) { 1950 /*
1872 bpf_object__close(obj); 1951 * If type is not specified, try to guess it based on
1873 return -ENOENT; 1952 * section name.
1874 } 1953 */
1875
1876 /*
1877 * If type is not specified, try to guess it based on
1878 * section name.
1879 */
1880 if (type == BPF_PROG_TYPE_UNSPEC) {
1881 type = bpf_program__guess_type(prog);
1882 if (type == BPF_PROG_TYPE_UNSPEC) { 1954 if (type == BPF_PROG_TYPE_UNSPEC) {
1883 bpf_object__close(obj); 1955 type = bpf_program__guess_type(prog);
1884 return -EINVAL; 1956 if (type == BPF_PROG_TYPE_UNSPEC) {
1957 bpf_object__close(obj);
1958 return -EINVAL;
1959 }
1885 } 1960 }
1961
1962 bpf_program__set_type(prog, type);
1963 if (prog->idx != obj->efile.text_shndx && !first_prog)
1964 first_prog = prog;
1965 }
1966
1967 if (!first_prog) {
1968 pr_warning("object file doesn't contain bpf program\n");
1969 bpf_object__close(obj);
1970 return -ENOENT;
1886 } 1971 }
1887 1972
1888 bpf_program__set_type(prog, type);
1889 err = bpf_object__load(obj); 1973 err = bpf_object__load(obj);
1890 if (err) { 1974 if (err) {
1891 bpf_object__close(obj); 1975 bpf_object__close(obj);
@@ -1893,6 +1977,6 @@ int bpf_prog_load(const char *file, enum bpf_prog_type type,
1893 } 1977 }
1894 1978
1895 *pobj = obj; 1979 *pobj = obj;
1896 *prog_fd = bpf_program__fd(prog); 1980 *prog_fd = bpf_program__fd(first_prog);
1897 return 0; 1981 return 0;
1898} 1982}
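
With these changes bpf_prog_load() walks every program in the object, skips the .text section that now holds the called functions, and returns the fd of the first real program. A usage sketch that mirrors the call made in test_xdp_noinline() further down; headers and the object path follow the selftests layout:

#include <linux/bpf.h>
#include "libbpf.h"

static int load_noinline_object(void)
{
	struct bpf_object *obj;
	int prog_fd, err;

	/* the object carries a .text section with the called functions;
	 * the returned fd refers to the first non-.text program in it
	 */
	err = bpf_prog_load("./test_xdp_noinline.o", BPF_PROG_TYPE_XDP,
			    &obj, &prog_fd);
	if (err)
		return err;
	/* ... exercise the program via prog_fd, e.g. bpf_prog_test_run() ... */
	bpf_object__close(obj);
	return 0;
}
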
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 255fb1f50f6b..7ef9601d04bf 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -17,7 +17,8 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test
17 17
18TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test_obj_id.o \ 18TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test_obj_id.o \
19 test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o \ 19 test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o \
20 sockmap_verdict_prog.o dev_cgroup.o sample_ret0.o test_tracepoint.o 20 sockmap_verdict_prog.o dev_cgroup.o sample_ret0.o test_tracepoint.o \
21 test_l4lb_noinline.o test_xdp_noinline.o
21 22
22TEST_PROGS := test_kmod.sh test_xdp_redirect.sh test_xdp_meta.sh \ 23TEST_PROGS := test_kmod.sh test_xdp_redirect.sh test_xdp_meta.sh \
23 test_offload.py 24 test_offload.py
@@ -49,8 +50,13 @@ else
49 CPU ?= generic 50 CPU ?= generic
50endif 51endif
51 52
53CLANG_FLAGS = -I. -I./include/uapi -I../../../include/uapi \
54 -Wno-compare-distinct-pointer-types
55
56$(OUTPUT)/test_l4lb_noinline.o: CLANG_FLAGS += -fno-inline
57$(OUTPUT)/test_xdp_noinline.o: CLANG_FLAGS += -fno-inline
58
52%.o: %.c 59%.o: %.c
53 $(CLANG) -I. -I./include/uapi -I../../../include/uapi \ 60 $(CLANG) $(CLANG_FLAGS) \
54 -Wno-compare-distinct-pointer-types \
55 -O2 -target bpf -emit-llvm -c $< -o - | \ 61 -O2 -target bpf -emit-llvm -c $< -o - | \
56 $(LLC) -march=bpf -mcpu=$(CPU) -filetype=obj -o $@ 62 $(LLC) -march=bpf -mcpu=$(CPU) -filetype=obj -o $@
diff --git a/tools/testing/selftests/bpf/test_l4lb_noinline.c b/tools/testing/selftests/bpf/test_l4lb_noinline.c
new file mode 100644
index 000000000000..ba44a14e6dc4
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_l4lb_noinline.c
@@ -0,0 +1,473 @@
1// SPDX-License-Identifier: GPL-2.0
2// Copyright (c) 2017 Facebook
3#include <stddef.h>
4#include <stdbool.h>
5#include <string.h>
6#include <linux/pkt_cls.h>
7#include <linux/bpf.h>
8#include <linux/in.h>
9#include <linux/if_ether.h>
10#include <linux/ip.h>
11#include <linux/ipv6.h>
12#include <linux/icmp.h>
13#include <linux/icmpv6.h>
14#include <linux/tcp.h>
15#include <linux/udp.h>
16#include "bpf_helpers.h"
17#include "test_iptunnel_common.h"
18#include "bpf_endian.h"
19
20int _version SEC("version") = 1;
21
22static __u32 rol32(__u32 word, unsigned int shift)
23{
24 return (word << shift) | (word >> ((-shift) & 31));
25}
26
27/* copy paste of jhash from kernel sources to make sure llvm
28 * can compile it into valid sequence of bpf instructions
29 */
30#define __jhash_mix(a, b, c) \
31{ \
32 a -= c; a ^= rol32(c, 4); c += b; \
33 b -= a; b ^= rol32(a, 6); a += c; \
34 c -= b; c ^= rol32(b, 8); b += a; \
35 a -= c; a ^= rol32(c, 16); c += b; \
36 b -= a; b ^= rol32(a, 19); a += c; \
37 c -= b; c ^= rol32(b, 4); b += a; \
38}
39
40#define __jhash_final(a, b, c) \
41{ \
42 c ^= b; c -= rol32(b, 14); \
43 a ^= c; a -= rol32(c, 11); \
44 b ^= a; b -= rol32(a, 25); \
45 c ^= b; c -= rol32(b, 16); \
46 a ^= c; a -= rol32(c, 4); \
47 b ^= a; b -= rol32(a, 14); \
48 c ^= b; c -= rol32(b, 24); \
49}
50
51#define JHASH_INITVAL 0xdeadbeef
52
53typedef unsigned int u32;
54
55static u32 jhash(const void *key, u32 length, u32 initval)
56{
57 u32 a, b, c;
58 const unsigned char *k = key;
59
60 a = b = c = JHASH_INITVAL + length + initval;
61
62 while (length > 12) {
63 a += *(u32 *)(k);
64 b += *(u32 *)(k + 4);
65 c += *(u32 *)(k + 8);
66 __jhash_mix(a, b, c);
67 length -= 12;
68 k += 12;
69 }
70 switch (length) {
71 case 12: c += (u32)k[11]<<24;
72 case 11: c += (u32)k[10]<<16;
73 case 10: c += (u32)k[9]<<8;
74 case 9: c += k[8];
75 case 8: b += (u32)k[7]<<24;
76 case 7: b += (u32)k[6]<<16;
77 case 6: b += (u32)k[5]<<8;
78 case 5: b += k[4];
79 case 4: a += (u32)k[3]<<24;
80 case 3: a += (u32)k[2]<<16;
81 case 2: a += (u32)k[1]<<8;
82 case 1: a += k[0];
83 __jhash_final(a, b, c);
84 case 0: /* Nothing left to add */
85 break;
86 }
87
88 return c;
89}
90
91static u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval)
92{
93 a += initval;
94 b += initval;
95 c += initval;
96 __jhash_final(a, b, c);
97 return c;
98}
99
100static u32 jhash_2words(u32 a, u32 b, u32 initval)
101{
102 return __jhash_nwords(a, b, 0, initval + JHASH_INITVAL + (2 << 2));
103}
104
105#define PCKT_FRAGMENTED 65343
106#define IPV4_HDR_LEN_NO_OPT 20
107#define IPV4_PLUS_ICMP_HDR 28
108#define IPV6_PLUS_ICMP_HDR 48
109#define RING_SIZE 2
110#define MAX_VIPS 12
111#define MAX_REALS 5
112#define CTL_MAP_SIZE 16
113#define CH_RINGS_SIZE (MAX_VIPS * RING_SIZE)
114#define F_IPV6 (1 << 0)
115#define F_HASH_NO_SRC_PORT (1 << 0)
116#define F_ICMP (1 << 0)
117#define F_SYN_SET (1 << 1)
118
119struct packet_description {
120 union {
121 __be32 src;
122 __be32 srcv6[4];
123 };
124 union {
125 __be32 dst;
126 __be32 dstv6[4];
127 };
128 union {
129 __u32 ports;
130 __u16 port16[2];
131 };
132 __u8 proto;
133 __u8 flags;
134};
135
136struct ctl_value {
137 union {
138 __u64 value;
139 __u32 ifindex;
140 __u8 mac[6];
141 };
142};
143
144struct vip_meta {
145 __u32 flags;
146 __u32 vip_num;
147};
148
149struct real_definition {
150 union {
151 __be32 dst;
152 __be32 dstv6[4];
153 };
154 __u8 flags;
155};
156
157struct vip_stats {
158 __u64 bytes;
159 __u64 pkts;
160};
161
162struct eth_hdr {
163 unsigned char eth_dest[ETH_ALEN];
164 unsigned char eth_source[ETH_ALEN];
165 unsigned short eth_proto;
166};
167
168struct bpf_map_def SEC("maps") vip_map = {
169 .type = BPF_MAP_TYPE_HASH,
170 .key_size = sizeof(struct vip),
171 .value_size = sizeof(struct vip_meta),
172 .max_entries = MAX_VIPS,
173};
174
175struct bpf_map_def SEC("maps") ch_rings = {
176 .type = BPF_MAP_TYPE_ARRAY,
177 .key_size = sizeof(__u32),
178 .value_size = sizeof(__u32),
179 .max_entries = CH_RINGS_SIZE,
180};
181
182struct bpf_map_def SEC("maps") reals = {
183 .type = BPF_MAP_TYPE_ARRAY,
184 .key_size = sizeof(__u32),
185 .value_size = sizeof(struct real_definition),
186 .max_entries = MAX_REALS,
187};
188
189struct bpf_map_def SEC("maps") stats = {
190 .type = BPF_MAP_TYPE_PERCPU_ARRAY,
191 .key_size = sizeof(__u32),
192 .value_size = sizeof(struct vip_stats),
193 .max_entries = MAX_VIPS,
194};
195
196struct bpf_map_def SEC("maps") ctl_array = {
197 .type = BPF_MAP_TYPE_ARRAY,
198 .key_size = sizeof(__u32),
199 .value_size = sizeof(struct ctl_value),
200 .max_entries = CTL_MAP_SIZE,
201};
202
203static __u32 get_packet_hash(struct packet_description *pckt,
204 bool ipv6)
205{
206 if (ipv6)
207 return jhash_2words(jhash(pckt->srcv6, 16, MAX_VIPS),
208 pckt->ports, CH_RINGS_SIZE);
209 else
210 return jhash_2words(pckt->src, pckt->ports, CH_RINGS_SIZE);
211}
212
213static bool get_packet_dst(struct real_definition **real,
214 struct packet_description *pckt,
215 struct vip_meta *vip_info,
216 bool is_ipv6)
217{
218 __u32 hash = get_packet_hash(pckt, is_ipv6);
219 __u32 key = RING_SIZE * vip_info->vip_num + hash % RING_SIZE;
220 __u32 *real_pos;
221
222 if (hash != 0x358459b7 /* jhash of ipv4 packet */ &&
223 hash != 0x2f4bc6bb /* jhash of ipv6 packet */)
224 return 0;
225
226 real_pos = bpf_map_lookup_elem(&ch_rings, &key);
227 if (!real_pos)
228 return false;
229 key = *real_pos;
230 *real = bpf_map_lookup_elem(&reals, &key);
231 if (!(*real))
232 return false;
233 return true;
234}
235
236static int parse_icmpv6(void *data, void *data_end, __u64 off,
237 struct packet_description *pckt)
238{
239 struct icmp6hdr *icmp_hdr;
240 struct ipv6hdr *ip6h;
241
242 icmp_hdr = data + off;
243 if (icmp_hdr + 1 > data_end)
244 return TC_ACT_SHOT;
245 if (icmp_hdr->icmp6_type != ICMPV6_PKT_TOOBIG)
246 return TC_ACT_OK;
247 off += sizeof(struct icmp6hdr);
248 ip6h = data + off;
249 if (ip6h + 1 > data_end)
250 return TC_ACT_SHOT;
251 pckt->proto = ip6h->nexthdr;
252 pckt->flags |= F_ICMP;
253 memcpy(pckt->srcv6, ip6h->daddr.s6_addr32, 16);
254 memcpy(pckt->dstv6, ip6h->saddr.s6_addr32, 16);
255 return TC_ACT_UNSPEC;
256}
257
258static int parse_icmp(void *data, void *data_end, __u64 off,
259 struct packet_description *pckt)
260{
261 struct icmphdr *icmp_hdr;
262 struct iphdr *iph;
263
264 icmp_hdr = data + off;
265 if (icmp_hdr + 1 > data_end)
266 return TC_ACT_SHOT;
267 if (icmp_hdr->type != ICMP_DEST_UNREACH ||
268 icmp_hdr->code != ICMP_FRAG_NEEDED)
269 return TC_ACT_OK;
270 off += sizeof(struct icmphdr);
271 iph = data + off;
272 if (iph + 1 > data_end)
273 return TC_ACT_SHOT;
274 if (iph->ihl != 5)
275 return TC_ACT_SHOT;
276 pckt->proto = iph->protocol;
277 pckt->flags |= F_ICMP;
278 pckt->src = iph->daddr;
279 pckt->dst = iph->saddr;
280 return TC_ACT_UNSPEC;
281}
282
283static bool parse_udp(void *data, __u64 off, void *data_end,
284 struct packet_description *pckt)
285{
286 struct udphdr *udp;
287 udp = data + off;
288
289 if (udp + 1 > data_end)
290 return false;
291
292 if (!(pckt->flags & F_ICMP)) {
293 pckt->port16[0] = udp->source;
294 pckt->port16[1] = udp->dest;
295 } else {
296 pckt->port16[0] = udp->dest;
297 pckt->port16[1] = udp->source;
298 }
299 return true;
300}
301
302static bool parse_tcp(void *data, __u64 off, void *data_end,
303 struct packet_description *pckt)
304{
305 struct tcphdr *tcp;
306
307 tcp = data + off;
308 if (tcp + 1 > data_end)
309 return false;
310
311 if (tcp->syn)
312 pckt->flags |= F_SYN_SET;
313
314 if (!(pckt->flags & F_ICMP)) {
315 pckt->port16[0] = tcp->source;
316 pckt->port16[1] = tcp->dest;
317 } else {
318 pckt->port16[0] = tcp->dest;
319 pckt->port16[1] = tcp->source;
320 }
321 return true;
322}
323
324static int process_packet(void *data, __u64 off, void *data_end,
325 bool is_ipv6, struct __sk_buff *skb)
326{
327 void *pkt_start = (void *)(long)skb->data;
328 struct packet_description pckt = {};
329 struct eth_hdr *eth = pkt_start;
330 struct bpf_tunnel_key tkey = {};
331 struct vip_stats *data_stats;
332 struct real_definition *dst;
333 struct vip_meta *vip_info;
334 struct ctl_value *cval;
335 __u32 v4_intf_pos = 1;
336 __u32 v6_intf_pos = 2;
337 struct ipv6hdr *ip6h;
338 struct vip vip = {};
339 struct iphdr *iph;
340 int tun_flag = 0;
341 __u16 pkt_bytes;
342 __u64 iph_len;
343 __u32 ifindex;
344 __u8 protocol;
345 __u32 vip_num;
346 int action;
347
348 tkey.tunnel_ttl = 64;
349 if (is_ipv6) {
350 ip6h = data + off;
351 if (ip6h + 1 > data_end)
352 return TC_ACT_SHOT;
353
354 iph_len = sizeof(struct ipv6hdr);
355 protocol = ip6h->nexthdr;
356 pckt.proto = protocol;
357 pkt_bytes = bpf_ntohs(ip6h->payload_len);
358 off += iph_len;
359 if (protocol == IPPROTO_FRAGMENT) {
360 return TC_ACT_SHOT;
361 } else if (protocol == IPPROTO_ICMPV6) {
362 action = parse_icmpv6(data, data_end, off, &pckt);
363 if (action >= 0)
364 return action;
365 off += IPV6_PLUS_ICMP_HDR;
366 } else {
367 memcpy(pckt.srcv6, ip6h->saddr.s6_addr32, 16);
368 memcpy(pckt.dstv6, ip6h->daddr.s6_addr32, 16);
369 }
370 } else {
371 iph = data + off;
372 if (iph + 1 > data_end)
373 return TC_ACT_SHOT;
374 if (iph->ihl != 5)
375 return TC_ACT_SHOT;
376
377 protocol = iph->protocol;
378 pckt.proto = protocol;
379 pkt_bytes = bpf_ntohs(iph->tot_len);
380 off += IPV4_HDR_LEN_NO_OPT;
381
382 if (iph->frag_off & PCKT_FRAGMENTED)
383 return TC_ACT_SHOT;
384 if (protocol == IPPROTO_ICMP) {
385 action = parse_icmp(data, data_end, off, &pckt);
386 if (action >= 0)
387 return action;
388 off += IPV4_PLUS_ICMP_HDR;
389 } else {
390 pckt.src = iph->saddr;
391 pckt.dst = iph->daddr;
392 }
393 }
394 protocol = pckt.proto;
395
396 if (protocol == IPPROTO_TCP) {
397 if (!parse_tcp(data, off, data_end, &pckt))
398 return TC_ACT_SHOT;
399 } else if (protocol == IPPROTO_UDP) {
400 if (!parse_udp(data, off, data_end, &pckt))
401 return TC_ACT_SHOT;
402 } else {
403 return TC_ACT_SHOT;
404 }
405
406 if (is_ipv6)
407 memcpy(vip.daddr.v6, pckt.dstv6, 16);
408 else
409 vip.daddr.v4 = pckt.dst;
410
411 vip.dport = pckt.port16[1];
412 vip.protocol = pckt.proto;
413 vip_info = bpf_map_lookup_elem(&vip_map, &vip);
414 if (!vip_info) {
415 vip.dport = 0;
416 vip_info = bpf_map_lookup_elem(&vip_map, &vip);
417 if (!vip_info)
418 return TC_ACT_SHOT;
419 pckt.port16[1] = 0;
420 }
421
422 if (vip_info->flags & F_HASH_NO_SRC_PORT)
423 pckt.port16[0] = 0;
424
425 if (!get_packet_dst(&dst, &pckt, vip_info, is_ipv6))
426 return TC_ACT_SHOT;
427
428 if (dst->flags & F_IPV6) {
429 cval = bpf_map_lookup_elem(&ctl_array, &v6_intf_pos);
430 if (!cval)
431 return TC_ACT_SHOT;
432 ifindex = cval->ifindex;
433 memcpy(tkey.remote_ipv6, dst->dstv6, 16);
434 tun_flag = BPF_F_TUNINFO_IPV6;
435 } else {
436 cval = bpf_map_lookup_elem(&ctl_array, &v4_intf_pos);
437 if (!cval)
438 return TC_ACT_SHOT;
439 ifindex = cval->ifindex;
440 tkey.remote_ipv4 = dst->dst;
441 }
442 vip_num = vip_info->vip_num;
443 data_stats = bpf_map_lookup_elem(&stats, &vip_num);
444 if (!data_stats)
445 return TC_ACT_SHOT;
446 data_stats->pkts++;
447 data_stats->bytes += pkt_bytes;
448 bpf_skb_set_tunnel_key(skb, &tkey, sizeof(tkey), tun_flag);
449 *(u32 *)eth->eth_dest = tkey.remote_ipv4;
450 return bpf_redirect(ifindex, 0);
451}
452
453SEC("l4lb-demo")
454int balancer_ingress(struct __sk_buff *ctx)
455{
456 void *data_end = (void *)(long)ctx->data_end;
457 void *data = (void *)(long)ctx->data;
458 struct eth_hdr *eth = data;
459 __u32 eth_proto;
460 __u32 nh_off;
461
462 nh_off = sizeof(struct eth_hdr);
463 if (data + nh_off > data_end)
464 return TC_ACT_SHOT;
465 eth_proto = eth->eth_proto;
466 if (eth_proto == bpf_htons(ETH_P_IP))
467 return process_packet(data, nh_off, data_end, false, ctx);
468 else if (eth_proto == bpf_htons(ETH_P_IPV6))
469 return process_packet(data, nh_off, data_end, true, ctx);
470 else
471 return TC_ACT_SHOT;
472}
473char _license[] SEC("license") = "GPL";
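
The load balancer above is deliberately built with -fno-inline (see the Makefile change earlier) so that its static helpers stay separate functions and the program exercises real bpf-to-bpf calls. The same pattern in its smallest form, assuming the selftests' bpf_helpers.h for SEC(); the function and section names here are illustrative only:

#include <linux/bpf.h>
#include <linux/pkt_cls.h>
#include "bpf_helpers.h"

static int get_verdict(struct __sk_buff *skb)
{
	/* with -fno-inline this stays a separate function in .text and is
	 * reached through a BPF_PSEUDO_CALL from the program below
	 */
	return skb->mark ? TC_ACT_SHOT : TC_ACT_OK;
}

SEC("classifier")
int handle_ingress(struct __sk_buff *skb)
{
	return get_verdict(skb);
}

char _license[] SEC("license") = "GPL";
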
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index 1d7d2149163a..6472ca98690e 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -169,10 +169,9 @@ out:
169#define NUM_ITER 100000 169#define NUM_ITER 100000
170#define VIP_NUM 5 170#define VIP_NUM 5
171 171
172static void test_l4lb(void) 172static void test_l4lb(const char *file)
173{ 173{
174 unsigned int nr_cpus = bpf_num_possible_cpus(); 174 unsigned int nr_cpus = bpf_num_possible_cpus();
175 const char *file = "./test_l4lb.o";
176 struct vip key = {.protocol = 6}; 175 struct vip key = {.protocol = 6};
177 struct vip_meta { 176 struct vip_meta {
178 __u32 flags; 177 __u32 flags;
@@ -249,6 +248,95 @@ out:
249 bpf_object__close(obj); 248 bpf_object__close(obj);
250} 249}
251 250
251static void test_l4lb_all(void)
252{
253 const char *file1 = "./test_l4lb.o";
254 const char *file2 = "./test_l4lb_noinline.o";
255
256 test_l4lb(file1);
257 test_l4lb(file2);
258}
259
260static void test_xdp_noinline(void)
261{
262 const char *file = "./test_xdp_noinline.o";
263 unsigned int nr_cpus = bpf_num_possible_cpus();
264 struct vip key = {.protocol = 6};
265 struct vip_meta {
266 __u32 flags;
267 __u32 vip_num;
268 } value = {.vip_num = VIP_NUM};
269 __u32 stats_key = VIP_NUM;
270 struct vip_stats {
271 __u64 bytes;
272 __u64 pkts;
273 } stats[nr_cpus];
274 struct real_definition {
275 union {
276 __be32 dst;
277 __be32 dstv6[4];
278 };
279 __u8 flags;
280 } real_def = {.dst = MAGIC_VAL};
281 __u32 ch_key = 11, real_num = 3;
282 __u32 duration, retval, size;
283 int err, i, prog_fd, map_fd;
284 __u64 bytes = 0, pkts = 0;
285 struct bpf_object *obj;
286 char buf[128];
287 u32 *magic = (u32 *)buf;
288
289 err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
290 if (err) {
291 error_cnt++;
292 return;
293 }
294
295 map_fd = bpf_find_map(__func__, obj, "vip_map");
296 if (map_fd < 0)
297 goto out;
298 bpf_map_update_elem(map_fd, &key, &value, 0);
299
300 map_fd = bpf_find_map(__func__, obj, "ch_rings");
301 if (map_fd < 0)
302 goto out;
303 bpf_map_update_elem(map_fd, &ch_key, &real_num, 0);
304
305 map_fd = bpf_find_map(__func__, obj, "reals");
306 if (map_fd < 0)
307 goto out;
308 bpf_map_update_elem(map_fd, &real_num, &real_def, 0);
309
310 err = bpf_prog_test_run(prog_fd, NUM_ITER, &pkt_v4, sizeof(pkt_v4),
311 buf, &size, &retval, &duration);
312 CHECK(err || errno || retval != 1 || size != 54 ||
313 *magic != MAGIC_VAL, "ipv4",
314 "err %d errno %d retval %d size %d magic %x\n",
315 err, errno, retval, size, *magic);
316
317 err = bpf_prog_test_run(prog_fd, NUM_ITER, &pkt_v6, sizeof(pkt_v6),
318 buf, &size, &retval, &duration);
319 CHECK(err || errno || retval != 1 || size != 74 ||
320 *magic != MAGIC_VAL, "ipv6",
321 "err %d errno %d retval %d size %d magic %x\n",
322 err, errno, retval, size, *magic);
323
324 map_fd = bpf_find_map(__func__, obj, "stats");
325 if (map_fd < 0)
326 goto out;
327 bpf_map_lookup_elem(map_fd, &stats_key, stats);
328 for (i = 0; i < nr_cpus; i++) {
329 bytes += stats[i].bytes;
330 pkts += stats[i].pkts;
331 }
332 if (bytes != MAGIC_BYTES * NUM_ITER * 2 || pkts != NUM_ITER * 2) {
333 error_cnt++;
334 printf("test_xdp_noinline:FAIL:stats %lld %lld\n", bytes, pkts);
335 }
336out:
337 bpf_object__close(obj);
338}
339
252static void test_tcp_estats(void) 340static void test_tcp_estats(void)
253{ 341{
254 const char *file = "./test_tcp_estats.o"; 342 const char *file = "./test_tcp_estats.o";
@@ -757,7 +845,8 @@ int main(void)
757 845
758 test_pkt_access(); 846 test_pkt_access();
759 test_xdp(); 847 test_xdp();
760 test_l4lb(); 848 test_l4lb_all();
849 test_xdp_noinline();
761 test_tcp_estats(); 850 test_tcp_estats();
762 test_bpf_obj_id(); 851 test_bpf_obj_id();
763 test_pkt_md_access(); 852 test_pkt_md_access();
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 3c64f30cf63c..3bacff0d6f91 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -2,6 +2,7 @@
2 * Testsuite for eBPF verifier 2 * Testsuite for eBPF verifier
3 * 3 *
4 * Copyright (c) 2014 PLUMgrid, http://plumgrid.com 4 * Copyright (c) 2014 PLUMgrid, http://plumgrid.com
5 * Copyright (c) 2017 Facebook
5 * 6 *
6 * This program is free software; you can redistribute it and/or 7 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of version 2 of the GNU General Public 8 * modify it under the terms of version 2 of the GNU General Public
@@ -277,7 +278,7 @@ static struct bpf_test tests[] = {
277 .insns = { 278 .insns = {
278 BPF_ALU64_REG(BPF_MOV, BPF_REG_0, BPF_REG_2), 279 BPF_ALU64_REG(BPF_MOV, BPF_REG_0, BPF_REG_2),
279 }, 280 },
280 .errstr = "jump out of range", 281 .errstr = "not an exit",
281 .result = REJECT, 282 .result = REJECT,
282 }, 283 },
283 { 284 {
@@ -5648,7 +5649,7 @@ static struct bpf_test tests[] = {
5648 "helper access to variable memory: size > 0 not allowed on NULL (ARG_PTR_TO_MEM_OR_NULL)", 5649 "helper access to variable memory: size > 0 not allowed on NULL (ARG_PTR_TO_MEM_OR_NULL)",
5649 .insns = { 5650 .insns = {
5650 BPF_MOV64_IMM(BPF_REG_1, 0), 5651 BPF_MOV64_IMM(BPF_REG_1, 0),
5651 BPF_MOV64_IMM(BPF_REG_2, 0), 5652 BPF_MOV64_IMM(BPF_REG_2, 1),
5652 BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128), 5653 BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128),
5653 BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128), 5654 BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128),
5654 BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 64), 5655 BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 64),
@@ -5883,7 +5884,7 @@ static struct bpf_test tests[] = {
5883 BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -24), 5884 BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -24),
5884 BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16), 5885 BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16),
5885 BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8), 5886 BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
5886 BPF_MOV64_IMM(BPF_REG_2, 0), 5887 BPF_MOV64_IMM(BPF_REG_2, 1),
5887 BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128), 5888 BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128),
5888 BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128), 5889 BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128),
5889 BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 63), 5890 BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 63),
@@ -8097,6 +8098,1623 @@ static struct bpf_test tests[] = {
8097 .result = REJECT, 8098 .result = REJECT,
8098 .prog_type = BPF_PROG_TYPE_CGROUP_SOCK, 8099 .prog_type = BPF_PROG_TYPE_CGROUP_SOCK,
8099 }, 8100 },
8101 {
8102 "calls: basic sanity",
8103 .insns = {
8104 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
8105 BPF_MOV64_IMM(BPF_REG_0, 1),
8106 BPF_EXIT_INSN(),
8107 BPF_MOV64_IMM(BPF_REG_0, 2),
8108 BPF_EXIT_INSN(),
8109 },
8110 .prog_type = BPF_PROG_TYPE_TRACEPOINT,
8111 .result = ACCEPT,
8112 },
8113 {
8114 "calls: not on unpriviledged",
8115 .insns = {
8116 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
8117 BPF_MOV64_IMM(BPF_REG_0, 1),
8118 BPF_EXIT_INSN(),
8119 BPF_MOV64_IMM(BPF_REG_0, 2),
8120 BPF_EXIT_INSN(),
8121 },
8122 .errstr_unpriv = "function calls to other bpf functions are allowed for root only",
8123 .result_unpriv = REJECT,
8124 .result = ACCEPT,
8125 },
8126 {
8127 "calls: overlapping caller/callee",
8128 .insns = {
8129 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 0),
8130 BPF_MOV64_IMM(BPF_REG_0, 1),
8131 BPF_EXIT_INSN(),
8132 },
8133 .prog_type = BPF_PROG_TYPE_TRACEPOINT,
8134 .errstr = "last insn is not an exit or jmp",
8135 .result = REJECT,
8136 },
8137 {
8138 "calls: wrong recursive calls",
8139 .insns = {
8140 BPF_JMP_IMM(BPF_JA, 0, 0, 4),
8141 BPF_JMP_IMM(BPF_JA, 0, 0, 4),
8142 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -2),
8143 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -2),
8144 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -2),
8145 BPF_MOV64_IMM(BPF_REG_0, 1),
8146 BPF_EXIT_INSN(),
8147 },
8148 .prog_type = BPF_PROG_TYPE_TRACEPOINT,
8149 .errstr = "jump out of range",
8150 .result = REJECT,
8151 },
8152 {
8153 "calls: wrong src reg",
8154 .insns = {
8155 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 2, 0, 0),
8156 BPF_MOV64_IMM(BPF_REG_0, 1),
8157 BPF_EXIT_INSN(),
8158 },
8159 .prog_type = BPF_PROG_TYPE_TRACEPOINT,
8160 .errstr = "BPF_CALL uses reserved fields",
8161 .result = REJECT,
8162 },
8163 {
8164 "calls: wrong off value",
8165 .insns = {
8166 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, -1, 2),
8167 BPF_MOV64_IMM(BPF_REG_0, 1),
8168 BPF_EXIT_INSN(),
8169 BPF_MOV64_IMM(BPF_REG_0, 2),
8170 BPF_EXIT_INSN(),
8171 },
8172 .prog_type = BPF_PROG_TYPE_TRACEPOINT,
8173 .errstr = "BPF_CALL uses reserved fields",
8174 .result = REJECT,
8175 },
8176 {
8177 "calls: jump back loop",
8178 .insns = {
8179 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -1),
8180 BPF_MOV64_IMM(BPF_REG_0, 1),
8181 BPF_EXIT_INSN(),
8182 },
8183 .prog_type = BPF_PROG_TYPE_TRACEPOINT,
8184 .errstr = "back-edge from insn 0 to 0",
8185 .result = REJECT,
8186 },
8187 {
8188 "calls: conditional call",
8189 .insns = {
8190 BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
8191 offsetof(struct __sk_buff, mark)),
8192 BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
8193 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
8194 BPF_MOV64_IMM(BPF_REG_0, 1),
8195 BPF_EXIT_INSN(),
8196 BPF_MOV64_IMM(BPF_REG_0, 2),
8197 BPF_EXIT_INSN(),
8198 },
8199 .prog_type = BPF_PROG_TYPE_TRACEPOINT,
8200 .errstr = "jump out of range",
8201 .result = REJECT,
8202 },
8203 {
8204 "calls: conditional call 2",
8205 .insns = {
8206 BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
8207 offsetof(struct __sk_buff, mark)),
8208 BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
8209 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4),
8210 BPF_MOV64_IMM(BPF_REG_0, 1),
8211 BPF_EXIT_INSN(),
8212 BPF_MOV64_IMM(BPF_REG_0, 2),
8213 BPF_EXIT_INSN(),
8214 BPF_MOV64_IMM(BPF_REG_0, 3),
8215 BPF_EXIT_INSN(),
8216 },
8217 .prog_type = BPF_PROG_TYPE_TRACEPOINT,
8218 .result = ACCEPT,
8219 },
8220 {
8221 "calls: conditional call 3",
8222 .insns = {
8223 BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
8224 offsetof(struct __sk_buff, mark)),
8225 BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
8226 BPF_JMP_IMM(BPF_JA, 0, 0, 4),
8227 BPF_MOV64_IMM(BPF_REG_0, 1),
8228 BPF_EXIT_INSN(),
8229 BPF_MOV64_IMM(BPF_REG_0, 1),
8230 BPF_JMP_IMM(BPF_JA, 0, 0, -6),
8231 BPF_MOV64_IMM(BPF_REG_0, 3),
8232 BPF_JMP_IMM(BPF_JA, 0, 0, -6),
8233 },
8234 .prog_type = BPF_PROG_TYPE_TRACEPOINT,
8235 .errstr = "back-edge from insn",
8236 .result = REJECT,
8237 },
8238 {
8239 "calls: conditional call 4",
8240 .insns = {
8241 BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
8242 offsetof(struct __sk_buff, mark)),
8243 BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
8244 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4),
8245 BPF_MOV64_IMM(BPF_REG_0, 1),
8246 BPF_EXIT_INSN(),
8247 BPF_MOV64_IMM(BPF_REG_0, 1),
8248 BPF_JMP_IMM(BPF_JA, 0, 0, -5),
8249 BPF_MOV64_IMM(BPF_REG_0, 3),
8250 BPF_EXIT_INSN(),
8251 },
8252 .prog_type = BPF_PROG_TYPE_TRACEPOINT,
8253 .result = ACCEPT,
8254 },
8255 {
8256 "calls: conditional call 5",
8257 .insns = {
8258 BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
8259 offsetof(struct __sk_buff, mark)),
8260 BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
8261 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4),
8262 BPF_MOV64_IMM(BPF_REG_0, 1),
8263 BPF_EXIT_INSN(),
8264 BPF_MOV64_IMM(BPF_REG_0, 1),
8265 BPF_JMP_IMM(BPF_JA, 0, 0, -6),
8266 BPF_MOV64_IMM(BPF_REG_0, 3),
8267 BPF_EXIT_INSN(),
8268 },
8269 .prog_type = BPF_PROG_TYPE_TRACEPOINT,
8270 .errstr = "back-edge from insn",
8271 .result = REJECT,
8272 },
8273 {
8274 "calls: conditional call 6",
8275 .insns = {
8276 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
8277 BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, -2),
8278 BPF_EXIT_INSN(),
8279 BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
8280 offsetof(struct __sk_buff, mark)),
8281 BPF_EXIT_INSN(),
8282 },
8283 .prog_type = BPF_PROG_TYPE_TRACEPOINT,
8284 .errstr = "back-edge from insn",
8285 .result = REJECT,
8286 },
8287 {
8288 "calls: using r0 returned by callee",
8289 .insns = {
8290 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
8291 BPF_EXIT_INSN(),
8292 BPF_MOV64_IMM(BPF_REG_0, 2),
8293 BPF_EXIT_INSN(),
8294 },
8295 .prog_type = BPF_PROG_TYPE_TRACEPOINT,
8296 .result = ACCEPT,
8297 },
8298 {
8299 "calls: using uninit r0 from callee",
8300 .insns = {
8301 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
8302 BPF_EXIT_INSN(),
8303 BPF_EXIT_INSN(),
8304 },
8305 .prog_type = BPF_PROG_TYPE_TRACEPOINT,
8306 .errstr = "!read_ok",
8307 .result = REJECT,
8308 },
8309 {
8310 "calls: callee is using r1",
8311 .insns = {
8312 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
8313 BPF_EXIT_INSN(),
8314 BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
8315 offsetof(struct __sk_buff, len)),
8316 BPF_EXIT_INSN(),
8317 },
8318 .prog_type = BPF_PROG_TYPE_SCHED_ACT,
8319 .result = ACCEPT,
8320 },
8321 {
8322 "calls: callee using args1",
8323 .insns = {
8324 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
8325 BPF_EXIT_INSN(),
8326 BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
8327 BPF_EXIT_INSN(),
8328 },
8329 .errstr_unpriv = "allowed for root only",
8330 .result_unpriv = REJECT,
8331 .result = ACCEPT,
8332 },
8333 {
8334 "calls: callee using wrong args2",
8335 .insns = {
8336 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
8337 BPF_EXIT_INSN(),
8338 BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
8339 BPF_EXIT_INSN(),
8340 },
8341 .prog_type = BPF_PROG_TYPE_TRACEPOINT,
8342 .errstr = "R2 !read_ok",
8343 .result = REJECT,
8344 },
8345 {
8346 "calls: callee using two args",
8347 .insns = {
8348 BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
8349 BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_6,
8350 offsetof(struct __sk_buff, len)),
8351 BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_6,
8352 offsetof(struct __sk_buff, len)),
8353 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
8354 BPF_EXIT_INSN(),
8355 BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
8356 BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2),
8357 BPF_EXIT_INSN(),
8358 },
8359 .errstr_unpriv = "allowed for root only",
8360 .result_unpriv = REJECT,
8361 .result = ACCEPT,
8362 },
8363 {
8364 "calls: callee changing pkt pointers",
8365 .insns = {
8366 BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
8367 offsetof(struct xdp_md, data)),
8368 BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
8369 offsetof(struct xdp_md, data_end)),
8370 BPF_MOV64_REG(BPF_REG_8, BPF_REG_6),
8371 BPF_ALU64_IMM(BPF_ADD, BPF_REG_8, 8),
8372 BPF_JMP_REG(BPF_JGT, BPF_REG_8, BPF_REG_7, 2),
8373 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
8374 /* clear_all_pkt_pointers() has to walk all frames
8375 * to make sure that pkt pointers in the caller
8376 * are cleared when callee is calling a helper that
8377 * adjusts packet size
8378 */
8379 BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
8380 BPF_MOV32_IMM(BPF_REG_0, 0),
8381 BPF_EXIT_INSN(),
8382 BPF_MOV64_IMM(BPF_REG_2, 0),
8383 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
8384 BPF_FUNC_xdp_adjust_head),
8385 BPF_EXIT_INSN(),
8386 },
8387 .result = REJECT,
8388 .errstr = "R6 invalid mem access 'inv'",
8389 .prog_type = BPF_PROG_TYPE_XDP,
8390 },
8391 {
8392 "calls: two calls with args",
8393 .insns = {
8394 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
8395 BPF_EXIT_INSN(),
8396 BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
8397 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 6),
8398 BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
8399 BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
8400 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
8401 BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0),
8402 BPF_MOV64_REG(BPF_REG_0, BPF_REG_7),
8403 BPF_EXIT_INSN(),
8404 BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
8405 offsetof(struct __sk_buff, len)),
8406 BPF_EXIT_INSN(),
8407 },
8408 .prog_type = BPF_PROG_TYPE_SCHED_CLS,
8409 .result = ACCEPT,
8410 },
8411 {
8412 "calls: calls with stack arith",
8413 .insns = {
8414 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
8415 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -64),
8416 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
8417 BPF_EXIT_INSN(),
8418 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -64),
8419 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
8420 BPF_EXIT_INSN(),
8421 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -64),
8422 BPF_MOV64_IMM(BPF_REG_0, 42),
8423 BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0),
8424 BPF_EXIT_INSN(),
8425 },
8426 .prog_type = BPF_PROG_TYPE_SCHED_CLS,
8427 .result = ACCEPT,
8428 },
8429 {
8430 "calls: calls with misaligned stack access",
8431 .insns = {
8432 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
8433 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -63),
8434 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
8435 BPF_EXIT_INSN(),
8436 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -61),
8437 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
8438 BPF_EXIT_INSN(),
8439 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -63),
8440 BPF_MOV64_IMM(BPF_REG_0, 42),
8441 BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0),
8442 BPF_EXIT_INSN(),
8443 },
8444 .prog_type = BPF_PROG_TYPE_SCHED_CLS,
8445 .flags = F_LOAD_WITH_STRICT_ALIGNMENT,
8446 .errstr = "misaligned stack access",
8447 .result = REJECT,
8448 },
8449 {
8450 "calls: calls control flow, jump test",
8451 .insns = {
8452 BPF_MOV64_IMM(BPF_REG_0, 42),
8453 BPF_JMP_IMM(BPF_JA, 0, 0, 2),
8454 BPF_MOV64_IMM(BPF_REG_0, 43),
8455 BPF_JMP_IMM(BPF_JA, 0, 0, 1),
8456 BPF_JMP_IMM(BPF_JA, 0, 0, -3),
8457 BPF_EXIT_INSN(),
8458 },
8459 .prog_type = BPF_PROG_TYPE_SCHED_CLS,
8460 .result = ACCEPT,
8461 },
8462 {
8463 "calls: calls control flow, jump test 2",
8464 .insns = {
8465 BPF_MOV64_IMM(BPF_REG_0, 42),
8466 BPF_JMP_IMM(BPF_JA, 0, 0, 2),
8467 BPF_MOV64_IMM(BPF_REG_0, 43),
8468 BPF_JMP_IMM(BPF_JA, 0, 0, 1),
8469 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -3),
8470 BPF_EXIT_INSN(),
8471 },
8472 .prog_type = BPF_PROG_TYPE_SCHED_CLS,
8473 .errstr = "jump out of range from insn 1 to 4",
8474 .result = REJECT,
8475 },
8476 {
8477 "calls: two calls with bad jump",
8478 .insns = {
8479 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
8480 BPF_EXIT_INSN(),
8481 BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
8482 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 6),
8483 BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
8484 BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
8485 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
8486 BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0),
8487 BPF_MOV64_REG(BPF_REG_0, BPF_REG_7),
8488 BPF_EXIT_INSN(),
8489 BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
8490 offsetof(struct __sk_buff, len)),
8491 BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, -3),
8492 BPF_EXIT_INSN(),
8493 },
8494 .prog_type = BPF_PROG_TYPE_TRACEPOINT,
8495 .errstr = "jump out of range from insn 11 to 9",
8496 .result = REJECT,
8497 },
8498 {
8499 "calls: recursive call. test1",
8500 .insns = {
8501 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
8502 BPF_EXIT_INSN(),
8503 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -1),
8504 BPF_EXIT_INSN(),
8505 },
8506 .prog_type = BPF_PROG_TYPE_TRACEPOINT,
8507 .errstr = "back-edge",
8508 .result = REJECT,
8509 },
8510 {
8511 "calls: recursive call. test2",
8512 .insns = {
8513 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
8514 BPF_EXIT_INSN(),
8515 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -3),
8516 BPF_EXIT_INSN(),
8517 },
8518 .prog_type = BPF_PROG_TYPE_TRACEPOINT,
8519 .errstr = "back-edge",
8520 .result = REJECT,
8521 },
8522 {
8523 "calls: unreachable code",
8524 .insns = {
8525 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
8526 BPF_EXIT_INSN(),
8527 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
8528 BPF_EXIT_INSN(),
8529 BPF_MOV64_IMM(BPF_REG_0, 0),
8530 BPF_EXIT_INSN(),
8531 BPF_MOV64_IMM(BPF_REG_0, 0),
8532 BPF_EXIT_INSN(),
8533 },
8534 .prog_type = BPF_PROG_TYPE_TRACEPOINT,
8535 .errstr = "unreachable insn 6",
8536 .result = REJECT,
8537 },
8538 {
8539 "calls: invalid call",
8540 .insns = {
8541 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
8542 BPF_EXIT_INSN(),
8543 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -4),
8544 BPF_EXIT_INSN(),
8545 },
8546 .prog_type = BPF_PROG_TYPE_TRACEPOINT,
8547 .errstr = "invalid destination",
8548 .result = REJECT,
8549 },
8550 {
8551 "calls: invalid call 2",
8552 .insns = {
8553 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
8554 BPF_EXIT_INSN(),
8555 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 0x7fffffff),
8556 BPF_EXIT_INSN(),
8557 },
8558 .prog_type = BPF_PROG_TYPE_TRACEPOINT,
8559 .errstr = "invalid destination",
8560 .result = REJECT,
8561 },
8562 {
8563 "calls: jumping across function bodies. test1",
8564 .insns = {
8565 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
8566 BPF_MOV64_IMM(BPF_REG_0, 0),
8567 BPF_EXIT_INSN(),
8568 BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, -3),
8569 BPF_EXIT_INSN(),
8570 },
8571 .prog_type = BPF_PROG_TYPE_TRACEPOINT,
8572 .errstr = "jump out of range",
8573 .result = REJECT,
8574 },
8575 {
8576 "calls: jumping across function bodies. test2",
8577 .insns = {
8578 BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 3),
8579 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
8580 BPF_MOV64_IMM(BPF_REG_0, 0),
8581 BPF_EXIT_INSN(),
8582 BPF_EXIT_INSN(),
8583 },
8584 .prog_type = BPF_PROG_TYPE_TRACEPOINT,
8585 .errstr = "jump out of range",
8586 .result = REJECT,
8587 },
8588 {
8589 "calls: call without exit",
8590 .insns = {
8591 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
8592 BPF_EXIT_INSN(),
8593 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
8594 BPF_EXIT_INSN(),
8595 BPF_MOV64_IMM(BPF_REG_0, 0),
8596 BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, -2),
8597 },
8598 .prog_type = BPF_PROG_TYPE_TRACEPOINT,
8599 .errstr = "not an exit",
8600 .result = REJECT,
8601 },
8602 {
8603 "calls: call into middle of ld_imm64",
8604 .insns = {
8605 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
8606 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
8607 BPF_MOV64_IMM(BPF_REG_0, 0),
8608 BPF_EXIT_INSN(),
8609 BPF_LD_IMM64(BPF_REG_0, 0),
8610 BPF_EXIT_INSN(),
8611 },
8612 .prog_type = BPF_PROG_TYPE_TRACEPOINT,
8613 .errstr = "last insn",
8614 .result = REJECT,
8615 },
8616 {
8617 "calls: call into middle of other call",
8618 .insns = {
8619 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
8620 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
8621 BPF_MOV64_IMM(BPF_REG_0, 0),
8622 BPF_EXIT_INSN(),
8623 BPF_MOV64_IMM(BPF_REG_0, 0),
8624 BPF_MOV64_IMM(BPF_REG_0, 0),
8625 BPF_EXIT_INSN(),
8626 },
8627 .prog_type = BPF_PROG_TYPE_TRACEPOINT,
8628 .errstr = "last insn",
8629 .result = REJECT,
8630 },
8631 {
8632 "calls: ld_abs with changing ctx data in callee",
8633 .insns = {
8634 BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
8635 BPF_LD_ABS(BPF_B, 0),
8636 BPF_LD_ABS(BPF_H, 0),
8637 BPF_LD_ABS(BPF_W, 0),
8638 BPF_MOV64_REG(BPF_REG_7, BPF_REG_6),
8639 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 5),
8640 BPF_MOV64_REG(BPF_REG_6, BPF_REG_7),
8641 BPF_LD_ABS(BPF_B, 0),
8642 BPF_LD_ABS(BPF_H, 0),
8643 BPF_LD_ABS(BPF_W, 0),
8644 BPF_EXIT_INSN(),
8645 BPF_MOV64_IMM(BPF_REG_2, 1),
8646 BPF_MOV64_IMM(BPF_REG_3, 2),
8647 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
8648 BPF_FUNC_skb_vlan_push),
8649 BPF_EXIT_INSN(),
8650 },
8651 .prog_type = BPF_PROG_TYPE_SCHED_CLS,
8652 .errstr = "BPF_LD_[ABS|IND] instructions cannot be mixed",
8653 .result = REJECT,
8654 },
8655 {
8656 "calls: two calls with bad fallthrough",
8657 .insns = {
8658 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
8659 BPF_EXIT_INSN(),
8660 BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
8661 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 6),
8662 BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
8663 BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
8664 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
8665 BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0),
8666 BPF_MOV64_REG(BPF_REG_0, BPF_REG_7),
8667 BPF_MOV64_REG(BPF_REG_0, BPF_REG_0),
8668 BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
8669 offsetof(struct __sk_buff, len)),
8670 BPF_EXIT_INSN(),
8671 },
8672 .prog_type = BPF_PROG_TYPE_TRACEPOINT,
8673 .errstr = "not an exit",
8674 .result = REJECT,
8675 },
8676 {
8677 "calls: two calls with stack read",
8678 .insns = {
8679 BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
8680 BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
8681 BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
8682 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
8683 BPF_EXIT_INSN(),
8684 BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
8685 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 6),
8686 BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
8687 BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
8688 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
8689 BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0),
8690 BPF_MOV64_REG(BPF_REG_0, BPF_REG_7),
8691 BPF_EXIT_INSN(),
8692 BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0),
8693 BPF_EXIT_INSN(),
8694 },
8695 .prog_type = BPF_PROG_TYPE_XDP,
8696 .result = ACCEPT,
8697 },
8698 {
8699 "calls: two calls with stack write",
8700 .insns = {
8701 /* main prog */
8702 BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
8703 BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
8704 BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
8705 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
8706 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16),
8707 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
8708 BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -16),
8709 BPF_EXIT_INSN(),
8710
8711 /* subprog 1 */
8712 BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
8713 BPF_MOV64_REG(BPF_REG_7, BPF_REG_2),
8714 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 7),
8715 BPF_MOV64_REG(BPF_REG_8, BPF_REG_0),
8716 BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
8717 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4),
8718 BPF_ALU64_REG(BPF_ADD, BPF_REG_8, BPF_REG_0),
8719 BPF_MOV64_REG(BPF_REG_0, BPF_REG_8),
8720 /* write into stack frame of main prog */
8721 BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
8722 BPF_EXIT_INSN(),
8723
8724 /* subprog 2 */
8725 /* read from stack frame of main prog */
8726 BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0),
8727 BPF_EXIT_INSN(),
8728 },
8729 .prog_type = BPF_PROG_TYPE_XDP,
8730 .result = ACCEPT,
8731 },
8732 {
8733 "calls: spill into caller stack frame",
8734 .insns = {
8735 BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
8736 BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
8737 BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
8738 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
8739 BPF_EXIT_INSN(),
8740 BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, 0),
8741 BPF_MOV64_IMM(BPF_REG_0, 0),
8742 BPF_EXIT_INSN(),
8743 },
8744 .prog_type = BPF_PROG_TYPE_XDP,
8745 .errstr = "cannot spill",
8746 .result = REJECT,
8747 },
8748 {
8749 "calls: write into caller stack frame",
8750 .insns = {
8751 BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
8752 BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
8753 BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
8754 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
8755 BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
8756 BPF_EXIT_INSN(),
8757 BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 42),
8758 BPF_MOV64_IMM(BPF_REG_0, 0),
8759 BPF_EXIT_INSN(),
8760 },
8761 .prog_type = BPF_PROG_TYPE_XDP,
8762 .result = ACCEPT,
8763 },
8764 {
8765 "calls: write into callee stack frame",
8766 .insns = {
8767 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
8768 BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 42),
8769 BPF_EXIT_INSN(),
8770 BPF_MOV64_REG(BPF_REG_0, BPF_REG_10),
8771 BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, -8),
8772 BPF_EXIT_INSN(),
8773 },
8774 .prog_type = BPF_PROG_TYPE_XDP,
8775 .errstr = "cannot return stack pointer",
8776 .result = REJECT,
8777 },
8778 {
8779 "calls: two calls with stack write and void return",
8780 .insns = {
8781 /* main prog */
8782 BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
8783 BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
8784 BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
8785 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
8786 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16),
8787 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
8788 BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -16),
8789 BPF_EXIT_INSN(),
8790
8791 /* subprog 1 */
8792 BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
8793 BPF_MOV64_REG(BPF_REG_7, BPF_REG_2),
8794 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
8795 BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
8796 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
8797 BPF_EXIT_INSN(),
8798
8799 /* subprog 2 */
8800 /* write into stack frame of main prog */
8801 BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 0),
8802 BPF_EXIT_INSN(), /* void return */
8803 },
8804 .prog_type = BPF_PROG_TYPE_XDP,
8805 .result = ACCEPT,
8806 },
8807 {
8808 "calls: ambiguous return value",
8809 .insns = {
8810 BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
8811 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 5),
8812 BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
8813 BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
8814 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
8815 BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
8816 BPF_EXIT_INSN(),
8817 BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1),
8818 BPF_MOV64_IMM(BPF_REG_0, 0),
8819 BPF_EXIT_INSN(),
8820 },
8821 .errstr_unpriv = "allowed for root only",
8822 .result_unpriv = REJECT,
8823 .errstr = "R0 !read_ok",
8824 .result = REJECT,
8825 },
8826 {
8827 "calls: two calls that return map_value",
8828 .insns = {
8829 /* main prog */
8830 /* pass fp-16, fp-8 into a function */
8831 BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
8832 BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
8833 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
8834 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16),
8835 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 8),
8836
8837 /* fetch map_value_ptr from the stack of this function */
8838 BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8),
8839 BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
8840 /* write into map value */
8841 BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
8842	/* fetch second map_value_ptr from the stack */
8843 BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -16),
8844 BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
8845 /* write into map value */
8846 BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
8847 BPF_MOV64_IMM(BPF_REG_0, 0),
8848 BPF_EXIT_INSN(),
8849
8850 /* subprog 1 */
8851 /* call 3rd function twice */
8852 BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
8853 BPF_MOV64_REG(BPF_REG_7, BPF_REG_2),
8854 /* first time with fp-8 */
8855 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
8856 BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
8857 /* second time with fp-16 */
8858 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
8859 BPF_EXIT_INSN(),
8860
8861 /* subprog 2 */
8862 BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
8863 /* lookup from map */
8864 BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
8865 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
8866 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
8867 BPF_LD_MAP_FD(BPF_REG_1, 0),
8868 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
8869 BPF_FUNC_map_lookup_elem),
8870 /* write map_value_ptr into stack frame of main prog */
8871 BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0),
8872 BPF_MOV64_IMM(BPF_REG_0, 0),
8873 BPF_EXIT_INSN(), /* return 0 */
8874 },
8875 .prog_type = BPF_PROG_TYPE_XDP,
8876 .fixup_map1 = { 23 },
8877 .result = ACCEPT,
8878 },
8879 {
8880 "calls: two calls that return map_value with bool condition",
8881 .insns = {
8882 /* main prog */
8883 /* pass fp-16, fp-8 into a function */
8884 BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
8885 BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
8886 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
8887 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16),
8888 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
8889 BPF_MOV64_IMM(BPF_REG_0, 0),
8890 BPF_EXIT_INSN(),
8891
8892 /* subprog 1 */
8893 /* call 3rd function twice */
8894 BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
8895 BPF_MOV64_REG(BPF_REG_7, BPF_REG_2),
8896 /* first time with fp-8 */
8897 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 9),
8898 BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2),
8899 /* fetch map_value_ptr from the stack of this function */
8900 BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
8901 /* write into map value */
8902 BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
8903 BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
8904 /* second time with fp-16 */
8905 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4),
8906 BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2),
8907	/* fetch second map_value_ptr from the stack */
8908 BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0),
8909 /* write into map value */
8910 BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
8911 BPF_EXIT_INSN(),
8912
8913 /* subprog 2 */
8914 BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
8915 /* lookup from map */
8916 BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
8917 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
8918 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
8919 BPF_LD_MAP_FD(BPF_REG_1, 0),
8920 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
8921 BPF_FUNC_map_lookup_elem),
8922 BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
8923 BPF_MOV64_IMM(BPF_REG_0, 0),
8924 BPF_EXIT_INSN(), /* return 0 */
8925 /* write map_value_ptr into stack frame of main prog */
8926 BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0),
8927 BPF_MOV64_IMM(BPF_REG_0, 1),
8928 BPF_EXIT_INSN(), /* return 1 */
8929 },
8930 .prog_type = BPF_PROG_TYPE_XDP,
8931 .fixup_map1 = { 23 },
8932 .result = ACCEPT,
8933 },
8934 {
8935 "calls: two calls that return map_value with incorrect bool check",
8936 .insns = {
8937 /* main prog */
8938 /* pass fp-16, fp-8 into a function */
8939 BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
8940 BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
8941 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
8942 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16),
8943 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
8944 BPF_MOV64_IMM(BPF_REG_0, 0),
8945 BPF_EXIT_INSN(),
8946
8947 /* subprog 1 */
8948 /* call 3rd function twice */
8949 BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
8950 BPF_MOV64_REG(BPF_REG_7, BPF_REG_2),
8951 /* first time with fp-8 */
8952 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 9),
8953 BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2),
8954 /* fetch map_value_ptr from the stack of this function */
8955 BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
8956 /* write into map value */
8957 BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
8958 BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
8959 /* second time with fp-16 */
8960 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4),
8961 BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
8962	/* fetch second map_value_ptr from the stack */
8963 BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0),
8964 /* write into map value */
8965 BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
8966 BPF_EXIT_INSN(),
8967
8968 /* subprog 2 */
8969 BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
8970 /* lookup from map */
8971 BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
8972 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
8973 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
8974 BPF_LD_MAP_FD(BPF_REG_1, 0),
8975 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
8976 BPF_FUNC_map_lookup_elem),
8977 BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
8978 BPF_MOV64_IMM(BPF_REG_0, 0),
8979 BPF_EXIT_INSN(), /* return 0 */
8980 /* write map_value_ptr into stack frame of main prog */
8981 BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0),
8982 BPF_MOV64_IMM(BPF_REG_0, 1),
8983 BPF_EXIT_INSN(), /* return 1 */
8984 },
8985 .prog_type = BPF_PROG_TYPE_XDP,
8986 .fixup_map1 = { 23 },
8987 .result = REJECT,
8988 .errstr = "invalid read from stack off -16+0 size 8",
8989 },
8990 {
8991 "calls: two calls that receive map_value via arg=ptr_stack_of_caller. test1",
8992 .insns = {
8993 /* main prog */
8994 /* pass fp-16, fp-8 into a function */
8995 BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
8996 BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
8997 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
8998 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16),
8999 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
9000 BPF_MOV64_IMM(BPF_REG_0, 0),
9001 BPF_EXIT_INSN(),
9002
9003 /* subprog 1 */
9004 BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
9005 BPF_MOV64_REG(BPF_REG_7, BPF_REG_2),
9006 /* 1st lookup from map */
9007 BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
9008 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
9009 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
9010 BPF_LD_MAP_FD(BPF_REG_1, 0),
9011 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
9012 BPF_FUNC_map_lookup_elem),
9013 BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
9014 BPF_MOV64_IMM(BPF_REG_8, 0),
9015 BPF_JMP_IMM(BPF_JA, 0, 0, 2),
9016 /* write map_value_ptr into stack frame of main prog at fp-8 */
9017 BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0),
9018 BPF_MOV64_IMM(BPF_REG_8, 1),
9019
9020 /* 2nd lookup from map */
9021 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), /* 20 */
9022 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
9023 BPF_LD_MAP_FD(BPF_REG_1, 0),
9024 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, /* 24 */
9025 BPF_FUNC_map_lookup_elem),
9026 BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
9027 BPF_MOV64_IMM(BPF_REG_9, 0),
9028 BPF_JMP_IMM(BPF_JA, 0, 0, 2),
9029 /* write map_value_ptr into stack frame of main prog at fp-16 */
9030 BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
9031 BPF_MOV64_IMM(BPF_REG_9, 1),
9032
9033 /* call 3rd func with fp-8, 0|1, fp-16, 0|1 */
9034 BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), /* 30 */
9035 BPF_MOV64_REG(BPF_REG_2, BPF_REG_8),
9036 BPF_MOV64_REG(BPF_REG_3, BPF_REG_7),
9037 BPF_MOV64_REG(BPF_REG_4, BPF_REG_9),
9038 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), /* 34 */
9039 BPF_EXIT_INSN(),
9040
9041 /* subprog 2 */
9042 /* if arg2 == 1 do *arg1 = 0 */
9043 BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 1, 2),
9044 /* fetch map_value_ptr from the stack of this function */
9045 BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0),
9046 /* write into map value */
9047 BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
9048
9049 /* if arg4 == 1 do *arg3 = 0 */
9050 BPF_JMP_IMM(BPF_JNE, BPF_REG_4, 1, 2),
9051 /* fetch map_value_ptr from the stack of this function */
9052 BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0),
9053 /* write into map value */
9054 BPF_ST_MEM(BPF_DW, BPF_REG_0, 2, 0),
9055 BPF_EXIT_INSN(),
9056 },
9057 .prog_type = BPF_PROG_TYPE_SCHED_CLS,
9058 .fixup_map1 = { 12, 22 },
9059 .result = REJECT,
9060 .errstr = "invalid access to map value, value_size=8 off=2 size=8",
9061 },
9062 {
9063 "calls: two calls that receive map_value via arg=ptr_stack_of_caller. test2",
9064 .insns = {
9065 /* main prog */
9066 /* pass fp-16, fp-8 into a function */
9067 BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
9068 BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
9069 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
9070 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16),
9071 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
9072 BPF_MOV64_IMM(BPF_REG_0, 0),
9073 BPF_EXIT_INSN(),
9074
9075 /* subprog 1 */
9076 BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
9077 BPF_MOV64_REG(BPF_REG_7, BPF_REG_2),
9078 /* 1st lookup from map */
9079 BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
9080 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
9081 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
9082 BPF_LD_MAP_FD(BPF_REG_1, 0),
9083 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
9084 BPF_FUNC_map_lookup_elem),
9085 BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
9086 BPF_MOV64_IMM(BPF_REG_8, 0),
9087 BPF_JMP_IMM(BPF_JA, 0, 0, 2),
9088 /* write map_value_ptr into stack frame of main prog at fp-8 */
9089 BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0),
9090 BPF_MOV64_IMM(BPF_REG_8, 1),
9091
9092 /* 2nd lookup from map */
9093 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), /* 20 */
9094 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
9095 BPF_LD_MAP_FD(BPF_REG_1, 0),
9096 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, /* 24 */
9097 BPF_FUNC_map_lookup_elem),
9098 BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
9099 BPF_MOV64_IMM(BPF_REG_9, 0),
9100 BPF_JMP_IMM(BPF_JA, 0, 0, 2),
9101 /* write map_value_ptr into stack frame of main prog at fp-16 */
9102 BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
9103 BPF_MOV64_IMM(BPF_REG_9, 1),
9104
9105 /* call 3rd func with fp-8, 0|1, fp-16, 0|1 */
9106 BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), /* 30 */
9107 BPF_MOV64_REG(BPF_REG_2, BPF_REG_8),
9108 BPF_MOV64_REG(BPF_REG_3, BPF_REG_7),
9109 BPF_MOV64_REG(BPF_REG_4, BPF_REG_9),
9110 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), /* 34 */
9111 BPF_EXIT_INSN(),
9112
9113 /* subprog 2 */
9114 /* if arg2 == 1 do *arg1 = 0 */
9115 BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 1, 2),
9116 /* fetch map_value_ptr from the stack of this function */
9117 BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0),
9118 /* write into map value */
9119 BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
9120
9121 /* if arg4 == 1 do *arg3 = 0 */
9122 BPF_JMP_IMM(BPF_JNE, BPF_REG_4, 1, 2),
9123 /* fetch map_value_ptr from the stack of this function */
9124 BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0),
9125 /* write into map value */
9126 BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
9127 BPF_EXIT_INSN(),
9128 },
9129 .prog_type = BPF_PROG_TYPE_SCHED_CLS,
9130 .fixup_map1 = { 12, 22 },
9131 .result = ACCEPT,
9132 },
9133 {
9134 "calls: two jumps that receive map_value via arg=ptr_stack_of_jumper. test3",
9135 .insns = {
9136 /* main prog */
9137 /* pass fp-16, fp-8 into a function */
9138 BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
9139 BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
9140 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
9141 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16),
9142 BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
9143 BPF_MOV64_IMM(BPF_REG_0, 0),
9144 BPF_EXIT_INSN(),
9145
9146 /* subprog 1 */
9147 BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
9148 BPF_MOV64_REG(BPF_REG_7, BPF_REG_2),
9149 /* 1st lookup from map */
9150 BPF_ST_MEM(BPF_DW, BPF_REG_10, -24, 0),
9151 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
9152 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -24),
9153 BPF_LD_MAP_FD(BPF_REG_1, 0),
9154 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
9155 BPF_FUNC_map_lookup_elem),
9156 BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
9157 BPF_MOV64_IMM(BPF_REG_8, 0),
9158 BPF_JMP_IMM(BPF_JA, 0, 0, 2),
9159 /* write map_value_ptr into stack frame of main prog at fp-8 */
9160 BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0),
9161 BPF_MOV64_IMM(BPF_REG_8, 1),
9162
9163 /* 2nd lookup from map */
9164 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
9165 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -24),
9166 BPF_LD_MAP_FD(BPF_REG_1, 0),
9167 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
9168 BPF_FUNC_map_lookup_elem),
9169 BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
9170 BPF_MOV64_IMM(BPF_REG_9, 0), // 26
9171 BPF_JMP_IMM(BPF_JA, 0, 0, 2),
9172 /* write map_value_ptr into stack frame of main prog at fp-16 */
9173 BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
9174 BPF_MOV64_IMM(BPF_REG_9, 1),
9175
9176 /* call 3rd func with fp-8, 0|1, fp-16, 0|1 */
9177 BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), // 30
9178 BPF_MOV64_REG(BPF_REG_2, BPF_REG_8),
9179 BPF_MOV64_REG(BPF_REG_3, BPF_REG_7),
9180 BPF_MOV64_REG(BPF_REG_4, BPF_REG_9),
9181 BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1), // 34
9182 BPF_JMP_IMM(BPF_JA, 0, 0, -30),
9183
9184 /* subprog 2 */
9185 /* if arg2 == 1 do *arg1 = 0 */
9186 BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 1, 2),
9187 /* fetch map_value_ptr from the stack of this function */
9188 BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0),
9189 /* write into map value */
9190 BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
9191
9192 /* if arg4 == 1 do *arg3 = 0 */
9193 BPF_JMP_IMM(BPF_JNE, BPF_REG_4, 1, 2),
9194 /* fetch map_value_ptr from the stack of this function */
9195 BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0),
9196 /* write into map value */
9197 BPF_ST_MEM(BPF_DW, BPF_REG_0, 2, 0),
9198 BPF_JMP_IMM(BPF_JA, 0, 0, -8),
9199 },
9200 .prog_type = BPF_PROG_TYPE_SCHED_CLS,
9201 .fixup_map1 = { 12, 22 },
9202 .result = REJECT,
9203 .errstr = "invalid access to map value, value_size=8 off=2 size=8",
9204 },
9205 {
9206 "calls: two calls that receive map_value_ptr_or_null via arg. test1",
9207 .insns = {
9208 /* main prog */
9209 /* pass fp-16, fp-8 into a function */
9210 BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
9211 BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
9212 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
9213 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16),
9214 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
9215 BPF_MOV64_IMM(BPF_REG_0, 0),
9216 BPF_EXIT_INSN(),
9217
9218 /* subprog 1 */
9219 BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
9220 BPF_MOV64_REG(BPF_REG_7, BPF_REG_2),
9221 /* 1st lookup from map */
9222 BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
9223 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
9224 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
9225 BPF_LD_MAP_FD(BPF_REG_1, 0),
9226 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
9227 BPF_FUNC_map_lookup_elem),
9228 /* write map_value_ptr_or_null into stack frame of main prog at fp-8 */
9229 BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0),
9230 BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
9231 BPF_MOV64_IMM(BPF_REG_8, 0),
9232 BPF_JMP_IMM(BPF_JA, 0, 0, 1),
9233 BPF_MOV64_IMM(BPF_REG_8, 1),
9234
9235 /* 2nd lookup from map */
9236 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
9237 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
9238 BPF_LD_MAP_FD(BPF_REG_1, 0),
9239 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
9240 BPF_FUNC_map_lookup_elem),
9241 /* write map_value_ptr_or_null into stack frame of main prog at fp-16 */
9242 BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
9243 BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
9244 BPF_MOV64_IMM(BPF_REG_9, 0),
9245 BPF_JMP_IMM(BPF_JA, 0, 0, 1),
9246 BPF_MOV64_IMM(BPF_REG_9, 1),
9247
9248 /* call 3rd func with fp-8, 0|1, fp-16, 0|1 */
9249 BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
9250 BPF_MOV64_REG(BPF_REG_2, BPF_REG_8),
9251 BPF_MOV64_REG(BPF_REG_3, BPF_REG_7),
9252 BPF_MOV64_REG(BPF_REG_4, BPF_REG_9),
9253 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
9254 BPF_EXIT_INSN(),
9255
9256 /* subprog 2 */
9257 /* if arg2 == 1 do *arg1 = 0 */
9258 BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 1, 2),
9259 /* fetch map_value_ptr from the stack of this function */
9260 BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0),
9261 /* write into map value */
9262 BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
9263
9264 /* if arg4 == 1 do *arg3 = 0 */
9265 BPF_JMP_IMM(BPF_JNE, BPF_REG_4, 1, 2),
9266 /* fetch map_value_ptr from the stack of this function */
9267 BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0),
9268 /* write into map value */
9269 BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
9270 BPF_EXIT_INSN(),
9271 },
9272 .prog_type = BPF_PROG_TYPE_SCHED_CLS,
9273 .fixup_map1 = { 12, 22 },
9274 .result = ACCEPT,
9275 },
9276 {
9277 "calls: two calls that receive map_value_ptr_or_null via arg. test2",
9278 .insns = {
9279 /* main prog */
9280 /* pass fp-16, fp-8 into a function */
9281 BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
9282 BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
9283 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
9284 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16),
9285 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
9286 BPF_MOV64_IMM(BPF_REG_0, 0),
9287 BPF_EXIT_INSN(),
9288
9289 /* subprog 1 */
9290 BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
9291 BPF_MOV64_REG(BPF_REG_7, BPF_REG_2),
9292 /* 1st lookup from map */
9293 BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
9294 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
9295 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
9296 BPF_LD_MAP_FD(BPF_REG_1, 0),
9297 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
9298 BPF_FUNC_map_lookup_elem),
9299 /* write map_value_ptr_or_null into stack frame of main prog at fp-8 */
9300 BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0),
9301 BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
9302 BPF_MOV64_IMM(BPF_REG_8, 0),
9303 BPF_JMP_IMM(BPF_JA, 0, 0, 1),
9304 BPF_MOV64_IMM(BPF_REG_8, 1),
9305
9306 /* 2nd lookup from map */
9307 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
9308 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
9309 BPF_LD_MAP_FD(BPF_REG_1, 0),
9310 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
9311 BPF_FUNC_map_lookup_elem),
9312 /* write map_value_ptr_or_null into stack frame of main prog at fp-16 */
9313 BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
9314 BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
9315 BPF_MOV64_IMM(BPF_REG_9, 0),
9316 BPF_JMP_IMM(BPF_JA, 0, 0, 1),
9317 BPF_MOV64_IMM(BPF_REG_9, 1),
9318
9319 /* call 3rd func with fp-8, 0|1, fp-16, 0|1 */
9320 BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
9321 BPF_MOV64_REG(BPF_REG_2, BPF_REG_8),
9322 BPF_MOV64_REG(BPF_REG_3, BPF_REG_7),
9323 BPF_MOV64_REG(BPF_REG_4, BPF_REG_9),
9324 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
9325 BPF_EXIT_INSN(),
9326
9327 /* subprog 2 */
9328 /* if arg2 == 1 do *arg1 = 0 */
9329 BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 1, 2),
9330 /* fetch map_value_ptr from the stack of this function */
9331 BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0),
9332 /* write into map value */
9333 BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
9334
9335 /* if arg4 == 0 do *arg3 = 0 */
9336 BPF_JMP_IMM(BPF_JNE, BPF_REG_4, 0, 2),
9337 /* fetch map_value_ptr from the stack of this function */
9338 BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0),
9339 /* write into map value */
9340 BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
9341 BPF_EXIT_INSN(),
9342 },
9343 .prog_type = BPF_PROG_TYPE_SCHED_CLS,
9344 .fixup_map1 = { 12, 22 },
9345 .result = REJECT,
9346 .errstr = "R0 invalid mem access 'inv'",
9347 },
9348 {
9349 "calls: pkt_ptr spill into caller stack",
9350 .insns = {
9351 BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
9352 BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8),
9353 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
9354 BPF_EXIT_INSN(),
9355
9356 /* subprog 1 */
9357 BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
9358 offsetof(struct __sk_buff, data)),
9359 BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
9360 offsetof(struct __sk_buff, data_end)),
9361 BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
9362 BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
9363 /* spill unchecked pkt_ptr into stack of caller */
9364 BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0),
9365 BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 2),
9366 /* now the pkt range is verified, read pkt_ptr from stack */
9367 BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_4, 0),
9368 /* write 4 bytes into packet */
9369 BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
9370 BPF_EXIT_INSN(),
9371 },
9372 .result = ACCEPT,
9373 .prog_type = BPF_PROG_TYPE_SCHED_CLS,
9374 },
9375 {
9376 "calls: pkt_ptr spill into caller stack 2",
9377 .insns = {
9378 BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
9379 BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8),
9380 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
9381 /* Marking is still kept, but not in all cases safe. */
9382 BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8),
9383 BPF_ST_MEM(BPF_W, BPF_REG_4, 0, 0),
9384 BPF_EXIT_INSN(),
9385
9386 /* subprog 1 */
9387 BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
9388 offsetof(struct __sk_buff, data)),
9389 BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
9390 offsetof(struct __sk_buff, data_end)),
9391 BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
9392 BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
9393 /* spill unchecked pkt_ptr into stack of caller */
9394 BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0),
9395 BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 2),
9396 /* now the pkt range is verified, read pkt_ptr from stack */
9397 BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_4, 0),
9398 /* write 4 bytes into packet */
9399 BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
9400 BPF_EXIT_INSN(),
9401 },
9402 .prog_type = BPF_PROG_TYPE_SCHED_CLS,
9403 .errstr = "invalid access to packet",
9404 .result = REJECT,
9405 },
9406 {
9407 "calls: pkt_ptr spill into caller stack 3",
9408 .insns = {
9409 BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
9410 BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8),
9411 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4),
9412 BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
9413 /* Marking is still kept and safe here. */
9414 BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8),
9415 BPF_ST_MEM(BPF_W, BPF_REG_4, 0, 0),
9416 BPF_EXIT_INSN(),
9417
9418 /* subprog 1 */
9419 BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
9420 offsetof(struct __sk_buff, data)),
9421 BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
9422 offsetof(struct __sk_buff, data_end)),
9423 BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
9424 BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
9425 /* spill unchecked pkt_ptr into stack of caller */
9426 BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0),
9427 BPF_MOV64_IMM(BPF_REG_5, 0),
9428 BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 3),
9429 BPF_MOV64_IMM(BPF_REG_5, 1),
9430 /* now the pkt range is verified, read pkt_ptr from stack */
9431 BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_4, 0),
9432 /* write 4 bytes into packet */
9433 BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
9434 BPF_MOV64_REG(BPF_REG_0, BPF_REG_5),
9435 BPF_EXIT_INSN(),
9436 },
9437 .prog_type = BPF_PROG_TYPE_SCHED_CLS,
9438 .result = ACCEPT,
9439 },
9440 {
9441 "calls: pkt_ptr spill into caller stack 4",
9442 .insns = {
9443 BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
9444 BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8),
9445 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4),
9446 BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
9447 /* Check marking propagated. */
9448 BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8),
9449 BPF_ST_MEM(BPF_W, BPF_REG_4, 0, 0),
9450 BPF_EXIT_INSN(),
9451
9452 /* subprog 1 */
9453 BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
9454 offsetof(struct __sk_buff, data)),
9455 BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
9456 offsetof(struct __sk_buff, data_end)),
9457 BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
9458 BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
9459 /* spill unchecked pkt_ptr into stack of caller */
9460 BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0),
9461 BPF_MOV64_IMM(BPF_REG_5, 0),
9462 BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 2),
9463 BPF_MOV64_IMM(BPF_REG_5, 1),
9464 /* don't read back pkt_ptr from stack here */
9465 /* write 4 bytes into packet */
9466 BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
9467 BPF_MOV64_REG(BPF_REG_0, BPF_REG_5),
9468 BPF_EXIT_INSN(),
9469 },
9470 .prog_type = BPF_PROG_TYPE_SCHED_CLS,
9471 .result = ACCEPT,
9472 },
9473 {
9474 "calls: pkt_ptr spill into caller stack 5",
9475 .insns = {
9476 BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
9477 BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8),
9478 BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_1, 0),
9479 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
9480 BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8),
9481 BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_4, 0),
9482 BPF_EXIT_INSN(),
9483
9484 /* subprog 1 */
9485 BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
9486 offsetof(struct __sk_buff, data)),
9487 BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
9488 offsetof(struct __sk_buff, data_end)),
9489 BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
9490 BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
9491 BPF_MOV64_IMM(BPF_REG_5, 0),
9492 BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 3),
9493 /* spill checked pkt_ptr into stack of caller */
9494 BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0),
9495 BPF_MOV64_IMM(BPF_REG_5, 1),
9496 /* don't read back pkt_ptr from stack here */
9497 /* write 4 bytes into packet */
9498 BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
9499 BPF_MOV64_REG(BPF_REG_0, BPF_REG_5),
9500 BPF_EXIT_INSN(),
9501 },
9502 .prog_type = BPF_PROG_TYPE_SCHED_CLS,
9503 .errstr = "same insn cannot be used with different",
9504 .result = REJECT,
9505 },
9506 {
9507 "calls: pkt_ptr spill into caller stack 6",
9508 .insns = {
9509 BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
9510 offsetof(struct __sk_buff, data_end)),
9511 BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
9512 BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8),
9513 BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0),
9514 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
9515 BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8),
9516 BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_4, 0),
9517 BPF_EXIT_INSN(),
9518
9519 /* subprog 1 */
9520 BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
9521 offsetof(struct __sk_buff, data)),
9522 BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
9523 offsetof(struct __sk_buff, data_end)),
9524 BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
9525 BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
9526 BPF_MOV64_IMM(BPF_REG_5, 0),
9527 BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 3),
9528 /* spill checked pkt_ptr into stack of caller */
9529 BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0),
9530 BPF_MOV64_IMM(BPF_REG_5, 1),
9531 /* don't read back pkt_ptr from stack here */
9532 /* write 4 bytes into packet */
9533 BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
9534 BPF_MOV64_REG(BPF_REG_0, BPF_REG_5),
9535 BPF_EXIT_INSN(),
9536 },
9537 .prog_type = BPF_PROG_TYPE_SCHED_CLS,
9538 .errstr = "R4 invalid mem access",
9539 .result = REJECT,
9540 },
9541 {
9542 "calls: pkt_ptr spill into caller stack 7",
9543 .insns = {
9544 BPF_MOV64_IMM(BPF_REG_2, 0),
9545 BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
9546 BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8),
9547 BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0),
9548 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
9549 BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8),
9550 BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_4, 0),
9551 BPF_EXIT_INSN(),
9552
9553 /* subprog 1 */
9554 BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
9555 offsetof(struct __sk_buff, data)),
9556 BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
9557 offsetof(struct __sk_buff, data_end)),
9558 BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
9559 BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
9560 BPF_MOV64_IMM(BPF_REG_5, 0),
9561 BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 3),
9562 /* spill checked pkt_ptr into stack of caller */
9563 BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0),
9564 BPF_MOV64_IMM(BPF_REG_5, 1),
9565 /* don't read back pkt_ptr from stack here */
9566 /* write 4 bytes into packet */
9567 BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
9568 BPF_MOV64_REG(BPF_REG_0, BPF_REG_5),
9569 BPF_EXIT_INSN(),
9570 },
9571 .prog_type = BPF_PROG_TYPE_SCHED_CLS,
9572 .errstr = "R4 invalid mem access",
9573 .result = REJECT,
9574 },
9575 {
9576 "calls: pkt_ptr spill into caller stack 8",
9577 .insns = {
9578 BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
9579 offsetof(struct __sk_buff, data)),
9580 BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
9581 offsetof(struct __sk_buff, data_end)),
9582 BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
9583 BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
9584 BPF_JMP_REG(BPF_JLE, BPF_REG_0, BPF_REG_3, 1),
9585 BPF_EXIT_INSN(),
9586 BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
9587 BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8),
9588 BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0),
9589 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
9590 BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8),
9591 BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_4, 0),
9592 BPF_EXIT_INSN(),
9593
9594 /* subprog 1 */
9595 BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
9596 offsetof(struct __sk_buff, data)),
9597 BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
9598 offsetof(struct __sk_buff, data_end)),
9599 BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
9600 BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
9601 BPF_MOV64_IMM(BPF_REG_5, 0),
9602 BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 3),
9603 /* spill checked pkt_ptr into stack of caller */
9604 BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0),
9605 BPF_MOV64_IMM(BPF_REG_5, 1),
9606 /* don't read back pkt_ptr from stack here */
9607 /* write 4 bytes into packet */
9608 BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
9609 BPF_MOV64_REG(BPF_REG_0, BPF_REG_5),
9610 BPF_EXIT_INSN(),
9611 },
9612 .prog_type = BPF_PROG_TYPE_SCHED_CLS,
9613 .result = ACCEPT,
9614 },
9615 {
9616 "calls: pkt_ptr spill into caller stack 9",
9617 .insns = {
9618 BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
9619 offsetof(struct __sk_buff, data)),
9620 BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
9621 offsetof(struct __sk_buff, data_end)),
9622 BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
9623 BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
9624 BPF_JMP_REG(BPF_JLE, BPF_REG_0, BPF_REG_3, 1),
9625 BPF_EXIT_INSN(),
9626 BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
9627 BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8),
9628 BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0),
9629 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
9630 BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8),
9631 BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_4, 0),
9632 BPF_EXIT_INSN(),
9633
9634 /* subprog 1 */
9635 BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
9636 offsetof(struct __sk_buff, data)),
9637 BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
9638 offsetof(struct __sk_buff, data_end)),
9639 BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
9640 BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
9641 BPF_MOV64_IMM(BPF_REG_5, 0),
9642 /* spill unchecked pkt_ptr into stack of caller */
9643 BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0),
9644 BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 2),
9645 BPF_MOV64_IMM(BPF_REG_5, 1),
9646 /* don't read back pkt_ptr from stack here */
9647 /* write 4 bytes into packet */
9648 BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
9649 BPF_MOV64_REG(BPF_REG_0, BPF_REG_5),
9650 BPF_EXIT_INSN(),
9651 },
9652 .prog_type = BPF_PROG_TYPE_SCHED_CLS,
9653 .errstr = "invalid access to packet",
9654 .result = REJECT,
9655 },
9656 {
9657 "calls: caller stack init to zero or map_value_or_null",
9658 .insns = {
9659 BPF_MOV64_IMM(BPF_REG_0, 0),
9660 BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
9661 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
9662 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
9663 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4),
9664 /* fetch map_value_or_null or const_zero from stack */
9665 BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8),
9666 BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
9667 /* store into map_value */
9668 BPF_ST_MEM(BPF_W, BPF_REG_0, 0, 0),
9669 BPF_EXIT_INSN(),
9670
9671 /* subprog 1 */
9672 /* if (ctx == 0) return; */
9673 BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 8),
9674 /* else bpf_map_lookup() and *(fp - 8) = r0 */
9675 BPF_MOV64_REG(BPF_REG_6, BPF_REG_2),
9676 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
9677 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
9678 BPF_LD_MAP_FD(BPF_REG_1, 0),
9679 BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
9680 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
9681 BPF_FUNC_map_lookup_elem),
9682 /* write map_value_ptr_or_null into stack frame of main prog at fp-8 */
9683 BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0),
9684 BPF_EXIT_INSN(),
9685 },
9686 .fixup_map1 = { 13 },
9687 .result = ACCEPT,
9688 .prog_type = BPF_PROG_TYPE_XDP,
9689 },
9690 {
9691 "calls: stack init to zero and pruning",
9692 .insns = {
9693 /* first make allocated_stack 16 byte */
9694 BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, 0),
9695 /* now fork the execution such that the false branch
9696	 * of JGT insn will be verified second and it skips zero
9697 * init of fp-8 stack slot. If stack liveness marking
9698 * is missing live_read marks from call map_lookup
9699 * processing then pruning will incorrectly assume
9700 * that fp-8 stack slot was unused in the fall-through
9701 * branch and will accept the program incorrectly
9702 */
9703 BPF_JMP_IMM(BPF_JGT, BPF_REG_1, 2, 2),
9704 BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
9705 BPF_JMP_IMM(BPF_JA, 0, 0, 0),
9706 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
9707 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
9708 BPF_LD_MAP_FD(BPF_REG_1, 0),
9709 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
9710 BPF_FUNC_map_lookup_elem),
9711 BPF_EXIT_INSN(),
9712 },
9713 .fixup_map2 = { 6 },
9714 .errstr = "invalid indirect read from stack off -8+0 size 8",
9715 .result = REJECT,
9716 .prog_type = BPF_PROG_TYPE_XDP,
9717 },
8100 9718};
8101 9719
8102 9720static int probe_filter_length(const struct bpf_insn *fp)
diff --git a/tools/testing/selftests/bpf/test_xdp_noinline.c b/tools/testing/selftests/bpf/test_xdp_noinline.c
new file mode 100644
index 000000000000..5e4aac74f9d0
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_xdp_noinline.c
@@ -0,0 +1,833 @@
1// SPDX-License-Identifier: GPL-2.0
2// Copyright (c) 2017 Facebook
3#include <stddef.h>
4#include <stdbool.h>
5#include <string.h>
6#include <linux/pkt_cls.h>
7#include <linux/bpf.h>
8#include <linux/in.h>
9#include <linux/if_ether.h>
10#include <linux/ip.h>
11#include <linux/ipv6.h>
12#include <linux/icmp.h>
13#include <linux/icmpv6.h>
14#include <linux/tcp.h>
15#include <linux/udp.h>
16#include "bpf_helpers.h"
17
18#define bpf_printk(fmt, ...) \
19({ \
20 char ____fmt[] = fmt; \
21 bpf_trace_printk(____fmt, sizeof(____fmt), \
22 ##__VA_ARGS__); \
23})
24
25static __u32 rol32(__u32 word, unsigned int shift)
26{
27 return (word << shift) | (word >> ((-shift) & 31));
28}
29
30/* copy paste of jhash from kernel sources to make sure llvm
31 * can compile it into a valid sequence of bpf instructions
32 */
33#define __jhash_mix(a, b, c) \
34{ \
35 a -= c; a ^= rol32(c, 4); c += b; \
36 b -= a; b ^= rol32(a, 6); a += c; \
37 c -= b; c ^= rol32(b, 8); b += a; \
38 a -= c; a ^= rol32(c, 16); c += b; \
39 b -= a; b ^= rol32(a, 19); a += c; \
40 c -= b; c ^= rol32(b, 4); b += a; \
41}
42
43#define __jhash_final(a, b, c) \
44{ \
45 c ^= b; c -= rol32(b, 14); \
46 a ^= c; a -= rol32(c, 11); \
47 b ^= a; b -= rol32(a, 25); \
48 c ^= b; c -= rol32(b, 16); \
49 a ^= c; a -= rol32(c, 4); \
50 b ^= a; b -= rol32(a, 14); \
51 c ^= b; c -= rol32(b, 24); \
52}
53
54#define JHASH_INITVAL 0xdeadbeef
55
56typedef unsigned int u32;
57
58static __attribute__ ((noinline))
59u32 jhash(const void *key, u32 length, u32 initval)
60{
61 u32 a, b, c;
62 const unsigned char *k = key;
63
64 a = b = c = JHASH_INITVAL + length + initval;
65
66 while (length > 12) {
67 a += *(u32 *)(k);
68 b += *(u32 *)(k + 4);
69 c += *(u32 *)(k + 8);
70 __jhash_mix(a, b, c);
71 length -= 12;
72 k += 12;
73 }
74 switch (length) {
75 case 12: c += (u32)k[11]<<24;
76 case 11: c += (u32)k[10]<<16;
77 case 10: c += (u32)k[9]<<8;
78 case 9: c += k[8];
79 case 8: b += (u32)k[7]<<24;
80 case 7: b += (u32)k[6]<<16;
81 case 6: b += (u32)k[5]<<8;
82 case 5: b += k[4];
83 case 4: a += (u32)k[3]<<24;
84 case 3: a += (u32)k[2]<<16;
85 case 2: a += (u32)k[1]<<8;
86 case 1: a += k[0];
87 __jhash_final(a, b, c);
88 case 0: /* Nothing left to add */
89 break;
90 }
91
92 return c;
93}
94
95static __attribute__ ((noinline))
96u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval)
97{
98 a += initval;
99 b += initval;
100 c += initval;
101 __jhash_final(a, b, c);
102 return c;
103}
104
105static __attribute__ ((noinline))
106u32 jhash_2words(u32 a, u32 b, u32 initval)
107{
108 return __jhash_nwords(a, b, 0, initval + JHASH_INITVAL + (2 << 2));
109}
110
111struct flow_key {
112 union {
113 __be32 src;
114 __be32 srcv6[4];
115 };
116 union {
117 __be32 dst;
118 __be32 dstv6[4];
119 };
120 union {
121 __u32 ports;
122 __u16 port16[2];
123 };
124 __u8 proto;
125};
126
127struct packet_description {
128 struct flow_key flow;
129 __u8 flags;
130};
131
132struct ctl_value {
133 union {
134 __u64 value;
135 __u32 ifindex;
136 __u8 mac[6];
137 };
138};
139
140struct vip_definition {
141 union {
142 __be32 vip;
143 __be32 vipv6[4];
144 };
145 __u16 port;
146 __u16 family;
147 __u8 proto;
148};
149
150struct vip_meta {
151 __u32 flags;
152 __u32 vip_num;
153};
154
155struct real_pos_lru {
156 __u32 pos;
157 __u64 atime;
158};
159
160struct real_definition {
161 union {
162 __be32 dst;
163 __be32 dstv6[4];
164 };
165 __u8 flags;
166};
167
168struct lb_stats {
169 __u64 v2;
170 __u64 v1;
171};
172
173struct bpf_map_def __attribute__ ((section("maps"), used)) vip_map = {
174 .type = BPF_MAP_TYPE_HASH,
175 .key_size = sizeof(struct vip_definition),
176 .value_size = sizeof(struct vip_meta),
177 .max_entries = 512,
178 .map_flags = 0,
179};
180
181struct bpf_map_def __attribute__ ((section("maps"), used)) lru_cache = {
182 .type = BPF_MAP_TYPE_LRU_HASH,
183 .key_size = sizeof(struct flow_key),
184 .value_size = sizeof(struct real_pos_lru),
185 .max_entries = 300,
186 .map_flags = 1U << 1,
187};
188
189struct bpf_map_def __attribute__ ((section("maps"), used)) ch_rings = {
190 .type = BPF_MAP_TYPE_ARRAY,
191 .key_size = sizeof(__u32),
192 .value_size = sizeof(__u32),
193 .max_entries = 12 * 655,
194 .map_flags = 0,
195};
196
197struct bpf_map_def __attribute__ ((section("maps"), used)) reals = {
198 .type = BPF_MAP_TYPE_ARRAY,
199 .key_size = sizeof(__u32),
200 .value_size = sizeof(struct real_definition),
201 .max_entries = 40,
202 .map_flags = 0,
203};
204
205struct bpf_map_def __attribute__ ((section("maps"), used)) stats = {
206 .type = BPF_MAP_TYPE_PERCPU_ARRAY,
207 .key_size = sizeof(__u32),
208 .value_size = sizeof(struct lb_stats),
209 .max_entries = 515,
210 .map_flags = 0,
211};
212
213struct bpf_map_def __attribute__ ((section("maps"), used)) ctl_array = {
214 .type = BPF_MAP_TYPE_ARRAY,
215 .key_size = sizeof(__u32),
216 .value_size = sizeof(struct ctl_value),
217 .max_entries = 16,
218 .map_flags = 0,
219};
220
221struct eth_hdr {
222 unsigned char eth_dest[6];
223 unsigned char eth_source[6];
224 unsigned short eth_proto;
225};
226
227static inline __u64 calc_offset(bool is_ipv6, bool is_icmp)
228{
229 __u64 off = sizeof(struct eth_hdr);
230 if (is_ipv6) {
231 off += sizeof(struct ipv6hdr);
232 if (is_icmp)
233 off += sizeof(struct icmp6hdr) + sizeof(struct ipv6hdr);
234 } else {
235 off += sizeof(struct iphdr);
236 if (is_icmp)
237 off += sizeof(struct icmphdr) + sizeof(struct iphdr);
238 }
239 return off;
240}
241
242static __attribute__ ((noinline))
243bool parse_udp(void *data, void *data_end,
244 bool is_ipv6, struct packet_description *pckt)
245{
246
247 bool is_icmp = !((pckt->flags & (1 << 0)) == 0);
248 __u64 off = calc_offset(is_ipv6, is_icmp);
249 struct udphdr *udp;
250 udp = data + off;
251
252 if (udp + 1 > data_end)
253 return 0;
254 if (!is_icmp) {
255 pckt->flow.port16[0] = udp->source;
256 pckt->flow.port16[1] = udp->dest;
257 } else {
258 pckt->flow.port16[0] = udp->dest;
259 pckt->flow.port16[1] = udp->source;
260 }
261 return 1;
262}
263
264static __attribute__ ((noinline))
265bool parse_tcp(void *data, void *data_end,
266 bool is_ipv6, struct packet_description *pckt)
267{
268
269 bool is_icmp = !((pckt->flags & (1 << 0)) == 0);
270 __u64 off = calc_offset(is_ipv6, is_icmp);
271 struct tcphdr *tcp;
272
273 tcp = data + off;
274 if (tcp + 1 > data_end)
275 return 0;
276 if (tcp->syn)
277 pckt->flags |= (1 << 1);
278 if (!is_icmp) {
279 pckt->flow.port16[0] = tcp->source;
280 pckt->flow.port16[1] = tcp->dest;
281 } else {
282 pckt->flow.port16[0] = tcp->dest;
283 pckt->flow.port16[1] = tcp->source;
284 }
285 return 1;
286}
287
288static __attribute__ ((noinline))
289bool encap_v6(struct xdp_md *xdp, struct ctl_value *cval,
290 struct packet_description *pckt,
291 struct real_definition *dst, __u32 pkt_bytes)
292{
293 struct eth_hdr *new_eth;
294 struct eth_hdr *old_eth;
295 struct ipv6hdr *ip6h;
296 __u32 ip_suffix;
297 void *data_end;
298 void *data;
299
300 if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct ipv6hdr)))
301 return 0;
302 data = (void *)(long)xdp->data;
303 data_end = (void *)(long)xdp->data_end;
304 new_eth = data;
305 ip6h = data + sizeof(struct eth_hdr);
306 old_eth = data + sizeof(struct ipv6hdr);
307 if (new_eth + 1 > data_end ||
308 old_eth + 1 > data_end || ip6h + 1 > data_end)
309 return 0;
310 memcpy(new_eth->eth_dest, cval->mac, 6);
311 memcpy(new_eth->eth_source, old_eth->eth_dest, 6);
312 new_eth->eth_proto = 56710;
313 ip6h->version = 6;
314 ip6h->priority = 0;
315 memset(ip6h->flow_lbl, 0, sizeof(ip6h->flow_lbl));
316
317 ip6h->nexthdr = IPPROTO_IPV6;
318 ip_suffix = pckt->flow.srcv6[3] ^ pckt->flow.port16[0];
319 ip6h->payload_len =
320 __builtin_bswap16(pkt_bytes + sizeof(struct ipv6hdr));
321 ip6h->hop_limit = 4;
322
323 ip6h->saddr.in6_u.u6_addr32[0] = 1;
324 ip6h->saddr.in6_u.u6_addr32[1] = 2;
325 ip6h->saddr.in6_u.u6_addr32[2] = 3;
326 ip6h->saddr.in6_u.u6_addr32[3] = ip_suffix;
327 memcpy(ip6h->daddr.in6_u.u6_addr32, dst->dstv6, 16);
328 return 1;
329}
330
331static __attribute__ ((noinline))
332bool encap_v4(struct xdp_md *xdp, struct ctl_value *cval,
333 struct packet_description *pckt,
334 struct real_definition *dst, __u32 pkt_bytes)
335{
336
337 __u32 ip_suffix = __builtin_bswap16(pckt->flow.port16[0]);
338 struct eth_hdr *new_eth;
339 struct eth_hdr *old_eth;
340 __u16 *next_iph_u16;
341 struct iphdr *iph;
342 __u32 csum = 0;
343 void *data_end;
344 void *data;
345
346 ip_suffix <<= 15;
347 ip_suffix ^= pckt->flow.src;
348 if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct iphdr)))
349 return 0;
350 data = (void *)(long)xdp->data;
351 data_end = (void *)(long)xdp->data_end;
352 new_eth = data;
353 iph = data + sizeof(struct eth_hdr);
354 old_eth = data + sizeof(struct iphdr);
355 if (new_eth + 1 > data_end ||
356 old_eth + 1 > data_end || iph + 1 > data_end)
357 return 0;
358 memcpy(new_eth->eth_dest, cval->mac, 6);
359 memcpy(new_eth->eth_source, old_eth->eth_dest, 6);
360 new_eth->eth_proto = 8;
361 iph->version = 4;
362 iph->ihl = 5;
363 iph->frag_off = 0;
364 iph->protocol = IPPROTO_IPIP;
365 iph->check = 0;
366 iph->tos = 1;
367 iph->tot_len = __builtin_bswap16(pkt_bytes + sizeof(struct iphdr));
368 /* don't update iph->daddr, since it will overwrite old eth_proto
369 * and multiple iterations of bpf_prog_run() will fail
370 */
371
372 iph->saddr = ((0xFFFF0000 & ip_suffix) | 4268) ^ dst->dst;
373 iph->ttl = 4;
374
375 next_iph_u16 = (__u16 *) iph;
376#pragma clang loop unroll(full)
377 for (int i = 0; i < sizeof(struct iphdr) >> 1; i++)
378 csum += *next_iph_u16++;
379 iph->check = ~((csum & 0xffff) + (csum >> 16));
380 if (bpf_xdp_adjust_head(xdp, (int)sizeof(struct iphdr)))
381 return 0;
382 return 1;
383}
384
385static __attribute__ ((noinline))
386bool decap_v6(struct xdp_md *xdp, void **data, void **data_end, bool inner_v4)
387{
388 struct eth_hdr *new_eth;
389 struct eth_hdr *old_eth;
390
391 old_eth = *data;
392 new_eth = *data + sizeof(struct ipv6hdr);
393 memcpy(new_eth->eth_source, old_eth->eth_source, 6);
394 memcpy(new_eth->eth_dest, old_eth->eth_dest, 6);
395 if (inner_v4)
396 new_eth->eth_proto = 8;
397 else
398 new_eth->eth_proto = 56710;
399 if (bpf_xdp_adjust_head(xdp, (int)sizeof(struct ipv6hdr)))
400 return 0;
401 *data = (void *)(long)xdp->data;
402 *data_end = (void *)(long)xdp->data_end;
403 return 1;
404}
405
406static __attribute__ ((noinline))
407bool decap_v4(struct xdp_md *xdp, void **data, void **data_end)
408{
409 struct eth_hdr *new_eth;
410 struct eth_hdr *old_eth;
411
412 old_eth = *data;
413 new_eth = *data + sizeof(struct iphdr);
414 memcpy(new_eth->eth_source, old_eth->eth_source, 6);
415 memcpy(new_eth->eth_dest, old_eth->eth_dest, 6);
416 new_eth->eth_proto = 8;
417 if (bpf_xdp_adjust_head(xdp, (int)sizeof(struct iphdr)))
418 return 0;
419 *data = (void *)(long)xdp->data;
420 *data_end = (void *)(long)xdp->data_end;
421 return 1;
422}
423
424static __attribute__ ((noinline))
425int swap_mac_and_send(void *data, void *data_end)
426{
427 unsigned char tmp_mac[6];
428 struct eth_hdr *eth;
429
430 eth = data;
431 memcpy(tmp_mac, eth->eth_source, 6);
432 memcpy(eth->eth_source, eth->eth_dest, 6);
433 memcpy(eth->eth_dest, tmp_mac, 6);
434 return XDP_TX;
435}
436
437static __attribute__ ((noinline))
438int send_icmp_reply(void *data, void *data_end)
439{
440 struct icmphdr *icmp_hdr;
441 __u16 *next_iph_u16;
442 __u32 tmp_addr = 0;
443 struct iphdr *iph;
444 __u32 csum1 = 0;
445 __u32 csum = 0;
446 __u64 off = 0;
447
448 if (data + sizeof(struct eth_hdr)
449 + sizeof(struct iphdr) + sizeof(struct icmphdr) > data_end)
450 return XDP_DROP;
451 off += sizeof(struct eth_hdr);
452 iph = data + off;
453 off += sizeof(struct iphdr);
454 icmp_hdr = data + off;
455 icmp_hdr->type = 0;
456 icmp_hdr->checksum += 0x0007;
457 iph->ttl = 4;
458 tmp_addr = iph->daddr;
459 iph->daddr = iph->saddr;
460 iph->saddr = tmp_addr;
461 iph->check = 0;
462 next_iph_u16 = (__u16 *) iph;
463#pragma clang loop unroll(full)
464 for (int i = 0; i < sizeof(struct iphdr) >> 1; i++)
465 csum += *next_iph_u16++;
466 iph->check = ~((csum & 0xffff) + (csum >> 16));
467 return swap_mac_and_send(data, data_end);
468}
469
470static __attribute__ ((noinline))
471int send_icmp6_reply(void *data, void *data_end)
472{
473 struct icmp6hdr *icmp_hdr;
474 struct ipv6hdr *ip6h;
475 __be32 tmp_addr[4];
476 __u64 off = 0;
477
478 if (data + sizeof(struct eth_hdr)
479 + sizeof(struct ipv6hdr) + sizeof(struct icmp6hdr) > data_end)
480 return XDP_DROP;
481 off += sizeof(struct eth_hdr);
482 ip6h = data + off;
483 off += sizeof(struct ipv6hdr);
484 icmp_hdr = data + off;
485 icmp_hdr->icmp6_type = 129;
486 icmp_hdr->icmp6_cksum -= 0x0001;
487 ip6h->hop_limit = 4;
488 memcpy(tmp_addr, ip6h->saddr.in6_u.u6_addr32, 16);
489 memcpy(ip6h->saddr.in6_u.u6_addr32, ip6h->daddr.in6_u.u6_addr32, 16);
490 memcpy(ip6h->daddr.in6_u.u6_addr32, tmp_addr, 16);
491 return swap_mac_and_send(data, data_end);
492}
493
494static __attribute__ ((noinline))
495int parse_icmpv6(void *data, void *data_end, __u64 off,
496 struct packet_description *pckt)
497{
498 struct icmp6hdr *icmp_hdr;
499 struct ipv6hdr *ip6h;
500
501 icmp_hdr = data + off;
502 if (icmp_hdr + 1 > data_end)
503 return XDP_DROP;
504 if (icmp_hdr->icmp6_type == 128)
505 return send_icmp6_reply(data, data_end);
506 if (icmp_hdr->icmp6_type != 3)
507 return XDP_PASS;
508 off += sizeof(struct icmp6hdr);
509 ip6h = data + off;
510 if (ip6h + 1 > data_end)
511 return XDP_DROP;
512 pckt->flow.proto = ip6h->nexthdr;
513 pckt->flags |= (1 << 0);
514 memcpy(pckt->flow.srcv6, ip6h->daddr.in6_u.u6_addr32, 16);
515 memcpy(pckt->flow.dstv6, ip6h->saddr.in6_u.u6_addr32, 16);
516 return -1;
517}
518
519static __attribute__ ((noinline))
520int parse_icmp(void *data, void *data_end, __u64 off,
521 struct packet_description *pckt)
522{
523 struct icmphdr *icmp_hdr;
524 struct iphdr *iph;
525
526 icmp_hdr = data + off;
527 if (icmp_hdr + 1 > data_end)
528 return XDP_DROP;
529 if (icmp_hdr->type == 8)
530 return send_icmp_reply(data, data_end);
531 if ((icmp_hdr->type != 3) || (icmp_hdr->code != 4))
532 return XDP_PASS;
533 off += sizeof(struct icmphdr);
534 iph = data + off;
535 if (iph + 1 > data_end)
536 return XDP_DROP;
537 if (iph->ihl != 5)
538 return XDP_DROP;
539 pckt->flow.proto = iph->protocol;
540 pckt->flags |= (1 << 0);
541 pckt->flow.src = iph->daddr;
542 pckt->flow.dst = iph->saddr;
543 return -1;
544}
545
546static __attribute__ ((noinline))
547__u32 get_packet_hash(struct packet_description *pckt,
548 bool hash_16bytes)
549{
550 if (hash_16bytes)
551 return jhash_2words(jhash(pckt->flow.srcv6, 16, 12),
552 pckt->flow.ports, 24);
553 else
554 return jhash_2words(pckt->flow.src, pckt->flow.ports,
555 24);
556}
557
558__attribute__ ((noinline))
559static bool get_packet_dst(struct real_definition **real,
560 struct packet_description *pckt,
561 struct vip_meta *vip_info,
562 bool is_ipv6, void *lru_map)
563{
564 struct real_pos_lru new_dst_lru = { };
565 bool hash_16bytes = is_ipv6;
566 __u32 *real_pos, hash, key;
567 __u64 cur_time;
568
569 if (vip_info->flags & (1 << 2))
570 hash_16bytes = 1;
571 if (vip_info->flags & (1 << 3)) {
572 pckt->flow.port16[0] = pckt->flow.port16[1];
573 memset(pckt->flow.srcv6, 0, 16);
574 }
575 hash = get_packet_hash(pckt, hash_16bytes);
576 if (hash != 0x358459b7 /* jhash of ipv4 packet */ &&
577 hash != 0x2f4bc6bb /* jhash of ipv6 packet */)
578 return 0;
579 key = 2 * vip_info->vip_num + hash % 2;
580 real_pos = bpf_map_lookup_elem(&ch_rings, &key);
581 if (!real_pos)
582 return 0;
583 key = *real_pos;
584 *real = bpf_map_lookup_elem(&reals, &key);
585 if (!(*real))
586 return 0;
587 if (!(vip_info->flags & (1 << 1))) {
588 __u32 conn_rate_key = 512 + 2;
589 struct lb_stats *conn_rate_stats =
590 bpf_map_lookup_elem(&stats, &conn_rate_key);
591
592 if (!conn_rate_stats)
593 return 1;
594 cur_time = bpf_ktime_get_ns();
595 if ((cur_time - conn_rate_stats->v2) >> 32 > 0xffFFFF) {
596 conn_rate_stats->v1 = 1;
597 conn_rate_stats->v2 = cur_time;
598 } else {
599 conn_rate_stats->v1 += 1;
600 if (conn_rate_stats->v1 >= 1)
601 return 1;
602 }
603 if (pckt->flow.proto == IPPROTO_UDP)
604 new_dst_lru.atime = cur_time;
605 new_dst_lru.pos = key;
606 bpf_map_update_elem(lru_map, &pckt->flow, &new_dst_lru, 0);
607 }
608 return 1;
609}
610
611__attribute__ ((noinline))
612static void connection_table_lookup(struct real_definition **real,
613 struct packet_description *pckt,
614 void *lru_map)
615{
616
617 struct real_pos_lru *dst_lru;
618 __u64 cur_time;
619 __u32 key;
620
621 dst_lru = bpf_map_lookup_elem(lru_map, &pckt->flow);
622 if (!dst_lru)
623 return;
624 if (pckt->flow.proto == IPPROTO_UDP) {
625 cur_time = bpf_ktime_get_ns();
626 if (cur_time - dst_lru->atime > 300000)
627 return;
628 dst_lru->atime = cur_time;
629 }
630 key = dst_lru->pos;
631 *real = bpf_map_lookup_elem(&reals, &key);
632}
633
634/* don't believe your eyes!
635 * below function has 6 arguments whereas bpf and llvm allow maximum of 5
636 * but since it's _static_ llvm can optimize one argument away
637 */
638__attribute__ ((noinline))
639static int process_l3_headers_v6(struct packet_description *pckt,
640 __u8 *protocol, __u64 off,
641 __u16 *pkt_bytes, void *data,
642 void *data_end)
643{
644 struct ipv6hdr *ip6h;
645 __u64 iph_len;
646 int action;
647
648 ip6h = data + off;
649 if (ip6h + 1 > data_end)
650 return XDP_DROP;
651 iph_len = sizeof(struct ipv6hdr);
652 *protocol = ip6h->nexthdr;
653 pckt->flow.proto = *protocol;
654 *pkt_bytes = __builtin_bswap16(ip6h->payload_len);
655 off += iph_len;
656 if (*protocol == 45) {
657 return XDP_DROP;
658 } else if (*protocol == 59) {
659 action = parse_icmpv6(data, data_end, off, pckt);
660 if (action >= 0)
661 return action;
662 } else {
663 memcpy(pckt->flow.srcv6, ip6h->saddr.in6_u.u6_addr32, 16);
664 memcpy(pckt->flow.dstv6, ip6h->daddr.in6_u.u6_addr32, 16);
665 }
666 return -1;
667}
668
669__attribute__ ((noinline))
670static int process_l3_headers_v4(struct packet_description *pckt,
671 __u8 *protocol, __u64 off,
672 __u16 *pkt_bytes, void *data,
673 void *data_end)
674{
675 struct iphdr *iph;
676 __u64 iph_len;
677 int action;
678
679 iph = data + off;
680 if (iph + 1 > data_end)
681 return XDP_DROP;
682 if (iph->ihl != 5)
683 return XDP_DROP;
684 *protocol = iph->protocol;
685 pckt->flow.proto = *protocol;
686 *pkt_bytes = __builtin_bswap16(iph->tot_len);
687 off += 20;
688 if (iph->frag_off & 65343)
689 return XDP_DROP;
690 if (*protocol == IPPROTO_ICMP) {
691 action = parse_icmp(data, data_end, off, pckt);
692 if (action >= 0)
693 return action;
694 } else {
695 pckt->flow.src = iph->saddr;
696 pckt->flow.dst = iph->daddr;
697 }
698 return -1;
699}
700
701__attribute__ ((noinline))
702static int process_packet(void *data, __u64 off, void *data_end,
703 bool is_ipv6, struct xdp_md *xdp)
704{
705
706 struct real_definition *dst = NULL;
707 struct packet_description pckt = { };
708 struct vip_definition vip = { };
709 struct lb_stats *data_stats;
710 struct eth_hdr *eth = data;
711 void *lru_map = &lru_cache;
712 struct vip_meta *vip_info;
713 __u32 lru_stats_key = 513;
714 __u32 mac_addr_pos = 0;
715 __u32 stats_key = 512;
716 struct ctl_value *cval;
717 __u16 pkt_bytes;
718 __u64 iph_len;
719 __u8 protocol;
720 __u32 vip_num;
721 int action;
722
723 if (is_ipv6)
724 action = process_l3_headers_v6(&pckt, &protocol, off,
725 &pkt_bytes, data, data_end);
726 else
727 action = process_l3_headers_v4(&pckt, &protocol, off,
728 &pkt_bytes, data, data_end);
729 if (action >= 0)
730 return action;
731 protocol = pckt.flow.proto;
732 if (protocol == IPPROTO_TCP) {
733 if (!parse_tcp(data, data_end, is_ipv6, &pckt))
734 return XDP_DROP;
735 } else if (protocol == IPPROTO_UDP) {
736 if (!parse_udp(data, data_end, is_ipv6, &pckt))
737 return XDP_DROP;
738 } else {
739 return XDP_TX;
740 }
741
742 if (is_ipv6)
743 memcpy(vip.vipv6, pckt.flow.dstv6, 16);
744 else
745 vip.vip = pckt.flow.dst;
746 vip.port = pckt.flow.port16[1];
747 vip.proto = pckt.flow.proto;
748 vip_info = bpf_map_lookup_elem(&vip_map, &vip);
749 if (!vip_info) {
750 vip.port = 0;
751 vip_info = bpf_map_lookup_elem(&vip_map, &vip);
752 if (!vip_info)
753 return XDP_PASS;
754 if (!(vip_info->flags & (1 << 4)))
755 pckt.flow.port16[1] = 0;
756 }
757 if (data_end - data > 1400)
758 return XDP_DROP;
759 data_stats = bpf_map_lookup_elem(&stats, &stats_key);
760 if (!data_stats)
761 return XDP_DROP;
762 data_stats->v1 += 1;
763 if (!dst) {
764 if (vip_info->flags & (1 << 0))
765 pckt.flow.port16[0] = 0;
766 if (!(pckt.flags & (1 << 1)) && !(vip_info->flags & (1 << 1)))
767 connection_table_lookup(&dst, &pckt, lru_map);
768 if (dst)
769 goto out;
770 if (pckt.flow.proto == IPPROTO_TCP) {
771 struct lb_stats *lru_stats =
772 bpf_map_lookup_elem(&stats, &lru_stats_key);
773
774 if (!lru_stats)
775 return XDP_DROP;
776 if (pckt.flags & (1 << 1))
777 lru_stats->v1 += 1;
778 else
779 lru_stats->v2 += 1;
780 }
781 if (!get_packet_dst(&dst, &pckt, vip_info, is_ipv6, lru_map))
782 return XDP_DROP;
783 data_stats->v2 += 1;
784 }
785out:
786 cval = bpf_map_lookup_elem(&ctl_array, &mac_addr_pos);
787 if (!cval)
788 return XDP_DROP;
789 if (dst->flags & (1 << 0)) {
790 if (!encap_v6(xdp, cval, &pckt, dst, pkt_bytes))
791 return XDP_DROP;
792 } else {
793 if (!encap_v4(xdp, cval, &pckt, dst, pkt_bytes))
794 return XDP_DROP;
795 }
796 vip_num = vip_info->vip_num;
797 data_stats = bpf_map_lookup_elem(&stats, &vip_num);
798 if (!data_stats)
799 return XDP_DROP;
800 data_stats->v1 += 1;
801 data_stats->v2 += pkt_bytes;
802
803 data = (void *)(long)xdp->data;
804 data_end = (void *)(long)xdp->data_end;
805 if (data + 4 > data_end)
806 return XDP_DROP;
807 *(u32 *)data = dst->dst;
808 return XDP_DROP;
809}
810
811__attribute__ ((section("xdp-test"), used))
812int balancer_ingress(struct xdp_md *ctx)
813{
814 void *data = (void *)(long)ctx->data;
815 void *data_end = (void *)(long)ctx->data_end;
816 struct eth_hdr *eth = data;
817 __u32 eth_proto;
818 __u32 nh_off;
819
820 nh_off = sizeof(struct eth_hdr);
821 if (data + nh_off > data_end)
822 return XDP_DROP;
823 eth_proto = eth->eth_proto;
824 if (eth_proto == 8)
825 return process_packet(data, nh_off, data_end, 0, ctx);
826 else if (eth_proto == 56710)
827 return process_packet(data, nh_off, data_end, 1, ctx);
828 else
829 return XDP_DROP;
830}
831
832char _license[] __attribute__ ((section("license"), used)) = "GPL";
833int _version __attribute__ ((section("version"), used)) = 1;