summaryrefslogtreecommitdiffstats
path: root/kernel/bpf
diff options
context:
space:
mode:
authorDaniel Borkmann <daniel@iogearbox.net>2016-09-19 18:26:13 -0400
committerDavid S. Miller <davem@davemloft.net>2016-09-20 23:32:11 -0400
commit36bbef52c7eb646ed6247055a2acd3851e317857 (patch)
tree6098b4c590f33dbd156ba7de48b40f34bf85db58 /kernel/bpf
parentb399cf64e318ac8c5f10d36bb911e61c746b8788 (diff)
bpf: direct packet write and access for helpers for clsact progs
This work implements direct packet access for helpers and direct packet write in a similar fashion as already available for XDP types via commits 4acf6c0b84c9 ("bpf: enable direct packet data write for xdp progs") and 6841de8b0d03 ("bpf: allow helpers access the packet directly"), and as a complementary feature to the already available direct packet read for tc (cls/act) programs. For enabling this, we need to introduce two helpers, bpf_skb_pull_data() and bpf_csum_update(). The first is generally needed for both, read and write, because they would otherwise only be limited to the current linear skb head. Usually, when the data_end test fails, programs just bail out, or, in the direct read case, use bpf_skb_load_bytes() as an alternative to overcome this limitation. If such data sits in non-linear parts, we can just pull them in once with the new helper, retest and eventually access them. At the same time, this also makes sure the skb is uncloned, which is, of course, a necessary condition for direct write. As this needs to be an invariant for the write part only, the verifier detects writes and adds a prologue that is calling bpf_skb_pull_data() to effectively unclone the skb from the very beginning in case it is indeed cloned. The heuristic makes use of a similar trick that was done in 233577a22089 ("net: filter: constify detection of pkt_type_offset"). This comes at zero cost for other programs that do not use the direct write feature. Should a program use this feature only sparsely and have read access for the most parts with, for example, drop return codes, then such write action can be delegated to a tail called program for mitigating this cost of potential uncloning to a late point in time where it would have been paid similarly with the bpf_skb_store_bytes() as well.
Advantage of direct write is that the writes are inlined whereas the helper cannot make any length assumptions and thus needs to generate a call to memcpy() also for small sizes, and the cost of the helper call itself with sanity checks is avoided. Plus, when direct read is already used, we don't need to cache or perform rechecks on the data boundaries (due to verifier invalidating previous checks for helpers that change skb->data), so more complex programs using rewrites can benefit from switching to direct read plus write. For direct packet access to helpers, we save the otherwise needed copy into a temp struct sitting on stack memory when use-case allows. Both facilities are enabled via may_access_direct_pkt_data() in verifier. For now, we limit this to map helpers and csum_diff, and can successively enable other helpers where we find it makes sense. Helpers that definitely cannot be allowed for this are those part of bpf_helper_changes_skb_data() since they can change underlying data, and those that write into memory as this could happen for packet typed args when still cloned. bpf_csum_update() helper accommodates the fact that we need to fixup checksum_complete when using direct write instead of bpf_skb_store_bytes(), meaning the programs can use available helpers like bpf_csum_diff(), and implement csum_add(), csum_sub(), csum_block_add(), csum_block_sub() equivalents in eBPF together with the new helper. A usage example will be provided for iproute2's examples/bpf/ directory. Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Acked-by: Alexei Starovoitov <ast@kernel.org> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'kernel/bpf')
-rw-r--r--kernel/bpf/helpers.c3
-rw-r--r--kernel/bpf/verifier.c54
2 files changed, 43 insertions, 14 deletions
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index a5b8bf8cfcfd..39918402e6e9 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -36,6 +36,7 @@ BPF_CALL_2(bpf_map_lookup_elem, struct bpf_map *, map, void *, key)
36const struct bpf_func_proto bpf_map_lookup_elem_proto = { 36const struct bpf_func_proto bpf_map_lookup_elem_proto = {
37 .func = bpf_map_lookup_elem, 37 .func = bpf_map_lookup_elem,
38 .gpl_only = false, 38 .gpl_only = false,
39 .pkt_access = true,
39 .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL, 40 .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
40 .arg1_type = ARG_CONST_MAP_PTR, 41 .arg1_type = ARG_CONST_MAP_PTR,
41 .arg2_type = ARG_PTR_TO_MAP_KEY, 42 .arg2_type = ARG_PTR_TO_MAP_KEY,
@@ -51,6 +52,7 @@ BPF_CALL_4(bpf_map_update_elem, struct bpf_map *, map, void *, key,
51const struct bpf_func_proto bpf_map_update_elem_proto = { 52const struct bpf_func_proto bpf_map_update_elem_proto = {
52 .func = bpf_map_update_elem, 53 .func = bpf_map_update_elem,
53 .gpl_only = false, 54 .gpl_only = false,
55 .pkt_access = true,
54 .ret_type = RET_INTEGER, 56 .ret_type = RET_INTEGER,
55 .arg1_type = ARG_CONST_MAP_PTR, 57 .arg1_type = ARG_CONST_MAP_PTR,
56 .arg2_type = ARG_PTR_TO_MAP_KEY, 58 .arg2_type = ARG_PTR_TO_MAP_KEY,
@@ -67,6 +69,7 @@ BPF_CALL_2(bpf_map_delete_elem, struct bpf_map *, map, void *, key)
67const struct bpf_func_proto bpf_map_delete_elem_proto = { 69const struct bpf_func_proto bpf_map_delete_elem_proto = {
68 .func = bpf_map_delete_elem, 70 .func = bpf_map_delete_elem,
69 .gpl_only = false, 71 .gpl_only = false,
72 .pkt_access = true,
70 .ret_type = RET_INTEGER, 73 .ret_type = RET_INTEGER,
71 .arg1_type = ARG_CONST_MAP_PTR, 74 .arg1_type = ARG_CONST_MAP_PTR,
72 .arg2_type = ARG_PTR_TO_MAP_KEY, 75 .arg2_type = ARG_PTR_TO_MAP_KEY,
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index bc138f34e38c..3a75ee3bdcd1 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -196,6 +196,7 @@ struct verifier_env {
196 u32 used_map_cnt; /* number of used maps */ 196 u32 used_map_cnt; /* number of used maps */
197 u32 id_gen; /* used to generate unique reg IDs */ 197 u32 id_gen; /* used to generate unique reg IDs */
198 bool allow_ptr_leaks; 198 bool allow_ptr_leaks;
199 bool seen_direct_write;
199}; 200};
200 201
201#define BPF_COMPLEXITY_LIMIT_INSNS 65536 202#define BPF_COMPLEXITY_LIMIT_INSNS 65536
@@ -204,6 +205,7 @@ struct verifier_env {
204struct bpf_call_arg_meta { 205struct bpf_call_arg_meta {
205 struct bpf_map *map_ptr; 206 struct bpf_map *map_ptr;
206 bool raw_mode; 207 bool raw_mode;
208 bool pkt_access;
207 int regno; 209 int regno;
208 int access_size; 210 int access_size;
209}; 211};
@@ -654,10 +656,17 @@ static int check_map_access(struct verifier_env *env, u32 regno, int off,
654 656
655#define MAX_PACKET_OFF 0xffff 657#define MAX_PACKET_OFF 0xffff
656 658
657static bool may_write_pkt_data(enum bpf_prog_type type) 659static bool may_access_direct_pkt_data(struct verifier_env *env,
660 const struct bpf_call_arg_meta *meta)
658{ 661{
659 switch (type) { 662 switch (env->prog->type) {
663 case BPF_PROG_TYPE_SCHED_CLS:
664 case BPF_PROG_TYPE_SCHED_ACT:
660 case BPF_PROG_TYPE_XDP: 665 case BPF_PROG_TYPE_XDP:
666 if (meta)
667 return meta->pkt_access;
668
669 env->seen_direct_write = true;
661 return true; 670 return true;
662 default: 671 default:
663 return false; 672 return false;
@@ -817,7 +826,7 @@ static int check_mem_access(struct verifier_env *env, u32 regno, int off,
817 err = check_stack_read(state, off, size, value_regno); 826 err = check_stack_read(state, off, size, value_regno);
818 } 827 }
819 } else if (state->regs[regno].type == PTR_TO_PACKET) { 828 } else if (state->regs[regno].type == PTR_TO_PACKET) {
820 if (t == BPF_WRITE && !may_write_pkt_data(env->prog->type)) { 829 if (t == BPF_WRITE && !may_access_direct_pkt_data(env, NULL)) {
821 verbose("cannot write into packet\n"); 830 verbose("cannot write into packet\n");
822 return -EACCES; 831 return -EACCES;
823 } 832 }
@@ -950,8 +959,8 @@ static int check_func_arg(struct verifier_env *env, u32 regno,
950 return 0; 959 return 0;
951 } 960 }
952 961
953 if (type == PTR_TO_PACKET && !may_write_pkt_data(env->prog->type)) { 962 if (type == PTR_TO_PACKET && !may_access_direct_pkt_data(env, meta)) {
954 verbose("helper access to the packet is not allowed for clsact\n"); 963 verbose("helper access to the packet is not allowed\n");
955 return -EACCES; 964 return -EACCES;
956 } 965 }
957 966
@@ -1191,6 +1200,7 @@ static int check_call(struct verifier_env *env, int func_id)
1191 changes_data = bpf_helper_changes_skb_data(fn->func); 1200 changes_data = bpf_helper_changes_skb_data(fn->func);
1192 1201
1193 memset(&meta, 0, sizeof(meta)); 1202 memset(&meta, 0, sizeof(meta));
1203 meta.pkt_access = fn->pkt_access;
1194 1204
1195 /* We only support one arg being in raw mode at the moment, which 1205 /* We only support one arg being in raw mode at the moment, which
1196 * is sufficient for the helper functions we have right now. 1206 * is sufficient for the helper functions we have right now.
@@ -2675,18 +2685,35 @@ static void convert_pseudo_ld_imm64(struct verifier_env *env)
2675 */ 2685 */
2676static int convert_ctx_accesses(struct verifier_env *env) 2686static int convert_ctx_accesses(struct verifier_env *env)
2677{ 2687{
2678 struct bpf_insn *insn = env->prog->insnsi; 2688 const struct bpf_verifier_ops *ops = env->prog->aux->ops;
2679 int insn_cnt = env->prog->len; 2689 struct bpf_insn insn_buf[16], *insn;
2680 struct bpf_insn insn_buf[16];
2681 struct bpf_prog *new_prog; 2690 struct bpf_prog *new_prog;
2682 enum bpf_access_type type; 2691 enum bpf_access_type type;
2683 int i; 2692 int i, insn_cnt, cnt;
2684 2693
2685 if (!env->prog->aux->ops->convert_ctx_access) 2694 if (ops->gen_prologue) {
2695 cnt = ops->gen_prologue(insn_buf, env->seen_direct_write,
2696 env->prog);
2697 if (cnt >= ARRAY_SIZE(insn_buf)) {
2698 verbose("bpf verifier is misconfigured\n");
2699 return -EINVAL;
2700 } else if (cnt) {
2701 new_prog = bpf_patch_insn_single(env->prog, 0,
2702 insn_buf, cnt);
2703 if (!new_prog)
2704 return -ENOMEM;
2705 env->prog = new_prog;
2706 }
2707 }
2708
2709 if (!ops->convert_ctx_access)
2686 return 0; 2710 return 0;
2687 2711
2712 insn_cnt = env->prog->len;
2713 insn = env->prog->insnsi;
2714
2688 for (i = 0; i < insn_cnt; i++, insn++) { 2715 for (i = 0; i < insn_cnt; i++, insn++) {
2689 u32 insn_delta, cnt; 2716 u32 insn_delta;
2690 2717
2691 if (insn->code == (BPF_LDX | BPF_MEM | BPF_W) || 2718 if (insn->code == (BPF_LDX | BPF_MEM | BPF_W) ||
2692 insn->code == (BPF_LDX | BPF_MEM | BPF_DW)) 2719 insn->code == (BPF_LDX | BPF_MEM | BPF_DW))
@@ -2703,9 +2730,8 @@ static int convert_ctx_accesses(struct verifier_env *env)
2703 continue; 2730 continue;
2704 } 2731 }
2705 2732
2706 cnt = env->prog->aux->ops-> 2733 cnt = ops->convert_ctx_access(type, insn->dst_reg, insn->src_reg,
2707 convert_ctx_access(type, insn->dst_reg, insn->src_reg, 2734 insn->off, insn_buf, env->prog);
2708 insn->off, insn_buf, env->prog);
2709 if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) { 2735 if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) {
2710 verbose("bpf verifier is misconfigured\n"); 2736 verbose("bpf verifier is misconfigured\n");
2711 return -EINVAL; 2737 return -EINVAL;