diff options
author | Matt Mullins <mmullins@fb.com> | 2019-04-26 14:49:47 -0400 |
---|---|---|
committer | Alexei Starovoitov <ast@kernel.org> | 2019-04-26 22:04:19 -0400 |
commit | 9df1c28bb75217b244257152ab7d788bb2a386d0 (patch) | |
tree | 7fe10ffd9b59716b52b992f5663ec8b5102406b6 | |
parent | 34b8ab091f9ef57a2bb3c8c8359a0a03a8abf2f9 (diff) |
bpf: add writable context for raw tracepoints
This is an opt-in interface that allows a tracepoint to provide a safe
buffer that can be written from a BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE
program. The size of the buffer must be a compile-time constant, and the
program's largest access to the buffer is checked against that size
before the program is allowed to attach to a tracepoint that uses this
feature.
The pointer to this buffer will be the first argument of tracepoints
that opt in; the pointer is valid and can be read with bpf_probe_read()
by both BPF_PROG_TYPE_RAW_TRACEPOINT and
BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE programs that attach to such a
tracepoint, but the buffer to which it points may only be written by
the latter.
Signed-off-by: Matt Mullins <mmullins@fb.com>
Acked-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
-rw-r--r-- | include/linux/bpf.h | 2 | ||||
-rw-r--r-- | include/linux/bpf_types.h | 1 | ||||
-rw-r--r-- | include/linux/tracepoint-defs.h | 1 | ||||
-rw-r--r-- | include/trace/bpf_probe.h | 27 | ||||
-rw-r--r-- | include/uapi/linux/bpf.h | 1 | ||||
-rw-r--r-- | kernel/bpf/syscall.c | 8 | ||||
-rw-r--r-- | kernel/bpf/verifier.c | 31 | ||||
-rw-r--r-- | kernel/trace/bpf_trace.c | 24 |
8 files changed, 91 insertions, 4 deletions
diff --git a/include/linux/bpf.h b/include/linux/bpf.h index f15432d90728..cd6341eabd74 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h | |||
@@ -272,6 +272,7 @@ enum bpf_reg_type { | |||
272 | PTR_TO_SOCK_COMMON_OR_NULL, /* reg points to sock_common or NULL */ | 272 | PTR_TO_SOCK_COMMON_OR_NULL, /* reg points to sock_common or NULL */ |
273 | PTR_TO_TCP_SOCK, /* reg points to struct tcp_sock */ | 273 | PTR_TO_TCP_SOCK, /* reg points to struct tcp_sock */ |
274 | PTR_TO_TCP_SOCK_OR_NULL, /* reg points to struct tcp_sock or NULL */ | 274 | PTR_TO_TCP_SOCK_OR_NULL, /* reg points to struct tcp_sock or NULL */ |
275 | PTR_TO_TP_BUFFER, /* reg points to a writable raw tp's buffer */ | ||
275 | }; | 276 | }; |
276 | 277 | ||
277 | /* The information passed from prog-specific *_is_valid_access | 278 | /* The information passed from prog-specific *_is_valid_access |
@@ -361,6 +362,7 @@ struct bpf_prog_aux { | |||
361 | u32 used_map_cnt; | 362 | u32 used_map_cnt; |
362 | u32 max_ctx_offset; | 363 | u32 max_ctx_offset; |
363 | u32 max_pkt_offset; | 364 | u32 max_pkt_offset; |
365 | u32 max_tp_access; | ||
364 | u32 stack_depth; | 366 | u32 stack_depth; |
365 | u32 id; | 367 | u32 id; |
366 | u32 func_cnt; /* used by non-func prog as the number of func progs */ | 368 | u32 func_cnt; /* used by non-func prog as the number of func progs */ |
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index d26991a16894..a10d37bce364 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h | |||
@@ -25,6 +25,7 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_KPROBE, kprobe) | |||
25 | BPF_PROG_TYPE(BPF_PROG_TYPE_TRACEPOINT, tracepoint) | 25 | BPF_PROG_TYPE(BPF_PROG_TYPE_TRACEPOINT, tracepoint) |
26 | BPF_PROG_TYPE(BPF_PROG_TYPE_PERF_EVENT, perf_event) | 26 | BPF_PROG_TYPE(BPF_PROG_TYPE_PERF_EVENT, perf_event) |
27 | BPF_PROG_TYPE(BPF_PROG_TYPE_RAW_TRACEPOINT, raw_tracepoint) | 27 | BPF_PROG_TYPE(BPF_PROG_TYPE_RAW_TRACEPOINT, raw_tracepoint) |
28 | BPF_PROG_TYPE(BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, raw_tracepoint_writable) | ||
28 | #endif | 29 | #endif |
29 | #ifdef CONFIG_CGROUP_BPF | 30 | #ifdef CONFIG_CGROUP_BPF |
30 | BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_DEVICE, cg_dev) | 31 | BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_DEVICE, cg_dev) |
diff --git a/include/linux/tracepoint-defs.h b/include/linux/tracepoint-defs.h index 49ba9cde7e4b..b29950a19205 100644 --- a/include/linux/tracepoint-defs.h +++ b/include/linux/tracepoint-defs.h | |||
@@ -45,6 +45,7 @@ struct bpf_raw_event_map { | |||
45 | struct tracepoint *tp; | 45 | struct tracepoint *tp; |
46 | void *bpf_func; | 46 | void *bpf_func; |
47 | u32 num_args; | 47 | u32 num_args; |
48 | u32 writable_size; | ||
48 | } __aligned(32); | 49 | } __aligned(32); |
49 | 50 | ||
50 | #endif | 51 | #endif |
diff --git a/include/trace/bpf_probe.h b/include/trace/bpf_probe.h index 505dae0bed80..d6e556c0a085 100644 --- a/include/trace/bpf_probe.h +++ b/include/trace/bpf_probe.h | |||
@@ -69,8 +69,7 @@ __bpf_trace_##call(void *__data, proto) \ | |||
69 | * to make sure that if the tracepoint handling changes, the | 69 | * to make sure that if the tracepoint handling changes, the |
70 | * bpf probe will fail to compile unless it too is updated. | 70 | * bpf probe will fail to compile unless it too is updated. |
71 | */ | 71 | */ |
72 | #undef DEFINE_EVENT | 72 | #define __DEFINE_EVENT(template, call, proto, args, size) \ |
73 | #define DEFINE_EVENT(template, call, proto, args) \ | ||
74 | static inline void bpf_test_probe_##call(void) \ | 73 | static inline void bpf_test_probe_##call(void) \ |
75 | { \ | 74 | { \ |
76 | check_trace_callback_type_##call(__bpf_trace_##template); \ | 75 | check_trace_callback_type_##call(__bpf_trace_##template); \ |
@@ -81,12 +80,36 @@ __bpf_trace_tp_map_##call = { \ | |||
81 | .tp = &__tracepoint_##call, \ | 80 | .tp = &__tracepoint_##call, \ |
82 | .bpf_func = (void *)__bpf_trace_##template, \ | 81 | .bpf_func = (void *)__bpf_trace_##template, \ |
83 | .num_args = COUNT_ARGS(args), \ | 82 | .num_args = COUNT_ARGS(args), \ |
83 | .writable_size = size, \ | ||
84 | }; | 84 | }; |
85 | 85 | ||
86 | #define FIRST(x, ...) x | ||
87 | |||
88 | #undef DEFINE_EVENT_WRITABLE | ||
89 | #define DEFINE_EVENT_WRITABLE(template, call, proto, args, size) \ | ||
90 | static inline void bpf_test_buffer_##call(void) \ | ||
91 | { \ | ||
92 | /* BUILD_BUG_ON() is ignored if the code is completely eliminated, but \ | ||
93 | * BUILD_BUG_ON_ZERO() uses a different mechanism that is not \ | ||
94 | * dead-code-eliminated. \ | ||
95 | */ \ | ||
96 | FIRST(proto); \ | ||
97 | (void)BUILD_BUG_ON_ZERO(size != sizeof(*FIRST(args))); \ | ||
98 | } \ | ||
99 | __DEFINE_EVENT(template, call, PARAMS(proto), PARAMS(args), size) | ||
100 | |||
101 | #undef DEFINE_EVENT | ||
102 | #define DEFINE_EVENT(template, call, proto, args) \ | ||
103 | __DEFINE_EVENT(template, call, PARAMS(proto), PARAMS(args), 0) | ||
86 | 104 | ||
87 | #undef DEFINE_EVENT_PRINT | 105 | #undef DEFINE_EVENT_PRINT |
88 | #define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ | 106 | #define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ |
89 | DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args)) | 107 | DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args)) |
90 | 108 | ||
91 | #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) | 109 | #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) |
110 | |||
111 | #undef DEFINE_EVENT_WRITABLE | ||
112 | #undef __DEFINE_EVENT | ||
113 | #undef FIRST | ||
114 | |||
92 | #endif /* CONFIG_BPF_EVENTS */ | 115 | #endif /* CONFIG_BPF_EVENTS */ |
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index eaf2d3284248..f7fa7a34a62d 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h | |||
@@ -168,6 +168,7 @@ enum bpf_prog_type { | |||
168 | BPF_PROG_TYPE_SK_REUSEPORT, | 168 | BPF_PROG_TYPE_SK_REUSEPORT, |
169 | BPF_PROG_TYPE_FLOW_DISSECTOR, | 169 | BPF_PROG_TYPE_FLOW_DISSECTOR, |
170 | BPF_PROG_TYPE_CGROUP_SYSCTL, | 170 | BPF_PROG_TYPE_CGROUP_SYSCTL, |
171 | BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, | ||
171 | }; | 172 | }; |
172 | 173 | ||
173 | enum bpf_attach_type { | 174 | enum bpf_attach_type { |
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index b0de49598341..ae141e745f92 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c | |||
@@ -1789,12 +1789,16 @@ static int bpf_raw_tracepoint_open(const union bpf_attr *attr) | |||
1789 | } | 1789 | } |
1790 | raw_tp->btp = btp; | 1790 | raw_tp->btp = btp; |
1791 | 1791 | ||
1792 | prog = bpf_prog_get_type(attr->raw_tracepoint.prog_fd, | 1792 | prog = bpf_prog_get(attr->raw_tracepoint.prog_fd); |
1793 | BPF_PROG_TYPE_RAW_TRACEPOINT); | ||
1794 | if (IS_ERR(prog)) { | 1793 | if (IS_ERR(prog)) { |
1795 | err = PTR_ERR(prog); | 1794 | err = PTR_ERR(prog); |
1796 | goto out_free_tp; | 1795 | goto out_free_tp; |
1797 | } | 1796 | } |
1797 | if (prog->type != BPF_PROG_TYPE_RAW_TRACEPOINT && | ||
1798 | prog->type != BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE) { | ||
1799 | err = -EINVAL; | ||
1800 | goto out_put_prog; | ||
1801 | } | ||
1798 | 1802 | ||
1799 | err = bpf_probe_register(raw_tp->btp, prog); | 1803 | err = bpf_probe_register(raw_tp->btp, prog); |
1800 | if (err) | 1804 | if (err) |
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 423f242a5efb..2ef442c62c0e 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c | |||
@@ -405,6 +405,7 @@ static const char * const reg_type_str[] = { | |||
405 | [PTR_TO_SOCK_COMMON_OR_NULL] = "sock_common_or_null", | 405 | [PTR_TO_SOCK_COMMON_OR_NULL] = "sock_common_or_null", |
406 | [PTR_TO_TCP_SOCK] = "tcp_sock", | 406 | [PTR_TO_TCP_SOCK] = "tcp_sock", |
407 | [PTR_TO_TCP_SOCK_OR_NULL] = "tcp_sock_or_null", | 407 | [PTR_TO_TCP_SOCK_OR_NULL] = "tcp_sock_or_null", |
408 | [PTR_TO_TP_BUFFER] = "tp_buffer", | ||
408 | }; | 409 | }; |
409 | 410 | ||
410 | static char slot_type_char[] = { | 411 | static char slot_type_char[] = { |
@@ -1993,6 +1994,32 @@ static int check_ctx_reg(struct bpf_verifier_env *env, | |||
1993 | return 0; | 1994 | return 0; |
1994 | } | 1995 | } |
1995 | 1996 | ||
1997 | static int check_tp_buffer_access(struct bpf_verifier_env *env, | ||
1998 | const struct bpf_reg_state *reg, | ||
1999 | int regno, int off, int size) | ||
2000 | { | ||
2001 | if (off < 0) { | ||
2002 | verbose(env, | ||
2003 | "R%d invalid tracepoint buffer access: off=%d, size=%d", | ||
2004 | regno, off, size); | ||
2005 | return -EACCES; | ||
2006 | } | ||
2007 | if (!tnum_is_const(reg->var_off) || reg->var_off.value) { | ||
2008 | char tn_buf[48]; | ||
2009 | |||
2010 | tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); | ||
2011 | verbose(env, | ||
2012 | "R%d invalid variable buffer offset: off=%d, var_off=%s", | ||
2013 | regno, off, tn_buf); | ||
2014 | return -EACCES; | ||
2015 | } | ||
2016 | if (off + size > env->prog->aux->max_tp_access) | ||
2017 | env->prog->aux->max_tp_access = off + size; | ||
2018 | |||
2019 | return 0; | ||
2020 | } | ||
2021 | |||
2022 | |||
1996 | /* truncate register to smaller size (in bytes) | 2023 | /* truncate register to smaller size (in bytes) |
1997 | * must be called with size < BPF_REG_SIZE | 2024 | * must be called with size < BPF_REG_SIZE |
1998 | */ | 2025 | */ |
@@ -2137,6 +2164,10 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn | |||
2137 | err = check_sock_access(env, insn_idx, regno, off, size, t); | 2164 | err = check_sock_access(env, insn_idx, regno, off, size, t); |
2138 | if (!err && value_regno >= 0) | 2165 | if (!err && value_regno >= 0) |
2139 | mark_reg_unknown(env, regs, value_regno); | 2166 | mark_reg_unknown(env, regs, value_regno); |
2167 | } else if (reg->type == PTR_TO_TP_BUFFER) { | ||
2168 | err = check_tp_buffer_access(env, reg, regno, off, size); | ||
2169 | if (!err && t == BPF_READ && value_regno >= 0) | ||
2170 | mark_reg_unknown(env, regs, value_regno); | ||
2140 | } else { | 2171 | } else { |
2141 | verbose(env, "R%d invalid mem access '%s'\n", regno, | 2172 | verbose(env, "R%d invalid mem access '%s'\n", regno, |
2142 | reg_type_str[reg->type]); | 2173 | reg_type_str[reg->type]); |
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 91800be0c8eb..8607aba1d882 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c | |||
@@ -915,6 +915,27 @@ const struct bpf_verifier_ops raw_tracepoint_verifier_ops = { | |||
915 | const struct bpf_prog_ops raw_tracepoint_prog_ops = { | 915 | const struct bpf_prog_ops raw_tracepoint_prog_ops = { |
916 | }; | 916 | }; |
917 | 917 | ||
918 | static bool raw_tp_writable_prog_is_valid_access(int off, int size, | ||
919 | enum bpf_access_type type, | ||
920 | const struct bpf_prog *prog, | ||
921 | struct bpf_insn_access_aux *info) | ||
922 | { | ||
923 | if (off == 0) { | ||
924 | if (size != sizeof(u64) || type != BPF_READ) | ||
925 | return false; | ||
926 | info->reg_type = PTR_TO_TP_BUFFER; | ||
927 | } | ||
928 | return raw_tp_prog_is_valid_access(off, size, type, prog, info); | ||
929 | } | ||
930 | |||
931 | const struct bpf_verifier_ops raw_tracepoint_writable_verifier_ops = { | ||
932 | .get_func_proto = raw_tp_prog_func_proto, | ||
933 | .is_valid_access = raw_tp_writable_prog_is_valid_access, | ||
934 | }; | ||
935 | |||
936 | const struct bpf_prog_ops raw_tracepoint_writable_prog_ops = { | ||
937 | }; | ||
938 | |||
918 | static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type, | 939 | static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type, |
919 | const struct bpf_prog *prog, | 940 | const struct bpf_prog *prog, |
920 | struct bpf_insn_access_aux *info) | 941 | struct bpf_insn_access_aux *info) |
@@ -1204,6 +1225,9 @@ static int __bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog * | |||
1204 | if (prog->aux->max_ctx_offset > btp->num_args * sizeof(u64)) | 1225 | if (prog->aux->max_ctx_offset > btp->num_args * sizeof(u64)) |
1205 | return -EINVAL; | 1226 | return -EINVAL; |
1206 | 1227 | ||
1228 | if (prog->aux->max_tp_access > btp->writable_size) | ||
1229 | return -EINVAL; | ||
1230 | |||
1207 | return tracepoint_probe_register(tp, (void *)btp->bpf_func, prog); | 1231 | return tracepoint_probe_register(tp, (void *)btp->bpf_func, prog); |
1208 | } | 1232 | } |
1209 | 1233 | ||