author	Alexei Starovoitov <ast@fb.com>	2016-04-06 21:43:25 -0400
committer	David S. Miller <davem@davemloft.net>	2016-04-07 21:04:26 -0400
commit	98b5c2c65c2951772a8fc661f50d675e450e8bce (patch)
tree	a72f04b6056a18b50d6244e8f4b6f41e95f9e0c5
parent	1e1dcd93b468901e114f279c94a0b356adc5e7cd (diff)
perf, bpf: allow bpf programs to attach to tracepoints
Introduce the BPF_PROG_TYPE_TRACEPOINT program type and allow it to be
attached to the perf tracepoint handler, which will copy the arguments
into the per-cpu buffer and pass it to the bpf program as its first
argument.

The layout of the fields can be discovered by doing
'cat /sys/kernel/debug/tracing/events/sched/sched_switch/format'
prior to the compilation of the program, with the exception that the
first 8 bytes are reserved and not accessible to the program. This area
is used to store the pointer to 'struct pt_regs' which some of the bpf
helpers will use:

+---------+
| 8 bytes | hidden 'struct pt_regs *' (inaccessible to bpf program)
+---------+
| N bytes | static tracepoint fields defined in tracepoint/format (bpf readonly)
+---------+
| dynamic | __dynamic_array bytes of tracepoint (inaccessible to bpf yet)
+---------+

Note that all of the fields are already dumped to user space via the perf
ring buffer, and broken applications access them directly without consulting
tracepoint/format. The same rule applies here: static tracepoint fields
should only be accessed in a format defined in tracepoint/format. The order
of fields and the field sizes are not an ABI.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
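For illustration, a minimal program against this layout might look like the
sketch below. This is not part of the patch: the struct mirrors the
sched_switch tracepoint/format fields as an assumed example, the offsets must
be re-derived from the format file on the target kernel, and the
"tracepoint/" section naming follows the samples/bpf loader convention
introduced alongside this series.

	/* sketch: a BPF_PROG_TYPE_TRACEPOINT program for sched/sched_switch */
	#define SEC(name) __attribute__((section(name), used))

	struct sched_switch_args {
		unsigned long long pad;	/* first 8 bytes: hidden 'struct pt_regs *' */
		char prev_comm[16];	/* static fields as listed in */
		int prev_pid;		/* tracepoint/format (bpf readonly) */
		int prev_prio;
		long long prev_state;
		char next_comm[16];
		int next_pid;
		int next_prio;
	};

	SEC("tracepoint/sched/sched_switch")
	int bpf_prog(struct sched_switch_args *ctx)
	{
		/* returning 0 drops the event before perf_trace_buf_submit(),
		 * non-zero lets it reach the perf ring buffer as usual
		 */
		if (ctx->next_pid == 0)
			return 0;	/* e.g. skip switches to the idle task */
		return 1;
	}

The return value feeds the !trace_call_bpf(prog, entry) check added to
include/trace/perf.h below.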
-rw-r--r--	include/trace/perf.h	10
-rw-r--r--	include/uapi/linux/bpf.h	1
-rw-r--r--	kernel/events/core.c	13
3 files changed, 19 insertions(+), 5 deletions(-)
diff --git a/include/trace/perf.h b/include/trace/perf.h
index 77cd9043b7e4..a182306eefd7 100644
--- a/include/trace/perf.h
+++ b/include/trace/perf.h
@@ -34,6 +34,7 @@ perf_trace_##call(void *__data, proto) \
 	struct trace_event_call *event_call = __data;			\
 	struct trace_event_data_offsets_##call __maybe_unused __data_offsets;\
 	struct trace_event_raw_##call *entry;				\
+	struct bpf_prog *prog = event_call->prog;			\
 	struct pt_regs *__regs;						\
 	u64 __count = 1;						\
 	struct task_struct *__task = NULL;				\
@@ -45,7 +46,7 @@ perf_trace_##call(void *__data, proto) \
 	__data_size = trace_event_get_offsets_##call(&__data_offsets, args); \
 									\
 	head = this_cpu_ptr(event_call->perf_events);			\
-	if (__builtin_constant_p(!__task) && !__task &&			\
+	if (!prog && __builtin_constant_p(!__task) && !__task &&	\
 				hlist_empty(head))			\
 		return;							\
 									\
@@ -63,6 +64,13 @@ perf_trace_##call(void *__data, proto) \
 									\
 	{ assign; }							\
 									\
+	if (prog) {							\
+		*(struct pt_regs **)entry = __regs;			\
+		if (!trace_call_bpf(prog, entry) || hlist_empty(head)) { \
+			perf_swevent_put_recursion_context(rctx);	\
+			return;						\
+		}							\
+	}								\
 	perf_trace_buf_submit(entry, __entry_size, rctx,		\
 			      event_call->event.type, __count, __regs,	\
 			      head, __task);				\
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 23917bb47bf3..70eda5aeb304 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -92,6 +92,7 @@ enum bpf_prog_type {
 	BPF_PROG_TYPE_KPROBE,
 	BPF_PROG_TYPE_SCHED_CLS,
 	BPF_PROG_TYPE_SCHED_ACT,
+	BPF_PROG_TYPE_TRACEPOINT,
 };
 
 #define BPF_PSEUDO_MAP_FD	1
diff --git a/kernel/events/core.c b/kernel/events/core.c
index d8512883c0a0..e5ffe97d6166 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -6725,12 +6725,13 @@ int perf_swevent_get_recursion_context(void)
 }
 EXPORT_SYMBOL_GPL(perf_swevent_get_recursion_context);
 
-inline void perf_swevent_put_recursion_context(int rctx)
+void perf_swevent_put_recursion_context(int rctx)
 {
 	struct swevent_htable *swhash = this_cpu_ptr(&swevent_htable);
 
 	put_recursion_context(swhash->recursion, rctx);
 }
+EXPORT_SYMBOL_GPL(perf_swevent_put_recursion_context);
 
 void ___perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr)
 {
@@ -7106,6 +7107,7 @@ static void perf_event_free_filter(struct perf_event *event)
 
 static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)
 {
+	bool is_kprobe, is_tracepoint;
 	struct bpf_prog *prog;
 
 	if (event->attr.type != PERF_TYPE_TRACEPOINT)
@@ -7114,15 +7116,18 @@ static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)
 	if (event->tp_event->prog)
 		return -EEXIST;
 
-	if (!(event->tp_event->flags & TRACE_EVENT_FL_UKPROBE))
-		/* bpf programs can only be attached to u/kprobes */
+	is_kprobe = event->tp_event->flags & TRACE_EVENT_FL_UKPROBE;
+	is_tracepoint = event->tp_event->flags & TRACE_EVENT_FL_TRACEPOINT;
+	if (!is_kprobe && !is_tracepoint)
+		/* bpf programs can only be attached to u/kprobe or tracepoint */
 		return -EINVAL;
 
 	prog = bpf_prog_get(prog_fd);
 	if (IS_ERR(prog))
 		return PTR_ERR(prog);
 
-	if (prog->type != BPF_PROG_TYPE_KPROBE) {
+	if ((is_kprobe && prog->type != BPF_PROG_TYPE_KPROBE) ||
+	    (is_tracepoint && prog->type != BPF_PROG_TYPE_TRACEPOINT)) {
 		/* valid fd, but invalid bpf program type */
 		bpf_prog_put(prog);
 		return -EINVAL;
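
For context, the userspace side of this attach path might look like the
following sketch (illustrative, not part of the patch): prog_fd is assumed to
hold a program already loaded via bpf(BPF_PROG_LOAD, ...) with type
BPF_PROG_TYPE_TRACEPOINT, and tp_id is the value read from
/sys/kernel/debug/tracing/events/<category>/<name>/id.

	#include <linux/perf_event.h>
	#include <sys/ioctl.h>
	#include <sys/syscall.h>
	#include <unistd.h>

	/* open a tracepoint perf event and attach a bpf program to it;
	 * PERF_EVENT_IOC_SET_BPF ends up in perf_event_set_bpf_prog() above
	 */
	static int attach_tp_prog(unsigned long long tp_id, int prog_fd)
	{
		struct perf_event_attr attr = {
			.type = PERF_TYPE_TRACEPOINT,
			.size = sizeof(attr),
			.config = tp_id,
			.sample_type = PERF_SAMPLE_RAW,
			.sample_period = 1,
		};
		int efd = syscall(__NR_perf_event_open, &attr,
				  -1 /* pid */, 0 /* cpu */,
				  -1 /* group_fd */, 0 /* flags */);

		if (efd < 0)
			return -1;
		if (ioctl(efd, PERF_EVENT_IOC_ENABLE, 0) ||
		    ioctl(efd, PERF_EVENT_IOC_SET_BPF, prog_fd)) {
			close(efd);
			return -1;
		}
		return efd;	/* one program per tp_event; -EEXIST otherwise */
	}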