author		Alexei Starovoitov <ast@fb.com>		2016-04-06 21:43:25 -0400
committer	David S. Miller <davem@davemloft.net>	2016-04-07 21:04:26 -0400
commit		98b5c2c65c2951772a8fc661f50d675e450e8bce
tree		a72f04b6056a18b50d6244e8f4b6f41e95f9e0c5
parent		1e1dcd93b468901e114f279c94a0b356adc5e7cd
perf, bpf: allow bpf programs attach to tracepoints
Introduce the BPF_PROG_TYPE_TRACEPOINT program type and allow it to be attached
to the perf tracepoint handler, which copies the tracepoint arguments into
a per-cpu buffer and passes that buffer to the bpf program as its first argument.
The layout of the fields can be discovered by doing
'cat /sys/kernel/debug/tracing/events/sched/sched_switch/format'
prior to compiling the program, with the exception that the first 8 bytes
are reserved and not accessible to the program. This area is used to store
the pointer to 'struct pt_regs' which some of the bpf helpers will use:
+---------+
| 8 bytes | hidden 'struct pt_regs *' (inaccessible to bpf program)
+---------+
| N bytes | static tracepoint fields defined in tracepoint/format (bpf readonly)
+---------+
| dynamic | __dynamic_array bytes of tracepoint (inaccessible to bpf yet)
+---------+
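
For example, for sched:sched_switch the format file looks roughly like this
(abbreviated; the event ID and the exact offsets vary per kernel build):

name: sched_switch
ID: 268
format:
        field:unsigned short common_type; offset:0; size:2; signed:0;
        field:unsigned char common_flags; offset:2; size:1; signed:0;
        field:unsigned char common_preempt_count; offset:3; size:1; signed:0;
        field:int common_pid; offset:4; size:4; signed:1;

        field:char prev_comm[16]; offset:8; size:16; signed:1;
        field:pid_t prev_pid; offset:24; size:4; signed:1;
        field:int prev_prio; offset:28; size:4; signed:1;
        field:long prev_state; offset:32; size:8; signed:1;
        field:char next_comm[16]; offset:40; size:16; signed:1;
        field:pid_t next_pid; offset:56; size:4; signed:1;
        field:int next_prio; offset:60; size:4; signed:1;

The common_* fields occupy the first 8 bytes, which is exactly the area that
is hidden from the program and reused for the 'struct pt_regs *' pointer.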
Note that all of these fields are already dumped to user space via the perf
ring buffer, and broken applications access them directly without consulting
tracepoint/format. The same rule applies here: static tracepoint fields should
only be accessed in the layout defined by tracepoint/format. The order of the
fields and their sizes are not part of the ABI.
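
As a minimal sketch of a conforming program (the context struct mirrors the
format file above; SEC() and bpf_trace_printk() follow the samples/bpf
conventions and are illustrative, not part of this patch):

struct sched_switch_args {
        unsigned long long pad;         /* hidden 'struct pt_regs *' slot */
        char prev_comm[16];
        int prev_pid;
        int prev_prio;
        long long prev_state;
        char next_comm[16];
        int next_pid;
        int next_prio;
};

SEC("tracepoint/sched/sched_switch")
int bpf_prog(struct sched_switch_args *ctx)
{
        /* static tracepoint fields are read-only to the program */
        char fmt[] = "prev %d -> next %d\n";

        bpf_trace_printk(fmt, sizeof(fmt), ctx->prev_pid, ctx->next_pid);
        return 0;
}

char _license[] SEC("license") = "GPL";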
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
 include/trace/perf.h     | 10 +++++++++-
 include/uapi/linux/bpf.h |  1 +
 kernel/events/core.c     | 13 +++++++++----
 3 files changed, 19 insertions(+), 5 deletions(-)
diff --git a/include/trace/perf.h b/include/trace/perf.h
index 77cd9043b7e4..a182306eefd7 100644
--- a/include/trace/perf.h
+++ b/include/trace/perf.h
@@ -34,6 +34,7 @@ perf_trace_##call(void *__data, proto) \
 	struct trace_event_call *event_call = __data; \
 	struct trace_event_data_offsets_##call __maybe_unused __data_offsets;\
 	struct trace_event_raw_##call *entry; \
+	struct bpf_prog *prog = event_call->prog; \
 	struct pt_regs *__regs; \
 	u64 __count = 1; \
 	struct task_struct *__task = NULL; \
@@ -45,7 +46,7 @@ perf_trace_##call(void *__data, proto) \
 	__data_size = trace_event_get_offsets_##call(&__data_offsets, args); \
 	 \
 	head = this_cpu_ptr(event_call->perf_events); \
-	if (__builtin_constant_p(!__task) && !__task && \
+	if (!prog && __builtin_constant_p(!__task) && !__task && \
 	    hlist_empty(head)) \
 		return; \
 	 \
@@ -63,6 +64,13 @@ perf_trace_##call(void *__data, proto) \
 	 \
 	{ assign; } \
 	 \
+	if (prog) { \
+		*(struct pt_regs **)entry = __regs; \
+		if (!trace_call_bpf(prog, entry) || hlist_empty(head)) { \
+			perf_swevent_put_recursion_context(rctx); \
+			return; \
+		} \
+	} \
 	perf_trace_buf_submit(entry, __entry_size, rctx, \
 			      event_call->event.type, __count, __regs, \
 			      head, __task); \
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 23917bb47bf3..70eda5aeb304 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -92,6 +92,7 @@ enum bpf_prog_type {
 	BPF_PROG_TYPE_KPROBE,
 	BPF_PROG_TYPE_SCHED_CLS,
 	BPF_PROG_TYPE_SCHED_ACT,
+	BPF_PROG_TYPE_TRACEPOINT,
 };
 
 #define BPF_PSEUDO_MAP_FD	1
diff --git a/kernel/events/core.c b/kernel/events/core.c
index d8512883c0a0..e5ffe97d6166 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -6725,12 +6725,13 @@ int perf_swevent_get_recursion_context(void)
 }
 EXPORT_SYMBOL_GPL(perf_swevent_get_recursion_context);
 
-inline void perf_swevent_put_recursion_context(int rctx)
+void perf_swevent_put_recursion_context(int rctx)
 {
 	struct swevent_htable *swhash = this_cpu_ptr(&swevent_htable);
 
 	put_recursion_context(swhash->recursion, rctx);
 }
+EXPORT_SYMBOL_GPL(perf_swevent_put_recursion_context);
 
 void ___perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr)
 {
@@ -7106,6 +7107,7 @@ static void perf_event_free_filter(struct perf_event *event)
 
 static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)
 {
+	bool is_kprobe, is_tracepoint;
 	struct bpf_prog *prog;
 
 	if (event->attr.type != PERF_TYPE_TRACEPOINT)
@@ -7114,15 +7116,18 @@ static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)
 	if (event->tp_event->prog)
 		return -EEXIST;
 
-	if (!(event->tp_event->flags & TRACE_EVENT_FL_UKPROBE))
-		/* bpf programs can only be attached to u/kprobes */
+	is_kprobe = event->tp_event->flags & TRACE_EVENT_FL_UKPROBE;
+	is_tracepoint = event->tp_event->flags & TRACE_EVENT_FL_TRACEPOINT;
+	if (!is_kprobe && !is_tracepoint)
+		/* bpf programs can only be attached to u/kprobe or tracepoint */
 		return -EINVAL;
 
 	prog = bpf_prog_get(prog_fd);
 	if (IS_ERR(prog))
 		return PTR_ERR(prog);
 
-	if (prog->type != BPF_PROG_TYPE_KPROBE) {
+	if ((is_kprobe && prog->type != BPF_PROG_TYPE_KPROBE) ||
+	    (is_tracepoint && prog->type != BPF_PROG_TYPE_TRACEPOINT)) {
 		/* valid fd, but invalid bpf program type */
 		bpf_prog_put(prog);
 		return -EINVAL;
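
For completeness, a user-space sketch of the attach path exercised above
(attach_tp_prog is a hypothetical helper; it assumes prog_fd came from a
successful BPF_PROG_LOAD of a BPF_PROG_TYPE_TRACEPOINT program and tp_id was
read from the 'ID:' line of the event's format file; error handling trimmed):

#include <linux/perf_event.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <unistd.h>

static int attach_tp_prog(int tp_id, int prog_fd)
{
	struct perf_event_attr attr = {
		.type		= PERF_TYPE_TRACEPOINT,
		.size		= sizeof(attr),
		.config		= tp_id,	/* tracepoint id */
		.sample_period	= 1,
	};
	int efd;

	/* all tasks on cpu 0; no glibc wrapper for perf_event_open */
	efd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, 0);
	if (efd < 0)
		return -1;

	/* kernel side lands in perf_event_set_bpf_prog() above */
	if (ioctl(efd, PERF_EVENT_IOC_SET_BPF, prog_fd) < 0) {
		close(efd);
		return -1;
	}
	ioctl(efd, PERF_EVENT_IOC_ENABLE, 0);
	return efd;
}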