diff options
author | Alexei Starovoitov <ast@plumgrid.com> | 2015-10-20 23:02:34 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2015-10-22 09:42:15 -0400 |
commit | a43eec304259a6c637f4014a6d4767159b6a3aa3 (patch) | |
tree | aecaeb92ff5263f446b002793d89a2a211dc246b | |
parent | fa128e6a148a0a58355bd6814c6283515bbd028a (diff) |
bpf: introduce bpf_perf_event_output() helper
This helper is used to send raw data from an eBPF program into a
special PERF_TYPE_SOFTWARE/PERF_COUNT_SW_BPF_OUTPUT perf_event.
User space needs to perf_event_open() it (either for one or all CPUs) and
store the FD into a perf_event_array (similar to the bpf_perf_event_read() helper)
before an eBPF program can send data into it.
Today, programs triggered by kprobes collect data and either store
it in maps or print it via bpf_trace_printk(); the latter is a debug
facility and is not suitable for streaming data. This new helper replaces
such bpf_trace_printk() usage and gives programs a dedicated
channel into user space for post-processing of the raw data collected.
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | include/uapi/linux/bpf.h | 11 | ||||
-rw-r--r-- | include/uapi/linux/perf_event.h | 1 | ||||
-rw-r--r-- | kernel/bpf/arraymap.c | 2 | ||||
-rw-r--r-- | kernel/bpf/verifier.c | 3 | ||||
-rw-r--r-- | kernel/trace/bpf_trace.c | 46 |
5 files changed, 62 insertions, 1 deletion
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 564f1f091991..2e032426cfb7 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h | |||
@@ -287,6 +287,17 @@ enum bpf_func_id { | |||
287 | * Return: realm if != 0 | 287 | * Return: realm if != 0 |
288 | */ | 288 | */ |
289 | BPF_FUNC_get_route_realm, | 289 | BPF_FUNC_get_route_realm, |
290 | |||
291 | /** | ||
292 | * bpf_perf_event_output(ctx, map, index, data, size) - output perf raw sample | ||
293 | * @ctx: struct pt_regs* | ||
294 | * @map: pointer to perf_event_array map | ||
295 | * @index: index of event in the map | ||
296 | * @data: data on stack to be output as raw data | ||
297 | * @size: size of data | ||
298 | * Return: 0 on success | ||
299 | */ | ||
300 | BPF_FUNC_perf_event_output, | ||
290 | __BPF_FUNC_MAX_ID, | 301 | __BPF_FUNC_MAX_ID, |
291 | }; | 302 | }; |
292 | 303 | ||
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 2881145cda86..d3c417615361 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h | |||
@@ -110,6 +110,7 @@ enum perf_sw_ids { | |||
110 | PERF_COUNT_SW_ALIGNMENT_FAULTS = 7, | 110 | PERF_COUNT_SW_ALIGNMENT_FAULTS = 7, |
111 | PERF_COUNT_SW_EMULATION_FAULTS = 8, | 111 | PERF_COUNT_SW_EMULATION_FAULTS = 8, |
112 | PERF_COUNT_SW_DUMMY = 9, | 112 | PERF_COUNT_SW_DUMMY = 9, |
113 | PERF_COUNT_SW_BPF_OUTPUT = 10, | ||
113 | 114 | ||
114 | PERF_COUNT_SW_MAX, /* non-ABI */ | 115 | PERF_COUNT_SW_MAX, /* non-ABI */ |
115 | }; | 116 | }; |
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index f2d9e698c753..e3cfe46b074f 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c | |||
@@ -295,6 +295,8 @@ static void *perf_event_fd_array_get_ptr(struct bpf_map *map, int fd) | |||
295 | return (void *)attr; | 295 | return (void *)attr; |
296 | 296 | ||
297 | if (attr->type != PERF_TYPE_RAW && | 297 | if (attr->type != PERF_TYPE_RAW && |
298 | !(attr->type == PERF_TYPE_SOFTWARE && | ||
299 | attr->config == PERF_COUNT_SW_BPF_OUTPUT) && | ||
298 | attr->type != PERF_TYPE_HARDWARE) { | 300 | attr->type != PERF_TYPE_HARDWARE) { |
299 | perf_event_release_kernel(event); | 301 | perf_event_release_kernel(event); |
300 | return ERR_PTR(-EINVAL); | 302 | return ERR_PTR(-EINVAL); |
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 1d6b97be79e1..b56cf51f8d42 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c | |||
@@ -245,6 +245,7 @@ static const struct { | |||
245 | } func_limit[] = { | 245 | } func_limit[] = { |
246 | {BPF_MAP_TYPE_PROG_ARRAY, BPF_FUNC_tail_call}, | 246 | {BPF_MAP_TYPE_PROG_ARRAY, BPF_FUNC_tail_call}, |
247 | {BPF_MAP_TYPE_PERF_EVENT_ARRAY, BPF_FUNC_perf_event_read}, | 247 | {BPF_MAP_TYPE_PERF_EVENT_ARRAY, BPF_FUNC_perf_event_read}, |
248 | {BPF_MAP_TYPE_PERF_EVENT_ARRAY, BPF_FUNC_perf_event_output}, | ||
248 | }; | 249 | }; |
249 | 250 | ||
250 | static void print_verifier_state(struct verifier_env *env) | 251 | static void print_verifier_state(struct verifier_env *env) |
@@ -910,7 +911,7 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id) | |||
910 | * don't allow any other map type to be passed into | 911 | * don't allow any other map type to be passed into |
911 | * the special func; | 912 | * the special func; |
912 | */ | 913 | */ |
913 | if (bool_map != bool_func) | 914 | if (bool_func && bool_map != bool_func) |
914 | return -EINVAL; | 915 | return -EINVAL; |
915 | } | 916 | } |
916 | 917 | ||
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 0fe96c7c8803..47febbe7998e 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c | |||
@@ -215,6 +215,50 @@ const struct bpf_func_proto bpf_perf_event_read_proto = { | |||
215 | .arg2_type = ARG_ANYTHING, | 215 | .arg2_type = ARG_ANYTHING, |
216 | }; | 216 | }; |
217 | 217 | ||
218 | static u64 bpf_perf_event_output(u64 r1, u64 r2, u64 index, u64 r4, u64 size) | ||
219 | { | ||
220 | struct pt_regs *regs = (struct pt_regs *) (long) r1; | ||
221 | struct bpf_map *map = (struct bpf_map *) (long) r2; | ||
222 | struct bpf_array *array = container_of(map, struct bpf_array, map); | ||
223 | void *data = (void *) (long) r4; | ||
224 | struct perf_sample_data sample_data; | ||
225 | struct perf_event *event; | ||
226 | struct perf_raw_record raw = { | ||
227 | .size = size, | ||
228 | .data = data, | ||
229 | }; | ||
230 | |||
231 | if (unlikely(index >= array->map.max_entries)) | ||
232 | return -E2BIG; | ||
233 | |||
234 | event = (struct perf_event *)array->ptrs[index]; | ||
235 | if (unlikely(!event)) | ||
236 | return -ENOENT; | ||
237 | |||
238 | if (unlikely(event->attr.type != PERF_TYPE_SOFTWARE || | ||
239 | event->attr.config != PERF_COUNT_SW_BPF_OUTPUT)) | ||
240 | return -EINVAL; | ||
241 | |||
242 | if (unlikely(event->oncpu != smp_processor_id())) | ||
243 | return -EOPNOTSUPP; | ||
244 | |||
245 | perf_sample_data_init(&sample_data, 0, 0); | ||
246 | sample_data.raw = &raw; | ||
247 | perf_event_output(event, &sample_data, regs); | ||
248 | return 0; | ||
249 | } | ||
250 | |||
251 | static const struct bpf_func_proto bpf_perf_event_output_proto = { | ||
252 | .func = bpf_perf_event_output, | ||
253 | .gpl_only = false, | ||
254 | .ret_type = RET_INTEGER, | ||
255 | .arg1_type = ARG_PTR_TO_CTX, | ||
256 | .arg2_type = ARG_CONST_MAP_PTR, | ||
257 | .arg3_type = ARG_ANYTHING, | ||
258 | .arg4_type = ARG_PTR_TO_STACK, | ||
259 | .arg5_type = ARG_CONST_STACK_SIZE, | ||
260 | }; | ||
261 | |||
218 | static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func_id) | 262 | static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func_id) |
219 | { | 263 | { |
220 | switch (func_id) { | 264 | switch (func_id) { |
@@ -242,6 +286,8 @@ static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func | |||
242 | return &bpf_get_smp_processor_id_proto; | 286 | return &bpf_get_smp_processor_id_proto; |
243 | case BPF_FUNC_perf_event_read: | 287 | case BPF_FUNC_perf_event_read: |
244 | return &bpf_perf_event_read_proto; | 288 | return &bpf_perf_event_read_proto; |
289 | case BPF_FUNC_perf_event_output: | ||
290 | return &bpf_perf_event_output_proto; | ||
245 | default: | 291 | default: |
246 | return NULL; | 292 | return NULL; |
247 | } | 293 | } |