commit    f91840a32deef5cb1bf73338bc5010f843b01426
tree      e7a3eec8f6794fda623941afb426db5c1f8472b0
parent    5071034e4af709d6783b7d105dc296a5cc84739b
author    Alexei Starovoitov <ast@fb.com>        2017-06-03 00:03:52 -0400
committer David S. Miller <davem@davemloft.net>  2017-06-04 21:58:01 -0400
perf, bpf: Add BPF support to all perf_event types
Allow BPF_PROG_TYPE_PERF_EVENT programs to attach to all perf_event
types, including HW_CACHE, RAW, and dynamic pmu events. Only
tracepoint/kprobe events are treated differently; they require the
BPF_PROG_TYPE_TRACEPOINT/BPF_PROG_TYPE_KPROBE program types
respectively.

Also add support for reading all event counters using the
bpf_perf_event_read() helper.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
 include/linux/perf_event.h |  7
 kernel/bpf/arraymap.c      | 28
 kernel/events/core.c       | 47
 kernel/trace/bpf_trace.c   | 22
 4 files changed, 48 insertions(+), 56 deletions(-)
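
What this enables from user space: a BPF_PROG_TYPE_PERF_EVENT program can now
be attached to any perf event type through the existing PERF_EVENT_IOC_SET_BPF
ioctl, where previously only PERF_TYPE_HARDWARE/PERF_TYPE_SOFTWARE (and
tracepoints) were accepted. A minimal sketch, assuming prog_fd holds an
already-loaded BPF_PROG_TYPE_PERF_EVENT program; the helper name and the
choice of an LL-cache-miss event are illustrative, not part of this patch:

#include <linux/perf_event.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <unistd.h>

/* Sketch: attach an already-loaded BPF_PROG_TYPE_PERF_EVENT program
 * (prog_fd) to a HW_CACHE event; before this patch the ioctl below
 * returned -EINVAL for PERF_TYPE_HW_CACHE events.
 */
static int attach_bpf_to_cache_event(int prog_fd)
{
	struct perf_event_attr attr = {
		.type = PERF_TYPE_HW_CACHE,
		.size = sizeof(attr),
		.config = PERF_COUNT_HW_CACHE_LL |
			  (PERF_COUNT_HW_CACHE_OP_READ << 8) |
			  (PERF_COUNT_HW_CACHE_RESULT_MISS << 16),
		.sample_period = 100000,
	};
	int pfd;

	/* pid == -1, cpu == 0: count all tasks on CPU 0 */
	pfd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, 0);
	if (pfd < 0)
		return -1;

	if (ioctl(pfd, PERF_EVENT_IOC_SET_BPF, prog_fd) < 0) {
		close(pfd);
		return -1;
	}
	return pfd;
}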
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 24a635887f28..8fc5f0fada5e 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -896,7 +896,7 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr,
 				void *context);
 extern void perf_pmu_migrate_context(struct pmu *pmu,
 				int src_cpu, int dst_cpu);
-extern u64 perf_event_read_local(struct perf_event *event);
+int perf_event_read_local(struct perf_event *event, u64 *value);
 extern u64 perf_event_read_value(struct perf_event *event,
 				 u64 *enabled, u64 *running);
 
@@ -1301,7 +1301,10 @@ static inline const struct perf_event_attr *perf_event_attrs(struct perf_event *
 {
 	return ERR_PTR(-EINVAL);
 }
-static inline u64 perf_event_read_local(struct perf_event *event) { return -EINVAL; }
+static inline int perf_event_read_local(struct perf_event *event, u64 *value)
+{
+	return -EINVAL;
+}
 static inline void perf_event_print_debug(void)				{ }
 static inline int perf_event_task_disable(void)				{ return -EINVAL; }
 static inline int perf_event_task_enable(void)				{ return -EINVAL; }
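
The signature change above moves perf_event_read_local() from returning the
counter (with errors folded into the same u64) to returning an errno with the
counter delivered through an output pointer. For in-kernel callers the new
convention looks like this sketch (the helper name read_one_counter is
hypothetical):

/* Sketch of the new calling convention: the return value carries 0 or
 * a negative errno (-EINVAL, -EOPNOTSUPP), and the counter is written
 * through the output pointer only on success.  Previously the u64
 * return slot had to carry both the counter and any error code.
 */
static int read_one_counter(struct perf_event *event, u64 *count)
{
	int err = perf_event_read_local(event, count);

	if (err)
		return err;
	/* *count now holds the up-to-date counter value */
	return 0;
}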
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 172dc8ee0e3b..ecb43542246e 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -452,38 +452,24 @@ static void bpf_event_entry_free_rcu(struct bpf_event_entry *ee)
 static void *perf_event_fd_array_get_ptr(struct bpf_map *map,
 					 struct file *map_file, int fd)
 {
-	const struct perf_event_attr *attr;
 	struct bpf_event_entry *ee;
 	struct perf_event *event;
 	struct file *perf_file;
+	u64 value;
 
 	perf_file = perf_event_get(fd);
 	if (IS_ERR(perf_file))
 		return perf_file;
 
+	ee = ERR_PTR(-EOPNOTSUPP);
 	event = perf_file->private_data;
-	ee = ERR_PTR(-EINVAL);
-
-	attr = perf_event_attrs(event);
-	if (IS_ERR(attr) || attr->inherit)
+	if (perf_event_read_local(event, &value) == -EOPNOTSUPP)
 		goto err_out;
 
-	switch (attr->type) {
-	case PERF_TYPE_SOFTWARE:
-		if (attr->config != PERF_COUNT_SW_BPF_OUTPUT)
-			goto err_out;
-		/* fall-through */
-	case PERF_TYPE_RAW:
-	case PERF_TYPE_HARDWARE:
-		ee = bpf_event_entry_gen(perf_file, map_file);
-		if (ee)
-			return ee;
-		ee = ERR_PTR(-ENOMEM);
-		/* fall-through */
-	default:
-		break;
-	}
-
+	ee = bpf_event_entry_gen(perf_file, map_file);
+	if (ee)
+		return ee;
+	ee = ERR_PTR(-ENOMEM);
 err_out:
 	fput(perf_file);
 	return ee;
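
With the attr->type switch gone, the map-side filter reduces to "whatever
perf_event_read_local() can read": only events that can never be read locally
(the -EOPNOTSUPP cases, i.e. inherited events or a pmu with a ->count method)
are rejected at update time, while per-task/per-CPU mismatches (-EINVAL) are
deferred to read time. From user space, any such event fd can now be placed
into a BPF_MAP_TYPE_PERF_EVENT_ARRAY; a sketch using the libbpf wrapper (the
helper name is illustrative):

#include <bpf/bpf.h>		/* bpf_map_update_elem() wrapper */
#include <linux/bpf.h>		/* BPF_ANY */

/* Sketch: store a perf event fd -- now any type that
 * perf_event_read_local() accepts, e.g. RAW or HW_CACHE --
 * into slot `cpu` of a BPF_MAP_TYPE_PERF_EVENT_ARRAY.
 */
static int add_event_to_array(int map_fd, int event_fd, int cpu)
{
	__u32 key = cpu;

	return bpf_map_update_elem(map_fd, &key, &event_fd, BPF_ANY);
}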
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 6e75a5c9412d..51e40e4876c0 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -3636,10 +3636,10 @@ static inline u64 perf_event_count(struct perf_event *event)
  *     will not be local and we cannot read them atomically
  *   - must not have a pmu::count method
  */
-u64 perf_event_read_local(struct perf_event *event)
+int perf_event_read_local(struct perf_event *event, u64 *value)
 {
 	unsigned long flags;
-	u64 val;
+	int ret = 0;
 
 	/*
 	 * Disabling interrupts avoids all counter scheduling (context
@@ -3647,25 +3647,37 @@ u64 perf_event_read_local(struct perf_event *event)
 	 */
 	local_irq_save(flags);
 
-	/* If this is a per-task event, it must be for current */
-	WARN_ON_ONCE((event->attach_state & PERF_ATTACH_TASK) &&
-		     event->hw.target != current);
-
-	/* If this is a per-CPU event, it must be for this CPU */
-	WARN_ON_ONCE(!(event->attach_state & PERF_ATTACH_TASK) &&
-		     event->cpu != smp_processor_id());
-
 	/*
 	 * It must not be an event with inherit set, we cannot read
 	 * all child counters from atomic context.
 	 */
-	WARN_ON_ONCE(event->attr.inherit);
+	if (event->attr.inherit) {
+		ret = -EOPNOTSUPP;
+		goto out;
+	}
 
 	/*
 	 * It must not have a pmu::count method, those are not
 	 * NMI safe.
 	 */
-	WARN_ON_ONCE(event->pmu->count);
+	if (event->pmu->count) {
+		ret = -EOPNOTSUPP;
+		goto out;
+	}
+
+	/* If this is a per-task event, it must be for current */
+	if ((event->attach_state & PERF_ATTACH_TASK) &&
+	    event->hw.target != current) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	/* If this is a per-CPU event, it must be for this CPU */
+	if (!(event->attach_state & PERF_ATTACH_TASK) &&
+	    event->cpu != smp_processor_id()) {
+		ret = -EINVAL;
+		goto out;
+	}
 
 	/*
 	 * If the event is currently on this CPU, its either a per-task event,
@@ -3675,10 +3687,11 @@ u64 perf_event_read_local(struct perf_event *event)
 	if (event->oncpu == smp_processor_id())
 		event->pmu->read(event);
 
-	val = local64_read(&event->count);
+	*value = local64_read(&event->count);
+out:
 	local_irq_restore(flags);
 
-	return val;
+	return ret;
 }
 
 static int perf_event_read(struct perf_event *event, bool group)
@@ -8037,12 +8050,8 @@ static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)
 	bool is_kprobe, is_tracepoint;
 	struct bpf_prog *prog;
 
-	if (event->attr.type == PERF_TYPE_HARDWARE ||
-	    event->attr.type == PERF_TYPE_SOFTWARE)
-		return perf_event_set_bpf_handler(event, prog_fd);
-
 	if (event->attr.type != PERF_TYPE_TRACEPOINT)
-		return -EINVAL;
+		return perf_event_set_bpf_handler(event, prog_fd);
 
 	if (event->tp_event->prog)
 		return -EEXIST;
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 460a031c77e5..08eb072430b9 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -234,7 +234,8 @@ BPF_CALL_2(bpf_perf_event_read, struct bpf_map *, map, u64, flags)
 	unsigned int cpu = smp_processor_id();
 	u64 index = flags & BPF_F_INDEX_MASK;
 	struct bpf_event_entry *ee;
-	struct perf_event *event;
+	u64 value = 0;
+	int err;
 
 	if (unlikely(flags & ~(BPF_F_INDEX_MASK)))
 		return -EINVAL;
@@ -247,21 +248,14 @@ BPF_CALL_2(bpf_perf_event_read, struct bpf_map *, map, u64, flags)
 	if (!ee)
 		return -ENOENT;
 
-	event = ee->event;
-	if (unlikely(event->attr.type != PERF_TYPE_HARDWARE &&
-		     event->attr.type != PERF_TYPE_RAW))
-		return -EINVAL;
-
-	/* make sure event is local and doesn't have pmu::count */
-	if (unlikely(event->oncpu != cpu || event->pmu->count))
-		return -EINVAL;
-
+	err = perf_event_read_local(ee->event, &value);
 	/*
-	 * we don't know if the function is run successfully by the
-	 * return value. It can be judged in other places, such as
-	 * eBPF programs.
+	 * this api is ugly since we miss [-22..-2] range of valid
+	 * counter values, but that's uapi
 	 */
-	return perf_event_read_local(event);
+	if (err)
+		return err;
+	return value;
 }
 
 static const struct bpf_func_proto bpf_perf_event_read_proto = {
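
On the BPF program side the helper's interface is unchanged:
bpf_perf_event_read() still returns the counter directly, which is why the
rewritten comment calls the overlap between errno values and legitimate
counters ([-22..-2]) baked-in uapi. A samples/bpf-style sketch of a consumer;
the map name, attach point, and the errno-range heuristic are illustrative
assumptions, not part of this patch:

#include <linux/ptrace.h>
#include <uapi/linux/bpf.h>
#include "bpf_helpers.h"	/* samples/bpf helper declarations */

struct bpf_map_def SEC("maps") counters = {
	.type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
	.key_size = sizeof(int),
	.value_size = sizeof(u32),
	.max_entries = 64,
};

SEC("kprobe/some_kernel_func")	/* illustrative attach point */
int read_counter(struct pt_regs *ctx)
{
	/* BPF_F_CURRENT_CPU selects this CPU's slot in the array */
	s64 cnt = bpf_perf_event_read(&counters, BPF_F_CURRENT_CPU);

	/* Small negative values are errnos, not counters; a counter
	 * that genuinely lands in [-4095..-1] (as a u64) would be
	 * misread here, which is exactly the uapi wart the rewritten
	 * comment describes.
	 */
	if (cnt < 0 && cnt >= -4095)
		return 0;

	/* ... use (u64)cnt as the counter value ... */
	return 0;
}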