aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/trace
diff options
context:
space:
mode:
authorYonghong Song <yhs@fb.com>2017-08-04 19:00:09 -0400
committerDavid S. Miller <davem@davemloft.net>2017-08-07 17:09:48 -0400
commitcf5f5cea270655dd49370760576c64b228583b79 (patch)
tree5758e9d56b94542d082d40ed66f8d8effa6287f9 /kernel/trace
parentd226a2b84d0528da7e35e7e19e052293889cdd21 (diff)
bpf: add support for sys_enter_* and sys_exit_* tracepoints
Currently, bpf programs cannot be attached to sys_enter_* and sys_exit_* style tracepoints. The iovisor/bcc issue #748 (https://github.com/iovisor/bcc/issues/748) documents this issue. For example, if you try to attach a bpf program to tracepoints syscalls/sys_enter_newfstat, you will get the following error:

    # ./tools/trace.py t:syscalls:sys_enter_newfstat
    Ioctl(PERF_EVENT_IOC_SET_BPF): Invalid argument
    Failed to attach BPF to tracepoint

The main reason is that syscalls/sys_enter_* and syscalls/sys_exit_* tracepoints are treated differently from other tracepoints and there is no bpf hook for them.

This patch adds bpf support for these syscalls tracepoints by
  . permitting bpf attachment in ioctl PERF_EVENT_IOC_SET_BPF
  . calling bpf programs in perf_syscall_enter and perf_syscall_exit

The legality of bpf program ctx access is also checked. Function trace_event_get_offsets returns correct max offset for each specific syscall tracepoint, which is compared against the maximum offset access in bpf program.

Signed-off-by: Yonghong Song <yhs@fb.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'kernel/trace')
-rw-r--r--kernel/trace/trace_syscalls.c53
1 files changed, 51 insertions, 2 deletions
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 5e10395da88e..7a1a92036563 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -559,11 +559,29 @@ static DECLARE_BITMAP(enabled_perf_exit_syscalls, NR_syscalls);
559static int sys_perf_refcount_enter; 559static int sys_perf_refcount_enter;
560static int sys_perf_refcount_exit; 560static int sys_perf_refcount_exit;
561 561
562static int perf_call_bpf_enter(struct bpf_prog *prog, struct pt_regs *regs,
563 struct syscall_metadata *sys_data,
564 struct syscall_trace_enter *rec) {
565 struct syscall_tp_t {
566 unsigned long long regs;
567 unsigned long syscall_nr;
568 unsigned long args[sys_data->nb_args];
569 } param;
570 int i;
571
572 *(struct pt_regs **)&param = regs;
573 param.syscall_nr = rec->nr;
574 for (i = 0; i < sys_data->nb_args; i++)
575 param.args[i] = rec->args[i];
576 return trace_call_bpf(prog, &param);
577}
578
562static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id) 579static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
563{ 580{
564 struct syscall_metadata *sys_data; 581 struct syscall_metadata *sys_data;
565 struct syscall_trace_enter *rec; 582 struct syscall_trace_enter *rec;
566 struct hlist_head *head; 583 struct hlist_head *head;
584 struct bpf_prog *prog;
567 int syscall_nr; 585 int syscall_nr;
568 int rctx; 586 int rctx;
569 int size; 587 int size;
@@ -578,8 +596,9 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
578 if (!sys_data) 596 if (!sys_data)
579 return; 597 return;
580 598
599 prog = READ_ONCE(sys_data->enter_event->prog);
581 head = this_cpu_ptr(sys_data->enter_event->perf_events); 600 head = this_cpu_ptr(sys_data->enter_event->perf_events);
582 if (hlist_empty(head)) 601 if (!prog && hlist_empty(head))
583 return; 602 return;
584 603
585 /* get the size after alignment with the u32 buffer size field */ 604 /* get the size after alignment with the u32 buffer size field */
@@ -594,6 +613,13 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
594 rec->nr = syscall_nr; 613 rec->nr = syscall_nr;
595 syscall_get_arguments(current, regs, 0, sys_data->nb_args, 614 syscall_get_arguments(current, regs, 0, sys_data->nb_args,
596 (unsigned long *)&rec->args); 615 (unsigned long *)&rec->args);
616
617 if ((prog && !perf_call_bpf_enter(prog, regs, sys_data, rec)) ||
618 hlist_empty(head)) {
619 perf_swevent_put_recursion_context(rctx);
620 return;
621 }
622
597 perf_trace_buf_submit(rec, size, rctx, 623 perf_trace_buf_submit(rec, size, rctx,
598 sys_data->enter_event->event.type, 1, regs, 624 sys_data->enter_event->event.type, 1, regs,
599 head, NULL); 625 head, NULL);
@@ -633,11 +659,26 @@ static void perf_sysenter_disable(struct trace_event_call *call)
633 mutex_unlock(&syscall_trace_lock); 659 mutex_unlock(&syscall_trace_lock);
634} 660}
635 661
662static int perf_call_bpf_exit(struct bpf_prog *prog, struct pt_regs *regs,
663 struct syscall_trace_exit *rec) {
664 struct syscall_tp_t {
665 unsigned long long regs;
666 unsigned long syscall_nr;
667 unsigned long ret;
668 } param;
669
670 *(struct pt_regs **)&param = regs;
671 param.syscall_nr = rec->nr;
672 param.ret = rec->ret;
673 return trace_call_bpf(prog, &param);
674}
675
636static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret) 676static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
637{ 677{
638 struct syscall_metadata *sys_data; 678 struct syscall_metadata *sys_data;
639 struct syscall_trace_exit *rec; 679 struct syscall_trace_exit *rec;
640 struct hlist_head *head; 680 struct hlist_head *head;
681 struct bpf_prog *prog;
641 int syscall_nr; 682 int syscall_nr;
642 int rctx; 683 int rctx;
643 int size; 684 int size;
@@ -652,8 +693,9 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
652 if (!sys_data) 693 if (!sys_data)
653 return; 694 return;
654 695
696 prog = READ_ONCE(sys_data->exit_event->prog);
655 head = this_cpu_ptr(sys_data->exit_event->perf_events); 697 head = this_cpu_ptr(sys_data->exit_event->perf_events);
656 if (hlist_empty(head)) 698 if (!prog && hlist_empty(head))
657 return; 699 return;
658 700
659 /* We can probably do that at build time */ 701 /* We can probably do that at build time */
@@ -666,6 +708,13 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
666 708
667 rec->nr = syscall_nr; 709 rec->nr = syscall_nr;
668 rec->ret = syscall_get_return_value(current, regs); 710 rec->ret = syscall_get_return_value(current, regs);
711
712 if ((prog && !perf_call_bpf_exit(prog, regs, rec)) ||
713 hlist_empty(head)) {
714 perf_swevent_put_recursion_context(rctx);
715 return;
716 }
717
669 perf_trace_buf_submit(rec, size, rctx, sys_data->exit_event->event.type, 718 perf_trace_buf_submit(rec, size, rctx, sys_data->exit_event->event.type,
670 1, regs, head, NULL); 719 1, regs, head, NULL);
671} 720}