diff options
author | Oleg Nesterov <oleg@redhat.com> | 2013-02-04 11:11:58 -0500 |
---|---|---|
committer | Oleg Nesterov <oleg@redhat.com> | 2013-02-08 12:28:07 -0500 |
commit | 31ba334836c0ac0039084859f14a5b96858493dc (patch) | |
tree | a6f8d72d58f165717481aae43fcabe25b326dce3 /kernel | |
parent | 736288ba5016e255869c26296014eeff649971c2 (diff) |
uprobes/perf: Teach trace_uprobe/perf code to pre-filter
Finally implement uprobe_perf_filter() which checks ->nr_systemwide or
->perf_events to figure out whether we need to insert the breakpoint.
uprobe_perf_open/close are changed to do uprobe_apply(true/false) when
the new perf event comes or goes away.
Note that currently this is very suboptimal:
- uprobe_register() called by TRACE_REG_PERF_REGISTER becomes a
heavy nop, consumer->filter() always returns false at this stage.
As it was already discussed we need uprobe_register_only() to
avoid the costly register_for_each_vma() when possible.
- uprobe_apply() is often overkill. Unless "nr_systemwide != 0"
changes we need uprobe_apply_mm(), unapply_uprobe() is almost
what we need.
- uprobe_apply() can be simply avoided sometimes, see the next
changes.
Testing:
# perf probe -x /lib/libc.so.6 syscall
# perl -e 'syscall -1 while 1' &
[1] 530
# perf record -e probe_libc:syscall perl -e 'syscall -1 for 1..10; sleep 1'
# perf report --show-total-period
100.00% 10 perl libc-2.8.so [.] syscall
Before this patch:
# cat /sys/kernel/debug/tracing/uprobe_profile
/lib/libc.so.6 syscall 79291
A huge ->nrhit == 79291 reflects the fact that the background process
530 constantly hits this breakpoint too, even if it doesn't contribute to
the output.
After the patch:
# cat /sys/kernel/debug/tracing/uprobe_profile
/lib/libc.so.6 syscall 10
This shows that only the target process was punished by int3.
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/trace/trace_uprobe.c | 46 |
1 files changed, 43 insertions, 3 deletions
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 2a74a93afdae..b7850f535acf 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c | |||
@@ -557,7 +557,12 @@ static inline bool is_trace_uprobe_enabled(struct trace_uprobe *tu) | |||
557 | return tu->flags & (TP_FLAG_TRACE | TP_FLAG_PROFILE); | 557 | return tu->flags & (TP_FLAG_TRACE | TP_FLAG_PROFILE); |
558 | } | 558 | } |
559 | 559 | ||
560 | static int probe_event_enable(struct trace_uprobe *tu, int flag) | 560 | typedef bool (*filter_func_t)(struct uprobe_consumer *self, |
561 | enum uprobe_filter_ctx ctx, | ||
562 | struct mm_struct *mm); | ||
563 | |||
564 | static int | ||
565 | probe_event_enable(struct trace_uprobe *tu, int flag, filter_func_t filter) | ||
561 | { | 566 | { |
562 | int ret = 0; | 567 | int ret = 0; |
563 | 568 | ||
@@ -567,6 +572,7 @@ static int probe_event_enable(struct trace_uprobe *tu, int flag) | |||
567 | WARN_ON(!uprobe_filter_is_empty(&tu->filter)); | 572 | WARN_ON(!uprobe_filter_is_empty(&tu->filter)); |
568 | 573 | ||
569 | tu->flags |= flag; | 574 | tu->flags |= flag; |
575 | tu->consumer.filter = filter; | ||
570 | ret = uprobe_register(tu->inode, tu->offset, &tu->consumer); | 576 | ret = uprobe_register(tu->inode, tu->offset, &tu->consumer); |
571 | if (ret) | 577 | if (ret) |
572 | tu->flags &= ~flag; | 578 | tu->flags &= ~flag; |
@@ -656,6 +662,22 @@ static int set_print_fmt(struct trace_uprobe *tu) | |||
656 | } | 662 | } |
657 | 663 | ||
658 | #ifdef CONFIG_PERF_EVENTS | 664 | #ifdef CONFIG_PERF_EVENTS |
665 | static bool | ||
666 | __uprobe_perf_filter(struct trace_uprobe_filter *filter, struct mm_struct *mm) | ||
667 | { | ||
668 | struct perf_event *event; | ||
669 | |||
670 | if (filter->nr_systemwide) | ||
671 | return true; | ||
672 | |||
673 | list_for_each_entry(event, &filter->perf_events, hw.tp_list) { | ||
674 | if (event->hw.tp_target->mm == mm) | ||
675 | return true; | ||
676 | } | ||
677 | |||
678 | return false; | ||
679 | } | ||
680 | |||
659 | static int uprobe_perf_open(struct trace_uprobe *tu, struct perf_event *event) | 681 | static int uprobe_perf_open(struct trace_uprobe *tu, struct perf_event *event) |
660 | { | 682 | { |
661 | write_lock(&tu->filter.rwlock); | 683 | write_lock(&tu->filter.rwlock); |
@@ -665,6 +687,8 @@ static int uprobe_perf_open(struct trace_uprobe *tu, struct perf_event *event) | |||
665 | tu->filter.nr_systemwide++; | 687 | tu->filter.nr_systemwide++; |
666 | write_unlock(&tu->filter.rwlock); | 688 | write_unlock(&tu->filter.rwlock); |
667 | 689 | ||
690 | uprobe_apply(tu->inode, tu->offset, &tu->consumer, true); | ||
691 | |||
668 | return 0; | 692 | return 0; |
669 | } | 693 | } |
670 | 694 | ||
@@ -677,9 +701,25 @@ static int uprobe_perf_close(struct trace_uprobe *tu, struct perf_event *event) | |||
677 | tu->filter.nr_systemwide--; | 701 | tu->filter.nr_systemwide--; |
678 | write_unlock(&tu->filter.rwlock); | 702 | write_unlock(&tu->filter.rwlock); |
679 | 703 | ||
704 | uprobe_apply(tu->inode, tu->offset, &tu->consumer, false); | ||
705 | |||
680 | return 0; | 706 | return 0; |
681 | } | 707 | } |
682 | 708 | ||
709 | static bool uprobe_perf_filter(struct uprobe_consumer *uc, | ||
710 | enum uprobe_filter_ctx ctx, struct mm_struct *mm) | ||
711 | { | ||
712 | struct trace_uprobe *tu; | ||
713 | int ret; | ||
714 | |||
715 | tu = container_of(uc, struct trace_uprobe, consumer); | ||
716 | read_lock(&tu->filter.rwlock); | ||
717 | ret = __uprobe_perf_filter(&tu->filter, mm); | ||
718 | read_unlock(&tu->filter.rwlock); | ||
719 | |||
720 | return ret; | ||
721 | } | ||
722 | |||
683 | /* uprobe profile handler */ | 723 | /* uprobe profile handler */ |
684 | static void uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs) | 724 | static void uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs) |
685 | { | 725 | { |
@@ -722,7 +762,7 @@ int trace_uprobe_register(struct ftrace_event_call *event, enum trace_reg type, | |||
722 | 762 | ||
723 | switch (type) { | 763 | switch (type) { |
724 | case TRACE_REG_REGISTER: | 764 | case TRACE_REG_REGISTER: |
725 | return probe_event_enable(tu, TP_FLAG_TRACE); | 765 | return probe_event_enable(tu, TP_FLAG_TRACE, NULL); |
726 | 766 | ||
727 | case TRACE_REG_UNREGISTER: | 767 | case TRACE_REG_UNREGISTER: |
728 | probe_event_disable(tu, TP_FLAG_TRACE); | 768 | probe_event_disable(tu, TP_FLAG_TRACE); |
@@ -730,7 +770,7 @@ int trace_uprobe_register(struct ftrace_event_call *event, enum trace_reg type, | |||
730 | 770 | ||
731 | #ifdef CONFIG_PERF_EVENTS | 771 | #ifdef CONFIG_PERF_EVENTS |
732 | case TRACE_REG_PERF_REGISTER: | 772 | case TRACE_REG_PERF_REGISTER: |
733 | return probe_event_enable(tu, TP_FLAG_PROFILE); | 773 | return probe_event_enable(tu, TP_FLAG_PROFILE, uprobe_perf_filter); |
734 | 774 | ||
735 | case TRACE_REG_PERF_UNREGISTER: | 775 | case TRACE_REG_PERF_UNREGISTER: |
736 | probe_event_disable(tu, TP_FLAG_PROFILE); | 776 | probe_event_disable(tu, TP_FLAG_PROFILE); |