aboutsummaryrefslogtreecommitdiffstats
path: root/include
diff options
context:
space:
mode:
author: Frederic Weisbecker <fweisbec@gmail.com> 2010-03-18 20:23:53 -0400
committer: Frederic Weisbecker <fweisbec@gmail.com> 2010-04-04 09:23:07 -0400
commit: 6cc8a7c1d8560c042f486b23318a6291569ab96b (patch)
tree: bda763c18ab78e92bb36d7e250765a493cce4b19 /include
parent: 6f4dee06fbf0133917f3d76fa3fb50e18b10c1f5 (diff)
perf: Fetch hot regs from the template caller
Trace events can be defined from a template using DECLARE_EVENT_CLASS/DEFINE_EVENT or directly with TRACE_EVENT. In both cases we have a template tracepoint handler, used to record the trace, to which we pass our ftrace event instance.

At the function level, if the class is named "foo" and the event is named "blah", we have the following chain of calls:

    perf_trace_blah() -> perf_trace_templ_foo()

In the case we have several events sharing the class "foo", we'll have multiple users of perf_trace_templ_foo(), and it won't be inlined by the compiler. This is usually what happens with the DECLARE_EVENT_CLASS/DEFINE_EVENT based definition.

But if perf_trace_blah() is the only caller of perf_trace_templ_foo() there are fair chances that it will be inlined.

The problem is that we fetch the regs from perf_trace_templ_foo() after we have rewound the frame pointer to the second caller: we want to reach the caller of perf_trace_blah() to get the right source of the event. And we do this by always assuming that perf_trace_templ_foo() is not inlined. But as shown above this is not always true. And if it is inlined we miss the first caller, losing the most important level of precision.

We get:

    61.31%  ls  [kernel.kallsyms]  [k] do_softirq
            |
            --- do_softirq
                irq_exit
                do_IRQ
                common_interrupt
                |
                |--25.00%-- tty_buffer_request_room

Instead of:

    61.31%  ls  [kernel.kallsyms]  [k] __do_softirq
            |
            --- __do_softirq
                do_softirq
                irq_exit
                do_IRQ
                common_interrupt
                |
                |--25.00%-- tty_buffer_request_room

To fix this, we fetch the regs from perf_trace_blah() rather than perf_trace_templ_foo() so that we don't have to deal with inlining surprises. That also brings us the advantage of having the true source of the event even if we don't have frame pointers.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'include')
-rw-r--r-- include/trace/ftrace.h | 23
1 files changed, 12 insertions, 11 deletions
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index ea6f9d4a20e9..882c64832ffe 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -758,13 +758,12 @@ __attribute__((section("_ftrace_events"))) event_##call = { \
758#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ 758#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \
759static notrace void \ 759static notrace void \
760perf_trace_templ_##call(struct ftrace_event_call *event_call, \ 760perf_trace_templ_##call(struct ftrace_event_call *event_call, \
761 proto) \ 761 struct pt_regs *__regs, proto) \
762{ \ 762{ \
763 struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\ 763 struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\
764 struct ftrace_raw_##call *entry; \ 764 struct ftrace_raw_##call *entry; \
765 u64 __addr = 0, __count = 1; \ 765 u64 __addr = 0, __count = 1; \
766 unsigned long irq_flags; \ 766 unsigned long irq_flags; \
767 struct pt_regs *__regs; \
768 int __entry_size; \ 767 int __entry_size; \
769 int __data_size; \ 768 int __data_size; \
770 int rctx; \ 769 int rctx; \
@@ -785,20 +784,22 @@ perf_trace_templ_##call(struct ftrace_event_call *event_call, \
785 \ 784 \
786 { assign; } \ 785 { assign; } \
787 \ 786 \
788 __regs = &__get_cpu_var(perf_trace_regs); \
789 perf_fetch_caller_regs(__regs, 2); \
790 \
791 perf_trace_buf_submit(entry, __entry_size, rctx, __addr, \ 787 perf_trace_buf_submit(entry, __entry_size, rctx, __addr, \
792 __count, irq_flags, __regs); \ 788 __count, irq_flags, __regs); \
793} 789}
794 790
795#undef DEFINE_EVENT 791#undef DEFINE_EVENT
796#define DEFINE_EVENT(template, call, proto, args) \ 792#define DEFINE_EVENT(template, call, proto, args) \
797static notrace void perf_trace_##call(proto) \ 793static notrace void perf_trace_##call(proto) \
798{ \ 794{ \
799 struct ftrace_event_call *event_call = &event_##call; \ 795 struct ftrace_event_call *event_call = &event_##call; \
800 \ 796 struct pt_regs *__regs = &get_cpu_var(perf_trace_regs); \
801 perf_trace_templ_##template(event_call, args); \ 797 \
798 perf_fetch_caller_regs(__regs, 1); \
799 \
800 perf_trace_templ_##template(event_call, __regs, args); \
801 \
802 put_cpu_var(perf_trace_regs); \
802} 803}
803 804
804#undef DEFINE_EVENT_PRINT 805#undef DEFINE_EVENT_PRINT