aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Frederic Weisbecker <fweisbec@gmail.com> 2010-03-03 01:16:16 -0500
committer: Frederic Weisbecker <fweisbec@gmail.com> 2010-03-10 08:40:38 -0500
commit: c530665c31c0140b74ca7689e7f836177796e5bd (patch)
tree: ac9ebb98aca58fdf84cc18958a94393a9a7bbdf3
parent: 5331d7b84613b8325362dde53dc2bff2fb87d351 (diff)
perf: Take a hot regs snapshot for trace events
We are taking a wrong regs snapshot when a trace event triggers.
Either we use get_irq_regs(), which gives us the interrupted
registers if we are in an interrupt, or we use task_pt_regs(),
which gives us the state before we entered the kernel, assuming
we are lucky enough not to be a kernel thread, in which case
task_pt_regs() returns the initial set of regs when the kernel
thread was started.

What we want is different. We need a hot snapshot of the regs,
so that we can get the instruction pointer to record in the
sample, the frame pointer for the callchain, and some other
things.

Let's use the new perf_fetch_caller_regs() for that.

Comparison with perf record -e lock: -R -a -f -g

Before:

        perf  [kernel]  [k] __do_softirq
               |
               --- __do_softirq
                  |
                  |--55.16%-- __open
                  |
                   --44.84%-- __write_nocancel

After:

        perf  [kernel]  [k] perf_tp_event
               |
               --- perf_tp_event
                  |
                  |--41.07%-- lock_acquire
                  |          |
                  |          |--39.36%-- _raw_spin_lock
                  |          |          |
                  |          |          |--7.81%-- hrtimer_interrupt
                  |          |          |          smp_apic_timer_interrupt
                  |          |          |          apic_timer_interrupt

The old case was producing unreliable callchains. Now having right
frame and instruction pointers, we have the trace we want.

Also syscalls and kprobe events already have the right regs, let's
use them instead of wasting a retrieval.

v2: Follow the rename perf_save_regs() -> perf_fetch_caller_regs()

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Masami Hiramatsu <mhiramat@redhat.com>
Cc: Jason Baron <jbaron@redhat.com>
Cc: Archs <linux-arch@vger.kernel.org>
-rw-r--r-- include/linux/ftrace_event.h | 7
-rw-r--r-- include/trace/ftrace.h | 6
-rw-r--r-- kernel/perf_event.c | 8
-rw-r--r-- kernel/trace/trace_event_profile.c | 3
-rw-r--r-- kernel/trace/trace_kprobe.c | 5
-rw-r--r-- kernel/trace/trace_syscalls.c | 4
6 files changed, 19 insertions, 14 deletions
diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 6b7c444ab8f6..ac424f18ce63 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -187,6 +187,9 @@ do { \
187 187
188#ifdef CONFIG_PERF_EVENTS 188#ifdef CONFIG_PERF_EVENTS
189struct perf_event; 189struct perf_event;
190
191DECLARE_PER_CPU(struct pt_regs, perf_trace_regs);
192
190extern int ftrace_profile_enable(int event_id); 193extern int ftrace_profile_enable(int event_id);
191extern void ftrace_profile_disable(int event_id); 194extern void ftrace_profile_disable(int event_id);
192extern int ftrace_profile_set_filter(struct perf_event *event, int event_id, 195extern int ftrace_profile_set_filter(struct perf_event *event, int event_id,
@@ -198,11 +201,11 @@ ftrace_perf_buf_prepare(int size, unsigned short type, int *rctxp,
198 201
199static inline void 202static inline void
200ftrace_perf_buf_submit(void *raw_data, int size, int rctx, u64 addr, 203ftrace_perf_buf_submit(void *raw_data, int size, int rctx, u64 addr,
201 u64 count, unsigned long irq_flags) 204 u64 count, unsigned long irq_flags, struct pt_regs *regs)
202{ 205{
203 struct trace_entry *entry = raw_data; 206 struct trace_entry *entry = raw_data;
204 207
205 perf_tp_event(entry->type, addr, count, raw_data, size); 208 perf_tp_event(entry->type, addr, count, raw_data, size, regs);
206 perf_swevent_put_recursion_context(rctx); 209 perf_swevent_put_recursion_context(rctx);
207 local_irq_restore(irq_flags); 210 local_irq_restore(irq_flags);
208} 211}
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index 0804cd594803..f31bb8b9777c 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -764,6 +764,7 @@ ftrace_profile_templ_##call(struct ftrace_event_call *event_call, \
764 struct ftrace_raw_##call *entry; \ 764 struct ftrace_raw_##call *entry; \
765 u64 __addr = 0, __count = 1; \ 765 u64 __addr = 0, __count = 1; \
766 unsigned long irq_flags; \ 766 unsigned long irq_flags; \
767 struct pt_regs *__regs; \
767 int __entry_size; \ 768 int __entry_size; \
768 int __data_size; \ 769 int __data_size; \
769 int rctx; \ 770 int rctx; \
@@ -784,8 +785,11 @@ ftrace_profile_templ_##call(struct ftrace_event_call *event_call, \
784 \ 785 \
785 { assign; } \ 786 { assign; } \
786 \ 787 \
788 __regs = &__get_cpu_var(perf_trace_regs); \
789 perf_fetch_caller_regs(__regs, 2); \
790 \
787 ftrace_perf_buf_submit(entry, __entry_size, rctx, __addr, \ 791 ftrace_perf_buf_submit(entry, __entry_size, rctx, __addr, \
788 __count, irq_flags); \ 792 __count, irq_flags, __regs); \
789} 793}
790 794
791#undef DEFINE_EVENT 795#undef DEFINE_EVENT
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 359d7f690c2b..45b4b6e55891 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -4318,9 +4318,8 @@ static const struct pmu perf_ops_task_clock = {
4318#ifdef CONFIG_EVENT_TRACING 4318#ifdef CONFIG_EVENT_TRACING
4319 4319
4320void perf_tp_event(int event_id, u64 addr, u64 count, void *record, 4320void perf_tp_event(int event_id, u64 addr, u64 count, void *record,
4321 int entry_size) 4321 int entry_size, struct pt_regs *regs)
4322{ 4322{
4323 struct pt_regs *regs = get_irq_regs();
4324 struct perf_sample_data data; 4323 struct perf_sample_data data;
4325 struct perf_raw_record raw = { 4324 struct perf_raw_record raw = {
4326 .size = entry_size, 4325 .size = entry_size,
@@ -4330,12 +4329,9 @@ void perf_tp_event(int event_id, u64 addr, u64 count, void *record,
4330 perf_sample_data_init(&data, addr); 4329 perf_sample_data_init(&data, addr);
4331 data.raw = &raw; 4330 data.raw = &raw;
4332 4331
4333 if (!regs)
4334 regs = task_pt_regs(current);
4335
4336 /* Trace events already protected against recursion */ 4332 /* Trace events already protected against recursion */
4337 do_perf_sw_event(PERF_TYPE_TRACEPOINT, event_id, count, 1, 4333 do_perf_sw_event(PERF_TYPE_TRACEPOINT, event_id, count, 1,
4338 &data, regs); 4334 &data, regs);
4339} 4335}
4340EXPORT_SYMBOL_GPL(perf_tp_event); 4336EXPORT_SYMBOL_GPL(perf_tp_event);
4341 4337
diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_profile.c
index f0d693005075..e66d21e15a0f 100644
--- a/kernel/trace/trace_event_profile.c
+++ b/kernel/trace/trace_event_profile.c
@@ -2,13 +2,14 @@
2 * trace event based perf counter profiling 2 * trace event based perf counter profiling
3 * 3 *
4 * Copyright (C) 2009 Red Hat Inc, Peter Zijlstra <pzijlstr@redhat.com> 4 * Copyright (C) 2009 Red Hat Inc, Peter Zijlstra <pzijlstr@redhat.com>
5 * 5 * Copyright (C) 2009-2010 Frederic Weisbecker <fweisbec@gmail.com>
6 */ 6 */
7 7
8#include <linux/module.h> 8#include <linux/module.h>
9#include <linux/kprobes.h> 9#include <linux/kprobes.h>
10#include "trace.h" 10#include "trace.h"
11 11
12DEFINE_PER_CPU(struct pt_regs, perf_trace_regs);
12 13
13static char *perf_trace_buf; 14static char *perf_trace_buf;
14static char *perf_trace_buf_nmi; 15static char *perf_trace_buf_nmi;
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 505c92273b1a..f7a20a8bfb31 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -1240,7 +1240,7 @@ static __kprobes void kprobe_profile_func(struct kprobe *kp,
1240 for (i = 0; i < tp->nr_args; i++) 1240 for (i = 0; i < tp->nr_args; i++)
1241 entry->args[i] = call_fetch(&tp->args[i].fetch, regs); 1241 entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
1242 1242
1243 ftrace_perf_buf_submit(entry, size, rctx, entry->ip, 1, irq_flags); 1243 ftrace_perf_buf_submit(entry, size, rctx, entry->ip, 1, irq_flags, regs);
1244} 1244}
1245 1245
1246/* Kretprobe profile handler */ 1246/* Kretprobe profile handler */
@@ -1271,7 +1271,8 @@ static __kprobes void kretprobe_profile_func(struct kretprobe_instance *ri,
1271 for (i = 0; i < tp->nr_args; i++) 1271 for (i = 0; i < tp->nr_args; i++)
1272 entry->args[i] = call_fetch(&tp->args[i].fetch, regs); 1272 entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
1273 1273
1274 ftrace_perf_buf_submit(entry, size, rctx, entry->ret_ip, 1, irq_flags); 1274 ftrace_perf_buf_submit(entry, size, rctx, entry->ret_ip, 1,
1275 irq_flags, regs);
1275} 1276}
1276 1277
1277static int probe_profile_enable(struct ftrace_event_call *call) 1278static int probe_profile_enable(struct ftrace_event_call *call)
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index cba47d7935cc..7e6e84fb7b6c 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -467,7 +467,7 @@ static void prof_syscall_enter(struct pt_regs *regs, long id)
467 rec->nr = syscall_nr; 467 rec->nr = syscall_nr;
468 syscall_get_arguments(current, regs, 0, sys_data->nb_args, 468 syscall_get_arguments(current, regs, 0, sys_data->nb_args,
469 (unsigned long *)&rec->args); 469 (unsigned long *)&rec->args);
470 ftrace_perf_buf_submit(rec, size, rctx, 0, 1, flags); 470 ftrace_perf_buf_submit(rec, size, rctx, 0, 1, flags, regs);
471} 471}
472 472
473int prof_sysenter_enable(struct ftrace_event_call *call) 473int prof_sysenter_enable(struct ftrace_event_call *call)
@@ -542,7 +542,7 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
542 rec->nr = syscall_nr; 542 rec->nr = syscall_nr;
543 rec->ret = syscall_get_return_value(current, regs); 543 rec->ret = syscall_get_return_value(current, regs);
544 544
545 ftrace_perf_buf_submit(rec, size, rctx, 0, 1, flags); 545 ftrace_perf_buf_submit(rec, size, rctx, 0, 1, flags, regs);
546} 546}
547 547
548int prof_sysexit_enable(struct ftrace_event_call *call) 548int prof_sysexit_enable(struct ftrace_event_call *call)