aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorFrederic Weisbecker <fweisbec@gmail.com>2010-03-22 14:40:03 -0400
committerFrederic Weisbecker <fweisbec@gmail.com>2010-04-01 02:26:31 -0400
commite49a5bd38159dfb1928fd25b173bc9de4bbadb21 (patch)
tree85c7c5fcce6df6a6ab6c119c40f6652d7783ec0b
parenteb1e79611cc9bfe21978230e3521e77ea2d7874a (diff)
perf: Use hot regs with software sched switch/migrate events
Scheduler's task migration events don't work because they always pass NULL regs perf_sw_event(). The event hence gets filtered in perf_swevent_add(). Scheduler's context switches events use task_pt_regs() to get the context when the event occured which is a wrong thing to do as this won't give us the place in the kernel where we went to sleep but the place where we left userspace. The result is even more wrong if we switch from a kernel thread. Use the hot regs snapshot for both events as they belong to the non-interrupt/exception based events family. Unlike page faults or so that provide the regs matching the exact origin of the event, we need to save the current context. This makes the task migration event working and fix the context switch callchains and origin ip. Example: perf record -a -e cs Before: 10.91% ksoftirqd/0 0 [k] 0000000000000000 | --- (nil) perf_callchain perf_prepare_sample __perf_event_overflow perf_swevent_overflow perf_swevent_add perf_swevent_ctx_event do_perf_sw_event __perf_sw_event perf_event_task_sched_out schedule run_ksoftirqd kthread kernel_thread_helper After: 23.77% hald-addon-stor [kernel.kallsyms] [k] schedule | --- schedule | |--60.00%-- schedule_timeout | wait_for_common | wait_for_completion | blk_execute_rq | scsi_execute | scsi_execute_req | sr_test_unit_ready | | | |--66.67%-- sr_media_change | | media_changed | | cdrom_media_changed | | sr_block_media_changed | | check_disk_change | | cdrom_open v2: Always build perf_arch_fetch_caller_regs() now that software events need that too. They don't need it from modules, unlike trace events, so we keep the EXPORT_SYMBOL in trace_event_perf.c Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Paul Mackerras <paulus@samba.org> Cc: Ingo Molnar <mingo@elte.hu> Cc: David Miller <davem@davemloft.net>
-rw-r--r--arch/x86/kernel/cpu/perf_event.c2
-rw-r--r--include/linux/perf_event.h21
-rw-r--r--kernel/perf_event.c4
3 files changed, 15 insertions, 12 deletions
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 60398a0d947c..5fb490c6ee5c 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1702,7 +1702,6 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
1702 return entry; 1702 return entry;
1703} 1703}
1704 1704
1705#ifdef CONFIG_EVENT_TRACING
1706void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip) 1705void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip)
1707{ 1706{
1708 regs->ip = ip; 1707 regs->ip = ip;
@@ -1714,4 +1713,3 @@ void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int ski
1714 regs->cs = __KERNEL_CS; 1713 regs->cs = __KERNEL_CS;
1715 local_save_flags(regs->flags); 1714 local_save_flags(regs->flags);
1716} 1715}
1717#endif
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 95477038a72a..c8e375440403 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -842,13 +842,6 @@ extern atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX];
842 842
843extern void __perf_sw_event(u32, u64, int, struct pt_regs *, u64); 843extern void __perf_sw_event(u32, u64, int, struct pt_regs *, u64);
844 844
845static inline void
846perf_sw_event(u32 event_id, u64 nr, int nmi, struct pt_regs *regs, u64 addr)
847{
848 if (atomic_read(&perf_swevent_enabled[event_id]))
849 __perf_sw_event(event_id, nr, nmi, regs, addr);
850}
851
852extern void 845extern void
853perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip); 846perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip);
854 847
@@ -887,6 +880,20 @@ static inline void perf_fetch_caller_regs(struct pt_regs *regs, int skip)
887 return perf_arch_fetch_caller_regs(regs, ip, skip); 880 return perf_arch_fetch_caller_regs(regs, ip, skip);
888} 881}
889 882
883static inline void
884perf_sw_event(u32 event_id, u64 nr, int nmi, struct pt_regs *regs, u64 addr)
885{
886 if (atomic_read(&perf_swevent_enabled[event_id])) {
887 struct pt_regs hot_regs;
888
889 if (!regs) {
890 perf_fetch_caller_regs(&hot_regs, 1);
891 regs = &hot_regs;
892 }
893 __perf_sw_event(event_id, nr, nmi, regs, addr);
894 }
895}
896
890extern void __perf_event_mmap(struct vm_area_struct *vma); 897extern void __perf_event_mmap(struct vm_area_struct *vma);
891 898
892static inline void perf_event_mmap(struct vm_area_struct *vma) 899static inline void perf_event_mmap(struct vm_area_struct *vma)
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 574ee58a3046..b0feb4795af3 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -1164,11 +1164,9 @@ void perf_event_task_sched_out(struct task_struct *task,
1164 struct perf_event_context *ctx = task->perf_event_ctxp; 1164 struct perf_event_context *ctx = task->perf_event_ctxp;
1165 struct perf_event_context *next_ctx; 1165 struct perf_event_context *next_ctx;
1166 struct perf_event_context *parent; 1166 struct perf_event_context *parent;
1167 struct pt_regs *regs;
1168 int do_switch = 1; 1167 int do_switch = 1;
1169 1168
1170 regs = task_pt_regs(task); 1169 perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 1, NULL, 0);
1171 perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 1, regs, 0);
1172 1170
1173 if (likely(!ctx || !cpuctx->task_ctx)) 1171 if (likely(!ctx || !cpuctx->task_ctx))
1174 return; 1172 return;