aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorFrederic Weisbecker <fweisbec@gmail.com>2010-03-04 15:15:56 -0500
committerFrederic Weisbecker <fweisbec@gmail.com>2010-03-10 08:39:35 -0500
commit5331d7b84613b8325362dde53dc2bff2fb87d351 (patch)
tree60f4bf4fdaf31b612eefc291bf6b558dc4c8d947
parent61e67fb9d3ed13e6a7f58652ae4979b9c872fa57 (diff)
perf: Introduce new perf_fetch_caller_regs() for hot regs snapshot
Events that trigger overflows by interrupting a context can use get_irq_regs() or task_pt_regs() to retrieve the state when the event triggered. But this is not the case for some other classes of events like trace events, as tracepoints are executed in the same context as the code that triggered the event. It means we need a different api to capture the regs there, namely we need a hot snapshot to get the most important information for perf: the instruction pointer to get the event origin, the frame pointer for the callchain, the code segment for user_mode() tests (we always use __KERNEL_CS as trace events always occur from the kernel) and the eflags for further purposes. v2: rename perf_save_regs to perf_fetch_caller_regs as per Masami's suggestion. Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com> Cc: Ingo Molnar <mingo@elte.hu> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: H. Peter Anvin <hpa@zytor.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Paul Mackerras <paulus@samba.org> Cc: Steven Rostedt <rostedt@goodmis.org> Cc: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Masami Hiramatsu <mhiramat@redhat.com> Cc: Jason Baron <jbaron@redhat.com> Cc: Archs <linux-arch@vger.kernel.org>
-rw-r--r--arch/x86/kernel/cpu/perf_event.c12
-rw-r--r--arch/x86/kernel/dumpstack.h15
-rw-r--r--include/linux/perf_event.h42
-rw-r--r--kernel/perf_event.c5
4 files changed, 73 insertions, 1 deletions
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 1d665a0b202c..c6bde7d7afdc 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1707,3 +1707,15 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
1707 1707
1708 return entry; 1708 return entry;
1709} 1709}
1710
1711void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip)
1712{
1713 regs->ip = ip;
1714 /*
1715 * perf_arch_fetch_caller_regs adds another call, we need to increment
1716 * the skip level
1717 */
1718 regs->bp = rewind_frame_pointer(skip + 1);
1719 regs->cs = __KERNEL_CS;
1720 local_save_flags(regs->flags);
1721}
diff --git a/arch/x86/kernel/dumpstack.h b/arch/x86/kernel/dumpstack.h
index 4fd1420faffa..29e5f7c845b2 100644
--- a/arch/x86/kernel/dumpstack.h
+++ b/arch/x86/kernel/dumpstack.h
@@ -29,4 +29,19 @@ struct stack_frame {
29 struct stack_frame *next_frame; 29 struct stack_frame *next_frame;
30 unsigned long return_address; 30 unsigned long return_address;
31}; 31};
32
/*
 * Return the frame pointer found @n frames above the current one.
 *
 * Starts from the value produced by get_bp() and, when
 * CONFIG_FRAME_POINTER is set, follows stack_frame::next_frame up to @n
 * times. Without CONFIG_FRAME_POINTER no walk is done and the starting
 * value is returned unchanged, whatever @n is.
 *
 * A non-positive @n performs no steps, and the walk stops early once the
 * chain reaches a NULL frame instead of dereferencing it.
 *
 * NOTE(review): non-NULL next_frame values are still followed without
 * checking that they point to valid memory - confirm callers only pass
 * a depth that is known to exist on the current stack.
 */
static inline unsigned long rewind_frame_pointer(int n)
{
	struct stack_frame *frame;

	get_bp(frame);

#ifdef CONFIG_FRAME_POINTER
	while (n-- > 0 && frame)
		frame = frame->next_frame;
#endif

	return (unsigned long)frame;
}
46
47#endif /* DUMPSTACK_H */
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 80acbf3d5de1..70cffd052c04 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -452,6 +452,7 @@ enum perf_callchain_context {
452#include <linux/fs.h> 452#include <linux/fs.h>
453#include <linux/pid_namespace.h> 453#include <linux/pid_namespace.h>
454#include <linux/workqueue.h> 454#include <linux/workqueue.h>
455#include <linux/ftrace.h>
455#include <asm/atomic.h> 456#include <asm/atomic.h>
456 457
457#define PERF_MAX_STACK_DEPTH 255 458#define PERF_MAX_STACK_DEPTH 255
@@ -847,6 +848,44 @@ perf_sw_event(u32 event_id, u64 nr, int nmi, struct pt_regs *regs, u64 addr)
847 __perf_sw_event(event_id, nr, nmi, regs, addr); 848 __perf_sw_event(event_id, nr, nmi, regs, addr);
848} 849}
849 850
851extern void
852perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip);
853
854/*
855 * Take a snapshot of the regs. Skip ip and frame pointer to
856 * the nth caller. We only need a few of the regs:
857 * - ip for PERF_SAMPLE_IP
858 * - cs for user_mode() tests
859 * - bp for callchains
860 * - eflags, for future purposes, just in case
861 */
862static inline void perf_fetch_caller_regs(struct pt_regs *regs, int skip)
863{
864 unsigned long ip;
865
866 memset(regs, 0, sizeof(*regs));
867
868 switch (skip) {
869 case 1 :
870 ip = CALLER_ADDR0;
871 break;
872 case 2 :
873 ip = CALLER_ADDR1;
874 break;
875 case 3 :
876 ip = CALLER_ADDR2;
877 break;
878 case 4:
879 ip = CALLER_ADDR3;
880 break;
881 /* No need to support further for now */
882 default:
883 ip = 0;
884 }
885
886 return perf_arch_fetch_caller_regs(regs, ip, skip);
887}
888
850extern void __perf_event_mmap(struct vm_area_struct *vma); 889extern void __perf_event_mmap(struct vm_area_struct *vma);
851 890
852static inline void perf_event_mmap(struct vm_area_struct *vma) 891static inline void perf_event_mmap(struct vm_area_struct *vma)
@@ -880,7 +919,8 @@ static inline bool perf_paranoid_kernel(void)
880} 919}
881 920
882extern void perf_event_init(void); 921extern void perf_event_init(void);
883extern void perf_tp_event(int event_id, u64 addr, u64 count, void *record, int entry_size); 922extern void perf_tp_event(int event_id, u64 addr, u64 count, void *record,
923 int entry_size, struct pt_regs *regs);
884extern void perf_bp_event(struct perf_event *event, void *data); 924extern void perf_bp_event(struct perf_event *event, void *data);
885 925
886#ifndef perf_misc_flags 926#ifndef perf_misc_flags
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 52c69a34d697..359d7f690c2b 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -2786,6 +2786,11 @@ __weak struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
2786 return NULL; 2786 return NULL;
2787} 2787}
2788 2788
2789__weak
2790void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip)
2791{
2792}
2793
2789/* 2794/*
2790 * Output 2795 * Output
2791 */ 2796 */