diff options
author | Torok Edwin <edwintorok@gmail.com> | 2010-03-17 06:07:16 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2010-04-02 13:30:03 -0400 |
commit | 257ef9d21f1b008a6c7425544b36641c4325a922 (patch) | |
tree | 4ffda980fe6a93acd24efcc04cd10e130caae4c1 /arch | |
parent | b38b24ead33417146e051453d04bf60b8d2d7e25 (diff) |
perf, x86: Fix callgraphs of 32-bit processes on 64-bit kernels
When profiling a 32-bit process on a 64-bit kernel, callgraph tracing
stopped after the first function, because it saw a garbage memory
address (it tried to interpret the 32-bit frame pointer and return
address as 64-bit pointers).
Fix this by using a struct stack_frame with 32-bit pointers when the
TIF_IA32 flag is set.
Note that the TIF_IA32 flag must be used rather than is_compat_task(),
because the latter is only true while the 32-bit process is executing a
syscall, which may not always be the case (for example, when tracing
page fault events).
Signed-off-by: Török Edwin <edwintorok@gmail.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: x86@kernel.org
Cc: linux-kernel@vger.kernel.org
LKML-Reference: <1268820436-13145-1-git-send-email-edwintorok@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch')
-rw-r--r-- | arch/x86/kernel/cpu/perf_event.c | 44 | ||||
-rw-r--r-- | arch/x86/kernel/dumpstack.h | 5 |
2 files changed, 44 insertions, 5 deletions
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index bd28cf9d8a82..53ea4cf1a878 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c | |||
@@ -28,6 +28,7 @@ | |||
28 | #include <asm/apic.h> | 28 | #include <asm/apic.h> |
29 | #include <asm/stacktrace.h> | 29 | #include <asm/stacktrace.h> |
30 | #include <asm/nmi.h> | 30 | #include <asm/nmi.h> |
31 | #include <asm/compat.h> | ||
31 | 32 | ||
32 | static u64 perf_event_mask __read_mostly; | 33 | static u64 perf_event_mask __read_mostly; |
33 | 34 | ||
@@ -1630,14 +1631,42 @@ copy_from_user_nmi(void *to, const void __user *from, unsigned long n) | |||
1630 | return len; | 1631 | return len; |
1631 | } | 1632 | } |
1632 | 1633 | ||
1633 | static int copy_stack_frame(const void __user *fp, struct stack_frame *frame) | 1634 | #ifdef CONFIG_COMPAT |
1635 | static inline int | ||
1636 | perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry) | ||
1634 | { | 1637 | { |
1635 | unsigned long bytes; | 1638 | /* 32-bit process in 64-bit kernel. */ |
1639 | struct stack_frame_ia32 frame; | ||
1640 | const void __user *fp; | ||
1641 | |||
1642 | if (!test_thread_flag(TIF_IA32)) | ||
1643 | return 0; | ||
1644 | |||
1645 | fp = compat_ptr(regs->bp); | ||
1646 | while (entry->nr < PERF_MAX_STACK_DEPTH) { | ||
1647 | unsigned long bytes; | ||
1648 | frame.next_frame = 0; | ||
1649 | frame.return_address = 0; | ||
1636 | 1650 | ||
1637 | bytes = copy_from_user_nmi(frame, fp, sizeof(*frame)); | 1651 | bytes = copy_from_user_nmi(&frame, fp, sizeof(frame)); |
1652 | if (bytes != sizeof(frame)) | ||
1653 | break; | ||
1654 | |||
1655 | if (fp < compat_ptr(regs->sp)) | ||
1656 | break; | ||
1638 | 1657 | ||
1639 | return bytes == sizeof(*frame); | 1658 | callchain_store(entry, frame.return_address); |
1659 | fp = compat_ptr(frame.next_frame); | ||
1660 | } | ||
1661 | return 1; | ||
1640 | } | 1662 | } |
1663 | #else | ||
1664 | static inline int | ||
1665 | perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry) | ||
1666 | { | ||
1667 | return 0; | ||
1668 | } | ||
1669 | #endif | ||
1641 | 1670 | ||
1642 | static void | 1671 | static void |
1643 | perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry) | 1672 | perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry) |
@@ -1653,11 +1682,16 @@ perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry) | |||
1653 | callchain_store(entry, PERF_CONTEXT_USER); | 1682 | callchain_store(entry, PERF_CONTEXT_USER); |
1654 | callchain_store(entry, regs->ip); | 1683 | callchain_store(entry, regs->ip); |
1655 | 1684 | ||
1685 | if (perf_callchain_user32(regs, entry)) | ||
1686 | return; | ||
1687 | |||
1656 | while (entry->nr < PERF_MAX_STACK_DEPTH) { | 1688 | while (entry->nr < PERF_MAX_STACK_DEPTH) { |
1689 | unsigned long bytes; | ||
1657 | frame.next_frame = NULL; | 1690 | frame.next_frame = NULL; |
1658 | frame.return_address = 0; | 1691 | frame.return_address = 0; |
1659 | 1692 | ||
1660 | if (!copy_stack_frame(fp, &frame)) | 1693 | bytes = copy_from_user_nmi(&frame, fp, sizeof(frame)); |
1694 | if (bytes != sizeof(frame)) | ||
1661 | break; | 1695 | break; |
1662 | 1696 | ||
1663 | if ((unsigned long)fp < regs->sp) | 1697 | if ((unsigned long)fp < regs->sp) |
diff --git a/arch/x86/kernel/dumpstack.h b/arch/x86/kernel/dumpstack.h index 29e5f7c845b2..e39e77168a37 100644 --- a/arch/x86/kernel/dumpstack.h +++ b/arch/x86/kernel/dumpstack.h | |||
@@ -30,6 +30,11 @@ struct stack_frame { | |||
30 | unsigned long return_address; | 30 | unsigned long return_address; |
31 | }; | 31 | }; |
32 | 32 | ||
33 | struct stack_frame_ia32 { | ||
34 | u32 next_frame; | ||
35 | u32 return_address; | ||
36 | }; | ||
37 | |||
33 | static inline unsigned long rewind_frame_pointer(int n) | 38 | static inline unsigned long rewind_frame_pointer(int n) |
34 | { | 39 | { |
35 | struct stack_frame *frame; | 40 | struct stack_frame *frame; |