author     Frederic Weisbecker <fweisbec@gmail.com>   2010-06-30 17:03:51 -0400
committer  Frederic Weisbecker <fweisbec@gmail.com>   2010-08-18 19:30:59 -0400
commit     56962b4449af34070bb1994621ef4f0265eed4d8
tree       b4c5dfee35d272c71cba80e75a51cb3e7070e430 /arch
parent     70791ce9ba68a5921c9905ef05d23f62a90bc10c
perf: Generalize some arch callchain code
- Most archs use one callchain buffer per cpu, except x86, which also
  has to deal with NMIs. Provide a default perf_callchain_buffer()
  implementation that x86 overrides.
- Centralize all the kernel/user regs handling and invoke the new arch
  handlers from there: perf_callchain_user() / perf_callchain_kernel().
  That avoids duplicating the user_mode() and current->mm checks in
  every arch (see the sketch below).
- Invert the parameters of the perf_callchain_*() helpers: entry on the
  left, regs on the right, following the traditional (dst, src) order.
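For context, the generic half of this change lives in kernel/perf_event.c and
is therefore outside this arch-limited diffstat. Below is a minimal sketch of
what that side plausibly looks like, assuming __weak defaults that archs
override; the buffer names mirror the x86 hunk further down
(perf_callchain_entry, perf_callchain_entry_nmi), but the exact generic code
is not shown on this page:

/* Default buffer: one per-cpu entry. x86 overrides this to hand out a
 * dedicated entry when running in NMI context. */
static DEFINE_PER_CPU(struct perf_callchain_entry, perf_callchain_entry);

__weak struct perf_callchain_entry *perf_callchain_buffer(void)
{
	return &__get_cpu_var(perf_callchain_entry);
}

/* Weak stubs so archs without callchain support need no code at all. */
__weak void perf_callchain_kernel(struct perf_callchain_entry *entry,
				  struct pt_regs *regs) { }
__weak void perf_callchain_user(struct perf_callchain_entry *entry,
				struct pt_regs *regs) { }

/* Centralized kernel/user regs handling: every arch used to open-code
 * this user_mode()/current->mm dance in its own perf_callchain(). */
struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
{
	struct perf_callchain_entry *entry = perf_callchain_buffer();

	if (!entry)
		return NULL;

	entry->nr = 0;

	if (!user_mode(regs)) {
		perf_callchain_kernel(entry, regs);
		regs = current->mm ? task_pt_regs(current) : NULL;
	}

	if (regs)
		perf_callchain_user(entry, regs);

	return entry;
}

x86's override below returns perf_callchain_entry_nmi when in_nmi(), which is
why it remains the only arch keeping two buffers.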
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Acked-by: Paul Mackerras <paulus@samba.org>
Tested-by: Will Deacon <will.deacon@arm.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Stephane Eranian <eranian@google.com>
Cc: David Miller <davem@davemloft.net>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: Borislav Petkov <bp@amd64.org>
Diffstat (limited to 'arch')
 arch/arm/kernel/perf_event.c         | 43
 arch/powerpc/kernel/perf_callchain.c | 49
 arch/sh/kernel/perf_callchain.c      | 37
 arch/sparc/kernel/perf_event.c       | 46
 arch/x86/kernel/cpu/perf_event.c     | 45
 5 files changed, 43 insertions(+), 177 deletions(-)
diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
index a07c3b1955f0..0e3bbdb15927 100644
--- a/arch/arm/kernel/perf_event.c
+++ b/arch/arm/kernel/perf_event.c
@@ -3044,17 +3044,13 @@ user_backtrace(struct frame_tail *tail,
 	return buftail.fp - 1;
 }
 
-static void
-perf_callchain_user(struct pt_regs *regs,
-		    struct perf_callchain_entry *entry)
+void
+perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
 {
 	struct frame_tail *tail;
 
 	perf_callchain_store(entry, PERF_CONTEXT_USER);
 
-	if (!user_mode(regs))
-		regs = task_pt_regs(current);
-
 	tail = (struct frame_tail *)regs->ARM_fp - 1;
 
 	while (tail && !((unsigned long)tail & 0x3))
@@ -3075,9 +3071,8 @@ callchain_trace(struct stackframe *fr,
 	return 0;
 }
 
-static void
-perf_callchain_kernel(struct pt_regs *regs,
-		      struct perf_callchain_entry *entry)
+void
+perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
 {
 	struct stackframe fr;
 
@@ -3088,33 +3083,3 @@ perf_callchain_kernel(struct pt_regs *regs,
 	fr.pc = regs->ARM_pc;
 	walk_stackframe(&fr, callchain_trace, entry);
 }
-
-static void
-perf_do_callchain(struct pt_regs *regs,
-		  struct perf_callchain_entry *entry)
-{
-	int is_user;
-
-	if (!regs)
-		return;
-
-	is_user = user_mode(regs);
-
-	if (!is_user)
-		perf_callchain_kernel(regs, entry);
-
-	if (current->mm)
-		perf_callchain_user(regs, entry);
-}
-
-static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry);
-
-struct perf_callchain_entry *
-perf_callchain(struct pt_regs *regs)
-{
-	struct perf_callchain_entry *entry = &__get_cpu_var(pmc_irq_entry);
-
-	entry->nr = 0;
-	perf_do_callchain(regs, entry);
-	return entry;
-}
diff --git a/arch/powerpc/kernel/perf_callchain.c b/arch/powerpc/kernel/perf_callchain.c
index a286c2e5a3ea..f7a85ede8407 100644
--- a/arch/powerpc/kernel/perf_callchain.c
+++ b/arch/powerpc/kernel/perf_callchain.c
@@ -46,8 +46,8 @@ static int valid_next_sp(unsigned long sp, unsigned long prev_sp)
 	return 0;
 }
 
-static void perf_callchain_kernel(struct pt_regs *regs,
-				  struct perf_callchain_entry *entry)
+void
+perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
 {
 	unsigned long sp, next_sp;
 	unsigned long next_ip;
@@ -221,8 +221,8 @@ static int sane_signal_64_frame(unsigned long sp)
 		puc == (unsigned long) &sf->uc;
 }
 
-static void perf_callchain_user_64(struct pt_regs *regs,
-				   struct perf_callchain_entry *entry)
+static void perf_callchain_user_64(struct perf_callchain_entry *entry,
+				   struct pt_regs *regs)
 {
 	unsigned long sp, next_sp;
 	unsigned long next_ip;
@@ -303,8 +303,8 @@ static int read_user_stack_32(unsigned int __user *ptr, unsigned int *ret)
 	return __get_user_inatomic(*ret, ptr);
 }
 
-static inline void perf_callchain_user_64(struct pt_regs *regs,
-					  struct perf_callchain_entry *entry)
+static inline void perf_callchain_user_64(struct perf_callchain_entry *entry,
+					  struct pt_regs *regs)
 {
 }
 
@@ -423,8 +423,8 @@ static unsigned int __user *signal_frame_32_regs(unsigned int sp,
 	return mctx->mc_gregs;
 }
 
-static void perf_callchain_user_32(struct pt_regs *regs,
-				   struct perf_callchain_entry *entry)
+static void perf_callchain_user_32(struct perf_callchain_entry *entry,
+				   struct pt_regs *regs)
 {
 	unsigned int sp, next_sp;
 	unsigned int next_ip;
@@ -471,32 +471,11 @@ static void perf_callchain_user_32(struct pt_regs *regs,
 	}
 }
 
-/*
- * Since we can't get PMU interrupts inside a PMU interrupt handler,
- * we don't need separate irq and nmi entries here.
- */
-static DEFINE_PER_CPU(struct perf_callchain_entry, cpu_perf_callchain);
-
-struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
+void
+perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
 {
-	struct perf_callchain_entry *entry = &__get_cpu_var(cpu_perf_callchain);
-
-	entry->nr = 0;
-
-	if (!user_mode(regs)) {
-		perf_callchain_kernel(regs, entry);
-		if (current->mm)
-			regs = task_pt_regs(current);
-		else
-			regs = NULL;
-	}
-
-	if (regs) {
-		if (current_is_64bit())
-			perf_callchain_user_64(regs, entry);
-		else
-			perf_callchain_user_32(regs, entry);
-	}
-
-	return entry;
+	if (current_is_64bit())
+		perf_callchain_user_64(entry, regs);
+	else
+		perf_callchain_user_32(entry, regs);
 }
diff --git a/arch/sh/kernel/perf_callchain.c b/arch/sh/kernel/perf_callchain.c
index 00143f3dd196..ef076a91292a 100644
--- a/arch/sh/kernel/perf_callchain.c
+++ b/arch/sh/kernel/perf_callchain.c
@@ -44,44 +44,11 @@ static const struct stacktrace_ops callchain_ops = {
 	.address = callchain_address,
 };
 
-static void
-perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry)
+void
+perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
 {
 	perf_callchain_store(entry, PERF_CONTEXT_KERNEL);
 	perf_callchain_store(entry, regs->pc);
 
 	unwind_stack(NULL, regs, NULL, &callchain_ops, entry);
 }
-
-static void
-perf_do_callchain(struct pt_regs *regs, struct perf_callchain_entry *entry)
-{
-	int is_user;
-
-	if (!regs)
-		return;
-
-	is_user = user_mode(regs);
-
-	/*
-	 * Only the kernel side is implemented for now.
-	 */
-	if (!is_user)
-		perf_callchain_kernel(regs, entry);
-}
-
-/*
- * No need for separate IRQ and NMI entries.
- */
-static DEFINE_PER_CPU(struct perf_callchain_entry, callchain);
-
-struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
-{
-	struct perf_callchain_entry *entry = &__get_cpu_var(callchain);
-
-	entry->nr = 0;
-
-	perf_do_callchain(regs, entry);
-
-	return entry;
-}
diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c
index 2a95a9079862..460162d74aba 100644
--- a/arch/sparc/kernel/perf_event.c
+++ b/arch/sparc/kernel/perf_event.c
@@ -1283,14 +1283,16 @@ void __init init_hw_perf_events(void)
 	register_die_notifier(&perf_event_nmi_notifier);
 }
 
-static void perf_callchain_kernel(struct pt_regs *regs,
-				  struct perf_callchain_entry *entry)
+void perf_callchain_kernel(struct perf_callchain_entry *entry,
+			   struct pt_regs *regs)
 {
 	unsigned long ksp, fp;
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 	int graph = 0;
 #endif
 
+	stack_trace_flush();
+
 	perf_callchain_store(entry, PERF_CONTEXT_KERNEL);
 	perf_callchain_store(entry, regs->tpc);
 
@@ -1330,8 +1332,8 @@ static void perf_callchain_kernel(struct pt_regs *regs,
 	} while (entry->nr < PERF_MAX_STACK_DEPTH);
 }
 
-static void perf_callchain_user_64(struct pt_regs *regs,
-				   struct perf_callchain_entry *entry)
+static void perf_callchain_user_64(struct perf_callchain_entry *entry,
+				   struct pt_regs *regs)
 {
 	unsigned long ufp;
 
@@ -1353,8 +1355,8 @@ static void perf_callchain_user_64(struct pt_regs *regs,
 	} while (entry->nr < PERF_MAX_STACK_DEPTH);
 }
 
-static void perf_callchain_user_32(struct pt_regs *regs,
-				   struct perf_callchain_entry *entry)
+static void perf_callchain_user_32(struct perf_callchain_entry *entry,
+				   struct pt_regs *regs)
 {
 	unsigned long ufp;
 
@@ -1376,30 +1378,12 @@ static void perf_callchain_user_32(struct pt_regs *regs,
 	} while (entry->nr < PERF_MAX_STACK_DEPTH);
 }
 
-/* Like powerpc we can't get PMU interrupts within the PMU handler,
- * so no need for separate NMI and IRQ chains as on x86.
- */
-static DEFINE_PER_CPU(struct perf_callchain_entry, callchain);
-
-struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
+void
+perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
 {
-	struct perf_callchain_entry *entry = &__get_cpu_var(callchain);
-
-	entry->nr = 0;
-	if (!user_mode(regs)) {
-		stack_trace_flush();
-		perf_callchain_kernel(regs, entry);
-		if (current->mm)
-			regs = task_pt_regs(current);
-		else
-			regs = NULL;
-	}
-	if (regs) {
-		flushw_user();
-		if (test_thread_flag(TIF_32BIT))
-			perf_callchain_user_32(regs, entry);
-		else
-			perf_callchain_user_64(regs, entry);
-	}
-	return entry;
+	flushw_user();
+	if (test_thread_flag(TIF_32BIT))
+		perf_callchain_user_32(entry, regs);
+	else
+		perf_callchain_user_64(entry, regs);
 }
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 8af28caeafc1..39f8421b86e6 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1571,9 +1571,7 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
  * callchain support
  */
 
-
-static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry);
-static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_nmi_entry);
+static DEFINE_PER_CPU(struct perf_callchain_entry, perf_callchain_entry_nmi);
 
 
 static void
@@ -1607,8 +1605,8 @@ static const struct stacktrace_ops backtrace_ops = {
 	.walk_stack		= print_context_stack_bp,
 };
 
-static void
-perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry)
+void
+perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
 {
 	perf_callchain_store(entry, PERF_CONTEXT_KERNEL);
 	perf_callchain_store(entry, regs->ip);
@@ -1653,14 +1651,12 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
 }
 #endif
 
-static void
-perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry)
+void
+perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
 {
 	struct stack_frame frame;
 	const void __user *fp;
 
-	if (!user_mode(regs))
-		regs = task_pt_regs(current);
 
 	fp = (void __user *)regs->bp;
 
@@ -1687,42 +1683,17 @@ perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry)
 	}
 }
 
-static void
-perf_do_callchain(struct pt_regs *regs, struct perf_callchain_entry *entry)
-{
-	int is_user;
-
-	if (!regs)
-		return;
-
-	is_user = user_mode(regs);
-
-	if (!is_user)
-		perf_callchain_kernel(regs, entry);
-
-	if (current->mm)
-		perf_callchain_user(regs, entry);
-}
-
-struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
+struct perf_callchain_entry *perf_callchain_buffer(void)
 {
-	struct perf_callchain_entry *entry;
-
 	if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
 		/* TODO: We don't support guest os callchain now */
 		return NULL;
 	}
 
 	if (in_nmi())
-		entry = &__get_cpu_var(pmc_nmi_entry);
-	else
-		entry = &__get_cpu_var(pmc_irq_entry);
-
-	entry->nr = 0;
-
-	perf_do_callchain(regs, entry);
+		return &__get_cpu_var(perf_callchain_entry_nmi);
 
-	return entry;
+	return &__get_cpu_var(perf_callchain_entry);
 }
 
 unsigned long perf_instruction_pointer(struct pt_regs *regs)