author    Frederic Weisbecker <fweisbec@gmail.com>  2010-06-30 17:03:51 -0400
committer Frederic Weisbecker <fweisbec@gmail.com>  2010-08-18 19:30:59 -0400
commit    56962b4449af34070bb1994621ef4f0265eed4d8 (patch)
tree      b4c5dfee35d272c71cba80e75a51cb3e7070e430 /arch
parent    70791ce9ba68a5921c9905ef05d23f62a90bc10c (diff)
perf: Generalize some arch callchain code
- Most archs use one callchain buffer per cpu, except x86 that needs
  to deal with NMIs. Provide a default perf_callchain_buffer()
  implementation that x86 overrides.

- Centralize all the kernel/user regs handling and invoke new arch
  handlers from there: perf_callchain_user() / perf_callchain_kernel().
  This removes the scattered user_mode() and current->mm checks from
  the arch code.

- Invert some parameters in the perf_callchain_*() helpers: entry to
  the left, regs to the right, following the traditional (dst, src)
  order.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Acked-by: Paul Mackerras <paulus@samba.org>
Tested-by: Will Deacon <will.deacon@arm.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Stephane Eranian <eranian@google.com>
Cc: David Miller <davem@davemloft.net>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: Borislav Petkov <bp@amd64.org>
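The generic side of this change lands in kernel/perf_event.c, which the
arch-limited diffstat below does not show. What follows is a minimal
sketch of the centralized helper the bullets above describe, assuming a
__weak perf_callchain_buffer() default and the new (entry, regs) arch
hooks; the exact generic hunk is not reproduced here:

/*
 * Sketch only: approximates the centralized kernel/user regs handling
 * that each arch previously duplicated in its own perf_do_callchain()
 * or perf_callchain() copy (see the removed hunks below).
 */
__weak struct perf_callchain_entry *perf_callchain_buffer(void)
{
        /* Default: one callchain buffer per cpu. */
        return &__get_cpu_var(perf_callchain_entry);
}

struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
{
        struct perf_callchain_entry *entry = perf_callchain_buffer();

        if (!entry)     /* e.g. x86 while in a guest: no callchain */
                return NULL;

        entry->nr = 0;

        if (!user_mode(regs)) {
                perf_callchain_kernel(entry, regs);
                /* Continue with the user stack if the task has one. */
                if (current->mm)
                        regs = task_pt_regs(current);
                else
                        regs = NULL;
        }

        if (regs)
                perf_callchain_user(entry, regs);

        return entry;
}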
Diffstat (limited to 'arch')
-rw-r--r--  arch/arm/kernel/perf_event.c          |  43
-rw-r--r--  arch/powerpc/kernel/perf_callchain.c  |  49
-rw-r--r--  arch/sh/kernel/perf_callchain.c       |  37
-rw-r--r--  arch/sparc/kernel/perf_event.c        |  46
-rw-r--r--  arch/x86/kernel/cpu/perf_event.c      |  45
5 files changed, 43 insertions(+), 177 deletions(-)
diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
index a07c3b1955f0..0e3bbdb15927 100644
--- a/arch/arm/kernel/perf_event.c
+++ b/arch/arm/kernel/perf_event.c
@@ -3044,17 +3044,13 @@ user_backtrace(struct frame_tail *tail,
         return buftail.fp - 1;
 }
 
-static void
-perf_callchain_user(struct pt_regs *regs,
-                    struct perf_callchain_entry *entry)
+void
+perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
 {
         struct frame_tail *tail;
 
         perf_callchain_store(entry, PERF_CONTEXT_USER);
 
-        if (!user_mode(regs))
-                regs = task_pt_regs(current);
-
         tail = (struct frame_tail *)regs->ARM_fp - 1;
 
         while (tail && !((unsigned long)tail & 0x3))
@@ -3075,9 +3071,8 @@ callchain_trace(struct stackframe *fr,
         return 0;
 }
 
-static void
-perf_callchain_kernel(struct pt_regs *regs,
-                      struct perf_callchain_entry *entry)
+void
+perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
 {
         struct stackframe fr;
 
@@ -3088,33 +3083,3 @@ perf_callchain_kernel(struct pt_regs *regs,
         fr.pc = regs->ARM_pc;
         walk_stackframe(&fr, callchain_trace, entry);
 }
-
-static void
-perf_do_callchain(struct pt_regs *regs,
-                  struct perf_callchain_entry *entry)
-{
-        int is_user;
-
-        if (!regs)
-                return;
-
-        is_user = user_mode(regs);
-
-        if (!is_user)
-                perf_callchain_kernel(regs, entry);
-
-        if (current->mm)
-                perf_callchain_user(regs, entry);
-}
-
-static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry);
-
-struct perf_callchain_entry *
-perf_callchain(struct pt_regs *regs)
-{
-        struct perf_callchain_entry *entry = &__get_cpu_var(pmc_irq_entry);
-
-        entry->nr = 0;
-        perf_do_callchain(regs, entry);
-        return entry;
-}
diff --git a/arch/powerpc/kernel/perf_callchain.c b/arch/powerpc/kernel/perf_callchain.c
index a286c2e5a3ea..f7a85ede8407 100644
--- a/arch/powerpc/kernel/perf_callchain.c
+++ b/arch/powerpc/kernel/perf_callchain.c
@@ -46,8 +46,8 @@ static int valid_next_sp(unsigned long sp, unsigned long prev_sp)
         return 0;
 }
 
-static void perf_callchain_kernel(struct pt_regs *regs,
-                                  struct perf_callchain_entry *entry)
+void
+perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
 {
         unsigned long sp, next_sp;
         unsigned long next_ip;
@@ -221,8 +221,8 @@ static int sane_signal_64_frame(unsigned long sp)
                puc == (unsigned long) &sf->uc;
 }
 
-static void perf_callchain_user_64(struct pt_regs *regs,
-                                   struct perf_callchain_entry *entry)
+static void perf_callchain_user_64(struct perf_callchain_entry *entry,
+                                   struct pt_regs *regs)
 {
         unsigned long sp, next_sp;
         unsigned long next_ip;
@@ -303,8 +303,8 @@ static int read_user_stack_32(unsigned int __user *ptr, unsigned int *ret)
         return __get_user_inatomic(*ret, ptr);
 }
 
-static inline void perf_callchain_user_64(struct pt_regs *regs,
-                                          struct perf_callchain_entry *entry)
+static inline void perf_callchain_user_64(struct perf_callchain_entry *entry,
+                                          struct pt_regs *regs)
 {
 }
 
@@ -423,8 +423,8 @@ static unsigned int __user *signal_frame_32_regs(unsigned int sp,
         return mctx->mc_gregs;
 }
 
-static void perf_callchain_user_32(struct pt_regs *regs,
-                                   struct perf_callchain_entry *entry)
+static void perf_callchain_user_32(struct perf_callchain_entry *entry,
+                                   struct pt_regs *regs)
 {
         unsigned int sp, next_sp;
         unsigned int next_ip;
@@ -471,32 +471,11 @@ static void perf_callchain_user_32(struct pt_regs *regs,
         }
 }
 
-/*
- * Since we can't get PMU interrupts inside a PMU interrupt handler,
- * we don't need separate irq and nmi entries here.
- */
-static DEFINE_PER_CPU(struct perf_callchain_entry, cpu_perf_callchain);
-
-struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
+void
+perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
 {
-        struct perf_callchain_entry *entry = &__get_cpu_var(cpu_perf_callchain);
-
-        entry->nr = 0;
-
-        if (!user_mode(regs)) {
-                perf_callchain_kernel(regs, entry);
-                if (current->mm)
-                        regs = task_pt_regs(current);
-                else
-                        regs = NULL;
-        }
-
-        if (regs) {
-                if (current_is_64bit())
-                        perf_callchain_user_64(regs, entry);
-                else
-                        perf_callchain_user_32(regs, entry);
-        }
-
-        return entry;
+        if (current_is_64bit())
+                perf_callchain_user_64(entry, regs);
+        else
+                perf_callchain_user_32(entry, regs);
 }
diff --git a/arch/sh/kernel/perf_callchain.c b/arch/sh/kernel/perf_callchain.c
index 00143f3dd196..ef076a91292a 100644
--- a/arch/sh/kernel/perf_callchain.c
+++ b/arch/sh/kernel/perf_callchain.c
@@ -44,44 +44,11 @@ static const struct stacktrace_ops callchain_ops = {
         .address = callchain_address,
 };
 
-static void
-perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry)
+void
+perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
 {
         perf_callchain_store(entry, PERF_CONTEXT_KERNEL);
         perf_callchain_store(entry, regs->pc);
 
         unwind_stack(NULL, regs, NULL, &callchain_ops, entry);
 }
-
-static void
-perf_do_callchain(struct pt_regs *regs, struct perf_callchain_entry *entry)
-{
-        int is_user;
-
-        if (!regs)
-                return;
-
-        is_user = user_mode(regs);
-
-        /*
-         * Only the kernel side is implemented for now.
-         */
-        if (!is_user)
-                perf_callchain_kernel(regs, entry);
-}
-
-/*
- * No need for separate IRQ and NMI entries.
- */
-static DEFINE_PER_CPU(struct perf_callchain_entry, callchain);
-
-struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
-{
-        struct perf_callchain_entry *entry = &__get_cpu_var(callchain);
-
-        entry->nr = 0;
-
-        perf_do_callchain(regs, entry);
-
-        return entry;
-}
diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c
index 2a95a9079862..460162d74aba 100644
--- a/arch/sparc/kernel/perf_event.c
+++ b/arch/sparc/kernel/perf_event.c
@@ -1283,14 +1283,16 @@ void __init init_hw_perf_events(void)
         register_die_notifier(&perf_event_nmi_notifier);
 }
 
-static void perf_callchain_kernel(struct pt_regs *regs,
-                                  struct perf_callchain_entry *entry)
+void perf_callchain_kernel(struct perf_callchain_entry *entry,
+                           struct pt_regs *regs)
 {
         unsigned long ksp, fp;
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
         int graph = 0;
 #endif
 
+        stack_trace_flush();
+
         perf_callchain_store(entry, PERF_CONTEXT_KERNEL);
         perf_callchain_store(entry, regs->tpc);
 
@@ -1330,8 +1332,8 @@ static void perf_callchain_kernel(struct pt_regs *regs,
         } while (entry->nr < PERF_MAX_STACK_DEPTH);
 }
 
-static void perf_callchain_user_64(struct pt_regs *regs,
-                                   struct perf_callchain_entry *entry)
+static void perf_callchain_user_64(struct perf_callchain_entry *entry,
+                                   struct pt_regs *regs)
 {
         unsigned long ufp;
 
@@ -1353,8 +1355,8 @@ static void perf_callchain_user_64(struct pt_regs *regs,
         } while (entry->nr < PERF_MAX_STACK_DEPTH);
 }
 
-static void perf_callchain_user_32(struct pt_regs *regs,
-                                   struct perf_callchain_entry *entry)
+static void perf_callchain_user_32(struct perf_callchain_entry *entry,
+                                   struct pt_regs *regs)
 {
         unsigned long ufp;
 
@@ -1376,30 +1378,12 @@ static void perf_callchain_user_32(struct pt_regs *regs,
         } while (entry->nr < PERF_MAX_STACK_DEPTH);
 }
 
-/* Like powerpc we can't get PMU interrupts within the PMU handler,
- * so no need for separate NMI and IRQ chains as on x86.
- */
-static DEFINE_PER_CPU(struct perf_callchain_entry, callchain);
-
-struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
+void
+perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
 {
-        struct perf_callchain_entry *entry = &__get_cpu_var(callchain);
-
-        entry->nr = 0;
-        if (!user_mode(regs)) {
-                stack_trace_flush();
-                perf_callchain_kernel(regs, entry);
-                if (current->mm)
-                        regs = task_pt_regs(current);
-                else
-                        regs = NULL;
-        }
-        if (regs) {
-                flushw_user();
-                if (test_thread_flag(TIF_32BIT))
-                        perf_callchain_user_32(regs, entry);
-                else
-                        perf_callchain_user_64(regs, entry);
-        }
-        return entry;
+        flushw_user();
+        if (test_thread_flag(TIF_32BIT))
+                perf_callchain_user_32(entry, regs);
+        else
+                perf_callchain_user_64(entry, regs);
 }
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 8af28caeafc1..39f8421b86e6 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1571,9 +1571,7 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
  * callchain support
  */
 
-
-static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry);
-static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_nmi_entry);
+static DEFINE_PER_CPU(struct perf_callchain_entry, perf_callchain_entry_nmi);
 
 
 static void
@@ -1607,8 +1605,8 @@ static const struct stacktrace_ops backtrace_ops = {
         .walk_stack = print_context_stack_bp,
 };
 
-static void
-perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry)
+void
+perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
 {
         perf_callchain_store(entry, PERF_CONTEXT_KERNEL);
         perf_callchain_store(entry, regs->ip);
@@ -1653,14 +1651,12 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
 }
 #endif
 
-static void
-perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry)
+void
+perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
 {
         struct stack_frame frame;
         const void __user *fp;
 
-        if (!user_mode(regs))
-                regs = task_pt_regs(current);
-
         fp = (void __user *)regs->bp;
 
@@ -1687,42 +1683,17 @@ perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry)
         }
 }
 
-static void
-perf_do_callchain(struct pt_regs *regs, struct perf_callchain_entry *entry)
-{
-        int is_user;
-
-        if (!regs)
-                return;
-
-        is_user = user_mode(regs);
-
-        if (!is_user)
-                perf_callchain_kernel(regs, entry);
-
-        if (current->mm)
-                perf_callchain_user(regs, entry);
-}
-
-struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
+struct perf_callchain_entry *perf_callchain_buffer(void)
 {
-        struct perf_callchain_entry *entry;
-
         if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
                 /* TODO: We don't support guest os callchain now */
                 return NULL;
         }
 
         if (in_nmi())
-                entry = &__get_cpu_var(pmc_nmi_entry);
-        else
-                entry = &__get_cpu_var(pmc_irq_entry);
-
-        entry->nr = 0;
-
-        perf_do_callchain(regs, entry);
+                return &__get_cpu_var(perf_callchain_entry_nmi);
 
-        return entry;
+        return &__get_cpu_var(perf_callchain_entry);
 }
 
 unsigned long perf_instruction_pointer(struct pt_regs *regs)
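The first bullet of the commit message relies on weak linkage: a __weak
definition survives only if no strong definition of the same symbol
exists at link time, which is how the x86 perf_callchain_buffer() above
displaces the generic per-cpu default. A self-contained userspace
illustration of weak-symbol semantics (hypothetical names, GCC/Clang
attribute; this variant only declares the weak symbol, so it resolves
to NULL and the caller falls back to a default):

#include <stdio.h>

/* Weak declaration: if no object file provides a definition, the
 * symbol resolves to NULL instead of causing a link error. */
__attribute__((weak)) const char *arch_callchain_buffer_kind(void);

int main(void)
{
        /* The address of an undefined weak symbol is NULL, so the
         * default path runs; linking in a strong definition (as
         * arch/x86 does for perf_callchain_buffer) flips the branch. */
        if (arch_callchain_buffer_kind)
                puts(arch_callchain_buffer_kind());
        else
                puts("generic per-cpu buffer");
        return 0;
}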