aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Arnaldo Carvalho de Melo <acme@redhat.com> 2016-05-12 12:06:21 -0400
committer: Arnaldo Carvalho de Melo <acme@redhat.com> 2016-05-16 22:11:53 -0400
commit: c85b03349640b34f3545503c8429fc43005e9a92 (patch)
tree: 4f226f1fa1c7b0399fba6986100418427e30870e
parent: 3e4de4ec4cfea40994b47a79767610153edbf45b (diff)
perf core: Separate accounting of contexts and real addresses in a stack trace
The perf_sample->ip_callchain->nr value includes all the entries in the
ip_callchain->ip[] array, real addresses and PERF_CONTEXT_{KERNEL,USER,etc},
while what the user expects is that what is in the kernel.perf_event_max_stack
sysctl or in the upcoming per event perf_event_attr.sample_max_stack knob be
honoured in terms of IP addresses in the stack trace.

So allocate a bunch of extra entries for contexts, and do the accounting
via perf_callchain_entry_ctx struct members.

A new sysctl, kernel.perf_event_max_contexts_per_stack is also
introduced for investigating possible bugs in the callchain
implementation by some arch.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Brendan Gregg <brendan.d.gregg@gmail.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: He Kuang <hekuang@huawei.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Milian Wolff <milian.wolff@kdab.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Cc: Wang Nan <wangnan0@huawei.com>
Cc: Zefan Li <lizefan@huawei.com>
Link: http://lkml.kernel.org/n/tip-3b4wnqk340c4sg4gwkfdi9yk@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
-rw-r--r-- Documentation/sysctl/kernel.txt 14
-rw-r--r-- include/linux/perf_event.h 18
-rw-r--r-- include/uapi/linux/perf_event.h 1
-rw-r--r-- kernel/events/callchain.c 10
-rw-r--r-- kernel/sysctl.c 9
5 files changed, 49 insertions, 3 deletions
diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt
index daabdd7ee543..a3683ce2a2f3 100644
--- a/Documentation/sysctl/kernel.txt
+++ b/Documentation/sysctl/kernel.txt
@@ -61,6 +61,7 @@ show up in /proc/sys/kernel:
61- perf_cpu_time_max_percent 61- perf_cpu_time_max_percent
62- perf_event_paranoid 62- perf_event_paranoid
63- perf_event_max_stack 63- perf_event_max_stack
64- perf_event_max_contexts_per_stack
64- pid_max 65- pid_max
65- powersave-nap [ PPC only ] 66- powersave-nap [ PPC only ]
66- printk 67- printk
@@ -668,6 +669,19 @@ The default value is 127.
668 669
669============================================================== 670==============================================================
670 671
672perf_event_max_contexts_per_stack:
673
674Controls maximum number of stack frame context entries for
675(attr.sample_type & PERF_SAMPLE_CALLCHAIN) configured events, for
676instance, when using 'perf record -g' or 'perf trace --call-graph fp'.
677
678This can only be done when no events are in use that have callchains
679enabled, otherwise writing to this file will return -EBUSY.
680
681The default value is 8.
682
683==============================================================
684
671pid_max: 685pid_max:
672 686
673PID allocation wrap value. When the kernel's next PID value 687PID allocation wrap value. When the kernel's next PID value
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 2024b14cc2b1..6b87be908790 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -65,6 +65,8 @@ struct perf_callchain_entry_ctx {
65 struct perf_callchain_entry *entry; 65 struct perf_callchain_entry *entry;
66 u32 max_stack; 66 u32 max_stack;
67 u32 nr; 67 u32 nr;
68 short contexts;
69 bool contexts_maxed;
68}; 70};
69 71
70struct perf_raw_record { 72struct perf_raw_record {
@@ -1078,12 +1080,24 @@ extern int get_callchain_buffers(void);
1078extern void put_callchain_buffers(void); 1080extern void put_callchain_buffers(void);
1079 1081
1080extern int sysctl_perf_event_max_stack; 1082extern int sysctl_perf_event_max_stack;
1083extern int sysctl_perf_event_max_contexts_per_stack;
1081 1084
1082#define perf_callchain_store_context(ctx, context) perf_callchain_store(ctx, context) 1085static inline int perf_callchain_store_context(struct perf_callchain_entry_ctx *ctx, u64 ip)
1086{
1087 if (ctx->contexts < sysctl_perf_event_max_contexts_per_stack) {
1088 struct perf_callchain_entry *entry = ctx->entry;
1089 entry->ip[entry->nr++] = ip;
1090 ++ctx->contexts;
1091 return 0;
1092 } else {
1093 ctx->contexts_maxed = true;
1094 return -1; /* no more room, stop walking the stack */
1095 }
1096}
1083 1097
1084static inline int perf_callchain_store(struct perf_callchain_entry_ctx *ctx, u64 ip) 1098static inline int perf_callchain_store(struct perf_callchain_entry_ctx *ctx, u64 ip)
1085{ 1099{
1086 if (ctx->nr < ctx->max_stack) { 1100 if (ctx->nr < ctx->max_stack && !ctx->contexts_maxed) {
1087 struct perf_callchain_entry *entry = ctx->entry; 1101 struct perf_callchain_entry *entry = ctx->entry;
1088 entry->ip[entry->nr++] = ip; 1102 entry->ip[entry->nr++] = ip;
1089 ++ctx->nr; 1103 ++ctx->nr;
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 43fc8d213472..36ce552cf6a9 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -862,6 +862,7 @@ enum perf_event_type {
862}; 862};
863 863
864#define PERF_MAX_STACK_DEPTH 127 864#define PERF_MAX_STACK_DEPTH 127
865#define PERF_MAX_CONTEXTS_PER_STACK 8
865 866
866enum perf_callchain_context { 867enum perf_callchain_context {
867 PERF_CONTEXT_HV = (__u64)-32, 868 PERF_CONTEXT_HV = (__u64)-32,
diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c
index ca645736a983..179ef4640964 100644
--- a/kernel/events/callchain.c
+++ b/kernel/events/callchain.c
@@ -19,11 +19,13 @@ struct callchain_cpus_entries {
19}; 19};
20 20
21int sysctl_perf_event_max_stack __read_mostly = PERF_MAX_STACK_DEPTH; 21int sysctl_perf_event_max_stack __read_mostly = PERF_MAX_STACK_DEPTH;
22int sysctl_perf_event_max_contexts_per_stack __read_mostly = PERF_MAX_CONTEXTS_PER_STACK;
22 23
23static inline size_t perf_callchain_entry__sizeof(void) 24static inline size_t perf_callchain_entry__sizeof(void)
24{ 25{
25 return (sizeof(struct perf_callchain_entry) + 26 return (sizeof(struct perf_callchain_entry) +
26 sizeof(__u64) * sysctl_perf_event_max_stack); 27 sizeof(__u64) * (sysctl_perf_event_max_stack +
28 sysctl_perf_event_max_contexts_per_stack));
27} 29}
28 30
29static DEFINE_PER_CPU(int, callchain_recursion[PERF_NR_CONTEXTS]); 31static DEFINE_PER_CPU(int, callchain_recursion[PERF_NR_CONTEXTS]);
@@ -197,6 +199,8 @@ get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user,
197 ctx.entry = entry; 199 ctx.entry = entry;
198 ctx.max_stack = max_stack; 200 ctx.max_stack = max_stack;
199 ctx.nr = entry->nr = init_nr; 201 ctx.nr = entry->nr = init_nr;
202 ctx.contexts = 0;
203 ctx.contexts_maxed = false;
200 204
201 if (kernel && !user_mode(regs)) { 205 if (kernel && !user_mode(regs)) {
202 if (add_mark) 206 if (add_mark)
@@ -228,6 +232,10 @@ exit_put:
228 return entry; 232 return entry;
229} 233}
230 234
235/*
236 * Used for sysctl_perf_event_max_stack and
237 * sysctl_perf_event_max_contexts_per_stack.
238 */
231int perf_event_max_stack_handler(struct ctl_table *table, int write, 239int perf_event_max_stack_handler(struct ctl_table *table, int write,
232 void __user *buffer, size_t *lenp, loff_t *ppos) 240 void __user *buffer, size_t *lenp, loff_t *ppos)
233{ 241{
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 0ec6907a16b3..bec4c11c47d6 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1156,6 +1156,15 @@ static struct ctl_table kern_table[] = {
1156 .extra1 = &zero, 1156 .extra1 = &zero,
1157 .extra2 = &six_hundred_forty_kb, 1157 .extra2 = &six_hundred_forty_kb,
1158 }, 1158 },
1159 {
1160 .procname = "perf_event_max_contexts_per_stack",
1161 .data = &sysctl_perf_event_max_contexts_per_stack,
1162 .maxlen = sizeof(sysctl_perf_event_max_contexts_per_stack),
1163 .mode = 0644,
1164 .proc_handler = perf_event_max_stack_handler,
1165 .extra1 = &zero,
1166 .extra2 = &one_thousand,
1167 },
1159#endif 1168#endif
1160#ifdef CONFIG_KMEMCHECK 1169#ifdef CONFIG_KMEMCHECK
1161 { 1170 {