aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAdrian Hunter <adrian.hunter@intel.com>2016-06-23 09:40:58 -0400
committerArnaldo Carvalho de Melo <acme@redhat.com>2016-06-23 16:04:26 -0400
commite216708d982a1c262f411fee2fcac2bd9ec93a32 (patch)
tree4941ea4945af89202d02c3f06e186dad4dd7bfac
parent50f736372d5ea0ce97ede698f957c9b141aa569e (diff)
perf script: Add callindent option
Based on patches from Andi Kleen. When printing PT instruction traces with perf script it is rather useful to see some indentation for the call tree. This patch adds a new callindent field to perf script that prints spaces for the function call stack depth. We already have code to track the function call stack for PT, that we can reuse with minor modifications. The resulting output is not quite as nice as ftrace yet, but a lot better than what was there before. Note there are some corner cases when the thread stack gets code confused and prints incorrect indentation. Even with that it is fairly useful. When displaying kernel code traces it is recommended to run as root, as otherwise perf doesn't understand the kernel addresses properly, and may not reset the call stack correctly on kernel boundaries. Example output: sudo perf-with-kcore record eg2 -a -e intel_pt// -- sleep 1 sudo perf-with-kcore script eg2 --ns -F callindent,time,comm,pid,sym,ip,addr,flags,cpu --itrace=cre | less ... swapper 0 [000] 5830.389116586: call irq_exit ffffffff8104d620 smp_call_function_single_interrupt+0x30 => ffffffff8107e720 irq_exit swapper 0 [000] 5830.389116586: call idle_cpu ffffffff8107e769 irq_exit+0x49 => ffffffff810a3970 idle_cpu swapper 0 [000] 5830.389116586: return idle_cpu ffffffff810a39b7 idle_cpu+0x47 => ffffffff8107e76e irq_exit swapper 0 [000] 5830.389116586: call tick_nohz_irq_exit ffffffff8107e7bd irq_exit+0x9d => ffffffff810f2fc0 tick_nohz_irq_exit swapper 0 [000] 5830.389116919: call __tick_nohz_idle_enter ffffffff810f2fe0 tick_nohz_irq_exit+0x20 => ffffffff810f28d0 __tick_nohz_idle_enter swapper 0 [000] 5830.389116919: call ktime_get ffffffff810f28f1 __tick_nohz_idle_enter+0x21 => ffffffff810e9ec0 ktime_get swapper 0 [000] 5830.389116919: call read_tsc ffffffff810e9ef6 ktime_get+0x36 => ffffffff81035070 read_tsc swapper 0 [000] 5830.389116919: return read_tsc ffffffff81035084 read_tsc+0x14 => ffffffff810e9efc ktime_get swapper 0 [000] 5830.389116919: return ktime_get ffffffff810e9f46 ktime_get+0x86 => ffffffff810f28f6 __tick_nohz_idle_enter swapper 0 [000] 5830.389116919: call sched_clock_idle_sleep_event ffffffff810f290b __tick_nohz_idle_enter+0x3b => ffffffff810a7380 sched_clock_idle_sleep_event swapper 0 [000] 5830.389116919: call sched_clock_cpu ffffffff810a738b sched_clock_idle_sleep_event+0xb => ffffffff810a72e0 sched_clock_cpu swapper 0 [000] 5830.389116919: call sched_clock ffffffff810a734d sched_clock_cpu+0x6d => ffffffff81035750 sched_clock swapper 0 [000] 5830.389116919: call native_sched_clock ffffffff81035754 sched_clock+0x4 => ffffffff81035640 native_sched_clock swapper 0 [000] 5830.389116919: return native_sched_clock ffffffff8103568c native_sched_clock+0x4c => ffffffff81035759 sched_clock swapper 0 [000] 5830.389116919: return sched_clock ffffffff8103575c sched_clock+0xc => ffffffff810a7352 sched_clock_cpu swapper 0 [000] 5830.389116919: return sched_clock_cpu ffffffff810a7356 sched_clock_cpu+0x76 => ffffffff810a7390 sched_clock_idle_sleep_event swapper 0 [000] 5830.389116919: return sched_clock_idle_sleep_event ffffffff810a7391 sched_clock_idle_sleep_event+0x11 => ffffffff810f2910 __tick_nohz_idle_enter ... Signed-off-by: Adrian Hunter <adrian.hunter@intel.com> Acked-by: Andi Kleen <ak@linux.intel.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Jiri Olsa <jolsa@redhat.com> Link: http://lkml.kernel.org/r/1466689258-28493-4-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
-rw-r--r--tools/perf/Documentation/perf-script.txt4
-rw-r--r--tools/perf/builtin-script.c68
-rw-r--r--tools/perf/util/thread-stack.c7
-rw-r--r--tools/perf/util/thread-stack.h1
4 files changed, 79 insertions, 1 deletions
diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt
index a46030d8962d..1f6c70594f0f 100644
--- a/tools/perf/Documentation/perf-script.txt
+++ b/tools/perf/Documentation/perf-script.txt
@@ -177,6 +177,10 @@ OPTIONS
177 "tr end" for "bE". However the "x" flag will be display separately in those 177 "tr end" for "bE". However the "x" flag will be display separately in those
178 cases e.g. "jcc (x)" for a condition branch within a transaction. 178 cases e.g. "jcc (x)" for a condition branch within a transaction.
179 179
180 The callindent field is synthesized and may have a value when
181 Instruction Trace decoding. For calls and returns, it will display the
182 name of the symbol indented with spaces to reflect the stack depth.
183
180 Finally, a user may not set fields to none for all event types. 184 Finally, a user may not set fields to none for all event types.
181 i.e., -F "" is not allowed. 185 i.e., -F "" is not allowed.
182 186
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 56b2fb8135ce..971ff91b16cb 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -21,6 +21,7 @@
21#include "util/cpumap.h" 21#include "util/cpumap.h"
22#include "util/thread_map.h" 22#include "util/thread_map.h"
23#include "util/stat.h" 23#include "util/stat.h"
24#include "util/thread-stack.h"
24#include <linux/bitmap.h> 25#include <linux/bitmap.h>
25#include <linux/stringify.h> 26#include <linux/stringify.h>
26#include "asm/bug.h" 27#include "asm/bug.h"
@@ -63,6 +64,7 @@ enum perf_output_field {
63 PERF_OUTPUT_DATA_SRC = 1U << 17, 64 PERF_OUTPUT_DATA_SRC = 1U << 17,
64 PERF_OUTPUT_WEIGHT = 1U << 18, 65 PERF_OUTPUT_WEIGHT = 1U << 18,
65 PERF_OUTPUT_BPF_OUTPUT = 1U << 19, 66 PERF_OUTPUT_BPF_OUTPUT = 1U << 19,
67 PERF_OUTPUT_CALLINDENT = 1U << 20,
66}; 68};
67 69
68struct output_option { 70struct output_option {
@@ -89,6 +91,7 @@ struct output_option {
89 {.str = "data_src", .field = PERF_OUTPUT_DATA_SRC}, 91 {.str = "data_src", .field = PERF_OUTPUT_DATA_SRC},
90 {.str = "weight", .field = PERF_OUTPUT_WEIGHT}, 92 {.str = "weight", .field = PERF_OUTPUT_WEIGHT},
91 {.str = "bpf-output", .field = PERF_OUTPUT_BPF_OUTPUT}, 93 {.str = "bpf-output", .field = PERF_OUTPUT_BPF_OUTPUT},
94 {.str = "callindent", .field = PERF_OUTPUT_CALLINDENT},
92}; 95};
93 96
94/* default set to maintain compatibility with current format */ 97/* default set to maintain compatibility with current format */
@@ -562,6 +565,62 @@ static void print_sample_addr(struct perf_sample *sample,
562 } 565 }
563} 566}
564 567
568static void print_sample_callindent(struct perf_sample *sample,
569 struct perf_evsel *evsel,
570 struct thread *thread,
571 struct addr_location *al)
572{
573 struct perf_event_attr *attr = &evsel->attr;
574 size_t depth = thread_stack__depth(thread);
575 struct addr_location addr_al;
576 const char *name = NULL;
577 static int spacing;
578 int len = 0;
579 u64 ip = 0;
580
581 /*
582 * The 'return' has already been popped off the stack so the depth has
583 * to be adjusted to match the 'call'.
584 */
585 if (thread->ts && sample->flags & PERF_IP_FLAG_RETURN)
586 depth += 1;
587
588 if (sample->flags & (PERF_IP_FLAG_CALL | PERF_IP_FLAG_TRACE_BEGIN)) {
589 if (sample_addr_correlates_sym(attr)) {
590 thread__resolve(thread, &addr_al, sample);
591 if (addr_al.sym)
592 name = addr_al.sym->name;
593 else
594 ip = sample->addr;
595 } else {
596 ip = sample->addr;
597 }
598 } else if (sample->flags & (PERF_IP_FLAG_RETURN | PERF_IP_FLAG_TRACE_END)) {
599 if (al->sym)
600 name = al->sym->name;
601 else
602 ip = sample->ip;
603 }
604
605 if (name)
606 len = printf("%*s%s", (int)depth * 4, "", name);
607 else if (ip)
608 len = printf("%*s%16" PRIx64, (int)depth * 4, "", ip);
609
610 if (len < 0)
611 return;
612
613 /*
614 * Try to keep the output length from changing frequently so that the
615 * output lines up more nicely.
616 */
617 if (len > spacing || (len && len < spacing - 52))
618 spacing = round_up(len + 4, 32);
619
620 if (len < spacing)
621 printf("%*s", spacing - len, "");
622}
623
565static void print_sample_bts(struct perf_sample *sample, 624static void print_sample_bts(struct perf_sample *sample,
566 struct perf_evsel *evsel, 625 struct perf_evsel *evsel,
567 struct thread *thread, 626 struct thread *thread,
@@ -570,6 +629,9 @@ static void print_sample_bts(struct perf_sample *sample,
570 struct perf_event_attr *attr = &evsel->attr; 629 struct perf_event_attr *attr = &evsel->attr;
571 bool print_srcline_last = false; 630 bool print_srcline_last = false;
572 631
632 if (PRINT_FIELD(CALLINDENT))
633 print_sample_callindent(sample, evsel, thread, al);
634
573 /* print branch_from information */ 635 /* print branch_from information */
574 if (PRINT_FIELD(IP)) { 636 if (PRINT_FIELD(IP)) {
575 unsigned int print_opts = output[attr->type].print_ip_opts; 637 unsigned int print_opts = output[attr->type].print_ip_opts;
@@ -2053,7 +2115,8 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
2053 "comma separated output fields prepend with 'type:'. " 2115 "comma separated output fields prepend with 'type:'. "
2054 "Valid types: hw,sw,trace,raw. " 2116 "Valid types: hw,sw,trace,raw. "
2055 "Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso," 2117 "Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,"
2056 "addr,symoff,period,iregs,brstack,brstacksym,flags", parse_output_fields), 2118 "addr,symoff,period,iregs,brstack,brstacksym,flags,"
2119 "callindent", parse_output_fields),
2057 OPT_BOOLEAN('a', "all-cpus", &system_wide, 2120 OPT_BOOLEAN('a', "all-cpus", &system_wide,
2058 "system-wide collection from all CPUs"), 2121 "system-wide collection from all CPUs"),
2059 OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]", 2122 OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]",
@@ -2292,6 +2355,9 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
2292 script.session = session; 2355 script.session = session;
2293 script__setup_sample_type(&script); 2356 script__setup_sample_type(&script);
2294 2357
2358 if (output[PERF_TYPE_HARDWARE].fields & PERF_OUTPUT_CALLINDENT)
2359 itrace_synth_opts.thread_stack = true;
2360
2295 session->itrace_synth_opts = &itrace_synth_opts; 2361 session->itrace_synth_opts = &itrace_synth_opts;
2296 2362
2297 if (cpu_list) { 2363 if (cpu_list) {
diff --git a/tools/perf/util/thread-stack.c b/tools/perf/util/thread-stack.c
index 825086aa9a08..d3301529f6a7 100644
--- a/tools/perf/util/thread-stack.c
+++ b/tools/perf/util/thread-stack.c
@@ -616,3 +616,10 @@ int thread_stack__process(struct thread *thread, struct comm *comm,
616 616
617 return err; 617 return err;
618} 618}
619
620size_t thread_stack__depth(struct thread *thread)
621{
622 if (!thread->ts)
623 return 0;
624 return thread->ts->cnt;
625}
diff --git a/tools/perf/util/thread-stack.h b/tools/perf/util/thread-stack.h
index ad44c7944b8e..b7e41c4ebfdd 100644
--- a/tools/perf/util/thread-stack.h
+++ b/tools/perf/util/thread-stack.h
@@ -87,6 +87,7 @@ void thread_stack__sample(struct thread *thread, struct ip_callchain *chain,
87 size_t sz, u64 ip); 87 size_t sz, u64 ip);
88int thread_stack__flush(struct thread *thread); 88int thread_stack__flush(struct thread *thread);
89void thread_stack__free(struct thread *thread); 89void thread_stack__free(struct thread *thread);
90size_t thread_stack__depth(struct thread *thread);
90 91
91struct call_return_processor * 92struct call_return_processor *
92call_return_processor__new(int (*process)(struct call_return *cr, void *data), 93call_return_processor__new(int (*process)(struct call_return *cr, void *data),