aboutsummaryrefslogtreecommitdiffstats
path: root/tools/perf/builtin-script.c
diff options
context:
space:
mode:
authorAdrian Hunter <adrian.hunter@intel.com>2016-06-23 09:40:58 -0400
committerArnaldo Carvalho de Melo <acme@redhat.com>2016-06-23 16:04:26 -0400
commite216708d982a1c262f411fee2fcac2bd9ec93a32 (patch)
tree4941ea4945af89202d02c3f06e186dad4dd7bfac /tools/perf/builtin-script.c
parent50f736372d5ea0ce97ede698f957c9b141aa569e (diff)
perf script: Add callindent option
Based on patches from Andi Kleen. When printing PT instruction traces with perf script it is rather useful to see some indentation for the call tree. This patch adds a new callindent field to perf script that prints spaces for the function call stack depth. We already have code to track the function call stack for PT, that we can reuse with minor modifications. The resulting output is not quite as nice as ftrace yet, but a lot better than what was there before. Note there are some corner cases when the thread stack gets code confused and prints incorrect indentation. Even with that it is fairly useful. When displaying kernel code traces it is recommended to run as root, as otherwise perf doesn't understand the kernel addresses properly, and may not reset the call stack correctly on kernel boundaries. Example output: sudo perf-with-kcore record eg2 -a -e intel_pt// -- sleep 1 sudo perf-with-kcore script eg2 --ns -F callindent,time,comm,pid,sym,ip,addr,flags,cpu --itrace=cre | less ... swapper 0 [000] 5830.389116586: call irq_exit ffffffff8104d620 smp_call_function_single_interrupt+0x30 => ffffffff8107e720 irq_exit swapper 0 [000] 5830.389116586: call idle_cpu ffffffff8107e769 irq_exit+0x49 => ffffffff810a3970 idle_cpu swapper 0 [000] 5830.389116586: return idle_cpu ffffffff810a39b7 idle_cpu+0x47 => ffffffff8107e76e irq_exit swapper 0 [000] 5830.389116586: call tick_nohz_irq_exit ffffffff8107e7bd irq_exit+0x9d => ffffffff810f2fc0 tick_nohz_irq_exit swapper 0 [000] 5830.389116919: call __tick_nohz_idle_enter ffffffff810f2fe0 tick_nohz_irq_exit+0x20 => ffffffff810f28d0 __tick_nohz_idle_enter swapper 0 [000] 5830.389116919: call ktime_get ffffffff810f28f1 __tick_nohz_idle_enter+0x21 => ffffffff810e9ec0 ktime_get swapper 0 [000] 5830.389116919: call read_tsc ffffffff810e9ef6 ktime_get+0x36 => ffffffff81035070 read_tsc swapper 0 [000] 5830.389116919: return read_tsc ffffffff81035084 read_tsc+0x14 => ffffffff810e9efc ktime_get swapper 0 [000] 5830.389116919: return ktime_get ffffffff810e9f46 ktime_get+0x86 => ffffffff810f28f6 __tick_nohz_idle_enter swapper 0 [000] 5830.389116919: call sched_clock_idle_sleep_event ffffffff810f290b __tick_nohz_idle_enter+0x3b => ffffffff810a7380 sched_clock_idle_sleep_event swapper 0 [000] 5830.389116919: call sched_clock_cpu ffffffff810a738b sched_clock_idle_sleep_event+0xb => ffffffff810a72e0 sched_clock_cpu swapper 0 [000] 5830.389116919: call sched_clock ffffffff810a734d sched_clock_cpu+0x6d => ffffffff81035750 sched_clock swapper 0 [000] 5830.389116919: call native_sched_clock ffffffff81035754 sched_clock+0x4 => ffffffff81035640 native_sched_clock swapper 0 [000] 5830.389116919: return native_sched_clock ffffffff8103568c native_sched_clock+0x4c => ffffffff81035759 sched_clock swapper 0 [000] 5830.389116919: return sched_clock ffffffff8103575c sched_clock+0xc => ffffffff810a7352 sched_clock_cpu swapper 0 [000] 5830.389116919: return sched_clock_cpu ffffffff810a7356 sched_clock_cpu+0x76 => ffffffff810a7390 sched_clock_idle_sleep_event swapper 0 [000] 5830.389116919: return sched_clock_idle_sleep_event ffffffff810a7391 sched_clock_idle_sleep_event+0x11 => ffffffff810f2910 __tick_nohz_idle_enter ... Signed-off-by: Adrian Hunter <adrian.hunter@intel.com> Acked-by: Andi Kleen <ak@linux.intel.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Jiri Olsa <jolsa@redhat.com> Link: http://lkml.kernel.org/r/1466689258-28493-4-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Diffstat (limited to 'tools/perf/builtin-script.c')
-rw-r--r--tools/perf/builtin-script.c68
1 files changed, 67 insertions, 1 deletions
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 56b2fb8135ce..971ff91b16cb 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -21,6 +21,7 @@
21#include "util/cpumap.h" 21#include "util/cpumap.h"
22#include "util/thread_map.h" 22#include "util/thread_map.h"
23#include "util/stat.h" 23#include "util/stat.h"
24#include "util/thread-stack.h"
24#include <linux/bitmap.h> 25#include <linux/bitmap.h>
25#include <linux/stringify.h> 26#include <linux/stringify.h>
26#include "asm/bug.h" 27#include "asm/bug.h"
@@ -63,6 +64,7 @@ enum perf_output_field {
63 PERF_OUTPUT_DATA_SRC = 1U << 17, 64 PERF_OUTPUT_DATA_SRC = 1U << 17,
64 PERF_OUTPUT_WEIGHT = 1U << 18, 65 PERF_OUTPUT_WEIGHT = 1U << 18,
65 PERF_OUTPUT_BPF_OUTPUT = 1U << 19, 66 PERF_OUTPUT_BPF_OUTPUT = 1U << 19,
67 PERF_OUTPUT_CALLINDENT = 1U << 20,
66}; 68};
67 69
68struct output_option { 70struct output_option {
@@ -89,6 +91,7 @@ struct output_option {
89 {.str = "data_src", .field = PERF_OUTPUT_DATA_SRC}, 91 {.str = "data_src", .field = PERF_OUTPUT_DATA_SRC},
90 {.str = "weight", .field = PERF_OUTPUT_WEIGHT}, 92 {.str = "weight", .field = PERF_OUTPUT_WEIGHT},
91 {.str = "bpf-output", .field = PERF_OUTPUT_BPF_OUTPUT}, 93 {.str = "bpf-output", .field = PERF_OUTPUT_BPF_OUTPUT},
94 {.str = "callindent", .field = PERF_OUTPUT_CALLINDENT},
92}; 95};
93 96
94/* default set to maintain compatibility with current format */ 97/* default set to maintain compatibility with current format */
@@ -562,6 +565,62 @@ static void print_sample_addr(struct perf_sample *sample,
562 } 565 }
563} 566}
564 567
568static void print_sample_callindent(struct perf_sample *sample,
569 struct perf_evsel *evsel,
570 struct thread *thread,
571 struct addr_location *al)
572{
573 struct perf_event_attr *attr = &evsel->attr;
574 size_t depth = thread_stack__depth(thread);
575 struct addr_location addr_al;
576 const char *name = NULL;
577 static int spacing;
578 int len = 0;
579 u64 ip = 0;
580
581 /*
582 * The 'return' has already been popped off the stack so the depth has
583 * to be adjusted to match the 'call'.
584 */
585 if (thread->ts && sample->flags & PERF_IP_FLAG_RETURN)
586 depth += 1;
587
588 if (sample->flags & (PERF_IP_FLAG_CALL | PERF_IP_FLAG_TRACE_BEGIN)) {
589 if (sample_addr_correlates_sym(attr)) {
590 thread__resolve(thread, &addr_al, sample);
591 if (addr_al.sym)
592 name = addr_al.sym->name;
593 else
594 ip = sample->addr;
595 } else {
596 ip = sample->addr;
597 }
598 } else if (sample->flags & (PERF_IP_FLAG_RETURN | PERF_IP_FLAG_TRACE_END)) {
599 if (al->sym)
600 name = al->sym->name;
601 else
602 ip = sample->ip;
603 }
604
605 if (name)
606 len = printf("%*s%s", (int)depth * 4, "", name);
607 else if (ip)
608 len = printf("%*s%16" PRIx64, (int)depth * 4, "", ip);
609
610 if (len < 0)
611 return;
612
613 /*
614 * Try to keep the output length from changing frequently so that the
615 * output lines up more nicely.
616 */
617 if (len > spacing || (len && len < spacing - 52))
618 spacing = round_up(len + 4, 32);
619
620 if (len < spacing)
621 printf("%*s", spacing - len, "");
622}
623
565static void print_sample_bts(struct perf_sample *sample, 624static void print_sample_bts(struct perf_sample *sample,
566 struct perf_evsel *evsel, 625 struct perf_evsel *evsel,
567 struct thread *thread, 626 struct thread *thread,
@@ -570,6 +629,9 @@ static void print_sample_bts(struct perf_sample *sample,
570 struct perf_event_attr *attr = &evsel->attr; 629 struct perf_event_attr *attr = &evsel->attr;
571 bool print_srcline_last = false; 630 bool print_srcline_last = false;
572 631
632 if (PRINT_FIELD(CALLINDENT))
633 print_sample_callindent(sample, evsel, thread, al);
634
573 /* print branch_from information */ 635 /* print branch_from information */
574 if (PRINT_FIELD(IP)) { 636 if (PRINT_FIELD(IP)) {
575 unsigned int print_opts = output[attr->type].print_ip_opts; 637 unsigned int print_opts = output[attr->type].print_ip_opts;
@@ -2053,7 +2115,8 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
2053 "comma separated output fields prepend with 'type:'. " 2115 "comma separated output fields prepend with 'type:'. "
2054 "Valid types: hw,sw,trace,raw. " 2116 "Valid types: hw,sw,trace,raw. "
2055 "Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso," 2117 "Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,"
2056 "addr,symoff,period,iregs,brstack,brstacksym,flags", parse_output_fields), 2118 "addr,symoff,period,iregs,brstack,brstacksym,flags,"
2119 "callindent", parse_output_fields),
2057 OPT_BOOLEAN('a', "all-cpus", &system_wide, 2120 OPT_BOOLEAN('a', "all-cpus", &system_wide,
2058 "system-wide collection from all CPUs"), 2121 "system-wide collection from all CPUs"),
2059 OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]", 2122 OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]",
@@ -2292,6 +2355,9 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
2292 script.session = session; 2355 script.session = session;
2293 script__setup_sample_type(&script); 2356 script__setup_sample_type(&script);
2294 2357
2358 if (output[PERF_TYPE_HARDWARE].fields & PERF_OUTPUT_CALLINDENT)
2359 itrace_synth_opts.thread_stack = true;
2360
2295 session->itrace_synth_opts = &itrace_synth_opts; 2361 session->itrace_synth_opts = &itrace_synth_opts;
2296 2362
2297 if (cpu_list) { 2363 if (cpu_list) {