diff options
author | Kan Liang <kan.liang@intel.com> | 2015-01-05 13:23:04 -0500 |
---|---|---|
committer | Ingo Molnar <mingo@kernel.org> | 2015-02-18 11:16:17 -0500 |
commit | aad2b21c151273fa7abc419dac51a980eff1dd17 (patch) | |
tree | 6408263bd2daf71567178e4fe07d094de934414c /tools/perf | |
parent | 2c44b1936bb3b135a3fac8b3493394d42e51cf70 (diff) |
perf tools: Enable LBR call stack support
Currently, there are two call chain recording options, fp and dwarf.
Haswell has a new feature that utilizes the existing LBR facility to
record call chains. Kernel side LBR support code provides this as a
third option to record call chains. This patch enables the lbr call
stack support on the tooling side.
LBR call stack has some limitations:
- It reuses the current LBR facility, so LBR call stack and branch record
cannot be enabled at the same time.
- It is only available for user-space callchains.
However, it also offers some advantages:
- LBR call stack can work on user apps which don't have frame-pointers
or dwarf debug info compiled in. It is a good alternative when nothing
else works.
Tested-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Kan Liang <kan.liang@intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Anshuman Khandual <khandual@linux.vnet.ibm.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Cody P Schafer <cody@linux.vnet.ibm.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Don Zickus <dzickus@redhat.com>
Cc: Jacob Shin <jacob.w.shin@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Masanari Iida <standby24x7@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Rodrigo Campos <rodrigo@sdfg.com.ar>
Cc: Stephane Eranian <eranian@google.com>
Cc: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
Link: http://lkml.kernel.org/r/1420482185-29830-2-git-send-email-kan.liang@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'tools/perf')
-rw-r--r-- | tools/perf/Documentation/perf-record.txt | 8 | ||||
-rw-r--r-- | tools/perf/builtin-record.c | 6 | ||||
-rw-r--r-- | tools/perf/builtin-report.c | 2 | ||||
-rw-r--r-- | tools/perf/util/callchain.c | 8 | ||||
-rw-r--r-- | tools/perf/util/callchain.h | 1 | ||||
-rw-r--r-- | tools/perf/util/evsel.c | 21 |
6 files changed, 40 insertions, 6 deletions
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 31e977459c51..1c7e50f62b1f 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt | |||
@@ -115,13 +115,19 @@ OPTIONS | |||
115 | implies -g. | 115 | implies -g. |
116 | 116 | ||
117 | Allows specifying "fp" (frame pointer) or "dwarf" | 117 | Allows specifying "fp" (frame pointer) or "dwarf" |
118 | (DWARF's CFI - Call Frame Information) as the method to collect | 118 | (DWARF's CFI - Call Frame Information) or "lbr" |
119 | (Hardware Last Branch Record facility) as the method to collect | ||
119 | the information used to show the call graphs. | 120 | the information used to show the call graphs. |
120 | 121 | ||
121 | In some systems, where binaries are build with gcc | 122 | In some systems, where binaries are build with gcc |
122 | --fomit-frame-pointer, using the "fp" method will produce bogus | 123 | --fomit-frame-pointer, using the "fp" method will produce bogus |
123 | call graphs, using "dwarf", if available (perf tools linked to | 124 | call graphs, using "dwarf", if available (perf tools linked to |
124 | the libunwind library) should be used instead. | 125 | the libunwind library) should be used instead. |
126 | Using the "lbr" method doesn't require any compiler options. It | ||
127 | will produce call graphs from the hardware LBR registers. The | ||
128 | main limitation is that it is only available on new Intel | ||
129 | platforms, such as Haswell. It can only get user call chain. It | ||
130 | doesn't work with branch stack sampling at the same time. | ||
125 | 131 | ||
126 | -q:: | 132 | -q:: |
127 | --quiet:: | 133 | --quiet:: |
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 404ab3434052..d0d02a811ecd 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c | |||
@@ -658,7 +658,7 @@ error: | |||
658 | 658 | ||
659 | static void callchain_debug(void) | 659 | static void callchain_debug(void) |
660 | { | 660 | { |
661 | static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF" }; | 661 | static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" }; |
662 | 662 | ||
663 | pr_debug("callchain: type %s\n", str[callchain_param.record_mode]); | 663 | pr_debug("callchain: type %s\n", str[callchain_param.record_mode]); |
664 | 664 | ||
@@ -751,9 +751,9 @@ static struct record record = { | |||
751 | #define CALLCHAIN_HELP "setup and enables call-graph (stack chain/backtrace) recording: " | 751 | #define CALLCHAIN_HELP "setup and enables call-graph (stack chain/backtrace) recording: " |
752 | 752 | ||
753 | #ifdef HAVE_DWARF_UNWIND_SUPPORT | 753 | #ifdef HAVE_DWARF_UNWIND_SUPPORT |
754 | const char record_callchain_help[] = CALLCHAIN_HELP "fp dwarf"; | 754 | const char record_callchain_help[] = CALLCHAIN_HELP "fp dwarf lbr"; |
755 | #else | 755 | #else |
756 | const char record_callchain_help[] = CALLCHAIN_HELP "fp"; | 756 | const char record_callchain_help[] = CALLCHAIN_HELP "fp lbr"; |
757 | #endif | 757 | #endif |
758 | 758 | ||
759 | /* | 759 | /* |
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 2f91094e228b..0ba5f07906fb 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c | |||
@@ -249,6 +249,8 @@ static int report__setup_sample_type(struct report *rep) | |||
249 | if ((sample_type & PERF_SAMPLE_REGS_USER) && | 249 | if ((sample_type & PERF_SAMPLE_REGS_USER) && |
250 | (sample_type & PERF_SAMPLE_STACK_USER)) | 250 | (sample_type & PERF_SAMPLE_STACK_USER)) |
251 | callchain_param.record_mode = CALLCHAIN_DWARF; | 251 | callchain_param.record_mode = CALLCHAIN_DWARF; |
252 | else if (sample_type & PERF_SAMPLE_BRANCH_STACK) | ||
253 | callchain_param.record_mode = CALLCHAIN_LBR; | ||
252 | else | 254 | else |
253 | callchain_param.record_mode = CALLCHAIN_FP; | 255 | callchain_param.record_mode = CALLCHAIN_FP; |
254 | } | 256 | } |
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 14e7a123d43b..9f643ee77001 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c | |||
@@ -97,6 +97,14 @@ int parse_callchain_record_opt(const char *arg) | |||
97 | callchain_param.dump_size = size; | 97 | callchain_param.dump_size = size; |
98 | } | 98 | } |
99 | #endif /* HAVE_DWARF_UNWIND_SUPPORT */ | 99 | #endif /* HAVE_DWARF_UNWIND_SUPPORT */ |
100 | } else if (!strncmp(name, "lbr", sizeof("lbr"))) { | ||
101 | if (!strtok_r(NULL, ",", &saveptr)) { | ||
102 | callchain_param.record_mode = CALLCHAIN_LBR; | ||
103 | ret = 0; | ||
104 | } else | ||
105 | pr_err("callchain: No more arguments " | ||
106 | "needed for --call-graph lbr\n"); | ||
107 | break; | ||
100 | } else { | 108 | } else { |
101 | pr_err("callchain: Unknown --call-graph option " | 109 | pr_err("callchain: Unknown --call-graph option " |
102 | "value: %s\n", arg); | 110 | "value: %s\n", arg); |
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index c0ec1acc38e4..6033a0a212ca 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h | |||
@@ -11,6 +11,7 @@ enum perf_call_graph_mode { | |||
11 | CALLCHAIN_NONE, | 11 | CALLCHAIN_NONE, |
12 | CALLCHAIN_FP, | 12 | CALLCHAIN_FP, |
13 | CALLCHAIN_DWARF, | 13 | CALLCHAIN_DWARF, |
14 | CALLCHAIN_LBR, | ||
14 | CALLCHAIN_MAX | 15 | CALLCHAIN_MAX |
15 | }; | 16 | }; |
16 | 17 | ||
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index ea51a90e20a0..f93e5208c762 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c | |||
@@ -537,13 +537,30 @@ int perf_evsel__group_desc(struct perf_evsel *evsel, char *buf, size_t size) | |||
537 | } | 537 | } |
538 | 538 | ||
539 | static void | 539 | static void |
540 | perf_evsel__config_callgraph(struct perf_evsel *evsel) | 540 | perf_evsel__config_callgraph(struct perf_evsel *evsel, |
541 | struct record_opts *opts) | ||
541 | { | 542 | { |
542 | bool function = perf_evsel__is_function_event(evsel); | 543 | bool function = perf_evsel__is_function_event(evsel); |
543 | struct perf_event_attr *attr = &evsel->attr; | 544 | struct perf_event_attr *attr = &evsel->attr; |
544 | 545 | ||
545 | perf_evsel__set_sample_bit(evsel, CALLCHAIN); | 546 | perf_evsel__set_sample_bit(evsel, CALLCHAIN); |
546 | 547 | ||
548 | if (callchain_param.record_mode == CALLCHAIN_LBR) { | ||
549 | if (!opts->branch_stack) { | ||
550 | if (attr->exclude_user) { | ||
551 | pr_warning("LBR callstack option is only available " | ||
552 | "to get user callchain information. " | ||
553 | "Falling back to framepointers.\n"); | ||
554 | } else { | ||
555 | perf_evsel__set_sample_bit(evsel, BRANCH_STACK); | ||
556 | attr->branch_sample_type = PERF_SAMPLE_BRANCH_USER | | ||
557 | PERF_SAMPLE_BRANCH_CALL_STACK; | ||
558 | } | ||
559 | } else | ||
560 | pr_warning("Cannot use LBR callstack with branch stack. " | ||
561 | "Falling back to framepointers.\n"); | ||
562 | } | ||
563 | |||
547 | if (callchain_param.record_mode == CALLCHAIN_DWARF) { | 564 | if (callchain_param.record_mode == CALLCHAIN_DWARF) { |
548 | if (!function) { | 565 | if (!function) { |
549 | perf_evsel__set_sample_bit(evsel, REGS_USER); | 566 | perf_evsel__set_sample_bit(evsel, REGS_USER); |
@@ -667,7 +684,7 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts) | |||
667 | evsel->attr.exclude_callchain_user = 1; | 684 | evsel->attr.exclude_callchain_user = 1; |
668 | 685 | ||
669 | if (callchain_param.enabled && !evsel->no_aux_samples) | 686 | if (callchain_param.enabled && !evsel->no_aux_samples) |
670 | perf_evsel__config_callgraph(evsel); | 687 | perf_evsel__config_callgraph(evsel, opts); |
671 | 688 | ||
672 | if (opts->sample_intr_regs) { | 689 | if (opts->sample_intr_regs) { |
673 | attr->sample_regs_intr = PERF_REGS_MASK; | 690 | attr->sample_regs_intr = PERF_REGS_MASK; |