aboutsummaryrefslogtreecommitdiffstats
path: root/tools
diff options
context:
space:
mode:
author: Waiman Long <Waiman.Long@hp.com> 2013-10-18 10:38:49 -0400
committer: Arnaldo Carvalho de Melo <acme@redhat.com> 2013-10-21 16:36:25 -0400
commit: 5dbb6e81d85e55ee2b4cf523c1738e16f63e5400 (patch)
tree: 334d7cd3884dde1e7f9d35e0be561f82edc92ccf /tools
parent: 91e95617429cb272fd908b1928a1915b37b9655f (diff)
perf top: Add --max-stack option to limit callchain stack scan
When the callgraph function is enabled (-G), it may take a long time to scan all the stack data and merge them accordingly. This patch adds a new --max-stack option to perf-top to limit the depth of callchain stack data to look at to reduce the time it takes for perf-top to finish its processing. It reduces the amount of information provided to the user in exchange for faster speed.

Signed-off-by: Waiman Long <Waiman.Long@hp.com>
Acked-by: David Ahern <dsahern@gmail.com>
Tested-by: Davidlohr Bueso <davidlohr@hp.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Aswin Chandramouleeswaran <aswin@hp.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Scott J Norton <scott.norton@hp.com>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1382107129-2010-5-git-send-email-Waiman.Long@hp.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Diffstat (limited to 'tools')
-rw-r--r-- tools/perf/Documentation/perf-top.txt | 8
-rw-r--r-- tools/perf/builtin-top.c | 8
-rw-r--r-- tools/perf/util/top.h | 1
3 files changed, 15 insertions, 2 deletions
diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt
index f65777c1f723..c16a09e2f182 100644
--- a/tools/perf/Documentation/perf-top.txt
+++ b/tools/perf/Documentation/perf-top.txt
@@ -158,6 +158,14 @@ Default is to monitor all CPUS.
158 158
159 Default: fractal,0.5,callee. 159 Default: fractal,0.5,callee.
160 160
161--max-stack::
162 Set the stack depth limit when parsing the callchain, anything
163 beyond the specified depth will be ignored. This is a trade-off
164 between information loss and faster processing especially for
165 workloads that can have a very long callchain stack.
166
167 Default: 127
168
161--ignore-callees=<regex>:: 169--ignore-callees=<regex>::
162 Ignore callees of the function(s) matching the given regex. 170 Ignore callees of the function(s) matching the given regex.
163 This has the effect of collecting the callers of each such 171 This has the effect of collecting the callers of each such
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 112cb7d68e64..386d83324a8d 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -771,7 +771,7 @@ static void perf_event__process_sample(struct perf_tool *tool,
771 err = machine__resolve_callchain(machine, evsel, 771 err = machine__resolve_callchain(machine, evsel,
772 al.thread, sample, 772 al.thread, sample,
773 &parent, &al, 773 &parent, &al,
774 PERF_MAX_STACK_DEPTH); 774 top->max_stack);
775 if (err) 775 if (err)
776 return; 776 return;
777 } 777 }
@@ -1048,10 +1048,11 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
1048 .user_freq = UINT_MAX, 1048 .user_freq = UINT_MAX,
1049 .user_interval = ULLONG_MAX, 1049 .user_interval = ULLONG_MAX,
1050 .freq = 4000, /* 4 KHz */ 1050 .freq = 4000, /* 4 KHz */
1051 .target = { 1051 .target = {
1052 .uses_mmap = true, 1052 .uses_mmap = true,
1053 }, 1053 },
1054 }, 1054 },
1055 .max_stack = PERF_MAX_STACK_DEPTH,
1055 .sym_pcnt_filter = 5, 1056 .sym_pcnt_filter = 5,
1056 }; 1057 };
1057 struct perf_record_opts *opts = &top.record_opts; 1058 struct perf_record_opts *opts = &top.record_opts;
@@ -1110,6 +1111,9 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
1110 OPT_CALLBACK_DEFAULT('G', "call-graph", &top.record_opts, 1111 OPT_CALLBACK_DEFAULT('G', "call-graph", &top.record_opts,
1111 "mode[,dump_size]", record_callchain_help, 1112 "mode[,dump_size]", record_callchain_help,
1112 &parse_callchain_opt, "fp"), 1113 &parse_callchain_opt, "fp"),
1114 OPT_INTEGER(0, "max-stack", &top.max_stack,
1115 "Set the maximum stack depth when parsing the callchain. "
1116 "Default: " __stringify(PERF_MAX_STACK_DEPTH)),
1113 OPT_CALLBACK(0, "ignore-callees", NULL, "regex", 1117 OPT_CALLBACK(0, "ignore-callees", NULL, "regex",
1114 "ignore callees of these functions in call graphs", 1118 "ignore callees of these functions in call graphs",
1115 report_parse_ignore_callees_opt), 1119 report_parse_ignore_callees_opt),
diff --git a/tools/perf/util/top.h b/tools/perf/util/top.h
index b554ffc462b6..88cfeaff600b 100644
--- a/tools/perf/util/top.h
+++ b/tools/perf/util/top.h
@@ -24,6 +24,7 @@ struct perf_top {
24 u64 exact_samples; 24 u64 exact_samples;
25 u64 guest_us_samples, guest_kernel_samples; 25 u64 guest_us_samples, guest_kernel_samples;
26 int print_entries, count_filter, delay_secs; 26 int print_entries, count_filter, delay_secs;
27 int max_stack;
27 bool hide_kernel_symbols, hide_user_symbols, zero; 28 bool hide_kernel_symbols, hide_user_symbols, zero;
28 bool use_tui, use_stdio; 29 bool use_tui, use_stdio;
29 bool kptr_restrict_warned; 30 bool kptr_restrict_warned;