diff options
author | Waiman Long <Waiman.Long@hp.com> | 2013-10-18 10:38:48 -0400 |
---|---|---|
committer | Arnaldo Carvalho de Melo <acme@redhat.com> | 2013-10-21 16:36:25 -0400 |
commit | 91e95617429cb272fd908b1928a1915b37b9655f (patch) | |
tree | 630cd19ff34cf210a03924e92c711d6c51076e2d /tools/perf/builtin-report.c | |
parent | cc9784bd9fa9d8e27fdea61142398cb85ce401a8 (diff) |
perf report: Add --max-stack option to limit callchain stack scan
When callgraph data is included in the perf data file, it may take a
long time to scan all that data and merge it together, especially if
the stored callchains are long and the perf data file itself is large,
like a Gbyte or so.
The callchain stack is currently limited to PERF_MAX_STACK_DEPTH (127).
This is a large value. Usually the callgraph data that developers are
most interested in is the first few levels; the rest is usually not
looked at.
This patch adds a new --max-stack option to perf-report to limit the
depth of callchain stack data to look at to reduce the time it takes for
perf-report to finish its processing. It trades the presence of trailing
stack information for faster speed.
The following table shows the elapsed time of doing perf-report on a
perf.data file of size 985,531,828 bytes.
--max-stack Elapsed Time Output data size
----------- ------------ ----------------
not set 88.0s 124,422,651
64 87.5s 116,303,213
32 87.2s 112,023,804
16 86.6s 94,326,380
8 59.9s 33,697,248
4 40.7s 10,116,637
-g none 27.1s 2,555,810
Signed-off-by: Waiman Long <Waiman.Long@hp.com>
Acked-by: David Ahern <dsahern@gmail.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Aswin Chandramouleeswaran <aswin@hp.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Scott J Norton <scott.norton@hp.com>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1382107129-2010-4-git-send-email-Waiman.Long@hp.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Diffstat (limited to 'tools/perf/builtin-report.c')
-rw-r--r-- | tools/perf/builtin-report.c | 22 |
1 files changed, 17 insertions, 5 deletions
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index fa68a36bc461..81addcabb356 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c | |||
@@ -49,6 +49,7 @@ struct perf_report { | |||
49 | bool show_threads; | 49 | bool show_threads; |
50 | bool inverted_callchain; | 50 | bool inverted_callchain; |
51 | bool mem_mode; | 51 | bool mem_mode; |
52 | int max_stack; | ||
52 | struct perf_read_values show_threads_values; | 53 | struct perf_read_values show_threads_values; |
53 | const char *pretty_printing_style; | 54 | const char *pretty_printing_style; |
54 | const char *cpu_list; | 55 | const char *cpu_list; |
@@ -90,7 +91,8 @@ static int perf_report__add_mem_hist_entry(struct perf_tool *tool, | |||
90 | if ((sort__has_parent || symbol_conf.use_callchain) && | 91 | if ((sort__has_parent || symbol_conf.use_callchain) && |
91 | sample->callchain) { | 92 | sample->callchain) { |
92 | err = machine__resolve_callchain(machine, evsel, al->thread, | 93 | err = machine__resolve_callchain(machine, evsel, al->thread, |
93 | sample, &parent, al); | 94 | sample, &parent, al, |
95 | rep->max_stack); | ||
94 | if (err) | 96 | if (err) |
95 | return err; | 97 | return err; |
96 | } | 98 | } |
@@ -181,7 +183,8 @@ static int perf_report__add_branch_hist_entry(struct perf_tool *tool, | |||
181 | if ((sort__has_parent || symbol_conf.use_callchain) | 183 | if ((sort__has_parent || symbol_conf.use_callchain) |
182 | && sample->callchain) { | 184 | && sample->callchain) { |
183 | err = machine__resolve_callchain(machine, evsel, al->thread, | 185 | err = machine__resolve_callchain(machine, evsel, al->thread, |
184 | sample, &parent, al); | 186 | sample, &parent, al, |
187 | rep->max_stack); | ||
185 | if (err) | 188 | if (err) |
186 | return err; | 189 | return err; |
187 | } | 190 | } |
@@ -244,18 +247,21 @@ out: | |||
244 | return err; | 247 | return err; |
245 | } | 248 | } |
246 | 249 | ||
247 | static int perf_evsel__add_hist_entry(struct perf_evsel *evsel, | 250 | static int perf_evsel__add_hist_entry(struct perf_tool *tool, |
251 | struct perf_evsel *evsel, | ||
248 | struct addr_location *al, | 252 | struct addr_location *al, |
249 | struct perf_sample *sample, | 253 | struct perf_sample *sample, |
250 | struct machine *machine) | 254 | struct machine *machine) |
251 | { | 255 | { |
256 | struct perf_report *rep = container_of(tool, struct perf_report, tool); | ||
252 | struct symbol *parent = NULL; | 257 | struct symbol *parent = NULL; |
253 | int err = 0; | 258 | int err = 0; |
254 | struct hist_entry *he; | 259 | struct hist_entry *he; |
255 | 260 | ||
256 | if ((sort__has_parent || symbol_conf.use_callchain) && sample->callchain) { | 261 | if ((sort__has_parent || symbol_conf.use_callchain) && sample->callchain) { |
257 | err = machine__resolve_callchain(machine, evsel, al->thread, | 262 | err = machine__resolve_callchain(machine, evsel, al->thread, |
258 | sample, &parent, al); | 263 | sample, &parent, al, |
264 | rep->max_stack); | ||
259 | if (err) | 265 | if (err) |
260 | return err; | 266 | return err; |
261 | } | 267 | } |
@@ -332,7 +338,8 @@ static int process_sample_event(struct perf_tool *tool, | |||
332 | if (al.map != NULL) | 338 | if (al.map != NULL) |
333 | al.map->dso->hit = 1; | 339 | al.map->dso->hit = 1; |
334 | 340 | ||
335 | ret = perf_evsel__add_hist_entry(evsel, &al, sample, machine); | 341 | ret = perf_evsel__add_hist_entry(tool, evsel, &al, sample, |
342 | machine); | ||
336 | if (ret < 0) | 343 | if (ret < 0) |
337 | pr_debug("problem incrementing symbol period, skipping event\n"); | 344 | pr_debug("problem incrementing symbol period, skipping event\n"); |
338 | } | 345 | } |
@@ -772,6 +779,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) | |||
772 | .ordered_samples = true, | 779 | .ordered_samples = true, |
773 | .ordering_requires_timestamps = true, | 780 | .ordering_requires_timestamps = true, |
774 | }, | 781 | }, |
782 | .max_stack = PERF_MAX_STACK_DEPTH, | ||
775 | .pretty_printing_style = "normal", | 783 | .pretty_printing_style = "normal", |
776 | }; | 784 | }; |
777 | const struct option options[] = { | 785 | const struct option options[] = { |
@@ -812,6 +820,10 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) | |||
812 | OPT_CALLBACK_DEFAULT('g', "call-graph", &report, "output_type,min_percent[,print_limit],call_order", | 820 | OPT_CALLBACK_DEFAULT('g', "call-graph", &report, "output_type,min_percent[,print_limit],call_order", |
813 | "Display callchains using output_type (graph, flat, fractal, or none) , min percent threshold, optional print limit, callchain order, key (function or address). " | 821 | "Display callchains using output_type (graph, flat, fractal, or none) , min percent threshold, optional print limit, callchain order, key (function or address). " |
814 | "Default: fractal,0.5,callee,function", &parse_callchain_opt, callchain_default_opt), | 822 | "Default: fractal,0.5,callee,function", &parse_callchain_opt, callchain_default_opt), |
823 | OPT_INTEGER(0, "max-stack", &report.max_stack, | ||
824 | "Set the maximum stack depth when parsing the callchain, " | ||
825 | "anything beyond the specified depth will be ignored. " | ||
826 | "Default: " __stringify(PERF_MAX_STACK_DEPTH)), | ||
815 | OPT_BOOLEAN('G', "inverted", &report.inverted_callchain, | 827 | OPT_BOOLEAN('G', "inverted", &report.inverted_callchain, |
816 | "alias for inverted call graph"), | 828 | "alias for inverted call graph"), |
817 | OPT_CALLBACK(0, "ignore-callees", NULL, "regex", | 829 | OPT_CALLBACK(0, "ignore-callees", NULL, "regex", |