diff options
author | Waiman Long <Waiman.Long@hp.com> | 2013-10-18 10:38:48 -0400 |
---|---|---|
committer | Arnaldo Carvalho de Melo <acme@redhat.com> | 2013-10-21 16:36:25 -0400 |
commit | 91e95617429cb272fd908b1928a1915b37b9655f (patch) | |
tree | 630cd19ff34cf210a03924e92c711d6c51076e2d /tools | |
parent | cc9784bd9fa9d8e27fdea61142398cb85ce401a8 (diff) |
perf report: Add --max-stack option to limit callchain stack scan
When callgraph data is included in the perf data file, it may take a
long time to scan all of that data and merge it together, especially if
the stored callchains are long and the perf data file itself is large,
like a Gbyte or so.
The callchain stack is currently limited to PERF_MAX_STACK_DEPTH (127).
This is a large value. Usually the callgraph data that developers are
most interested in is the first few levels; the rest is usually not
looked at.
This patch adds a new --max-stack option to perf-report to limit the
depth of callchain stack data to look at to reduce the time it takes for
perf-report to finish its processing. It trades the presence of trailing
stack information for faster processing.
The following table shows the elapsed time of doing perf-report on a
perf.data file of size 985,531,828 bytes.
--max-stack Elapsed Time Output data size
----------- ------------ ----------------
not set 88.0s 124,422,651
64 87.5s 116,303,213
32 87.2s 112,023,804
16 86.6s 94,326,380
8 59.9s 33,697,248
4 40.7s 10,116,637
-g none 27.1s 2,555,810
Signed-off-by: Waiman Long <Waiman.Long@hp.com>
Acked-by: David Ahern <dsahern@gmail.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Aswin Chandramouleeswaran <aswin@hp.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Scott J Norton <scott.norton@hp.com>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1382107129-2010-4-git-send-email-Waiman.Long@hp.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Diffstat (limited to 'tools')
-rw-r--r-- | tools/perf/Documentation/perf-report.txt | 8 | ||||
-rw-r--r-- | tools/perf/builtin-report.c | 22 | ||||
-rw-r--r-- | tools/perf/builtin-top.c | 3 | ||||
-rw-r--r-- | tools/perf/util/machine.c | 14 | ||||
-rw-r--r-- | tools/perf/util/machine.h | 3 | ||||
-rw-r--r-- | tools/perf/util/session.c | 3 |
6 files changed, 40 insertions, 13 deletions
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index be5ad87b6c3d..10a279871251 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt | |||
@@ -141,6 +141,14 @@ OPTIONS | |||
141 | 141 | ||
142 | Default: fractal,0.5,callee,function. | 142 | Default: fractal,0.5,callee,function. |
143 | 143 | ||
144 | --max-stack:: | ||
145 | Set the stack depth limit when parsing the callchain, anything | ||
146 | beyond the specified depth will be ignored. This is a trade-off | ||
147 | between information loss and faster processing especially for | ||
148 | workloads that can have a very long callchain stack. | ||
149 | |||
150 | Default: 127 | ||
151 | |||
144 | -G:: | 152 | -G:: |
145 | --inverted:: | 153 | --inverted:: |
146 | alias for inverted caller based call graph. | 154 | alias for inverted caller based call graph. |
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index fa68a36bc461..81addcabb356 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c | |||
@@ -49,6 +49,7 @@ struct perf_report { | |||
49 | bool show_threads; | 49 | bool show_threads; |
50 | bool inverted_callchain; | 50 | bool inverted_callchain; |
51 | bool mem_mode; | 51 | bool mem_mode; |
52 | int max_stack; | ||
52 | struct perf_read_values show_threads_values; | 53 | struct perf_read_values show_threads_values; |
53 | const char *pretty_printing_style; | 54 | const char *pretty_printing_style; |
54 | const char *cpu_list; | 55 | const char *cpu_list; |
@@ -90,7 +91,8 @@ static int perf_report__add_mem_hist_entry(struct perf_tool *tool, | |||
90 | if ((sort__has_parent || symbol_conf.use_callchain) && | 91 | if ((sort__has_parent || symbol_conf.use_callchain) && |
91 | sample->callchain) { | 92 | sample->callchain) { |
92 | err = machine__resolve_callchain(machine, evsel, al->thread, | 93 | err = machine__resolve_callchain(machine, evsel, al->thread, |
93 | sample, &parent, al); | 94 | sample, &parent, al, |
95 | rep->max_stack); | ||
94 | if (err) | 96 | if (err) |
95 | return err; | 97 | return err; |
96 | } | 98 | } |
@@ -181,7 +183,8 @@ static int perf_report__add_branch_hist_entry(struct perf_tool *tool, | |||
181 | if ((sort__has_parent || symbol_conf.use_callchain) | 183 | if ((sort__has_parent || symbol_conf.use_callchain) |
182 | && sample->callchain) { | 184 | && sample->callchain) { |
183 | err = machine__resolve_callchain(machine, evsel, al->thread, | 185 | err = machine__resolve_callchain(machine, evsel, al->thread, |
184 | sample, &parent, al); | 186 | sample, &parent, al, |
187 | rep->max_stack); | ||
185 | if (err) | 188 | if (err) |
186 | return err; | 189 | return err; |
187 | } | 190 | } |
@@ -244,18 +247,21 @@ out: | |||
244 | return err; | 247 | return err; |
245 | } | 248 | } |
246 | 249 | ||
247 | static int perf_evsel__add_hist_entry(struct perf_evsel *evsel, | 250 | static int perf_evsel__add_hist_entry(struct perf_tool *tool, |
251 | struct perf_evsel *evsel, | ||
248 | struct addr_location *al, | 252 | struct addr_location *al, |
249 | struct perf_sample *sample, | 253 | struct perf_sample *sample, |
250 | struct machine *machine) | 254 | struct machine *machine) |
251 | { | 255 | { |
256 | struct perf_report *rep = container_of(tool, struct perf_report, tool); | ||
252 | struct symbol *parent = NULL; | 257 | struct symbol *parent = NULL; |
253 | int err = 0; | 258 | int err = 0; |
254 | struct hist_entry *he; | 259 | struct hist_entry *he; |
255 | 260 | ||
256 | if ((sort__has_parent || symbol_conf.use_callchain) && sample->callchain) { | 261 | if ((sort__has_parent || symbol_conf.use_callchain) && sample->callchain) { |
257 | err = machine__resolve_callchain(machine, evsel, al->thread, | 262 | err = machine__resolve_callchain(machine, evsel, al->thread, |
258 | sample, &parent, al); | 263 | sample, &parent, al, |
264 | rep->max_stack); | ||
259 | if (err) | 265 | if (err) |
260 | return err; | 266 | return err; |
261 | } | 267 | } |
@@ -332,7 +338,8 @@ static int process_sample_event(struct perf_tool *tool, | |||
332 | if (al.map != NULL) | 338 | if (al.map != NULL) |
333 | al.map->dso->hit = 1; | 339 | al.map->dso->hit = 1; |
334 | 340 | ||
335 | ret = perf_evsel__add_hist_entry(evsel, &al, sample, machine); | 341 | ret = perf_evsel__add_hist_entry(tool, evsel, &al, sample, |
342 | machine); | ||
336 | if (ret < 0) | 343 | if (ret < 0) |
337 | pr_debug("problem incrementing symbol period, skipping event\n"); | 344 | pr_debug("problem incrementing symbol period, skipping event\n"); |
338 | } | 345 | } |
@@ -772,6 +779,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) | |||
772 | .ordered_samples = true, | 779 | .ordered_samples = true, |
773 | .ordering_requires_timestamps = true, | 780 | .ordering_requires_timestamps = true, |
774 | }, | 781 | }, |
782 | .max_stack = PERF_MAX_STACK_DEPTH, | ||
775 | .pretty_printing_style = "normal", | 783 | .pretty_printing_style = "normal", |
776 | }; | 784 | }; |
777 | const struct option options[] = { | 785 | const struct option options[] = { |
@@ -812,6 +820,10 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) | |||
812 | OPT_CALLBACK_DEFAULT('g', "call-graph", &report, "output_type,min_percent[,print_limit],call_order", | 820 | OPT_CALLBACK_DEFAULT('g', "call-graph", &report, "output_type,min_percent[,print_limit],call_order", |
813 | "Display callchains using output_type (graph, flat, fractal, or none) , min percent threshold, optional print limit, callchain order, key (function or address). " | 821 | "Display callchains using output_type (graph, flat, fractal, or none) , min percent threshold, optional print limit, callchain order, key (function or address). " |
814 | "Default: fractal,0.5,callee,function", &parse_callchain_opt, callchain_default_opt), | 822 | "Default: fractal,0.5,callee,function", &parse_callchain_opt, callchain_default_opt), |
823 | OPT_INTEGER(0, "max-stack", &report.max_stack, | ||
824 | "Set the maximum stack depth when parsing the callchain, " | ||
825 | "anything beyond the specified depth will be ignored. " | ||
826 | "Default: " __stringify(PERF_MAX_STACK_DEPTH)), | ||
815 | OPT_BOOLEAN('G', "inverted", &report.inverted_callchain, | 827 | OPT_BOOLEAN('G', "inverted", &report.inverted_callchain, |
816 | "alias for inverted call graph"), | 828 | "alias for inverted call graph"), |
817 | OPT_CALLBACK(0, "ignore-callees", NULL, "regex", | 829 | OPT_CALLBACK(0, "ignore-callees", NULL, "regex", |
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index d934f707ee74..112cb7d68e64 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c | |||
@@ -770,7 +770,8 @@ static void perf_event__process_sample(struct perf_tool *tool, | |||
770 | sample->callchain) { | 770 | sample->callchain) { |
771 | err = machine__resolve_callchain(machine, evsel, | 771 | err = machine__resolve_callchain(machine, evsel, |
772 | al.thread, sample, | 772 | al.thread, sample, |
773 | &parent, &al); | 773 | &parent, &al, |
774 | PERF_MAX_STACK_DEPTH); | ||
774 | if (err) | 775 | if (err) |
775 | return; | 776 | return; |
776 | } | 777 | } |
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 6b861aefd99a..ea93425cce95 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c | |||
@@ -1253,10 +1253,12 @@ static int machine__resolve_callchain_sample(struct machine *machine, | |||
1253 | struct thread *thread, | 1253 | struct thread *thread, |
1254 | struct ip_callchain *chain, | 1254 | struct ip_callchain *chain, |
1255 | struct symbol **parent, | 1255 | struct symbol **parent, |
1256 | struct addr_location *root_al) | 1256 | struct addr_location *root_al, |
1257 | int max_stack) | ||
1257 | { | 1258 | { |
1258 | u8 cpumode = PERF_RECORD_MISC_USER; | 1259 | u8 cpumode = PERF_RECORD_MISC_USER; |
1259 | unsigned int i; | 1260 | int chain_nr = min(max_stack, (int)chain->nr); |
1261 | int i; | ||
1260 | int err; | 1262 | int err; |
1261 | 1263 | ||
1262 | callchain_cursor_reset(&callchain_cursor); | 1264 | callchain_cursor_reset(&callchain_cursor); |
@@ -1266,7 +1268,7 @@ static int machine__resolve_callchain_sample(struct machine *machine, | |||
1266 | return 0; | 1268 | return 0; |
1267 | } | 1269 | } |
1268 | 1270 | ||
1269 | for (i = 0; i < chain->nr; i++) { | 1271 | for (i = 0; i < chain_nr; i++) { |
1270 | u64 ip; | 1272 | u64 ip; |
1271 | struct addr_location al; | 1273 | struct addr_location al; |
1272 | 1274 | ||
@@ -1338,12 +1340,14 @@ int machine__resolve_callchain(struct machine *machine, | |||
1338 | struct thread *thread, | 1340 | struct thread *thread, |
1339 | struct perf_sample *sample, | 1341 | struct perf_sample *sample, |
1340 | struct symbol **parent, | 1342 | struct symbol **parent, |
1341 | struct addr_location *root_al) | 1343 | struct addr_location *root_al, |
1344 | int max_stack) | ||
1342 | { | 1345 | { |
1343 | int ret; | 1346 | int ret; |
1344 | 1347 | ||
1345 | ret = machine__resolve_callchain_sample(machine, thread, | 1348 | ret = machine__resolve_callchain_sample(machine, thread, |
1346 | sample->callchain, parent, root_al); | 1349 | sample->callchain, parent, |
1350 | root_al, max_stack); | ||
1347 | if (ret) | 1351 | if (ret) |
1348 | return ret; | 1352 | return ret; |
1349 | 1353 | ||
diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index d44c09bdc45e..4c1f5d567f54 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h | |||
@@ -92,7 +92,8 @@ int machine__resolve_callchain(struct machine *machine, | |||
92 | struct thread *thread, | 92 | struct thread *thread, |
93 | struct perf_sample *sample, | 93 | struct perf_sample *sample, |
94 | struct symbol **parent, | 94 | struct symbol **parent, |
95 | struct addr_location *root_al); | 95 | struct addr_location *root_al, |
96 | int max_stack); | ||
96 | 97 | ||
97 | /* | 98 | /* |
98 | * Default guest kernel is defined by parameter --guestkallsyms | 99 | * Default guest kernel is defined by parameter --guestkallsyms |
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 19fc71678c8e..854c5aa4db0d 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c | |||
@@ -1512,7 +1512,8 @@ void perf_evsel__print_ip(struct perf_evsel *evsel, union perf_event *event, | |||
1512 | if (symbol_conf.use_callchain && sample->callchain) { | 1512 | if (symbol_conf.use_callchain && sample->callchain) { |
1513 | 1513 | ||
1514 | if (machine__resolve_callchain(machine, evsel, al.thread, | 1514 | if (machine__resolve_callchain(machine, evsel, al.thread, |
1515 | sample, NULL, NULL) != 0) { | 1515 | sample, NULL, NULL, |
1516 | PERF_MAX_STACK_DEPTH) != 0) { | ||
1516 | if (verbose) | 1517 | if (verbose) |
1517 | error("Failed to resolve callchain. Skipping\n"); | 1518 | error("Failed to resolve callchain. Skipping\n"); |
1518 | return; | 1519 | return; |