diff options
author | Waiman Long <Waiman.Long@hp.com> | 2013-10-18 10:38:48 -0400 |
---|---|---|
committer | Arnaldo Carvalho de Melo <acme@redhat.com> | 2013-10-21 16:36:25 -0400 |
commit | 91e95617429cb272fd908b1928a1915b37b9655f (patch) | |
tree | 630cd19ff34cf210a03924e92c711d6c51076e2d /tools/perf/util/machine.c | |
parent | cc9784bd9fa9d8e27fdea61142398cb85ce401a8 (diff) |
perf report: Add --max-stack option to limit callchain stack scan
When callgraph data is included in the perf data file, it may take a
long time to scan all of that data and merge it together, especially if
the stored callchains are long and the perf data file itself is large,
like a Gbyte or so.
The callchain stack is currently limited to PERF_MAX_STACK_DEPTH (127).
This is a large value. Usually the callgraph data that developers are
most interested in is the first few levels; the rest is usually not
looked at.
This patch adds a new --max-stack option to perf-report to limit the
depth of callchain stack data to look at to reduce the time it takes for
perf-report to finish its processing. It trades the presence of trailing
stack information for faster speed.
The following table shows the elapsed time of doing perf-report on a
perf.data file of size 985,531,828 bytes.
--max-stack Elapsed Time Output data size
----------- ------------ ----------------
not set 88.0s 124,422,651
64 87.5s 116,303,213
32 87.2s 112,023,804
16 86.6s 94,326,380
8 59.9s 33,697,248
4 40.7s 10,116,637
-g none 27.1s 2,555,810
Signed-off-by: Waiman Long <Waiman.Long@hp.com>
Acked-by: David Ahern <dsahern@gmail.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Aswin Chandramouleeswaran <aswin@hp.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Scott J Norton <scott.norton@hp.com>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1382107129-2010-4-git-send-email-Waiman.Long@hp.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Diffstat (limited to 'tools/perf/util/machine.c')
-rw-r--r-- | tools/perf/util/machine.c | 14 |
1 files changed, 9 insertions, 5 deletions
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 6b861aefd99a..ea93425cce95 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c | |||
@@ -1253,10 +1253,12 @@ static int machine__resolve_callchain_sample(struct machine *machine, | |||
1253 | struct thread *thread, | 1253 | struct thread *thread, |
1254 | struct ip_callchain *chain, | 1254 | struct ip_callchain *chain, |
1255 | struct symbol **parent, | 1255 | struct symbol **parent, |
1256 | struct addr_location *root_al) | 1256 | struct addr_location *root_al, |
1257 | int max_stack) | ||
1257 | { | 1258 | { |
1258 | u8 cpumode = PERF_RECORD_MISC_USER; | 1259 | u8 cpumode = PERF_RECORD_MISC_USER; |
1259 | unsigned int i; | 1260 | int chain_nr = min(max_stack, (int)chain->nr); |
1261 | int i; | ||
1260 | int err; | 1262 | int err; |
1261 | 1263 | ||
1262 | callchain_cursor_reset(&callchain_cursor); | 1264 | callchain_cursor_reset(&callchain_cursor); |
@@ -1266,7 +1268,7 @@ static int machine__resolve_callchain_sample(struct machine *machine, | |||
1266 | return 0; | 1268 | return 0; |
1267 | } | 1269 | } |
1268 | 1270 | ||
1269 | for (i = 0; i < chain->nr; i++) { | 1271 | for (i = 0; i < chain_nr; i++) { |
1270 | u64 ip; | 1272 | u64 ip; |
1271 | struct addr_location al; | 1273 | struct addr_location al; |
1272 | 1274 | ||
@@ -1338,12 +1340,14 @@ int machine__resolve_callchain(struct machine *machine, | |||
1338 | struct thread *thread, | 1340 | struct thread *thread, |
1339 | struct perf_sample *sample, | 1341 | struct perf_sample *sample, |
1340 | struct symbol **parent, | 1342 | struct symbol **parent, |
1341 | struct addr_location *root_al) | 1343 | struct addr_location *root_al, |
1344 | int max_stack) | ||
1342 | { | 1345 | { |
1343 | int ret; | 1346 | int ret; |
1344 | 1347 | ||
1345 | ret = machine__resolve_callchain_sample(machine, thread, | 1348 | ret = machine__resolve_callchain_sample(machine, thread, |
1346 | sample->callchain, parent, root_al); | 1349 | sample->callchain, parent, |
1350 | root_al, max_stack); | ||
1347 | if (ret) | 1351 | if (ret) |
1348 | return ret; | 1352 | return ret; |
1349 | 1353 | ||