aboutsummaryrefslogtreecommitdiffstats
path: root/tools
diff options
context:
space:
mode:
author Waiman Long <Waiman.Long@hp.com> 2013-10-18 10:38:48 -0400
committer Arnaldo Carvalho de Melo <acme@redhat.com> 2013-10-21 16:36:25 -0400
commit 91e95617429cb272fd908b1928a1915b37b9655f (patch)
tree 630cd19ff34cf210a03924e92c711d6c51076e2d /tools
parent cc9784bd9fa9d8e27fdea61142398cb85ce401a8 (diff)
perf report: Add --max-stack option to limit callchain stack scan
When callgraph data is included in the perf data file, it may take a long time to scan all of that data and merge it together, especially if the stored callchains are long and the perf data file itself is large, like a Gbyte or so.

The callchain stack is currently limited to PERF_MAX_STACK_DEPTH (127). This is a large value. Usually the callgraph data that developers are most interested in are the first few levels; the rest are usually not looked at.

This patch adds a new --max-stack option to perf-report to limit the depth of callchain stack data to look at, in order to reduce the time it takes for perf-report to finish its processing. It trades the presence of trailing stack information for faster processing.

The following table shows the elapsed time of doing perf-report on a perf.data file of size 985,531,828 bytes.

  --max-stack   Elapsed Time    Output data size
  -----------   ------------    ----------------
  not set         88.0s           124,422,651
  64              87.5s           116,303,213
  32              87.2s           112,023,804
  16              86.6s            94,326,380
  8               59.9s            33,697,248
  4               40.7s            10,116,637
  -g none         27.1s             2,555,810

Signed-off-by: Waiman Long <Waiman.Long@hp.com>
Acked-by: David Ahern <dsahern@gmail.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Aswin Chandramouleeswaran <aswin@hp.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Scott J Norton <scott.norton@hp.com>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1382107129-2010-4-git-send-email-Waiman.Long@hp.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Diffstat (limited to 'tools')
-rw-r--r-- tools/perf/Documentation/perf-report.txt | 8
-rw-r--r-- tools/perf/builtin-report.c | 22
-rw-r--r-- tools/perf/builtin-top.c | 3
-rw-r--r-- tools/perf/util/machine.c | 14
-rw-r--r-- tools/perf/util/machine.h | 3
-rw-r--r-- tools/perf/util/session.c | 3
6 files changed, 40 insertions, 13 deletions
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index be5ad87b6c3d..10a279871251 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -141,6 +141,14 @@ OPTIONS
141 141
142 Default: fractal,0.5,callee,function. 142 Default: fractal,0.5,callee,function.
143 143
144--max-stack::
145 Set the stack depth limit when parsing the callchain, anything
146 beyond the specified depth will be ignored. This is a trade-off
147 between information loss and faster processing especially for
148 workloads that can have a very long callchain stack.
149
150 Default: 127
151
144-G:: 152-G::
145--inverted:: 153--inverted::
146 alias for inverted caller based call graph. 154 alias for inverted caller based call graph.
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index fa68a36bc461..81addcabb356 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -49,6 +49,7 @@ struct perf_report {
49 bool show_threads; 49 bool show_threads;
50 bool inverted_callchain; 50 bool inverted_callchain;
51 bool mem_mode; 51 bool mem_mode;
52 int max_stack;
52 struct perf_read_values show_threads_values; 53 struct perf_read_values show_threads_values;
53 const char *pretty_printing_style; 54 const char *pretty_printing_style;
54 const char *cpu_list; 55 const char *cpu_list;
@@ -90,7 +91,8 @@ static int perf_report__add_mem_hist_entry(struct perf_tool *tool,
90 if ((sort__has_parent || symbol_conf.use_callchain) && 91 if ((sort__has_parent || symbol_conf.use_callchain) &&
91 sample->callchain) { 92 sample->callchain) {
92 err = machine__resolve_callchain(machine, evsel, al->thread, 93 err = machine__resolve_callchain(machine, evsel, al->thread,
93 sample, &parent, al); 94 sample, &parent, al,
95 rep->max_stack);
94 if (err) 96 if (err)
95 return err; 97 return err;
96 } 98 }
@@ -181,7 +183,8 @@ static int perf_report__add_branch_hist_entry(struct perf_tool *tool,
181 if ((sort__has_parent || symbol_conf.use_callchain) 183 if ((sort__has_parent || symbol_conf.use_callchain)
182 && sample->callchain) { 184 && sample->callchain) {
183 err = machine__resolve_callchain(machine, evsel, al->thread, 185 err = machine__resolve_callchain(machine, evsel, al->thread,
184 sample, &parent, al); 186 sample, &parent, al,
187 rep->max_stack);
185 if (err) 188 if (err)
186 return err; 189 return err;
187 } 190 }
@@ -244,18 +247,21 @@ out:
244 return err; 247 return err;
245} 248}
246 249
247static int perf_evsel__add_hist_entry(struct perf_evsel *evsel, 250static int perf_evsel__add_hist_entry(struct perf_tool *tool,
251 struct perf_evsel *evsel,
248 struct addr_location *al, 252 struct addr_location *al,
249 struct perf_sample *sample, 253 struct perf_sample *sample,
250 struct machine *machine) 254 struct machine *machine)
251{ 255{
256 struct perf_report *rep = container_of(tool, struct perf_report, tool);
252 struct symbol *parent = NULL; 257 struct symbol *parent = NULL;
253 int err = 0; 258 int err = 0;
254 struct hist_entry *he; 259 struct hist_entry *he;
255 260
256 if ((sort__has_parent || symbol_conf.use_callchain) && sample->callchain) { 261 if ((sort__has_parent || symbol_conf.use_callchain) && sample->callchain) {
257 err = machine__resolve_callchain(machine, evsel, al->thread, 262 err = machine__resolve_callchain(machine, evsel, al->thread,
258 sample, &parent, al); 263 sample, &parent, al,
264 rep->max_stack);
259 if (err) 265 if (err)
260 return err; 266 return err;
261 } 267 }
@@ -332,7 +338,8 @@ static int process_sample_event(struct perf_tool *tool,
332 if (al.map != NULL) 338 if (al.map != NULL)
333 al.map->dso->hit = 1; 339 al.map->dso->hit = 1;
334 340
335 ret = perf_evsel__add_hist_entry(evsel, &al, sample, machine); 341 ret = perf_evsel__add_hist_entry(tool, evsel, &al, sample,
342 machine);
336 if (ret < 0) 343 if (ret < 0)
337 pr_debug("problem incrementing symbol period, skipping event\n"); 344 pr_debug("problem incrementing symbol period, skipping event\n");
338 } 345 }
@@ -772,6 +779,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
772 .ordered_samples = true, 779 .ordered_samples = true,
773 .ordering_requires_timestamps = true, 780 .ordering_requires_timestamps = true,
774 }, 781 },
782 .max_stack = PERF_MAX_STACK_DEPTH,
775 .pretty_printing_style = "normal", 783 .pretty_printing_style = "normal",
776 }; 784 };
777 const struct option options[] = { 785 const struct option options[] = {
@@ -812,6 +820,10 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
812 OPT_CALLBACK_DEFAULT('g', "call-graph", &report, "output_type,min_percent[,print_limit],call_order", 820 OPT_CALLBACK_DEFAULT('g', "call-graph", &report, "output_type,min_percent[,print_limit],call_order",
813 "Display callchains using output_type (graph, flat, fractal, or none) , min percent threshold, optional print limit, callchain order, key (function or address). " 821 "Display callchains using output_type (graph, flat, fractal, or none) , min percent threshold, optional print limit, callchain order, key (function or address). "
814 "Default: fractal,0.5,callee,function", &parse_callchain_opt, callchain_default_opt), 822 "Default: fractal,0.5,callee,function", &parse_callchain_opt, callchain_default_opt),
823 OPT_INTEGER(0, "max-stack", &report.max_stack,
824 "Set the maximum stack depth when parsing the callchain, "
825 "anything beyond the specified depth will be ignored. "
826 "Default: " __stringify(PERF_MAX_STACK_DEPTH)),
815 OPT_BOOLEAN('G', "inverted", &report.inverted_callchain, 827 OPT_BOOLEAN('G', "inverted", &report.inverted_callchain,
816 "alias for inverted call graph"), 828 "alias for inverted call graph"),
817 OPT_CALLBACK(0, "ignore-callees", NULL, "regex", 829 OPT_CALLBACK(0, "ignore-callees", NULL, "regex",
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index d934f707ee74..112cb7d68e64 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -770,7 +770,8 @@ static void perf_event__process_sample(struct perf_tool *tool,
770 sample->callchain) { 770 sample->callchain) {
771 err = machine__resolve_callchain(machine, evsel, 771 err = machine__resolve_callchain(machine, evsel,
772 al.thread, sample, 772 al.thread, sample,
773 &parent, &al); 773 &parent, &al,
774 PERF_MAX_STACK_DEPTH);
774 if (err) 775 if (err)
775 return; 776 return;
776 } 777 }
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 6b861aefd99a..ea93425cce95 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -1253,10 +1253,12 @@ static int machine__resolve_callchain_sample(struct machine *machine,
1253 struct thread *thread, 1253 struct thread *thread,
1254 struct ip_callchain *chain, 1254 struct ip_callchain *chain,
1255 struct symbol **parent, 1255 struct symbol **parent,
1256 struct addr_location *root_al) 1256 struct addr_location *root_al,
1257 int max_stack)
1257{ 1258{
1258 u8 cpumode = PERF_RECORD_MISC_USER; 1259 u8 cpumode = PERF_RECORD_MISC_USER;
1259 unsigned int i; 1260 int chain_nr = min(max_stack, (int)chain->nr);
1261 int i;
1260 int err; 1262 int err;
1261 1263
1262 callchain_cursor_reset(&callchain_cursor); 1264 callchain_cursor_reset(&callchain_cursor);
@@ -1266,7 +1268,7 @@ static int machine__resolve_callchain_sample(struct machine *machine,
1266 return 0; 1268 return 0;
1267 } 1269 }
1268 1270
1269 for (i = 0; i < chain->nr; i++) { 1271 for (i = 0; i < chain_nr; i++) {
1270 u64 ip; 1272 u64 ip;
1271 struct addr_location al; 1273 struct addr_location al;
1272 1274
@@ -1338,12 +1340,14 @@ int machine__resolve_callchain(struct machine *machine,
1338 struct thread *thread, 1340 struct thread *thread,
1339 struct perf_sample *sample, 1341 struct perf_sample *sample,
1340 struct symbol **parent, 1342 struct symbol **parent,
1341 struct addr_location *root_al) 1343 struct addr_location *root_al,
1344 int max_stack)
1342{ 1345{
1343 int ret; 1346 int ret;
1344 1347
1345 ret = machine__resolve_callchain_sample(machine, thread, 1348 ret = machine__resolve_callchain_sample(machine, thread,
1346 sample->callchain, parent, root_al); 1349 sample->callchain, parent,
1350 root_al, max_stack);
1347 if (ret) 1351 if (ret)
1348 return ret; 1352 return ret;
1349 1353
diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h
index d44c09bdc45e..4c1f5d567f54 100644
--- a/tools/perf/util/machine.h
+++ b/tools/perf/util/machine.h
@@ -92,7 +92,8 @@ int machine__resolve_callchain(struct machine *machine,
92 struct thread *thread, 92 struct thread *thread,
93 struct perf_sample *sample, 93 struct perf_sample *sample,
94 struct symbol **parent, 94 struct symbol **parent,
95 struct addr_location *root_al); 95 struct addr_location *root_al,
96 int max_stack);
96 97
97/* 98/*
98 * Default guest kernel is defined by parameter --guestkallsyms 99 * Default guest kernel is defined by parameter --guestkallsyms
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 19fc71678c8e..854c5aa4db0d 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -1512,7 +1512,8 @@ void perf_evsel__print_ip(struct perf_evsel *evsel, union perf_event *event,
1512 if (symbol_conf.use_callchain && sample->callchain) { 1512 if (symbol_conf.use_callchain && sample->callchain) {
1513 1513
1514 if (machine__resolve_callchain(machine, evsel, al.thread, 1514 if (machine__resolve_callchain(machine, evsel, al.thread,
1515 sample, NULL, NULL) != 0) { 1515 sample, NULL, NULL,
1516 PERF_MAX_STACK_DEPTH) != 0) {
1516 if (verbose) 1517 if (verbose)
1517 error("Failed to resolve callchain. Skipping\n"); 1518 error("Failed to resolve callchain. Skipping\n");
1518 return; 1519 return;