about summary refs log tree commit diff stats
path: root/tools/perf/util
diff options
context:
space:
mode:
author Greg Price <price@MIT.EDU> 2012-12-07 00:48:05 -0500
committer Arnaldo Carvalho de Melo <acme@redhat.com> 2013-07-12 12:53:55 -0400
commit b21484f1a1f300d422cfe5d4f8f50015e22cea24 (patch)
tree 145db22403268858a54a07db111464ff24b6818c /tools/perf/util
parent dc098b35b56f83ae088e4291a4e389a6ff126965 (diff)
perf report/top: Add option to collapse undesired parts of call graph
For example, in an application with an expensive function implemented
with deeply nested recursive calls, the default call-graph presentation
is dominated by the different callchains within that function. By
ignoring these callees, we can collect the callchains leading into the
function and compactly identify what to blame for expensive calls.

For example, in this report the callers of garbage_collect() are
scattered across the tree:

  $ perf report -d ruby 2>- | grep -m10 ^[^#]*[a-z]
      22.03%     ruby  [.] gc_mark
                 --- gc_mark
                    |--59.40%-- mark_keyvalue
                    |          st_foreach
                    |          gc_mark_children
                    |          |--99.75%-- rb_gc_mark
                    |          |          rb_vm_mark
                    |          |          gc_mark_children
                    |          |          gc_marks
                    |          |          |--99.00%-- garbage_collect

If we ignore the callees of garbage_collect(), its callers are coalesced:

  $ perf report --ignore-callees garbage_collect -d ruby 2>- | grep -m10 ^[^#]*[a-z]
      72.92%     ruby  [.] garbage_collect
                 --- garbage_collect
                     vm_xmalloc
                    |--47.08%-- ruby_xmalloc
                    |          st_insert2
                    |          rb_hash_aset
                    |          |--98.45%-- features_index_add
                    |          |          rb_provide_feature
                    |          |          rb_require_safe
                    |          |          vm_call_method

Signed-off-by: Greg Price <price@mit.edu>
Tested-by: Jiri Olsa <jolsa@redhat.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/20130623031720.GW22203@biohazard-cafe.mit.edu
Link: http://lkml.kernel.org/r/20130708115746.GO22203@biohazard-cafe.mit.edu
Cc: Fengguang Wu <fengguang.wu@intel.com>
[ remove spaces at beginning of line, reported by Fengguang Wu ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Diffstat (limited to 'tools/perf/util')
-rw-r--r-- tools/perf/util/machine.c 24
-rw-r--r-- tools/perf/util/machine.h 4
-rw-r--r-- tools/perf/util/session.c 3
-rw-r--r-- tools/perf/util/sort.c 2
-rw-r--r-- tools/perf/util/sort.h 4
5 files changed, 25 insertions, 12 deletions
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 5dd5026a82ef..f9f9d6381b9a 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -1058,11 +1058,10 @@ int machine__process_event(struct machine *machine, union perf_event *event)
1058 return ret; 1058 return ret;
1059} 1059}
1060 1060
1061static bool symbol__match_parent_regex(struct symbol *sym) 1061static bool symbol__match_regex(struct symbol *sym, regex_t *regex)
1062{ 1062{
1063 if (sym->name && !regexec(&parent_regex, sym->name, 0, NULL, 0)) 1063 if (sym->name && !regexec(regex, sym->name, 0, NULL, 0))
1064 return 1; 1064 return 1;
1065
1066 return 0; 1065 return 0;
1067} 1066}
1068 1067
@@ -1159,8 +1158,8 @@ struct branch_info *machine__resolve_bstack(struct machine *machine,
1159static int machine__resolve_callchain_sample(struct machine *machine, 1158static int machine__resolve_callchain_sample(struct machine *machine,
1160 struct thread *thread, 1159 struct thread *thread,
1161 struct ip_callchain *chain, 1160 struct ip_callchain *chain,
1162 struct symbol **parent) 1161 struct symbol **parent,
1163 1162 struct addr_location *root_al)
1164{ 1163{
1165 u8 cpumode = PERF_RECORD_MISC_USER; 1164 u8 cpumode = PERF_RECORD_MISC_USER;
1166 unsigned int i; 1165 unsigned int i;
@@ -1211,8 +1210,15 @@ static int machine__resolve_callchain_sample(struct machine *machine,
1211 MAP__FUNCTION, ip, &al, NULL); 1210 MAP__FUNCTION, ip, &al, NULL);
1212 if (al.sym != NULL) { 1211 if (al.sym != NULL) {
1213 if (sort__has_parent && !*parent && 1212 if (sort__has_parent && !*parent &&
1214 symbol__match_parent_regex(al.sym)) 1213 symbol__match_regex(al.sym, &parent_regex))
1215 *parent = al.sym; 1214 *parent = al.sym;
1215 else if (have_ignore_callees && root_al &&
1216 symbol__match_regex(al.sym, &ignore_callees_regex)) {
1217 /* Treat this symbol as the root,
1218 forgetting its callees. */
1219 *root_al = al;
1220 callchain_cursor_reset(&callchain_cursor);
1221 }
1216 if (!symbol_conf.use_callchain) 1222 if (!symbol_conf.use_callchain)
1217 break; 1223 break;
1218 } 1224 }
@@ -1237,13 +1243,13 @@ int machine__resolve_callchain(struct machine *machine,
1237 struct perf_evsel *evsel, 1243 struct perf_evsel *evsel,
1238 struct thread *thread, 1244 struct thread *thread,
1239 struct perf_sample *sample, 1245 struct perf_sample *sample,
1240 struct symbol **parent) 1246 struct symbol **parent,
1241 1247 struct addr_location *root_al)
1242{ 1248{
1243 int ret; 1249 int ret;
1244 1250
1245 ret = machine__resolve_callchain_sample(machine, thread, 1251 ret = machine__resolve_callchain_sample(machine, thread,
1246 sample->callchain, parent); 1252 sample->callchain, parent, root_al);
1247 if (ret) 1253 if (ret)
1248 return ret; 1254 return ret;
1249 1255
diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h
index e49ba01b7937..5bb6244194d5 100644
--- a/tools/perf/util/machine.h
+++ b/tools/perf/util/machine.h
@@ -5,6 +5,7 @@
5#include <linux/rbtree.h> 5#include <linux/rbtree.h>
6#include "map.h" 6#include "map.h"
7 7
8struct addr_location;
8struct branch_stack; 9struct branch_stack;
9struct perf_evsel; 10struct perf_evsel;
10struct perf_sample; 11struct perf_sample;
@@ -83,7 +84,8 @@ int machine__resolve_callchain(struct machine *machine,
83 struct perf_evsel *evsel, 84 struct perf_evsel *evsel,
84 struct thread *thread, 85 struct thread *thread,
85 struct perf_sample *sample, 86 struct perf_sample *sample,
86 struct symbol **parent); 87 struct symbol **parent,
88 struct addr_location *root_al);
87 89
88/* 90/*
89 * Default guest kernel is defined by parameter --guestkallsyms 91 * Default guest kernel is defined by parameter --guestkallsyms
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 951a1cfb317c..1eb58eedcac1 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -1406,9 +1406,8 @@ void perf_evsel__print_ip(struct perf_evsel *evsel, union perf_event *event,
1406 1406
1407 if (symbol_conf.use_callchain && sample->callchain) { 1407 if (symbol_conf.use_callchain && sample->callchain) {
1408 1408
1409
1410 if (machine__resolve_callchain(machine, evsel, al.thread, 1409 if (machine__resolve_callchain(machine, evsel, al.thread,
1411 sample, NULL) != 0) { 1410 sample, NULL, NULL) != 0) {
1412 if (verbose) 1411 if (verbose)
1413 error("Failed to resolve callchain. Skipping\n"); 1412 error("Failed to resolve callchain. Skipping\n");
1414 return; 1413 return;
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 8deee19d2e7f..cb2b108635ee 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -7,6 +7,8 @@ const char default_parent_pattern[] = "^sys_|^do_page_fault";
7const char *parent_pattern = default_parent_pattern; 7const char *parent_pattern = default_parent_pattern;
8const char default_sort_order[] = "comm,dso,symbol"; 8const char default_sort_order[] = "comm,dso,symbol";
9const char *sort_order = default_sort_order; 9const char *sort_order = default_sort_order;
10regex_t ignore_callees_regex;
11int have_ignore_callees = 0;
10int sort__need_collapse = 0; 12int sort__need_collapse = 0;
11int sort__has_parent = 0; 13int sort__has_parent = 0;
12int sort__has_sym = 0; 14int sort__has_sym = 0;
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index 45ac84c1e037..a4a6d0b1ea0e 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -29,6 +29,8 @@ extern const char *sort_order;
29extern const char default_parent_pattern[]; 29extern const char default_parent_pattern[];
30extern const char *parent_pattern; 30extern const char *parent_pattern;
31extern const char default_sort_order[]; 31extern const char default_sort_order[];
32extern regex_t ignore_callees_regex;
33extern int have_ignore_callees;
32extern int sort__need_collapse; 34extern int sort__need_collapse;
33extern int sort__has_parent; 35extern int sort__has_parent;
34extern int sort__has_sym; 36extern int sort__has_sym;
@@ -183,4 +185,6 @@ int setup_sorting(void);
183extern int sort_dimension__add(const char *); 185extern int sort_dimension__add(const char *);
184void sort__setup_elide(FILE *fp); 186void sort__setup_elide(FILE *fp);
185 187
188int report_parse_ignore_callees_opt(const struct option *opt, const char *arg, int unset);
189
186#endif /* __PERF_SORT_H */ 190#endif /* __PERF_SORT_H */