diff options
author | Frederic Weisbecker <fweisbec@gmail.com> | 2009-07-02 11:58:21 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2009-07-02 14:47:15 -0400 |
commit | 4eb3e4788b8a5e220a0aeb590f88c28850726ebe (patch) | |
tree | 537392177078684b87197d358cf2104e90ec6d66 /tools | |
parent | 5a4b181721375700124513cdd9f97056e1c66675 (diff) |
perf report: Add support for callchain graph output
Currently, the printing of callchains is done in a single
vertical level, this is the "flat" mode:
8.25% [k] copy_user_generic_string
4.19%
copy_user_generic_string
generic_file_aio_read
do_sync_read
vfs_read
sys_pread64
system_call_fastpath
pread64
This patch introduces a new "graph" mode which provides a
hierarchical output of factorized paths recursively sorted:
8.25% [k] copy_user_generic_string
|
|--4.31%-- generic_file_aio_read
| do_sync_read
| vfs_read
| |
| |--4.19%-- sys_pread64
| | system_call_fastpath
| | pread64
| |
| --0.12%-- sys_read
| system_call_fastpath
| __read
|
|--3.24%-- generic_file_buffered_write
| __generic_file_aio_write_nolock
| generic_file_aio_write
| do_sync_write
| reiserfs_file_write
| vfs_write
| |
| |--3.14%-- sys_pwrite64
| | system_call_fastpath
| | __pwrite64
| |
| --0.10%-- sys_write
[...]
The command line has then changed.
By providing the -c option, the callchain will output in the
flat mode by default.
But you can override it:
perf report -c graph
or
perf report -c flat
You can also pass the abreviated mode:
perf report -c g
or
perf report -c gra
will both make use of the graph mode.
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Anton Blanchard <anton@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <1246550301-8954-3-git-send-email-fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'tools')
-rw-r--r-- | tools/perf/builtin-report.c | 141 | ||||
-rw-r--r-- | tools/perf/util/callchain.c | 51 | ||||
-rw-r--r-- | tools/perf/util/callchain.h | 11 |
3 files changed, 185 insertions, 18 deletions
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index b44476ca239..0ca46386d93 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c | |||
@@ -59,6 +59,7 @@ static regex_t parent_regex; | |||
59 | 59 | ||
60 | static int exclude_other = 1; | 60 | static int exclude_other = 1; |
61 | static int callchain; | 61 | static int callchain; |
62 | static enum chain_mode callchain_mode; | ||
62 | 63 | ||
63 | static u64 sample_type; | 64 | static u64 sample_type; |
64 | 65 | ||
@@ -787,8 +788,103 @@ hist_entry__collapse(struct hist_entry *left, struct hist_entry *right) | |||
787 | return cmp; | 788 | return cmp; |
788 | } | 789 | } |
789 | 790 | ||
791 | static size_t ipchain__fprintf_graph_line(FILE *fp, int depth, int depth_mask) | ||
792 | { | ||
793 | int i; | ||
794 | size_t ret = 0; | ||
795 | |||
796 | ret += fprintf(fp, "%s", " "); | ||
797 | |||
798 | for (i = 0; i < depth; i++) | ||
799 | if (depth_mask & (1 << i)) | ||
800 | ret += fprintf(fp, "| "); | ||
801 | else | ||
802 | ret += fprintf(fp, " "); | ||
803 | |||
804 | ret += fprintf(fp, "\n"); | ||
805 | |||
806 | return ret; | ||
807 | } | ||
790 | static size_t | 808 | static size_t |
791 | callchain__fprintf(FILE *fp, struct callchain_node *self, u64 total_samples) | 809 | ipchain__fprintf_graph(FILE *fp, struct callchain_list *chain, int depth, |
810 | int depth_mask, int count, u64 total_samples, | ||
811 | int hits) | ||
812 | { | ||
813 | int i; | ||
814 | size_t ret = 0; | ||
815 | |||
816 | ret += fprintf(fp, "%s", " "); | ||
817 | for (i = 0; i < depth; i++) { | ||
818 | if (depth_mask & (1 << i)) | ||
819 | ret += fprintf(fp, "|"); | ||
820 | else | ||
821 | ret += fprintf(fp, " "); | ||
822 | if (!count && i == depth - 1) { | ||
823 | double percent; | ||
824 | |||
825 | percent = hits * 100.0 / total_samples; | ||
826 | ret += fprintf(fp, "--%2.2f%%-- ", percent); | ||
827 | } else | ||
828 | ret += fprintf(fp, "%s", " "); | ||
829 | } | ||
830 | if (chain->sym) | ||
831 | ret += fprintf(fp, "%s\n", chain->sym->name); | ||
832 | else | ||
833 | ret += fprintf(fp, "%p\n", (void *)(long)chain->ip); | ||
834 | |||
835 | return ret; | ||
836 | } | ||
837 | |||
838 | static size_t | ||
839 | callchain__fprintf_graph(FILE *fp, struct callchain_node *self, | ||
840 | u64 total_samples, int depth, int depth_mask) | ||
841 | { | ||
842 | struct rb_node *node, *next; | ||
843 | struct callchain_node *child; | ||
844 | struct callchain_list *chain; | ||
845 | int new_depth_mask = depth_mask; | ||
846 | size_t ret = 0; | ||
847 | int i; | ||
848 | |||
849 | node = rb_first(&self->rb_root); | ||
850 | while (node) { | ||
851 | child = rb_entry(node, struct callchain_node, rb_node); | ||
852 | |||
853 | /* | ||
854 | * The depth mask manages the output of pipes that show | ||
855 | * the depth. We don't want to keep the pipes of the current | ||
856 | * level for the last child of this depth | ||
857 | */ | ||
858 | next = rb_next(node); | ||
859 | if (!next) | ||
860 | new_depth_mask &= ~(1 << (depth - 1)); | ||
861 | |||
862 | /* | ||
863 | * But we keep the older depth mask for the line seperator | ||
864 | * to keep the level link until we reach the last child | ||
865 | */ | ||
866 | ret += ipchain__fprintf_graph_line(fp, depth, depth_mask); | ||
867 | i = 0; | ||
868 | list_for_each_entry(chain, &child->val, list) { | ||
869 | if (chain->ip >= PERF_CONTEXT_MAX) | ||
870 | continue; | ||
871 | ret += ipchain__fprintf_graph(fp, chain, depth, | ||
872 | new_depth_mask, i++, | ||
873 | total_samples, | ||
874 | child->cumul_hit); | ||
875 | } | ||
876 | ret += callchain__fprintf_graph(fp, child, total_samples, | ||
877 | depth + 1, | ||
878 | new_depth_mask | (1 << depth)); | ||
879 | node = next; | ||
880 | } | ||
881 | |||
882 | return ret; | ||
883 | } | ||
884 | |||
885 | static size_t | ||
886 | callchain__fprintf_flat(FILE *fp, struct callchain_node *self, | ||
887 | u64 total_samples) | ||
792 | { | 888 | { |
793 | struct callchain_list *chain; | 889 | struct callchain_list *chain; |
794 | size_t ret = 0; | 890 | size_t ret = 0; |
@@ -796,7 +892,7 @@ callchain__fprintf(FILE *fp, struct callchain_node *self, u64 total_samples) | |||
796 | if (!self) | 892 | if (!self) |
797 | return 0; | 893 | return 0; |
798 | 894 | ||
799 | ret += callchain__fprintf(fp, self->parent, total_samples); | 895 | ret += callchain__fprintf_flat(fp, self->parent, total_samples); |
800 | 896 | ||
801 | 897 | ||
802 | list_for_each_entry(chain, &self->val, list) { | 898 | list_for_each_entry(chain, &self->val, list) { |
@@ -826,8 +922,13 @@ hist_entry_callchain__fprintf(FILE *fp, struct hist_entry *self, | |||
826 | 922 | ||
827 | chain = rb_entry(rb_node, struct callchain_node, rb_node); | 923 | chain = rb_entry(rb_node, struct callchain_node, rb_node); |
828 | percent = chain->hit * 100.0 / total_samples; | 924 | percent = chain->hit * 100.0 / total_samples; |
829 | ret += fprintf(fp, " %6.2f%%\n", percent); | 925 | if (callchain_mode == FLAT) { |
830 | ret += callchain__fprintf(fp, chain, total_samples); | 926 | ret += fprintf(fp, " %6.2f%%\n", percent); |
927 | ret += callchain__fprintf_flat(fp, chain, total_samples); | ||
928 | } else if (callchain_mode == GRAPH) { | ||
929 | ret += callchain__fprintf_graph(fp, chain, | ||
930 | total_samples, 1, 1); | ||
931 | } | ||
831 | ret += fprintf(fp, "\n"); | 932 | ret += fprintf(fp, "\n"); |
832 | rb_node = rb_next(rb_node); | 933 | rb_node = rb_next(rb_node); |
833 | } | 934 | } |
@@ -1129,8 +1230,12 @@ static void output__insert_entry(struct hist_entry *he) | |||
1129 | struct rb_node *parent = NULL; | 1230 | struct rb_node *parent = NULL; |
1130 | struct hist_entry *iter; | 1231 | struct hist_entry *iter; |
1131 | 1232 | ||
1132 | if (callchain) | 1233 | if (callchain) { |
1133 | sort_chain_to_rbtree(&he->sorted_chain, &he->callchain); | 1234 | if (callchain_mode == FLAT) |
1235 | sort_chain_flat(&he->sorted_chain, &he->callchain); | ||
1236 | else if (callchain_mode == GRAPH) | ||
1237 | sort_chain_graph(&he->sorted_chain, &he->callchain); | ||
1238 | } | ||
1134 | 1239 | ||
1135 | while (*p != NULL) { | 1240 | while (*p != NULL) { |
1136 | parent = *p; | 1241 | parent = *p; |
@@ -1702,6 +1807,26 @@ done: | |||
1702 | return rc; | 1807 | return rc; |
1703 | } | 1808 | } |
1704 | 1809 | ||
1810 | static int | ||
1811 | parse_callchain_opt(const struct option *opt __used, const char *arg, | ||
1812 | int unset __used) | ||
1813 | { | ||
1814 | callchain = 1; | ||
1815 | |||
1816 | if (!arg) | ||
1817 | return 0; | ||
1818 | |||
1819 | if (!strncmp(arg, "graph", strlen(arg))) | ||
1820 | callchain_mode = GRAPH; | ||
1821 | |||
1822 | else if (!strncmp(arg, "flat", strlen(arg))) | ||
1823 | callchain_mode = FLAT; | ||
1824 | else | ||
1825 | return -1; | ||
1826 | |||
1827 | return 0; | ||
1828 | } | ||
1829 | |||
1705 | static const char * const report_usage[] = { | 1830 | static const char * const report_usage[] = { |
1706 | "perf report [<options>] <command>", | 1831 | "perf report [<options>] <command>", |
1707 | NULL | 1832 | NULL |
@@ -1725,7 +1850,9 @@ static const struct option options[] = { | |||
1725 | "regex filter to identify parent, see: '--sort parent'"), | 1850 | "regex filter to identify parent, see: '--sort parent'"), |
1726 | OPT_BOOLEAN('x', "exclude-other", &exclude_other, | 1851 | OPT_BOOLEAN('x', "exclude-other", &exclude_other, |
1727 | "Only display entries with parent-match"), | 1852 | "Only display entries with parent-match"), |
1728 | OPT_BOOLEAN('c', "callchain", &callchain, "Display callchains"), | 1853 | OPT_CALLBACK_DEFAULT('c', "callchain", NULL, "output_type", |
1854 | "Display callchains with output_type: flat, graph. " | ||
1855 | "Default to flat", &parse_callchain_opt, "flat"), | ||
1729 | OPT_STRING('d', "dsos", &dso_list_str, "dso[,dso...]", | 1856 | OPT_STRING('d', "dsos", &dso_list_str, "dso[,dso...]", |
1730 | "only consider symbols in these dsos"), | 1857 | "only consider symbols in these dsos"), |
1731 | OPT_STRING('C', "comms", &comm_list_str, "comm[,comm...]", | 1858 | OPT_STRING('C', "comms", &comm_list_str, "comm[,comm...]", |
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 3c4a91fea62..a9873aafcd9 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c | |||
@@ -19,9 +19,9 @@ | |||
19 | #define chain_for_each_child(child, parent) \ | 19 | #define chain_for_each_child(child, parent) \ |
20 | list_for_each_entry(child, &parent->children, brothers) | 20 | list_for_each_entry(child, &parent->children, brothers) |
21 | 21 | ||
22 | |||
23 | static void | 22 | static void |
24 | rb_insert_callchain(struct rb_root *root, struct callchain_node *chain) | 23 | rb_insert_callchain(struct rb_root *root, struct callchain_node *chain, |
24 | enum chain_mode mode) | ||
25 | { | 25 | { |
26 | struct rb_node **p = &root->rb_node; | 26 | struct rb_node **p = &root->rb_node; |
27 | struct rb_node *parent = NULL; | 27 | struct rb_node *parent = NULL; |
@@ -31,10 +31,22 @@ rb_insert_callchain(struct rb_root *root, struct callchain_node *chain) | |||
31 | parent = *p; | 31 | parent = *p; |
32 | rnode = rb_entry(parent, struct callchain_node, rb_node); | 32 | rnode = rb_entry(parent, struct callchain_node, rb_node); |
33 | 33 | ||
34 | if (rnode->hit < chain->hit) | 34 | switch (mode) { |
35 | p = &(*p)->rb_left; | 35 | case FLAT: |
36 | else | 36 | if (rnode->hit < chain->hit) |
37 | p = &(*p)->rb_right; | 37 | p = &(*p)->rb_left; |
38 | else | ||
39 | p = &(*p)->rb_right; | ||
40 | break; | ||
41 | case GRAPH: | ||
42 | if (rnode->cumul_hit < chain->cumul_hit) | ||
43 | p = &(*p)->rb_left; | ||
44 | else | ||
45 | p = &(*p)->rb_right; | ||
46 | break; | ||
47 | default: | ||
48 | break; | ||
49 | } | ||
38 | } | 50 | } |
39 | 51 | ||
40 | rb_link_node(&chain->rb_node, parent, p); | 52 | rb_link_node(&chain->rb_node, parent, p); |
@@ -45,15 +57,36 @@ rb_insert_callchain(struct rb_root *root, struct callchain_node *chain) | |||
45 | * Once we get every callchains from the stream, we can now | 57 | * Once we get every callchains from the stream, we can now |
46 | * sort them by hit | 58 | * sort them by hit |
47 | */ | 59 | */ |
48 | void sort_chain_to_rbtree(struct rb_root *rb_root, struct callchain_node *node) | 60 | void sort_chain_flat(struct rb_root *rb_root, struct callchain_node *node) |
49 | { | 61 | { |
50 | struct callchain_node *child; | 62 | struct callchain_node *child; |
51 | 63 | ||
52 | chain_for_each_child(child, node) | 64 | chain_for_each_child(child, node) |
53 | sort_chain_to_rbtree(rb_root, child); | 65 | sort_chain_flat(rb_root, child); |
54 | 66 | ||
55 | if (node->hit) | 67 | if (node->hit) |
56 | rb_insert_callchain(rb_root, node); | 68 | rb_insert_callchain(rb_root, node, FLAT); |
69 | } | ||
70 | |||
71 | static void __sort_chain_graph(struct callchain_node *node) | ||
72 | { | ||
73 | struct callchain_node *child; | ||
74 | |||
75 | node->rb_root = RB_ROOT; | ||
76 | node->cumul_hit = node->hit; | ||
77 | |||
78 | chain_for_each_child(child, node) { | ||
79 | __sort_chain_graph(child); | ||
80 | rb_insert_callchain(&node->rb_root, child, GRAPH); | ||
81 | node->cumul_hit += child->cumul_hit; | ||
82 | } | ||
83 | } | ||
84 | |||
85 | void | ||
86 | sort_chain_graph(struct rb_root *rb_root, struct callchain_node *chain_root) | ||
87 | { | ||
88 | __sort_chain_graph(chain_root); | ||
89 | rb_root->rb_node = chain_root->rb_root.rb_node; | ||
57 | } | 90 | } |
58 | 91 | ||
59 | /* | 92 | /* |
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index e9bd5e882f3..dfa56008d9a 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h | |||
@@ -6,15 +6,21 @@ | |||
6 | #include <linux/rbtree.h> | 6 | #include <linux/rbtree.h> |
7 | #include "symbol.h" | 7 | #include "symbol.h" |
8 | 8 | ||
9 | enum chain_mode { | ||
10 | FLAT, | ||
11 | GRAPH | ||
12 | }; | ||
9 | 13 | ||
10 | struct callchain_node { | 14 | struct callchain_node { |
11 | struct callchain_node *parent; | 15 | struct callchain_node *parent; |
12 | struct list_head brothers; | 16 | struct list_head brothers; |
13 | struct list_head children; | 17 | struct list_head children; |
14 | struct list_head val; | 18 | struct list_head val; |
15 | struct rb_node rb_node; | 19 | struct rb_node rb_node; /* to sort nodes in an rbtree */ |
20 | struct rb_root rb_root; /* sorted tree of children */ | ||
16 | unsigned int val_nr; | 21 | unsigned int val_nr; |
17 | u64 hit; | 22 | u64 hit; |
23 | u64 cumul_hit; /* hit + hits of children */ | ||
18 | }; | 24 | }; |
19 | 25 | ||
20 | struct callchain_list { | 26 | struct callchain_list { |
@@ -32,5 +38,6 @@ static inline void callchain_init(struct callchain_node *node) | |||
32 | 38 | ||
33 | void append_chain(struct callchain_node *root, struct ip_callchain *chain, | 39 | void append_chain(struct callchain_node *root, struct ip_callchain *chain, |
34 | struct symbol **syms); | 40 | struct symbol **syms); |
35 | void sort_chain_to_rbtree(struct rb_root *rb_root, struct callchain_node *node); | 41 | void sort_chain_flat(struct rb_root *rb_root, struct callchain_node *node); |
42 | void sort_chain_graph(struct rb_root *rb_root, struct callchain_node *node); | ||
36 | #endif | 43 | #endif |