diff options
author | Arnaldo Carvalho de Melo <acme@redhat.com> | 2009-12-14 17:09:31 -0500 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2009-12-15 02:50:29 -0500 |
commit | 86a9eee047ba09a714c3b8e27c9df2bbf715393a (patch) | |
tree | 09c096e92b583f42480ac1dbe7dfa45eae1a7f31 | |
parent | b38d34645cc52136b6c99623fef7ded26742e573 (diff) |
perf diff: Introduce tool to show performance difference
I guess it is enough to show some examples:
[root@doppio linux-2.6-tip]# rm -f perf.data*
[root@doppio linux-2.6-tip]# ls -la perf.data*
ls: cannot access perf.data*: No such file or directory
[root@doppio linux-2.6-tip]# perf record -f find / > /dev/null
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.062 MB perf.data (~2699 samples) ]
[root@doppio linux-2.6-tip]# ls -la perf.data*
-rw------- 1 root root 74440 2009-12-14 20:03 perf.data
[root@doppio linux-2.6-tip]# perf record -f find / > /dev/null
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.062 MB perf.data (~2692 samples) ]
[root@doppio linux-2.6-tip]# ls -la perf.data*
-rw------- 1 root root 74280 2009-12-14 20:03 perf.data
-rw------- 1 root root 74440 2009-12-14 20:03 perf.data.old
[root@doppio linux-2.6-tip]# perf diff | head -5
1 -34994580 /lib64/libc-2.10.1.so _IO_vfprintf_internal
2 -15307806 [kernel.kallsyms] __kmalloc
3 +1 +3665941 /lib64/libc-2.10.1.so __GI_memmove
4 +4 +23508995 /lib64/libc-2.10.1.so _int_malloc
5 +7 +38538813 [kernel.kallsyms] __d_lookup
[root@doppio linux-2.6-tip]# perf diff -p | head -5
1 +1.00% /lib64/libc-2.10.1.so _IO_vfprintf_internal
2 [kernel.kallsyms] __kmalloc
3 +1 /lib64/libc-2.10.1.so __GI_memmove
4 +4 /lib64/libc-2.10.1.so _int_malloc
5 +7 -1.00% [kernel.kallsyms] __d_lookup
[root@doppio linux-2.6-tip]# perf diff -v | head -5
1 361449551 326454971 -34994580 /lib64/libc-2.10.1.so _IO_vfprintf_internal
2 151009241 135701435 -15307806 [kernel.kallsyms] __kmalloc
3 +1 101805328 105471269 +3665941 /lib64/libc-2.10.1.so __GI_memmove
4 +4 78041440 101550435 +23508995 /lib64/libc-2.10.1.so _int_malloc
5 +7 59536172 98074985 +38538813 [kernel.kallsyms] __d_lookup
[root@doppio linux-2.6-tip]# perf diff -vp | head -5
1 9.00% 8.00% +1.00% /lib64/libc-2.10.1.so _IO_vfprintf_internal
2 3.00% 3.00% [kernel.kallsyms] __kmalloc
3 +1 2.00% 2.00% /lib64/libc-2.10.1.so __GI_memmove
4 +4 2.00% 2.00% /lib64/libc-2.10.1.so _int_malloc
5 +7 1.00% 2.00% -1.00% [kernel.kallsyms] __d_lookup
[root@doppio linux-2.6-tip]#
This should be enough for diffs where the system is
non-volatile, i.e. when one doesn't update binaries between recordings.
For volatile environments, stay tuned for the next perf tool
feature: a buildid cache populated by 'perf record', managed by
'perf buildid-cache' a-la ccache, and used by all the report
tools.
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Cc: Stephen Hemminger <shemminger@vyatta.com>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
LKML-Reference: <1260828571-3613-3-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r-- | tools/perf/Documentation/perf-diff.txt | 31 | ||||
-rw-r--r-- | tools/perf/Makefile | 1 | ||||
-rw-r--r-- | tools/perf/builtin-diff.c | 288 | ||||
-rw-r--r-- | tools/perf/builtin.h | 1 | ||||
-rw-r--r-- | tools/perf/command-list.txt | 1 | ||||
-rw-r--r-- | tools/perf/perf.c | 1 | ||||
-rw-r--r-- | tools/perf/util/sort.h | 8 |
7 files changed, 329 insertions, 2 deletions
diff --git a/tools/perf/Documentation/perf-diff.txt b/tools/perf/Documentation/perf-diff.txt new file mode 100644 index 000000000000..bd1ee55cef6a --- /dev/null +++ b/tools/perf/Documentation/perf-diff.txt | |||
@@ -0,0 +1,31 @@ | |||
1 | perf-diff(1) | ||
2 | ============== | ||
3 | |||
4 | NAME | ||
5 | ---- | ||
6 | perf-diff - Read two perf.data files (created by perf record) and display the differential profile | ||
7 | |||
8 | SYNOPSIS | ||
9 | -------- | ||
10 | [verse] | ||
11 | 'perf diff' [oldfile] [newfile] | ||
12 | |||
13 | DESCRIPTION | ||
14 | ----------- | ||
15 | This command displays the performance difference between two perf.data files | ||
16 | captured via perf record. | ||
17 | |||
18 | If no parameters are passed it will assume perf.data.old and perf.data. If a single file is passed it is used as the new file and compared against perf.data.old. | ||
19 | |||
20 | OPTIONS | ||
21 | ------- | ||
22 | -p:: | ||
23 | --percentages:: | ||
24 | Show percentages instead of raw counters | ||
25 | -v:: | ||
26 | --verbose:: | ||
27 | Be verbose, for instance, show the raw counters in addition to the | ||
28 | diff. | ||
29 | SEE ALSO | ||
30 | -------- | ||
31 | linkperf:perf-record[1] | ||
diff --git a/tools/perf/Makefile b/tools/perf/Makefile index a4cb79255383..87a424e4cdae 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile | |||
@@ -427,6 +427,7 @@ BUILTIN_OBJS += bench/sched-messaging.o | |||
427 | BUILTIN_OBJS += bench/sched-pipe.o | 427 | BUILTIN_OBJS += bench/sched-pipe.o |
428 | BUILTIN_OBJS += bench/mem-memcpy.o | 428 | BUILTIN_OBJS += bench/mem-memcpy.o |
429 | 429 | ||
430 | BUILTIN_OBJS += builtin-diff.o | ||
430 | BUILTIN_OBJS += builtin-help.o | 431 | BUILTIN_OBJS += builtin-help.o |
431 | BUILTIN_OBJS += builtin-sched.o | 432 | BUILTIN_OBJS += builtin-sched.o |
432 | BUILTIN_OBJS += builtin-buildid-list.o | 433 | BUILTIN_OBJS += builtin-buildid-list.o |
diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c new file mode 100644 index 000000000000..0d528018ffb8 --- /dev/null +++ b/tools/perf/builtin-diff.c | |||
@@ -0,0 +1,288 @@ | |||
1 | /* | ||
2 | * builtin-diff.c | ||
3 | * | ||
4 | * Builtin diff command: Analyze two perf.data input files, look up and read | ||
5 | * DSOs and symbol information, sort them and produce a diff. | ||
6 | */ | ||
7 | #include "builtin.h" | ||
8 | |||
9 | #include "util/debug.h" | ||
10 | #include "util/event.h" | ||
11 | #include "util/hist.h" | ||
12 | #include "util/session.h" | ||
13 | #include "util/sort.h" | ||
14 | #include "util/symbol.h" | ||
15 | #include "util/util.h" | ||
16 | |||
17 | #include <stdlib.h> | ||
18 | |||
19 | static char const *input_old = "perf.data.old", | ||
20 | *input_new = "perf.data"; | ||
21 | static int force; | ||
22 | static bool show_percent; | ||
23 | |||
24 | struct symbol_conf symbol_conf; | ||
25 | |||
26 | static int perf_session__add_hist_entry(struct perf_session *self, | ||
27 | struct addr_location *al, u64 count) | ||
28 | { | ||
29 | bool hit; | ||
30 | struct hist_entry *he = __perf_session__add_hist_entry(self, al, NULL, | ||
31 | count, &hit); | ||
32 | if (he == NULL) | ||
33 | return -ENOMEM; | ||
34 | |||
35 | if (hit) | ||
36 | he->count += count; | ||
37 | |||
38 | return 0; | ||
39 | } | ||
40 | |||
41 | static int diff__process_sample_event(event_t *event, struct perf_session *session) | ||
42 | { | ||
43 | struct addr_location al; | ||
44 | struct sample_data data = { .period = 1, }; | ||
45 | |||
46 | dump_printf("(IP, %d): %d: %p\n", event->header.misc, | ||
47 | event->ip.pid, (void *)(long)event->ip.ip); | ||
48 | |||
49 | if (event__preprocess_sample(event, session, &al, NULL) < 0) { | ||
50 | pr_warning("problem processing %d event, skipping it.\n", | ||
51 | event->header.type); | ||
52 | return -1; | ||
53 | } | ||
54 | |||
55 | event__parse_sample(event, session->sample_type, &data); | ||
56 | |||
57 | if (al.sym && perf_session__add_hist_entry(session, &al, data.period)) { | ||
58 | pr_warning("problem incrementing symbol count, skipping event\n"); | ||
59 | return -1; | ||
60 | } | ||
61 | |||
62 | session->events_stats.total += data.period; | ||
63 | return 0; | ||
64 | } | ||
65 | |||
66 | static struct perf_event_ops event_ops = { | ||
67 | .process_sample_event = diff__process_sample_event, | ||
68 | .process_mmap_event = event__process_mmap, | ||
69 | .process_comm_event = event__process_comm, | ||
70 | .process_exit_event = event__process_task, | ||
71 | .process_fork_event = event__process_task, | ||
72 | .process_lost_event = event__process_lost, | ||
73 | }; | ||
74 | |||
75 | static void perf_session__insert_hist_entry_by_name(struct rb_root *root, | ||
76 | struct hist_entry *he) | ||
77 | { | ||
78 | struct rb_node **p = &root->rb_node; | ||
79 | struct rb_node *parent = NULL; | ||
80 | struct hist_entry *iter; | ||
81 | |||
82 | while (*p != NULL) { | ||
83 | int cmp; | ||
84 | parent = *p; | ||
85 | iter = rb_entry(parent, struct hist_entry, rb_node); | ||
86 | |||
87 | cmp = strcmp(he->map->dso->name, iter->map->dso->name); | ||
88 | if (cmp > 0) | ||
89 | p = &(*p)->rb_left; | ||
90 | else if (cmp < 0) | ||
91 | p = &(*p)->rb_right; | ||
92 | else { | ||
93 | cmp = strcmp(he->sym->name, iter->sym->name); | ||
94 | if (cmp > 0) | ||
95 | p = &(*p)->rb_left; | ||
96 | else | ||
97 | p = &(*p)->rb_right; | ||
98 | } | ||
99 | } | ||
100 | |||
101 | rb_link_node(&he->rb_node, parent, p); | ||
102 | rb_insert_color(&he->rb_node, root); | ||
103 | } | ||
104 | |||
105 | static void perf_session__resort_by_name(struct perf_session *self) | ||
106 | { | ||
107 | unsigned long position = 1; | ||
108 | struct rb_root tmp = RB_ROOT; | ||
109 | struct rb_node *next = rb_first(&self->hists); | ||
110 | |||
111 | while (next != NULL) { | ||
112 | struct hist_entry *n = rb_entry(next, struct hist_entry, rb_node); | ||
113 | |||
114 | next = rb_next(&n->rb_node); | ||
115 | rb_erase(&n->rb_node, &self->hists); | ||
116 | n->position = position++; | ||
117 | perf_session__insert_hist_entry_by_name(&tmp, n); | ||
118 | } | ||
119 | |||
120 | self->hists = tmp; | ||
121 | } | ||
122 | |||
123 | static struct hist_entry * | ||
124 | perf_session__find_hist_entry_by_name(struct perf_session *self, | ||
125 | struct hist_entry *he) | ||
126 | { | ||
127 | struct rb_node *n = self->hists.rb_node; | ||
128 | |||
129 | while (n) { | ||
130 | struct hist_entry *iter = rb_entry(n, struct hist_entry, rb_node); | ||
131 | int cmp = strcmp(he->map->dso->name, iter->map->dso->name); | ||
132 | |||
133 | if (cmp > 0) | ||
134 | n = n->rb_left; | ||
135 | else if (cmp < 0) | ||
136 | n = n->rb_right; | ||
137 | else { | ||
138 | cmp = strcmp(he->sym->name, iter->sym->name); | ||
139 | if (cmp > 0) | ||
140 | n = n->rb_left; | ||
141 | else if (cmp < 0) | ||
142 | n = n->rb_right; | ||
143 | else | ||
144 | return iter; | ||
145 | } | ||
146 | } | ||
147 | |||
148 | return NULL; | ||
149 | } | ||
150 | |||
151 | static void perf_session__match_hists(struct perf_session *old_session, | ||
152 | struct perf_session *new_session) | ||
153 | { | ||
154 | struct rb_node *nd; | ||
155 | |||
156 | perf_session__resort_by_name(old_session); | ||
157 | |||
158 | for (nd = rb_first(&new_session->hists); nd; nd = rb_next(nd)) { | ||
159 | struct hist_entry *pos = rb_entry(nd, struct hist_entry, rb_node); | ||
160 | pos->pair = perf_session__find_hist_entry_by_name(old_session, pos); | ||
161 | } | ||
162 | } | ||
163 | |||
164 | static size_t hist_entry__fprintf_matched(struct hist_entry *self, | ||
165 | unsigned long pos, | ||
166 | struct perf_session *session, | ||
167 | struct perf_session *pair_session, | ||
168 | FILE *fp) | ||
169 | { | ||
170 | u64 old_count = 0; | ||
171 | char displacement[16]; | ||
172 | size_t printed; | ||
173 | |||
174 | if (self->pair != NULL) { | ||
175 | long pdiff = (long)self->pair->position - (long)pos; | ||
176 | old_count = self->pair->count; | ||
177 | if (pdiff == 0) | ||
178 | goto blank; | ||
179 | snprintf(displacement, sizeof(displacement), "%+4ld", pdiff); | ||
180 | } else { | ||
181 | blank: memset(displacement, ' ', sizeof(displacement)); | ||
182 | } | ||
183 | |||
184 | printed = fprintf(fp, "%4lu %5.5s ", pos, displacement); | ||
185 | |||
186 | if (show_percent) { | ||
187 | double old_percent = (old_count * 100) / pair_session->events_stats.total, | ||
188 | new_percent = (self->count * 100) / session->events_stats.total; | ||
189 | double diff = old_percent - new_percent; | ||
190 | |||
191 | if (verbose) | ||
192 | printed += fprintf(fp, " %3.2f%% %3.2f%%", old_percent, new_percent); | ||
193 | |||
194 | if ((u64)diff != 0) | ||
195 | printed += fprintf(fp, " %+4.2F%%", diff); | ||
196 | else | ||
197 | printed += fprintf(fp, " "); | ||
198 | } else { | ||
199 | if (verbose) | ||
200 | printed += fprintf(fp, " %9Lu %9Lu", old_count, self->count); | ||
201 | printed += fprintf(fp, " %+9Ld", (s64)self->count - (s64)old_count); | ||
202 | } | ||
203 | |||
204 | return printed + fprintf(fp, " %25.25s %s\n", | ||
205 | self->map->dso->name, self->sym->name); | ||
206 | } | ||
207 | |||
208 | static size_t perf_session__fprintf_matched_hists(struct perf_session *self, | ||
209 | struct perf_session *pair, | ||
210 | FILE *fp) | ||
211 | { | ||
212 | struct rb_node *nd; | ||
213 | size_t printed = 0; | ||
214 | unsigned long pos = 1; | ||
215 | |||
216 | for (nd = rb_first(&self->hists); nd; nd = rb_next(nd)) { | ||
217 | struct hist_entry *he = rb_entry(nd, struct hist_entry, rb_node); | ||
218 | printed += hist_entry__fprintf_matched(he, pos++, self, pair, fp); | ||
219 | } | ||
220 | |||
221 | return printed; | ||
222 | } | ||
223 | |||
224 | static int __cmd_diff(void) | ||
225 | { | ||
226 | int ret, i; | ||
227 | struct perf_session *session[2]; | ||
228 | |||
229 | session[0] = perf_session__new(input_old, O_RDONLY, force, &symbol_conf); | ||
230 | session[1] = perf_session__new(input_new, O_RDONLY, force, &symbol_conf); | ||
231 | if (session[0] == NULL || session[1] == NULL) | ||
232 | return -ENOMEM; | ||
233 | |||
234 | for (i = 0; i < 2; ++i) { | ||
235 | ret = perf_session__process_events(session[i], &event_ops); | ||
236 | if (ret) | ||
237 | goto out_delete; | ||
238 | perf_session__output_resort(session[i], session[i]->events_stats.total); | ||
239 | } | ||
240 | |||
241 | perf_session__match_hists(session[0], session[1]); | ||
242 | perf_session__fprintf_matched_hists(session[1], session[0], stdout); | ||
243 | out_delete: | ||
244 | for (i = 0; i < 2; ++i) | ||
245 | perf_session__delete(session[i]); | ||
246 | return ret; | ||
247 | } | ||
248 | |||
/*
 * Usage text handed to parse_options()/usage_with_options().  The array is
 * NULL-terminated so that code walking it can find the end.
 */
static const char *const diff_usage[] = {
	"perf diff [<options>] [old_file] [new_file]",
	NULL,
};
252 | |||
253 | static const struct option options[] = { | ||
254 | OPT_BOOLEAN('v', "verbose", &verbose, | ||
255 | "be more verbose (show symbol address, etc)"), | ||
256 | OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, | ||
257 | "dump raw trace in ASCII"), | ||
258 | OPT_BOOLEAN('f', "force", &force, "don't complain, do it"), | ||
259 | OPT_BOOLEAN('m', "modules", &symbol_conf.use_modules, | ||
260 | "load module symbols - WARNING: use only with -k and LIVE kernel"), | ||
261 | OPT_BOOLEAN('p', "percentages", &show_percent, | ||
262 | "Don't shorten the pathnames taking into account the cwd"), | ||
263 | OPT_BOOLEAN('P', "full-paths", &event_ops.full_paths, | ||
264 | "Don't shorten the pathnames taking into account the cwd"), | ||
265 | OPT_END() | ||
266 | }; | ||
267 | |||
268 | int cmd_diff(int argc, const char **argv, const char *prefix __used) | ||
269 | { | ||
270 | if (symbol__init(&symbol_conf) < 0) | ||
271 | return -1; | ||
272 | |||
273 | setup_sorting(diff_usage, options); | ||
274 | |||
275 | argc = parse_options(argc, argv, options, diff_usage, 0); | ||
276 | if (argc) { | ||
277 | if (argc > 2) | ||
278 | usage_with_options(diff_usage, options); | ||
279 | if (argc == 2) { | ||
280 | input_old = argv[0]; | ||
281 | input_new = argv[1]; | ||
282 | } else | ||
283 | input_new = argv[0]; | ||
284 | } | ||
285 | |||
286 | setup_pager(); | ||
287 | return __cmd_diff(); | ||
288 | } | ||
diff --git a/tools/perf/builtin.h b/tools/perf/builtin.h index a3d8bf65f26c..18035b1f16c7 100644 --- a/tools/perf/builtin.h +++ b/tools/perf/builtin.h | |||
@@ -17,6 +17,7 @@ extern int check_pager_config(const char *cmd); | |||
17 | extern int cmd_annotate(int argc, const char **argv, const char *prefix); | 17 | extern int cmd_annotate(int argc, const char **argv, const char *prefix); |
18 | extern int cmd_bench(int argc, const char **argv, const char *prefix); | 18 | extern int cmd_bench(int argc, const char **argv, const char *prefix); |
19 | extern int cmd_buildid_list(int argc, const char **argv, const char *prefix); | 19 | extern int cmd_buildid_list(int argc, const char **argv, const char *prefix); |
20 | extern int cmd_diff(int argc, const char **argv, const char *prefix); | ||
20 | extern int cmd_help(int argc, const char **argv, const char *prefix); | 21 | extern int cmd_help(int argc, const char **argv, const char *prefix); |
21 | extern int cmd_sched(int argc, const char **argv, const char *prefix); | 22 | extern int cmd_sched(int argc, const char **argv, const char *prefix); |
22 | extern int cmd_list(int argc, const char **argv, const char *prefix); | 23 | extern int cmd_list(int argc, const char **argv, const char *prefix); |
diff --git a/tools/perf/command-list.txt b/tools/perf/command-list.txt index 02b09ea17a3e..71dc7c3fe7b2 100644 --- a/tools/perf/command-list.txt +++ b/tools/perf/command-list.txt | |||
@@ -5,6 +5,7 @@ | |||
5 | perf-annotate mainporcelain common | 5 | perf-annotate mainporcelain common |
6 | perf-bench mainporcelain common | 6 | perf-bench mainporcelain common |
7 | perf-buildid-list mainporcelain common | 7 | perf-buildid-list mainporcelain common |
8 | perf-diff mainporcelain common | ||
8 | perf-list mainporcelain common | 9 | perf-list mainporcelain common |
9 | perf-sched mainporcelain common | 10 | perf-sched mainporcelain common |
10 | perf-record mainporcelain common | 11 | perf-record mainporcelain common |
diff --git a/tools/perf/perf.c b/tools/perf/perf.c index cf64049bc9bd..873e55fab375 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c | |||
@@ -286,6 +286,7 @@ static void handle_internal_command(int argc, const char **argv) | |||
286 | const char *cmd = argv[0]; | 286 | const char *cmd = argv[0]; |
287 | static struct cmd_struct commands[] = { | 287 | static struct cmd_struct commands[] = { |
288 | { "buildid-list", cmd_buildid_list, 0 }, | 288 | { "buildid-list", cmd_buildid_list, 0 }, |
289 | { "diff", cmd_diff, 0 }, | ||
289 | { "help", cmd_help, 0 }, | 290 | { "help", cmd_help, 0 }, |
290 | { "list", cmd_list, 0 }, | 291 | { "list", cmd_list, 0 }, |
291 | { "record", cmd_record, 0 }, | 292 | { "record", cmd_record, 0 }, |
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index cb6151c026f2..925f083e1eee 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h | |||
@@ -49,9 +49,13 @@ struct hist_entry { | |||
49 | struct symbol *sym; | 49 | struct symbol *sym; |
50 | u64 ip; | 50 | u64 ip; |
51 | char level; | 51 | char level; |
52 | struct symbol *parent; | 52 | struct symbol *parent; |
53 | struct callchain_node callchain; | 53 | struct callchain_node callchain; |
54 | struct rb_root sorted_chain; | 54 | union { |
55 | unsigned long position; | ||
56 | struct hist_entry *pair; | ||
57 | struct rb_root sorted_chain; | ||
58 | }; | ||
55 | }; | 59 | }; |
56 | 60 | ||
57 | enum sort_type { | 61 | enum sort_type { |