diff options
author | Brice Goglin <Brice.Goglin@inria.fr> | 2009-08-07 07:55:24 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2009-08-09 07:04:20 -0400 |
commit | 8d51327090ac025d7f4ce6c059786b5e93513321 (patch) | |
tree | 32576a197af970ac698c7888c67235e8f723fa67 /tools | |
parent | 30dd568c912602b7dbd609a45d053e01b13422bb (diff) |
perf report: Fix and improve the displaying of per-thread event counters
Improve and fix the handling of per-thread counter stats
recorded via perf record -s. Previously we only displayed
it in debug printouts (-D) and even that output was hard
to disambiguate.
I moved everything to utils/values.[ch] so that we may reuse
it in perf stat.
We get something like this now:
# PID TID cache-misses cache-references
4658 4659 495581 3238779
4658 4662 498246 3236823
4658 4663 499531 3243162
Then it'll be easy to add --pretty=raw to display a single line per thread/event.
By the way, -S was already used for --symbol, so I used -T/--threads here.
perf report: Add -T/--threads to display per-thread counter values
We get something like this now:
# PID TID cache-misses cache-references
4658 4659 495581 3238779
4658 4662 498246 3236823
4658 4663 499531 3243162
Per-thread arrays of counter values are managed in utils/values.[ch]
Signed-off-by: Brice Goglin <Brice.Goglin@inria.fr>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: paulus@samba.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'tools')
-rw-r--r-- | tools/perf/Documentation/perf-report.txt | 3 | ||||
-rw-r--r-- | tools/perf/Makefile | 2 | ||||
-rw-r--r-- | tools/perf/builtin-report.c | 25 | ||||
-rw-r--r-- | tools/perf/util/values.c | 171 | ||||
-rw-r--r-- | tools/perf/util/values.h | 26 |
5 files changed, 227 insertions, 0 deletions
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index e72e9311078..370344afb5b 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt | |||
@@ -27,6 +27,9 @@ OPTIONS | |||
27 | -n | 27 | -n |
28 | --show-nr-samples | 28 | --show-nr-samples |
29 | Show the number of samples for each symbol | 29 | Show the number of samples for each symbol |
30 | -T | ||
31 | --threads | ||
32 | Show per-thread event counters | ||
30 | -C:: | 33 | -C:: |
31 | --comms=:: | 34 | --comms=:: |
32 | Only consider symbols in these comms. CSV that understands | 35 | Only consider symbols in these comms. CSV that understands |
diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 60411e94113..de7beac1095 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile | |||
@@ -310,6 +310,7 @@ LIB_H += util/sigchain.h | |||
310 | LIB_H += util/symbol.h | 310 | LIB_H += util/symbol.h |
311 | LIB_H += util/module.h | 311 | LIB_H += util/module.h |
312 | LIB_H += util/color.h | 312 | LIB_H += util/color.h |
313 | LIB_H += util/values.h | ||
313 | 314 | ||
314 | LIB_OBJS += util/abspath.o | 315 | LIB_OBJS += util/abspath.o |
315 | LIB_OBJS += util/alias.o | 316 | LIB_OBJS += util/alias.o |
@@ -337,6 +338,7 @@ LIB_OBJS += util/color.o | |||
337 | LIB_OBJS += util/pager.o | 338 | LIB_OBJS += util/pager.o |
338 | LIB_OBJS += util/header.o | 339 | LIB_OBJS += util/header.o |
339 | LIB_OBJS += util/callchain.o | 340 | LIB_OBJS += util/callchain.o |
341 | LIB_OBJS += util/values.o | ||
340 | 342 | ||
341 | BUILTIN_OBJS += builtin-annotate.o | 343 | BUILTIN_OBJS += builtin-annotate.o |
342 | BUILTIN_OBJS += builtin-help.o | 344 | BUILTIN_OBJS += builtin-help.o |
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 99274cec0ad..41639182fb3 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c | |||
@@ -17,6 +17,7 @@ | |||
17 | #include "util/string.h" | 17 | #include "util/string.h" |
18 | #include "util/callchain.h" | 18 | #include "util/callchain.h" |
19 | #include "util/strlist.h" | 19 | #include "util/strlist.h" |
20 | #include "util/values.h" | ||
20 | 21 | ||
21 | #include "perf.h" | 22 | #include "perf.h" |
22 | #include "util/header.h" | 23 | #include "util/header.h" |
@@ -53,6 +54,9 @@ static int modules; | |||
53 | static int full_paths; | 54 | static int full_paths; |
54 | static int show_nr_samples; | 55 | static int show_nr_samples; |
55 | 56 | ||
57 | static int show_threads; | ||
58 | static struct perf_read_values show_threads_values; | ||
59 | |||
56 | static unsigned long page_size; | 60 | static unsigned long page_size; |
57 | static unsigned long mmap_window = 32; | 61 | static unsigned long mmap_window = 32; |
58 | 62 | ||
@@ -1473,6 +1477,9 @@ print_entries: | |||
1473 | 1477 | ||
1474 | free(rem_sq_bracket); | 1478 | free(rem_sq_bracket); |
1475 | 1479 | ||
1480 | if (show_threads) | ||
1481 | perf_read_values_display(fp, &show_threads_values); | ||
1482 | |||
1476 | return ret; | 1483 | return ret; |
1477 | } | 1484 | } |
1478 | 1485 | ||
@@ -1758,6 +1765,16 @@ process_read_event(event_t *event, unsigned long offset, unsigned long head) | |||
1758 | { | 1765 | { |
1759 | struct perf_counter_attr *attr = perf_header__find_attr(event->read.id); | 1766 | struct perf_counter_attr *attr = perf_header__find_attr(event->read.id); |
1760 | 1767 | ||
1768 | if (show_threads) { | ||
1769 | char *name = attr ? __event_name(attr->type, attr->config) | ||
1770 | : "unknown"; | ||
1771 | perf_read_values_add_value(&show_threads_values, | ||
1772 | event->read.pid, event->read.tid, | ||
1773 | event->read.id, | ||
1774 | name, | ||
1775 | event->read.value); | ||
1776 | } | ||
1777 | |||
1761 | dprintf("%p [%p]: PERF_EVENT_READ: %d %d %s %Lu\n", | 1778 | dprintf("%p [%p]: PERF_EVENT_READ: %d %d %s %Lu\n", |
1762 | (void *)(offset + head), | 1779 | (void *)(offset + head), |
1763 | (void *)(long)(event->header.size), | 1780 | (void *)(long)(event->header.size), |
@@ -1839,6 +1856,9 @@ static int __cmd_report(void) | |||
1839 | 1856 | ||
1840 | register_idle_thread(); | 1857 | register_idle_thread(); |
1841 | 1858 | ||
1859 | if (show_threads) | ||
1860 | perf_read_values_init(&show_threads_values); | ||
1861 | |||
1842 | input = open(input_name, O_RDONLY); | 1862 | input = open(input_name, O_RDONLY); |
1843 | if (input < 0) { | 1863 | if (input < 0) { |
1844 | fprintf(stderr, " failed to open file: %s", input_name); | 1864 | fprintf(stderr, " failed to open file: %s", input_name); |
@@ -1993,6 +2013,9 @@ done: | |||
1993 | output__resort(total); | 2013 | output__resort(total); |
1994 | output__fprintf(stdout, total); | 2014 | output__fprintf(stdout, total); |
1995 | 2015 | ||
2016 | if (show_threads) | ||
2017 | perf_read_values_destroy(&show_threads_values); | ||
2018 | |||
1996 | return rc; | 2019 | return rc; |
1997 | } | 2020 | } |
1998 | 2021 | ||
@@ -2066,6 +2089,8 @@ static const struct option options[] = { | |||
2066 | "load module symbols - WARNING: use only with -k and LIVE kernel"), | 2089 | "load module symbols - WARNING: use only with -k and LIVE kernel"), |
2067 | OPT_BOOLEAN('n', "show-nr-samples", &show_nr_samples, | 2090 | OPT_BOOLEAN('n', "show-nr-samples", &show_nr_samples, |
2068 | "Show a column with the number of samples"), | 2091 | "Show a column with the number of samples"), |
2092 | OPT_BOOLEAN('T', "threads", &show_threads, | ||
2093 | "Show per-thread event counters"), | ||
2069 | OPT_STRING('s', "sort", &sort_order, "key[,key2...]", | 2094 | OPT_STRING('s', "sort", &sort_order, "key[,key2...]", |
2070 | "sort by key(s): pid, comm, dso, symbol, parent"), | 2095 | "sort by key(s): pid, comm, dso, symbol, parent"), |
2071 | OPT_BOOLEAN('P', "full-paths", &full_paths, | 2096 | OPT_BOOLEAN('P', "full-paths", &full_paths, |
diff --git a/tools/perf/util/values.c b/tools/perf/util/values.c new file mode 100644 index 00000000000..8551c0b8b23 --- /dev/null +++ b/tools/perf/util/values.c | |||
@@ -0,0 +1,171 @@ | |||
1 | #include <stdlib.h> | ||
2 | |||
3 | #include "util.h" | ||
4 | #include "values.h" | ||
5 | |||
6 | void perf_read_values_init(struct perf_read_values *values) | ||
7 | { | ||
8 | values->threads_max = 16; | ||
9 | values->pid = malloc(values->threads_max * sizeof(*values->pid)); | ||
10 | values->tid = malloc(values->threads_max * sizeof(*values->tid)); | ||
11 | values->value = malloc(values->threads_max * sizeof(*values->value)); | ||
12 | if (!values->pid || !values->tid || !values->value) | ||
13 | die("failed to allocate read_values threads arrays"); | ||
14 | values->threads = 0; | ||
15 | |||
16 | values->counters_max = 16; | ||
17 | values->counterrawid = malloc(values->counters_max | ||
18 | * sizeof(*values->counterrawid)); | ||
19 | values->countername = malloc(values->counters_max | ||
20 | * sizeof(*values->countername)); | ||
21 | if (!values->counterrawid || !values->countername) | ||
22 | die("failed to allocate read_values counters arrays"); | ||
23 | values->counters = 0; | ||
24 | } | ||
25 | |||
26 | void perf_read_values_destroy(struct perf_read_values *values) | ||
27 | { | ||
28 | int i; | ||
29 | |||
30 | if (!values->threads_max || !values->counters_max) | ||
31 | return; | ||
32 | |||
33 | for (i = 0; i < values->threads; i++) | ||
34 | free(values->value[i]); | ||
35 | free(values->pid); | ||
36 | free(values->tid); | ||
37 | free(values->counterrawid); | ||
38 | for (i = 0; i < values->counters; i++) | ||
39 | free(values->countername[i]); | ||
40 | free(values->countername); | ||
41 | } | ||
42 | |||
43 | static void perf_read_values__enlarge_threads(struct perf_read_values *values) | ||
44 | { | ||
45 | values->threads_max *= 2; | ||
46 | values->pid = realloc(values->pid, | ||
47 | values->threads_max * sizeof(*values->pid)); | ||
48 | values->tid = realloc(values->tid, | ||
49 | values->threads_max * sizeof(*values->tid)); | ||
50 | values->value = realloc(values->value, | ||
51 | values->threads_max * sizeof(*values->value)); | ||
52 | if (!values->pid || !values->tid || !values->value) | ||
53 | die("failed to enlarge read_values threads arrays"); | ||
54 | } | ||
55 | |||
56 | static int perf_read_values__findnew_thread(struct perf_read_values *values, | ||
57 | u32 pid, u32 tid) | ||
58 | { | ||
59 | int i; | ||
60 | |||
61 | for (i = 0; i < values->threads; i++) | ||
62 | if (values->pid[i] == pid && values->tid[i] == tid) | ||
63 | return i; | ||
64 | |||
65 | if (values->threads == values->threads_max) | ||
66 | perf_read_values__enlarge_threads(values); | ||
67 | |||
68 | i = values->threads++; | ||
69 | values->pid[i] = pid; | ||
70 | values->tid[i] = tid; | ||
71 | values->value[i] = malloc(values->counters_max * sizeof(**values->value)); | ||
72 | if (!values->value[i]) | ||
73 | die("failed to allocate read_values counters array"); | ||
74 | |||
75 | return i; | ||
76 | } | ||
77 | |||
78 | static void perf_read_values__enlarge_counters(struct perf_read_values *values) | ||
79 | { | ||
80 | int i; | ||
81 | |||
82 | values->counters_max *= 2; | ||
83 | values->counterrawid = realloc(values->counterrawid, | ||
84 | values->counters_max * sizeof(*values->counterrawid)); | ||
85 | values->countername = realloc(values->countername, | ||
86 | values->counters_max * sizeof(*values->countername)); | ||
87 | if (!values->counterrawid || !values->countername) | ||
88 | die("failed to enlarge read_values counters arrays"); | ||
89 | |||
90 | for (i = 0; i < values->threads; i++) { | ||
91 | values->value[i] = realloc(values->value[i], | ||
92 | values->counters_max * sizeof(**values->value)); | ||
93 | if (!values->value[i]) | ||
94 | die("failed to enlarge read_values counters arrays"); | ||
95 | } | ||
96 | } | ||
97 | |||
98 | static int perf_read_values__findnew_counter(struct perf_read_values *values, | ||
99 | u64 rawid, char *name) | ||
100 | { | ||
101 | int i; | ||
102 | |||
103 | for (i = 0; i < values->counters; i++) | ||
104 | if (values->counterrawid[i] == rawid) | ||
105 | return i; | ||
106 | |||
107 | if (values->counters == values->counters_max) | ||
108 | perf_read_values__enlarge_counters(values); | ||
109 | |||
110 | i = values->counters++; | ||
111 | values->counterrawid[i] = rawid; | ||
112 | values->countername[i] = strdup(name); | ||
113 | |||
114 | return i; | ||
115 | } | ||
116 | |||
117 | void perf_read_values_add_value(struct perf_read_values *values, | ||
118 | u32 pid, u32 tid, | ||
119 | u64 rawid, char *name, u64 value) | ||
120 | { | ||
121 | int tindex, cindex; | ||
122 | |||
123 | tindex = perf_read_values__findnew_thread(values, pid, tid); | ||
124 | cindex = perf_read_values__findnew_counter(values, rawid, name); | ||
125 | |||
126 | values->value[tindex][cindex] = value; | ||
127 | } | ||
128 | |||
129 | void perf_read_values_display(FILE *fp, struct perf_read_values *values) | ||
130 | { | ||
131 | int i, j; | ||
132 | int pidwidth, tidwidth; | ||
133 | int *counterwidth; | ||
134 | |||
135 | counterwidth = malloc(values->counters * sizeof(*counterwidth)); | ||
136 | if (!counterwidth) | ||
137 | die("failed to allocate counterwidth array"); | ||
138 | tidwidth = 3; | ||
139 | pidwidth = 3; | ||
140 | for (j = 0; j < values->counters; j++) | ||
141 | counterwidth[j] = strlen(values->countername[j]); | ||
142 | for (i = 0; i < values->threads; i++) { | ||
143 | int width; | ||
144 | |||
145 | width = snprintf(NULL, 0, "%d", values->pid[i]); | ||
146 | if (width > pidwidth) | ||
147 | pidwidth = width; | ||
148 | width = snprintf(NULL, 0, "%d", values->tid[i]); | ||
149 | if (width > tidwidth) | ||
150 | tidwidth = width; | ||
151 | for (j = 0; j < values->counters; j++) { | ||
152 | width = snprintf(NULL, 0, "%Lu", values->value[i][j]); | ||
153 | if (width > counterwidth[j]) | ||
154 | counterwidth[j] = width; | ||
155 | } | ||
156 | } | ||
157 | |||
158 | fprintf(fp, "# %*s %*s", pidwidth, "PID", tidwidth, "TID"); | ||
159 | for (j = 0; j < values->counters; j++) | ||
160 | fprintf(fp, " %*s", counterwidth[j], values->countername[j]); | ||
161 | fprintf(fp, "\n"); | ||
162 | |||
163 | for (i = 0; i < values->threads; i++) { | ||
164 | fprintf(fp, " %*d %*d", pidwidth, values->pid[i], | ||
165 | tidwidth, values->tid[i]); | ||
166 | for (j = 0; j < values->counters; j++) | ||
167 | fprintf(fp, " %*Lu", | ||
168 | counterwidth[j], values->value[i][j]); | ||
169 | fprintf(fp, "\n"); | ||
170 | } | ||
171 | } | ||
diff --git a/tools/perf/util/values.h b/tools/perf/util/values.h new file mode 100644 index 00000000000..e41be5e86e6 --- /dev/null +++ b/tools/perf/util/values.h | |||
@@ -0,0 +1,26 @@ | |||
1 | #ifndef _PERF_VALUES_H | ||
2 | #define _PERF_VALUES_H | ||
3 | |||
4 | #include "types.h" | ||
5 | |||
6 | struct perf_read_values { | ||
7 | int threads; | ||
8 | int threads_max; | ||
9 | u32 *pid, *tid; | ||
10 | int counters; | ||
11 | int counters_max; | ||
12 | u64 *counterrawid; | ||
13 | char **countername; | ||
14 | u64 **value; | ||
15 | }; | ||
16 | |||
17 | void perf_read_values_init(struct perf_read_values *values); | ||
18 | void perf_read_values_destroy(struct perf_read_values *values); | ||
19 | |||
20 | void perf_read_values_add_value(struct perf_read_values *values, | ||
21 | u32 pid, u32 tid, | ||
22 | u64 rawid, char *name, u64 value); | ||
23 | |||
24 | void perf_read_values_display(FILE *fp, struct perf_read_values *values); | ||
25 | |||
26 | #endif /* _PERF_VALUES_H */ | ||