diff options
Diffstat (limited to 'tools/perf/builtin-stat.c')
-rw-r--r-- | tools/perf/builtin-stat.c | 339 |
1 files changed, 339 insertions, 0 deletions
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c new file mode 100644 index 00000000000..2cbf5a18958 --- /dev/null +++ b/tools/perf/builtin-stat.c | |||
@@ -0,0 +1,339 @@ | |||
1 | /* | ||
2 | * builtin-stat.c | ||
3 | * | ||
4 | * Builtin stat command: Give a precise performance counters summary | ||
5 | * overview about any workload, CPU or specific PID. | ||
6 | * | ||
7 | * Sample output: | ||
8 | |||
9 | $ perf stat ~/hackbench 10 | ||
10 | Time: 0.104 | ||
11 | |||
12 | Performance counter stats for '/home/mingo/hackbench': | ||
13 | |||
14 | 1255.538611 task clock ticks # 10.143 CPU utilization factor | ||
15 | 54011 context switches # 0.043 M/sec | ||
16 | 385 CPU migrations # 0.000 M/sec | ||
17 | 17755 pagefaults # 0.014 M/sec | ||
18 | 3808323185 CPU cycles # 3033.219 M/sec | ||
19 | 1575111190 instructions # 1254.530 M/sec | ||
20 | 17367895 cache references # 13.833 M/sec | ||
21 | 7674421 cache misses # 6.112 M/sec | ||
22 | |||
23 | Wall-clock time elapsed: 123.786620 msecs | ||
24 | |||
25 | * | ||
26 | * Copyright (C) 2008, Red Hat Inc, Ingo Molnar <mingo@redhat.com> | ||
27 | * | ||
28 | * Improvements and fixes by: | ||
29 | * | ||
30 | * Arjan van de Ven <arjan@linux.intel.com> | ||
31 | * Yanmin Zhang <yanmin.zhang@intel.com> | ||
32 | * Wu Fengguang <fengguang.wu@intel.com> | ||
33 | * Mike Galbraith <efault@gmx.de> | ||
34 | * Paul Mackerras <paulus@samba.org> | ||
35 | * | ||
36 | * Released under the GPL v2. (and only v2, not any later version) | ||
37 | */ | ||
38 | |||
39 | #include "perf.h" | ||
40 | #include "builtin.h" | ||
41 | #include "util/util.h" | ||
42 | #include "util/parse-options.h" | ||
43 | #include "util/parse-events.h" | ||
44 | |||
45 | #include <sys/prctl.h> | ||
46 | |||
47 | static struct perf_counter_attr default_attrs[MAX_COUNTERS] = { | ||
48 | |||
49 | { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_TASK_CLOCK }, | ||
50 | { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_CONTEXT_SWITCHES }, | ||
51 | { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_CPU_MIGRATIONS }, | ||
52 | { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_PAGE_FAULTS }, | ||
53 | |||
54 | { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_CPU_CYCLES }, | ||
55 | { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_INSTRUCTIONS }, | ||
56 | { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_CACHE_REFERENCES }, | ||
57 | { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_CACHE_MISSES }, | ||
58 | }; | ||
59 | |||
60 | static int system_wide = 0; | ||
61 | static int inherit = 1; | ||
62 | |||
63 | static int fd[MAX_NR_CPUS][MAX_COUNTERS]; | ||
64 | |||
65 | static int target_pid = -1; | ||
66 | static int nr_cpus = 0; | ||
67 | static unsigned int page_size; | ||
68 | |||
69 | static int scale = 1; | ||
70 | |||
71 | static const unsigned int default_count[] = { | ||
72 | 1000000, | ||
73 | 1000000, | ||
74 | 10000, | ||
75 | 10000, | ||
76 | 1000000, | ||
77 | 10000, | ||
78 | }; | ||
79 | |||
80 | static __u64 event_res[MAX_COUNTERS][3]; | ||
81 | static __u64 event_scaled[MAX_COUNTERS]; | ||
82 | |||
83 | static __u64 runtime_nsecs; | ||
84 | static __u64 walltime_nsecs; | ||
85 | |||
86 | static void create_perfstat_counter(int counter) | ||
87 | { | ||
88 | struct perf_counter_attr *attr = attrs + counter; | ||
89 | |||
90 | if (scale) | ||
91 | attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | | ||
92 | PERF_FORMAT_TOTAL_TIME_RUNNING; | ||
93 | |||
94 | if (system_wide) { | ||
95 | int cpu; | ||
96 | for (cpu = 0; cpu < nr_cpus; cpu ++) { | ||
97 | fd[cpu][counter] = sys_perf_counter_open(attr, -1, cpu, -1, 0); | ||
98 | if (fd[cpu][counter] < 0) { | ||
99 | printf("perfstat error: syscall returned with %d (%s)\n", | ||
100 | fd[cpu][counter], strerror(errno)); | ||
101 | exit(-1); | ||
102 | } | ||
103 | } | ||
104 | } else { | ||
105 | attr->inherit = inherit; | ||
106 | attr->disabled = 1; | ||
107 | |||
108 | fd[0][counter] = sys_perf_counter_open(attr, 0, -1, -1, 0); | ||
109 | if (fd[0][counter] < 0) { | ||
110 | printf("perfstat error: syscall returned with %d (%s)\n", | ||
111 | fd[0][counter], strerror(errno)); | ||
112 | exit(-1); | ||
113 | } | ||
114 | } | ||
115 | } | ||
116 | |||
117 | /* | ||
118 | * Does the counter have nsecs as a unit? | ||
119 | */ | ||
120 | static inline int nsec_counter(int counter) | ||
121 | { | ||
122 | if (attrs[counter].type != PERF_TYPE_SOFTWARE) | ||
123 | return 0; | ||
124 | |||
125 | if (attrs[counter].config == PERF_COUNT_CPU_CLOCK) | ||
126 | return 1; | ||
127 | |||
128 | if (attrs[counter].config == PERF_COUNT_TASK_CLOCK) | ||
129 | return 1; | ||
130 | |||
131 | return 0; | ||
132 | } | ||
133 | |||
134 | /* | ||
135 | * Read out the results of a single counter: | ||
136 | */ | ||
137 | static void read_counter(int counter) | ||
138 | { | ||
139 | __u64 *count, single_count[3]; | ||
140 | ssize_t res; | ||
141 | int cpu, nv; | ||
142 | int scaled; | ||
143 | |||
144 | count = event_res[counter]; | ||
145 | |||
146 | count[0] = count[1] = count[2] = 0; | ||
147 | |||
148 | nv = scale ? 3 : 1; | ||
149 | for (cpu = 0; cpu < nr_cpus; cpu ++) { | ||
150 | res = read(fd[cpu][counter], single_count, nv * sizeof(__u64)); | ||
151 | assert(res == nv * sizeof(__u64)); | ||
152 | |||
153 | count[0] += single_count[0]; | ||
154 | if (scale) { | ||
155 | count[1] += single_count[1]; | ||
156 | count[2] += single_count[2]; | ||
157 | } | ||
158 | } | ||
159 | |||
160 | scaled = 0; | ||
161 | if (scale) { | ||
162 | if (count[2] == 0) { | ||
163 | event_scaled[counter] = -1; | ||
164 | count[0] = 0; | ||
165 | return; | ||
166 | } | ||
167 | |||
168 | if (count[2] < count[1]) { | ||
169 | event_scaled[counter] = 1; | ||
170 | count[0] = (unsigned long long) | ||
171 | ((double)count[0] * count[1] / count[2] + 0.5); | ||
172 | } | ||
173 | } | ||
174 | /* | ||
175 | * Save the full runtime - to allow normalization during printout: | ||
176 | */ | ||
177 | if (attrs[counter].type == PERF_TYPE_SOFTWARE && | ||
178 | attrs[counter].config == PERF_COUNT_TASK_CLOCK) | ||
179 | runtime_nsecs = count[0]; | ||
180 | } | ||
181 | |||
182 | /* | ||
183 | * Print out the results of a single counter: | ||
184 | */ | ||
185 | static void print_counter(int counter) | ||
186 | { | ||
187 | __u64 *count; | ||
188 | int scaled; | ||
189 | |||
190 | count = event_res[counter]; | ||
191 | scaled = event_scaled[counter]; | ||
192 | |||
193 | if (scaled == -1) { | ||
194 | fprintf(stderr, " %14s %-20s\n", | ||
195 | "<not counted>", event_name(counter)); | ||
196 | return; | ||
197 | } | ||
198 | |||
199 | if (nsec_counter(counter)) { | ||
200 | double msecs = (double)count[0] / 1000000; | ||
201 | |||
202 | fprintf(stderr, " %14.6f %-20s", | ||
203 | msecs, event_name(counter)); | ||
204 | if (attrs[counter].type == PERF_TYPE_SOFTWARE && | ||
205 | attrs[counter].config == PERF_COUNT_TASK_CLOCK) { | ||
206 | |||
207 | fprintf(stderr, " # %11.3f CPU utilization factor", | ||
208 | (double)count[0] / (double)walltime_nsecs); | ||
209 | } | ||
210 | } else { | ||
211 | fprintf(stderr, " %14Ld %-20s", | ||
212 | count[0], event_name(counter)); | ||
213 | if (runtime_nsecs) | ||
214 | fprintf(stderr, " # %11.3f M/sec", | ||
215 | (double)count[0]/runtime_nsecs*1000.0); | ||
216 | } | ||
217 | if (scaled) | ||
218 | fprintf(stderr, " (scaled from %.2f%%)", | ||
219 | (double) count[2] / count[1] * 100); | ||
220 | fprintf(stderr, "\n"); | ||
221 | } | ||
222 | |||
223 | static int do_perfstat(int argc, const char **argv) | ||
224 | { | ||
225 | unsigned long long t0, t1; | ||
226 | int counter; | ||
227 | int status; | ||
228 | int pid; | ||
229 | int i; | ||
230 | |||
231 | if (!system_wide) | ||
232 | nr_cpus = 1; | ||
233 | |||
234 | for (counter = 0; counter < nr_counters; counter++) | ||
235 | create_perfstat_counter(counter); | ||
236 | |||
237 | /* | ||
238 | * Enable counters and exec the command: | ||
239 | */ | ||
240 | t0 = rdclock(); | ||
241 | prctl(PR_TASK_PERF_COUNTERS_ENABLE); | ||
242 | |||
243 | if ((pid = fork()) < 0) | ||
244 | perror("failed to fork"); | ||
245 | |||
246 | if (!pid) { | ||
247 | if (execvp(argv[0], (char **)argv)) { | ||
248 | perror(argv[0]); | ||
249 | exit(-1); | ||
250 | } | ||
251 | } | ||
252 | |||
253 | while (wait(&status) >= 0) | ||
254 | ; | ||
255 | |||
256 | prctl(PR_TASK_PERF_COUNTERS_DISABLE); | ||
257 | t1 = rdclock(); | ||
258 | |||
259 | walltime_nsecs = t1 - t0; | ||
260 | |||
261 | fflush(stdout); | ||
262 | |||
263 | fprintf(stderr, "\n"); | ||
264 | fprintf(stderr, " Performance counter stats for \'%s", argv[0]); | ||
265 | |||
266 | for (i = 1; i < argc; i++) | ||
267 | fprintf(stderr, " %s", argv[i]); | ||
268 | |||
269 | fprintf(stderr, "\':\n"); | ||
270 | fprintf(stderr, "\n"); | ||
271 | |||
272 | for (counter = 0; counter < nr_counters; counter++) | ||
273 | read_counter(counter); | ||
274 | |||
275 | for (counter = 0; counter < nr_counters; counter++) | ||
276 | print_counter(counter); | ||
277 | |||
278 | |||
279 | fprintf(stderr, "\n"); | ||
280 | fprintf(stderr, " Wall-clock time elapsed: %12.6f msecs\n", | ||
281 | (double)(t1-t0)/1e6); | ||
282 | fprintf(stderr, "\n"); | ||
283 | |||
284 | return 0; | ||
285 | } | ||
286 | |||
/*
 * Signal handler that deliberately does nothing. cmd_stat() installs it
 * so that a delivered signal does not terminate perf stat itself.
 */
static void skip_signal(int signo)
{
	(void)signo;
}
290 | |||
291 | static const char * const stat_usage[] = { | ||
292 | "perf stat [<options>] <command>", | ||
293 | NULL | ||
294 | }; | ||
295 | |||
296 | static const struct option options[] = { | ||
297 | OPT_CALLBACK('e', "event", NULL, "event", | ||
298 | "event selector. use 'perf list' to list available events", | ||
299 | parse_events), | ||
300 | OPT_BOOLEAN('i', "inherit", &inherit, | ||
301 | "child tasks inherit counters"), | ||
302 | OPT_INTEGER('p', "pid", &target_pid, | ||
303 | "stat events on existing pid"), | ||
304 | OPT_BOOLEAN('a', "all-cpus", &system_wide, | ||
305 | "system-wide collection from all CPUs"), | ||
306 | OPT_BOOLEAN('S', "scale", &scale, | ||
307 | "scale/normalize counters"), | ||
308 | OPT_END() | ||
309 | }; | ||
310 | |||
311 | int cmd_stat(int argc, const char **argv, const char *prefix) | ||
312 | { | ||
313 | page_size = sysconf(_SC_PAGE_SIZE); | ||
314 | |||
315 | memcpy(attrs, default_attrs, sizeof(attrs)); | ||
316 | |||
317 | argc = parse_options(argc, argv, options, stat_usage, 0); | ||
318 | if (!argc) | ||
319 | usage_with_options(stat_usage, options); | ||
320 | |||
321 | if (!nr_counters) | ||
322 | nr_counters = 8; | ||
323 | |||
324 | nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); | ||
325 | assert(nr_cpus <= MAX_NR_CPUS); | ||
326 | assert(nr_cpus >= 0); | ||
327 | |||
328 | /* | ||
329 | * We dont want to block the signals - that would cause | ||
330 | * child tasks to inherit that and Ctrl-C would not work. | ||
331 | * What we want is for Ctrl-C to work in the exec()-ed | ||
332 | * task, but being ignored by perf stat itself: | ||
333 | */ | ||
334 | signal(SIGINT, skip_signal); | ||
335 | signal(SIGALRM, skip_signal); | ||
336 | signal(SIGABRT, skip_signal); | ||
337 | |||
338 | return do_perfstat(argc, argv); | ||
339 | } | ||