diff options
Diffstat (limited to 'tools/perf/builtin-stat.c')
-rw-r--r-- | tools/perf/builtin-stat.c | 367 |
1 files changed, 367 insertions, 0 deletions
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c new file mode 100644 index 000000000000..c43e4a97dc42 --- /dev/null +++ b/tools/perf/builtin-stat.c | |||
@@ -0,0 +1,367 @@ | |||
1 | /* | ||
2 | * builtin-stat.c | ||
3 | * | ||
4 | * Builtin stat command: Give a precise performance counters summary | ||
5 | * overview about any workload, CPU or specific PID. | ||
6 | * | ||
7 | * Sample output: | ||
8 | |||
9 | $ perf stat ~/hackbench 10 | ||
10 | Time: 0.104 | ||
11 | |||
12 | Performance counter stats for '/home/mingo/hackbench': | ||
13 | |||
14 | 1255.538611 task clock ticks # 10.143 CPU utilization factor | ||
15 | 54011 context switches # 0.043 M/sec | ||
16 | 385 CPU migrations # 0.000 M/sec | ||
17 | 17755 pagefaults # 0.014 M/sec | ||
18 | 3808323185 CPU cycles # 3033.219 M/sec | ||
19 | 1575111190 instructions # 1254.530 M/sec | ||
20 | 17367895 cache references # 13.833 M/sec | ||
21 | 7674421 cache misses # 6.112 M/sec | ||
22 | |||
23 | Wall-clock time elapsed: 123.786620 msecs | ||
24 | |||
25 | * | ||
26 | * Copyright (C) 2008, Red Hat Inc, Ingo Molnar <mingo@redhat.com> | ||
27 | * | ||
28 | * Improvements and fixes by: | ||
29 | * | ||
30 | * Arjan van de Ven <arjan@linux.intel.com> | ||
31 | * Yanmin Zhang <yanmin.zhang@intel.com> | ||
32 | * Wu Fengguang <fengguang.wu@intel.com> | ||
33 | * Mike Galbraith <efault@gmx.de> | ||
34 | * Paul Mackerras <paulus@samba.org> | ||
35 | * | ||
36 | * Released under the GPL v2. (and only v2, not any later version) | ||
37 | */ | ||
38 | |||
39 | #include "perf.h" | ||
40 | #include "builtin.h" | ||
41 | #include "util/util.h" | ||
42 | #include "util/parse-options.h" | ||
43 | #include "util/parse-events.h" | ||
44 | |||
45 | #include <sys/prctl.h> | ||
46 | |||
47 | static struct perf_counter_attr default_attrs[MAX_COUNTERS] = { | ||
48 | |||
49 | { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK }, | ||
50 | { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES}, | ||
51 | { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS }, | ||
52 | { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS }, | ||
53 | |||
54 | { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES }, | ||
55 | { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS }, | ||
56 | { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_REFERENCES}, | ||
57 | { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_MISSES }, | ||
58 | |||
59 | }; | ||
60 | |||
61 | static int system_wide = 0; | ||
62 | static int inherit = 1; | ||
63 | static int verbose = 0; | ||
64 | |||
65 | static int fd[MAX_NR_CPUS][MAX_COUNTERS]; | ||
66 | |||
67 | static int target_pid = -1; | ||
68 | static int nr_cpus = 0; | ||
69 | static unsigned int page_size; | ||
70 | |||
71 | static int scale = 1; | ||
72 | |||
73 | static const unsigned int default_count[] = { | ||
74 | 1000000, | ||
75 | 1000000, | ||
76 | 10000, | ||
77 | 10000, | ||
78 | 1000000, | ||
79 | 10000, | ||
80 | }; | ||
81 | |||
82 | static __u64 event_res[MAX_COUNTERS][3]; | ||
83 | static __u64 event_scaled[MAX_COUNTERS]; | ||
84 | |||
85 | static __u64 runtime_nsecs; | ||
86 | static __u64 walltime_nsecs; | ||
87 | static __u64 runtime_cycles; | ||
88 | |||
89 | static void create_perf_stat_counter(int counter) | ||
90 | { | ||
91 | struct perf_counter_attr *attr = attrs + counter; | ||
92 | |||
93 | if (scale) | ||
94 | attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | | ||
95 | PERF_FORMAT_TOTAL_TIME_RUNNING; | ||
96 | |||
97 | if (system_wide) { | ||
98 | int cpu; | ||
99 | for (cpu = 0; cpu < nr_cpus; cpu ++) { | ||
100 | fd[cpu][counter] = sys_perf_counter_open(attr, -1, cpu, -1, 0); | ||
101 | if (fd[cpu][counter] < 0 && verbose) { | ||
102 | printf("Error: counter %d, sys_perf_counter_open() syscall returned with %d (%s)\n", counter, fd[cpu][counter], strerror(errno)); | ||
103 | } | ||
104 | } | ||
105 | } else { | ||
106 | attr->inherit = inherit; | ||
107 | attr->disabled = 1; | ||
108 | |||
109 | fd[0][counter] = sys_perf_counter_open(attr, 0, -1, -1, 0); | ||
110 | if (fd[0][counter] < 0 && verbose) { | ||
111 | printf("Error: counter %d, sys_perf_counter_open() syscall returned with %d (%s)\n", counter, fd[0][counter], strerror(errno)); | ||
112 | } | ||
113 | } | ||
114 | } | ||
115 | |||
116 | /* | ||
117 | * Does the counter have nsecs as a unit? | ||
118 | */ | ||
119 | static inline int nsec_counter(int counter) | ||
120 | { | ||
121 | if (attrs[counter].type != PERF_TYPE_SOFTWARE) | ||
122 | return 0; | ||
123 | |||
124 | if (attrs[counter].config == PERF_COUNT_SW_CPU_CLOCK) | ||
125 | return 1; | ||
126 | |||
127 | if (attrs[counter].config == PERF_COUNT_SW_TASK_CLOCK) | ||
128 | return 1; | ||
129 | |||
130 | return 0; | ||
131 | } | ||
132 | |||
133 | /* | ||
134 | * Read out the results of a single counter: | ||
135 | */ | ||
136 | static void read_counter(int counter) | ||
137 | { | ||
138 | __u64 *count, single_count[3]; | ||
139 | ssize_t res; | ||
140 | int cpu, nv; | ||
141 | int scaled; | ||
142 | |||
143 | count = event_res[counter]; | ||
144 | |||
145 | count[0] = count[1] = count[2] = 0; | ||
146 | |||
147 | nv = scale ? 3 : 1; | ||
148 | for (cpu = 0; cpu < nr_cpus; cpu ++) { | ||
149 | if (fd[cpu][counter] < 0) | ||
150 | continue; | ||
151 | |||
152 | res = read(fd[cpu][counter], single_count, nv * sizeof(__u64)); | ||
153 | assert(res == nv * sizeof(__u64)); | ||
154 | |||
155 | count[0] += single_count[0]; | ||
156 | if (scale) { | ||
157 | count[1] += single_count[1]; | ||
158 | count[2] += single_count[2]; | ||
159 | } | ||
160 | } | ||
161 | |||
162 | scaled = 0; | ||
163 | if (scale) { | ||
164 | if (count[2] == 0) { | ||
165 | event_scaled[counter] = -1; | ||
166 | count[0] = 0; | ||
167 | return; | ||
168 | } | ||
169 | |||
170 | if (count[2] < count[1]) { | ||
171 | event_scaled[counter] = 1; | ||
172 | count[0] = (unsigned long long) | ||
173 | ((double)count[0] * count[1] / count[2] + 0.5); | ||
174 | } | ||
175 | } | ||
176 | /* | ||
177 | * Save the full runtime - to allow normalization during printout: | ||
178 | */ | ||
179 | if (attrs[counter].type == PERF_TYPE_SOFTWARE && | ||
180 | attrs[counter].config == PERF_COUNT_SW_TASK_CLOCK) | ||
181 | runtime_nsecs = count[0]; | ||
182 | if (attrs[counter].type == PERF_TYPE_HARDWARE && | ||
183 | attrs[counter].config == PERF_COUNT_HW_CPU_CYCLES) | ||
184 | runtime_cycles = count[0]; | ||
185 | } | ||
186 | |||
187 | /* | ||
188 | * Print out the results of a single counter: | ||
189 | */ | ||
190 | static void print_counter(int counter) | ||
191 | { | ||
192 | __u64 *count; | ||
193 | int scaled; | ||
194 | |||
195 | count = event_res[counter]; | ||
196 | scaled = event_scaled[counter]; | ||
197 | |||
198 | if (scaled == -1) { | ||
199 | fprintf(stderr, " %14s %-20s\n", | ||
200 | "<not counted>", event_name(counter)); | ||
201 | return; | ||
202 | } | ||
203 | |||
204 | if (nsec_counter(counter)) { | ||
205 | double msecs = (double)count[0] / 1000000; | ||
206 | |||
207 | fprintf(stderr, " %14.6f %-20s", | ||
208 | msecs, event_name(counter)); | ||
209 | if (attrs[counter].type == PERF_TYPE_SOFTWARE && | ||
210 | attrs[counter].config == PERF_COUNT_SW_TASK_CLOCK) { | ||
211 | |||
212 | if (walltime_nsecs) | ||
213 | fprintf(stderr, " # %11.3f CPU utilization factor", | ||
214 | (double)count[0] / (double)walltime_nsecs); | ||
215 | } | ||
216 | } else { | ||
217 | fprintf(stderr, " %14Ld %-20s", | ||
218 | count[0], event_name(counter)); | ||
219 | if (runtime_nsecs) | ||
220 | fprintf(stderr, " # %11.3f M/sec", | ||
221 | (double)count[0]/runtime_nsecs*1000.0); | ||
222 | if (runtime_cycles && | ||
223 | attrs[counter].type == PERF_TYPE_HARDWARE && | ||
224 | attrs[counter].config == PERF_COUNT_HW_INSTRUCTIONS) { | ||
225 | |||
226 | fprintf(stderr, " # %1.3f per cycle", | ||
227 | (double)count[0] / (double)runtime_cycles); | ||
228 | } | ||
229 | } | ||
230 | if (scaled) | ||
231 | fprintf(stderr, " (scaled from %.2f%%)", | ||
232 | (double) count[2] / count[1] * 100); | ||
233 | fprintf(stderr, "\n"); | ||
234 | } | ||
235 | |||
236 | static int do_perf_stat(int argc, const char **argv) | ||
237 | { | ||
238 | unsigned long long t0, t1; | ||
239 | int counter; | ||
240 | int status; | ||
241 | int pid; | ||
242 | int i; | ||
243 | |||
244 | if (!system_wide) | ||
245 | nr_cpus = 1; | ||
246 | |||
247 | for (counter = 0; counter < nr_counters; counter++) | ||
248 | create_perf_stat_counter(counter); | ||
249 | |||
250 | /* | ||
251 | * Enable counters and exec the command: | ||
252 | */ | ||
253 | t0 = rdclock(); | ||
254 | prctl(PR_TASK_PERF_COUNTERS_ENABLE); | ||
255 | |||
256 | if ((pid = fork()) < 0) | ||
257 | perror("failed to fork"); | ||
258 | |||
259 | if (!pid) { | ||
260 | if (execvp(argv[0], (char **)argv)) { | ||
261 | perror(argv[0]); | ||
262 | exit(-1); | ||
263 | } | ||
264 | } | ||
265 | |||
266 | while (wait(&status) >= 0) | ||
267 | ; | ||
268 | |||
269 | prctl(PR_TASK_PERF_COUNTERS_DISABLE); | ||
270 | t1 = rdclock(); | ||
271 | |||
272 | walltime_nsecs = t1 - t0; | ||
273 | |||
274 | fflush(stdout); | ||
275 | |||
276 | fprintf(stderr, "\n"); | ||
277 | fprintf(stderr, " Performance counter stats for \'%s", argv[0]); | ||
278 | |||
279 | for (i = 1; i < argc; i++) | ||
280 | fprintf(stderr, " %s", argv[i]); | ||
281 | |||
282 | fprintf(stderr, "\':\n"); | ||
283 | fprintf(stderr, "\n"); | ||
284 | |||
285 | for (counter = 0; counter < nr_counters; counter++) | ||
286 | read_counter(counter); | ||
287 | |||
288 | for (counter = 0; counter < nr_counters; counter++) | ||
289 | print_counter(counter); | ||
290 | |||
291 | |||
292 | fprintf(stderr, "\n"); | ||
293 | fprintf(stderr, " Wall-clock time elapsed: %12.6f msecs\n", | ||
294 | (double)(t1-t0)/1e6); | ||
295 | fprintf(stderr, "\n"); | ||
296 | |||
297 | return 0; | ||
298 | } | ||
299 | |||
/*
 * Number of the last signal caught by skip_signal(), or -1 if none.
 *
 * Written from a signal handler, hence volatile sig_atomic_t: that is
 * the only object type a handler may portably assign to.
 */
static volatile sig_atomic_t signr = -1;

/*
 * Signal handler: only remember which signal arrived, so that
 * perf stat itself keeps running.
 */
static void skip_signal(int signo)
{
	signr = signo;
}
306 | |||
307 | static void sig_atexit(void) | ||
308 | { | ||
309 | if (signr == -1) | ||
310 | return; | ||
311 | |||
312 | signal(signr, SIG_DFL); | ||
313 | kill(getpid(), signr); | ||
314 | } | ||
315 | |||
/* NULL-terminated usage lines passed to the option parser. */
static const char * const stat_usage[] = { "perf stat [<options>] <command>", NULL };
320 | |||
321 | static const struct option options[] = { | ||
322 | OPT_CALLBACK('e', "event", NULL, "event", | ||
323 | "event selector. use 'perf list' to list available events", | ||
324 | parse_events), | ||
325 | OPT_BOOLEAN('i', "inherit", &inherit, | ||
326 | "child tasks inherit counters"), | ||
327 | OPT_INTEGER('p', "pid", &target_pid, | ||
328 | "stat events on existing pid"), | ||
329 | OPT_BOOLEAN('a', "all-cpus", &system_wide, | ||
330 | "system-wide collection from all CPUs"), | ||
331 | OPT_BOOLEAN('S', "scale", &scale, | ||
332 | "scale/normalize counters"), | ||
333 | OPT_BOOLEAN('v', "verbose", &verbose, | ||
334 | "be more verbose (show counter open errors, etc)"), | ||
335 | OPT_END() | ||
336 | }; | ||
337 | |||
338 | int cmd_stat(int argc, const char **argv, const char *prefix) | ||
339 | { | ||
340 | page_size = sysconf(_SC_PAGE_SIZE); | ||
341 | |||
342 | memcpy(attrs, default_attrs, sizeof(attrs)); | ||
343 | |||
344 | argc = parse_options(argc, argv, options, stat_usage, 0); | ||
345 | if (!argc) | ||
346 | usage_with_options(stat_usage, options); | ||
347 | |||
348 | if (!nr_counters) | ||
349 | nr_counters = 8; | ||
350 | |||
351 | nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); | ||
352 | assert(nr_cpus <= MAX_NR_CPUS); | ||
353 | assert(nr_cpus >= 0); | ||
354 | |||
355 | /* | ||
356 | * We dont want to block the signals - that would cause | ||
357 | * child tasks to inherit that and Ctrl-C would not work. | ||
358 | * What we want is for Ctrl-C to work in the exec()-ed | ||
359 | * task, but being ignored by perf stat itself: | ||
360 | */ | ||
361 | atexit(sig_atexit); | ||
362 | signal(SIGINT, skip_signal); | ||
363 | signal(SIGALRM, skip_signal); | ||
364 | signal(SIGABRT, skip_signal); | ||
365 | |||
366 | return do_perf_stat(argc, argv); | ||
367 | } | ||