diff options
Diffstat (limited to 'tools/perf/builtin-stat.c')
-rw-r--r-- | tools/perf/builtin-stat.c | 308 |
1 files changed, 231 insertions, 77 deletions
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index c43e4a97dc42..6d3eeac1ea25 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c | |||
@@ -43,6 +43,7 @@ | |||
43 | #include "util/parse-events.h" | 43 | #include "util/parse-events.h" |
44 | 44 | ||
45 | #include <sys/prctl.h> | 45 | #include <sys/prctl.h> |
46 | #include <math.h> | ||
46 | 47 | ||
47 | static struct perf_counter_attr default_attrs[MAX_COUNTERS] = { | 48 | static struct perf_counter_attr default_attrs[MAX_COUNTERS] = { |
48 | 49 | ||
@@ -79,12 +80,34 @@ static const unsigned int default_count[] = { | |||
79 | 10000, | 80 | 10000, |
80 | }; | 81 | }; |
81 | 82 | ||
82 | static __u64 event_res[MAX_COUNTERS][3]; | 83 | #define MAX_RUN 100 |
83 | static __u64 event_scaled[MAX_COUNTERS]; | ||
84 | 84 | ||
85 | static __u64 runtime_nsecs; | 85 | static int run_count = 1; |
86 | static __u64 walltime_nsecs; | 86 | static int run_idx = 0; |
87 | static __u64 runtime_cycles; | 87 | |
88 | static u64 event_res[MAX_RUN][MAX_COUNTERS][3]; | ||
89 | static u64 event_scaled[MAX_RUN][MAX_COUNTERS]; | ||
90 | |||
91 | //static u64 event_hist[MAX_RUN][MAX_COUNTERS][3]; | ||
92 | |||
93 | |||
94 | static u64 runtime_nsecs[MAX_RUN]; | ||
95 | static u64 walltime_nsecs[MAX_RUN]; | ||
96 | static u64 runtime_cycles[MAX_RUN]; | ||
97 | |||
98 | static u64 event_res_avg[MAX_COUNTERS][3]; | ||
99 | static u64 event_res_noise[MAX_COUNTERS][3]; | ||
100 | |||
101 | static u64 event_scaled_avg[MAX_COUNTERS]; | ||
102 | |||
103 | static u64 runtime_nsecs_avg; | ||
104 | static u64 runtime_nsecs_noise; | ||
105 | |||
106 | static u64 walltime_nsecs_avg; | ||
107 | static u64 walltime_nsecs_noise; | ||
108 | |||
109 | static u64 runtime_cycles_avg; | ||
110 | static u64 runtime_cycles_noise; | ||
88 | 111 | ||
89 | static void create_perf_stat_counter(int counter) | 112 | static void create_perf_stat_counter(int counter) |
90 | { | 113 | { |
@@ -135,12 +158,12 @@ static inline int nsec_counter(int counter) | |||
135 | */ | 158 | */ |
136 | static void read_counter(int counter) | 159 | static void read_counter(int counter) |
137 | { | 160 | { |
138 | __u64 *count, single_count[3]; | 161 | u64 *count, single_count[3]; |
139 | ssize_t res; | 162 | ssize_t res; |
140 | int cpu, nv; | 163 | int cpu, nv; |
141 | int scaled; | 164 | int scaled; |
142 | 165 | ||
143 | count = event_res[counter]; | 166 | count = event_res[run_idx][counter]; |
144 | 167 | ||
145 | count[0] = count[1] = count[2] = 0; | 168 | count[0] = count[1] = count[2] = 0; |
146 | 169 | ||
@@ -149,8 +172,10 @@ static void read_counter(int counter) | |||
149 | if (fd[cpu][counter] < 0) | 172 | if (fd[cpu][counter] < 0) |
150 | continue; | 173 | continue; |
151 | 174 | ||
152 | res = read(fd[cpu][counter], single_count, nv * sizeof(__u64)); | 175 | res = read(fd[cpu][counter], single_count, nv * sizeof(u64)); |
153 | assert(res == nv * sizeof(__u64)); | 176 | assert(res == nv * sizeof(u64)); |
177 | close(fd[cpu][counter]); | ||
178 | fd[cpu][counter] = -1; | ||
154 | 179 | ||
155 | count[0] += single_count[0]; | 180 | count[0] += single_count[0]; |
156 | if (scale) { | 181 | if (scale) { |
@@ -162,13 +187,13 @@ static void read_counter(int counter) | |||
162 | scaled = 0; | 187 | scaled = 0; |
163 | if (scale) { | 188 | if (scale) { |
164 | if (count[2] == 0) { | 189 | if (count[2] == 0) { |
165 | event_scaled[counter] = -1; | 190 | event_scaled[run_idx][counter] = -1; |
166 | count[0] = 0; | 191 | count[0] = 0; |
167 | return; | 192 | return; |
168 | } | 193 | } |
169 | 194 | ||
170 | if (count[2] < count[1]) { | 195 | if (count[2] < count[1]) { |
171 | event_scaled[counter] = 1; | 196 | event_scaled[run_idx][counter] = 1; |
172 | count[0] = (unsigned long long) | 197 | count[0] = (unsigned long long) |
173 | ((double)count[0] * count[1] / count[2] + 0.5); | 198 | ((double)count[0] * count[1] / count[2] + 0.5); |
174 | } | 199 | } |
@@ -178,10 +203,94 @@ static void read_counter(int counter) | |||
178 | */ | 203 | */ |
179 | if (attrs[counter].type == PERF_TYPE_SOFTWARE && | 204 | if (attrs[counter].type == PERF_TYPE_SOFTWARE && |
180 | attrs[counter].config == PERF_COUNT_SW_TASK_CLOCK) | 205 | attrs[counter].config == PERF_COUNT_SW_TASK_CLOCK) |
181 | runtime_nsecs = count[0]; | 206 | runtime_nsecs[run_idx] = count[0]; |
182 | if (attrs[counter].type == PERF_TYPE_HARDWARE && | 207 | if (attrs[counter].type == PERF_TYPE_HARDWARE && |
183 | attrs[counter].config == PERF_COUNT_HW_CPU_CYCLES) | 208 | attrs[counter].config == PERF_COUNT_HW_CPU_CYCLES) |
184 | runtime_cycles = count[0]; | 209 | runtime_cycles[run_idx] = count[0]; |
210 | } | ||
211 | |||
212 | static int run_perf_stat(int argc, const char **argv) | ||
213 | { | ||
214 | unsigned long long t0, t1; | ||
215 | int status = 0; | ||
216 | int counter; | ||
217 | int pid; | ||
218 | |||
219 | if (!system_wide) | ||
220 | nr_cpus = 1; | ||
221 | |||
222 | for (counter = 0; counter < nr_counters; counter++) | ||
223 | create_perf_stat_counter(counter); | ||
224 | |||
225 | /* | ||
226 | * Enable counters and exec the command: | ||
227 | */ | ||
228 | t0 = rdclock(); | ||
229 | prctl(PR_TASK_PERF_COUNTERS_ENABLE); | ||
230 | |||
231 | if ((pid = fork()) < 0) | ||
232 | perror("failed to fork"); | ||
233 | |||
234 | if (!pid) { | ||
235 | if (execvp(argv[0], (char **)argv)) { | ||
236 | perror(argv[0]); | ||
237 | exit(-1); | ||
238 | } | ||
239 | } | ||
240 | |||
241 | wait(&status); | ||
242 | |||
243 | prctl(PR_TASK_PERF_COUNTERS_DISABLE); | ||
244 | t1 = rdclock(); | ||
245 | |||
246 | walltime_nsecs[run_idx] = t1 - t0; | ||
247 | |||
248 | for (counter = 0; counter < nr_counters; counter++) | ||
249 | read_counter(counter); | ||
250 | |||
251 | return WEXITSTATUS(status); | ||
252 | } | ||
253 | |||
254 | static void print_noise(u64 *count, u64 *noise) | ||
255 | { | ||
256 | if (run_count > 1) | ||
257 | fprintf(stderr, " ( +- %7.3f%% )", | ||
258 | (double)noise[0]/(count[0]+1)*100.0); | ||
259 | } | ||
260 | |||
261 | static void nsec_printout(int counter, u64 *count, u64 *noise) | ||
262 | { | ||
263 | double msecs = (double)count[0] / 1000000; | ||
264 | |||
265 | fprintf(stderr, " %14.6f %-20s", msecs, event_name(counter)); | ||
266 | |||
267 | if (attrs[counter].type == PERF_TYPE_SOFTWARE && | ||
268 | attrs[counter].config == PERF_COUNT_SW_TASK_CLOCK) { | ||
269 | |||
270 | if (walltime_nsecs_avg) | ||
271 | fprintf(stderr, " # %10.3f CPUs ", | ||
272 | (double)count[0] / (double)walltime_nsecs_avg); | ||
273 | } | ||
274 | print_noise(count, noise); | ||
275 | } | ||
276 | |||
277 | static void abs_printout(int counter, u64 *count, u64 *noise) | ||
278 | { | ||
279 | fprintf(stderr, " %14Ld %-20s", count[0], event_name(counter)); | ||
280 | |||
281 | if (runtime_cycles_avg && | ||
282 | attrs[counter].type == PERF_TYPE_HARDWARE && | ||
283 | attrs[counter].config == PERF_COUNT_HW_INSTRUCTIONS) { | ||
284 | |||
285 | fprintf(stderr, " # %10.3f IPC ", | ||
286 | (double)count[0] / (double)runtime_cycles_avg); | ||
287 | } else { | ||
288 | if (runtime_nsecs_avg) { | ||
289 | fprintf(stderr, " # %10.3f M/sec", | ||
290 | (double)count[0]/runtime_nsecs_avg*1000.0); | ||
291 | } | ||
292 | } | ||
293 | print_noise(count, noise); | ||
185 | } | 294 | } |
186 | 295 | ||
187 | /* | 296 | /* |
@@ -189,11 +298,12 @@ static void read_counter(int counter) | |||
189 | */ | 298 | */ |
190 | static void print_counter(int counter) | 299 | static void print_counter(int counter) |
191 | { | 300 | { |
192 | __u64 *count; | 301 | u64 *count, *noise; |
193 | int scaled; | 302 | int scaled; |
194 | 303 | ||
195 | count = event_res[counter]; | 304 | count = event_res_avg[counter]; |
196 | scaled = event_scaled[counter]; | 305 | noise = event_res_noise[counter]; |
306 | scaled = event_scaled_avg[counter]; | ||
197 | 307 | ||
198 | if (scaled == -1) { | 308 | if (scaled == -1) { |
199 | fprintf(stderr, " %14s %-20s\n", | 309 | fprintf(stderr, " %14s %-20s\n", |
@@ -201,75 +311,107 @@ static void print_counter(int counter) | |||
201 | return; | 311 | return; |
202 | } | 312 | } |
203 | 313 | ||
204 | if (nsec_counter(counter)) { | 314 | if (nsec_counter(counter)) |
205 | double msecs = (double)count[0] / 1000000; | 315 | nsec_printout(counter, count, noise); |
206 | 316 | else | |
207 | fprintf(stderr, " %14.6f %-20s", | 317 | abs_printout(counter, count, noise); |
208 | msecs, event_name(counter)); | ||
209 | if (attrs[counter].type == PERF_TYPE_SOFTWARE && | ||
210 | attrs[counter].config == PERF_COUNT_SW_TASK_CLOCK) { | ||
211 | 318 | ||
212 | if (walltime_nsecs) | ||
213 | fprintf(stderr, " # %11.3f CPU utilization factor", | ||
214 | (double)count[0] / (double)walltime_nsecs); | ||
215 | } | ||
216 | } else { | ||
217 | fprintf(stderr, " %14Ld %-20s", | ||
218 | count[0], event_name(counter)); | ||
219 | if (runtime_nsecs) | ||
220 | fprintf(stderr, " # %11.3f M/sec", | ||
221 | (double)count[0]/runtime_nsecs*1000.0); | ||
222 | if (runtime_cycles && | ||
223 | attrs[counter].type == PERF_TYPE_HARDWARE && | ||
224 | attrs[counter].config == PERF_COUNT_HW_INSTRUCTIONS) { | ||
225 | |||
226 | fprintf(stderr, " # %1.3f per cycle", | ||
227 | (double)count[0] / (double)runtime_cycles); | ||
228 | } | ||
229 | } | ||
230 | if (scaled) | 319 | if (scaled) |
231 | fprintf(stderr, " (scaled from %.2f%%)", | 320 | fprintf(stderr, " (scaled from %.2f%%)", |
232 | (double) count[2] / count[1] * 100); | 321 | (double) count[2] / count[1] * 100); |
322 | |||
233 | fprintf(stderr, "\n"); | 323 | fprintf(stderr, "\n"); |
234 | } | 324 | } |
235 | 325 | ||
236 | static int do_perf_stat(int argc, const char **argv) | 326 | /* |
327 | * normalize_noise noise values down to stddev: | ||
328 | */ | ||
329 | static void normalize_noise(u64 *val) | ||
237 | { | 330 | { |
238 | unsigned long long t0, t1; | 331 | double res; |
239 | int counter; | ||
240 | int status; | ||
241 | int pid; | ||
242 | int i; | ||
243 | 332 | ||
244 | if (!system_wide) | 333 | res = (double)*val / (run_count * sqrt((double)run_count)); |
245 | nr_cpus = 1; | ||
246 | 334 | ||
247 | for (counter = 0; counter < nr_counters; counter++) | 335 | *val = (u64)res; |
248 | create_perf_stat_counter(counter); | 336 | } |
249 | 337 | ||
250 | /* | 338 | static void update_avg(const char *name, int idx, u64 *avg, u64 *val) |
251 | * Enable counters and exec the command: | 339 | { |
252 | */ | 340 | *avg += *val; |
253 | t0 = rdclock(); | ||
254 | prctl(PR_TASK_PERF_COUNTERS_ENABLE); | ||
255 | 341 | ||
256 | if ((pid = fork()) < 0) | 342 | if (verbose > 1) |
257 | perror("failed to fork"); | 343 | fprintf(stderr, "debug: %20s[%d]: %Ld\n", name, idx, *val); |
344 | } | ||
345 | /* | ||
346 | * Calculate the averages and noises: | ||
347 | */ | ||
348 | static void calc_avg(void) | ||
349 | { | ||
350 | int i, j; | ||
351 | |||
352 | if (verbose > 1) | ||
353 | fprintf(stderr, "\n"); | ||
354 | |||
355 | for (i = 0; i < run_count; i++) { | ||
356 | update_avg("runtime", 0, &runtime_nsecs_avg, runtime_nsecs + i); | ||
357 | update_avg("walltime", 0, &walltime_nsecs_avg, walltime_nsecs + i); | ||
358 | update_avg("runtime_cycles", 0, &runtime_cycles_avg, runtime_cycles + i); | ||
359 | |||
360 | for (j = 0; j < nr_counters; j++) { | ||
361 | update_avg("counter/0", j, | ||
362 | event_res_avg[j]+0, event_res[i][j]+0); | ||
363 | update_avg("counter/1", j, | ||
364 | event_res_avg[j]+1, event_res[i][j]+1); | ||
365 | update_avg("counter/2", j, | ||
366 | event_res_avg[j]+2, event_res[i][j]+2); | ||
367 | update_avg("scaled", j, | ||
368 | event_scaled_avg + j, event_scaled[i]+j); | ||
369 | } | ||
370 | } | ||
371 | runtime_nsecs_avg /= run_count; | ||
372 | walltime_nsecs_avg /= run_count; | ||
373 | runtime_cycles_avg /= run_count; | ||
374 | |||
375 | for (j = 0; j < nr_counters; j++) { | ||
376 | event_res_avg[j][0] /= run_count; | ||
377 | event_res_avg[j][1] /= run_count; | ||
378 | event_res_avg[j][2] /= run_count; | ||
379 | } | ||
258 | 380 | ||
259 | if (!pid) { | 381 | for (i = 0; i < run_count; i++) { |
260 | if (execvp(argv[0], (char **)argv)) { | 382 | runtime_nsecs_noise += |
261 | perror(argv[0]); | 383 | abs((s64)(runtime_nsecs[i] - runtime_nsecs_avg)); |
262 | exit(-1); | 384 | walltime_nsecs_noise += |
385 | abs((s64)(walltime_nsecs[i] - walltime_nsecs_avg)); | ||
386 | runtime_cycles_noise += | ||
387 | abs((s64)(runtime_cycles[i] - runtime_cycles_avg)); | ||
388 | |||
389 | for (j = 0; j < nr_counters; j++) { | ||
390 | event_res_noise[j][0] += | ||
391 | abs((s64)(event_res[i][j][0] - event_res_avg[j][0])); | ||
392 | event_res_noise[j][1] += | ||
393 | abs((s64)(event_res[i][j][1] - event_res_avg[j][1])); | ||
394 | event_res_noise[j][2] += | ||
395 | abs((s64)(event_res[i][j][2] - event_res_avg[j][2])); | ||
263 | } | 396 | } |
264 | } | 397 | } |
265 | 398 | ||
266 | while (wait(&status) >= 0) | 399 | normalize_noise(&runtime_nsecs_noise); |
267 | ; | 400 | normalize_noise(&walltime_nsecs_noise); |
401 | normalize_noise(&runtime_cycles_noise); | ||
268 | 402 | ||
269 | prctl(PR_TASK_PERF_COUNTERS_DISABLE); | 403 | for (j = 0; j < nr_counters; j++) { |
270 | t1 = rdclock(); | 404 | normalize_noise(&event_res_noise[j][0]); |
405 | normalize_noise(&event_res_noise[j][1]); | ||
406 | normalize_noise(&event_res_noise[j][2]); | ||
407 | } | ||
408 | } | ||
271 | 409 | ||
272 | walltime_nsecs = t1 - t0; | 410 | static void print_stat(int argc, const char **argv) |
411 | { | ||
412 | int i, counter; | ||
413 | |||
414 | calc_avg(); | ||
273 | 415 | ||
274 | fflush(stdout); | 416 | fflush(stdout); |
275 | 417 | ||
@@ -279,22 +421,19 @@ static int do_perf_stat(int argc, const char **argv) | |||
279 | for (i = 1; i < argc; i++) | 421 | for (i = 1; i < argc; i++) |
280 | fprintf(stderr, " %s", argv[i]); | 422 | fprintf(stderr, " %s", argv[i]); |
281 | 423 | ||
282 | fprintf(stderr, "\':\n"); | 424 | fprintf(stderr, "\'"); |
283 | fprintf(stderr, "\n"); | 425 | if (run_count > 1) |
284 | 426 | fprintf(stderr, " (%d runs)", run_count); | |
285 | for (counter = 0; counter < nr_counters; counter++) | 427 | fprintf(stderr, ":\n\n"); |
286 | read_counter(counter); | ||
287 | 428 | ||
288 | for (counter = 0; counter < nr_counters; counter++) | 429 | for (counter = 0; counter < nr_counters; counter++) |
289 | print_counter(counter); | 430 | print_counter(counter); |
290 | 431 | ||
291 | 432 | ||
292 | fprintf(stderr, "\n"); | 433 | fprintf(stderr, "\n"); |
293 | fprintf(stderr, " Wall-clock time elapsed: %12.6f msecs\n", | 434 | fprintf(stderr, " %14.9f seconds time elapsed.\n", |
294 | (double)(t1-t0)/1e6); | 435 | (double)walltime_nsecs_avg/1e9); |
295 | fprintf(stderr, "\n"); | 436 | fprintf(stderr, "\n"); |
296 | |||
297 | return 0; | ||
298 | } | 437 | } |
299 | 438 | ||
300 | static volatile int signr = -1; | 439 | static volatile int signr = -1; |
@@ -332,11 +471,15 @@ static const struct option options[] = { | |||
332 | "scale/normalize counters"), | 471 | "scale/normalize counters"), |
333 | OPT_BOOLEAN('v', "verbose", &verbose, | 472 | OPT_BOOLEAN('v', "verbose", &verbose, |
334 | "be more verbose (show counter open errors, etc)"), | 473 | "be more verbose (show counter open errors, etc)"), |
474 | OPT_INTEGER('r', "repeat", &run_count, | ||
475 | "repeat command and print average + stddev (max: 100)"), | ||
335 | OPT_END() | 476 | OPT_END() |
336 | }; | 477 | }; |
337 | 478 | ||
338 | int cmd_stat(int argc, const char **argv, const char *prefix) | 479 | int cmd_stat(int argc, const char **argv, const char *prefix) |
339 | { | 480 | { |
481 | int status; | ||
482 | |||
340 | page_size = sysconf(_SC_PAGE_SIZE); | 483 | page_size = sysconf(_SC_PAGE_SIZE); |
341 | 484 | ||
342 | memcpy(attrs, default_attrs, sizeof(attrs)); | 485 | memcpy(attrs, default_attrs, sizeof(attrs)); |
@@ -344,6 +487,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix) | |||
344 | argc = parse_options(argc, argv, options, stat_usage, 0); | 487 | argc = parse_options(argc, argv, options, stat_usage, 0); |
345 | if (!argc) | 488 | if (!argc) |
346 | usage_with_options(stat_usage, options); | 489 | usage_with_options(stat_usage, options); |
490 | if (run_count <= 0 || run_count > MAX_RUN) | ||
491 | usage_with_options(stat_usage, options); | ||
347 | 492 | ||
348 | if (!nr_counters) | 493 | if (!nr_counters) |
349 | nr_counters = 8; | 494 | nr_counters = 8; |
@@ -363,5 +508,14 @@ int cmd_stat(int argc, const char **argv, const char *prefix) | |||
363 | signal(SIGALRM, skip_signal); | 508 | signal(SIGALRM, skip_signal); |
364 | signal(SIGABRT, skip_signal); | 509 | signal(SIGABRT, skip_signal); |
365 | 510 | ||
366 | return do_perf_stat(argc, argv); | 511 | status = 0; |
512 | for (run_idx = 0; run_idx < run_count; run_idx++) { | ||
513 | if (run_count != 1 && verbose) | ||
514 | fprintf(stderr, "[ perf stat: executing run #%d ... ]\n", run_idx+1); | ||
515 | status = run_perf_stat(argc, argv); | ||
516 | } | ||
517 | |||
518 | print_stat(argc, argv); | ||
519 | |||
520 | return status; | ||
367 | } | 521 | } |