diff options
Diffstat (limited to 'tools/perf/builtin-stat.c')
-rw-r--r-- | tools/perf/builtin-stat.c | 171 |
1 file changed, 102 insertions, 69 deletions
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 6d3eeac1ea25..2e03524a1de0 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -32,6 +32,7 @@ | |||
32 | * Wu Fengguang <fengguang.wu@intel.com> | 32 | * Wu Fengguang <fengguang.wu@intel.com> |
33 | * Mike Galbraith <efault@gmx.de> | 33 | * Mike Galbraith <efault@gmx.de> |
34 | * Paul Mackerras <paulus@samba.org> | 34 | * Paul Mackerras <paulus@samba.org> |
35 | * Jaswinder Singh Rajput <jaswinder@kernel.org> | ||
35 | * | 36 | * |
36 | * Released under the GPL v2. (and only v2, not any later version) | 37 | * Released under the GPL v2. (and only v2, not any later version) |
37 | */ | 38 | */ |
@@ -45,7 +46,7 @@ | |||
45 | #include <sys/prctl.h> | 46 | #include <sys/prctl.h> |
46 | #include <math.h> | 47 | #include <math.h> |
47 | 48 | ||
48 | static struct perf_counter_attr default_attrs[MAX_COUNTERS] = { | 49 | static struct perf_counter_attr default_attrs[] = { |
49 | 50 | ||
50 | { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK }, | 51 | { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK }, |
51 | { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES}, | 52 | { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES}, |
@@ -59,42 +60,28 @@ static struct perf_counter_attr default_attrs[MAX_COUNTERS] = { | |||
59 | 60 | ||
60 | }; | 61 | }; |
61 | 62 | ||
63 | #define MAX_RUN 100 | ||
64 | |||
62 | static int system_wide = 0; | 65 | static int system_wide = 0; |
63 | static int inherit = 1; | ||
64 | static int verbose = 0; | 66 | static int verbose = 0; |
65 | |||
66 | static int fd[MAX_NR_CPUS][MAX_COUNTERS]; | ||
67 | |||
68 | static int target_pid = -1; | ||
69 | static int nr_cpus = 0; | 67 | static int nr_cpus = 0; |
70 | static unsigned int page_size; | 68 | static int run_idx = 0; |
71 | 69 | ||
70 | static int run_count = 1; | ||
71 | static int inherit = 1; | ||
72 | static int scale = 1; | 72 | static int scale = 1; |
73 | static int target_pid = -1; | ||
74 | static int null_run = 0; | ||
73 | 75 | ||
74 | static const unsigned int default_count[] = { | 76 | static int fd[MAX_NR_CPUS][MAX_COUNTERS]; |
75 | 1000000, | ||
76 | 1000000, | ||
77 | 10000, | ||
78 | 10000, | ||
79 | 1000000, | ||
80 | 10000, | ||
81 | }; | ||
82 | |||
83 | #define MAX_RUN 100 | ||
84 | |||
85 | static int run_count = 1; | ||
86 | static int run_idx = 0; | ||
87 | |||
88 | static u64 event_res[MAX_RUN][MAX_COUNTERS][3]; | ||
89 | static u64 event_scaled[MAX_RUN][MAX_COUNTERS]; | ||
90 | |||
91 | //static u64 event_hist[MAX_RUN][MAX_COUNTERS][3]; | ||
92 | |||
93 | 77 | ||
94 | static u64 runtime_nsecs[MAX_RUN]; | 78 | static u64 runtime_nsecs[MAX_RUN]; |
95 | static u64 walltime_nsecs[MAX_RUN]; | 79 | static u64 walltime_nsecs[MAX_RUN]; |
96 | static u64 runtime_cycles[MAX_RUN]; | 80 | static u64 runtime_cycles[MAX_RUN]; |
97 | 81 | ||
82 | static u64 event_res[MAX_RUN][MAX_COUNTERS][3]; | ||
83 | static u64 event_scaled[MAX_RUN][MAX_COUNTERS]; | ||
84 | |||
98 | static u64 event_res_avg[MAX_COUNTERS][3]; | 85 | static u64 event_res_avg[MAX_COUNTERS][3]; |
99 | static u64 event_res_noise[MAX_COUNTERS][3]; | 86 | static u64 event_res_noise[MAX_COUNTERS][3]; |
100 | 87 | ||
@@ -109,7 +96,10 @@ static u64 walltime_nsecs_noise; | |||
109 | static u64 runtime_cycles_avg; | 96 | static u64 runtime_cycles_avg; |
110 | static u64 runtime_cycles_noise; | 97 | static u64 runtime_cycles_noise; |
111 | 98 | ||
112 | static void create_perf_stat_counter(int counter) | 99 | #define ERR_PERF_OPEN \ |
100 | "Error: counter %d, sys_perf_counter_open() syscall returned with %d (%s)\n" | ||
101 | |||
102 | static void create_perf_stat_counter(int counter, int pid) | ||
113 | { | 103 | { |
114 | struct perf_counter_attr *attr = attrs + counter; | 104 | struct perf_counter_attr *attr = attrs + counter; |
115 | 105 | ||
@@ -119,20 +109,21 @@ static void create_perf_stat_counter(int counter) | |||
119 | 109 | ||
120 | if (system_wide) { | 110 | if (system_wide) { |
121 | int cpu; | 111 | int cpu; |
122 | for (cpu = 0; cpu < nr_cpus; cpu ++) { | 112 | for (cpu = 0; cpu < nr_cpus; cpu++) { |
123 | fd[cpu][counter] = sys_perf_counter_open(attr, -1, cpu, -1, 0); | 113 | fd[cpu][counter] = sys_perf_counter_open(attr, -1, cpu, -1, 0); |
124 | if (fd[cpu][counter] < 0 && verbose) { | 114 | if (fd[cpu][counter] < 0 && verbose) |
125 | printf("Error: counter %d, sys_perf_counter_open() syscall returned with %d (%s)\n", counter, fd[cpu][counter], strerror(errno)); | 115 | fprintf(stderr, ERR_PERF_OPEN, counter, |
126 | } | 116 | fd[cpu][counter], strerror(errno)); |
127 | } | 117 | } |
128 | } else { | 118 | } else { |
129 | attr->inherit = inherit; | 119 | attr->inherit = inherit; |
130 | attr->disabled = 1; | 120 | attr->disabled = 1; |
131 | 121 | attr->enable_on_exec = 1; | |
132 | fd[0][counter] = sys_perf_counter_open(attr, 0, -1, -1, 0); | 122 | |
133 | if (fd[0][counter] < 0 && verbose) { | 123 | fd[0][counter] = sys_perf_counter_open(attr, pid, -1, -1, 0); |
134 | printf("Error: counter %d, sys_perf_counter_open() syscall returned with %d (%s)\n", counter, fd[0][counter], strerror(errno)); | 124 | if (fd[0][counter] < 0 && verbose) |
135 | } | 125 | fprintf(stderr, ERR_PERF_OPEN, counter, |
126 | fd[0][counter], strerror(errno)); | ||
136 | } | 127 | } |
137 | } | 128 | } |
138 | 129 | ||
@@ -168,7 +159,7 @@ static void read_counter(int counter) | |||
168 | count[0] = count[1] = count[2] = 0; | 159 | count[0] = count[1] = count[2] = 0; |
169 | 160 | ||
170 | nv = scale ? 3 : 1; | 161 | nv = scale ? 3 : 1; |
171 | for (cpu = 0; cpu < nr_cpus; cpu ++) { | 162 | for (cpu = 0; cpu < nr_cpus; cpu++) { |
172 | if (fd[cpu][counter] < 0) | 163 | if (fd[cpu][counter] < 0) |
173 | continue; | 164 | continue; |
174 | 165 | ||
@@ -215,32 +206,67 @@ static int run_perf_stat(int argc, const char **argv) | |||
215 | int status = 0; | 206 | int status = 0; |
216 | int counter; | 207 | int counter; |
217 | int pid; | 208 | int pid; |
209 | int child_ready_pipe[2], go_pipe[2]; | ||
210 | char buf; | ||
218 | 211 | ||
219 | if (!system_wide) | 212 | if (!system_wide) |
220 | nr_cpus = 1; | 213 | nr_cpus = 1; |
221 | 214 | ||
222 | for (counter = 0; counter < nr_counters; counter++) | 215 | if (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0) { |
223 | create_perf_stat_counter(counter); | 216 | perror("failed to create pipes"); |
224 | 217 | exit(1); | |
225 | /* | 218 | } |
226 | * Enable counters and exec the command: | ||
227 | */ | ||
228 | t0 = rdclock(); | ||
229 | prctl(PR_TASK_PERF_COUNTERS_ENABLE); | ||
230 | 219 | ||
231 | if ((pid = fork()) < 0) | 220 | if ((pid = fork()) < 0) |
232 | perror("failed to fork"); | 221 | perror("failed to fork"); |
233 | 222 | ||
234 | if (!pid) { | 223 | if (!pid) { |
235 | if (execvp(argv[0], (char **)argv)) { | 224 | close(child_ready_pipe[0]); |
236 | perror(argv[0]); | 225 | close(go_pipe[1]); |
237 | exit(-1); | 226 | fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC); |
238 | } | 227 | |
228 | /* | ||
229 | * Do a dummy execvp to get the PLT entry resolved, | ||
230 | * so we avoid the resolver overhead on the real | ||
231 | * execvp call. | ||
232 | */ | ||
233 | execvp("", (char **)argv); | ||
234 | |||
235 | /* | ||
236 | * Tell the parent we're ready to go | ||
237 | */ | ||
238 | close(child_ready_pipe[1]); | ||
239 | |||
240 | /* | ||
241 | * Wait until the parent tells us to go. | ||
242 | */ | ||
243 | read(go_pipe[0], &buf, 1); | ||
244 | |||
245 | execvp(argv[0], (char **)argv); | ||
246 | |||
247 | perror(argv[0]); | ||
248 | exit(-1); | ||
239 | } | 249 | } |
240 | 250 | ||
251 | /* | ||
252 | * Wait for the child to be ready to exec. | ||
253 | */ | ||
254 | close(child_ready_pipe[1]); | ||
255 | close(go_pipe[0]); | ||
256 | read(child_ready_pipe[0], &buf, 1); | ||
257 | close(child_ready_pipe[0]); | ||
258 | |||
259 | for (counter = 0; counter < nr_counters; counter++) | ||
260 | create_perf_stat_counter(counter, pid); | ||
261 | |||
262 | /* | ||
263 | * Enable counters and exec the command: | ||
264 | */ | ||
265 | t0 = rdclock(); | ||
266 | |||
267 | close(go_pipe[1]); | ||
241 | wait(&status); | 268 | wait(&status); |
242 | 269 | ||
243 | prctl(PR_TASK_PERF_COUNTERS_DISABLE); | ||
244 | t1 = rdclock(); | 270 | t1 = rdclock(); |
245 | 271 | ||
246 | walltime_nsecs[run_idx] = t1 - t0; | 272 | walltime_nsecs[run_idx] = t1 - t0; |
@@ -262,7 +288,7 @@ static void nsec_printout(int counter, u64 *count, u64 *noise) | |||
262 | { | 288 | { |
263 | double msecs = (double)count[0] / 1000000; | 289 | double msecs = (double)count[0] / 1000000; |
264 | 290 | ||
265 | fprintf(stderr, " %14.6f %-20s", msecs, event_name(counter)); | 291 | fprintf(stderr, " %14.6f %-24s", msecs, event_name(counter)); |
266 | 292 | ||
267 | if (attrs[counter].type == PERF_TYPE_SOFTWARE && | 293 | if (attrs[counter].type == PERF_TYPE_SOFTWARE && |
268 | attrs[counter].config == PERF_COUNT_SW_TASK_CLOCK) { | 294 | attrs[counter].config == PERF_COUNT_SW_TASK_CLOCK) { |
@@ -276,7 +302,7 @@ static void nsec_printout(int counter, u64 *count, u64 *noise) | |||
276 | 302 | ||
277 | static void abs_printout(int counter, u64 *count, u64 *noise) | 303 | static void abs_printout(int counter, u64 *count, u64 *noise) |
278 | { | 304 | { |
279 | fprintf(stderr, " %14Ld %-20s", count[0], event_name(counter)); | 305 | fprintf(stderr, " %14Ld %-24s", count[0], event_name(counter)); |
280 | 306 | ||
281 | if (runtime_cycles_avg && | 307 | if (runtime_cycles_avg && |
282 | attrs[counter].type == PERF_TYPE_HARDWARE && | 308 | attrs[counter].type == PERF_TYPE_HARDWARE && |
@@ -306,7 +332,7 @@ static void print_counter(int counter) | |||
306 | scaled = event_scaled_avg[counter]; | 332 | scaled = event_scaled_avg[counter]; |
307 | 333 | ||
308 | if (scaled == -1) { | 334 | if (scaled == -1) { |
309 | fprintf(stderr, " %14s %-20s\n", | 335 | fprintf(stderr, " %14s %-24s\n", |
310 | "<not counted>", event_name(counter)); | 336 | "<not counted>", event_name(counter)); |
311 | return; | 337 | return; |
312 | } | 338 | } |
@@ -364,8 +390,11 @@ static void calc_avg(void) | |||
364 | event_res_avg[j]+1, event_res[i][j]+1); | 390 | event_res_avg[j]+1, event_res[i][j]+1); |
365 | update_avg("counter/2", j, | 391 | update_avg("counter/2", j, |
366 | event_res_avg[j]+2, event_res[i][j]+2); | 392 | event_res_avg[j]+2, event_res[i][j]+2); |
367 | update_avg("scaled", j, | 393 | if (event_scaled[i][j] != -1) |
368 | event_scaled_avg + j, event_scaled[i]+j); | 394 | update_avg("scaled", j, |
395 | event_scaled_avg + j, event_scaled[i]+j); | ||
396 | else | ||
397 | event_scaled_avg[j] = -1; | ||
369 | } | 398 | } |
370 | } | 399 | } |
371 | runtime_nsecs_avg /= run_count; | 400 | runtime_nsecs_avg /= run_count; |
@@ -429,11 +458,14 @@ static void print_stat(int argc, const char **argv) | |||
429 | for (counter = 0; counter < nr_counters; counter++) | 458 | for (counter = 0; counter < nr_counters; counter++) |
430 | print_counter(counter); | 459 | print_counter(counter); |
431 | 460 | ||
432 | |||
433 | fprintf(stderr, "\n"); | 461 | fprintf(stderr, "\n"); |
434 | fprintf(stderr, " %14.9f seconds time elapsed.\n", | 462 | fprintf(stderr, " %14.9f seconds time elapsed", |
435 | (double)walltime_nsecs_avg/1e9); | 463 | (double)walltime_nsecs_avg/1e9); |
436 | fprintf(stderr, "\n"); | 464 | if (run_count > 1) { |
465 | fprintf(stderr, " ( +- %7.3f%% )", | ||
466 | 100.0*(double)walltime_nsecs_noise/(double)walltime_nsecs_avg); | ||
467 | } | ||
468 | fprintf(stderr, "\n\n"); | ||
437 | } | 469 | } |
438 | 470 | ||
439 | static volatile int signr = -1; | 471 | static volatile int signr = -1; |
@@ -466,13 +498,15 @@ static const struct option options[] = { | |||
466 | OPT_INTEGER('p', "pid", &target_pid, | 498 | OPT_INTEGER('p', "pid", &target_pid, |
467 | "stat events on existing pid"), | 499 | "stat events on existing pid"), |
468 | OPT_BOOLEAN('a', "all-cpus", &system_wide, | 500 | OPT_BOOLEAN('a', "all-cpus", &system_wide, |
469 | "system-wide collection from all CPUs"), | 501 | "system-wide collection from all CPUs"), |
470 | OPT_BOOLEAN('S', "scale", &scale, | 502 | OPT_BOOLEAN('S', "scale", &scale, |
471 | "scale/normalize counters"), | 503 | "scale/normalize counters"), |
472 | OPT_BOOLEAN('v', "verbose", &verbose, | 504 | OPT_BOOLEAN('v', "verbose", &verbose, |
473 | "be more verbose (show counter open errors, etc)"), | 505 | "be more verbose (show counter open errors, etc)"), |
474 | OPT_INTEGER('r', "repeat", &run_count, | 506 | OPT_INTEGER('r', "repeat", &run_count, |
475 | "repeat command and print average + stddev (max: 100)"), | 507 | "repeat command and print average + stddev (max: 100)"), |
508 | OPT_BOOLEAN('n', "null", &null_run, | ||
509 | "null run - dont start any counters"), | ||
476 | OPT_END() | 510 | OPT_END() |
477 | }; | 511 | }; |
478 | 512 | ||
@@ -480,18 +514,17 @@ int cmd_stat(int argc, const char **argv, const char *prefix) | |||
480 | { | 514 | { |
481 | int status; | 515 | int status; |
482 | 516 | ||
483 | page_size = sysconf(_SC_PAGE_SIZE); | ||
484 | |||
485 | memcpy(attrs, default_attrs, sizeof(attrs)); | ||
486 | |||
487 | argc = parse_options(argc, argv, options, stat_usage, 0); | 517 | argc = parse_options(argc, argv, options, stat_usage, 0); |
488 | if (!argc) | 518 | if (!argc) |
489 | usage_with_options(stat_usage, options); | 519 | usage_with_options(stat_usage, options); |
490 | if (run_count <= 0 || run_count > MAX_RUN) | 520 | if (run_count <= 0 || run_count > MAX_RUN) |
491 | usage_with_options(stat_usage, options); | 521 | usage_with_options(stat_usage, options); |
492 | 522 | ||
493 | if (!nr_counters) | 523 | /* Set attrs and nr_counters if no event is selected and !null_run */ |
494 | nr_counters = 8; | 524 | if (!null_run && !nr_counters) { |
525 | memcpy(attrs, default_attrs, sizeof(default_attrs)); | ||
526 | nr_counters = ARRAY_SIZE(default_attrs); | ||
527 | } | ||
495 | 528 | ||
496 | nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); | 529 | nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); |
497 | assert(nr_cpus <= MAX_NR_CPUS); | 530 | assert(nr_cpus <= MAX_NR_CPUS); |
@@ -511,7 +544,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix) | |||
511 | status = 0; | 544 | status = 0; |
512 | for (run_idx = 0; run_idx < run_count; run_idx++) { | 545 | for (run_idx = 0; run_idx < run_count; run_idx++) { |
513 | if (run_count != 1 && verbose) | 546 | if (run_count != 1 && verbose) |
514 | fprintf(stderr, "[ perf stat: executing run #%d ... ]\n", run_idx+1); | 547 | fprintf(stderr, "[ perf stat: executing run #%d ... ]\n", run_idx + 1); |
515 | status = run_perf_stat(argc, argv); | 548 | status = run_perf_stat(argc, argv); |
516 | } | 549 | } |
517 | 550 | ||