diff options
Diffstat (limited to 'tools/perf/builtin-stat.c')
-rw-r--r-- | tools/perf/builtin-stat.c | 216 |
1 files changed, 123 insertions, 93 deletions
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 6d3eeac1ea25..27921a8ce1a9 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c | |||
@@ -32,6 +32,7 @@ | |||
32 | * Wu Fengguang <fengguang.wu@intel.com> | 32 | * Wu Fengguang <fengguang.wu@intel.com> |
33 | * Mike Galbraith <efault@gmx.de> | 33 | * Mike Galbraith <efault@gmx.de> |
34 | * Paul Mackerras <paulus@samba.org> | 34 | * Paul Mackerras <paulus@samba.org> |
35 | * Jaswinder Singh Rajput <jaswinder@kernel.org> | ||
35 | * | 36 | * |
36 | * Released under the GPL v2. (and only v2, not any later version) | 37 | * Released under the GPL v2. (and only v2, not any later version) |
37 | */ | 38 | */ |
@@ -45,7 +46,7 @@ | |||
45 | #include <sys/prctl.h> | 46 | #include <sys/prctl.h> |
46 | #include <math.h> | 47 | #include <math.h> |
47 | 48 | ||
48 | static struct perf_counter_attr default_attrs[MAX_COUNTERS] = { | 49 | static struct perf_counter_attr default_attrs[] = { |
49 | 50 | ||
50 | { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK }, | 51 | { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK }, |
51 | { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES}, | 52 | { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES}, |
@@ -59,42 +60,28 @@ static struct perf_counter_attr default_attrs[MAX_COUNTERS] = { | |||
59 | 60 | ||
60 | }; | 61 | }; |
61 | 62 | ||
63 | #define MAX_RUN 100 | ||
64 | |||
62 | static int system_wide = 0; | 65 | static int system_wide = 0; |
63 | static int inherit = 1; | ||
64 | static int verbose = 0; | 66 | static int verbose = 0; |
67 | static unsigned int nr_cpus = 0; | ||
68 | static int run_idx = 0; | ||
65 | 69 | ||
66 | static int fd[MAX_NR_CPUS][MAX_COUNTERS]; | 70 | static int run_count = 1; |
67 | 71 | static int inherit = 1; | |
68 | static int target_pid = -1; | ||
69 | static int nr_cpus = 0; | ||
70 | static unsigned int page_size; | ||
71 | |||
72 | static int scale = 1; | 72 | static int scale = 1; |
73 | static int target_pid = -1; | ||
74 | static int null_run = 0; | ||
73 | 75 | ||
74 | static const unsigned int default_count[] = { | 76 | static int fd[MAX_NR_CPUS][MAX_COUNTERS]; |
75 | 1000000, | ||
76 | 1000000, | ||
77 | 10000, | ||
78 | 10000, | ||
79 | 1000000, | ||
80 | 10000, | ||
81 | }; | ||
82 | |||
83 | #define MAX_RUN 100 | ||
84 | |||
85 | static int run_count = 1; | ||
86 | static int run_idx = 0; | ||
87 | |||
88 | static u64 event_res[MAX_RUN][MAX_COUNTERS][3]; | ||
89 | static u64 event_scaled[MAX_RUN][MAX_COUNTERS]; | ||
90 | |||
91 | //static u64 event_hist[MAX_RUN][MAX_COUNTERS][3]; | ||
92 | |||
93 | 77 | ||
94 | static u64 runtime_nsecs[MAX_RUN]; | 78 | static u64 runtime_nsecs[MAX_RUN]; |
95 | static u64 walltime_nsecs[MAX_RUN]; | 79 | static u64 walltime_nsecs[MAX_RUN]; |
96 | static u64 runtime_cycles[MAX_RUN]; | 80 | static u64 runtime_cycles[MAX_RUN]; |
97 | 81 | ||
82 | static u64 event_res[MAX_RUN][MAX_COUNTERS][3]; | ||
83 | static u64 event_scaled[MAX_RUN][MAX_COUNTERS]; | ||
84 | |||
98 | static u64 event_res_avg[MAX_COUNTERS][3]; | 85 | static u64 event_res_avg[MAX_COUNTERS][3]; |
99 | static u64 event_res_noise[MAX_COUNTERS][3]; | 86 | static u64 event_res_noise[MAX_COUNTERS][3]; |
100 | 87 | ||
@@ -109,7 +96,14 @@ static u64 walltime_nsecs_noise; | |||
109 | static u64 runtime_cycles_avg; | 96 | static u64 runtime_cycles_avg; |
110 | static u64 runtime_cycles_noise; | 97 | static u64 runtime_cycles_noise; |
111 | 98 | ||
112 | static void create_perf_stat_counter(int counter) | 99 | #define MATCH_EVENT(t, c, counter) \ |
100 | (attrs[counter].type == PERF_TYPE_##t && \ | ||
101 | attrs[counter].config == PERF_COUNT_##c) | ||
102 | |||
103 | #define ERR_PERF_OPEN \ | ||
104 | "Error: counter %d, sys_perf_counter_open() syscall returned with %d (%s)\n" | ||
105 | |||
106 | static void create_perf_stat_counter(int counter, int pid) | ||
113 | { | 107 | { |
114 | struct perf_counter_attr *attr = attrs + counter; | 108 | struct perf_counter_attr *attr = attrs + counter; |
115 | 109 | ||
@@ -118,21 +112,23 @@ static void create_perf_stat_counter(int counter) | |||
118 | PERF_FORMAT_TOTAL_TIME_RUNNING; | 112 | PERF_FORMAT_TOTAL_TIME_RUNNING; |
119 | 113 | ||
120 | if (system_wide) { | 114 | if (system_wide) { |
121 | int cpu; | 115 | unsigned int cpu; |
122 | for (cpu = 0; cpu < nr_cpus; cpu ++) { | 116 | |
117 | for (cpu = 0; cpu < nr_cpus; cpu++) { | ||
123 | fd[cpu][counter] = sys_perf_counter_open(attr, -1, cpu, -1, 0); | 118 | fd[cpu][counter] = sys_perf_counter_open(attr, -1, cpu, -1, 0); |
124 | if (fd[cpu][counter] < 0 && verbose) { | 119 | if (fd[cpu][counter] < 0 && verbose) |
125 | printf("Error: counter %d, sys_perf_counter_open() syscall returned with %d (%s)\n", counter, fd[cpu][counter], strerror(errno)); | 120 | fprintf(stderr, ERR_PERF_OPEN, counter, |
126 | } | 121 | fd[cpu][counter], strerror(errno)); |
127 | } | 122 | } |
128 | } else { | 123 | } else { |
129 | attr->inherit = inherit; | 124 | attr->inherit = inherit; |
130 | attr->disabled = 1; | 125 | attr->disabled = 1; |
131 | 126 | attr->enable_on_exec = 1; | |
132 | fd[0][counter] = sys_perf_counter_open(attr, 0, -1, -1, 0); | 127 | |
133 | if (fd[0][counter] < 0 && verbose) { | 128 | fd[0][counter] = sys_perf_counter_open(attr, pid, -1, -1, 0); |
134 | printf("Error: counter %d, sys_perf_counter_open() syscall returned with %d (%s)\n", counter, fd[0][counter], strerror(errno)); | 129 | if (fd[0][counter] < 0 && verbose) |
135 | } | 130 | fprintf(stderr, ERR_PERF_OPEN, counter, |
131 | fd[0][counter], strerror(errno)); | ||
136 | } | 132 | } |
137 | } | 133 | } |
138 | 134 | ||
@@ -141,13 +137,8 @@ static void create_perf_stat_counter(int counter) | |||
141 | */ | 137 | */ |
142 | static inline int nsec_counter(int counter) | 138 | static inline int nsec_counter(int counter) |
143 | { | 139 | { |
144 | if (attrs[counter].type != PERF_TYPE_SOFTWARE) | 140 | if (MATCH_EVENT(SOFTWARE, SW_CPU_CLOCK, counter) || |
145 | return 0; | 141 | MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter)) |
146 | |||
147 | if (attrs[counter].config == PERF_COUNT_SW_CPU_CLOCK) | ||
148 | return 1; | ||
149 | |||
150 | if (attrs[counter].config == PERF_COUNT_SW_TASK_CLOCK) | ||
151 | return 1; | 142 | return 1; |
152 | 143 | ||
153 | return 0; | 144 | return 0; |
@@ -159,8 +150,8 @@ static inline int nsec_counter(int counter) | |||
159 | static void read_counter(int counter) | 150 | static void read_counter(int counter) |
160 | { | 151 | { |
161 | u64 *count, single_count[3]; | 152 | u64 *count, single_count[3]; |
162 | ssize_t res; | 153 | unsigned int cpu; |
163 | int cpu, nv; | 154 | size_t res, nv; |
164 | int scaled; | 155 | int scaled; |
165 | 156 | ||
166 | count = event_res[run_idx][counter]; | 157 | count = event_res[run_idx][counter]; |
@@ -168,12 +159,13 @@ static void read_counter(int counter) | |||
168 | count[0] = count[1] = count[2] = 0; | 159 | count[0] = count[1] = count[2] = 0; |
169 | 160 | ||
170 | nv = scale ? 3 : 1; | 161 | nv = scale ? 3 : 1; |
171 | for (cpu = 0; cpu < nr_cpus; cpu ++) { | 162 | for (cpu = 0; cpu < nr_cpus; cpu++) { |
172 | if (fd[cpu][counter] < 0) | 163 | if (fd[cpu][counter] < 0) |
173 | continue; | 164 | continue; |
174 | 165 | ||
175 | res = read(fd[cpu][counter], single_count, nv * sizeof(u64)); | 166 | res = read(fd[cpu][counter], single_count, nv * sizeof(u64)); |
176 | assert(res == nv * sizeof(u64)); | 167 | assert(res == nv * sizeof(u64)); |
168 | |||
177 | close(fd[cpu][counter]); | 169 | close(fd[cpu][counter]); |
178 | fd[cpu][counter] = -1; | 170 | fd[cpu][counter] = -1; |
179 | 171 | ||
@@ -201,46 +193,81 @@ static void read_counter(int counter) | |||
201 | /* | 193 | /* |
202 | * Save the full runtime - to allow normalization during printout: | 194 | * Save the full runtime - to allow normalization during printout: |
203 | */ | 195 | */ |
204 | if (attrs[counter].type == PERF_TYPE_SOFTWARE && | 196 | if (MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter)) |
205 | attrs[counter].config == PERF_COUNT_SW_TASK_CLOCK) | ||
206 | runtime_nsecs[run_idx] = count[0]; | 197 | runtime_nsecs[run_idx] = count[0]; |
207 | if (attrs[counter].type == PERF_TYPE_HARDWARE && | 198 | if (MATCH_EVENT(HARDWARE, HW_CPU_CYCLES, counter)) |
208 | attrs[counter].config == PERF_COUNT_HW_CPU_CYCLES) | ||
209 | runtime_cycles[run_idx] = count[0]; | 199 | runtime_cycles[run_idx] = count[0]; |
210 | } | 200 | } |
211 | 201 | ||
212 | static int run_perf_stat(int argc, const char **argv) | 202 | static int run_perf_stat(int argc __used, const char **argv) |
213 | { | 203 | { |
214 | unsigned long long t0, t1; | 204 | unsigned long long t0, t1; |
215 | int status = 0; | 205 | int status = 0; |
216 | int counter; | 206 | int counter; |
217 | int pid; | 207 | int pid; |
208 | int child_ready_pipe[2], go_pipe[2]; | ||
209 | char buf; | ||
218 | 210 | ||
219 | if (!system_wide) | 211 | if (!system_wide) |
220 | nr_cpus = 1; | 212 | nr_cpus = 1; |
221 | 213 | ||
222 | for (counter = 0; counter < nr_counters; counter++) | 214 | if (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0) { |
223 | create_perf_stat_counter(counter); | 215 | perror("failed to create pipes"); |
224 | 216 | exit(1); | |
225 | /* | 217 | } |
226 | * Enable counters and exec the command: | ||
227 | */ | ||
228 | t0 = rdclock(); | ||
229 | prctl(PR_TASK_PERF_COUNTERS_ENABLE); | ||
230 | 218 | ||
231 | if ((pid = fork()) < 0) | 219 | if ((pid = fork()) < 0) |
232 | perror("failed to fork"); | 220 | perror("failed to fork"); |
233 | 221 | ||
234 | if (!pid) { | 222 | if (!pid) { |
235 | if (execvp(argv[0], (char **)argv)) { | 223 | close(child_ready_pipe[0]); |
236 | perror(argv[0]); | 224 | close(go_pipe[1]); |
237 | exit(-1); | 225 | fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC); |
238 | } | 226 | |
227 | /* | ||
228 | * Do a dummy execvp to get the PLT entry resolved, | ||
229 | * so we avoid the resolver overhead on the real | ||
230 | * execvp call. | ||
231 | */ | ||
232 | execvp("", (char **)argv); | ||
233 | |||
234 | /* | ||
235 | * Tell the parent we're ready to go | ||
236 | */ | ||
237 | close(child_ready_pipe[1]); | ||
238 | |||
239 | /* | ||
240 | * Wait until the parent tells us to go. | ||
241 | */ | ||
242 | if (read(go_pipe[0], &buf, 1) == -1) | ||
243 | perror("unable to read pipe"); | ||
244 | |||
245 | execvp(argv[0], (char **)argv); | ||
246 | |||
247 | perror(argv[0]); | ||
248 | exit(-1); | ||
239 | } | 249 | } |
240 | 250 | ||
251 | /* | ||
252 | * Wait for the child to be ready to exec. | ||
253 | */ | ||
254 | close(child_ready_pipe[1]); | ||
255 | close(go_pipe[0]); | ||
256 | if (read(child_ready_pipe[0], &buf, 1) == -1) | ||
257 | perror("unable to read pipe"); | ||
258 | close(child_ready_pipe[0]); | ||
259 | |||
260 | for (counter = 0; counter < nr_counters; counter++) | ||
261 | create_perf_stat_counter(counter, pid); | ||
262 | |||
263 | /* | ||
264 | * Enable counters and exec the command: | ||
265 | */ | ||
266 | t0 = rdclock(); | ||
267 | |||
268 | close(go_pipe[1]); | ||
241 | wait(&status); | 269 | wait(&status); |
242 | 270 | ||
243 | prctl(PR_TASK_PERF_COUNTERS_DISABLE); | ||
244 | t1 = rdclock(); | 271 | t1 = rdclock(); |
245 | 272 | ||
246 | walltime_nsecs[run_idx] = t1 - t0; | 273 | walltime_nsecs[run_idx] = t1 - t0; |
@@ -262,11 +289,9 @@ static void nsec_printout(int counter, u64 *count, u64 *noise) | |||
262 | { | 289 | { |
263 | double msecs = (double)count[0] / 1000000; | 290 | double msecs = (double)count[0] / 1000000; |
264 | 291 | ||
265 | fprintf(stderr, " %14.6f %-20s", msecs, event_name(counter)); | 292 | fprintf(stderr, " %14.6f %-24s", msecs, event_name(counter)); |
266 | |||
267 | if (attrs[counter].type == PERF_TYPE_SOFTWARE && | ||
268 | attrs[counter].config == PERF_COUNT_SW_TASK_CLOCK) { | ||
269 | 293 | ||
294 | if (MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter)) { | ||
270 | if (walltime_nsecs_avg) | 295 | if (walltime_nsecs_avg) |
271 | fprintf(stderr, " # %10.3f CPUs ", | 296 | fprintf(stderr, " # %10.3f CPUs ", |
272 | (double)count[0] / (double)walltime_nsecs_avg); | 297 | (double)count[0] / (double)walltime_nsecs_avg); |
@@ -276,12 +301,10 @@ static void nsec_printout(int counter, u64 *count, u64 *noise) | |||
276 | 301 | ||
277 | static void abs_printout(int counter, u64 *count, u64 *noise) | 302 | static void abs_printout(int counter, u64 *count, u64 *noise) |
278 | { | 303 | { |
279 | fprintf(stderr, " %14Ld %-20s", count[0], event_name(counter)); | 304 | fprintf(stderr, " %14Ld %-24s", count[0], event_name(counter)); |
280 | 305 | ||
281 | if (runtime_cycles_avg && | 306 | if (runtime_cycles_avg && |
282 | attrs[counter].type == PERF_TYPE_HARDWARE && | 307 | MATCH_EVENT(HARDWARE, HW_INSTRUCTIONS, counter)) { |
283 | attrs[counter].config == PERF_COUNT_HW_INSTRUCTIONS) { | ||
284 | |||
285 | fprintf(stderr, " # %10.3f IPC ", | 308 | fprintf(stderr, " # %10.3f IPC ", |
286 | (double)count[0] / (double)runtime_cycles_avg); | 309 | (double)count[0] / (double)runtime_cycles_avg); |
287 | } else { | 310 | } else { |
@@ -306,7 +329,7 @@ static void print_counter(int counter) | |||
306 | scaled = event_scaled_avg[counter]; | 329 | scaled = event_scaled_avg[counter]; |
307 | 330 | ||
308 | if (scaled == -1) { | 331 | if (scaled == -1) { |
309 | fprintf(stderr, " %14s %-20s\n", | 332 | fprintf(stderr, " %14s %-24s\n", |
310 | "<not counted>", event_name(counter)); | 333 | "<not counted>", event_name(counter)); |
311 | return; | 334 | return; |
312 | } | 335 | } |
@@ -364,8 +387,11 @@ static void calc_avg(void) | |||
364 | event_res_avg[j]+1, event_res[i][j]+1); | 387 | event_res_avg[j]+1, event_res[i][j]+1); |
365 | update_avg("counter/2", j, | 388 | update_avg("counter/2", j, |
366 | event_res_avg[j]+2, event_res[i][j]+2); | 389 | event_res_avg[j]+2, event_res[i][j]+2); |
367 | update_avg("scaled", j, | 390 | if (event_scaled[i][j] != (u64)-1) |
368 | event_scaled_avg + j, event_scaled[i]+j); | 391 | update_avg("scaled", j, |
392 | event_scaled_avg + j, event_scaled[i]+j); | ||
393 | else | ||
394 | event_scaled_avg[j] = -1; | ||
369 | } | 395 | } |
370 | } | 396 | } |
371 | runtime_nsecs_avg /= run_count; | 397 | runtime_nsecs_avg /= run_count; |
@@ -429,11 +455,14 @@ static void print_stat(int argc, const char **argv) | |||
429 | for (counter = 0; counter < nr_counters; counter++) | 455 | for (counter = 0; counter < nr_counters; counter++) |
430 | print_counter(counter); | 456 | print_counter(counter); |
431 | 457 | ||
432 | |||
433 | fprintf(stderr, "\n"); | 458 | fprintf(stderr, "\n"); |
434 | fprintf(stderr, " %14.9f seconds time elapsed.\n", | 459 | fprintf(stderr, " %14.9f seconds time elapsed", |
435 | (double)walltime_nsecs_avg/1e9); | 460 | (double)walltime_nsecs_avg/1e9); |
436 | fprintf(stderr, "\n"); | 461 | if (run_count > 1) { |
462 | fprintf(stderr, " ( +- %7.3f%% )", | ||
463 | 100.0*(double)walltime_nsecs_noise/(double)walltime_nsecs_avg); | ||
464 | } | ||
465 | fprintf(stderr, "\n\n"); | ||
437 | } | 466 | } |
438 | 467 | ||
439 | static volatile int signr = -1; | 468 | static volatile int signr = -1; |
@@ -466,36 +495,37 @@ static const struct option options[] = { | |||
466 | OPT_INTEGER('p', "pid", &target_pid, | 495 | OPT_INTEGER('p', "pid", &target_pid, |
467 | "stat events on existing pid"), | 496 | "stat events on existing pid"), |
468 | OPT_BOOLEAN('a', "all-cpus", &system_wide, | 497 | OPT_BOOLEAN('a', "all-cpus", &system_wide, |
469 | "system-wide collection from all CPUs"), | 498 | "system-wide collection from all CPUs"), |
470 | OPT_BOOLEAN('S', "scale", &scale, | 499 | OPT_BOOLEAN('S', "scale", &scale, |
471 | "scale/normalize counters"), | 500 | "scale/normalize counters"), |
472 | OPT_BOOLEAN('v', "verbose", &verbose, | 501 | OPT_BOOLEAN('v', "verbose", &verbose, |
473 | "be more verbose (show counter open errors, etc)"), | 502 | "be more verbose (show counter open errors, etc)"), |
474 | OPT_INTEGER('r', "repeat", &run_count, | 503 | OPT_INTEGER('r', "repeat", &run_count, |
475 | "repeat command and print average + stddev (max: 100)"), | 504 | "repeat command and print average + stddev (max: 100)"), |
505 | OPT_BOOLEAN('n', "null", &null_run, | ||
506 | "null run - dont start any counters"), | ||
476 | OPT_END() | 507 | OPT_END() |
477 | }; | 508 | }; |
478 | 509 | ||
479 | int cmd_stat(int argc, const char **argv, const char *prefix) | 510 | int cmd_stat(int argc, const char **argv, const char *prefix __used) |
480 | { | 511 | { |
481 | int status; | 512 | int status; |
482 | 513 | ||
483 | page_size = sysconf(_SC_PAGE_SIZE); | ||
484 | |||
485 | memcpy(attrs, default_attrs, sizeof(attrs)); | ||
486 | |||
487 | argc = parse_options(argc, argv, options, stat_usage, 0); | 514 | argc = parse_options(argc, argv, options, stat_usage, 0); |
488 | if (!argc) | 515 | if (!argc) |
489 | usage_with_options(stat_usage, options); | 516 | usage_with_options(stat_usage, options); |
490 | if (run_count <= 0 || run_count > MAX_RUN) | 517 | if (run_count <= 0 || run_count > MAX_RUN) |
491 | usage_with_options(stat_usage, options); | 518 | usage_with_options(stat_usage, options); |
492 | 519 | ||
493 | if (!nr_counters) | 520 | /* Set attrs and nr_counters if no event is selected and !null_run */ |
494 | nr_counters = 8; | 521 | if (!null_run && !nr_counters) { |
522 | memcpy(attrs, default_attrs, sizeof(default_attrs)); | ||
523 | nr_counters = ARRAY_SIZE(default_attrs); | ||
524 | } | ||
495 | 525 | ||
496 | nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); | 526 | nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); |
497 | assert(nr_cpus <= MAX_NR_CPUS); | 527 | assert(nr_cpus <= MAX_NR_CPUS); |
498 | assert(nr_cpus >= 0); | 528 | assert((int)nr_cpus >= 0); |
499 | 529 | ||
500 | /* | 530 | /* |
501 | * We dont want to block the signals - that would cause | 531 | * We dont want to block the signals - that would cause |
@@ -511,7 +541,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix) | |||
511 | status = 0; | 541 | status = 0; |
512 | for (run_idx = 0; run_idx < run_count; run_idx++) { | 542 | for (run_idx = 0; run_idx < run_count; run_idx++) { |
513 | if (run_count != 1 && verbose) | 543 | if (run_count != 1 && verbose) |
514 | fprintf(stderr, "[ perf stat: executing run #%d ... ]\n", run_idx+1); | 544 | fprintf(stderr, "[ perf stat: executing run #%d ... ]\n", run_idx + 1); |
515 | status = run_perf_stat(argc, argv); | 545 | status = run_perf_stat(argc, argv); |
516 | } | 546 | } |
517 | 547 | ||