aboutsummaryrefslogtreecommitdiffstats
path: root/tools/perf/builtin-stat.c
diff options
context:
space:
mode:
Diffstat (limited to 'tools/perf/builtin-stat.c')
-rw-r--r--tools/perf/builtin-stat.c216
1 files changed, 123 insertions, 93 deletions
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 6d3eeac1ea25..27921a8ce1a9 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -32,6 +32,7 @@
32 * Wu Fengguang <fengguang.wu@intel.com> 32 * Wu Fengguang <fengguang.wu@intel.com>
33 * Mike Galbraith <efault@gmx.de> 33 * Mike Galbraith <efault@gmx.de>
34 * Paul Mackerras <paulus@samba.org> 34 * Paul Mackerras <paulus@samba.org>
35 * Jaswinder Singh Rajput <jaswinder@kernel.org>
35 * 36 *
36 * Released under the GPL v2. (and only v2, not any later version) 37 * Released under the GPL v2. (and only v2, not any later version)
37 */ 38 */
@@ -45,7 +46,7 @@
45#include <sys/prctl.h> 46#include <sys/prctl.h>
46#include <math.h> 47#include <math.h>
47 48
48static struct perf_counter_attr default_attrs[MAX_COUNTERS] = { 49static struct perf_counter_attr default_attrs[] = {
49 50
50 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK }, 51 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK },
51 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES}, 52 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES},
@@ -59,42 +60,28 @@ static struct perf_counter_attr default_attrs[MAX_COUNTERS] = {
59 60
60}; 61};
61 62
63#define MAX_RUN 100
64
62static int system_wide = 0; 65static int system_wide = 0;
63static int inherit = 1;
64static int verbose = 0; 66static int verbose = 0;
67static unsigned int nr_cpus = 0;
68static int run_idx = 0;
65 69
66static int fd[MAX_NR_CPUS][MAX_COUNTERS]; 70static int run_count = 1;
67 71static int inherit = 1;
68static int target_pid = -1;
69static int nr_cpus = 0;
70static unsigned int page_size;
71
72static int scale = 1; 72static int scale = 1;
73static int target_pid = -1;
74static int null_run = 0;
73 75
74static const unsigned int default_count[] = { 76static int fd[MAX_NR_CPUS][MAX_COUNTERS];
75 1000000,
76 1000000,
77 10000,
78 10000,
79 1000000,
80 10000,
81};
82
83#define MAX_RUN 100
84
85static int run_count = 1;
86static int run_idx = 0;
87
88static u64 event_res[MAX_RUN][MAX_COUNTERS][3];
89static u64 event_scaled[MAX_RUN][MAX_COUNTERS];
90
91//static u64 event_hist[MAX_RUN][MAX_COUNTERS][3];
92
93 77
94static u64 runtime_nsecs[MAX_RUN]; 78static u64 runtime_nsecs[MAX_RUN];
95static u64 walltime_nsecs[MAX_RUN]; 79static u64 walltime_nsecs[MAX_RUN];
96static u64 runtime_cycles[MAX_RUN]; 80static u64 runtime_cycles[MAX_RUN];
97 81
82static u64 event_res[MAX_RUN][MAX_COUNTERS][3];
83static u64 event_scaled[MAX_RUN][MAX_COUNTERS];
84
98static u64 event_res_avg[MAX_COUNTERS][3]; 85static u64 event_res_avg[MAX_COUNTERS][3];
99static u64 event_res_noise[MAX_COUNTERS][3]; 86static u64 event_res_noise[MAX_COUNTERS][3];
100 87
@@ -109,7 +96,14 @@ static u64 walltime_nsecs_noise;
109static u64 runtime_cycles_avg; 96static u64 runtime_cycles_avg;
110static u64 runtime_cycles_noise; 97static u64 runtime_cycles_noise;
111 98
112static void create_perf_stat_counter(int counter) 99#define MATCH_EVENT(t, c, counter) \
100 (attrs[counter].type == PERF_TYPE_##t && \
101 attrs[counter].config == PERF_COUNT_##c)
102
103#define ERR_PERF_OPEN \
104"Error: counter %d, sys_perf_counter_open() syscall returned with %d (%s)\n"
105
106static void create_perf_stat_counter(int counter, int pid)
113{ 107{
114 struct perf_counter_attr *attr = attrs + counter; 108 struct perf_counter_attr *attr = attrs + counter;
115 109
@@ -118,21 +112,23 @@ static void create_perf_stat_counter(int counter)
118 PERF_FORMAT_TOTAL_TIME_RUNNING; 112 PERF_FORMAT_TOTAL_TIME_RUNNING;
119 113
120 if (system_wide) { 114 if (system_wide) {
121 int cpu; 115 unsigned int cpu;
122 for (cpu = 0; cpu < nr_cpus; cpu ++) { 116
117 for (cpu = 0; cpu < nr_cpus; cpu++) {
123 fd[cpu][counter] = sys_perf_counter_open(attr, -1, cpu, -1, 0); 118 fd[cpu][counter] = sys_perf_counter_open(attr, -1, cpu, -1, 0);
124 if (fd[cpu][counter] < 0 && verbose) { 119 if (fd[cpu][counter] < 0 && verbose)
125 printf("Error: counter %d, sys_perf_counter_open() syscall returned with %d (%s)\n", counter, fd[cpu][counter], strerror(errno)); 120 fprintf(stderr, ERR_PERF_OPEN, counter,
126 } 121 fd[cpu][counter], strerror(errno));
127 } 122 }
128 } else { 123 } else {
129 attr->inherit = inherit; 124 attr->inherit = inherit;
130 attr->disabled = 1; 125 attr->disabled = 1;
131 126 attr->enable_on_exec = 1;
132 fd[0][counter] = sys_perf_counter_open(attr, 0, -1, -1, 0); 127
133 if (fd[0][counter] < 0 && verbose) { 128 fd[0][counter] = sys_perf_counter_open(attr, pid, -1, -1, 0);
134 printf("Error: counter %d, sys_perf_counter_open() syscall returned with %d (%s)\n", counter, fd[0][counter], strerror(errno)); 129 if (fd[0][counter] < 0 && verbose)
135 } 130 fprintf(stderr, ERR_PERF_OPEN, counter,
131 fd[0][counter], strerror(errno));
136 } 132 }
137} 133}
138 134
@@ -141,13 +137,8 @@ static void create_perf_stat_counter(int counter)
141 */ 137 */
142static inline int nsec_counter(int counter) 138static inline int nsec_counter(int counter)
143{ 139{
144 if (attrs[counter].type != PERF_TYPE_SOFTWARE) 140 if (MATCH_EVENT(SOFTWARE, SW_CPU_CLOCK, counter) ||
145 return 0; 141 MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter))
146
147 if (attrs[counter].config == PERF_COUNT_SW_CPU_CLOCK)
148 return 1;
149
150 if (attrs[counter].config == PERF_COUNT_SW_TASK_CLOCK)
151 return 1; 142 return 1;
152 143
153 return 0; 144 return 0;
@@ -159,8 +150,8 @@ static inline int nsec_counter(int counter)
159static void read_counter(int counter) 150static void read_counter(int counter)
160{ 151{
161 u64 *count, single_count[3]; 152 u64 *count, single_count[3];
162 ssize_t res; 153 unsigned int cpu;
163 int cpu, nv; 154 size_t res, nv;
164 int scaled; 155 int scaled;
165 156
166 count = event_res[run_idx][counter]; 157 count = event_res[run_idx][counter];
@@ -168,12 +159,13 @@ static void read_counter(int counter)
168 count[0] = count[1] = count[2] = 0; 159 count[0] = count[1] = count[2] = 0;
169 160
170 nv = scale ? 3 : 1; 161 nv = scale ? 3 : 1;
171 for (cpu = 0; cpu < nr_cpus; cpu ++) { 162 for (cpu = 0; cpu < nr_cpus; cpu++) {
172 if (fd[cpu][counter] < 0) 163 if (fd[cpu][counter] < 0)
173 continue; 164 continue;
174 165
175 res = read(fd[cpu][counter], single_count, nv * sizeof(u64)); 166 res = read(fd[cpu][counter], single_count, nv * sizeof(u64));
176 assert(res == nv * sizeof(u64)); 167 assert(res == nv * sizeof(u64));
168
177 close(fd[cpu][counter]); 169 close(fd[cpu][counter]);
178 fd[cpu][counter] = -1; 170 fd[cpu][counter] = -1;
179 171
@@ -201,46 +193,81 @@ static void read_counter(int counter)
201 /* 193 /*
202 * Save the full runtime - to allow normalization during printout: 194 * Save the full runtime - to allow normalization during printout:
203 */ 195 */
204 if (attrs[counter].type == PERF_TYPE_SOFTWARE && 196 if (MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter))
205 attrs[counter].config == PERF_COUNT_SW_TASK_CLOCK)
206 runtime_nsecs[run_idx] = count[0]; 197 runtime_nsecs[run_idx] = count[0];
207 if (attrs[counter].type == PERF_TYPE_HARDWARE && 198 if (MATCH_EVENT(HARDWARE, HW_CPU_CYCLES, counter))
208 attrs[counter].config == PERF_COUNT_HW_CPU_CYCLES)
209 runtime_cycles[run_idx] = count[0]; 199 runtime_cycles[run_idx] = count[0];
210} 200}
211 201
212static int run_perf_stat(int argc, const char **argv) 202static int run_perf_stat(int argc __used, const char **argv)
213{ 203{
214 unsigned long long t0, t1; 204 unsigned long long t0, t1;
215 int status = 0; 205 int status = 0;
216 int counter; 206 int counter;
217 int pid; 207 int pid;
208 int child_ready_pipe[2], go_pipe[2];
209 char buf;
218 210
219 if (!system_wide) 211 if (!system_wide)
220 nr_cpus = 1; 212 nr_cpus = 1;
221 213
222 for (counter = 0; counter < nr_counters; counter++) 214 if (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0) {
223 create_perf_stat_counter(counter); 215 perror("failed to create pipes");
224 216 exit(1);
225 /* 217 }
226 * Enable counters and exec the command:
227 */
228 t0 = rdclock();
229 prctl(PR_TASK_PERF_COUNTERS_ENABLE);
230 218
231 if ((pid = fork()) < 0) 219 if ((pid = fork()) < 0)
232 perror("failed to fork"); 220 perror("failed to fork");
233 221
234 if (!pid) { 222 if (!pid) {
235 if (execvp(argv[0], (char **)argv)) { 223 close(child_ready_pipe[0]);
236 perror(argv[0]); 224 close(go_pipe[1]);
237 exit(-1); 225 fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC);
238 } 226
227 /*
228 * Do a dummy execvp to get the PLT entry resolved,
229 * so we avoid the resolver overhead on the real
230 * execvp call.
231 */
232 execvp("", (char **)argv);
233
234 /*
235 * Tell the parent we're ready to go
236 */
237 close(child_ready_pipe[1]);
238
239 /*
240 * Wait until the parent tells us to go.
241 */
242 if (read(go_pipe[0], &buf, 1) == -1)
243 perror("unable to read pipe");
244
245 execvp(argv[0], (char **)argv);
246
247 perror(argv[0]);
248 exit(-1);
239 } 249 }
240 250
251 /*
252 * Wait for the child to be ready to exec.
253 */
254 close(child_ready_pipe[1]);
255 close(go_pipe[0]);
256 if (read(child_ready_pipe[0], &buf, 1) == -1)
257 perror("unable to read pipe");
258 close(child_ready_pipe[0]);
259
260 for (counter = 0; counter < nr_counters; counter++)
261 create_perf_stat_counter(counter, pid);
262
263 /*
264 * Enable counters and exec the command:
265 */
266 t0 = rdclock();
267
268 close(go_pipe[1]);
241 wait(&status); 269 wait(&status);
242 270
243 prctl(PR_TASK_PERF_COUNTERS_DISABLE);
244 t1 = rdclock(); 271 t1 = rdclock();
245 272
246 walltime_nsecs[run_idx] = t1 - t0; 273 walltime_nsecs[run_idx] = t1 - t0;
@@ -262,11 +289,9 @@ static void nsec_printout(int counter, u64 *count, u64 *noise)
262{ 289{
263 double msecs = (double)count[0] / 1000000; 290 double msecs = (double)count[0] / 1000000;
264 291
265 fprintf(stderr, " %14.6f %-20s", msecs, event_name(counter)); 292 fprintf(stderr, " %14.6f %-24s", msecs, event_name(counter));
266
267 if (attrs[counter].type == PERF_TYPE_SOFTWARE &&
268 attrs[counter].config == PERF_COUNT_SW_TASK_CLOCK) {
269 293
294 if (MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter)) {
270 if (walltime_nsecs_avg) 295 if (walltime_nsecs_avg)
271 fprintf(stderr, " # %10.3f CPUs ", 296 fprintf(stderr, " # %10.3f CPUs ",
272 (double)count[0] / (double)walltime_nsecs_avg); 297 (double)count[0] / (double)walltime_nsecs_avg);
@@ -276,12 +301,10 @@ static void nsec_printout(int counter, u64 *count, u64 *noise)
276 301
277static void abs_printout(int counter, u64 *count, u64 *noise) 302static void abs_printout(int counter, u64 *count, u64 *noise)
278{ 303{
279 fprintf(stderr, " %14Ld %-20s", count[0], event_name(counter)); 304 fprintf(stderr, " %14Ld %-24s", count[0], event_name(counter));
280 305
281 if (runtime_cycles_avg && 306 if (runtime_cycles_avg &&
282 attrs[counter].type == PERF_TYPE_HARDWARE && 307 MATCH_EVENT(HARDWARE, HW_INSTRUCTIONS, counter)) {
283 attrs[counter].config == PERF_COUNT_HW_INSTRUCTIONS) {
284
285 fprintf(stderr, " # %10.3f IPC ", 308 fprintf(stderr, " # %10.3f IPC ",
286 (double)count[0] / (double)runtime_cycles_avg); 309 (double)count[0] / (double)runtime_cycles_avg);
287 } else { 310 } else {
@@ -306,7 +329,7 @@ static void print_counter(int counter)
306 scaled = event_scaled_avg[counter]; 329 scaled = event_scaled_avg[counter];
307 330
308 if (scaled == -1) { 331 if (scaled == -1) {
309 fprintf(stderr, " %14s %-20s\n", 332 fprintf(stderr, " %14s %-24s\n",
310 "<not counted>", event_name(counter)); 333 "<not counted>", event_name(counter));
311 return; 334 return;
312 } 335 }
@@ -364,8 +387,11 @@ static void calc_avg(void)
364 event_res_avg[j]+1, event_res[i][j]+1); 387 event_res_avg[j]+1, event_res[i][j]+1);
365 update_avg("counter/2", j, 388 update_avg("counter/2", j,
366 event_res_avg[j]+2, event_res[i][j]+2); 389 event_res_avg[j]+2, event_res[i][j]+2);
367 update_avg("scaled", j, 390 if (event_scaled[i][j] != (u64)-1)
368 event_scaled_avg + j, event_scaled[i]+j); 391 update_avg("scaled", j,
392 event_scaled_avg + j, event_scaled[i]+j);
393 else
394 event_scaled_avg[j] = -1;
369 } 395 }
370 } 396 }
371 runtime_nsecs_avg /= run_count; 397 runtime_nsecs_avg /= run_count;
@@ -429,11 +455,14 @@ static void print_stat(int argc, const char **argv)
429 for (counter = 0; counter < nr_counters; counter++) 455 for (counter = 0; counter < nr_counters; counter++)
430 print_counter(counter); 456 print_counter(counter);
431 457
432
433 fprintf(stderr, "\n"); 458 fprintf(stderr, "\n");
434 fprintf(stderr, " %14.9f seconds time elapsed.\n", 459 fprintf(stderr, " %14.9f seconds time elapsed",
435 (double)walltime_nsecs_avg/1e9); 460 (double)walltime_nsecs_avg/1e9);
436 fprintf(stderr, "\n"); 461 if (run_count > 1) {
462 fprintf(stderr, " ( +- %7.3f%% )",
463 100.0*(double)walltime_nsecs_noise/(double)walltime_nsecs_avg);
464 }
465 fprintf(stderr, "\n\n");
437} 466}
438 467
439static volatile int signr = -1; 468static volatile int signr = -1;
@@ -466,36 +495,37 @@ static const struct option options[] = {
466 OPT_INTEGER('p', "pid", &target_pid, 495 OPT_INTEGER('p', "pid", &target_pid,
467 "stat events on existing pid"), 496 "stat events on existing pid"),
468 OPT_BOOLEAN('a', "all-cpus", &system_wide, 497 OPT_BOOLEAN('a', "all-cpus", &system_wide,
469 "system-wide collection from all CPUs"), 498 "system-wide collection from all CPUs"),
470 OPT_BOOLEAN('S', "scale", &scale, 499 OPT_BOOLEAN('S', "scale", &scale,
471 "scale/normalize counters"), 500 "scale/normalize counters"),
472 OPT_BOOLEAN('v', "verbose", &verbose, 501 OPT_BOOLEAN('v', "verbose", &verbose,
473 "be more verbose (show counter open errors, etc)"), 502 "be more verbose (show counter open errors, etc)"),
474 OPT_INTEGER('r', "repeat", &run_count, 503 OPT_INTEGER('r', "repeat", &run_count,
475 "repeat command and print average + stddev (max: 100)"), 504 "repeat command and print average + stddev (max: 100)"),
505 OPT_BOOLEAN('n', "null", &null_run,
506 "null run - dont start any counters"),
476 OPT_END() 507 OPT_END()
477}; 508};
478 509
479int cmd_stat(int argc, const char **argv, const char *prefix) 510int cmd_stat(int argc, const char **argv, const char *prefix __used)
480{ 511{
481 int status; 512 int status;
482 513
483 page_size = sysconf(_SC_PAGE_SIZE);
484
485 memcpy(attrs, default_attrs, sizeof(attrs));
486
487 argc = parse_options(argc, argv, options, stat_usage, 0); 514 argc = parse_options(argc, argv, options, stat_usage, 0);
488 if (!argc) 515 if (!argc)
489 usage_with_options(stat_usage, options); 516 usage_with_options(stat_usage, options);
490 if (run_count <= 0 || run_count > MAX_RUN) 517 if (run_count <= 0 || run_count > MAX_RUN)
491 usage_with_options(stat_usage, options); 518 usage_with_options(stat_usage, options);
492 519
493 if (!nr_counters) 520 /* Set attrs and nr_counters if no event is selected and !null_run */
494 nr_counters = 8; 521 if (!null_run && !nr_counters) {
522 memcpy(attrs, default_attrs, sizeof(default_attrs));
523 nr_counters = ARRAY_SIZE(default_attrs);
524 }
495 525
496 nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); 526 nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
497 assert(nr_cpus <= MAX_NR_CPUS); 527 assert(nr_cpus <= MAX_NR_CPUS);
498 assert(nr_cpus >= 0); 528 assert((int)nr_cpus >= 0);
499 529
500 /* 530 /*
501 * We dont want to block the signals - that would cause 531 * We dont want to block the signals - that would cause
@@ -511,7 +541,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix)
511 status = 0; 541 status = 0;
512 for (run_idx = 0; run_idx < run_count; run_idx++) { 542 for (run_idx = 0; run_idx < run_count; run_idx++) {
513 if (run_count != 1 && verbose) 543 if (run_count != 1 && verbose)
514 fprintf(stderr, "[ perf stat: executing run #%d ... ]\n", run_idx+1); 544 fprintf(stderr, "[ perf stat: executing run #%d ... ]\n", run_idx + 1);
515 status = run_perf_stat(argc, argv); 545 status = run_perf_stat(argc, argv);
516 } 546 }
517 547