diff options
Diffstat (limited to 'tools/perf/builtin-stat.c')
-rw-r--r-- | tools/perf/builtin-stat.c | 148 |
1 files changed, 90 insertions, 58 deletions
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 3db31e7bf173..95db31cff6fd 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c | |||
@@ -44,21 +44,25 @@ | |||
44 | #include "util/parse-events.h" | 44 | #include "util/parse-events.h" |
45 | #include "util/event.h" | 45 | #include "util/event.h" |
46 | #include "util/debug.h" | 46 | #include "util/debug.h" |
47 | #include "util/header.h" | ||
48 | #include "util/cpumap.h" | ||
47 | 49 | ||
48 | #include <sys/prctl.h> | 50 | #include <sys/prctl.h> |
49 | #include <math.h> | 51 | #include <math.h> |
50 | 52 | ||
51 | static struct perf_event_attr default_attrs[] = { | 53 | static struct perf_event_attr default_attrs[] = { |
52 | 54 | ||
53 | { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK }, | 55 | { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK }, |
54 | { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES}, | 56 | { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES }, |
55 | { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS }, | 57 | { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS }, |
56 | { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS }, | 58 | { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS }, |
57 | 59 | ||
58 | { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES }, | 60 | { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES }, |
59 | { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS }, | 61 | { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS }, |
60 | { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_REFERENCES}, | 62 | { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS }, |
61 | { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_MISSES }, | 63 | { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES }, |
64 | { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_REFERENCES }, | ||
65 | { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_MISSES }, | ||
62 | 66 | ||
63 | }; | 67 | }; |
64 | 68 | ||
@@ -77,6 +81,8 @@ static int fd[MAX_NR_CPUS][MAX_COUNTERS]; | |||
77 | 81 | ||
78 | static int event_scaled[MAX_COUNTERS]; | 82 | static int event_scaled[MAX_COUNTERS]; |
79 | 83 | ||
84 | static volatile int done = 0; | ||
85 | |||
80 | struct stats | 86 | struct stats |
81 | { | 87 | { |
82 | double n, mean, M2; | 88 | double n, mean, M2; |
@@ -125,6 +131,7 @@ struct stats event_res_stats[MAX_COUNTERS][3]; | |||
125 | struct stats runtime_nsecs_stats; | 131 | struct stats runtime_nsecs_stats; |
126 | struct stats walltime_nsecs_stats; | 132 | struct stats walltime_nsecs_stats; |
127 | struct stats runtime_cycles_stats; | 133 | struct stats runtime_cycles_stats; |
134 | struct stats runtime_branches_stats; | ||
128 | 135 | ||
129 | #define MATCH_EVENT(t, c, counter) \ | 136 | #define MATCH_EVENT(t, c, counter) \ |
130 | (attrs[counter].type == PERF_TYPE_##t && \ | 137 | (attrs[counter].type == PERF_TYPE_##t && \ |
@@ -145,7 +152,7 @@ static void create_perf_stat_counter(int counter, int pid) | |||
145 | unsigned int cpu; | 152 | unsigned int cpu; |
146 | 153 | ||
147 | for (cpu = 0; cpu < nr_cpus; cpu++) { | 154 | for (cpu = 0; cpu < nr_cpus; cpu++) { |
148 | fd[cpu][counter] = sys_perf_event_open(attr, -1, cpu, -1, 0); | 155 | fd[cpu][counter] = sys_perf_event_open(attr, -1, cpumap[cpu], -1, 0); |
149 | if (fd[cpu][counter] < 0 && verbose) | 156 | if (fd[cpu][counter] < 0 && verbose) |
150 | fprintf(stderr, ERR_PERF_OPEN, counter, | 157 | fprintf(stderr, ERR_PERF_OPEN, counter, |
151 | fd[cpu][counter], strerror(errno)); | 158 | fd[cpu][counter], strerror(errno)); |
@@ -235,6 +242,8 @@ static void read_counter(int counter) | |||
235 | update_stats(&runtime_nsecs_stats, count[0]); | 242 | update_stats(&runtime_nsecs_stats, count[0]); |
236 | if (MATCH_EVENT(HARDWARE, HW_CPU_CYCLES, counter)) | 243 | if (MATCH_EVENT(HARDWARE, HW_CPU_CYCLES, counter)) |
237 | update_stats(&runtime_cycles_stats, count[0]); | 244 | update_stats(&runtime_cycles_stats, count[0]); |
245 | if (MATCH_EVENT(HARDWARE, HW_BRANCH_INSTRUCTIONS, counter)) | ||
246 | update_stats(&runtime_branches_stats, count[0]); | ||
238 | } | 247 | } |
239 | 248 | ||
240 | static int run_perf_stat(int argc __used, const char **argv) | 249 | static int run_perf_stat(int argc __used, const char **argv) |
@@ -242,61 +251,64 @@ static int run_perf_stat(int argc __used, const char **argv) | |||
242 | unsigned long long t0, t1; | 251 | unsigned long long t0, t1; |
243 | int status = 0; | 252 | int status = 0; |
244 | int counter; | 253 | int counter; |
245 | int pid; | 254 | int pid = target_pid; |
246 | int child_ready_pipe[2], go_pipe[2]; | 255 | int child_ready_pipe[2], go_pipe[2]; |
256 | const bool forks = (target_pid == -1 && argc > 0); | ||
247 | char buf; | 257 | char buf; |
248 | 258 | ||
249 | if (!system_wide) | 259 | if (!system_wide) |
250 | nr_cpus = 1; | 260 | nr_cpus = 1; |
251 | 261 | ||
252 | if (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0) { | 262 | if (forks && (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0)) { |
253 | perror("failed to create pipes"); | 263 | perror("failed to create pipes"); |
254 | exit(1); | 264 | exit(1); |
255 | } | 265 | } |
256 | 266 | ||
257 | if ((pid = fork()) < 0) | 267 | if (forks) { |
258 | perror("failed to fork"); | 268 | if ((pid = fork()) < 0) |
259 | 269 | perror("failed to fork"); | |
260 | if (!pid) { | 270 | |
261 | close(child_ready_pipe[0]); | 271 | if (!pid) { |
262 | close(go_pipe[1]); | 272 | close(child_ready_pipe[0]); |
263 | fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC); | 273 | close(go_pipe[1]); |
274 | fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC); | ||
275 | |||
276 | /* | ||
277 | * Do a dummy execvp to get the PLT entry resolved, | ||
278 | * so we avoid the resolver overhead on the real | ||
279 | * execvp call. | ||
280 | */ | ||
281 | execvp("", (char **)argv); | ||
282 | |||
283 | /* | ||
284 | * Tell the parent we're ready to go | ||
285 | */ | ||
286 | close(child_ready_pipe[1]); | ||
287 | |||
288 | /* | ||
289 | * Wait until the parent tells us to go. | ||
290 | */ | ||
291 | if (read(go_pipe[0], &buf, 1) == -1) | ||
292 | perror("unable to read pipe"); | ||
293 | |||
294 | execvp(argv[0], (char **)argv); | ||
295 | |||
296 | perror(argv[0]); | ||
297 | exit(-1); | ||
298 | } | ||
264 | 299 | ||
265 | /* | 300 | child_pid = pid; |
266 | * Do a dummy execvp to get the PLT entry resolved, | ||
267 | * so we avoid the resolver overhead on the real | ||
268 | * execvp call. | ||
269 | */ | ||
270 | execvp("", (char **)argv); | ||
271 | 301 | ||
272 | /* | 302 | /* |
273 | * Tell the parent we're ready to go | 303 | * Wait for the child to be ready to exec. |
274 | */ | 304 | */ |
275 | close(child_ready_pipe[1]); | 305 | close(child_ready_pipe[1]); |
276 | 306 | close(go_pipe[0]); | |
277 | /* | 307 | if (read(child_ready_pipe[0], &buf, 1) == -1) |
278 | * Wait until the parent tells us to go. | ||
279 | */ | ||
280 | if (read(go_pipe[0], &buf, 1) == -1) | ||
281 | perror("unable to read pipe"); | 308 | perror("unable to read pipe"); |
282 | 309 | close(child_ready_pipe[0]); | |
283 | execvp(argv[0], (char **)argv); | ||
284 | |||
285 | perror(argv[0]); | ||
286 | exit(-1); | ||
287 | } | 310 | } |
288 | 311 | ||
289 | child_pid = pid; | ||
290 | |||
291 | /* | ||
292 | * Wait for the child to be ready to exec. | ||
293 | */ | ||
294 | close(child_ready_pipe[1]); | ||
295 | close(go_pipe[0]); | ||
296 | if (read(child_ready_pipe[0], &buf, 1) == -1) | ||
297 | perror("unable to read pipe"); | ||
298 | close(child_ready_pipe[0]); | ||
299 | |||
300 | for (counter = 0; counter < nr_counters; counter++) | 312 | for (counter = 0; counter < nr_counters; counter++) |
301 | create_perf_stat_counter(counter, pid); | 313 | create_perf_stat_counter(counter, pid); |
302 | 314 | ||
@@ -305,8 +317,12 @@ static int run_perf_stat(int argc __used, const char **argv) | |||
305 | */ | 317 | */ |
306 | t0 = rdclock(); | 318 | t0 = rdclock(); |
307 | 319 | ||
308 | close(go_pipe[1]); | 320 | if (forks) { |
309 | wait(&status); | 321 | close(go_pipe[1]); |
322 | wait(&status); | ||
323 | } else { | ||
324 | while(!done); | ||
325 | } | ||
310 | 326 | ||
311 | t1 = rdclock(); | 327 | t1 = rdclock(); |
312 | 328 | ||
@@ -352,7 +368,16 @@ static void abs_printout(int counter, double avg) | |||
352 | ratio = avg / total; | 368 | ratio = avg / total; |
353 | 369 | ||
354 | fprintf(stderr, " # %10.3f IPC ", ratio); | 370 | fprintf(stderr, " # %10.3f IPC ", ratio); |
355 | } else { | 371 | } else if (MATCH_EVENT(HARDWARE, HW_BRANCH_MISSES, counter) && |
372 | runtime_branches_stats.n != 0) { | ||
373 | total = avg_stats(&runtime_branches_stats); | ||
374 | |||
375 | if (total) | ||
376 | ratio = avg * 100 / total; | ||
377 | |||
378 | fprintf(stderr, " # %10.3f %% ", ratio); | ||
379 | |||
380 | } else if (runtime_nsecs_stats.n != 0) { | ||
356 | total = avg_stats(&runtime_nsecs_stats); | 381 | total = avg_stats(&runtime_nsecs_stats); |
357 | 382 | ||
358 | if (total) | 383 | if (total) |
@@ -403,10 +428,13 @@ static void print_stat(int argc, const char **argv) | |||
403 | fflush(stdout); | 428 | fflush(stdout); |
404 | 429 | ||
405 | fprintf(stderr, "\n"); | 430 | fprintf(stderr, "\n"); |
406 | fprintf(stderr, " Performance counter stats for \'%s", argv[0]); | 431 | fprintf(stderr, " Performance counter stats for "); |
407 | 432 | if(target_pid == -1) { | |
408 | for (i = 1; i < argc; i++) | 433 | fprintf(stderr, "\'%s", argv[0]); |
409 | fprintf(stderr, " %s", argv[i]); | 434 | for (i = 1; i < argc; i++) |
435 | fprintf(stderr, " %s", argv[i]); | ||
436 | }else | ||
437 | fprintf(stderr, "task pid \'%d", target_pid); | ||
410 | 438 | ||
411 | fprintf(stderr, "\'"); | 439 | fprintf(stderr, "\'"); |
412 | if (run_count > 1) | 440 | if (run_count > 1) |
@@ -431,6 +459,9 @@ static volatile int signr = -1; | |||
431 | 459 | ||
432 | static void skip_signal(int signo) | 460 | static void skip_signal(int signo) |
433 | { | 461 | { |
462 | if(target_pid != -1) | ||
463 | done = 1; | ||
464 | |||
434 | signr = signo; | 465 | signr = signo; |
435 | } | 466 | } |
436 | 467 | ||
@@ -447,7 +478,7 @@ static void sig_atexit(void) | |||
447 | } | 478 | } |
448 | 479 | ||
449 | static const char * const stat_usage[] = { | 480 | static const char * const stat_usage[] = { |
450 | "perf stat [<options>] <command>", | 481 | "perf stat [<options>] [<command>]", |
451 | NULL | 482 | NULL |
452 | }; | 483 | }; |
453 | 484 | ||
@@ -478,7 +509,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used) | |||
478 | 509 | ||
479 | argc = parse_options(argc, argv, options, stat_usage, | 510 | argc = parse_options(argc, argv, options, stat_usage, |
480 | PARSE_OPT_STOP_AT_NON_OPTION); | 511 | PARSE_OPT_STOP_AT_NON_OPTION); |
481 | if (!argc) | 512 | if (!argc && target_pid == -1) |
482 | usage_with_options(stat_usage, options); | 513 | usage_with_options(stat_usage, options); |
483 | if (run_count <= 0) | 514 | if (run_count <= 0) |
484 | usage_with_options(stat_usage, options); | 515 | usage_with_options(stat_usage, options); |
@@ -489,9 +520,10 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used) | |||
489 | nr_counters = ARRAY_SIZE(default_attrs); | 520 | nr_counters = ARRAY_SIZE(default_attrs); |
490 | } | 521 | } |
491 | 522 | ||
492 | nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); | 523 | if (system_wide) |
493 | assert(nr_cpus <= MAX_NR_CPUS); | 524 | nr_cpus = read_cpu_map(); |
494 | assert((int)nr_cpus >= 0); | 525 | else |
526 | nr_cpus = 1; | ||
495 | 527 | ||
496 | /* | 528 | /* |
497 | * We dont want to block the signals - that would cause | 529 | * We dont want to block the signals - that would cause |