about | summary | refs | log | tree | commit | diff | stats
path: root/tools/perf/builtin-stat.c
diff options
context:
space:
mode:
Diffstat (limited to 'tools/perf/builtin-stat.c')
-rw-r--r-- tools/perf/builtin-stat.c 148
1 files changed, 90 insertions, 58 deletions
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 3db31e7bf173..95db31cff6fd 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -44,21 +44,25 @@
44#include "util/parse-events.h" 44#include "util/parse-events.h"
45#include "util/event.h" 45#include "util/event.h"
46#include "util/debug.h" 46#include "util/debug.h"
47#include "util/header.h"
48#include "util/cpumap.h"
47 49
48#include <sys/prctl.h> 50#include <sys/prctl.h>
49#include <math.h> 51#include <math.h>
50 52
51static struct perf_event_attr default_attrs[] = { 53static struct perf_event_attr default_attrs[] = {
52 54
53 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK }, 55 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK },
54 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES}, 56 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES },
55 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS }, 57 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS },
56 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS }, 58 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS },
57 59
58 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES }, 60 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES },
59 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS }, 61 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS },
60 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_REFERENCES}, 62 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS },
61 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_MISSES }, 63 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES },
64 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_REFERENCES },
65 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_MISSES },
62 66
63}; 67};
64 68
@@ -77,6 +81,8 @@ static int fd[MAX_NR_CPUS][MAX_COUNTERS];
77 81
78static int event_scaled[MAX_COUNTERS]; 82static int event_scaled[MAX_COUNTERS];
79 83
84static volatile int done = 0;
85
80struct stats 86struct stats
81{ 87{
82 double n, mean, M2; 88 double n, mean, M2;
@@ -125,6 +131,7 @@ struct stats event_res_stats[MAX_COUNTERS][3];
125struct stats runtime_nsecs_stats; 131struct stats runtime_nsecs_stats;
126struct stats walltime_nsecs_stats; 132struct stats walltime_nsecs_stats;
127struct stats runtime_cycles_stats; 133struct stats runtime_cycles_stats;
134struct stats runtime_branches_stats;
128 135
129#define MATCH_EVENT(t, c, counter) \ 136#define MATCH_EVENT(t, c, counter) \
130 (attrs[counter].type == PERF_TYPE_##t && \ 137 (attrs[counter].type == PERF_TYPE_##t && \
@@ -145,7 +152,7 @@ static void create_perf_stat_counter(int counter, int pid)
145 unsigned int cpu; 152 unsigned int cpu;
146 153
147 for (cpu = 0; cpu < nr_cpus; cpu++) { 154 for (cpu = 0; cpu < nr_cpus; cpu++) {
148 fd[cpu][counter] = sys_perf_event_open(attr, -1, cpu, -1, 0); 155 fd[cpu][counter] = sys_perf_event_open(attr, -1, cpumap[cpu], -1, 0);
149 if (fd[cpu][counter] < 0 && verbose) 156 if (fd[cpu][counter] < 0 && verbose)
150 fprintf(stderr, ERR_PERF_OPEN, counter, 157 fprintf(stderr, ERR_PERF_OPEN, counter,
151 fd[cpu][counter], strerror(errno)); 158 fd[cpu][counter], strerror(errno));
@@ -235,6 +242,8 @@ static void read_counter(int counter)
235 update_stats(&runtime_nsecs_stats, count[0]); 242 update_stats(&runtime_nsecs_stats, count[0]);
236 if (MATCH_EVENT(HARDWARE, HW_CPU_CYCLES, counter)) 243 if (MATCH_EVENT(HARDWARE, HW_CPU_CYCLES, counter))
237 update_stats(&runtime_cycles_stats, count[0]); 244 update_stats(&runtime_cycles_stats, count[0]);
245 if (MATCH_EVENT(HARDWARE, HW_BRANCH_INSTRUCTIONS, counter))
246 update_stats(&runtime_branches_stats, count[0]);
238} 247}
239 248
240static int run_perf_stat(int argc __used, const char **argv) 249static int run_perf_stat(int argc __used, const char **argv)
@@ -242,61 +251,64 @@ static int run_perf_stat(int argc __used, const char **argv)
242 unsigned long long t0, t1; 251 unsigned long long t0, t1;
243 int status = 0; 252 int status = 0;
244 int counter; 253 int counter;
245 int pid; 254 int pid = target_pid;
246 int child_ready_pipe[2], go_pipe[2]; 255 int child_ready_pipe[2], go_pipe[2];
256 const bool forks = (target_pid == -1 && argc > 0);
247 char buf; 257 char buf;
248 258
249 if (!system_wide) 259 if (!system_wide)
250 nr_cpus = 1; 260 nr_cpus = 1;
251 261
252 if (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0) { 262 if (forks && (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0)) {
253 perror("failed to create pipes"); 263 perror("failed to create pipes");
254 exit(1); 264 exit(1);
255 } 265 }
256 266
257 if ((pid = fork()) < 0) 267 if (forks) {
258 perror("failed to fork"); 268 if ((pid = fork()) < 0)
259 269 perror("failed to fork");
260 if (!pid) { 270
261 close(child_ready_pipe[0]); 271 if (!pid) {
262 close(go_pipe[1]); 272 close(child_ready_pipe[0]);
263 fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC); 273 close(go_pipe[1]);
274 fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC);
275
276 /*
277 * Do a dummy execvp to get the PLT entry resolved,
278 * so we avoid the resolver overhead on the real
279 * execvp call.
280 */
281 execvp("", (char **)argv);
282
283 /*
284 * Tell the parent we're ready to go
285 */
286 close(child_ready_pipe[1]);
287
288 /*
289 * Wait until the parent tells us to go.
290 */
291 if (read(go_pipe[0], &buf, 1) == -1)
292 perror("unable to read pipe");
293
294 execvp(argv[0], (char **)argv);
295
296 perror(argv[0]);
297 exit(-1);
298 }
264 299
265 /* 300 child_pid = pid;
266 * Do a dummy execvp to get the PLT entry resolved,
267 * so we avoid the resolver overhead on the real
268 * execvp call.
269 */
270 execvp("", (char **)argv);
271 301
272 /* 302 /*
273 * Tell the parent we're ready to go 303 * Wait for the child to be ready to exec.
274 */ 304 */
275 close(child_ready_pipe[1]); 305 close(child_ready_pipe[1]);
276 306 close(go_pipe[0]);
277 /* 307 if (read(child_ready_pipe[0], &buf, 1) == -1)
278 * Wait until the parent tells us to go.
279 */
280 if (read(go_pipe[0], &buf, 1) == -1)
281 perror("unable to read pipe"); 308 perror("unable to read pipe");
282 309 close(child_ready_pipe[0]);
283 execvp(argv[0], (char **)argv);
284
285 perror(argv[0]);
286 exit(-1);
287 } 310 }
288 311
289 child_pid = pid;
290
291 /*
292 * Wait for the child to be ready to exec.
293 */
294 close(child_ready_pipe[1]);
295 close(go_pipe[0]);
296 if (read(child_ready_pipe[0], &buf, 1) == -1)
297 perror("unable to read pipe");
298 close(child_ready_pipe[0]);
299
300 for (counter = 0; counter < nr_counters; counter++) 312 for (counter = 0; counter < nr_counters; counter++)
301 create_perf_stat_counter(counter, pid); 313 create_perf_stat_counter(counter, pid);
302 314
@@ -305,8 +317,12 @@ static int run_perf_stat(int argc __used, const char **argv)
305 */ 317 */
306 t0 = rdclock(); 318 t0 = rdclock();
307 319
308 close(go_pipe[1]); 320 if (forks) {
309 wait(&status); 321 close(go_pipe[1]);
322 wait(&status);
323 } else {
324 while(!done);
325 }
310 326
311 t1 = rdclock(); 327 t1 = rdclock();
312 328
@@ -352,7 +368,16 @@ static void abs_printout(int counter, double avg)
352 ratio = avg / total; 368 ratio = avg / total;
353 369
354 fprintf(stderr, " # %10.3f IPC ", ratio); 370 fprintf(stderr, " # %10.3f IPC ", ratio);
355 } else { 371 } else if (MATCH_EVENT(HARDWARE, HW_BRANCH_MISSES, counter) &&
372 runtime_branches_stats.n != 0) {
373 total = avg_stats(&runtime_branches_stats);
374
375 if (total)
376 ratio = avg * 100 / total;
377
378 fprintf(stderr, " # %10.3f %% ", ratio);
379
380 } else if (runtime_nsecs_stats.n != 0) {
356 total = avg_stats(&runtime_nsecs_stats); 381 total = avg_stats(&runtime_nsecs_stats);
357 382
358 if (total) 383 if (total)
@@ -403,10 +428,13 @@ static void print_stat(int argc, const char **argv)
403 fflush(stdout); 428 fflush(stdout);
404 429
405 fprintf(stderr, "\n"); 430 fprintf(stderr, "\n");
406 fprintf(stderr, " Performance counter stats for \'%s", argv[0]); 431 fprintf(stderr, " Performance counter stats for ");
407 432 if(target_pid == -1) {
408 for (i = 1; i < argc; i++) 433 fprintf(stderr, "\'%s", argv[0]);
409 fprintf(stderr, " %s", argv[i]); 434 for (i = 1; i < argc; i++)
435 fprintf(stderr, " %s", argv[i]);
436 }else
437 fprintf(stderr, "task pid \'%d", target_pid);
410 438
411 fprintf(stderr, "\'"); 439 fprintf(stderr, "\'");
412 if (run_count > 1) 440 if (run_count > 1)
@@ -431,6 +459,9 @@ static volatile int signr = -1;
431 459
432static void skip_signal(int signo) 460static void skip_signal(int signo)
433{ 461{
462 if(target_pid != -1)
463 done = 1;
464
434 signr = signo; 465 signr = signo;
435} 466}
436 467
@@ -447,7 +478,7 @@ static void sig_atexit(void)
447} 478}
448 479
449static const char * const stat_usage[] = { 480static const char * const stat_usage[] = {
450 "perf stat [<options>] <command>", 481 "perf stat [<options>] [<command>]",
451 NULL 482 NULL
452}; 483};
453 484
@@ -478,7 +509,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used)
478 509
479 argc = parse_options(argc, argv, options, stat_usage, 510 argc = parse_options(argc, argv, options, stat_usage,
480 PARSE_OPT_STOP_AT_NON_OPTION); 511 PARSE_OPT_STOP_AT_NON_OPTION);
481 if (!argc) 512 if (!argc && target_pid == -1)
482 usage_with_options(stat_usage, options); 513 usage_with_options(stat_usage, options);
483 if (run_count <= 0) 514 if (run_count <= 0)
484 usage_with_options(stat_usage, options); 515 usage_with_options(stat_usage, options);
@@ -489,9 +520,10 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used)
489 nr_counters = ARRAY_SIZE(default_attrs); 520 nr_counters = ARRAY_SIZE(default_attrs);
490 } 521 }
491 522
492 nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); 523 if (system_wide)
493 assert(nr_cpus <= MAX_NR_CPUS); 524 nr_cpus = read_cpu_map();
494 assert((int)nr_cpus >= 0); 525 else
526 nr_cpus = 1;
495 527
496 /* 528 /*
497 * We dont want to block the signals - that would cause 529 * We dont want to block the signals - that would cause