diff options
Diffstat (limited to 'tools/perf/builtin-stat.c')
| -rw-r--r-- | tools/perf/builtin-stat.c | 161 |
1 files changed, 111 insertions, 50 deletions
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 95db31cff6fd..ff8c413b7e73 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c | |||
| @@ -46,6 +46,7 @@ | |||
| 46 | #include "util/debug.h" | 46 | #include "util/debug.h" |
| 47 | #include "util/header.h" | 47 | #include "util/header.h" |
| 48 | #include "util/cpumap.h" | 48 | #include "util/cpumap.h" |
| 49 | #include "util/thread.h" | ||
| 49 | 50 | ||
| 50 | #include <sys/prctl.h> | 51 | #include <sys/prctl.h> |
| 51 | #include <math.h> | 52 | #include <math.h> |
| @@ -66,18 +67,21 @@ static struct perf_event_attr default_attrs[] = { | |||
| 66 | 67 | ||
| 67 | }; | 68 | }; |
| 68 | 69 | ||
| 69 | static int system_wide = 0; | 70 | static bool system_wide = false; |
| 70 | static unsigned int nr_cpus = 0; | 71 | static unsigned int nr_cpus = 0; |
| 71 | static int run_idx = 0; | 72 | static int run_idx = 0; |
| 72 | 73 | ||
| 73 | static int run_count = 1; | 74 | static int run_count = 1; |
| 74 | static int inherit = 1; | 75 | static bool no_inherit = false; |
| 75 | static int scale = 1; | 76 | static bool scale = true; |
| 76 | static pid_t target_pid = -1; | 77 | static pid_t target_pid = -1; |
| 78 | static pid_t target_tid = -1; | ||
| 79 | static pid_t *all_tids = NULL; | ||
| 80 | static int thread_num = 0; | ||
| 77 | static pid_t child_pid = -1; | 81 | static pid_t child_pid = -1; |
| 78 | static int null_run = 0; | 82 | static bool null_run = false; |
| 79 | 83 | ||
| 80 | static int fd[MAX_NR_CPUS][MAX_COUNTERS]; | 84 | static int *fd[MAX_NR_CPUS][MAX_COUNTERS]; |
| 81 | 85 | ||
| 82 | static int event_scaled[MAX_COUNTERS]; | 86 | static int event_scaled[MAX_COUNTERS]; |
| 83 | 87 | ||
| @@ -140,9 +144,11 @@ struct stats runtime_branches_stats; | |||
| 140 | #define ERR_PERF_OPEN \ | 144 | #define ERR_PERF_OPEN \ |
| 141 | "Error: counter %d, sys_perf_event_open() syscall returned with %d (%s)\n" | 145 | "Error: counter %d, sys_perf_event_open() syscall returned with %d (%s)\n" |
| 142 | 146 | ||
| 143 | static void create_perf_stat_counter(int counter, int pid) | 147 | static int create_perf_stat_counter(int counter) |
| 144 | { | 148 | { |
| 145 | struct perf_event_attr *attr = attrs + counter; | 149 | struct perf_event_attr *attr = attrs + counter; |
| 150 | int thread; | ||
| 151 | int ncreated = 0; | ||
| 146 | 152 | ||
| 147 | if (scale) | 153 | if (scale) |
| 148 | attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | | 154 | attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | |
| @@ -152,21 +158,33 @@ static void create_perf_stat_counter(int counter, int pid) | |||
| 152 | unsigned int cpu; | 158 | unsigned int cpu; |
| 153 | 159 | ||
| 154 | for (cpu = 0; cpu < nr_cpus; cpu++) { | 160 | for (cpu = 0; cpu < nr_cpus; cpu++) { |
| 155 | fd[cpu][counter] = sys_perf_event_open(attr, -1, cpumap[cpu], -1, 0); | 161 | fd[cpu][counter][0] = sys_perf_event_open(attr, |
| 156 | if (fd[cpu][counter] < 0 && verbose) | 162 | -1, cpumap[cpu], -1, 0); |
| 157 | fprintf(stderr, ERR_PERF_OPEN, counter, | 163 | if (fd[cpu][counter][0] < 0) |
| 158 | fd[cpu][counter], strerror(errno)); | 164 | pr_debug(ERR_PERF_OPEN, counter, |
| 165 | fd[cpu][counter][0], strerror(errno)); | ||
| 166 | else | ||
| 167 | ++ncreated; | ||
| 159 | } | 168 | } |
| 160 | } else { | 169 | } else { |
| 161 | attr->inherit = inherit; | 170 | attr->inherit = !no_inherit; |
| 162 | attr->disabled = 1; | 171 | if (target_pid == -1 && target_tid == -1) { |
| 163 | attr->enable_on_exec = 1; | 172 | attr->disabled = 1; |
| 164 | 173 | attr->enable_on_exec = 1; | |
| 165 | fd[0][counter] = sys_perf_event_open(attr, pid, -1, -1, 0); | 174 | } |
| 166 | if (fd[0][counter] < 0 && verbose) | 175 | for (thread = 0; thread < thread_num; thread++) { |
| 167 | fprintf(stderr, ERR_PERF_OPEN, counter, | 176 | fd[0][counter][thread] = sys_perf_event_open(attr, |
| 168 | fd[0][counter], strerror(errno)); | 177 | all_tids[thread], -1, -1, 0); |
| 178 | if (fd[0][counter][thread] < 0) | ||
| 179 | pr_debug(ERR_PERF_OPEN, counter, | ||
| 180 | fd[0][counter][thread], | ||
| 181 | strerror(errno)); | ||
| 182 | else | ||
| 183 | ++ncreated; | ||
| 184 | } | ||
| 169 | } | 185 | } |
| 186 | |||
| 187 | return ncreated; | ||
| 170 | } | 188 | } |
| 171 | 189 | ||
| 172 | /* | 190 | /* |
| @@ -190,25 +208,28 @@ static void read_counter(int counter) | |||
| 190 | unsigned int cpu; | 208 | unsigned int cpu; |
| 191 | size_t res, nv; | 209 | size_t res, nv; |
| 192 | int scaled; | 210 | int scaled; |
| 193 | int i; | 211 | int i, thread; |
| 194 | 212 | ||
| 195 | count[0] = count[1] = count[2] = 0; | 213 | count[0] = count[1] = count[2] = 0; |
| 196 | 214 | ||
| 197 | nv = scale ? 3 : 1; | 215 | nv = scale ? 3 : 1; |
| 198 | for (cpu = 0; cpu < nr_cpus; cpu++) { | 216 | for (cpu = 0; cpu < nr_cpus; cpu++) { |
| 199 | if (fd[cpu][counter] < 0) | 217 | for (thread = 0; thread < thread_num; thread++) { |
| 200 | continue; | 218 | if (fd[cpu][counter][thread] < 0) |
| 201 | 219 | continue; | |
| 202 | res = read(fd[cpu][counter], single_count, nv * sizeof(u64)); | 220 | |
| 203 | assert(res == nv * sizeof(u64)); | 221 | res = read(fd[cpu][counter][thread], |
| 204 | 222 | single_count, nv * sizeof(u64)); | |
| 205 | close(fd[cpu][counter]); | 223 | assert(res == nv * sizeof(u64)); |
| 206 | fd[cpu][counter] = -1; | 224 | |
| 207 | 225 | close(fd[cpu][counter][thread]); | |
| 208 | count[0] += single_count[0]; | 226 | fd[cpu][counter][thread] = -1; |
| 209 | if (scale) { | 227 | |
| 210 | count[1] += single_count[1]; | 228 | count[0] += single_count[0]; |
| 211 | count[2] += single_count[2]; | 229 | if (scale) { |
| 230 | count[1] += single_count[1]; | ||
| 231 | count[2] += single_count[2]; | ||
| 232 | } | ||
| 212 | } | 233 | } |
| 213 | } | 234 | } |
| 214 | 235 | ||
| @@ -250,10 +271,9 @@ static int run_perf_stat(int argc __used, const char **argv) | |||
| 250 | { | 271 | { |
| 251 | unsigned long long t0, t1; | 272 | unsigned long long t0, t1; |
| 252 | int status = 0; | 273 | int status = 0; |
| 253 | int counter; | 274 | int counter, ncreated = 0; |
| 254 | int pid = target_pid; | ||
| 255 | int child_ready_pipe[2], go_pipe[2]; | 275 | int child_ready_pipe[2], go_pipe[2]; |
| 256 | const bool forks = (target_pid == -1 && argc > 0); | 276 | const bool forks = (argc > 0); |
| 257 | char buf; | 277 | char buf; |
| 258 | 278 | ||
| 259 | if (!system_wide) | 279 | if (!system_wide) |
| @@ -265,10 +285,10 @@ static int run_perf_stat(int argc __used, const char **argv) | |||
| 265 | } | 285 | } |
| 266 | 286 | ||
| 267 | if (forks) { | 287 | if (forks) { |
| 268 | if ((pid = fork()) < 0) | 288 | if ((child_pid = fork()) < 0) |
| 269 | perror("failed to fork"); | 289 | perror("failed to fork"); |
| 270 | 290 | ||
| 271 | if (!pid) { | 291 | if (!child_pid) { |
| 272 | close(child_ready_pipe[0]); | 292 | close(child_ready_pipe[0]); |
| 273 | close(go_pipe[1]); | 293 | close(go_pipe[1]); |
| 274 | fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC); | 294 | fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC); |
| @@ -297,7 +317,8 @@ static int run_perf_stat(int argc __used, const char **argv) | |||
| 297 | exit(-1); | 317 | exit(-1); |
| 298 | } | 318 | } |
| 299 | 319 | ||
| 300 | child_pid = pid; | 320 | if (target_tid == -1 && target_pid == -1 && !system_wide) |
| 321 | all_tids[0] = child_pid; | ||
| 301 | 322 | ||
| 302 | /* | 323 | /* |
| 303 | * Wait for the child to be ready to exec. | 324 | * Wait for the child to be ready to exec. |
| @@ -310,7 +331,16 @@ static int run_perf_stat(int argc __used, const char **argv) | |||
| 310 | } | 331 | } |
| 311 | 332 | ||
| 312 | for (counter = 0; counter < nr_counters; counter++) | 333 | for (counter = 0; counter < nr_counters; counter++) |
| 313 | create_perf_stat_counter(counter, pid); | 334 | ncreated += create_perf_stat_counter(counter); |
| 335 | |||
| 336 | if (ncreated == 0) { | ||
| 337 | pr_err("No permission to collect %sstats.\n" | ||
| 338 | "Consider tweaking /proc/sys/kernel/perf_event_paranoid.\n", | ||
| 339 | system_wide ? "system-wide " : ""); | ||
| 340 | if (child_pid != -1) | ||
| 341 | kill(child_pid, SIGTERM); | ||
| 342 | return -1; | ||
| 343 | } | ||
| 314 | 344 | ||
| 315 | /* | 345 | /* |
| 316 | * Enable counters and exec the command: | 346 | * Enable counters and exec the command: |
| @@ -321,7 +351,7 @@ static int run_perf_stat(int argc __used, const char **argv) | |||
| 321 | close(go_pipe[1]); | 351 | close(go_pipe[1]); |
| 322 | wait(&status); | 352 | wait(&status); |
| 323 | } else { | 353 | } else { |
| 324 | while(!done); | 354 | while(!done) sleep(1); |
| 325 | } | 355 | } |
| 326 | 356 | ||
| 327 | t1 = rdclock(); | 357 | t1 = rdclock(); |
| @@ -429,12 +459,14 @@ static void print_stat(int argc, const char **argv) | |||
| 429 | 459 | ||
| 430 | fprintf(stderr, "\n"); | 460 | fprintf(stderr, "\n"); |
| 431 | fprintf(stderr, " Performance counter stats for "); | 461 | fprintf(stderr, " Performance counter stats for "); |
| 432 | if(target_pid == -1) { | 462 | if(target_pid == -1 && target_tid == -1) { |
| 433 | fprintf(stderr, "\'%s", argv[0]); | 463 | fprintf(stderr, "\'%s", argv[0]); |
| 434 | for (i = 1; i < argc; i++) | 464 | for (i = 1; i < argc; i++) |
| 435 | fprintf(stderr, " %s", argv[i]); | 465 | fprintf(stderr, " %s", argv[i]); |
| 436 | }else | 466 | } else if (target_pid != -1) |
| 437 | fprintf(stderr, "task pid \'%d", target_pid); | 467 | fprintf(stderr, "process id \'%d", target_pid); |
| 468 | else | ||
| 469 | fprintf(stderr, "thread id \'%d", target_tid); | ||
| 438 | 470 | ||
| 439 | fprintf(stderr, "\'"); | 471 | fprintf(stderr, "\'"); |
| 440 | if (run_count > 1) | 472 | if (run_count > 1) |
| @@ -459,7 +491,7 @@ static volatile int signr = -1; | |||
| 459 | 491 | ||
| 460 | static void skip_signal(int signo) | 492 | static void skip_signal(int signo) |
| 461 | { | 493 | { |
| 462 | if(target_pid != -1) | 494 | if(child_pid == -1) |
| 463 | done = 1; | 495 | done = 1; |
| 464 | 496 | ||
| 465 | signr = signo; | 497 | signr = signo; |
| @@ -486,15 +518,17 @@ static const struct option options[] = { | |||
| 486 | OPT_CALLBACK('e', "event", NULL, "event", | 518 | OPT_CALLBACK('e', "event", NULL, "event", |
| 487 | "event selector. use 'perf list' to list available events", | 519 | "event selector. use 'perf list' to list available events", |
| 488 | parse_events), | 520 | parse_events), |
| 489 | OPT_BOOLEAN('i', "inherit", &inherit, | 521 | OPT_BOOLEAN('i', "no-inherit", &no_inherit, |
| 490 | "child tasks inherit counters"), | 522 | "child tasks do not inherit counters"), |
| 491 | OPT_INTEGER('p', "pid", &target_pid, | 523 | OPT_INTEGER('p', "pid", &target_pid, |
| 492 | "stat events on existing pid"), | 524 | "stat events on existing process id"), |
| 525 | OPT_INTEGER('t', "tid", &target_tid, | ||
| 526 | "stat events on existing thread id"), | ||
| 493 | OPT_BOOLEAN('a', "all-cpus", &system_wide, | 527 | OPT_BOOLEAN('a', "all-cpus", &system_wide, |
| 494 | "system-wide collection from all CPUs"), | 528 | "system-wide collection from all CPUs"), |
| 495 | OPT_BOOLEAN('c', "scale", &scale, | 529 | OPT_BOOLEAN('c', "scale", &scale, |
| 496 | "scale/normalize counters"), | 530 | "scale/normalize counters"), |
| 497 | OPT_BOOLEAN('v', "verbose", &verbose, | 531 | OPT_INCR('v', "verbose", &verbose, |
| 498 | "be more verbose (show counter open errors, etc)"), | 532 | "be more verbose (show counter open errors, etc)"), |
| 499 | OPT_INTEGER('r', "repeat", &run_count, | 533 | OPT_INTEGER('r', "repeat", &run_count, |
| 500 | "repeat command and print average + stddev (max: 100)"), | 534 | "repeat command and print average + stddev (max: 100)"), |
| @@ -506,10 +540,11 @@ static const struct option options[] = { | |||
| 506 | int cmd_stat(int argc, const char **argv, const char *prefix __used) | 540 | int cmd_stat(int argc, const char **argv, const char *prefix __used) |
| 507 | { | 541 | { |
| 508 | int status; | 542 | int status; |
| 543 | int i,j; | ||
| 509 | 544 | ||
| 510 | argc = parse_options(argc, argv, options, stat_usage, | 545 | argc = parse_options(argc, argv, options, stat_usage, |
| 511 | PARSE_OPT_STOP_AT_NON_OPTION); | 546 | PARSE_OPT_STOP_AT_NON_OPTION); |
| 512 | if (!argc && target_pid == -1) | 547 | if (!argc && target_pid == -1 && target_tid == -1) |
| 513 | usage_with_options(stat_usage, options); | 548 | usage_with_options(stat_usage, options); |
| 514 | if (run_count <= 0) | 549 | if (run_count <= 0) |
| 515 | usage_with_options(stat_usage, options); | 550 | usage_with_options(stat_usage, options); |
| @@ -525,6 +560,31 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used) | |||
| 525 | else | 560 | else |
| 526 | nr_cpus = 1; | 561 | nr_cpus = 1; |
| 527 | 562 | ||
| 563 | if (target_pid != -1) { | ||
| 564 | target_tid = target_pid; | ||
| 565 | thread_num = find_all_tid(target_pid, &all_tids); | ||
| 566 | if (thread_num <= 0) { | ||
| 567 | fprintf(stderr, "Can't find all threads of pid %d\n", | ||
| 568 | target_pid); | ||
| 569 | usage_with_options(stat_usage, options); | ||
| 570 | } | ||
| 571 | } else { | ||
| 572 | all_tids=malloc(sizeof(pid_t)); | ||
| 573 | if (!all_tids) | ||
| 574 | return -ENOMEM; | ||
| 575 | |||
| 576 | all_tids[0] = target_tid; | ||
| 577 | thread_num = 1; | ||
| 578 | } | ||
| 579 | |||
| 580 | for (i = 0; i < MAX_NR_CPUS; i++) { | ||
| 581 | for (j = 0; j < MAX_COUNTERS; j++) { | ||
| 582 | fd[i][j] = malloc(sizeof(int)*thread_num); | ||
| 583 | if (!fd[i][j]) | ||
| 584 | return -ENOMEM; | ||
| 585 | } | ||
| 586 | } | ||
| 587 | |||
| 528 | /* | 588 | /* |
| 529 | * We dont want to block the signals - that would cause | 589 | * We dont want to block the signals - that would cause |
| 530 | * child tasks to inherit that and Ctrl-C would not work. | 590 | * child tasks to inherit that and Ctrl-C would not work. |
| @@ -543,7 +603,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used) | |||
| 543 | status = run_perf_stat(argc, argv); | 603 | status = run_perf_stat(argc, argv); |
| 544 | } | 604 | } |
| 545 | 605 | ||
| 546 | print_stat(argc, argv); | 606 | if (status != -1) |
| 607 | print_stat(argc, argv); | ||
| 547 | 608 | ||
| 548 | return status; | 609 | return status; |
| 549 | } | 610 | } |
