diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2010-05-18 11:19:03 -0400 | 
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2010-05-18 11:19:03 -0400 | 
| commit | 4d7b4ac22fbec1a03206c6cde353f2fd6942f828 (patch) | |
| tree | 2d96a9e9c28cf6fa628a278decc00ad55a8b043b /tools/perf/builtin-stat.c | |
| parent | 3aaf51ace5975050ab43c7d4d7e439e0ae7d13d7 (diff) | |
| parent | 94f3ca95787ada3d64339a4ecb2754236ab563f6 (diff) | |
Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (311 commits)
  perf tools: Add mode to build without newt support
  perf symbols: symbol inconsistency message should be done only at verbose=1
  perf tui: Add explicit -lslang option
  perf options: Type check all the remaining OPT_ variants
  perf options: Type check OPT_BOOLEAN and fix the offenders
  perf options: Check v type in OPT_U?INTEGER
  perf options: Introduce OPT_UINTEGER
  perf tui: Add workaround for slang < 2.1.4
  perf record: Fix bug mismatch with -c option definition
  perf options: Introduce OPT_U64
  perf tui: Add help window to show key associations
  perf tui: Make <- exit menus too
  perf newt: Add single key shortcuts for zoom into DSO and threads
  perf newt: Exit browser unconditionally when CTRL+C, q or Q is pressed
  perf newt: Fix the 'A'/'a' shortcut for annotate
  perf newt: Make <- exit the ui_browser
  x86, perf: P4 PMU - fix counters management logic
  perf newt: Make <- zoom out filters
  perf report: Report number of events, not samples
  perf hist: Clarify events_stats fields usage
  ...
Fix up trivial conflicts in kernel/fork.c and tools/perf/builtin-record.c
Diffstat (limited to 'tools/perf/builtin-stat.c')
| -rw-r--r-- | tools/perf/builtin-stat.c | 161 | 
1 file changed, 111 insertions, 50 deletions
| diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 95db31cff6fd..ff8c413b7e73 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c | |||
| @@ -46,6 +46,7 @@ | |||
| 46 | #include "util/debug.h" | 46 | #include "util/debug.h" | 
| 47 | #include "util/header.h" | 47 | #include "util/header.h" | 
| 48 | #include "util/cpumap.h" | 48 | #include "util/cpumap.h" | 
| 49 | #include "util/thread.h" | ||
| 49 | 50 | ||
| 50 | #include <sys/prctl.h> | 51 | #include <sys/prctl.h> | 
| 51 | #include <math.h> | 52 | #include <math.h> | 
| @@ -66,18 +67,21 @@ static struct perf_event_attr default_attrs[] = { | |||
| 66 | 67 | ||
| 67 | }; | 68 | }; | 
| 68 | 69 | ||
| 69 | static int system_wide = 0; | 70 | static bool system_wide = false; | 
| 70 | static unsigned int nr_cpus = 0; | 71 | static unsigned int nr_cpus = 0; | 
| 71 | static int run_idx = 0; | 72 | static int run_idx = 0; | 
| 72 | 73 | ||
| 73 | static int run_count = 1; | 74 | static int run_count = 1; | 
| 74 | static int inherit = 1; | 75 | static bool no_inherit = false; | 
| 75 | static int scale = 1; | 76 | static bool scale = true; | 
| 76 | static pid_t target_pid = -1; | 77 | static pid_t target_pid = -1; | 
| 78 | static pid_t target_tid = -1; | ||
| 79 | static pid_t *all_tids = NULL; | ||
| 80 | static int thread_num = 0; | ||
| 77 | static pid_t child_pid = -1; | 81 | static pid_t child_pid = -1; | 
| 78 | static int null_run = 0; | 82 | static bool null_run = false; | 
| 79 | 83 | ||
| 80 | static int fd[MAX_NR_CPUS][MAX_COUNTERS]; | 84 | static int *fd[MAX_NR_CPUS][MAX_COUNTERS]; | 
| 81 | 85 | ||
| 82 | static int event_scaled[MAX_COUNTERS]; | 86 | static int event_scaled[MAX_COUNTERS]; | 
| 83 | 87 | ||
| @@ -140,9 +144,11 @@ struct stats runtime_branches_stats; | |||
| 140 | #define ERR_PERF_OPEN \ | 144 | #define ERR_PERF_OPEN \ | 
| 141 | "Error: counter %d, sys_perf_event_open() syscall returned with %d (%s)\n" | 145 | "Error: counter %d, sys_perf_event_open() syscall returned with %d (%s)\n" | 
| 142 | 146 | ||
| 143 | static void create_perf_stat_counter(int counter, int pid) | 147 | static int create_perf_stat_counter(int counter) | 
| 144 | { | 148 | { | 
| 145 | struct perf_event_attr *attr = attrs + counter; | 149 | struct perf_event_attr *attr = attrs + counter; | 
| 150 | int thread; | ||
| 151 | int ncreated = 0; | ||
| 146 | 152 | ||
| 147 | if (scale) | 153 | if (scale) | 
| 148 | attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | | 154 | attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | | 
| @@ -152,21 +158,33 @@ static void create_perf_stat_counter(int counter, int pid) | |||
| 152 | unsigned int cpu; | 158 | unsigned int cpu; | 
| 153 | 159 | ||
| 154 | for (cpu = 0; cpu < nr_cpus; cpu++) { | 160 | for (cpu = 0; cpu < nr_cpus; cpu++) { | 
| 155 | fd[cpu][counter] = sys_perf_event_open(attr, -1, cpumap[cpu], -1, 0); | 161 | fd[cpu][counter][0] = sys_perf_event_open(attr, | 
| 156 | if (fd[cpu][counter] < 0 && verbose) | 162 | -1, cpumap[cpu], -1, 0); | 
| 157 | fprintf(stderr, ERR_PERF_OPEN, counter, | 163 | if (fd[cpu][counter][0] < 0) | 
| 158 | fd[cpu][counter], strerror(errno)); | 164 | pr_debug(ERR_PERF_OPEN, counter, | 
| 165 | fd[cpu][counter][0], strerror(errno)); | ||
| 166 | else | ||
| 167 | ++ncreated; | ||
| 159 | } | 168 | } | 
| 160 | } else { | 169 | } else { | 
| 161 | attr->inherit = inherit; | 170 | attr->inherit = !no_inherit; | 
| 162 | attr->disabled = 1; | 171 | if (target_pid == -1 && target_tid == -1) { | 
| 163 | attr->enable_on_exec = 1; | 172 | attr->disabled = 1; | 
| 164 | 173 | attr->enable_on_exec = 1; | |
| 165 | fd[0][counter] = sys_perf_event_open(attr, pid, -1, -1, 0); | 174 | } | 
| 166 | if (fd[0][counter] < 0 && verbose) | 175 | for (thread = 0; thread < thread_num; thread++) { | 
| 167 | fprintf(stderr, ERR_PERF_OPEN, counter, | 176 | fd[0][counter][thread] = sys_perf_event_open(attr, | 
| 168 | fd[0][counter], strerror(errno)); | 177 | all_tids[thread], -1, -1, 0); | 
| 178 | if (fd[0][counter][thread] < 0) | ||
| 179 | pr_debug(ERR_PERF_OPEN, counter, | ||
| 180 | fd[0][counter][thread], | ||
| 181 | strerror(errno)); | ||
| 182 | else | ||
| 183 | ++ncreated; | ||
| 184 | } | ||
| 169 | } | 185 | } | 
| 186 | |||
| 187 | return ncreated; | ||
| 170 | } | 188 | } | 
| 171 | 189 | ||
| 172 | /* | 190 | /* | 
| @@ -190,25 +208,28 @@ static void read_counter(int counter) | |||
| 190 | unsigned int cpu; | 208 | unsigned int cpu; | 
| 191 | size_t res, nv; | 209 | size_t res, nv; | 
| 192 | int scaled; | 210 | int scaled; | 
| 193 | int i; | 211 | int i, thread; | 
| 194 | 212 | ||
| 195 | count[0] = count[1] = count[2] = 0; | 213 | count[0] = count[1] = count[2] = 0; | 
| 196 | 214 | ||
| 197 | nv = scale ? 3 : 1; | 215 | nv = scale ? 3 : 1; | 
| 198 | for (cpu = 0; cpu < nr_cpus; cpu++) { | 216 | for (cpu = 0; cpu < nr_cpus; cpu++) { | 
| 199 | if (fd[cpu][counter] < 0) | 217 | for (thread = 0; thread < thread_num; thread++) { | 
| 200 | continue; | 218 | if (fd[cpu][counter][thread] < 0) | 
| 201 | 219 | continue; | |
| 202 | res = read(fd[cpu][counter], single_count, nv * sizeof(u64)); | 220 | |
| 203 | assert(res == nv * sizeof(u64)); | 221 | res = read(fd[cpu][counter][thread], | 
| 204 | 222 | single_count, nv * sizeof(u64)); | |
| 205 | close(fd[cpu][counter]); | 223 | assert(res == nv * sizeof(u64)); | 
| 206 | fd[cpu][counter] = -1; | 224 | |
| 207 | 225 | close(fd[cpu][counter][thread]); | |
| 208 | count[0] += single_count[0]; | 226 | fd[cpu][counter][thread] = -1; | 
| 209 | if (scale) { | 227 | |
| 210 | count[1] += single_count[1]; | 228 | count[0] += single_count[0]; | 
| 211 | count[2] += single_count[2]; | 229 | if (scale) { | 
| 230 | count[1] += single_count[1]; | ||
| 231 | count[2] += single_count[2]; | ||
| 232 | } | ||
| 212 | } | 233 | } | 
| 213 | } | 234 | } | 
| 214 | 235 | ||
| @@ -250,10 +271,9 @@ static int run_perf_stat(int argc __used, const char **argv) | |||
| 250 | { | 271 | { | 
| 251 | unsigned long long t0, t1; | 272 | unsigned long long t0, t1; | 
| 252 | int status = 0; | 273 | int status = 0; | 
| 253 | int counter; | 274 | int counter, ncreated = 0; | 
| 254 | int pid = target_pid; | ||
| 255 | int child_ready_pipe[2], go_pipe[2]; | 275 | int child_ready_pipe[2], go_pipe[2]; | 
| 256 | const bool forks = (target_pid == -1 && argc > 0); | 276 | const bool forks = (argc > 0); | 
| 257 | char buf; | 277 | char buf; | 
| 258 | 278 | ||
| 259 | if (!system_wide) | 279 | if (!system_wide) | 
| @@ -265,10 +285,10 @@ static int run_perf_stat(int argc __used, const char **argv) | |||
| 265 | } | 285 | } | 
| 266 | 286 | ||
| 267 | if (forks) { | 287 | if (forks) { | 
| 268 | if ((pid = fork()) < 0) | 288 | if ((child_pid = fork()) < 0) | 
| 269 | perror("failed to fork"); | 289 | perror("failed to fork"); | 
| 270 | 290 | ||
| 271 | if (!pid) { | 291 | if (!child_pid) { | 
| 272 | close(child_ready_pipe[0]); | 292 | close(child_ready_pipe[0]); | 
| 273 | close(go_pipe[1]); | 293 | close(go_pipe[1]); | 
| 274 | fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC); | 294 | fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC); | 
| @@ -297,7 +317,8 @@ static int run_perf_stat(int argc __used, const char **argv) | |||
| 297 | exit(-1); | 317 | exit(-1); | 
| 298 | } | 318 | } | 
| 299 | 319 | ||
| 300 | child_pid = pid; | 320 | if (target_tid == -1 && target_pid == -1 && !system_wide) | 
| 321 | all_tids[0] = child_pid; | ||
| 301 | 322 | ||
| 302 | /* | 323 | /* | 
| 303 | * Wait for the child to be ready to exec. | 324 | * Wait for the child to be ready to exec. | 
| @@ -310,7 +331,16 @@ static int run_perf_stat(int argc __used, const char **argv) | |||
| 310 | } | 331 | } | 
| 311 | 332 | ||
| 312 | for (counter = 0; counter < nr_counters; counter++) | 333 | for (counter = 0; counter < nr_counters; counter++) | 
| 313 | create_perf_stat_counter(counter, pid); | 334 | ncreated += create_perf_stat_counter(counter); | 
| 335 | |||
| 336 | if (ncreated == 0) { | ||
| 337 | pr_err("No permission to collect %sstats.\n" | ||
| 338 | "Consider tweaking /proc/sys/kernel/perf_event_paranoid.\n", | ||
| 339 | system_wide ? "system-wide " : ""); | ||
| 340 | if (child_pid != -1) | ||
| 341 | kill(child_pid, SIGTERM); | ||
| 342 | return -1; | ||
| 343 | } | ||
| 314 | 344 | ||
| 315 | /* | 345 | /* | 
| 316 | * Enable counters and exec the command: | 346 | * Enable counters and exec the command: | 
| @@ -321,7 +351,7 @@ static int run_perf_stat(int argc __used, const char **argv) | |||
| 321 | close(go_pipe[1]); | 351 | close(go_pipe[1]); | 
| 322 | wait(&status); | 352 | wait(&status); | 
| 323 | } else { | 353 | } else { | 
| 324 | while(!done); | 354 | while(!done) sleep(1); | 
| 325 | } | 355 | } | 
| 326 | 356 | ||
| 327 | t1 = rdclock(); | 357 | t1 = rdclock(); | 
| @@ -429,12 +459,14 @@ static void print_stat(int argc, const char **argv) | |||
| 429 | 459 | ||
| 430 | fprintf(stderr, "\n"); | 460 | fprintf(stderr, "\n"); | 
| 431 | fprintf(stderr, " Performance counter stats for "); | 461 | fprintf(stderr, " Performance counter stats for "); | 
| 432 | if(target_pid == -1) { | 462 | if(target_pid == -1 && target_tid == -1) { | 
| 433 | fprintf(stderr, "\'%s", argv[0]); | 463 | fprintf(stderr, "\'%s", argv[0]); | 
| 434 | for (i = 1; i < argc; i++) | 464 | for (i = 1; i < argc; i++) | 
| 435 | fprintf(stderr, " %s", argv[i]); | 465 | fprintf(stderr, " %s", argv[i]); | 
| 436 | }else | 466 | } else if (target_pid != -1) | 
| 437 | fprintf(stderr, "task pid \'%d", target_pid); | 467 | fprintf(stderr, "process id \'%d", target_pid); | 
| 468 | else | ||
| 469 | fprintf(stderr, "thread id \'%d", target_tid); | ||
| 438 | 470 | ||
| 439 | fprintf(stderr, "\'"); | 471 | fprintf(stderr, "\'"); | 
| 440 | if (run_count > 1) | 472 | if (run_count > 1) | 
| @@ -459,7 +491,7 @@ static volatile int signr = -1; | |||
| 459 | 491 | ||
| 460 | static void skip_signal(int signo) | 492 | static void skip_signal(int signo) | 
| 461 | { | 493 | { | 
| 462 | if(target_pid != -1) | 494 | if(child_pid == -1) | 
| 463 | done = 1; | 495 | done = 1; | 
| 464 | 496 | ||
| 465 | signr = signo; | 497 | signr = signo; | 
| @@ -486,15 +518,17 @@ static const struct option options[] = { | |||
| 486 | OPT_CALLBACK('e', "event", NULL, "event", | 518 | OPT_CALLBACK('e', "event", NULL, "event", | 
| 487 | "event selector. use 'perf list' to list available events", | 519 | "event selector. use 'perf list' to list available events", | 
| 488 | parse_events), | 520 | parse_events), | 
| 489 | OPT_BOOLEAN('i', "inherit", &inherit, | 521 | OPT_BOOLEAN('i', "no-inherit", &no_inherit, | 
| 490 | "child tasks inherit counters"), | 522 | "child tasks do not inherit counters"), | 
| 491 | OPT_INTEGER('p', "pid", &target_pid, | 523 | OPT_INTEGER('p', "pid", &target_pid, | 
| 492 | "stat events on existing pid"), | 524 | "stat events on existing process id"), | 
| 525 | OPT_INTEGER('t', "tid", &target_tid, | ||
| 526 | "stat events on existing thread id"), | ||
| 493 | OPT_BOOLEAN('a', "all-cpus", &system_wide, | 527 | OPT_BOOLEAN('a', "all-cpus", &system_wide, | 
| 494 | "system-wide collection from all CPUs"), | 528 | "system-wide collection from all CPUs"), | 
| 495 | OPT_BOOLEAN('c', "scale", &scale, | 529 | OPT_BOOLEAN('c', "scale", &scale, | 
| 496 | "scale/normalize counters"), | 530 | "scale/normalize counters"), | 
| 497 | OPT_BOOLEAN('v', "verbose", &verbose, | 531 | OPT_INCR('v', "verbose", &verbose, | 
| 498 | "be more verbose (show counter open errors, etc)"), | 532 | "be more verbose (show counter open errors, etc)"), | 
| 499 | OPT_INTEGER('r', "repeat", &run_count, | 533 | OPT_INTEGER('r', "repeat", &run_count, | 
| 500 | "repeat command and print average + stddev (max: 100)"), | 534 | "repeat command and print average + stddev (max: 100)"), | 
| @@ -506,10 +540,11 @@ static const struct option options[] = { | |||
| 506 | int cmd_stat(int argc, const char **argv, const char *prefix __used) | 540 | int cmd_stat(int argc, const char **argv, const char *prefix __used) | 
| 507 | { | 541 | { | 
| 508 | int status; | 542 | int status; | 
| 543 | int i,j; | ||
| 509 | 544 | ||
| 510 | argc = parse_options(argc, argv, options, stat_usage, | 545 | argc = parse_options(argc, argv, options, stat_usage, | 
| 511 | PARSE_OPT_STOP_AT_NON_OPTION); | 546 | PARSE_OPT_STOP_AT_NON_OPTION); | 
| 512 | if (!argc && target_pid == -1) | 547 | if (!argc && target_pid == -1 && target_tid == -1) | 
| 513 | usage_with_options(stat_usage, options); | 548 | usage_with_options(stat_usage, options); | 
| 514 | if (run_count <= 0) | 549 | if (run_count <= 0) | 
| 515 | usage_with_options(stat_usage, options); | 550 | usage_with_options(stat_usage, options); | 
| @@ -525,6 +560,31 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used) | |||
| 525 | else | 560 | else | 
| 526 | nr_cpus = 1; | 561 | nr_cpus = 1; | 
| 527 | 562 | ||
| 563 | if (target_pid != -1) { | ||
| 564 | target_tid = target_pid; | ||
| 565 | thread_num = find_all_tid(target_pid, &all_tids); | ||
| 566 | if (thread_num <= 0) { | ||
| 567 | fprintf(stderr, "Can't find all threads of pid %d\n", | ||
| 568 | target_pid); | ||
| 569 | usage_with_options(stat_usage, options); | ||
| 570 | } | ||
| 571 | } else { | ||
| 572 | all_tids=malloc(sizeof(pid_t)); | ||
| 573 | if (!all_tids) | ||
| 574 | return -ENOMEM; | ||
| 575 | |||
| 576 | all_tids[0] = target_tid; | ||
| 577 | thread_num = 1; | ||
| 578 | } | ||
| 579 | |||
| 580 | for (i = 0; i < MAX_NR_CPUS; i++) { | ||
| 581 | for (j = 0; j < MAX_COUNTERS; j++) { | ||
| 582 | fd[i][j] = malloc(sizeof(int)*thread_num); | ||
| 583 | if (!fd[i][j]) | ||
| 584 | return -ENOMEM; | ||
| 585 | } | ||
| 586 | } | ||
| 587 | |||
| 528 | /* | 588 | /* | 
| 529 | * We dont want to block the signals - that would cause | 589 | * We dont want to block the signals - that would cause | 
| 530 | * child tasks to inherit that and Ctrl-C would not work. | 590 | * child tasks to inherit that and Ctrl-C would not work. | 
| @@ -543,7 +603,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used) | |||
| 543 | status = run_perf_stat(argc, argv); | 603 | status = run_perf_stat(argc, argv); | 
| 544 | } | 604 | } | 
| 545 | 605 | ||
| 546 | print_stat(argc, argv); | 606 | if (status != -1) | 
| 607 | print_stat(argc, argv); | ||
| 547 | 608 | ||
| 548 | return status; | 609 | return status; | 
| 549 | } | 610 | } | 
