diff options
author | David S. Miller <davem@davemloft.net> | 2010-05-19 02:01:55 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2010-05-19 02:01:55 -0400 |
commit | 2ec8c6bb5d8f3a62a79f463525054bae1e3d4487 (patch) | |
tree | fa7f8400ac685fb52e96f64997c7c682fc2aa021 /tools/perf/builtin-stat.c | |
parent | 7b39f90fabcf9e2af0cd79d0a60440d821e22b56 (diff) | |
parent | 537b60d17894b7c19a6060feae40299d7109d6e7 (diff) |
Merge branch 'master' of /home/davem/src/GIT/linux-2.6/
Conflicts:
include/linux/mod_devicetable.h
scripts/mod/file2alias.c
Diffstat (limited to 'tools/perf/builtin-stat.c')
-rw-r--r-- | tools/perf/builtin-stat.c | 161 |
1 files changed, 111 insertions, 50 deletions
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 95db31cff6fd..ff8c413b7e73 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c | |||
@@ -46,6 +46,7 @@ | |||
46 | #include "util/debug.h" | 46 | #include "util/debug.h" |
47 | #include "util/header.h" | 47 | #include "util/header.h" |
48 | #include "util/cpumap.h" | 48 | #include "util/cpumap.h" |
49 | #include "util/thread.h" | ||
49 | 50 | ||
50 | #include <sys/prctl.h> | 51 | #include <sys/prctl.h> |
51 | #include <math.h> | 52 | #include <math.h> |
@@ -66,18 +67,21 @@ static struct perf_event_attr default_attrs[] = { | |||
66 | 67 | ||
67 | }; | 68 | }; |
68 | 69 | ||
69 | static int system_wide = 0; | 70 | static bool system_wide = false; |
70 | static unsigned int nr_cpus = 0; | 71 | static unsigned int nr_cpus = 0; |
71 | static int run_idx = 0; | 72 | static int run_idx = 0; |
72 | 73 | ||
73 | static int run_count = 1; | 74 | static int run_count = 1; |
74 | static int inherit = 1; | 75 | static bool no_inherit = false; |
75 | static int scale = 1; | 76 | static bool scale = true; |
76 | static pid_t target_pid = -1; | 77 | static pid_t target_pid = -1; |
78 | static pid_t target_tid = -1; | ||
79 | static pid_t *all_tids = NULL; | ||
80 | static int thread_num = 0; | ||
77 | static pid_t child_pid = -1; | 81 | static pid_t child_pid = -1; |
78 | static int null_run = 0; | 82 | static bool null_run = false; |
79 | 83 | ||
80 | static int fd[MAX_NR_CPUS][MAX_COUNTERS]; | 84 | static int *fd[MAX_NR_CPUS][MAX_COUNTERS]; |
81 | 85 | ||
82 | static int event_scaled[MAX_COUNTERS]; | 86 | static int event_scaled[MAX_COUNTERS]; |
83 | 87 | ||
@@ -140,9 +144,11 @@ struct stats runtime_branches_stats; | |||
140 | #define ERR_PERF_OPEN \ | 144 | #define ERR_PERF_OPEN \ |
141 | "Error: counter %d, sys_perf_event_open() syscall returned with %d (%s)\n" | 145 | "Error: counter %d, sys_perf_event_open() syscall returned with %d (%s)\n" |
142 | 146 | ||
143 | static void create_perf_stat_counter(int counter, int pid) | 147 | static int create_perf_stat_counter(int counter) |
144 | { | 148 | { |
145 | struct perf_event_attr *attr = attrs + counter; | 149 | struct perf_event_attr *attr = attrs + counter; |
150 | int thread; | ||
151 | int ncreated = 0; | ||
146 | 152 | ||
147 | if (scale) | 153 | if (scale) |
148 | attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | | 154 | attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | |
@@ -152,21 +158,33 @@ static void create_perf_stat_counter(int counter, int pid) | |||
152 | unsigned int cpu; | 158 | unsigned int cpu; |
153 | 159 | ||
154 | for (cpu = 0; cpu < nr_cpus; cpu++) { | 160 | for (cpu = 0; cpu < nr_cpus; cpu++) { |
155 | fd[cpu][counter] = sys_perf_event_open(attr, -1, cpumap[cpu], -1, 0); | 161 | fd[cpu][counter][0] = sys_perf_event_open(attr, |
156 | if (fd[cpu][counter] < 0 && verbose) | 162 | -1, cpumap[cpu], -1, 0); |
157 | fprintf(stderr, ERR_PERF_OPEN, counter, | 163 | if (fd[cpu][counter][0] < 0) |
158 | fd[cpu][counter], strerror(errno)); | 164 | pr_debug(ERR_PERF_OPEN, counter, |
165 | fd[cpu][counter][0], strerror(errno)); | ||
166 | else | ||
167 | ++ncreated; | ||
159 | } | 168 | } |
160 | } else { | 169 | } else { |
161 | attr->inherit = inherit; | 170 | attr->inherit = !no_inherit; |
162 | attr->disabled = 1; | 171 | if (target_pid == -1 && target_tid == -1) { |
163 | attr->enable_on_exec = 1; | 172 | attr->disabled = 1; |
164 | 173 | attr->enable_on_exec = 1; | |
165 | fd[0][counter] = sys_perf_event_open(attr, pid, -1, -1, 0); | 174 | } |
166 | if (fd[0][counter] < 0 && verbose) | 175 | for (thread = 0; thread < thread_num; thread++) { |
167 | fprintf(stderr, ERR_PERF_OPEN, counter, | 176 | fd[0][counter][thread] = sys_perf_event_open(attr, |
168 | fd[0][counter], strerror(errno)); | 177 | all_tids[thread], -1, -1, 0); |
178 | if (fd[0][counter][thread] < 0) | ||
179 | pr_debug(ERR_PERF_OPEN, counter, | ||
180 | fd[0][counter][thread], | ||
181 | strerror(errno)); | ||
182 | else | ||
183 | ++ncreated; | ||
184 | } | ||
169 | } | 185 | } |
186 | |||
187 | return ncreated; | ||
170 | } | 188 | } |
171 | 189 | ||
172 | /* | 190 | /* |
@@ -190,25 +208,28 @@ static void read_counter(int counter) | |||
190 | unsigned int cpu; | 208 | unsigned int cpu; |
191 | size_t res, nv; | 209 | size_t res, nv; |
192 | int scaled; | 210 | int scaled; |
193 | int i; | 211 | int i, thread; |
194 | 212 | ||
195 | count[0] = count[1] = count[2] = 0; | 213 | count[0] = count[1] = count[2] = 0; |
196 | 214 | ||
197 | nv = scale ? 3 : 1; | 215 | nv = scale ? 3 : 1; |
198 | for (cpu = 0; cpu < nr_cpus; cpu++) { | 216 | for (cpu = 0; cpu < nr_cpus; cpu++) { |
199 | if (fd[cpu][counter] < 0) | 217 | for (thread = 0; thread < thread_num; thread++) { |
200 | continue; | 218 | if (fd[cpu][counter][thread] < 0) |
201 | 219 | continue; | |
202 | res = read(fd[cpu][counter], single_count, nv * sizeof(u64)); | 220 | |
203 | assert(res == nv * sizeof(u64)); | 221 | res = read(fd[cpu][counter][thread], |
204 | 222 | single_count, nv * sizeof(u64)); | |
205 | close(fd[cpu][counter]); | 223 | assert(res == nv * sizeof(u64)); |
206 | fd[cpu][counter] = -1; | 224 | |
207 | 225 | close(fd[cpu][counter][thread]); | |
208 | count[0] += single_count[0]; | 226 | fd[cpu][counter][thread] = -1; |
209 | if (scale) { | 227 | |
210 | count[1] += single_count[1]; | 228 | count[0] += single_count[0]; |
211 | count[2] += single_count[2]; | 229 | if (scale) { |
230 | count[1] += single_count[1]; | ||
231 | count[2] += single_count[2]; | ||
232 | } | ||
212 | } | 233 | } |
213 | } | 234 | } |
214 | 235 | ||
@@ -250,10 +271,9 @@ static int run_perf_stat(int argc __used, const char **argv) | |||
250 | { | 271 | { |
251 | unsigned long long t0, t1; | 272 | unsigned long long t0, t1; |
252 | int status = 0; | 273 | int status = 0; |
253 | int counter; | 274 | int counter, ncreated = 0; |
254 | int pid = target_pid; | ||
255 | int child_ready_pipe[2], go_pipe[2]; | 275 | int child_ready_pipe[2], go_pipe[2]; |
256 | const bool forks = (target_pid == -1 && argc > 0); | 276 | const bool forks = (argc > 0); |
257 | char buf; | 277 | char buf; |
258 | 278 | ||
259 | if (!system_wide) | 279 | if (!system_wide) |
@@ -265,10 +285,10 @@ static int run_perf_stat(int argc __used, const char **argv) | |||
265 | } | 285 | } |
266 | 286 | ||
267 | if (forks) { | 287 | if (forks) { |
268 | if ((pid = fork()) < 0) | 288 | if ((child_pid = fork()) < 0) |
269 | perror("failed to fork"); | 289 | perror("failed to fork"); |
270 | 290 | ||
271 | if (!pid) { | 291 | if (!child_pid) { |
272 | close(child_ready_pipe[0]); | 292 | close(child_ready_pipe[0]); |
273 | close(go_pipe[1]); | 293 | close(go_pipe[1]); |
274 | fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC); | 294 | fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC); |
@@ -297,7 +317,8 @@ static int run_perf_stat(int argc __used, const char **argv) | |||
297 | exit(-1); | 317 | exit(-1); |
298 | } | 318 | } |
299 | 319 | ||
300 | child_pid = pid; | 320 | if (target_tid == -1 && target_pid == -1 && !system_wide) |
321 | all_tids[0] = child_pid; | ||
301 | 322 | ||
302 | /* | 323 | /* |
303 | * Wait for the child to be ready to exec. | 324 | * Wait for the child to be ready to exec. |
@@ -310,7 +331,16 @@ static int run_perf_stat(int argc __used, const char **argv) | |||
310 | } | 331 | } |
311 | 332 | ||
312 | for (counter = 0; counter < nr_counters; counter++) | 333 | for (counter = 0; counter < nr_counters; counter++) |
313 | create_perf_stat_counter(counter, pid); | 334 | ncreated += create_perf_stat_counter(counter); |
335 | |||
336 | if (ncreated == 0) { | ||
337 | pr_err("No permission to collect %sstats.\n" | ||
338 | "Consider tweaking /proc/sys/kernel/perf_event_paranoid.\n", | ||
339 | system_wide ? "system-wide " : ""); | ||
340 | if (child_pid != -1) | ||
341 | kill(child_pid, SIGTERM); | ||
342 | return -1; | ||
343 | } | ||
314 | 344 | ||
315 | /* | 345 | /* |
316 | * Enable counters and exec the command: | 346 | * Enable counters and exec the command: |
@@ -321,7 +351,7 @@ static int run_perf_stat(int argc __used, const char **argv) | |||
321 | close(go_pipe[1]); | 351 | close(go_pipe[1]); |
322 | wait(&status); | 352 | wait(&status); |
323 | } else { | 353 | } else { |
324 | while(!done); | 354 | while(!done) sleep(1); |
325 | } | 355 | } |
326 | 356 | ||
327 | t1 = rdclock(); | 357 | t1 = rdclock(); |
@@ -429,12 +459,14 @@ static void print_stat(int argc, const char **argv) | |||
429 | 459 | ||
430 | fprintf(stderr, "\n"); | 460 | fprintf(stderr, "\n"); |
431 | fprintf(stderr, " Performance counter stats for "); | 461 | fprintf(stderr, " Performance counter stats for "); |
432 | if(target_pid == -1) { | 462 | if(target_pid == -1 && target_tid == -1) { |
433 | fprintf(stderr, "\'%s", argv[0]); | 463 | fprintf(stderr, "\'%s", argv[0]); |
434 | for (i = 1; i < argc; i++) | 464 | for (i = 1; i < argc; i++) |
435 | fprintf(stderr, " %s", argv[i]); | 465 | fprintf(stderr, " %s", argv[i]); |
436 | }else | 466 | } else if (target_pid != -1) |
437 | fprintf(stderr, "task pid \'%d", target_pid); | 467 | fprintf(stderr, "process id \'%d", target_pid); |
468 | else | ||
469 | fprintf(stderr, "thread id \'%d", target_tid); | ||
438 | 470 | ||
439 | fprintf(stderr, "\'"); | 471 | fprintf(stderr, "\'"); |
440 | if (run_count > 1) | 472 | if (run_count > 1) |
@@ -459,7 +491,7 @@ static volatile int signr = -1; | |||
459 | 491 | ||
460 | static void skip_signal(int signo) | 492 | static void skip_signal(int signo) |
461 | { | 493 | { |
462 | if(target_pid != -1) | 494 | if(child_pid == -1) |
463 | done = 1; | 495 | done = 1; |
464 | 496 | ||
465 | signr = signo; | 497 | signr = signo; |
@@ -486,15 +518,17 @@ static const struct option options[] = { | |||
486 | OPT_CALLBACK('e', "event", NULL, "event", | 518 | OPT_CALLBACK('e', "event", NULL, "event", |
487 | "event selector. use 'perf list' to list available events", | 519 | "event selector. use 'perf list' to list available events", |
488 | parse_events), | 520 | parse_events), |
489 | OPT_BOOLEAN('i', "inherit", &inherit, | 521 | OPT_BOOLEAN('i', "no-inherit", &no_inherit, |
490 | "child tasks inherit counters"), | 522 | "child tasks do not inherit counters"), |
491 | OPT_INTEGER('p', "pid", &target_pid, | 523 | OPT_INTEGER('p', "pid", &target_pid, |
492 | "stat events on existing pid"), | 524 | "stat events on existing process id"), |
525 | OPT_INTEGER('t', "tid", &target_tid, | ||
526 | "stat events on existing thread id"), | ||
493 | OPT_BOOLEAN('a', "all-cpus", &system_wide, | 527 | OPT_BOOLEAN('a', "all-cpus", &system_wide, |
494 | "system-wide collection from all CPUs"), | 528 | "system-wide collection from all CPUs"), |
495 | OPT_BOOLEAN('c', "scale", &scale, | 529 | OPT_BOOLEAN('c', "scale", &scale, |
496 | "scale/normalize counters"), | 530 | "scale/normalize counters"), |
497 | OPT_BOOLEAN('v', "verbose", &verbose, | 531 | OPT_INCR('v', "verbose", &verbose, |
498 | "be more verbose (show counter open errors, etc)"), | 532 | "be more verbose (show counter open errors, etc)"), |
499 | OPT_INTEGER('r', "repeat", &run_count, | 533 | OPT_INTEGER('r', "repeat", &run_count, |
500 | "repeat command and print average + stddev (max: 100)"), | 534 | "repeat command and print average + stddev (max: 100)"), |
@@ -506,10 +540,11 @@ static const struct option options[] = { | |||
506 | int cmd_stat(int argc, const char **argv, const char *prefix __used) | 540 | int cmd_stat(int argc, const char **argv, const char *prefix __used) |
507 | { | 541 | { |
508 | int status; | 542 | int status; |
543 | int i,j; | ||
509 | 544 | ||
510 | argc = parse_options(argc, argv, options, stat_usage, | 545 | argc = parse_options(argc, argv, options, stat_usage, |
511 | PARSE_OPT_STOP_AT_NON_OPTION); | 546 | PARSE_OPT_STOP_AT_NON_OPTION); |
512 | if (!argc && target_pid == -1) | 547 | if (!argc && target_pid == -1 && target_tid == -1) |
513 | usage_with_options(stat_usage, options); | 548 | usage_with_options(stat_usage, options); |
514 | if (run_count <= 0) | 549 | if (run_count <= 0) |
515 | usage_with_options(stat_usage, options); | 550 | usage_with_options(stat_usage, options); |
@@ -525,6 +560,31 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used) | |||
525 | else | 560 | else |
526 | nr_cpus = 1; | 561 | nr_cpus = 1; |
527 | 562 | ||
563 | if (target_pid != -1) { | ||
564 | target_tid = target_pid; | ||
565 | thread_num = find_all_tid(target_pid, &all_tids); | ||
566 | if (thread_num <= 0) { | ||
567 | fprintf(stderr, "Can't find all threads of pid %d\n", | ||
568 | target_pid); | ||
569 | usage_with_options(stat_usage, options); | ||
570 | } | ||
571 | } else { | ||
572 | all_tids=malloc(sizeof(pid_t)); | ||
573 | if (!all_tids) | ||
574 | return -ENOMEM; | ||
575 | |||
576 | all_tids[0] = target_tid; | ||
577 | thread_num = 1; | ||
578 | } | ||
579 | |||
580 | for (i = 0; i < MAX_NR_CPUS; i++) { | ||
581 | for (j = 0; j < MAX_COUNTERS; j++) { | ||
582 | fd[i][j] = malloc(sizeof(int)*thread_num); | ||
583 | if (!fd[i][j]) | ||
584 | return -ENOMEM; | ||
585 | } | ||
586 | } | ||
587 | |||
528 | /* | 588 | /* |
529 | * We dont want to block the signals - that would cause | 589 | * We dont want to block the signals - that would cause |
530 | * child tasks to inherit that and Ctrl-C would not work. | 590 | * child tasks to inherit that and Ctrl-C would not work. |
@@ -543,7 +603,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used) | |||
543 | status = run_perf_stat(argc, argv); | 603 | status = run_perf_stat(argc, argv); |
544 | } | 604 | } |
545 | 605 | ||
546 | print_stat(argc, argv); | 606 | if (status != -1) |
607 | print_stat(argc, argv); | ||
547 | 608 | ||
548 | return status; | 609 | return status; |
549 | } | 610 | } |