diff options
author | Zhang, Yanmin <yanmin_zhang@linux.intel.com> | 2010-03-18 10:36:05 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2010-03-18 11:21:12 -0400 |
commit | d6d901c23a9c4c7361aa901b5b2dda69703dd5e0 (patch) | |
tree | 601fc2cafac552c80b8456c8dd4b9964171552db /tools/perf/builtin-stat.c | |
parent | 46be604b5ba738d53e5f5314813a4e7092864baf (diff) |
perf events: Change perf parameter --pid to process-wide collection instead of thread-wide
Parameter --pid (or -p) of perf currently means a thread-wide
collection. For example, if a process whose id is 8888 has 10
threads, 'perf top -p 8888' just collects the main thread
statistics. That's misleading. Users are used to attaching to a whole
process when debugging it with gdb. To follow that normal usage
style, this patch changes --pid to mean process-wide collection and adds
--tid (-t) to mean thread-wide collection.
Usage example is:
# perf top -p 8888
# perf record -p 8888 -f sleep 10
# perf stat -p 8888 -f sleep 10
The above commands collect the statistics of all threads of process
8888.
Signed-off-by: Zhang Yanmin <yanmin_zhang@linux.intel.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Avi Kivity <avi@redhat.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Sheng Yang <sheng@linux.intel.com>
Cc: Joerg Roedel <joro@8bytes.org>
Cc: Jes Sorensen <Jes.Sorensen@redhat.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Gleb Natapov <gleb@redhat.com>
Cc: zhiteng.huang@intel.com
Cc: Zachary Amsden <zamsden@redhat.com>
LKML-Reference: <1268922965-14774-3-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'tools/perf/builtin-stat.c')
-rw-r--r-- | tools/perf/builtin-stat.c | 110 |
1 files changed, 75 insertions, 35 deletions
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 5f41244cbbf2..c92f90ff5a9f 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c | |||
@@ -46,6 +46,7 @@ | |||
46 | #include "util/debug.h" | 46 | #include "util/debug.h" |
47 | #include "util/header.h" | 47 | #include "util/header.h" |
48 | #include "util/cpumap.h" | 48 | #include "util/cpumap.h" |
49 | #include "util/thread.h" | ||
49 | 50 | ||
50 | #include <sys/prctl.h> | 51 | #include <sys/prctl.h> |
51 | #include <math.h> | 52 | #include <math.h> |
@@ -74,10 +75,13 @@ static int run_count = 1; | |||
74 | static int inherit = 1; | 75 | static int inherit = 1; |
75 | static int scale = 1; | 76 | static int scale = 1; |
76 | static pid_t target_pid = -1; | 77 | static pid_t target_pid = -1; |
78 | static pid_t target_tid = -1; | ||
79 | static pid_t *all_tids = NULL; | ||
80 | static int thread_num = 0; | ||
77 | static pid_t child_pid = -1; | 81 | static pid_t child_pid = -1; |
78 | static int null_run = 0; | 82 | static int null_run = 0; |
79 | 83 | ||
80 | static int fd[MAX_NR_CPUS][MAX_COUNTERS]; | 84 | static int *fd[MAX_NR_CPUS][MAX_COUNTERS]; |
81 | 85 | ||
82 | static int event_scaled[MAX_COUNTERS]; | 86 | static int event_scaled[MAX_COUNTERS]; |
83 | 87 | ||
@@ -140,9 +144,10 @@ struct stats runtime_branches_stats; | |||
140 | #define ERR_PERF_OPEN \ | 144 | #define ERR_PERF_OPEN \ |
141 | "Error: counter %d, sys_perf_event_open() syscall returned with %d (%s)\n" | 145 | "Error: counter %d, sys_perf_event_open() syscall returned with %d (%s)\n" |
142 | 146 | ||
143 | static void create_perf_stat_counter(int counter, int pid) | 147 | static void create_perf_stat_counter(int counter) |
144 | { | 148 | { |
145 | struct perf_event_attr *attr = attrs + counter; | 149 | struct perf_event_attr *attr = attrs + counter; |
150 | int thread; | ||
146 | 151 | ||
147 | if (scale) | 152 | if (scale) |
148 | attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | | 153 | attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | |
@@ -152,10 +157,11 @@ static void create_perf_stat_counter(int counter, int pid) | |||
152 | unsigned int cpu; | 157 | unsigned int cpu; |
153 | 158 | ||
154 | for (cpu = 0; cpu < nr_cpus; cpu++) { | 159 | for (cpu = 0; cpu < nr_cpus; cpu++) { |
155 | fd[cpu][counter] = sys_perf_event_open(attr, -1, cpumap[cpu], -1, 0); | 160 | fd[cpu][counter][0] = sys_perf_event_open(attr, |
156 | if (fd[cpu][counter] < 0 && verbose) | 161 | -1, cpumap[cpu], -1, 0); |
162 | if (fd[cpu][counter][0] < 0 && verbose) | ||
157 | fprintf(stderr, ERR_PERF_OPEN, counter, | 163 | fprintf(stderr, ERR_PERF_OPEN, counter, |
158 | fd[cpu][counter], strerror(errno)); | 164 | fd[cpu][counter][0], strerror(errno)); |
159 | } | 165 | } |
160 | } else { | 166 | } else { |
161 | attr->inherit = inherit; | 167 | attr->inherit = inherit; |
@@ -163,11 +169,14 @@ static void create_perf_stat_counter(int counter, int pid) | |||
163 | attr->disabled = 1; | 169 | attr->disabled = 1; |
164 | attr->enable_on_exec = 1; | 170 | attr->enable_on_exec = 1; |
165 | } | 171 | } |
166 | 172 | for (thread = 0; thread < thread_num; thread++) { | |
167 | fd[0][counter] = sys_perf_event_open(attr, pid, -1, -1, 0); | 173 | fd[0][counter][thread] = sys_perf_event_open(attr, |
168 | if (fd[0][counter] < 0 && verbose) | 174 | all_tids[thread], -1, -1, 0); |
169 | fprintf(stderr, ERR_PERF_OPEN, counter, | 175 | if (fd[0][counter][thread] < 0 && verbose) |
170 | fd[0][counter], strerror(errno)); | 176 | fprintf(stderr, ERR_PERF_OPEN, counter, |
177 | fd[0][counter][thread], | ||
178 | strerror(errno)); | ||
179 | } | ||
171 | } | 180 | } |
172 | } | 181 | } |
173 | 182 | ||
@@ -192,25 +201,28 @@ static void read_counter(int counter) | |||
192 | unsigned int cpu; | 201 | unsigned int cpu; |
193 | size_t res, nv; | 202 | size_t res, nv; |
194 | int scaled; | 203 | int scaled; |
195 | int i; | 204 | int i, thread; |
196 | 205 | ||
197 | count[0] = count[1] = count[2] = 0; | 206 | count[0] = count[1] = count[2] = 0; |
198 | 207 | ||
199 | nv = scale ? 3 : 1; | 208 | nv = scale ? 3 : 1; |
200 | for (cpu = 0; cpu < nr_cpus; cpu++) { | 209 | for (cpu = 0; cpu < nr_cpus; cpu++) { |
201 | if (fd[cpu][counter] < 0) | 210 | for (thread = 0; thread < thread_num; thread++) { |
202 | continue; | 211 | if (fd[cpu][counter][thread] < 0) |
203 | 212 | continue; | |
204 | res = read(fd[cpu][counter], single_count, nv * sizeof(u64)); | 213 | |
205 | assert(res == nv * sizeof(u64)); | 214 | res = read(fd[cpu][counter][thread], |
206 | 215 | single_count, nv * sizeof(u64)); | |
207 | close(fd[cpu][counter]); | 216 | assert(res == nv * sizeof(u64)); |
208 | fd[cpu][counter] = -1; | 217 | |
209 | 218 | close(fd[cpu][counter][thread]); | |
210 | count[0] += single_count[0]; | 219 | fd[cpu][counter][thread] = -1; |
211 | if (scale) { | 220 | |
212 | count[1] += single_count[1]; | 221 | count[0] += single_count[0]; |
213 | count[2] += single_count[2]; | 222 | if (scale) { |
223 | count[1] += single_count[1]; | ||
224 | count[2] += single_count[2]; | ||
225 | } | ||
214 | } | 226 | } |
215 | } | 227 | } |
216 | 228 | ||
@@ -253,7 +265,6 @@ static int run_perf_stat(int argc __used, const char **argv) | |||
253 | unsigned long long t0, t1; | 265 | unsigned long long t0, t1; |
254 | int status = 0; | 266 | int status = 0; |
255 | int counter; | 267 | int counter; |
256 | int pid; | ||
257 | int child_ready_pipe[2], go_pipe[2]; | 268 | int child_ready_pipe[2], go_pipe[2]; |
258 | const bool forks = (argc > 0); | 269 | const bool forks = (argc > 0); |
259 | char buf; | 270 | char buf; |
@@ -299,6 +310,9 @@ static int run_perf_stat(int argc __used, const char **argv) | |||
299 | exit(-1); | 310 | exit(-1); |
300 | } | 311 | } |
301 | 312 | ||
313 | if (target_tid == -1 && target_pid == -1 && !system_wide) | ||
314 | all_tids[0] = child_pid; | ||
315 | |||
302 | /* | 316 | /* |
303 | * Wait for the child to be ready to exec. | 317 | * Wait for the child to be ready to exec. |
304 | */ | 318 | */ |
@@ -309,12 +323,8 @@ static int run_perf_stat(int argc __used, const char **argv) | |||
309 | close(child_ready_pipe[0]); | 323 | close(child_ready_pipe[0]); |
310 | } | 324 | } |
311 | 325 | ||
312 | if (target_pid == -1) | ||
313 | pid = child_pid; | ||
314 | else | ||
315 | pid = target_pid; | ||
316 | for (counter = 0; counter < nr_counters; counter++) | 326 | for (counter = 0; counter < nr_counters; counter++) |
317 | create_perf_stat_counter(counter, pid); | 327 | create_perf_stat_counter(counter); |
318 | 328 | ||
319 | /* | 329 | /* |
320 | * Enable counters and exec the command: | 330 | * Enable counters and exec the command: |
@@ -433,12 +443,14 @@ static void print_stat(int argc, const char **argv) | |||
433 | 443 | ||
434 | fprintf(stderr, "\n"); | 444 | fprintf(stderr, "\n"); |
435 | fprintf(stderr, " Performance counter stats for "); | 445 | fprintf(stderr, " Performance counter stats for "); |
436 | if(target_pid == -1) { | 446 | if(target_pid == -1 && target_tid == -1) { |
437 | fprintf(stderr, "\'%s", argv[0]); | 447 | fprintf(stderr, "\'%s", argv[0]); |
438 | for (i = 1; i < argc; i++) | 448 | for (i = 1; i < argc; i++) |
439 | fprintf(stderr, " %s", argv[i]); | 449 | fprintf(stderr, " %s", argv[i]); |
440 | }else | 450 | } else if (target_pid != -1) |
441 | fprintf(stderr, "task pid \'%d", target_pid); | 451 | fprintf(stderr, "process id \'%d", target_pid); |
452 | else | ||
453 | fprintf(stderr, "thread id \'%d", target_tid); | ||
442 | 454 | ||
443 | fprintf(stderr, "\'"); | 455 | fprintf(stderr, "\'"); |
444 | if (run_count > 1) | 456 | if (run_count > 1) |
@@ -493,7 +505,9 @@ static const struct option options[] = { | |||
493 | OPT_BOOLEAN('i', "inherit", &inherit, | 505 | OPT_BOOLEAN('i', "inherit", &inherit, |
494 | "child tasks inherit counters"), | 506 | "child tasks inherit counters"), |
495 | OPT_INTEGER('p', "pid", &target_pid, | 507 | OPT_INTEGER('p', "pid", &target_pid, |
496 | "stat events on existing pid"), | 508 | "stat events on existing process id"), |
509 | OPT_INTEGER('t', "tid", &target_tid, | ||
510 | "stat events on existing thread id"), | ||
497 | OPT_BOOLEAN('a', "all-cpus", &system_wide, | 511 | OPT_BOOLEAN('a', "all-cpus", &system_wide, |
498 | "system-wide collection from all CPUs"), | 512 | "system-wide collection from all CPUs"), |
499 | OPT_BOOLEAN('c', "scale", &scale, | 513 | OPT_BOOLEAN('c', "scale", &scale, |
@@ -510,10 +524,11 @@ static const struct option options[] = { | |||
510 | int cmd_stat(int argc, const char **argv, const char *prefix __used) | 524 | int cmd_stat(int argc, const char **argv, const char *prefix __used) |
511 | { | 525 | { |
512 | int status; | 526 | int status; |
527 | int i,j; | ||
513 | 528 | ||
514 | argc = parse_options(argc, argv, options, stat_usage, | 529 | argc = parse_options(argc, argv, options, stat_usage, |
515 | PARSE_OPT_STOP_AT_NON_OPTION); | 530 | PARSE_OPT_STOP_AT_NON_OPTION); |
516 | if (!argc && target_pid == -1) | 531 | if (!argc && target_pid == -1 && target_tid == -1) |
517 | usage_with_options(stat_usage, options); | 532 | usage_with_options(stat_usage, options); |
518 | if (run_count <= 0) | 533 | if (run_count <= 0) |
519 | usage_with_options(stat_usage, options); | 534 | usage_with_options(stat_usage, options); |
@@ -529,6 +544,31 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used) | |||
529 | else | 544 | else |
530 | nr_cpus = 1; | 545 | nr_cpus = 1; |
531 | 546 | ||
547 | if (target_pid != -1) { | ||
548 | target_tid = target_pid; | ||
549 | thread_num = find_all_tid(target_pid, &all_tids); | ||
550 | if (thread_num <= 0) { | ||
551 | fprintf(stderr, "Can't find all threads of pid %d\n", | ||
552 | target_pid); | ||
553 | usage_with_options(stat_usage, options); | ||
554 | } | ||
555 | } else { | ||
556 | all_tids=malloc(sizeof(pid_t)); | ||
557 | if (!all_tids) | ||
558 | return -ENOMEM; | ||
559 | |||
560 | all_tids[0] = target_tid; | ||
561 | thread_num = 1; | ||
562 | } | ||
563 | |||
564 | for (i = 0; i < MAX_NR_CPUS; i++) { | ||
565 | for (j = 0; j < MAX_COUNTERS; j++) { | ||
566 | fd[i][j] = malloc(sizeof(int)*thread_num); | ||
567 | if (!fd[i][j]) | ||
568 | return -ENOMEM; | ||
569 | } | ||
570 | } | ||
571 | |||
532 | /* | 572 | /* |
533 | * We dont want to block the signals - that would cause | 573 | * We dont want to block the signals - that would cause |
534 | * child tasks to inherit that and Ctrl-C would not work. | 574 | * child tasks to inherit that and Ctrl-C would not work. |