about | summary | refs | log | tree | commit | diff | stats
path: root/tools/perf/builtin-stat.c
diff options
context:
space:
mode:
author: Zhang, Yanmin <yanmin_zhang@linux.intel.com> 2010-03-18 10:36:05 -0400
committer: Ingo Molnar <mingo@elte.hu> 2010-03-18 11:21:12 -0400
commit: d6d901c23a9c4c7361aa901b5b2dda69703dd5e0 (patch)
tree: 601fc2cafac552c80b8456c8dd4b9964171552db /tools/perf/builtin-stat.c
parent: 46be604b5ba738d53e5f5314813a4e7092864baf (diff)
perf events: Change perf parameter --pid to process-wide collection instead of thread-wide
Parameter --pid (or -p) of perf currently means a thread-wide collection. For example, if a process whose id is 8888 has 10 threads, 'perf top -p 8888' just collects the main thread's statistics. That's misleading. Users are used to attaching to a whole process when debugging it with gdb. To follow the normal usage style, the patch changes --pid to a process-wide collection and adds --tid (-t) to mean a thread-wide collection.

Usage examples:
 # perf top -p 8888
 # perf record -p 8888 -f sleep 10
 # perf stat -p 8888 -f sleep 10

The above commands collect the statistics of all threads of process 8888.

Signed-off-by: Zhang Yanmin <yanmin_zhang@linux.intel.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Avi Kivity <avi@redhat.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Sheng Yang <sheng@linux.intel.com>
Cc: Joerg Roedel <joro@8bytes.org>
Cc: Jes Sorensen <Jes.Sorensen@redhat.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Gleb Natapov <gleb@redhat.com>
Cc: zhiteng.huang@intel.com
Cc: Zachary Amsden <zamsden@redhat.com>
LKML-Reference: <1268922965-14774-3-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'tools/perf/builtin-stat.c')
-rw-r--r-- tools/perf/builtin-stat.c 110
1 files changed, 75 insertions, 35 deletions
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 5f41244cbbf2..c92f90ff5a9f 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -46,6 +46,7 @@
46#include "util/debug.h" 46#include "util/debug.h"
47#include "util/header.h" 47#include "util/header.h"
48#include "util/cpumap.h" 48#include "util/cpumap.h"
49#include "util/thread.h"
49 50
50#include <sys/prctl.h> 51#include <sys/prctl.h>
51#include <math.h> 52#include <math.h>
@@ -74,10 +75,13 @@ static int run_count = 1;
74static int inherit = 1; 75static int inherit = 1;
75static int scale = 1; 76static int scale = 1;
76static pid_t target_pid = -1; 77static pid_t target_pid = -1;
78static pid_t target_tid = -1;
79static pid_t *all_tids = NULL;
80static int thread_num = 0;
77static pid_t child_pid = -1; 81static pid_t child_pid = -1;
78static int null_run = 0; 82static int null_run = 0;
79 83
80static int fd[MAX_NR_CPUS][MAX_COUNTERS]; 84static int *fd[MAX_NR_CPUS][MAX_COUNTERS];
81 85
82static int event_scaled[MAX_COUNTERS]; 86static int event_scaled[MAX_COUNTERS];
83 87
@@ -140,9 +144,10 @@ struct stats runtime_branches_stats;
140#define ERR_PERF_OPEN \ 144#define ERR_PERF_OPEN \
141"Error: counter %d, sys_perf_event_open() syscall returned with %d (%s)\n" 145"Error: counter %d, sys_perf_event_open() syscall returned with %d (%s)\n"
142 146
143static void create_perf_stat_counter(int counter, int pid) 147static void create_perf_stat_counter(int counter)
144{ 148{
145 struct perf_event_attr *attr = attrs + counter; 149 struct perf_event_attr *attr = attrs + counter;
150 int thread;
146 151
147 if (scale) 152 if (scale)
148 attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | 153 attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
@@ -152,10 +157,11 @@ static void create_perf_stat_counter(int counter, int pid)
152 unsigned int cpu; 157 unsigned int cpu;
153 158
154 for (cpu = 0; cpu < nr_cpus; cpu++) { 159 for (cpu = 0; cpu < nr_cpus; cpu++) {
155 fd[cpu][counter] = sys_perf_event_open(attr, -1, cpumap[cpu], -1, 0); 160 fd[cpu][counter][0] = sys_perf_event_open(attr,
156 if (fd[cpu][counter] < 0 && verbose) 161 -1, cpumap[cpu], -1, 0);
162 if (fd[cpu][counter][0] < 0 && verbose)
157 fprintf(stderr, ERR_PERF_OPEN, counter, 163 fprintf(stderr, ERR_PERF_OPEN, counter,
158 fd[cpu][counter], strerror(errno)); 164 fd[cpu][counter][0], strerror(errno));
159 } 165 }
160 } else { 166 } else {
161 attr->inherit = inherit; 167 attr->inherit = inherit;
@@ -163,11 +169,14 @@ static void create_perf_stat_counter(int counter, int pid)
163 attr->disabled = 1; 169 attr->disabled = 1;
164 attr->enable_on_exec = 1; 170 attr->enable_on_exec = 1;
165 } 171 }
166 172 for (thread = 0; thread < thread_num; thread++) {
167 fd[0][counter] = sys_perf_event_open(attr, pid, -1, -1, 0); 173 fd[0][counter][thread] = sys_perf_event_open(attr,
168 if (fd[0][counter] < 0 && verbose) 174 all_tids[thread], -1, -1, 0);
169 fprintf(stderr, ERR_PERF_OPEN, counter, 175 if (fd[0][counter][thread] < 0 && verbose)
170 fd[0][counter], strerror(errno)); 176 fprintf(stderr, ERR_PERF_OPEN, counter,
177 fd[0][counter][thread],
178 strerror(errno));
179 }
171 } 180 }
172} 181}
173 182
@@ -192,25 +201,28 @@ static void read_counter(int counter)
192 unsigned int cpu; 201 unsigned int cpu;
193 size_t res, nv; 202 size_t res, nv;
194 int scaled; 203 int scaled;
195 int i; 204 int i, thread;
196 205
197 count[0] = count[1] = count[2] = 0; 206 count[0] = count[1] = count[2] = 0;
198 207
199 nv = scale ? 3 : 1; 208 nv = scale ? 3 : 1;
200 for (cpu = 0; cpu < nr_cpus; cpu++) { 209 for (cpu = 0; cpu < nr_cpus; cpu++) {
201 if (fd[cpu][counter] < 0) 210 for (thread = 0; thread < thread_num; thread++) {
202 continue; 211 if (fd[cpu][counter][thread] < 0)
203 212 continue;
204 res = read(fd[cpu][counter], single_count, nv * sizeof(u64)); 213
205 assert(res == nv * sizeof(u64)); 214 res = read(fd[cpu][counter][thread],
206 215 single_count, nv * sizeof(u64));
207 close(fd[cpu][counter]); 216 assert(res == nv * sizeof(u64));
208 fd[cpu][counter] = -1; 217
209 218 close(fd[cpu][counter][thread]);
210 count[0] += single_count[0]; 219 fd[cpu][counter][thread] = -1;
211 if (scale) { 220
212 count[1] += single_count[1]; 221 count[0] += single_count[0];
213 count[2] += single_count[2]; 222 if (scale) {
223 count[1] += single_count[1];
224 count[2] += single_count[2];
225 }
214 } 226 }
215 } 227 }
216 228
@@ -253,7 +265,6 @@ static int run_perf_stat(int argc __used, const char **argv)
253 unsigned long long t0, t1; 265 unsigned long long t0, t1;
254 int status = 0; 266 int status = 0;
255 int counter; 267 int counter;
256 int pid;
257 int child_ready_pipe[2], go_pipe[2]; 268 int child_ready_pipe[2], go_pipe[2];
258 const bool forks = (argc > 0); 269 const bool forks = (argc > 0);
259 char buf; 270 char buf;
@@ -299,6 +310,9 @@ static int run_perf_stat(int argc __used, const char **argv)
299 exit(-1); 310 exit(-1);
300 } 311 }
301 312
313 if (target_tid == -1 && target_pid == -1 && !system_wide)
314 all_tids[0] = child_pid;
315
302 /* 316 /*
303 * Wait for the child to be ready to exec. 317 * Wait for the child to be ready to exec.
304 */ 318 */
@@ -309,12 +323,8 @@ static int run_perf_stat(int argc __used, const char **argv)
309 close(child_ready_pipe[0]); 323 close(child_ready_pipe[0]);
310 } 324 }
311 325
312 if (target_pid == -1)
313 pid = child_pid;
314 else
315 pid = target_pid;
316 for (counter = 0; counter < nr_counters; counter++) 326 for (counter = 0; counter < nr_counters; counter++)
317 create_perf_stat_counter(counter, pid); 327 create_perf_stat_counter(counter);
318 328
319 /* 329 /*
320 * Enable counters and exec the command: 330 * Enable counters and exec the command:
@@ -433,12 +443,14 @@ static void print_stat(int argc, const char **argv)
433 443
434 fprintf(stderr, "\n"); 444 fprintf(stderr, "\n");
435 fprintf(stderr, " Performance counter stats for "); 445 fprintf(stderr, " Performance counter stats for ");
436 if(target_pid == -1) { 446 if(target_pid == -1 && target_tid == -1) {
437 fprintf(stderr, "\'%s", argv[0]); 447 fprintf(stderr, "\'%s", argv[0]);
438 for (i = 1; i < argc; i++) 448 for (i = 1; i < argc; i++)
439 fprintf(stderr, " %s", argv[i]); 449 fprintf(stderr, " %s", argv[i]);
440 }else 450 } else if (target_pid != -1)
441 fprintf(stderr, "task pid \'%d", target_pid); 451 fprintf(stderr, "process id \'%d", target_pid);
452 else
453 fprintf(stderr, "thread id \'%d", target_tid);
442 454
443 fprintf(stderr, "\'"); 455 fprintf(stderr, "\'");
444 if (run_count > 1) 456 if (run_count > 1)
@@ -493,7 +505,9 @@ static const struct option options[] = {
493 OPT_BOOLEAN('i', "inherit", &inherit, 505 OPT_BOOLEAN('i', "inherit", &inherit,
494 "child tasks inherit counters"), 506 "child tasks inherit counters"),
495 OPT_INTEGER('p', "pid", &target_pid, 507 OPT_INTEGER('p', "pid", &target_pid,
496 "stat events on existing pid"), 508 "stat events on existing process id"),
509 OPT_INTEGER('t', "tid", &target_tid,
510 "stat events on existing thread id"),
497 OPT_BOOLEAN('a', "all-cpus", &system_wide, 511 OPT_BOOLEAN('a', "all-cpus", &system_wide,
498 "system-wide collection from all CPUs"), 512 "system-wide collection from all CPUs"),
499 OPT_BOOLEAN('c', "scale", &scale, 513 OPT_BOOLEAN('c', "scale", &scale,
@@ -510,10 +524,11 @@ static const struct option options[] = {
510int cmd_stat(int argc, const char **argv, const char *prefix __used) 524int cmd_stat(int argc, const char **argv, const char *prefix __used)
511{ 525{
512 int status; 526 int status;
527 int i,j;
513 528
514 argc = parse_options(argc, argv, options, stat_usage, 529 argc = parse_options(argc, argv, options, stat_usage,
515 PARSE_OPT_STOP_AT_NON_OPTION); 530 PARSE_OPT_STOP_AT_NON_OPTION);
516 if (!argc && target_pid == -1) 531 if (!argc && target_pid == -1 && target_tid == -1)
517 usage_with_options(stat_usage, options); 532 usage_with_options(stat_usage, options);
518 if (run_count <= 0) 533 if (run_count <= 0)
519 usage_with_options(stat_usage, options); 534 usage_with_options(stat_usage, options);
@@ -529,6 +544,31 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used)
529 else 544 else
530 nr_cpus = 1; 545 nr_cpus = 1;
531 546
547 if (target_pid != -1) {
548 target_tid = target_pid;
549 thread_num = find_all_tid(target_pid, &all_tids);
550 if (thread_num <= 0) {
551 fprintf(stderr, "Can't find all threads of pid %d\n",
552 target_pid);
553 usage_with_options(stat_usage, options);
554 }
555 } else {
556 all_tids=malloc(sizeof(pid_t));
557 if (!all_tids)
558 return -ENOMEM;
559
560 all_tids[0] = target_tid;
561 thread_num = 1;
562 }
563
564 for (i = 0; i < MAX_NR_CPUS; i++) {
565 for (j = 0; j < MAX_COUNTERS; j++) {
566 fd[i][j] = malloc(sizeof(int)*thread_num);
567 if (!fd[i][j])
568 return -ENOMEM;
569 }
570 }
571
532 /* 572 /*
533 * We dont want to block the signals - that would cause 573 * We dont want to block the signals - that would cause
534 * child tasks to inherit that and Ctrl-C would not work. 574 * child tasks to inherit that and Ctrl-C would not work.