aboutsummaryrefslogtreecommitdiffstats
path: root/tools/perf/builtin-stat.c
diff options
context:
space:
mode:
Diffstat (limited to 'tools/perf/builtin-stat.c')
-rw-r--r--tools/perf/builtin-stat.c161
1 files changed, 111 insertions, 50 deletions
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 95db31cff6fd..ff8c413b7e73 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -46,6 +46,7 @@
46#include "util/debug.h" 46#include "util/debug.h"
47#include "util/header.h" 47#include "util/header.h"
48#include "util/cpumap.h" 48#include "util/cpumap.h"
49#include "util/thread.h"
49 50
50#include <sys/prctl.h> 51#include <sys/prctl.h>
51#include <math.h> 52#include <math.h>
@@ -66,18 +67,21 @@ static struct perf_event_attr default_attrs[] = {
66 67
67}; 68};
68 69
69static int system_wide = 0; 70static bool system_wide = false;
70static unsigned int nr_cpus = 0; 71static unsigned int nr_cpus = 0;
71static int run_idx = 0; 72static int run_idx = 0;
72 73
73static int run_count = 1; 74static int run_count = 1;
74static int inherit = 1; 75static bool no_inherit = false;
75static int scale = 1; 76static bool scale = true;
76static pid_t target_pid = -1; 77static pid_t target_pid = -1;
78static pid_t target_tid = -1;
79static pid_t *all_tids = NULL;
80static int thread_num = 0;
77static pid_t child_pid = -1; 81static pid_t child_pid = -1;
78static int null_run = 0; 82static bool null_run = false;
79 83
80static int fd[MAX_NR_CPUS][MAX_COUNTERS]; 84static int *fd[MAX_NR_CPUS][MAX_COUNTERS];
81 85
82static int event_scaled[MAX_COUNTERS]; 86static int event_scaled[MAX_COUNTERS];
83 87
@@ -140,9 +144,11 @@ struct stats runtime_branches_stats;
140#define ERR_PERF_OPEN \ 144#define ERR_PERF_OPEN \
141"Error: counter %d, sys_perf_event_open() syscall returned with %d (%s)\n" 145"Error: counter %d, sys_perf_event_open() syscall returned with %d (%s)\n"
142 146
143static void create_perf_stat_counter(int counter, int pid) 147static int create_perf_stat_counter(int counter)
144{ 148{
145 struct perf_event_attr *attr = attrs + counter; 149 struct perf_event_attr *attr = attrs + counter;
150 int thread;
151 int ncreated = 0;
146 152
147 if (scale) 153 if (scale)
148 attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | 154 attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
@@ -152,21 +158,33 @@ static void create_perf_stat_counter(int counter, int pid)
152 unsigned int cpu; 158 unsigned int cpu;
153 159
154 for (cpu = 0; cpu < nr_cpus; cpu++) { 160 for (cpu = 0; cpu < nr_cpus; cpu++) {
155 fd[cpu][counter] = sys_perf_event_open(attr, -1, cpumap[cpu], -1, 0); 161 fd[cpu][counter][0] = sys_perf_event_open(attr,
156 if (fd[cpu][counter] < 0 && verbose) 162 -1, cpumap[cpu], -1, 0);
157 fprintf(stderr, ERR_PERF_OPEN, counter, 163 if (fd[cpu][counter][0] < 0)
158 fd[cpu][counter], strerror(errno)); 164 pr_debug(ERR_PERF_OPEN, counter,
165 fd[cpu][counter][0], strerror(errno));
166 else
167 ++ncreated;
159 } 168 }
160 } else { 169 } else {
161 attr->inherit = inherit; 170 attr->inherit = !no_inherit;
162 attr->disabled = 1; 171 if (target_pid == -1 && target_tid == -1) {
163 attr->enable_on_exec = 1; 172 attr->disabled = 1;
164 173 attr->enable_on_exec = 1;
165 fd[0][counter] = sys_perf_event_open(attr, pid, -1, -1, 0); 174 }
166 if (fd[0][counter] < 0 && verbose) 175 for (thread = 0; thread < thread_num; thread++) {
167 fprintf(stderr, ERR_PERF_OPEN, counter, 176 fd[0][counter][thread] = sys_perf_event_open(attr,
168 fd[0][counter], strerror(errno)); 177 all_tids[thread], -1, -1, 0);
178 if (fd[0][counter][thread] < 0)
179 pr_debug(ERR_PERF_OPEN, counter,
180 fd[0][counter][thread],
181 strerror(errno));
182 else
183 ++ncreated;
184 }
169 } 185 }
186
187 return ncreated;
170} 188}
171 189
172/* 190/*
@@ -190,25 +208,28 @@ static void read_counter(int counter)
190 unsigned int cpu; 208 unsigned int cpu;
191 size_t res, nv; 209 size_t res, nv;
192 int scaled; 210 int scaled;
193 int i; 211 int i, thread;
194 212
195 count[0] = count[1] = count[2] = 0; 213 count[0] = count[1] = count[2] = 0;
196 214
197 nv = scale ? 3 : 1; 215 nv = scale ? 3 : 1;
198 for (cpu = 0; cpu < nr_cpus; cpu++) { 216 for (cpu = 0; cpu < nr_cpus; cpu++) {
199 if (fd[cpu][counter] < 0) 217 for (thread = 0; thread < thread_num; thread++) {
200 continue; 218 if (fd[cpu][counter][thread] < 0)
201 219 continue;
202 res = read(fd[cpu][counter], single_count, nv * sizeof(u64)); 220
203 assert(res == nv * sizeof(u64)); 221 res = read(fd[cpu][counter][thread],
204 222 single_count, nv * sizeof(u64));
205 close(fd[cpu][counter]); 223 assert(res == nv * sizeof(u64));
206 fd[cpu][counter] = -1; 224
207 225 close(fd[cpu][counter][thread]);
208 count[0] += single_count[0]; 226 fd[cpu][counter][thread] = -1;
209 if (scale) { 227
210 count[1] += single_count[1]; 228 count[0] += single_count[0];
211 count[2] += single_count[2]; 229 if (scale) {
230 count[1] += single_count[1];
231 count[2] += single_count[2];
232 }
212 } 233 }
213 } 234 }
214 235
@@ -250,10 +271,9 @@ static int run_perf_stat(int argc __used, const char **argv)
250{ 271{
251 unsigned long long t0, t1; 272 unsigned long long t0, t1;
252 int status = 0; 273 int status = 0;
253 int counter; 274 int counter, ncreated = 0;
254 int pid = target_pid;
255 int child_ready_pipe[2], go_pipe[2]; 275 int child_ready_pipe[2], go_pipe[2];
256 const bool forks = (target_pid == -1 && argc > 0); 276 const bool forks = (argc > 0);
257 char buf; 277 char buf;
258 278
259 if (!system_wide) 279 if (!system_wide)
@@ -265,10 +285,10 @@ static int run_perf_stat(int argc __used, const char **argv)
265 } 285 }
266 286
267 if (forks) { 287 if (forks) {
268 if ((pid = fork()) < 0) 288 if ((child_pid = fork()) < 0)
269 perror("failed to fork"); 289 perror("failed to fork");
270 290
271 if (!pid) { 291 if (!child_pid) {
272 close(child_ready_pipe[0]); 292 close(child_ready_pipe[0]);
273 close(go_pipe[1]); 293 close(go_pipe[1]);
274 fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC); 294 fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC);
@@ -297,7 +317,8 @@ static int run_perf_stat(int argc __used, const char **argv)
297 exit(-1); 317 exit(-1);
298 } 318 }
299 319
300 child_pid = pid; 320 if (target_tid == -1 && target_pid == -1 && !system_wide)
321 all_tids[0] = child_pid;
301 322
302 /* 323 /*
303 * Wait for the child to be ready to exec. 324 * Wait for the child to be ready to exec.
@@ -310,7 +331,16 @@ static int run_perf_stat(int argc __used, const char **argv)
310 } 331 }
311 332
312 for (counter = 0; counter < nr_counters; counter++) 333 for (counter = 0; counter < nr_counters; counter++)
313 create_perf_stat_counter(counter, pid); 334 ncreated += create_perf_stat_counter(counter);
335
336 if (ncreated == 0) {
337 pr_err("No permission to collect %sstats.\n"
338 "Consider tweaking /proc/sys/kernel/perf_event_paranoid.\n",
339 system_wide ? "system-wide " : "");
340 if (child_pid != -1)
341 kill(child_pid, SIGTERM);
342 return -1;
343 }
314 344
315 /* 345 /*
316 * Enable counters and exec the command: 346 * Enable counters and exec the command:
@@ -321,7 +351,7 @@ static int run_perf_stat(int argc __used, const char **argv)
321 close(go_pipe[1]); 351 close(go_pipe[1]);
322 wait(&status); 352 wait(&status);
323 } else { 353 } else {
324 while(!done); 354 while(!done) sleep(1);
325 } 355 }
326 356
327 t1 = rdclock(); 357 t1 = rdclock();
@@ -429,12 +459,14 @@ static void print_stat(int argc, const char **argv)
429 459
430 fprintf(stderr, "\n"); 460 fprintf(stderr, "\n");
431 fprintf(stderr, " Performance counter stats for "); 461 fprintf(stderr, " Performance counter stats for ");
432 if(target_pid == -1) { 462 if(target_pid == -1 && target_tid == -1) {
433 fprintf(stderr, "\'%s", argv[0]); 463 fprintf(stderr, "\'%s", argv[0]);
434 for (i = 1; i < argc; i++) 464 for (i = 1; i < argc; i++)
435 fprintf(stderr, " %s", argv[i]); 465 fprintf(stderr, " %s", argv[i]);
436 }else 466 } else if (target_pid != -1)
437 fprintf(stderr, "task pid \'%d", target_pid); 467 fprintf(stderr, "process id \'%d", target_pid);
468 else
469 fprintf(stderr, "thread id \'%d", target_tid);
438 470
439 fprintf(stderr, "\'"); 471 fprintf(stderr, "\'");
440 if (run_count > 1) 472 if (run_count > 1)
@@ -459,7 +491,7 @@ static volatile int signr = -1;
459 491
460static void skip_signal(int signo) 492static void skip_signal(int signo)
461{ 493{
462 if(target_pid != -1) 494 if(child_pid == -1)
463 done = 1; 495 done = 1;
464 496
465 signr = signo; 497 signr = signo;
@@ -486,15 +518,17 @@ static const struct option options[] = {
486 OPT_CALLBACK('e', "event", NULL, "event", 518 OPT_CALLBACK('e', "event", NULL, "event",
487 "event selector. use 'perf list' to list available events", 519 "event selector. use 'perf list' to list available events",
488 parse_events), 520 parse_events),
489 OPT_BOOLEAN('i', "inherit", &inherit, 521 OPT_BOOLEAN('i', "no-inherit", &no_inherit,
490 "child tasks inherit counters"), 522 "child tasks do not inherit counters"),
491 OPT_INTEGER('p', "pid", &target_pid, 523 OPT_INTEGER('p', "pid", &target_pid,
492 "stat events on existing pid"), 524 "stat events on existing process id"),
525 OPT_INTEGER('t', "tid", &target_tid,
526 "stat events on existing thread id"),
493 OPT_BOOLEAN('a', "all-cpus", &system_wide, 527 OPT_BOOLEAN('a', "all-cpus", &system_wide,
494 "system-wide collection from all CPUs"), 528 "system-wide collection from all CPUs"),
495 OPT_BOOLEAN('c', "scale", &scale, 529 OPT_BOOLEAN('c', "scale", &scale,
496 "scale/normalize counters"), 530 "scale/normalize counters"),
497 OPT_BOOLEAN('v', "verbose", &verbose, 531 OPT_INCR('v', "verbose", &verbose,
498 "be more verbose (show counter open errors, etc)"), 532 "be more verbose (show counter open errors, etc)"),
499 OPT_INTEGER('r', "repeat", &run_count, 533 OPT_INTEGER('r', "repeat", &run_count,
500 "repeat command and print average + stddev (max: 100)"), 534 "repeat command and print average + stddev (max: 100)"),
@@ -506,10 +540,11 @@ static const struct option options[] = {
506int cmd_stat(int argc, const char **argv, const char *prefix __used) 540int cmd_stat(int argc, const char **argv, const char *prefix __used)
507{ 541{
508 int status; 542 int status;
543 int i,j;
509 544
510 argc = parse_options(argc, argv, options, stat_usage, 545 argc = parse_options(argc, argv, options, stat_usage,
511 PARSE_OPT_STOP_AT_NON_OPTION); 546 PARSE_OPT_STOP_AT_NON_OPTION);
512 if (!argc && target_pid == -1) 547 if (!argc && target_pid == -1 && target_tid == -1)
513 usage_with_options(stat_usage, options); 548 usage_with_options(stat_usage, options);
514 if (run_count <= 0) 549 if (run_count <= 0)
515 usage_with_options(stat_usage, options); 550 usage_with_options(stat_usage, options);
@@ -525,6 +560,31 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used)
525 else 560 else
526 nr_cpus = 1; 561 nr_cpus = 1;
527 562
563 if (target_pid != -1) {
564 target_tid = target_pid;
565 thread_num = find_all_tid(target_pid, &all_tids);
566 if (thread_num <= 0) {
567 fprintf(stderr, "Can't find all threads of pid %d\n",
568 target_pid);
569 usage_with_options(stat_usage, options);
570 }
571 } else {
572 all_tids=malloc(sizeof(pid_t));
573 if (!all_tids)
574 return -ENOMEM;
575
576 all_tids[0] = target_tid;
577 thread_num = 1;
578 }
579
580 for (i = 0; i < MAX_NR_CPUS; i++) {
581 for (j = 0; j < MAX_COUNTERS; j++) {
582 fd[i][j] = malloc(sizeof(int)*thread_num);
583 if (!fd[i][j])
584 return -ENOMEM;
585 }
586 }
587
528 /* 588 /*
529 * We dont want to block the signals - that would cause 589 * We dont want to block the signals - that would cause
530 * child tasks to inherit that and Ctrl-C would not work. 590 * child tasks to inherit that and Ctrl-C would not work.
@@ -543,7 +603,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used)
543 status = run_perf_stat(argc, argv); 603 status = run_perf_stat(argc, argv);
544 } 604 }
545 605
546 print_stat(argc, argv); 606 if (status != -1)
607 print_stat(argc, argv);
547 608
548 return status; 609 return status;
549} 610}