aboutsummaryrefslogtreecommitdiffstats
path: root/tools/perf/builtin-stat.c
diff options
context:
space:
mode:
Diffstat (limited to 'tools/perf/builtin-stat.c')
-rw-r--r--tools/perf/builtin-stat.c532
1 files changed, 328 insertions, 204 deletions
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index a6b4d44f9502..c385a63ebfd1 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -43,6 +43,7 @@
43#include "util/parse-options.h" 43#include "util/parse-options.h"
44#include "util/parse-events.h" 44#include "util/parse-events.h"
45#include "util/event.h" 45#include "util/event.h"
46#include "util/evsel.h"
46#include "util/debug.h" 47#include "util/debug.h"
47#include "util/header.h" 48#include "util/header.h"
48#include "util/cpumap.h" 49#include "util/cpumap.h"
@@ -52,6 +53,8 @@
52#include <math.h> 53#include <math.h>
53#include <locale.h> 54#include <locale.h>
54 55
56#define DEFAULT_SEPARATOR " "
57
55static struct perf_event_attr default_attrs[] = { 58static struct perf_event_attr default_attrs[] = {
56 59
57 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK }, 60 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK },
@@ -69,25 +72,23 @@ static struct perf_event_attr default_attrs[] = {
69}; 72};
70 73
71static bool system_wide = false; 74static bool system_wide = false;
72static int nr_cpus = 0; 75static struct cpu_map *cpus;
73static int run_idx = 0; 76static int run_idx = 0;
74 77
75static int run_count = 1; 78static int run_count = 1;
76static bool no_inherit = false; 79static bool no_inherit = false;
77static bool scale = true; 80static bool scale = true;
81static bool no_aggr = false;
78static pid_t target_pid = -1; 82static pid_t target_pid = -1;
79static pid_t target_tid = -1; 83static pid_t target_tid = -1;
80static pid_t *all_tids = NULL; 84static struct thread_map *threads;
81static int thread_num = 0;
82static pid_t child_pid = -1; 85static pid_t child_pid = -1;
83static bool null_run = false; 86static bool null_run = false;
84static bool big_num = false; 87static bool big_num = true;
88static int big_num_opt = -1;
85static const char *cpu_list; 89static const char *cpu_list;
86 90static const char *csv_sep = NULL;
87 91static bool csv_output = false;
88static int *fd[MAX_NR_CPUS][MAX_COUNTERS];
89
90static int event_scaled[MAX_COUNTERS];
91 92
92static volatile int done = 0; 93static volatile int done = 0;
93 94
@@ -96,6 +97,22 @@ struct stats
96 double n, mean, M2; 97 double n, mean, M2;
97}; 98};
98 99
100struct perf_stat {
101 struct stats res_stats[3];
102};
103
104static int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel)
105{
106 evsel->priv = zalloc(sizeof(struct perf_stat));
107 return evsel->priv == NULL ? -ENOMEM : 0;
108}
109
110static void perf_evsel__free_stat_priv(struct perf_evsel *evsel)
111{
112 free(evsel->priv);
113 evsel->priv = NULL;
114}
115
99static void update_stats(struct stats *stats, u64 val) 116static void update_stats(struct stats *stats, u64 val)
100{ 117{
101 double delta; 118 double delta;
@@ -135,69 +152,38 @@ static double stddev_stats(struct stats *stats)
135 return sqrt(variance_mean); 152 return sqrt(variance_mean);
136} 153}
137 154
138struct stats event_res_stats[MAX_COUNTERS][3]; 155struct stats runtime_nsecs_stats[MAX_NR_CPUS];
139struct stats runtime_nsecs_stats; 156struct stats runtime_cycles_stats[MAX_NR_CPUS];
157struct stats runtime_branches_stats[MAX_NR_CPUS];
140struct stats walltime_nsecs_stats; 158struct stats walltime_nsecs_stats;
141struct stats runtime_cycles_stats;
142struct stats runtime_branches_stats;
143 159
144#define MATCH_EVENT(t, c, counter) \ 160static int create_perf_stat_counter(struct perf_evsel *evsel)
145 (attrs[counter].type == PERF_TYPE_##t && \
146 attrs[counter].config == PERF_COUNT_##c)
147
148#define ERR_PERF_OPEN \
149"Error: counter %d, sys_perf_event_open() syscall returned with %d (%s)\n"
150
151static int create_perf_stat_counter(int counter)
152{ 161{
153 struct perf_event_attr *attr = attrs + counter; 162 struct perf_event_attr *attr = &evsel->attr;
154 int thread;
155 int ncreated = 0;
156 163
157 if (scale) 164 if (scale)
158 attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | 165 attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
159 PERF_FORMAT_TOTAL_TIME_RUNNING; 166 PERF_FORMAT_TOTAL_TIME_RUNNING;
160 167
161 if (system_wide) { 168 if (system_wide)
162 int cpu; 169 return perf_evsel__open_per_cpu(evsel, cpus);
163 170
164 for (cpu = 0; cpu < nr_cpus; cpu++) { 171 attr->inherit = !no_inherit;
165 fd[cpu][counter][0] = sys_perf_event_open(attr, 172 if (target_pid == -1 && target_tid == -1) {
166 -1, cpumap[cpu], -1, 0); 173 attr->disabled = 1;
167 if (fd[cpu][counter][0] < 0) 174 attr->enable_on_exec = 1;
168 pr_debug(ERR_PERF_OPEN, counter,
169 fd[cpu][counter][0], strerror(errno));
170 else
171 ++ncreated;
172 }
173 } else {
174 attr->inherit = !no_inherit;
175 if (target_pid == -1 && target_tid == -1) {
176 attr->disabled = 1;
177 attr->enable_on_exec = 1;
178 }
179 for (thread = 0; thread < thread_num; thread++) {
180 fd[0][counter][thread] = sys_perf_event_open(attr,
181 all_tids[thread], -1, -1, 0);
182 if (fd[0][counter][thread] < 0)
183 pr_debug(ERR_PERF_OPEN, counter,
184 fd[0][counter][thread],
185 strerror(errno));
186 else
187 ++ncreated;
188 }
189 } 175 }
190 176
191 return ncreated; 177 return perf_evsel__open_per_thread(evsel, threads);
192} 178}
193 179
194/* 180/*
195 * Does the counter have nsecs as a unit? 181 * Does the counter have nsecs as a unit?
196 */ 182 */
197static inline int nsec_counter(int counter) 183static inline int nsec_counter(struct perf_evsel *evsel)
198{ 184{
199 if (MATCH_EVENT(SOFTWARE, SW_CPU_CLOCK, counter) || 185 if (perf_evsel__match(evsel, SOFTWARE, SW_CPU_CLOCK) ||
200 MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter)) 186 perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK))
201 return 1; 187 return 1;
202 188
203 return 0; 189 return 0;
@@ -205,55 +191,19 @@ static inline int nsec_counter(int counter)
205 191
206/* 192/*
207 * Read out the results of a single counter: 193 * Read out the results of a single counter:
194 * aggregate counts across CPUs in system-wide mode
208 */ 195 */
209static void read_counter(int counter) 196static int read_counter_aggr(struct perf_evsel *counter)
210{ 197{
211 u64 count[3], single_count[3]; 198 struct perf_stat *ps = counter->priv;
212 int cpu; 199 u64 *count = counter->counts->aggr.values;
213 size_t res, nv; 200 int i;
214 int scaled;
215 int i, thread;
216
217 count[0] = count[1] = count[2] = 0;
218
219 nv = scale ? 3 : 1;
220 for (cpu = 0; cpu < nr_cpus; cpu++) {
221 for (thread = 0; thread < thread_num; thread++) {
222 if (fd[cpu][counter][thread] < 0)
223 continue;
224
225 res = read(fd[cpu][counter][thread],
226 single_count, nv * sizeof(u64));
227 assert(res == nv * sizeof(u64));
228
229 close(fd[cpu][counter][thread]);
230 fd[cpu][counter][thread] = -1;
231
232 count[0] += single_count[0];
233 if (scale) {
234 count[1] += single_count[1];
235 count[2] += single_count[2];
236 }
237 }
238 }
239
240 scaled = 0;
241 if (scale) {
242 if (count[2] == 0) {
243 event_scaled[counter] = -1;
244 count[0] = 0;
245 return;
246 }
247 201
248 if (count[2] < count[1]) { 202 if (__perf_evsel__read(counter, cpus->nr, threads->nr, scale) < 0)
249 event_scaled[counter] = 1; 203 return -1;
250 count[0] = (unsigned long long)
251 ((double)count[0] * count[1] / count[2] + 0.5);
252 }
253 }
254 204
255 for (i = 0; i < 3; i++) 205 for (i = 0; i < 3; i++)
256 update_stats(&event_res_stats[counter][i], count[i]); 206 update_stats(&ps->res_stats[i], count[i]);
257 207
258 if (verbose) { 208 if (verbose) {
259 fprintf(stderr, "%s: %Ld %Ld %Ld\n", event_name(counter), 209 fprintf(stderr, "%s: %Ld %Ld %Ld\n", event_name(counter),
@@ -263,26 +213,51 @@ static void read_counter(int counter)
263 /* 213 /*
264 * Save the full runtime - to allow normalization during printout: 214 * Save the full runtime - to allow normalization during printout:
265 */ 215 */
266 if (MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter)) 216 if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK))
267 update_stats(&runtime_nsecs_stats, count[0]); 217 update_stats(&runtime_nsecs_stats[0], count[0]);
268 if (MATCH_EVENT(HARDWARE, HW_CPU_CYCLES, counter)) 218 if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
269 update_stats(&runtime_cycles_stats, count[0]); 219 update_stats(&runtime_cycles_stats[0], count[0]);
270 if (MATCH_EVENT(HARDWARE, HW_BRANCH_INSTRUCTIONS, counter)) 220 if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
271 update_stats(&runtime_branches_stats, count[0]); 221 update_stats(&runtime_branches_stats[0], count[0]);
222
223 return 0;
224}
225
226/*
227 * Read out the results of a single counter:
228 * do not aggregate counts across CPUs in system-wide mode
229 */
230static int read_counter(struct perf_evsel *counter)
231{
232 u64 *count;
233 int cpu;
234
235 for (cpu = 0; cpu < cpus->nr; cpu++) {
236 if (__perf_evsel__read_on_cpu(counter, cpu, 0, scale) < 0)
237 return -1;
238
239 count = counter->counts->cpu[cpu].values;
240
241 if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK))
242 update_stats(&runtime_nsecs_stats[cpu], count[0]);
243 if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
244 update_stats(&runtime_cycles_stats[cpu], count[0]);
245 if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
246 update_stats(&runtime_branches_stats[cpu], count[0]);
247 }
248
249 return 0;
272} 250}
273 251
274static int run_perf_stat(int argc __used, const char **argv) 252static int run_perf_stat(int argc __used, const char **argv)
275{ 253{
276 unsigned long long t0, t1; 254 unsigned long long t0, t1;
255 struct perf_evsel *counter;
277 int status = 0; 256 int status = 0;
278 int counter, ncreated = 0;
279 int child_ready_pipe[2], go_pipe[2]; 257 int child_ready_pipe[2], go_pipe[2];
280 const bool forks = (argc > 0); 258 const bool forks = (argc > 0);
281 char buf; 259 char buf;
282 260
283 if (!system_wide)
284 nr_cpus = 1;
285
286 if (forks && (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0)) { 261 if (forks && (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0)) {
287 perror("failed to create pipes"); 262 perror("failed to create pipes");
288 exit(1); 263 exit(1);
@@ -322,7 +297,7 @@ static int run_perf_stat(int argc __used, const char **argv)
322 } 297 }
323 298
324 if (target_tid == -1 && target_pid == -1 && !system_wide) 299 if (target_tid == -1 && target_pid == -1 && !system_wide)
325 all_tids[0] = child_pid; 300 threads->map[0] = child_pid;
326 301
327 /* 302 /*
328 * Wait for the child to be ready to exec. 303 * Wait for the child to be ready to exec.
@@ -334,16 +309,25 @@ static int run_perf_stat(int argc __used, const char **argv)
334 close(child_ready_pipe[0]); 309 close(child_ready_pipe[0]);
335 } 310 }
336 311
337 for (counter = 0; counter < nr_counters; counter++) 312 list_for_each_entry(counter, &evsel_list, node) {
338 ncreated += create_perf_stat_counter(counter); 313 if (create_perf_stat_counter(counter) < 0) {
339 314 if (errno == -EPERM || errno == -EACCES) {
340 if (ncreated == 0) { 315 error("You may not have permission to collect %sstats.\n"
341 pr_err("No permission to collect %sstats.\n" 316 "\t Consider tweaking"
342 "Consider tweaking /proc/sys/kernel/perf_event_paranoid.\n", 317 " /proc/sys/kernel/perf_event_paranoid or running as root.",
343 system_wide ? "system-wide " : ""); 318 system_wide ? "system-wide " : "");
344 if (child_pid != -1) 319 } else if (errno == ENOENT) {
345 kill(child_pid, SIGTERM); 320 error("%s event is not supported. ", event_name(counter));
346 return -1; 321 } else {
322 error("open_counter returned with %d (%s). "
323 "/bin/dmesg may provide additional information.\n",
324 errno, strerror(errno));
325 }
326 if (child_pid != -1)
327 kill(child_pid, SIGTERM);
328 die("Not all events could be opened.\n");
329 return -1;
330 }
347 } 331 }
348 332
349 /* 333 /*
@@ -362,60 +346,97 @@ static int run_perf_stat(int argc __used, const char **argv)
362 346
363 update_stats(&walltime_nsecs_stats, t1 - t0); 347 update_stats(&walltime_nsecs_stats, t1 - t0);
364 348
365 for (counter = 0; counter < nr_counters; counter++) 349 if (no_aggr) {
366 read_counter(counter); 350 list_for_each_entry(counter, &evsel_list, node) {
351 read_counter(counter);
352 perf_evsel__close_fd(counter, cpus->nr, 1);
353 }
354 } else {
355 list_for_each_entry(counter, &evsel_list, node) {
356 read_counter_aggr(counter);
357 perf_evsel__close_fd(counter, cpus->nr, threads->nr);
358 }
359 }
367 360
368 return WEXITSTATUS(status); 361 return WEXITSTATUS(status);
369} 362}
370 363
371static void print_noise(int counter, double avg) 364static void print_noise(struct perf_evsel *evsel, double avg)
372{ 365{
366 struct perf_stat *ps;
367
373 if (run_count == 1) 368 if (run_count == 1)
374 return; 369 return;
375 370
371 ps = evsel->priv;
376 fprintf(stderr, " ( +- %7.3f%% )", 372 fprintf(stderr, " ( +- %7.3f%% )",
377 100 * stddev_stats(&event_res_stats[counter][0]) / avg); 373 100 * stddev_stats(&ps->res_stats[0]) / avg);
378} 374}
379 375
380static void nsec_printout(int counter, double avg) 376static void nsec_printout(int cpu, struct perf_evsel *evsel, double avg)
381{ 377{
382 double msecs = avg / 1e6; 378 double msecs = avg / 1e6;
379 char cpustr[16] = { '\0', };
380 const char *fmt = csv_output ? "%s%.6f%s%s" : "%s%18.6f%s%-24s";
383 381
384 fprintf(stderr, " %18.6f %-24s", msecs, event_name(counter)); 382 if (no_aggr)
383 sprintf(cpustr, "CPU%*d%s",
384 csv_output ? 0 : -4,
385 cpus->map[cpu], csv_sep);
386
387 fprintf(stderr, fmt, cpustr, msecs, csv_sep, event_name(evsel));
388
389 if (csv_output)
390 return;
385 391
386 if (MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter)) { 392 if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK))
387 fprintf(stderr, " # %10.3f CPUs ", 393 fprintf(stderr, " # %10.3f CPUs ",
388 avg / avg_stats(&walltime_nsecs_stats)); 394 avg / avg_stats(&walltime_nsecs_stats));
389 }
390} 395}
391 396
392static void abs_printout(int counter, double avg) 397static void abs_printout(int cpu, struct perf_evsel *evsel, double avg)
393{ 398{
394 double total, ratio = 0.0; 399 double total, ratio = 0.0;
400 char cpustr[16] = { '\0', };
401 const char *fmt;
402
403 if (csv_output)
404 fmt = "%s%.0f%s%s";
405 else if (big_num)
406 fmt = "%s%'18.0f%s%-24s";
407 else
408 fmt = "%s%18.0f%s%-24s";
395 409
396 if (big_num) 410 if (no_aggr)
397 fprintf(stderr, " %'18.0f %-24s", avg, event_name(counter)); 411 sprintf(cpustr, "CPU%*d%s",
412 csv_output ? 0 : -4,
413 cpus->map[cpu], csv_sep);
398 else 414 else
399 fprintf(stderr, " %18.0f %-24s", avg, event_name(counter)); 415 cpu = 0;
416
417 fprintf(stderr, fmt, cpustr, avg, csv_sep, event_name(evsel));
400 418
401 if (MATCH_EVENT(HARDWARE, HW_INSTRUCTIONS, counter)) { 419 if (csv_output)
402 total = avg_stats(&runtime_cycles_stats); 420 return;
421
422 if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
423 total = avg_stats(&runtime_cycles_stats[cpu]);
403 424
404 if (total) 425 if (total)
405 ratio = avg / total; 426 ratio = avg / total;
406 427
407 fprintf(stderr, " # %10.3f IPC ", ratio); 428 fprintf(stderr, " # %10.3f IPC ", ratio);
408 } else if (MATCH_EVENT(HARDWARE, HW_BRANCH_MISSES, counter) && 429 } else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES) &&
409 runtime_branches_stats.n != 0) { 430 runtime_branches_stats[cpu].n != 0) {
410 total = avg_stats(&runtime_branches_stats); 431 total = avg_stats(&runtime_branches_stats[cpu]);
411 432
412 if (total) 433 if (total)
413 ratio = avg * 100 / total; 434 ratio = avg * 100 / total;
414 435
415 fprintf(stderr, " # %10.3f %% ", ratio); 436 fprintf(stderr, " # %10.3f %% ", ratio);
416 437
417 } else if (runtime_nsecs_stats.n != 0) { 438 } else if (runtime_nsecs_stats[cpu].n != 0) {
418 total = avg_stats(&runtime_nsecs_stats); 439 total = avg_stats(&runtime_nsecs_stats[cpu]);
419 440
420 if (total) 441 if (total)
421 ratio = 1000.0 * avg / total; 442 ratio = 1000.0 * avg / total;
@@ -426,30 +447,38 @@ static void abs_printout(int counter, double avg)
426 447
427/* 448/*
428 * Print out the results of a single counter: 449 * Print out the results of a single counter:
450 * aggregated counts in system-wide mode
429 */ 451 */
430static void print_counter(int counter) 452static void print_counter_aggr(struct perf_evsel *counter)
431{ 453{
432 double avg = avg_stats(&event_res_stats[counter][0]); 454 struct perf_stat *ps = counter->priv;
433 int scaled = event_scaled[counter]; 455 double avg = avg_stats(&ps->res_stats[0]);
456 int scaled = counter->counts->scaled;
434 457
435 if (scaled == -1) { 458 if (scaled == -1) {
436 fprintf(stderr, " %18s %-24s\n", 459 fprintf(stderr, "%*s%s%-24s\n",
437 "<not counted>", event_name(counter)); 460 csv_output ? 0 : 18,
461 "<not counted>", csv_sep, event_name(counter));
438 return; 462 return;
439 } 463 }
440 464
441 if (nsec_counter(counter)) 465 if (nsec_counter(counter))
442 nsec_printout(counter, avg); 466 nsec_printout(-1, counter, avg);
443 else 467 else
444 abs_printout(counter, avg); 468 abs_printout(-1, counter, avg);
469
470 if (csv_output) {
471 fputc('\n', stderr);
472 return;
473 }
445 474
446 print_noise(counter, avg); 475 print_noise(counter, avg);
447 476
448 if (scaled) { 477 if (scaled) {
449 double avg_enabled, avg_running; 478 double avg_enabled, avg_running;
450 479
451 avg_enabled = avg_stats(&event_res_stats[counter][1]); 480 avg_enabled = avg_stats(&ps->res_stats[1]);
452 avg_running = avg_stats(&event_res_stats[counter][2]); 481 avg_running = avg_stats(&ps->res_stats[2]);
453 482
454 fprintf(stderr, " (scaled from %.2f%%)", 483 fprintf(stderr, " (scaled from %.2f%%)",
455 100 * avg_running / avg_enabled); 484 100 * avg_running / avg_enabled);
@@ -458,40 +487,92 @@ static void print_counter(int counter)
458 fprintf(stderr, "\n"); 487 fprintf(stderr, "\n");
459} 488}
460 489
490/*
491 * Print out the results of a single counter:
492 * does not use aggregated count in system-wide
493 */
494static void print_counter(struct perf_evsel *counter)
495{
496 u64 ena, run, val;
497 int cpu;
498
499 for (cpu = 0; cpu < cpus->nr; cpu++) {
500 val = counter->counts->cpu[cpu].val;
501 ena = counter->counts->cpu[cpu].ena;
502 run = counter->counts->cpu[cpu].run;
503 if (run == 0 || ena == 0) {
504 fprintf(stderr, "CPU%*d%s%*s%s%-24s",
505 csv_output ? 0 : -4,
506 cpus->map[cpu], csv_sep,
507 csv_output ? 0 : 18,
508 "<not counted>", csv_sep,
509 event_name(counter));
510
511 fprintf(stderr, "\n");
512 continue;
513 }
514
515 if (nsec_counter(counter))
516 nsec_printout(cpu, counter, val);
517 else
518 abs_printout(cpu, counter, val);
519
520 if (!csv_output) {
521 print_noise(counter, 1.0);
522
523 if (run != ena) {
524 fprintf(stderr, " (scaled from %.2f%%)",
525 100.0 * run / ena);
526 }
527 }
528 fprintf(stderr, "\n");
529 }
530}
531
461static void print_stat(int argc, const char **argv) 532static void print_stat(int argc, const char **argv)
462{ 533{
463 int i, counter; 534 struct perf_evsel *counter;
535 int i;
464 536
465 fflush(stdout); 537 fflush(stdout);
466 538
467 fprintf(stderr, "\n"); 539 if (!csv_output) {
468 fprintf(stderr, " Performance counter stats for "); 540 fprintf(stderr, "\n");
469 if(target_pid == -1 && target_tid == -1) { 541 fprintf(stderr, " Performance counter stats for ");
470 fprintf(stderr, "\'%s", argv[0]); 542 if(target_pid == -1 && target_tid == -1) {
471 for (i = 1; i < argc; i++) 543 fprintf(stderr, "\'%s", argv[0]);
472 fprintf(stderr, " %s", argv[i]); 544 for (i = 1; i < argc; i++)
473 } else if (target_pid != -1) 545 fprintf(stderr, " %s", argv[i]);
474 fprintf(stderr, "process id \'%d", target_pid); 546 } else if (target_pid != -1)
475 else 547 fprintf(stderr, "process id \'%d", target_pid);
476 fprintf(stderr, "thread id \'%d", target_tid); 548 else
477 549 fprintf(stderr, "thread id \'%d", target_tid);
478 fprintf(stderr, "\'"); 550
479 if (run_count > 1) 551 fprintf(stderr, "\'");
480 fprintf(stderr, " (%d runs)", run_count); 552 if (run_count > 1)
481 fprintf(stderr, ":\n\n"); 553 fprintf(stderr, " (%d runs)", run_count);
554 fprintf(stderr, ":\n\n");
555 }
482 556
483 for (counter = 0; counter < nr_counters; counter++) 557 if (no_aggr) {
484 print_counter(counter); 558 list_for_each_entry(counter, &evsel_list, node)
559 print_counter(counter);
560 } else {
561 list_for_each_entry(counter, &evsel_list, node)
562 print_counter_aggr(counter);
563 }
485 564
486 fprintf(stderr, "\n"); 565 if (!csv_output) {
487 fprintf(stderr, " %18.9f seconds time elapsed", 566 fprintf(stderr, "\n");
488 avg_stats(&walltime_nsecs_stats)/1e9); 567 fprintf(stderr, " %18.9f seconds time elapsed",
489 if (run_count > 1) { 568 avg_stats(&walltime_nsecs_stats)/1e9);
490 fprintf(stderr, " ( +- %7.3f%% )", 569 if (run_count > 1) {
570 fprintf(stderr, " ( +- %7.3f%% )",
491 100*stddev_stats(&walltime_nsecs_stats) / 571 100*stddev_stats(&walltime_nsecs_stats) /
492 avg_stats(&walltime_nsecs_stats)); 572 avg_stats(&walltime_nsecs_stats));
573 }
574 fprintf(stderr, "\n\n");
493 } 575 }
494 fprintf(stderr, "\n\n");
495} 576}
496 577
497static volatile int signr = -1; 578static volatile int signr = -1;
@@ -521,6 +602,13 @@ static const char * const stat_usage[] = {
521 NULL 602 NULL
522}; 603};
523 604
605static int stat__set_big_num(const struct option *opt __used,
606 const char *s __used, int unset)
607{
608 big_num_opt = unset ? 0 : 1;
609 return 0;
610}
611
524static const struct option options[] = { 612static const struct option options[] = {
525 OPT_CALLBACK('e', "event", NULL, "event", 613 OPT_CALLBACK('e', "event", NULL, "event",
526 "event selector. use 'perf list' to list available events", 614 "event selector. use 'perf list' to list available events",
@@ -541,64 +629,95 @@ static const struct option options[] = {
541 "repeat command and print average + stddev (max: 100)"), 629 "repeat command and print average + stddev (max: 100)"),
542 OPT_BOOLEAN('n', "null", &null_run, 630 OPT_BOOLEAN('n', "null", &null_run,
543 "null run - dont start any counters"), 631 "null run - dont start any counters"),
544 OPT_BOOLEAN('B', "big-num", &big_num, 632 OPT_CALLBACK_NOOPT('B', "big-num", NULL, NULL,
545 "print large numbers with thousands\' separators"), 633 "print large numbers with thousands\' separators",
634 stat__set_big_num),
546 OPT_STRING('C', "cpu", &cpu_list, "cpu", 635 OPT_STRING('C', "cpu", &cpu_list, "cpu",
547 "list of cpus to monitor in system-wide"), 636 "list of cpus to monitor in system-wide"),
637 OPT_BOOLEAN('A', "no-aggr", &no_aggr,
638 "disable CPU count aggregation"),
639 OPT_STRING('x', "field-separator", &csv_sep, "separator",
640 "print counts with custom separator"),
548 OPT_END() 641 OPT_END()
549}; 642};
550 643
551int cmd_stat(int argc, const char **argv, const char *prefix __used) 644int cmd_stat(int argc, const char **argv, const char *prefix __used)
552{ 645{
553 int status; 646 struct perf_evsel *pos;
554 int i,j; 647 int status = -ENOMEM;
555 648
556 setlocale(LC_ALL, ""); 649 setlocale(LC_ALL, "");
557 650
558 argc = parse_options(argc, argv, options, stat_usage, 651 argc = parse_options(argc, argv, options, stat_usage,
559 PARSE_OPT_STOP_AT_NON_OPTION); 652 PARSE_OPT_STOP_AT_NON_OPTION);
653
654 if (csv_sep)
655 csv_output = true;
656 else
657 csv_sep = DEFAULT_SEPARATOR;
658
659 /*
660 * let the spreadsheet do the pretty-printing
661 */
662 if (csv_output) {
663 /* User explicitely passed -B? */
664 if (big_num_opt == 1) {
665 fprintf(stderr, "-B option not supported with -x\n");
666 usage_with_options(stat_usage, options);
667 } else /* Nope, so disable big number formatting */
668 big_num = false;
669 } else if (big_num_opt == 0) /* User passed --no-big-num */
670 big_num = false;
671
560 if (!argc && target_pid == -1 && target_tid == -1) 672 if (!argc && target_pid == -1 && target_tid == -1)
561 usage_with_options(stat_usage, options); 673 usage_with_options(stat_usage, options);
562 if (run_count <= 0) 674 if (run_count <= 0)
563 usage_with_options(stat_usage, options); 675 usage_with_options(stat_usage, options);
564 676
677 /* no_aggr is for system-wide only */
678 if (no_aggr && !system_wide)
679 usage_with_options(stat_usage, options);
680
565 /* Set attrs and nr_counters if no event is selected and !null_run */ 681 /* Set attrs and nr_counters if no event is selected and !null_run */
566 if (!null_run && !nr_counters) { 682 if (!null_run && !nr_counters) {
567 memcpy(attrs, default_attrs, sizeof(default_attrs)); 683 size_t c;
684
568 nr_counters = ARRAY_SIZE(default_attrs); 685 nr_counters = ARRAY_SIZE(default_attrs);
686
687 for (c = 0; c < ARRAY_SIZE(default_attrs); ++c) {
688 pos = perf_evsel__new(&default_attrs[c],
689 nr_counters);
690 if (pos == NULL)
691 goto out;
692 list_add(&pos->node, &evsel_list);
693 }
569 } 694 }
570 695
571 if (system_wide) 696 if (target_pid != -1)
572 nr_cpus = read_cpu_map(cpu_list); 697 target_tid = target_pid;
573 else
574 nr_cpus = 1;
575 698
576 if (nr_cpus < 1) 699 threads = thread_map__new(target_pid, target_tid);
700 if (threads == NULL) {
701 pr_err("Problems finding threads of monitor\n");
577 usage_with_options(stat_usage, options); 702 usage_with_options(stat_usage, options);
703 }
578 704
579 if (target_pid != -1) { 705 if (system_wide)
580 target_tid = target_pid; 706 cpus = cpu_map__new(cpu_list);
581 thread_num = find_all_tid(target_pid, &all_tids); 707 else
582 if (thread_num <= 0) { 708 cpus = cpu_map__dummy_new();
583 fprintf(stderr, "Can't find all threads of pid %d\n",
584 target_pid);
585 usage_with_options(stat_usage, options);
586 }
587 } else {
588 all_tids=malloc(sizeof(pid_t));
589 if (!all_tids)
590 return -ENOMEM;
591 709
592 all_tids[0] = target_tid; 710 if (cpus == NULL) {
593 thread_num = 1; 711 perror("failed to parse CPUs map");
712 usage_with_options(stat_usage, options);
713 return -1;
594 } 714 }
595 715
596 for (i = 0; i < MAX_NR_CPUS; i++) { 716 list_for_each_entry(pos, &evsel_list, node) {
597 for (j = 0; j < MAX_COUNTERS; j++) { 717 if (perf_evsel__alloc_stat_priv(pos) < 0 ||
598 fd[i][j] = malloc(sizeof(int)*thread_num); 718 perf_evsel__alloc_counts(pos, cpus->nr) < 0 ||
599 if (!fd[i][j]) 719 perf_evsel__alloc_fd(pos, cpus->nr, threads->nr) < 0)
600 return -ENOMEM; 720 goto out_free_fd;
601 }
602 } 721 }
603 722
604 /* 723 /*
@@ -621,6 +740,11 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used)
621 740
622 if (status != -1) 741 if (status != -1)
623 print_stat(argc, argv); 742 print_stat(argc, argv);
624 743out_free_fd:
744 list_for_each_entry(pos, &evsel_list, node)
745 perf_evsel__free_stat_priv(pos);
746out:
747 thread_map__delete(threads);
748 threads = NULL;
625 return status; 749 return status;
626} 750}