aboutsummaryrefslogtreecommitdiffstats
path: root/tools/perf/builtin-stat.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2014-01-20 13:28:30 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2014-01-20 13:28:30 -0500
commit9326657abe1a83ed4b4f396b923ca1217fd50cba (patch)
tree9fd5035a6f68af7306d58938e309bd36ed81646c /tools/perf/builtin-stat.c
parent2cc3f16cad1561c6fc551aefff559e53726efc8b (diff)
parent45e6af06367e7b2eb8dc49671092462d8f8a5f47 (diff)
Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf updates from Ingo Molnar: "Kernel side changes: - Add Intel RAPL energy counter support (Stephane Eranian) - Clean up uprobes (Oleg Nesterov) - Optimize ring-buffer writes (Peter Zijlstra) Tooling side changes, user visible: - 'perf diff': - Add column colouring improvements (Ramkumar Ramachandra) - 'perf kvm': - Add guest related improvements, including allowing to specify a directory with guest specific /proc information (Dongsheng Yang) - Add shell completion support (Ramkumar Ramachandra) - Add '-v' option (Dongsheng Yang) - Support --guestmount (Dongsheng Yang) - 'perf probe': - Support showing source code, asking for variables to be collected at probe time and other 'perf probe' operations that use DWARF information. This supports only binaries with debugging information at this time, detached debuginfo (aka debuginfo packages) support should come in later patches (Masami Hiramatsu) - 'perf record': - Rename --no-delay option to --no-buffering, better reflecting its purpose and freeing up '--delay' to take the place of '--initial-delay', so that 'record' and 'stat' are consistent (Arnaldo Carvalho de Melo) - Default the -t/--thread option to no inheritance (Adrian Hunter) - Make per-cpu mmaps the default (Adrian Hunter) - 'perf report': - Improve callchain processing performance (Frederic Weisbecker) - Retain bfd reference to lookup source line numbers, greatly optimizing, among other use cases, 'perf report -s srcline' (Adrian Hunter) - Improve callchain processing performance even more (Namhyung Kim) - Add a perf.data file header window in the 'perf report' TUI, associated with the 'i' hotkey, providing a counterpart to the --header option in the stdio UI (Namhyung Kim) - 'perf script': - Add an option in 'perf script' to print the source line number (Adrian Hunter) - Add --header/--header-only options to 'script' and 'report', the default is not to show the header info, but as this has been the default for some time, leave a single line 
explaining how to obtain that information (Jiri Olsa) - Add options to show comm, fork, exit and mmap PERF_RECORD_ events (Namhyung Kim) - Print callchains and symbols if they exist (David Ahern) - 'perf timechart' - Add backtrace support to CPU info - Print pid along the name - Add support for CPU topology - Add new option --highlight'ing threads, be it by name or, if a numeric value is provided, that run more than given duration (Stanislav Fomichev) - 'perf top': - Make 'perf top -g' refer to callchains, for consistency with other tools (David Ahern) - 'perf trace': - Handle old kernels where the "raw_syscalls" tracepoints were called plain "syscalls" (David Ahern) - Remove thread summary coloring, by Pekka Enberg. - Honour -m option in 'trace', the tool was offering the option to set the mmap size, but wasn't using it when doing the actual mmap on the events file descriptors (Jiri Olsa) - generic: - Backport libtraceevent plugin support (trace-cmd repository, with plugins for jbd2, hrtimer, kmem, kvm, mac80211, sched_switch, function, xen, scsi, cfg80211 (Jiri Olsa) - Print session information only if --stdio is given (Namhyung Kim) Tooling side changes, developer visible (plumbing): - Improve 'perf probe' exit path, release resources (Masami Hiramatsu) - Improve libtraceevent plugins exit path, allowing the registering of an unregister handler to be called at exit time (Namhyung Kim) - Add an alias to the build test makefile (make -C tools/perf build-test) (Namhyung Kim) - Get rid of die() and friends (good riddance!) 
in libtraceevent (Namhyung Kim) - Fix cross build problems related to pkgconfig and CROSS_COMPILE not being propagated to the feature tests, leading to features being tested in the host and then being enabled on the target (Mark Rutland) - Improve forked workload error reporting by sending the errno in the signal data queueing integer field, using sigqueue and by doing the signal setup in the evlist methods, removing open coded equivalents in various tools (Arnaldo Carvalho de Melo) - Do more auto exit cleanup chores in the 'evlist' destructor, so that the tools don't have to all do that sequence (Arnaldo Carvalho de Melo) - Pack 'struct perf_session_env' and 'struct trace' (Arnaldo Carvalho de Melo) - Add test for building detached source tarballs (Arnaldo Carvalho de Melo) - Move some header files (tools/perf/ to tools/include/) to make them available to other tools/ dwelling codebases (Namhyung Kim) - Move logic to warn about kptr_restrict'ed kernels to separate function in 'report' (Arnaldo Carvalho de Melo) - Move hist browser selection code to separate function (Arnaldo Carvalho de Melo) - Move histogram entries collapsing to separate function (Arnaldo Carvalho de Melo) - Introduce evlist__for_each() & friends (Arnaldo Carvalho de Melo) - Automate setup of FEATURE_CHECK_(C|LD)FLAGS-all variables (Jiri Olsa) - Move arch setup into separate Makefile (Jiri Olsa) - Make libtraceevent install target quieter (Jiri Olsa) - Make tests/make output more compact (Jiri Olsa) - Ignore generated files in feature-checks (Chunwei Chen) - Introduce pevent_filter_strerror() in libtraceevent, similar in purpose to libc's strerror() function (Namhyung Kim) - Use perf_data_file methods to write output file in 'record' and 'inject' (Jiri Olsa) - Use pr_*() functions where applicable in 'report' (Namhyung Kim) - Add 'machine' 'addr_location' struct to have full picture (machine, thread, map, symbol, addr) for a (partially) resolved address, reducing function signatures (Arnaldo 
Carvalho de Melo) - Reduce code duplication in the histogram entry creation/insertion (Arnaldo Carvalho de Melo) - Auto allocate annotation histogram data structures (Arnaldo Carvalho de Melo) - No need to test against NULL before calling free, also set freed memory in struct pointers to NULL, to help fixing use after free bugs (Arnaldo Carvalho de Melo) - Rename some struct DSO binary_type related members and methods, to clarify its purpose and need for differentiation (symtab_type, ie one is about the files .text, CFI, etc, i.e. its binary contents, and the other is about where the symbol table came from (Arnaldo Carvalho de Melo) - Convert to new topic libraries, starting with an API one (sysfs, debugfs, etc), renaming liblk in the process (Borislav Petkov) - Get rid of some more panic() like error handling in libtraceevent. (Namhyung Kim) - Get rid of panic() like calls in libtraceevent (Namhyung Kim) - Start carving out symbol parsing routines (perf, just moving routines to topic files in tools/lib/symbol/, tools that want to use it need to integrate it directly, ie no tools/lib/symbol/Makefile is provided (Arnaldo Carvalho de Melo) - Assorted refactoring patches, moving code around and adding utility evlist methods that will be used in the IPT patchset (Adrian Hunter) - Assorted mmap_pages handling fixes (Adrian Hunter) - Several man pages typo fixes (Dongsheng Yang) - Get rid of several die() calls in libtraceevent (Namhyung Kim) - Use basename() in a more robust way, to avoid problems related to different system library implementations for that function (Stephane Eranian) - Remove open coded management of short_name_allocated member (Adrian Hunter) - Several cleanups in the "dso" methods, constifying some parameters and renaming some fields to clarify its purpose (Arnaldo Carvalho de Melo) - Add per-feature check flags, fixing libunwind related build problems on some architectures (Jean Pihet) - Do not disable source line lookup just because of one failure. 
(Adrian Hunter) - Several 'perf kvm' man page corrections (Dongsheng Yang) - Correct the message in feature-libnuma checking, showing the right devel package names for various distros (Dongsheng Yang) - Polish 'readn()' function and introduce its counterpart, 'writen()' (Jiri Olsa) - Start moving timechart state from global variables to a 'perf_tool' derived 'timechart' struct (Arnaldo Carvalho de Melo) ... and lots of fixes and improvements I forgot to list" * 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (282 commits) perf tools: Remove unnecessary callchain cursor state restore on unmatch perf callchain: Spare double comparison of callchain first entry perf tools: Do proper comm override error handling perf symbols: Export elf_section_by_name and reuse perf probe: Release all dynamically allocated parameters perf probe: Release allocated probe_trace_event if failed perf tools: Add 'build-test' make target tools lib traceevent: Unregister handler when xen plugin is unloaded tools lib traceevent: Unregister handler when scsi plugin is unloaded tools lib traceevent: Unregister handler when jbd2 plugin is is unloaded tools lib traceevent: Unregister handler when cfg80211 plugin is unloaded tools lib traceevent: Unregister handler when mac80211 plugin is unloaded tools lib traceevent: Unregister handler when sched_switch plugin is unloaded tools lib traceevent: Unregister handler when kvm plugin is unloaded tools lib traceevent: Unregister handler when kmem plugin is unloaded tools lib traceevent: Unregister handler when hrtimer plugin is unloaded tools lib traceevent: Unregister handler when function plugin is unloaded tools lib traceevent: Add pevent_unregister_print_function() tools lib traceevent: Add pevent_unregister_event_handler() tools lib traceevent: fix pointer-integer size mismatch ...
Diffstat (limited to 'tools/perf/builtin-stat.c')
-rw-r--r--tools/perf/builtin-stat.c186
1 files changed, 128 insertions, 58 deletions
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index ee0d565f83e3..8b0e1c9234d9 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -138,6 +138,7 @@ static const char *post_cmd = NULL;
138static bool sync_run = false; 138static bool sync_run = false;
139static unsigned int interval = 0; 139static unsigned int interval = 0;
140static unsigned int initial_delay = 0; 140static unsigned int initial_delay = 0;
141static unsigned int unit_width = 4; /* strlen("unit") */
141static bool forever = false; 142static bool forever = false;
142static struct timespec ref_time; 143static struct timespec ref_time;
143static struct cpu_map *aggr_map; 144static struct cpu_map *aggr_map;
@@ -184,8 +185,7 @@ static int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel)
184 185
185static void perf_evsel__free_stat_priv(struct perf_evsel *evsel) 186static void perf_evsel__free_stat_priv(struct perf_evsel *evsel)
186{ 187{
187 free(evsel->priv); 188 zfree(&evsel->priv);
188 evsel->priv = NULL;
189} 189}
190 190
191static int perf_evsel__alloc_prev_raw_counts(struct perf_evsel *evsel) 191static int perf_evsel__alloc_prev_raw_counts(struct perf_evsel *evsel)
@@ -207,15 +207,14 @@ static int perf_evsel__alloc_prev_raw_counts(struct perf_evsel *evsel)
207 207
208static void perf_evsel__free_prev_raw_counts(struct perf_evsel *evsel) 208static void perf_evsel__free_prev_raw_counts(struct perf_evsel *evsel)
209{ 209{
210 free(evsel->prev_raw_counts); 210 zfree(&evsel->prev_raw_counts);
211 evsel->prev_raw_counts = NULL;
212} 211}
213 212
214static void perf_evlist__free_stats(struct perf_evlist *evlist) 213static void perf_evlist__free_stats(struct perf_evlist *evlist)
215{ 214{
216 struct perf_evsel *evsel; 215 struct perf_evsel *evsel;
217 216
218 list_for_each_entry(evsel, &evlist->entries, node) { 217 evlist__for_each(evlist, evsel) {
219 perf_evsel__free_stat_priv(evsel); 218 perf_evsel__free_stat_priv(evsel);
220 perf_evsel__free_counts(evsel); 219 perf_evsel__free_counts(evsel);
221 perf_evsel__free_prev_raw_counts(evsel); 220 perf_evsel__free_prev_raw_counts(evsel);
@@ -226,7 +225,7 @@ static int perf_evlist__alloc_stats(struct perf_evlist *evlist, bool alloc_raw)
226{ 225{
227 struct perf_evsel *evsel; 226 struct perf_evsel *evsel;
228 227
229 list_for_each_entry(evsel, &evlist->entries, node) { 228 evlist__for_each(evlist, evsel) {
230 if (perf_evsel__alloc_stat_priv(evsel) < 0 || 229 if (perf_evsel__alloc_stat_priv(evsel) < 0 ||
231 perf_evsel__alloc_counts(evsel, perf_evsel__nr_cpus(evsel)) < 0 || 230 perf_evsel__alloc_counts(evsel, perf_evsel__nr_cpus(evsel)) < 0 ||
232 (alloc_raw && perf_evsel__alloc_prev_raw_counts(evsel) < 0)) 231 (alloc_raw && perf_evsel__alloc_prev_raw_counts(evsel) < 0))
@@ -260,7 +259,7 @@ static void perf_stat__reset_stats(struct perf_evlist *evlist)
260{ 259{
261 struct perf_evsel *evsel; 260 struct perf_evsel *evsel;
262 261
263 list_for_each_entry(evsel, &evlist->entries, node) { 262 evlist__for_each(evlist, evsel) {
264 perf_evsel__reset_stat_priv(evsel); 263 perf_evsel__reset_stat_priv(evsel);
265 perf_evsel__reset_counts(evsel, perf_evsel__nr_cpus(evsel)); 264 perf_evsel__reset_counts(evsel, perf_evsel__nr_cpus(evsel));
266 } 265 }
@@ -327,13 +326,13 @@ static struct perf_evsel *nth_evsel(int n)
327 326
328 /* Assumes this only called when evsel_list does not change anymore. */ 327 /* Assumes this only called when evsel_list does not change anymore. */
329 if (!array) { 328 if (!array) {
330 list_for_each_entry(ev, &evsel_list->entries, node) 329 evlist__for_each(evsel_list, ev)
331 array_len++; 330 array_len++;
332 array = malloc(array_len * sizeof(void *)); 331 array = malloc(array_len * sizeof(void *));
333 if (!array) 332 if (!array)
334 exit(ENOMEM); 333 exit(ENOMEM);
335 j = 0; 334 j = 0;
336 list_for_each_entry(ev, &evsel_list->entries, node) 335 evlist__for_each(evsel_list, ev)
337 array[j++] = ev; 336 array[j++] = ev;
338 } 337 }
339 if (n < array_len) 338 if (n < array_len)
@@ -441,13 +440,13 @@ static void print_interval(void)
441 char prefix[64]; 440 char prefix[64];
442 441
443 if (aggr_mode == AGGR_GLOBAL) { 442 if (aggr_mode == AGGR_GLOBAL) {
444 list_for_each_entry(counter, &evsel_list->entries, node) { 443 evlist__for_each(evsel_list, counter) {
445 ps = counter->priv; 444 ps = counter->priv;
446 memset(ps->res_stats, 0, sizeof(ps->res_stats)); 445 memset(ps->res_stats, 0, sizeof(ps->res_stats));
447 read_counter_aggr(counter); 446 read_counter_aggr(counter);
448 } 447 }
449 } else { 448 } else {
450 list_for_each_entry(counter, &evsel_list->entries, node) { 449 evlist__for_each(evsel_list, counter) {
451 ps = counter->priv; 450 ps = counter->priv;
452 memset(ps->res_stats, 0, sizeof(ps->res_stats)); 451 memset(ps->res_stats, 0, sizeof(ps->res_stats));
453 read_counter(counter); 452 read_counter(counter);
@@ -461,17 +460,17 @@ static void print_interval(void)
461 if (num_print_interval == 0 && !csv_output) { 460 if (num_print_interval == 0 && !csv_output) {
462 switch (aggr_mode) { 461 switch (aggr_mode) {
463 case AGGR_SOCKET: 462 case AGGR_SOCKET:
464 fprintf(output, "# time socket cpus counts events\n"); 463 fprintf(output, "# time socket cpus counts %*s events\n", unit_width, "unit");
465 break; 464 break;
466 case AGGR_CORE: 465 case AGGR_CORE:
467 fprintf(output, "# time core cpus counts events\n"); 466 fprintf(output, "# time core cpus counts %*s events\n", unit_width, "unit");
468 break; 467 break;
469 case AGGR_NONE: 468 case AGGR_NONE:
470 fprintf(output, "# time CPU counts events\n"); 469 fprintf(output, "# time CPU counts %*s events\n", unit_width, "unit");
471 break; 470 break;
472 case AGGR_GLOBAL: 471 case AGGR_GLOBAL:
473 default: 472 default:
474 fprintf(output, "# time counts events\n"); 473 fprintf(output, "# time counts %*s events\n", unit_width, "unit");
475 } 474 }
476 } 475 }
477 476
@@ -484,12 +483,12 @@ static void print_interval(void)
484 print_aggr(prefix); 483 print_aggr(prefix);
485 break; 484 break;
486 case AGGR_NONE: 485 case AGGR_NONE:
487 list_for_each_entry(counter, &evsel_list->entries, node) 486 evlist__for_each(evsel_list, counter)
488 print_counter(counter, prefix); 487 print_counter(counter, prefix);
489 break; 488 break;
490 case AGGR_GLOBAL: 489 case AGGR_GLOBAL:
491 default: 490 default:
492 list_for_each_entry(counter, &evsel_list->entries, node) 491 evlist__for_each(evsel_list, counter)
493 print_counter_aggr(counter, prefix); 492 print_counter_aggr(counter, prefix);
494 } 493 }
495 494
@@ -505,17 +504,31 @@ static void handle_initial_delay(void)
505 nthreads = thread_map__nr(evsel_list->threads); 504 nthreads = thread_map__nr(evsel_list->threads);
506 505
507 usleep(initial_delay * 1000); 506 usleep(initial_delay * 1000);
508 list_for_each_entry(counter, &evsel_list->entries, node) 507 evlist__for_each(evsel_list, counter)
509 perf_evsel__enable(counter, ncpus, nthreads); 508 perf_evsel__enable(counter, ncpus, nthreads);
510 } 509 }
511} 510}
512 511
512static volatile int workload_exec_errno;
513
514/*
515 * perf_evlist__prepare_workload will send a SIGUSR1
516 * if the fork fails, since we asked by setting its
517 * want_signal to true.
518 */
519static void workload_exec_failed_signal(int signo __maybe_unused, siginfo_t *info,
520 void *ucontext __maybe_unused)
521{
522 workload_exec_errno = info->si_value.sival_int;
523}
524
513static int __run_perf_stat(int argc, const char **argv) 525static int __run_perf_stat(int argc, const char **argv)
514{ 526{
515 char msg[512]; 527 char msg[512];
516 unsigned long long t0, t1; 528 unsigned long long t0, t1;
517 struct perf_evsel *counter; 529 struct perf_evsel *counter;
518 struct timespec ts; 530 struct timespec ts;
531 size_t l;
519 int status = 0; 532 int status = 0;
520 const bool forks = (argc > 0); 533 const bool forks = (argc > 0);
521 534
@@ -528,8 +541,8 @@ static int __run_perf_stat(int argc, const char **argv)
528 } 541 }
529 542
530 if (forks) { 543 if (forks) {
531 if (perf_evlist__prepare_workload(evsel_list, &target, argv, 544 if (perf_evlist__prepare_workload(evsel_list, &target, argv, false,
532 false, false) < 0) { 545 workload_exec_failed_signal) < 0) {
533 perror("failed to prepare workload"); 546 perror("failed to prepare workload");
534 return -1; 547 return -1;
535 } 548 }
@@ -539,7 +552,7 @@ static int __run_perf_stat(int argc, const char **argv)
539 if (group) 552 if (group)
540 perf_evlist__set_leader(evsel_list); 553 perf_evlist__set_leader(evsel_list);
541 554
542 list_for_each_entry(counter, &evsel_list->entries, node) { 555 evlist__for_each(evsel_list, counter) {
543 if (create_perf_stat_counter(counter) < 0) { 556 if (create_perf_stat_counter(counter) < 0) {
544 /* 557 /*
545 * PPC returns ENXIO for HW counters until 2.6.37 558 * PPC returns ENXIO for HW counters until 2.6.37
@@ -565,6 +578,10 @@ static int __run_perf_stat(int argc, const char **argv)
565 return -1; 578 return -1;
566 } 579 }
567 counter->supported = true; 580 counter->supported = true;
581
582 l = strlen(counter->unit);
583 if (l > unit_width)
584 unit_width = l;
568 } 585 }
569 586
570 if (perf_evlist__apply_filters(evsel_list)) { 587 if (perf_evlist__apply_filters(evsel_list)) {
@@ -590,6 +607,13 @@ static int __run_perf_stat(int argc, const char **argv)
590 } 607 }
591 } 608 }
592 wait(&status); 609 wait(&status);
610
611 if (workload_exec_errno) {
612 const char *emsg = strerror_r(workload_exec_errno, msg, sizeof(msg));
613 pr_err("Workload failed: %s\n", emsg);
614 return -1;
615 }
616
593 if (WIFSIGNALED(status)) 617 if (WIFSIGNALED(status))
594 psignal(WTERMSIG(status), argv[0]); 618 psignal(WTERMSIG(status), argv[0]);
595 } else { 619 } else {
@@ -606,13 +630,13 @@ static int __run_perf_stat(int argc, const char **argv)
606 update_stats(&walltime_nsecs_stats, t1 - t0); 630 update_stats(&walltime_nsecs_stats, t1 - t0);
607 631
608 if (aggr_mode == AGGR_GLOBAL) { 632 if (aggr_mode == AGGR_GLOBAL) {
609 list_for_each_entry(counter, &evsel_list->entries, node) { 633 evlist__for_each(evsel_list, counter) {
610 read_counter_aggr(counter); 634 read_counter_aggr(counter);
611 perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter), 635 perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter),
612 thread_map__nr(evsel_list->threads)); 636 thread_map__nr(evsel_list->threads));
613 } 637 }
614 } else { 638 } else {
615 list_for_each_entry(counter, &evsel_list->entries, node) { 639 evlist__for_each(evsel_list, counter) {
616 read_counter(counter); 640 read_counter(counter);
617 perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter), 1); 641 perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter), 1);
618 } 642 }
@@ -621,7 +645,7 @@ static int __run_perf_stat(int argc, const char **argv)
621 return WEXITSTATUS(status); 645 return WEXITSTATUS(status);
622} 646}
623 647
624static int run_perf_stat(int argc __maybe_unused, const char **argv) 648static int run_perf_stat(int argc, const char **argv)
625{ 649{
626 int ret; 650 int ret;
627 651
@@ -704,14 +728,25 @@ static void aggr_printout(struct perf_evsel *evsel, int id, int nr)
704static void nsec_printout(int cpu, int nr, struct perf_evsel *evsel, double avg) 728static void nsec_printout(int cpu, int nr, struct perf_evsel *evsel, double avg)
705{ 729{
706 double msecs = avg / 1e6; 730 double msecs = avg / 1e6;
707 const char *fmt = csv_output ? "%.6f%s%s" : "%18.6f%s%-25s"; 731 const char *fmt_v, *fmt_n;
708 char name[25]; 732 char name[25];
709 733
734 fmt_v = csv_output ? "%.6f%s" : "%18.6f%s";
735 fmt_n = csv_output ? "%s" : "%-25s";
736
710 aggr_printout(evsel, cpu, nr); 737 aggr_printout(evsel, cpu, nr);
711 738
712 scnprintf(name, sizeof(name), "%s%s", 739 scnprintf(name, sizeof(name), "%s%s",
713 perf_evsel__name(evsel), csv_output ? "" : " (msec)"); 740 perf_evsel__name(evsel), csv_output ? "" : " (msec)");
714 fprintf(output, fmt, msecs, csv_sep, name); 741
742 fprintf(output, fmt_v, msecs, csv_sep);
743
744 if (csv_output)
745 fprintf(output, "%s%s", evsel->unit, csv_sep);
746 else
747 fprintf(output, "%-*s%s", unit_width, evsel->unit, csv_sep);
748
749 fprintf(output, fmt_n, name);
715 750
716 if (evsel->cgrp) 751 if (evsel->cgrp)
717 fprintf(output, "%s%s", csv_sep, evsel->cgrp->name); 752 fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
@@ -908,21 +943,31 @@ static void print_ll_cache_misses(int cpu,
908static void abs_printout(int cpu, int nr, struct perf_evsel *evsel, double avg) 943static void abs_printout(int cpu, int nr, struct perf_evsel *evsel, double avg)
909{ 944{
910 double total, ratio = 0.0, total2; 945 double total, ratio = 0.0, total2;
946 double sc = evsel->scale;
911 const char *fmt; 947 const char *fmt;
912 948
913 if (csv_output) 949 if (csv_output) {
914 fmt = "%.0f%s%s"; 950 fmt = sc != 1.0 ? "%.2f%s" : "%.0f%s";
915 else if (big_num) 951 } else {
916 fmt = "%'18.0f%s%-25s"; 952 if (big_num)
917 else 953 fmt = sc != 1.0 ? "%'18.2f%s" : "%'18.0f%s";
918 fmt = "%18.0f%s%-25s"; 954 else
955 fmt = sc != 1.0 ? "%18.2f%s" : "%18.0f%s";
956 }
919 957
920 aggr_printout(evsel, cpu, nr); 958 aggr_printout(evsel, cpu, nr);
921 959
922 if (aggr_mode == AGGR_GLOBAL) 960 if (aggr_mode == AGGR_GLOBAL)
923 cpu = 0; 961 cpu = 0;
924 962
925 fprintf(output, fmt, avg, csv_sep, perf_evsel__name(evsel)); 963 fprintf(output, fmt, avg, csv_sep);
964
965 if (evsel->unit)
966 fprintf(output, "%-*s%s",
967 csv_output ? 0 : unit_width,
968 evsel->unit, csv_sep);
969
970 fprintf(output, "%-*s", csv_output ? 0 : 25, perf_evsel__name(evsel));
926 971
927 if (evsel->cgrp) 972 if (evsel->cgrp)
928 fprintf(output, "%s%s", csv_sep, evsel->cgrp->name); 973 fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
@@ -941,7 +986,10 @@ static void abs_printout(int cpu, int nr, struct perf_evsel *evsel, double avg)
941 986
942 if (total && avg) { 987 if (total && avg) {
943 ratio = total / avg; 988 ratio = total / avg;
944 fprintf(output, "\n # %5.2f stalled cycles per insn", ratio); 989 fprintf(output, "\n");
990 if (aggr_mode == AGGR_NONE)
991 fprintf(output, " ");
992 fprintf(output, " # %5.2f stalled cycles per insn", ratio);
945 } 993 }
946 994
947 } else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES) && 995 } else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES) &&
@@ -1061,6 +1109,7 @@ static void print_aggr(char *prefix)
1061{ 1109{
1062 struct perf_evsel *counter; 1110 struct perf_evsel *counter;
1063 int cpu, cpu2, s, s2, id, nr; 1111 int cpu, cpu2, s, s2, id, nr;
1112 double uval;
1064 u64 ena, run, val; 1113 u64 ena, run, val;
1065 1114
1066 if (!(aggr_map || aggr_get_id)) 1115 if (!(aggr_map || aggr_get_id))
@@ -1068,7 +1117,7 @@ static void print_aggr(char *prefix)
1068 1117
1069 for (s = 0; s < aggr_map->nr; s++) { 1118 for (s = 0; s < aggr_map->nr; s++) {
1070 id = aggr_map->map[s]; 1119 id = aggr_map->map[s];
1071 list_for_each_entry(counter, &evsel_list->entries, node) { 1120 evlist__for_each(evsel_list, counter) {
1072 val = ena = run = 0; 1121 val = ena = run = 0;
1073 nr = 0; 1122 nr = 0;
1074 for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { 1123 for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
@@ -1087,11 +1136,17 @@ static void print_aggr(char *prefix)
1087 if (run == 0 || ena == 0) { 1136 if (run == 0 || ena == 0) {
1088 aggr_printout(counter, id, nr); 1137 aggr_printout(counter, id, nr);
1089 1138
1090 fprintf(output, "%*s%s%*s", 1139 fprintf(output, "%*s%s",
1091 csv_output ? 0 : 18, 1140 csv_output ? 0 : 18,
1092 counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED, 1141 counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
1093 csv_sep, 1142 csv_sep);
1094 csv_output ? 0 : -24, 1143
1144 fprintf(output, "%-*s%s",
1145 csv_output ? 0 : unit_width,
1146 counter->unit, csv_sep);
1147
1148 fprintf(output, "%*s",
1149 csv_output ? 0 : -25,
1095 perf_evsel__name(counter)); 1150 perf_evsel__name(counter));
1096 1151
1097 if (counter->cgrp) 1152 if (counter->cgrp)
@@ -1101,11 +1156,12 @@ static void print_aggr(char *prefix)
1101 fputc('\n', output); 1156 fputc('\n', output);
1102 continue; 1157 continue;
1103 } 1158 }
1159 uval = val * counter->scale;
1104 1160
1105 if (nsec_counter(counter)) 1161 if (nsec_counter(counter))
1106 nsec_printout(id, nr, counter, val); 1162 nsec_printout(id, nr, counter, uval);
1107 else 1163 else
1108 abs_printout(id, nr, counter, val); 1164 abs_printout(id, nr, counter, uval);
1109 1165
1110 if (!csv_output) { 1166 if (!csv_output) {
1111 print_noise(counter, 1.0); 1167 print_noise(counter, 1.0);
@@ -1128,16 +1184,21 @@ static void print_counter_aggr(struct perf_evsel *counter, char *prefix)
1128 struct perf_stat *ps = counter->priv; 1184 struct perf_stat *ps = counter->priv;
1129 double avg = avg_stats(&ps->res_stats[0]); 1185 double avg = avg_stats(&ps->res_stats[0]);
1130 int scaled = counter->counts->scaled; 1186 int scaled = counter->counts->scaled;
1187 double uval;
1131 1188
1132 if (prefix) 1189 if (prefix)
1133 fprintf(output, "%s", prefix); 1190 fprintf(output, "%s", prefix);
1134 1191
1135 if (scaled == -1) { 1192 if (scaled == -1) {
1136 fprintf(output, "%*s%s%*s", 1193 fprintf(output, "%*s%s",
1137 csv_output ? 0 : 18, 1194 csv_output ? 0 : 18,
1138 counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED, 1195 counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
1139 csv_sep, 1196 csv_sep);
1140 csv_output ? 0 : -24, 1197 fprintf(output, "%-*s%s",
1198 csv_output ? 0 : unit_width,
1199 counter->unit, csv_sep);
1200 fprintf(output, "%*s",
1201 csv_output ? 0 : -25,
1141 perf_evsel__name(counter)); 1202 perf_evsel__name(counter));
1142 1203
1143 if (counter->cgrp) 1204 if (counter->cgrp)
@@ -1147,10 +1208,12 @@ static void print_counter_aggr(struct perf_evsel *counter, char *prefix)
1147 return; 1208 return;
1148 } 1209 }
1149 1210
1211 uval = avg * counter->scale;
1212
1150 if (nsec_counter(counter)) 1213 if (nsec_counter(counter))
1151 nsec_printout(-1, 0, counter, avg); 1214 nsec_printout(-1, 0, counter, uval);
1152 else 1215 else
1153 abs_printout(-1, 0, counter, avg); 1216 abs_printout(-1, 0, counter, uval);
1154 1217
1155 print_noise(counter, avg); 1218 print_noise(counter, avg);
1156 1219
@@ -1177,6 +1240,7 @@ static void print_counter_aggr(struct perf_evsel *counter, char *prefix)
1177static void print_counter(struct perf_evsel *counter, char *prefix) 1240static void print_counter(struct perf_evsel *counter, char *prefix)
1178{ 1241{
1179 u64 ena, run, val; 1242 u64 ena, run, val;
1243 double uval;
1180 int cpu; 1244 int cpu;
1181 1245
1182 for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { 1246 for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
@@ -1188,14 +1252,20 @@ static void print_counter(struct perf_evsel *counter, char *prefix)
1188 fprintf(output, "%s", prefix); 1252 fprintf(output, "%s", prefix);
1189 1253
1190 if (run == 0 || ena == 0) { 1254 if (run == 0 || ena == 0) {
1191 fprintf(output, "CPU%*d%s%*s%s%*s", 1255 fprintf(output, "CPU%*d%s%*s%s",
1192 csv_output ? 0 : -4, 1256 csv_output ? 0 : -4,
1193 perf_evsel__cpus(counter)->map[cpu], csv_sep, 1257 perf_evsel__cpus(counter)->map[cpu], csv_sep,
1194 csv_output ? 0 : 18, 1258 csv_output ? 0 : 18,
1195 counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED, 1259 counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
1196 csv_sep, 1260 csv_sep);
1197 csv_output ? 0 : -24, 1261
1198 perf_evsel__name(counter)); 1262 fprintf(output, "%-*s%s",
1263 csv_output ? 0 : unit_width,
1264 counter->unit, csv_sep);
1265
1266 fprintf(output, "%*s",
1267 csv_output ? 0 : -25,
1268 perf_evsel__name(counter));
1199 1269
1200 if (counter->cgrp) 1270 if (counter->cgrp)
1201 fprintf(output, "%s%s", 1271 fprintf(output, "%s%s",
@@ -1205,10 +1275,12 @@ static void print_counter(struct perf_evsel *counter, char *prefix)
1205 continue; 1275 continue;
1206 } 1276 }
1207 1277
1278 uval = val * counter->scale;
1279
1208 if (nsec_counter(counter)) 1280 if (nsec_counter(counter))
1209 nsec_printout(cpu, 0, counter, val); 1281 nsec_printout(cpu, 0, counter, uval);
1210 else 1282 else
1211 abs_printout(cpu, 0, counter, val); 1283 abs_printout(cpu, 0, counter, uval);
1212 1284
1213 if (!csv_output) { 1285 if (!csv_output) {
1214 print_noise(counter, 1.0); 1286 print_noise(counter, 1.0);
@@ -1256,11 +1328,11 @@ static void print_stat(int argc, const char **argv)
1256 print_aggr(NULL); 1328 print_aggr(NULL);
1257 break; 1329 break;
1258 case AGGR_GLOBAL: 1330 case AGGR_GLOBAL:
1259 list_for_each_entry(counter, &evsel_list->entries, node) 1331 evlist__for_each(evsel_list, counter)
1260 print_counter_aggr(counter, NULL); 1332 print_counter_aggr(counter, NULL);
1261 break; 1333 break;
1262 case AGGR_NONE: 1334 case AGGR_NONE:
1263 list_for_each_entry(counter, &evsel_list->entries, node) 1335 evlist__for_each(evsel_list, counter)
1264 print_counter(counter, NULL); 1336 print_counter(counter, NULL);
1265 break; 1337 break;
1266 default: 1338 default:
@@ -1710,14 +1782,14 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
1710 if (interval && interval < 100) { 1782 if (interval && interval < 100) {
1711 pr_err("print interval must be >= 100ms\n"); 1783 pr_err("print interval must be >= 100ms\n");
1712 parse_options_usage(stat_usage, options, "I", 1); 1784 parse_options_usage(stat_usage, options, "I", 1);
1713 goto out_free_maps; 1785 goto out;
1714 } 1786 }
1715 1787
1716 if (perf_evlist__alloc_stats(evsel_list, interval)) 1788 if (perf_evlist__alloc_stats(evsel_list, interval))
1717 goto out_free_maps; 1789 goto out;
1718 1790
1719 if (perf_stat_init_aggr_mode()) 1791 if (perf_stat_init_aggr_mode())
1720 goto out_free_maps; 1792 goto out;
1721 1793
1722 /* 1794 /*
1723 * We dont want to block the signals - that would cause 1795 * We dont want to block the signals - that would cause
@@ -1749,8 +1821,6 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
1749 print_stat(argc, argv); 1821 print_stat(argc, argv);
1750 1822
1751 perf_evlist__free_stats(evsel_list); 1823 perf_evlist__free_stats(evsel_list);
1752out_free_maps:
1753 perf_evlist__delete_maps(evsel_list);
1754out: 1824out:
1755 perf_evlist__delete(evsel_list); 1825 perf_evlist__delete(evsel_list);
1756 return status; 1826 return status;