aboutsummaryrefslogtreecommitdiffstats
path: root/tools/perf
diff options
context:
space:
mode:
author Ingo Molnar <mingo@elte.hu> 2011-04-27 07:50:47 -0400
committer Ingo Molnar <mingo@elte.hu> 2011-04-26 15:03:16 -0400
commit c6264deff7ea6125492b442edad885e5429679af (patch)
tree 8533d8e10cadd0eb31518a514486c561f004115b /tools/perf
parent 8bb6c79f24e66538f606076915e918242c02ec7c (diff)
perf stat: Add -d/--detailed flag to run with a lot of events
Add the new -d/--detailed flag, which generates a pretty detailed event list:

 Performance counter stats for './hackbench 10' (10 runs):

       1514.287888 task-clock               #   10.897 CPUs utilized            ( +- 3.05% )
            39,698 context-switches         #    0.026 M/sec                    ( +- 12.19% )
             8,147 CPU-migrations           #    0.005 M/sec                    ( +- 16.55% )
            17,918 page-faults              #    0.012 M/sec                    ( +- 0.37% )
     2,944,504,050 cycles                   #    1.944 GHz                      ( +- 3.89% ) (32.60%)
     1,043,971,283 stalled-cycles           #   35.45% of all cycles are idle   ( +- 5.22% ) (44.48%)
     1,655,906,768 instructions             #    0.56 insns per cycle
                                            #    0.63 stalled cycles per insn   ( +- 1.95% ) (55.09%)
       338,832,373 branches                 #  223.757 M/sec                    ( +- 1.96% ) (64.47%)
         3,892,416 branch-misses            #    1.15% of all branches          ( +- 5.49% ) (73.12%)
       606,410,482 L1-dcache-loads          #  400.459 M/sec                    ( +- 1.29% ) (71.21%)
        31,204,395 L1-dcache-load-misses    #    5.15% of all L1-dcache hits    ( +- 3.04% ) (60.43%)
         3,922,751 LLC-loads                #    2.590 M/sec                    ( +- 6.80% ) (46.87%)
         5,037,288 LLC-load-misses          #    3.327 M/sec                    ( +- 3.56% ) (13.00%)

       0.138966828 seconds time elapsed ( +- 4.11% )

This can be used "at a glance" for narrower analysis.

-d can also be used in addition to other -e events, to further expand an event list.

Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Link: http://lkml.kernel.org/n/tip-cxs98quixs3qyvdqx3goojc4@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'tools/perf')
-rw-r--r-- tools/perf/builtin-stat.c 68
1 files changed, 60 insertions, 8 deletions
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 03bac6aa014b..6959fdecb203 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -73,6 +73,47 @@ static struct perf_event_attr default_attrs[] = {
73 73
74}; 74};
75 75
76/*
77 * Detailed stats:
78 */
79static struct perf_event_attr detailed_attrs[] = {
80
81 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK },
82 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES },
83 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS },
84 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS },
85
86 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES },
87 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES },
88 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS },
89 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS },
90 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES },
91
92 { .type = PERF_TYPE_HW_CACHE,
93 .config =
94 PERF_COUNT_HW_CACHE_L1D << 0 |
95 (PERF_COUNT_HW_CACHE_OP_READ << 8) |
96 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },
97
98 { .type = PERF_TYPE_HW_CACHE,
99 .config =
100 PERF_COUNT_HW_CACHE_L1D << 0 |
101 (PERF_COUNT_HW_CACHE_OP_READ << 8) |
102 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
103
104 { .type = PERF_TYPE_HW_CACHE,
105 .config =
106 PERF_COUNT_HW_CACHE_LL << 0 |
107 (PERF_COUNT_HW_CACHE_OP_READ << 8) |
108 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },
109
110 { .type = PERF_TYPE_HW_CACHE,
111 .config =
112 PERF_COUNT_HW_CACHE_LL << 0 |
113 (PERF_COUNT_HW_CACHE_OP_READ << 8) |
114 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
115};
116
76struct perf_evlist *evsel_list; 117struct perf_evlist *evsel_list;
77 118
78static bool system_wide = false; 119static bool system_wide = false;
@@ -86,6 +127,7 @@ static pid_t target_pid = -1;
86static pid_t target_tid = -1; 127static pid_t target_tid = -1;
87static pid_t child_pid = -1; 128static pid_t child_pid = -1;
88static bool null_run = false; 129static bool null_run = false;
130static bool detailed_run = false;
89static bool big_num = true; 131static bool big_num = true;
90static int big_num_opt = -1; 132static int big_num_opt = -1;
91static const char *cpu_list; 133static const char *cpu_list;
@@ -550,7 +592,7 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg)
550 evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1D | 592 evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1D |
551 ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | 593 ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
552 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && 594 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
553 runtime_branches_stats[cpu].n != 0) { 595 runtime_l1_dcache_stats[cpu].n != 0) {
554 print_l1_dcache_misses(cpu, evsel, avg); 596 print_l1_dcache_misses(cpu, evsel, avg);
555 } else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES) && 597 } else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES) &&
556 runtime_cacherefs_stats[cpu].n != 0) { 598 runtime_cacherefs_stats[cpu].n != 0) {
@@ -625,8 +667,7 @@ static void print_counter_aggr(struct perf_evsel *counter)
625 avg_enabled = avg_stats(&ps->res_stats[1]); 667 avg_enabled = avg_stats(&ps->res_stats[1]);
626 avg_running = avg_stats(&ps->res_stats[2]); 668 avg_running = avg_stats(&ps->res_stats[2]);
627 669
628 fprintf(stderr, " (scaled from %.2f%%)", 670 fprintf(stderr, " (%.2f%%)", 100 * avg_running / avg_enabled);
629 100 * avg_running / avg_enabled);
630 } 671 }
631 fprintf(stderr, "\n"); 672 fprintf(stderr, "\n");
632} 673}
@@ -668,10 +709,8 @@ static void print_counter(struct perf_evsel *counter)
668 if (!csv_output) { 709 if (!csv_output) {
669 print_noise(counter, 1.0); 710 print_noise(counter, 1.0);
670 711
671 if (run != ena) { 712 if (run != ena)
672 fprintf(stderr, " (scaled from %.2f%%)", 713 fprintf(stderr, " (%.2f%%)", 100.0 * run / ena);
673 100.0 * run / ena);
674 }
675 } 714 }
676 fputc('\n', stderr); 715 fputc('\n', stderr);
677 } 716 }
@@ -778,6 +817,8 @@ static const struct option options[] = {
778 "repeat command and print average + stddev (max: 100)"), 817 "repeat command and print average + stddev (max: 100)"),
779 OPT_BOOLEAN('n', "null", &null_run, 818 OPT_BOOLEAN('n', "null", &null_run,
780 "null run - dont start any counters"), 819 "null run - dont start any counters"),
820 OPT_BOOLEAN('d', "detailed", &detailed_run,
821 "detailed run - start a lot of events"),
781 OPT_CALLBACK_NOOPT('B', "big-num", NULL, NULL, 822 OPT_CALLBACK_NOOPT('B', "big-num", NULL, NULL,
782 "print large numbers with thousands\' separators", 823 "print large numbers with thousands\' separators",
783 stat__set_big_num), 824 stat__set_big_num),
@@ -839,7 +880,18 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used)
839 } 880 }
840 881
841 /* Set attrs and nr_counters if no event is selected and !null_run */ 882 /* Set attrs and nr_counters if no event is selected and !null_run */
842 if (!null_run && !evsel_list->nr_entries) { 883 if (detailed_run) {
884 size_t c;
885
886 for (c = 0; c < ARRAY_SIZE(detailed_attrs); ++c) {
887 pos = perf_evsel__new(&detailed_attrs[c], c);
888 if (pos == NULL)
889 goto out;
890 perf_evlist__add(evsel_list, pos);
891 }
892 }
893 /* Set attrs and nr_counters if no event is selected and !null_run */
894 if (!detailed_run && !null_run && !evsel_list->nr_entries) {
843 size_t c; 895 size_t c;
844 896
845 for (c = 0; c < ARRAY_SIZE(default_attrs); ++c) { 897 for (c = 0; c < ARRAY_SIZE(default_attrs); ++c) {