diff options
author | Ingo Molnar <mingo@elte.hu> | 2011-04-27 07:50:47 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2011-04-26 15:03:16 -0400 |
commit | c6264deff7ea6125492b442edad885e5429679af (patch) | |
tree | 8533d8e10cadd0eb31518a514486c561f004115b /tools | |
parent | 8bb6c79f24e66538f606076915e918242c02ec7c (diff) |
perf stat: Add -d/--detailed flag to run with a lot of events
Add the new -d/--detailed flag, which generates a pretty detailed event list:
Performance counter stats for './hackbench 10' (10 runs):
1514.287888 task-clock # 10.897 CPUs utilized ( +- 3.05% )
39,698 context-switches # 0.026 M/sec ( +- 12.19% )
8,147 CPU-migrations # 0.005 M/sec ( +- 16.55% )
17,918 page-faults # 0.012 M/sec ( +- 0.37% )
2,944,504,050 cycles # 1.944 GHz ( +- 3.89% ) (32.60%)
1,043,971,283 stalled-cycles # 35.45% of all cycles are idle ( +- 5.22% ) (44.48%)
1,655,906,768 instructions # 0.56 insns per cycle
# 0.63 stalled cycles per insn ( +- 1.95% ) (55.09%)
338,832,373 branches # 223.757 M/sec ( +- 1.96% ) (64.47%)
3,892,416 branch-misses # 1.15% of all branches ( +- 5.49% ) (73.12%)
606,410,482 L1-dcache-loads # 400.459 M/sec ( +- 1.29% ) (71.21%)
31,204,395 L1-dcache-load-misses # 5.15% of all L1-dcache hits ( +- 3.04% ) (60.43%)
3,922,751 LLC-loads # 2.590 M/sec ( +- 6.80% ) (46.87%)
5,037,288 LLC-load-misses # 3.327 M/sec ( +- 3.56% ) (13.00%)
0.138966828 seconds time elapsed ( +- 4.11% )
This can be used "at a glance" for narrower analysis.
-d can also be used in addition to other -e events, to further expand an event list.
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Link: http://lkml.kernel.org/n/tip-cxs98quixs3qyvdqx3goojc4@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'tools')
-rw-r--r-- | tools/perf/builtin-stat.c | 68 |
1 files changed, 60 insertions, 8 deletions
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 03bac6aa014b..6959fdecb203 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -73,6 +73,47 @@ static struct perf_event_attr default_attrs[] = { | |||
73 | 73 | ||
74 | }; | 74 | }; |
75 | 75 | ||
76 | /* | ||
77 | * Detailed stats: | ||
78 | */ | ||
79 | static struct perf_event_attr detailed_attrs[] = { | ||
80 | |||
81 | { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK }, | ||
82 | { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES }, | ||
83 | { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS }, | ||
84 | { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS }, | ||
85 | |||
86 | { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES }, | ||
87 | { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES }, | ||
88 | { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS }, | ||
89 | { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS }, | ||
90 | { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES }, | ||
91 | |||
92 | { .type = PERF_TYPE_HW_CACHE, | ||
93 | .config = | ||
94 | PERF_COUNT_HW_CACHE_L1D << 0 | | ||
95 | (PERF_COUNT_HW_CACHE_OP_READ << 8) | | ||
96 | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, | ||
97 | |||
98 | { .type = PERF_TYPE_HW_CACHE, | ||
99 | .config = | ||
100 | PERF_COUNT_HW_CACHE_L1D << 0 | | ||
101 | (PERF_COUNT_HW_CACHE_OP_READ << 8) | | ||
102 | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, | ||
103 | |||
104 | { .type = PERF_TYPE_HW_CACHE, | ||
105 | .config = | ||
106 | PERF_COUNT_HW_CACHE_LL << 0 | | ||
107 | (PERF_COUNT_HW_CACHE_OP_READ << 8) | | ||
108 | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, | ||
109 | |||
110 | { .type = PERF_TYPE_HW_CACHE, | ||
111 | .config = | ||
112 | PERF_COUNT_HW_CACHE_LL << 0 | | ||
113 | (PERF_COUNT_HW_CACHE_OP_READ << 8) | | ||
114 | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, | ||
115 | }; | ||
116 | |||
76 | struct perf_evlist *evsel_list; | 117 | struct perf_evlist *evsel_list; |
77 | 118 | ||
78 | static bool system_wide = false; | 119 | static bool system_wide = false; |
@@ -86,6 +127,7 @@ static pid_t target_pid = -1; | |||
86 | static pid_t target_tid = -1; | 127 | static pid_t target_tid = -1; |
87 | static pid_t child_pid = -1; | 128 | static pid_t child_pid = -1; |
88 | static bool null_run = false; | 129 | static bool null_run = false; |
130 | static bool detailed_run = false; | ||
89 | static bool big_num = true; | 131 | static bool big_num = true; |
90 | static int big_num_opt = -1; | 132 | static int big_num_opt = -1; |
91 | static const char *cpu_list; | 133 | static const char *cpu_list; |
@@ -550,7 +592,7 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg) | |||
550 | evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1D | | 592 | evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1D | |
551 | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | | 593 | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | |
552 | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && | 594 | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && |
553 | runtime_branches_stats[cpu].n != 0) { | 595 | runtime_l1_dcache_stats[cpu].n != 0) { |
554 | print_l1_dcache_misses(cpu, evsel, avg); | 596 | print_l1_dcache_misses(cpu, evsel, avg); |
555 | } else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES) && | 597 | } else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES) && |
556 | runtime_cacherefs_stats[cpu].n != 0) { | 598 | runtime_cacherefs_stats[cpu].n != 0) { |
@@ -625,8 +667,7 @@ static void print_counter_aggr(struct perf_evsel *counter) | |||
625 | avg_enabled = avg_stats(&ps->res_stats[1]); | 667 | avg_enabled = avg_stats(&ps->res_stats[1]); |
626 | avg_running = avg_stats(&ps->res_stats[2]); | 668 | avg_running = avg_stats(&ps->res_stats[2]); |
627 | 669 | ||
628 | fprintf(stderr, " (scaled from %.2f%%)", | 670 | fprintf(stderr, " (%.2f%%)", 100 * avg_running / avg_enabled); |
629 | 100 * avg_running / avg_enabled); | ||
630 | } | 671 | } |
631 | fprintf(stderr, "\n"); | 672 | fprintf(stderr, "\n"); |
632 | } | 673 | } |
@@ -668,10 +709,8 @@ static void print_counter(struct perf_evsel *counter) | |||
668 | if (!csv_output) { | 709 | if (!csv_output) { |
669 | print_noise(counter, 1.0); | 710 | print_noise(counter, 1.0); |
670 | 711 | ||
671 | if (run != ena) { | 712 | if (run != ena) |
672 | fprintf(stderr, " (scaled from %.2f%%)", | 713 | fprintf(stderr, " (%.2f%%)", 100.0 * run / ena); |
673 | 100.0 * run / ena); | ||
674 | } | ||
675 | } | 714 | } |
676 | fputc('\n', stderr); | 715 | fputc('\n', stderr); |
677 | } | 716 | } |
@@ -778,6 +817,8 @@ static const struct option options[] = { | |||
778 | "repeat command and print average + stddev (max: 100)"), | 817 | "repeat command and print average + stddev (max: 100)"), |
779 | OPT_BOOLEAN('n', "null", &null_run, | 818 | OPT_BOOLEAN('n', "null", &null_run, |
780 | "null run - dont start any counters"), | 819 | "null run - dont start any counters"), |
820 | OPT_BOOLEAN('d', "detailed", &detailed_run, | ||
821 | "detailed run - start a lot of events"), | ||
781 | OPT_CALLBACK_NOOPT('B', "big-num", NULL, NULL, | 822 | OPT_CALLBACK_NOOPT('B', "big-num", NULL, NULL, |
782 | "print large numbers with thousands\' separators", | 823 | "print large numbers with thousands\' separators", |
783 | stat__set_big_num), | 824 | stat__set_big_num), |
@@ -839,7 +880,18 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used) | |||
839 | } | 880 | } |
840 | 881 | ||
841 | /* Set attrs and nr_counters if no event is selected and !null_run */ | 882 | /* Set attrs and nr_counters if no event is selected and !null_run */ |
842 | if (!null_run && !evsel_list->nr_entries) { | 883 | if (detailed_run) { |
884 | size_t c; | ||
885 | |||
886 | for (c = 0; c < ARRAY_SIZE(detailed_attrs); ++c) { | ||
887 | pos = perf_evsel__new(&detailed_attrs[c], c); | ||
888 | if (pos == NULL) | ||
889 | goto out; | ||
890 | perf_evlist__add(evsel_list, pos); | ||
891 | } | ||
892 | } | ||
893 | /* Set attrs and nr_counters if no event is selected and !null_run */ | ||
894 | if (!detailed_run && !null_run && !evsel_list->nr_entries) { | ||
843 | size_t c; | 895 | size_t c; |
844 | 896 | ||
845 | for (c = 0; c < ARRAY_SIZE(default_attrs); ++c) { | 897 | for (c = 0; c < ARRAY_SIZE(default_attrs); ++c) { |