about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r--tools/perf/builtin-stat.c444
-rw-r--r--tools/perf/util/Build1
-rw-r--r--tools/perf/util/stat-shadow.c434
-rw-r--r--tools/perf/util/stat.h16
4 files changed, 455 insertions(+), 440 deletions(-)
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 50918dc9fb31..ff3d25803400 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -102,13 +102,6 @@ static struct target target = {
102 .uid = UINT_MAX, 102 .uid = UINT_MAX,
103}; 103};
104 104
105enum aggr_mode {
106 AGGR_NONE,
107 AGGR_GLOBAL,
108 AGGR_SOCKET,
109 AGGR_CORE,
110};
111
112static int run_count = 1; 105static int run_count = 1;
113static bool no_inherit = false; 106static bool no_inherit = false;
114static bool scale = true; 107static bool scale = true;
@@ -234,72 +227,6 @@ out_free:
234 return -1; 227 return -1;
235} 228}
236 229
237enum {
238 CTX_BIT_USER = 1 << 0,
239 CTX_BIT_KERNEL = 1 << 1,
240 CTX_BIT_HV = 1 << 2,
241 CTX_BIT_HOST = 1 << 3,
242 CTX_BIT_IDLE = 1 << 4,
243 CTX_BIT_MAX = 1 << 5,
244};
245
246#define NUM_CTX CTX_BIT_MAX
247
248static struct stats runtime_nsecs_stats[MAX_NR_CPUS];
249static struct stats runtime_cycles_stats[NUM_CTX][MAX_NR_CPUS];
250static struct stats runtime_stalled_cycles_front_stats[NUM_CTX][MAX_NR_CPUS];
251static struct stats runtime_stalled_cycles_back_stats[NUM_CTX][MAX_NR_CPUS];
252static struct stats runtime_branches_stats[NUM_CTX][MAX_NR_CPUS];
253static struct stats runtime_cacherefs_stats[NUM_CTX][MAX_NR_CPUS];
254static struct stats runtime_l1_dcache_stats[NUM_CTX][MAX_NR_CPUS];
255static struct stats runtime_l1_icache_stats[NUM_CTX][MAX_NR_CPUS];
256static struct stats runtime_ll_cache_stats[NUM_CTX][MAX_NR_CPUS];
257static struct stats runtime_itlb_cache_stats[NUM_CTX][MAX_NR_CPUS];
258static struct stats runtime_dtlb_cache_stats[NUM_CTX][MAX_NR_CPUS];
259static struct stats runtime_cycles_in_tx_stats[NUM_CTX][MAX_NR_CPUS];
260static struct stats walltime_nsecs_stats;
261static struct stats runtime_transaction_stats[NUM_CTX][MAX_NR_CPUS];
262static struct stats runtime_elision_stats[NUM_CTX][MAX_NR_CPUS];
263
264static int evsel_context(struct perf_evsel *evsel)
265{
266 int ctx = 0;
267
268 if (evsel->attr.exclude_kernel)
269 ctx |= CTX_BIT_KERNEL;
270 if (evsel->attr.exclude_user)
271 ctx |= CTX_BIT_USER;
272 if (evsel->attr.exclude_hv)
273 ctx |= CTX_BIT_HV;
274 if (evsel->attr.exclude_host)
275 ctx |= CTX_BIT_HOST;
276 if (evsel->attr.exclude_idle)
277 ctx |= CTX_BIT_IDLE;
278
279 return ctx;
280}
281
282static void reset_shadow_stats(void)
283{
284 memset(runtime_nsecs_stats, 0, sizeof(runtime_nsecs_stats));
285 memset(runtime_cycles_stats, 0, sizeof(runtime_cycles_stats));
286 memset(runtime_stalled_cycles_front_stats, 0, sizeof(runtime_stalled_cycles_front_stats));
287 memset(runtime_stalled_cycles_back_stats, 0, sizeof(runtime_stalled_cycles_back_stats));
288 memset(runtime_branches_stats, 0, sizeof(runtime_branches_stats));
289 memset(runtime_cacherefs_stats, 0, sizeof(runtime_cacherefs_stats));
290 memset(runtime_l1_dcache_stats, 0, sizeof(runtime_l1_dcache_stats));
291 memset(runtime_l1_icache_stats, 0, sizeof(runtime_l1_icache_stats));
292 memset(runtime_ll_cache_stats, 0, sizeof(runtime_ll_cache_stats));
293 memset(runtime_itlb_cache_stats, 0, sizeof(runtime_itlb_cache_stats));
294 memset(runtime_dtlb_cache_stats, 0, sizeof(runtime_dtlb_cache_stats));
295 memset(runtime_cycles_in_tx_stats, 0,
296 sizeof(runtime_cycles_in_tx_stats));
297 memset(runtime_transaction_stats, 0,
298 sizeof(runtime_transaction_stats));
299 memset(runtime_elision_stats, 0, sizeof(runtime_elision_stats));
300 memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats));
301}
302
303static void perf_stat__reset_stats(struct perf_evlist *evlist) 230static void perf_stat__reset_stats(struct perf_evlist *evlist)
304{ 231{
305 struct perf_evsel *evsel; 232 struct perf_evsel *evsel;
@@ -309,7 +236,7 @@ static void perf_stat__reset_stats(struct perf_evlist *evlist)
309 perf_evsel__reset_counts(evsel, perf_evsel__nr_cpus(evsel)); 236 perf_evsel__reset_counts(evsel, perf_evsel__nr_cpus(evsel));
310 } 237 }
311 238
312 reset_shadow_stats(); 239 perf_stat__reset_shadow_stats();
313} 240}
314 241
315static int create_perf_stat_counter(struct perf_evsel *evsel) 242static int create_perf_stat_counter(struct perf_evsel *evsel)
@@ -346,46 +273,6 @@ static inline int nsec_counter(struct perf_evsel *evsel)
346 return 0; 273 return 0;
347} 274}
348 275
349/*
350 * Update various tracking values we maintain to print
351 * more semantic information such as miss/hit ratios,
352 * instruction rates, etc:
353 */
354static void update_shadow_stats(struct perf_evsel *counter, u64 *count,
355 int cpu)
356{
357 int ctx = evsel_context(counter);
358
359 if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK))
360 update_stats(&runtime_nsecs_stats[cpu], count[0]);
361 else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
362 update_stats(&runtime_cycles_stats[ctx][cpu], count[0]);
363 else if (perf_stat_evsel__is(counter, CYCLES_IN_TX))
364 update_stats(&runtime_transaction_stats[ctx][cpu], count[0]);
365 else if (perf_stat_evsel__is(counter, TRANSACTION_START))
366 update_stats(&runtime_transaction_stats[ctx][cpu], count[0]);
367 else if (perf_stat_evsel__is(counter, ELISION_START))
368 update_stats(&runtime_elision_stats[ctx][cpu], count[0]);
369 else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
370 update_stats(&runtime_stalled_cycles_front_stats[ctx][cpu], count[0]);
371 else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
372 update_stats(&runtime_stalled_cycles_back_stats[ctx][cpu], count[0]);
373 else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
374 update_stats(&runtime_branches_stats[ctx][cpu], count[0]);
375 else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES))
376 update_stats(&runtime_cacherefs_stats[ctx][cpu], count[0]);
377 else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D))
378 update_stats(&runtime_l1_dcache_stats[ctx][cpu], count[0]);
379 else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I))
380 update_stats(&runtime_ll_cache_stats[ctx][cpu], count[0]);
381 else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL))
382 update_stats(&runtime_ll_cache_stats[ctx][cpu], count[0]);
383 else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB))
384 update_stats(&runtime_dtlb_cache_stats[ctx][cpu], count[0]);
385 else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
386 update_stats(&runtime_itlb_cache_stats[ctx][cpu], count[0]);
387}
388
389static void zero_per_pkg(struct perf_evsel *counter) 276static void zero_per_pkg(struct perf_evsel *counter)
390{ 277{
391 if (counter->per_pkg_mask) 278 if (counter->per_pkg_mask)
@@ -446,7 +333,7 @@ static int read_cb(struct perf_evsel *evsel, int cpu, int thread __maybe_unused,
446 perf_counts_values__scale(count, scale, NULL); 333 perf_counts_values__scale(count, scale, NULL);
447 evsel->counts->cpu[cpu] = *count; 334 evsel->counts->cpu[cpu] = *count;
448 if (aggr_mode == AGGR_NONE) 335 if (aggr_mode == AGGR_NONE)
449 update_shadow_stats(evsel, count->values, cpu); 336 perf_stat__update_shadow_stats(evsel, count->values, cpu);
450 break; 337 break;
451 case AGGR_GLOBAL: 338 case AGGR_GLOBAL:
452 aggr->val += count->val; 339 aggr->val += count->val;
@@ -494,7 +381,7 @@ static int read_counter_aggr(struct perf_evsel *counter)
494 /* 381 /*
495 * Save the full runtime - to allow normalization during printout: 382 * Save the full runtime - to allow normalization during printout:
496 */ 383 */
497 update_shadow_stats(counter, count, 0); 384 perf_stat__update_shadow_stats(counter, count, 0);
498 385
499 return 0; 386 return 0;
500} 387}
@@ -872,329 +759,6 @@ static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg)
872 fprintf(output, " "); 759 fprintf(output, " ");
873} 760}
874 761
875/* used for get_ratio_color() */
876enum grc_type {
877 GRC_STALLED_CYCLES_FE,
878 GRC_STALLED_CYCLES_BE,
879 GRC_CACHE_MISSES,
880 GRC_MAX_NR
881};
882
883static const char *get_ratio_color(enum grc_type type, double ratio)
884{
885 static const double grc_table[GRC_MAX_NR][3] = {
886 [GRC_STALLED_CYCLES_FE] = { 50.0, 30.0, 10.0 },
887 [GRC_STALLED_CYCLES_BE] = { 75.0, 50.0, 20.0 },
888 [GRC_CACHE_MISSES] = { 20.0, 10.0, 5.0 },
889 };
890 const char *color = PERF_COLOR_NORMAL;
891
892 if (ratio > grc_table[type][0])
893 color = PERF_COLOR_RED;
894 else if (ratio > grc_table[type][1])
895 color = PERF_COLOR_MAGENTA;
896 else if (ratio > grc_table[type][2])
897 color = PERF_COLOR_YELLOW;
898
899 return color;
900}
901
902static void print_stalled_cycles_frontend(FILE *out, int cpu,
903 struct perf_evsel *evsel
904 __maybe_unused, double avg)
905{
906 double total, ratio = 0.0;
907 const char *color;
908 int ctx = evsel_context(evsel);
909
910 total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
911
912 if (total)
913 ratio = avg / total * 100.0;
914
915 color = get_ratio_color(GRC_STALLED_CYCLES_FE, ratio);
916
917 fprintf(out, " # ");
918 color_fprintf(out, color, "%6.2f%%", ratio);
919 fprintf(out, " frontend cycles idle ");
920}
921
922static void print_stalled_cycles_backend(FILE *out, int cpu,
923 struct perf_evsel *evsel
924 __maybe_unused, double avg)
925{
926 double total, ratio = 0.0;
927 const char *color;
928 int ctx = evsel_context(evsel);
929
930 total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
931
932 if (total)
933 ratio = avg / total * 100.0;
934
935 color = get_ratio_color(GRC_STALLED_CYCLES_BE, ratio);
936
937 fprintf(out, " # ");
938 color_fprintf(out, color, "%6.2f%%", ratio);
939 fprintf(out, " backend cycles idle ");
940}
941
942static void print_branch_misses(FILE *out, int cpu,
943 struct perf_evsel *evsel __maybe_unused,
944 double avg)
945{
946 double total, ratio = 0.0;
947 const char *color;
948 int ctx = evsel_context(evsel);
949
950 total = avg_stats(&runtime_branches_stats[ctx][cpu]);
951
952 if (total)
953 ratio = avg / total * 100.0;
954
955 color = get_ratio_color(GRC_CACHE_MISSES, ratio);
956
957 fprintf(out, " # ");
958 color_fprintf(out, color, "%6.2f%%", ratio);
959 fprintf(out, " of all branches ");
960}
961
962static void print_l1_dcache_misses(FILE *out, int cpu,
963 struct perf_evsel *evsel __maybe_unused,
964 double avg)
965{
966 double total, ratio = 0.0;
967 const char *color;
968 int ctx = evsel_context(evsel);
969
970 total = avg_stats(&runtime_l1_dcache_stats[ctx][cpu]);
971
972 if (total)
973 ratio = avg / total * 100.0;
974
975 color = get_ratio_color(GRC_CACHE_MISSES, ratio);
976
977 fprintf(out, " # ");
978 color_fprintf(out, color, "%6.2f%%", ratio);
979 fprintf(out, " of all L1-dcache hits ");
980}
981
982static void print_l1_icache_misses(FILE *out, int cpu,
983 struct perf_evsel *evsel __maybe_unused,
984 double avg)
985{
986 double total, ratio = 0.0;
987 const char *color;
988 int ctx = evsel_context(evsel);
989
990 total = avg_stats(&runtime_l1_icache_stats[ctx][cpu]);
991
992 if (total)
993 ratio = avg / total * 100.0;
994
995 color = get_ratio_color(GRC_CACHE_MISSES, ratio);
996
997 fprintf(out, " # ");
998 color_fprintf(out, color, "%6.2f%%", ratio);
999 fprintf(out, " of all L1-icache hits ");
1000}
1001
1002static void print_dtlb_cache_misses(FILE *out, int cpu,
1003 struct perf_evsel *evsel __maybe_unused,
1004 double avg)
1005{
1006 double total, ratio = 0.0;
1007 const char *color;
1008 int ctx = evsel_context(evsel);
1009
1010 total = avg_stats(&runtime_dtlb_cache_stats[ctx][cpu]);
1011
1012 if (total)
1013 ratio = avg / total * 100.0;
1014
1015 color = get_ratio_color(GRC_CACHE_MISSES, ratio);
1016
1017 fprintf(out, " # ");
1018 color_fprintf(out, color, "%6.2f%%", ratio);
1019 fprintf(out, " of all dTLB cache hits ");
1020}
1021
1022static void print_itlb_cache_misses(FILE *out, int cpu,
1023 struct perf_evsel *evsel __maybe_unused,
1024 double avg)
1025{
1026 double total, ratio = 0.0;
1027 const char *color;
1028 int ctx = evsel_context(evsel);
1029
1030 total = avg_stats(&runtime_itlb_cache_stats[ctx][cpu]);
1031
1032 if (total)
1033 ratio = avg / total * 100.0;
1034
1035 color = get_ratio_color(GRC_CACHE_MISSES, ratio);
1036
1037 fprintf(out, " # ");
1038 color_fprintf(out, color, "%6.2f%%", ratio);
1039 fprintf(out, " of all iTLB cache hits ");
1040}
1041
1042static void print_ll_cache_misses(FILE *out, int cpu,
1043 struct perf_evsel *evsel __maybe_unused,
1044 double avg)
1045{
1046 double total, ratio = 0.0;
1047 const char *color;
1048 int ctx = evsel_context(evsel);
1049
1050 total = avg_stats(&runtime_ll_cache_stats[ctx][cpu]);
1051
1052 if (total)
1053 ratio = avg / total * 100.0;
1054
1055 color = get_ratio_color(GRC_CACHE_MISSES, ratio);
1056
1057 fprintf(out, " # ");
1058 color_fprintf(out, color, "%6.2f%%", ratio);
1059 fprintf(out, " of all LL-cache hits ");
1060}
1061
1062static void print_shadow_stats(FILE *out, struct perf_evsel *evsel,
1063 double avg, int cpu, enum aggr_mode aggr)
1064{
1065 double total, ratio = 0.0, total2;
1066 int ctx = evsel_context(evsel);
1067
1068 if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
1069 total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
1070 if (total) {
1071 ratio = avg / total;
1072 fprintf(out, " # %5.2f insns per cycle ", ratio);
1073 } else {
1074 fprintf(out, " ");
1075 }
1076 total = avg_stats(&runtime_stalled_cycles_front_stats[ctx][cpu]);
1077 total = max(total, avg_stats(&runtime_stalled_cycles_back_stats[ctx][cpu]));
1078
1079 if (total && avg) {
1080 ratio = total / avg;
1081 fprintf(out, "\n");
1082 if (aggr == AGGR_NONE)
1083 fprintf(out, " ");
1084 fprintf(out, " # %5.2f stalled cycles per insn", ratio);
1085 }
1086
1087 } else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES) &&
1088 runtime_branches_stats[ctx][cpu].n != 0) {
1089 print_branch_misses(out, cpu, evsel, avg);
1090 } else if (
1091 evsel->attr.type == PERF_TYPE_HW_CACHE &&
1092 evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1D |
1093 ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
1094 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
1095 runtime_l1_dcache_stats[ctx][cpu].n != 0) {
1096 print_l1_dcache_misses(out, cpu, evsel, avg);
1097 } else if (
1098 evsel->attr.type == PERF_TYPE_HW_CACHE &&
1099 evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1I |
1100 ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
1101 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
1102 runtime_l1_icache_stats[ctx][cpu].n != 0) {
1103 print_l1_icache_misses(out, cpu, evsel, avg);
1104 } else if (
1105 evsel->attr.type == PERF_TYPE_HW_CACHE &&
1106 evsel->attr.config == ( PERF_COUNT_HW_CACHE_DTLB |
1107 ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
1108 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
1109 runtime_dtlb_cache_stats[ctx][cpu].n != 0) {
1110 print_dtlb_cache_misses(out, cpu, evsel, avg);
1111 } else if (
1112 evsel->attr.type == PERF_TYPE_HW_CACHE &&
1113 evsel->attr.config == ( PERF_COUNT_HW_CACHE_ITLB |
1114 ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
1115 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
1116 runtime_itlb_cache_stats[ctx][cpu].n != 0) {
1117 print_itlb_cache_misses(out, cpu, evsel, avg);
1118 } else if (
1119 evsel->attr.type == PERF_TYPE_HW_CACHE &&
1120 evsel->attr.config == ( PERF_COUNT_HW_CACHE_LL |
1121 ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
1122 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
1123 runtime_ll_cache_stats[ctx][cpu].n != 0) {
1124 print_ll_cache_misses(out, cpu, evsel, avg);
1125 } else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES) &&
1126 runtime_cacherefs_stats[ctx][cpu].n != 0) {
1127 total = avg_stats(&runtime_cacherefs_stats[ctx][cpu]);
1128
1129 if (total)
1130 ratio = avg * 100 / total;
1131
1132 fprintf(out, " # %8.3f %% of all cache refs ", ratio);
1133
1134 } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) {
1135 print_stalled_cycles_frontend(out, cpu, evsel, avg);
1136 } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) {
1137 print_stalled_cycles_backend(out, cpu, evsel, avg);
1138 } else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
1139 total = avg_stats(&runtime_nsecs_stats[cpu]);
1140
1141 if (total) {
1142 ratio = avg / total;
1143 fprintf(out, " # %8.3f GHz ", ratio);
1144 } else {
1145 fprintf(out, " ");
1146 }
1147 } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX)) {
1148 total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
1149 if (total)
1150 fprintf(out,
1151 " # %5.2f%% transactional cycles ",
1152 100.0 * (avg / total));
1153 } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) {
1154 total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
1155 total2 = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]);
1156 if (total2 < avg)
1157 total2 = avg;
1158 if (total)
1159 fprintf(out,
1160 " # %5.2f%% aborted cycles ",
1161 100.0 * ((total2-avg) / total));
1162 } else if (perf_stat_evsel__is(evsel, TRANSACTION_START) &&
1163 avg > 0 &&
1164 runtime_cycles_in_tx_stats[ctx][cpu].n != 0) {
1165 total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]);
1166
1167 if (total)
1168 ratio = total / avg;
1169
1170 fprintf(out, " # %8.0f cycles / transaction ", ratio);
1171 } else if (perf_stat_evsel__is(evsel, ELISION_START) &&
1172 avg > 0 &&
1173 runtime_cycles_in_tx_stats[ctx][cpu].n != 0) {
1174 total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]);
1175
1176 if (total)
1177 ratio = total / avg;
1178
1179 fprintf(out, " # %8.0f cycles / elision ", ratio);
1180 } else if (runtime_nsecs_stats[cpu].n != 0) {
1181 char unit = 'M';
1182
1183 total = avg_stats(&runtime_nsecs_stats[cpu]);
1184
1185 if (total)
1186 ratio = 1000.0 * avg / total;
1187 if (ratio < 0.001) {
1188 ratio *= 1000;
1189 unit = 'K';
1190 }
1191
1192 fprintf(out, " # %8.3f %c/sec ", ratio, unit);
1193 } else {
1194 fprintf(out, " ");
1195 }
1196}
1197
1198static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg) 762static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg)
1199{ 763{
1200 double sc = evsel->scale; 764 double sc = evsel->scale;
@@ -1230,7 +794,7 @@ static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg)
1230 if (csv_output || interval) 794 if (csv_output || interval)
1231 return; 795 return;
1232 796
1233 print_shadow_stats(output, evsel, avg, cpu, aggr_mode); 797 perf_stat__print_shadow_stats(output, evsel, avg, cpu, aggr_mode);
1234} 798}
1235 799
1236static void print_aggr(char *prefix) 800static void print_aggr(char *prefix)
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index e4b676de2f64..586a59d46022 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -68,6 +68,7 @@ libperf-y += rblist.o
68libperf-y += intlist.o 68libperf-y += intlist.o
69libperf-y += vdso.o 69libperf-y += vdso.o
70libperf-y += stat.o 70libperf-y += stat.o
71libperf-y += stat-shadow.o
71libperf-y += record.o 72libperf-y += record.o
72libperf-y += srcline.o 73libperf-y += srcline.o
73libperf-y += data.o 74libperf-y += data.o
diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
new file mode 100644
index 000000000000..53e8bb7bc852
--- /dev/null
+++ b/tools/perf/util/stat-shadow.c
@@ -0,0 +1,434 @@
1#include <stdio.h>
2#include "evsel.h"
3#include "stat.h"
4#include "color.h"
5
6enum {
7 CTX_BIT_USER = 1 << 0,
8 CTX_BIT_KERNEL = 1 << 1,
9 CTX_BIT_HV = 1 << 2,
10 CTX_BIT_HOST = 1 << 3,
11 CTX_BIT_IDLE = 1 << 4,
12 CTX_BIT_MAX = 1 << 5,
13};
14
15#define NUM_CTX CTX_BIT_MAX
16
17static struct stats runtime_nsecs_stats[MAX_NR_CPUS];
18static struct stats runtime_cycles_stats[NUM_CTX][MAX_NR_CPUS];
19static struct stats runtime_stalled_cycles_front_stats[NUM_CTX][MAX_NR_CPUS];
20static struct stats runtime_stalled_cycles_back_stats[NUM_CTX][MAX_NR_CPUS];
21static struct stats runtime_branches_stats[NUM_CTX][MAX_NR_CPUS];
22static struct stats runtime_cacherefs_stats[NUM_CTX][MAX_NR_CPUS];
23static struct stats runtime_l1_dcache_stats[NUM_CTX][MAX_NR_CPUS];
24static struct stats runtime_l1_icache_stats[NUM_CTX][MAX_NR_CPUS];
25static struct stats runtime_ll_cache_stats[NUM_CTX][MAX_NR_CPUS];
26static struct stats runtime_itlb_cache_stats[NUM_CTX][MAX_NR_CPUS];
27static struct stats runtime_dtlb_cache_stats[NUM_CTX][MAX_NR_CPUS];
28static struct stats runtime_cycles_in_tx_stats[NUM_CTX][MAX_NR_CPUS];
29static struct stats runtime_transaction_stats[NUM_CTX][MAX_NR_CPUS];
30static struct stats runtime_elision_stats[NUM_CTX][MAX_NR_CPUS];
31
32struct stats walltime_nsecs_stats;
33
34static int evsel_context(struct perf_evsel *evsel)
35{
36 int ctx = 0;
37
38 if (evsel->attr.exclude_kernel)
39 ctx |= CTX_BIT_KERNEL;
40 if (evsel->attr.exclude_user)
41 ctx |= CTX_BIT_USER;
42 if (evsel->attr.exclude_hv)
43 ctx |= CTX_BIT_HV;
44 if (evsel->attr.exclude_host)
45 ctx |= CTX_BIT_HOST;
46 if (evsel->attr.exclude_idle)
47 ctx |= CTX_BIT_IDLE;
48
49 return ctx;
50}
51
52void perf_stat__reset_shadow_stats(void)
53{
54 memset(runtime_nsecs_stats, 0, sizeof(runtime_nsecs_stats));
55 memset(runtime_cycles_stats, 0, sizeof(runtime_cycles_stats));
56 memset(runtime_stalled_cycles_front_stats, 0, sizeof(runtime_stalled_cycles_front_stats));
57 memset(runtime_stalled_cycles_back_stats, 0, sizeof(runtime_stalled_cycles_back_stats));
58 memset(runtime_branches_stats, 0, sizeof(runtime_branches_stats));
59 memset(runtime_cacherefs_stats, 0, sizeof(runtime_cacherefs_stats));
60 memset(runtime_l1_dcache_stats, 0, sizeof(runtime_l1_dcache_stats));
61 memset(runtime_l1_icache_stats, 0, sizeof(runtime_l1_icache_stats));
62 memset(runtime_ll_cache_stats, 0, sizeof(runtime_ll_cache_stats));
63 memset(runtime_itlb_cache_stats, 0, sizeof(runtime_itlb_cache_stats));
64 memset(runtime_dtlb_cache_stats, 0, sizeof(runtime_dtlb_cache_stats));
65 memset(runtime_cycles_in_tx_stats, 0,
66 sizeof(runtime_cycles_in_tx_stats));
67 memset(runtime_transaction_stats, 0,
68 sizeof(runtime_transaction_stats));
69 memset(runtime_elision_stats, 0, sizeof(runtime_elision_stats));
70 memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats));
71}
72
73/*
74 * Update various tracking values we maintain to print
75 * more semantic information such as miss/hit ratios,
76 * instruction rates, etc:
77 */
78void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count,
79 int cpu)
80{
81 int ctx = evsel_context(counter);
82
83 if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK))
84 update_stats(&runtime_nsecs_stats[cpu], count[0]);
85 else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
86 update_stats(&runtime_cycles_stats[ctx][cpu], count[0]);
87 else if (perf_stat_evsel__is(counter, CYCLES_IN_TX))
88 update_stats(&runtime_transaction_stats[ctx][cpu], count[0]);
89 else if (perf_stat_evsel__is(counter, TRANSACTION_START))
90 update_stats(&runtime_transaction_stats[ctx][cpu], count[0]);
91 else if (perf_stat_evsel__is(counter, ELISION_START))
92 update_stats(&runtime_elision_stats[ctx][cpu], count[0]);
93 else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
94 update_stats(&runtime_stalled_cycles_front_stats[ctx][cpu], count[0]);
95 else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
96 update_stats(&runtime_stalled_cycles_back_stats[ctx][cpu], count[0]);
97 else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
98 update_stats(&runtime_branches_stats[ctx][cpu], count[0]);
99 else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES))
100 update_stats(&runtime_cacherefs_stats[ctx][cpu], count[0]);
101 else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D))
102 update_stats(&runtime_l1_dcache_stats[ctx][cpu], count[0]);
103 else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I))
104 update_stats(&runtime_ll_cache_stats[ctx][cpu], count[0]);
105 else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL))
106 update_stats(&runtime_ll_cache_stats[ctx][cpu], count[0]);
107 else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB))
108 update_stats(&runtime_dtlb_cache_stats[ctx][cpu], count[0]);
109 else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
110 update_stats(&runtime_itlb_cache_stats[ctx][cpu], count[0]);
111}
112
113/* used for get_ratio_color() */
114enum grc_type {
115 GRC_STALLED_CYCLES_FE,
116 GRC_STALLED_CYCLES_BE,
117 GRC_CACHE_MISSES,
118 GRC_MAX_NR
119};
120
121static const char *get_ratio_color(enum grc_type type, double ratio)
122{
123 static const double grc_table[GRC_MAX_NR][3] = {
124 [GRC_STALLED_CYCLES_FE] = { 50.0, 30.0, 10.0 },
125 [GRC_STALLED_CYCLES_BE] = { 75.0, 50.0, 20.0 },
126 [GRC_CACHE_MISSES] = { 20.0, 10.0, 5.0 },
127 };
128 const char *color = PERF_COLOR_NORMAL;
129
130 if (ratio > grc_table[type][0])
131 color = PERF_COLOR_RED;
132 else if (ratio > grc_table[type][1])
133 color = PERF_COLOR_MAGENTA;
134 else if (ratio > grc_table[type][2])
135 color = PERF_COLOR_YELLOW;
136
137 return color;
138}
139
140static void print_stalled_cycles_frontend(FILE *out, int cpu,
141 struct perf_evsel *evsel
142 __maybe_unused, double avg)
143{
144 double total, ratio = 0.0;
145 const char *color;
146 int ctx = evsel_context(evsel);
147
148 total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
149
150 if (total)
151 ratio = avg / total * 100.0;
152
153 color = get_ratio_color(GRC_STALLED_CYCLES_FE, ratio);
154
155 fprintf(out, " # ");
156 color_fprintf(out, color, "%6.2f%%", ratio);
157 fprintf(out, " frontend cycles idle ");
158}
159
160static void print_stalled_cycles_backend(FILE *out, int cpu,
161 struct perf_evsel *evsel
162 __maybe_unused, double avg)
163{
164 double total, ratio = 0.0;
165 const char *color;
166 int ctx = evsel_context(evsel);
167
168 total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
169
170 if (total)
171 ratio = avg / total * 100.0;
172
173 color = get_ratio_color(GRC_STALLED_CYCLES_BE, ratio);
174
175 fprintf(out, " # ");
176 color_fprintf(out, color, "%6.2f%%", ratio);
177 fprintf(out, " backend cycles idle ");
178}
179
180static void print_branch_misses(FILE *out, int cpu,
181 struct perf_evsel *evsel __maybe_unused,
182 double avg)
183{
184 double total, ratio = 0.0;
185 const char *color;
186 int ctx = evsel_context(evsel);
187
188 total = avg_stats(&runtime_branches_stats[ctx][cpu]);
189
190 if (total)
191 ratio = avg / total * 100.0;
192
193 color = get_ratio_color(GRC_CACHE_MISSES, ratio);
194
195 fprintf(out, " # ");
196 color_fprintf(out, color, "%6.2f%%", ratio);
197 fprintf(out, " of all branches ");
198}
199
200static void print_l1_dcache_misses(FILE *out, int cpu,
201 struct perf_evsel *evsel __maybe_unused,
202 double avg)
203{
204 double total, ratio = 0.0;
205 const char *color;
206 int ctx = evsel_context(evsel);
207
208 total = avg_stats(&runtime_l1_dcache_stats[ctx][cpu]);
209
210 if (total)
211 ratio = avg / total * 100.0;
212
213 color = get_ratio_color(GRC_CACHE_MISSES, ratio);
214
215 fprintf(out, " # ");
216 color_fprintf(out, color, "%6.2f%%", ratio);
217 fprintf(out, " of all L1-dcache hits ");
218}
219
220static void print_l1_icache_misses(FILE *out, int cpu,
221 struct perf_evsel *evsel __maybe_unused,
222 double avg)
223{
224 double total, ratio = 0.0;
225 const char *color;
226 int ctx = evsel_context(evsel);
227
228 total = avg_stats(&runtime_l1_icache_stats[ctx][cpu]);
229
230 if (total)
231 ratio = avg / total * 100.0;
232
233 color = get_ratio_color(GRC_CACHE_MISSES, ratio);
234
235 fprintf(out, " # ");
236 color_fprintf(out, color, "%6.2f%%", ratio);
237 fprintf(out, " of all L1-icache hits ");
238}
239
240static void print_dtlb_cache_misses(FILE *out, int cpu,
241 struct perf_evsel *evsel __maybe_unused,
242 double avg)
243{
244 double total, ratio = 0.0;
245 const char *color;
246 int ctx = evsel_context(evsel);
247
248 total = avg_stats(&runtime_dtlb_cache_stats[ctx][cpu]);
249
250 if (total)
251 ratio = avg / total * 100.0;
252
253 color = get_ratio_color(GRC_CACHE_MISSES, ratio);
254
255 fprintf(out, " # ");
256 color_fprintf(out, color, "%6.2f%%", ratio);
257 fprintf(out, " of all dTLB cache hits ");
258}
259
260static void print_itlb_cache_misses(FILE *out, int cpu,
261 struct perf_evsel *evsel __maybe_unused,
262 double avg)
263{
264 double total, ratio = 0.0;
265 const char *color;
266 int ctx = evsel_context(evsel);
267
268 total = avg_stats(&runtime_itlb_cache_stats[ctx][cpu]);
269
270 if (total)
271 ratio = avg / total * 100.0;
272
273 color = get_ratio_color(GRC_CACHE_MISSES, ratio);
274
275 fprintf(out, " # ");
276 color_fprintf(out, color, "%6.2f%%", ratio);
277 fprintf(out, " of all iTLB cache hits ");
278}
279
280static void print_ll_cache_misses(FILE *out, int cpu,
281 struct perf_evsel *evsel __maybe_unused,
282 double avg)
283{
284 double total, ratio = 0.0;
285 const char *color;
286 int ctx = evsel_context(evsel);
287
288 total = avg_stats(&runtime_ll_cache_stats[ctx][cpu]);
289
290 if (total)
291 ratio = avg / total * 100.0;
292
293 color = get_ratio_color(GRC_CACHE_MISSES, ratio);
294
295 fprintf(out, " # ");
296 color_fprintf(out, color, "%6.2f%%", ratio);
297 fprintf(out, " of all LL-cache hits ");
298}
299
300void perf_stat__print_shadow_stats(FILE *out, struct perf_evsel *evsel,
301 double avg, int cpu, enum aggr_mode aggr)
302{
303 double total, ratio = 0.0, total2;
304 int ctx = evsel_context(evsel);
305
306 if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
307 total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
308 if (total) {
309 ratio = avg / total;
310 fprintf(out, " # %5.2f insns per cycle ", ratio);
311 } else {
312 fprintf(out, " ");
313 }
314 total = avg_stats(&runtime_stalled_cycles_front_stats[ctx][cpu]);
315 total = max(total, avg_stats(&runtime_stalled_cycles_back_stats[ctx][cpu]));
316
317 if (total && avg) {
318 ratio = total / avg;
319 fprintf(out, "\n");
320 if (aggr == AGGR_NONE)
321 fprintf(out, " ");
322 fprintf(out, " # %5.2f stalled cycles per insn", ratio);
323 }
324
325 } else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES) &&
326 runtime_branches_stats[ctx][cpu].n != 0) {
327 print_branch_misses(out, cpu, evsel, avg);
328 } else if (
329 evsel->attr.type == PERF_TYPE_HW_CACHE &&
330 evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1D |
331 ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
332 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
333 runtime_l1_dcache_stats[ctx][cpu].n != 0) {
334 print_l1_dcache_misses(out, cpu, evsel, avg);
335 } else if (
336 evsel->attr.type == PERF_TYPE_HW_CACHE &&
337 evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1I |
338 ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
339 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
340 runtime_l1_icache_stats[ctx][cpu].n != 0) {
341 print_l1_icache_misses(out, cpu, evsel, avg);
342 } else if (
343 evsel->attr.type == PERF_TYPE_HW_CACHE &&
344 evsel->attr.config == ( PERF_COUNT_HW_CACHE_DTLB |
345 ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
346 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
347 runtime_dtlb_cache_stats[ctx][cpu].n != 0) {
348 print_dtlb_cache_misses(out, cpu, evsel, avg);
349 } else if (
350 evsel->attr.type == PERF_TYPE_HW_CACHE &&
351 evsel->attr.config == ( PERF_COUNT_HW_CACHE_ITLB |
352 ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
353 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
354 runtime_itlb_cache_stats[ctx][cpu].n != 0) {
355 print_itlb_cache_misses(out, cpu, evsel, avg);
356 } else if (
357 evsel->attr.type == PERF_TYPE_HW_CACHE &&
358 evsel->attr.config == ( PERF_COUNT_HW_CACHE_LL |
359 ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
360 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
361 runtime_ll_cache_stats[ctx][cpu].n != 0) {
362 print_ll_cache_misses(out, cpu, evsel, avg);
363 } else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES) &&
364 runtime_cacherefs_stats[ctx][cpu].n != 0) {
365 total = avg_stats(&runtime_cacherefs_stats[ctx][cpu]);
366
367 if (total)
368 ratio = avg * 100 / total;
369
370 fprintf(out, " # %8.3f %% of all cache refs ", ratio);
371
372 } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) {
373 print_stalled_cycles_frontend(out, cpu, evsel, avg);
374 } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) {
375 print_stalled_cycles_backend(out, cpu, evsel, avg);
376 } else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
377 total = avg_stats(&runtime_nsecs_stats[cpu]);
378
379 if (total) {
380 ratio = avg / total;
381 fprintf(out, " # %8.3f GHz ", ratio);
382 } else {
383 fprintf(out, " ");
384 }
385 } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX)) {
386 total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
387 if (total)
388 fprintf(out,
389 " # %5.2f%% transactional cycles ",
390 100.0 * (avg / total));
391 } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) {
392 total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
393 total2 = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]);
394 if (total2 < avg)
395 total2 = avg;
396 if (total)
397 fprintf(out,
398 " # %5.2f%% aborted cycles ",
399 100.0 * ((total2-avg) / total));
400 } else if (perf_stat_evsel__is(evsel, TRANSACTION_START) &&
401 avg > 0 &&
402 runtime_cycles_in_tx_stats[ctx][cpu].n != 0) {
403 total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]);
404
405 if (total)
406 ratio = total / avg;
407
408 fprintf(out, " # %8.0f cycles / transaction ", ratio);
409 } else if (perf_stat_evsel__is(evsel, ELISION_START) &&
410 avg > 0 &&
411 runtime_cycles_in_tx_stats[ctx][cpu].n != 0) {
412 total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]);
413
414 if (total)
415 ratio = total / avg;
416
417 fprintf(out, " # %8.0f cycles / elision ", ratio);
418 } else if (runtime_nsecs_stats[cpu].n != 0) {
419 char unit = 'M';
420
421 total = avg_stats(&runtime_nsecs_stats[cpu]);
422
423 if (total)
424 ratio = 1000.0 * avg / total;
425 if (ratio < 0.001) {
426 ratio *= 1000;
427 unit = 'K';
428 }
429
430 fprintf(out, " # %8.3f %c/sec ", ratio, unit);
431 } else {
432 fprintf(out, " ");
433 }
434}
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index 3df529bd0774..615c779eb42a 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -2,6 +2,7 @@
2#define __PERF_STATS_H 2#define __PERF_STATS_H
3 3
4#include <linux/types.h> 4#include <linux/types.h>
5#include <stdio.h>
5 6
6struct stats 7struct stats
7{ 8{
@@ -23,6 +24,13 @@ struct perf_stat {
23 enum perf_stat_evsel_id id; 24 enum perf_stat_evsel_id id;
24}; 25};
25 26
27enum aggr_mode {
28 AGGR_NONE,
29 AGGR_GLOBAL,
30 AGGR_SOCKET,
31 AGGR_CORE,
32};
33
26void update_stats(struct stats *stats, u64 val); 34void update_stats(struct stats *stats, u64 val);
27double avg_stats(struct stats *stats); 35double avg_stats(struct stats *stats);
28double stddev_stats(struct stats *stats); 36double stddev_stats(struct stats *stats);
@@ -46,4 +54,12 @@ bool __perf_evsel_stat__is(struct perf_evsel *evsel,
46 54
47void perf_stat_evsel_id_init(struct perf_evsel *evsel); 55void perf_stat_evsel_id_init(struct perf_evsel *evsel);
48 56
57extern struct stats walltime_nsecs_stats;
58
59void perf_stat__reset_shadow_stats(void);
60void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count,
61 int cpu);
62void perf_stat__print_shadow_stats(FILE *out, struct perf_evsel *evsel,
63 double avg, int cpu, enum aggr_mode aggr);
64
49#endif 65#endif