diff options
-rw-r--r-- | tools/perf/builtin-stat.c | 444 | ||||
-rw-r--r-- | tools/perf/util/Build | 1 | ||||
-rw-r--r-- | tools/perf/util/stat-shadow.c | 434 | ||||
-rw-r--r-- | tools/perf/util/stat.h | 16 |
4 files changed, 455 insertions, 440 deletions
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 50918dc9fb31..ff3d25803400 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c | |||
@@ -102,13 +102,6 @@ static struct target target = { | |||
102 | .uid = UINT_MAX, | 102 | .uid = UINT_MAX, |
103 | }; | 103 | }; |
104 | 104 | ||
105 | enum aggr_mode { | ||
106 | AGGR_NONE, | ||
107 | AGGR_GLOBAL, | ||
108 | AGGR_SOCKET, | ||
109 | AGGR_CORE, | ||
110 | }; | ||
111 | |||
112 | static int run_count = 1; | 105 | static int run_count = 1; |
113 | static bool no_inherit = false; | 106 | static bool no_inherit = false; |
114 | static bool scale = true; | 107 | static bool scale = true; |
@@ -234,72 +227,6 @@ out_free: | |||
234 | return -1; | 227 | return -1; |
235 | } | 228 | } |
236 | 229 | ||
237 | enum { | ||
238 | CTX_BIT_USER = 1 << 0, | ||
239 | CTX_BIT_KERNEL = 1 << 1, | ||
240 | CTX_BIT_HV = 1 << 2, | ||
241 | CTX_BIT_HOST = 1 << 3, | ||
242 | CTX_BIT_IDLE = 1 << 4, | ||
243 | CTX_BIT_MAX = 1 << 5, | ||
244 | }; | ||
245 | |||
246 | #define NUM_CTX CTX_BIT_MAX | ||
247 | |||
248 | static struct stats runtime_nsecs_stats[MAX_NR_CPUS]; | ||
249 | static struct stats runtime_cycles_stats[NUM_CTX][MAX_NR_CPUS]; | ||
250 | static struct stats runtime_stalled_cycles_front_stats[NUM_CTX][MAX_NR_CPUS]; | ||
251 | static struct stats runtime_stalled_cycles_back_stats[NUM_CTX][MAX_NR_CPUS]; | ||
252 | static struct stats runtime_branches_stats[NUM_CTX][MAX_NR_CPUS]; | ||
253 | static struct stats runtime_cacherefs_stats[NUM_CTX][MAX_NR_CPUS]; | ||
254 | static struct stats runtime_l1_dcache_stats[NUM_CTX][MAX_NR_CPUS]; | ||
255 | static struct stats runtime_l1_icache_stats[NUM_CTX][MAX_NR_CPUS]; | ||
256 | static struct stats runtime_ll_cache_stats[NUM_CTX][MAX_NR_CPUS]; | ||
257 | static struct stats runtime_itlb_cache_stats[NUM_CTX][MAX_NR_CPUS]; | ||
258 | static struct stats runtime_dtlb_cache_stats[NUM_CTX][MAX_NR_CPUS]; | ||
259 | static struct stats runtime_cycles_in_tx_stats[NUM_CTX][MAX_NR_CPUS]; | ||
260 | static struct stats walltime_nsecs_stats; | ||
261 | static struct stats runtime_transaction_stats[NUM_CTX][MAX_NR_CPUS]; | ||
262 | static struct stats runtime_elision_stats[NUM_CTX][MAX_NR_CPUS]; | ||
263 | |||
264 | static int evsel_context(struct perf_evsel *evsel) | ||
265 | { | ||
266 | int ctx = 0; | ||
267 | |||
268 | if (evsel->attr.exclude_kernel) | ||
269 | ctx |= CTX_BIT_KERNEL; | ||
270 | if (evsel->attr.exclude_user) | ||
271 | ctx |= CTX_BIT_USER; | ||
272 | if (evsel->attr.exclude_hv) | ||
273 | ctx |= CTX_BIT_HV; | ||
274 | if (evsel->attr.exclude_host) | ||
275 | ctx |= CTX_BIT_HOST; | ||
276 | if (evsel->attr.exclude_idle) | ||
277 | ctx |= CTX_BIT_IDLE; | ||
278 | |||
279 | return ctx; | ||
280 | } | ||
281 | |||
282 | static void reset_shadow_stats(void) | ||
283 | { | ||
284 | memset(runtime_nsecs_stats, 0, sizeof(runtime_nsecs_stats)); | ||
285 | memset(runtime_cycles_stats, 0, sizeof(runtime_cycles_stats)); | ||
286 | memset(runtime_stalled_cycles_front_stats, 0, sizeof(runtime_stalled_cycles_front_stats)); | ||
287 | memset(runtime_stalled_cycles_back_stats, 0, sizeof(runtime_stalled_cycles_back_stats)); | ||
288 | memset(runtime_branches_stats, 0, sizeof(runtime_branches_stats)); | ||
289 | memset(runtime_cacherefs_stats, 0, sizeof(runtime_cacherefs_stats)); | ||
290 | memset(runtime_l1_dcache_stats, 0, sizeof(runtime_l1_dcache_stats)); | ||
291 | memset(runtime_l1_icache_stats, 0, sizeof(runtime_l1_icache_stats)); | ||
292 | memset(runtime_ll_cache_stats, 0, sizeof(runtime_ll_cache_stats)); | ||
293 | memset(runtime_itlb_cache_stats, 0, sizeof(runtime_itlb_cache_stats)); | ||
294 | memset(runtime_dtlb_cache_stats, 0, sizeof(runtime_dtlb_cache_stats)); | ||
295 | memset(runtime_cycles_in_tx_stats, 0, | ||
296 | sizeof(runtime_cycles_in_tx_stats)); | ||
297 | memset(runtime_transaction_stats, 0, | ||
298 | sizeof(runtime_transaction_stats)); | ||
299 | memset(runtime_elision_stats, 0, sizeof(runtime_elision_stats)); | ||
300 | memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats)); | ||
301 | } | ||
302 | |||
303 | static void perf_stat__reset_stats(struct perf_evlist *evlist) | 230 | static void perf_stat__reset_stats(struct perf_evlist *evlist) |
304 | { | 231 | { |
305 | struct perf_evsel *evsel; | 232 | struct perf_evsel *evsel; |
@@ -309,7 +236,7 @@ static void perf_stat__reset_stats(struct perf_evlist *evlist) | |||
309 | perf_evsel__reset_counts(evsel, perf_evsel__nr_cpus(evsel)); | 236 | perf_evsel__reset_counts(evsel, perf_evsel__nr_cpus(evsel)); |
310 | } | 237 | } |
311 | 238 | ||
312 | reset_shadow_stats(); | 239 | perf_stat__reset_shadow_stats(); |
313 | } | 240 | } |
314 | 241 | ||
315 | static int create_perf_stat_counter(struct perf_evsel *evsel) | 242 | static int create_perf_stat_counter(struct perf_evsel *evsel) |
@@ -346,46 +273,6 @@ static inline int nsec_counter(struct perf_evsel *evsel) | |||
346 | return 0; | 273 | return 0; |
347 | } | 274 | } |
348 | 275 | ||
349 | /* | ||
350 | * Update various tracking values we maintain to print | ||
351 | * more semantic information such as miss/hit ratios, | ||
352 | * instruction rates, etc: | ||
353 | */ | ||
354 | static void update_shadow_stats(struct perf_evsel *counter, u64 *count, | ||
355 | int cpu) | ||
356 | { | ||
357 | int ctx = evsel_context(counter); | ||
358 | |||
359 | if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK)) | ||
360 | update_stats(&runtime_nsecs_stats[cpu], count[0]); | ||
361 | else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES)) | ||
362 | update_stats(&runtime_cycles_stats[ctx][cpu], count[0]); | ||
363 | else if (perf_stat_evsel__is(counter, CYCLES_IN_TX)) | ||
364 | update_stats(&runtime_transaction_stats[ctx][cpu], count[0]); | ||
365 | else if (perf_stat_evsel__is(counter, TRANSACTION_START)) | ||
366 | update_stats(&runtime_transaction_stats[ctx][cpu], count[0]); | ||
367 | else if (perf_stat_evsel__is(counter, ELISION_START)) | ||
368 | update_stats(&runtime_elision_stats[ctx][cpu], count[0]); | ||
369 | else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) | ||
370 | update_stats(&runtime_stalled_cycles_front_stats[ctx][cpu], count[0]); | ||
371 | else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND)) | ||
372 | update_stats(&runtime_stalled_cycles_back_stats[ctx][cpu], count[0]); | ||
373 | else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS)) | ||
374 | update_stats(&runtime_branches_stats[ctx][cpu], count[0]); | ||
375 | else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES)) | ||
376 | update_stats(&runtime_cacherefs_stats[ctx][cpu], count[0]); | ||
377 | else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D)) | ||
378 | update_stats(&runtime_l1_dcache_stats[ctx][cpu], count[0]); | ||
379 | else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I)) | ||
380 | update_stats(&runtime_ll_cache_stats[ctx][cpu], count[0]); | ||
381 | else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL)) | ||
382 | update_stats(&runtime_ll_cache_stats[ctx][cpu], count[0]); | ||
383 | else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB)) | ||
384 | update_stats(&runtime_dtlb_cache_stats[ctx][cpu], count[0]); | ||
385 | else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB)) | ||
386 | update_stats(&runtime_itlb_cache_stats[ctx][cpu], count[0]); | ||
387 | } | ||
388 | |||
389 | static void zero_per_pkg(struct perf_evsel *counter) | 276 | static void zero_per_pkg(struct perf_evsel *counter) |
390 | { | 277 | { |
391 | if (counter->per_pkg_mask) | 278 | if (counter->per_pkg_mask) |
@@ -446,7 +333,7 @@ static int read_cb(struct perf_evsel *evsel, int cpu, int thread __maybe_unused, | |||
446 | perf_counts_values__scale(count, scale, NULL); | 333 | perf_counts_values__scale(count, scale, NULL); |
447 | evsel->counts->cpu[cpu] = *count; | 334 | evsel->counts->cpu[cpu] = *count; |
448 | if (aggr_mode == AGGR_NONE) | 335 | if (aggr_mode == AGGR_NONE) |
449 | update_shadow_stats(evsel, count->values, cpu); | 336 | perf_stat__update_shadow_stats(evsel, count->values, cpu); |
450 | break; | 337 | break; |
451 | case AGGR_GLOBAL: | 338 | case AGGR_GLOBAL: |
452 | aggr->val += count->val; | 339 | aggr->val += count->val; |
@@ -494,7 +381,7 @@ static int read_counter_aggr(struct perf_evsel *counter) | |||
494 | /* | 381 | /* |
495 | * Save the full runtime - to allow normalization during printout: | 382 | * Save the full runtime - to allow normalization during printout: |
496 | */ | 383 | */ |
497 | update_shadow_stats(counter, count, 0); | 384 | perf_stat__update_shadow_stats(counter, count, 0); |
498 | 385 | ||
499 | return 0; | 386 | return 0; |
500 | } | 387 | } |
@@ -872,329 +759,6 @@ static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg) | |||
872 | fprintf(output, " "); | 759 | fprintf(output, " "); |
873 | } | 760 | } |
874 | 761 | ||
875 | /* used for get_ratio_color() */ | ||
876 | enum grc_type { | ||
877 | GRC_STALLED_CYCLES_FE, | ||
878 | GRC_STALLED_CYCLES_BE, | ||
879 | GRC_CACHE_MISSES, | ||
880 | GRC_MAX_NR | ||
881 | }; | ||
882 | |||
883 | static const char *get_ratio_color(enum grc_type type, double ratio) | ||
884 | { | ||
885 | static const double grc_table[GRC_MAX_NR][3] = { | ||
886 | [GRC_STALLED_CYCLES_FE] = { 50.0, 30.0, 10.0 }, | ||
887 | [GRC_STALLED_CYCLES_BE] = { 75.0, 50.0, 20.0 }, | ||
888 | [GRC_CACHE_MISSES] = { 20.0, 10.0, 5.0 }, | ||
889 | }; | ||
890 | const char *color = PERF_COLOR_NORMAL; | ||
891 | |||
892 | if (ratio > grc_table[type][0]) | ||
893 | color = PERF_COLOR_RED; | ||
894 | else if (ratio > grc_table[type][1]) | ||
895 | color = PERF_COLOR_MAGENTA; | ||
896 | else if (ratio > grc_table[type][2]) | ||
897 | color = PERF_COLOR_YELLOW; | ||
898 | |||
899 | return color; | ||
900 | } | ||
901 | |||
902 | static void print_stalled_cycles_frontend(FILE *out, int cpu, | ||
903 | struct perf_evsel *evsel | ||
904 | __maybe_unused, double avg) | ||
905 | { | ||
906 | double total, ratio = 0.0; | ||
907 | const char *color; | ||
908 | int ctx = evsel_context(evsel); | ||
909 | |||
910 | total = avg_stats(&runtime_cycles_stats[ctx][cpu]); | ||
911 | |||
912 | if (total) | ||
913 | ratio = avg / total * 100.0; | ||
914 | |||
915 | color = get_ratio_color(GRC_STALLED_CYCLES_FE, ratio); | ||
916 | |||
917 | fprintf(out, " # "); | ||
918 | color_fprintf(out, color, "%6.2f%%", ratio); | ||
919 | fprintf(out, " frontend cycles idle "); | ||
920 | } | ||
921 | |||
922 | static void print_stalled_cycles_backend(FILE *out, int cpu, | ||
923 | struct perf_evsel *evsel | ||
924 | __maybe_unused, double avg) | ||
925 | { | ||
926 | double total, ratio = 0.0; | ||
927 | const char *color; | ||
928 | int ctx = evsel_context(evsel); | ||
929 | |||
930 | total = avg_stats(&runtime_cycles_stats[ctx][cpu]); | ||
931 | |||
932 | if (total) | ||
933 | ratio = avg / total * 100.0; | ||
934 | |||
935 | color = get_ratio_color(GRC_STALLED_CYCLES_BE, ratio); | ||
936 | |||
937 | fprintf(out, " # "); | ||
938 | color_fprintf(out, color, "%6.2f%%", ratio); | ||
939 | fprintf(out, " backend cycles idle "); | ||
940 | } | ||
941 | |||
942 | static void print_branch_misses(FILE *out, int cpu, | ||
943 | struct perf_evsel *evsel __maybe_unused, | ||
944 | double avg) | ||
945 | { | ||
946 | double total, ratio = 0.0; | ||
947 | const char *color; | ||
948 | int ctx = evsel_context(evsel); | ||
949 | |||
950 | total = avg_stats(&runtime_branches_stats[ctx][cpu]); | ||
951 | |||
952 | if (total) | ||
953 | ratio = avg / total * 100.0; | ||
954 | |||
955 | color = get_ratio_color(GRC_CACHE_MISSES, ratio); | ||
956 | |||
957 | fprintf(out, " # "); | ||
958 | color_fprintf(out, color, "%6.2f%%", ratio); | ||
959 | fprintf(out, " of all branches "); | ||
960 | } | ||
961 | |||
962 | static void print_l1_dcache_misses(FILE *out, int cpu, | ||
963 | struct perf_evsel *evsel __maybe_unused, | ||
964 | double avg) | ||
965 | { | ||
966 | double total, ratio = 0.0; | ||
967 | const char *color; | ||
968 | int ctx = evsel_context(evsel); | ||
969 | |||
970 | total = avg_stats(&runtime_l1_dcache_stats[ctx][cpu]); | ||
971 | |||
972 | if (total) | ||
973 | ratio = avg / total * 100.0; | ||
974 | |||
975 | color = get_ratio_color(GRC_CACHE_MISSES, ratio); | ||
976 | |||
977 | fprintf(out, " # "); | ||
978 | color_fprintf(out, color, "%6.2f%%", ratio); | ||
979 | fprintf(out, " of all L1-dcache hits "); | ||
980 | } | ||
981 | |||
982 | static void print_l1_icache_misses(FILE *out, int cpu, | ||
983 | struct perf_evsel *evsel __maybe_unused, | ||
984 | double avg) | ||
985 | { | ||
986 | double total, ratio = 0.0; | ||
987 | const char *color; | ||
988 | int ctx = evsel_context(evsel); | ||
989 | |||
990 | total = avg_stats(&runtime_l1_icache_stats[ctx][cpu]); | ||
991 | |||
992 | if (total) | ||
993 | ratio = avg / total * 100.0; | ||
994 | |||
995 | color = get_ratio_color(GRC_CACHE_MISSES, ratio); | ||
996 | |||
997 | fprintf(out, " # "); | ||
998 | color_fprintf(out, color, "%6.2f%%", ratio); | ||
999 | fprintf(out, " of all L1-icache hits "); | ||
1000 | } | ||
1001 | |||
1002 | static void print_dtlb_cache_misses(FILE *out, int cpu, | ||
1003 | struct perf_evsel *evsel __maybe_unused, | ||
1004 | double avg) | ||
1005 | { | ||
1006 | double total, ratio = 0.0; | ||
1007 | const char *color; | ||
1008 | int ctx = evsel_context(evsel); | ||
1009 | |||
1010 | total = avg_stats(&runtime_dtlb_cache_stats[ctx][cpu]); | ||
1011 | |||
1012 | if (total) | ||
1013 | ratio = avg / total * 100.0; | ||
1014 | |||
1015 | color = get_ratio_color(GRC_CACHE_MISSES, ratio); | ||
1016 | |||
1017 | fprintf(out, " # "); | ||
1018 | color_fprintf(out, color, "%6.2f%%", ratio); | ||
1019 | fprintf(out, " of all dTLB cache hits "); | ||
1020 | } | ||
1021 | |||
1022 | static void print_itlb_cache_misses(FILE *out, int cpu, | ||
1023 | struct perf_evsel *evsel __maybe_unused, | ||
1024 | double avg) | ||
1025 | { | ||
1026 | double total, ratio = 0.0; | ||
1027 | const char *color; | ||
1028 | int ctx = evsel_context(evsel); | ||
1029 | |||
1030 | total = avg_stats(&runtime_itlb_cache_stats[ctx][cpu]); | ||
1031 | |||
1032 | if (total) | ||
1033 | ratio = avg / total * 100.0; | ||
1034 | |||
1035 | color = get_ratio_color(GRC_CACHE_MISSES, ratio); | ||
1036 | |||
1037 | fprintf(out, " # "); | ||
1038 | color_fprintf(out, color, "%6.2f%%", ratio); | ||
1039 | fprintf(out, " of all iTLB cache hits "); | ||
1040 | } | ||
1041 | |||
1042 | static void print_ll_cache_misses(FILE *out, int cpu, | ||
1043 | struct perf_evsel *evsel __maybe_unused, | ||
1044 | double avg) | ||
1045 | { | ||
1046 | double total, ratio = 0.0; | ||
1047 | const char *color; | ||
1048 | int ctx = evsel_context(evsel); | ||
1049 | |||
1050 | total = avg_stats(&runtime_ll_cache_stats[ctx][cpu]); | ||
1051 | |||
1052 | if (total) | ||
1053 | ratio = avg / total * 100.0; | ||
1054 | |||
1055 | color = get_ratio_color(GRC_CACHE_MISSES, ratio); | ||
1056 | |||
1057 | fprintf(out, " # "); | ||
1058 | color_fprintf(out, color, "%6.2f%%", ratio); | ||
1059 | fprintf(out, " of all LL-cache hits "); | ||
1060 | } | ||
1061 | |||
1062 | static void print_shadow_stats(FILE *out, struct perf_evsel *evsel, | ||
1063 | double avg, int cpu, enum aggr_mode aggr) | ||
1064 | { | ||
1065 | double total, ratio = 0.0, total2; | ||
1066 | int ctx = evsel_context(evsel); | ||
1067 | |||
1068 | if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) { | ||
1069 | total = avg_stats(&runtime_cycles_stats[ctx][cpu]); | ||
1070 | if (total) { | ||
1071 | ratio = avg / total; | ||
1072 | fprintf(out, " # %5.2f insns per cycle ", ratio); | ||
1073 | } else { | ||
1074 | fprintf(out, " "); | ||
1075 | } | ||
1076 | total = avg_stats(&runtime_stalled_cycles_front_stats[ctx][cpu]); | ||
1077 | total = max(total, avg_stats(&runtime_stalled_cycles_back_stats[ctx][cpu])); | ||
1078 | |||
1079 | if (total && avg) { | ||
1080 | ratio = total / avg; | ||
1081 | fprintf(out, "\n"); | ||
1082 | if (aggr == AGGR_NONE) | ||
1083 | fprintf(out, " "); | ||
1084 | fprintf(out, " # %5.2f stalled cycles per insn", ratio); | ||
1085 | } | ||
1086 | |||
1087 | } else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES) && | ||
1088 | runtime_branches_stats[ctx][cpu].n != 0) { | ||
1089 | print_branch_misses(out, cpu, evsel, avg); | ||
1090 | } else if ( | ||
1091 | evsel->attr.type == PERF_TYPE_HW_CACHE && | ||
1092 | evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1D | | ||
1093 | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | | ||
1094 | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && | ||
1095 | runtime_l1_dcache_stats[ctx][cpu].n != 0) { | ||
1096 | print_l1_dcache_misses(out, cpu, evsel, avg); | ||
1097 | } else if ( | ||
1098 | evsel->attr.type == PERF_TYPE_HW_CACHE && | ||
1099 | evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1I | | ||
1100 | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | | ||
1101 | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && | ||
1102 | runtime_l1_icache_stats[ctx][cpu].n != 0) { | ||
1103 | print_l1_icache_misses(out, cpu, evsel, avg); | ||
1104 | } else if ( | ||
1105 | evsel->attr.type == PERF_TYPE_HW_CACHE && | ||
1106 | evsel->attr.config == ( PERF_COUNT_HW_CACHE_DTLB | | ||
1107 | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | | ||
1108 | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && | ||
1109 | runtime_dtlb_cache_stats[ctx][cpu].n != 0) { | ||
1110 | print_dtlb_cache_misses(out, cpu, evsel, avg); | ||
1111 | } else if ( | ||
1112 | evsel->attr.type == PERF_TYPE_HW_CACHE && | ||
1113 | evsel->attr.config == ( PERF_COUNT_HW_CACHE_ITLB | | ||
1114 | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | | ||
1115 | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && | ||
1116 | runtime_itlb_cache_stats[ctx][cpu].n != 0) { | ||
1117 | print_itlb_cache_misses(out, cpu, evsel, avg); | ||
1118 | } else if ( | ||
1119 | evsel->attr.type == PERF_TYPE_HW_CACHE && | ||
1120 | evsel->attr.config == ( PERF_COUNT_HW_CACHE_LL | | ||
1121 | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | | ||
1122 | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && | ||
1123 | runtime_ll_cache_stats[ctx][cpu].n != 0) { | ||
1124 | print_ll_cache_misses(out, cpu, evsel, avg); | ||
1125 | } else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES) && | ||
1126 | runtime_cacherefs_stats[ctx][cpu].n != 0) { | ||
1127 | total = avg_stats(&runtime_cacherefs_stats[ctx][cpu]); | ||
1128 | |||
1129 | if (total) | ||
1130 | ratio = avg * 100 / total; | ||
1131 | |||
1132 | fprintf(out, " # %8.3f %% of all cache refs ", ratio); | ||
1133 | |||
1134 | } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) { | ||
1135 | print_stalled_cycles_frontend(out, cpu, evsel, avg); | ||
1136 | } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) { | ||
1137 | print_stalled_cycles_backend(out, cpu, evsel, avg); | ||
1138 | } else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) { | ||
1139 | total = avg_stats(&runtime_nsecs_stats[cpu]); | ||
1140 | |||
1141 | if (total) { | ||
1142 | ratio = avg / total; | ||
1143 | fprintf(out, " # %8.3f GHz ", ratio); | ||
1144 | } else { | ||
1145 | fprintf(out, " "); | ||
1146 | } | ||
1147 | } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX)) { | ||
1148 | total = avg_stats(&runtime_cycles_stats[ctx][cpu]); | ||
1149 | if (total) | ||
1150 | fprintf(out, | ||
1151 | " # %5.2f%% transactional cycles ", | ||
1152 | 100.0 * (avg / total)); | ||
1153 | } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) { | ||
1154 | total = avg_stats(&runtime_cycles_stats[ctx][cpu]); | ||
1155 | total2 = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]); | ||
1156 | if (total2 < avg) | ||
1157 | total2 = avg; | ||
1158 | if (total) | ||
1159 | fprintf(out, | ||
1160 | " # %5.2f%% aborted cycles ", | ||
1161 | 100.0 * ((total2-avg) / total)); | ||
1162 | } else if (perf_stat_evsel__is(evsel, TRANSACTION_START) && | ||
1163 | avg > 0 && | ||
1164 | runtime_cycles_in_tx_stats[ctx][cpu].n != 0) { | ||
1165 | total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]); | ||
1166 | |||
1167 | if (total) | ||
1168 | ratio = total / avg; | ||
1169 | |||
1170 | fprintf(out, " # %8.0f cycles / transaction ", ratio); | ||
1171 | } else if (perf_stat_evsel__is(evsel, ELISION_START) && | ||
1172 | avg > 0 && | ||
1173 | runtime_cycles_in_tx_stats[ctx][cpu].n != 0) { | ||
1174 | total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]); | ||
1175 | |||
1176 | if (total) | ||
1177 | ratio = total / avg; | ||
1178 | |||
1179 | fprintf(out, " # %8.0f cycles / elision ", ratio); | ||
1180 | } else if (runtime_nsecs_stats[cpu].n != 0) { | ||
1181 | char unit = 'M'; | ||
1182 | |||
1183 | total = avg_stats(&runtime_nsecs_stats[cpu]); | ||
1184 | |||
1185 | if (total) | ||
1186 | ratio = 1000.0 * avg / total; | ||
1187 | if (ratio < 0.001) { | ||
1188 | ratio *= 1000; | ||
1189 | unit = 'K'; | ||
1190 | } | ||
1191 | |||
1192 | fprintf(out, " # %8.3f %c/sec ", ratio, unit); | ||
1193 | } else { | ||
1194 | fprintf(out, " "); | ||
1195 | } | ||
1196 | } | ||
1197 | |||
1198 | static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg) | 762 | static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg) |
1199 | { | 763 | { |
1200 | double sc = evsel->scale; | 764 | double sc = evsel->scale; |
@@ -1230,7 +794,7 @@ static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg) | |||
1230 | if (csv_output || interval) | 794 | if (csv_output || interval) |
1231 | return; | 795 | return; |
1232 | 796 | ||
1233 | print_shadow_stats(output, evsel, avg, cpu, aggr_mode); | 797 | perf_stat__print_shadow_stats(output, evsel, avg, cpu, aggr_mode); |
1234 | } | 798 | } |
1235 | 799 | ||
1236 | static void print_aggr(char *prefix) | 800 | static void print_aggr(char *prefix) |
diff --git a/tools/perf/util/Build b/tools/perf/util/Build index e4b676de2f64..586a59d46022 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build | |||
@@ -68,6 +68,7 @@ libperf-y += rblist.o | |||
68 | libperf-y += intlist.o | 68 | libperf-y += intlist.o |
69 | libperf-y += vdso.o | 69 | libperf-y += vdso.o |
70 | libperf-y += stat.o | 70 | libperf-y += stat.o |
71 | libperf-y += stat-shadow.o | ||
71 | libperf-y += record.o | 72 | libperf-y += record.o |
72 | libperf-y += srcline.o | 73 | libperf-y += srcline.o |
73 | libperf-y += data.o | 74 | libperf-y += data.o |
diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c new file mode 100644 index 000000000000..53e8bb7bc852 --- /dev/null +++ b/tools/perf/util/stat-shadow.c | |||
@@ -0,0 +1,434 @@ | |||
1 | #include <stdio.h> | ||
2 | #include "evsel.h" | ||
3 | #include "stat.h" | ||
4 | #include "color.h" | ||
5 | |||
6 | enum { | ||
7 | CTX_BIT_USER = 1 << 0, | ||
8 | CTX_BIT_KERNEL = 1 << 1, | ||
9 | CTX_BIT_HV = 1 << 2, | ||
10 | CTX_BIT_HOST = 1 << 3, | ||
11 | CTX_BIT_IDLE = 1 << 4, | ||
12 | CTX_BIT_MAX = 1 << 5, | ||
13 | }; | ||
14 | |||
15 | #define NUM_CTX CTX_BIT_MAX | ||
16 | |||
17 | static struct stats runtime_nsecs_stats[MAX_NR_CPUS]; | ||
18 | static struct stats runtime_cycles_stats[NUM_CTX][MAX_NR_CPUS]; | ||
19 | static struct stats runtime_stalled_cycles_front_stats[NUM_CTX][MAX_NR_CPUS]; | ||
20 | static struct stats runtime_stalled_cycles_back_stats[NUM_CTX][MAX_NR_CPUS]; | ||
21 | static struct stats runtime_branches_stats[NUM_CTX][MAX_NR_CPUS]; | ||
22 | static struct stats runtime_cacherefs_stats[NUM_CTX][MAX_NR_CPUS]; | ||
23 | static struct stats runtime_l1_dcache_stats[NUM_CTX][MAX_NR_CPUS]; | ||
24 | static struct stats runtime_l1_icache_stats[NUM_CTX][MAX_NR_CPUS]; | ||
25 | static struct stats runtime_ll_cache_stats[NUM_CTX][MAX_NR_CPUS]; | ||
26 | static struct stats runtime_itlb_cache_stats[NUM_CTX][MAX_NR_CPUS]; | ||
27 | static struct stats runtime_dtlb_cache_stats[NUM_CTX][MAX_NR_CPUS]; | ||
28 | static struct stats runtime_cycles_in_tx_stats[NUM_CTX][MAX_NR_CPUS]; | ||
29 | static struct stats runtime_transaction_stats[NUM_CTX][MAX_NR_CPUS]; | ||
30 | static struct stats runtime_elision_stats[NUM_CTX][MAX_NR_CPUS]; | ||
31 | |||
32 | struct stats walltime_nsecs_stats; | ||
33 | |||
34 | static int evsel_context(struct perf_evsel *evsel) | ||
35 | { | ||
36 | int ctx = 0; | ||
37 | |||
38 | if (evsel->attr.exclude_kernel) | ||
39 | ctx |= CTX_BIT_KERNEL; | ||
40 | if (evsel->attr.exclude_user) | ||
41 | ctx |= CTX_BIT_USER; | ||
42 | if (evsel->attr.exclude_hv) | ||
43 | ctx |= CTX_BIT_HV; | ||
44 | if (evsel->attr.exclude_host) | ||
45 | ctx |= CTX_BIT_HOST; | ||
46 | if (evsel->attr.exclude_idle) | ||
47 | ctx |= CTX_BIT_IDLE; | ||
48 | |||
49 | return ctx; | ||
50 | } | ||
51 | |||
52 | void perf_stat__reset_shadow_stats(void) | ||
53 | { | ||
54 | memset(runtime_nsecs_stats, 0, sizeof(runtime_nsecs_stats)); | ||
55 | memset(runtime_cycles_stats, 0, sizeof(runtime_cycles_stats)); | ||
56 | memset(runtime_stalled_cycles_front_stats, 0, sizeof(runtime_stalled_cycles_front_stats)); | ||
57 | memset(runtime_stalled_cycles_back_stats, 0, sizeof(runtime_stalled_cycles_back_stats)); | ||
58 | memset(runtime_branches_stats, 0, sizeof(runtime_branches_stats)); | ||
59 | memset(runtime_cacherefs_stats, 0, sizeof(runtime_cacherefs_stats)); | ||
60 | memset(runtime_l1_dcache_stats, 0, sizeof(runtime_l1_dcache_stats)); | ||
61 | memset(runtime_l1_icache_stats, 0, sizeof(runtime_l1_icache_stats)); | ||
62 | memset(runtime_ll_cache_stats, 0, sizeof(runtime_ll_cache_stats)); | ||
63 | memset(runtime_itlb_cache_stats, 0, sizeof(runtime_itlb_cache_stats)); | ||
64 | memset(runtime_dtlb_cache_stats, 0, sizeof(runtime_dtlb_cache_stats)); | ||
65 | memset(runtime_cycles_in_tx_stats, 0, | ||
66 | sizeof(runtime_cycles_in_tx_stats)); | ||
67 | memset(runtime_transaction_stats, 0, | ||
68 | sizeof(runtime_transaction_stats)); | ||
69 | memset(runtime_elision_stats, 0, sizeof(runtime_elision_stats)); | ||
70 | memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats)); | ||
71 | } | ||
72 | |||
73 | /* | ||
74 | * Update various tracking values we maintain to print | ||
75 | * more semantic information such as miss/hit ratios, | ||
76 | * instruction rates, etc: | ||
77 | */ | ||
78 | void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count, | ||
79 | int cpu) | ||
80 | { | ||
81 | int ctx = evsel_context(counter); | ||
82 | |||
83 | if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK)) | ||
84 | update_stats(&runtime_nsecs_stats[cpu], count[0]); | ||
85 | else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES)) | ||
86 | update_stats(&runtime_cycles_stats[ctx][cpu], count[0]); | ||
87 | else if (perf_stat_evsel__is(counter, CYCLES_IN_TX)) | ||
88 | update_stats(&runtime_transaction_stats[ctx][cpu], count[0]); | ||
89 | else if (perf_stat_evsel__is(counter, TRANSACTION_START)) | ||
90 | update_stats(&runtime_transaction_stats[ctx][cpu], count[0]); | ||
91 | else if (perf_stat_evsel__is(counter, ELISION_START)) | ||
92 | update_stats(&runtime_elision_stats[ctx][cpu], count[0]); | ||
93 | else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) | ||
94 | update_stats(&runtime_stalled_cycles_front_stats[ctx][cpu], count[0]); | ||
95 | else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND)) | ||
96 | update_stats(&runtime_stalled_cycles_back_stats[ctx][cpu], count[0]); | ||
97 | else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS)) | ||
98 | update_stats(&runtime_branches_stats[ctx][cpu], count[0]); | ||
99 | else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES)) | ||
100 | update_stats(&runtime_cacherefs_stats[ctx][cpu], count[0]); | ||
101 | else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D)) | ||
102 | update_stats(&runtime_l1_dcache_stats[ctx][cpu], count[0]); | ||
103 | else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I)) | ||
104 | update_stats(&runtime_ll_cache_stats[ctx][cpu], count[0]); | ||
105 | else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL)) | ||
106 | update_stats(&runtime_ll_cache_stats[ctx][cpu], count[0]); | ||
107 | else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB)) | ||
108 | update_stats(&runtime_dtlb_cache_stats[ctx][cpu], count[0]); | ||
109 | else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB)) | ||
110 | update_stats(&runtime_itlb_cache_stats[ctx][cpu], count[0]); | ||
111 | } | ||
112 | |||
113 | /* used for get_ratio_color() */ | ||
114 | enum grc_type { | ||
115 | GRC_STALLED_CYCLES_FE, | ||
116 | GRC_STALLED_CYCLES_BE, | ||
117 | GRC_CACHE_MISSES, | ||
118 | GRC_MAX_NR | ||
119 | }; | ||
120 | |||
121 | static const char *get_ratio_color(enum grc_type type, double ratio) | ||
122 | { | ||
123 | static const double grc_table[GRC_MAX_NR][3] = { | ||
124 | [GRC_STALLED_CYCLES_FE] = { 50.0, 30.0, 10.0 }, | ||
125 | [GRC_STALLED_CYCLES_BE] = { 75.0, 50.0, 20.0 }, | ||
126 | [GRC_CACHE_MISSES] = { 20.0, 10.0, 5.0 }, | ||
127 | }; | ||
128 | const char *color = PERF_COLOR_NORMAL; | ||
129 | |||
130 | if (ratio > grc_table[type][0]) | ||
131 | color = PERF_COLOR_RED; | ||
132 | else if (ratio > grc_table[type][1]) | ||
133 | color = PERF_COLOR_MAGENTA; | ||
134 | else if (ratio > grc_table[type][2]) | ||
135 | color = PERF_COLOR_YELLOW; | ||
136 | |||
137 | return color; | ||
138 | } | ||
139 | |||
140 | static void print_stalled_cycles_frontend(FILE *out, int cpu, | ||
141 | struct perf_evsel *evsel | ||
142 | __maybe_unused, double avg) | ||
143 | { | ||
144 | double total, ratio = 0.0; | ||
145 | const char *color; | ||
146 | int ctx = evsel_context(evsel); | ||
147 | |||
148 | total = avg_stats(&runtime_cycles_stats[ctx][cpu]); | ||
149 | |||
150 | if (total) | ||
151 | ratio = avg / total * 100.0; | ||
152 | |||
153 | color = get_ratio_color(GRC_STALLED_CYCLES_FE, ratio); | ||
154 | |||
155 | fprintf(out, " # "); | ||
156 | color_fprintf(out, color, "%6.2f%%", ratio); | ||
157 | fprintf(out, " frontend cycles idle "); | ||
158 | } | ||
159 | |||
160 | static void print_stalled_cycles_backend(FILE *out, int cpu, | ||
161 | struct perf_evsel *evsel | ||
162 | __maybe_unused, double avg) | ||
163 | { | ||
164 | double total, ratio = 0.0; | ||
165 | const char *color; | ||
166 | int ctx = evsel_context(evsel); | ||
167 | |||
168 | total = avg_stats(&runtime_cycles_stats[ctx][cpu]); | ||
169 | |||
170 | if (total) | ||
171 | ratio = avg / total * 100.0; | ||
172 | |||
173 | color = get_ratio_color(GRC_STALLED_CYCLES_BE, ratio); | ||
174 | |||
175 | fprintf(out, " # "); | ||
176 | color_fprintf(out, color, "%6.2f%%", ratio); | ||
177 | fprintf(out, " backend cycles idle "); | ||
178 | } | ||
179 | |||
180 | static void print_branch_misses(FILE *out, int cpu, | ||
181 | struct perf_evsel *evsel __maybe_unused, | ||
182 | double avg) | ||
183 | { | ||
184 | double total, ratio = 0.0; | ||
185 | const char *color; | ||
186 | int ctx = evsel_context(evsel); | ||
187 | |||
188 | total = avg_stats(&runtime_branches_stats[ctx][cpu]); | ||
189 | |||
190 | if (total) | ||
191 | ratio = avg / total * 100.0; | ||
192 | |||
193 | color = get_ratio_color(GRC_CACHE_MISSES, ratio); | ||
194 | |||
195 | fprintf(out, " # "); | ||
196 | color_fprintf(out, color, "%6.2f%%", ratio); | ||
197 | fprintf(out, " of all branches "); | ||
198 | } | ||
199 | |||
200 | static void print_l1_dcache_misses(FILE *out, int cpu, | ||
201 | struct perf_evsel *evsel __maybe_unused, | ||
202 | double avg) | ||
203 | { | ||
204 | double total, ratio = 0.0; | ||
205 | const char *color; | ||
206 | int ctx = evsel_context(evsel); | ||
207 | |||
208 | total = avg_stats(&runtime_l1_dcache_stats[ctx][cpu]); | ||
209 | |||
210 | if (total) | ||
211 | ratio = avg / total * 100.0; | ||
212 | |||
213 | color = get_ratio_color(GRC_CACHE_MISSES, ratio); | ||
214 | |||
215 | fprintf(out, " # "); | ||
216 | color_fprintf(out, color, "%6.2f%%", ratio); | ||
217 | fprintf(out, " of all L1-dcache hits "); | ||
218 | } | ||
219 | |||
220 | static void print_l1_icache_misses(FILE *out, int cpu, | ||
221 | struct perf_evsel *evsel __maybe_unused, | ||
222 | double avg) | ||
223 | { | ||
224 | double total, ratio = 0.0; | ||
225 | const char *color; | ||
226 | int ctx = evsel_context(evsel); | ||
227 | |||
228 | total = avg_stats(&runtime_l1_icache_stats[ctx][cpu]); | ||
229 | |||
230 | if (total) | ||
231 | ratio = avg / total * 100.0; | ||
232 | |||
233 | color = get_ratio_color(GRC_CACHE_MISSES, ratio); | ||
234 | |||
235 | fprintf(out, " # "); | ||
236 | color_fprintf(out, color, "%6.2f%%", ratio); | ||
237 | fprintf(out, " of all L1-icache hits "); | ||
238 | } | ||
239 | |||
240 | static void print_dtlb_cache_misses(FILE *out, int cpu, | ||
241 | struct perf_evsel *evsel __maybe_unused, | ||
242 | double avg) | ||
243 | { | ||
244 | double total, ratio = 0.0; | ||
245 | const char *color; | ||
246 | int ctx = evsel_context(evsel); | ||
247 | |||
248 | total = avg_stats(&runtime_dtlb_cache_stats[ctx][cpu]); | ||
249 | |||
250 | if (total) | ||
251 | ratio = avg / total * 100.0; | ||
252 | |||
253 | color = get_ratio_color(GRC_CACHE_MISSES, ratio); | ||
254 | |||
255 | fprintf(out, " # "); | ||
256 | color_fprintf(out, color, "%6.2f%%", ratio); | ||
257 | fprintf(out, " of all dTLB cache hits "); | ||
258 | } | ||
259 | |||
260 | static void print_itlb_cache_misses(FILE *out, int cpu, | ||
261 | struct perf_evsel *evsel __maybe_unused, | ||
262 | double avg) | ||
263 | { | ||
264 | double total, ratio = 0.0; | ||
265 | const char *color; | ||
266 | int ctx = evsel_context(evsel); | ||
267 | |||
268 | total = avg_stats(&runtime_itlb_cache_stats[ctx][cpu]); | ||
269 | |||
270 | if (total) | ||
271 | ratio = avg / total * 100.0; | ||
272 | |||
273 | color = get_ratio_color(GRC_CACHE_MISSES, ratio); | ||
274 | |||
275 | fprintf(out, " # "); | ||
276 | color_fprintf(out, color, "%6.2f%%", ratio); | ||
277 | fprintf(out, " of all iTLB cache hits "); | ||
278 | } | ||
279 | |||
280 | static void print_ll_cache_misses(FILE *out, int cpu, | ||
281 | struct perf_evsel *evsel __maybe_unused, | ||
282 | double avg) | ||
283 | { | ||
284 | double total, ratio = 0.0; | ||
285 | const char *color; | ||
286 | int ctx = evsel_context(evsel); | ||
287 | |||
288 | total = avg_stats(&runtime_ll_cache_stats[ctx][cpu]); | ||
289 | |||
290 | if (total) | ||
291 | ratio = avg / total * 100.0; | ||
292 | |||
293 | color = get_ratio_color(GRC_CACHE_MISSES, ratio); | ||
294 | |||
295 | fprintf(out, " # "); | ||
296 | color_fprintf(out, color, "%6.2f%%", ratio); | ||
297 | fprintf(out, " of all LL-cache hits "); | ||
298 | } | ||
299 | |||
300 | void perf_stat__print_shadow_stats(FILE *out, struct perf_evsel *evsel, | ||
301 | double avg, int cpu, enum aggr_mode aggr) | ||
302 | { | ||
303 | double total, ratio = 0.0, total2; | ||
304 | int ctx = evsel_context(evsel); | ||
305 | |||
306 | if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) { | ||
307 | total = avg_stats(&runtime_cycles_stats[ctx][cpu]); | ||
308 | if (total) { | ||
309 | ratio = avg / total; | ||
310 | fprintf(out, " # %5.2f insns per cycle ", ratio); | ||
311 | } else { | ||
312 | fprintf(out, " "); | ||
313 | } | ||
314 | total = avg_stats(&runtime_stalled_cycles_front_stats[ctx][cpu]); | ||
315 | total = max(total, avg_stats(&runtime_stalled_cycles_back_stats[ctx][cpu])); | ||
316 | |||
317 | if (total && avg) { | ||
318 | ratio = total / avg; | ||
319 | fprintf(out, "\n"); | ||
320 | if (aggr == AGGR_NONE) | ||
321 | fprintf(out, " "); | ||
322 | fprintf(out, " # %5.2f stalled cycles per insn", ratio); | ||
323 | } | ||
324 | |||
325 | } else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES) && | ||
326 | runtime_branches_stats[ctx][cpu].n != 0) { | ||
327 | print_branch_misses(out, cpu, evsel, avg); | ||
328 | } else if ( | ||
329 | evsel->attr.type == PERF_TYPE_HW_CACHE && | ||
330 | evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1D | | ||
331 | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | | ||
332 | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && | ||
333 | runtime_l1_dcache_stats[ctx][cpu].n != 0) { | ||
334 | print_l1_dcache_misses(out, cpu, evsel, avg); | ||
335 | } else if ( | ||
336 | evsel->attr.type == PERF_TYPE_HW_CACHE && | ||
337 | evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1I | | ||
338 | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | | ||
339 | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && | ||
340 | runtime_l1_icache_stats[ctx][cpu].n != 0) { | ||
341 | print_l1_icache_misses(out, cpu, evsel, avg); | ||
342 | } else if ( | ||
343 | evsel->attr.type == PERF_TYPE_HW_CACHE && | ||
344 | evsel->attr.config == ( PERF_COUNT_HW_CACHE_DTLB | | ||
345 | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | | ||
346 | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && | ||
347 | runtime_dtlb_cache_stats[ctx][cpu].n != 0) { | ||
348 | print_dtlb_cache_misses(out, cpu, evsel, avg); | ||
349 | } else if ( | ||
350 | evsel->attr.type == PERF_TYPE_HW_CACHE && | ||
351 | evsel->attr.config == ( PERF_COUNT_HW_CACHE_ITLB | | ||
352 | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | | ||
353 | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && | ||
354 | runtime_itlb_cache_stats[ctx][cpu].n != 0) { | ||
355 | print_itlb_cache_misses(out, cpu, evsel, avg); | ||
356 | } else if ( | ||
357 | evsel->attr.type == PERF_TYPE_HW_CACHE && | ||
358 | evsel->attr.config == ( PERF_COUNT_HW_CACHE_LL | | ||
359 | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | | ||
360 | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && | ||
361 | runtime_ll_cache_stats[ctx][cpu].n != 0) { | ||
362 | print_ll_cache_misses(out, cpu, evsel, avg); | ||
363 | } else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES) && | ||
364 | runtime_cacherefs_stats[ctx][cpu].n != 0) { | ||
365 | total = avg_stats(&runtime_cacherefs_stats[ctx][cpu]); | ||
366 | |||
367 | if (total) | ||
368 | ratio = avg * 100 / total; | ||
369 | |||
370 | fprintf(out, " # %8.3f %% of all cache refs ", ratio); | ||
371 | |||
372 | } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) { | ||
373 | print_stalled_cycles_frontend(out, cpu, evsel, avg); | ||
374 | } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) { | ||
375 | print_stalled_cycles_backend(out, cpu, evsel, avg); | ||
376 | } else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) { | ||
377 | total = avg_stats(&runtime_nsecs_stats[cpu]); | ||
378 | |||
379 | if (total) { | ||
380 | ratio = avg / total; | ||
381 | fprintf(out, " # %8.3f GHz ", ratio); | ||
382 | } else { | ||
383 | fprintf(out, " "); | ||
384 | } | ||
385 | } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX)) { | ||
386 | total = avg_stats(&runtime_cycles_stats[ctx][cpu]); | ||
387 | if (total) | ||
388 | fprintf(out, | ||
389 | " # %5.2f%% transactional cycles ", | ||
390 | 100.0 * (avg / total)); | ||
391 | } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) { | ||
392 | total = avg_stats(&runtime_cycles_stats[ctx][cpu]); | ||
393 | total2 = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]); | ||
394 | if (total2 < avg) | ||
395 | total2 = avg; | ||
396 | if (total) | ||
397 | fprintf(out, | ||
398 | " # %5.2f%% aborted cycles ", | ||
399 | 100.0 * ((total2-avg) / total)); | ||
400 | } else if (perf_stat_evsel__is(evsel, TRANSACTION_START) && | ||
401 | avg > 0 && | ||
402 | runtime_cycles_in_tx_stats[ctx][cpu].n != 0) { | ||
403 | total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]); | ||
404 | |||
405 | if (total) | ||
406 | ratio = total / avg; | ||
407 | |||
408 | fprintf(out, " # %8.0f cycles / transaction ", ratio); | ||
409 | } else if (perf_stat_evsel__is(evsel, ELISION_START) && | ||
410 | avg > 0 && | ||
411 | runtime_cycles_in_tx_stats[ctx][cpu].n != 0) { | ||
412 | total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]); | ||
413 | |||
414 | if (total) | ||
415 | ratio = total / avg; | ||
416 | |||
417 | fprintf(out, " # %8.0f cycles / elision ", ratio); | ||
418 | } else if (runtime_nsecs_stats[cpu].n != 0) { | ||
419 | char unit = 'M'; | ||
420 | |||
421 | total = avg_stats(&runtime_nsecs_stats[cpu]); | ||
422 | |||
423 | if (total) | ||
424 | ratio = 1000.0 * avg / total; | ||
425 | if (ratio < 0.001) { | ||
426 | ratio *= 1000; | ||
427 | unit = 'K'; | ||
428 | } | ||
429 | |||
430 | fprintf(out, " # %8.3f %c/sec ", ratio, unit); | ||
431 | } else { | ||
432 | fprintf(out, " "); | ||
433 | } | ||
434 | } | ||
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 3df529bd0774..615c779eb42a 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h | |||
@@ -2,6 +2,7 @@ | |||
2 | #define __PERF_STATS_H | 2 | #define __PERF_STATS_H |
3 | 3 | ||
4 | #include <linux/types.h> | 4 | #include <linux/types.h> |
5 | #include <stdio.h> | ||
5 | 6 | ||
6 | struct stats | 7 | struct stats |
7 | { | 8 | { |
@@ -23,6 +24,13 @@ struct perf_stat { | |||
23 | enum perf_stat_evsel_id id; | 24 | enum perf_stat_evsel_id id; |
24 | }; | 25 | }; |
25 | 26 | ||
/*
 * Aggregation mode passed to perf_stat__print_shadow_stats().
 * Values are spelled out; they match the implicit numbering from 0.
 */
enum aggr_mode {
	AGGR_NONE	= 0,
	AGGR_GLOBAL	= 1,
	AGGR_SOCKET	= 2,
	AGGR_CORE	= 3,
};
33 | |||
26 | void update_stats(struct stats *stats, u64 val); | 34 | void update_stats(struct stats *stats, u64 val); |
27 | double avg_stats(struct stats *stats); | 35 | double avg_stats(struct stats *stats); |
28 | double stddev_stats(struct stats *stats); | 36 | double stddev_stats(struct stats *stats); |
@@ -46,4 +54,12 @@ bool __perf_evsel_stat__is(struct perf_evsel *evsel, | |||
46 | 54 | ||
47 | void perf_stat_evsel_id_init(struct perf_evsel *evsel); | 55 | void perf_stat_evsel_id_init(struct perf_evsel *evsel); |
48 | 56 | ||
57 | extern struct stats walltime_nsecs_stats; | ||
58 | |||
59 | void perf_stat__reset_shadow_stats(void); | ||
60 | void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count, | ||
61 | int cpu); | ||
62 | void perf_stat__print_shadow_stats(FILE *out, struct perf_evsel *evsel, | ||
63 | double avg, int cpu, enum aggr_mode aggr); | ||
64 | |||
49 | #endif | 65 | #endif |