diff options
author | Ingo Molnar <mingo@elte.hu> | 2011-05-19 08:01:42 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2011-05-19 08:30:50 -0400 |
commit | c3305257cd4df63e03e21e331a0140ae9c0faccc (patch) | |
tree | d5b9609bf96f004a9efe0aca326983489718751e /tools/perf | |
parent | 2cba3ffb9a9db3874304a1739002d053d53c738b (diff) |
perf stat: Add more cache-miss percentage printouts
Print out the cache-miss percentage as well if the cache refs were
collected, for all the generic cache event types.
Before:
11,103,723,230 dTLB-loads # 622.471 M/sec ( +- 0.30% )
87,065,337 dTLB-load-misses # 4.881 M/sec ( +- 0.90% )
After:
11,353,713,242 dTLB-loads # 626.020 M/sec ( +- 0.35% )
113,393,472 dTLB-load-misses # 1.00% of all dTLB cache hits ( +- 0.49% )
Also highlight percentages that are too high using ASCII colors when the command is executed on the console.
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Steven Rostedt <rostedt@goodmis.org>
Link: http://lkml.kernel.org/n/tip-lkhwxsevdbd9a8nymx0vxc3y@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'tools/perf')
-rw-r--r-- | tools/perf/builtin-stat.c | 138 |
1 files changed, 136 insertions, 2 deletions
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index a89fc0835367..a9f06715e44d 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c | |||
@@ -261,6 +261,10 @@ struct stats runtime_stalled_cycles_back_stats[MAX_NR_CPUS]; | |||
261 | struct stats runtime_branches_stats[MAX_NR_CPUS]; | 261 | struct stats runtime_branches_stats[MAX_NR_CPUS]; |
262 | struct stats runtime_cacherefs_stats[MAX_NR_CPUS]; | 262 | struct stats runtime_cacherefs_stats[MAX_NR_CPUS]; |
263 | struct stats runtime_l1_dcache_stats[MAX_NR_CPUS]; | 263 | struct stats runtime_l1_dcache_stats[MAX_NR_CPUS]; |
264 | struct stats runtime_l1_icache_stats[MAX_NR_CPUS]; | ||
265 | struct stats runtime_ll_cache_stats[MAX_NR_CPUS]; | ||
266 | struct stats runtime_itlb_cache_stats[MAX_NR_CPUS]; | ||
267 | struct stats runtime_dtlb_cache_stats[MAX_NR_CPUS]; | ||
264 | struct stats walltime_nsecs_stats; | 268 | struct stats walltime_nsecs_stats; |
265 | 269 | ||
266 | static int create_perf_stat_counter(struct perf_evsel *evsel) | 270 | static int create_perf_stat_counter(struct perf_evsel *evsel) |
@@ -317,6 +321,14 @@ static void update_shadow_stats(struct perf_evsel *counter, u64 *count) | |||
317 | update_stats(&runtime_cacherefs_stats[0], count[0]); | 321 | update_stats(&runtime_cacherefs_stats[0], count[0]); |
318 | else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D)) | 322 | else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D)) |
319 | update_stats(&runtime_l1_dcache_stats[0], count[0]); | 323 | update_stats(&runtime_l1_dcache_stats[0], count[0]); |
324 | else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I)) | ||
325 | update_stats(&runtime_l1_icache_stats[0], count[0]); | ||
326 | else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL)) | ||
327 | update_stats(&runtime_ll_cache_stats[0], count[0]); | ||
328 | else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB)) | ||
329 | update_stats(&runtime_dtlb_cache_stats[0], count[0]); | ||
330 | else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB)) | ||
331 | update_stats(&runtime_itlb_cache_stats[0], count[0]); | ||
320 | } | 332 | } |
321 | 333 | ||
322 | /* | 334 | /* |
@@ -630,6 +642,98 @@ static void print_l1_dcache_misses(int cpu, struct perf_evsel *evsel __used, dou | |||
630 | fprintf(stderr, " of all L1-dcache hits "); | 642 | fprintf(stderr, " of all L1-dcache hits "); |
631 | } | 643 | } |
632 | 644 | ||
645 | static void print_l1_icache_misses(int cpu, struct perf_evsel *evsel __used, double avg) | ||
646 | { | ||
647 | double total, ratio = 0.0; | ||
648 | const char *color; | ||
649 | |||
650 | total = avg_stats(&runtime_l1_icache_stats[cpu]); | ||
651 | |||
652 | if (total) | ||
653 | ratio = avg / total * 100.0; | ||
654 | |||
655 | color = PERF_COLOR_NORMAL; | ||
656 | if (ratio > 20.0) | ||
657 | color = PERF_COLOR_RED; | ||
658 | else if (ratio > 10.0) | ||
659 | color = PERF_COLOR_MAGENTA; | ||
660 | else if (ratio > 5.0) | ||
661 | color = PERF_COLOR_YELLOW; | ||
662 | |||
663 | fprintf(stderr, " # "); | ||
664 | color_fprintf(stderr, color, "%6.2f%%", ratio); | ||
665 | fprintf(stderr, " of all L1-icache hits "); | ||
666 | } | ||
667 | |||
668 | static void print_dtlb_cache_misses(int cpu, struct perf_evsel *evsel __used, double avg) | ||
669 | { | ||
670 | double total, ratio = 0.0; | ||
671 | const char *color; | ||
672 | |||
673 | total = avg_stats(&runtime_dtlb_cache_stats[cpu]); | ||
674 | |||
675 | if (total) | ||
676 | ratio = avg / total * 100.0; | ||
677 | |||
678 | color = PERF_COLOR_NORMAL; | ||
679 | if (ratio > 20.0) | ||
680 | color = PERF_COLOR_RED; | ||
681 | else if (ratio > 10.0) | ||
682 | color = PERF_COLOR_MAGENTA; | ||
683 | else if (ratio > 5.0) | ||
684 | color = PERF_COLOR_YELLOW; | ||
685 | |||
686 | fprintf(stderr, " # "); | ||
687 | color_fprintf(stderr, color, "%6.2f%%", ratio); | ||
688 | fprintf(stderr, " of all dTLB cache hits "); | ||
689 | } | ||
690 | |||
691 | static void print_itlb_cache_misses(int cpu, struct perf_evsel *evsel __used, double avg) | ||
692 | { | ||
693 | double total, ratio = 0.0; | ||
694 | const char *color; | ||
695 | |||
696 | total = avg_stats(&runtime_itlb_cache_stats[cpu]); | ||
697 | |||
698 | if (total) | ||
699 | ratio = avg / total * 100.0; | ||
700 | |||
701 | color = PERF_COLOR_NORMAL; | ||
702 | if (ratio > 20.0) | ||
703 | color = PERF_COLOR_RED; | ||
704 | else if (ratio > 10.0) | ||
705 | color = PERF_COLOR_MAGENTA; | ||
706 | else if (ratio > 5.0) | ||
707 | color = PERF_COLOR_YELLOW; | ||
708 | |||
709 | fprintf(stderr, " # "); | ||
710 | color_fprintf(stderr, color, "%6.2f%%", ratio); | ||
711 | fprintf(stderr, " of all iTLB cache hits "); | ||
712 | } | ||
713 | |||
714 | static void print_ll_cache_misses(int cpu, struct perf_evsel *evsel __used, double avg) | ||
715 | { | ||
716 | double total, ratio = 0.0; | ||
717 | const char *color; | ||
718 | |||
719 | total = avg_stats(&runtime_ll_cache_stats[cpu]); | ||
720 | |||
721 | if (total) | ||
722 | ratio = avg / total * 100.0; | ||
723 | |||
724 | color = PERF_COLOR_NORMAL; | ||
725 | if (ratio > 20.0) | ||
726 | color = PERF_COLOR_RED; | ||
727 | else if (ratio > 10.0) | ||
728 | color = PERF_COLOR_MAGENTA; | ||
729 | else if (ratio > 5.0) | ||
730 | color = PERF_COLOR_YELLOW; | ||
731 | |||
732 | fprintf(stderr, " # "); | ||
733 | color_fprintf(stderr, color, "%6.2f%%", ratio); | ||
734 | fprintf(stderr, " of all LL-cache hits "); | ||
735 | } | ||
736 | |||
633 | static void abs_printout(int cpu, struct perf_evsel *evsel, double avg) | 737 | static void abs_printout(int cpu, struct perf_evsel *evsel, double avg) |
634 | { | 738 | { |
635 | double total, ratio = 0.0; | 739 | double total, ratio = 0.0; |
@@ -684,6 +788,34 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg) | |||
684 | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && | 788 | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && |
685 | runtime_l1_dcache_stats[cpu].n != 0) { | 789 | runtime_l1_dcache_stats[cpu].n != 0) { |
686 | print_l1_dcache_misses(cpu, evsel, avg); | 790 | print_l1_dcache_misses(cpu, evsel, avg); |
791 | } else if ( | ||
792 | evsel->attr.type == PERF_TYPE_HW_CACHE && | ||
793 | evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1I | | ||
794 | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | | ||
795 | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && | ||
796 | runtime_l1_icache_stats[cpu].n != 0) { | ||
797 | print_l1_icache_misses(cpu, evsel, avg); | ||
798 | } else if ( | ||
799 | evsel->attr.type == PERF_TYPE_HW_CACHE && | ||
800 | evsel->attr.config == ( PERF_COUNT_HW_CACHE_DTLB | | ||
801 | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | | ||
802 | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && | ||
803 | runtime_dtlb_cache_stats[cpu].n != 0) { | ||
804 | print_dtlb_cache_misses(cpu, evsel, avg); | ||
805 | } else if ( | ||
806 | evsel->attr.type == PERF_TYPE_HW_CACHE && | ||
807 | evsel->attr.config == ( PERF_COUNT_HW_CACHE_ITLB | | ||
808 | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | | ||
809 | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && | ||
810 | runtime_itlb_cache_stats[cpu].n != 0) { | ||
811 | print_itlb_cache_misses(cpu, evsel, avg); | ||
812 | } else if ( | ||
813 | evsel->attr.type == PERF_TYPE_HW_CACHE && | ||
814 | evsel->attr.config == ( PERF_COUNT_HW_CACHE_LL | | ||
815 | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | | ||
816 | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && | ||
817 | runtime_ll_cache_stats[cpu].n != 0) { | ||
818 | print_ll_cache_misses(cpu, evsel, avg); | ||
687 | } else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES) && | 819 | } else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES) && |
688 | runtime_cacherefs_stats[cpu].n != 0) { | 820 | runtime_cacherefs_stats[cpu].n != 0) { |
689 | total = avg_stats(&runtime_cacherefs_stats[cpu]); | 821 | total = avg_stats(&runtime_cacherefs_stats[cpu]); |
@@ -842,10 +974,12 @@ static void print_stat(int argc, const char **argv) | |||
842 | } | 974 | } |
843 | 975 | ||
844 | if (!csv_output) { | 976 | if (!csv_output) { |
845 | fprintf(stderr, "\n"); | 977 | if (!null_run) |
846 | fprintf(stderr, " %18.9f seconds time elapsed", | 978 | fprintf(stderr, "\n"); |
979 | fprintf(stderr, " %17.9f seconds time elapsed", | ||
847 | avg_stats(&walltime_nsecs_stats)/1e9); | 980 | avg_stats(&walltime_nsecs_stats)/1e9); |
848 | if (run_count > 1) { | 981 | if (run_count > 1) { |
982 | fprintf(stderr, " "); | ||
849 | print_noise_pct(stddev_stats(&walltime_nsecs_stats), | 983 | print_noise_pct(stddev_stats(&walltime_nsecs_stats), |
850 | avg_stats(&walltime_nsecs_stats)); | 984 | avg_stats(&walltime_nsecs_stats)); |
851 | } | 985 | } |