diff options
| author | Ingo Molnar <mingo@elte.hu> | 2011-05-19 08:01:42 -0400 |
|---|---|---|
| committer | Ingo Molnar <mingo@elte.hu> | 2011-05-19 08:30:50 -0400 |
| commit | c3305257cd4df63e03e21e331a0140ae9c0faccc (patch) | |
| tree | d5b9609bf96f004a9efe0aca326983489718751e | |
| parent | 2cba3ffb9a9db3874304a1739002d053d53c738b (diff) | |
perf stat: Add more cache-miss percentage printouts
Print out the cache-miss percentage as well if the cache refs were
collected, for all the generic cache event types.
Before:
11,103,723,230 dTLB-loads # 622.471 M/sec ( +- 0.30% )
87,065,337 dTLB-load-misses # 4.881 M/sec ( +- 0.90% )
After:
11,353,713,242 dTLB-loads # 626.020 M/sec ( +- 0.35% )
113,393,472 dTLB-load-misses # 1.00% of all dTLB cache hits ( +- 0.49% )
Also highlight too-high percentages with ASCII colors when it's executed on the console.
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Steven Rostedt <rostedt@goodmis.org>
Link: http://lkml.kernel.org/n/tip-lkhwxsevdbd9a8nymx0vxc3y@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
| -rw-r--r-- | tools/perf/builtin-stat.c | 138 |
1 files changed, 136 insertions, 2 deletions
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index a89fc0835367..a9f06715e44d 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c | |||
| @@ -261,6 +261,10 @@ struct stats runtime_stalled_cycles_back_stats[MAX_NR_CPUS]; | |||
| 261 | struct stats runtime_branches_stats[MAX_NR_CPUS]; | 261 | struct stats runtime_branches_stats[MAX_NR_CPUS]; |
| 262 | struct stats runtime_cacherefs_stats[MAX_NR_CPUS]; | 262 | struct stats runtime_cacherefs_stats[MAX_NR_CPUS]; |
| 263 | struct stats runtime_l1_dcache_stats[MAX_NR_CPUS]; | 263 | struct stats runtime_l1_dcache_stats[MAX_NR_CPUS]; |
| 264 | struct stats runtime_l1_icache_stats[MAX_NR_CPUS]; | ||
| 265 | struct stats runtime_ll_cache_stats[MAX_NR_CPUS]; | ||
| 266 | struct stats runtime_itlb_cache_stats[MAX_NR_CPUS]; | ||
| 267 | struct stats runtime_dtlb_cache_stats[MAX_NR_CPUS]; | ||
| 264 | struct stats walltime_nsecs_stats; | 268 | struct stats walltime_nsecs_stats; |
| 265 | 269 | ||
| 266 | static int create_perf_stat_counter(struct perf_evsel *evsel) | 270 | static int create_perf_stat_counter(struct perf_evsel *evsel) |
| @@ -317,6 +321,14 @@ static void update_shadow_stats(struct perf_evsel *counter, u64 *count) | |||
| 317 | update_stats(&runtime_cacherefs_stats[0], count[0]); | 321 | update_stats(&runtime_cacherefs_stats[0], count[0]); |
| 318 | else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D)) | 322 | else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D)) |
| 319 | update_stats(&runtime_l1_dcache_stats[0], count[0]); | 323 | update_stats(&runtime_l1_dcache_stats[0], count[0]); |
| 324 | else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I)) | ||
| 325 | update_stats(&runtime_l1_icache_stats[0], count[0]); | ||
| 326 | else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL)) | ||
| 327 | update_stats(&runtime_ll_cache_stats[0], count[0]); | ||
| 328 | else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB)) | ||
| 329 | update_stats(&runtime_dtlb_cache_stats[0], count[0]); | ||
| 330 | else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB)) | ||
| 331 | update_stats(&runtime_itlb_cache_stats[0], count[0]); | ||
| 320 | } | 332 | } |
| 321 | 333 | ||
| 322 | /* | 334 | /* |
| @@ -630,6 +642,98 @@ static void print_l1_dcache_misses(int cpu, struct perf_evsel *evsel __used, dou | |||
| 630 | fprintf(stderr, " of all L1-dcache hits "); | 642 | fprintf(stderr, " of all L1-dcache hits "); |
| 631 | } | 643 | } |
| 632 | 644 | ||
| 645 | static void print_l1_icache_misses(int cpu, struct perf_evsel *evsel __used, double avg) | ||
| 646 | { | ||
| 647 | double total, ratio = 0.0; | ||
| 648 | const char *color; | ||
| 649 | |||
| 650 | total = avg_stats(&runtime_l1_icache_stats[cpu]); | ||
| 651 | |||
| 652 | if (total) | ||
| 653 | ratio = avg / total * 100.0; | ||
| 654 | |||
| 655 | color = PERF_COLOR_NORMAL; | ||
| 656 | if (ratio > 20.0) | ||
| 657 | color = PERF_COLOR_RED; | ||
| 658 | else if (ratio > 10.0) | ||
| 659 | color = PERF_COLOR_MAGENTA; | ||
| 660 | else if (ratio > 5.0) | ||
| 661 | color = PERF_COLOR_YELLOW; | ||
| 662 | |||
| 663 | fprintf(stderr, " # "); | ||
| 664 | color_fprintf(stderr, color, "%6.2f%%", ratio); | ||
| 665 | fprintf(stderr, " of all L1-icache hits "); | ||
| 666 | } | ||
| 667 | |||
| 668 | static void print_dtlb_cache_misses(int cpu, struct perf_evsel *evsel __used, double avg) | ||
| 669 | { | ||
| 670 | double total, ratio = 0.0; | ||
| 671 | const char *color; | ||
| 672 | |||
| 673 | total = avg_stats(&runtime_dtlb_cache_stats[cpu]); | ||
| 674 | |||
| 675 | if (total) | ||
| 676 | ratio = avg / total * 100.0; | ||
| 677 | |||
| 678 | color = PERF_COLOR_NORMAL; | ||
| 679 | if (ratio > 20.0) | ||
| 680 | color = PERF_COLOR_RED; | ||
| 681 | else if (ratio > 10.0) | ||
| 682 | color = PERF_COLOR_MAGENTA; | ||
| 683 | else if (ratio > 5.0) | ||
| 684 | color = PERF_COLOR_YELLOW; | ||
| 685 | |||
| 686 | fprintf(stderr, " # "); | ||
| 687 | color_fprintf(stderr, color, "%6.2f%%", ratio); | ||
| 688 | fprintf(stderr, " of all dTLB cache hits "); | ||
| 689 | } | ||
| 690 | |||
| 691 | static void print_itlb_cache_misses(int cpu, struct perf_evsel *evsel __used, double avg) | ||
| 692 | { | ||
| 693 | double total, ratio = 0.0; | ||
| 694 | const char *color; | ||
| 695 | |||
| 696 | total = avg_stats(&runtime_itlb_cache_stats[cpu]); | ||
| 697 | |||
| 698 | if (total) | ||
| 699 | ratio = avg / total * 100.0; | ||
| 700 | |||
| 701 | color = PERF_COLOR_NORMAL; | ||
| 702 | if (ratio > 20.0) | ||
| 703 | color = PERF_COLOR_RED; | ||
| 704 | else if (ratio > 10.0) | ||
| 705 | color = PERF_COLOR_MAGENTA; | ||
| 706 | else if (ratio > 5.0) | ||
| 707 | color = PERF_COLOR_YELLOW; | ||
| 708 | |||
| 709 | fprintf(stderr, " # "); | ||
| 710 | color_fprintf(stderr, color, "%6.2f%%", ratio); | ||
| 711 | fprintf(stderr, " of all iTLB cache hits "); | ||
| 712 | } | ||
| 713 | |||
| 714 | static void print_ll_cache_misses(int cpu, struct perf_evsel *evsel __used, double avg) | ||
| 715 | { | ||
| 716 | double total, ratio = 0.0; | ||
| 717 | const char *color; | ||
| 718 | |||
| 719 | total = avg_stats(&runtime_ll_cache_stats[cpu]); | ||
| 720 | |||
| 721 | if (total) | ||
| 722 | ratio = avg / total * 100.0; | ||
| 723 | |||
| 724 | color = PERF_COLOR_NORMAL; | ||
| 725 | if (ratio > 20.0) | ||
| 726 | color = PERF_COLOR_RED; | ||
| 727 | else if (ratio > 10.0) | ||
| 728 | color = PERF_COLOR_MAGENTA; | ||
| 729 | else if (ratio > 5.0) | ||
| 730 | color = PERF_COLOR_YELLOW; | ||
| 731 | |||
| 732 | fprintf(stderr, " # "); | ||
| 733 | color_fprintf(stderr, color, "%6.2f%%", ratio); | ||
| 734 | fprintf(stderr, " of all LL-cache hits "); | ||
| 735 | } | ||
| 736 | |||
| 633 | static void abs_printout(int cpu, struct perf_evsel *evsel, double avg) | 737 | static void abs_printout(int cpu, struct perf_evsel *evsel, double avg) |
| 634 | { | 738 | { |
| 635 | double total, ratio = 0.0; | 739 | double total, ratio = 0.0; |
| @@ -684,6 +788,34 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg) | |||
| 684 | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && | 788 | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && |
| 685 | runtime_l1_dcache_stats[cpu].n != 0) { | 789 | runtime_l1_dcache_stats[cpu].n != 0) { |
| 686 | print_l1_dcache_misses(cpu, evsel, avg); | 790 | print_l1_dcache_misses(cpu, evsel, avg); |
| 791 | } else if ( | ||
| 792 | evsel->attr.type == PERF_TYPE_HW_CACHE && | ||
| 793 | evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1I | | ||
| 794 | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | | ||
| 795 | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && | ||
| 796 | runtime_l1_icache_stats[cpu].n != 0) { | ||
| 797 | print_l1_icache_misses(cpu, evsel, avg); | ||
| 798 | } else if ( | ||
| 799 | evsel->attr.type == PERF_TYPE_HW_CACHE && | ||
| 800 | evsel->attr.config == ( PERF_COUNT_HW_CACHE_DTLB | | ||
| 801 | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | | ||
| 802 | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && | ||
| 803 | runtime_dtlb_cache_stats[cpu].n != 0) { | ||
| 804 | print_dtlb_cache_misses(cpu, evsel, avg); | ||
| 805 | } else if ( | ||
| 806 | evsel->attr.type == PERF_TYPE_HW_CACHE && | ||
| 807 | evsel->attr.config == ( PERF_COUNT_HW_CACHE_ITLB | | ||
| 808 | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | | ||
| 809 | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && | ||
| 810 | runtime_itlb_cache_stats[cpu].n != 0) { | ||
| 811 | print_itlb_cache_misses(cpu, evsel, avg); | ||
| 812 | } else if ( | ||
| 813 | evsel->attr.type == PERF_TYPE_HW_CACHE && | ||
| 814 | evsel->attr.config == ( PERF_COUNT_HW_CACHE_LL | | ||
| 815 | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | | ||
| 816 | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && | ||
| 817 | runtime_ll_cache_stats[cpu].n != 0) { | ||
| 818 | print_ll_cache_misses(cpu, evsel, avg); | ||
| 687 | } else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES) && | 819 | } else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES) && |
| 688 | runtime_cacherefs_stats[cpu].n != 0) { | 820 | runtime_cacherefs_stats[cpu].n != 0) { |
| 689 | total = avg_stats(&runtime_cacherefs_stats[cpu]); | 821 | total = avg_stats(&runtime_cacherefs_stats[cpu]); |
| @@ -842,10 +974,12 @@ static void print_stat(int argc, const char **argv) | |||
| 842 | } | 974 | } |
| 843 | 975 | ||
| 844 | if (!csv_output) { | 976 | if (!csv_output) { |
| 845 | fprintf(stderr, "\n"); | 977 | if (!null_run) |
| 846 | fprintf(stderr, " %18.9f seconds time elapsed", | 978 | fprintf(stderr, "\n"); |
| 979 | fprintf(stderr, " %17.9f seconds time elapsed", | ||
| 847 | avg_stats(&walltime_nsecs_stats)/1e9); | 980 | avg_stats(&walltime_nsecs_stats)/1e9); |
| 848 | if (run_count > 1) { | 981 | if (run_count > 1) { |
| 982 | fprintf(stderr, " "); | ||
| 849 | print_noise_pct(stddev_stats(&walltime_nsecs_stats), | 983 | print_noise_pct(stddev_stats(&walltime_nsecs_stats), |
| 850 | avg_stats(&walltime_nsecs_stats)); | 984 | avg_stats(&walltime_nsecs_stats)); |
| 851 | } | 985 | } |
