diff options
author | Ingo Molnar <mingo@elte.hu> | 2011-04-27 07:25:24 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2011-04-26 14:32:24 -0400 |
commit | 8bb6c79f24e66538f606076915e918242c02ec7c (patch) | |
tree | 44196658307709aabd70ee2ab7ee6187e3f5b190 /tools | |
parent | c78df6c1d49b5d798f1579141e3a12be7c325d1e (diff) |
perf stat: Print out miss/hit ratio for L1 data-cache events
Print out this kind of l1-dcache-misses percentage:
Performance counter stats for './bw_tcp localhost':
29,956,262,201 cycles # 3.002 GHz (scaled from 85.14%)
8,255,209,558 stalled-cycles # 27.56% of all cycles are idle (scaled from 86.56%)
1,206,130,308 l1-dcache-misses # 40.49% of all L1-dcache hits (scaled from 86.30%)
2,978,756,779 l1-dcache-refs # 298.512 M/sec (scaled from 70.02%)
8,861,956,159 instructions # 0.30 insns per cycle
# 0.93 stalled cycles per insn (scaled from 84.27%)
1,644,306,068 branches # 164.782 M/sec (scaled from 86.43%)
74,778,443 branch-misses # 4.55% of all branches (scaled from 70.69%)
9978.695711 task-clock # 0.693 CPUs utilized
14.404347983 seconds time elapsed
And color the result depending on the severity of cache-thrashing.
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Link: http://lkml.kernel.org/n/tip-54gmz0zymaid84zcs7joq02p@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'tools')
-rw-r--r-- | tools/perf/builtin-stat.c | 33 |
1 files changed, 33 insertions, 0 deletions
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 5d4e1b9b2d89..03bac6aa014b 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c | |||
@@ -159,6 +159,7 @@ struct stats runtime_cycles_stats[MAX_NR_CPUS]; | |||
159 | struct stats runtime_stalled_cycles_stats[MAX_NR_CPUS]; | 159 | struct stats runtime_stalled_cycles_stats[MAX_NR_CPUS]; |
160 | struct stats runtime_branches_stats[MAX_NR_CPUS]; | 160 | struct stats runtime_branches_stats[MAX_NR_CPUS]; |
161 | struct stats runtime_cacherefs_stats[MAX_NR_CPUS]; | 161 | struct stats runtime_cacherefs_stats[MAX_NR_CPUS]; |
162 | struct stats runtime_l1_dcache_stats[MAX_NR_CPUS]; | ||
162 | struct stats walltime_nsecs_stats; | 163 | struct stats walltime_nsecs_stats; |
163 | 164 | ||
164 | static int create_perf_stat_counter(struct perf_evsel *evsel) | 165 | static int create_perf_stat_counter(struct perf_evsel *evsel) |
@@ -211,6 +212,8 @@ static void update_shadow_stats(struct perf_evsel *counter, u64 *count) | |||
211 | update_stats(&runtime_branches_stats[0], count[0]); | 212 | update_stats(&runtime_branches_stats[0], count[0]); |
212 | else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES)) | 213 | else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES)) |
213 | update_stats(&runtime_cacherefs_stats[0], count[0]); | 214 | update_stats(&runtime_cacherefs_stats[0], count[0]); |
215 | else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D)) | ||
216 | update_stats(&runtime_l1_dcache_stats[0], count[0]); | ||
214 | } | 217 | } |
215 | 218 | ||
216 | /* | 219 | /* |
@@ -473,6 +476,29 @@ static void print_branch_misses(int cpu, struct perf_evsel *evsel __used, double | |||
473 | fprintf(stderr, " of all branches "); | 476 | fprintf(stderr, " of all branches "); |
474 | } | 477 | } |
475 | 478 | ||
479 | static void print_l1_dcache_misses(int cpu, struct perf_evsel *evsel __used, double avg) | ||
480 | { | ||
481 | double total, ratio = 0.0; | ||
482 | const char *color; | ||
483 | |||
484 | total = avg_stats(&runtime_l1_dcache_stats[cpu]); | ||
485 | |||
486 | if (total) | ||
487 | ratio = avg / total * 100.0; | ||
488 | |||
489 | color = PERF_COLOR_NORMAL; | ||
490 | if (ratio > 20.0) | ||
491 | color = PERF_COLOR_RED; | ||
492 | else if (ratio > 10.0) | ||
493 | color = PERF_COLOR_MAGENTA; | ||
494 | else if (ratio > 5.0) | ||
495 | color = PERF_COLOR_YELLOW; | ||
496 | |||
497 | fprintf(stderr, " # "); | ||
498 | color_fprintf(stderr, color, "%5.2f%%", ratio); | ||
499 | fprintf(stderr, " of all L1-dcache hits "); | ||
500 | } | ||
501 | |||
476 | static void abs_printout(int cpu, struct perf_evsel *evsel, double avg) | 502 | static void abs_printout(int cpu, struct perf_evsel *evsel, double avg) |
477 | { | 503 | { |
478 | double total, ratio = 0.0; | 504 | double total, ratio = 0.0; |
@@ -519,6 +545,13 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg) | |||
519 | } else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES) && | 545 | } else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES) && |
520 | runtime_branches_stats[cpu].n != 0) { | 546 | runtime_branches_stats[cpu].n != 0) { |
521 | print_branch_misses(cpu, evsel, avg); | 547 | print_branch_misses(cpu, evsel, avg); |
548 | } else if ( | ||
549 | evsel->attr.type == PERF_TYPE_HW_CACHE && | ||
550 | evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1D | | ||
551 | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | | ||
552 | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && | ||
553 | runtime_branches_stats[cpu].n != 0) { | ||
554 | print_l1_dcache_misses(cpu, evsel, avg); | ||
522 | } else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES) && | 555 | } else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES) && |
523 | runtime_cacherefs_stats[cpu].n != 0) { | 556 | runtime_cacherefs_stats[cpu].n != 0) { |
524 | total = avg_stats(&runtime_cacherefs_stats[cpu]); | 557 | total = avg_stats(&runtime_cacherefs_stats[cpu]); |