aboutsummaryrefslogtreecommitdiffstats
path: root/tools
diff options
context:
space:
mode:
authorIngo Molnar <mingo@elte.hu>2011-04-27 07:25:24 -0400
committerIngo Molnar <mingo@elte.hu>2011-04-26 14:32:24 -0400
commit8bb6c79f24e66538f606076915e918242c02ec7c (patch)
tree44196658307709aabd70ee2ab7ee6187e3f5b190 /tools
parentc78df6c1d49b5d798f1579141e3a12be7c325d1e (diff)
perf stat: Print out miss/hit ratio for L1 data-cache events
Print out this kind of l1-dcache-misses percentage: Performance counter stats for './bw_tcp localhost': 29,956,262,201 cycles # 3.002 GHz (scaled from 85.14%) 8,255,209,558 stalled-cycles # 27.56% of all cycles are idle (scaled from 86.56%) 1,206,130,308 l1-dcache-misses # 40.49% of all L1-dcache hits (scaled from 86.30%) 2,978,756,779 l1-dcache-refs # 298.512 M/sec (scaled from 70.02%) 8,861,956,159 instructions # 0.30 insns per cycle # 0.93 stalled cycles per insn (scaled from 84.27%) 1,644,306,068 branches # 164.782 M/sec (scaled from 86.43%) 74,778,443 branch-misses # 4.55% of all branches (scaled from 70.69%) 9978.695711 task-clock # 0.693 CPUs utilized 14.404347983 seconds time elapsed And color the result depending on the severity of cache-thrashing. Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl> Acked-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Frederic Weisbecker <fweisbec@gmail.com> Link: http://lkml.kernel.org/n/tip-54gmz0zymaid84zcs7joq02p@git.kernel.org Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'tools')
-rw-r--r--tools/perf/builtin-stat.c33
1 files changed, 33 insertions, 0 deletions
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 5d4e1b9b2d89..03bac6aa014b 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -159,6 +159,7 @@ struct stats runtime_cycles_stats[MAX_NR_CPUS];
159struct stats runtime_stalled_cycles_stats[MAX_NR_CPUS]; 159struct stats runtime_stalled_cycles_stats[MAX_NR_CPUS];
160struct stats runtime_branches_stats[MAX_NR_CPUS]; 160struct stats runtime_branches_stats[MAX_NR_CPUS];
161struct stats runtime_cacherefs_stats[MAX_NR_CPUS]; 161struct stats runtime_cacherefs_stats[MAX_NR_CPUS];
162struct stats runtime_l1_dcache_stats[MAX_NR_CPUS];
162struct stats walltime_nsecs_stats; 163struct stats walltime_nsecs_stats;
163 164
164static int create_perf_stat_counter(struct perf_evsel *evsel) 165static int create_perf_stat_counter(struct perf_evsel *evsel)
@@ -211,6 +212,8 @@ static void update_shadow_stats(struct perf_evsel *counter, u64 *count)
211 update_stats(&runtime_branches_stats[0], count[0]); 212 update_stats(&runtime_branches_stats[0], count[0]);
212 else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES)) 213 else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES))
213 update_stats(&runtime_cacherefs_stats[0], count[0]); 214 update_stats(&runtime_cacherefs_stats[0], count[0]);
215 else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D))
216 update_stats(&runtime_l1_dcache_stats[0], count[0]);
214} 217}
215 218
216/* 219/*
@@ -473,6 +476,29 @@ static void print_branch_misses(int cpu, struct perf_evsel *evsel __used, double
473 fprintf(stderr, " of all branches "); 476 fprintf(stderr, " of all branches ");
474} 477}
475 478
479static void print_l1_dcache_misses(int cpu, struct perf_evsel *evsel __used, double avg)
480{
481 double total, ratio = 0.0;
482 const char *color;
483
484 total = avg_stats(&runtime_l1_dcache_stats[cpu]);
485
486 if (total)
487 ratio = avg / total * 100.0;
488
489 color = PERF_COLOR_NORMAL;
490 if (ratio > 20.0)
491 color = PERF_COLOR_RED;
492 else if (ratio > 10.0)
493 color = PERF_COLOR_MAGENTA;
494 else if (ratio > 5.0)
495 color = PERF_COLOR_YELLOW;
496
497 fprintf(stderr, " # ");
498 color_fprintf(stderr, color, "%5.2f%%", ratio);
499 fprintf(stderr, " of all L1-dcache hits ");
500}
501
476static void abs_printout(int cpu, struct perf_evsel *evsel, double avg) 502static void abs_printout(int cpu, struct perf_evsel *evsel, double avg)
477{ 503{
478 double total, ratio = 0.0; 504 double total, ratio = 0.0;
@@ -519,6 +545,13 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg)
519 } else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES) && 545 } else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES) &&
520 runtime_branches_stats[cpu].n != 0) { 546 runtime_branches_stats[cpu].n != 0) {
521 print_branch_misses(cpu, evsel, avg); 547 print_branch_misses(cpu, evsel, avg);
548 } else if (
549 evsel->attr.type == PERF_TYPE_HW_CACHE &&
550 evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1D |
551 ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
552 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
553 runtime_branches_stats[cpu].n != 0) {
554 print_l1_dcache_misses(cpu, evsel, avg);
522 } else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES) && 555 } else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES) &&
523 runtime_cacherefs_stats[cpu].n != 0) { 556 runtime_cacherefs_stats[cpu].n != 0) {
524 total = avg_stats(&runtime_cacherefs_stats[cpu]); 557 total = avg_stats(&runtime_cacherefs_stats[cpu]);