about | summary | refs | log | tree | commit | diff | stats
path: root/tools
diff options
context:
space:
mode:
author: Ingo Molnar <mingo@elte.hu> 2011-04-26 22:34:16 -0400
committer: Ingo Molnar <mingo@elte.hu> 2011-04-26 14:04:56 -0400
commit: 481f988a016f7a0327a5537bde4794349fc4625c (patch)
tree: c7375e288e3f565c9ef178fe27c98d3f63cc13c7 /tools
parent: dcd9936a5a6d89512b5323c1145647f2dbe0236f (diff)
perf stat: Add stalled cycles accounting, prettify the resulting output
Add stalled cycles accounting and use it to print the "cycles stalled per
instruction" value.

Also change the unit of the cycles output from M/sec to GHz - this is more
intuitive.

Prettify the output to:

 Performance counter stats for './loop_1b_instructions':

        239.775036 task-clock               #    0.997 CPUs utilized
       761,903,912 cycles                   #    3.178 GHz
       356,620,620 stalled-cycles           #   46.81% of all cycles are idle
     1,001,578,351 instructions             #    1.31 insns per cycle
                                            #    0.36 stalled cycles per insn
            14,782 cache-references         #    0.062 M/sec
             5,694 cache-misses             #   38.520 % of all cache refs

       0.240493656 seconds time elapsed

Also adjust the --repeat output to make the percentages align vertically:

 Performance counter stats for './loop_1b_instructions' (10 runs):

        236.096793 task-clock               #    0.997 CPUs utilized             ( +- 0.011% )
       756,553,086 cycles                   #    3.204 GHz                       ( +- 0.002% )
       354,942,692 stalled-cycles           #   46.92% of all cycles are idle    ( +- 0.008% )
     1,001,389,700 instructions             #    1.32 insns per cycle
                                            #    0.35 stalled cycles per insn    ( +- 0.000% )
            10,166 cache-references         #    0.043 M/sec                     ( +- 0.742% )
               468 cache-misses             #    4.608 % of all cache refs       ( +- 13.385% )

       0.236874136 seconds time elapsed                                          ( +- 0.01% )

Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Link: http://lkml.kernel.org/n/tip-uapziqny39601apdmmhoz7hk@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'tools')
-rw-r--r-- tools/perf/builtin-stat.c | 43
1 files changed, 30 insertions, 13 deletions
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index e5e82f62c784..e881c2061381 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -156,6 +156,7 @@ static double stddev_stats(struct stats *stats)
156 156
157struct stats runtime_nsecs_stats[MAX_NR_CPUS]; 157struct stats runtime_nsecs_stats[MAX_NR_CPUS];
158struct stats runtime_cycles_stats[MAX_NR_CPUS]; 158struct stats runtime_cycles_stats[MAX_NR_CPUS];
159struct stats runtime_stalled_cycles_stats[MAX_NR_CPUS];
159struct stats runtime_branches_stats[MAX_NR_CPUS]; 160struct stats runtime_branches_stats[MAX_NR_CPUS];
160struct stats runtime_cacherefs_stats[MAX_NR_CPUS]; 161struct stats runtime_cacherefs_stats[MAX_NR_CPUS];
161struct stats walltime_nsecs_stats; 162struct stats walltime_nsecs_stats;
@@ -204,6 +205,8 @@ static void update_shadow_stats(struct perf_evsel *counter, u64 *count)
204 update_stats(&runtime_nsecs_stats[0], count[0]); 205 update_stats(&runtime_nsecs_stats[0], count[0]);
205 else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES)) 206 else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
206 update_stats(&runtime_cycles_stats[0], count[0]); 207 update_stats(&runtime_cycles_stats[0], count[0]);
208 else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES))
209 update_stats(&runtime_stalled_cycles_stats[0], count[0]);
207 else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS)) 210 else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
208 update_stats(&runtime_branches_stats[0], count[0]); 211 update_stats(&runtime_branches_stats[0], count[0]);
209 else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES)) 212 else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES))
@@ -412,8 +415,7 @@ static void nsec_printout(int cpu, struct perf_evsel *evsel, double avg)
412 return; 415 return;
413 416
414 if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK)) 417 if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK))
415 fprintf(stderr, " # %10.3f CPUs", 418 fprintf(stderr, " # %8.3f CPUs utilized ", avg / avg_stats(&walltime_nsecs_stats));
416 avg / avg_stats(&walltime_nsecs_stats));
417} 419}
418 420
419static void abs_printout(int cpu, struct perf_evsel *evsel, double avg) 421static void abs_printout(int cpu, struct perf_evsel *evsel, double avg)
@@ -450,7 +452,15 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg)
450 if (total) 452 if (total)
451 ratio = avg / total; 453 ratio = avg / total;
452 454
453 fprintf(stderr, " # ( %4.2f instructions per cycle )", ratio); 455 fprintf(stderr, " # %4.2f insns per cycle", ratio);
456
457 total = avg_stats(&runtime_stalled_cycles_stats[cpu]);
458
459 if (total && avg) {
460 ratio = total / avg;
461 fprintf(stderr, "\n # %4.2f stalled cycles per insn", ratio);
462 }
463
454 } else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES) && 464 } else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES) &&
455 runtime_branches_stats[cpu].n != 0) { 465 runtime_branches_stats[cpu].n != 0) {
456 total = avg_stats(&runtime_branches_stats[cpu]); 466 total = avg_stats(&runtime_branches_stats[cpu]);
@@ -458,7 +468,7 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg)
458 if (total) 468 if (total)
459 ratio = avg * 100 / total; 469 ratio = avg * 100 / total;
460 470
461 fprintf(stderr, " # %10.3f %%", ratio); 471 fprintf(stderr, " # %8.3f %% of all branches", ratio);
462 472
463 } else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES) && 473 } else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES) &&
464 runtime_cacherefs_stats[cpu].n != 0) { 474 runtime_cacherefs_stats[cpu].n != 0) {
@@ -467,22 +477,29 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg)
467 if (total) 477 if (total)
468 ratio = avg * 100 / total; 478 ratio = avg * 100 / total;
469 479
470 fprintf(stderr, " # %10.3f %%", ratio); 480 fprintf(stderr, " # %8.3f %% of all cache refs ", ratio);
471 481
472 } else if (runtime_nsecs_stats[cpu].n != 0) { 482 } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES)) {
483 total = avg_stats(&runtime_cycles_stats[cpu]);
484
485 if (total)
486 ratio = avg / total * 100.0;
487
488 fprintf(stderr, " # %5.2f%% of all cycles are idle ", ratio);
489 } else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
473 total = avg_stats(&runtime_nsecs_stats[cpu]); 490 total = avg_stats(&runtime_nsecs_stats[cpu]);
474 491
475 if (total) 492 if (total)
476 ratio = 1000.0 * avg / total; 493 ratio = 1.0 * avg / total;
477 494
478 fprintf(stderr, " # %10.3f M/sec", ratio); 495 fprintf(stderr, " # %8.3f GHz ", ratio);
479 } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES)) { 496 } else if (runtime_nsecs_stats[cpu].n != 0) {
480 total = avg_stats(&runtime_cycles_stats[cpu]); 497 total = avg_stats(&runtime_nsecs_stats[cpu]);
481 498
482 if (total) 499 if (total)
483 ratio = avg / total * 100.0; 500 ratio = 1000.0 * avg / total;
484 501
485 fprintf(stderr, " # (%5.2f%% of all cycles )", ratio); 502 fprintf(stderr, " # %8.3f M/sec ", ratio);
486 } 503 }
487} 504}
488 505
@@ -619,7 +636,7 @@ static void print_stat(int argc, const char **argv)
619 fprintf(stderr, " %18.9f seconds time elapsed", 636 fprintf(stderr, " %18.9f seconds time elapsed",
620 avg_stats(&walltime_nsecs_stats)/1e9); 637 avg_stats(&walltime_nsecs_stats)/1e9);
621 if (run_count > 1) { 638 if (run_count > 1) {
622 fprintf(stderr, " ( +- %7.3f%% )", 639 fprintf(stderr, " ( +-%5.2f%% )",
623 100*stddev_stats(&walltime_nsecs_stats) / 640 100*stddev_stats(&walltime_nsecs_stats) /
624 avg_stats(&walltime_nsecs_stats)); 641 avg_stats(&walltime_nsecs_stats));
625 } 642 }