about summary refs log tree commit diff stats
path: root/tools
diff options
context:
space:
mode:
author Ingo Molnar <mingo@elte.hu> 2011-04-29 07:49:08 -0400
committer Ingo Molnar <mingo@elte.hu> 2011-04-29 08:35:55 -0400
commit d3d1e86da07b4565815e3dbcd082f53017d215f8 (patch)
tree c2fad06e57607b32d943d6b590325ae5d8ef3d01 /tools
parent 129c04cb8ce2e4bf3f17223f58ef16aa8a2cb3b8 (diff)
perf stat: Analyze front-end and back-end stall counts
Sample output:

 Performance counter stats for './loop_1b':

        873.691065 task-clock                #    1.000 CPUs utilized
                 1 context-switches          #    0.000 M/sec
                 1 CPU-migrations            #    0.000 M/sec
                96 page-faults               #    0.000 M/sec
     2,012,637,222 cycles                    #    2.304 GHz                      (66.58%)
     1,001,397,911 stalled-cycles-frontend   #   49.76% frontend cycles idle     (66.58%)
         7,523,398 stalled-cycles-backend    #    0.37% backend cycles idle      (66.76%)
     2,004,551,046 instructions              #    1.00 insns per cycle
                                             #    0.50 stalled cycles per insn   (66.80%)
     1,001,304,992 branches                  # 1146.063 M/sec                    (66.76%)
            39,453 branch-misses             #    0.00% of all branches          (66.64%)

       0.874046121 seconds time elapsed

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Link: http://lkml.kernel.org/n/tip-7y40wib8n003io7hjpn1dsrm@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'tools')
-rw-r--r-- tools/perf/builtin-stat.c 41
-rw-r--r-- tools/perf/util/parse-events.c 7
2 files changed, 39 insertions, 9 deletions
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 6a4a8a399d95..e45449938b80 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -201,7 +201,8 @@ static double stddev_stats(struct stats *stats)
201 201
202struct stats runtime_nsecs_stats[MAX_NR_CPUS]; 202struct stats runtime_nsecs_stats[MAX_NR_CPUS];
203struct stats runtime_cycles_stats[MAX_NR_CPUS]; 203struct stats runtime_cycles_stats[MAX_NR_CPUS];
204struct stats runtime_stalled_cycles_stats[MAX_NR_CPUS]; 204struct stats runtime_stalled_cycles_front_stats[MAX_NR_CPUS];
205struct stats runtime_stalled_cycles_back_stats[MAX_NR_CPUS];
205struct stats runtime_branches_stats[MAX_NR_CPUS]; 206struct stats runtime_branches_stats[MAX_NR_CPUS];
206struct stats runtime_cacherefs_stats[MAX_NR_CPUS]; 207struct stats runtime_cacherefs_stats[MAX_NR_CPUS];
207struct stats runtime_l1_dcache_stats[MAX_NR_CPUS]; 208struct stats runtime_l1_dcache_stats[MAX_NR_CPUS];
@@ -251,8 +252,10 @@ static void update_shadow_stats(struct perf_evsel *counter, u64 *count)
251 update_stats(&runtime_nsecs_stats[0], count[0]); 252 update_stats(&runtime_nsecs_stats[0], count[0]);
252 else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES)) 253 else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
253 update_stats(&runtime_cycles_stats[0], count[0]); 254 update_stats(&runtime_cycles_stats[0], count[0]);
255 else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
256 update_stats(&runtime_stalled_cycles_front_stats[0], count[0]);
254 else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND)) 257 else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
255 update_stats(&runtime_stalled_cycles_stats[0], count[0]); 258 update_stats(&runtime_stalled_cycles_back_stats[0], count[0]);
256 else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS)) 259 else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
257 update_stats(&runtime_branches_stats[0], count[0]); 260 update_stats(&runtime_branches_stats[0], count[0]);
258 else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES)) 261 else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES))
@@ -478,7 +481,30 @@ static void nsec_printout(int cpu, struct perf_evsel *evsel, double avg)
478 fprintf(stderr, " # %8.3f CPUs utilized ", avg / avg_stats(&walltime_nsecs_stats)); 481 fprintf(stderr, " # %8.3f CPUs utilized ", avg / avg_stats(&walltime_nsecs_stats));
479} 482}
480 483
481static void print_stalled_cycles(int cpu, struct perf_evsel *evsel __used, double avg) 484static void print_stalled_cycles_frontend(int cpu, struct perf_evsel *evsel __used, double avg)
485{
486 double total, ratio = 0.0;
487 const char *color;
488
489 total = avg_stats(&runtime_cycles_stats[cpu]);
490
491 if (total)
492 ratio = avg / total * 100.0;
493
494 color = PERF_COLOR_NORMAL;
495 if (ratio > 75.0)
496 color = PERF_COLOR_RED;
497 else if (ratio > 50.0)
498 color = PERF_COLOR_MAGENTA;
499 else if (ratio > 20.0)
500 color = PERF_COLOR_YELLOW;
501
502 fprintf(stderr, " # ");
503 color_fprintf(stderr, color, "%5.2f%%", ratio);
504 fprintf(stderr, " frontend cycles idle ");
505}
506
507static void print_stalled_cycles_backend(int cpu, struct perf_evsel *evsel __used, double avg)
482{ 508{
483 double total, ratio = 0.0; 509 double total, ratio = 0.0;
484 const char *color; 510 const char *color;
@@ -498,7 +524,7 @@ static void print_stalled_cycles(int cpu, struct perf_evsel *evsel __used, doubl
498 524
499 fprintf(stderr, " # "); 525 fprintf(stderr, " # ");
500 color_fprintf(stderr, color, "%5.2f%%", ratio); 526 color_fprintf(stderr, color, "%5.2f%%", ratio);
501 fprintf(stderr, " of all cycles are idle "); 527 fprintf(stderr, " backend cycles idle ");
502} 528}
503 529
504static void print_branch_misses(int cpu, struct perf_evsel *evsel __used, double avg) 530static void print_branch_misses(int cpu, struct perf_evsel *evsel __used, double avg)
@@ -583,7 +609,8 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg)
583 609
584 fprintf(stderr, " # %4.2f insns per cycle ", ratio); 610 fprintf(stderr, " # %4.2f insns per cycle ", ratio);
585 611
586 total = avg_stats(&runtime_stalled_cycles_stats[cpu]); 612 total = avg_stats(&runtime_stalled_cycles_front_stats[cpu]);
613 total = max(total, avg_stats(&runtime_stalled_cycles_back_stats[cpu]));
587 614
588 if (total && avg) { 615 if (total && avg) {
589 ratio = total / avg; 616 ratio = total / avg;
@@ -609,8 +636,10 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg)
609 636
610 fprintf(stderr, " # %8.3f %% of all cache refs ", ratio); 637 fprintf(stderr, " # %8.3f %% of all cache refs ", ratio);
611 638
639 } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) {
640 print_stalled_cycles_frontend(cpu, evsel, avg);
612 } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) { 641 } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) {
613 print_stalled_cycles(cpu, evsel, avg); 642 print_stalled_cycles_backend(cpu, evsel, avg);
614 } else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) { 643 } else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
615 total = avg_stats(&runtime_nsecs_stats[cpu]); 644 total = avg_stats(&runtime_nsecs_stats[cpu]);
616 645
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 04d2f0a96674..8a407f3e286f 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -60,7 +60,7 @@ static struct event_symbol event_symbols[] = {
60#define PERF_EVENT_TYPE(config) __PERF_EVENT_FIELD(config, TYPE) 60#define PERF_EVENT_TYPE(config) __PERF_EVENT_FIELD(config, TYPE)
61#define PERF_EVENT_ID(config) __PERF_EVENT_FIELD(config, EVENT) 61#define PERF_EVENT_ID(config) __PERF_EVENT_FIELD(config, EVENT)
62 62
63static const char *hw_event_names[] = { 63static const char *hw_event_names[PERF_COUNT_HW_MAX] = {
64 "cycles", 64 "cycles",
65 "instructions", 65 "instructions",
66 "cache-references", 66 "cache-references",
@@ -68,10 +68,11 @@ static const char *hw_event_names[] = {
68 "branches", 68 "branches",
69 "branch-misses", 69 "branch-misses",
70 "bus-cycles", 70 "bus-cycles",
71 "stalled-cycles", 71 "stalled-cycles-frontend",
72 "stalled-cycles-backend",
72}; 73};
73 74
74static const char *sw_event_names[] = { 75static const char *sw_event_names[PERF_COUNT_SW_MAX] = {
75 "cpu-clock", 76 "cpu-clock",
76 "task-clock", 77 "task-clock",
77 "page-faults", 78 "page-faults",