diff options
author | Ingo Molnar <mingo@elte.hu> | 2011-04-29 07:49:08 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2011-04-29 08:35:55 -0400 |
commit | d3d1e86da07b4565815e3dbcd082f53017d215f8 (patch) | |
tree | c2fad06e57607b32d943d6b590325ae5d8ef3d01 /tools/perf | |
parent | 129c04cb8ce2e4bf3f17223f58ef16aa8a2cb3b8 (diff) |
perf stat: Analyze front-end and back-end stall counts
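Track frontend and backend stalled cycles as two separate statistics instead of one: each is printed as a percentage of total cycles, and the "stalled cycles per insn" figure uses the larger of the two counts.
Sample output: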
Performance counter stats for './loop_1b':
873.691065 task-clock # 1.000 CPUs utilized
1 context-switches # 0.000 M/sec
1 CPU-migrations # 0.000 M/sec
96 page-faults # 0.000 M/sec
2,012,637,222 cycles # 2.304 GHz (66.58%)
1,001,397,911 stalled-cycles-frontend # 49.76% frontend cycles idle (66.58%)
7,523,398 stalled-cycles-backend # 0.37% backend cycles idle (66.76%)
2,004,551,046 instructions # 1.00 insns per cycle
# 0.50 stalled cycles per insn (66.80%)
1,001,304,992 branches # 1146.063 M/sec (66.76%)
39,453 branch-misses # 0.00% of all branches (66.64%)
0.874046121 seconds time elapsed
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Link: http://lkml.kernel.org/n/tip-7y40wib8n003io7hjpn1dsrm@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'tools/perf')
-rw-r--r-- | tools/perf/builtin-stat.c | 41 | ||||
-rw-r--r-- | tools/perf/util/parse-events.c | 7 |
2 files changed, 39 insertions, 9 deletions
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 6a4a8a399d95..e45449938b80 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c | |||
@@ -201,7 +201,8 @@ static double stddev_stats(struct stats *stats) | |||
201 | 201 | ||
202 | struct stats runtime_nsecs_stats[MAX_NR_CPUS]; | 202 | struct stats runtime_nsecs_stats[MAX_NR_CPUS]; |
203 | struct stats runtime_cycles_stats[MAX_NR_CPUS]; | 203 | struct stats runtime_cycles_stats[MAX_NR_CPUS]; |
204 | struct stats runtime_stalled_cycles_stats[MAX_NR_CPUS]; | 204 | struct stats runtime_stalled_cycles_front_stats[MAX_NR_CPUS]; |
205 | struct stats runtime_stalled_cycles_back_stats[MAX_NR_CPUS]; | ||
205 | struct stats runtime_branches_stats[MAX_NR_CPUS]; | 206 | struct stats runtime_branches_stats[MAX_NR_CPUS]; |
206 | struct stats runtime_cacherefs_stats[MAX_NR_CPUS]; | 207 | struct stats runtime_cacherefs_stats[MAX_NR_CPUS]; |
207 | struct stats runtime_l1_dcache_stats[MAX_NR_CPUS]; | 208 | struct stats runtime_l1_dcache_stats[MAX_NR_CPUS]; |
@@ -251,8 +252,10 @@ static void update_shadow_stats(struct perf_evsel *counter, u64 *count) | |||
251 | update_stats(&runtime_nsecs_stats[0], count[0]); | 252 | update_stats(&runtime_nsecs_stats[0], count[0]); |
252 | else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES)) | 253 | else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES)) |
253 | update_stats(&runtime_cycles_stats[0], count[0]); | 254 | update_stats(&runtime_cycles_stats[0], count[0]); |
255 | else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) | ||
256 | update_stats(&runtime_stalled_cycles_front_stats[0], count[0]); | ||
254 | else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND)) | 257 | else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND)) |
255 | update_stats(&runtime_stalled_cycles_stats[0], count[0]); | 258 | update_stats(&runtime_stalled_cycles_back_stats[0], count[0]); |
256 | else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS)) | 259 | else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS)) |
257 | update_stats(&runtime_branches_stats[0], count[0]); | 260 | update_stats(&runtime_branches_stats[0], count[0]); |
258 | else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES)) | 261 | else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES)) |
@@ -478,7 +481,30 @@ static void nsec_printout(int cpu, struct perf_evsel *evsel, double avg) | |||
478 | fprintf(stderr, " # %8.3f CPUs utilized ", avg / avg_stats(&walltime_nsecs_stats)); | 481 | fprintf(stderr, " # %8.3f CPUs utilized ", avg / avg_stats(&walltime_nsecs_stats)); |
479 | } | 482 | } |
480 | 483 | ||
481 | static void print_stalled_cycles(int cpu, struct perf_evsel *evsel __used, double avg) | 484 | static void print_stalled_cycles_frontend(int cpu, struct perf_evsel *evsel __used, double avg) |
485 | { | ||
486 | double total, ratio = 0.0; | ||
487 | const char *color; | ||
488 | |||
489 | total = avg_stats(&runtime_cycles_stats[cpu]); | ||
490 | |||
491 | if (total) | ||
492 | ratio = avg / total * 100.0; | ||
493 | |||
494 | color = PERF_COLOR_NORMAL; | ||
495 | if (ratio > 75.0) | ||
496 | color = PERF_COLOR_RED; | ||
497 | else if (ratio > 50.0) | ||
498 | color = PERF_COLOR_MAGENTA; | ||
499 | else if (ratio > 20.0) | ||
500 | color = PERF_COLOR_YELLOW; | ||
501 | |||
502 | fprintf(stderr, " # "); | ||
503 | color_fprintf(stderr, color, "%5.2f%%", ratio); | ||
504 | fprintf(stderr, " frontend cycles idle "); | ||
505 | } | ||
506 | |||
507 | static void print_stalled_cycles_backend(int cpu, struct perf_evsel *evsel __used, double avg) | ||
482 | { | 508 | { |
483 | double total, ratio = 0.0; | 509 | double total, ratio = 0.0; |
484 | const char *color; | 510 | const char *color; |
@@ -498,7 +524,7 @@ static void print_stalled_cycles(int cpu, struct perf_evsel *evsel __used, doubl | |||
498 | 524 | ||
499 | fprintf(stderr, " # "); | 525 | fprintf(stderr, " # "); |
500 | color_fprintf(stderr, color, "%5.2f%%", ratio); | 526 | color_fprintf(stderr, color, "%5.2f%%", ratio); |
501 | fprintf(stderr, " of all cycles are idle "); | 527 | fprintf(stderr, " backend cycles idle "); |
502 | } | 528 | } |
503 | 529 | ||
504 | static void print_branch_misses(int cpu, struct perf_evsel *evsel __used, double avg) | 530 | static void print_branch_misses(int cpu, struct perf_evsel *evsel __used, double avg) |
@@ -583,7 +609,8 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg) | |||
583 | 609 | ||
584 | fprintf(stderr, " # %4.2f insns per cycle ", ratio); | 610 | fprintf(stderr, " # %4.2f insns per cycle ", ratio); |
585 | 611 | ||
586 | total = avg_stats(&runtime_stalled_cycles_stats[cpu]); | 612 | total = avg_stats(&runtime_stalled_cycles_front_stats[cpu]); |
613 | total = max(total, avg_stats(&runtime_stalled_cycles_back_stats[cpu])); | ||
587 | 614 | ||
588 | if (total && avg) { | 615 | if (total && avg) { |
589 | ratio = total / avg; | 616 | ratio = total / avg; |
@@ -609,8 +636,10 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg) | |||
609 | 636 | ||
610 | fprintf(stderr, " # %8.3f %% of all cache refs ", ratio); | 637 | fprintf(stderr, " # %8.3f %% of all cache refs ", ratio); |
611 | 638 | ||
639 | } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) { | ||
640 | print_stalled_cycles_frontend(cpu, evsel, avg); | ||
612 | } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) { | 641 | } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) { |
613 | print_stalled_cycles(cpu, evsel, avg); | 642 | print_stalled_cycles_backend(cpu, evsel, avg); |
614 | } else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) { | 643 | } else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) { |
615 | total = avg_stats(&runtime_nsecs_stats[cpu]); | 644 | total = avg_stats(&runtime_nsecs_stats[cpu]); |
616 | 645 | ||
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 04d2f0a96674..8a407f3e286f 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c | |||
@@ -60,7 +60,7 @@ static struct event_symbol event_symbols[] = { | |||
60 | #define PERF_EVENT_TYPE(config) __PERF_EVENT_FIELD(config, TYPE) | 60 | #define PERF_EVENT_TYPE(config) __PERF_EVENT_FIELD(config, TYPE) |
61 | #define PERF_EVENT_ID(config) __PERF_EVENT_FIELD(config, EVENT) | 61 | #define PERF_EVENT_ID(config) __PERF_EVENT_FIELD(config, EVENT) |
62 | 62 | ||
63 | static const char *hw_event_names[] = { | 63 | static const char *hw_event_names[PERF_COUNT_HW_MAX] = { |
64 | "cycles", | 64 | "cycles", |
65 | "instructions", | 65 | "instructions", |
66 | "cache-references", | 66 | "cache-references", |
@@ -68,10 +68,11 @@ static const char *hw_event_names[] = { | |||
68 | "branches", | 68 | "branches", |
69 | "branch-misses", | 69 | "branch-misses", |
70 | "bus-cycles", | 70 | "bus-cycles", |
71 | "stalled-cycles", | 71 | "stalled-cycles-frontend", |
72 | "stalled-cycles-backend", | ||
72 | }; | 73 | }; |
73 | 74 | ||
74 | static const char *sw_event_names[] = { | 75 | static const char *sw_event_names[PERF_COUNT_SW_MAX] = { |
75 | "cpu-clock", | 76 | "cpu-clock", |
76 | "task-clock", | 77 | "task-clock", |
77 | "page-faults", | 78 | "page-faults", |