diff options
author | Ingo Molnar <mingo@elte.hu> | 2011-04-26 23:20:22 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2011-04-26 14:04:57 -0400 |
commit | 1fc570ad89e55dc32dfa4dda1311948b38f26524 (patch) | |
tree | 5e775a1f2627301110bd11246dd68cf727961c94 /tools | |
parent | 481f988a016f7a0327a5537bde4794349fc4625c (diff) |
perf stat: Add stalled cycles to the default output
The new default output looks like this:

     Performance counter stats for './loop_1b_instructions':

            236.010686 task-clock               #    0.996 CPUs utilized
                     0 context-switches         #    0.000 M/sec
                     0 CPU-migrations           #    0.000 M/sec
                    99 page-faults              #    0.000 M/sec
           756,487,646 cycles                   #    3.205 GHz
           354,938,996 stalled-cycles           #   46.92% of all cycles are idle
         1,001,403,797 instructions             #    1.32 insns per cycle
                                                #    0.35 stalled cycles per insn
           100,279,773 branches                 #  424.895 M/sec
                12,646 branch-misses            #    0.013 % of all branches

           0.236902540 seconds time elapsed

We dropped cache-references and cache-misses from the default event set and
added stalled-cycles, which is a more generic "how well utilized is the CPU" metric.
If the stalled-cycles ratio is too high then more specific measurements can be
taken to figure out the source of the inefficiency.
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Link: http://lkml.kernel.org/n/tip-pbpl2l4mn797s69bclfpwkwn@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'tools')
-rw-r--r-- | tools/perf/builtin-stat.c | 5 | ||||
-rw-r--r-- | tools/perf/util/parse-events.c | 11 |
2 files changed, 8 insertions, 8 deletions
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index e881c2061381..924d18c407b8 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c | |||
@@ -65,11 +65,10 @@ static struct perf_event_attr default_attrs[] = { | |||
65 | { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS }, | 65 | { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS }, |
66 | 66 | ||
67 | { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES }, | 67 | { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES }, |
68 | { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES }, | ||
68 | { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS }, | 69 | { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS }, |
69 | { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS }, | 70 | { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS }, |
70 | { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES }, | 71 | { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES }, |
71 | { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_REFERENCES }, | ||
72 | { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_MISSES }, | ||
73 | 72 | ||
74 | }; | 73 | }; |
75 | 74 | ||
@@ -468,7 +467,7 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg) | |||
468 | if (total) | 467 | if (total) |
469 | ratio = avg * 100 / total; | 468 | ratio = avg * 100 / total; |
470 | 469 | ||
471 | fprintf(stderr, " # %8.3f %% of all branches", ratio); | 470 | fprintf(stderr, " # %5.2f %% of all branches ", ratio); |
472 | 471 | ||
473 | } else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES) && | 472 | } else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES) && |
474 | runtime_cacherefs_stats[cpu].n != 0) { | 473 | runtime_cacherefs_stats[cpu].n != 0) { |
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index b5bfef12f399..bbbb735268ef 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c | |||
@@ -32,13 +32,13 @@ char debugfs_path[MAXPATHLEN]; | |||
32 | 32 | ||
33 | static struct event_symbol event_symbols[] = { | 33 | static struct event_symbol event_symbols[] = { |
34 | { CHW(CPU_CYCLES), "cpu-cycles", "cycles" }, | 34 | { CHW(CPU_CYCLES), "cpu-cycles", "cycles" }, |
35 | { CHW(STALLED_CYCLES), "stalled-cycles", "idle-cycles" }, | ||
35 | { CHW(INSTRUCTIONS), "instructions", "" }, | 36 | { CHW(INSTRUCTIONS), "instructions", "" }, |
36 | { CHW(CACHE_REFERENCES), "cache-references", "" }, | 37 | { CHW(CACHE_REFERENCES), "cache-references", "" }, |
37 | { CHW(CACHE_MISSES), "cache-misses", "" }, | 38 | { CHW(CACHE_MISSES), "cache-misses", "" }, |
38 | { CHW(BRANCH_INSTRUCTIONS), "branch-instructions", "branches" }, | 39 | { CHW(BRANCH_INSTRUCTIONS), "branch-instructions", "branches" }, |
39 | { CHW(BRANCH_MISSES), "branch-misses", "" }, | 40 | { CHW(BRANCH_MISSES), "branch-misses", "" }, |
40 | { CHW(BUS_CYCLES), "bus-cycles", "" }, | 41 | { CHW(BUS_CYCLES), "bus-cycles", "" }, |
41 | { CHW(STALLED_CYCLES), "stalled-cycles", "" }, | ||
42 | 42 | ||
43 | { CSW(CPU_CLOCK), "cpu-clock", "" }, | 43 | { CSW(CPU_CLOCK), "cpu-clock", "" }, |
44 | { CSW(TASK_CLOCK), "task-clock", "" }, | 44 | { CSW(TASK_CLOCK), "task-clock", "" }, |
@@ -54,9 +54,9 @@ static struct event_symbol event_symbols[] = { | |||
54 | #define __PERF_EVENT_FIELD(config, name) \ | 54 | #define __PERF_EVENT_FIELD(config, name) \ |
55 | ((config & PERF_EVENT_##name##_MASK) >> PERF_EVENT_##name##_SHIFT) | 55 | ((config & PERF_EVENT_##name##_MASK) >> PERF_EVENT_##name##_SHIFT) |
56 | 56 | ||
57 | #define PERF_EVENT_RAW(config) __PERF_EVENT_FIELD(config, RAW) | 57 | #define PERF_EVENT_RAW(config) __PERF_EVENT_FIELD(config, RAW) |
58 | #define PERF_EVENT_CONFIG(config) __PERF_EVENT_FIELD(config, CONFIG) | 58 | #define PERF_EVENT_CONFIG(config) __PERF_EVENT_FIELD(config, CONFIG) |
59 | #define PERF_EVENT_TYPE(config) __PERF_EVENT_FIELD(config, TYPE) | 59 | #define PERF_EVENT_TYPE(config) __PERF_EVENT_FIELD(config, TYPE) |
60 | #define PERF_EVENT_ID(config) __PERF_EVENT_FIELD(config, EVENT) | 60 | #define PERF_EVENT_ID(config) __PERF_EVENT_FIELD(config, EVENT) |
61 | 61 | ||
62 | static const char *hw_event_names[] = { | 62 | static const char *hw_event_names[] = { |
@@ -67,6 +67,7 @@ static const char *hw_event_names[] = { | |||
67 | "branches", | 67 | "branches", |
68 | "branch-misses", | 68 | "branch-misses", |
69 | "bus-cycles", | 69 | "bus-cycles", |
70 | "stalled-cycles", | ||
70 | }; | 71 | }; |
71 | 72 | ||
72 | static const char *sw_event_names[] = { | 73 | static const char *sw_event_names[] = { |
@@ -308,7 +309,7 @@ const char *__event_name(int type, u64 config) | |||
308 | 309 | ||
309 | switch (type) { | 310 | switch (type) { |
310 | case PERF_TYPE_HARDWARE: | 311 | case PERF_TYPE_HARDWARE: |
311 | if (config < PERF_COUNT_HW_MAX) | 312 | if (config < PERF_COUNT_HW_MAX && hw_event_names[config]) |
312 | return hw_event_names[config]; | 313 | return hw_event_names[config]; |
313 | return "unknown-hardware"; | 314 | return "unknown-hardware"; |
314 | 315 | ||
@@ -334,7 +335,7 @@ const char *__event_name(int type, u64 config) | |||
334 | } | 335 | } |
335 | 336 | ||
336 | case PERF_TYPE_SOFTWARE: | 337 | case PERF_TYPE_SOFTWARE: |
337 | if (config < PERF_COUNT_SW_MAX) | 338 | if (config < PERF_COUNT_SW_MAX && sw_event_names[config]) |
338 | return sw_event_names[config]; | 339 | return sw_event_names[config]; |
339 | return "unknown-software"; | 340 | return "unknown-software"; |
340 | 341 | ||