diff options
author | Ingo Molnar <mingo@elte.hu> | 2011-04-29 07:19:47 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2011-04-29 08:23:58 -0400 |
commit | 8f62242246351b5a4bc0c1f00c0c7003edea128a (patch) | |
tree | 9021c99956e0f9dc64655aaa4309c0f0fdb055c9 | |
parent | ede70290046043b2638204cab55e26ea1d0c6cd9 (diff) |
perf events: Add generic front-end and back-end stalled cycle event definitions
Add two generic hardware events: front-end and back-end stalled cycles.
These events measure conditions when the CPU is executing code but its
capabilities are not fully utilized. Understanding such situations and
analyzing them is an important sub-task of code optimization workflows.
Both events limit performance: most front-end stalls tend to be caused
by branch mispredictions or instruction-fetch cache misses, while back-end
stalls can be caused by various resource shortages or inefficient
instruction scheduling.
Front-end stalls are the more important ones: code cannot run fast
if the instruction stream cannot be supplied fast enough.
An over-utilized back-end can cause front-end stalls and thus
has to be monitored as well.
The exact composition depends heavily on the program logic and
instruction mix.
We use the terms 'stall', 'front-end' and 'back-end' loosely and
try to use the best available events from specific CPUs that
approximate these concepts.
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Link: http://lkml.kernel.org/n/tip-7y40wib8n000io7hjpn1dsrm@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r-- | arch/x86/kernel/cpu/perf_event_intel.c | 2 | ||||
-rw-r--r-- | include/linux/perf_event.h | 3 |
2 files changed, 3 insertions, 2 deletions
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 1ea94224f62e..393085b87a2c 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c | |||
@@ -1414,7 +1414,7 @@ static __init int intel_pmu_init(void) | |||
1414 | x86_pmu.extra_regs = intel_nehalem_extra_regs; | 1414 | x86_pmu.extra_regs = intel_nehalem_extra_regs; |
1415 | 1415 | ||
1416 | /* Install the stalled-cycles event: UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */ | 1416 | /* Install the stalled-cycles event: UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */ |
1417 | intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES] = 0x1803fb1; | 1417 | intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x1803fb1; |
1418 | 1418 | ||
1419 | if (ebx & 0x40) { | 1419 | if (ebx & 0x40) { |
1420 | /* | 1420 | /* |
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index ac636dd20a0c..4e2d7ae71499 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h | |||
@@ -52,7 +52,8 @@ enum perf_hw_id { | |||
52 | PERF_COUNT_HW_BRANCH_INSTRUCTIONS = 4, | 52 | PERF_COUNT_HW_BRANCH_INSTRUCTIONS = 4, |
53 | PERF_COUNT_HW_BRANCH_MISSES = 5, | 53 | PERF_COUNT_HW_BRANCH_MISSES = 5, |
54 | PERF_COUNT_HW_BUS_CYCLES = 6, | 54 | PERF_COUNT_HW_BUS_CYCLES = 6, |
55 | PERF_COUNT_HW_STALLED_CYCLES = 7, | 55 | PERF_COUNT_HW_STALLED_CYCLES_FRONTEND = 7, |
56 | PERF_COUNT_HW_STALLED_CYCLES_BACKEND = 8, | ||
56 | 57 | ||
57 | PERF_COUNT_HW_MAX, /* non-ABI */ | 58 | PERF_COUNT_HW_MAX, /* non-ABI */ |
58 | }; | 59 | }; |