diff options
author | Stephane Eranian <eranian@google.com> | 2012-02-09 17:20:55 -0500 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2012-03-05 08:55:41 -0500 |
commit | c5cc2cd906ea9fe73e3c93f9ad824996faa278cc (patch) | |
tree | c61d6ba7c7725409217ca288a010125cb1054792 /arch/x86/kernel/cpu/perf_event_intel_lbr.c | |
parent | ff3fb511ba377e8a0a7f553cc352237f70d08121 (diff) |
perf/x86: Add Intel LBR mappings for PERF_SAMPLE_BRANCH filters
This patch adds the mappings from the generic PERF_SAMPLE_BRANCH_*
filters to the actual Intel x86 LBR filters, whenever they exist.
Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1328826068-11713-6-git-send-email-eranian@google.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86/kernel/cpu/perf_event_intel_lbr.c')
-rw-r--r-- | arch/x86/kernel/cpu/perf_event_intel_lbr.c | 103 |
1 files changed, 101 insertions, 2 deletions
diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c index 6710a5116ebd..e54a063b2863 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c +++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c | |||
@@ -14,6 +14,49 @@ enum { | |||
14 | }; | 14 | }; |
15 | 15 | ||
16 | /* | 16 | /* |
17 | * Intel LBR_SELECT bits | ||
18 | * Intel Vol3a, April 2011, Section 16.7 Table 16-10 | ||
19 | * | ||
20 | * Hardware branch filter (not available on all CPUs) | ||
21 | */ | ||
22 | #define LBR_KERNEL_BIT 0 /* do not capture at ring0 */ | ||
23 | #define LBR_USER_BIT 1 /* do not capture at ring > 0 */ | ||
24 | #define LBR_JCC_BIT 2 /* do not capture conditional branches */ | ||
25 | #define LBR_REL_CALL_BIT 3 /* do not capture relative calls */ | ||
26 | #define LBR_IND_CALL_BIT 4 /* do not capture indirect calls */ | ||
27 | #define LBR_RETURN_BIT 5 /* do not capture near returns */ | ||
28 | #define LBR_IND_JMP_BIT 6 /* do not capture indirect jumps */ | ||
29 | #define LBR_REL_JMP_BIT 7 /* do not capture relative jumps */ | ||
30 | #define LBR_FAR_BIT 8 /* do not capture far branches */ | ||
31 | |||
32 | #define LBR_KERNEL (1 << LBR_KERNEL_BIT) | ||
33 | #define LBR_USER (1 << LBR_USER_BIT) | ||
34 | #define LBR_JCC (1 << LBR_JCC_BIT) | ||
35 | #define LBR_REL_CALL (1 << LBR_REL_CALL_BIT) | ||
36 | #define LBR_IND_CALL (1 << LBR_IND_CALL_BIT) | ||
37 | #define LBR_RETURN (1 << LBR_RETURN_BIT) | ||
38 | #define LBR_REL_JMP (1 << LBR_REL_JMP_BIT) | ||
39 | #define LBR_IND_JMP (1 << LBR_IND_JMP_BIT) | ||
40 | #define LBR_FAR (1 << LBR_FAR_BIT) | ||
41 | |||
42 | #define LBR_PLM (LBR_KERNEL | LBR_USER) | ||
43 | |||
44 | #define LBR_SEL_MASK 0x1ff /* valid bits in LBR_SELECT */ | ||
45 | #define LBR_NOT_SUPP -1 /* LBR filter not supported */ | ||
46 | #define LBR_IGN 0 /* ignored */ | ||
47 | |||
48 | #define LBR_ANY \ | ||
49 | (LBR_JCC |\ | ||
50 | LBR_REL_CALL |\ | ||
51 | LBR_IND_CALL |\ | ||
52 | LBR_RETURN |\ | ||
53 | LBR_REL_JMP |\ | ||
54 | LBR_IND_JMP |\ | ||
55 | LBR_FAR) | ||
56 | |||
57 | #define LBR_FROM_FLAG_MISPRED (1ULL << 63) | ||
58 | |||
59 | /* | ||
17 | * We only support LBR implementations that have FREEZE_LBRS_ON_PMI | 60 | * We only support LBR implementations that have FREEZE_LBRS_ON_PMI |
18 | * otherwise it becomes near impossible to get a reliable stack. | 61 | * otherwise it becomes near impossible to get a reliable stack. |
19 | */ | 62 | */ |
@@ -151,8 +194,6 @@ static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc) | |||
151 | cpuc->lbr_stack.nr = i; | 194 | cpuc->lbr_stack.nr = i; |
152 | } | 195 | } |
153 | 196 | ||
154 | #define LBR_FROM_FLAG_MISPRED (1ULL << 63) | ||
155 | |||
156 | /* | 197 | /* |
157 | * Due to lack of segmentation in Linux the effective address (offset) | 198 | * Due to lack of segmentation in Linux the effective address (offset) |
158 | * is the same as the linear address, allowing us to merge the LIP and EIP | 199 | * is the same as the linear address, allowing us to merge the LIP and EIP |
@@ -200,26 +241,84 @@ void intel_pmu_lbr_read(void) | |||
200 | intel_pmu_lbr_read_64(cpuc); | 241 | intel_pmu_lbr_read_64(cpuc); |
201 | } | 242 | } |
202 | 243 | ||
244 | /* | ||
245 | * Map interface branch filters onto LBR filters | ||
246 | */ | ||
247 | static const int nhm_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = { | ||
248 | [PERF_SAMPLE_BRANCH_ANY] = LBR_ANY, | ||
249 | [PERF_SAMPLE_BRANCH_USER] = LBR_USER, | ||
250 | [PERF_SAMPLE_BRANCH_KERNEL] = LBR_KERNEL, | ||
251 | [PERF_SAMPLE_BRANCH_HV] = LBR_IGN, | ||
252 | [PERF_SAMPLE_BRANCH_ANY_RETURN] = LBR_RETURN | LBR_REL_JMP | ||
253 | | LBR_IND_JMP | LBR_FAR, | ||
254 | /* | ||
255 | * NHM/WSM erratum: must include REL_JMP+IND_JMP to get CALL branches | ||
256 | */ | ||
257 | [PERF_SAMPLE_BRANCH_ANY_CALL] = | ||
258 | LBR_REL_CALL | LBR_IND_CALL | LBR_REL_JMP | LBR_IND_JMP | LBR_FAR, | ||
259 | /* | ||
260 | * NHM/WSM erratum: must include IND_JMP to capture IND_CALL | ||
261 | */ | ||
262 | [PERF_SAMPLE_BRANCH_IND_CALL] = LBR_IND_CALL | LBR_IND_JMP, | ||
263 | }; | ||
264 | |||
265 | static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = { | ||
266 | [PERF_SAMPLE_BRANCH_ANY] = LBR_ANY, | ||
267 | [PERF_SAMPLE_BRANCH_USER] = LBR_USER, | ||
268 | [PERF_SAMPLE_BRANCH_KERNEL] = LBR_KERNEL, | ||
269 | [PERF_SAMPLE_BRANCH_HV] = LBR_IGN, | ||
270 | [PERF_SAMPLE_BRANCH_ANY_RETURN] = LBR_RETURN | LBR_FAR, | ||
271 | [PERF_SAMPLE_BRANCH_ANY_CALL] = LBR_REL_CALL | LBR_IND_CALL | ||
272 | | LBR_FAR, | ||
273 | [PERF_SAMPLE_BRANCH_IND_CALL] = LBR_IND_CALL, | ||
274 | }; | ||
275 | |||
276 | /* core */ | ||
203 | void intel_pmu_lbr_init_core(void) | 277 | void intel_pmu_lbr_init_core(void) |
204 | { | 278 | { |
205 | x86_pmu.lbr_nr = 4; | 279 | x86_pmu.lbr_nr = 4; |
206 | x86_pmu.lbr_tos = MSR_LBR_TOS; | 280 | x86_pmu.lbr_tos = MSR_LBR_TOS; |
207 | x86_pmu.lbr_from = MSR_LBR_CORE_FROM; | 281 | x86_pmu.lbr_from = MSR_LBR_CORE_FROM; |
208 | x86_pmu.lbr_to = MSR_LBR_CORE_TO; | 282 | x86_pmu.lbr_to = MSR_LBR_CORE_TO; |
283 | |||
284 | pr_cont("4-deep LBR, "); | ||
209 | } | 285 | } |
210 | 286 | ||
287 | /* nehalem/westmere */ | ||
211 | void intel_pmu_lbr_init_nhm(void) | 288 | void intel_pmu_lbr_init_nhm(void) |
212 | { | 289 | { |
213 | x86_pmu.lbr_nr = 16; | 290 | x86_pmu.lbr_nr = 16; |
214 | x86_pmu.lbr_tos = MSR_LBR_TOS; | 291 | x86_pmu.lbr_tos = MSR_LBR_TOS; |
215 | x86_pmu.lbr_from = MSR_LBR_NHM_FROM; | 292 | x86_pmu.lbr_from = MSR_LBR_NHM_FROM; |
216 | x86_pmu.lbr_to = MSR_LBR_NHM_TO; | 293 | x86_pmu.lbr_to = MSR_LBR_NHM_TO; |
294 | |||
295 | x86_pmu.lbr_sel_mask = LBR_SEL_MASK; | ||
296 | x86_pmu.lbr_sel_map = nhm_lbr_sel_map; | ||
297 | |||
298 | pr_cont("16-deep LBR, "); | ||
217 | } | 299 | } |
218 | 300 | ||
301 | /* sandy bridge */ | ||
302 | void intel_pmu_lbr_init_snb(void) | ||
303 | { | ||
304 | x86_pmu.lbr_nr = 16; | ||
305 | x86_pmu.lbr_tos = MSR_LBR_TOS; | ||
306 | x86_pmu.lbr_from = MSR_LBR_NHM_FROM; | ||
307 | x86_pmu.lbr_to = MSR_LBR_NHM_TO; | ||
308 | |||
309 | x86_pmu.lbr_sel_mask = LBR_SEL_MASK; | ||
310 | x86_pmu.lbr_sel_map = snb_lbr_sel_map; | ||
311 | |||
312 | pr_cont("16-deep LBR, "); | ||
313 | } | ||
314 | |||
315 | /* atom */ | ||
219 | void intel_pmu_lbr_init_atom(void) | 316 | void intel_pmu_lbr_init_atom(void) |
220 | { | 317 | { |
221 | x86_pmu.lbr_nr = 8; | 318 | x86_pmu.lbr_nr = 8; |
222 | x86_pmu.lbr_tos = MSR_LBR_TOS; | 319 | x86_pmu.lbr_tos = MSR_LBR_TOS; |
223 | x86_pmu.lbr_from = MSR_LBR_CORE_FROM; | 320 | x86_pmu.lbr_from = MSR_LBR_CORE_FROM; |
224 | x86_pmu.lbr_to = MSR_LBR_CORE_TO; | 321 | x86_pmu.lbr_to = MSR_LBR_CORE_TO; |
322 | |||
323 | pr_cont("8-deep LBR, "); | ||
225 | } | 324 | } |