author    Stephane Eranian <eranian@google.com>    2012-02-09 17:20:55 -0500
committer Ingo Molnar <mingo@elte.hu>              2012-03-05 08:55:41 -0500
commit    c5cc2cd906ea9fe73e3c93f9ad824996faa278cc (patch)
tree      c61d6ba7c7725409217ca288a010125cb1054792 /arch
parent    ff3fb511ba377e8a0a7f553cc352237f70d08121 (diff)
perf/x86: Add Intel LBR mappings for PERF_SAMPLE_BRANCH filters
This patch adds the mappings from the generic PERF_SAMPLE_BRANCH_* filters to the actual Intel x86 LBR filters, whenever they exist.

Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1328826068-11713-6-git-send-email-eranian@google.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
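[Editor's note] This patch only adds the mapping tables; the code that consumes them lands later in the series. The standalone sketch below illustrates the intended translation step: each requested PERF_SAMPLE_BRANCH_* bit selects one map entry, the entries are OR-ed together, and the result is inverted, since the patch's own comments document LBR_SELECT bits as "do not capture X" (suppress mode). The helper name lbr_translate, BRANCH_MAX, and the tiny stand-in map are illustrative only, not part of this patch.

/*
 * Illustrative, standalone sketch -- not part of this patch.
 * Shows how a lbr_sel_map-style table is meant to be consumed.
 */
#include <stdint.h>
#include <stdio.h>

#define LBR_SEL_MASK	0x1ff	/* valid bits in LBR_SELECT */
#define LBR_NOT_SUPP	-1	/* filter not supported on this CPU */
#define LBR_IGN		0	/* ignored by hardware */

#define BRANCH_MAX	7	/* number of generic filter bits (sketch) */

/* hypothetical helper: generic branch filter bits -> LBR_SELECT value */
static int lbr_translate(const int *sel_map, uint64_t br_type,
			 uint64_t *lbr_select)
{
	uint64_t mask = 0;
	int i, v;

	for (i = 0; i < BRANCH_MAX; i++) {
		if (!(br_type & (1ULL << i)))
			continue;
		v = sel_map[i];
		if (v == LBR_NOT_SUPP)	/* a filter this CPU lacks */
			return -1;
		if (v != LBR_IGN)
			mask |= v;
	}
	/* LBR_SELECT operates in suppress mode, so invert the mask */
	*lbr_select = ~mask & LBR_SEL_MASK;
	return 0;
}

int main(void)
{
	/* stand-in map: generic bit 0 -> capture conditional jumps */
	static const int map[BRANCH_MAX] = { [0] = 1 << 2 /* LBR_JCC */ };
	uint64_t sel;

	if (!lbr_translate(map, 1ULL << 0, &sel))
		printf("LBR_SELECT = 0x%llx\n", (unsigned long long)sel);
	return 0;
}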
Diffstat (limited to 'arch')
-rw-r--r--  arch/x86/kernel/cpu/perf_event.h             2
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel.c       2
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_lbr.c   103
3 files changed, 104 insertions(+), 3 deletions(-)
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 9b9c580a7ab8..4e948976aefb 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -539,6 +539,8 @@ void intel_pmu_lbr_init_nhm(void);
 
 void intel_pmu_lbr_init_atom(void);
 
+void intel_pmu_lbr_init_snb(void);
+
 int p4_pmu_init(void);
 
 int p6_pmu_init(void);
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 97f7bb587519..b0db01692441 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1757,7 +1757,7 @@ __init int intel_pmu_init(void)
 		memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
 		       sizeof(hw_cache_event_ids));
 
-		intel_pmu_lbr_init_nhm();
+		intel_pmu_lbr_init_snb();
 
 		x86_pmu.event_constraints = intel_snb_event_constraints;
 		x86_pmu.pebs_constraints = intel_snb_pebs_event_constraints;
diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
index 6710a5116ebd..e54a063b2863 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
@@ -14,6 +14,49 @@ enum {
 };
 
 /*
+ * Intel LBR_SELECT bits
+ * Intel Vol3a, April 2011, Section 16.7 Table 16-10
+ *
+ * Hardware branch filter (not available on all CPUs)
+ */
+#define LBR_KERNEL_BIT		0 /* do not capture at ring0 */
+#define LBR_USER_BIT		1 /* do not capture at ring > 0 */
+#define LBR_JCC_BIT		2 /* do not capture conditional branches */
+#define LBR_REL_CALL_BIT	3 /* do not capture relative calls */
+#define LBR_IND_CALL_BIT	4 /* do not capture indirect calls */
+#define LBR_RETURN_BIT		5 /* do not capture near returns */
+#define LBR_IND_JMP_BIT		6 /* do not capture indirect jumps */
+#define LBR_REL_JMP_BIT		7 /* do not capture relative jumps */
+#define LBR_FAR_BIT		8 /* do not capture far branches */
+
+#define LBR_KERNEL	(1 << LBR_KERNEL_BIT)
+#define LBR_USER	(1 << LBR_USER_BIT)
+#define LBR_JCC		(1 << LBR_JCC_BIT)
+#define LBR_REL_CALL	(1 << LBR_REL_CALL_BIT)
+#define LBR_IND_CALL	(1 << LBR_IND_CALL_BIT)
+#define LBR_RETURN	(1 << LBR_RETURN_BIT)
+#define LBR_REL_JMP	(1 << LBR_REL_JMP_BIT)
+#define LBR_IND_JMP	(1 << LBR_IND_JMP_BIT)
+#define LBR_FAR		(1 << LBR_FAR_BIT)
+
+#define LBR_PLM		(LBR_KERNEL | LBR_USER)
+
+#define LBR_SEL_MASK	0x1ff	/* valid bits in LBR_SELECT */
+#define LBR_NOT_SUPP	-1	/* LBR filter not supported */
+#define LBR_IGN		0	/* ignored */
+
+#define LBR_ANY		 \
+	(LBR_JCC	|\
+	 LBR_REL_CALL	|\
+	 LBR_IND_CALL	|\
+	 LBR_RETURN	|\
+	 LBR_REL_JMP	|\
+	 LBR_IND_JMP	|\
+	 LBR_FAR)
+
+#define LBR_FROM_FLAG_MISPRED	(1ULL << 63)
+
+/*
  * We only support LBR implementations that have FREEZE_LBRS_ON_PMI
  * otherwise it becomes near impossible to get a reliable stack.
  */
@@ -151,8 +194,6 @@ static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
 	cpuc->lbr_stack.nr = i;
 }
 
-#define LBR_FROM_FLAG_MISPRED	(1ULL << 63)
-
 /*
  * Due to lack of segmentation in Linux the effective address (offset)
  * is the same as the linear address, allowing us to merge the LIP and EIP
@@ -200,26 +241,84 @@ void intel_pmu_lbr_read(void)
 	intel_pmu_lbr_read_64(cpuc);
 }
 
+/*
+ * Map interface branch filters onto LBR filters
+ */
+static const int nhm_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = {
+	[PERF_SAMPLE_BRANCH_ANY]	= LBR_ANY,
+	[PERF_SAMPLE_BRANCH_USER]	= LBR_USER,
+	[PERF_SAMPLE_BRANCH_KERNEL]	= LBR_KERNEL,
+	[PERF_SAMPLE_BRANCH_HV]		= LBR_IGN,
+	[PERF_SAMPLE_BRANCH_ANY_RETURN]	= LBR_RETURN | LBR_REL_JMP
+					| LBR_IND_JMP | LBR_FAR,
+	/*
+	 * NHM/WSM erratum: must include REL_JMP+IND_JMP to get CALL branches
+	 */
+	[PERF_SAMPLE_BRANCH_ANY_CALL] =
+	 LBR_REL_CALL | LBR_IND_CALL | LBR_REL_JMP | LBR_IND_JMP | LBR_FAR,
+	/*
+	 * NHM/WSM erratum: must include IND_JMP to capture IND_CALL
+	 */
+	[PERF_SAMPLE_BRANCH_IND_CALL]	= LBR_IND_CALL | LBR_IND_JMP,
+};
+
+static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = {
+	[PERF_SAMPLE_BRANCH_ANY]	= LBR_ANY,
+	[PERF_SAMPLE_BRANCH_USER]	= LBR_USER,
+	[PERF_SAMPLE_BRANCH_KERNEL]	= LBR_KERNEL,
+	[PERF_SAMPLE_BRANCH_HV]		= LBR_IGN,
+	[PERF_SAMPLE_BRANCH_ANY_RETURN]	= LBR_RETURN | LBR_FAR,
+	[PERF_SAMPLE_BRANCH_ANY_CALL]	= LBR_REL_CALL | LBR_IND_CALL
+					| LBR_FAR,
+	[PERF_SAMPLE_BRANCH_IND_CALL]	= LBR_IND_CALL,
+};
+
+/* core */
 void intel_pmu_lbr_init_core(void)
 {
 	x86_pmu.lbr_nr     = 4;
 	x86_pmu.lbr_tos    = MSR_LBR_TOS;
 	x86_pmu.lbr_from   = MSR_LBR_CORE_FROM;
 	x86_pmu.lbr_to     = MSR_LBR_CORE_TO;
+
+	pr_cont("4-deep LBR, ");
 }
 
+/* nehalem/westmere */
 void intel_pmu_lbr_init_nhm(void)
 {
 	x86_pmu.lbr_nr     = 16;
 	x86_pmu.lbr_tos    = MSR_LBR_TOS;
 	x86_pmu.lbr_from   = MSR_LBR_NHM_FROM;
 	x86_pmu.lbr_to     = MSR_LBR_NHM_TO;
+
+	x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
+	x86_pmu.lbr_sel_map  = nhm_lbr_sel_map;
+
+	pr_cont("16-deep LBR, ");
 }
 
+/* sandy bridge */
+void intel_pmu_lbr_init_snb(void)
+{
+	x86_pmu.lbr_nr	 = 16;
+	x86_pmu.lbr_tos	 = MSR_LBR_TOS;
+	x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
+	x86_pmu.lbr_to	 = MSR_LBR_NHM_TO;
+
+	x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
+	x86_pmu.lbr_sel_map  = snb_lbr_sel_map;
+
+	pr_cont("16-deep LBR, ");
+}
+
+/* atom */
 void intel_pmu_lbr_init_atom(void)
 {
 	x86_pmu.lbr_nr	   = 8;
 	x86_pmu.lbr_tos    = MSR_LBR_TOS;
 	x86_pmu.lbr_from   = MSR_LBR_CORE_FROM;
 	x86_pmu.lbr_to     = MSR_LBR_CORE_TO;
+
+	pr_cont("8-deep LBR, ");
 }
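[Editor's note] Usage sketch, not part of the commit: once the full PERF_SAMPLE_BRANCH series is applied (v3.4 and later kernels), user space selects these filters through perf_event_attr. The program below assumes headers from such a kernel; it requests user-level call branches, which the driver code above maps onto LBR_SELECT. All constants and fields used here are part of the real perf_event ABI.

#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <string.h>
#include <stdio.h>

int main(void)
{
	struct perf_event_attr attr;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.config = PERF_COUNT_HW_CPU_CYCLES;
	attr.sample_period = 100000;
	/* ask for a branch stack with every sample... */
	attr.sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_BRANCH_STACK;
	/*
	 * ...restricted to user-level call branches; the kernel
	 * translates these bits via the lbr_sel_map tables above
	 */
	attr.branch_sample_type = PERF_SAMPLE_BRANCH_USER |
				  PERF_SAMPLE_BRANCH_ANY_CALL;

	fd = syscall(__NR_perf_event_open, &attr, 0 /* self */,
		     -1 /* any cpu */, -1 /* no group */, 0);
	if (fd < 0) {
		perror("perf_event_open");
		return 1;
	}
	close(fd);
	return 0;
}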