author     Stephane Eranian <eranian@google.com>    2012-02-09 17:20:57 -0500
committer  Ingo Molnar <mingo@elte.hu>              2012-03-05 08:55:41 -0500
commit     60ce0fbd072695866cb27b729690ab59dce705a5
tree       e11c2aa50129bbcbc4e9eb39bf00bbfd63891df3
parent     88c9a65e13f393fd60d8b9e9c659a34f9e39967d
perf/x86: Implement PERF_SAMPLE_BRANCH for Intel CPUs
This patch implements PERF_SAMPLE_BRANCH support for Intel x86 processors.
It connects PERF_SAMPLE_BRANCH to the actual LBR.

The patch adds the hooks in the PMU irq handler to save the LBR on counter
overflow for both regular and PEBS modes.

Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1328826068-11713-8-git-send-email-eranian@google.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
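From user space, the code added here is reached through the branch-sampling
fields introduced earlier in this series. A minimal sketch, assuming the
PERF_SAMPLE_BRANCH_STACK sample type and the branch_sample_type attribute
field from the preceding patches (the helper name below is illustrative, not
part of this patch):

/* Sketch: request branch sampling on a hardware cycles event. */
#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <string.h>
#include <unistd.h>

static int open_branch_sampling_event(void)
{
        struct perf_event_attr attr;

        memset(&attr, 0, sizeof(attr));
        attr.size = sizeof(attr);
        attr.type = PERF_TYPE_HARDWARE;
        attr.config = PERF_COUNT_HW_CPU_CYCLES;
        attr.sample_period = 100000;
        /* ask for the LBR-backed branch stack in each sample */
        attr.sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_BRANCH_STACK;
        attr.branch_sample_type = PERF_SAMPLE_BRANCH_ANY;

        /* monitor the calling thread on any CPU */
        return syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
}

Samples for such an event then carry a branch stack filled from the LBR by
the hooks added below.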
-rw-r--r--  arch/x86/kernel/cpu/perf_event.h           |  2
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel.c     | 35
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_ds.c  | 10
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_lbr.c | 86
4 files changed, 125 insertions(+), 8 deletions(-)
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 4e948976aef..ef7419cbd13 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -541,6 +541,8 @@ void intel_pmu_lbr_init_atom(void);
 
 void intel_pmu_lbr_init_snb(void);
 
+int intel_pmu_setup_lbr_filter(struct perf_event *event);
+
 int p4_pmu_init(void);
 
 int p6_pmu_init(void);
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index b0db0169244..7cc1e2dcc4d 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -727,6 +727,19 @@ static __initconst const u64 atom_hw_cache_event_ids
        },
 };
 
+static inline bool intel_pmu_needs_lbr_smpl(struct perf_event *event)
+{
+        /* user explicitly requested branch sampling */
+        if (has_branch_stack(event))
+                return true;
+
+        /* implicit branch sampling to correct PEBS skid */
+        if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1)
+                return true;
+
+        return false;
+}
+
 static void intel_pmu_disable_all(void)
 {
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
@@ -881,6 +894,13 @@ static void intel_pmu_disable_event(struct perf_event *event)
        cpuc->intel_ctrl_guest_mask &= ~(1ull << hwc->idx);
        cpuc->intel_ctrl_host_mask &= ~(1ull << hwc->idx);
 
+        /*
+         * must disable before any actual event
+         * because any event may be combined with LBR
+         */
+        if (intel_pmu_needs_lbr_smpl(event))
+                intel_pmu_lbr_disable(event);
+
        if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
                intel_pmu_disable_fixed(hwc);
                return;
@@ -935,6 +955,12 @@ static void intel_pmu_enable_event(struct perf_event *event)
                intel_pmu_enable_bts(hwc->config);
                return;
        }
+        /*
+         * must enabled before any actual event
+         * because any event may be combined with LBR
+         */
+        if (intel_pmu_needs_lbr_smpl(event))
+                intel_pmu_lbr_enable(event);
 
        if (event->attr.exclude_host)
                cpuc->intel_ctrl_guest_mask |= (1ull << hwc->idx);
@@ -1057,6 +1083,9 @@ again:
 
                data.period = event->hw.last_period;
 
+                if (has_branch_stack(event))
+                        data.br_stack = &cpuc->lbr_stack;
+
                if (perf_event_overflow(event, &data, regs))
                        x86_pmu_stop(event, 0);
        }
@@ -1305,6 +1334,12 @@ static int intel_pmu_hw_config(struct perf_event *event)
                event->hw.config = alt_config;
        }
 
+        if (intel_pmu_needs_lbr_smpl(event)) {
+                ret = intel_pmu_setup_lbr_filter(event);
+                if (ret)
+                        return ret;
+        }
+
        if (event->attr.type != PERF_TYPE_RAW)
                return 0;
 
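The cpuc->lbr_stack attached to perf_sample_data in the overflow handler
above is what user space eventually sees as the PERF_SAMPLE_BRANCH_STACK
payload: a count followed by from/to/flags entries. A minimal sketch of a
consumer walking that payload (the helper name is illustrative; the record
layout is the one defined earlier in this series, not by this patch):

/* Sketch: print a decoded branch stack from a sample payload. */
#include <linux/perf_event.h>
#include <stdint.h>
#include <stdio.h>

static void print_branch_stack(const uint64_t *payload)
{
        /* payload[0] holds the number of branch entries that follow */
        uint64_t nr = payload[0];
        const struct perf_branch_entry *br =
                (const struct perf_branch_entry *)(payload + 1);

        for (uint64_t i = 0; i < nr; i++)
                printf("branch %llu: %#llx -> %#llx %s\n",
                       (unsigned long long)i,
                       (unsigned long long)br[i].from,
                       (unsigned long long)br[i].to,
                       br[i].mispred ? "(mispredicted)" : "");
}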
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index d6bd49faa40..ee7e3c8d9d6 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -439,9 +439,6 @@ void intel_pmu_pebs_enable(struct perf_event *event)
        hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;
 
        cpuc->pebs_enabled |= 1ULL << hwc->idx;
-
-        if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1)
-                intel_pmu_lbr_enable(event);
 }
 
 void intel_pmu_pebs_disable(struct perf_event *event)
@@ -454,9 +451,6 @@ void intel_pmu_pebs_disable(struct perf_event *event)
        wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
 
        hwc->config |= ARCH_PERFMON_EVENTSEL_INT;
-
-        if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1)
-                intel_pmu_lbr_disable(event);
 }
 
 void intel_pmu_pebs_enable_all(void)
@@ -572,6 +566,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
         * both formats and we don't use the other fields in this
         * routine.
         */
+        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
        struct pebs_record_core *pebs = __pebs;
        struct perf_sample_data data;
        struct pt_regs regs;
@@ -602,6 +597,9 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
        else
                regs.flags &= ~PERF_EFLAGS_EXACT;
 
+        if (has_branch_stack(event))
+                data.br_stack = &cpuc->lbr_stack;
+
        if (perf_event_overflow(event, &data, &regs))
                x86_pmu_stop(event, 0);
 }
diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
index 07f0ff88e44..d0fb864ff2b 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
@@ -56,6 +56,10 @@ enum {
 
 #define LBR_FROM_FLAG_MISPRED  (1ULL << 63)
 
+#define for_each_branch_sample_type(x) \
+        for ((x) = PERF_SAMPLE_BRANCH_USER; \
+             (x) < PERF_SAMPLE_BRANCH_MAX; (x) <<= 1)
+
 /*
  * We only support LBR implementations that have FREEZE_LBRS_ON_PMI
  * otherwise it becomes near impossible to get a reliable stack.
@@ -64,6 +68,10 @@ enum {
 static void __intel_pmu_lbr_enable(void)
 {
        u64 debugctl;
+        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+
+        if (cpuc->lbr_sel)
+                wrmsrl(MSR_LBR_SELECT, cpuc->lbr_sel->config);
 
        rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
        debugctl |= (DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
@@ -119,7 +127,6 @@ void intel_pmu_lbr_enable(struct perf_event *event)
         * Reset the LBR stack if we changed task context to
         * avoid data leaks.
         */
-
        if (event->ctx->task && cpuc->lbr_context != event->ctx) {
                intel_pmu_lbr_reset();
                cpuc->lbr_context = event->ctx;
@@ -138,8 +145,11 @@ void intel_pmu_lbr_disable(struct perf_event *event)
        cpuc->lbr_users--;
        WARN_ON_ONCE(cpuc->lbr_users < 0);
 
-        if (cpuc->enabled && !cpuc->lbr_users)
+        if (cpuc->enabled && !cpuc->lbr_users) {
                __intel_pmu_lbr_disable();
+                /* avoid stale pointer */
+                cpuc->lbr_context = NULL;
+        }
 }
 
 void intel_pmu_lbr_enable_all(void)
@@ -158,6 +168,9 @@ void intel_pmu_lbr_disable_all(void)
                __intel_pmu_lbr_disable();
 }
 
+/*
+ * TOS = most recently recorded branch
+ */
 static inline u64 intel_pmu_lbr_tos(void)
 {
        u64 tos;
@@ -242,6 +255,75 @@ void intel_pmu_lbr_read(void)
 }
 
 /*
+ * setup the HW LBR filter
+ * Used only when available, may not be enough to disambiguate
+ * all branches, may need the help of the SW filter
+ */
+static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event)
+{
+        struct hw_perf_event_extra *reg;
+        u64 br_type = event->attr.branch_sample_type;
+        u64 mask = 0, m;
+        u64 v;
+
+        for_each_branch_sample_type(m) {
+                if (!(br_type & m))
+                        continue;
+
+                v = x86_pmu.lbr_sel_map[m];
+                if (v == LBR_NOT_SUPP)
+                        return -EOPNOTSUPP;
+                mask |= v;
+
+                if (m == PERF_SAMPLE_BRANCH_ANY)
+                        break;
+        }
+        reg = &event->hw.branch_reg;
+        reg->idx = EXTRA_REG_LBR;
+
+        /* LBR_SELECT operates in suppress mode so invert mask */
+        reg->config = ~mask & x86_pmu.lbr_sel_mask;
+
+        return 0;
+}
+
+/*
+ * all the bits supported on some flavor of x86LBR
+ * we ignore BRANCH_HV because it is not supported
+ */
+#define PERF_SAMPLE_BRANCH_X86_ALL      \
+        (PERF_SAMPLE_BRANCH_ANY         |\
+         PERF_SAMPLE_BRANCH_USER        |\
+         PERF_SAMPLE_BRANCH_KERNEL)
+
+int intel_pmu_setup_lbr_filter(struct perf_event *event)
+{
+        u64 br_type = event->attr.branch_sample_type;
+
+        /*
+         * no LBR on this PMU
+         */
+        if (!x86_pmu.lbr_nr)
+                return -EOPNOTSUPP;
+
+        /*
+         * if no LBR HW filter, users can only
+         * capture all branches
+         */
+        if (!x86_pmu.lbr_sel_map) {
+                if (br_type != PERF_SAMPLE_BRANCH_X86_ALL)
+                        return -EOPNOTSUPP;
+                return 0;
+        }
+        /*
+         * we ignore branch priv levels we do not
+         * know about: BRANCH_HV
+         */
+
+        return intel_pmu_setup_hw_lbr_filter(event);
+}
+
+/*
  * Map interface branch filters onto LBR filters
  */
 static const int nhm_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = {