path: root/arch/x86/kernel/cpu/perf_event_intel.c
author     Jonathan Herman <hermanjl@cs.unc.edu>  2013-01-17 16:15:55 -0500
committer  Jonathan Herman <hermanjl@cs.unc.edu>  2013-01-17 16:15:55 -0500
commit     8dea78da5cee153b8af9c07a2745f6c55057fe12
tree       a8f4d49d63b1ecc92f2fddceba0655b2472c5bd9 /arch/x86/kernel/cpu/perf_event_intel.c
parent     406089d01562f1e2bf9f089fd7637009ebaad589
Patched in Tegra support.
Diffstat (limited to 'arch/x86/kernel/cpu/perf_event_intel.c')
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel.c | 826
1 file changed, 155 insertions(+), 671 deletions(-)
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 93b9e1181f8..f88af2c2a56 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1,36 +1,29 @@
1#ifdef CONFIG_CPU_SUP_INTEL
2
1/* 3/*
2 * Per core/cpu state 4 * Per core/cpu state
3 * 5 *
4 * Used to coordinate shared registers between HT threads or 6 * Used to coordinate shared registers between HT threads or
5 * among events on a single PMU. 7 * among events on a single PMU.
6 */ 8 */
7 9struct intel_shared_regs {
8#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 10 struct er_account regs[EXTRA_REG_MAX];
9 11 int refcnt; /* per-core: #HT threads */
10#include <linux/stddef.h> 12 unsigned core_id; /* per-core: core id */
11#include <linux/types.h> 13};
12#include <linux/init.h>
13#include <linux/slab.h>
14#include <linux/export.h>
15
16#include <asm/hardirq.h>
17#include <asm/apic.h>
18
19#include "perf_event.h"
20 14
21/* 15/*
22 * Intel PerfMon, used on Core and later. 16 * Intel PerfMon, used on Core and later.
23 */ 17 */
24static u64 intel_perfmon_event_map[PERF_COUNT_HW_MAX] __read_mostly = 18static u64 intel_perfmon_event_map[PERF_COUNT_HW_MAX] __read_mostly =
25{ 19{
26 [PERF_COUNT_HW_CPU_CYCLES] = 0x003c, 20 [PERF_COUNT_HW_CPU_CYCLES] = 0x003c,
27 [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, 21 [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
28 [PERF_COUNT_HW_CACHE_REFERENCES] = 0x4f2e, 22 [PERF_COUNT_HW_CACHE_REFERENCES] = 0x4f2e,
29 [PERF_COUNT_HW_CACHE_MISSES] = 0x412e, 23 [PERF_COUNT_HW_CACHE_MISSES] = 0x412e,
30 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, 24 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4,
31 [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, 25 [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5,
32 [PERF_COUNT_HW_BUS_CYCLES] = 0x013c, 26 [PERF_COUNT_HW_BUS_CYCLES] = 0x013c,
33 [PERF_COUNT_HW_REF_CPU_CYCLES] = 0x0300, /* pseudo-encoding */
34}; 27};
35 28
36static struct event_constraint intel_core_event_constraints[] __read_mostly = 29static struct event_constraint intel_core_event_constraints[] __read_mostly =
@@ -48,7 +41,12 @@ static struct event_constraint intel_core2_event_constraints[] __read_mostly =
48{ 41{
49 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ 42 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
50 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ 43 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
51 FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ 44 /*
45 * Core2 has Fixed Counter 2 listed as CPU_CLK_UNHALTED.REF and event
46 * 0x013c as CPU_CLK_UNHALTED.BUS and specifies there is a fixed
47 * ratio between these counters.
48 */
49 /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
52 INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */ 50 INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */
53 INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */ 51 INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
54 INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */ 52 INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
@@ -66,7 +64,7 @@ static struct event_constraint intel_nehalem_event_constraints[] __read_mostly =
66{ 64{
67 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ 65 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
68 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ 66 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
69 FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ 67 /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
70 INTEL_EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */ 68 INTEL_EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */
71 INTEL_EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */ 69 INTEL_EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */
72 INTEL_EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */ 70 INTEL_EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */
@@ -88,7 +86,7 @@ static struct event_constraint intel_westmere_event_constraints[] __read_mostly
88{ 86{
89 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ 87 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
90 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ 88 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
91 FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ 89 /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
92 INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */ 90 INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
93 INTEL_EVENT_CONSTRAINT(0x60, 0x1), /* OFFCORE_REQUESTS_OUTSTANDING */ 91 INTEL_EVENT_CONSTRAINT(0x60, 0x1), /* OFFCORE_REQUESTS_OUTSTANDING */
94 INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */ 92 INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */
@@ -100,7 +98,7 @@ static struct event_constraint intel_snb_event_constraints[] __read_mostly =
100{ 98{
101 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ 99 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
102 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ 100 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
103 FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ 101 /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
104 INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.PENDING */ 102 INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.PENDING */
105 INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */ 103 INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
106 INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */ 104 INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
@@ -123,7 +121,7 @@ static struct event_constraint intel_gen_event_constraints[] __read_mostly =
123{ 121{
124 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ 122 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
125 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ 123 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
126 FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ 124 /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
127 EVENT_CONSTRAINT_END 125 EVENT_CONSTRAINT_END
128}; 126};
129 127
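
How to read the constraint tables above: each entry pairs an event encoding with a bitmask of the counters that may count it, and the scheduler matches an event against an entry by masking the event's config with the entry's cmask (see x86_get_event_constraints() further down in this diff). Below is a minimal userspace model of that check; the field names follow the kernel's struct event_constraint, but the layout and helper are illustrative only.

#include <stdbool.h>
#include <stdint.h>

/* Simplified stand-in for struct event_constraint. */
struct event_constraint_model {
	uint64_t idxmsk64;	/* bitmask of counters the matching event may use */
	uint64_t code;		/* config bits the constraint matches */
	uint64_t cmask;		/* which config bits participate in the match */
};

/* Does constraint @c apply to @config, and may that event run on counter @idx? */
static bool constraint_allows(const struct event_constraint_model *c,
			      uint64_t config, int idx)
{
	if ((config & c->cmask) != c->code)
		return false;			/* constraint is for a different event */
	return c->idxmsk64 & (1ULL << idx);	/* counter permitted by the table entry */
}

Read this way, INTEL_EVENT_CONSTRAINT(0x10, 0x1) above says FP_COMP_OPS_EXE may only be scheduled on general counter 0, and FIXED_EVENT_CONSTRAINT(0x00c0, 0) pins INST_RETIRED.ANY to fixed counter 0.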
@@ -138,84 +136,6 @@ static u64 intel_pmu_event_map(int hw_event)
138 return intel_perfmon_event_map[hw_event]; 136 return intel_perfmon_event_map[hw_event];
139} 137}
140 138
141#define SNB_DMND_DATA_RD (1ULL << 0)
142#define SNB_DMND_RFO (1ULL << 1)
143#define SNB_DMND_IFETCH (1ULL << 2)
144#define SNB_DMND_WB (1ULL << 3)
145#define SNB_PF_DATA_RD (1ULL << 4)
146#define SNB_PF_RFO (1ULL << 5)
147#define SNB_PF_IFETCH (1ULL << 6)
148#define SNB_LLC_DATA_RD (1ULL << 7)
149#define SNB_LLC_RFO (1ULL << 8)
150#define SNB_LLC_IFETCH (1ULL << 9)
151#define SNB_BUS_LOCKS (1ULL << 10)
152#define SNB_STRM_ST (1ULL << 11)
153#define SNB_OTHER (1ULL << 15)
154#define SNB_RESP_ANY (1ULL << 16)
155#define SNB_NO_SUPP (1ULL << 17)
156#define SNB_LLC_HITM (1ULL << 18)
157#define SNB_LLC_HITE (1ULL << 19)
158#define SNB_LLC_HITS (1ULL << 20)
159#define SNB_LLC_HITF (1ULL << 21)
160#define SNB_LOCAL (1ULL << 22)
161#define SNB_REMOTE (0xffULL << 23)
162#define SNB_SNP_NONE (1ULL << 31)
163#define SNB_SNP_NOT_NEEDED (1ULL << 32)
164#define SNB_SNP_MISS (1ULL << 33)
165#define SNB_NO_FWD (1ULL << 34)
166#define SNB_SNP_FWD (1ULL << 35)
167#define SNB_HITM (1ULL << 36)
168#define SNB_NON_DRAM (1ULL << 37)
169
170#define SNB_DMND_READ (SNB_DMND_DATA_RD|SNB_LLC_DATA_RD)
171#define SNB_DMND_WRITE (SNB_DMND_RFO|SNB_LLC_RFO)
172#define SNB_DMND_PREFETCH (SNB_PF_DATA_RD|SNB_PF_RFO)
173
174#define SNB_SNP_ANY (SNB_SNP_NONE|SNB_SNP_NOT_NEEDED| \
175 SNB_SNP_MISS|SNB_NO_FWD|SNB_SNP_FWD| \
176 SNB_HITM)
177
178#define SNB_DRAM_ANY (SNB_LOCAL|SNB_REMOTE|SNB_SNP_ANY)
179#define SNB_DRAM_REMOTE (SNB_REMOTE|SNB_SNP_ANY)
180
181#define SNB_L3_ACCESS SNB_RESP_ANY
182#define SNB_L3_MISS (SNB_DRAM_ANY|SNB_NON_DRAM)
183
184static __initconst const u64 snb_hw_cache_extra_regs
185 [PERF_COUNT_HW_CACHE_MAX]
186 [PERF_COUNT_HW_CACHE_OP_MAX]
187 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
188{
189 [ C(LL ) ] = {
190 [ C(OP_READ) ] = {
191 [ C(RESULT_ACCESS) ] = SNB_DMND_READ|SNB_L3_ACCESS,
192 [ C(RESULT_MISS) ] = SNB_DMND_READ|SNB_L3_MISS,
193 },
194 [ C(OP_WRITE) ] = {
195 [ C(RESULT_ACCESS) ] = SNB_DMND_WRITE|SNB_L3_ACCESS,
196 [ C(RESULT_MISS) ] = SNB_DMND_WRITE|SNB_L3_MISS,
197 },
198 [ C(OP_PREFETCH) ] = {
199 [ C(RESULT_ACCESS) ] = SNB_DMND_PREFETCH|SNB_L3_ACCESS,
200 [ C(RESULT_MISS) ] = SNB_DMND_PREFETCH|SNB_L3_MISS,
201 },
202 },
203 [ C(NODE) ] = {
204 [ C(OP_READ) ] = {
205 [ C(RESULT_ACCESS) ] = SNB_DMND_READ|SNB_DRAM_ANY,
206 [ C(RESULT_MISS) ] = SNB_DMND_READ|SNB_DRAM_REMOTE,
207 },
208 [ C(OP_WRITE) ] = {
209 [ C(RESULT_ACCESS) ] = SNB_DMND_WRITE|SNB_DRAM_ANY,
210 [ C(RESULT_MISS) ] = SNB_DMND_WRITE|SNB_DRAM_REMOTE,
211 },
212 [ C(OP_PREFETCH) ] = {
213 [ C(RESULT_ACCESS) ] = SNB_DMND_PREFETCH|SNB_DRAM_ANY,
214 [ C(RESULT_MISS) ] = SNB_DMND_PREFETCH|SNB_DRAM_REMOTE,
215 },
216 },
217};
218
219static __initconst const u64 snb_hw_cache_event_ids 139static __initconst const u64 snb_hw_cache_event_ids
220 [PERF_COUNT_HW_CACHE_MAX] 140 [PERF_COUNT_HW_CACHE_MAX]
221 [PERF_COUNT_HW_CACHE_OP_MAX] 141 [PERF_COUNT_HW_CACHE_OP_MAX]
@@ -313,16 +233,16 @@ static __initconst const u64 snb_hw_cache_event_ids
313 }, 233 },
314 [ C(NODE) ] = { 234 [ C(NODE) ] = {
315 [ C(OP_READ) ] = { 235 [ C(OP_READ) ] = {
316 [ C(RESULT_ACCESS) ] = 0x01b7, 236 [ C(RESULT_ACCESS) ] = -1,
317 [ C(RESULT_MISS) ] = 0x01b7, 237 [ C(RESULT_MISS) ] = -1,
318 }, 238 },
319 [ C(OP_WRITE) ] = { 239 [ C(OP_WRITE) ] = {
320 [ C(RESULT_ACCESS) ] = 0x01b7, 240 [ C(RESULT_ACCESS) ] = -1,
321 [ C(RESULT_MISS) ] = 0x01b7, 241 [ C(RESULT_MISS) ] = -1,
322 }, 242 },
323 [ C(OP_PREFETCH) ] = { 243 [ C(OP_PREFETCH) ] = {
324 [ C(RESULT_ACCESS) ] = 0x01b7, 244 [ C(RESULT_ACCESS) ] = -1,
325 [ C(RESULT_MISS) ] = 0x01b7, 245 [ C(RESULT_MISS) ] = -1,
326 }, 246 },
327 }, 247 },
328 248
@@ -465,15 +385,14 @@ static __initconst const u64 westmere_hw_cache_event_ids
465#define NHM_LOCAL_DRAM (1 << 14) 385#define NHM_LOCAL_DRAM (1 << 14)
466#define NHM_NON_DRAM (1 << 15) 386#define NHM_NON_DRAM (1 << 15)
467 387
468#define NHM_LOCAL (NHM_LOCAL_DRAM|NHM_REMOTE_CACHE_FWD) 388#define NHM_ALL_DRAM (NHM_REMOTE_DRAM|NHM_LOCAL_DRAM)
469#define NHM_REMOTE (NHM_REMOTE_DRAM)
470 389
471#define NHM_DMND_READ (NHM_DMND_DATA_RD) 390#define NHM_DMND_READ (NHM_DMND_DATA_RD)
472#define NHM_DMND_WRITE (NHM_DMND_RFO|NHM_DMND_WB) 391#define NHM_DMND_WRITE (NHM_DMND_RFO|NHM_DMND_WB)
473#define NHM_DMND_PREFETCH (NHM_PF_DATA_RD|NHM_PF_DATA_RFO) 392#define NHM_DMND_PREFETCH (NHM_PF_DATA_RD|NHM_PF_DATA_RFO)
474 393
475#define NHM_L3_HIT (NHM_UNCORE_HIT|NHM_OTHER_CORE_HIT_SNP|NHM_OTHER_CORE_HITM) 394#define NHM_L3_HIT (NHM_UNCORE_HIT|NHM_OTHER_CORE_HIT_SNP|NHM_OTHER_CORE_HITM)
476#define NHM_L3_MISS (NHM_NON_DRAM|NHM_LOCAL_DRAM|NHM_REMOTE_DRAM|NHM_REMOTE_CACHE_FWD) 395#define NHM_L3_MISS (NHM_NON_DRAM|NHM_ALL_DRAM|NHM_REMOTE_CACHE_FWD)
477#define NHM_L3_ACCESS (NHM_L3_HIT|NHM_L3_MISS) 396#define NHM_L3_ACCESS (NHM_L3_HIT|NHM_L3_MISS)
478 397
479static __initconst const u64 nehalem_hw_cache_extra_regs 398static __initconst const u64 nehalem_hw_cache_extra_regs
@@ -497,16 +416,16 @@ static __initconst const u64 nehalem_hw_cache_extra_regs
497 }, 416 },
498 [ C(NODE) ] = { 417 [ C(NODE) ] = {
499 [ C(OP_READ) ] = { 418 [ C(OP_READ) ] = {
500 [ C(RESULT_ACCESS) ] = NHM_DMND_READ|NHM_LOCAL|NHM_REMOTE, 419 [ C(RESULT_ACCESS) ] = NHM_DMND_READ|NHM_ALL_DRAM,
501 [ C(RESULT_MISS) ] = NHM_DMND_READ|NHM_REMOTE, 420 [ C(RESULT_MISS) ] = NHM_DMND_READ|NHM_REMOTE_DRAM,
502 }, 421 },
503 [ C(OP_WRITE) ] = { 422 [ C(OP_WRITE) ] = {
504 [ C(RESULT_ACCESS) ] = NHM_DMND_WRITE|NHM_LOCAL|NHM_REMOTE, 423 [ C(RESULT_ACCESS) ] = NHM_DMND_WRITE|NHM_ALL_DRAM,
505 [ C(RESULT_MISS) ] = NHM_DMND_WRITE|NHM_REMOTE, 424 [ C(RESULT_MISS) ] = NHM_DMND_WRITE|NHM_REMOTE_DRAM,
506 }, 425 },
507 [ C(OP_PREFETCH) ] = { 426 [ C(OP_PREFETCH) ] = {
508 [ C(RESULT_ACCESS) ] = NHM_DMND_PREFETCH|NHM_LOCAL|NHM_REMOTE, 427 [ C(RESULT_ACCESS) ] = NHM_DMND_PREFETCH|NHM_ALL_DRAM,
509 [ C(RESULT_MISS) ] = NHM_DMND_PREFETCH|NHM_REMOTE, 428 [ C(RESULT_MISS) ] = NHM_DMND_PREFETCH|NHM_REMOTE_DRAM,
510 }, 429 },
511 }, 430 },
512}; 431};
@@ -808,26 +727,13 @@ static __initconst const u64 atom_hw_cache_event_ids
808 }, 727 },
809}; 728};
810 729
811static inline bool intel_pmu_needs_lbr_smpl(struct perf_event *event)
812{
813 /* user explicitly requested branch sampling */
814 if (has_branch_stack(event))
815 return true;
816
817 /* implicit branch sampling to correct PEBS skid */
818 if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1)
819 return true;
820
821 return false;
822}
823
824static void intel_pmu_disable_all(void) 730static void intel_pmu_disable_all(void)
825{ 731{
826 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 732 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
827 733
828 wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0); 734 wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
829 735
830 if (test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask)) 736 if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask))
831 intel_pmu_disable_bts(); 737 intel_pmu_disable_bts();
832 738
833 intel_pmu_pebs_disable_all(); 739 intel_pmu_pebs_disable_all();
@@ -840,12 +746,11 @@ static void intel_pmu_enable_all(int added)
840 746
841 intel_pmu_pebs_enable_all(); 747 intel_pmu_pebs_enable_all();
842 intel_pmu_lbr_enable_all(); 748 intel_pmu_lbr_enable_all();
843 wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 749 wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl);
844 x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask);
845 750
846 if (test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask)) { 751 if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) {
847 struct perf_event *event = 752 struct perf_event *event =
848 cpuc->events[INTEL_PMC_IDX_FIXED_BTS]; 753 cpuc->events[X86_PMC_IDX_FIXED_BTS];
849 754
850 if (WARN_ON_ONCE(!event)) 755 if (WARN_ON_ONCE(!event))
851 return; 756 return;
@@ -951,7 +856,7 @@ static inline void intel_pmu_ack_status(u64 ack)
951 856
952static void intel_pmu_disable_fixed(struct hw_perf_event *hwc) 857static void intel_pmu_disable_fixed(struct hw_perf_event *hwc)
953{ 858{
954 int idx = hwc->idx - INTEL_PMC_IDX_FIXED; 859 int idx = hwc->idx - X86_PMC_IDX_FIXED;
955 u64 ctrl_val, mask; 860 u64 ctrl_val, mask;
956 861
957 mask = 0xfULL << (idx * 4); 862 mask = 0xfULL << (idx * 4);
@@ -964,24 +869,13 @@ static void intel_pmu_disable_fixed(struct hw_perf_event *hwc)
964static void intel_pmu_disable_event(struct perf_event *event) 869static void intel_pmu_disable_event(struct perf_event *event)
965{ 870{
966 struct hw_perf_event *hwc = &event->hw; 871 struct hw_perf_event *hwc = &event->hw;
967 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
968 872
969 if (unlikely(hwc->idx == INTEL_PMC_IDX_FIXED_BTS)) { 873 if (unlikely(hwc->idx == X86_PMC_IDX_FIXED_BTS)) {
970 intel_pmu_disable_bts(); 874 intel_pmu_disable_bts();
971 intel_pmu_drain_bts_buffer(); 875 intel_pmu_drain_bts_buffer();
972 return; 876 return;
973 } 877 }
974 878
975 cpuc->intel_ctrl_guest_mask &= ~(1ull << hwc->idx);
976 cpuc->intel_ctrl_host_mask &= ~(1ull << hwc->idx);
977
978 /*
979 * must disable before any actual event
980 * because any event may be combined with LBR
981 */
982 if (intel_pmu_needs_lbr_smpl(event))
983 intel_pmu_lbr_disable(event);
984
985 if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { 879 if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
986 intel_pmu_disable_fixed(hwc); 880 intel_pmu_disable_fixed(hwc);
987 return; 881 return;
@@ -995,7 +889,7 @@ static void intel_pmu_disable_event(struct perf_event *event)
995 889
996static void intel_pmu_enable_fixed(struct hw_perf_event *hwc) 890static void intel_pmu_enable_fixed(struct hw_perf_event *hwc)
997{ 891{
998 int idx = hwc->idx - INTEL_PMC_IDX_FIXED; 892 int idx = hwc->idx - X86_PMC_IDX_FIXED;
999 u64 ctrl_val, bits, mask; 893 u64 ctrl_val, bits, mask;
1000 894
1001 /* 895 /*
@@ -1027,26 +921,14 @@ static void intel_pmu_enable_fixed(struct hw_perf_event *hwc)
1027static void intel_pmu_enable_event(struct perf_event *event) 921static void intel_pmu_enable_event(struct perf_event *event)
1028{ 922{
1029 struct hw_perf_event *hwc = &event->hw; 923 struct hw_perf_event *hwc = &event->hw;
1030 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1031 924
1032 if (unlikely(hwc->idx == INTEL_PMC_IDX_FIXED_BTS)) { 925 if (unlikely(hwc->idx == X86_PMC_IDX_FIXED_BTS)) {
1033 if (!__this_cpu_read(cpu_hw_events.enabled)) 926 if (!__this_cpu_read(cpu_hw_events.enabled))
1034 return; 927 return;
1035 928
1036 intel_pmu_enable_bts(hwc->config); 929 intel_pmu_enable_bts(hwc->config);
1037 return; 930 return;
1038 } 931 }
1039 /*
1040 * must enabled before any actual event
1041 * because any event may be combined with LBR
1042 */
1043 if (intel_pmu_needs_lbr_smpl(event))
1044 intel_pmu_lbr_enable(event);
1045
1046 if (event->attr.exclude_host)
1047 cpuc->intel_ctrl_guest_mask |= (1ull << hwc->idx);
1048 if (event->attr.exclude_guest)
1049 cpuc->intel_ctrl_host_mask |= (1ull << hwc->idx);
1050 932
1051 if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { 933 if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
1052 intel_pmu_enable_fixed(hwc); 934 intel_pmu_enable_fixed(hwc);
@@ -1063,7 +945,7 @@ static void intel_pmu_enable_event(struct perf_event *event)
1063 * Save and restart an expired event. Called by NMI contexts, 945 * Save and restart an expired event. Called by NMI contexts,
1064 * so it has to be careful about preempting normal event ops: 946 * so it has to be careful about preempting normal event ops:
1065 */ 947 */
1066int intel_pmu_save_and_restart(struct perf_event *event) 948static int intel_pmu_save_and_restart(struct perf_event *event)
1067{ 949{
1068 x86_perf_event_update(event); 950 x86_perf_event_update(event);
1069 return x86_perf_event_set_period(event); 951 return x86_perf_event_set_period(event);
@@ -1080,14 +962,14 @@ static void intel_pmu_reset(void)
1080 962
1081 local_irq_save(flags); 963 local_irq_save(flags);
1082 964
1083 pr_info("clearing PMU state on CPU#%d\n", smp_processor_id()); 965 printk("clearing PMU state on CPU#%d\n", smp_processor_id());
1084 966
1085 for (idx = 0; idx < x86_pmu.num_counters; idx++) { 967 for (idx = 0; idx < x86_pmu.num_counters; idx++) {
1086 wrmsrl_safe(x86_pmu_config_addr(idx), 0ull); 968 checking_wrmsrl(x86_pmu_config_addr(idx), 0ull);
1087 wrmsrl_safe(x86_pmu_event_addr(idx), 0ull); 969 checking_wrmsrl(x86_pmu_event_addr(idx), 0ull);
1088 } 970 }
1089 for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) 971 for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++)
1090 wrmsrl_safe(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull); 972 checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull);
1091 973
1092 if (ds) 974 if (ds)
1093 ds->bts_index = ds->bts_buffer_base; 975 ds->bts_index = ds->bts_buffer_base;
@@ -1107,6 +989,8 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
1107 u64 status; 989 u64 status;
1108 int handled; 990 int handled;
1109 991
992 perf_sample_data_init(&data, 0);
993
1110 cpuc = &__get_cpu_var(cpu_hw_events); 994 cpuc = &__get_cpu_var(cpu_hw_events);
1111 995
1112 /* 996 /*
@@ -1160,10 +1044,7 @@ again:
1160 if (!intel_pmu_save_and_restart(event)) 1044 if (!intel_pmu_save_and_restart(event))
1161 continue; 1045 continue;
1162 1046
1163 perf_sample_data_init(&data, 0, event->hw.last_period); 1047 data.period = event->hw.last_period;
1164
1165 if (has_branch_stack(event))
1166 data.br_stack = &cpuc->lbr_stack;
1167 1048
1168 if (perf_event_overflow(event, &data, regs)) 1049 if (perf_event_overflow(event, &data, regs))
1169 x86_pmu_stop(event, 0); 1050 x86_pmu_stop(event, 0);
@@ -1199,33 +1080,27 @@ intel_bts_constraints(struct perf_event *event)
1199 return NULL; 1080 return NULL;
1200} 1081}
1201 1082
1202static int intel_alt_er(int idx) 1083static bool intel_try_alt_er(struct perf_event *event, int orig_idx)
1203{ 1084{
1204 if (!(x86_pmu.er_flags & ERF_HAS_RSP_1)) 1085 if (!(x86_pmu.er_flags & ERF_HAS_RSP_1))
1205 return idx; 1086 return false;
1206
1207 if (idx == EXTRA_REG_RSP_0)
1208 return EXTRA_REG_RSP_1;
1209 1087
1210 if (idx == EXTRA_REG_RSP_1) 1088 if (event->hw.extra_reg.idx == EXTRA_REG_RSP_0) {
1211 return EXTRA_REG_RSP_0;
1212
1213 return idx;
1214}
1215
1216static void intel_fixup_er(struct perf_event *event, int idx)
1217{
1218 event->hw.extra_reg.idx = idx;
1219
1220 if (idx == EXTRA_REG_RSP_0) {
1221 event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
1222 event->hw.config |= 0x01b7;
1223 event->hw.extra_reg.reg = MSR_OFFCORE_RSP_0;
1224 } else if (idx == EXTRA_REG_RSP_1) {
1225 event->hw.config &= ~INTEL_ARCH_EVENT_MASK; 1089 event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
1226 event->hw.config |= 0x01bb; 1090 event->hw.config |= 0x01bb;
1091 event->hw.extra_reg.idx = EXTRA_REG_RSP_1;
1227 event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1; 1092 event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1;
1093 } else if (event->hw.extra_reg.idx == EXTRA_REG_RSP_1) {
1094 event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
1095 event->hw.config |= 0x01b7;
1096 event->hw.extra_reg.idx = EXTRA_REG_RSP_0;
1097 event->hw.extra_reg.reg = MSR_OFFCORE_RSP_0;
1228 } 1098 }
1099
1100 if (event->hw.extra_reg.idx == orig_idx)
1101 return false;
1102
1103 return true;
1229} 1104}
1230 1105
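
Both sides of the hunk above implement the same idea: an OFFCORE_RESPONSE event can be programmed through either of two interchangeable pairs, event select 0x01b7 with MSR_OFFCORE_RSP_0 or 0x01bb with MSR_OFFCORE_RSP_1, so when one extra register already holds a conflicting filter the scheduler retries on the sibling (intel_alt_er()/intel_fixup_er() on the removed side, intel_try_alt_er() on the added side). A standalone sketch of that retry follows; only the constants come from the hunk, the data structures are simplified stand-ins.

/* Simplified model of the two interchangeable offcore-response slots. */
struct offcore_slot {
	int busy;			/* already claimed by a scheduled event */
	unsigned long long config;	/* filter value locked into the MSR */
};

/*
 * Try RSP_0 first, then RSP_1. A slot can be shared when it already holds
 * the same filter value. Returns the slot index, or -1 if both conflict;
 * the caller then rewrites the event select (0x01b7 <-> 0x01bb) to match
 * the slot it actually got.
 */
static int alloc_offcore_slot(struct offcore_slot slot[2], unsigned long long config)
{
	for (int idx = 0; idx < 2; idx++) {
		if (!slot[idx].busy || slot[idx].config == config) {
			slot[idx].busy = 1;
			slot[idx].config = config;
			return idx;
		}
	}
	return -1;	/* both MSRs hold conflicting filters: empty constraint */
}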
1231/* 1106/*
@@ -1237,24 +1112,20 @@ static void intel_fixup_er(struct perf_event *event, int idx)
1237 */ 1112 */
1238static struct event_constraint * 1113static struct event_constraint *
1239__intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc, 1114__intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,
1240 struct perf_event *event, 1115 struct perf_event *event)
1241 struct hw_perf_event_extra *reg)
1242{ 1116{
1243 struct event_constraint *c = &emptyconstraint; 1117 struct event_constraint *c = &emptyconstraint;
1118 struct hw_perf_event_extra *reg = &event->hw.extra_reg;
1244 struct er_account *era; 1119 struct er_account *era;
1245 unsigned long flags; 1120 unsigned long flags;
1246 int idx = reg->idx; 1121 int orig_idx = reg->idx;
1247 1122
1248 /* 1123 /* already allocated shared msr */
1249 * reg->alloc can be set due to existing state, so for fake cpuc we 1124 if (reg->alloc)
1250 * need to ignore this, otherwise we might fail to allocate proper fake 1125 return &unconstrained;
1251 * state for this extra reg constraint. Also see the comment below.
1252 */
1253 if (reg->alloc && !cpuc->is_fake)
1254 return NULL; /* call x86_get_event_constraint() */
1255 1126
1256again: 1127again:
1257 era = &cpuc->shared_regs->regs[idx]; 1128 era = &cpuc->shared_regs->regs[reg->idx];
1258 /* 1129 /*
1259 * we use spin_lock_irqsave() to avoid lockdep issues when 1130 * we use spin_lock_irqsave() to avoid lockdep issues when
1260 * passing a fake cpuc 1131 * passing a fake cpuc
@@ -1263,29 +1134,6 @@ again:
1263 1134
1264 if (!atomic_read(&era->ref) || era->config == reg->config) { 1135 if (!atomic_read(&era->ref) || era->config == reg->config) {
1265 1136
1266 /*
1267 * If its a fake cpuc -- as per validate_{group,event}() we
1268 * shouldn't touch event state and we can avoid doing so
1269 * since both will only call get_event_constraints() once
1270 * on each event, this avoids the need for reg->alloc.
1271 *
1272 * Not doing the ER fixup will only result in era->reg being
1273 * wrong, but since we won't actually try and program hardware
1274 * this isn't a problem either.
1275 */
1276 if (!cpuc->is_fake) {
1277 if (idx != reg->idx)
1278 intel_fixup_er(event, idx);
1279
1280 /*
1281 * x86_schedule_events() can call get_event_constraints()
1282 * multiple times on events in the case of incremental
1283 * scheduling(). reg->alloc ensures we only do the ER
1284 * allocation once.
1285 */
1286 reg->alloc = 1;
1287 }
1288
1289 /* lock in msr value */ 1137 /* lock in msr value */
1290 era->config = reg->config; 1138 era->config = reg->config;
1291 era->reg = reg->reg; 1139 era->reg = reg->reg;
@@ -1293,17 +1141,21 @@ again:
1293 /* one more user */ 1141 /* one more user */
1294 atomic_inc(&era->ref); 1142 atomic_inc(&era->ref);
1295 1143
1144 /* no need to reallocate during incremental event scheduling */
1145 reg->alloc = 1;
1146
1296 /* 1147 /*
1297 * need to call x86_get_event_constraint() 1148 * All events using extra_reg are unconstrained.
1298 * to check if associated event has constraints 1149 * Avoids calling x86_get_event_constraints()
1150 *
1151 * Must revisit if extra_reg controlling events
1152 * ever have constraints. Worst case we go through
1153 * the regular event constraint table.
1299 */ 1154 */
1300 c = NULL; 1155 c = &unconstrained;
1301 } else { 1156 } else if (intel_try_alt_er(event, orig_idx)) {
1302 idx = intel_alt_er(idx); 1157 raw_spin_unlock(&era->lock);
1303 if (idx != reg->idx) { 1158 goto again;
1304 raw_spin_unlock_irqrestore(&era->lock, flags);
1305 goto again;
1306 }
1307 } 1159 }
1308 raw_spin_unlock_irqrestore(&era->lock, flags); 1160 raw_spin_unlock_irqrestore(&era->lock, flags);
1309 1161
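
Because the er_account state above is shared by all HT threads of a core, __intel_shared_reg_get_constraints() takes the account's spinlock and uses an atomic reference count: a register is claimed when it is unused or already programmed with the same filter value, and __intel_shared_reg_put_constraints() (next hunk) drops the reference so the last user frees the MSR for a new value. A simplified, single-threaded model of that get/put pairing, for illustration only:

/* Illustrative stand-in for struct er_account (no locking shown). */
struct er_account_model {
	int ref;			/* scheduled events currently using this MSR */
	unsigned long long config;	/* value locked into the MSR */
};

static int er_get(struct er_account_model *era, unsigned long long config)
{
	if (era->ref && era->config != config)
		return 0;		/* conflicting filter: event gets the empty constraint */
	era->config = config;		/* lock in msr value */
	era->ref++;			/* one more user */
	return 1;
}

static void er_put(struct er_account_model *era)
{
	if (era->ref)
		era->ref--;		/* last user releases the MSR for a new config */
}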
@@ -1317,14 +1169,11 @@ __intel_shared_reg_put_constraints(struct cpu_hw_events *cpuc,
1317 struct er_account *era; 1169 struct er_account *era;
1318 1170
1319 /* 1171 /*
1320 * Only put constraint if extra reg was actually allocated. Also takes 1172 * only put constraint if extra reg was actually
1321 * care of event which do not use an extra shared reg. 1173 * allocated. Also takes care of event which do
1322 * 1174 * not use an extra shared reg
1323 * Also, if this is a fake cpuc we shouldn't touch any event state
1324 * (reg->alloc) and we don't care about leaving inconsistent cpuc state
1325 * either since it'll be thrown out.
1326 */ 1175 */
1327 if (!reg->alloc || cpuc->is_fake) 1176 if (!reg->alloc)
1328 return; 1177 return;
1329 1178
1330 era = &cpuc->shared_regs->regs[reg->idx]; 1179 era = &cpuc->shared_regs->regs[reg->idx];
@@ -1340,39 +1189,12 @@ static struct event_constraint *
1340intel_shared_regs_constraints(struct cpu_hw_events *cpuc, 1189intel_shared_regs_constraints(struct cpu_hw_events *cpuc,
1341 struct perf_event *event) 1190 struct perf_event *event)
1342{ 1191{
1343 struct event_constraint *c = NULL, *d; 1192 struct event_constraint *c = NULL;
1344 struct hw_perf_event_extra *xreg, *breg;
1345
1346 xreg = &event->hw.extra_reg;
1347 if (xreg->idx != EXTRA_REG_NONE) {
1348 c = __intel_shared_reg_get_constraints(cpuc, event, xreg);
1349 if (c == &emptyconstraint)
1350 return c;
1351 }
1352 breg = &event->hw.branch_reg;
1353 if (breg->idx != EXTRA_REG_NONE) {
1354 d = __intel_shared_reg_get_constraints(cpuc, event, breg);
1355 if (d == &emptyconstraint) {
1356 __intel_shared_reg_put_constraints(cpuc, xreg);
1357 c = d;
1358 }
1359 }
1360 return c;
1361}
1362 1193
1363struct event_constraint * 1194 if (event->hw.extra_reg.idx != EXTRA_REG_NONE)
1364x86_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) 1195 c = __intel_shared_reg_get_constraints(cpuc, event);
1365{
1366 struct event_constraint *c;
1367 1196
1368 if (x86_pmu.event_constraints) { 1197 return c;
1369 for_each_event_constraint(c, x86_pmu.event_constraints) {
1370 if ((event->hw.config & c->cmask) == c->code)
1371 return c;
1372 }
1373 }
1374
1375 return &unconstrained;
1376} 1198}
1377 1199
1378static struct event_constraint * 1200static struct event_constraint *
@@ -1404,10 +1226,6 @@ intel_put_shared_regs_event_constraints(struct cpu_hw_events *cpuc,
1404 reg = &event->hw.extra_reg; 1226 reg = &event->hw.extra_reg;
1405 if (reg->idx != EXTRA_REG_NONE) 1227 if (reg->idx != EXTRA_REG_NONE)
1406 __intel_shared_reg_put_constraints(cpuc, reg); 1228 __intel_shared_reg_put_constraints(cpuc, reg);
1407
1408 reg = &event->hw.branch_reg;
1409 if (reg->idx != EXTRA_REG_NONE)
1410 __intel_shared_reg_put_constraints(cpuc, reg);
1411} 1229}
1412 1230
1413static void intel_put_event_constraints(struct cpu_hw_events *cpuc, 1231static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
@@ -1416,9 +1234,15 @@ static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
1416 intel_put_shared_regs_event_constraints(cpuc, event); 1234 intel_put_shared_regs_event_constraints(cpuc, event);
1417} 1235}
1418 1236
1419static void intel_pebs_aliases_core2(struct perf_event *event) 1237static int intel_pmu_hw_config(struct perf_event *event)
1420{ 1238{
1421 if ((event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) { 1239 int ret = x86_pmu_hw_config(event);
1240
1241 if (ret)
1242 return ret;
1243
1244 if (event->attr.precise_ip &&
1245 (event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) {
1422 /* 1246 /*
1423 * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P 1247 * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P
1424 * (0x003c) so that we can use it with PEBS. 1248 * (0x003c) so that we can use it with PEBS.
@@ -1437,56 +1261,11 @@ static void intel_pebs_aliases_core2(struct perf_event *event)
1437 * 1261 *
1438 * Thereby we gain a PEBS capable cycle counter. 1262 * Thereby we gain a PEBS capable cycle counter.
1439 */ 1263 */
1440 u64 alt_config = X86_CONFIG(.event=0xc0, .inv=1, .cmask=16); 1264 u64 alt_config = 0x108000c0; /* INST_RETIRED.TOTAL_CYCLES */
1441
1442 alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK);
1443 event->hw.config = alt_config;
1444 }
1445}
1446
1447static void intel_pebs_aliases_snb(struct perf_event *event)
1448{
1449 if ((event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) {
1450 /*
1451 * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P
1452 * (0x003c) so that we can use it with PEBS.
1453 *
1454 * The regular CPU_CLK_UNHALTED.THREAD_P event (0x003c) isn't
1455 * PEBS capable. However we can use UOPS_RETIRED.ALL
1456 * (0x01c2), which is a PEBS capable event, to get the same
1457 * count.
1458 *
1459 * UOPS_RETIRED.ALL counts the number of cycles that retires
1460 * CNTMASK micro-ops. By setting CNTMASK to a value (16)
1461 * larger than the maximum number of micro-ops that can be
1462 * retired per cycle (4) and then inverting the condition, we
1463 * count all cycles that retire 16 or less micro-ops, which
1464 * is every cycle.
1465 *
1466 * Thereby we gain a PEBS capable cycle counter.
1467 */
1468 u64 alt_config = X86_CONFIG(.event=0xc2, .umask=0x01, .inv=1, .cmask=16);
1469 1265
1470 alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK); 1266 alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK);
1471 event->hw.config = alt_config; 1267 event->hw.config = alt_config;
1472 } 1268 }
1473}
1474
1475static int intel_pmu_hw_config(struct perf_event *event)
1476{
1477 int ret = x86_pmu_hw_config(event);
1478
1479 if (ret)
1480 return ret;
1481
1482 if (event->attr.precise_ip && x86_pmu.pebs_aliases)
1483 x86_pmu.pebs_aliases(event);
1484
1485 if (intel_pmu_needs_lbr_smpl(event)) {
1486 ret = intel_pmu_setup_lbr_filter(event);
1487 if (ret)
1488 return ret;
1489 }
1490 1269
1491 if (event->attr.type != PERF_TYPE_RAW) 1270 if (event->attr.type != PERF_TYPE_RAW)
1492 return 0; 1271 return 0;
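
The X86_CONFIG(.event=0xc0, .inv=1, .cmask=16) expression on the removed side and the raw literal 0x108000c0 /* INST_RETIRED.TOTAL_CYCLES */ on the added side encode the same config word. Using the bit positions this diff itself documents via PMU_FORMAT_ATTR (event=config:0-7, umask=config:8-15, inv=config:23, cmask=config:24-31), the value decomposes as below; this is a self-contained check, not kernel code.

#include <assert.h>
#include <stdint.h>

int main(void)
{
	uint64_t event = 0xc0;	/* INST_RETIRED */
	uint64_t umask = 0x00;
	uint64_t inv   = 1;	/* invert the counter-mask comparison */
	uint64_t cmask = 16;	/* threshold above the max 4 uops retired per cycle */

	uint64_t config = event | (umask << 8) | (inv << 23) | (cmask << 24);

	assert(config == 0x108000c0);	/* the literal used on the added side */
	return 0;
}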
@@ -1505,117 +1284,12 @@ static int intel_pmu_hw_config(struct perf_event *event)
1505 return 0; 1284 return 0;
1506} 1285}
1507 1286
1508struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr)
1509{
1510 if (x86_pmu.guest_get_msrs)
1511 return x86_pmu.guest_get_msrs(nr);
1512 *nr = 0;
1513 return NULL;
1514}
1515EXPORT_SYMBOL_GPL(perf_guest_get_msrs);
1516
1517static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr)
1518{
1519 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1520 struct perf_guest_switch_msr *arr = cpuc->guest_switch_msrs;
1521
1522 arr[0].msr = MSR_CORE_PERF_GLOBAL_CTRL;
1523 arr[0].host = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask;
1524 arr[0].guest = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_host_mask;
1525 /*
1526 * If PMU counter has PEBS enabled it is not enough to disable counter
1527 * on a guest entry since PEBS memory write can overshoot guest entry
1528 * and corrupt guest memory. Disabling PEBS solves the problem.
1529 */
1530 arr[1].msr = MSR_IA32_PEBS_ENABLE;
1531 arr[1].host = cpuc->pebs_enabled;
1532 arr[1].guest = 0;
1533
1534 *nr = 2;
1535 return arr;
1536}
1537
1538static struct perf_guest_switch_msr *core_guest_get_msrs(int *nr)
1539{
1540 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1541 struct perf_guest_switch_msr *arr = cpuc->guest_switch_msrs;
1542 int idx;
1543
1544 for (idx = 0; idx < x86_pmu.num_counters; idx++) {
1545 struct perf_event *event = cpuc->events[idx];
1546
1547 arr[idx].msr = x86_pmu_config_addr(idx);
1548 arr[idx].host = arr[idx].guest = 0;
1549
1550 if (!test_bit(idx, cpuc->active_mask))
1551 continue;
1552
1553 arr[idx].host = arr[idx].guest =
1554 event->hw.config | ARCH_PERFMON_EVENTSEL_ENABLE;
1555
1556 if (event->attr.exclude_host)
1557 arr[idx].host &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
1558 else if (event->attr.exclude_guest)
1559 arr[idx].guest &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
1560 }
1561
1562 *nr = x86_pmu.num_counters;
1563 return arr;
1564}
1565
1566static void core_pmu_enable_event(struct perf_event *event)
1567{
1568 if (!event->attr.exclude_host)
1569 x86_pmu_enable_event(event);
1570}
1571
1572static void core_pmu_enable_all(int added)
1573{
1574 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1575 int idx;
1576
1577 for (idx = 0; idx < x86_pmu.num_counters; idx++) {
1578 struct hw_perf_event *hwc = &cpuc->events[idx]->hw;
1579
1580 if (!test_bit(idx, cpuc->active_mask) ||
1581 cpuc->events[idx]->attr.exclude_host)
1582 continue;
1583
1584 __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
1585 }
1586}
1587
1588PMU_FORMAT_ATTR(event, "config:0-7" );
1589PMU_FORMAT_ATTR(umask, "config:8-15" );
1590PMU_FORMAT_ATTR(edge, "config:18" );
1591PMU_FORMAT_ATTR(pc, "config:19" );
1592PMU_FORMAT_ATTR(any, "config:21" ); /* v3 + */
1593PMU_FORMAT_ATTR(inv, "config:23" );
1594PMU_FORMAT_ATTR(cmask, "config:24-31" );
1595
1596static struct attribute *intel_arch_formats_attr[] = {
1597 &format_attr_event.attr,
1598 &format_attr_umask.attr,
1599 &format_attr_edge.attr,
1600 &format_attr_pc.attr,
1601 &format_attr_inv.attr,
1602 &format_attr_cmask.attr,
1603 NULL,
1604};
1605
1606ssize_t intel_event_sysfs_show(char *page, u64 config)
1607{
1608 u64 event = (config & ARCH_PERFMON_EVENTSEL_EVENT);
1609
1610 return x86_event_sysfs_show(page, config, event);
1611}
1612
1613static __initconst const struct x86_pmu core_pmu = { 1287static __initconst const struct x86_pmu core_pmu = {
1614 .name = "core", 1288 .name = "core",
1615 .handle_irq = x86_pmu_handle_irq, 1289 .handle_irq = x86_pmu_handle_irq,
1616 .disable_all = x86_pmu_disable_all, 1290 .disable_all = x86_pmu_disable_all,
1617 .enable_all = core_pmu_enable_all, 1291 .enable_all = x86_pmu_enable_all,
1618 .enable = core_pmu_enable_event, 1292 .enable = x86_pmu_enable_event,
1619 .disable = x86_pmu_disable_event, 1293 .disable = x86_pmu_disable_event,
1620 .hw_config = x86_pmu_hw_config, 1294 .hw_config = x86_pmu_hw_config,
1621 .schedule_events = x86_schedule_events, 1295 .schedule_events = x86_schedule_events,
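
The perf_guest_get_msrs()/intel_guest_get_msrs()/core_guest_get_msrs() code removed earlier in this hunk exports a short list of MSRs whose values must differ between host and guest context: MSR_CORE_PERF_GLOBAL_CTRL with the counters that must not run in the respective context masked out, and MSR_IA32_PEBS_ENABLE forced to 0 for the guest so PEBS writes cannot overshoot into guest memory. A hypothetical hypervisor-side consumer would walk that list around VM entry and exit, roughly as sketched below; the helper names are made up for illustration and this is not the actual KVM code.

/* Hypothetical consumers of the list built by perf_guest_get_msrs(). */
static void load_guest_perf_msrs(void)
{
	int i, nr;
	struct perf_guest_switch_msr *msrs = perf_guest_get_msrs(&nr);

	for (i = 0; i < nr; i++)
		if (msrs[i].host != msrs[i].guest)
			wrmsrl(msrs[i].msr, msrs[i].guest);	/* before VM entry */
}

static void load_host_perf_msrs(void)
{
	int i, nr;
	struct perf_guest_switch_msr *msrs = perf_guest_get_msrs(&nr);

	for (i = 0; i < nr; i++)
		if (msrs[i].host != msrs[i].guest)
			wrmsrl(msrs[i].msr, msrs[i].host);	/* after VM exit */
}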
@@ -1633,12 +1307,9 @@ static __initconst const struct x86_pmu core_pmu = {
1633 .get_event_constraints = intel_get_event_constraints, 1307 .get_event_constraints = intel_get_event_constraints,
1634 .put_event_constraints = intel_put_event_constraints, 1308 .put_event_constraints = intel_put_event_constraints,
1635 .event_constraints = intel_core_event_constraints, 1309 .event_constraints = intel_core_event_constraints,
1636 .guest_get_msrs = core_guest_get_msrs,
1637 .format_attrs = intel_arch_formats_attr,
1638 .events_sysfs_show = intel_event_sysfs_show,
1639}; 1310};
1640 1311
1641struct intel_shared_regs *allocate_shared_regs(int cpu) 1312static struct intel_shared_regs *allocate_shared_regs(int cpu)
1642{ 1313{
1643 struct intel_shared_regs *regs; 1314 struct intel_shared_regs *regs;
1644 int i; 1315 int i;
@@ -1661,7 +1332,7 @@ static int intel_pmu_cpu_prepare(int cpu)
1661{ 1332{
1662 struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); 1333 struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
1663 1334
1664 if (!(x86_pmu.extra_regs || x86_pmu.lbr_sel_map)) 1335 if (!x86_pmu.extra_regs)
1665 return NOTIFY_OK; 1336 return NOTIFY_OK;
1666 1337
1667 cpuc->shared_regs = allocate_shared_regs(cpu); 1338 cpuc->shared_regs = allocate_shared_regs(cpu);
@@ -1683,28 +1354,22 @@ static void intel_pmu_cpu_starting(int cpu)
1683 */ 1354 */
1684 intel_pmu_lbr_reset(); 1355 intel_pmu_lbr_reset();
1685 1356
1686 cpuc->lbr_sel = NULL; 1357 if (!cpuc->shared_regs || (x86_pmu.er_flags & ERF_NO_HT_SHARING))
1687
1688 if (!cpuc->shared_regs)
1689 return; 1358 return;
1690 1359
1691 if (!(x86_pmu.er_flags & ERF_NO_HT_SHARING)) { 1360 for_each_cpu(i, topology_thread_cpumask(cpu)) {
1692 for_each_cpu(i, topology_thread_cpumask(cpu)) { 1361 struct intel_shared_regs *pc;
1693 struct intel_shared_regs *pc;
1694 1362
1695 pc = per_cpu(cpu_hw_events, i).shared_regs; 1363 pc = per_cpu(cpu_hw_events, i).shared_regs;
1696 if (pc && pc->core_id == core_id) { 1364 if (pc && pc->core_id == core_id) {
1697 cpuc->kfree_on_online = cpuc->shared_regs; 1365 kfree(cpuc->shared_regs);
1698 cpuc->shared_regs = pc; 1366 cpuc->shared_regs = pc;
1699 break; 1367 break;
1700 }
1701 } 1368 }
1702 cpuc->shared_regs->core_id = core_id;
1703 cpuc->shared_regs->refcnt++;
1704 } 1369 }
1705 1370
1706 if (x86_pmu.lbr_sel_map) 1371 cpuc->shared_regs->core_id = core_id;
1707 cpuc->lbr_sel = &cpuc->shared_regs->regs[EXTRA_REG_LBR]; 1372 cpuc->shared_regs->refcnt++;
1708} 1373}
1709 1374
1710static void intel_pmu_cpu_dying(int cpu) 1375static void intel_pmu_cpu_dying(int cpu)
@@ -1722,33 +1387,6 @@ static void intel_pmu_cpu_dying(int cpu)
1722 fini_debug_store_on_cpu(cpu); 1387 fini_debug_store_on_cpu(cpu);
1723} 1388}
1724 1389
1725static void intel_pmu_flush_branch_stack(void)
1726{
1727 /*
1728 * Intel LBR does not tag entries with the
1729 * PID of the current task, then we need to
1730 * flush it on ctxsw
1731 * For now, we simply reset it
1732 */
1733 if (x86_pmu.lbr_nr)
1734 intel_pmu_lbr_reset();
1735}
1736
1737PMU_FORMAT_ATTR(offcore_rsp, "config1:0-63");
1738
1739static struct attribute *intel_arch3_formats_attr[] = {
1740 &format_attr_event.attr,
1741 &format_attr_umask.attr,
1742 &format_attr_edge.attr,
1743 &format_attr_pc.attr,
1744 &format_attr_any.attr,
1745 &format_attr_inv.attr,
1746 &format_attr_cmask.attr,
1747
1748 &format_attr_offcore_rsp.attr, /* XXX do NHM/WSM + SNB breakout */
1749 NULL,
1750};
1751
1752static __initconst const struct x86_pmu intel_pmu = { 1390static __initconst const struct x86_pmu intel_pmu = {
1753 .name = "Intel", 1391 .name = "Intel",
1754 .handle_irq = intel_pmu_handle_irq, 1392 .handle_irq = intel_pmu_handle_irq,
@@ -1771,19 +1409,13 @@ static __initconst const struct x86_pmu intel_pmu = {
1771 .max_period = (1ULL << 31) - 1, 1409 .max_period = (1ULL << 31) - 1,
1772 .get_event_constraints = intel_get_event_constraints, 1410 .get_event_constraints = intel_get_event_constraints,
1773 .put_event_constraints = intel_put_event_constraints, 1411 .put_event_constraints = intel_put_event_constraints,
1774 .pebs_aliases = intel_pebs_aliases_core2,
1775
1776 .format_attrs = intel_arch3_formats_attr,
1777 .events_sysfs_show = intel_event_sysfs_show,
1778 1412
1779 .cpu_prepare = intel_pmu_cpu_prepare, 1413 .cpu_prepare = intel_pmu_cpu_prepare,
1780 .cpu_starting = intel_pmu_cpu_starting, 1414 .cpu_starting = intel_pmu_cpu_starting,
1781 .cpu_dying = intel_pmu_cpu_dying, 1415 .cpu_dying = intel_pmu_cpu_dying,
1782 .guest_get_msrs = intel_guest_get_msrs,
1783 .flush_branch_stack = intel_pmu_flush_branch_stack,
1784}; 1416};
1785 1417
1786static __init void intel_clovertown_quirk(void) 1418static void intel_clovertown_quirks(void)
1787{ 1419{
1788 /* 1420 /*
1789 * PEBS is unreliable due to: 1421 * PEBS is unreliable due to:
@@ -1804,119 +1436,23 @@ static __init void intel_clovertown_quirk(void)
1804 * But taken together it might just make sense to not enable PEBS on 1436 * But taken together it might just make sense to not enable PEBS on
1805 * these chips. 1437 * these chips.
1806 */ 1438 */
1807 pr_warn("PEBS disabled due to CPU errata\n"); 1439 printk(KERN_WARNING "PEBS disabled due to CPU errata.\n");
1808 x86_pmu.pebs = 0; 1440 x86_pmu.pebs = 0;
1809 x86_pmu.pebs_constraints = NULL; 1441 x86_pmu.pebs_constraints = NULL;
1810} 1442}
1811 1443
1812static int intel_snb_pebs_broken(int cpu) 1444static __init int intel_pmu_init(void)
1813{
1814 u32 rev = UINT_MAX; /* default to broken for unknown models */
1815
1816 switch (cpu_data(cpu).x86_model) {
1817 case 42: /* SNB */
1818 rev = 0x28;
1819 break;
1820
1821 case 45: /* SNB-EP */
1822 switch (cpu_data(cpu).x86_mask) {
1823 case 6: rev = 0x618; break;
1824 case 7: rev = 0x70c; break;
1825 }
1826 }
1827
1828 return (cpu_data(cpu).microcode < rev);
1829}
1830
1831static void intel_snb_check_microcode(void)
1832{
1833 int pebs_broken = 0;
1834 int cpu;
1835
1836 get_online_cpus();
1837 for_each_online_cpu(cpu) {
1838 if ((pebs_broken = intel_snb_pebs_broken(cpu)))
1839 break;
1840 }
1841 put_online_cpus();
1842
1843 if (pebs_broken == x86_pmu.pebs_broken)
1844 return;
1845
1846 /*
1847 * Serialized by the microcode lock..
1848 */
1849 if (x86_pmu.pebs_broken) {
1850 pr_info("PEBS enabled due to microcode update\n");
1851 x86_pmu.pebs_broken = 0;
1852 } else {
1853 pr_info("PEBS disabled due to CPU errata, please upgrade microcode\n");
1854 x86_pmu.pebs_broken = 1;
1855 }
1856}
1857
1858static __init void intel_sandybridge_quirk(void)
1859{
1860 x86_pmu.check_microcode = intel_snb_check_microcode;
1861 intel_snb_check_microcode();
1862}
1863
1864static const struct { int id; char *name; } intel_arch_events_map[] __initconst = {
1865 { PERF_COUNT_HW_CPU_CYCLES, "cpu cycles" },
1866 { PERF_COUNT_HW_INSTRUCTIONS, "instructions" },
1867 { PERF_COUNT_HW_BUS_CYCLES, "bus cycles" },
1868 { PERF_COUNT_HW_CACHE_REFERENCES, "cache references" },
1869 { PERF_COUNT_HW_CACHE_MISSES, "cache misses" },
1870 { PERF_COUNT_HW_BRANCH_INSTRUCTIONS, "branch instructions" },
1871 { PERF_COUNT_HW_BRANCH_MISSES, "branch misses" },
1872};
1873
1874static __init void intel_arch_events_quirk(void)
1875{
1876 int bit;
1877
1878 /* disable event that reported as not presend by cpuid */
1879 for_each_set_bit(bit, x86_pmu.events_mask, ARRAY_SIZE(intel_arch_events_map)) {
1880 intel_perfmon_event_map[intel_arch_events_map[bit].id] = 0;
1881 pr_warn("CPUID marked event: \'%s\' unavailable\n",
1882 intel_arch_events_map[bit].name);
1883 }
1884}
1885
1886static __init void intel_nehalem_quirk(void)
1887{
1888 union cpuid10_ebx ebx;
1889
1890 ebx.full = x86_pmu.events_maskl;
1891 if (ebx.split.no_branch_misses_retired) {
1892 /*
1893 * Erratum AAJ80 detected, we work it around by using
1894 * the BR_MISP_EXEC.ANY event. This will over-count
1895 * branch-misses, but it's still much better than the
1896 * architectural event which is often completely bogus:
1897 */
1898 intel_perfmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x7f89;
1899 ebx.split.no_branch_misses_retired = 0;
1900 x86_pmu.events_maskl = ebx.full;
1901 pr_info("CPU erratum AAJ80 worked around\n");
1902 }
1903}
1904
1905__init int intel_pmu_init(void)
1906{ 1445{
1907 union cpuid10_edx edx; 1446 union cpuid10_edx edx;
1908 union cpuid10_eax eax; 1447 union cpuid10_eax eax;
1909 union cpuid10_ebx ebx;
1910 struct event_constraint *c;
1911 unsigned int unused; 1448 unsigned int unused;
1449 unsigned int ebx;
1912 int version; 1450 int version;
1913 1451
1914 if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { 1452 if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
1915 switch (boot_cpu_data.x86) { 1453 switch (boot_cpu_data.x86) {
1916 case 0x6: 1454 case 0x6:
1917 return p6_pmu_init(); 1455 return p6_pmu_init();
1918 case 0xb:
1919 return knc_pmu_init();
1920 case 0xf: 1456 case 0xf:
1921 return p4_pmu_init(); 1457 return p4_pmu_init();
1922 } 1458 }
@@ -1927,8 +1463,8 @@ __init int intel_pmu_init(void)
1927 * Check whether the Architectural PerfMon supports 1463 * Check whether the Architectural PerfMon supports
1928 * Branch Misses Retired hw_event or not. 1464 * Branch Misses Retired hw_event or not.
1929 */ 1465 */
1930 cpuid(10, &eax.full, &ebx.full, &unused, &edx.full); 1466 cpuid(10, &eax.full, &ebx, &unused, &edx.full);
1931 if (eax.split.mask_length < ARCH_PERFMON_EVENTS_COUNT) 1467 if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED)
1932 return -ENODEV; 1468 return -ENODEV;
1933 1469
1934 version = eax.split.version_id; 1470 version = eax.split.version_id;
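
Both versions of intel_pmu_init() probe CPUID leaf 10 (0x0A) here. The union cpuid10_eax fields used in this file (version_id, bit_width, mask_length) are byte fields of EAX, and the `ebx & 0x40` test on the added side further down checks EBX bit 6, the "branch misses retired event unavailable" flag that the removed code reads as ebx.split.no_branch_misses_retired. A small userspace decoder for EAX follows; the bit offsets are my reading of the architectural perfmon leaf and should be treated as an assumption rather than something stated in this diff.

#include <stdint.h>
#include <stdio.h>

/* Decode CPUID.0AH:EAX (offsets assumed, matching the cpuid10_eax accessors). */
static void decode_cpuid_0a_eax(uint32_t eax)
{
	unsigned version_id   = (eax >>  0) & 0xff;	/* -> x86_pmu.version */
	unsigned num_counters = (eax >>  8) & 0xff;	/* general-purpose counters */
	unsigned bit_width    = (eax >> 16) & 0xff;	/* -> x86_pmu.cntval_bits */
	unsigned mask_length  = (eax >> 24) & 0xff;	/* length of the EBX event vector */

	printf("perfmon v%u: %u counters, %u bits wide, %u architectural events\n",
	       version_id, num_counters, bit_width, mask_length);
}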
@@ -1942,11 +1478,6 @@ __init int intel_pmu_init(void)
1942 x86_pmu.cntval_bits = eax.split.bit_width; 1478 x86_pmu.cntval_bits = eax.split.bit_width;
1943 x86_pmu.cntval_mask = (1ULL << eax.split.bit_width) - 1; 1479 x86_pmu.cntval_mask = (1ULL << eax.split.bit_width) - 1;
1944 1480
1945 x86_pmu.events_maskl = ebx.full;
1946 x86_pmu.events_mask_len = eax.split.mask_length;
1947
1948 x86_pmu.max_pebs_events = min_t(unsigned, MAX_PEBS_EVENTS, x86_pmu.num_counters);
1949
1950 /* 1481 /*
1951 * Quirk: v2 perfmon does not report fixed-purpose events, so 1482 * Quirk: v2 perfmon does not report fixed-purpose events, so
1952 * assume at least 3 events: 1483 * assume at least 3 events:
@@ -1966,8 +1497,6 @@ __init int intel_pmu_init(void)
1966 1497
1967 intel_ds_init(); 1498 intel_ds_init();
1968 1499
1969 x86_add_quirk(intel_arch_events_quirk); /* Install first, so it runs last */
1970
1971 /* 1500 /*
1972 * Install the hw-cache-events table: 1501 * Install the hw-cache-events table:
1973 */ 1502 */
@@ -1977,7 +1506,7 @@ __init int intel_pmu_init(void)
1977 break; 1506 break;
1978 1507
1979 case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */ 1508 case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */
1980 x86_add_quirk(intel_clovertown_quirk); 1509 x86_pmu.quirks = intel_clovertown_quirks;
1981 case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */ 1510 case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */
1982 case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */ 1511 case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */
1983 case 29: /* six-core 45 nm xeon "Dunnington" */ 1512 case 29: /* six-core 45 nm xeon "Dunnington" */
@@ -2007,19 +1536,25 @@ __init int intel_pmu_init(void)
2007 x86_pmu.extra_regs = intel_nehalem_extra_regs; 1536 x86_pmu.extra_regs = intel_nehalem_extra_regs;
2008 1537
2009 /* UOPS_ISSUED.STALLED_CYCLES */ 1538 /* UOPS_ISSUED.STALLED_CYCLES */
2010 intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 1539 intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x180010e;
2011 X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
2012 /* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */ 1540 /* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */
2013 intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 1541 intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x1803fb1;
2014 X86_CONFIG(.event=0xb1, .umask=0x3f, .inv=1, .cmask=1);
2015 1542
2016 x86_add_quirk(intel_nehalem_quirk); 1543 if (ebx & 0x40) {
1544 /*
1545 * Erratum AAJ80 detected, we work it around by using
1546 * the BR_MISP_EXEC.ANY event. This will over-count
1547 * branch-misses, but it's still much better than the
1548 * architectural event which is often completely bogus:
1549 */
1550 intel_perfmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x7f89;
2017 1551
1552 pr_cont("erratum AAJ80 worked around, ");
1553 }
2018 pr_cont("Nehalem events, "); 1554 pr_cont("Nehalem events, ");
2019 break; 1555 break;
2020 1556
2021 case 28: /* Atom */ 1557 case 28: /* Atom */
2022 case 54: /* Cedariew */
2023 memcpy(hw_cache_event_ids, atom_hw_cache_event_ids, 1558 memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
2024 sizeof(hw_cache_event_ids)); 1559 sizeof(hw_cache_event_ids));
2025 1560
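
The raw stalled-cycles encodings used on the added side of these init hunks (0x180010e and 0x1803fb1 here, 0x18001b1 in the SandyBridge hunk below) are the pre-expanded forms of the X86_CONFIG() expressions they replace, using the same config bit layout as the PEBS alias example earlier (event 0-7, umask 8-15, inv 23, cmask 24-31). A self-contained check:

#include <assert.h>
#include <stdint.h>

static uint64_t x86_config(uint64_t event, uint64_t umask, uint64_t inv, uint64_t cmask)
{
	return event | (umask << 8) | (inv << 23) | (cmask << 24);
}

int main(void)
{
	/* UOPS_ISSUED.STALLED_CYCLES: event 0x0e, umask 0x01, c=1, i=1 */
	assert(x86_config(0x0e, 0x01, 1, 1) == 0x180010e);
	/* UOPS_EXECUTED.CORE_ACTIVE_CYCLES: event 0xb1, umask 0x3f, c=1, i=1 */
	assert(x86_config(0xb1, 0x3f, 1, 1) == 0x1803fb1);
	/* UOPS_DISPATCHED.THREAD (SandyBridge backend stalls): umask 0x01 */
	assert(x86_config(0xb1, 0x01, 1, 1) == 0x18001b1);
	return 0;
}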
@@ -2047,65 +1582,34 @@ __init int intel_pmu_init(void)
2047 x86_pmu.er_flags |= ERF_HAS_RSP_1; 1582 x86_pmu.er_flags |= ERF_HAS_RSP_1;
2048 1583
2049 /* UOPS_ISSUED.STALLED_CYCLES */ 1584 /* UOPS_ISSUED.STALLED_CYCLES */
2050 intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 1585 intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x180010e;
2051 X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
2052 /* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */ 1586 /* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */
2053 intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 1587 intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x1803fb1;
2054 X86_CONFIG(.event=0xb1, .umask=0x3f, .inv=1, .cmask=1);
2055 1588
2056 pr_cont("Westmere events, "); 1589 pr_cont("Westmere events, ");
2057 break; 1590 break;
2058 1591
2059 case 42: /* SandyBridge */ 1592 case 42: /* SandyBridge */
2060 case 45: /* SandyBridge, "Romely-EP" */ 1593 case 45: /* SandyBridge, "Romely-EP" */
2061 x86_add_quirk(intel_sandybridge_quirk);
2062 memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, 1594 memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
2063 sizeof(hw_cache_event_ids)); 1595 sizeof(hw_cache_event_ids));
2064 memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs,
2065 sizeof(hw_cache_extra_regs));
2066 1596
2067 intel_pmu_lbr_init_snb(); 1597 intel_pmu_lbr_init_nhm();
2068 1598
2069 x86_pmu.event_constraints = intel_snb_event_constraints; 1599 x86_pmu.event_constraints = intel_snb_event_constraints;
2070 x86_pmu.pebs_constraints = intel_snb_pebs_event_constraints; 1600 x86_pmu.pebs_constraints = intel_snb_pebs_events;
2071 x86_pmu.pebs_aliases = intel_pebs_aliases_snb;
2072 x86_pmu.extra_regs = intel_snb_extra_regs; 1601 x86_pmu.extra_regs = intel_snb_extra_regs;
2073 /* all extra regs are per-cpu when HT is on */ 1602 /* all extra regs are per-cpu when HT is on */
2074 x86_pmu.er_flags |= ERF_HAS_RSP_1; 1603 x86_pmu.er_flags |= ERF_HAS_RSP_1;
2075 x86_pmu.er_flags |= ERF_NO_HT_SHARING; 1604 x86_pmu.er_flags |= ERF_NO_HT_SHARING;
2076 1605
2077 /* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */ 1606 /* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */
2078 intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 1607 intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x180010e;
2079 X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
2080 /* UOPS_DISPATCHED.THREAD,c=1,i=1 to count stall cycles*/ 1608 /* UOPS_DISPATCHED.THREAD,c=1,i=1 to count stall cycles*/
2081 intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 1609 intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x18001b1;
2082 X86_CONFIG(.event=0xb1, .umask=0x01, .inv=1, .cmask=1);
2083 1610
2084 pr_cont("SandyBridge events, "); 1611 pr_cont("SandyBridge events, ");
2085 break; 1612 break;
2086 case 58: /* IvyBridge */
2087 memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
2088 sizeof(hw_cache_event_ids));
2089 memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs,
2090 sizeof(hw_cache_extra_regs));
2091
2092 intel_pmu_lbr_init_snb();
2093
2094 x86_pmu.event_constraints = intel_snb_event_constraints;
2095 x86_pmu.pebs_constraints = intel_ivb_pebs_event_constraints;
2096 x86_pmu.pebs_aliases = intel_pebs_aliases_snb;
2097 x86_pmu.extra_regs = intel_snb_extra_regs;
2098 /* all extra regs are per-cpu when HT is on */
2099 x86_pmu.er_flags |= ERF_HAS_RSP_1;
2100 x86_pmu.er_flags |= ERF_NO_HT_SHARING;
2101
2102 /* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */
2103 intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
2104 X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
2105
2106 pr_cont("IvyBridge events, ");
2107 break;
2108
2109 1613
2110 default: 1614 default:
2111 switch (x86_pmu.version) { 1615 switch (x86_pmu.version) {
@@ -2122,38 +1626,18 @@ __init int intel_pmu_init(void)
2122 break; 1626 break;
2123 } 1627 }
2124 } 1628 }
1629 return 0;
1630}
2125 1631
2126 if (x86_pmu.num_counters > INTEL_PMC_MAX_GENERIC) { 1632#else /* CONFIG_CPU_SUP_INTEL */
2127 WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!",
2128 x86_pmu.num_counters, INTEL_PMC_MAX_GENERIC);
2129 x86_pmu.num_counters = INTEL_PMC_MAX_GENERIC;
2130 }
2131 x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1;
2132
2133 if (x86_pmu.num_counters_fixed > INTEL_PMC_MAX_FIXED) {
2134 WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!",
2135 x86_pmu.num_counters_fixed, INTEL_PMC_MAX_FIXED);
2136 x86_pmu.num_counters_fixed = INTEL_PMC_MAX_FIXED;
2137 }
2138
2139 x86_pmu.intel_ctrl |=
2140 ((1LL << x86_pmu.num_counters_fixed)-1) << INTEL_PMC_IDX_FIXED;
2141
2142 if (x86_pmu.event_constraints) {
2143 /*
2144 * event on fixed counter2 (REF_CYCLES) only works on this
2145 * counter, so do not extend mask to generic counters
2146 */
2147 for_each_event_constraint(c, x86_pmu.event_constraints) {
2148 if (c->cmask != X86_RAW_EVENT_MASK
2149 || c->idxmsk64 == INTEL_PMC_MSK_FIXED_REF_CYCLES) {
2150 continue;
2151 }
2152
2153 c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1;
2154 c->weight += x86_pmu.num_counters;
2155 }
2156 }
2157 1633
1634static int intel_pmu_init(void)
1635{
2158 return 0; 1636 return 0;
2159} 1637}
1638
1639static struct intel_shared_regs *allocate_shared_regs(int cpu)
1640{
1641 return NULL;
1642}
1643#endif /* CONFIG_CPU_SUP_INTEL */