diff options
Diffstat (limited to 'arch/x86/kernel/cpu/perf_event_intel.c')
-rw-r--r-- | arch/x86/kernel/cpu/perf_event_intel.c | 417 |
1 files changed, 403 insertions, 14 deletions
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 008835c1d79..8fc2b2cee1d 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1,5 +1,27 @@ | |||
1 | #ifdef CONFIG_CPU_SUP_INTEL | 1 | #ifdef CONFIG_CPU_SUP_INTEL |
2 | 2 | ||
3 | #define MAX_EXTRA_REGS 2 | ||
4 | |||
5 | /* | ||
6 | * Per register state. | ||
7 | */ | ||
8 | struct er_account { | ||
9 | int ref; /* reference count */ | ||
10 | unsigned int extra_reg; /* extra MSR number */ | ||
11 | u64 extra_config; /* extra MSR config */ | ||
12 | }; | ||
13 | |||
14 | /* | ||
15 | * Per core state | ||
16 | * This used to coordinate shared registers for HT threads. | ||
17 | */ | ||
18 | struct intel_percore { | ||
19 | raw_spinlock_t lock; /* protect structure */ | ||
20 | struct er_account regs[MAX_EXTRA_REGS]; | ||
21 | int refcnt; /* number of threads */ | ||
22 | unsigned core_id; | ||
23 | }; | ||
24 | |||
3 | /* | 25 | /* |
4 | * Intel PerfMon, used on Core and later. | 26 | * Intel PerfMon, used on Core and later. |
5 | */ | 27 | */ |
@@ -64,6 +86,18 @@ static struct event_constraint intel_nehalem_event_constraints[] = | |||
64 | EVENT_CONSTRAINT_END | 86 | EVENT_CONSTRAINT_END |
65 | }; | 87 | }; |
66 | 88 | ||
89 | static struct extra_reg intel_nehalem_extra_regs[] = | ||
90 | { | ||
91 | INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff), | ||
92 | EVENT_EXTRA_END | ||
93 | }; | ||
94 | |||
95 | static struct event_constraint intel_nehalem_percore_constraints[] = | ||
96 | { | ||
97 | INTEL_EVENT_CONSTRAINT(0xb7, 0), | ||
98 | EVENT_CONSTRAINT_END | ||
99 | }; | ||
100 | |||
67 | static struct event_constraint intel_westmere_event_constraints[] = | 101 | static struct event_constraint intel_westmere_event_constraints[] = |
68 | { | 102 | { |
69 | FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ | 103 | FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ |
@@ -76,6 +110,33 @@ static struct event_constraint intel_westmere_event_constraints[] = | |||
76 | EVENT_CONSTRAINT_END | 110 | EVENT_CONSTRAINT_END |
77 | }; | 111 | }; |
78 | 112 | ||
113 | static struct event_constraint intel_snb_event_constraints[] = | ||
114 | { | ||
115 | FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ | ||
116 | FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ | ||
117 | /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */ | ||
118 | INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.PENDING */ | ||
119 | INTEL_EVENT_CONSTRAINT(0xb7, 0x1), /* OFF_CORE_RESPONSE_0 */ | ||
120 | INTEL_EVENT_CONSTRAINT(0xbb, 0x8), /* OFF_CORE_RESPONSE_1 */ | ||
121 | INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */ | ||
122 | INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */ | ||
123 | EVENT_CONSTRAINT_END | ||
124 | }; | ||
125 | |||
126 | static struct extra_reg intel_westmere_extra_regs[] = | ||
127 | { | ||
128 | INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff), | ||
129 | INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0xffff), | ||
130 | EVENT_EXTRA_END | ||
131 | }; | ||
132 | |||
133 | static struct event_constraint intel_westmere_percore_constraints[] = | ||
134 | { | ||
135 | INTEL_EVENT_CONSTRAINT(0xb7, 0), | ||
136 | INTEL_EVENT_CONSTRAINT(0xbb, 0), | ||
137 | EVENT_CONSTRAINT_END | ||
138 | }; | ||
139 | |||
79 | static struct event_constraint intel_gen_event_constraints[] = | 140 | static struct event_constraint intel_gen_event_constraints[] = |
80 | { | 141 | { |
81 | FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ | 142 | FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ |
@@ -89,6 +150,106 @@ static u64 intel_pmu_event_map(int hw_event) | |||
89 | return intel_perfmon_event_map[hw_event]; | 150 | return intel_perfmon_event_map[hw_event]; |
90 | } | 151 | } |
91 | 152 | ||
153 | static __initconst const u64 snb_hw_cache_event_ids | ||
154 | [PERF_COUNT_HW_CACHE_MAX] | ||
155 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
156 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | ||
157 | { | ||
158 | [ C(L1D) ] = { | ||
159 | [ C(OP_READ) ] = { | ||
160 | [ C(RESULT_ACCESS) ] = 0xf1d0, /* MEM_UOP_RETIRED.LOADS */ | ||
161 | [ C(RESULT_MISS) ] = 0x0151, /* L1D.REPLACEMENT */ | ||
162 | }, | ||
163 | [ C(OP_WRITE) ] = { | ||
164 | [ C(RESULT_ACCESS) ] = 0xf2d0, /* MEM_UOP_RETIRED.STORES */ | ||
165 | [ C(RESULT_MISS) ] = 0x0851, /* L1D.ALL_M_REPLACEMENT */ | ||
166 | }, | ||
167 | [ C(OP_PREFETCH) ] = { | ||
168 | [ C(RESULT_ACCESS) ] = 0x0, | ||
169 | [ C(RESULT_MISS) ] = 0x024e, /* HW_PRE_REQ.DL1_MISS */ | ||
170 | }, | ||
171 | }, | ||
172 | [ C(L1I ) ] = { | ||
173 | [ C(OP_READ) ] = { | ||
174 | [ C(RESULT_ACCESS) ] = 0x0, | ||
175 | [ C(RESULT_MISS) ] = 0x0280, /* ICACHE.MISSES */ | ||
176 | }, | ||
177 | [ C(OP_WRITE) ] = { | ||
178 | [ C(RESULT_ACCESS) ] = -1, | ||
179 | [ C(RESULT_MISS) ] = -1, | ||
180 | }, | ||
181 | [ C(OP_PREFETCH) ] = { | ||
182 | [ C(RESULT_ACCESS) ] = 0x0, | ||
183 | [ C(RESULT_MISS) ] = 0x0, | ||
184 | }, | ||
185 | }, | ||
186 | [ C(LL ) ] = { | ||
187 | /* | ||
188 | * TBD: Need Off-core Response Performance Monitoring support | ||
189 | */ | ||
190 | [ C(OP_READ) ] = { | ||
191 | /* OFFCORE_RESPONSE_0.ANY_DATA.LOCAL_CACHE */ | ||
192 | [ C(RESULT_ACCESS) ] = 0x01b7, | ||
193 | /* OFFCORE_RESPONSE_1.ANY_DATA.ANY_LLC_MISS */ | ||
194 | [ C(RESULT_MISS) ] = 0x01bb, | ||
195 | }, | ||
196 | [ C(OP_WRITE) ] = { | ||
197 | /* OFFCORE_RESPONSE_0.ANY_RFO.LOCAL_CACHE */ | ||
198 | [ C(RESULT_ACCESS) ] = 0x01b7, | ||
199 | /* OFFCORE_RESPONSE_1.ANY_RFO.ANY_LLC_MISS */ | ||
200 | [ C(RESULT_MISS) ] = 0x01bb, | ||
201 | }, | ||
202 | [ C(OP_PREFETCH) ] = { | ||
203 | /* OFFCORE_RESPONSE_0.PREFETCH.LOCAL_CACHE */ | ||
204 | [ C(RESULT_ACCESS) ] = 0x01b7, | ||
205 | /* OFFCORE_RESPONSE_1.PREFETCH.ANY_LLC_MISS */ | ||
206 | [ C(RESULT_MISS) ] = 0x01bb, | ||
207 | }, | ||
208 | }, | ||
209 | [ C(DTLB) ] = { | ||
210 | [ C(OP_READ) ] = { | ||
211 | [ C(RESULT_ACCESS) ] = 0x81d0, /* MEM_UOP_RETIRED.ALL_LOADS */ | ||
212 | [ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.CAUSES_A_WALK */ | ||
213 | }, | ||
214 | [ C(OP_WRITE) ] = { | ||
215 | [ C(RESULT_ACCESS) ] = 0x82d0, /* MEM_UOP_RETIRED.ALL_STORES */ | ||
216 | [ C(RESULT_MISS) ] = 0x0149, /* DTLB_STORE_MISSES.MISS_CAUSES_A_WALK */ | ||
217 | }, | ||
218 | [ C(OP_PREFETCH) ] = { | ||
219 | [ C(RESULT_ACCESS) ] = 0x0, | ||
220 | [ C(RESULT_MISS) ] = 0x0, | ||
221 | }, | ||
222 | }, | ||
223 | [ C(ITLB) ] = { | ||
224 | [ C(OP_READ) ] = { | ||
225 | [ C(RESULT_ACCESS) ] = 0x1085, /* ITLB_MISSES.STLB_HIT */ | ||
226 | [ C(RESULT_MISS) ] = 0x0185, /* ITLB_MISSES.CAUSES_A_WALK */ | ||
227 | }, | ||
228 | [ C(OP_WRITE) ] = { | ||
229 | [ C(RESULT_ACCESS) ] = -1, | ||
230 | [ C(RESULT_MISS) ] = -1, | ||
231 | }, | ||
232 | [ C(OP_PREFETCH) ] = { | ||
233 | [ C(RESULT_ACCESS) ] = -1, | ||
234 | [ C(RESULT_MISS) ] = -1, | ||
235 | }, | ||
236 | }, | ||
237 | [ C(BPU ) ] = { | ||
238 | [ C(OP_READ) ] = { | ||
239 | [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */ | ||
240 | [ C(RESULT_MISS) ] = 0x00c5, /* BR_MISP_RETIRED.ALL_BRANCHES */ | ||
241 | }, | ||
242 | [ C(OP_WRITE) ] = { | ||
243 | [ C(RESULT_ACCESS) ] = -1, | ||
244 | [ C(RESULT_MISS) ] = -1, | ||
245 | }, | ||
246 | [ C(OP_PREFETCH) ] = { | ||
247 | [ C(RESULT_ACCESS) ] = -1, | ||
248 | [ C(RESULT_MISS) ] = -1, | ||
249 | }, | ||
250 | }, | ||
251 | }; | ||
252 | |||
92 | static __initconst const u64 westmere_hw_cache_event_ids | 253 | static __initconst const u64 westmere_hw_cache_event_ids |
93 | [PERF_COUNT_HW_CACHE_MAX] | 254 | [PERF_COUNT_HW_CACHE_MAX] |
94 | [PERF_COUNT_HW_CACHE_OP_MAX] | 255 | [PERF_COUNT_HW_CACHE_OP_MAX] |
@@ -124,16 +285,26 @@ static __initconst const u64 westmere_hw_cache_event_ids | |||
124 | }, | 285 | }, |
125 | [ C(LL ) ] = { | 286 | [ C(LL ) ] = { |
126 | [ C(OP_READ) ] = { | 287 | [ C(OP_READ) ] = { |
127 | [ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS */ | 288 | /* OFFCORE_RESPONSE_0.ANY_DATA.LOCAL_CACHE */ |
128 | [ C(RESULT_MISS) ] = 0x0224, /* L2_RQSTS.LD_MISS */ | 289 | [ C(RESULT_ACCESS) ] = 0x01b7, |
290 | /* OFFCORE_RESPONSE_1.ANY_DATA.ANY_LLC_MISS */ | ||
291 | [ C(RESULT_MISS) ] = 0x01bb, | ||
129 | }, | 292 | }, |
293 | /* | ||
294 | * Use RFO, not WRITEBACK, because a write miss would typically occur | ||
295 | * on RFO. | ||
296 | */ | ||
130 | [ C(OP_WRITE) ] = { | 297 | [ C(OP_WRITE) ] = { |
131 | [ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS */ | 298 | /* OFFCORE_RESPONSE_1.ANY_RFO.LOCAL_CACHE */ |
132 | [ C(RESULT_MISS) ] = 0x0824, /* L2_RQSTS.RFO_MISS */ | 299 | [ C(RESULT_ACCESS) ] = 0x01bb, |
300 | /* OFFCORE_RESPONSE_0.ANY_RFO.ANY_LLC_MISS */ | ||
301 | [ C(RESULT_MISS) ] = 0x01b7, | ||
133 | }, | 302 | }, |
134 | [ C(OP_PREFETCH) ] = { | 303 | [ C(OP_PREFETCH) ] = { |
135 | [ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference */ | 304 | /* OFFCORE_RESPONSE_0.PREFETCH.LOCAL_CACHE */ |
136 | [ C(RESULT_MISS) ] = 0x412e, /* LLC Misses */ | 305 | [ C(RESULT_ACCESS) ] = 0x01b7, |
306 | /* OFFCORE_RESPONSE_1.PREFETCH.ANY_LLC_MISS */ | ||
307 | [ C(RESULT_MISS) ] = 0x01bb, | ||
137 | }, | 308 | }, |
138 | }, | 309 | }, |
139 | [ C(DTLB) ] = { | 310 | [ C(DTLB) ] = { |
@@ -180,6 +351,39 @@ static __initconst const u64 westmere_hw_cache_event_ids | |||
180 | }, | 351 | }, |
181 | }; | 352 | }; |
182 | 353 | ||
354 | /* | ||
355 | * OFFCORE_RESPONSE MSR bits (subset), See IA32 SDM Vol 3 30.6.1.3 | ||
356 | */ | ||
357 | |||
358 | #define DMND_DATA_RD (1 << 0) | ||
359 | #define DMND_RFO (1 << 1) | ||
360 | #define DMND_WB (1 << 3) | ||
361 | #define PF_DATA_RD (1 << 4) | ||
362 | #define PF_DATA_RFO (1 << 5) | ||
363 | #define RESP_UNCORE_HIT (1 << 8) | ||
364 | #define RESP_MISS (0xf600) /* non uncore hit */ | ||
365 | |||
366 | static __initconst const u64 nehalem_hw_cache_extra_regs | ||
367 | [PERF_COUNT_HW_CACHE_MAX] | ||
368 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
369 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | ||
370 | { | ||
371 | [ C(LL ) ] = { | ||
372 | [ C(OP_READ) ] = { | ||
373 | [ C(RESULT_ACCESS) ] = DMND_DATA_RD|RESP_UNCORE_HIT, | ||
374 | [ C(RESULT_MISS) ] = DMND_DATA_RD|RESP_MISS, | ||
375 | }, | ||
376 | [ C(OP_WRITE) ] = { | ||
377 | [ C(RESULT_ACCESS) ] = DMND_RFO|DMND_WB|RESP_UNCORE_HIT, | ||
378 | [ C(RESULT_MISS) ] = DMND_RFO|DMND_WB|RESP_MISS, | ||
379 | }, | ||
380 | [ C(OP_PREFETCH) ] = { | ||
381 | [ C(RESULT_ACCESS) ] = PF_DATA_RD|PF_DATA_RFO|RESP_UNCORE_HIT, | ||
382 | [ C(RESULT_MISS) ] = PF_DATA_RD|PF_DATA_RFO|RESP_MISS, | ||
383 | }, | ||
384 | } | ||
385 | }; | ||
386 | |||
183 | static __initconst const u64 nehalem_hw_cache_event_ids | 387 | static __initconst const u64 nehalem_hw_cache_event_ids |
184 | [PERF_COUNT_HW_CACHE_MAX] | 388 | [PERF_COUNT_HW_CACHE_MAX] |
185 | [PERF_COUNT_HW_CACHE_OP_MAX] | 389 | [PERF_COUNT_HW_CACHE_OP_MAX] |
@@ -215,16 +419,26 @@ static __initconst const u64 nehalem_hw_cache_event_ids | |||
215 | }, | 419 | }, |
216 | [ C(LL ) ] = { | 420 | [ C(LL ) ] = { |
217 | [ C(OP_READ) ] = { | 421 | [ C(OP_READ) ] = { |
218 | [ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS */ | 422 | /* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */ |
219 | [ C(RESULT_MISS) ] = 0x0224, /* L2_RQSTS.LD_MISS */ | 423 | [ C(RESULT_ACCESS) ] = 0x01b7, |
424 | /* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */ | ||
425 | [ C(RESULT_MISS) ] = 0x01b7, | ||
220 | }, | 426 | }, |
427 | /* | ||
428 | * Use RFO, not WRITEBACK, because a write miss would typically occur | ||
429 | * on RFO. | ||
430 | */ | ||
221 | [ C(OP_WRITE) ] = { | 431 | [ C(OP_WRITE) ] = { |
222 | [ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS */ | 432 | /* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */ |
223 | [ C(RESULT_MISS) ] = 0x0824, /* L2_RQSTS.RFO_MISS */ | 433 | [ C(RESULT_ACCESS) ] = 0x01b7, |
434 | /* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */ | ||
435 | [ C(RESULT_MISS) ] = 0x01b7, | ||
224 | }, | 436 | }, |
225 | [ C(OP_PREFETCH) ] = { | 437 | [ C(OP_PREFETCH) ] = { |
226 | [ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference */ | 438 | /* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */ |
227 | [ C(RESULT_MISS) ] = 0x412e, /* LLC Misses */ | 439 | [ C(RESULT_ACCESS) ] = 0x01b7, |
440 | /* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */ | ||
441 | [ C(RESULT_MISS) ] = 0x01b7, | ||
228 | }, | 442 | }, |
229 | }, | 443 | }, |
230 | [ C(DTLB) ] = { | 444 | [ C(DTLB) ] = { |
@@ -691,8 +905,8 @@ static void intel_pmu_reset(void) | |||
691 | printk("clearing PMU state on CPU#%d\n", smp_processor_id()); | 905 | printk("clearing PMU state on CPU#%d\n", smp_processor_id()); |
692 | 906 | ||
693 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { | 907 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { |
694 | checking_wrmsrl(x86_pmu.eventsel + idx, 0ull); | 908 | checking_wrmsrl(x86_pmu_config_addr(idx), 0ull); |
695 | checking_wrmsrl(x86_pmu.perfctr + idx, 0ull); | 909 | checking_wrmsrl(x86_pmu_event_addr(idx), 0ull); |
696 | } | 910 | } |
697 | for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) | 911 | for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) |
698 | checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull); | 912 | checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull); |
@@ -794,6 +1008,67 @@ intel_bts_constraints(struct perf_event *event) | |||
794 | } | 1008 | } |
795 | 1009 | ||
796 | static struct event_constraint * | 1010 | static struct event_constraint * |
1011 | intel_percore_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) | ||
1012 | { | ||
1013 | struct hw_perf_event *hwc = &event->hw; | ||
1014 | unsigned int e = hwc->config & ARCH_PERFMON_EVENTSEL_EVENT; | ||
1015 | struct event_constraint *c; | ||
1016 | struct intel_percore *pc; | ||
1017 | struct er_account *era; | ||
1018 | int i; | ||
1019 | int free_slot; | ||
1020 | int found; | ||
1021 | |||
1022 | if (!x86_pmu.percore_constraints || hwc->extra_alloc) | ||
1023 | return NULL; | ||
1024 | |||
1025 | for (c = x86_pmu.percore_constraints; c->cmask; c++) { | ||
1026 | if (e != c->code) | ||
1027 | continue; | ||
1028 | |||
1029 | /* | ||
1030 | * Allocate resource per core. | ||
1031 | */ | ||
1032 | pc = cpuc->per_core; | ||
1033 | if (!pc) | ||
1034 | break; | ||
1035 | c = &emptyconstraint; | ||
1036 | raw_spin_lock(&pc->lock); | ||
1037 | free_slot = -1; | ||
1038 | found = 0; | ||
1039 | for (i = 0; i < MAX_EXTRA_REGS; i++) { | ||
1040 | era = &pc->regs[i]; | ||
1041 | if (era->ref > 0 && hwc->extra_reg == era->extra_reg) { | ||
1042 | /* Allow sharing same config */ | ||
1043 | if (hwc->extra_config == era->extra_config) { | ||
1044 | era->ref++; | ||
1045 | cpuc->percore_used = 1; | ||
1046 | hwc->extra_alloc = 1; | ||
1047 | c = NULL; | ||
1048 | } | ||
1049 | /* else conflict */ | ||
1050 | found = 1; | ||
1051 | break; | ||
1052 | } else if (era->ref == 0 && free_slot == -1) | ||
1053 | free_slot = i; | ||
1054 | } | ||
1055 | if (!found && free_slot != -1) { | ||
1056 | era = &pc->regs[free_slot]; | ||
1057 | era->ref = 1; | ||
1058 | era->extra_reg = hwc->extra_reg; | ||
1059 | era->extra_config = hwc->extra_config; | ||
1060 | cpuc->percore_used = 1; | ||
1061 | hwc->extra_alloc = 1; | ||
1062 | c = NULL; | ||
1063 | } | ||
1064 | raw_spin_unlock(&pc->lock); | ||
1065 | return c; | ||
1066 | } | ||
1067 | |||
1068 | return NULL; | ||
1069 | } | ||
1070 | |||
1071 | static struct event_constraint * | ||
797 | intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) | 1072 | intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) |
798 | { | 1073 | { |
799 | struct event_constraint *c; | 1074 | struct event_constraint *c; |
@@ -806,9 +1081,51 @@ intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event | |||
806 | if (c) | 1081 | if (c) |
807 | return c; | 1082 | return c; |
808 | 1083 | ||
1084 | c = intel_percore_constraints(cpuc, event); | ||
1085 | if (c) | ||
1086 | return c; | ||
1087 | |||
809 | return x86_get_event_constraints(cpuc, event); | 1088 | return x86_get_event_constraints(cpuc, event); |
810 | } | 1089 | } |
811 | 1090 | ||
1091 | static void intel_put_event_constraints(struct cpu_hw_events *cpuc, | ||
1092 | struct perf_event *event) | ||
1093 | { | ||
1094 | struct extra_reg *er; | ||
1095 | struct intel_percore *pc; | ||
1096 | struct er_account *era; | ||
1097 | struct hw_perf_event *hwc = &event->hw; | ||
1098 | int i, allref; | ||
1099 | |||
1100 | if (!cpuc->percore_used) | ||
1101 | return; | ||
1102 | |||
1103 | for (er = x86_pmu.extra_regs; er->msr; er++) { | ||
1104 | if (er->event != (hwc->config & er->config_mask)) | ||
1105 | continue; | ||
1106 | |||
1107 | pc = cpuc->per_core; | ||
1108 | raw_spin_lock(&pc->lock); | ||
1109 | for (i = 0; i < MAX_EXTRA_REGS; i++) { | ||
1110 | era = &pc->regs[i]; | ||
1111 | if (era->ref > 0 && | ||
1112 | era->extra_config == hwc->extra_config && | ||
1113 | era->extra_reg == er->msr) { | ||
1114 | era->ref--; | ||
1115 | hwc->extra_alloc = 0; | ||
1116 | break; | ||
1117 | } | ||
1118 | } | ||
1119 | allref = 0; | ||
1120 | for (i = 0; i < MAX_EXTRA_REGS; i++) | ||
1121 | allref += pc->regs[i].ref; | ||
1122 | if (allref == 0) | ||
1123 | cpuc->percore_used = 0; | ||
1124 | raw_spin_unlock(&pc->lock); | ||
1125 | break; | ||
1126 | } | ||
1127 | } | ||
1128 | |||
812 | static int intel_pmu_hw_config(struct perf_event *event) | 1129 | static int intel_pmu_hw_config(struct perf_event *event) |
813 | { | 1130 | { |
814 | int ret = x86_pmu_hw_config(event); | 1131 | int ret = x86_pmu_hw_config(event); |
@@ -880,20 +1197,67 @@ static __initconst const struct x86_pmu core_pmu = { | |||
880 | */ | 1197 | */ |
881 | .max_period = (1ULL << 31) - 1, | 1198 | .max_period = (1ULL << 31) - 1, |
882 | .get_event_constraints = intel_get_event_constraints, | 1199 | .get_event_constraints = intel_get_event_constraints, |
1200 | .put_event_constraints = intel_put_event_constraints, | ||
883 | .event_constraints = intel_core_event_constraints, | 1201 | .event_constraints = intel_core_event_constraints, |
884 | }; | 1202 | }; |
885 | 1203 | ||
1204 | static int intel_pmu_cpu_prepare(int cpu) | ||
1205 | { | ||
1206 | struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); | ||
1207 | |||
1208 | if (!cpu_has_ht_siblings()) | ||
1209 | return NOTIFY_OK; | ||
1210 | |||
1211 | cpuc->per_core = kzalloc_node(sizeof(struct intel_percore), | ||
1212 | GFP_KERNEL, cpu_to_node(cpu)); | ||
1213 | if (!cpuc->per_core) | ||
1214 | return NOTIFY_BAD; | ||
1215 | |||
1216 | raw_spin_lock_init(&cpuc->per_core->lock); | ||
1217 | cpuc->per_core->core_id = -1; | ||
1218 | return NOTIFY_OK; | ||
1219 | } | ||
1220 | |||
886 | static void intel_pmu_cpu_starting(int cpu) | 1221 | static void intel_pmu_cpu_starting(int cpu) |
887 | { | 1222 | { |
1223 | struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); | ||
1224 | int core_id = topology_core_id(cpu); | ||
1225 | int i; | ||
1226 | |||
888 | init_debug_store_on_cpu(cpu); | 1227 | init_debug_store_on_cpu(cpu); |
889 | /* | 1228 | /* |
890 | * Deal with CPUs that don't clear their LBRs on power-up. | 1229 | * Deal with CPUs that don't clear their LBRs on power-up. |
891 | */ | 1230 | */ |
892 | intel_pmu_lbr_reset(); | 1231 | intel_pmu_lbr_reset(); |
1232 | |||
1233 | if (!cpu_has_ht_siblings()) | ||
1234 | return; | ||
1235 | |||
1236 | for_each_cpu(i, topology_thread_cpumask(cpu)) { | ||
1237 | struct intel_percore *pc = per_cpu(cpu_hw_events, i).per_core; | ||
1238 | |||
1239 | if (pc && pc->core_id == core_id) { | ||
1240 | kfree(cpuc->per_core); | ||
1241 | cpuc->per_core = pc; | ||
1242 | break; | ||
1243 | } | ||
1244 | } | ||
1245 | |||
1246 | cpuc->per_core->core_id = core_id; | ||
1247 | cpuc->per_core->refcnt++; | ||
893 | } | 1248 | } |
894 | 1249 | ||
895 | static void intel_pmu_cpu_dying(int cpu) | 1250 | static void intel_pmu_cpu_dying(int cpu) |
896 | { | 1251 | { |
1252 | struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); | ||
1253 | struct intel_percore *pc = cpuc->per_core; | ||
1254 | |||
1255 | if (pc) { | ||
1256 | if (pc->core_id == -1 || --pc->refcnt == 0) | ||
1257 | kfree(pc); | ||
1258 | cpuc->per_core = NULL; | ||
1259 | } | ||
1260 | |||
897 | fini_debug_store_on_cpu(cpu); | 1261 | fini_debug_store_on_cpu(cpu); |
898 | } | 1262 | } |
899 | 1263 | ||
@@ -918,7 +1282,9 @@ static __initconst const struct x86_pmu intel_pmu = { | |||
918 | */ | 1282 | */ |
919 | .max_period = (1ULL << 31) - 1, | 1283 | .max_period = (1ULL << 31) - 1, |
920 | .get_event_constraints = intel_get_event_constraints, | 1284 | .get_event_constraints = intel_get_event_constraints, |
1285 | .put_event_constraints = intel_put_event_constraints, | ||
921 | 1286 | ||
1287 | .cpu_prepare = intel_pmu_cpu_prepare, | ||
922 | .cpu_starting = intel_pmu_cpu_starting, | 1288 | .cpu_starting = intel_pmu_cpu_starting, |
923 | .cpu_dying = intel_pmu_cpu_dying, | 1289 | .cpu_dying = intel_pmu_cpu_dying, |
924 | }; | 1290 | }; |
@@ -1024,6 +1390,7 @@ static __init int intel_pmu_init(void) | |||
1024 | intel_pmu_lbr_init_core(); | 1390 | intel_pmu_lbr_init_core(); |
1025 | 1391 | ||
1026 | x86_pmu.event_constraints = intel_core2_event_constraints; | 1392 | x86_pmu.event_constraints = intel_core2_event_constraints; |
1393 | x86_pmu.pebs_constraints = intel_core2_pebs_event_constraints; | ||
1027 | pr_cont("Core2 events, "); | 1394 | pr_cont("Core2 events, "); |
1028 | break; | 1395 | break; |
1029 | 1396 | ||
@@ -1032,11 +1399,16 @@ static __init int intel_pmu_init(void) | |||
1032 | case 46: /* 45 nm nehalem-ex, "Beckton" */ | 1399 | case 46: /* 45 nm nehalem-ex, "Beckton" */ |
1033 | memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids, | 1400 | memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids, |
1034 | sizeof(hw_cache_event_ids)); | 1401 | sizeof(hw_cache_event_ids)); |
1402 | memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs, | ||
1403 | sizeof(hw_cache_extra_regs)); | ||
1035 | 1404 | ||
1036 | intel_pmu_lbr_init_nhm(); | 1405 | intel_pmu_lbr_init_nhm(); |
1037 | 1406 | ||
1038 | x86_pmu.event_constraints = intel_nehalem_event_constraints; | 1407 | x86_pmu.event_constraints = intel_nehalem_event_constraints; |
1408 | x86_pmu.pebs_constraints = intel_nehalem_pebs_event_constraints; | ||
1409 | x86_pmu.percore_constraints = intel_nehalem_percore_constraints; | ||
1039 | x86_pmu.enable_all = intel_pmu_nhm_enable_all; | 1410 | x86_pmu.enable_all = intel_pmu_nhm_enable_all; |
1411 | x86_pmu.extra_regs = intel_nehalem_extra_regs; | ||
1040 | pr_cont("Nehalem events, "); | 1412 | pr_cont("Nehalem events, "); |
1041 | break; | 1413 | break; |
1042 | 1414 | ||
@@ -1047,6 +1419,7 @@ static __init int intel_pmu_init(void) | |||
1047 | intel_pmu_lbr_init_atom(); | 1419 | intel_pmu_lbr_init_atom(); |
1048 | 1420 | ||
1049 | x86_pmu.event_constraints = intel_gen_event_constraints; | 1421 | x86_pmu.event_constraints = intel_gen_event_constraints; |
1422 | x86_pmu.pebs_constraints = intel_atom_pebs_event_constraints; | ||
1050 | pr_cont("Atom events, "); | 1423 | pr_cont("Atom events, "); |
1051 | break; | 1424 | break; |
1052 | 1425 | ||
@@ -1054,14 +1427,30 @@ static __init int intel_pmu_init(void) | |||
1054 | case 44: /* 32 nm nehalem, "Gulftown" */ | 1427 | case 44: /* 32 nm nehalem, "Gulftown" */ |
1055 | memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids, | 1428 | memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids, |
1056 | sizeof(hw_cache_event_ids)); | 1429 | sizeof(hw_cache_event_ids)); |
1430 | memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs, | ||
1431 | sizeof(hw_cache_extra_regs)); | ||
1057 | 1432 | ||
1058 | intel_pmu_lbr_init_nhm(); | 1433 | intel_pmu_lbr_init_nhm(); |
1059 | 1434 | ||
1060 | x86_pmu.event_constraints = intel_westmere_event_constraints; | 1435 | x86_pmu.event_constraints = intel_westmere_event_constraints; |
1436 | x86_pmu.percore_constraints = intel_westmere_percore_constraints; | ||
1061 | x86_pmu.enable_all = intel_pmu_nhm_enable_all; | 1437 | x86_pmu.enable_all = intel_pmu_nhm_enable_all; |
1438 | x86_pmu.pebs_constraints = intel_westmere_pebs_event_constraints; | ||
1439 | x86_pmu.extra_regs = intel_westmere_extra_regs; | ||
1062 | pr_cont("Westmere events, "); | 1440 | pr_cont("Westmere events, "); |
1063 | break; | 1441 | break; |
1064 | 1442 | ||
1443 | case 42: /* SandyBridge */ | ||
1444 | memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, | ||
1445 | sizeof(hw_cache_event_ids)); | ||
1446 | |||
1447 | intel_pmu_lbr_init_nhm(); | ||
1448 | |||
1449 | x86_pmu.event_constraints = intel_snb_event_constraints; | ||
1450 | x86_pmu.pebs_constraints = intel_snb_pebs_events; | ||
1451 | pr_cont("SandyBridge events, "); | ||
1452 | break; | ||
1453 | |||
1065 | default: | 1454 | default: |
1066 | /* | 1455 | /* |
1067 | * default constraints for v2 and up | 1456 | * default constraints for v2 and up |