Diffstat (limited to 'arch/x86/kernel/cpu/perf_event.c')
-rw-r--r-- | arch/x86/kernel/cpu/perf_event.c | 1555
1 files changed, 193 insertions, 1362 deletions
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index a920f173a220..db5bdc8addf8 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -21,6 +21,7 @@ | |||
21 | #include <linux/kdebug.h> | 21 | #include <linux/kdebug.h> |
22 | #include <linux/sched.h> | 22 | #include <linux/sched.h> |
23 | #include <linux/uaccess.h> | 23 | #include <linux/uaccess.h> |
24 | #include <linux/slab.h> | ||
24 | #include <linux/highmem.h> | 25 | #include <linux/highmem.h> |
25 | #include <linux/cpu.h> | 26 | #include <linux/cpu.h> |
26 | #include <linux/bitops.h> | 27 | #include <linux/bitops.h> |
@@ -28,6 +29,7 @@ | |||
28 | #include <asm/apic.h> | 29 | #include <asm/apic.h> |
29 | #include <asm/stacktrace.h> | 30 | #include <asm/stacktrace.h> |
30 | #include <asm/nmi.h> | 31 | #include <asm/nmi.h> |
32 | #include <asm/compat.h> | ||
31 | 33 | ||
32 | static u64 perf_event_mask __read_mostly; | 34 | static u64 perf_event_mask __read_mostly; |
33 | 35 | ||
@@ -73,13 +75,20 @@ struct debug_store { | |||
73 | struct event_constraint { | 75 | struct event_constraint { |
74 | union { | 76 | union { |
75 | unsigned long idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; | 77 | unsigned long idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; |
76 | u64 idxmsk64[1]; | 78 | u64 idxmsk64; |
77 | }; | 79 | }; |
78 | int code; | 80 | u64 code; |
79 | int cmask; | 81 | u64 cmask; |
80 | int weight; | 82 | int weight; |
81 | }; | 83 | }; |
82 | 84 | ||
85 | struct amd_nb { | ||
86 | int nb_id; /* NorthBridge id */ | ||
87 | int refcnt; /* reference count */ | ||
88 | struct perf_event *owners[X86_PMC_IDX_MAX]; | ||
89 | struct event_constraint event_constraints[X86_PMC_IDX_MAX]; | ||
90 | }; | ||
91 | |||
83 | struct cpu_hw_events { | 92 | struct cpu_hw_events { |
84 | struct perf_event *events[X86_PMC_IDX_MAX]; /* in counter order */ | 93 | struct perf_event *events[X86_PMC_IDX_MAX]; /* in counter order */ |
85 | unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; | 94 | unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; |
@@ -92,10 +101,11 @@ struct cpu_hw_events { | |||
92 | int assign[X86_PMC_IDX_MAX]; /* event to counter assignment */ | 101 | int assign[X86_PMC_IDX_MAX]; /* event to counter assignment */ |
93 | u64 tags[X86_PMC_IDX_MAX]; | 102 | u64 tags[X86_PMC_IDX_MAX]; |
94 | struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */ | 103 | struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */ |
104 | struct amd_nb *amd_nb; | ||
95 | }; | 105 | }; |
96 | 106 | ||
97 | #define __EVENT_CONSTRAINT(c, n, m, w) {\ | 107 | #define __EVENT_CONSTRAINT(c, n, m, w) {\ |
98 | { .idxmsk64[0] = (n) }, \ | 108 | { .idxmsk64 = (n) }, \ |
99 | .code = (c), \ | 109 | .code = (c), \ |
100 | .cmask = (m), \ | 110 | .cmask = (m), \ |
101 | .weight = (w), \ | 111 | .weight = (w), \ |
@@ -108,7 +118,7 @@ struct cpu_hw_events { | |||
108 | EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVTSEL_MASK) | 118 | EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVTSEL_MASK) |
109 | 119 | ||
110 | #define FIXED_EVENT_CONSTRAINT(c, n) \ | 120 | #define FIXED_EVENT_CONSTRAINT(c, n) \ |
111 | EVENT_CONSTRAINT(c, n, INTEL_ARCH_FIXED_MASK) | 121 | EVENT_CONSTRAINT(c, (1ULL << (32+n)), INTEL_ARCH_FIXED_MASK) |
112 | 122 | ||
113 | #define EVENT_CONSTRAINT_END \ | 123 | #define EVENT_CONSTRAINT_END \ |
114 | EVENT_CONSTRAINT(0, 0, 0) | 124 | EVENT_CONSTRAINT(0, 0, 0) |
@@ -125,8 +135,8 @@ struct x86_pmu { | |||
125 | int (*handle_irq)(struct pt_regs *); | 135 | int (*handle_irq)(struct pt_regs *); |
126 | void (*disable_all)(void); | 136 | void (*disable_all)(void); |
127 | void (*enable_all)(void); | 137 | void (*enable_all)(void); |
128 | void (*enable)(struct hw_perf_event *, int); | 138 | void (*enable)(struct perf_event *); |
129 | void (*disable)(struct hw_perf_event *, int); | 139 | void (*disable)(struct perf_event *); |
130 | unsigned eventsel; | 140 | unsigned eventsel; |
131 | unsigned perfctr; | 141 | unsigned perfctr; |
132 | u64 (*event_map)(int); | 142 | u64 (*event_map)(int); |
@@ -149,6 +159,11 @@ struct x86_pmu { | |||
149 | void (*put_event_constraints)(struct cpu_hw_events *cpuc, | 159 | void (*put_event_constraints)(struct cpu_hw_events *cpuc, |
150 | struct perf_event *event); | 160 | struct perf_event *event); |
151 | struct event_constraint *event_constraints; | 161 | struct event_constraint *event_constraints; |
162 | |||
163 | int (*cpu_prepare)(int cpu); | ||
164 | void (*cpu_starting)(int cpu); | ||
165 | void (*cpu_dying)(int cpu); | ||
166 | void (*cpu_dead)(int cpu); | ||
152 | }; | 167 | }; |
153 | 168 | ||
154 | static struct x86_pmu x86_pmu __read_mostly; | 169 | static struct x86_pmu x86_pmu __read_mostly; |
@@ -157,142 +172,7 @@ static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { | |||
157 | .enabled = 1, | 172 | .enabled = 1, |
158 | }; | 173 | }; |
159 | 174 | ||
160 | static int x86_perf_event_set_period(struct perf_event *event, | 175 | static int x86_perf_event_set_period(struct perf_event *event); |
161 | struct hw_perf_event *hwc, int idx); | ||
162 | |||
163 | /* | ||
164 | * Not sure about some of these | ||
165 | */ | ||
166 | static const u64 p6_perfmon_event_map[] = | ||
167 | { | ||
168 | [PERF_COUNT_HW_CPU_CYCLES] = 0x0079, | ||
169 | [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, | ||
170 | [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0f2e, | ||
171 | [PERF_COUNT_HW_CACHE_MISSES] = 0x012e, | ||
172 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, | ||
173 | [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, | ||
174 | [PERF_COUNT_HW_BUS_CYCLES] = 0x0062, | ||
175 | }; | ||
176 | |||
177 | static u64 p6_pmu_event_map(int hw_event) | ||
178 | { | ||
179 | return p6_perfmon_event_map[hw_event]; | ||
180 | } | ||
181 | |||
182 | /* | ||
183 | * Event setting that is specified not to count anything. | ||
184 | * We use this to effectively disable a counter. | ||
185 | * | ||
186 | * L2_RQSTS with 0 MESI unit mask. | ||
187 | */ | ||
188 | #define P6_NOP_EVENT 0x0000002EULL | ||
189 | |||
190 | static u64 p6_pmu_raw_event(u64 hw_event) | ||
191 | { | ||
192 | #define P6_EVNTSEL_EVENT_MASK 0x000000FFULL | ||
193 | #define P6_EVNTSEL_UNIT_MASK 0x0000FF00ULL | ||
194 | #define P6_EVNTSEL_EDGE_MASK 0x00040000ULL | ||
195 | #define P6_EVNTSEL_INV_MASK 0x00800000ULL | ||
196 | #define P6_EVNTSEL_REG_MASK 0xFF000000ULL | ||
197 | |||
198 | #define P6_EVNTSEL_MASK \ | ||
199 | (P6_EVNTSEL_EVENT_MASK | \ | ||
200 | P6_EVNTSEL_UNIT_MASK | \ | ||
201 | P6_EVNTSEL_EDGE_MASK | \ | ||
202 | P6_EVNTSEL_INV_MASK | \ | ||
203 | P6_EVNTSEL_REG_MASK) | ||
204 | |||
205 | return hw_event & P6_EVNTSEL_MASK; | ||
206 | } | ||
207 | |||
208 | static struct event_constraint intel_p6_event_constraints[] = | ||
209 | { | ||
210 | INTEL_EVENT_CONSTRAINT(0xc1, 0x1), /* FLOPS */ | ||
211 | INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */ | ||
212 | INTEL_EVENT_CONSTRAINT(0x11, 0x1), /* FP_ASSIST */ | ||
213 | INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */ | ||
214 | INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */ | ||
215 | INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */ | ||
216 | EVENT_CONSTRAINT_END | ||
217 | }; | ||
218 | |||
219 | /* | ||
220 | * Intel PerfMon v3. Used on Core2 and later. | ||
221 | */ | ||
222 | static const u64 intel_perfmon_event_map[] = | ||
223 | { | ||
224 | [PERF_COUNT_HW_CPU_CYCLES] = 0x003c, | ||
225 | [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, | ||
226 | [PERF_COUNT_HW_CACHE_REFERENCES] = 0x4f2e, | ||
227 | [PERF_COUNT_HW_CACHE_MISSES] = 0x412e, | ||
228 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, | ||
229 | [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, | ||
230 | [PERF_COUNT_HW_BUS_CYCLES] = 0x013c, | ||
231 | }; | ||
232 | |||
233 | static struct event_constraint intel_core_event_constraints[] = | ||
234 | { | ||
235 | INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */ | ||
236 | INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */ | ||
237 | INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */ | ||
238 | INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */ | ||
239 | INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */ | ||
240 | INTEL_EVENT_CONSTRAINT(0xc1, 0x1), /* FP_COMP_INSTR_RET */ | ||
241 | EVENT_CONSTRAINT_END | ||
242 | }; | ||
243 | |||
244 | static struct event_constraint intel_core2_event_constraints[] = | ||
245 | { | ||
246 | FIXED_EVENT_CONSTRAINT(0xc0, (0x3|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */ | ||
247 | FIXED_EVENT_CONSTRAINT(0x3c, (0x3|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */ | ||
248 | INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */ | ||
249 | INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */ | ||
250 | INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */ | ||
251 | INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */ | ||
252 | INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */ | ||
253 | INTEL_EVENT_CONSTRAINT(0x18, 0x1), /* IDLE_DURING_DIV */ | ||
254 | INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */ | ||
255 | INTEL_EVENT_CONSTRAINT(0xa1, 0x1), /* RS_UOPS_DISPATCH_CYCLES */ | ||
256 | INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED */ | ||
257 | EVENT_CONSTRAINT_END | ||
258 | }; | ||
259 | |||
260 | static struct event_constraint intel_nehalem_event_constraints[] = | ||
261 | { | ||
262 | FIXED_EVENT_CONSTRAINT(0xc0, (0xf|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */ | ||
263 | FIXED_EVENT_CONSTRAINT(0x3c, (0xf|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */ | ||
264 | INTEL_EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */ | ||
265 | INTEL_EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */ | ||
266 | INTEL_EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */ | ||
267 | INTEL_EVENT_CONSTRAINT(0x43, 0x3), /* L1D_ALL_REF */ | ||
268 | INTEL_EVENT_CONSTRAINT(0x48, 0x3), /* L1D_PEND_MISS */ | ||
269 | INTEL_EVENT_CONSTRAINT(0x4e, 0x3), /* L1D_PREFETCH */ | ||
270 | INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */ | ||
271 | INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */ | ||
272 | EVENT_CONSTRAINT_END | ||
273 | }; | ||
274 | |||
275 | static struct event_constraint intel_westmere_event_constraints[] = | ||
276 | { | ||
277 | FIXED_EVENT_CONSTRAINT(0xc0, (0xf|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */ | ||
278 | FIXED_EVENT_CONSTRAINT(0x3c, (0xf|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */ | ||
279 | INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */ | ||
280 | INTEL_EVENT_CONSTRAINT(0x60, 0x1), /* OFFCORE_REQUESTS_OUTSTANDING */ | ||
281 | INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */ | ||
282 | EVENT_CONSTRAINT_END | ||
283 | }; | ||
284 | |||
285 | static struct event_constraint intel_gen_event_constraints[] = | ||
286 | { | ||
287 | FIXED_EVENT_CONSTRAINT(0xc0, (0x3|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */ | ||
288 | FIXED_EVENT_CONSTRAINT(0x3c, (0x3|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */ | ||
289 | EVENT_CONSTRAINT_END | ||
290 | }; | ||
291 | |||
292 | static u64 intel_pmu_event_map(int hw_event) | ||
293 | { | ||
294 | return intel_perfmon_event_map[hw_event]; | ||
295 | } | ||
296 | 176 | ||
297 | /* | 177 | /* |
298 | * Generalized hw caching related hw_event table, filled | 178 | * Generalized hw caching related hw_event table, filled |
@@ -309,526 +189,18 @@ static u64 __read_mostly hw_cache_event_ids | |||
309 | [PERF_COUNT_HW_CACHE_OP_MAX] | 189 | [PERF_COUNT_HW_CACHE_OP_MAX] |
310 | [PERF_COUNT_HW_CACHE_RESULT_MAX]; | 190 | [PERF_COUNT_HW_CACHE_RESULT_MAX]; |
311 | 191 | ||
312 | static __initconst u64 westmere_hw_cache_event_ids | ||
313 | [PERF_COUNT_HW_CACHE_MAX] | ||
314 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
315 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | ||
316 | { | ||
317 | [ C(L1D) ] = { | ||
318 | [ C(OP_READ) ] = { | ||
319 | [ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS */ | ||
320 | [ C(RESULT_MISS) ] = 0x0151, /* L1D.REPL */ | ||
321 | }, | ||
322 | [ C(OP_WRITE) ] = { | ||
323 | [ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETURED.STORES */ | ||
324 | [ C(RESULT_MISS) ] = 0x0251, /* L1D.M_REPL */ | ||
325 | }, | ||
326 | [ C(OP_PREFETCH) ] = { | ||
327 | [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS */ | ||
328 | [ C(RESULT_MISS) ] = 0x024e, /* L1D_PREFETCH.MISS */ | ||
329 | }, | ||
330 | }, | ||
331 | [ C(L1I ) ] = { | ||
332 | [ C(OP_READ) ] = { | ||
333 | [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */ | ||
334 | [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */ | ||
335 | }, | ||
336 | [ C(OP_WRITE) ] = { | ||
337 | [ C(RESULT_ACCESS) ] = -1, | ||
338 | [ C(RESULT_MISS) ] = -1, | ||
339 | }, | ||
340 | [ C(OP_PREFETCH) ] = { | ||
341 | [ C(RESULT_ACCESS) ] = 0x0, | ||
342 | [ C(RESULT_MISS) ] = 0x0, | ||
343 | }, | ||
344 | }, | ||
345 | [ C(LL ) ] = { | ||
346 | [ C(OP_READ) ] = { | ||
347 | [ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS */ | ||
348 | [ C(RESULT_MISS) ] = 0x0224, /* L2_RQSTS.LD_MISS */ | ||
349 | }, | ||
350 | [ C(OP_WRITE) ] = { | ||
351 | [ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS */ | ||
352 | [ C(RESULT_MISS) ] = 0x0824, /* L2_RQSTS.RFO_MISS */ | ||
353 | }, | ||
354 | [ C(OP_PREFETCH) ] = { | ||
355 | [ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference */ | ||
356 | [ C(RESULT_MISS) ] = 0x412e, /* LLC Misses */ | ||
357 | }, | ||
358 | }, | ||
359 | [ C(DTLB) ] = { | ||
360 | [ C(OP_READ) ] = { | ||
361 | [ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS */ | ||
362 | [ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.ANY */ | ||
363 | }, | ||
364 | [ C(OP_WRITE) ] = { | ||
365 | [ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETURED.STORES */ | ||
366 | [ C(RESULT_MISS) ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS */ | ||
367 | }, | ||
368 | [ C(OP_PREFETCH) ] = { | ||
369 | [ C(RESULT_ACCESS) ] = 0x0, | ||
370 | [ C(RESULT_MISS) ] = 0x0, | ||
371 | }, | ||
372 | }, | ||
373 | [ C(ITLB) ] = { | ||
374 | [ C(OP_READ) ] = { | ||
375 | [ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P */ | ||
376 | [ C(RESULT_MISS) ] = 0x0185, /* ITLB_MISSES.ANY */ | ||
377 | }, | ||
378 | [ C(OP_WRITE) ] = { | ||
379 | [ C(RESULT_ACCESS) ] = -1, | ||
380 | [ C(RESULT_MISS) ] = -1, | ||
381 | }, | ||
382 | [ C(OP_PREFETCH) ] = { | ||
383 | [ C(RESULT_ACCESS) ] = -1, | ||
384 | [ C(RESULT_MISS) ] = -1, | ||
385 | }, | ||
386 | }, | ||
387 | [ C(BPU ) ] = { | ||
388 | [ C(OP_READ) ] = { | ||
389 | [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */ | ||
390 | [ C(RESULT_MISS) ] = 0x03e8, /* BPU_CLEARS.ANY */ | ||
391 | }, | ||
392 | [ C(OP_WRITE) ] = { | ||
393 | [ C(RESULT_ACCESS) ] = -1, | ||
394 | [ C(RESULT_MISS) ] = -1, | ||
395 | }, | ||
396 | [ C(OP_PREFETCH) ] = { | ||
397 | [ C(RESULT_ACCESS) ] = -1, | ||
398 | [ C(RESULT_MISS) ] = -1, | ||
399 | }, | ||
400 | }, | ||
401 | }; | ||
402 | |||
403 | static __initconst u64 nehalem_hw_cache_event_ids | ||
404 | [PERF_COUNT_HW_CACHE_MAX] | ||
405 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
406 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | ||
407 | { | ||
408 | [ C(L1D) ] = { | ||
409 | [ C(OP_READ) ] = { | ||
410 | [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI */ | ||
411 | [ C(RESULT_MISS) ] = 0x0140, /* L1D_CACHE_LD.I_STATE */ | ||
412 | }, | ||
413 | [ C(OP_WRITE) ] = { | ||
414 | [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI */ | ||
415 | [ C(RESULT_MISS) ] = 0x0141, /* L1D_CACHE_ST.I_STATE */ | ||
416 | }, | ||
417 | [ C(OP_PREFETCH) ] = { | ||
418 | [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS */ | ||
419 | [ C(RESULT_MISS) ] = 0x024e, /* L1D_PREFETCH.MISS */ | ||
420 | }, | ||
421 | }, | ||
422 | [ C(L1I ) ] = { | ||
423 | [ C(OP_READ) ] = { | ||
424 | [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */ | ||
425 | [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */ | ||
426 | }, | ||
427 | [ C(OP_WRITE) ] = { | ||
428 | [ C(RESULT_ACCESS) ] = -1, | ||
429 | [ C(RESULT_MISS) ] = -1, | ||
430 | }, | ||
431 | [ C(OP_PREFETCH) ] = { | ||
432 | [ C(RESULT_ACCESS) ] = 0x0, | ||
433 | [ C(RESULT_MISS) ] = 0x0, | ||
434 | }, | ||
435 | }, | ||
436 | [ C(LL ) ] = { | ||
437 | [ C(OP_READ) ] = { | ||
438 | [ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS */ | ||
439 | [ C(RESULT_MISS) ] = 0x0224, /* L2_RQSTS.LD_MISS */ | ||
440 | }, | ||
441 | [ C(OP_WRITE) ] = { | ||
442 | [ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS */ | ||
443 | [ C(RESULT_MISS) ] = 0x0824, /* L2_RQSTS.RFO_MISS */ | ||
444 | }, | ||
445 | [ C(OP_PREFETCH) ] = { | ||
446 | [ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference */ | ||
447 | [ C(RESULT_MISS) ] = 0x412e, /* LLC Misses */ | ||
448 | }, | ||
449 | }, | ||
450 | [ C(DTLB) ] = { | ||
451 | [ C(OP_READ) ] = { | ||
452 | [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */ | ||
453 | [ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.ANY */ | ||
454 | }, | ||
455 | [ C(OP_WRITE) ] = { | ||
456 | [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */ | ||
457 | [ C(RESULT_MISS) ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS */ | ||
458 | }, | ||
459 | [ C(OP_PREFETCH) ] = { | ||
460 | [ C(RESULT_ACCESS) ] = 0x0, | ||
461 | [ C(RESULT_MISS) ] = 0x0, | ||
462 | }, | ||
463 | }, | ||
464 | [ C(ITLB) ] = { | ||
465 | [ C(OP_READ) ] = { | ||
466 | [ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P */ | ||
467 | [ C(RESULT_MISS) ] = 0x20c8, /* ITLB_MISS_RETIRED */ | ||
468 | }, | ||
469 | [ C(OP_WRITE) ] = { | ||
470 | [ C(RESULT_ACCESS) ] = -1, | ||
471 | [ C(RESULT_MISS) ] = -1, | ||
472 | }, | ||
473 | [ C(OP_PREFETCH) ] = { | ||
474 | [ C(RESULT_ACCESS) ] = -1, | ||
475 | [ C(RESULT_MISS) ] = -1, | ||
476 | }, | ||
477 | }, | ||
478 | [ C(BPU ) ] = { | ||
479 | [ C(OP_READ) ] = { | ||
480 | [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */ | ||
481 | [ C(RESULT_MISS) ] = 0x03e8, /* BPU_CLEARS.ANY */ | ||
482 | }, | ||
483 | [ C(OP_WRITE) ] = { | ||
484 | [ C(RESULT_ACCESS) ] = -1, | ||
485 | [ C(RESULT_MISS) ] = -1, | ||
486 | }, | ||
487 | [ C(OP_PREFETCH) ] = { | ||
488 | [ C(RESULT_ACCESS) ] = -1, | ||
489 | [ C(RESULT_MISS) ] = -1, | ||
490 | }, | ||
491 | }, | ||
492 | }; | ||
493 | |||
494 | static __initconst u64 core2_hw_cache_event_ids | ||
495 | [PERF_COUNT_HW_CACHE_MAX] | ||
496 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
497 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | ||
498 | { | ||
499 | [ C(L1D) ] = { | ||
500 | [ C(OP_READ) ] = { | ||
501 | [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI */ | ||
502 | [ C(RESULT_MISS) ] = 0x0140, /* L1D_CACHE_LD.I_STATE */ | ||
503 | }, | ||
504 | [ C(OP_WRITE) ] = { | ||
505 | [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI */ | ||
506 | [ C(RESULT_MISS) ] = 0x0141, /* L1D_CACHE_ST.I_STATE */ | ||
507 | }, | ||
508 | [ C(OP_PREFETCH) ] = { | ||
509 | [ C(RESULT_ACCESS) ] = 0x104e, /* L1D_PREFETCH.REQUESTS */ | ||
510 | [ C(RESULT_MISS) ] = 0, | ||
511 | }, | ||
512 | }, | ||
513 | [ C(L1I ) ] = { | ||
514 | [ C(OP_READ) ] = { | ||
515 | [ C(RESULT_ACCESS) ] = 0x0080, /* L1I.READS */ | ||
516 | [ C(RESULT_MISS) ] = 0x0081, /* L1I.MISSES */ | ||
517 | }, | ||
518 | [ C(OP_WRITE) ] = { | ||
519 | [ C(RESULT_ACCESS) ] = -1, | ||
520 | [ C(RESULT_MISS) ] = -1, | ||
521 | }, | ||
522 | [ C(OP_PREFETCH) ] = { | ||
523 | [ C(RESULT_ACCESS) ] = 0, | ||
524 | [ C(RESULT_MISS) ] = 0, | ||
525 | }, | ||
526 | }, | ||
527 | [ C(LL ) ] = { | ||
528 | [ C(OP_READ) ] = { | ||
529 | [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */ | ||
530 | [ C(RESULT_MISS) ] = 0x4129, /* L2_LD.ISTATE */ | ||
531 | }, | ||
532 | [ C(OP_WRITE) ] = { | ||
533 | [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */ | ||
534 | [ C(RESULT_MISS) ] = 0x412A, /* L2_ST.ISTATE */ | ||
535 | }, | ||
536 | [ C(OP_PREFETCH) ] = { | ||
537 | [ C(RESULT_ACCESS) ] = 0, | ||
538 | [ C(RESULT_MISS) ] = 0, | ||
539 | }, | ||
540 | }, | ||
541 | [ C(DTLB) ] = { | ||
542 | [ C(OP_READ) ] = { | ||
543 | [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */ | ||
544 | [ C(RESULT_MISS) ] = 0x0208, /* DTLB_MISSES.MISS_LD */ | ||
545 | }, | ||
546 | [ C(OP_WRITE) ] = { | ||
547 | [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */ | ||
548 | [ C(RESULT_MISS) ] = 0x0808, /* DTLB_MISSES.MISS_ST */ | ||
549 | }, | ||
550 | [ C(OP_PREFETCH) ] = { | ||
551 | [ C(RESULT_ACCESS) ] = 0, | ||
552 | [ C(RESULT_MISS) ] = 0, | ||
553 | }, | ||
554 | }, | ||
555 | [ C(ITLB) ] = { | ||
556 | [ C(OP_READ) ] = { | ||
557 | [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */ | ||
558 | [ C(RESULT_MISS) ] = 0x1282, /* ITLBMISSES */ | ||
559 | }, | ||
560 | [ C(OP_WRITE) ] = { | ||
561 | [ C(RESULT_ACCESS) ] = -1, | ||
562 | [ C(RESULT_MISS) ] = -1, | ||
563 | }, | ||
564 | [ C(OP_PREFETCH) ] = { | ||
565 | [ C(RESULT_ACCESS) ] = -1, | ||
566 | [ C(RESULT_MISS) ] = -1, | ||
567 | }, | ||
568 | }, | ||
569 | [ C(BPU ) ] = { | ||
570 | [ C(OP_READ) ] = { | ||
571 | [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */ | ||
572 | [ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */ | ||
573 | }, | ||
574 | [ C(OP_WRITE) ] = { | ||
575 | [ C(RESULT_ACCESS) ] = -1, | ||
576 | [ C(RESULT_MISS) ] = -1, | ||
577 | }, | ||
578 | [ C(OP_PREFETCH) ] = { | ||
579 | [ C(RESULT_ACCESS) ] = -1, | ||
580 | [ C(RESULT_MISS) ] = -1, | ||
581 | }, | ||
582 | }, | ||
583 | }; | ||
584 | |||
585 | static __initconst u64 atom_hw_cache_event_ids | ||
586 | [PERF_COUNT_HW_CACHE_MAX] | ||
587 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
588 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | ||
589 | { | ||
590 | [ C(L1D) ] = { | ||
591 | [ C(OP_READ) ] = { | ||
592 | [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE.LD */ | ||
593 | [ C(RESULT_MISS) ] = 0, | ||
594 | }, | ||
595 | [ C(OP_WRITE) ] = { | ||
596 | [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE.ST */ | ||
597 | [ C(RESULT_MISS) ] = 0, | ||
598 | }, | ||
599 | [ C(OP_PREFETCH) ] = { | ||
600 | [ C(RESULT_ACCESS) ] = 0x0, | ||
601 | [ C(RESULT_MISS) ] = 0, | ||
602 | }, | ||
603 | }, | ||
604 | [ C(L1I ) ] = { | ||
605 | [ C(OP_READ) ] = { | ||
606 | [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */ | ||
607 | [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */ | ||
608 | }, | ||
609 | [ C(OP_WRITE) ] = { | ||
610 | [ C(RESULT_ACCESS) ] = -1, | ||
611 | [ C(RESULT_MISS) ] = -1, | ||
612 | }, | ||
613 | [ C(OP_PREFETCH) ] = { | ||
614 | [ C(RESULT_ACCESS) ] = 0, | ||
615 | [ C(RESULT_MISS) ] = 0, | ||
616 | }, | ||
617 | }, | ||
618 | [ C(LL ) ] = { | ||
619 | [ C(OP_READ) ] = { | ||
620 | [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */ | ||
621 | [ C(RESULT_MISS) ] = 0x4129, /* L2_LD.ISTATE */ | ||
622 | }, | ||
623 | [ C(OP_WRITE) ] = { | ||
624 | [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */ | ||
625 | [ C(RESULT_MISS) ] = 0x412A, /* L2_ST.ISTATE */ | ||
626 | }, | ||
627 | [ C(OP_PREFETCH) ] = { | ||
628 | [ C(RESULT_ACCESS) ] = 0, | ||
629 | [ C(RESULT_MISS) ] = 0, | ||
630 | }, | ||
631 | }, | ||
632 | [ C(DTLB) ] = { | ||
633 | [ C(OP_READ) ] = { | ||
634 | [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE_LD.MESI (alias) */ | ||
635 | [ C(RESULT_MISS) ] = 0x0508, /* DTLB_MISSES.MISS_LD */ | ||
636 | }, | ||
637 | [ C(OP_WRITE) ] = { | ||
638 | [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE_ST.MESI (alias) */ | ||
639 | [ C(RESULT_MISS) ] = 0x0608, /* DTLB_MISSES.MISS_ST */ | ||
640 | }, | ||
641 | [ C(OP_PREFETCH) ] = { | ||
642 | [ C(RESULT_ACCESS) ] = 0, | ||
643 | [ C(RESULT_MISS) ] = 0, | ||
644 | }, | ||
645 | }, | ||
646 | [ C(ITLB) ] = { | ||
647 | [ C(OP_READ) ] = { | ||
648 | [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */ | ||
649 | [ C(RESULT_MISS) ] = 0x0282, /* ITLB.MISSES */ | ||
650 | }, | ||
651 | [ C(OP_WRITE) ] = { | ||
652 | [ C(RESULT_ACCESS) ] = -1, | ||
653 | [ C(RESULT_MISS) ] = -1, | ||
654 | }, | ||
655 | [ C(OP_PREFETCH) ] = { | ||
656 | [ C(RESULT_ACCESS) ] = -1, | ||
657 | [ C(RESULT_MISS) ] = -1, | ||
658 | }, | ||
659 | }, | ||
660 | [ C(BPU ) ] = { | ||
661 | [ C(OP_READ) ] = { | ||
662 | [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */ | ||
663 | [ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */ | ||
664 | }, | ||
665 | [ C(OP_WRITE) ] = { | ||
666 | [ C(RESULT_ACCESS) ] = -1, | ||
667 | [ C(RESULT_MISS) ] = -1, | ||
668 | }, | ||
669 | [ C(OP_PREFETCH) ] = { | ||
670 | [ C(RESULT_ACCESS) ] = -1, | ||
671 | [ C(RESULT_MISS) ] = -1, | ||
672 | }, | ||
673 | }, | ||
674 | }; | ||
675 | |||
676 | static u64 intel_pmu_raw_event(u64 hw_event) | ||
677 | { | ||
678 | #define CORE_EVNTSEL_EVENT_MASK 0x000000FFULL | ||
679 | #define CORE_EVNTSEL_UNIT_MASK 0x0000FF00ULL | ||
680 | #define CORE_EVNTSEL_EDGE_MASK 0x00040000ULL | ||
681 | #define CORE_EVNTSEL_INV_MASK 0x00800000ULL | ||
682 | #define CORE_EVNTSEL_REG_MASK 0xFF000000ULL | ||
683 | |||
684 | #define CORE_EVNTSEL_MASK \ | ||
685 | (INTEL_ARCH_EVTSEL_MASK | \ | ||
686 | INTEL_ARCH_UNIT_MASK | \ | ||
687 | INTEL_ARCH_EDGE_MASK | \ | ||
688 | INTEL_ARCH_INV_MASK | \ | ||
689 | INTEL_ARCH_CNT_MASK) | ||
690 | |||
691 | return hw_event & CORE_EVNTSEL_MASK; | ||
692 | } | ||
693 | |||
694 | static __initconst u64 amd_hw_cache_event_ids | ||
695 | [PERF_COUNT_HW_CACHE_MAX] | ||
696 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
697 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | ||
698 | { | ||
699 | [ C(L1D) ] = { | ||
700 | [ C(OP_READ) ] = { | ||
701 | [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */ | ||
702 | [ C(RESULT_MISS) ] = 0x0041, /* Data Cache Misses */ | ||
703 | }, | ||
704 | [ C(OP_WRITE) ] = { | ||
705 | [ C(RESULT_ACCESS) ] = 0x0142, /* Data Cache Refills :system */ | ||
706 | [ C(RESULT_MISS) ] = 0, | ||
707 | }, | ||
708 | [ C(OP_PREFETCH) ] = { | ||
709 | [ C(RESULT_ACCESS) ] = 0x0267, /* Data Prefetcher :attempts */ | ||
710 | [ C(RESULT_MISS) ] = 0x0167, /* Data Prefetcher :cancelled */ | ||
711 | }, | ||
712 | }, | ||
713 | [ C(L1I ) ] = { | ||
714 | [ C(OP_READ) ] = { | ||
715 | [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction cache fetches */ | ||
716 | [ C(RESULT_MISS) ] = 0x0081, /* Instruction cache misses */ | ||
717 | }, | ||
718 | [ C(OP_WRITE) ] = { | ||
719 | [ C(RESULT_ACCESS) ] = -1, | ||
720 | [ C(RESULT_MISS) ] = -1, | ||
721 | }, | ||
722 | [ C(OP_PREFETCH) ] = { | ||
723 | [ C(RESULT_ACCESS) ] = 0x014B, /* Prefetch Instructions :Load */ | ||
724 | [ C(RESULT_MISS) ] = 0, | ||
725 | }, | ||
726 | }, | ||
727 | [ C(LL ) ] = { | ||
728 | [ C(OP_READ) ] = { | ||
729 | [ C(RESULT_ACCESS) ] = 0x037D, /* Requests to L2 Cache :IC+DC */ | ||
730 | [ C(RESULT_MISS) ] = 0x037E, /* L2 Cache Misses : IC+DC */ | ||
731 | }, | ||
732 | [ C(OP_WRITE) ] = { | ||
733 | [ C(RESULT_ACCESS) ] = 0x017F, /* L2 Fill/Writeback */ | ||
734 | [ C(RESULT_MISS) ] = 0, | ||
735 | }, | ||
736 | [ C(OP_PREFETCH) ] = { | ||
737 | [ C(RESULT_ACCESS) ] = 0, | ||
738 | [ C(RESULT_MISS) ] = 0, | ||
739 | }, | ||
740 | }, | ||
741 | [ C(DTLB) ] = { | ||
742 | [ C(OP_READ) ] = { | ||
743 | [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */ | ||
744 | [ C(RESULT_MISS) ] = 0x0046, /* L1 DTLB and L2 DLTB Miss */ | ||
745 | }, | ||
746 | [ C(OP_WRITE) ] = { | ||
747 | [ C(RESULT_ACCESS) ] = 0, | ||
748 | [ C(RESULT_MISS) ] = 0, | ||
749 | }, | ||
750 | [ C(OP_PREFETCH) ] = { | ||
751 | [ C(RESULT_ACCESS) ] = 0, | ||
752 | [ C(RESULT_MISS) ] = 0, | ||
753 | }, | ||
754 | }, | ||
755 | [ C(ITLB) ] = { | ||
756 | [ C(OP_READ) ] = { | ||
757 | [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fecthes */ | ||
758 | [ C(RESULT_MISS) ] = 0x0085, /* Instr. fetch ITLB misses */ | ||
759 | }, | ||
760 | [ C(OP_WRITE) ] = { | ||
761 | [ C(RESULT_ACCESS) ] = -1, | ||
762 | [ C(RESULT_MISS) ] = -1, | ||
763 | }, | ||
764 | [ C(OP_PREFETCH) ] = { | ||
765 | [ C(RESULT_ACCESS) ] = -1, | ||
766 | [ C(RESULT_MISS) ] = -1, | ||
767 | }, | ||
768 | }, | ||
769 | [ C(BPU ) ] = { | ||
770 | [ C(OP_READ) ] = { | ||
771 | [ C(RESULT_ACCESS) ] = 0x00c2, /* Retired Branch Instr. */ | ||
772 | [ C(RESULT_MISS) ] = 0x00c3, /* Retired Mispredicted BI */ | ||
773 | }, | ||
774 | [ C(OP_WRITE) ] = { | ||
775 | [ C(RESULT_ACCESS) ] = -1, | ||
776 | [ C(RESULT_MISS) ] = -1, | ||
777 | }, | ||
778 | [ C(OP_PREFETCH) ] = { | ||
779 | [ C(RESULT_ACCESS) ] = -1, | ||
780 | [ C(RESULT_MISS) ] = -1, | ||
781 | }, | ||
782 | }, | ||
783 | }; | ||
784 | |||
785 | /* | ||
786 | * AMD Performance Monitor K7 and later. | ||
787 | */ | ||
788 | static const u64 amd_perfmon_event_map[] = | ||
789 | { | ||
790 | [PERF_COUNT_HW_CPU_CYCLES] = 0x0076, | ||
791 | [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, | ||
792 | [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0080, | ||
793 | [PERF_COUNT_HW_CACHE_MISSES] = 0x0081, | ||
794 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, | ||
795 | [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, | ||
796 | }; | ||
797 | |||
798 | static u64 amd_pmu_event_map(int hw_event) | ||
799 | { | ||
800 | return amd_perfmon_event_map[hw_event]; | ||
801 | } | ||
802 | |||
803 | static u64 amd_pmu_raw_event(u64 hw_event) | ||
804 | { | ||
805 | #define K7_EVNTSEL_EVENT_MASK 0x7000000FFULL | ||
806 | #define K7_EVNTSEL_UNIT_MASK 0x00000FF00ULL | ||
807 | #define K7_EVNTSEL_EDGE_MASK 0x000040000ULL | ||
808 | #define K7_EVNTSEL_INV_MASK 0x000800000ULL | ||
809 | #define K7_EVNTSEL_REG_MASK 0x0FF000000ULL | ||
810 | |||
811 | #define K7_EVNTSEL_MASK \ | ||
812 | (K7_EVNTSEL_EVENT_MASK | \ | ||
813 | K7_EVNTSEL_UNIT_MASK | \ | ||
814 | K7_EVNTSEL_EDGE_MASK | \ | ||
815 | K7_EVNTSEL_INV_MASK | \ | ||
816 | K7_EVNTSEL_REG_MASK) | ||
817 | |||
818 | return hw_event & K7_EVNTSEL_MASK; | ||
819 | } | ||
820 | |||
821 | /* | 192 | /* |
822 | * Propagate event elapsed time into the generic event. | 193 | * Propagate event elapsed time into the generic event. |
823 | * Can only be executed on the CPU where the event is active. | 194 | * Can only be executed on the CPU where the event is active. |
824 | * Returns the delta events processed. | 195 | * Returns the delta events processed. |
825 | */ | 196 | */ |
826 | static u64 | 197 | static u64 |
827 | x86_perf_event_update(struct perf_event *event, | 198 | x86_perf_event_update(struct perf_event *event) |
828 | struct hw_perf_event *hwc, int idx) | ||
829 | { | 199 | { |
200 | struct hw_perf_event *hwc = &event->hw; | ||
830 | int shift = 64 - x86_pmu.event_bits; | 201 | int shift = 64 - x86_pmu.event_bits; |
831 | u64 prev_raw_count, new_raw_count; | 202 | u64 prev_raw_count, new_raw_count; |
203 | int idx = hwc->idx; | ||
832 | s64 delta; | 204 | s64 delta; |
833 | 205 | ||
834 | if (idx == X86_PMC_IDX_FIXED_BTS) | 206 | if (idx == X86_PMC_IDX_FIXED_BTS) |
@@ -928,7 +300,7 @@ static inline bool bts_available(void) | |||
928 | return x86_pmu.enable_bts != NULL; | 300 | return x86_pmu.enable_bts != NULL; |
929 | } | 301 | } |
930 | 302 | ||
931 | static inline void init_debug_store_on_cpu(int cpu) | 303 | static void init_debug_store_on_cpu(int cpu) |
932 | { | 304 | { |
933 | struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; | 305 | struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; |
934 | 306 | ||
@@ -940,7 +312,7 @@ static inline void init_debug_store_on_cpu(int cpu) | |||
940 | (u32)((u64)(unsigned long)ds >> 32)); | 312 | (u32)((u64)(unsigned long)ds >> 32)); |
941 | } | 313 | } |
942 | 314 | ||
943 | static inline void fini_debug_store_on_cpu(int cpu) | 315 | static void fini_debug_store_on_cpu(int cpu) |
944 | { | 316 | { |
945 | if (!per_cpu(cpu_hw_events, cpu).ds) | 317 | if (!per_cpu(cpu_hw_events, cpu).ds) |
946 | return; | 318 | return; |
@@ -1069,42 +441,6 @@ set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event_attr *attr) | |||
1069 | return 0; | 441 | return 0; |
1070 | } | 442 | } |
1071 | 443 | ||
1072 | static void intel_pmu_enable_bts(u64 config) | ||
1073 | { | ||
1074 | unsigned long debugctlmsr; | ||
1075 | |||
1076 | debugctlmsr = get_debugctlmsr(); | ||
1077 | |||
1078 | debugctlmsr |= X86_DEBUGCTL_TR; | ||
1079 | debugctlmsr |= X86_DEBUGCTL_BTS; | ||
1080 | debugctlmsr |= X86_DEBUGCTL_BTINT; | ||
1081 | |||
1082 | if (!(config & ARCH_PERFMON_EVENTSEL_OS)) | ||
1083 | debugctlmsr |= X86_DEBUGCTL_BTS_OFF_OS; | ||
1084 | |||
1085 | if (!(config & ARCH_PERFMON_EVENTSEL_USR)) | ||
1086 | debugctlmsr |= X86_DEBUGCTL_BTS_OFF_USR; | ||
1087 | |||
1088 | update_debugctlmsr(debugctlmsr); | ||
1089 | } | ||
1090 | |||
1091 | static void intel_pmu_disable_bts(void) | ||
1092 | { | ||
1093 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
1094 | unsigned long debugctlmsr; | ||
1095 | |||
1096 | if (!cpuc->ds) | ||
1097 | return; | ||
1098 | |||
1099 | debugctlmsr = get_debugctlmsr(); | ||
1100 | |||
1101 | debugctlmsr &= | ||
1102 | ~(X86_DEBUGCTL_TR | X86_DEBUGCTL_BTS | X86_DEBUGCTL_BTINT | | ||
1103 | X86_DEBUGCTL_BTS_OFF_OS | X86_DEBUGCTL_BTS_OFF_USR); | ||
1104 | |||
1105 | update_debugctlmsr(debugctlmsr); | ||
1106 | } | ||
1107 | |||
1108 | /* | 444 | /* |
1109 | * Setup the hardware configuration for a given attr_type | 445 | * Setup the hardware configuration for a given attr_type |
1110 | */ | 446 | */ |
@@ -1174,6 +510,9 @@ static int __hw_perf_event_init(struct perf_event *event) | |||
1174 | */ | 510 | */ |
1175 | if (attr->type == PERF_TYPE_RAW) { | 511 | if (attr->type == PERF_TYPE_RAW) { |
1176 | hwc->config |= x86_pmu.raw_event(attr->config); | 512 | hwc->config |= x86_pmu.raw_event(attr->config); |
513 | if ((hwc->config & ARCH_PERFMON_EVENTSEL_ANY) && | ||
514 | perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN)) | ||
515 | return -EACCES; | ||
1177 | return 0; | 516 | return 0; |
1178 | } | 517 | } |
1179 | 518 | ||
@@ -1213,26 +552,6 @@ static int __hw_perf_event_init(struct perf_event *event) | |||
1213 | return 0; | 552 | return 0; |
1214 | } | 553 | } |
1215 | 554 | ||
1216 | static void p6_pmu_disable_all(void) | ||
1217 | { | ||
1218 | u64 val; | ||
1219 | |||
1220 | /* p6 only has one enable register */ | ||
1221 | rdmsrl(MSR_P6_EVNTSEL0, val); | ||
1222 | val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE; | ||
1223 | wrmsrl(MSR_P6_EVNTSEL0, val); | ||
1224 | } | ||
1225 | |||
1226 | static void intel_pmu_disable_all(void) | ||
1227 | { | ||
1228 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
1229 | |||
1230 | wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0); | ||
1231 | |||
1232 | if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) | ||
1233 | intel_pmu_disable_bts(); | ||
1234 | } | ||
1235 | |||
1236 | static void x86_pmu_disable_all(void) | 555 | static void x86_pmu_disable_all(void) |
1237 | { | 556 | { |
1238 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 557 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
@@ -1244,9 +563,9 @@ static void x86_pmu_disable_all(void) | |||
1244 | if (!test_bit(idx, cpuc->active_mask)) | 563 | if (!test_bit(idx, cpuc->active_mask)) |
1245 | continue; | 564 | continue; |
1246 | rdmsrl(x86_pmu.eventsel + idx, val); | 565 | rdmsrl(x86_pmu.eventsel + idx, val); |
1247 | if (!(val & ARCH_PERFMON_EVENTSEL0_ENABLE)) | 566 | if (!(val & ARCH_PERFMON_EVENTSEL_ENABLE)) |
1248 | continue; | 567 | continue; |
1249 | val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE; | 568 | val &= ~ARCH_PERFMON_EVENTSEL_ENABLE; |
1250 | wrmsrl(x86_pmu.eventsel + idx, val); | 569 | wrmsrl(x86_pmu.eventsel + idx, val); |
1251 | } | 570 | } |
1252 | } | 571 | } |
@@ -1268,33 +587,6 @@ void hw_perf_disable(void) | |||
1268 | x86_pmu.disable_all(); | 587 | x86_pmu.disable_all(); |
1269 | } | 588 | } |
1270 | 589 | ||
1271 | static void p6_pmu_enable_all(void) | ||
1272 | { | ||
1273 | unsigned long val; | ||
1274 | |||
1275 | /* p6 only has one enable register */ | ||
1276 | rdmsrl(MSR_P6_EVNTSEL0, val); | ||
1277 | val |= ARCH_PERFMON_EVENTSEL0_ENABLE; | ||
1278 | wrmsrl(MSR_P6_EVNTSEL0, val); | ||
1279 | } | ||
1280 | |||
1281 | static void intel_pmu_enable_all(void) | ||
1282 | { | ||
1283 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
1284 | |||
1285 | wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl); | ||
1286 | |||
1287 | if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) { | ||
1288 | struct perf_event *event = | ||
1289 | cpuc->events[X86_PMC_IDX_FIXED_BTS]; | ||
1290 | |||
1291 | if (WARN_ON_ONCE(!event)) | ||
1292 | return; | ||
1293 | |||
1294 | intel_pmu_enable_bts(event->hw.config); | ||
1295 | } | ||
1296 | } | ||
1297 | |||
1298 | static void x86_pmu_enable_all(void) | 590 | static void x86_pmu_enable_all(void) |
1299 | { | 591 | { |
1300 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 592 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
@@ -1308,7 +600,7 @@ static void x86_pmu_enable_all(void) | |||
1308 | continue; | 600 | continue; |
1309 | 601 | ||
1310 | val = event->hw.config; | 602 | val = event->hw.config; |
1311 | val |= ARCH_PERFMON_EVENTSEL0_ENABLE; | 603 | val |= ARCH_PERFMON_EVENTSEL_ENABLE; |
1312 | wrmsrl(x86_pmu.eventsel + idx, val); | 604 | wrmsrl(x86_pmu.eventsel + idx, val); |
1313 | } | 605 | } |
1314 | } | 606 | } |
@@ -1330,8 +622,8 @@ static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) | |||
1330 | bitmap_zero(used_mask, X86_PMC_IDX_MAX); | 622 | bitmap_zero(used_mask, X86_PMC_IDX_MAX); |
1331 | 623 | ||
1332 | for (i = 0; i < n; i++) { | 624 | for (i = 0; i < n; i++) { |
1333 | constraints[i] = | 625 | c = x86_pmu.get_event_constraints(cpuc, cpuc->event_list[i]); |
1334 | x86_pmu.get_event_constraints(cpuc, cpuc->event_list[i]); | 626 | constraints[i] = c; |
1335 | } | 627 | } |
1336 | 628 | ||
1337 | /* | 629 | /* |
@@ -1353,7 +645,7 @@ static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) | |||
1353 | if (test_bit(hwc->idx, used_mask)) | 645 | if (test_bit(hwc->idx, used_mask)) |
1354 | break; | 646 | break; |
1355 | 647 | ||
1356 | set_bit(hwc->idx, used_mask); | 648 | __set_bit(hwc->idx, used_mask); |
1357 | if (assign) | 649 | if (assign) |
1358 | assign[i] = hwc->idx; | 650 | assign[i] = hwc->idx; |
1359 | } | 651 | } |
@@ -1394,7 +686,7 @@ static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) | |||
1394 | if (c->weight != w) | 686 | if (c->weight != w) |
1395 | continue; | 687 | continue; |
1396 | 688 | ||
1397 | for_each_bit(j, c->idxmsk, X86_PMC_IDX_MAX) { | 689 | for_each_set_bit(j, c->idxmsk, X86_PMC_IDX_MAX) { |
1398 | if (!test_bit(j, used_mask)) | 690 | if (!test_bit(j, used_mask)) |
1399 | break; | 691 | break; |
1400 | } | 692 | } |
@@ -1402,7 +694,7 @@ static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) | |||
1402 | if (j == X86_PMC_IDX_MAX) | 694 | if (j == X86_PMC_IDX_MAX) |
1403 | break; | 695 | break; |
1404 | 696 | ||
1405 | set_bit(j, used_mask); | 697 | __set_bit(j, used_mask); |
1406 | 698 | ||
1407 | if (assign) | 699 | if (assign) |
1408 | assign[i] = j; | 700 | assign[i] = j; |
@@ -1495,7 +787,8 @@ static inline int match_prev_assignment(struct hw_perf_event *hwc, | |||
1495 | hwc->last_tag == cpuc->tags[i]; | 787 | hwc->last_tag == cpuc->tags[i]; |
1496 | } | 788 | } |
1497 | 789 | ||
1498 | static void __x86_pmu_disable(struct perf_event *event, struct cpu_hw_events *cpuc); | 790 | static int x86_pmu_start(struct perf_event *event); |
791 | static void x86_pmu_stop(struct perf_event *event); | ||
1499 | 792 | ||
1500 | void hw_perf_enable(void) | 793 | void hw_perf_enable(void) |
1501 | { | 794 | { |
@@ -1511,6 +804,7 @@ void hw_perf_enable(void) | |||
1511 | return; | 804 | return; |
1512 | 805 | ||
1513 | if (cpuc->n_added) { | 806 | if (cpuc->n_added) { |
807 | int n_running = cpuc->n_events - cpuc->n_added; | ||
1514 | /* | 808 | /* |
1515 | * apply assignment obtained either from | 809 | * apply assignment obtained either from |
1516 | * hw_perf_group_sched_in() or x86_pmu_enable() | 810 | * hw_perf_group_sched_in() or x86_pmu_enable() |
@@ -1518,8 +812,7 @@ void hw_perf_enable(void) | |||
1518 | * step1: save events moving to new counters | 812 | * step1: save events moving to new counters |
1519 | * step2: reprogram moved events into new counters | 813 | * step2: reprogram moved events into new counters |
1520 | */ | 814 | */ |
1521 | for (i = 0; i < cpuc->n_events; i++) { | 815 | for (i = 0; i < n_running; i++) { |
1522 | |||
1523 | event = cpuc->event_list[i]; | 816 | event = cpuc->event_list[i]; |
1524 | hwc = &event->hw; | 817 | hwc = &event->hw; |
1525 | 818 | ||
@@ -1533,30 +826,19 @@ void hw_perf_enable(void) | |||
1533 | match_prev_assignment(hwc, cpuc, i)) | 826 | match_prev_assignment(hwc, cpuc, i)) |
1534 | continue; | 827 | continue; |
1535 | 828 | ||
1536 | __x86_pmu_disable(event, cpuc); | 829 | x86_pmu_stop(event); |
1537 | |||
1538 | hwc->idx = -1; | ||
1539 | } | 830 | } |
1540 | 831 | ||
1541 | for (i = 0; i < cpuc->n_events; i++) { | 832 | for (i = 0; i < cpuc->n_events; i++) { |
1542 | |||
1543 | event = cpuc->event_list[i]; | 833 | event = cpuc->event_list[i]; |
1544 | hwc = &event->hw; | 834 | hwc = &event->hw; |
1545 | 835 | ||
1546 | if (hwc->idx == -1) { | 836 | if (!match_prev_assignment(hwc, cpuc, i)) |
1547 | x86_assign_hw_event(event, cpuc, i); | 837 | x86_assign_hw_event(event, cpuc, i); |
1548 | x86_perf_event_set_period(event, hwc, hwc->idx); | 838 | else if (i < n_running) |
1549 | } | 839 | continue; |
1550 | /* | ||
1551 | * need to mark as active because x86_pmu_disable() | ||
1552 | * clear active_mask and events[] yet it preserves | ||
1553 | * idx | ||
1554 | */ | ||
1555 | set_bit(hwc->idx, cpuc->active_mask); | ||
1556 | cpuc->events[hwc->idx] = event; | ||
1557 | 840 | ||
1558 | x86_pmu.enable(hwc, hwc->idx); | 841 | x86_pmu_start(event); |
1559 | perf_event_update_userpage(event); | ||
1560 | } | 842 | } |
1561 | cpuc->n_added = 0; | 843 | cpuc->n_added = 0; |
1562 | perf_events_lapic_init(); | 844 | perf_events_lapic_init(); |
@@ -1568,70 +850,16 @@ void hw_perf_enable(void) | |||
1568 | x86_pmu.enable_all(); | 850 | x86_pmu.enable_all(); |
1569 | } | 851 | } |
1570 | 852 | ||
1571 | static inline u64 intel_pmu_get_status(void) | 853 | static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc) |
1572 | { | ||
1573 | u64 status; | ||
1574 | |||
1575 | rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); | ||
1576 | |||
1577 | return status; | ||
1578 | } | ||
1579 | |||
1580 | static inline void intel_pmu_ack_status(u64 ack) | ||
1581 | { | ||
1582 | wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack); | ||
1583 | } | ||
1584 | |||
1585 | static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc, int idx) | ||
1586 | { | ||
1587 | (void)checking_wrmsrl(hwc->config_base + idx, | ||
1588 | hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE); | ||
1589 | } | ||
1590 | |||
1591 | static inline void x86_pmu_disable_event(struct hw_perf_event *hwc, int idx) | ||
1592 | { | 854 | { |
1593 | (void)checking_wrmsrl(hwc->config_base + idx, hwc->config); | 855 | (void)checking_wrmsrl(hwc->config_base + hwc->idx, |
1594 | } | 856 | hwc->config | ARCH_PERFMON_EVENTSEL_ENABLE); |
1595 | |||
1596 | static inline void | ||
1597 | intel_pmu_disable_fixed(struct hw_perf_event *hwc, int __idx) | ||
1598 | { | ||
1599 | int idx = __idx - X86_PMC_IDX_FIXED; | ||
1600 | u64 ctrl_val, mask; | ||
1601 | |||
1602 | mask = 0xfULL << (idx * 4); | ||
1603 | |||
1604 | rdmsrl(hwc->config_base, ctrl_val); | ||
1605 | ctrl_val &= ~mask; | ||
1606 | (void)checking_wrmsrl(hwc->config_base, ctrl_val); | ||
1607 | } | 857 | } |
1608 | 858 | ||
1609 | static inline void | 859 | static inline void x86_pmu_disable_event(struct perf_event *event) |
1610 | p6_pmu_disable_event(struct hw_perf_event *hwc, int idx) | ||
1611 | { | 860 | { |
1612 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 861 | struct hw_perf_event *hwc = &event->hw; |
1613 | u64 val = P6_NOP_EVENT; | 862 | (void)checking_wrmsrl(hwc->config_base + hwc->idx, hwc->config); |
1614 | |||
1615 | if (cpuc->enabled) | ||
1616 | val |= ARCH_PERFMON_EVENTSEL0_ENABLE; | ||
1617 | |||
1618 | (void)checking_wrmsrl(hwc->config_base + idx, val); | ||
1619 | } | ||
1620 | |||
1621 | static inline void | ||
1622 | intel_pmu_disable_event(struct hw_perf_event *hwc, int idx) | ||
1623 | { | ||
1624 | if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) { | ||
1625 | intel_pmu_disable_bts(); | ||
1626 | return; | ||
1627 | } | ||
1628 | |||
1629 | if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { | ||
1630 | intel_pmu_disable_fixed(hwc, idx); | ||
1631 | return; | ||
1632 | } | ||
1633 | |||
1634 | x86_pmu_disable_event(hwc, idx); | ||
1635 | } | 863 | } |
1636 | 864 | ||
1637 | static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left); | 865 | static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left); |
@@ -1641,12 +869,12 @@ static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left); | |||
1641 | * To be called with the event disabled in hw: | 869 | * To be called with the event disabled in hw: |
1642 | */ | 870 | */ |
1643 | static int | 871 | static int |
1644 | x86_perf_event_set_period(struct perf_event *event, | 872 | x86_perf_event_set_period(struct perf_event *event) |
1645 | struct hw_perf_event *hwc, int idx) | ||
1646 | { | 873 | { |
874 | struct hw_perf_event *hwc = &event->hw; | ||
1647 | s64 left = atomic64_read(&hwc->period_left); | 875 | s64 left = atomic64_read(&hwc->period_left); |
1648 | s64 period = hwc->sample_period; | 876 | s64 period = hwc->sample_period; |
1649 | int err, ret = 0; | 877 | int err, ret = 0, idx = hwc->idx; |
1650 | 878 | ||
1651 | if (idx == X86_PMC_IDX_FIXED_BTS) | 879 | if (idx == X86_PMC_IDX_FIXED_BTS) |
1652 | return 0; | 880 | return 0; |
@@ -1692,75 +920,11 @@ x86_perf_event_set_period(struct perf_event *event, | |||
1692 | return ret; | 920 | return ret; |
1693 | } | 921 | } |
1694 | 922 | ||
1695 | static inline void | 923 | static void x86_pmu_enable_event(struct perf_event *event) |
1696 | intel_pmu_enable_fixed(struct hw_perf_event *hwc, int __idx) | ||
1697 | { | ||
1698 | int idx = __idx - X86_PMC_IDX_FIXED; | ||
1699 | u64 ctrl_val, bits, mask; | ||
1700 | int err; | ||
1701 | |||
1702 | /* | ||
1703 | * Enable IRQ generation (0x8), | ||
1704 | * and enable ring-3 counting (0x2) and ring-0 counting (0x1) | ||
1705 | * if requested: | ||
1706 | */ | ||
1707 | bits = 0x8ULL; | ||
1708 | if (hwc->config & ARCH_PERFMON_EVENTSEL_USR) | ||
1709 | bits |= 0x2; | ||
1710 | if (hwc->config & ARCH_PERFMON_EVENTSEL_OS) | ||
1711 | bits |= 0x1; | ||
1712 | |||
1713 | /* | ||
1714 | * ANY bit is supported in v3 and up | ||
1715 | */ | ||
1716 | if (x86_pmu.version > 2 && hwc->config & ARCH_PERFMON_EVENTSEL_ANY) | ||
1717 | bits |= 0x4; | ||
1718 | |||
1719 | bits <<= (idx * 4); | ||
1720 | mask = 0xfULL << (idx * 4); | ||
1721 | |||
1722 | rdmsrl(hwc->config_base, ctrl_val); | ||
1723 | ctrl_val &= ~mask; | ||
1724 | ctrl_val |= bits; | ||
1725 | err = checking_wrmsrl(hwc->config_base, ctrl_val); | ||
1726 | } | ||
1727 | |||
1728 | static void p6_pmu_enable_event(struct hw_perf_event *hwc, int idx) | ||
1729 | { | 924 | { |
1730 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 925 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
1731 | u64 val; | ||
1732 | |||
1733 | val = hwc->config; | ||
1734 | if (cpuc->enabled) | 926 | if (cpuc->enabled) |
1735 | val |= ARCH_PERFMON_EVENTSEL0_ENABLE; | 927 | __x86_pmu_enable_event(&event->hw); |
1736 | |||
1737 | (void)checking_wrmsrl(hwc->config_base + idx, val); | ||
1738 | } | ||
1739 | |||
1740 | |||
1741 | static void intel_pmu_enable_event(struct hw_perf_event *hwc, int idx) | ||
1742 | { | ||
1743 | if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) { | ||
1744 | if (!__get_cpu_var(cpu_hw_events).enabled) | ||
1745 | return; | ||
1746 | |||
1747 | intel_pmu_enable_bts(hwc->config); | ||
1748 | return; | ||
1749 | } | ||
1750 | |||
1751 | if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { | ||
1752 | intel_pmu_enable_fixed(hwc, idx); | ||
1753 | return; | ||
1754 | } | ||
1755 | |||
1756 | __x86_pmu_enable_event(hwc, idx); | ||
1757 | } | ||
1758 | |||
1759 | static void x86_pmu_enable_event(struct hw_perf_event *hwc, int idx) | ||
1760 | { | ||
1761 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
1762 | if (cpuc->enabled) | ||
1763 | __x86_pmu_enable_event(hwc, idx); | ||
1764 | } | 928 | } |
1765 | 929 | ||
1766 | /* | 930 | /* |
@@ -1796,21 +960,32 @@ static int x86_pmu_enable(struct perf_event *event) | |||
1796 | memcpy(cpuc->assign, assign, n*sizeof(int)); | 960 | memcpy(cpuc->assign, assign, n*sizeof(int)); |
1797 | 961 | ||
1798 | cpuc->n_events = n; | 962 | cpuc->n_events = n; |
1799 | cpuc->n_added = n - n0; | 963 | cpuc->n_added += n - n0; |
1800 | 964 | ||
1801 | return 0; | 965 | return 0; |
1802 | } | 966 | } |
1803 | 967 | ||
1804 | static void x86_pmu_unthrottle(struct perf_event *event) | 968 | static int x86_pmu_start(struct perf_event *event) |
1805 | { | 969 | { |
1806 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 970 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
1807 | struct hw_perf_event *hwc = &event->hw; | 971 | int idx = event->hw.idx; |
1808 | 972 | ||
1809 | if (WARN_ON_ONCE(hwc->idx >= X86_PMC_IDX_MAX || | 973 | if (idx == -1) |
1810 | cpuc->events[hwc->idx] != event)) | 974 | return -EAGAIN; |
1811 | return; | 975 | |
976 | x86_perf_event_set_period(event); | ||
977 | cpuc->events[idx] = event; | ||
978 | __set_bit(idx, cpuc->active_mask); | ||
979 | x86_pmu.enable(event); | ||
980 | perf_event_update_userpage(event); | ||
1812 | 981 | ||
1813 | x86_pmu.enable(hwc, hwc->idx); | 982 | return 0; |
983 | } | ||
984 | |||
985 | static void x86_pmu_unthrottle(struct perf_event *event) | ||
986 | { | ||
987 | int ret = x86_pmu_start(event); | ||
988 | WARN_ON_ONCE(ret); | ||
1814 | } | 989 | } |
1815 | 990 | ||
1816 | void perf_event_print_debug(void) | 991 | void perf_event_print_debug(void) |
@@ -1864,87 +1039,22 @@ void perf_event_print_debug(void) | |||
1864 | local_irq_restore(flags); | 1039 | local_irq_restore(flags); |
1865 | } | 1040 | } |
1866 | 1041 | ||
1867 | static void intel_pmu_drain_bts_buffer(struct cpu_hw_events *cpuc) | 1042 | static void x86_pmu_stop(struct perf_event *event) |
1868 | { | ||
1869 | struct debug_store *ds = cpuc->ds; | ||
1870 | struct bts_record { | ||
1871 | u64 from; | ||
1872 | u64 to; | ||
1873 | u64 flags; | ||
1874 | }; | ||
1875 | struct perf_event *event = cpuc->events[X86_PMC_IDX_FIXED_BTS]; | ||
1876 | struct bts_record *at, *top; | ||
1877 | struct perf_output_handle handle; | ||
1878 | struct perf_event_header header; | ||
1879 | struct perf_sample_data data; | ||
1880 | struct pt_regs regs; | ||
1881 | |||
1882 | if (!event) | ||
1883 | return; | ||
1884 | |||
1885 | if (!ds) | ||
1886 | return; | ||
1887 | |||
1888 | at = (struct bts_record *)(unsigned long)ds->bts_buffer_base; | ||
1889 | top = (struct bts_record *)(unsigned long)ds->bts_index; | ||
1890 | |||
1891 | if (top <= at) | ||
1892 | return; | ||
1893 | |||
1894 | ds->bts_index = ds->bts_buffer_base; | ||
1895 | |||
1896 | |||
1897 | data.period = event->hw.last_period; | ||
1898 | data.addr = 0; | ||
1899 | data.raw = NULL; | ||
1900 | regs.ip = 0; | ||
1901 | |||
1902 | /* | ||
1903 | * Prepare a generic sample, i.e. fill in the invariant fields. | ||
1904 | * We will overwrite the from and to address before we output | ||
1905 | * the sample. | ||
1906 | */ | ||
1907 | perf_prepare_sample(&header, &data, event, ®s); | ||
1908 | |||
1909 | if (perf_output_begin(&handle, event, | ||
1910 | header.size * (top - at), 1, 1)) | ||
1911 | return; | ||
1912 | |||
1913 | for (; at < top; at++) { | ||
1914 | data.ip = at->from; | ||
1915 | data.addr = at->to; | ||
1916 | |||
1917 | perf_output_sample(&handle, &header, &data, event); | ||
1918 | } | ||
1919 | |||
1920 | perf_output_end(&handle); | ||
1921 | |||
1922 | /* There's new data available. */ | ||
1923 | event->hw.interrupts++; | ||
1924 | event->pending_kill = POLL_IN; | ||
1925 | } | ||
1926 | |||
1927 | static void __x86_pmu_disable(struct perf_event *event, struct cpu_hw_events *cpuc) | ||
1928 | { | 1043 | { |
1044 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
1929 | struct hw_perf_event *hwc = &event->hw; | 1045 | struct hw_perf_event *hwc = &event->hw; |
1930 | int idx = hwc->idx; | 1046 | int idx = hwc->idx; |
1931 | 1047 | ||
1932 | /* | 1048 | if (!__test_and_clear_bit(idx, cpuc->active_mask)) |
1933 | * Must be done before we disable, otherwise the nmi handler | 1049 | return; |
1934 | * could reenable again: | 1050 | |
1935 | */ | 1051 | x86_pmu.disable(event); |
1936 | clear_bit(idx, cpuc->active_mask); | ||
1937 | x86_pmu.disable(hwc, idx); | ||
1938 | 1052 | ||
1939 | /* | 1053 | /* |
1940 | * Drain the remaining delta count out of a event | 1054 | * Drain the remaining delta count out of a event |
1941 | * that we are disabling: | 1055 | * that we are disabling: |
1942 | */ | 1056 | */ |
1943 | x86_perf_event_update(event, hwc, idx); | 1057 | x86_perf_event_update(event); |
1944 | |||
1945 | /* Drain the remaining BTS records. */ | ||
1946 | if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) | ||
1947 | intel_pmu_drain_bts_buffer(cpuc); | ||
1948 | 1058 | ||
1949 | cpuc->events[idx] = NULL; | 1059 | cpuc->events[idx] = NULL; |
1950 | } | 1060 | } |
@@ -1954,7 +1064,7 @@ static void x86_pmu_disable(struct perf_event *event) | |||
1954 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 1064 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
1955 | int i; | 1065 | int i; |
1956 | 1066 | ||
1957 | __x86_pmu_disable(event, cpuc); | 1067 | x86_pmu_stop(event); |
1958 | 1068 | ||
1959 | for (i = 0; i < cpuc->n_events; i++) { | 1069 | for (i = 0; i < cpuc->n_events; i++) { |
1960 | if (event == cpuc->event_list[i]) { | 1070 | if (event == cpuc->event_list[i]) { |
@@ -1972,117 +1082,6 @@ static void x86_pmu_disable(struct perf_event *event) | |||
1972 | perf_event_update_userpage(event); | 1082 | perf_event_update_userpage(event); |
1973 | } | 1083 | } |
1974 | 1084 | ||
1975 | /* | ||
1976 | * Save and restart an expired event. Called by NMI contexts, | ||
1977 | * so it has to be careful about preempting normal event ops: | ||
1978 | */ | ||
1979 | static int intel_pmu_save_and_restart(struct perf_event *event) | ||
1980 | { | ||
1981 | struct hw_perf_event *hwc = &event->hw; | ||
1982 | int idx = hwc->idx; | ||
1983 | int ret; | ||
1984 | |||
1985 | x86_perf_event_update(event, hwc, idx); | ||
1986 | ret = x86_perf_event_set_period(event, hwc, idx); | ||
1987 | |||
1988 | if (event->state == PERF_EVENT_STATE_ACTIVE) | ||
1989 | intel_pmu_enable_event(hwc, idx); | ||
1990 | |||
1991 | return ret; | ||
1992 | } | ||
1993 | |||
1994 | static void intel_pmu_reset(void) | ||
1995 | { | ||
1996 | struct debug_store *ds = __get_cpu_var(cpu_hw_events).ds; | ||
1997 | unsigned long flags; | ||
1998 | int idx; | ||
1999 | |||
2000 | if (!x86_pmu.num_events) | ||
2001 | return; | ||
2002 | |||
2003 | local_irq_save(flags); | ||
2004 | |||
2005 | printk("clearing PMU state on CPU#%d\n", smp_processor_id()); | ||
2006 | |||
2007 | for (idx = 0; idx < x86_pmu.num_events; idx++) { | ||
2008 | checking_wrmsrl(x86_pmu.eventsel + idx, 0ull); | ||
2009 | checking_wrmsrl(x86_pmu.perfctr + idx, 0ull); | ||
2010 | } | ||
2011 | for (idx = 0; idx < x86_pmu.num_events_fixed; idx++) { | ||
2012 | checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull); | ||
2013 | } | ||
2014 | if (ds) | ||
2015 | ds->bts_index = ds->bts_buffer_base; | ||
2016 | |||
2017 | local_irq_restore(flags); | ||
2018 | } | ||
2019 | |||
2020 | /* | ||
2021 | * This handler is triggered by the local APIC, so the APIC IRQ handling | ||
2022 | * rules apply: | ||
2023 | */ | ||
2024 | static int intel_pmu_handle_irq(struct pt_regs *regs) | ||
2025 | { | ||
2026 | struct perf_sample_data data; | ||
2027 | struct cpu_hw_events *cpuc; | ||
2028 | int bit, loops; | ||
2029 | u64 ack, status; | ||
2030 | |||
2031 | data.addr = 0; | ||
2032 | data.raw = NULL; | ||
2033 | |||
2034 | cpuc = &__get_cpu_var(cpu_hw_events); | ||
2035 | |||
2036 | perf_disable(); | ||
2037 | intel_pmu_drain_bts_buffer(cpuc); | ||
2038 | status = intel_pmu_get_status(); | ||
2039 | if (!status) { | ||
2040 | perf_enable(); | ||
2041 | return 0; | ||
2042 | } | ||
2043 | |||
2044 | loops = 0; | ||
2045 | again: | ||
2046 | if (++loops > 100) { | ||
2047 | WARN_ONCE(1, "perfevents: irq loop stuck!\n"); | ||
2048 | perf_event_print_debug(); | ||
2049 | intel_pmu_reset(); | ||
2050 | perf_enable(); | ||
2051 | return 1; | ||
2052 | } | ||
2053 | |||
2054 | inc_irq_stat(apic_perf_irqs); | ||
2055 | ack = status; | ||
2056 | for_each_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) { | ||
2057 | struct perf_event *event = cpuc->events[bit]; | ||
2058 | |||
2059 | clear_bit(bit, (unsigned long *) &status); | ||
2060 | if (!test_bit(bit, cpuc->active_mask)) | ||
2061 | continue; | ||
2062 | |||
2063 | if (!intel_pmu_save_and_restart(event)) | ||
2064 | continue; | ||
2065 | |||
2066 | data.period = event->hw.last_period; | ||
2067 | |||
2068 | if (perf_event_overflow(event, 1, &data, regs)) | ||
2069 | intel_pmu_disable_event(&event->hw, bit); | ||
2070 | } | ||
2071 | |||
2072 | intel_pmu_ack_status(ack); | ||
2073 | |||
2074 | /* | ||
2075 | * Repeat if there is more work to be done: | ||
2076 | */ | ||
2077 | status = intel_pmu_get_status(); | ||
2078 | if (status) | ||
2079 | goto again; | ||
2080 | |||
2081 | perf_enable(); | ||
2082 | |||
2083 | return 1; | ||
2084 | } | ||
2085 | |||
2086 | static int x86_pmu_handle_irq(struct pt_regs *regs) | 1085 | static int x86_pmu_handle_irq(struct pt_regs *regs) |
2087 | { | 1086 | { |
2088 | struct perf_sample_data data; | 1087 | struct perf_sample_data data; |
@@ -2092,8 +1091,7 @@ static int x86_pmu_handle_irq(struct pt_regs *regs) | |||
2092 | int idx, handled = 0; | 1091 | int idx, handled = 0; |
2093 | u64 val; | 1092 | u64 val; |
2094 | 1093 | ||
2095 | data.addr = 0; | 1094 | perf_sample_data_init(&data, 0); |
2096 | data.raw = NULL; | ||
2097 | 1095 | ||
2098 | cpuc = &__get_cpu_var(cpu_hw_events); | 1096 | cpuc = &__get_cpu_var(cpu_hw_events); |
2099 | 1097 | ||
@@ -2104,7 +1102,7 @@ static int x86_pmu_handle_irq(struct pt_regs *regs) | |||
2104 | event = cpuc->events[idx]; | 1102 | event = cpuc->events[idx]; |
2105 | hwc = &event->hw; | 1103 | hwc = &event->hw; |
2106 | 1104 | ||
2107 | val = x86_perf_event_update(event, hwc, idx); | 1105 | val = x86_perf_event_update(event); |
2108 | if (val & (1ULL << (x86_pmu.event_bits - 1))) | 1106 | if (val & (1ULL << (x86_pmu.event_bits - 1))) |
2109 | continue; | 1107 | continue; |
2110 | 1108 | ||
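The continue above relies on how periods are armed: x86_perf_event_set_period() writes the counter with the two's complement of the remaining period, so the top implemented bit stays set until the counter wraps past zero. A small sketch of that overflow test, with the sign-bit reasoning spelled out (illustrative helper name, not part of the patch):

	/* Illustration only: non-zero once the counter has wrapped past
	 * zero, i.e. once the top implemented bit has gone clear. */
	static int counter_overflowed(u64 val, int event_bits)
	{
		return !(val & (1ULL << (event_bits - 1)));
	}

With 48-bit counters, for example, programming -100000 stores 0xfffffffe7960; bit 47 stays set until the 100000th increment clears it, at which point the handler treats the counter as overflowed.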
@@ -2114,11 +1112,11 @@ static int x86_pmu_handle_irq(struct pt_regs *regs) | |||
2114 | handled = 1; | 1112 | handled = 1; |
2115 | data.period = event->hw.last_period; | 1113 | data.period = event->hw.last_period; |
2116 | 1114 | ||
2117 | if (!x86_perf_event_set_period(event, hwc, idx)) | 1115 | if (!x86_perf_event_set_period(event)) |
2118 | continue; | 1116 | continue; |
2119 | 1117 | ||
2120 | if (perf_event_overflow(event, 1, &data, regs)) | 1118 | if (perf_event_overflow(event, 1, &data, regs)) |
2121 | x86_pmu.disable(hwc, idx); | 1119 | x86_pmu_stop(event); |
2122 | } | 1120 | } |
2123 | 1121 | ||
2124 | if (handled) | 1122 | if (handled) |
@@ -2195,36 +1193,20 @@ perf_event_nmi_handler(struct notifier_block *self, | |||
2195 | return NOTIFY_STOP; | 1193 | return NOTIFY_STOP; |
2196 | } | 1194 | } |
2197 | 1195 | ||
2198 | static struct event_constraint unconstrained; | 1196 | static __read_mostly struct notifier_block perf_event_nmi_notifier = { |
2199 | 1197 | .notifier_call = perf_event_nmi_handler, | |
2200 | static struct event_constraint bts_constraint = | 1198 | .next = NULL, |
2201 | EVENT_CONSTRAINT(0, 1ULL << X86_PMC_IDX_FIXED_BTS, 0); | 1199 | .priority = 1 |
2202 | 1200 | }; | |
2203 | static struct event_constraint * | ||
2204 | intel_special_constraints(struct perf_event *event) | ||
2205 | { | ||
2206 | unsigned int hw_event; | ||
2207 | |||
2208 | hw_event = event->hw.config & INTEL_ARCH_EVENT_MASK; | ||
2209 | |||
2210 | if (unlikely((hw_event == | ||
2211 | x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS)) && | ||
2212 | (event->hw.sample_period == 1))) { | ||
2213 | 1201 | ||
2214 | return &bts_constraint; | 1202 | static struct event_constraint unconstrained; |
2215 | } | 1203 | static struct event_constraint emptyconstraint; |
2216 | return NULL; | ||
2217 | } | ||
2218 | 1204 | ||
2219 | static struct event_constraint * | 1205 | static struct event_constraint * |
2220 | intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) | 1206 | x86_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) |
2221 | { | 1207 | { |
2222 | struct event_constraint *c; | 1208 | struct event_constraint *c; |
2223 | 1209 | ||
2224 | c = intel_special_constraints(event); | ||
2225 | if (c) | ||
2226 | return c; | ||
2227 | |||
2228 | if (x86_pmu.event_constraints) { | 1210 | if (x86_pmu.event_constraints) { |
2229 | for_each_event_constraint(c, x86_pmu.event_constraints) { | 1211 | for_each_event_constraint(c, x86_pmu.event_constraints) { |
2230 | if ((event->hw.config & c->cmask) == c->code) | 1212 | if ((event->hw.config & c->cmask) == c->code) |
@@ -2235,19 +1217,13 @@ intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event | |||
2235 | return &unconstrained; | 1217 | return &unconstrained; |
2236 | } | 1218 | } |
2237 | 1219 | ||
2238 | static struct event_constraint * | ||
2239 | amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) | ||
2240 | { | ||
2241 | return &unconstrained; | ||
2242 | } | ||
2243 | |||
2244 | static int x86_event_sched_in(struct perf_event *event, | 1220 | static int x86_event_sched_in(struct perf_event *event, |
2245 | struct perf_cpu_context *cpuctx, int cpu) | 1221 | struct perf_cpu_context *cpuctx) |
2246 | { | 1222 | { |
2247 | int ret = 0; | 1223 | int ret = 0; |
2248 | 1224 | ||
2249 | event->state = PERF_EVENT_STATE_ACTIVE; | 1225 | event->state = PERF_EVENT_STATE_ACTIVE; |
2250 | event->oncpu = cpu; | 1226 | event->oncpu = smp_processor_id(); |
2251 | event->tstamp_running += event->ctx->time - event->tstamp_stopped; | 1227 | event->tstamp_running += event->ctx->time - event->tstamp_stopped; |
2252 | 1228 | ||
2253 | if (!is_x86_event(event)) | 1229 | if (!is_x86_event(event)) |
@@ -2263,7 +1239,7 @@ static int x86_event_sched_in(struct perf_event *event, | |||
2263 | } | 1239 | } |
2264 | 1240 | ||
2265 | static void x86_event_sched_out(struct perf_event *event, | 1241 | static void x86_event_sched_out(struct perf_event *event, |
2266 | struct perf_cpu_context *cpuctx, int cpu) | 1242 | struct perf_cpu_context *cpuctx) |
2267 | { | 1243 | { |
2268 | event->state = PERF_EVENT_STATE_INACTIVE; | 1244 | event->state = PERF_EVENT_STATE_INACTIVE; |
2269 | event->oncpu = -1; | 1245 | event->oncpu = -1; |
@@ -2291,9 +1267,9 @@ static void x86_event_sched_out(struct perf_event *event, | |||
2291 | */ | 1267 | */ |
2292 | int hw_perf_group_sched_in(struct perf_event *leader, | 1268 | int hw_perf_group_sched_in(struct perf_event *leader, |
2293 | struct perf_cpu_context *cpuctx, | 1269 | struct perf_cpu_context *cpuctx, |
2294 | struct perf_event_context *ctx, int cpu) | 1270 | struct perf_event_context *ctx) |
2295 | { | 1271 | { |
2296 | struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); | 1272 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
2297 | struct perf_event *sub; | 1273 | struct perf_event *sub; |
2298 | int assign[X86_PMC_IDX_MAX]; | 1274 | int assign[X86_PMC_IDX_MAX]; |
2299 | int n0, n1, ret; | 1275 | int n0, n1, ret; |
@@ -2307,14 +1283,14 @@ int hw_perf_group_sched_in(struct perf_event *leader, | |||
2307 | if (ret) | 1283 | if (ret) |
2308 | return ret; | 1284 | return ret; |
2309 | 1285 | ||
2310 | ret = x86_event_sched_in(leader, cpuctx, cpu); | 1286 | ret = x86_event_sched_in(leader, cpuctx); |
2311 | if (ret) | 1287 | if (ret) |
2312 | return ret; | 1288 | return ret; |
2313 | 1289 | ||
2314 | n1 = 1; | 1290 | n1 = 1; |
2315 | list_for_each_entry(sub, &leader->sibling_list, group_entry) { | 1291 | list_for_each_entry(sub, &leader->sibling_list, group_entry) { |
2316 | if (sub->state > PERF_EVENT_STATE_OFF) { | 1292 | if (sub->state > PERF_EVENT_STATE_OFF) { |
2317 | ret = x86_event_sched_in(sub, cpuctx, cpu); | 1293 | ret = x86_event_sched_in(sub, cpuctx); |
2318 | if (ret) | 1294 | if (ret) |
2319 | goto undo; | 1295 | goto undo; |
2320 | ++n1; | 1296 | ++n1; |
@@ -2327,7 +1303,7 @@ int hw_perf_group_sched_in(struct perf_event *leader, | |||
2327 | memcpy(cpuc->assign, assign, n0*sizeof(int)); | 1303 | memcpy(cpuc->assign, assign, n0*sizeof(int)); |
2328 | 1304 | ||
2329 | cpuc->n_events = n0; | 1305 | cpuc->n_events = n0; |
2330 | cpuc->n_added = n1; | 1306 | cpuc->n_added += n1; |
2331 | ctx->nr_active += n1; | 1307 | ctx->nr_active += n1; |
2332 | 1308 | ||
2333 | /* | 1309 | /* |
@@ -2339,11 +1315,11 @@ int hw_perf_group_sched_in(struct perf_event *leader, | |||
2339 | */ | 1315 | */ |
2340 | return 1; | 1316 | return 1; |
2341 | undo: | 1317 | undo: |
2342 | x86_event_sched_out(leader, cpuctx, cpu); | 1318 | x86_event_sched_out(leader, cpuctx); |
2343 | n0 = 1; | 1319 | n0 = 1; |
2344 | list_for_each_entry(sub, &leader->sibling_list, group_entry) { | 1320 | list_for_each_entry(sub, &leader->sibling_list, group_entry) { |
2345 | if (sub->state == PERF_EVENT_STATE_ACTIVE) { | 1321 | if (sub->state == PERF_EVENT_STATE_ACTIVE) { |
2346 | x86_event_sched_out(sub, cpuctx, cpu); | 1322 | x86_event_sched_out(sub, cpuctx); |
2347 | if (++n0 == n1) | 1323 | if (++n0 == n1) |
2348 | break; | 1324 | break; |
2349 | } | 1325 | } |
@@ -2351,243 +1327,43 @@ undo: | |||
2351 | return ret; | 1327 | return ret; |
2352 | } | 1328 | } |
2353 | 1329 | ||
2354 | static __read_mostly struct notifier_block perf_event_nmi_notifier = { | 1330 | #include "perf_event_amd.c" |
2355 | .notifier_call = perf_event_nmi_handler, | 1331 | #include "perf_event_p6.c" |
2356 | .next = NULL, | 1332 | #include "perf_event_intel.c" |
2357 | .priority = 1 | ||
2358 | }; | ||
2359 | |||
2360 | static __initconst struct x86_pmu p6_pmu = { | ||
2361 | .name = "p6", | ||
2362 | .handle_irq = x86_pmu_handle_irq, | ||
2363 | .disable_all = p6_pmu_disable_all, | ||
2364 | .enable_all = p6_pmu_enable_all, | ||
2365 | .enable = p6_pmu_enable_event, | ||
2366 | .disable = p6_pmu_disable_event, | ||
2367 | .eventsel = MSR_P6_EVNTSEL0, | ||
2368 | .perfctr = MSR_P6_PERFCTR0, | ||
2369 | .event_map = p6_pmu_event_map, | ||
2370 | .raw_event = p6_pmu_raw_event, | ||
2371 | .max_events = ARRAY_SIZE(p6_perfmon_event_map), | ||
2372 | .apic = 1, | ||
2373 | .max_period = (1ULL << 31) - 1, | ||
2374 | .version = 0, | ||
2375 | .num_events = 2, | ||
2376 | /* | ||
2377 | * Events have 40 bits implemented. However they are designed such | ||
2378 | * that bits [32-39] are sign extensions of bit 31. As such the | ||
2379 | * effective width of an event for P6-like PMU is 32 bits only. | ||
2380 | * | ||
2381 | * See IA-32 Intel Architecture Software developer manual Vol 3B | ||
2382 | */ | ||
2383 | .event_bits = 32, | ||
2384 | .event_mask = (1ULL << 32) - 1, | ||
2385 | .get_event_constraints = intel_get_event_constraints, | ||
2386 | .event_constraints = intel_p6_event_constraints | ||
2387 | }; | ||
2388 | |||
2389 | static __initconst struct x86_pmu core_pmu = { | ||
2390 | .name = "core", | ||
2391 | .handle_irq = x86_pmu_handle_irq, | ||
2392 | .disable_all = x86_pmu_disable_all, | ||
2393 | .enable_all = x86_pmu_enable_all, | ||
2394 | .enable = x86_pmu_enable_event, | ||
2395 | .disable = x86_pmu_disable_event, | ||
2396 | .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, | ||
2397 | .perfctr = MSR_ARCH_PERFMON_PERFCTR0, | ||
2398 | .event_map = intel_pmu_event_map, | ||
2399 | .raw_event = intel_pmu_raw_event, | ||
2400 | .max_events = ARRAY_SIZE(intel_perfmon_event_map), | ||
2401 | .apic = 1, | ||
2402 | /* | ||
2403 | * Intel PMCs cannot be accessed sanely above 32 bit width, | ||
2404 | * so we install an artificial 1<<31 period regardless of | ||
2405 | * the generic event period: | ||
2406 | */ | ||
2407 | .max_period = (1ULL << 31) - 1, | ||
2408 | .get_event_constraints = intel_get_event_constraints, | ||
2409 | .event_constraints = intel_core_event_constraints, | ||
2410 | }; | ||
2411 | |||
2412 | static __initconst struct x86_pmu intel_pmu = { | ||
2413 | .name = "Intel", | ||
2414 | .handle_irq = intel_pmu_handle_irq, | ||
2415 | .disable_all = intel_pmu_disable_all, | ||
2416 | .enable_all = intel_pmu_enable_all, | ||
2417 | .enable = intel_pmu_enable_event, | ||
2418 | .disable = intel_pmu_disable_event, | ||
2419 | .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, | ||
2420 | .perfctr = MSR_ARCH_PERFMON_PERFCTR0, | ||
2421 | .event_map = intel_pmu_event_map, | ||
2422 | .raw_event = intel_pmu_raw_event, | ||
2423 | .max_events = ARRAY_SIZE(intel_perfmon_event_map), | ||
2424 | .apic = 1, | ||
2425 | /* | ||
2426 | * Intel PMCs cannot be accessed sanely above 32 bit width, | ||
2427 | * so we install an artificial 1<<31 period regardless of | ||
2428 | * the generic event period: | ||
2429 | */ | ||
2430 | .max_period = (1ULL << 31) - 1, | ||
2431 | .enable_bts = intel_pmu_enable_bts, | ||
2432 | .disable_bts = intel_pmu_disable_bts, | ||
2433 | .get_event_constraints = intel_get_event_constraints | ||
2434 | }; | ||
2435 | |||
2436 | static __initconst struct x86_pmu amd_pmu = { | ||
2437 | .name = "AMD", | ||
2438 | .handle_irq = x86_pmu_handle_irq, | ||
2439 | .disable_all = x86_pmu_disable_all, | ||
2440 | .enable_all = x86_pmu_enable_all, | ||
2441 | .enable = x86_pmu_enable_event, | ||
2442 | .disable = x86_pmu_disable_event, | ||
2443 | .eventsel = MSR_K7_EVNTSEL0, | ||
2444 | .perfctr = MSR_K7_PERFCTR0, | ||
2445 | .event_map = amd_pmu_event_map, | ||
2446 | .raw_event = amd_pmu_raw_event, | ||
2447 | .max_events = ARRAY_SIZE(amd_perfmon_event_map), | ||
2448 | .num_events = 4, | ||
2449 | .event_bits = 48, | ||
2450 | .event_mask = (1ULL << 48) - 1, | ||
2451 | .apic = 1, | ||
2452 | /* use highest bit to detect overflow */ | ||
2453 | .max_period = (1ULL << 47) - 1, | ||
2454 | .get_event_constraints = amd_get_event_constraints | ||
2455 | }; | ||
2456 | |||
2457 | static __init int p6_pmu_init(void) | ||
2458 | { | ||
2459 | switch (boot_cpu_data.x86_model) { | ||
2460 | case 1: | ||
2461 | case 3: /* Pentium Pro */ | ||
2462 | case 5: | ||
2463 | case 6: /* Pentium II */ | ||
2464 | case 7: | ||
2465 | case 8: | ||
2466 | case 11: /* Pentium III */ | ||
2467 | case 9: | ||
2468 | case 13: | ||
2469 | /* Pentium M */ | ||
2470 | break; | ||
2471 | default: | ||
2472 | pr_cont("unsupported p6 CPU model %d ", | ||
2473 | boot_cpu_data.x86_model); | ||
2474 | return -ENODEV; | ||
2475 | } | ||
2476 | |||
2477 | x86_pmu = p6_pmu; | ||
2478 | |||
2479 | return 0; | ||
2480 | } | ||
2481 | 1333 | ||
2482 | static __init int intel_pmu_init(void) | 1334 | static int __cpuinit |
1335 | x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu) | ||
2483 | { | 1336 | { |
2484 | union cpuid10_edx edx; | 1337 | unsigned int cpu = (long)hcpu; |
2485 | union cpuid10_eax eax; | 1338 | int ret = NOTIFY_OK; |
2486 | unsigned int unused; | ||
2487 | unsigned int ebx; | ||
2488 | int version; | ||
2489 | |||
2490 | if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { | ||
2491 | /* check for P6 processor family */ | ||
2492 | if (boot_cpu_data.x86 == 6) { | ||
2493 | return p6_pmu_init(); | ||
2494 | } else { | ||
2495 | return -ENODEV; | ||
2496 | } | ||
2497 | } | ||
2498 | |||
2499 | /* | ||
2500 | * Check whether the Architectural PerfMon supports | ||
2501 | * Branch Misses Retired hw_event or not. | ||
2502 | */ | ||
2503 | cpuid(10, &eax.full, &ebx, &unused, &edx.full); | ||
2504 | if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED) | ||
2505 | return -ENODEV; | ||
2506 | |||
2507 | version = eax.split.version_id; | ||
2508 | if (version < 2) | ||
2509 | x86_pmu = core_pmu; | ||
2510 | else | ||
2511 | x86_pmu = intel_pmu; | ||
2512 | |||
2513 | x86_pmu.version = version; | ||
2514 | x86_pmu.num_events = eax.split.num_events; | ||
2515 | x86_pmu.event_bits = eax.split.bit_width; | ||
2516 | x86_pmu.event_mask = (1ULL << eax.split.bit_width) - 1; | ||
2517 | 1339 | ||
2518 | /* | 1340 | switch (action & ~CPU_TASKS_FROZEN) { |
2519 | * Quirk: v2 perfmon does not report fixed-purpose events, so | 1341 | case CPU_UP_PREPARE: |
2520 | * assume at least 3 events: | 1342 | if (x86_pmu.cpu_prepare) |
2521 | */ | 1343 | ret = x86_pmu.cpu_prepare(cpu); |
2522 | if (version > 1) | ||
2523 | x86_pmu.num_events_fixed = max((int)edx.split.num_events_fixed, 3); | ||
2524 | |||
2525 | /* | ||
2526 | * Install the hw-cache-events table: | ||
2527 | */ | ||
2528 | switch (boot_cpu_data.x86_model) { | ||
2529 | case 14: /* 65 nm core solo/duo, "Yonah" */ | ||
2530 | pr_cont("Core events, "); | ||
2531 | break; | 1344 | break; |
2532 | 1345 | ||
2533 | case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */ | 1346 | case CPU_STARTING: |
2534 | case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */ | 1347 | if (x86_pmu.cpu_starting) |
2535 | case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */ | 1348 | x86_pmu.cpu_starting(cpu); |
2536 | case 29: /* six-core 45 nm xeon "Dunnington" */ | ||
2537 | memcpy(hw_cache_event_ids, core2_hw_cache_event_ids, | ||
2538 | sizeof(hw_cache_event_ids)); | ||
2539 | |||
2540 | x86_pmu.event_constraints = intel_core2_event_constraints; | ||
2541 | pr_cont("Core2 events, "); | ||
2542 | break; | 1349 | break; |
2543 | 1350 | ||
2544 | case 26: /* 45 nm nehalem, "Bloomfield" */ | 1351 | case CPU_DYING: |
2545 | case 30: /* 45 nm nehalem, "Lynnfield" */ | 1352 | if (x86_pmu.cpu_dying) |
2546 | memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids, | 1353 | x86_pmu.cpu_dying(cpu); |
2547 | sizeof(hw_cache_event_ids)); | ||
2548 | |||
2549 | x86_pmu.event_constraints = intel_nehalem_event_constraints; | ||
2550 | pr_cont("Nehalem/Corei7 events, "); | ||
2551 | break; | 1354 | break; |
2552 | case 28: | ||
2553 | memcpy(hw_cache_event_ids, atom_hw_cache_event_ids, | ||
2554 | sizeof(hw_cache_event_ids)); | ||
2555 | 1355 | ||
2556 | x86_pmu.event_constraints = intel_gen_event_constraints; | 1356 | case CPU_UP_CANCELED: |
2557 | pr_cont("Atom events, "); | 1357 | case CPU_DEAD: |
1358 | if (x86_pmu.cpu_dead) | ||
1359 | x86_pmu.cpu_dead(cpu); | ||
2558 | break; | 1360 | break; |
2559 | 1361 | ||
2560 | case 37: /* 32 nm nehalem, "Clarkdale" */ | ||
2561 | case 44: /* 32 nm nehalem, "Gulftown" */ | ||
2562 | memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids, | ||
2563 | sizeof(hw_cache_event_ids)); | ||
2564 | |||
2565 | x86_pmu.event_constraints = intel_westmere_event_constraints; | ||
2566 | pr_cont("Westmere events, "); | ||
2567 | break; | ||
2568 | default: | 1362 | default: |
2569 | /* | 1363 | break; |
2570 | * default constraints for v2 and up | ||
2571 | */ | ||
2572 | x86_pmu.event_constraints = intel_gen_event_constraints; | ||
2573 | pr_cont("generic architected perfmon, "); | ||
2574 | } | 1364 | } |
2575 | return 0; | ||
2576 | } | ||
2577 | |||
2578 | static __init int amd_pmu_init(void) | ||
2579 | { | ||
2580 | /* Performance-monitoring supported from K7 and later: */ | ||
2581 | if (boot_cpu_data.x86 < 6) | ||
2582 | return -ENODEV; | ||
2583 | 1365 | ||
2584 | x86_pmu = amd_pmu; | 1366 | return ret; |
2585 | |||
2586 | /* Events are common for all AMDs */ | ||
2587 | memcpy(hw_cache_event_ids, amd_hw_cache_event_ids, | ||
2588 | sizeof(hw_cache_event_ids)); | ||
2589 | |||
2590 | return 0; | ||
2591 | } | 1367 | } |
2592 | 1368 | ||
2593 | static void __init pmu_check_apic(void) | 1369 | static void __init pmu_check_apic(void) |
@@ -2602,6 +1378,7 @@ static void __init pmu_check_apic(void) | |||
2602 | 1378 | ||
2603 | void __init init_hw_perf_events(void) | 1379 | void __init init_hw_perf_events(void) |
2604 | { | 1380 | { |
1381 | struct event_constraint *c; | ||
2605 | int err; | 1382 | int err; |
2606 | 1383 | ||
2607 | pr_info("Performance Events: "); | 1384 | pr_info("Performance Events: "); |
@@ -2650,6 +1427,16 @@ void __init init_hw_perf_events(void) | |||
2650 | __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_events) - 1, | 1427 | __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_events) - 1, |
2651 | 0, x86_pmu.num_events); | 1428 | 0, x86_pmu.num_events); |
2652 | 1429 | ||
1430 | if (x86_pmu.event_constraints) { | ||
1431 | for_each_event_constraint(c, x86_pmu.event_constraints) { | ||
1432 | if (c->cmask != INTEL_ARCH_FIXED_MASK) | ||
1433 | continue; | ||
1434 | |||
1435 | c->idxmsk64 |= (1ULL << x86_pmu.num_events) - 1; | ||
1436 | c->weight += x86_pmu.num_events; | ||
1437 | } | ||
1438 | } | ||
1439 | |||
2653 | pr_info("... version: %d\n", x86_pmu.version); | 1440 | pr_info("... version: %d\n", x86_pmu.version); |
2654 | pr_info("... bit width: %d\n", x86_pmu.event_bits); | 1441 | pr_info("... bit width: %d\n", x86_pmu.event_bits); |
2655 | pr_info("... generic registers: %d\n", x86_pmu.num_events); | 1442 | pr_info("... generic registers: %d\n", x86_pmu.num_events); |
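The fixup loop added to init_hw_perf_events() widens every fixed-counter constraint so the event can also be scheduled on a generic counter when its fixed counter is busy. A worked example of the mask arithmetic, assuming four generic counters and a constraint whose only index was fixed counter 0 (bit 32):

	/*
	 * Illustration of the |= and weight update above,
	 * assuming x86_pmu.num_events == 4:
	 *
	 *   idxmsk64 before:  0x0000000100000000
	 *   (1ULL << 4) - 1:  0x000000000000000f
	 *   idxmsk64 after:   0x000000010000000f
	 *   weight:           1 + 4 = 5
	 */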
@@ -2657,16 +1444,20 @@ void __init init_hw_perf_events(void) | |||
2657 | pr_info("... max period: %016Lx\n", x86_pmu.max_period); | 1444 | pr_info("... max period: %016Lx\n", x86_pmu.max_period); |
2658 | pr_info("... fixed-purpose events: %d\n", x86_pmu.num_events_fixed); | 1445 | pr_info("... fixed-purpose events: %d\n", x86_pmu.num_events_fixed); |
2659 | pr_info("... event mask: %016Lx\n", perf_event_mask); | 1446 | pr_info("... event mask: %016Lx\n", perf_event_mask); |
1447 | |||
1448 | perf_cpu_notifier(x86_pmu_notifier); | ||
2660 | } | 1449 | } |
2661 | 1450 | ||
2662 | static inline void x86_pmu_read(struct perf_event *event) | 1451 | static inline void x86_pmu_read(struct perf_event *event) |
2663 | { | 1452 | { |
2664 | x86_perf_event_update(event, &event->hw, event->hw.idx); | 1453 | x86_perf_event_update(event); |
2665 | } | 1454 | } |
2666 | 1455 | ||
2667 | static const struct pmu pmu = { | 1456 | static const struct pmu pmu = { |
2668 | .enable = x86_pmu_enable, | 1457 | .enable = x86_pmu_enable, |
2669 | .disable = x86_pmu_disable, | 1458 | .disable = x86_pmu_disable, |
1459 | .start = x86_pmu_start, | ||
1460 | .stop = x86_pmu_stop, | ||
2670 | .read = x86_pmu_read, | 1461 | .read = x86_pmu_read, |
2671 | .unthrottle = x86_pmu_unthrottle, | 1462 | .unthrottle = x86_pmu_unthrottle, |
2672 | }; | 1463 | }; |
@@ -2841,14 +1632,42 @@ copy_from_user_nmi(void *to, const void __user *from, unsigned long n) | |||
2841 | return len; | 1632 | return len; |
2842 | } | 1633 | } |
2843 | 1634 | ||
2844 | static int copy_stack_frame(const void __user *fp, struct stack_frame *frame) | 1635 | #ifdef CONFIG_COMPAT |
1636 | static inline int | ||
1637 | perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry) | ||
2845 | { | 1638 | { |
2846 | unsigned long bytes; | 1639 | /* 32-bit process in 64-bit kernel. */ |
1640 | struct stack_frame_ia32 frame; | ||
1641 | const void __user *fp; | ||
2847 | 1642 | ||
2848 | bytes = copy_from_user_nmi(frame, fp, sizeof(*frame)); | 1643 | if (!test_thread_flag(TIF_IA32)) |
1644 | return 0; | ||
2849 | 1645 | ||
2850 | return bytes == sizeof(*frame); | 1646 | fp = compat_ptr(regs->bp); |
1647 | while (entry->nr < PERF_MAX_STACK_DEPTH) { | ||
1648 | unsigned long bytes; | ||
1649 | frame.next_frame = 0; | ||
1650 | frame.return_address = 0; | ||
1651 | |||
1652 | bytes = copy_from_user_nmi(&frame, fp, sizeof(frame)); | ||
1653 | if (bytes != sizeof(frame)) | ||
1654 | break; | ||
1655 | |||
1656 | if (fp < compat_ptr(regs->sp)) | ||
1657 | break; | ||
1658 | |||
1659 | callchain_store(entry, frame.return_address); | ||
1660 | fp = compat_ptr(frame.next_frame); | ||
1661 | } | ||
1662 | return 1; | ||
1663 | } | ||
1664 | #else | ||
1665 | static inline int | ||
1666 | perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry) | ||
1667 | { | ||
1668 | return 0; | ||
2851 | } | 1669 | } |
1670 | #endif | ||
2852 | 1671 | ||
2853 | static void | 1672 | static void |
2854 | perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry) | 1673 | perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry) |
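perf_callchain_user32() walks the 32-bit frame-pointer chain of a compat task, so the frame it copies has 32-bit members and the saved pointers have to go through compat_ptr() before they can be dereferenced. Assumed shape of that frame (the real definition lives in the x86 stacktrace headers):

	struct stack_frame_ia32 {
		u32 next_frame;		/* saved 32-bit frame pointer */
		u32 return_address;
	};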
@@ -2864,11 +1683,16 @@ perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry) | |||
2864 | callchain_store(entry, PERF_CONTEXT_USER); | 1683 | callchain_store(entry, PERF_CONTEXT_USER); |
2865 | callchain_store(entry, regs->ip); | 1684 | callchain_store(entry, regs->ip); |
2866 | 1685 | ||
1686 | if (perf_callchain_user32(regs, entry)) | ||
1687 | return; | ||
1688 | |||
2867 | while (entry->nr < PERF_MAX_STACK_DEPTH) { | 1689 | while (entry->nr < PERF_MAX_STACK_DEPTH) { |
1690 | unsigned long bytes; | ||
2868 | frame.next_frame = NULL; | 1691 | frame.next_frame = NULL; |
2869 | frame.return_address = 0; | 1692 | frame.return_address = 0; |
2870 | 1693 | ||
2871 | if (!copy_stack_frame(fp, &frame)) | 1694 | bytes = copy_from_user_nmi(&frame, fp, sizeof(frame)); |
1695 | if (bytes != sizeof(frame)) | ||
2872 | break; | 1696 | break; |
2873 | 1697 | ||
2874 | if ((unsigned long)fp < regs->sp) | 1698 | if ((unsigned long)fp < regs->sp) |
@@ -2915,7 +1739,14 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) | |||
2915 | return entry; | 1739 | return entry; |
2916 | } | 1740 | } |
2917 | 1741 | ||
2918 | void hw_perf_event_setup_online(int cpu) | 1742 | void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip) |
2919 | { | 1743 | { |
2920 | init_debug_store_on_cpu(cpu); | 1744 | regs->ip = ip; |
1745 | /* | ||
1746 | * perf_arch_fetch_caller_regs adds another call, so we need to | ||
1747 | * increment the skip level | ||
1748 | */ | ||
1749 | regs->bp = rewind_frame_pointer(skip + 1); | ||
1750 | regs->cs = __KERNEL_CS; | ||
1751 | local_save_flags(regs->flags); | ||
2921 | } | 1752 | } |
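perf_arch_fetch_caller_regs() adds one stack frame of its own, which is why the skip it receives is bumped by one before rewinding. A rough sketch of the kind of walk a rewind helper such as rewind_frame_pointer() performs, assuming frame pointers are available (CONFIG_FRAME_POINTER):

	/* Sketch only; the real helper lives in the x86 dumpstack code.
	 * Follow the saved frame-pointer chain n links up the stack and
	 * hand back that frame's address. */
	static unsigned long rewind_frame_pointer_sketch(int n)
	{
		struct stack_frame *frame = __builtin_frame_address(0);

		while (n--)
			frame = frame->next_frame;

		return (unsigned long)frame;
	}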