| author | Peter Zijlstra <a.p.zijlstra@chello.nl> | 2010-02-26 06:05:05 -0500 |
| --- | --- | --- |
| committer | Ingo Molnar <mingo@elte.hu> | 2010-02-26 09:44:04 -0500 |
| commit | f22f54f4491acd987a6c5a92de52b60ca8b58b61 (patch) | |
| tree | 7eae87b08e828e8f0b1223f267abb004d6a5f7e7 /arch/x86/kernel/cpu/perf_event.c | |
| parent | 48fb4fdd6b667ebeccbc6cde0a8a5a148d5c6b68 (diff) | |
perf_events, x86: Split PMU definitions into separate files
Split the amd, p6 and intel bits into separate files so that we can easily deal
with the CONFIG_CPU_SUP_* options; this is needed to make things build now that
perf_event.c relies on symbols from amd.c.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
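
For orientation, here is a minimal sketch of the include arrangement this split enables. The commit itself pulls the new files back into perf_event.c with plain, unconditional #includes (see the hunk near the end of the diff); the CONFIG_CPU_SUP_*-guarded form below is an assumed follow-up, not part of this patch:

```c
/*
 * Sketch only -- this patch ends perf_event.c with unconditional includes:
 *
 *	#include "perf_event_amd.c"
 *	#include "perf_event_p6.c"
 *	#include "perf_event_intel.c"
 *
 * Splitting the vendor code out is what later allows guarding it with the
 * CONFIG_CPU_SUP_* Kconfig options, roughly along these lines:
 */
#ifdef CONFIG_CPU_SUP_AMD
#include "perf_event_amd.c"
#endif

#ifdef CONFIG_CPU_SUP_INTEL
#include "perf_event_p6.c"
#include "perf_event_intel.c"
#endif
```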
Diffstat (limited to 'arch/x86/kernel/cpu/perf_event.c')
-rw-r--r-- | arch/x86/kernel/cpu/perf_event.c | 1524 |
1 file changed, 10 insertions, 1514 deletions
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index dd09ccc867d3..641ccb9dddbc 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -161,8 +161,6 @@ struct x86_pmu { | |||
161 | 161 | ||
162 | static struct x86_pmu x86_pmu __read_mostly; | 162 | static struct x86_pmu x86_pmu __read_mostly; |
163 | 163 | ||
164 | static raw_spinlock_t amd_nb_lock; | ||
165 | |||
166 | static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { | 164 | static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { |
167 | .enabled = 1, | 165 | .enabled = 1, |
168 | }; | 166 | }; |
@@ -171,140 +169,6 @@ static int x86_perf_event_set_period(struct perf_event *event, | |||
171 | struct hw_perf_event *hwc, int idx); | 169 | struct hw_perf_event *hwc, int idx); |
172 | 170 | ||
173 | /* | 171 | /* |
174 | * Not sure about some of these | ||
175 | */ | ||
176 | static const u64 p6_perfmon_event_map[] = | ||
177 | { | ||
178 | [PERF_COUNT_HW_CPU_CYCLES] = 0x0079, | ||
179 | [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, | ||
180 | [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0f2e, | ||
181 | [PERF_COUNT_HW_CACHE_MISSES] = 0x012e, | ||
182 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, | ||
183 | [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, | ||
184 | [PERF_COUNT_HW_BUS_CYCLES] = 0x0062, | ||
185 | }; | ||
186 | |||
187 | static u64 p6_pmu_event_map(int hw_event) | ||
188 | { | ||
189 | return p6_perfmon_event_map[hw_event]; | ||
190 | } | ||
191 | |||
192 | /* | ||
193 | * Event setting that is specified not to count anything. | ||
194 | * We use this to effectively disable a counter. | ||
195 | * | ||
196 | * L2_RQSTS with 0 MESI unit mask. | ||
197 | */ | ||
198 | #define P6_NOP_EVENT 0x0000002EULL | ||
199 | |||
200 | static u64 p6_pmu_raw_event(u64 hw_event) | ||
201 | { | ||
202 | #define P6_EVNTSEL_EVENT_MASK 0x000000FFULL | ||
203 | #define P6_EVNTSEL_UNIT_MASK 0x0000FF00ULL | ||
204 | #define P6_EVNTSEL_EDGE_MASK 0x00040000ULL | ||
205 | #define P6_EVNTSEL_INV_MASK 0x00800000ULL | ||
206 | #define P6_EVNTSEL_REG_MASK 0xFF000000ULL | ||
207 | |||
208 | #define P6_EVNTSEL_MASK \ | ||
209 | (P6_EVNTSEL_EVENT_MASK | \ | ||
210 | P6_EVNTSEL_UNIT_MASK | \ | ||
211 | P6_EVNTSEL_EDGE_MASK | \ | ||
212 | P6_EVNTSEL_INV_MASK | \ | ||
213 | P6_EVNTSEL_REG_MASK) | ||
214 | |||
215 | return hw_event & P6_EVNTSEL_MASK; | ||
216 | } | ||
217 | |||
218 | static struct event_constraint intel_p6_event_constraints[] = | ||
219 | { | ||
220 | INTEL_EVENT_CONSTRAINT(0xc1, 0x1), /* FLOPS */ | ||
221 | INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */ | ||
222 | INTEL_EVENT_CONSTRAINT(0x11, 0x1), /* FP_ASSIST */ | ||
223 | INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */ | ||
224 | INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */ | ||
225 | INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */ | ||
226 | EVENT_CONSTRAINT_END | ||
227 | }; | ||
228 | |||
229 | /* | ||
230 | * Intel PerfMon v3. Used on Core2 and later. | ||
231 | */ | ||
232 | static const u64 intel_perfmon_event_map[] = | ||
233 | { | ||
234 | [PERF_COUNT_HW_CPU_CYCLES] = 0x003c, | ||
235 | [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, | ||
236 | [PERF_COUNT_HW_CACHE_REFERENCES] = 0x4f2e, | ||
237 | [PERF_COUNT_HW_CACHE_MISSES] = 0x412e, | ||
238 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, | ||
239 | [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, | ||
240 | [PERF_COUNT_HW_BUS_CYCLES] = 0x013c, | ||
241 | }; | ||
242 | |||
243 | static struct event_constraint intel_core_event_constraints[] = | ||
244 | { | ||
245 | INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */ | ||
246 | INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */ | ||
247 | INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */ | ||
248 | INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */ | ||
249 | INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */ | ||
250 | INTEL_EVENT_CONSTRAINT(0xc1, 0x1), /* FP_COMP_INSTR_RET */ | ||
251 | EVENT_CONSTRAINT_END | ||
252 | }; | ||
253 | |||
254 | static struct event_constraint intel_core2_event_constraints[] = | ||
255 | { | ||
256 | FIXED_EVENT_CONSTRAINT(0xc0, (0x3|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */ | ||
257 | FIXED_EVENT_CONSTRAINT(0x3c, (0x3|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */ | ||
258 | INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */ | ||
259 | INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */ | ||
260 | INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */ | ||
261 | INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */ | ||
262 | INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */ | ||
263 | INTEL_EVENT_CONSTRAINT(0x18, 0x1), /* IDLE_DURING_DIV */ | ||
264 | INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */ | ||
265 | INTEL_EVENT_CONSTRAINT(0xa1, 0x1), /* RS_UOPS_DISPATCH_CYCLES */ | ||
266 | INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED */ | ||
267 | EVENT_CONSTRAINT_END | ||
268 | }; | ||
269 | |||
270 | static struct event_constraint intel_nehalem_event_constraints[] = | ||
271 | { | ||
272 | FIXED_EVENT_CONSTRAINT(0xc0, (0xf|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */ | ||
273 | FIXED_EVENT_CONSTRAINT(0x3c, (0xf|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */ | ||
274 | INTEL_EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */ | ||
275 | INTEL_EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */ | ||
276 | INTEL_EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */ | ||
277 | INTEL_EVENT_CONSTRAINT(0x43, 0x3), /* L1D_ALL_REF */ | ||
278 | INTEL_EVENT_CONSTRAINT(0x48, 0x3), /* L1D_PEND_MISS */ | ||
279 | INTEL_EVENT_CONSTRAINT(0x4e, 0x3), /* L1D_PREFETCH */ | ||
280 | INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */ | ||
281 | INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */ | ||
282 | EVENT_CONSTRAINT_END | ||
283 | }; | ||
284 | |||
285 | static struct event_constraint intel_westmere_event_constraints[] = | ||
286 | { | ||
287 | FIXED_EVENT_CONSTRAINT(0xc0, (0xf|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */ | ||
288 | FIXED_EVENT_CONSTRAINT(0x3c, (0xf|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */ | ||
289 | INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */ | ||
290 | INTEL_EVENT_CONSTRAINT(0x60, 0x1), /* OFFCORE_REQUESTS_OUTSTANDING */ | ||
291 | INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */ | ||
292 | EVENT_CONSTRAINT_END | ||
293 | }; | ||
294 | |||
295 | static struct event_constraint intel_gen_event_constraints[] = | ||
296 | { | ||
297 | FIXED_EVENT_CONSTRAINT(0xc0, (0x3|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */ | ||
298 | FIXED_EVENT_CONSTRAINT(0x3c, (0x3|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */ | ||
299 | EVENT_CONSTRAINT_END | ||
300 | }; | ||
301 | |||
302 | static u64 intel_pmu_event_map(int hw_event) | ||
303 | { | ||
304 | return intel_perfmon_event_map[hw_event]; | ||
305 | } | ||
306 | |||
307 | /* | ||
308 | * Generalized hw caching related hw_event table, filled | 172 | * Generalized hw caching related hw_event table, filled |
309 | * in on a per model basis. A value of 0 means | 173 | * in on a per model basis. A value of 0 means |
310 | * 'not supported', -1 means 'hw_event makes no sense on | 174 | * 'not supported', -1 means 'hw_event makes no sense on |
@@ -319,515 +183,6 @@ static u64 __read_mostly hw_cache_event_ids | |||
319 | [PERF_COUNT_HW_CACHE_OP_MAX] | 183 | [PERF_COUNT_HW_CACHE_OP_MAX] |
320 | [PERF_COUNT_HW_CACHE_RESULT_MAX]; | 184 | [PERF_COUNT_HW_CACHE_RESULT_MAX]; |
321 | 185 | ||
322 | static __initconst u64 westmere_hw_cache_event_ids | ||
323 | [PERF_COUNT_HW_CACHE_MAX] | ||
324 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
325 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | ||
326 | { | ||
327 | [ C(L1D) ] = { | ||
328 | [ C(OP_READ) ] = { | ||
329 | [ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS */ | ||
330 | [ C(RESULT_MISS) ] = 0x0151, /* L1D.REPL */ | ||
331 | }, | ||
332 | [ C(OP_WRITE) ] = { | ||
333 | [ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETIRED.STORES */ | ||
334 | [ C(RESULT_MISS) ] = 0x0251, /* L1D.M_REPL */ | ||
335 | }, | ||
336 | [ C(OP_PREFETCH) ] = { | ||
337 | [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS */ | ||
338 | [ C(RESULT_MISS) ] = 0x024e, /* L1D_PREFETCH.MISS */ | ||
339 | }, | ||
340 | }, | ||
341 | [ C(L1I ) ] = { | ||
342 | [ C(OP_READ) ] = { | ||
343 | [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */ | ||
344 | [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */ | ||
345 | }, | ||
346 | [ C(OP_WRITE) ] = { | ||
347 | [ C(RESULT_ACCESS) ] = -1, | ||
348 | [ C(RESULT_MISS) ] = -1, | ||
349 | }, | ||
350 | [ C(OP_PREFETCH) ] = { | ||
351 | [ C(RESULT_ACCESS) ] = 0x0, | ||
352 | [ C(RESULT_MISS) ] = 0x0, | ||
353 | }, | ||
354 | }, | ||
355 | [ C(LL ) ] = { | ||
356 | [ C(OP_READ) ] = { | ||
357 | [ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS */ | ||
358 | [ C(RESULT_MISS) ] = 0x0224, /* L2_RQSTS.LD_MISS */ | ||
359 | }, | ||
360 | [ C(OP_WRITE) ] = { | ||
361 | [ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS */ | ||
362 | [ C(RESULT_MISS) ] = 0x0824, /* L2_RQSTS.RFO_MISS */ | ||
363 | }, | ||
364 | [ C(OP_PREFETCH) ] = { | ||
365 | [ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference */ | ||
366 | [ C(RESULT_MISS) ] = 0x412e, /* LLC Misses */ | ||
367 | }, | ||
368 | }, | ||
369 | [ C(DTLB) ] = { | ||
370 | [ C(OP_READ) ] = { | ||
371 | [ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS */ | ||
372 | [ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.ANY */ | ||
373 | }, | ||
374 | [ C(OP_WRITE) ] = { | ||
375 | [ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETIRED.STORES */ | ||
376 | [ C(RESULT_MISS) ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS */ | ||
377 | }, | ||
378 | [ C(OP_PREFETCH) ] = { | ||
379 | [ C(RESULT_ACCESS) ] = 0x0, | ||
380 | [ C(RESULT_MISS) ] = 0x0, | ||
381 | }, | ||
382 | }, | ||
383 | [ C(ITLB) ] = { | ||
384 | [ C(OP_READ) ] = { | ||
385 | [ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P */ | ||
386 | [ C(RESULT_MISS) ] = 0x0185, /* ITLB_MISSES.ANY */ | ||
387 | }, | ||
388 | [ C(OP_WRITE) ] = { | ||
389 | [ C(RESULT_ACCESS) ] = -1, | ||
390 | [ C(RESULT_MISS) ] = -1, | ||
391 | }, | ||
392 | [ C(OP_PREFETCH) ] = { | ||
393 | [ C(RESULT_ACCESS) ] = -1, | ||
394 | [ C(RESULT_MISS) ] = -1, | ||
395 | }, | ||
396 | }, | ||
397 | [ C(BPU ) ] = { | ||
398 | [ C(OP_READ) ] = { | ||
399 | [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */ | ||
400 | [ C(RESULT_MISS) ] = 0x03e8, /* BPU_CLEARS.ANY */ | ||
401 | }, | ||
402 | [ C(OP_WRITE) ] = { | ||
403 | [ C(RESULT_ACCESS) ] = -1, | ||
404 | [ C(RESULT_MISS) ] = -1, | ||
405 | }, | ||
406 | [ C(OP_PREFETCH) ] = { | ||
407 | [ C(RESULT_ACCESS) ] = -1, | ||
408 | [ C(RESULT_MISS) ] = -1, | ||
409 | }, | ||
410 | }, | ||
411 | }; | ||
412 | |||
413 | static __initconst u64 nehalem_hw_cache_event_ids | ||
414 | [PERF_COUNT_HW_CACHE_MAX] | ||
415 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
416 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | ||
417 | { | ||
418 | [ C(L1D) ] = { | ||
419 | [ C(OP_READ) ] = { | ||
420 | [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI */ | ||
421 | [ C(RESULT_MISS) ] = 0x0140, /* L1D_CACHE_LD.I_STATE */ | ||
422 | }, | ||
423 | [ C(OP_WRITE) ] = { | ||
424 | [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI */ | ||
425 | [ C(RESULT_MISS) ] = 0x0141, /* L1D_CACHE_ST.I_STATE */ | ||
426 | }, | ||
427 | [ C(OP_PREFETCH) ] = { | ||
428 | [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS */ | ||
429 | [ C(RESULT_MISS) ] = 0x024e, /* L1D_PREFETCH.MISS */ | ||
430 | }, | ||
431 | }, | ||
432 | [ C(L1I ) ] = { | ||
433 | [ C(OP_READ) ] = { | ||
434 | [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */ | ||
435 | [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */ | ||
436 | }, | ||
437 | [ C(OP_WRITE) ] = { | ||
438 | [ C(RESULT_ACCESS) ] = -1, | ||
439 | [ C(RESULT_MISS) ] = -1, | ||
440 | }, | ||
441 | [ C(OP_PREFETCH) ] = { | ||
442 | [ C(RESULT_ACCESS) ] = 0x0, | ||
443 | [ C(RESULT_MISS) ] = 0x0, | ||
444 | }, | ||
445 | }, | ||
446 | [ C(LL ) ] = { | ||
447 | [ C(OP_READ) ] = { | ||
448 | [ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS */ | ||
449 | [ C(RESULT_MISS) ] = 0x0224, /* L2_RQSTS.LD_MISS */ | ||
450 | }, | ||
451 | [ C(OP_WRITE) ] = { | ||
452 | [ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS */ | ||
453 | [ C(RESULT_MISS) ] = 0x0824, /* L2_RQSTS.RFO_MISS */ | ||
454 | }, | ||
455 | [ C(OP_PREFETCH) ] = { | ||
456 | [ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference */ | ||
457 | [ C(RESULT_MISS) ] = 0x412e, /* LLC Misses */ | ||
458 | }, | ||
459 | }, | ||
460 | [ C(DTLB) ] = { | ||
461 | [ C(OP_READ) ] = { | ||
462 | [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */ | ||
463 | [ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.ANY */ | ||
464 | }, | ||
465 | [ C(OP_WRITE) ] = { | ||
466 | [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */ | ||
467 | [ C(RESULT_MISS) ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS */ | ||
468 | }, | ||
469 | [ C(OP_PREFETCH) ] = { | ||
470 | [ C(RESULT_ACCESS) ] = 0x0, | ||
471 | [ C(RESULT_MISS) ] = 0x0, | ||
472 | }, | ||
473 | }, | ||
474 | [ C(ITLB) ] = { | ||
475 | [ C(OP_READ) ] = { | ||
476 | [ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P */ | ||
477 | [ C(RESULT_MISS) ] = 0x20c8, /* ITLB_MISS_RETIRED */ | ||
478 | }, | ||
479 | [ C(OP_WRITE) ] = { | ||
480 | [ C(RESULT_ACCESS) ] = -1, | ||
481 | [ C(RESULT_MISS) ] = -1, | ||
482 | }, | ||
483 | [ C(OP_PREFETCH) ] = { | ||
484 | [ C(RESULT_ACCESS) ] = -1, | ||
485 | [ C(RESULT_MISS) ] = -1, | ||
486 | }, | ||
487 | }, | ||
488 | [ C(BPU ) ] = { | ||
489 | [ C(OP_READ) ] = { | ||
490 | [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */ | ||
491 | [ C(RESULT_MISS) ] = 0x03e8, /* BPU_CLEARS.ANY */ | ||
492 | }, | ||
493 | [ C(OP_WRITE) ] = { | ||
494 | [ C(RESULT_ACCESS) ] = -1, | ||
495 | [ C(RESULT_MISS) ] = -1, | ||
496 | }, | ||
497 | [ C(OP_PREFETCH) ] = { | ||
498 | [ C(RESULT_ACCESS) ] = -1, | ||
499 | [ C(RESULT_MISS) ] = -1, | ||
500 | }, | ||
501 | }, | ||
502 | }; | ||
503 | |||
504 | static __initconst u64 core2_hw_cache_event_ids | ||
505 | [PERF_COUNT_HW_CACHE_MAX] | ||
506 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
507 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | ||
508 | { | ||
509 | [ C(L1D) ] = { | ||
510 | [ C(OP_READ) ] = { | ||
511 | [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI */ | ||
512 | [ C(RESULT_MISS) ] = 0x0140, /* L1D_CACHE_LD.I_STATE */ | ||
513 | }, | ||
514 | [ C(OP_WRITE) ] = { | ||
515 | [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI */ | ||
516 | [ C(RESULT_MISS) ] = 0x0141, /* L1D_CACHE_ST.I_STATE */ | ||
517 | }, | ||
518 | [ C(OP_PREFETCH) ] = { | ||
519 | [ C(RESULT_ACCESS) ] = 0x104e, /* L1D_PREFETCH.REQUESTS */ | ||
520 | [ C(RESULT_MISS) ] = 0, | ||
521 | }, | ||
522 | }, | ||
523 | [ C(L1I ) ] = { | ||
524 | [ C(OP_READ) ] = { | ||
525 | [ C(RESULT_ACCESS) ] = 0x0080, /* L1I.READS */ | ||
526 | [ C(RESULT_MISS) ] = 0x0081, /* L1I.MISSES */ | ||
527 | }, | ||
528 | [ C(OP_WRITE) ] = { | ||
529 | [ C(RESULT_ACCESS) ] = -1, | ||
530 | [ C(RESULT_MISS) ] = -1, | ||
531 | }, | ||
532 | [ C(OP_PREFETCH) ] = { | ||
533 | [ C(RESULT_ACCESS) ] = 0, | ||
534 | [ C(RESULT_MISS) ] = 0, | ||
535 | }, | ||
536 | }, | ||
537 | [ C(LL ) ] = { | ||
538 | [ C(OP_READ) ] = { | ||
539 | [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */ | ||
540 | [ C(RESULT_MISS) ] = 0x4129, /* L2_LD.ISTATE */ | ||
541 | }, | ||
542 | [ C(OP_WRITE) ] = { | ||
543 | [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */ | ||
544 | [ C(RESULT_MISS) ] = 0x412A, /* L2_ST.ISTATE */ | ||
545 | }, | ||
546 | [ C(OP_PREFETCH) ] = { | ||
547 | [ C(RESULT_ACCESS) ] = 0, | ||
548 | [ C(RESULT_MISS) ] = 0, | ||
549 | }, | ||
550 | }, | ||
551 | [ C(DTLB) ] = { | ||
552 | [ C(OP_READ) ] = { | ||
553 | [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */ | ||
554 | [ C(RESULT_MISS) ] = 0x0208, /* DTLB_MISSES.MISS_LD */ | ||
555 | }, | ||
556 | [ C(OP_WRITE) ] = { | ||
557 | [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */ | ||
558 | [ C(RESULT_MISS) ] = 0x0808, /* DTLB_MISSES.MISS_ST */ | ||
559 | }, | ||
560 | [ C(OP_PREFETCH) ] = { | ||
561 | [ C(RESULT_ACCESS) ] = 0, | ||
562 | [ C(RESULT_MISS) ] = 0, | ||
563 | }, | ||
564 | }, | ||
565 | [ C(ITLB) ] = { | ||
566 | [ C(OP_READ) ] = { | ||
567 | [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */ | ||
568 | [ C(RESULT_MISS) ] = 0x1282, /* ITLBMISSES */ | ||
569 | }, | ||
570 | [ C(OP_WRITE) ] = { | ||
571 | [ C(RESULT_ACCESS) ] = -1, | ||
572 | [ C(RESULT_MISS) ] = -1, | ||
573 | }, | ||
574 | [ C(OP_PREFETCH) ] = { | ||
575 | [ C(RESULT_ACCESS) ] = -1, | ||
576 | [ C(RESULT_MISS) ] = -1, | ||
577 | }, | ||
578 | }, | ||
579 | [ C(BPU ) ] = { | ||
580 | [ C(OP_READ) ] = { | ||
581 | [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */ | ||
582 | [ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */ | ||
583 | }, | ||
584 | [ C(OP_WRITE) ] = { | ||
585 | [ C(RESULT_ACCESS) ] = -1, | ||
586 | [ C(RESULT_MISS) ] = -1, | ||
587 | }, | ||
588 | [ C(OP_PREFETCH) ] = { | ||
589 | [ C(RESULT_ACCESS) ] = -1, | ||
590 | [ C(RESULT_MISS) ] = -1, | ||
591 | }, | ||
592 | }, | ||
593 | }; | ||
594 | |||
595 | static __initconst u64 atom_hw_cache_event_ids | ||
596 | [PERF_COUNT_HW_CACHE_MAX] | ||
597 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
598 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | ||
599 | { | ||
600 | [ C(L1D) ] = { | ||
601 | [ C(OP_READ) ] = { | ||
602 | [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE.LD */ | ||
603 | [ C(RESULT_MISS) ] = 0, | ||
604 | }, | ||
605 | [ C(OP_WRITE) ] = { | ||
606 | [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE.ST */ | ||
607 | [ C(RESULT_MISS) ] = 0, | ||
608 | }, | ||
609 | [ C(OP_PREFETCH) ] = { | ||
610 | [ C(RESULT_ACCESS) ] = 0x0, | ||
611 | [ C(RESULT_MISS) ] = 0, | ||
612 | }, | ||
613 | }, | ||
614 | [ C(L1I ) ] = { | ||
615 | [ C(OP_READ) ] = { | ||
616 | [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */ | ||
617 | [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */ | ||
618 | }, | ||
619 | [ C(OP_WRITE) ] = { | ||
620 | [ C(RESULT_ACCESS) ] = -1, | ||
621 | [ C(RESULT_MISS) ] = -1, | ||
622 | }, | ||
623 | [ C(OP_PREFETCH) ] = { | ||
624 | [ C(RESULT_ACCESS) ] = 0, | ||
625 | [ C(RESULT_MISS) ] = 0, | ||
626 | }, | ||
627 | }, | ||
628 | [ C(LL ) ] = { | ||
629 | [ C(OP_READ) ] = { | ||
630 | [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */ | ||
631 | [ C(RESULT_MISS) ] = 0x4129, /* L2_LD.ISTATE */ | ||
632 | }, | ||
633 | [ C(OP_WRITE) ] = { | ||
634 | [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */ | ||
635 | [ C(RESULT_MISS) ] = 0x412A, /* L2_ST.ISTATE */ | ||
636 | }, | ||
637 | [ C(OP_PREFETCH) ] = { | ||
638 | [ C(RESULT_ACCESS) ] = 0, | ||
639 | [ C(RESULT_MISS) ] = 0, | ||
640 | }, | ||
641 | }, | ||
642 | [ C(DTLB) ] = { | ||
643 | [ C(OP_READ) ] = { | ||
644 | [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE_LD.MESI (alias) */ | ||
645 | [ C(RESULT_MISS) ] = 0x0508, /* DTLB_MISSES.MISS_LD */ | ||
646 | }, | ||
647 | [ C(OP_WRITE) ] = { | ||
648 | [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE_ST.MESI (alias) */ | ||
649 | [ C(RESULT_MISS) ] = 0x0608, /* DTLB_MISSES.MISS_ST */ | ||
650 | }, | ||
651 | [ C(OP_PREFETCH) ] = { | ||
652 | [ C(RESULT_ACCESS) ] = 0, | ||
653 | [ C(RESULT_MISS) ] = 0, | ||
654 | }, | ||
655 | }, | ||
656 | [ C(ITLB) ] = { | ||
657 | [ C(OP_READ) ] = { | ||
658 | [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */ | ||
659 | [ C(RESULT_MISS) ] = 0x0282, /* ITLB.MISSES */ | ||
660 | }, | ||
661 | [ C(OP_WRITE) ] = { | ||
662 | [ C(RESULT_ACCESS) ] = -1, | ||
663 | [ C(RESULT_MISS) ] = -1, | ||
664 | }, | ||
665 | [ C(OP_PREFETCH) ] = { | ||
666 | [ C(RESULT_ACCESS) ] = -1, | ||
667 | [ C(RESULT_MISS) ] = -1, | ||
668 | }, | ||
669 | }, | ||
670 | [ C(BPU ) ] = { | ||
671 | [ C(OP_READ) ] = { | ||
672 | [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */ | ||
673 | [ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */ | ||
674 | }, | ||
675 | [ C(OP_WRITE) ] = { | ||
676 | [ C(RESULT_ACCESS) ] = -1, | ||
677 | [ C(RESULT_MISS) ] = -1, | ||
678 | }, | ||
679 | [ C(OP_PREFETCH) ] = { | ||
680 | [ C(RESULT_ACCESS) ] = -1, | ||
681 | [ C(RESULT_MISS) ] = -1, | ||
682 | }, | ||
683 | }, | ||
684 | }; | ||
685 | |||
686 | static u64 intel_pmu_raw_event(u64 hw_event) | ||
687 | { | ||
688 | #define CORE_EVNTSEL_EVENT_MASK 0x000000FFULL | ||
689 | #define CORE_EVNTSEL_UNIT_MASK 0x0000FF00ULL | ||
690 | #define CORE_EVNTSEL_EDGE_MASK 0x00040000ULL | ||
691 | #define CORE_EVNTSEL_INV_MASK 0x00800000ULL | ||
692 | #define CORE_EVNTSEL_REG_MASK 0xFF000000ULL | ||
693 | |||
694 | #define CORE_EVNTSEL_MASK \ | ||
695 | (INTEL_ARCH_EVTSEL_MASK | \ | ||
696 | INTEL_ARCH_UNIT_MASK | \ | ||
697 | INTEL_ARCH_EDGE_MASK | \ | ||
698 | INTEL_ARCH_INV_MASK | \ | ||
699 | INTEL_ARCH_CNT_MASK) | ||
700 | |||
701 | return hw_event & CORE_EVNTSEL_MASK; | ||
702 | } | ||
703 | |||
704 | static __initconst u64 amd_hw_cache_event_ids | ||
705 | [PERF_COUNT_HW_CACHE_MAX] | ||
706 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
707 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | ||
708 | { | ||
709 | [ C(L1D) ] = { | ||
710 | [ C(OP_READ) ] = { | ||
711 | [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */ | ||
712 | [ C(RESULT_MISS) ] = 0x0041, /* Data Cache Misses */ | ||
713 | }, | ||
714 | [ C(OP_WRITE) ] = { | ||
715 | [ C(RESULT_ACCESS) ] = 0x0142, /* Data Cache Refills :system */ | ||
716 | [ C(RESULT_MISS) ] = 0, | ||
717 | }, | ||
718 | [ C(OP_PREFETCH) ] = { | ||
719 | [ C(RESULT_ACCESS) ] = 0x0267, /* Data Prefetcher :attempts */ | ||
720 | [ C(RESULT_MISS) ] = 0x0167, /* Data Prefetcher :cancelled */ | ||
721 | }, | ||
722 | }, | ||
723 | [ C(L1I ) ] = { | ||
724 | [ C(OP_READ) ] = { | ||
725 | [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction cache fetches */ | ||
726 | [ C(RESULT_MISS) ] = 0x0081, /* Instruction cache misses */ | ||
727 | }, | ||
728 | [ C(OP_WRITE) ] = { | ||
729 | [ C(RESULT_ACCESS) ] = -1, | ||
730 | [ C(RESULT_MISS) ] = -1, | ||
731 | }, | ||
732 | [ C(OP_PREFETCH) ] = { | ||
733 | [ C(RESULT_ACCESS) ] = 0x014B, /* Prefetch Instructions :Load */ | ||
734 | [ C(RESULT_MISS) ] = 0, | ||
735 | }, | ||
736 | }, | ||
737 | [ C(LL ) ] = { | ||
738 | [ C(OP_READ) ] = { | ||
739 | [ C(RESULT_ACCESS) ] = 0x037D, /* Requests to L2 Cache :IC+DC */ | ||
740 | [ C(RESULT_MISS) ] = 0x037E, /* L2 Cache Misses : IC+DC */ | ||
741 | }, | ||
742 | [ C(OP_WRITE) ] = { | ||
743 | [ C(RESULT_ACCESS) ] = 0x017F, /* L2 Fill/Writeback */ | ||
744 | [ C(RESULT_MISS) ] = 0, | ||
745 | }, | ||
746 | [ C(OP_PREFETCH) ] = { | ||
747 | [ C(RESULT_ACCESS) ] = 0, | ||
748 | [ C(RESULT_MISS) ] = 0, | ||
749 | }, | ||
750 | }, | ||
751 | [ C(DTLB) ] = { | ||
752 | [ C(OP_READ) ] = { | ||
753 | [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */ | ||
754 | [ C(RESULT_MISS) ] = 0x0046, /* L1 DTLB and L2 DTLB Miss */ | ||
755 | }, | ||
756 | [ C(OP_WRITE) ] = { | ||
757 | [ C(RESULT_ACCESS) ] = 0, | ||
758 | [ C(RESULT_MISS) ] = 0, | ||
759 | }, | ||
760 | [ C(OP_PREFETCH) ] = { | ||
761 | [ C(RESULT_ACCESS) ] = 0, | ||
762 | [ C(RESULT_MISS) ] = 0, | ||
763 | }, | ||
764 | }, | ||
765 | [ C(ITLB) ] = { | ||
766 | [ C(OP_READ) ] = { | ||
767 | [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fetches */ | ||
768 | [ C(RESULT_MISS) ] = 0x0085, /* Instr. fetch ITLB misses */ | ||
769 | }, | ||
770 | [ C(OP_WRITE) ] = { | ||
771 | [ C(RESULT_ACCESS) ] = -1, | ||
772 | [ C(RESULT_MISS) ] = -1, | ||
773 | }, | ||
774 | [ C(OP_PREFETCH) ] = { | ||
775 | [ C(RESULT_ACCESS) ] = -1, | ||
776 | [ C(RESULT_MISS) ] = -1, | ||
777 | }, | ||
778 | }, | ||
779 | [ C(BPU ) ] = { | ||
780 | [ C(OP_READ) ] = { | ||
781 | [ C(RESULT_ACCESS) ] = 0x00c2, /* Retired Branch Instr. */ | ||
782 | [ C(RESULT_MISS) ] = 0x00c3, /* Retired Mispredicted BI */ | ||
783 | }, | ||
784 | [ C(OP_WRITE) ] = { | ||
785 | [ C(RESULT_ACCESS) ] = -1, | ||
786 | [ C(RESULT_MISS) ] = -1, | ||
787 | }, | ||
788 | [ C(OP_PREFETCH) ] = { | ||
789 | [ C(RESULT_ACCESS) ] = -1, | ||
790 | [ C(RESULT_MISS) ] = -1, | ||
791 | }, | ||
792 | }, | ||
793 | }; | ||
794 | |||
795 | /* | ||
796 | * AMD Performance Monitor K7 and later. | ||
797 | */ | ||
798 | static const u64 amd_perfmon_event_map[] = | ||
799 | { | ||
800 | [PERF_COUNT_HW_CPU_CYCLES] = 0x0076, | ||
801 | [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, | ||
802 | [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0080, | ||
803 | [PERF_COUNT_HW_CACHE_MISSES] = 0x0081, | ||
804 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, | ||
805 | [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, | ||
806 | }; | ||
807 | |||
808 | static u64 amd_pmu_event_map(int hw_event) | ||
809 | { | ||
810 | return amd_perfmon_event_map[hw_event]; | ||
811 | } | ||
812 | |||
813 | static u64 amd_pmu_raw_event(u64 hw_event) | ||
814 | { | ||
815 | #define K7_EVNTSEL_EVENT_MASK 0xF000000FFULL | ||
816 | #define K7_EVNTSEL_UNIT_MASK 0x00000FF00ULL | ||
817 | #define K7_EVNTSEL_EDGE_MASK 0x000040000ULL | ||
818 | #define K7_EVNTSEL_INV_MASK 0x000800000ULL | ||
819 | #define K7_EVNTSEL_REG_MASK 0x0FF000000ULL | ||
820 | |||
821 | #define K7_EVNTSEL_MASK \ | ||
822 | (K7_EVNTSEL_EVENT_MASK | \ | ||
823 | K7_EVNTSEL_UNIT_MASK | \ | ||
824 | K7_EVNTSEL_EDGE_MASK | \ | ||
825 | K7_EVNTSEL_INV_MASK | \ | ||
826 | K7_EVNTSEL_REG_MASK) | ||
827 | |||
828 | return hw_event & K7_EVNTSEL_MASK; | ||
829 | } | ||
830 | |||
831 | /* | 186 | /* |
832 | * Propagate event elapsed time into the generic event. | 187 | * Propagate event elapsed time into the generic event. |
833 | * Can only be executed on the CPU where the event is active. | 188 | * Can only be executed on the CPU where the event is active. |
@@ -1079,42 +434,6 @@ set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event_attr *attr) | |||
1079 | return 0; | 434 | return 0; |
1080 | } | 435 | } |
1081 | 436 | ||
1082 | static void intel_pmu_enable_bts(u64 config) | ||
1083 | { | ||
1084 | unsigned long debugctlmsr; | ||
1085 | |||
1086 | debugctlmsr = get_debugctlmsr(); | ||
1087 | |||
1088 | debugctlmsr |= X86_DEBUGCTL_TR; | ||
1089 | debugctlmsr |= X86_DEBUGCTL_BTS; | ||
1090 | debugctlmsr |= X86_DEBUGCTL_BTINT; | ||
1091 | |||
1092 | if (!(config & ARCH_PERFMON_EVENTSEL_OS)) | ||
1093 | debugctlmsr |= X86_DEBUGCTL_BTS_OFF_OS; | ||
1094 | |||
1095 | if (!(config & ARCH_PERFMON_EVENTSEL_USR)) | ||
1096 | debugctlmsr |= X86_DEBUGCTL_BTS_OFF_USR; | ||
1097 | |||
1098 | update_debugctlmsr(debugctlmsr); | ||
1099 | } | ||
1100 | |||
1101 | static void intel_pmu_disable_bts(void) | ||
1102 | { | ||
1103 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
1104 | unsigned long debugctlmsr; | ||
1105 | |||
1106 | if (!cpuc->ds) | ||
1107 | return; | ||
1108 | |||
1109 | debugctlmsr = get_debugctlmsr(); | ||
1110 | |||
1111 | debugctlmsr &= | ||
1112 | ~(X86_DEBUGCTL_TR | X86_DEBUGCTL_BTS | X86_DEBUGCTL_BTINT | | ||
1113 | X86_DEBUGCTL_BTS_OFF_OS | X86_DEBUGCTL_BTS_OFF_USR); | ||
1114 | |||
1115 | update_debugctlmsr(debugctlmsr); | ||
1116 | } | ||
1117 | |||
1118 | /* | 437 | /* |
1119 | * Setup the hardware configuration for a given attr_type | 438 | * Setup the hardware configuration for a given attr_type |
1120 | */ | 439 | */ |
@@ -1223,26 +542,6 @@ static int __hw_perf_event_init(struct perf_event *event) | |||
1223 | return 0; | 542 | return 0; |
1224 | } | 543 | } |
1225 | 544 | ||
1226 | static void p6_pmu_disable_all(void) | ||
1227 | { | ||
1228 | u64 val; | ||
1229 | |||
1230 | /* p6 only has one enable register */ | ||
1231 | rdmsrl(MSR_P6_EVNTSEL0, val); | ||
1232 | val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE; | ||
1233 | wrmsrl(MSR_P6_EVNTSEL0, val); | ||
1234 | } | ||
1235 | |||
1236 | static void intel_pmu_disable_all(void) | ||
1237 | { | ||
1238 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
1239 | |||
1240 | wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0); | ||
1241 | |||
1242 | if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) | ||
1243 | intel_pmu_disable_bts(); | ||
1244 | } | ||
1245 | |||
1246 | static void x86_pmu_disable_all(void) | 545 | static void x86_pmu_disable_all(void) |
1247 | { | 546 | { |
1248 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 547 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
@@ -1278,33 +577,6 @@ void hw_perf_disable(void) | |||
1278 | x86_pmu.disable_all(); | 577 | x86_pmu.disable_all(); |
1279 | } | 578 | } |
1280 | 579 | ||
1281 | static void p6_pmu_enable_all(void) | ||
1282 | { | ||
1283 | unsigned long val; | ||
1284 | |||
1285 | /* p6 only has one enable register */ | ||
1286 | rdmsrl(MSR_P6_EVNTSEL0, val); | ||
1287 | val |= ARCH_PERFMON_EVENTSEL0_ENABLE; | ||
1288 | wrmsrl(MSR_P6_EVNTSEL0, val); | ||
1289 | } | ||
1290 | |||
1291 | static void intel_pmu_enable_all(void) | ||
1292 | { | ||
1293 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
1294 | |||
1295 | wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl); | ||
1296 | |||
1297 | if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) { | ||
1298 | struct perf_event *event = | ||
1299 | cpuc->events[X86_PMC_IDX_FIXED_BTS]; | ||
1300 | |||
1301 | if (WARN_ON_ONCE(!event)) | ||
1302 | return; | ||
1303 | |||
1304 | intel_pmu_enable_bts(event->hw.config); | ||
1305 | } | ||
1306 | } | ||
1307 | |||
1308 | static void x86_pmu_enable_all(void) | 580 | static void x86_pmu_enable_all(void) |
1309 | { | 581 | { |
1310 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 582 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
@@ -1578,20 +850,6 @@ void hw_perf_enable(void) | |||
1578 | x86_pmu.enable_all(); | 850 | x86_pmu.enable_all(); |
1579 | } | 851 | } |
1580 | 852 | ||
1581 | static inline u64 intel_pmu_get_status(void) | ||
1582 | { | ||
1583 | u64 status; | ||
1584 | |||
1585 | rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); | ||
1586 | |||
1587 | return status; | ||
1588 | } | ||
1589 | |||
1590 | static inline void intel_pmu_ack_status(u64 ack) | ||
1591 | { | ||
1592 | wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack); | ||
1593 | } | ||
1594 | |||
1595 | static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc, int idx) | 853 | static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc, int idx) |
1596 | { | 854 | { |
1597 | (void)checking_wrmsrl(hwc->config_base + idx, | 855 | (void)checking_wrmsrl(hwc->config_base + idx, |
@@ -1603,47 +861,6 @@ static inline void x86_pmu_disable_event(struct hw_perf_event *hwc, int idx) | |||
1603 | (void)checking_wrmsrl(hwc->config_base + idx, hwc->config); | 861 | (void)checking_wrmsrl(hwc->config_base + idx, hwc->config); |
1604 | } | 862 | } |
1605 | 863 | ||
1606 | static inline void | ||
1607 | intel_pmu_disable_fixed(struct hw_perf_event *hwc, int __idx) | ||
1608 | { | ||
1609 | int idx = __idx - X86_PMC_IDX_FIXED; | ||
1610 | u64 ctrl_val, mask; | ||
1611 | |||
1612 | mask = 0xfULL << (idx * 4); | ||
1613 | |||
1614 | rdmsrl(hwc->config_base, ctrl_val); | ||
1615 | ctrl_val &= ~mask; | ||
1616 | (void)checking_wrmsrl(hwc->config_base, ctrl_val); | ||
1617 | } | ||
1618 | |||
1619 | static inline void | ||
1620 | p6_pmu_disable_event(struct hw_perf_event *hwc, int idx) | ||
1621 | { | ||
1622 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
1623 | u64 val = P6_NOP_EVENT; | ||
1624 | |||
1625 | if (cpuc->enabled) | ||
1626 | val |= ARCH_PERFMON_EVENTSEL0_ENABLE; | ||
1627 | |||
1628 | (void)checking_wrmsrl(hwc->config_base + idx, val); | ||
1629 | } | ||
1630 | |||
1631 | static inline void | ||
1632 | intel_pmu_disable_event(struct hw_perf_event *hwc, int idx) | ||
1633 | { | ||
1634 | if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) { | ||
1635 | intel_pmu_disable_bts(); | ||
1636 | return; | ||
1637 | } | ||
1638 | |||
1639 | if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { | ||
1640 | intel_pmu_disable_fixed(hwc, idx); | ||
1641 | return; | ||
1642 | } | ||
1643 | |||
1644 | x86_pmu_disable_event(hwc, idx); | ||
1645 | } | ||
1646 | |||
1647 | static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left); | 864 | static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left); |
1648 | 865 | ||
1649 | /* | 866 | /* |
@@ -1702,70 +919,6 @@ x86_perf_event_set_period(struct perf_event *event, | |||
1702 | return ret; | 919 | return ret; |
1703 | } | 920 | } |
1704 | 921 | ||
1705 | static inline void | ||
1706 | intel_pmu_enable_fixed(struct hw_perf_event *hwc, int __idx) | ||
1707 | { | ||
1708 | int idx = __idx - X86_PMC_IDX_FIXED; | ||
1709 | u64 ctrl_val, bits, mask; | ||
1710 | int err; | ||
1711 | |||
1712 | /* | ||
1713 | * Enable IRQ generation (0x8), | ||
1714 | * and enable ring-3 counting (0x2) and ring-0 counting (0x1) | ||
1715 | * if requested: | ||
1716 | */ | ||
1717 | bits = 0x8ULL; | ||
1718 | if (hwc->config & ARCH_PERFMON_EVENTSEL_USR) | ||
1719 | bits |= 0x2; | ||
1720 | if (hwc->config & ARCH_PERFMON_EVENTSEL_OS) | ||
1721 | bits |= 0x1; | ||
1722 | |||
1723 | /* | ||
1724 | * ANY bit is supported in v3 and up | ||
1725 | */ | ||
1726 | if (x86_pmu.version > 2 && hwc->config & ARCH_PERFMON_EVENTSEL_ANY) | ||
1727 | bits |= 0x4; | ||
1728 | |||
1729 | bits <<= (idx * 4); | ||
1730 | mask = 0xfULL << (idx * 4); | ||
1731 | |||
1732 | rdmsrl(hwc->config_base, ctrl_val); | ||
1733 | ctrl_val &= ~mask; | ||
1734 | ctrl_val |= bits; | ||
1735 | err = checking_wrmsrl(hwc->config_base, ctrl_val); | ||
1736 | } | ||
1737 | |||
1738 | static void p6_pmu_enable_event(struct hw_perf_event *hwc, int idx) | ||
1739 | { | ||
1740 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
1741 | u64 val; | ||
1742 | |||
1743 | val = hwc->config; | ||
1744 | if (cpuc->enabled) | ||
1745 | val |= ARCH_PERFMON_EVENTSEL0_ENABLE; | ||
1746 | |||
1747 | (void)checking_wrmsrl(hwc->config_base + idx, val); | ||
1748 | } | ||
1749 | |||
1750 | |||
1751 | static void intel_pmu_enable_event(struct hw_perf_event *hwc, int idx) | ||
1752 | { | ||
1753 | if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) { | ||
1754 | if (!__get_cpu_var(cpu_hw_events).enabled) | ||
1755 | return; | ||
1756 | |||
1757 | intel_pmu_enable_bts(hwc->config); | ||
1758 | return; | ||
1759 | } | ||
1760 | |||
1761 | if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { | ||
1762 | intel_pmu_enable_fixed(hwc, idx); | ||
1763 | return; | ||
1764 | } | ||
1765 | |||
1766 | __x86_pmu_enable_event(hwc, idx); | ||
1767 | } | ||
1768 | |||
1769 | static void x86_pmu_enable_event(struct hw_perf_event *hwc, int idx) | 922 | static void x86_pmu_enable_event(struct hw_perf_event *hwc, int idx) |
1770 | { | 923 | { |
1771 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 924 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
@@ -1887,66 +1040,6 @@ void perf_event_print_debug(void) | |||
1887 | local_irq_restore(flags); | 1040 | local_irq_restore(flags); |
1888 | } | 1041 | } |
1889 | 1042 | ||
1890 | static void intel_pmu_drain_bts_buffer(struct cpu_hw_events *cpuc) | ||
1891 | { | ||
1892 | struct debug_store *ds = cpuc->ds; | ||
1893 | struct bts_record { | ||
1894 | u64 from; | ||
1895 | u64 to; | ||
1896 | u64 flags; | ||
1897 | }; | ||
1898 | struct perf_event *event = cpuc->events[X86_PMC_IDX_FIXED_BTS]; | ||
1899 | struct bts_record *at, *top; | ||
1900 | struct perf_output_handle handle; | ||
1901 | struct perf_event_header header; | ||
1902 | struct perf_sample_data data; | ||
1903 | struct pt_regs regs; | ||
1904 | |||
1905 | if (!event) | ||
1906 | return; | ||
1907 | |||
1908 | if (!ds) | ||
1909 | return; | ||
1910 | |||
1911 | at = (struct bts_record *)(unsigned long)ds->bts_buffer_base; | ||
1912 | top = (struct bts_record *)(unsigned long)ds->bts_index; | ||
1913 | |||
1914 | if (top <= at) | ||
1915 | return; | ||
1916 | |||
1917 | ds->bts_index = ds->bts_buffer_base; | ||
1918 | |||
1919 | |||
1920 | data.period = event->hw.last_period; | ||
1921 | data.addr = 0; | ||
1922 | data.raw = NULL; | ||
1923 | regs.ip = 0; | ||
1924 | |||
1925 | /* | ||
1926 | * Prepare a generic sample, i.e. fill in the invariant fields. | ||
1927 | * We will overwrite the from and to address before we output | ||
1928 | * the sample. | ||
1929 | */ | ||
1930 | perf_prepare_sample(&header, &data, event, ®s); | ||
1931 | |||
1932 | if (perf_output_begin(&handle, event, | ||
1933 | header.size * (top - at), 1, 1)) | ||
1934 | return; | ||
1935 | |||
1936 | for (; at < top; at++) { | ||
1937 | data.ip = at->from; | ||
1938 | data.addr = at->to; | ||
1939 | |||
1940 | perf_output_sample(&handle, &header, &data, event); | ||
1941 | } | ||
1942 | |||
1943 | perf_output_end(&handle); | ||
1944 | |||
1945 | /* There's new data available. */ | ||
1946 | event->hw.interrupts++; | ||
1947 | event->pending_kill = POLL_IN; | ||
1948 | } | ||
1949 | |||
1950 | static void x86_pmu_stop(struct perf_event *event) | 1043 | static void x86_pmu_stop(struct perf_event *event) |
1951 | { | 1044 | { |
1952 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 1045 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
@@ -1966,10 +1059,6 @@ static void x86_pmu_stop(struct perf_event *event) | |||
1966 | */ | 1059 | */ |
1967 | x86_perf_event_update(event, hwc, idx); | 1060 | x86_perf_event_update(event, hwc, idx); |
1968 | 1061 | ||
1969 | /* Drain the remaining BTS records. */ | ||
1970 | if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) | ||
1971 | intel_pmu_drain_bts_buffer(cpuc); | ||
1972 | |||
1973 | cpuc->events[idx] = NULL; | 1062 | cpuc->events[idx] = NULL; |
1974 | } | 1063 | } |
1975 | 1064 | ||
@@ -1996,114 +1085,6 @@ static void x86_pmu_disable(struct perf_event *event) | |||
1996 | perf_event_update_userpage(event); | 1085 | perf_event_update_userpage(event); |
1997 | } | 1086 | } |
1998 | 1087 | ||
1999 | /* | ||
2000 | * Save and restart an expired event. Called by NMI contexts, | ||
2001 | * so it has to be careful about preempting normal event ops: | ||
2002 | */ | ||
2003 | static int intel_pmu_save_and_restart(struct perf_event *event) | ||
2004 | { | ||
2005 | struct hw_perf_event *hwc = &event->hw; | ||
2006 | int idx = hwc->idx; | ||
2007 | int ret; | ||
2008 | |||
2009 | x86_perf_event_update(event, hwc, idx); | ||
2010 | ret = x86_perf_event_set_period(event, hwc, idx); | ||
2011 | |||
2012 | return ret; | ||
2013 | } | ||
2014 | |||
2015 | static void intel_pmu_reset(void) | ||
2016 | { | ||
2017 | struct debug_store *ds = __get_cpu_var(cpu_hw_events).ds; | ||
2018 | unsigned long flags; | ||
2019 | int idx; | ||
2020 | |||
2021 | if (!x86_pmu.num_events) | ||
2022 | return; | ||
2023 | |||
2024 | local_irq_save(flags); | ||
2025 | |||
2026 | printk("clearing PMU state on CPU#%d\n", smp_processor_id()); | ||
2027 | |||
2028 | for (idx = 0; idx < x86_pmu.num_events; idx++) { | ||
2029 | checking_wrmsrl(x86_pmu.eventsel + idx, 0ull); | ||
2030 | checking_wrmsrl(x86_pmu.perfctr + idx, 0ull); | ||
2031 | } | ||
2032 | for (idx = 0; idx < x86_pmu.num_events_fixed; idx++) { | ||
2033 | checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull); | ||
2034 | } | ||
2035 | if (ds) | ||
2036 | ds->bts_index = ds->bts_buffer_base; | ||
2037 | |||
2038 | local_irq_restore(flags); | ||
2039 | } | ||
2040 | |||
2041 | /* | ||
2042 | * This handler is triggered by the local APIC, so the APIC IRQ handling | ||
2043 | * rules apply: | ||
2044 | */ | ||
2045 | static int intel_pmu_handle_irq(struct pt_regs *regs) | ||
2046 | { | ||
2047 | struct perf_sample_data data; | ||
2048 | struct cpu_hw_events *cpuc; | ||
2049 | int bit, loops; | ||
2050 | u64 ack, status; | ||
2051 | |||
2052 | data.addr = 0; | ||
2053 | data.raw = NULL; | ||
2054 | |||
2055 | cpuc = &__get_cpu_var(cpu_hw_events); | ||
2056 | |||
2057 | perf_disable(); | ||
2058 | intel_pmu_drain_bts_buffer(cpuc); | ||
2059 | status = intel_pmu_get_status(); | ||
2060 | if (!status) { | ||
2061 | perf_enable(); | ||
2062 | return 0; | ||
2063 | } | ||
2064 | |||
2065 | loops = 0; | ||
2066 | again: | ||
2067 | if (++loops > 100) { | ||
2068 | WARN_ONCE(1, "perfevents: irq loop stuck!\n"); | ||
2069 | perf_event_print_debug(); | ||
2070 | intel_pmu_reset(); | ||
2071 | perf_enable(); | ||
2072 | return 1; | ||
2073 | } | ||
2074 | |||
2075 | inc_irq_stat(apic_perf_irqs); | ||
2076 | ack = status; | ||
2077 | for_each_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) { | ||
2078 | struct perf_event *event = cpuc->events[bit]; | ||
2079 | |||
2080 | clear_bit(bit, (unsigned long *) &status); | ||
2081 | if (!test_bit(bit, cpuc->active_mask)) | ||
2082 | continue; | ||
2083 | |||
2084 | if (!intel_pmu_save_and_restart(event)) | ||
2085 | continue; | ||
2086 | |||
2087 | data.period = event->hw.last_period; | ||
2088 | |||
2089 | if (perf_event_overflow(event, 1, &data, regs)) | ||
2090 | intel_pmu_disable_event(&event->hw, bit); | ||
2091 | } | ||
2092 | |||
2093 | intel_pmu_ack_status(ack); | ||
2094 | |||
2095 | /* | ||
2096 | * Repeat if there is more work to be done: | ||
2097 | */ | ||
2098 | status = intel_pmu_get_status(); | ||
2099 | if (status) | ||
2100 | goto again; | ||
2101 | |||
2102 | perf_enable(); | ||
2103 | |||
2104 | return 1; | ||
2105 | } | ||
2106 | |||
2107 | static int x86_pmu_handle_irq(struct pt_regs *regs) | 1088 | static int x86_pmu_handle_irq(struct pt_regs *regs) |
2108 | { | 1089 | { |
2109 | struct perf_sample_data data; | 1090 | struct perf_sample_data data; |
@@ -2216,37 +1197,20 @@ perf_event_nmi_handler(struct notifier_block *self, | |||
2216 | return NOTIFY_STOP; | 1197 | return NOTIFY_STOP; |
2217 | } | 1198 | } |
2218 | 1199 | ||
1200 | static __read_mostly struct notifier_block perf_event_nmi_notifier = { | ||
1201 | .notifier_call = perf_event_nmi_handler, | ||
1202 | .next = NULL, | ||
1203 | .priority = 1 | ||
1204 | }; | ||
1205 | |||
2219 | static struct event_constraint unconstrained; | 1206 | static struct event_constraint unconstrained; |
2220 | static struct event_constraint emptyconstraint; | 1207 | static struct event_constraint emptyconstraint; |
2221 | 1208 | ||
2222 | static struct event_constraint bts_constraint = | ||
2223 | EVENT_CONSTRAINT(0, 1ULL << X86_PMC_IDX_FIXED_BTS, 0); | ||
2224 | |||
2225 | static struct event_constraint * | ||
2226 | intel_special_constraints(struct perf_event *event) | ||
2227 | { | ||
2228 | unsigned int hw_event; | ||
2229 | |||
2230 | hw_event = event->hw.config & INTEL_ARCH_EVENT_MASK; | ||
2231 | |||
2232 | if (unlikely((hw_event == | ||
2233 | x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS)) && | ||
2234 | (event->hw.sample_period == 1))) { | ||
2235 | |||
2236 | return &bts_constraint; | ||
2237 | } | ||
2238 | return NULL; | ||
2239 | } | ||
2240 | |||
2241 | static struct event_constraint * | 1209 | static struct event_constraint * |
2242 | intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) | 1210 | x86_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) |
2243 | { | 1211 | { |
2244 | struct event_constraint *c; | 1212 | struct event_constraint *c; |
2245 | 1213 | ||
2246 | c = intel_special_constraints(event); | ||
2247 | if (c) | ||
2248 | return c; | ||
2249 | |||
2250 | if (x86_pmu.event_constraints) { | 1214 | if (x86_pmu.event_constraints) { |
2251 | for_each_event_constraint(c, x86_pmu.event_constraints) { | 1215 | for_each_event_constraint(c, x86_pmu.event_constraints) { |
2252 | if ((event->hw.config & c->cmask) == c->code) | 1216 | if ((event->hw.config & c->cmask) == c->code) |
@@ -2257,148 +1221,6 @@ intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event | |||
2257 | return &unconstrained; | 1221 | return &unconstrained; |
2258 | } | 1222 | } |
2259 | 1223 | ||
2260 | /* | ||
2261 | * AMD64 events are detected based on their event codes. | ||
2262 | */ | ||
2263 | static inline int amd_is_nb_event(struct hw_perf_event *hwc) | ||
2264 | { | ||
2265 | return (hwc->config & 0xe0) == 0xe0; | ||
2266 | } | ||
2267 | |||
2268 | static void amd_put_event_constraints(struct cpu_hw_events *cpuc, | ||
2269 | struct perf_event *event) | ||
2270 | { | ||
2271 | struct hw_perf_event *hwc = &event->hw; | ||
2272 | struct amd_nb *nb = cpuc->amd_nb; | ||
2273 | int i; | ||
2274 | |||
2275 | /* | ||
2276 | * only care about NB events | ||
2277 | */ | ||
2278 | if (!(nb && amd_is_nb_event(hwc))) | ||
2279 | return; | ||
2280 | |||
2281 | /* | ||
2282 | * need to scan whole list because event may not have | ||
2283 | * been assigned during scheduling | ||
2284 | * | ||
2285 | * no race condition possible because event can only | ||
2286 | * be removed on one CPU at a time AND PMU is disabled | ||
2287 | * when we come here | ||
2288 | */ | ||
2289 | for (i = 0; i < x86_pmu.num_events; i++) { | ||
2290 | if (nb->owners[i] == event) { | ||
2291 | cmpxchg(nb->owners+i, event, NULL); | ||
2292 | break; | ||
2293 | } | ||
2294 | } | ||
2295 | } | ||
2296 | |||
2297 | /* | ||
2298 | * AMD64 NorthBridge events need special treatment because | ||
2299 | * counter access needs to be synchronized across all cores | ||
2300 | * of a package. Refer to BKDG section 3.12 | ||
2301 | * | ||
2302 | * NB events are events measuring L3 cache, Hypertransport | ||
2303 | * traffic. They are identified by an event code >= 0xe00. | ||
2304 | * They measure events on the NorthBride which is shared | ||
2305 | * by all cores on a package. NB events are counted on a | ||
2306 | * shared set of counters. When a NB event is programmed | ||
2307 | * in a counter, the data actually comes from a shared | ||
2308 | * counter. Thus, access to those counters needs to be | ||
2309 | * synchronized. | ||
2310 | * | ||
2311 | * We implement the synchronization such that no two cores | ||
2312 | * can be measuring NB events using the same counters. Thus, | ||
2313 | * we maintain a per-NB allocation table. The available slot | ||
2314 | * is propagated using the event_constraint structure. | ||
2315 | * | ||
2316 | * We provide only one choice for each NB event based on | ||
2317 | * the fact that only NB events have restrictions. Consequently, | ||
2318 | * if a counter is available, there is a guarantee the NB event | ||
2319 | * will be assigned to it. If no slot is available, an empty | ||
2320 | * constraint is returned and scheduling will eventually fail | ||
2321 | * for this event. | ||
2322 | * | ||
2323 | * Note that all cores attached the same NB compete for the same | ||
2324 | * counters to host NB events, this is why we use atomic ops. Some | ||
2325 | * multi-chip CPUs may have more than one NB. | ||
2326 | * | ||
2327 | * Given that resources are allocated (cmpxchg), they must be | ||
2328 | * eventually freed for others to use. This is accomplished by | ||
2329 | * calling amd_put_event_constraints(). | ||
2330 | * | ||
2331 | * Non NB events are not impacted by this restriction. | ||
2332 | */ | ||
2333 | static struct event_constraint * | ||
2334 | amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) | ||
2335 | { | ||
2336 | struct hw_perf_event *hwc = &event->hw; | ||
2337 | struct amd_nb *nb = cpuc->amd_nb; | ||
2338 | struct perf_event *old = NULL; | ||
2339 | int max = x86_pmu.num_events; | ||
2340 | int i, j, k = -1; | ||
2341 | |||
2342 | /* | ||
2343 | * if not NB event or no NB, then no constraints | ||
2344 | */ | ||
2345 | if (!(nb && amd_is_nb_event(hwc))) | ||
2346 | return &unconstrained; | ||
2347 | |||
2348 | /* | ||
2349 | * detect if already present, if so reuse | ||
2350 | * | ||
2351 | * cannot merge with actual allocation | ||
2352 | * because of possible holes | ||
2353 | * | ||
2354 | * event can already be present yet not assigned (in hwc->idx) | ||
2355 | * because of successive calls to x86_schedule_events() from | ||
2356 | * hw_perf_group_sched_in() without hw_perf_enable() | ||
2357 | */ | ||
2358 | for (i = 0; i < max; i++) { | ||
2359 | /* | ||
2360 | * keep track of first free slot | ||
2361 | */ | ||
2362 | if (k == -1 && !nb->owners[i]) | ||
2363 | k = i; | ||
2364 | |||
2365 | /* already present, reuse */ | ||
2366 | if (nb->owners[i] == event) | ||
2367 | goto done; | ||
2368 | } | ||
2369 | /* | ||
2370 | * not present, so grab a new slot | ||
2371 | * starting either at: | ||
2372 | */ | ||
2373 | if (hwc->idx != -1) { | ||
2374 | /* previous assignment */ | ||
2375 | i = hwc->idx; | ||
2376 | } else if (k != -1) { | ||
2377 | /* start from free slot found */ | ||
2378 | i = k; | ||
2379 | } else { | ||
2380 | /* | ||
2381 | * event not found, no slot found in | ||
2382 | * first pass, try again from the | ||
2383 | * beginning | ||
2384 | */ | ||
2385 | i = 0; | ||
2386 | } | ||
2387 | j = i; | ||
2388 | do { | ||
2389 | old = cmpxchg(nb->owners+i, NULL, event); | ||
2390 | if (!old) | ||
2391 | break; | ||
2392 | if (++i == max) | ||
2393 | i = 0; | ||
2394 | } while (i != j); | ||
2395 | done: | ||
2396 | if (!old) | ||
2397 | return &nb->event_constraints[i]; | ||
2398 | |||
2399 | return &emptyconstraint; | ||
2400 | } | ||
2401 | |||
2402 | static int x86_event_sched_in(struct perf_event *event, | 1224 | static int x86_event_sched_in(struct perf_event *event, |
2403 | struct perf_cpu_context *cpuctx) | 1225 | struct perf_cpu_context *cpuctx) |
2404 | { | 1226 | { |
@@ -2509,335 +1331,9 @@ undo: | |||
2509 | return ret; | 1331 | return ret; |
2510 | } | 1332 | } |
2511 | 1333 | ||
2512 | static __read_mostly struct notifier_block perf_event_nmi_notifier = { | 1334 | #include "perf_event_amd.c" |
2513 | .notifier_call = perf_event_nmi_handler, | 1335 | #include "perf_event_p6.c" |
2514 | .next = NULL, | 1336 | #include "perf_event_intel.c" |
2515 | .priority = 1 | ||
2516 | }; | ||
2517 | |||
2518 | static __initconst struct x86_pmu p6_pmu = { | ||
2519 | .name = "p6", | ||
2520 | .handle_irq = x86_pmu_handle_irq, | ||
2521 | .disable_all = p6_pmu_disable_all, | ||
2522 | .enable_all = p6_pmu_enable_all, | ||
2523 | .enable = p6_pmu_enable_event, | ||
2524 | .disable = p6_pmu_disable_event, | ||
2525 | .eventsel = MSR_P6_EVNTSEL0, | ||
2526 | .perfctr = MSR_P6_PERFCTR0, | ||
2527 | .event_map = p6_pmu_event_map, | ||
2528 | .raw_event = p6_pmu_raw_event, | ||
2529 | .max_events = ARRAY_SIZE(p6_perfmon_event_map), | ||
2530 | .apic = 1, | ||
2531 | .max_period = (1ULL << 31) - 1, | ||
2532 | .version = 0, | ||
2533 | .num_events = 2, | ||
2534 | /* | ||
2535 | * Events have 40 bits implemented. However they are designed such | ||
2536 | * that bits [32-39] are sign extensions of bit 31. As such the | ||
2537 | * effective width of an event for a P6-like PMU is 32 bits only. | ||
2538 | * | ||
2539 | * See IA-32 Intel Architecture Software developer manual Vol 3B | ||
2540 | */ | ||
2541 | .event_bits = 32, | ||
2542 | .event_mask = (1ULL << 32) - 1, | ||
2543 | .get_event_constraints = intel_get_event_constraints, | ||
2544 | .event_constraints = intel_p6_event_constraints | ||
2545 | }; | ||
2546 | |||
2547 | static __initconst struct x86_pmu core_pmu = { | ||
2548 | .name = "core", | ||
2549 | .handle_irq = x86_pmu_handle_irq, | ||
2550 | .disable_all = x86_pmu_disable_all, | ||
2551 | .enable_all = x86_pmu_enable_all, | ||
2552 | .enable = x86_pmu_enable_event, | ||
2553 | .disable = x86_pmu_disable_event, | ||
2554 | .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, | ||
2555 | .perfctr = MSR_ARCH_PERFMON_PERFCTR0, | ||
2556 | .event_map = intel_pmu_event_map, | ||
2557 | .raw_event = intel_pmu_raw_event, | ||
2558 | .max_events = ARRAY_SIZE(intel_perfmon_event_map), | ||
2559 | .apic = 1, | ||
2560 | /* | ||
2561 | * Intel PMCs cannot be accessed sanely above 32 bit width, | ||
2562 | * so we install an artificial 1<<31 period regardless of | ||
2563 | * the generic event period: | ||
2564 | */ | ||
2565 | .max_period = (1ULL << 31) - 1, | ||
2566 | .get_event_constraints = intel_get_event_constraints, | ||
2567 | .event_constraints = intel_core_event_constraints, | ||
2568 | }; | ||
2569 | |||
2570 | static __initconst struct x86_pmu intel_pmu = { | ||
2571 | .name = "Intel", | ||
2572 | .handle_irq = intel_pmu_handle_irq, | ||
2573 | .disable_all = intel_pmu_disable_all, | ||
2574 | .enable_all = intel_pmu_enable_all, | ||
2575 | .enable = intel_pmu_enable_event, | ||
2576 | .disable = intel_pmu_disable_event, | ||
2577 | .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, | ||
2578 | .perfctr = MSR_ARCH_PERFMON_PERFCTR0, | ||
2579 | .event_map = intel_pmu_event_map, | ||
2580 | .raw_event = intel_pmu_raw_event, | ||
2581 | .max_events = ARRAY_SIZE(intel_perfmon_event_map), | ||
2582 | .apic = 1, | ||
2583 | /* | ||
2584 | * Intel PMCs cannot be accessed sanely above 32 bit width, | ||
2585 | * so we install an artificial 1<<31 period regardless of | ||
2586 | * the generic event period: | ||
2587 | */ | ||
2588 | .max_period = (1ULL << 31) - 1, | ||
2589 | .enable_bts = intel_pmu_enable_bts, | ||
2590 | .disable_bts = intel_pmu_disable_bts, | ||
2591 | .get_event_constraints = intel_get_event_constraints | ||
2592 | }; | ||
2593 | |||
2594 | static __initconst struct x86_pmu amd_pmu = { | ||
2595 | .name = "AMD", | ||
2596 | .handle_irq = x86_pmu_handle_irq, | ||
2597 | .disable_all = x86_pmu_disable_all, | ||
2598 | .enable_all = x86_pmu_enable_all, | ||
2599 | .enable = x86_pmu_enable_event, | ||
2600 | .disable = x86_pmu_disable_event, | ||
2601 | .eventsel = MSR_K7_EVNTSEL0, | ||
2602 | .perfctr = MSR_K7_PERFCTR0, | ||
2603 | .event_map = amd_pmu_event_map, | ||
2604 | .raw_event = amd_pmu_raw_event, | ||
2605 | .max_events = ARRAY_SIZE(amd_perfmon_event_map), | ||
2606 | .num_events = 4, | ||
2607 | .event_bits = 48, | ||
2608 | .event_mask = (1ULL << 48) - 1, | ||
2609 | .apic = 1, | ||
2610 | /* use highest bit to detect overflow */ | ||
2611 | .max_period = (1ULL << 47) - 1, | ||
2612 | .get_event_constraints = amd_get_event_constraints, | ||
2613 | .put_event_constraints = amd_put_event_constraints | ||
2614 | }; | ||
2615 | |||
2616 | static __init int p6_pmu_init(void) | ||
2617 | { | ||
2618 | switch (boot_cpu_data.x86_model) { | ||
2619 | case 1: | ||
2620 | case 3: /* Pentium Pro */ | ||
2621 | case 5: | ||
2622 | case 6: /* Pentium II */ | ||
2623 | case 7: | ||
2624 | case 8: | ||
2625 | case 11: /* Pentium III */ | ||
2626 | case 9: | ||
2627 | case 13: | ||
2628 | /* Pentium M */ | ||
2629 | break; | ||
2630 | default: | ||
2631 | pr_cont("unsupported p6 CPU model %d ", | ||
2632 | boot_cpu_data.x86_model); | ||
2633 | return -ENODEV; | ||
2634 | } | ||
2635 | |||
2636 | x86_pmu = p6_pmu; | ||
2637 | |||
2638 | return 0; | ||
2639 | } | ||
2640 | |||
2641 | static __init int intel_pmu_init(void) | ||
2642 | { | ||
2643 | union cpuid10_edx edx; | ||
2644 | union cpuid10_eax eax; | ||
2645 | unsigned int unused; | ||
2646 | unsigned int ebx; | ||
2647 | int version; | ||
2648 | |||
2649 | if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { | ||
2650 | /* check for P6 processor family */ | ||
2651 | if (boot_cpu_data.x86 == 6) { | ||
2652 | return p6_pmu_init(); | ||
2653 | } else { | ||
2654 | return -ENODEV; | ||
2655 | } | ||
2656 | } | ||
2657 | |||
2658 | /* | ||
2659 | * Check whether the Architectural PerfMon supports | ||
2660 | * Branch Misses Retired hw_event or not. | ||
2661 | */ | ||
2662 | cpuid(10, &eax.full, &ebx, &unused, &edx.full); | ||
2663 | if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED) | ||
2664 | return -ENODEV; | ||
2665 | |||
2666 | version = eax.split.version_id; | ||
2667 | if (version < 2) | ||
2668 | x86_pmu = core_pmu; | ||
2669 | else | ||
2670 | x86_pmu = intel_pmu; | ||
2671 | |||
2672 | x86_pmu.version = version; | ||
2673 | x86_pmu.num_events = eax.split.num_events; | ||
2674 | x86_pmu.event_bits = eax.split.bit_width; | ||
2675 | x86_pmu.event_mask = (1ULL << eax.split.bit_width) - 1; | ||
2676 | |||
2677 | /* | ||
2678 | * Quirk: v2 perfmon does not report fixed-purpose events, so | ||
2679 | * assume at least 3 events: | ||
2680 | */ | ||
2681 | if (version > 1) | ||
2682 | x86_pmu.num_events_fixed = max((int)edx.split.num_events_fixed, 3); | ||
2683 | |||
2684 | /* | ||
2685 | * Install the hw-cache-events table: | ||
2686 | */ | ||
2687 | switch (boot_cpu_data.x86_model) { | ||
2688 | case 14: /* 65 nm core solo/duo, "Yonah" */ | ||
2689 | pr_cont("Core events, "); | ||
2690 | break; | ||
2691 | |||
2692 | case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */ | ||
2693 | case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */ | ||
2694 | case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */ | ||
2695 | case 29: /* six-core 45 nm xeon "Dunnington" */ | ||
2696 | memcpy(hw_cache_event_ids, core2_hw_cache_event_ids, | ||
2697 | sizeof(hw_cache_event_ids)); | ||
2698 | |||
2699 | x86_pmu.event_constraints = intel_core2_event_constraints; | ||
2700 | pr_cont("Core2 events, "); | ||
2701 | break; | ||
2702 | |||
2703 | case 26: /* 45 nm nehalem, "Bloomfield" */ | ||
2704 | case 30: /* 45 nm nehalem, "Lynnfield" */ | ||
2705 | memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids, | ||
2706 | sizeof(hw_cache_event_ids)); | ||
2707 | |||
2708 | x86_pmu.event_constraints = intel_nehalem_event_constraints; | ||
2709 | pr_cont("Nehalem/Corei7 events, "); | ||
2710 | break; | ||
2711 | case 28: | ||
2712 | memcpy(hw_cache_event_ids, atom_hw_cache_event_ids, | ||
2713 | sizeof(hw_cache_event_ids)); | ||
2714 | |||
2715 | x86_pmu.event_constraints = intel_gen_event_constraints; | ||
2716 | pr_cont("Atom events, "); | ||
2717 | break; | ||
2718 | |||
2719 | case 37: /* 32 nm nehalem, "Clarkdale" */ | ||
2720 | case 44: /* 32 nm nehalem, "Gulftown" */ | ||
2721 | memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids, | ||
2722 | sizeof(hw_cache_event_ids)); | ||
2723 | |||
2724 | x86_pmu.event_constraints = intel_westmere_event_constraints; | ||
2725 | pr_cont("Westmere events, "); | ||
2726 | break; | ||
2727 | default: | ||
2728 | /* | ||
2729 | * default constraints for v2 and up | ||
2730 | */ | ||
2731 | x86_pmu.event_constraints = intel_gen_event_constraints; | ||
2732 | pr_cont("generic architected perfmon, "); | ||
2733 | } | ||
2734 | return 0; | ||
2735 | } | ||
2736 | |||
2737 | static struct amd_nb *amd_alloc_nb(int cpu, int nb_id) | ||
2738 | { | ||
2739 | struct amd_nb *nb; | ||
2740 | int i; | ||
2741 | |||
2742 | nb = kmalloc(sizeof(struct amd_nb), GFP_KERNEL); | ||
2743 | if (!nb) | ||
2744 | return NULL; | ||
2745 | |||
2746 | memset(nb, 0, sizeof(*nb)); | ||
2747 | nb->nb_id = nb_id; | ||
2748 | |||
2749 | /* | ||
2750 | * initialize all possible NB constraints | ||
2751 | */ | ||
2752 | for (i = 0; i < x86_pmu.num_events; i++) { | ||
2753 | set_bit(i, nb->event_constraints[i].idxmsk); | ||
2754 | nb->event_constraints[i].weight = 1; | ||
2755 | } | ||
2756 | return nb; | ||
2757 | } | ||
2758 | |||
2759 | static void amd_pmu_cpu_online(int cpu) | ||
2760 | { | ||
2761 | struct cpu_hw_events *cpu1, *cpu2; | ||
2762 | struct amd_nb *nb = NULL; | ||
2763 | int i, nb_id; | ||
2764 | |||
2765 | if (boot_cpu_data.x86_max_cores < 2) | ||
2766 | return; | ||
2767 | |||
2768 | /* | ||
2769 | * function may be called too early in the | ||
2770 | * boot process, in which case nb_id is bogus | ||
2771 | */ | ||
2772 | nb_id = amd_get_nb_id(cpu); | ||
2773 | if (nb_id == BAD_APICID) | ||
2774 | return; | ||
2775 | |||
2776 | cpu1 = &per_cpu(cpu_hw_events, cpu); | ||
2777 | cpu1->amd_nb = NULL; | ||
2778 | |||
2779 | raw_spin_lock(&amd_nb_lock); | ||
2780 | |||
2781 | for_each_online_cpu(i) { | ||
2782 | cpu2 = &per_cpu(cpu_hw_events, i); | ||
2783 | nb = cpu2->amd_nb; | ||
2784 | if (!nb) | ||
2785 | continue; | ||
2786 | if (nb->nb_id == nb_id) | ||
2787 | goto found; | ||
2788 | } | ||
2789 | |||
2790 | nb = amd_alloc_nb(cpu, nb_id); | ||
2791 | if (!nb) { | ||
2792 | pr_err("perf_events: failed NB allocation for CPU%d\n", cpu); | ||
2793 | raw_spin_unlock(&amd_nb_lock); | ||
2794 | return; | ||
2795 | } | ||
2796 | found: | ||
2797 | nb->refcnt++; | ||
2798 | cpu1->amd_nb = nb; | ||
2799 | |||
2800 | raw_spin_unlock(&amd_nb_lock); | ||
2801 | } | ||
2802 | |||
2803 | static void amd_pmu_cpu_offline(int cpu) | ||
2804 | { | ||
2805 | struct cpu_hw_events *cpuhw; | ||
2806 | |||
2807 | if (boot_cpu_data.x86_max_cores < 2) | ||
2808 | return; | ||
2809 | |||
2810 | cpuhw = &per_cpu(cpu_hw_events, cpu); | ||
2811 | |||
2812 | raw_spin_lock(&amd_nb_lock); | ||
2813 | |||
2814 | if (--cpuhw->amd_nb->refcnt == 0) | ||
2815 | kfree(cpuhw->amd_nb); | ||
2816 | |||
2817 | cpuhw->amd_nb = NULL; | ||
2818 | |||
2819 | raw_spin_unlock(&amd_nb_lock); | ||
2820 | } | ||
2821 | |||
2822 | static __init int amd_pmu_init(void) | ||
2823 | { | ||
2824 | /* Performance-monitoring supported from K7 and later: */ | ||
2825 | if (boot_cpu_data.x86 < 6) | ||
2826 | return -ENODEV; | ||
2827 | |||
2828 | x86_pmu = amd_pmu; | ||
2829 | |||
2830 | /* Events are common for all AMDs */ | ||
2831 | memcpy(hw_cache_event_ids, amd_hw_cache_event_ids, | ||
2832 | sizeof(hw_cache_event_ids)); | ||
2833 | |||
2834 | /* | ||
2835 | * explicitly initialize the boot cpu, other cpus will get | ||
2836 | * the cpu hotplug callbacks from smp_init() | ||
2837 | */ | ||
2838 | amd_pmu_cpu_online(smp_processor_id()); | ||
2839 | return 0; | ||
2840 | } | ||
2841 | 1337 | ||
2842 | static void __init pmu_check_apic(void) | 1338 | static void __init pmu_check_apic(void) |
2843 | { | 1339 | { |