author		Andi Kleen <ak@linux.intel.com>		2011-03-02 21:34:48 -0500
committer	Ingo Molnar <mingo@elte.hu>		2011-03-04 05:32:53 -0500
commit		e994d7d23a0bae34cd28834e85522ed4e782faf7 (patch)
tree		f9b08a69bdccf047cba9449adee4dd86ed1e8892
parent		a7e3ed1e470116c9d12c2f778431a481a6be8ab6 (diff)
perf: Fix LLC-* events on Intel Nehalem/Westmere
On Intel Nehalem and Westmere CPUs the generic perf LLC-* events count the
L2 cache, not the real L3 LLC; this was inconsistent with the behavior of
other CPUs.

Fixing this requires the use of the special OFFCORE_RESPONSE events, which
need a separate mask register.

That infrastructure was implemented by the previous patch; now use it to
set the correct events for LLC-* on Nehalem and Westmere.
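For illustration (not part of this patch): with this series applied, user
space can also request the raw OFFCORE_RESPONSE_0 event directly, passing
the response mask through attr.config1, the same field the kernel now
fills for the generic LLC-* events. A minimal, hypothetical sketch; the
mask value below (DMND_DATA_RD|RESP_MISS) mirrors the defines added in
perf_event_intel.c and is just one possible setting:

  /*
   * Hypothetical user-space sketch (not from this patch): count demand
   * data reads that miss the LLC via a raw OFFCORE_RESPONSE_0 event.
   * Assumes a kernel with this series applied.
   */
  #include <stdio.h>
  #include <string.h>
  #include <unistd.h>
  #include <sys/syscall.h>
  #include <linux/perf_event.h>

  int main(void)
  {
  	struct perf_event_attr attr;
  	long long count;
  	int fd;

  	memset(&attr, 0, sizeof(attr));
  	attr.size    = sizeof(attr);
  	attr.type    = PERF_TYPE_RAW;
  	attr.config  = 0x01b7;            /* OFFCORE_RESPONSE_0 (event 0xb7, umask 0x01) */
  	attr.config1 = (1 << 0) | 0xf600; /* DMND_DATA_RD | RESP_MISS */

  	/* measure this thread on any CPU; the counter starts enabled */
  	fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
  	if (fd < 0) {
  		perror("perf_event_open");
  		return 1;
  	}

  	/* ... run the workload to be measured here ... */

  	if (read(fd, &count, sizeof(count)) == sizeof(count))
  		printf("demand-read LLC misses: %lld\n", count);
  	close(fd);
  	return 0;
  }

This is roughly what perf stat -e LLC-load-misses now reports on these
CPUs via the generic cache events.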
Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Lin Ming <ming.m.lin@intel.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <1299119690-13991-3-git-send-email-ming.m.lin@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r--	arch/x86/kernel/cpu/perf_event.c	| 15
-rw-r--r--	arch/x86/kernel/cpu/perf_event_intel.c	| 81
2 files changed, 79 insertions(+), 17 deletions(-)
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index ec6a6db07332..4d6ce5d612da 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -310,6 +310,10 @@ static u64 __read_mostly hw_cache_event_ids
 				[PERF_COUNT_HW_CACHE_MAX]
 				[PERF_COUNT_HW_CACHE_OP_MAX]
 				[PERF_COUNT_HW_CACHE_RESULT_MAX];
+static u64 __read_mostly hw_cache_extra_regs
+				[PERF_COUNT_HW_CACHE_MAX]
+				[PERF_COUNT_HW_CACHE_OP_MAX]
+				[PERF_COUNT_HW_CACHE_RESULT_MAX];
 
 /*
  * Propagate event elapsed time into the generic event.
@@ -524,8 +528,9 @@ static inline int x86_pmu_initialized(void)
 }
 
 static inline int
-set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event_attr *attr)
+set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event *event)
 {
+	struct perf_event_attr *attr = &event->attr;
 	unsigned int cache_type, cache_op, cache_result;
 	u64 config, val;
 
@@ -552,8 +557,8 @@ set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event_attr *attr)
 		return -EINVAL;
 
 	hwc->config |= val;
-
-	return 0;
+	attr->config1 = hw_cache_extra_regs[cache_type][cache_op][cache_result];
+	return x86_pmu_extra_regs(val, event);
 }
 
 static int x86_setup_perfctr(struct perf_event *event)
@@ -578,10 +583,10 @@ static int x86_setup_perfctr(struct perf_event *event)
 	}
 
 	if (attr->type == PERF_TYPE_RAW)
-		return 0;
+		return x86_pmu_extra_regs(event->attr.config, event);
 
 	if (attr->type == PERF_TYPE_HW_CACHE)
-		return set_ext_hw_attr(hwc, attr);
+		return set_ext_hw_attr(hwc, event);
 
 	if (attr->config >= x86_pmu.max_events)
 		return -EINVAL;
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 13cb6cf013f6..6e9b6763ff48 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -285,16 +285,26 @@ static __initconst const u64 westmere_hw_cache_event_ids
 },
 [ C(LL  ) ] = {
	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS       */
-		[ C(RESULT_MISS)   ] = 0x0224, /* L2_RQSTS.LD_MISS     */
+		/* OFFCORE_RESPONSE_0.ANY_DATA.LOCAL_CACHE */
+		[ C(RESULT_ACCESS) ] = 0x01b7,
+		/* OFFCORE_RESPONSE_1.ANY_DATA.ANY_LLC_MISS */
+		[ C(RESULT_MISS)   ] = 0x01bb,
	},
+	/*
+	 * Use RFO, not WRITEBACK, because a write miss would typically occur
+	 * on RFO.
+	 */
	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS        */
-		[ C(RESULT_MISS)   ] = 0x0824, /* L2_RQSTS.RFO_MISS    */
+		/* OFFCORE_RESPONSE_1.ANY_RFO.LOCAL_CACHE */
+		[ C(RESULT_ACCESS) ] = 0x01bb,
+		/* OFFCORE_RESPONSE_0.ANY_RFO.ANY_LLC_MISS */
+		[ C(RESULT_MISS)   ] = 0x01b7,
	},
	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference        */
-		[ C(RESULT_MISS)   ] = 0x412e, /* LLC Misses           */
+		/* OFFCORE_RESPONSE_0.PREFETCH.LOCAL_CACHE */
+		[ C(RESULT_ACCESS) ] = 0x01b7,
+		/* OFFCORE_RESPONSE_1.PREFETCH.ANY_LLC_MISS */
+		[ C(RESULT_MISS)   ] = 0x01bb,
	},
 },
 [ C(DTLB) ] = {
@@ -341,6 +351,39 @@ static __initconst const u64 westmere_hw_cache_event_ids
 },
 };
 
+/*
+ * OFFCORE_RESPONSE MSR bits (subset), See IA32 SDM Vol 3 30.6.1.3
+ */
+
+#define DMND_DATA_RD     (1 << 0)
+#define DMND_RFO         (1 << 1)
+#define DMND_WB          (1 << 3)
+#define PF_DATA_RD       (1 << 4)
+#define PF_DATA_RFO      (1 << 5)
+#define RESP_UNCORE_HIT  (1 << 8)
+#define RESP_MISS        (0xf600) /* non uncore hit */
+
+static __initconst const u64 nehalem_hw_cache_extra_regs
+				[PERF_COUNT_HW_CACHE_MAX]
+				[PERF_COUNT_HW_CACHE_OP_MAX]
+				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(LL  ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = DMND_DATA_RD|RESP_UNCORE_HIT,
+		[ C(RESULT_MISS)   ] = DMND_DATA_RD|RESP_MISS,
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = DMND_RFO|DMND_WB|RESP_UNCORE_HIT,
+		[ C(RESULT_MISS)   ] = DMND_RFO|DMND_WB|RESP_MISS,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = PF_DATA_RD|PF_DATA_RFO|RESP_UNCORE_HIT,
+		[ C(RESULT_MISS)   ] = PF_DATA_RD|PF_DATA_RFO|RESP_MISS,
+	},
+ }
+};
+
 static __initconst const u64 nehalem_hw_cache_event_ids
 				[PERF_COUNT_HW_CACHE_MAX]
 				[PERF_COUNT_HW_CACHE_OP_MAX]
@@ -376,16 +419,26 @@ static __initconst const u64 nehalem_hw_cache_event_ids
 },
 [ C(LL  ) ] = {
	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS       */
-		[ C(RESULT_MISS)   ] = 0x0224, /* L2_RQSTS.LD_MISS     */
+		/* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
+		[ C(RESULT_ACCESS) ] = 0x01b7,
+		/* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */
+		[ C(RESULT_MISS)   ] = 0x01b7,
	},
+	/*
+	 * Use RFO, not WRITEBACK, because a write miss would typically occur
+	 * on RFO.
+	 */
	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS        */
-		[ C(RESULT_MISS)   ] = 0x0824, /* L2_RQSTS.RFO_MISS    */
+		/* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
+		[ C(RESULT_ACCESS) ] = 0x01b7,
+		/* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */
+		[ C(RESULT_MISS)   ] = 0x01b7,
	},
	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference        */
-		[ C(RESULT_MISS)   ] = 0x412e, /* LLC Misses           */
+		/* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */
+		[ C(RESULT_ACCESS) ] = 0x01b7,
+		/* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */
+		[ C(RESULT_MISS)   ] = 0x01b7,
	},
 },
 [ C(DTLB) ] = {
@@ -1340,6 +1393,8 @@ static __init int intel_pmu_init(void)
 	case 46: /* 45 nm nehalem-ex, "Beckton" */
 		memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
 		       sizeof(hw_cache_event_ids));
+		memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs,
+		       sizeof(hw_cache_extra_regs));
 
 		intel_pmu_lbr_init_nhm();
 
@@ -1366,6 +1421,8 @@ static __init int intel_pmu_init(void)
 	case 44: /* 32 nm nehalem, "Gulftown" */
 		memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids,
 		       sizeof(hw_cache_event_ids));
+		memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs,
+		       sizeof(hw_cache_extra_regs));
 
 		intel_pmu_lbr_init_nhm();
 