author    Andi Kleen <ak@linux.intel.com>    2011-03-02 21:34:48 -0500
committer Ingo Molnar <mingo@elte.hu>    2011-03-04 05:32:53 -0500
commit    e994d7d23a0bae34cd28834e85522ed4e782faf7 (patch)
tree      f9b08a69bdccf047cba9449adee4dd86ed1e8892
parent    a7e3ed1e470116c9d12c2f778431a481a6be8ab6 (diff)
perf: Fix LLC-* events on Intel Nehalem/Westmere
On Intel Nehalem and Westmere CPUs the generic perf LLC-* events count the
L2 caches, not the real L3 LLC - this was inconsistent with behavior on
other CPUs.

Fixing this requires the use of the special OFFCORE_RESPONSE events,
which need a separate mask register.

This has been implemented by the previous patch; now use this
infrastructure to set correct events for the LLC-* on Nehalem and
Westmere.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Lin Ming <ming.m.lin@intel.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <1299119690-13991-3-git-send-email-ming.m.lin@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
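For context, a minimal userspace sketch (hypothetical, not part of this patch) of how a profiler requests the event this change remaps. It uses the documented PERF_TYPE_HW_CACHE config encoding from perf_event_open(2); after this fix the same request counts real L3 misses on Nehalem/Westmere instead of L2:

#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

int main(void)
{
	struct perf_event_attr attr;
	uint64_t count;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_HW_CACHE;
	/* config = cache id | (op id << 8) | (result id << 16) */
	attr.config = PERF_COUNT_HW_CACHE_LL |
		      (PERF_COUNT_HW_CACHE_OP_READ << 8) |
		      (PERF_COUNT_HW_CACHE_RESULT_MISS << 16);
	attr.disabled = 1;

	/* count for the calling task, on any CPU */
	fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
	if (fd < 0) {
		perror("perf_event_open");
		return 1;
	}

	ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
	/* ... run the workload to be measured here ... */
	ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);

	if (read(fd, &count, sizeof(count)) == sizeof(count))
		printf("LLC read misses: %llu\n", (unsigned long long)count);
	close(fd);
	return 0;
}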
-rw-r--r--  arch/x86/kernel/cpu/perf_event.c        15
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel.c  81
2 files changed, 79 insertions, 17 deletions
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index ec6a6db07332..4d6ce5d612da 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -310,6 +310,10 @@ static u64 __read_mostly hw_cache_event_ids
 				[PERF_COUNT_HW_CACHE_MAX]
 				[PERF_COUNT_HW_CACHE_OP_MAX]
 				[PERF_COUNT_HW_CACHE_RESULT_MAX];
+static u64 __read_mostly hw_cache_extra_regs
+				[PERF_COUNT_HW_CACHE_MAX]
+				[PERF_COUNT_HW_CACHE_OP_MAX]
+				[PERF_COUNT_HW_CACHE_RESULT_MAX];
 
 /*
  * Propagate event elapsed time into the generic event.
@@ -524,8 +528,9 @@ static inline int x86_pmu_initialized(void)
 }
 
 static inline int
-set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event_attr *attr)
+set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event *event)
 {
+	struct perf_event_attr *attr = &event->attr;
 	unsigned int cache_type, cache_op, cache_result;
 	u64 config, val;
 
@@ -552,8 +557,8 @@ set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event_attr *attr)
 		return -EINVAL;
 
 	hwc->config |= val;
-
-	return 0;
+	attr->config1 = hw_cache_extra_regs[cache_type][cache_op][cache_result];
+	return x86_pmu_extra_regs(val, event);
 }
 
 static int x86_setup_perfctr(struct perf_event *event)
@@ -578,10 +583,10 @@ static int x86_setup_perfctr(struct perf_event *event)
 	}
 
 	if (attr->type == PERF_TYPE_RAW)
-		return 0;
+		return x86_pmu_extra_regs(event->attr.config, event);
 
 	if (attr->type == PERF_TYPE_HW_CACHE)
-		return set_ext_hw_attr(hwc, event);
 
 	if (attr->config >= x86_pmu.max_events)
 		return -EINVAL;
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 13cb6cf013f6..6e9b6763ff48 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -285,16 +285,26 @@ static __initconst const u64 westmere_hw_cache_event_ids
 	},
 [ C(LL  ) ] = {
 	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS          */
-		[ C(RESULT_MISS)   ] = 0x0224, /* L2_RQSTS.LD_MISS        */
+		/* OFFCORE_RESPONSE_0.ANY_DATA.LOCAL_CACHE */
+		[ C(RESULT_ACCESS) ] = 0x01b7,
+		/* OFFCORE_RESPONSE_1.ANY_DATA.ANY_LLC_MISS */
+		[ C(RESULT_MISS)   ] = 0x01bb,
 	},
+	/*
+	 * Use RFO, not WRITEBACK, because a write miss would typically occur
+	 * on RFO.
+	 */
 	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS           */
-		[ C(RESULT_MISS)   ] = 0x0824, /* L2_RQSTS.RFO_MISS       */
+		/* OFFCORE_RESPONSE_1.ANY_RFO.LOCAL_CACHE */
+		[ C(RESULT_ACCESS) ] = 0x01bb,
+		/* OFFCORE_RESPONSE_0.ANY_RFO.ANY_LLC_MISS */
+		[ C(RESULT_MISS)   ] = 0x01b7,
 	},
 	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference           */
-		[ C(RESULT_MISS)   ] = 0x412e, /* LLC Misses              */
+		/* OFFCORE_RESPONSE_0.PREFETCH.LOCAL_CACHE */
+		[ C(RESULT_ACCESS) ] = 0x01b7,
+		/* OFFCORE_RESPONSE_1.PREFETCH.ANY_LLC_MISS */
+		[ C(RESULT_MISS)   ] = 0x01bb,
 	},
 },
 [ C(DTLB) ] = {
@@ -341,6 +351,39 @@ static __initconst const u64 westmere_hw_cache_event_ids
 	},
 };
 
+/*
+ * OFFCORE_RESPONSE MSR bits (subset), See IA32 SDM Vol 3 30.6.1.3
+ */
+
+#define DMND_DATA_RD	(1 << 0)
+#define DMND_RFO	(1 << 1)
+#define DMND_WB		(1 << 3)
+#define PF_DATA_RD	(1 << 4)
+#define PF_DATA_RFO	(1 << 5)
+#define RESP_UNCORE_HIT	(1 << 8)
+#define RESP_MISS	(0xf600) /* non uncore hit */
+
+static __initconst const u64 nehalem_hw_cache_extra_regs
+				[PERF_COUNT_HW_CACHE_MAX]
+				[PERF_COUNT_HW_CACHE_OP_MAX]
+				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(LL  ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = DMND_DATA_RD|RESP_UNCORE_HIT,
+		[ C(RESULT_MISS)   ] = DMND_DATA_RD|RESP_MISS,
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = DMND_RFO|DMND_WB|RESP_UNCORE_HIT,
+		[ C(RESULT_MISS)   ] = DMND_RFO|DMND_WB|RESP_MISS,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = PF_DATA_RD|PF_DATA_RFO|RESP_UNCORE_HIT,
+		[ C(RESULT_MISS)   ] = PF_DATA_RD|PF_DATA_RFO|RESP_MISS,
+	},
+ }
+};
+
 static __initconst const u64 nehalem_hw_cache_event_ids
 				[PERF_COUNT_HW_CACHE_MAX]
 				[PERF_COUNT_HW_CACHE_OP_MAX]
@@ -376,16 +419,26 @@ static __initconst const u64 nehalem_hw_cache_event_ids
 	},
 [ C(LL  ) ] = {
 	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS          */
-		[ C(RESULT_MISS)   ] = 0x0224, /* L2_RQSTS.LD_MISS        */
+		/* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
+		[ C(RESULT_ACCESS) ] = 0x01b7,
+		/* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */
+		[ C(RESULT_MISS)   ] = 0x01b7,
 	},
+	/*
+	 * Use RFO, not WRITEBACK, because a write miss would typically occur
+	 * on RFO.
+	 */
 	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS           */
-		[ C(RESULT_MISS)   ] = 0x0824, /* L2_RQSTS.RFO_MISS       */
+		/* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
+		[ C(RESULT_ACCESS) ] = 0x01b7,
+		/* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */
+		[ C(RESULT_MISS)   ] = 0x01b7,
 	},
 	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference           */
-		[ C(RESULT_MISS)   ] = 0x412e, /* LLC Misses              */
+		/* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */
+		[ C(RESULT_ACCESS) ] = 0x01b7,
+		/* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */
+		[ C(RESULT_MISS)   ] = 0x01b7,
 	},
 },
 [ C(DTLB) ] = {
@@ -1340,6 +1393,8 @@ static __init int intel_pmu_init(void)
 	case 46: /* 45 nm nehalem-ex, "Beckton" */
 		memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
 		       sizeof(hw_cache_event_ids));
+		memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs,
+		       sizeof(hw_cache_extra_regs));
 
 		intel_pmu_lbr_init_nhm();
 
@@ -1366,6 +1421,8 @@ static __init int intel_pmu_init(void)
 	case 44: /* 32 nm nehalem, "Gulftown" */
 		memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids,
 		       sizeof(hw_cache_event_ids));
+		memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs,
+		       sizeof(hw_cache_extra_regs));
 
 		intel_pmu_lbr_init_nhm();
 
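A quick sanity check on the values above, assuming only the bit definitions in the patch: the extra-register value for an LLC read miss is DMND_DATA_RD | RESP_MISS = 0x1 | 0xf600 = 0xf601, which set_ext_hw_attr() now places into attr->config1 for the extra-reg infrastructure from the previous patch to write into the OFFCORE_RESPONSE mask register. The 0x01b7 and 0x01bb event codes are event 0xB7/0xBB with umask 0x01, i.e. OFFCORE_RESPONSE_0 and (on Westmere) OFFCORE_RESPONSE_1; Nehalem has only the one mask register, which is why its table uses 0x01b7 for every entry while Westmere alternates between the two.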