aboutsummaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
author Peter Zijlstra <peterz@infradead.org> 2011-04-22 18:57:42 -0400
committer Ingo Molnar <mingo@elte.hu> 2011-05-06 05:24:48 -0400
commit 63b6a6758eede2f9283c3594265b6e32e75d7456 (patch)
tree f57561ee0ae40e084dea56ba79439e563071a9d9 /arch
parent 925f83c085e1bb08435556c5b4844a60de002e31 (diff)
perf events, x86: Fix Intel Nehalem and Westmere last level cache event definitions
The Intel Nehalem offcore bits implemented in: e994d7d23a0b: perf: Fix LLC-* events on Intel Nehalem/Westmere ... are wrong: they implemented _ACCESS as _HIT and counted OTHER_CORE_HIT* as MISS even though it's clearly documented as an L3 hit ... Fix them and the Westmere definitions as well. Cc: Andi Kleen <ak@linux.intel.com> Cc: Lin Ming <ming.m.lin@intel.com> Cc: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Frederic Weisbecker <fweisbec@gmail.com> Cc: Mike Galbraith <efault@gmx.de> Cc: Steven Rostedt <rostedt@goodmis.org> Link: http://lkml.kernel.org/r/1299119690-13991-3-git-send-email-ming.m.lin@intel.com Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch')
-rw-r--r-- arch/x86/kernel/cpu/perf_event_intel.c 87
1 files changed, 52 insertions, 35 deletions
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index e61539b07d2c..447a28de6f09 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -184,26 +184,23 @@ static __initconst const u64 snb_hw_cache_event_ids
184 }, 184 },
185 }, 185 },
186 [ C(LL ) ] = { 186 [ C(LL ) ] = {
187 /*
188 * TBD: Need Off-core Response Performance Monitoring support
189 */
190 [ C(OP_READ) ] = { 187 [ C(OP_READ) ] = {
191 /* OFFCORE_RESPONSE_0.ANY_DATA.LOCAL_CACHE */ 188 /* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
192 [ C(RESULT_ACCESS) ] = 0x01b7, 189 [ C(RESULT_ACCESS) ] = 0x01b7,
193 /* OFFCORE_RESPONSE_1.ANY_DATA.ANY_LLC_MISS */ 190 /* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */
194 [ C(RESULT_MISS) ] = 0x01bb, 191 [ C(RESULT_MISS) ] = 0x01b7,
195 }, 192 },
196 [ C(OP_WRITE) ] = { 193 [ C(OP_WRITE) ] = {
197 /* OFFCORE_RESPONSE_0.ANY_RFO.LOCAL_CACHE */ 194 /* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
198 [ C(RESULT_ACCESS) ] = 0x01b7, 195 [ C(RESULT_ACCESS) ] = 0x01b7,
199 /* OFFCORE_RESPONSE_1.ANY_RFO.ANY_LLC_MISS */ 196 /* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */
200 [ C(RESULT_MISS) ] = 0x01bb, 197 [ C(RESULT_MISS) ] = 0x01b7,
201 }, 198 },
202 [ C(OP_PREFETCH) ] = { 199 [ C(OP_PREFETCH) ] = {
203 /* OFFCORE_RESPONSE_0.PREFETCH.LOCAL_CACHE */ 200 /* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */
204 [ C(RESULT_ACCESS) ] = 0x01b7, 201 [ C(RESULT_ACCESS) ] = 0x01b7,
205 /* OFFCORE_RESPONSE_1.PREFETCH.ANY_LLC_MISS */ 202 /* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */
206 [ C(RESULT_MISS) ] = 0x01bb, 203 [ C(RESULT_MISS) ] = 0x01b7,
207 }, 204 },
208 }, 205 },
209 [ C(DTLB) ] = { 206 [ C(DTLB) ] = {
@@ -285,26 +282,26 @@ static __initconst const u64 westmere_hw_cache_event_ids
285 }, 282 },
286 [ C(LL ) ] = { 283 [ C(LL ) ] = {
287 [ C(OP_READ) ] = { 284 [ C(OP_READ) ] = {
288 /* OFFCORE_RESPONSE_0.ANY_DATA.LOCAL_CACHE */ 285 /* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
289 [ C(RESULT_ACCESS) ] = 0x01b7, 286 [ C(RESULT_ACCESS) ] = 0x01b7,
290 /* OFFCORE_RESPONSE_1.ANY_DATA.ANY_LLC_MISS */ 287 /* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */
291 [ C(RESULT_MISS) ] = 0x01bb, 288 [ C(RESULT_MISS) ] = 0x01b7,
292 }, 289 },
293 /* 290 /*
294 * Use RFO, not WRITEBACK, because a write miss would typically occur 291 * Use RFO, not WRITEBACK, because a write miss would typically occur
295 * on RFO. 292 * on RFO.
296 */ 293 */
297 [ C(OP_WRITE) ] = { 294 [ C(OP_WRITE) ] = {
298 /* OFFCORE_RESPONSE_1.ANY_RFO.LOCAL_CACHE */ 295 /* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
299 [ C(RESULT_ACCESS) ] = 0x01bb, 296 [ C(RESULT_ACCESS) ] = 0x01b7,
300 /* OFFCORE_RESPONSE_0.ANY_RFO.ANY_LLC_MISS */ 297 /* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */
301 [ C(RESULT_MISS) ] = 0x01b7, 298 [ C(RESULT_MISS) ] = 0x01b7,
302 }, 299 },
303 [ C(OP_PREFETCH) ] = { 300 [ C(OP_PREFETCH) ] = {
304 /* OFFCORE_RESPONSE_0.PREFETCH.LOCAL_CACHE */ 301 /* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */
305 [ C(RESULT_ACCESS) ] = 0x01b7, 302 [ C(RESULT_ACCESS) ] = 0x01b7,
306 /* OFFCORE_RESPONSE_1.PREFETCH.ANY_LLC_MISS */ 303 /* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */
307 [ C(RESULT_MISS) ] = 0x01bb, 304 [ C(RESULT_MISS) ] = 0x01b7,
308 }, 305 },
309 }, 306 },
310 [ C(DTLB) ] = { 307 [ C(DTLB) ] = {
@@ -352,16 +349,36 @@ static __initconst const u64 westmere_hw_cache_event_ids
352}; 349};
353 350
354/* 351/*
355 * OFFCORE_RESPONSE MSR bits (subset), See IA32 SDM Vol 3 30.6.1.3 352 * Nehalem/Westmere MSR_OFFCORE_RESPONSE bits;
353 * See IA32 SDM Vol 3B 30.6.1.3
356 */ 354 */
357 355
358#define DMND_DATA_RD (1 << 0) 356#define NHM_DMND_DATA_RD (1 << 0)
359#define DMND_RFO (1 << 1) 357#define NHM_DMND_RFO (1 << 1)
360#define DMND_WB (1 << 3) 358#define NHM_DMND_IFETCH (1 << 2)
361#define PF_DATA_RD (1 << 4) 359#define NHM_DMND_WB (1 << 3)
362#define PF_DATA_RFO (1 << 5) 360#define NHM_PF_DATA_RD (1 << 4)
363#define RESP_UNCORE_HIT (1 << 8) 361#define NHM_PF_DATA_RFO (1 << 5)
364#define RESP_MISS (0xf600) /* non uncore hit */ 362#define NHM_PF_IFETCH (1 << 6)
363#define NHM_OFFCORE_OTHER (1 << 7)
364#define NHM_UNCORE_HIT (1 << 8)
365#define NHM_OTHER_CORE_HIT_SNP (1 << 9)
366#define NHM_OTHER_CORE_HITM (1 << 10)
367 /* reserved */
368#define NHM_REMOTE_CACHE_FWD (1 << 12)
369#define NHM_REMOTE_DRAM (1 << 13)
370#define NHM_LOCAL_DRAM (1 << 14)
371#define NHM_NON_DRAM (1 << 15)
372
373#define NHM_ALL_DRAM (NHM_REMOTE_DRAM|NHM_LOCAL_DRAM)
374
375#define NHM_DMND_READ (NHM_DMND_DATA_RD)
376#define NHM_DMND_WRITE (NHM_DMND_RFO|NHM_DMND_WB)
377#define NHM_DMND_PREFETCH (NHM_PF_DATA_RD|NHM_PF_DATA_RFO)
378
379#define NHM_L3_HIT (NHM_UNCORE_HIT|NHM_OTHER_CORE_HIT_SNP|NHM_OTHER_CORE_HITM)
380#define NHM_L3_MISS (NHM_NON_DRAM|NHM_ALL_DRAM|NHM_REMOTE_CACHE_FWD)
381#define NHM_L3_ACCESS (NHM_L3_HIT|NHM_L3_MISS)
365 382
366static __initconst const u64 nehalem_hw_cache_extra_regs 383static __initconst const u64 nehalem_hw_cache_extra_regs
367 [PERF_COUNT_HW_CACHE_MAX] 384 [PERF_COUNT_HW_CACHE_MAX]
@@ -370,16 +387,16 @@ static __initconst const u64 nehalem_hw_cache_extra_regs
370{ 387{
371 [ C(LL ) ] = { 388 [ C(LL ) ] = {
372 [ C(OP_READ) ] = { 389 [ C(OP_READ) ] = {
373 [ C(RESULT_ACCESS) ] = DMND_DATA_RD|RESP_UNCORE_HIT, 390 [ C(RESULT_ACCESS) ] = NHM_DMND_READ|NHM_L3_ACCESS,
374 [ C(RESULT_MISS) ] = DMND_DATA_RD|RESP_MISS, 391 [ C(RESULT_MISS) ] = NHM_DMND_READ|NHM_L3_MISS,
375 }, 392 },
376 [ C(OP_WRITE) ] = { 393 [ C(OP_WRITE) ] = {
377 [ C(RESULT_ACCESS) ] = DMND_RFO|DMND_WB|RESP_UNCORE_HIT, 394 [ C(RESULT_ACCESS) ] = NHM_DMND_WRITE|NHM_L3_ACCESS,
378 [ C(RESULT_MISS) ] = DMND_RFO|DMND_WB|RESP_MISS, 395 [ C(RESULT_MISS) ] = NHM_DMND_WRITE|NHM_L3_MISS,
379 }, 396 },
380 [ C(OP_PREFETCH) ] = { 397 [ C(OP_PREFETCH) ] = {
381 [ C(RESULT_ACCESS) ] = PF_DATA_RD|PF_DATA_RFO|RESP_UNCORE_HIT, 398 [ C(RESULT_ACCESS) ] = NHM_DMND_PREFETCH|NHM_L3_ACCESS,
382 [ C(RESULT_MISS) ] = PF_DATA_RD|PF_DATA_RFO|RESP_MISS, 399 [ C(RESULT_MISS) ] = NHM_DMND_PREFETCH|NHM_L3_MISS,
383 }, 400 },
384 } 401 }
385}; 402};