aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorIngo Molnar <mingo@kernel.org>2014-10-29 05:18:17 -0400
committerIngo Molnar <mingo@kernel.org>2014-10-29 06:07:58 -0400
commit1776b10627e486dd431fe72d8d47e5a865cf65d1 (patch)
tree0e94654466903002b88f0afe96e8c0a60587ae2c
parent7fb0f1de49fc75a0dcec22531f2d0a79fc2fb625 (diff)
perf/x86/intel: Revert incomplete and undocumented Broadwell client support
These patches: 86a349a28b24 ("perf/x86/intel: Add Broadwell core support") c46e665f0377 ("perf/x86: Add INST_RETIRED.ALL workarounds") fdda3c4aacec ("perf/x86/intel: Use Broadwell cache event list for Haswell") introduced magic constants and unexplained changes: https://lkml.org/lkml/2014/10/28/1128 https://lkml.org/lkml/2014/10/27/325 https://lkml.org/lkml/2014/8/27/546 https://lkml.org/lkml/2014/10/28/546 Peter Zijlstra has attempted to help out, to clean up the mess: https://lkml.org/lkml/2014/10/28/543 But has not received helpful and constructive replies which makes me doubt wether it can all be finished in time until v3.18 is released. Despite various review feedback the author (Andi Kleen) has answered only few of the review questions and has generally been uncooperative, only giving replies when prompted repeatedly, and only giving minimal answers instead of constructively explaining and helping along the effort. That kind of behavior is not acceptable. There's also a boot crash on Intel E5-1630 v3 CPUs reported for another commit from Andi Kleen: e735b9db12d7 ("perf/x86/intel/uncore: Add Haswell-EP uncore support") https://lkml.org/lkml/2014/10/22/730 Which is not yet resolved. The uncore driver is independent in theory, but the crash makes me worry about how well all these patches were tested and makes me uneasy about the level of interminging that the Broadwell and Haswell code has received by the commits above. As a first step to resolve the mess revert the Broadwell client commits back to the v3.17 version, before we run out of time and problematic code hits a stable upstream kernel. ( If the Haswell-EP crash is not resolved via a simple fix then we'll have to revert the Haswell-EP uncore driver as well. ) The Broadwell client series has to be submitted in a clean fashion, with single, well documented changes per patch. If they are submitted in time and are accepted during review then they can possibly go into v3.19 but will need additional scrutiny due to the rocky history of this patch set. Cc: Andi Kleen <ak@linux.intel.com> Cc: Peter Zijlstra (Intel) <peterz@infradead.org> Cc: eranian@google.com Cc: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Link: http://lkml.kernel.org/r/1409683455-29168-3-git-send-email-andi@firstfloor.org Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r--arch/x86/kernel/cpu/perf_event.c9
-rw-r--r--arch/x86/kernel/cpu/perf_event.h1
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel.c173
3 files changed, 2 insertions, 181 deletions
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 66451a6b9485..143e5f5dc855 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -445,12 +445,6 @@ int x86_pmu_hw_config(struct perf_event *event)
445 if (event->attr.type == PERF_TYPE_RAW) 445 if (event->attr.type == PERF_TYPE_RAW)
446 event->hw.config |= event->attr.config & X86_RAW_EVENT_MASK; 446 event->hw.config |= event->attr.config & X86_RAW_EVENT_MASK;
447 447
448 if (event->attr.sample_period && x86_pmu.limit_period) {
449 if (x86_pmu.limit_period(event, event->attr.sample_period) >
450 event->attr.sample_period)
451 return -EINVAL;
452 }
453
454 return x86_setup_perfctr(event); 448 return x86_setup_perfctr(event);
455} 449}
456 450
@@ -988,9 +982,6 @@ int x86_perf_event_set_period(struct perf_event *event)
988 if (left > x86_pmu.max_period) 982 if (left > x86_pmu.max_period)
989 left = x86_pmu.max_period; 983 left = x86_pmu.max_period;
990 984
991 if (x86_pmu.limit_period)
992 left = x86_pmu.limit_period(event, left);
993
994 per_cpu(pmc_prev_left[idx], smp_processor_id()) = left; 985 per_cpu(pmc_prev_left[idx], smp_processor_id()) = left;
995 986
996 /* 987 /*
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index d98a34d435d7..fc5eb390b368 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -445,7 +445,6 @@ struct x86_pmu {
445 struct x86_pmu_quirk *quirks; 445 struct x86_pmu_quirk *quirks;
446 int perfctr_second_write; 446 int perfctr_second_write;
447 bool late_ack; 447 bool late_ack;
448 unsigned (*limit_period)(struct perf_event *event, unsigned l);
449 448
450 /* 449 /*
451 * sysfs attrs 450 * sysfs attrs
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index a73947c53b65..944bf019b74f 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -220,15 +220,6 @@ static struct event_constraint intel_hsw_event_constraints[] = {
220 EVENT_CONSTRAINT_END 220 EVENT_CONSTRAINT_END
221}; 221};
222 222
223static struct event_constraint intel_bdw_event_constraints[] = {
224 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
225 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
226 FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
227 INTEL_UEVENT_CONSTRAINT(0x148, 0x4), /* L1D_PEND_MISS.PENDING */
228 INTEL_EVENT_CONSTRAINT(0xa3, 0x4), /* CYCLE_ACTIVITY.* */
229 EVENT_CONSTRAINT_END
230};
231
232static u64 intel_pmu_event_map(int hw_event) 223static u64 intel_pmu_event_map(int hw_event)
233{ 224{
234 return intel_perfmon_event_map[hw_event]; 225 return intel_perfmon_event_map[hw_event];
@@ -424,126 +415,6 @@ static __initconst const u64 snb_hw_cache_event_ids
424 415
425}; 416};
426 417
427static __initconst const u64 hsw_hw_cache_event_ids
428 [PERF_COUNT_HW_CACHE_MAX]
429 [PERF_COUNT_HW_CACHE_OP_MAX]
430 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
431{
432 [ C(L1D ) ] = {
433 [ C(OP_READ) ] = {
434 [ C(RESULT_ACCESS) ] = 0x81d0, /* MEM_UOPS_RETIRED.ALL_LOADS */
435 [ C(RESULT_MISS) ] = 0x151, /* L1D.REPLACEMENT */
436 },
437 [ C(OP_WRITE) ] = {
438 [ C(RESULT_ACCESS) ] = 0x82d0, /* MEM_UOPS_RETIRED.ALL_STORES */
439 [ C(RESULT_MISS) ] = 0x0,
440 },
441 [ C(OP_PREFETCH) ] = {
442 [ C(RESULT_ACCESS) ] = 0x0,
443 [ C(RESULT_MISS) ] = 0x0,
444 },
445 },
446 [ C(L1I ) ] = {
447 [ C(OP_READ) ] = {
448 [ C(RESULT_ACCESS) ] = 0x0,
449 [ C(RESULT_MISS) ] = 0x280, /* ICACHE.MISSES */
450 },
451 [ C(OP_WRITE) ] = {
452 [ C(RESULT_ACCESS) ] = -1,
453 [ C(RESULT_MISS) ] = -1,
454 },
455 [ C(OP_PREFETCH) ] = {
456 [ C(RESULT_ACCESS) ] = 0x0,
457 [ C(RESULT_MISS) ] = 0x0,
458 },
459 },
460 [ C(LL ) ] = {
461 [ C(OP_READ) ] = {
462 /* OFFCORE_RESPONSE:ALL_DATA_RD|ALL_CODE_RD */
463 [ C(RESULT_ACCESS) ] = 0x1b7,
464 /* OFFCORE_RESPONSE:ALL_DATA_RD|ALL_CODE_RD|SUPPLIER_NONE|
465 L3_MISS|ANY_SNOOP */
466 [ C(RESULT_MISS) ] = 0x1b7,
467 },
468 [ C(OP_WRITE) ] = {
469 [ C(RESULT_ACCESS) ] = 0x1b7, /* OFFCORE_RESPONSE:ALL_RFO */
470 /* OFFCORE_RESPONSE:ALL_RFO|SUPPLIER_NONE|L3_MISS|ANY_SNOOP */
471 [ C(RESULT_MISS) ] = 0x1b7,
472 },
473 [ C(OP_PREFETCH) ] = {
474 [ C(RESULT_ACCESS) ] = 0x0,
475 [ C(RESULT_MISS) ] = 0x0,
476 },
477 },
478 [ C(DTLB) ] = {
479 [ C(OP_READ) ] = {
480 [ C(RESULT_ACCESS) ] = 0x81d0, /* MEM_UOPS_RETIRED.ALL_LOADS */
481 [ C(RESULT_MISS) ] = 0x108, /* DTLB_LOAD_MISSES.MISS_CAUSES_A_WALK */
482 },
483 [ C(OP_WRITE) ] = {
484 [ C(RESULT_ACCESS) ] = 0x82d0, /* MEM_UOPS_RETIRED.ALL_STORES */
485 [ C(RESULT_MISS) ] = 0x149, /* DTLB_STORE_MISSES.MISS_CAUSES_A_WALK */
486 },
487 [ C(OP_PREFETCH) ] = {
488 [ C(RESULT_ACCESS) ] = 0x0,
489 [ C(RESULT_MISS) ] = 0x0,
490 },
491 },
492 [ C(ITLB) ] = {
493 [ C(OP_READ) ] = {
494 [ C(RESULT_ACCESS) ] = 0x6085, /* ITLB_MISSES.STLB_HIT */
495 [ C(RESULT_MISS) ] = 0x185, /* ITLB_MISSES.MISS_CAUSES_A_WALK */
496 },
497 [ C(OP_WRITE) ] = {
498 [ C(RESULT_ACCESS) ] = -1,
499 [ C(RESULT_MISS) ] = -1,
500 },
501 [ C(OP_PREFETCH) ] = {
502 [ C(RESULT_ACCESS) ] = -1,
503 [ C(RESULT_MISS) ] = -1,
504 },
505 },
506 [ C(BPU ) ] = {
507 [ C(OP_READ) ] = {
508 [ C(RESULT_ACCESS) ] = 0xc4, /* BR_INST_RETIRED.ALL_BRANCHES */
509 [ C(RESULT_MISS) ] = 0xc5, /* BR_MISP_RETIRED.ALL_BRANCHES */
510 },
511 [ C(OP_WRITE) ] = {
512 [ C(RESULT_ACCESS) ] = -1,
513 [ C(RESULT_MISS) ] = -1,
514 },
515 [ C(OP_PREFETCH) ] = {
516 [ C(RESULT_ACCESS) ] = -1,
517 [ C(RESULT_MISS) ] = -1,
518 },
519 },
520};
521
522static __initconst const u64 hsw_hw_cache_extra_regs
523 [PERF_COUNT_HW_CACHE_MAX]
524 [PERF_COUNT_HW_CACHE_OP_MAX]
525 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
526{
527 [ C(LL ) ] = {
528 [ C(OP_READ) ] = {
529 /* OFFCORE_RESPONSE:ALL_DATA_RD|ALL_CODE_RD */
530 [ C(RESULT_ACCESS) ] = 0x2d5,
531 /* OFFCORE_RESPONSE:ALL_DATA_RD|ALL_CODE_RD|SUPPLIER_NONE|
532 L3_MISS|ANY_SNOOP */
533 [ C(RESULT_MISS) ] = 0x3fbc0202d5ull,
534 },
535 [ C(OP_WRITE) ] = {
536 [ C(RESULT_ACCESS) ] = 0x122, /* OFFCORE_RESPONSE:ALL_RFO */
537 /* OFFCORE_RESPONSE:ALL_RFO|SUPPLIER_NONE|L3_MISS|ANY_SNOOP */
538 [ C(RESULT_MISS) ] = 0x3fbc020122ull,
539 },
540 [ C(OP_PREFETCH) ] = {
541 [ C(RESULT_ACCESS) ] = 0x0,
542 [ C(RESULT_MISS) ] = 0x0,
543 },
544 },
545};
546
547static __initconst const u64 westmere_hw_cache_event_ids 418static __initconst const u64 westmere_hw_cache_event_ids
548 [PERF_COUNT_HW_CACHE_MAX] 419 [PERF_COUNT_HW_CACHE_MAX]
549 [PERF_COUNT_HW_CACHE_OP_MAX] 420 [PERF_COUNT_HW_CACHE_OP_MAX]
@@ -2034,24 +1905,6 @@ hsw_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
2034 return c; 1905 return c;
2035} 1906}
2036 1907
2037/*
2038 * Broadwell:
2039 * The INST_RETIRED.ALL period always needs to have lowest
2040 * 6bits cleared (BDM57). It shall not use a period smaller
2041 * than 100 (BDM11). We combine the two to enforce
2042 * a min-period of 128.
2043 */
2044static unsigned bdw_limit_period(struct perf_event *event, unsigned left)
2045{
2046 if ((event->hw.config & INTEL_ARCH_EVENT_MASK) ==
2047 X86_CONFIG(.event=0xc0, .umask=0x01)) {
2048 if (left < 128)
2049 left = 128;
2050 left &= ~0x3fu;
2051 }
2052 return left;
2053}
2054
2055PMU_FORMAT_ATTR(event, "config:0-7" ); 1908PMU_FORMAT_ATTR(event, "config:0-7" );
2056PMU_FORMAT_ATTR(umask, "config:8-15" ); 1909PMU_FORMAT_ATTR(umask, "config:8-15" );
2057PMU_FORMAT_ATTR(edge, "config:18" ); 1910PMU_FORMAT_ATTR(edge, "config:18" );
@@ -2692,8 +2545,8 @@ __init int intel_pmu_init(void)
2692 case 69: /* 22nm Haswell ULT */ 2545 case 69: /* 22nm Haswell ULT */
2693 case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */ 2546 case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */
2694 x86_pmu.late_ack = true; 2547 x86_pmu.late_ack = true;
2695 memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids)); 2548 memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, sizeof(hw_cache_event_ids));
2696 memcpy(hw_cache_extra_regs, hsw_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); 2549 memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
2697 2550
2698 intel_pmu_lbr_init_snb(); 2551 intel_pmu_lbr_init_snb();
2699 2552
@@ -2712,28 +2565,6 @@ __init int intel_pmu_init(void)
2712 pr_cont("Haswell events, "); 2565 pr_cont("Haswell events, ");
2713 break; 2566 break;
2714 2567
2715 case 61: /* 14nm Broadwell Core-M */
2716 x86_pmu.late_ack = true;
2717 memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids));
2718 memcpy(hw_cache_extra_regs, hsw_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
2719
2720 intel_pmu_lbr_init_snb();
2721
2722 x86_pmu.event_constraints = intel_bdw_event_constraints;
2723 x86_pmu.pebs_constraints = intel_hsw_pebs_event_constraints;
2724 x86_pmu.extra_regs = intel_snbep_extra_regs;
2725 x86_pmu.pebs_aliases = intel_pebs_aliases_snb;
2726 /* all extra regs are per-cpu when HT is on */
2727 x86_pmu.er_flags |= ERF_HAS_RSP_1;
2728 x86_pmu.er_flags |= ERF_NO_HT_SHARING;
2729
2730 x86_pmu.hw_config = hsw_hw_config;
2731 x86_pmu.get_event_constraints = hsw_get_event_constraints;
2732 x86_pmu.cpu_events = hsw_events_attrs;
2733 x86_pmu.limit_period = bdw_limit_period;
2734 pr_cont("Broadwell events, ");
2735 break;
2736
2737 default: 2568 default:
2738 switch (x86_pmu.version) { 2569 switch (x86_pmu.version) {
2739 case 1: 2570 case 1: