author	Andi Kleen <ak@linux.intel.com>	2015-02-17 21:18:06 -0500
committer	Ingo Molnar <mingo@kernel.org>	2015-03-27 04:14:03 -0400
commit	294fe0f52a44c6f207211de0686c369a961b5533 (patch)
tree	0802f465bd807ee3b0e9e6ecc4522033beb92522 /arch
parent	91f1b70582c62576f429cf78d53751c66677553d (diff)
perf/x86/intel: Add INST_RETIRED.ALL workarounds
On Broadwell INST_RETIRED.ALL cannot be used with any period that doesn't have the lowest 6 bits cleared. And the period should not be smaller than 128. This is erratum BDM11 and BDM55:

  http://www.intel.com/content/dam/www/public/us/en/documents/specification-updates/5th-gen-core-family-spec-update.pdf

BDM11: When using a period < 100, we may get incorrect PEBS/PMI interrupts and/or an invalid counter state.

BDM55: When bits 0-5 of the period are !0 we may get redundant PEBS records on overflow.

Add a new callback to enforce this, and set it for Broadwell.

How does this handle the case when an app requests a specific period with some of the bottom bits set?

Short answer: Any useful instruction sampling period needs to be 4-6 orders of magnitude larger than 128, as a PMI every 128 instructions would instantly overwhelm the system and be throttled. So the +-64 error from this is really small compared to the period, much smaller than normal system jitter.

Long answer (by Peterz):

IFF we guarantee perf_event_attr::sample_period >= 128.

Suppose we start out with sample_period=192; then we'll set period_left to 192, we'll end up with left = 128 (we truncate the lower bits). We get an interrupt, find that period_left = 64 (> 0 so we return 0 and don't get an overflow handler), up that to 128. Then we trigger again, at n=256. Then we find period_left = -64 (<= 0 so we return 1 and do get an overflow). We increment with sample_period so we get left = 128. We fire again, at n=384, period_left = 0 (<= 0 so we return 1 and get an overflow). And on and on.

So while the individual interrupts are 'wrong' we get them with interval=256,128 in exactly the right ratio to average out at 192. And this works for everything >= 128.

So the num_samples*fixed_period thing is still entirely correct +- 127, which is good enough I'd say, as you already have that error anyhow.

So no need to 'fix' the tools, all we need to do is refuse to create INST_RETIRED:ALL events with sample_period < 128.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
[ Updated comments and changelog a bit. ]
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1424225886-18652-3-git-send-email-andi@firstfloor.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
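The averaging that Peterz describes can be checked with a small standalone simulation (a sketch, not part of this patch; the limit_period() helper below merely mirrors what bdw_limit_period() does for INST_RETIRED.ALL):

  /*
   * sim_period.c - userspace sketch of the period_left bookkeeping above.
   * Each loop iteration first plays the PMI handler (report an overflow and
   * top period_left back up when it went non-positive), then re-arms the
   * counter with the clamped/truncated value and lets it count that far.
   */
  #include <stdio.h>

  static long limit_period(long left)
  {
      if (left < 128)             /* BDM11: never program a period below 128 */
          left = 128;
      return left & ~0x3fL;       /* BDM55: clear bits 0-5 */
  }

  int main(void)
  {
      const long sample_period = 192;
      long period_left = sample_period;
      long n = 0, last = 0;

      for (int i = 0; i < 6; i++) {
          long left = period_left;

          if (left <= 0) {        /* overflow: report it and top up */
              left += sample_period;
              period_left = left;
              printf("overflow at n=%ld, interval %ld\n", n, n - last);
              last = n;
          }

          left = limit_period(left);  /* what the counter is armed with */
          n += left;                  /* counter runs until the next PMI */
          period_left -= left;
      }
      return 0;
  }

The printed intervals alternate between 256 and 128, averaging out to the requested 192, as argued above.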
Diffstat (limited to 'arch')
-rw-r--r--	arch/x86/kernel/cpu/perf_event.c	9
-rw-r--r--	arch/x86/kernel/cpu/perf_event.h	1
-rw-r--r--	arch/x86/kernel/cpu/perf_event_intel.c	27
3 files changed, 37 insertions, 0 deletions
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index e0dab5ce61e9..ec6e982fd464 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -451,6 +451,12 @@ int x86_pmu_hw_config(struct perf_event *event)
 	if (event->attr.type == PERF_TYPE_RAW)
 		event->hw.config |= event->attr.config & X86_RAW_EVENT_MASK;
 
+	if (event->attr.sample_period && x86_pmu.limit_period) {
+		if (x86_pmu.limit_period(event, event->attr.sample_period) >
+				event->attr.sample_period)
+			return -EINVAL;
+	}
+
 	return x86_setup_perfctr(event);
 }
 
@@ -988,6 +994,9 @@ int x86_perf_event_set_period(struct perf_event *event)
 	if (left > x86_pmu.max_period)
 		left = x86_pmu.max_period;
 
+	if (x86_pmu.limit_period)
+		left = x86_pmu.limit_period(event, left);
+
 	per_cpu(pmc_prev_left[idx], smp_processor_id()) = left;
 
 	/*
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index a371d27d6795..87e5081f4cdc 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -451,6 +451,7 @@ struct x86_pmu {
 	struct x86_pmu_quirk *quirks;
 	int		perfctr_second_write;
 	bool		late_ack;
+	unsigned	(*limit_period)(struct perf_event *event, unsigned l);
 
 	/*
 	 * sysfs attrs
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 28838536a9f7..fc6dbc46af4a 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -2096,6 +2096,32 @@ hsw_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
 	return c;
 }
 
+/*
+ * Broadwell:
+ *
+ * The INST_RETIRED.ALL period always needs to have lowest 6 bits cleared
+ * (BDM55) and it must not use a period smaller than 100 (BDM11). We combine
+ * the two to enforce a minimum period of 128 (the smallest value that has bits
+ * 0-5 cleared and >= 100).
+ *
+ * Because of how the code in x86_perf_event_set_period() works, the truncation
+ * of the lower 6 bits is 'harmless' as we'll occasionally add a longer period
+ * to make up for the 'lost' events due to carrying the 'error' in period_left.
+ *
+ * Therefore the effective (average) period matches the requested period,
+ * despite coarser hardware granularity.
+ */
+static unsigned bdw_limit_period(struct perf_event *event, unsigned left)
+{
+	if ((event->hw.config & INTEL_ARCH_EVENT_MASK) ==
+			X86_CONFIG(.event=0xc0, .umask=0x01)) {
+		if (left < 128)
+			left = 128;
+		left &= ~0x3fu;
+	}
+	return left;
+}
+
 PMU_FORMAT_ATTR(event,	"config:0-7"	);
 PMU_FORMAT_ATTR(umask,	"config:8-15"	);
 PMU_FORMAT_ATTR(edge,	"config:18"	);
@@ -2774,6 +2800,7 @@ __init int intel_pmu_init(void)
 		x86_pmu.hw_config = hsw_hw_config;
 		x86_pmu.get_event_constraints = hsw_get_event_constraints;
 		x86_pmu.cpu_events = hsw_events_attrs;
+		x86_pmu.limit_period = bdw_limit_period;
 		pr_cont("Broadwell events, ");
 		break;
 
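For completeness, a hypothetical userspace probe (not part of this patch) that exercises the new -EINVAL path added to x86_pmu_hw_config(): opening INST_RETIRED.ALL as a raw event with sample_period below 128 should be rejected on a Broadwell kernel carrying this change, and accepted for any period >= 128.

  /* check_period.c - open INST_RETIRED.ALL (event 0xc0, umask 0x01) with a
   * period below the new minimum of 128 and report what the kernel says.
   */
  #include <stdio.h>
  #include <string.h>
  #include <errno.h>
  #include <unistd.h>
  #include <sys/syscall.h>
  #include <linux/perf_event.h>

  static int perf_event_open(struct perf_event_attr *attr, pid_t pid,
                             int cpu, int group_fd, unsigned long flags)
  {
      return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
  }

  int main(void)
  {
      struct perf_event_attr attr;

      memset(&attr, 0, sizeof(attr));
      attr.size = sizeof(attr);
      attr.type = PERF_TYPE_RAW;
      attr.config = 0x01c0;         /* event 0xc0, umask 0x01: INST_RETIRED.ALL */
      attr.sample_period = 100;     /* below 128, expected to be refused */
      attr.disabled = 1;

      int fd = perf_event_open(&attr, 0, -1, -1, 0);
      if (fd < 0)
          printf("open refused: %s\n", strerror(errno));
      else
          printf("opened fd %d (kernel without the workaround?)\n", fd);
      return 0;
  }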