diff options
author | Andi Kleen <ak@linux.intel.com> | 2014-09-02 14:44:14 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@kernel.org> | 2014-09-24 08:48:19 -0400 |
commit | c46e665f037743bbce7887e7c55750713eb7fb09 (patch) | |
tree | b1a83558436d1f82e18f0bf9bd9d5c7ded33e32b | |
parent | 86a349a28b249bf4c3b396af3cc550760dedb456 (diff) |
perf/x86: Add INST_RETIRED.ALL workarounds
On Broadwell INST_RETIRED.ALL cannot be used with any period
that doesn't have the lowest 6 bits cleared. And the period
should not be smaller than 128.
Add a new callback to enforce this, and set it for Broadwell.
This is erratum BDM57 and BDM11.
How does this handle the case when an app requests a specific
period with some of the bottom bits set?
The app thinks it is sampling at X occurrences per sample, when it is
in fact at X - 63 (worst case).
Short answer:
Any useful instruction sampling period needs to be 4-6 orders
of magnitude larger than 128, as a PMI every 128 instructions
would instantly overwhelm the system and be throttled.
So the +-64 error from this is really small compared to the
period, much smaller than normal system jitter.
Long answer:
<write up by Peter:>
IFF we guarantee perf_event_attr::sample_period >= 128.
Suppose we start out with sample_period=192; then we'll set period_left
to 192, we'll end up with left = 128 (we truncate the lower bits). We
get an interrupt, find that period_left = 64 (>0 so we return 0 and
don't get an overflow handler), up that to 128. Then we trigger again,
at n=256. Then we find period_left = -64 (<=0 so we return 1 and do get
an overflow). We increment with sample_period so we get left = 128. We
fire again, at n=384, period_left = 0 (<=0 so we return 1 and get an
overflow). And on and on.
So while the individual interrupts are 'wrong' we get them with
interval=256,128 in exactly the right ratio to average out at 192. And
this works for everything >=128.
So the num_samples*fixed_period thing is still entirely correct +- 127,
which is good enough I'd say, as you already have that error anyhow.
So no need to 'fix' the tools, all we need to do is refuse to create
INST_RETIRED:ALL events with sample_period < 128.
Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Kan Liang <kan.liang@intel.com>
Cc: Maria Dimakopoulou <maria.n.dimakopoulou@gmail.com>
Cc: Mark Davies <junk@eslaf.co.uk>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1409683455-29168-4-git-send-email-andi@firstfloor.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r-- | arch/x86/kernel/cpu/perf_event.c | 9 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/perf_event.h | 1 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/perf_event_intel.c | 19 |
3 files changed, 29 insertions, 0 deletions
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 0646d3b63b9d..918d75f77be7 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c | |||
@@ -443,6 +443,12 @@ int x86_pmu_hw_config(struct perf_event *event) | |||
443 | if (event->attr.type == PERF_TYPE_RAW) | 443 | if (event->attr.type == PERF_TYPE_RAW) |
444 | event->hw.config |= event->attr.config & X86_RAW_EVENT_MASK; | 444 | event->hw.config |= event->attr.config & X86_RAW_EVENT_MASK; |
445 | 445 | ||
446 | if (event->attr.sample_period && x86_pmu.limit_period) { | ||
447 | if (x86_pmu.limit_period(event, event->attr.sample_period) > | ||
448 | event->attr.sample_period) | ||
449 | return -EINVAL; | ||
450 | } | ||
451 | |||
446 | return x86_setup_perfctr(event); | 452 | return x86_setup_perfctr(event); |
447 | } | 453 | } |
448 | 454 | ||
@@ -980,6 +986,9 @@ int x86_perf_event_set_period(struct perf_event *event) | |||
980 | if (left > x86_pmu.max_period) | 986 | if (left > x86_pmu.max_period) |
981 | left = x86_pmu.max_period; | 987 | left = x86_pmu.max_period; |
982 | 988 | ||
989 | if (x86_pmu.limit_period) | ||
990 | left = x86_pmu.limit_period(event, left); | ||
991 | |||
983 | per_cpu(pmc_prev_left[idx], smp_processor_id()) = left; | 992 | per_cpu(pmc_prev_left[idx], smp_processor_id()) = left; |
984 | 993 | ||
985 | /* | 994 | /* |
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index fc5eb390b368..d98a34d435d7 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h | |||
@@ -445,6 +445,7 @@ struct x86_pmu { | |||
445 | struct x86_pmu_quirk *quirks; | 445 | struct x86_pmu_quirk *quirks; |
446 | int perfctr_second_write; | 446 | int perfctr_second_write; |
447 | bool late_ack; | 447 | bool late_ack; |
448 | unsigned (*limit_period)(struct perf_event *event, unsigned l); | ||
448 | 449 | ||
449 | /* | 450 | /* |
450 | * sysfs attrs | 451 | * sysfs attrs |
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 567156608613..bbcd0d2b157a 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c | |||
@@ -2034,6 +2034,24 @@ hsw_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) | |||
2034 | return c; | 2034 | return c; |
2035 | } | 2035 | } |
2036 | 2036 | ||
2037 | /* | ||
2038 | * Broadwell: | ||
2039 | * The INST_RETIRED.ALL period always needs to have lowest | ||
2040 | * 6bits cleared (BDM57). It shall not use a period smaller | ||
2041 | * than 100 (BDM11). We combine the two to enforce | ||
2042 | * a min-period of 128. | ||
2043 | */ | ||
2044 | static unsigned bdw_limit_period(struct perf_event *event, unsigned left) | ||
2045 | { | ||
2046 | if ((event->hw.config & INTEL_ARCH_EVENT_MASK) == | ||
2047 | X86_CONFIG(.event=0xc0, .umask=0x01)) { | ||
2048 | if (left < 128) | ||
2049 | left = 128; | ||
2050 | left &= ~0x3fu; | ||
2051 | } | ||
2052 | return left; | ||
2053 | } | ||
2054 | |||
2037 | PMU_FORMAT_ATTR(event, "config:0-7" ); | 2055 | PMU_FORMAT_ATTR(event, "config:0-7" ); |
2038 | PMU_FORMAT_ATTR(umask, "config:8-15" ); | 2056 | PMU_FORMAT_ATTR(umask, "config:8-15" ); |
2039 | PMU_FORMAT_ATTR(edge, "config:18" ); | 2057 | PMU_FORMAT_ATTR(edge, "config:18" ); |
@@ -2712,6 +2730,7 @@ __init int intel_pmu_init(void) | |||
2712 | x86_pmu.hw_config = hsw_hw_config; | 2730 | x86_pmu.hw_config = hsw_hw_config; |
2713 | x86_pmu.get_event_constraints = hsw_get_event_constraints; | 2731 | x86_pmu.get_event_constraints = hsw_get_event_constraints; |
2714 | x86_pmu.cpu_events = hsw_events_attrs; | 2732 | x86_pmu.cpu_events = hsw_events_attrs; |
2733 | x86_pmu.limit_period = bdw_limit_period; | ||
2715 | pr_cont("Broadwell events, "); | 2734 | pr_cont("Broadwell events, "); |
2716 | break; | 2735 | break; |
2717 | 2736 | ||