author     Peter Zijlstra <peterz@infradead.org>    2015-05-21 04:57:17 -0400
committer  Ingo Molnar <mingo@kernel.org>           2015-05-27 03:16:03 -0400
commit     cc1790cf541553263bda024295d7600c7cd7c45d
tree       9ddcfe2e7d2805060986c30aef6d0db8361603d9
parent     b371b594317869971af326adcf7cd65cabdb4087
perf/x86: Improve HT workaround GP counter constraint
The (SNB/IVB/HSW) HT bug only affects events that can be programmed
onto GP counters, therefore we should only limit the number of GP
counters that can be used per cpu -- iow we should not constrain the
FP counters.

Furthermore, we should only enforce such a limit when there are in fact
exclusive events being scheduled on either sibling.
Reported-by: Vince Weaver <vincent.weaver@maine.edu>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
[ Fixed build fail for the !CONFIG_CPU_SUP_INTEL case. ]
Signed-off-by: Ingo Molnar <mingo@kernel.org>
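
To make the rule concrete, here is a minimal user-space sketch of the decision this patch moves into the x86_schedule_events() slow path: the generic-counter budget is cut to N/2 only when the HT erratum workaround is active, the scheduling context is not a fake (validation) one, and a sibling actually has exclusive events. This is illustrative only; gp_counter_budget() and struct fake_cpuc are stand-ins, not the kernel's API.

/* Illustrative sketch only -- simplified stand-ins, not kernel code. */
#include <stdbool.h>
#include <stdio.h>

struct fake_cpuc {
	bool is_fake;            /* validation-only scheduling context */
	bool exclusive_present;  /* either sibling has EXCL events queued */
};

/* Mirror of the gpmax computation added to the slow path. */
static int gp_counter_budget(const struct fake_cpuc *cpuc,
			     int num_counters, bool ht_workaround)
{
	int gpmax = num_counters;

	/*
	 * Only cap the budget when sibling starvation is actually
	 * possible; FP/fixed counters are never affected.
	 */
	if (ht_workaround && !cpuc->is_fake && cpuc->exclusive_present)
		gpmax /= 2;

	return gpmax;
}

int main(void)
{
	struct fake_cpuc idle = { .is_fake = false, .exclusive_present = false };
	struct fake_cpuc busy = { .is_fake = false, .exclusive_present = true };

	printf("no exclusive events: %d of 4 GP counters usable\n",
	       gp_counter_budget(&idle, 4, true));
	printf("exclusive events present: %d of 4 GP counters usable\n",
	       gp_counter_budget(&busy, 4, true));
	return 0;
}
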
-rw-r--r--  arch/x86/kernel/cpu/perf_event.c               | 36
-rw-r--r--  arch/x86/kernel/cpu/perf_event.h               | 15
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel.c         | 30
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_uncore.c  |  2
4 files changed, 53 insertions, 30 deletions
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 1664eeea65e0..2eca19422454 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -611,6 +611,7 @@ struct sched_state {
 	int	event;		/* event index */
 	int	counter;	/* counter index */
 	int	unassigned;	/* number of events to be assigned left */
+	int	nr_gp;		/* number of GP counters used */
 	unsigned long used[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
 };
 
@@ -620,9 +621,10 @@ struct sched_state {
 struct perf_sched {
 	int			max_weight;
 	int			max_events;
+	int			max_gp;
+	int			saved_states;
 	struct event_constraint	**constraints;
 	struct sched_state	state;
-	int			saved_states;
 	struct sched_state	saved[SCHED_STATES_MAX];
 };
 
@@ -630,13 +632,14 @@ struct perf_sched {
  * Initialize interator that runs through all events and counters.
  */
 static void perf_sched_init(struct perf_sched *sched, struct event_constraint **constraints,
-			    int num, int wmin, int wmax)
+			    int num, int wmin, int wmax, int gpmax)
 {
 	int idx;
 
 	memset(sched, 0, sizeof(*sched));
 	sched->max_events	= num;
 	sched->max_weight	= wmax;
+	sched->max_gp		= gpmax;
 	sched->constraints	= constraints;
 
 	for (idx = 0; idx < num; idx++) {
@@ -696,11 +699,16 @@ static bool __perf_sched_find_counter(struct perf_sched *sched)
 				goto done;
 		}
 	}
+
 	/* Grab the first unused counter starting with idx */
 	idx = sched->state.counter;
 	for_each_set_bit_from(idx, c->idxmsk, INTEL_PMC_IDX_FIXED) {
-		if (!__test_and_set_bit(idx, sched->state.used))
+		if (!__test_and_set_bit(idx, sched->state.used)) {
+			if (sched->state.nr_gp++ >= sched->max_gp)
+				return false;
+
 			goto done;
+		}
 	}
 
 	return false;
@@ -757,11 +765,11 @@ static bool perf_sched_next_event(struct perf_sched *sched)
  * Assign a counter for each event.
  */
 int perf_assign_events(struct event_constraint **constraints, int n,
-			int wmin, int wmax, int *assign)
+			int wmin, int wmax, int gpmax, int *assign)
 {
 	struct perf_sched sched;
 
-	perf_sched_init(&sched, constraints, n, wmin, wmax);
+	perf_sched_init(&sched, constraints, n, wmin, wmax, gpmax);
 
 	do {
 		if (!perf_sched_find_counter(&sched))
@@ -822,8 +830,24 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 
 	/* slow path */
 	if (i != n) {
+		int gpmax = x86_pmu.num_counters;
+
+		/*
+		 * Do not allow scheduling of more than half the available
+		 * generic counters.
+		 *
+		 * This helps avoid counter starvation of sibling thread by
+		 * ensuring at most half the counters cannot be in exclusive
+		 * mode. There is no designated counters for the limits. Any
+		 * N/2 counters can be used. This helps with events with
+		 * specific counter constraints.
+		 */
+		if (is_ht_workaround_enabled() && !cpuc->is_fake &&
+		    READ_ONCE(cpuc->excl_cntrs->exclusive_present))
+			gpmax /= 2;
+
 		unsched = perf_assign_events(cpuc->event_constraint, n, wmin,
-					     wmax, assign);
+					     wmax, gpmax, assign);
 	}
 
 	/*
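
A compact model of the bookkeeping the hunks above add to the counter-search loop: each generic-counter grab is charged against a max_gp budget, and because the walk stops at the fixed-counter base index, fixed counters are never charged. The constants and helper below are simplified stand-ins, not the kernel's.

/* Simplified model of the GP-counter budget check; not kernel code. */
#include <stdio.h>

#define NUM_GP_COUNTERS   4   /* stand-in for x86_pmu.num_counters */
#define FIXED_BASE_IDX    32  /* stand-in for INTEL_PMC_IDX_FIXED  */

struct sched_model {
	unsigned long long used;  /* bitmap of occupied counter slots */
	int nr_gp;                /* GP counters charged so far       */
	int max_gp;               /* budget (possibly N/2)            */
};

/* Return the first free GP index, or -1 once the budget is spent. */
static int find_gp_counter(struct sched_model *s)
{
	for (int idx = 0; idx < FIXED_BASE_IDX && idx < NUM_GP_COUNTERS; idx++) {
		if (s->used & (1ULL << idx))
			continue;
		s->used |= 1ULL << idx;
		/* The new check: charging past max_gp fails the schedule. */
		if (s->nr_gp++ >= s->max_gp)
			return -1;
		return idx;
	}
	return -1;
}

int main(void)
{
	struct sched_model s = { .used = 0, .nr_gp = 0,
				 .max_gp = NUM_GP_COUNTERS / 2 };

	for (int i = 0; i < NUM_GP_COUNTERS; i++)
		printf("request %d -> counter %d\n", i, find_gp_counter(&s));
	return 0;
}
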
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index fdfaab7c5e55..ef78516850fb 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -74,6 +74,7 @@ struct event_constraint {
 #define PERF_X86_EVENT_EXCL		0x0040 /* HT exclusivity on counter */
 #define PERF_X86_EVENT_DYNAMIC		0x0080 /* dynamic alloc'd constraint */
 #define PERF_X86_EVENT_RDPMC_ALLOWED	0x0100 /* grant rdpmc permission */
+#define PERF_X86_EVENT_EXCL_ACCT	0x0200 /* accounted EXCL event */
 
 
 struct amd_nb {
@@ -134,8 +135,6 @@ enum intel_excl_state_type {
 struct intel_excl_states {
 	enum intel_excl_state_type init_state[X86_PMC_IDX_MAX];
 	enum intel_excl_state_type state[X86_PMC_IDX_MAX];
-	int  num_alloc_cntrs;/* #counters allocated */
-	int  max_alloc_cntrs;/* max #counters allowed */
 	bool sched_started; /* true if scheduling has started */
 };
 
@@ -144,6 +143,11 @@ struct intel_excl_cntrs {
 
 	struct intel_excl_states states[2];
 
+	union {
+		u16	has_exclusive[2];
+		u32	exclusive_present;
+	};
+
 	int		refcnt;		/* per-core: #HT threads */
 	unsigned	core_id;	/* per-core: core id */
 };
@@ -176,6 +180,7 @@ struct cpu_hw_events {
 	struct perf_event	*event_list[X86_PMC_IDX_MAX]; /* in enabled order */
 	struct event_constraint	*event_constraint[X86_PMC_IDX_MAX];
 
+	int			n_excl; /* the number of exclusive events */
 
 	unsigned int		group_flag;
 	int			is_fake;
@@ -719,7 +724,7 @@ static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc,
 void x86_pmu_enable_all(int added);
 
 int perf_assign_events(struct event_constraint **constraints, int n,
-			int wmin, int wmax, int *assign);
+			int wmin, int wmax, int gpmax, int *assign);
 int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign);
 
 void x86_pmu_stop(struct perf_event *event, int flags);
@@ -930,4 +935,8 @@ static inline struct intel_shared_regs *allocate_shared_regs(int cpu)
 	return NULL;
 }
 
+static inline int is_ht_workaround_enabled(void)
+{
+	return 0;
+}
 #endif /* CONFIG_CPU_SUP_INTEL */
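
The union added to struct intel_excl_cntrs above is what makes the "does either sibling have exclusive events?" test a single load: each thread writes only its own u16 slot, and the overlapping u32 view observes both slots at once. A small stand-alone sketch of that overlap (types simplified; this is not the kernel struct):

/* Demonstration of the overlapping-view trick; simplified, not kernel code. */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

union excl_view {
	uint16_t has_exclusive[2];   /* one slot per HT sibling (tid 0/1) */
	uint32_t exclusive_present;  /* non-zero if either slot is set    */
};

int main(void)
{
	union excl_view v = { .exclusive_present = 0 };

	v.has_exclusive[1] = 1;      /* sibling 1 schedules an EXCL event */

	/* Either sibling can now detect the condition with one read. */
	printf("exclusive_present = %" PRIu32 "\n", v.exclusive_present);
	return 0;
}
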
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 7a58fb5df15c..a1e35c9f06b9 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1923,7 +1923,6 @@ intel_start_scheduling(struct cpu_hw_events *cpuc)
 	xl = &excl_cntrs->states[tid];
 
 	xl->sched_started = true;
-	xl->num_alloc_cntrs = 0;
 	/*
 	 * lock shared state until we are done scheduling
 	 * in stop_event_scheduling()
@@ -2000,6 +1999,11 @@ intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
 	 * across HT threads
 	 */
 	is_excl = c->flags & PERF_X86_EVENT_EXCL;
+	if (is_excl && !(event->hw.flags & PERF_X86_EVENT_EXCL_ACCT)) {
+		event->hw.flags |= PERF_X86_EVENT_EXCL_ACCT;
+		if (!cpuc->n_excl++)
+			WRITE_ONCE(excl_cntrs->has_exclusive[tid], 1);
+	}
 
 	/*
 	 * xl = state of current HT
@@ -2008,18 +2012,6 @@ intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
 	xl = &excl_cntrs->states[tid];
 	xlo = &excl_cntrs->states[o_tid];
 
-	/*
-	 * do not allow scheduling of more than max_alloc_cntrs
-	 * which is set to half the available generic counters.
-	 * this helps avoid counter starvation of sibling thread
-	 * by ensuring at most half the counters cannot be in
-	 * exclusive mode. There is not designated counters for the
-	 * limits. Any N/2 counters can be used. This helps with
-	 * events with specifix counter constraints
-	 */
-	if (xl->num_alloc_cntrs++ == xl->max_alloc_cntrs)
-		return &emptyconstraint;
-
 	cx = c;
 
 	/*
@@ -2150,6 +2142,11 @@ static void intel_put_excl_constraints(struct cpu_hw_events *cpuc,
 
 	xl = &excl_cntrs->states[tid];
 	xlo = &excl_cntrs->states[o_tid];
+	if (hwc->flags & PERF_X86_EVENT_EXCL_ACCT) {
+		hwc->flags &= ~PERF_X86_EVENT_EXCL_ACCT;
+		if (!--cpuc->n_excl)
+			WRITE_ONCE(excl_cntrs->has_exclusive[tid], 0);
+	}
 
 	/*
 	 * put_constraint may be called from x86_schedule_events()
@@ -2632,8 +2629,6 @@ static void intel_pmu_cpu_starting(int cpu)
 		cpuc->lbr_sel = &cpuc->shared_regs->regs[EXTRA_REG_LBR];
 
 	if (x86_pmu.flags & PMU_FL_EXCL_CNTRS) {
-		int h = x86_pmu.num_counters >> 1;
-
 		for_each_cpu(i, topology_thread_cpumask(cpu)) {
 			struct intel_excl_cntrs *c;
 
@@ -2647,11 +2642,6 @@ static void intel_pmu_cpu_starting(int cpu)
 		}
 		cpuc->excl_cntrs->core_id = core_id;
 		cpuc->excl_cntrs->refcnt++;
-		/*
-		 * set hard limit to half the number of generic counters
-		 */
-		cpuc->excl_cntrs->states[0].max_alloc_cntrs = h;
-		cpuc->excl_cntrs->states[1].max_alloc_cntrs = h;
 	}
 }
 
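
The get/put hunks above pair up through the new EXCL_ACCT flag: an event is charged into n_excl at most once, and only the 0->1 and 1->0 transitions of that count toggle the sibling-visible flag. A reference-counting sketch of that pattern, under the same simplifications as the earlier examples (all names are stand-ins):

/* Sketch of the exclusive-event accounting; not the kernel implementation. */
#include <stdio.h>

#define EVENT_EXCL       0x0040  /* wants an exclusive counter     */
#define EVENT_EXCL_ACCT  0x0200  /* already charged into the count */

struct thread_state {
	int n_excl;              /* exclusive events on this thread  */
	int has_exclusive;       /* what the sibling gets to observe */
};

static void get_constraint(struct thread_state *t, unsigned int *flags)
{
	if ((*flags & EVENT_EXCL) && !(*flags & EVENT_EXCL_ACCT)) {
		*flags |= EVENT_EXCL_ACCT;        /* charge only once  */
		if (!t->n_excl++)
			t->has_exclusive = 1;     /* 0 -> 1 transition */
	}
}

static void put_constraint(struct thread_state *t, unsigned int *flags)
{
	if (*flags & EVENT_EXCL_ACCT) {
		*flags &= ~EVENT_EXCL_ACCT;
		if (!--t->n_excl)
			t->has_exclusive = 0;     /* 1 -> 0 transition */
	}
}

int main(void)
{
	struct thread_state t = { 0, 0 };
	unsigned int a = EVENT_EXCL, b = EVENT_EXCL;

	get_constraint(&t, &a);
	get_constraint(&t, &a);   /* second call is a no-op: already charged */
	get_constraint(&t, &b);
	printf("after gets: n_excl=%d has_exclusive=%d\n", t.n_excl, t.has_exclusive);

	put_constraint(&t, &a);
	put_constraint(&t, &b);
	printf("after puts: n_excl=%d has_exclusive=%d\n", t.n_excl, t.has_exclusive);
	return 0;
}
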
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
index ec2ba578d286..dd319e59246b 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -395,7 +395,7 @@ static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int
 	/* slow path */
 	if (i != n)
 		ret = perf_assign_events(box->event_constraint, n,
-					 wmin, wmax, assign);
+					 wmin, wmax, n, assign);
 
 	if (!assign || ret) {
 		for (i = 0; i < n; i++)