author		Peter Zijlstra <peterz@infradead.org>	2016-07-06 12:02:43 -0400
committer	Ingo Molnar <mingo@kernel.org>	2016-08-10 07:13:24 -0400
commit		09e61b4f78498bd9f213b0a536e80b79507ea89f
tree		8d4732fb8c286e125e578043c212ab602ad2b3fe
parent		3f005e7de3db8d0b3f7a1f399aa061dc35b65864
perf/x86/intel: Rework the large PEBS setup code
In order to allow optimizing perf_pmu_sched_task() we must ensure that
perf_sched_cb_{inc,dec}() are no longer called from NMI context; this
means that pmu::{start,stop}() can no longer use them. Prepare for this
by reworking the whole large PEBS setup code.

The current code relied on the cpuc->pebs_enabled state; however, since
that reflects the currently active state as per pmu::{start,stop}(), we
can no longer rely on it.

Introduce two counters: cpuc->n_pebs, which counts the total number of
PEBS events, and cpuc->n_large_pebs, which counts the number of PEBS
events that have FREERUNNING set. With these we can tell whether the
current setup requires a single-record interrupt threshold or can use a
larger buffer.

This also improves the code in that it re-enables the large threshold
once the PEBS event that required a single-record threshold is removed.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r--	arch/x86/events/intel/ds.c	102
-rw-r--r--	arch/x86/events/perf_event.h	2
-rw-r--r--	kernel/events/core.c	4
3 files changed, 73 insertions(+), 35 deletions(-)
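For reference, here is a condensed, stand-alone C sketch of the counting scheme the patch introduces. The struct and the second helper below are illustrative stand-ins (not the kernel's cpu_hw_events or its actual threshold code); only the "all events are large PEBS" condition mirrors the patch.

	#include <stdbool.h>

	/* Illustrative stand-in for the two counters added to cpu_hw_events. */
	struct pebs_counts {
		int n_pebs;		/* all PEBS events scheduled on this CPU */
		int n_large_pebs;	/* those with FREERUNNING set            */
	};

	/* The sched_task callback is needed only while *all* events are large. */
	static bool pebs_needs_sched_cb(const struct pebs_counts *c)
	{
		return c->n_pebs && (c->n_pebs == c->n_large_pebs);
	}

	/* Hypothetical helper: threshold in records; use the large buffer only
	 * if every scheduled PEBS event tolerates multi-record buffering,
	 * otherwise fall back to a single-record threshold. */
	static int pebs_threshold_records(const struct pebs_counts *c,
					  int buffer_records)
	{
		return (c->n_pebs == c->n_large_pebs) ? buffer_records : 1;
	}

In the patch itself this decision lives in pebs_update_threshold() and pebs_update_state() below, keyed off cpuc->n_pebs and cpuc->n_large_pebs.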
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 7ce9f3f669e6..c791ff961079 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -806,9 +806,55 @@ struct event_constraint *intel_pebs_constraints(struct perf_event *event)
 	return &emptyconstraint;
 }
 
-static inline bool pebs_is_enabled(struct cpu_hw_events *cpuc)
+/*
+ * We need the sched_task callback even for per-cpu events when we use
+ * the large interrupt threshold, such that we can provide PID and TID
+ * to PEBS samples.
+ */
+static inline bool pebs_needs_sched_cb(struct cpu_hw_events *cpuc)
+{
+	return cpuc->n_pebs && (cpuc->n_pebs == cpuc->n_large_pebs);
+}
+
+static inline void pebs_update_threshold(struct cpu_hw_events *cpuc)
+{
+	struct debug_store *ds = cpuc->ds;
+	u64 threshold;
+
+	if (cpuc->n_pebs == cpuc->n_large_pebs) {
+		threshold = ds->pebs_absolute_maximum -
+			x86_pmu.max_pebs_events * x86_pmu.pebs_record_size;
+	} else {
+		threshold = ds->pebs_buffer_base + x86_pmu.pebs_record_size;
+	}
+
+	ds->pebs_interrupt_threshold = threshold;
+}
+
+static void
+pebs_update_state(bool needed_cb, struct cpu_hw_events *cpuc, struct pmu *pmu)
+{
+	if (needed_cb != pebs_needs_sched_cb(cpuc)) {
+		if (!needed_cb)
+			perf_sched_cb_inc(pmu);
+		else
+			perf_sched_cb_dec(pmu);
+
+		pebs_update_threshold(cpuc);
+	}
+}
+
+static void intel_pmu_pebs_add(struct perf_event *event)
 {
-	return (cpuc->pebs_enabled & ((1ULL << MAX_PEBS_EVENTS) - 1));
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+	struct hw_perf_event *hwc = &event->hw;
+	bool needed_cb = pebs_needs_sched_cb(cpuc);
+
+	cpuc->n_pebs++;
+	if (hwc->flags & PERF_X86_EVENT_FREERUNNING)
+		cpuc->n_large_pebs++;
+
+	pebs_update_state(needed_cb, cpuc, event->ctx->pmu);
 }
 
 void intel_pmu_pebs_enable(struct perf_event *event)
@@ -816,12 +862,11 @@ void intel_pmu_pebs_enable(struct perf_event *event)
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 	struct hw_perf_event *hwc = &event->hw;
 	struct debug_store *ds = cpuc->ds;
-	bool first_pebs;
-	u64 threshold;
+
+	intel_pmu_pebs_add(event);
 
 	hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;
 
-	first_pebs = !pebs_is_enabled(cpuc);
 	cpuc->pebs_enabled |= 1ULL << hwc->idx;
 
 	if (event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT)
@@ -830,46 +875,34 @@ void intel_pmu_pebs_enable(struct perf_event *event)
 		cpuc->pebs_enabled |= 1ULL << 63;
 
 	/*
-	 * When the event is constrained enough we can use a larger
-	 * threshold and run the event with less frequent PMI.
+	 * Use auto-reload if possible to save a MSR write in the PMI.
+	 * This must be done in pmu::start(), because PERF_EVENT_IOC_PERIOD.
 	 */
-	if (hwc->flags & PERF_X86_EVENT_FREERUNNING) {
-		threshold = ds->pebs_absolute_maximum -
-			x86_pmu.max_pebs_events * x86_pmu.pebs_record_size;
-
-		if (first_pebs)
-			perf_sched_cb_inc(event->ctx->pmu);
-	} else {
-		threshold = ds->pebs_buffer_base + x86_pmu.pebs_record_size;
-
-		/*
-		 * If not all events can use larger buffer,
-		 * roll back to threshold = 1
-		 */
-		if (!first_pebs &&
-		    (ds->pebs_interrupt_threshold > threshold))
-			perf_sched_cb_dec(event->ctx->pmu);
-	}
-
-	/* Use auto-reload if possible to save a MSR write in the PMI */
 	if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) {
 		ds->pebs_event_reset[hwc->idx] =
 			(u64)(-hwc->sample_period) & x86_pmu.cntval_mask;
 	}
+}
+
+static void intel_pmu_pebs_del(struct perf_event *event)
+{
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+	struct hw_perf_event *hwc = &event->hw;
+	bool needed_cb = pebs_needs_sched_cb(cpuc);
+
+	cpuc->n_pebs--;
+	if (hwc->flags & PERF_X86_EVENT_FREERUNNING)
+		cpuc->n_large_pebs--;
 
-	if (first_pebs || ds->pebs_interrupt_threshold > threshold)
-		ds->pebs_interrupt_threshold = threshold;
+	pebs_update_state(needed_cb, cpuc, event->ctx->pmu);
 }
 
 void intel_pmu_pebs_disable(struct perf_event *event)
 {
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 	struct hw_perf_event *hwc = &event->hw;
-	struct debug_store *ds = cpuc->ds;
-	bool large_pebs = ds->pebs_interrupt_threshold >
-		ds->pebs_buffer_base + x86_pmu.pebs_record_size;
 
-	if (large_pebs)
+	if (cpuc->n_pebs == cpuc->n_large_pebs)
 		intel_pmu_drain_pebs_buffer();
 
 	cpuc->pebs_enabled &= ~(1ULL << hwc->idx);
@@ -879,13 +912,12 @@ void intel_pmu_pebs_disable(struct perf_event *event)
 	else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
 		cpuc->pebs_enabled &= ~(1ULL << 63);
 
-	if (large_pebs && !pebs_is_enabled(cpuc))
-		perf_sched_cb_dec(event->ctx->pmu);
-
 	if (cpuc->enabled)
 		wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
 
 	hwc->config |= ARCH_PERFMON_EVENTSEL_INT;
+
+	intel_pmu_pebs_del(event);
 }
 
 void intel_pmu_pebs_enable_all(void)
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 8c4a47706296..94b8f2702c51 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -194,6 +194,8 @@ struct cpu_hw_events {
 	 */
 	struct debug_store	*ds;
 	u64			pebs_enabled;
+	int			n_pebs;
+	int			n_large_pebs;
 
 	/*
 	 * Intel LBR bits
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 11f6bbe168ab..57aff715039f 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -2818,6 +2818,10 @@ void perf_sched_cb_inc(struct pmu *pmu)
 /*
  * This function provides the context switch callback to the lower code
  * layer. It is invoked ONLY when the context switch callback is enabled.
+ *
+ * This callback is relevant even to per-cpu events; for example multi event
+ * PEBS requires this to provide PID/TID information. This requires we flush
+ * all queued PEBS records before we context switch to a new task.
  */
 static void perf_pmu_sched_task(struct task_struct *prev,
 				struct task_struct *next,