author     Peter Zijlstra <peterz@infradead.org>   2016-07-06 12:02:43 -0400
committer  Ingo Molnar <mingo@kernel.org>          2016-08-10 07:13:24 -0400
commit     09e61b4f78498bd9f213b0a536e80b79507ea89f (patch)
tree       8d4732fb8c286e125e578043c212ab602ad2b3fe
parent     3f005e7de3db8d0b3f7a1f399aa061dc35b65864 (diff)
perf/x86/intel: Rework the large PEBS setup code
In order to allow optimizing perf_pmu_sched_task() we must ensure
perf_sched_cb_{inc,dec}() are no longer called from NMI context; this
means that pmu::{start,stop}() can no longer use them.
Prepare for this by reworking the whole large PEBS setup code.
The current code relied on the cpuc->pebs_enabled state; however, since
that only reflects the currently active state as per pmu::{start,stop}(),
we can no longer rely on it.
Introduce two counters: cpuc->n_pebs and cpuc->n_large_pebs, which
count the total number of PEBS events and the number of PEBS events
that have FREERUNNING set, respectively. With these we can tell whether
the current setup requires a single-record interrupt threshold or can
use a larger buffer.
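
Condensed from the ds.c hunk below, the whole test collapses to one
comparison (this is the patch's own pebs_needs_sched_cb(); the comment
here is added for illustration):

	/*
	 * Non-zero n_pebs with n_pebs == n_large_pebs means every active
	 * PEBS event is freerunning, so the large buffer threshold (and
	 * the sched_task flush callback) can be used; any mismatch means
	 * at least one event needs a PMI per record.
	 */
	static inline bool pebs_needs_sched_cb(struct cpu_hw_events *cpuc)
	{
		return cpuc->n_pebs && (cpuc->n_pebs == cpuc->n_large_pebs);
	}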
This also improves the code in that it re-enables the large threshold
once the PEBS event that required the single-record threshold is
removed.
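
For a concrete feel of what the two thresholds buy, a standalone sketch
with made-up numbers (the buffer and record sizes are illustrative
placeholders, not values taken from this patch):

	#include <stdint.h>
	#include <stdio.h>

	#define PEBS_BUFFER_SIZE  (64 * 1024) /* hypothetical DS buffer size   */
	#define PEBS_RECORD_SIZE  192         /* hypothetical PEBS record size */
	#define MAX_PEBS_EVENTS   8

	int main(void)
	{
		uint64_t base = 0x1000;                  /* ds->pebs_buffer_base      */
		uint64_t max  = base + PEBS_BUFFER_SIZE; /* ds->pebs_absolute_maximum */

		/* All events freerunning: interrupt only near the end of the
		 * buffer, leaving one record's worth of room per counter. */
		uint64_t large  = max - MAX_PEBS_EVENTS * PEBS_RECORD_SIZE;

		/* Otherwise: interrupt after every single record. */
		uint64_t single = base + PEBS_RECORD_SIZE;

		printf("large threshold : %llu records per PMI\n",
		       (unsigned long long)((large - base) / PEBS_RECORD_SIZE));
		printf("single threshold: %llu record per PMI\n",
		       (unsigned long long)((single - base) / PEBS_RECORD_SIZE));
		return 0;
	}

With numbers in this ballpark the large threshold amortizes one PMI over
a few hundred records, which is what makes the FREERUNNING path worth
preserving.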
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r--  arch/x86/events/intel/ds.c   | 102
-rw-r--r--  arch/x86/events/perf_event.h |   2
-rw-r--r--  kernel/events/core.c         |   4
3 files changed, 73 insertions(+), 35 deletions(-)
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 7ce9f3f669e6..c791ff961079 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -806,9 +806,55 @@ struct event_constraint *intel_pebs_constraints(struct perf_event *event)
 	return &emptyconstraint;
 }
 
-static inline bool pebs_is_enabled(struct cpu_hw_events *cpuc)
+/*
+ * We need the sched_task callback even for per-cpu events when we use
+ * the large interrupt threshold, such that we can provide PID and TID
+ * to PEBS samples.
+ */
+static inline bool pebs_needs_sched_cb(struct cpu_hw_events *cpuc)
+{
+	return cpuc->n_pebs && (cpuc->n_pebs == cpuc->n_large_pebs);
+}
+
+static inline void pebs_update_threshold(struct cpu_hw_events *cpuc)
+{
+	struct debug_store *ds = cpuc->ds;
+	u64 threshold;
+
+	if (cpuc->n_pebs == cpuc->n_large_pebs) {
+		threshold = ds->pebs_absolute_maximum -
+			x86_pmu.max_pebs_events * x86_pmu.pebs_record_size;
+	} else {
+		threshold = ds->pebs_buffer_base + x86_pmu.pebs_record_size;
+	}
+
+	ds->pebs_interrupt_threshold = threshold;
+}
+
+static void
+pebs_update_state(bool needed_cb, struct cpu_hw_events *cpuc, struct pmu *pmu)
+{
+	if (needed_cb != pebs_needs_sched_cb(cpuc)) {
+		if (!needed_cb)
+			perf_sched_cb_inc(pmu);
+		else
+			perf_sched_cb_dec(pmu);
+
+		pebs_update_threshold(cpuc);
+	}
+}
+
+static void intel_pmu_pebs_add(struct perf_event *event)
 {
-	return (cpuc->pebs_enabled & ((1ULL << MAX_PEBS_EVENTS) - 1));
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+	struct hw_perf_event *hwc = &event->hw;
+	bool needed_cb = pebs_needs_sched_cb(cpuc);
+
+	cpuc->n_pebs++;
+	if (hwc->flags & PERF_X86_EVENT_FREERUNNING)
+		cpuc->n_large_pebs++;
+
+	pebs_update_state(needed_cb, cpuc, event->ctx->pmu);
 }
 
 void intel_pmu_pebs_enable(struct perf_event *event)
@@ -816,12 +862,11 @@ void intel_pmu_pebs_enable(struct perf_event *event)
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 	struct hw_perf_event *hwc = &event->hw;
 	struct debug_store *ds = cpuc->ds;
-	bool first_pebs;
-	u64 threshold;
+
+	intel_pmu_pebs_add(event);
 
 	hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;
 
-	first_pebs = !pebs_is_enabled(cpuc);
 	cpuc->pebs_enabled |= 1ULL << hwc->idx;
 
 	if (event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT)
@@ -830,46 +875,34 @@ void intel_pmu_pebs_enable(struct perf_event *event)
 		cpuc->pebs_enabled |= 1ULL << 63;
 
 	/*
-	 * When the event is constrained enough we can use a larger
-	 * threshold and run the event with less frequent PMI.
+	 * Use auto-reload if possible to save a MSR write in the PMI.
+	 * This must be done in pmu::start(), because PERF_EVENT_IOC_PERIOD.
 	 */
-	if (hwc->flags & PERF_X86_EVENT_FREERUNNING) {
-		threshold = ds->pebs_absolute_maximum -
-			x86_pmu.max_pebs_events * x86_pmu.pebs_record_size;
-
-		if (first_pebs)
-			perf_sched_cb_inc(event->ctx->pmu);
-	} else {
-		threshold = ds->pebs_buffer_base + x86_pmu.pebs_record_size;
-
-		/*
-		 * If not all events can use larger buffer,
-		 * roll back to threshold = 1
-		 */
-		if (!first_pebs &&
-		    (ds->pebs_interrupt_threshold > threshold))
-			perf_sched_cb_dec(event->ctx->pmu);
-	}
-
-	/* Use auto-reload if possible to save a MSR write in the PMI */
 	if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) {
 		ds->pebs_event_reset[hwc->idx] =
 			(u64)(-hwc->sample_period) & x86_pmu.cntval_mask;
 	}
+}
+
+static void intel_pmu_pebs_del(struct perf_event *event)
+{
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+	struct hw_perf_event *hwc = &event->hw;
+	bool needed_cb = pebs_needs_sched_cb(cpuc);
+
+	cpuc->n_pebs--;
+	if (hwc->flags & PERF_X86_EVENT_FREERUNNING)
+		cpuc->n_large_pebs--;
 
-	if (first_pebs || ds->pebs_interrupt_threshold > threshold)
-		ds->pebs_interrupt_threshold = threshold;
+	pebs_update_state(needed_cb, cpuc, event->ctx->pmu);
 }
 
 void intel_pmu_pebs_disable(struct perf_event *event)
 {
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 	struct hw_perf_event *hwc = &event->hw;
-	struct debug_store *ds = cpuc->ds;
-	bool large_pebs = ds->pebs_interrupt_threshold >
-		ds->pebs_buffer_base + x86_pmu.pebs_record_size;
 
-	if (large_pebs)
+	if (cpuc->n_pebs == cpuc->n_large_pebs)
 		intel_pmu_drain_pebs_buffer();
 
 	cpuc->pebs_enabled &= ~(1ULL << hwc->idx);
@@ -879,13 +912,12 @@ void intel_pmu_pebs_disable(struct perf_event *event)
 	else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
 		cpuc->pebs_enabled &= ~(1ULL << 63);
 
-	if (large_pebs && !pebs_is_enabled(cpuc))
-		perf_sched_cb_dec(event->ctx->pmu);
-
 	if (cpuc->enabled)
 		wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
 
 	hwc->config |= ARCH_PERFMON_EVENTSEL_INT;
+
+	intel_pmu_pebs_del(event);
 }
 
 void intel_pmu_pebs_enable_all(void)
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 8c4a47706296..94b8f2702c51 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -194,6 +194,8 @@ struct cpu_hw_events {
 	 */
 	struct debug_store	*ds;
 	u64			pebs_enabled;
+	int			n_pebs;
+	int			n_large_pebs;
 
 	/*
 	 * Intel LBR bits
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 11f6bbe168ab..57aff715039f 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -2818,6 +2818,10 @@ void perf_sched_cb_inc(struct pmu *pmu)
 /*
  * This function provides the context switch callback to the lower code
  * layer. It is invoked ONLY when the context switch callback is enabled.
+ *
+ * This callback is relevant even to per-cpu events; for example multi event
+ * PEBS requires this to provide PID/TID information. This requires we flush
+ * all queued PEBS records before we context switch to a new task.
  */
 static void perf_pmu_sched_task(struct task_struct *prev,
 				struct task_struct *next,