-rw-r--r--  arch/x86/kernel/cpu/perf_event.c              | 83
-rw-r--r--  arch/x86/kernel/cpu/perf_event.h              | 24
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel.c        | 45
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_ds.c     |  4
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_pt.c     | 36
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_uncore.c |  9
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_uncore.h |  1
-rw-r--r--  include/linux/perf_event.h                    |  4
-rw-r--r--  kernel/events/core.c                          |  3
-rw-r--r--  kernel/events/ring_buffer.c                   | 14
10 files changed, 139 insertions, 84 deletions
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 87848ebe2bb7..4f7001f28936 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -190,6 +190,7 @@ static bool check_hw_exists(void)
 	u64 val, val_fail, val_new= ~0;
 	int i, reg, reg_fail, ret = 0;
 	int bios_fail = 0;
+	int reg_safe = -1;
 
 	/*
 	 * Check to see if the BIOS enabled any of the counters, if so
@@ -204,6 +205,8 @@ static bool check_hw_exists(void)
 			bios_fail = 1;
 			val_fail = val;
 			reg_fail = reg;
+		} else {
+			reg_safe = i;
 		}
 	}
 
@@ -222,11 +225,22 @@ static bool check_hw_exists(void)
 	}
 
 	/*
+	 * If all the counters are enabled, the below test will always
+	 * fail. The tools will also become useless in this scenario.
+	 * Just fail and disable the hardware counters.
+	 */
+
+	if (reg_safe == -1) {
+		reg = reg_safe;
+		goto msr_fail;
+	}
+
+	/*
 	 * Read the current value, change it and read it back to see if it
 	 * matches, this is needed to detect certain hardware emulators
 	 * (qemu/kvm) that don't trap on the MSR access and always return 0s.
 	 */
-	reg = x86_pmu_event_addr(0);
+	reg = x86_pmu_event_addr(reg_safe);
 	if (rdmsrl_safe(reg, &val))
 		goto msr_fail;
 	val ^= 0xffffUL;
@@ -611,6 +625,7 @@ struct sched_state {
 	int event;	/* event index */
 	int counter;	/* counter index */
 	int unassigned;	/* number of events to be assigned left */
+	int nr_gp;	/* number of GP counters used */
 	unsigned long used[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
 };
 
@@ -620,27 +635,29 @@ struct sched_state {
 struct perf_sched {
 	int max_weight;
 	int max_events;
-	struct perf_event **events;
-	struct sched_state state;
+	int max_gp;
 	int saved_states;
+	struct event_constraint **constraints;
+	struct sched_state state;
 	struct sched_state saved[SCHED_STATES_MAX];
 };
 
 /*
  * Initialize interator that runs through all events and counters.
  */
-static void perf_sched_init(struct perf_sched *sched, struct perf_event **events,
-			    int num, int wmin, int wmax)
+static void perf_sched_init(struct perf_sched *sched, struct event_constraint **constraints,
+			    int num, int wmin, int wmax, int gpmax)
 {
 	int idx;
 
 	memset(sched, 0, sizeof(*sched));
 	sched->max_events = num;
 	sched->max_weight = wmax;
-	sched->events = events;
+	sched->max_gp = gpmax;
+	sched->constraints = constraints;
 
 	for (idx = 0; idx < num; idx++) {
-		if (events[idx]->hw.constraint->weight == wmin)
+		if (constraints[idx]->weight == wmin)
 			break;
 	}
 
@@ -687,7 +704,7 @@ static bool __perf_sched_find_counter(struct perf_sched *sched)
 	if (sched->state.event >= sched->max_events)
 		return false;
 
-	c = sched->events[sched->state.event]->hw.constraint;
+	c = sched->constraints[sched->state.event];
 	/* Prefer fixed purpose counters */
 	if (c->idxmsk64 & (~0ULL << INTEL_PMC_IDX_FIXED)) {
 		idx = INTEL_PMC_IDX_FIXED;
@@ -696,11 +713,16 @@ static bool __perf_sched_find_counter(struct perf_sched *sched)
 			goto done;
 		}
 	}
+
 	/* Grab the first unused counter starting with idx */
 	idx = sched->state.counter;
 	for_each_set_bit_from(idx, c->idxmsk, INTEL_PMC_IDX_FIXED) {
-		if (!__test_and_set_bit(idx, sched->state.used))
+		if (!__test_and_set_bit(idx, sched->state.used)) {
+			if (sched->state.nr_gp++ >= sched->max_gp)
+				return false;
+
 			goto done;
+		}
 	}
 
 	return false;
@@ -745,7 +767,7 @@ static bool perf_sched_next_event(struct perf_sched *sched)
 			if (sched->state.weight > sched->max_weight)
 				return false;
 		}
-		c = sched->events[sched->state.event]->hw.constraint;
+		c = sched->constraints[sched->state.event];
 	} while (c->weight != sched->state.weight);
 
 	sched->state.counter = 0;	/* start with first counter */
@@ -756,12 +778,12 @@ static bool perf_sched_next_event(struct perf_sched *sched)
 /*
  * Assign a counter for each event.
  */
-int perf_assign_events(struct perf_event **events, int n,
-			int wmin, int wmax, int *assign)
+int perf_assign_events(struct event_constraint **constraints, int n,
+			int wmin, int wmax, int gpmax, int *assign)
 {
 	struct perf_sched sched;
 
-	perf_sched_init(&sched, events, n, wmin, wmax);
+	perf_sched_init(&sched, constraints, n, wmin, wmax, gpmax);
 
 	do {
 		if (!perf_sched_find_counter(&sched))
@@ -788,9 +810,9 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 		x86_pmu.start_scheduling(cpuc);
 
 	for (i = 0, wmin = X86_PMC_IDX_MAX, wmax = 0; i < n; i++) {
-		hwc = &cpuc->event_list[i]->hw;
+		cpuc->event_constraint[i] = NULL;
 		c = x86_pmu.get_event_constraints(cpuc, i, cpuc->event_list[i]);
-		hwc->constraint = c;
+		cpuc->event_constraint[i] = c;
 
 		wmin = min(wmin, c->weight);
 		wmax = max(wmax, c->weight);
@@ -801,7 +823,7 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 	 */
 	for (i = 0; i < n; i++) {
 		hwc = &cpuc->event_list[i]->hw;
-		c = hwc->constraint;
+		c = cpuc->event_constraint[i];
 
 		/* never assigned */
 		if (hwc->idx == -1)
@@ -821,9 +843,26 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 	}
 
 	/* slow path */
-	if (i != n)
-		unsched = perf_assign_events(cpuc->event_list, n, wmin,
-					     wmax, assign);
+	if (i != n) {
+		int gpmax = x86_pmu.num_counters;
+
+		/*
+		 * Do not allow scheduling of more than half the available
+		 * generic counters.
+		 *
+		 * This helps avoid counter starvation of sibling thread by
+		 * ensuring at most half the counters cannot be in exclusive
+		 * mode. There is no designated counters for the limits. Any
+		 * N/2 counters can be used. This helps with events with
+		 * specific counter constraints.
+		 */
+		if (is_ht_workaround_enabled() && !cpuc->is_fake &&
+		    READ_ONCE(cpuc->excl_cntrs->exclusive_present))
+			gpmax /= 2;
+
+		unsched = perf_assign_events(cpuc->event_constraint, n, wmin,
+					     wmax, gpmax, assign);
+	}
 
 	/*
 	 * In case of success (unsched = 0), mark events as committed,
@@ -840,7 +879,7 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 			e = cpuc->event_list[i];
 			e->hw.flags |= PERF_X86_EVENT_COMMITTED;
 			if (x86_pmu.commit_scheduling)
-				x86_pmu.commit_scheduling(cpuc, e, assign[i]);
+				x86_pmu.commit_scheduling(cpuc, i, assign[i]);
 		}
 	}
 
@@ -1292,8 +1331,10 @@ static void x86_pmu_del(struct perf_event *event, int flags)
 		x86_pmu.put_event_constraints(cpuc, event);
 
 	/* Delete the array entry. */
-	while (++i < cpuc->n_events)
+	while (++i < cpuc->n_events) {
 		cpuc->event_list[i-1] = cpuc->event_list[i];
+		cpuc->event_constraint[i-1] = cpuc->event_constraint[i];
+	}
 	--cpuc->n_events;
 
 	perf_event_update_userpage(event);
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 6ac5cb7a9e14..ef78516850fb 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -74,6 +74,7 @@ struct event_constraint {
 #define PERF_X86_EVENT_EXCL		0x0040 /* HT exclusivity on counter */
 #define PERF_X86_EVENT_DYNAMIC		0x0080 /* dynamic alloc'd constraint */
 #define PERF_X86_EVENT_RDPMC_ALLOWED	0x0100 /* grant rdpmc permission */
+#define PERF_X86_EVENT_EXCL_ACCT	0x0200 /* accounted EXCL event */
 
 
 struct amd_nb {
@@ -134,8 +135,6 @@ enum intel_excl_state_type {
 struct intel_excl_states {
 	enum intel_excl_state_type init_state[X86_PMC_IDX_MAX];
 	enum intel_excl_state_type state[X86_PMC_IDX_MAX];
-	int num_alloc_cntrs;/* #counters allocated */
-	int max_alloc_cntrs;/* max #counters allowed */
 	bool sched_started; /* true if scheduling has started */
 };
 
@@ -144,6 +143,11 @@ struct intel_excl_cntrs {
 
 	struct intel_excl_states states[2];
 
+	union {
+		u16 has_exclusive[2];
+		u32 exclusive_present;
+	};
+
 	int refcnt;		/* per-core: #HT threads */
 	unsigned core_id;	/* per-core: core id */
 };
@@ -172,7 +176,11 @@ struct cpu_hw_events {
 					  added in the current transaction */
 	int assign[X86_PMC_IDX_MAX];	/* event to counter assignment */
 	u64 tags[X86_PMC_IDX_MAX];
+
 	struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */
+	struct event_constraint *event_constraint[X86_PMC_IDX_MAX];
+
+	int n_excl; /* the number of exclusive events */
 
 	unsigned int group_flag;
 	int is_fake;
@@ -519,9 +527,7 @@ struct x86_pmu {
 	void (*put_event_constraints)(struct cpu_hw_events *cpuc,
 				      struct perf_event *event);
 
-	void (*commit_scheduling)(struct cpu_hw_events *cpuc,
-				  struct perf_event *event,
-				  int cntr);
+	void (*commit_scheduling)(struct cpu_hw_events *cpuc, int idx, int cntr);
 
 	void (*start_scheduling)(struct cpu_hw_events *cpuc);
 
@@ -717,8 +723,8 @@ static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc,
 
 void x86_pmu_enable_all(int added);
 
-int perf_assign_events(struct perf_event **events, int n,
-			int wmin, int wmax, int *assign);
+int perf_assign_events(struct event_constraint **constraints, int n,
+			int wmin, int wmax, int gpmax, int *assign);
 int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign);
 
 void x86_pmu_stop(struct perf_event *event, int flags);
@@ -929,4 +935,8 @@ static inline struct intel_shared_regs *allocate_shared_regs(int cpu)
 	return NULL;
 }
 
+static inline int is_ht_workaround_enabled(void)
+{
+	return 0;
+}
 #endif /* CONFIG_CPU_SUP_INTEL */
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 3998131d1a68..a1e35c9f06b9 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1923,7 +1923,6 @@ intel_start_scheduling(struct cpu_hw_events *cpuc)
 	xl = &excl_cntrs->states[tid];
 
 	xl->sched_started = true;
-	xl->num_alloc_cntrs = 0;
 	/*
 	 * lock shared state until we are done scheduling
	 * in stop_event_scheduling()
@@ -2000,6 +1999,11 @@ intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
 	 * across HT threads
 	 */
 	is_excl = c->flags & PERF_X86_EVENT_EXCL;
+	if (is_excl && !(event->hw.flags & PERF_X86_EVENT_EXCL_ACCT)) {
+		event->hw.flags |= PERF_X86_EVENT_EXCL_ACCT;
+		if (!cpuc->n_excl++)
+			WRITE_ONCE(excl_cntrs->has_exclusive[tid], 1);
+	}
 
 	/*
 	 * xl = state of current HT
@@ -2008,18 +2012,6 @@ intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
 	xl = &excl_cntrs->states[tid];
 	xlo = &excl_cntrs->states[o_tid];
 
-	/*
-	 * do not allow scheduling of more than max_alloc_cntrs
-	 * which is set to half the available generic counters.
-	 * this helps avoid counter starvation of sibling thread
-	 * by ensuring at most half the counters cannot be in
-	 * exclusive mode. There is not designated counters for the
-	 * limits. Any N/2 counters can be used. This helps with
-	 * events with specifix counter constraints
-	 */
-	if (xl->num_alloc_cntrs++ == xl->max_alloc_cntrs)
-		return &emptyconstraint;
-
 	cx = c;
 
 	/*
@@ -2106,7 +2098,7 @@ static struct event_constraint *
 intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
			    struct perf_event *event)
 {
-	struct event_constraint *c1 = event->hw.constraint;
+	struct event_constraint *c1 = cpuc->event_constraint[idx];
 	struct event_constraint *c2;
 
 	/*
@@ -2150,6 +2142,11 @@ static void intel_put_excl_constraints(struct cpu_hw_events *cpuc,
 
 	xl = &excl_cntrs->states[tid];
 	xlo = &excl_cntrs->states[o_tid];
+	if (hwc->flags & PERF_X86_EVENT_EXCL_ACCT) {
+		hwc->flags &= ~PERF_X86_EVENT_EXCL_ACCT;
+		if (!--cpuc->n_excl)
+			WRITE_ONCE(excl_cntrs->has_exclusive[tid], 0);
+	}
 
 	/*
 	 * put_constraint may be called from x86_schedule_events()
@@ -2188,8 +2185,6 @@ intel_put_shared_regs_event_constraints(struct cpu_hw_events *cpuc,
 static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
					struct perf_event *event)
 {
-	struct event_constraint *c = event->hw.constraint;
-
 	intel_put_shared_regs_event_constraints(cpuc, event);
 
 	/*
@@ -2197,19 +2192,14 @@ static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
 	 * all events are subject to and must call the
 	 * put_excl_constraints() routine
 	 */
-	if (c && cpuc->excl_cntrs)
+	if (cpuc->excl_cntrs)
 		intel_put_excl_constraints(cpuc, event);
-
-	/* cleanup dynamic constraint */
-	if (c && (c->flags & PERF_X86_EVENT_DYNAMIC))
-		event->hw.constraint = NULL;
 }
 
-static void intel_commit_scheduling(struct cpu_hw_events *cpuc,
-				    struct perf_event *event, int cntr)
+static void intel_commit_scheduling(struct cpu_hw_events *cpuc, int idx, int cntr)
 {
 	struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
-	struct event_constraint *c = event->hw.constraint;
+	struct event_constraint *c = cpuc->event_constraint[idx];
 	struct intel_excl_states *xlo, *xl;
 	int tid = cpuc->excl_thread_id;
 	int o_tid = 1 - tid;
@@ -2639,8 +2629,6 @@ static void intel_pmu_cpu_starting(int cpu)
 		cpuc->lbr_sel = &cpuc->shared_regs->regs[EXTRA_REG_LBR];
 
 	if (x86_pmu.flags & PMU_FL_EXCL_CNTRS) {
-		int h = x86_pmu.num_counters >> 1;
-
 		for_each_cpu(i, topology_thread_cpumask(cpu)) {
 			struct intel_excl_cntrs *c;
 
@@ -2654,11 +2642,6 @@ static void intel_pmu_cpu_starting(int cpu)
 		}
 		cpuc->excl_cntrs->core_id = core_id;
 		cpuc->excl_cntrs->refcnt++;
-		/*
-		 * set hard limit to half the number of generic counters
-		 */
-		cpuc->excl_cntrs->states[0].max_alloc_cntrs = h;
-		cpuc->excl_cntrs->states[1].max_alloc_cntrs = h;
 	}
 }
 
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 813f75d71175..7f73b3553e2e 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -706,9 +706,9 @@ void intel_pmu_pebs_disable(struct perf_event *event)
 
 	cpuc->pebs_enabled &= ~(1ULL << hwc->idx);
 
-	if (event->hw.constraint->flags & PERF_X86_EVENT_PEBS_LDLAT)
+	if (event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT)
 		cpuc->pebs_enabled &= ~(1ULL << (hwc->idx + 32));
-	else if (event->hw.constraint->flags & PERF_X86_EVENT_PEBS_ST)
+	else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
 		cpuc->pebs_enabled &= ~(1ULL << 63);
 
 	if (cpuc->enabled)
diff --git a/arch/x86/kernel/cpu/perf_event_intel_pt.c b/arch/x86/kernel/cpu/perf_event_intel_pt.c
index ffe666c2c6b5..123ff1bb2f60 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_pt.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_pt.c
@@ -151,7 +151,7 @@ static int __init pt_pmu_hw_init(void)
 
 		de_attr->attr.attr.name = pt_caps[i].name;
 
-		sysfs_attr_init(&de_attrs->attr.attr);
+		sysfs_attr_init(&de_attr->attr.attr);
 
 		de_attr->attr.attr.mode = S_IRUGO;
 		de_attr->attr.show = pt_cap_show;
@@ -615,7 +615,8 @@ static int pt_buffer_reset_markers(struct pt_buffer *buf,
				   struct perf_output_handle *handle)
 
 {
-	unsigned long idx, npages, end;
+	unsigned long head = local64_read(&buf->head);
+	unsigned long idx, npages, wakeup;
 
 	if (buf->snapshot)
 		return 0;
@@ -634,17 +635,26 @@ static int pt_buffer_reset_markers(struct pt_buffer *buf,
 	buf->topa_index[buf->stop_pos]->stop = 0;
 	buf->topa_index[buf->intr_pos]->intr = 0;
 
-	if (pt_cap_get(PT_CAP_topa_multiple_entries)) {
-		npages = (handle->size + 1) >> PAGE_SHIFT;
-		end = (local64_read(&buf->head) >> PAGE_SHIFT) + npages;
-		/*if (end > handle->wakeup >> PAGE_SHIFT)
-		  end = handle->wakeup >> PAGE_SHIFT;*/
-		idx = end & (buf->nr_pages - 1);
-		buf->stop_pos = idx;
-		idx = (local64_read(&buf->head) >> PAGE_SHIFT) + npages - 1;
-		idx &= buf->nr_pages - 1;
-		buf->intr_pos = idx;
-	}
+	/* how many pages till the STOP marker */
+	npages = handle->size >> PAGE_SHIFT;
+
+	/* if it's on a page boundary, fill up one more page */
+	if (!offset_in_page(head + handle->size + 1))
+		npages++;
+
+	idx = (head >> PAGE_SHIFT) + npages;
+	idx &= buf->nr_pages - 1;
+	buf->stop_pos = idx;
+
+	wakeup = handle->wakeup >> PAGE_SHIFT;
+
+	/* in the worst case, wake up the consumer one page before hard stop */
+	idx = (head >> PAGE_SHIFT) + npages - 1;
+	if (idx > wakeup)
+		idx = wakeup;
+
+	idx &= buf->nr_pages - 1;
+	buf->intr_pos = idx;
 
 	buf->topa_index[buf->stop_pos]->stop = 1;
 	buf->topa_index[buf->intr_pos]->intr = 1;
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
index c635b8b49e93..dd319e59246b 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -365,9 +365,8 @@ static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int
 	bitmap_zero(used_mask, UNCORE_PMC_IDX_MAX);
 
 	for (i = 0, wmin = UNCORE_PMC_IDX_MAX, wmax = 0; i < n; i++) {
-		hwc = &box->event_list[i]->hw;
 		c = uncore_get_event_constraint(box, box->event_list[i]);
-		hwc->constraint = c;
+		box->event_constraint[i] = c;
 		wmin = min(wmin, c->weight);
 		wmax = max(wmax, c->weight);
 	}
@@ -375,7 +374,7 @@ static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int
 	/* fastpath, try to reuse previous register */
 	for (i = 0; i < n; i++) {
 		hwc = &box->event_list[i]->hw;
-		c = hwc->constraint;
+		c = box->event_constraint[i];
 
 		/* never assigned */
 		if (hwc->idx == -1)
@@ -395,8 +394,8 @@ static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int
 	}
 	/* slow path */
 	if (i != n)
-		ret = perf_assign_events(box->event_list, n,
-					 wmin, wmax, assign);
+		ret = perf_assign_events(box->event_constraint, n,
					 wmin, wmax, n, assign);
 
 	if (!assign || ret) {
 		for (i = 0; i < n; i++)
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
index 6c8c1e7e69d8..f789ec9a0133 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
@@ -97,6 +97,7 @@ struct intel_uncore_box {
 	atomic_t refcnt;
 	struct perf_event *events[UNCORE_PMC_IDX_MAX];
 	struct perf_event *event_list[UNCORE_PMC_IDX_MAX];
+	struct event_constraint *event_constraint[UNCORE_PMC_IDX_MAX];
 	unsigned long active_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)];
 	u64 tags[UNCORE_PMC_IDX_MAX];
 	struct pci_dev *pci_dev;
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 61992cf2e977..d8a82a89f35a 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -92,8 +92,6 @@ struct hw_perf_event_extra {
 	int idx;	/* index in shared_regs->regs[] */
 };
 
-struct event_constraint;
-
 /**
  * struct hw_perf_event - performance event hardware details:
  */
@@ -112,8 +110,6 @@ struct hw_perf_event {
 
 			struct hw_perf_event_extra extra_reg;
 			struct hw_perf_event_extra branch_reg;
-
-			struct event_constraint *constraint;
 		};
 		struct { /* software */
 			struct hrtimer hrtimer;
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 1a3bf48743ce..eddf1ed4155e 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -3442,7 +3442,6 @@ static void free_event_rcu(struct rcu_head *head)
 	if (event->ns)
 		put_pid_ns(event->ns);
 	perf_event_free_filter(event);
-	perf_event_free_bpf_prog(event);
 	kfree(event);
 }
 
@@ -3573,6 +3572,8 @@ static void __free_event(struct perf_event *event)
 		put_callchain_buffers();
 	}
 
+	perf_event_free_bpf_prog(event);
+
 	if (event->destroy)
 		event->destroy(event);
 
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index 232f00f273cb..725c416085e3 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -493,6 +493,20 @@ int rb_alloc_aux(struct ring_buffer *rb, struct perf_event *event,
 		rb->aux_pages[rb->aux_nr_pages] = page_address(page++);
 	}
 
+	/*
+	 * In overwrite mode, PMUs that don't support SG may not handle more
+	 * than one contiguous allocation, since they rely on PMI to do double
+	 * buffering. In this case, the entire buffer has to be one contiguous
+	 * chunk.
+	 */
+	if ((event->pmu->capabilities & PERF_PMU_CAP_AUX_NO_SG) &&
+	    overwrite) {
+		struct page *page = virt_to_page(rb->aux_pages[0]);
+
+		if (page_private(page) != max_order)
+			goto out;
+	}
+
 	rb->aux_priv = event->pmu->setup_aux(event->cpu, rb->aux_pages, nr_pages,
					     overwrite);
 	if (!rb->aux_priv)