-rw-r--r--	arch/x86/kernel/cpu/perf_event.c	|  31
-rw-r--r--	arch/x86/kernel/cpu/perf_event.h	|   6
-rw-r--r--	arch/x86/kernel/cpu/perf_event_intel.c	| 307
3 files changed, 331 insertions(+), 13 deletions(-)
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 71755401476c..b8b7a1277d8d 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -779,7 +779,7 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 	struct event_constraint *c;
 	unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
 	struct perf_event *e;
-	int i, wmin, wmax, num = 0;
+	int i, wmin, wmax, unsched = 0;
 	struct hw_perf_event *hwc;
 
 	bitmap_zero(used_mask, X86_PMC_IDX_MAX);
@@ -822,14 +822,20 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 
 	/* slow path */
 	if (i != n)
-		num = perf_assign_events(cpuc->event_list, n, wmin,
-					 wmax, assign);
+		unsched = perf_assign_events(cpuc->event_list, n, wmin,
+					     wmax, assign);
 
 	/*
-	 * Mark the event as committed, so we do not put_constraint()
-	 * in case new events are added and fail scheduling.
+	 * In case of success (unsched = 0), mark events as committed,
+	 * so we do not put_constraint() in case new events are added
+	 * and fail to be scheduled
+	 *
+	 * We invoke the lower level commit callback to lock the resource
+	 *
+	 * We do not need to do all of this in case we are called to
+	 * validate an event group (assign == NULL)
 	 */
-	if (!num && assign) {
+	if (!unsched && assign) {
 		for (i = 0; i < n; i++) {
 			e = cpuc->event_list[i];
 			e->hw.flags |= PERF_X86_EVENT_COMMITTED;
@@ -837,11 +843,9 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 				x86_pmu.commit_scheduling(cpuc, e, assign[i]);
 		}
 	}
-	/*
-	 * scheduling failed or is just a simulation,
-	 * free resources if necessary
-	 */
-	if (!assign || num) {
+
+	if (!assign || unsched) {
+
 		for (i = 0; i < n; i++) {
 			e = cpuc->event_list[i];
 			/*
@@ -851,6 +855,9 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 			if ((e->hw.flags & PERF_X86_EVENT_COMMITTED))
 				continue;
 
+			/*
+			 * release events that failed scheduling
+			 */
 			if (x86_pmu.put_event_constraints)
 				x86_pmu.put_event_constraints(cpuc, e);
 		}
@@ -859,7 +866,7 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 	if (x86_pmu.stop_scheduling)
 		x86_pmu.stop_scheduling(cpuc);
 
-	return num ? -EINVAL : 0;
+	return unsched ? -EINVAL : 0;
 }
 
 /*
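The hunks above turn event scheduling into a commit-or-rollback transaction: on a successful real scheduling pass (assign != NULL) every event is marked committed, while a failure or a validation-only call releases the constraints of anything not yet committed. The following is a minimal user-space sketch of just that control flow, not kernel code; every name in it (fake_event, fake_assign, schedule_events, MAX_EVENTS) is invented for illustration.

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

#define MAX_EVENTS 4

struct fake_event {
	bool committed;		/* stands in for PERF_X86_EVENT_COMMITTED */
	bool has_constraint;	/* stands in for a held event constraint */
};

/* pretend scheduler: returns the number of events that could NOT be placed */
static int fake_assign(struct fake_event *evts, int n, int *assignment)
{
	(void)evts;
	for (int i = 0; i < n; i++)
		assignment[i] = i;	/* always succeeds in this toy model */
	return 0;
}

static int schedule_events(struct fake_event *evts, int n, int *assign)
{
	int assignment[MAX_EVENTS];
	int unsched = fake_assign(evts, n, assignment);

	/* success and not a dry run: commit, so a later failure does not
	 * release constraints that are already in active use */
	if (!unsched && assign) {
		for (int i = 0; i < n; i++) {
			evts[i].committed = true;
			assign[i] = assignment[i];
		}
	}

	/* failure, or validation-only call (assign == NULL):
	 * release whatever was not committed */
	if (!assign || unsched) {
		for (int i = 0; i < n; i++) {
			if (evts[i].committed)
				continue;
			evts[i].has_constraint = false;	/* put_event_constraints() */
		}
	}

	return unsched ? -1 : 0;
}

int main(void)
{
	struct fake_event evts[2] = { { false, true }, { false, true } };
	int assign[MAX_EVENTS];

	printf("real scheduling pass: %d\n", schedule_events(evts, 2, assign));
	printf("validation-only pass: %d\n", schedule_events(evts, 2, NULL));
	return 0;
}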
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index f31f90e2d859..236afee35587 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -72,6 +72,7 @@ struct event_constraint {
 #define PERF_X86_EVENT_PEBS_LD_HSW	0x10 /* haswell style datala, load */
 #define PERF_X86_EVENT_PEBS_NA_HSW	0x20 /* haswell style datala, unknown */
 #define PERF_X86_EVENT_EXCL		0x40 /* HT exclusivity on counter */
+#define PERF_X86_EVENT_DYNAMIC		0x80 /* dynamic alloc'd constraint */
 #define PERF_X86_EVENT_RDPMC_ALLOWED	0x40 /* grant rdpmc permission */
 
 
@@ -133,6 +134,7 @@ enum intel_excl_state_type {
 struct intel_excl_states {
 	enum intel_excl_state_type init_state[X86_PMC_IDX_MAX];
 	enum intel_excl_state_type state[X86_PMC_IDX_MAX];
+	bool sched_started; /* true if scheduling has started */
 };
 
 struct intel_excl_cntrs {
@@ -296,6 +298,10 @@ struct cpu_hw_events {
 #define INTEL_FLAGS_UEVENT_CONSTRAINT(c, n)	\
 	EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS)
 
+#define INTEL_EXCLUEVT_CONSTRAINT(c, n)	\
+	__EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
+			   HWEIGHT(n), 0, PERF_X86_EVENT_EXCL)
+
 #define INTEL_PLD_CONSTRAINT(c, n)	\
 	__EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
 			   HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LDLAT)
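The new INTEL_EXCLUEVT_CONSTRAINT() macro and the PERF_X86_EVENT_DYNAMIC flag couple a counter bitmask (whose weight is its popcount, HWEIGHT(n)) with flag bits carried on the event. A standalone toy model of that coupling follows; the struct, the helper and main() are invented for this sketch, and only the two flag values mirror the header above.

#include <stdint.h>
#include <stdio.h>

#define FLAG_EXCL	0x40	/* HT exclusivity on counter */
#define FLAG_DYNAMIC	0x80	/* dynamically allocated copy */

struct toy_constraint {
	uint64_t idxmsk;	/* which counters the event may use */
	int weight;		/* number of usable counters */
	unsigned int flags;
};

static struct toy_constraint make_excl(uint64_t idxmsk)
{
	struct toy_constraint c = {
		.idxmsk = idxmsk,
		.weight = __builtin_popcountll(idxmsk),	/* plays the role of HWEIGHT(n) */
		.flags  = FLAG_EXCL,
	};
	return c;
}

int main(void)
{
	/* event allowed on counters 0-3, marked HT-exclusive */
	struct toy_constraint c = make_excl(0xfULL);

	printf("weight=%d excl=%d dynamic=%d\n",
	       c.weight, !!(c.flags & FLAG_EXCL), !!(c.flags & FLAG_DYNAMIC));
	return 0;
}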
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 7f54000fd0f1..91cc7749d7ce 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1845,7 +1845,7 @@ x86_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
 }
 
 static struct event_constraint *
-intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
+__intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
 			    struct perf_event *event)
 {
 	struct event_constraint *c;
@@ -1866,6 +1866,254 @@ intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
 }
 
 static void
+intel_start_scheduling(struct cpu_hw_events *cpuc)
+{
+	struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
+	struct intel_excl_states *xl, *xlo;
+	int tid = cpuc->excl_thread_id;
+	int o_tid = 1 - tid; /* sibling thread */
+
+	/*
+	 * nothing needed if in group validation mode
+	 */
+	if (cpuc->is_fake)
+		return;
+	/*
+	 * no exclusion needed
+	 */
+	if (!excl_cntrs)
+		return;
+
+	xlo = &excl_cntrs->states[o_tid];
+	xl = &excl_cntrs->states[tid];
+
+	xl->sched_started = true;
+
+	/*
+	 * lock shared state until we are done scheduling
+	 * in stop_event_scheduling()
+	 * makes scheduling appear as a transaction
+	 */
+	WARN_ON_ONCE(!irqs_disabled());
+	raw_spin_lock(&excl_cntrs->lock);
+
+	/*
+	 * save initial state of sibling thread
+	 */
+	memcpy(xlo->init_state, xlo->state, sizeof(xlo->init_state));
+}
+
+static void
+intel_stop_scheduling(struct cpu_hw_events *cpuc)
+{
+	struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
+	struct intel_excl_states *xl, *xlo;
+	int tid = cpuc->excl_thread_id;
+	int o_tid = 1 - tid; /* sibling thread */
+
+	/*
+	 * nothing needed if in group validation mode
+	 */
+	if (cpuc->is_fake)
+		return;
+	/*
+	 * no exclusion needed
+	 */
+	if (!excl_cntrs)
+		return;
+
+	xlo = &excl_cntrs->states[o_tid];
+	xl = &excl_cntrs->states[tid];
+
+	/*
+	 * make new sibling thread state visible
+	 */
+	memcpy(xlo->state, xlo->init_state, sizeof(xlo->state));
+
+	xl->sched_started = false;
+	/*
+	 * release shared state lock (acquired in intel_start_scheduling())
+	 */
+	raw_spin_unlock(&excl_cntrs->lock);
+}
+
+static struct event_constraint *
+intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
+			   int idx, struct event_constraint *c)
+{
+	struct event_constraint *cx;
+	struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
+	struct intel_excl_states *xl, *xlo;
+	int is_excl, i;
+	int tid = cpuc->excl_thread_id;
+	int o_tid = 1 - tid; /* alternate */
+
+	/*
+	 * validating a group does not require
+	 * enforcing cross-thread exclusion
+	 */
+	if (cpuc->is_fake)
+		return c;
+
+	/*
+	 * event requires exclusive counter access
+	 * across HT threads
+	 */
+	is_excl = c->flags & PERF_X86_EVENT_EXCL;
+
+	/*
+	 * xl = state of current HT
+	 * xlo = state of sibling HT
+	 */
+	xl = &excl_cntrs->states[tid];
+	xlo = &excl_cntrs->states[o_tid];
+
+	cx = c;
+
+	/*
+	 * because we modify the constraint, we need
+	 * to make a copy. Static constraints come
+	 * from static const tables.
+	 *
+	 * only needed when constraint has not yet
+	 * been cloned (marked dynamic)
+	 */
+	if (!(c->flags & PERF_X86_EVENT_DYNAMIC)) {
+
+		/* sanity check */
+		if (idx < 0)
+			return &emptyconstraint;
+
+		/*
+		 * grab pre-allocated constraint entry
+		 */
+		cx = &cpuc->constraint_list[idx];
+
+		/*
+		 * initialize dynamic constraint
+		 * with static constraint
+		 */
+		memcpy(cx, c, sizeof(*cx));
+
+		/*
+		 * mark constraint as dynamic, so we
+		 * can free it later on
+		 */
+		cx->flags |= PERF_X86_EVENT_DYNAMIC;
+	}
+
+	/*
+	 * From here on, the constraint is dynamic.
+	 * Either it was just allocated above, or it
+	 * was allocated during an earlier invocation
+	 * of this function
+	 */
+
+	/*
+	 * Modify static constraint with current dynamic
+	 * state of thread
+	 *
+	 * EXCLUSIVE: sibling counter measuring exclusive event
+	 * SHARED   : sibling counter measuring non-exclusive event
+	 * UNUSED   : sibling counter unused
+	 */
+	for_each_set_bit(i, cx->idxmsk, X86_PMC_IDX_MAX) {
+		/*
+		 * exclusive event in sibling counter
+		 * our corresponding counter cannot be used
+		 * regardless of our event
+		 */
+		if (xl->state[i] == INTEL_EXCL_EXCLUSIVE)
+			__clear_bit(i, cx->idxmsk);
+		/*
+		 * if measuring an exclusive event, sibling
+		 * measuring non-exclusive, then counter cannot
+		 * be used
+		 */
+		if (is_excl && xl->state[i] == INTEL_EXCL_SHARED)
+			__clear_bit(i, cx->idxmsk);
+	}
+
+	/*
+	 * recompute actual bit weight for scheduling algorithm
+	 */
+	cx->weight = hweight64(cx->idxmsk64);
+
+	/*
+	 * if we return an empty mask, then switch
+	 * back to static empty constraint to avoid
+	 * the cost of freeing later on
+	 */
+	if (cx->weight == 0)
+		cx = &emptyconstraint;
+
+	return cx;
+}
+
+static struct event_constraint *
+intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
+			    struct perf_event *event)
+{
+	struct event_constraint *c = event->hw.constraint;
+
+	/*
+	 * first time only
+	 * - static constraint: no change across incremental scheduling calls
+	 * - dynamic constraint: handled by intel_get_excl_constraints()
+	 */
+	if (!c)
+		c = __intel_get_event_constraints(cpuc, idx, event);
+
+	if (cpuc->excl_cntrs)
+		return intel_get_excl_constraints(cpuc, event, idx, c);
+
+	return c;
+}
+
+static void intel_put_excl_constraints(struct cpu_hw_events *cpuc,
+		struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
+	struct intel_excl_states *xlo, *xl;
+	unsigned long flags = 0; /* keep compiler happy */
+	int tid = cpuc->excl_thread_id;
+	int o_tid = 1 - tid;
+
+	/*
+	 * nothing needed if in group validation mode
+	 */
+	if (cpuc->is_fake)
+		return;
+
+	WARN_ON_ONCE(!excl_cntrs);
+
+	if (!excl_cntrs)
+		return;
+
+	xl = &excl_cntrs->states[tid];
+	xlo = &excl_cntrs->states[o_tid];
+
+	/*
+	 * put_constraint may be called from x86_schedule_events()
+	 * which already has the lock held so here make locking
+	 * conditional
+	 */
+	if (!xl->sched_started)
+		raw_spin_lock_irqsave(&excl_cntrs->lock, flags);
+
+	/*
+	 * if event was actually assigned, then mark the
+	 * counter state as unused now
+	 */
+	if (hwc->idx >= 0)
+		xlo->state[hwc->idx] = INTEL_EXCL_UNUSED;
+
+	if (!xl->sched_started)
+		raw_spin_unlock_irqrestore(&excl_cntrs->lock, flags);
+}
+
+static void
 intel_put_shared_regs_event_constraints(struct cpu_hw_events *cpuc,
 					struct perf_event *event)
 {
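intel_get_excl_constraints() clones the static constraint into a per-CPU dynamic copy and then strips counters whose recorded cross-HT state disallows them: a counter is dropped if its state is EXCLUSIVE, or if it is SHARED while the incoming event itself needs exclusive access; the weight is then recomputed from the surviving mask. Below is a user-space sketch of just that filtering step; the enum values mirror the patch, but the function, its parameters and main() are invented for illustration.

#include <stdint.h>
#include <stdio.h>

enum excl_state { EXCL_UNUSED, EXCL_SHARED, EXCL_EXCLUSIVE };

static uint64_t filter_mask(uint64_t idxmsk, const enum excl_state *state,
			    int ncounters, int is_excl)
{
	for (int i = 0; i < ncounters; i++) {
		if (!(idxmsk & (1ULL << i)))
			continue;
		/* an exclusive event occupies this counter: unusable for us */
		if (state[i] == EXCL_EXCLUSIVE)
			idxmsk &= ~(1ULL << i);
		/* we need exclusivity but the counter is already shared */
		if (is_excl && state[i] == EXCL_SHARED)
			idxmsk &= ~(1ULL << i);
	}
	return idxmsk;
}

int main(void)
{
	enum excl_state recorded[4] = {
		EXCL_UNUSED, EXCL_SHARED, EXCL_EXCLUSIVE, EXCL_UNUSED
	};

	/* non-exclusive event: loses only counter 2 -> 0xb */
	printf("0x%llx\n", (unsigned long long)filter_mask(0xf, recorded, 4, 0));
	/* exclusive event: loses counters 1 and 2 -> 0x9 */
	printf("0x%llx\n", (unsigned long long)filter_mask(0xf, recorded, 4, 1));
	return 0;
}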
@@ -1883,7 +2131,57 @@ intel_put_shared_regs_event_constraints(struct cpu_hw_events *cpuc,
 static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
 					struct perf_event *event)
 {
+	struct event_constraint *c = event->hw.constraint;
+
 	intel_put_shared_regs_event_constraints(cpuc, event);
+
+	/*
+	 * if PMU has exclusive counter restrictions, then
+	 * all events are subject to and must call the
+	 * put_excl_constraints() routine
+	 */
+	if (c && cpuc->excl_cntrs)
+		intel_put_excl_constraints(cpuc, event);
+
+	/* cleanup dynamic constraint */
+	if (c && (c->flags & PERF_X86_EVENT_DYNAMIC))
+		event->hw.constraint = NULL;
+}
+
+static void intel_commit_scheduling(struct cpu_hw_events *cpuc,
+				    struct perf_event *event, int cntr)
+{
+	struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
+	struct event_constraint *c = event->hw.constraint;
+	struct intel_excl_states *xlo, *xl;
+	int tid = cpuc->excl_thread_id;
+	int o_tid = 1 - tid;
+	int is_excl;
+
+	if (cpuc->is_fake || !c)
+		return;
+
+	is_excl = c->flags & PERF_X86_EVENT_EXCL;
+
+	if (!(c->flags & PERF_X86_EVENT_DYNAMIC))
+		return;
+
+	WARN_ON_ONCE(!excl_cntrs);
+
+	if (!excl_cntrs)
+		return;
+
+	xl = &excl_cntrs->states[tid];
+	xlo = &excl_cntrs->states[o_tid];
+
+	WARN_ON_ONCE(!raw_spin_is_locked(&excl_cntrs->lock));
+
+	if (cntr >= 0) {
+		if (is_excl)
+			xlo->init_state[cntr] = INTEL_EXCL_EXCLUSIVE;
+		else
+			xlo->init_state[cntr] = INTEL_EXCL_SHARED;
+	}
 }
 
 static void intel_pebs_aliases_core2(struct perf_event *event)
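intel_commit_scheduling() records, for the counter an event actually landed on, whether the sibling thread must treat that counter as exclusive or merely shared. A toy version of that bookkeeping, with all names invented for the sketch:

#include <stdio.h>

enum excl_state { EXCL_UNUSED, EXCL_SHARED, EXCL_EXCLUSIVE };

static void commit_counter(enum excl_state *sibling_view, int cntr, int is_excl)
{
	if (cntr < 0)
		return;	/* event was not assigned a counter */
	sibling_view[cntr] = is_excl ? EXCL_EXCLUSIVE : EXCL_SHARED;
}

int main(void)
{
	enum excl_state view[4] = {
		EXCL_UNUSED, EXCL_UNUSED, EXCL_UNUSED, EXCL_UNUSED
	};

	commit_counter(view, 1, 1);	/* exclusive event landed on counter 1 */
	commit_counter(view, 3, 0);	/* shared event landed on counter 3 */

	for (int i = 0; i < 4; i++)
		printf("counter %d: %d\n", i, view[i]);
	return 0;
}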
@@ -2349,6 +2647,13 @@ static void intel_pmu_cpu_dying(int cpu)
 		cpuc->constraint_list = NULL;
 	}
 
+	c = cpuc->excl_cntrs;
+	if (c) {
+		if (c->core_id == -1 || --c->refcnt == 0)
+			kfree(c);
+		cpuc->excl_cntrs = NULL;
+	}
+
 	fini_debug_store_on_cpu(cpu);
 }
 
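The cpu_dying hunk frees the excl_cntrs structure shared by the two HT siblings of a core only when the last CPU drops its reference, or when the structure was never tied to a core (core_id == -1). A minimal user-space model of that teardown follows; plain malloc/free stand in for the kernel allocator and all names are invented.

#include <stdio.h>
#include <stdlib.h>

struct shared_cntrs {
	int core_id;
	int refcnt;
};

static void cpu_dying(struct shared_cntrs **slot)
{
	struct shared_cntrs *c = *slot;

	if (c) {
		if (c->core_id == -1 || --c->refcnt == 0)
			free(c);
		*slot = NULL;	/* this CPU no longer points at it */
	}
}

int main(void)
{
	struct shared_cntrs *c = malloc(sizeof(*c));
	struct shared_cntrs *cpu0 = c, *cpu1 = c;

	c->core_id = 0;
	c->refcnt = 2;		/* both HT siblings hold a reference */

	cpu_dying(&cpu0);	/* refcnt 2 -> 1, structure kept */
	cpu_dying(&cpu1);	/* refcnt 1 -> 0, structure freed */

	printf("done\n");
	return 0;
}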