aboutsummaryrefslogtreecommitdiffstats
path: root/arch/arm/kernel/perf_event_v6.c
diff options
context:
space:
mode:
authorMark Rutland <mark.rutland@arm.com>2014-05-30 12:46:13 -0400
committerWill Deacon <will.deacon@arm.com>2014-07-02 10:48:19 -0400
commitcf20ae8cfcb3ab4013b7766abe22d251c99fee15 (patch)
treef047f8386d47d85a2c017430c5f5e9a1b399ea36 /arch/arm/kernel/perf_event_v6.c
parent6b7658ec8a100b608e59e3cde353434db51f5be0 (diff)
arm: perf: armv6: condense event maps
Now that we have macros for declaring fully invalid event maps, put them to work for all the ARMv6 PMU event maps. While this necessitates repeating common indices, we no longer need to refer to *_UNSUPPORTED events at all, and it makes it possible for the even maps to fit on a single page on a reasonably sized monitor. Acked-by: Will Deacon <will.deacon@arm.com> Signed-off-by: Mark Rutland <mark.rutland@arm.com> Signed-off-by: Will Deacon <will.deacon@arm.com>
Diffstat (limited to 'arch/arm/kernel/perf_event_v6.c')
-rw-r--r--arch/arm/kernel/perf_event_v6.c271
1 files changed, 47 insertions, 224 deletions
diff --git a/arch/arm/kernel/perf_event_v6.c b/arch/arm/kernel/perf_event_v6.c
index 03664b0e8fa4..0fd4290f911f 100644
--- a/arch/arm/kernel/perf_event_v6.c
+++ b/arch/arm/kernel/perf_event_v6.c
@@ -65,13 +65,11 @@ enum armv6_counters {
65 * accesses/misses in hardware. 65 * accesses/misses in hardware.
66 */ 66 */
67static const unsigned armv6_perf_map[PERF_COUNT_HW_MAX] = { 67static const unsigned armv6_perf_map[PERF_COUNT_HW_MAX] = {
68 PERF_MAP_ALL_UNSUPPORTED,
68 [PERF_COUNT_HW_CPU_CYCLES] = ARMV6_PERFCTR_CPU_CYCLES, 69 [PERF_COUNT_HW_CPU_CYCLES] = ARMV6_PERFCTR_CPU_CYCLES,
69 [PERF_COUNT_HW_INSTRUCTIONS] = ARMV6_PERFCTR_INSTR_EXEC, 70 [PERF_COUNT_HW_INSTRUCTIONS] = ARMV6_PERFCTR_INSTR_EXEC,
70 [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED,
71 [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED,
72 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6_PERFCTR_BR_EXEC, 71 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6_PERFCTR_BR_EXEC,
73 [PERF_COUNT_HW_BRANCH_MISSES] = ARMV6_PERFCTR_BR_MISPREDICT, 72 [PERF_COUNT_HW_BRANCH_MISSES] = ARMV6_PERFCTR_BR_MISPREDICT,
74 [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED,
75 [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = ARMV6_PERFCTR_IBUF_STALL, 73 [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = ARMV6_PERFCTR_IBUF_STALL,
76 [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = ARMV6_PERFCTR_LSU_FULL_STALL, 74 [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = ARMV6_PERFCTR_LSU_FULL_STALL,
77}; 75};
@@ -79,116 +77,31 @@ static const unsigned armv6_perf_map[PERF_COUNT_HW_MAX] = {
79static const unsigned armv6_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] 77static const unsigned armv6_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
80 [PERF_COUNT_HW_CACHE_OP_MAX] 78 [PERF_COUNT_HW_CACHE_OP_MAX]
81 [PERF_COUNT_HW_CACHE_RESULT_MAX] = { 79 [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
82 [C(L1D)] = { 80 PERF_CACHE_MAP_ALL_UNSUPPORTED,
83 /* 81
84 * The performance counters don't differentiate between read 82 /*
85 * and write accesses/misses so this isn't strictly correct, 83 * The performance counters don't differentiate between read and write
86 * but it's the best we can do. Writes and reads get 84 * accesses/misses so this isn't strictly correct, but it's the best we
87 * combined. 85 * can do. Writes and reads get combined.
88 */ 86 */
89 [C(OP_READ)] = { 87 [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV6_PERFCTR_DCACHE_ACCESS,
90 [C(RESULT_ACCESS)] = ARMV6_PERFCTR_DCACHE_ACCESS, 88 [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV6_PERFCTR_DCACHE_MISS,
91 [C(RESULT_MISS)] = ARMV6_PERFCTR_DCACHE_MISS, 89 [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV6_PERFCTR_DCACHE_ACCESS,
92 }, 90 [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV6_PERFCTR_DCACHE_MISS,
93 [C(OP_WRITE)] = { 91
94 [C(RESULT_ACCESS)] = ARMV6_PERFCTR_DCACHE_ACCESS, 92 [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV6_PERFCTR_ICACHE_MISS,
95 [C(RESULT_MISS)] = ARMV6_PERFCTR_DCACHE_MISS, 93
96 }, 94 /*
97 [C(OP_PREFETCH)] = { 95 * The ARM performance counters can count micro DTLB misses, micro ITLB
98 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 96 * misses and main TLB misses. There isn't an event for TLB misses, so
99 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 97 * use the micro misses here and if users want the main TLB misses they
100 }, 98 * can use a raw counter.
101 }, 99 */
102 [C(L1I)] = { 100 [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV6_PERFCTR_DTLB_MISS,
103 [C(OP_READ)] = { 101 [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV6_PERFCTR_DTLB_MISS,
104 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 102
105 [C(RESULT_MISS)] = ARMV6_PERFCTR_ICACHE_MISS, 103 [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV6_PERFCTR_ITLB_MISS,
106 }, 104 [C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV6_PERFCTR_ITLB_MISS,
107 [C(OP_WRITE)] = {
108 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
109 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
110 },
111 [C(OP_PREFETCH)] = {
112 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
113 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
114 },
115 },
116 [C(LL)] = {
117 [C(OP_READ)] = {
118 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
119 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
120 },
121 [C(OP_WRITE)] = {
122 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
123 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
124 },
125 [C(OP_PREFETCH)] = {
126 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
127 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
128 },
129 },
130 [C(DTLB)] = {
131 /*
132 * The ARM performance counters can count micro DTLB misses,
133 * micro ITLB misses and main TLB misses. There isn't an event
134 * for TLB misses, so use the micro misses here and if users
135 * want the main TLB misses they can use a raw counter.
136 */
137 [C(OP_READ)] = {
138 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
139 [C(RESULT_MISS)] = ARMV6_PERFCTR_DTLB_MISS,
140 },
141 [C(OP_WRITE)] = {
142 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
143 [C(RESULT_MISS)] = ARMV6_PERFCTR_DTLB_MISS,
144 },
145 [C(OP_PREFETCH)] = {
146 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
147 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
148 },
149 },
150 [C(ITLB)] = {
151 [C(OP_READ)] = {
152 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
153 [C(RESULT_MISS)] = ARMV6_PERFCTR_ITLB_MISS,
154 },
155 [C(OP_WRITE)] = {
156 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
157 [C(RESULT_MISS)] = ARMV6_PERFCTR_ITLB_MISS,
158 },
159 [C(OP_PREFETCH)] = {
160 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
161 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
162 },
163 },
164 [C(BPU)] = {
165 [C(OP_READ)] = {
166 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
167 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
168 },
169 [C(OP_WRITE)] = {
170 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
171 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
172 },
173 [C(OP_PREFETCH)] = {
174 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
175 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
176 },
177 },
178 [C(NODE)] = {
179 [C(OP_READ)] = {
180 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
181 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
182 },
183 [C(OP_WRITE)] = {
184 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
185 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
186 },
187 [C(OP_PREFETCH)] = {
188 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
189 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
190 },
191 },
192}; 105};
193 106
194enum armv6mpcore_perf_types { 107enum armv6mpcore_perf_types {
@@ -220,13 +133,11 @@ enum armv6mpcore_perf_types {
220 * accesses/misses in hardware. 133 * accesses/misses in hardware.
221 */ 134 */
222static const unsigned armv6mpcore_perf_map[PERF_COUNT_HW_MAX] = { 135static const unsigned armv6mpcore_perf_map[PERF_COUNT_HW_MAX] = {
136 PERF_MAP_ALL_UNSUPPORTED,
223 [PERF_COUNT_HW_CPU_CYCLES] = ARMV6MPCORE_PERFCTR_CPU_CYCLES, 137 [PERF_COUNT_HW_CPU_CYCLES] = ARMV6MPCORE_PERFCTR_CPU_CYCLES,
224 [PERF_COUNT_HW_INSTRUCTIONS] = ARMV6MPCORE_PERFCTR_INSTR_EXEC, 138 [PERF_COUNT_HW_INSTRUCTIONS] = ARMV6MPCORE_PERFCTR_INSTR_EXEC,
225 [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED,
226 [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED,
227 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6MPCORE_PERFCTR_BR_EXEC, 139 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6MPCORE_PERFCTR_BR_EXEC,
228 [PERF_COUNT_HW_BRANCH_MISSES] = ARMV6MPCORE_PERFCTR_BR_MISPREDICT, 140 [PERF_COUNT_HW_BRANCH_MISSES] = ARMV6MPCORE_PERFCTR_BR_MISPREDICT,
229 [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED,
230 [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = ARMV6MPCORE_PERFCTR_IBUF_STALL, 141 [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = ARMV6MPCORE_PERFCTR_IBUF_STALL,
231 [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = ARMV6MPCORE_PERFCTR_LSU_FULL_STALL, 142 [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = ARMV6MPCORE_PERFCTR_LSU_FULL_STALL,
232}; 143};
@@ -234,114 +145,26 @@ static const unsigned armv6mpcore_perf_map[PERF_COUNT_HW_MAX] = {
234static const unsigned armv6mpcore_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] 145static const unsigned armv6mpcore_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
235 [PERF_COUNT_HW_CACHE_OP_MAX] 146 [PERF_COUNT_HW_CACHE_OP_MAX]
236 [PERF_COUNT_HW_CACHE_RESULT_MAX] = { 147 [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
237 [C(L1D)] = { 148 PERF_CACHE_MAP_ALL_UNSUPPORTED,
238 [C(OP_READ)] = { 149
239 [C(RESULT_ACCESS)] = 150 [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS,
240 ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS, 151 [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_DCACHE_RDMISS,
241 [C(RESULT_MISS)] = 152 [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS,
242 ARMV6MPCORE_PERFCTR_DCACHE_RDMISS, 153 [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_DCACHE_WRMISS,
243 }, 154
244 [C(OP_WRITE)] = { 155 [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ICACHE_MISS,
245 [C(RESULT_ACCESS)] = 156
246 ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS, 157 /*
247 [C(RESULT_MISS)] = 158 * The ARM performance counters can count micro DTLB misses, micro ITLB
248 ARMV6MPCORE_PERFCTR_DCACHE_WRMISS, 159 * misses and main TLB misses. There isn't an event for TLB misses, so
249 }, 160 * use the micro misses here and if users want the main TLB misses they
250 [C(OP_PREFETCH)] = { 161 * can use a raw counter.
251 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 162 */
252 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 163 [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_DTLB_MISS,
253 }, 164 [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_DTLB_MISS,
254 }, 165
255 [C(L1I)] = { 166 [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ITLB_MISS,
256 [C(OP_READ)] = { 167 [C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ITLB_MISS,
257 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
258 [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ICACHE_MISS,
259 },
260 [C(OP_WRITE)] = {
261 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
262 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
263 },
264 [C(OP_PREFETCH)] = {
265 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
266 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
267 },
268 },
269 [C(LL)] = {
270 [C(OP_READ)] = {
271 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
272 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
273 },
274 [C(OP_WRITE)] = {
275 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
276 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
277 },
278 [C(OP_PREFETCH)] = {
279 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
280 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
281 },
282 },
283 [C(DTLB)] = {
284 /*
285 * The ARM performance counters can count micro DTLB misses,
286 * micro ITLB misses and main TLB misses. There isn't an event
287 * for TLB misses, so use the micro misses here and if users
288 * want the main TLB misses they can use a raw counter.
289 */
290 [C(OP_READ)] = {
291 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
292 [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_DTLB_MISS,
293 },
294 [C(OP_WRITE)] = {
295 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
296 [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_DTLB_MISS,
297 },
298 [C(OP_PREFETCH)] = {
299 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
300 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
301 },
302 },
303 [C(ITLB)] = {
304 [C(OP_READ)] = {
305 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
306 [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ITLB_MISS,
307 },
308 [C(OP_WRITE)] = {
309 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
310 [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ITLB_MISS,
311 },
312 [C(OP_PREFETCH)] = {
313 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
314 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
315 },
316 },
317 [C(BPU)] = {
318 [C(OP_READ)] = {
319 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
320 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
321 },
322 [C(OP_WRITE)] = {
323 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
324 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
325 },
326 [C(OP_PREFETCH)] = {
327 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
328 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
329 },
330 },
331 [C(NODE)] = {
332 [C(OP_READ)] = {
333 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
334 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
335 },
336 [C(OP_WRITE)] = {
337 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
338 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
339 },
340 [C(OP_PREFETCH)] = {
341 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
342 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
343 },
344 },
345}; 168};
346 169
347static inline unsigned long 170static inline unsigned long