diff options
Diffstat (limited to 'arch/sparc/kernel/perf_event.c')
-rw-r--r-- | arch/sparc/kernel/perf_event.c | 579 |
1 files changed, 538 insertions, 41 deletions
diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index 2d6a1b10c81d..fa5936e1c3b9 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c | |||
@@ -56,7 +56,8 @@ struct cpu_hw_events { | |||
56 | struct perf_event *events[MAX_HWEVENTS]; | 56 | struct perf_event *events[MAX_HWEVENTS]; |
57 | unsigned long used_mask[BITS_TO_LONGS(MAX_HWEVENTS)]; | 57 | unsigned long used_mask[BITS_TO_LONGS(MAX_HWEVENTS)]; |
58 | unsigned long active_mask[BITS_TO_LONGS(MAX_HWEVENTS)]; | 58 | unsigned long active_mask[BITS_TO_LONGS(MAX_HWEVENTS)]; |
59 | int enabled; | 59 | u64 pcr; |
60 | int enabled; | ||
60 | }; | 61 | }; |
61 | DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { .enabled = 1, }; | 62 | DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { .enabled = 1, }; |
62 | 63 | ||
@@ -68,8 +69,30 @@ struct perf_event_map { | |||
68 | #define PIC_LOWER 0x02 | 69 | #define PIC_LOWER 0x02 |
69 | }; | 70 | }; |
70 | 71 | ||
/* Pack one perf_event_map entry into a single unsigned long.
 * The encoding field is shifted left by 16 bits and OR-ed with pic_mask,
 * so the mask lands in the low bits.  perf_event_decode() splits the
 * value again.  The result is stored in hwc->event_base by
 * __hw_perf_event_init().
 */
72 | static unsigned long perf_event_encode(const struct perf_event_map *pmap) | ||
73 | { | ||
74 | return ((unsigned long) pmap->encoding << 16) | pmap->pic_mask; | ||
75 | } | ||
76 | |||
/* Split a value produced by perf_event_encode() back into its two parts.
 * val: packed value
 * enc: receives val >> 16 (narrowed to u16 by the assignment)
 * msk: receives the low 8 bits of val
 */
77 | static void perf_event_decode(unsigned long val, u16 *enc, u8 *msk) | ||
78 | { | ||
79 | *msk = val & 0xff; | ||
80 | *enc = val >> 16; | ||
81 | } | ||
82 | |||
/* C(x) is shorthand that pastes the PERF_COUNT_HW_CACHE_ prefix onto x,
 * used for the designated initializers of the cache maps.
 */
83 | #define C(x) PERF_COUNT_HW_CACHE_##x | ||
84 | |||
/* Sentinel encodings for cache map slots that carry no real event.
 * sparc_map_cache_event() answers -ENOENT for CACHE_OP_UNSUPPORTED and
 * -EINVAL for CACHE_OP_NONSENSE.
 */
85 | #define CACHE_OP_UNSUPPORTED 0xfffe | ||
86 | #define CACHE_OP_NONSENSE 0xffff | ||
87 | |||
/* Three-dimensional table of perf_event_map entries, indexed as
 * [cache type][cache op][cache result].
 */
88 | typedef struct perf_event_map cache_map_t | ||
89 | [PERF_COUNT_HW_CACHE_MAX] | ||
90 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
91 | [PERF_COUNT_HW_CACHE_RESULT_MAX]; | ||
92 | |||
71 | struct sparc_pmu { | 93 | struct sparc_pmu { |
72 | const struct perf_event_map *(*event_map)(int); | 94 | const struct perf_event_map *(*event_map)(int); |
95 | const cache_map_t *cache_map; | ||
73 | int max_events; | 96 | int max_events; |
74 | int upper_shift; | 97 | int upper_shift; |
75 | int lower_shift; | 98 | int lower_shift; |
@@ -80,21 +103,109 @@ struct sparc_pmu { | |||
80 | int lower_nop; | 103 | int lower_nop; |
81 | }; | 104 | }; |
82 | 105 | ||
/* Generic hardware event table for the ultra3 PMU, indexed by the
 * PERF_COUNT_HW_* event id.  The first initializer of each entry is the
 * event encoding; the second is a PIC_* mask, presumably the pic_mask
 * field (the struct definition is not fully visible in this diff).
 * The diff renames the table from ultra3i_... (left column) to ultra3_...
 * (right column); the entries themselves are unchanged.
 */
83 | static const struct perf_event_map ultra3i_perfmon_event_map[] = { | 106 | static const struct perf_event_map ultra3_perfmon_event_map[] = { |
84 | [PERF_COUNT_HW_CPU_CYCLES] = { 0x0000, PIC_UPPER | PIC_LOWER }, | 107 | [PERF_COUNT_HW_CPU_CYCLES] = { 0x0000, PIC_UPPER | PIC_LOWER }, |
85 | [PERF_COUNT_HW_INSTRUCTIONS] = { 0x0001, PIC_UPPER | PIC_LOWER }, | 108 | [PERF_COUNT_HW_INSTRUCTIONS] = { 0x0001, PIC_UPPER | PIC_LOWER }, |
86 | [PERF_COUNT_HW_CACHE_REFERENCES] = { 0x0009, PIC_LOWER }, | 109 | [PERF_COUNT_HW_CACHE_REFERENCES] = { 0x0009, PIC_LOWER }, |
87 | [PERF_COUNT_HW_CACHE_MISSES] = { 0x0009, PIC_UPPER }, | 110 | [PERF_COUNT_HW_CACHE_MISSES] = { 0x0009, PIC_UPPER }, |
88 | }; | 111 | }; |
89 | 112 | ||
/* event_map hook for the ultra3 PMU: return the table entry for event_id.
 * No range check is done here; __hw_perf_event_init() rejects ids that
 * are >= sparc_pmu->max_events before calling the hook.
 */
90 | static const struct perf_event_map *ultra3i_event_map(int event_id) | 113 | static const struct perf_event_map *ultra3_event_map(int event_id) |
91 | { | 114 | { |
92 | return &ultra3i_perfmon_event_map[event_id]; | 115 | return &ultra3_perfmon_event_map[event_id]; |
93 | } | 116 | } |
94 | 117 | ||
/* Cache event table for the ultra3 PMU, indexed as
 * [cache type][cache op][cache result].  Real entries give an event
 * encoding followed by a PIC_* mask.  Slots holding CACHE_OP_UNSUPPORTED
 * or CACHE_OP_NONSENSE are rejected by sparc_map_cache_event() with
 * -ENOENT and -EINVAL respectively.
 * Note on the diff layout: the left column of the first three rows shows
 * the start of the removed ultra3i_pmu initializer, not an older version
 * of this table.
 */
95 | static const struct sparc_pmu ultra3i_pmu = { | 118 | static const cache_map_t ultra3_cache_map = { |
96 | .event_map = ultra3i_event_map, | 119 | [C(L1D)] = { |
97 | .max_events = ARRAY_SIZE(ultra3i_perfmon_event_map), | 120 | [C(OP_READ)] = { |
121 | [C(RESULT_ACCESS)] = { 0x09, PIC_LOWER, }, | ||
122 | [C(RESULT_MISS)] = { 0x09, PIC_UPPER, }, | ||
123 | }, | ||
124 | [C(OP_WRITE)] = { | ||
125 | [C(RESULT_ACCESS)] = { 0x0a, PIC_LOWER }, | ||
126 | [C(RESULT_MISS)] = { 0x0a, PIC_UPPER }, | ||
127 | }, | ||
128 | [C(OP_PREFETCH)] = { | ||
129 | [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, | ||
130 | [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED }, | ||
131 | }, | ||
132 | }, | ||
133 | [C(L1I)] = { | ||
134 | [C(OP_READ)] = { | ||
135 | [C(RESULT_ACCESS)] = { 0x09, PIC_LOWER, }, | ||
136 | [C(RESULT_MISS)] = { 0x09, PIC_UPPER, }, | ||
137 | }, | ||
138 | [ C(OP_WRITE) ] = { | ||
139 | [ C(RESULT_ACCESS) ] = { CACHE_OP_NONSENSE }, | ||
140 | [ C(RESULT_MISS) ] = { CACHE_OP_NONSENSE }, | ||
141 | }, | ||
142 | [ C(OP_PREFETCH) ] = { | ||
143 | [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, | ||
144 | [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, | ||
145 | }, | ||
146 | }, | ||
147 | [C(LL)] = { | ||
148 | [C(OP_READ)] = { | ||
149 | [C(RESULT_ACCESS)] = { 0x0c, PIC_LOWER, }, | ||
150 | [C(RESULT_MISS)] = { 0x0c, PIC_UPPER, }, | ||
151 | }, | ||
152 | [C(OP_WRITE)] = { | ||
153 | [C(RESULT_ACCESS)] = { 0x0c, PIC_LOWER }, | ||
154 | [C(RESULT_MISS)] = { 0x0c, PIC_UPPER }, | ||
155 | }, | ||
156 | [C(OP_PREFETCH)] = { | ||
157 | [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, | ||
158 | [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED }, | ||
159 | }, | ||
160 | }, | ||
161 | [C(DTLB)] = { | ||
162 | [C(OP_READ)] = { | ||
163 | [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, | ||
164 | [C(RESULT_MISS)] = { 0x12, PIC_UPPER, }, | ||
165 | }, | ||
166 | [ C(OP_WRITE) ] = { | ||
167 | [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, | ||
168 | [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, | ||
169 | }, | ||
170 | [ C(OP_PREFETCH) ] = { | ||
171 | [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, | ||
172 | [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, | ||
173 | }, | ||
174 | }, | ||
175 | [C(ITLB)] = { | ||
176 | [C(OP_READ)] = { | ||
177 | [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, | ||
178 | [C(RESULT_MISS)] = { 0x11, PIC_UPPER, }, | ||
179 | }, | ||
180 | [ C(OP_WRITE) ] = { | ||
181 | [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, | ||
182 | [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, | ||
183 | }, | ||
184 | [ C(OP_PREFETCH) ] = { | ||
185 | [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, | ||
186 | [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, | ||
187 | }, | ||
188 | }, | ||
189 | [C(BPU)] = { | ||
190 | [C(OP_READ)] = { | ||
191 | [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, | ||
192 | [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED }, | ||
193 | }, | ||
194 | [ C(OP_WRITE) ] = { | ||
195 | [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, | ||
196 | [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, | ||
197 | }, | ||
198 | [ C(OP_PREFETCH) ] = { | ||
199 | [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, | ||
200 | [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, | ||
201 | }, | ||
202 | }, | ||
203 | }; | ||
204 | |||
205 | static const struct sparc_pmu ultra3_pmu = { | ||
206 | .event_map = ultra3_event_map, | ||
207 | .cache_map = &ultra3_cache_map, | ||
208 | .max_events = ARRAY_SIZE(ultra3_perfmon_event_map), | ||
98 | .upper_shift = 11, | 209 | .upper_shift = 11, |
99 | .lower_shift = 4, | 210 | .lower_shift = 4, |
100 | .event_mask = 0x3f, | 211 | .event_mask = 0x3f, |
@@ -102,6 +213,121 @@ static const struct sparc_pmu ultra3i_pmu = { | |||
102 | .lower_nop = 0x14, | 213 | .lower_nop = 0x14, |
103 | }; | 214 | }; |
104 | 215 | ||
216 | /* Niagara1 is very limited. The upper PIC is hard-locked to count | ||
217 | * only instructions, so it is free running which creates all kinds of | ||
218 | * problems. Some hardware designs make one wonder if the creator | ||
219 | * even looked at how this stuff gets used by software. | ||
220 | */ | ||
/* Generic hardware event table for niagara1, indexed by PERF_COUNT_HW_*
 * event id.  Both CPU_CYCLES and INSTRUCTIONS use encoding 0x00 on
 * PIC_UPPER, and CACHE_REFERENCES is given the PIC_NONE mask.
 */
221 | static const struct perf_event_map niagara1_perfmon_event_map[] = { | ||
222 | [PERF_COUNT_HW_CPU_CYCLES] = { 0x00, PIC_UPPER }, | ||
223 | [PERF_COUNT_HW_INSTRUCTIONS] = { 0x00, PIC_UPPER }, | ||
224 | [PERF_COUNT_HW_CACHE_REFERENCES] = { 0, PIC_NONE }, | ||
225 | [PERF_COUNT_HW_CACHE_MISSES] = { 0x03, PIC_LOWER }, | ||
226 | }; | ||
227 | |||
/* event_map hook for niagara1: return the table entry for event_id.
 * No range check is done here; __hw_perf_event_init() rejects ids that
 * are >= sparc_pmu->max_events before calling the hook.
 */
228 | static const struct perf_event_map *niagara1_event_map(int event_id) | ||
229 | { | ||
230 | return &niagara1_perfmon_event_map[event_id]; | ||
231 | } | ||
232 | |||
/* Cache event table for niagara1, indexed as
 * [cache type][cache op][cache result].  Real entries give an event
 * encoding followed by a PIC_* mask.  Slots holding CACHE_OP_UNSUPPORTED
 * or CACHE_OP_NONSENSE are rejected by sparc_map_cache_event() with
 * -ENOENT and -EINVAL respectively.  Every real entry in this table
 * except the L1I read access sits on PIC_LOWER.
 */
233 | static const cache_map_t niagara1_cache_map = { | ||
234 | [C(L1D)] = { | ||
235 | [C(OP_READ)] = { | ||
236 | [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, | ||
237 | [C(RESULT_MISS)] = { 0x03, PIC_LOWER, }, | ||
238 | }, | ||
239 | [C(OP_WRITE)] = { | ||
240 | [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, | ||
241 | [C(RESULT_MISS)] = { 0x03, PIC_LOWER, }, | ||
242 | }, | ||
243 | [C(OP_PREFETCH)] = { | ||
244 | [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, | ||
245 | [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED }, | ||
246 | }, | ||
247 | }, | ||
248 | [C(L1I)] = { | ||
249 | [C(OP_READ)] = { | ||
250 | [C(RESULT_ACCESS)] = { 0x00, PIC_UPPER }, | ||
251 | [C(RESULT_MISS)] = { 0x02, PIC_LOWER, }, | ||
252 | }, | ||
253 | [ C(OP_WRITE) ] = { | ||
254 | [ C(RESULT_ACCESS) ] = { CACHE_OP_NONSENSE }, | ||
255 | [ C(RESULT_MISS) ] = { CACHE_OP_NONSENSE }, | ||
256 | }, | ||
257 | [ C(OP_PREFETCH) ] = { | ||
258 | [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, | ||
259 | [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, | ||
260 | }, | ||
261 | }, | ||
262 | [C(LL)] = { | ||
263 | [C(OP_READ)] = { | ||
264 | [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, | ||
265 | [C(RESULT_MISS)] = { 0x07, PIC_LOWER, }, | ||
266 | }, | ||
267 | [C(OP_WRITE)] = { | ||
268 | [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, | ||
269 | [C(RESULT_MISS)] = { 0x07, PIC_LOWER, }, | ||
270 | }, | ||
271 | [C(OP_PREFETCH)] = { | ||
272 | [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, | ||
273 | [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED }, | ||
274 | }, | ||
275 | }, | ||
276 | [C(DTLB)] = { | ||
277 | [C(OP_READ)] = { | ||
278 | [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, | ||
279 | [C(RESULT_MISS)] = { 0x05, PIC_LOWER, }, | ||
280 | }, | ||
281 | [ C(OP_WRITE) ] = { | ||
282 | [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, | ||
283 | [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, | ||
284 | }, | ||
285 | [ C(OP_PREFETCH) ] = { | ||
286 | [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, | ||
287 | [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, | ||
288 | }, | ||
289 | }, | ||
290 | [C(ITLB)] = { | ||
291 | [C(OP_READ)] = { | ||
292 | [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, | ||
293 | [C(RESULT_MISS)] = { 0x04, PIC_LOWER, }, | ||
294 | }, | ||
295 | [ C(OP_WRITE) ] = { | ||
296 | [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, | ||
297 | [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, | ||
298 | }, | ||
299 | [ C(OP_PREFETCH) ] = { | ||
300 | [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, | ||
301 | [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, | ||
302 | }, | ||
303 | }, | ||
304 | [C(BPU)] = { | ||
305 | [C(OP_READ)] = { | ||
306 | [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, | ||
307 | [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED }, | ||
308 | }, | ||
309 | [ C(OP_WRITE) ] = { | ||
310 | [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, | ||
311 | [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, | ||
312 | }, | ||
313 | [ C(OP_PREFETCH) ] = { | ||
314 | [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, | ||
315 | [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, | ||
316 | }, | ||
317 | }, | ||
318 | }; | ||
319 | |||
/* PMU description for niagara1, selected by supported_pmu() when
 * sparc_pmu_type is niagara.  It wires up the niagara1 event and cache
 * tables.  upper_shift is the shift __hw_perf_event_init() applies to an
 * encoding placed on the upper counter; lower_shift is presumably the
 * matching shift for the lower counter (that code path is not visible
 * here).  upper_nop and lower_nop are the values nop_for_index() selects
 * between.  No hv_bit or irq_bit is set for this PMU.
 */
320 | static const struct sparc_pmu niagara1_pmu = { | ||
321 | .event_map = niagara1_event_map, | ||
322 | .cache_map = &niagara1_cache_map, | ||
323 | .max_events = ARRAY_SIZE(niagara1_perfmon_event_map), | ||
324 | .upper_shift = 0, | ||
325 | .lower_shift = 4, | ||
326 | .event_mask = 0x7, | ||
327 | .upper_nop = 0x0, | ||
328 | .lower_nop = 0x0, | ||
329 | }; | ||
330 | |||
105 | static const struct perf_event_map niagara2_perfmon_event_map[] = { | 331 | static const struct perf_event_map niagara2_perfmon_event_map[] = { |
106 | [PERF_COUNT_HW_CPU_CYCLES] = { 0x02ff, PIC_UPPER | PIC_LOWER }, | 332 | [PERF_COUNT_HW_CPU_CYCLES] = { 0x02ff, PIC_UPPER | PIC_LOWER }, |
107 | [PERF_COUNT_HW_INSTRUCTIONS] = { 0x02ff, PIC_UPPER | PIC_LOWER }, | 333 | [PERF_COUNT_HW_INSTRUCTIONS] = { 0x02ff, PIC_UPPER | PIC_LOWER }, |
@@ -116,14 +342,102 @@ static const struct perf_event_map *niagara2_event_map(int event_id) | |||
116 | return &niagara2_perfmon_event_map[event_id]; | 342 | return &niagara2_perfmon_event_map[event_id]; |
117 | } | 343 | } |
118 | 344 | ||
/* Cache event table for niagara2, indexed as
 * [cache type][cache op][cache result].  Real entries give an event
 * encoding followed by a PIC_* mask; every real entry here allows both
 * PIC_UPPER and PIC_LOWER.  Slots holding CACHE_OP_UNSUPPORTED or
 * CACHE_OP_NONSENSE are rejected by sparc_map_cache_event() with
 * -ENOENT and -EINVAL respectively.
 */
345 | static const cache_map_t niagara2_cache_map = { | ||
346 | [C(L1D)] = { | ||
347 | [C(OP_READ)] = { | ||
348 | [C(RESULT_ACCESS)] = { 0x0208, PIC_UPPER | PIC_LOWER, }, | ||
349 | [C(RESULT_MISS)] = { 0x0302, PIC_UPPER | PIC_LOWER, }, | ||
350 | }, | ||
351 | [C(OP_WRITE)] = { | ||
352 | [C(RESULT_ACCESS)] = { 0x0210, PIC_UPPER | PIC_LOWER, }, | ||
353 | [C(RESULT_MISS)] = { 0x0302, PIC_UPPER | PIC_LOWER, }, | ||
354 | }, | ||
355 | [C(OP_PREFETCH)] = { | ||
356 | [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, | ||
357 | [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED }, | ||
358 | }, | ||
359 | }, | ||
360 | [C(L1I)] = { | ||
361 | [C(OP_READ)] = { | ||
362 | [C(RESULT_ACCESS)] = { 0x02ff, PIC_UPPER | PIC_LOWER, }, | ||
363 | [C(RESULT_MISS)] = { 0x0301, PIC_UPPER | PIC_LOWER, }, | ||
364 | }, | ||
365 | [ C(OP_WRITE) ] = { | ||
366 | [ C(RESULT_ACCESS) ] = { CACHE_OP_NONSENSE }, | ||
367 | [ C(RESULT_MISS) ] = { CACHE_OP_NONSENSE }, | ||
368 | }, | ||
369 | [ C(OP_PREFETCH) ] = { | ||
370 | [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, | ||
371 | [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, | ||
372 | }, | ||
373 | }, | ||
374 | [C(LL)] = { | ||
375 | [C(OP_READ)] = { | ||
376 | [C(RESULT_ACCESS)] = { 0x0208, PIC_UPPER | PIC_LOWER, }, | ||
377 | [C(RESULT_MISS)] = { 0x0330, PIC_UPPER | PIC_LOWER, }, | ||
378 | }, | ||
379 | [C(OP_WRITE)] = { | ||
380 | [C(RESULT_ACCESS)] = { 0x0210, PIC_UPPER | PIC_LOWER, }, | ||
381 | [C(RESULT_MISS)] = { 0x0320, PIC_UPPER | PIC_LOWER, }, | ||
382 | }, | ||
383 | [C(OP_PREFETCH)] = { | ||
384 | [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, | ||
385 | [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED }, | ||
386 | }, | ||
387 | }, | ||
388 | [C(DTLB)] = { | ||
389 | [C(OP_READ)] = { | ||
390 | [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, | ||
391 | [C(RESULT_MISS)] = { 0x0b08, PIC_UPPER | PIC_LOWER, }, | ||
392 | }, | ||
393 | [ C(OP_WRITE) ] = { | ||
394 | [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, | ||
395 | [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, | ||
396 | }, | ||
397 | [ C(OP_PREFETCH) ] = { | ||
398 | [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, | ||
399 | [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, | ||
400 | }, | ||
401 | }, | ||
402 | [C(ITLB)] = { | ||
403 | [C(OP_READ)] = { | ||
404 | [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, | ||
405 | [C(RESULT_MISS)] = { 0xb04, PIC_UPPER | PIC_LOWER, }, | ||
406 | }, | ||
407 | [ C(OP_WRITE) ] = { | ||
408 | [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, | ||
409 | [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, | ||
410 | }, | ||
411 | [ C(OP_PREFETCH) ] = { | ||
412 | [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, | ||
413 | [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, | ||
414 | }, | ||
415 | }, | ||
416 | [C(BPU)] = { | ||
417 | [C(OP_READ)] = { | ||
418 | [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, | ||
419 | [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED }, | ||
420 | }, | ||
421 | [ C(OP_WRITE) ] = { | ||
422 | [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, | ||
423 | [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, | ||
424 | }, | ||
425 | [ C(OP_PREFETCH) ] = { | ||
426 | [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, | ||
427 | [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, | ||
428 | }, | ||
429 | }, | ||
430 | }; | ||
431 | |||
/* PMU description for niagara2.  It wires up the niagara2 event and cache
 * tables.  hv_bit and irq_bit are PCR bits that hw_perf_disable() clears
 * together with PCR_UTRACE and PCR_STRACE.  This diff adds the cache_map
 * pointer and changes irq_bit from 0x03 (left column) to 0x30 (right
 * column).
 */
119 | static const struct sparc_pmu niagara2_pmu = { | 432 | static const struct sparc_pmu niagara2_pmu = { |
120 | .event_map = niagara2_event_map, | 433 | .event_map = niagara2_event_map, |
434 | .cache_map = &niagara2_cache_map, | ||
121 | .max_events = ARRAY_SIZE(niagara2_perfmon_event_map), | 435 | .max_events = ARRAY_SIZE(niagara2_perfmon_event_map), |
122 | .upper_shift = 19, | 436 | .upper_shift = 19, |
123 | .lower_shift = 6, | 437 | .lower_shift = 6, |
124 | .event_mask = 0xfff, | 438 | .event_mask = 0xfff, |
125 | .hv_bit = 0x8, | 439 | .hv_bit = 0x8, |
126 | .irq_bit = 0x03, | 440 | .irq_bit = 0x30, |
127 | .upper_nop = 0x220, | 441 | .upper_nop = 0x220, |
128 | .lower_nop = 0x220, | 442 | .lower_nop = 0x220, |
129 | }; | 443 | }; |
@@ -151,23 +465,30 @@ static u64 nop_for_index(int idx) | |||
151 | sparc_pmu->lower_nop, idx); | 465 | sparc_pmu->lower_nop, idx); |
152 | } | 466 | } |
153 | 467 | ||
/* Program counter slot idx with the event described by hwc.
 * New version (right column): start from the cached PCR value in
 * cpuc->pcr, clear the bits given by mask_for_index(idx), OR in
 * hwc->config, store the result back in cpuc->pcr and write it out
 * through pcr_ops->write().
 * Old version (left column): took no cpuc argument and fetched the
 * current value with pcr_ops->read() instead of using a cached copy.
 */
154 | static inline void sparc_pmu_enable_event(struct hw_perf_event *hwc, | 468 | static inline void sparc_pmu_enable_event(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc, int idx) |
155 | int idx) | ||
156 | { | 469 | { |
157 | u64 val, mask = mask_for_index(idx); | 470 | u64 val, mask = mask_for_index(idx); |
158 | 471 | ||
159 | val = pcr_ops->read(); | 472 | val = cpuc->pcr; |
160 | pcr_ops->write((val & ~mask) | hwc->config); | 473 | val &= ~mask; |
474 | val |= hwc->config; | ||
475 | cpuc->pcr = val; | ||
476 | |||
477 | pcr_ops->write(cpuc->pcr); | ||
161 | } | 478 | } |
162 | 479 | ||
/* Park counter slot idx on its nop event.
 * New version (right column): start from the cached PCR value in
 * cpuc->pcr, clear the bits given by mask_for_index(idx), OR in the
 * value from nop_for_index(idx), store the result back in cpuc->pcr and
 * write it out through pcr_ops->write().  hwc is not read here.
 * Old version (left column): took no cpuc argument and fetched the
 * current value with pcr_ops->read() instead of using a cached copy.
 */
163 | static inline void sparc_pmu_disable_event(struct hw_perf_event *hwc, | 480 | static inline void sparc_pmu_disable_event(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc, int idx) |
164 | int idx) | ||
165 | { | 481 | { |
166 | u64 mask = mask_for_index(idx); | 482 | u64 mask = mask_for_index(idx); |
167 | u64 nop = nop_for_index(idx); | 483 | u64 nop = nop_for_index(idx); |
168 | u64 val = pcr_ops->read(); | 484 | u64 val; |
169 | 485 | ||
170 | pcr_ops->write((val & ~mask) | nop); | 486 | val = cpuc->pcr; |
487 | val &= ~mask; | ||
488 | val |= nop; | ||
489 | cpuc->pcr = val; | ||
490 | |||
491 | pcr_ops->write(cpuc->pcr); | ||
171 | } | 492 | } |
172 | 493 | ||
173 | void hw_perf_enable(void) | 494 | void hw_perf_enable(void) |
@@ -182,7 +503,7 @@ void hw_perf_enable(void) | |||
182 | cpuc->enabled = 1; | 503 | cpuc->enabled = 1; |
183 | barrier(); | 504 | barrier(); |
184 | 505 | ||
185 | val = pcr_ops->read(); | 506 | val = cpuc->pcr; |
186 | 507 | ||
187 | for (i = 0; i < MAX_HWEVENTS; i++) { | 508 | for (i = 0; i < MAX_HWEVENTS; i++) { |
188 | struct perf_event *cp = cpuc->events[i]; | 509 | struct perf_event *cp = cpuc->events[i]; |
@@ -194,7 +515,9 @@ void hw_perf_enable(void) | |||
194 | val |= hwc->config_base; | 515 | val |= hwc->config_base; |
195 | } | 516 | } |
196 | 517 | ||
197 | pcr_ops->write(val); | 518 | cpuc->pcr = val; |
519 | |||
520 | pcr_ops->write(cpuc->pcr); | ||
198 | } | 521 | } |
199 | 522 | ||
200 | void hw_perf_disable(void) | 523 | void hw_perf_disable(void) |
@@ -207,10 +530,12 @@ void hw_perf_disable(void) | |||
207 | 530 | ||
208 | cpuc->enabled = 0; | 531 | cpuc->enabled = 0; |
209 | 532 | ||
210 | val = pcr_ops->read(); | 533 | val = cpuc->pcr; |
211 | val &= ~(PCR_UTRACE | PCR_STRACE | | 534 | val &= ~(PCR_UTRACE | PCR_STRACE | |
212 | sparc_pmu->hv_bit | sparc_pmu->irq_bit); | 535 | sparc_pmu->hv_bit | sparc_pmu->irq_bit); |
213 | pcr_ops->write(val); | 536 | cpuc->pcr = val; |
537 | |||
538 | pcr_ops->write(cpuc->pcr); | ||
214 | } | 539 | } |
215 | 540 | ||
216 | static u32 read_pmc(int idx) | 541 | static u32 read_pmc(int idx) |
@@ -242,7 +567,7 @@ static void write_pmc(int idx, u64 val) | |||
242 | } | 567 | } |
243 | 568 | ||
244 | static int sparc_perf_event_set_period(struct perf_event *event, | 569 | static int sparc_perf_event_set_period(struct perf_event *event, |
245 | struct hw_perf_event *hwc, int idx) | 570 | struct hw_perf_event *hwc, int idx) |
246 | { | 571 | { |
247 | s64 left = atomic64_read(&hwc->period_left); | 572 | s64 left = atomic64_read(&hwc->period_left); |
248 | s64 period = hwc->sample_period; | 573 | s64 period = hwc->sample_period; |
@@ -282,19 +607,19 @@ static int sparc_pmu_enable(struct perf_event *event) | |||
282 | if (test_and_set_bit(idx, cpuc->used_mask)) | 607 | if (test_and_set_bit(idx, cpuc->used_mask)) |
283 | return -EAGAIN; | 608 | return -EAGAIN; |
284 | 609 | ||
285 | sparc_pmu_disable_event(hwc, idx); | 610 | sparc_pmu_disable_event(cpuc, hwc, idx); |
286 | 611 | ||
287 | cpuc->events[idx] = event; | 612 | cpuc->events[idx] = event; |
288 | set_bit(idx, cpuc->active_mask); | 613 | set_bit(idx, cpuc->active_mask); |
289 | 614 | ||
290 | sparc_perf_event_set_period(event, hwc, idx); | 615 | sparc_perf_event_set_period(event, hwc, idx); |
291 | sparc_pmu_enable_event(hwc, idx); | 616 | sparc_pmu_enable_event(cpuc, hwc, idx); |
292 | perf_event_update_userpage(event); | 617 | perf_event_update_userpage(event); |
293 | return 0; | 618 | return 0; |
294 | } | 619 | } |
295 | 620 | ||
296 | static u64 sparc_perf_event_update(struct perf_event *event, | 621 | static u64 sparc_perf_event_update(struct perf_event *event, |
297 | struct hw_perf_event *hwc, int idx) | 622 | struct hw_perf_event *hwc, int idx) |
298 | { | 623 | { |
299 | int shift = 64 - 32; | 624 | int shift = 64 - 32; |
300 | u64 prev_raw_count, new_raw_count; | 625 | u64 prev_raw_count, new_raw_count; |
@@ -324,7 +649,7 @@ static void sparc_pmu_disable(struct perf_event *event) | |||
324 | int idx = hwc->idx; | 649 | int idx = hwc->idx; |
325 | 650 | ||
326 | clear_bit(idx, cpuc->active_mask); | 651 | clear_bit(idx, cpuc->active_mask); |
327 | sparc_pmu_disable_event(hwc, idx); | 652 | sparc_pmu_disable_event(cpuc, hwc, idx); |
328 | 653 | ||
329 | barrier(); | 654 | barrier(); |
330 | 655 | ||
@@ -338,18 +663,29 @@ static void sparc_pmu_disable(struct perf_event *event) | |||
/* Read hook: refresh the count of the event by calling
 * sparc_perf_event_update() for the counter slot recorded in hwc->idx.
 * The return value of the update is discarded.
 */
338 | static void sparc_pmu_read(struct perf_event *event) | 663 | static void sparc_pmu_read(struct perf_event *event) |
339 | { | 664 | { |
340 | struct hw_perf_event *hwc = &event->hw; | 665 | struct hw_perf_event *hwc = &event->hw; |
666 | |||
341 | sparc_perf_event_update(event, hwc, hwc->idx); | 667 | sparc_perf_event_update(event, hwc, hwc->idx); |
342 | } | 668 | } |
343 | 669 | ||
/* Unthrottle hook: re-enable the event on its counter slot (hwc->idx).
 * The new version (right column) looks up the cpu_hw_events of the
 * current CPU with __get_cpu_var() and passes it to
 * sparc_pmu_enable_event(), which now works on the cached PCR value.
 */
344 | static void sparc_pmu_unthrottle(struct perf_event *event) | 670 | static void sparc_pmu_unthrottle(struct perf_event *event) |
345 | { | 671 | { |
672 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
346 | struct hw_perf_event *hwc = &event->hw; | 673 | struct hw_perf_event *hwc = &event->hw; |
347 | sparc_pmu_enable_event(hwc, hwc->idx); | 674 | |
675 | sparc_pmu_enable_event(cpuc, hwc, hwc->idx); | ||
348 | } | 676 | } |
349 | 677 | ||
/* active_events starts at zero and is incremented by
 * perf_event_grab_pmc(); pmc_grab_mutex is taken there on the path where
 * the counter could not be bumped from a non-zero value.
 */
350 | static atomic_t active_events = ATOMIC_INIT(0); | 678 | static atomic_t active_events = ATOMIC_INIT(0); |
351 | static DEFINE_MUTEX(pmc_grab_mutex); | 679 | static DEFINE_MUTEX(pmc_grab_mutex); |
352 | 680 | ||
/* Callback that perf_event_grab_pmc() runs through on_each_cpu(): stop
 * the NMI watchdog, then snapshot the hardware PCR into cpuc->pcr of the
 * current CPU so that later updates can start from the cached copy.
 * The unused argument only satisfies the callback signature.
 */
681 | static void perf_stop_nmi_watchdog(void *unused) | ||
682 | { | ||
683 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
684 | |||
685 | stop_nmi_watchdog(NULL); | ||
686 | cpuc->pcr = pcr_ops->read(); | ||
687 | } | ||
688 | |||
353 | void perf_event_grab_pmc(void) | 689 | void perf_event_grab_pmc(void) |
354 | { | 690 | { |
355 | if (atomic_inc_not_zero(&active_events)) | 691 | if (atomic_inc_not_zero(&active_events)) |
@@ -358,7 +694,7 @@ void perf_event_grab_pmc(void) | |||
358 | mutex_lock(&pmc_grab_mutex); | 694 | mutex_lock(&pmc_grab_mutex); |
359 | if (atomic_read(&active_events) == 0) { | 695 | if (atomic_read(&active_events) == 0) { |
360 | if (atomic_read(&nmi_active) > 0) { | 696 | if (atomic_read(&nmi_active) > 0) { |
361 | on_each_cpu(stop_nmi_watchdog, NULL, 1); | 697 | on_each_cpu(perf_stop_nmi_watchdog, NULL, 1); |
362 | BUG_ON(atomic_read(&nmi_active) != 0); | 698 | BUG_ON(atomic_read(&nmi_active) != 0); |
363 | } | 699 | } |
364 | atomic_inc(&active_events); | 700 | atomic_inc(&active_events); |
@@ -375,30 +711,160 @@ void perf_event_release_pmc(void) | |||
375 | } | 711 | } |
376 | } | 712 | } |
377 | 713 | ||
/* Translate a PERF_TYPE_HW_CACHE config value into a cache map entry of
 * the active PMU.
 * config layout as decoded here: bits 0-7 cache type, bits 8-15 cache
 * op, bits 16-23 cache result.
 * Returns a pointer into sparc_pmu->cache_map on success, otherwise an
 * ERR_PTR value:
 *   -ENOENT  the PMU has no cache map, or the slot is marked
 *            CACHE_OP_UNSUPPORTED
 *   -EINVAL  a field is out of range, or the slot is marked
 *            CACHE_OP_NONSENSE
 * Callers must test the result with IS_ERR().
 */
714 | static const struct perf_event_map *sparc_map_cache_event(u64 config) | ||
715 | { | ||
716 | unsigned int cache_type, cache_op, cache_result; | ||
717 | const struct perf_event_map *pmap; | ||
718 | |||
719 | if (!sparc_pmu->cache_map) | ||
720 | return ERR_PTR(-ENOENT); | ||
721 | |||
722 | cache_type = (config >> 0) & 0xff; | ||
723 | if (cache_type >= PERF_COUNT_HW_CACHE_MAX) | ||
724 | return ERR_PTR(-EINVAL); | ||
725 | |||
726 | cache_op = (config >> 8) & 0xff; | ||
727 | if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX) | ||
728 | return ERR_PTR(-EINVAL); | ||
729 | |||
730 | cache_result = (config >> 16) & 0xff; | ||
731 | if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX) | ||
732 | return ERR_PTR(-EINVAL); | ||
733 | |||
/* All three indices are range-checked above, so the lookup stays inside
 * the table.
 */
734 | pmap = &((*sparc_pmu->cache_map)[cache_type][cache_op][cache_result]); | ||
735 | |||
736 | if (pmap->encoding == CACHE_OP_UNSUPPORTED) | ||
737 | return ERR_PTR(-ENOENT); | ||
738 | |||
739 | if (pmap->encoding == CACHE_OP_NONSENSE) | ||
740 | return ERR_PTR(-EINVAL); | ||
741 | |||
742 | return pmap; | ||
743 | } | ||
744 | |||
/* Destroy callback installed as event->destroy by
 * __hw_perf_event_init() right after perf_event_grab_pmc(); it undoes
 * that grab by calling perf_event_release_pmc().
 */
378 | static void hw_perf_event_destroy(struct perf_event *event) | 745 | static void hw_perf_event_destroy(struct perf_event *event) |
379 | { | 746 | { |
380 | perf_event_release_pmc(); | 747 | perf_event_release_pmc(); |
381 | } | 748 | } |
382 | 749 | ||
750 | /* Make sure all events can be scheduled into the hardware at | ||
751 | * the same time. This is simplified by the fact that we only | ||
752 | * need to support 2 simultaneous HW events. | ||
753 | */ | ||
/* events: array of values packed by perf_event_encode(); only the
 *         pic_mask part of each value is examined
 * n_ev:   number of valid entries in events
 * Returns 0 when the events can share the counters, -1 on a conflict or
 * when n_ev exceeds perf_max_events.  Within that limit any n_ev other
 * than 1 or 2 (including 0) trips the BUG_ON.
 */
754 | static int sparc_check_constraints(unsigned long *events, int n_ev) | ||
755 | { | ||
756 | if (n_ev <= perf_max_events) { | ||
757 | u8 msk1, msk2; | ||
758 | u16 dummy; | ||
759 | |||
760 | if (n_ev == 1) | ||
761 | return 0; | ||
762 | BUG_ON(n_ev != 2); | ||
/* The encodings are decoded into dummy and ignored; only the masks
 * matter for the checks below.
 */
763 | perf_event_decode(events[0], &dummy, &msk1); | ||
764 | perf_event_decode(events[1], &dummy, &msk2); | ||
765 | |||
766 | /* If both events can go on any counter, OK. */ | ||
767 | if (msk1 == (PIC_UPPER | PIC_LOWER) && | ||
768 | msk2 == (PIC_UPPER | PIC_LOWER)) | ||
769 | return 0; | ||
770 | |||
771 | /* If one event is limited to a specific counter, | ||
772 | * and the other can go on both, OK. | ||
773 | */ | ||
774 | if ((msk1 == PIC_UPPER || msk1 == PIC_LOWER) && | ||
775 | msk2 == (PIC_UPPER | PIC_LOWER)) | ||
776 | return 0; | ||
777 | if ((msk2 == PIC_UPPER || msk2 == PIC_LOWER) && | ||
778 | msk1 == (PIC_UPPER | PIC_LOWER)) | ||
779 | return 0; | ||
780 | |||
781 | /* If the events are fixed to different counters, OK. */ | ||
782 | if ((msk1 == PIC_UPPER && msk2 == PIC_LOWER) || | ||
783 | (msk1 == PIC_LOWER && msk2 == PIC_UPPER)) | ||
784 | return 0; | ||
785 | |||
786 | /* Otherwise, there is a conflict. */ | ||
787 | } | ||
788 | |||
789 | return -1; | ||
790 | } | ||
791 | |||
/* Verify that all events in evts agree on their exclude_user,
 * exclude_kernel and exclude_hv attributes.
 * evts:   array holding n_prev + n_new events
 * n_prev: number of already collected events
 * n_new:  number of events being added (only the sum of the two counts
 *         is used)
 * Returns 0 when there is at most one event or all settings match, and
 * -EAGAIN on the first event that differs from evts[0].
 */
792 | static int check_excludes(struct perf_event **evts, int n_prev, int n_new) | ||
793 | { | ||
794 | int eu = 0, ek = 0, eh = 0; | ||
795 | struct perf_event *event; | ||
796 | int i, n, first; | ||
797 | |||
798 | n = n_prev + n_new; | ||
799 | if (n <= 1) | ||
800 | return 0; | ||
801 | |||
802 | first = 1; | ||
803 | for (i = 0; i < n; i++) { | ||
804 | event = evts[i]; | ||
/* The first event supplies the reference settings; every later event
 * is compared against them.
 */
805 | if (first) { | ||
806 | eu = event->attr.exclude_user; | ||
807 | ek = event->attr.exclude_kernel; | ||
808 | eh = event->attr.exclude_hv; | ||
809 | first = 0; | ||
810 | } else if (event->attr.exclude_user != eu || | ||
811 | event->attr.exclude_kernel != ek || | ||
812 | event->attr.exclude_hv != eh) { | ||
813 | return -EAGAIN; | ||
814 | } | ||
815 | } | ||
816 | |||
817 | return 0; | ||
818 | } | ||
819 | |||
/* Gather the non-software events of a group into two parallel arrays.
 * group:     group leader whose sibling_list is walked
 * max_count: capacity of evts and events
 * evts:      receives the perf_event pointers
 * events:    receives the matching hw.event_base values
 * The leader is taken whenever it is not a software event; siblings are
 * taken when they are not software events and not in
 * PERF_EVENT_STATE_OFF.
 * Returns the number of entries stored, or -1 when another entry would
 * exceed max_count.
 */
820 | static int collect_events(struct perf_event *group, int max_count, | ||
821 | struct perf_event *evts[], unsigned long *events) | ||
822 | { | ||
823 | struct perf_event *event; | ||
824 | int n = 0; | ||
825 | |||
826 | if (!is_software_event(group)) { | ||
827 | if (n >= max_count) | ||
828 | return -1; | ||
829 | evts[n] = group; | ||
830 | events[n++] = group->hw.event_base; | ||
831 | } | ||
832 | list_for_each_entry(event, &group->sibling_list, group_entry) { | ||
833 | if (!is_software_event(event) && | ||
834 | event->state != PERF_EVENT_STATE_OFF) { | ||
835 | if (n >= max_count) | ||
836 | return -1; | ||
837 | evts[n] = event; | ||
838 | events[n++] = event->hw.event_base; | ||
839 | } | ||
840 | } | ||
841 | return n; | ||
842 | } | ||
843 | |||
383 | static int __hw_perf_event_init(struct perf_event *event) | 844 | static int __hw_perf_event_init(struct perf_event *event) |
384 | { | 845 | { |
385 | struct perf_event_attr *attr = &event->attr; | 846 | struct perf_event_attr *attr = &event->attr; |
847 | struct perf_event *evts[MAX_HWEVENTS]; | ||
386 | struct hw_perf_event *hwc = &event->hw; | 848 | struct hw_perf_event *hwc = &event->hw; |
849 | unsigned long events[MAX_HWEVENTS]; | ||
387 | const struct perf_event_map *pmap; | 850 | const struct perf_event_map *pmap; |
388 | u64 enc; | 851 | u64 enc; |
852 | int n; | ||
389 | 853 | ||
390 | if (atomic_read(&nmi_active) < 0) | 854 | if (atomic_read(&nmi_active) < 0) |
391 | return -ENODEV; | 855 | return -ENODEV; |
392 | 856 | ||
393 | if (attr->type != PERF_TYPE_HARDWARE) | 857 | if (attr->type == PERF_TYPE_HARDWARE) { |
858 | if (attr->config >= sparc_pmu->max_events) | ||
859 | return -EINVAL; | ||
860 | pmap = sparc_pmu->event_map(attr->config); | ||
861 | } else if (attr->type == PERF_TYPE_HW_CACHE) { | ||
862 | pmap = sparc_map_cache_event(attr->config); | ||
863 | if (IS_ERR(pmap)) | ||
864 | return PTR_ERR(pmap); | ||
865 | } else | ||
394 | return -EOPNOTSUPP; | 866 | return -EOPNOTSUPP; |
395 | 867 | ||
396 | if (attr->config >= sparc_pmu->max_events) | ||
397 | return -EINVAL; | ||
398 | |||
399 | perf_event_grab_pmc(); | ||
400 | event->destroy = hw_perf_event_destroy; | ||
401 | |||
402 | /* We save the enable bits in the config_base. So to | 868 | /* We save the enable bits in the config_base. So to |
403 | * turn off sampling just write 'config', and to enable | 869 | * turn off sampling just write 'config', and to enable |
404 | * things write 'config | config_base'. | 870 | * things write 'config | config_base'. |
@@ -411,15 +877,39 @@ static int __hw_perf_event_init(struct perf_event *event) | |||
411 | if (!attr->exclude_hv) | 877 | if (!attr->exclude_hv) |
412 | hwc->config_base |= sparc_pmu->hv_bit; | 878 | hwc->config_base |= sparc_pmu->hv_bit; |
413 | 879 | ||
880 | hwc->event_base = perf_event_encode(pmap); | ||
881 | |||
882 | enc = pmap->encoding; | ||
883 | |||
884 | n = 0; | ||
885 | if (event->group_leader != event) { | ||
886 | n = collect_events(event->group_leader, | ||
887 | perf_max_events - 1, | ||
888 | evts, events); | ||
889 | if (n < 0) | ||
890 | return -EINVAL; | ||
891 | } | ||
892 | events[n] = hwc->event_base; | ||
893 | evts[n] = event; | ||
894 | |||
895 | if (check_excludes(evts, n, 1)) | ||
896 | return -EINVAL; | ||
897 | |||
898 | if (sparc_check_constraints(events, n + 1)) | ||
899 | return -EINVAL; | ||
900 | |||
901 | /* Try to do all error checking before this point, as unwinding | ||
902 | * state after grabbing the PMC is difficult. | ||
903 | */ | ||
904 | perf_event_grab_pmc(); | ||
905 | event->destroy = hw_perf_event_destroy; | ||
906 | |||
414 | if (!hwc->sample_period) { | 907 | if (!hwc->sample_period) { |
415 | hwc->sample_period = MAX_PERIOD; | 908 | hwc->sample_period = MAX_PERIOD; |
416 | hwc->last_period = hwc->sample_period; | 909 | hwc->last_period = hwc->sample_period; |
417 | atomic64_set(&hwc->period_left, hwc->sample_period); | 910 | atomic64_set(&hwc->period_left, hwc->sample_period); |
418 | } | 911 | } |
419 | 912 | ||
420 | pmap = sparc_pmu->event_map(attr->config); | ||
421 | |||
422 | enc = pmap->encoding; | ||
423 | if (pmap->pic_mask & PIC_UPPER) { | 913 | if (pmap->pic_mask & PIC_UPPER) { |
424 | hwc->idx = PIC_UPPER_INDEX; | 914 | hwc->idx = PIC_UPPER_INDEX; |
425 | enc <<= sparc_pmu->upper_shift; | 915 | enc <<= sparc_pmu->upper_shift; |
@@ -472,7 +962,7 @@ void perf_event_print_debug(void) | |||
472 | } | 962 | } |
473 | 963 | ||
474 | static int __kprobes perf_event_nmi_handler(struct notifier_block *self, | 964 | static int __kprobes perf_event_nmi_handler(struct notifier_block *self, |
475 | unsigned long cmd, void *__args) | 965 | unsigned long cmd, void *__args) |
476 | { | 966 | { |
477 | struct die_args *args = __args; | 967 | struct die_args *args = __args; |
478 | struct perf_sample_data data; | 968 | struct perf_sample_data data; |
@@ -513,7 +1003,7 @@ static int __kprobes perf_event_nmi_handler(struct notifier_block *self, | |||
513 | continue; | 1003 | continue; |
514 | 1004 | ||
515 | if (perf_event_overflow(event, 1, &data, regs)) | 1005 | if (perf_event_overflow(event, 1, &data, regs)) |
516 | sparc_pmu_disable_event(hwc, idx); | 1006 | sparc_pmu_disable_event(cpuc, hwc, idx); |
517 | } | 1007 | } |
518 | 1008 | ||
519 | return NOTIFY_STOP; | 1009 | return NOTIFY_STOP; |
@@ -525,8 +1015,15 @@ static __read_mostly struct notifier_block perf_event_nmi_notifier = { | |||
525 | 1015 | ||
526 | static bool __init supported_pmu(void) | 1016 | static bool __init supported_pmu(void) |
527 | { | 1017 | { |
528 | if (!strcmp(sparc_pmu_type, "ultra3i")) { | 1018 | if (!strcmp(sparc_pmu_type, "ultra3") || |
529 | sparc_pmu = &ultra3i_pmu; | 1019 | !strcmp(sparc_pmu_type, "ultra3+") || |
1020 | !strcmp(sparc_pmu_type, "ultra3i") || | ||
1021 | !strcmp(sparc_pmu_type, "ultra4+")) { | ||
1022 | sparc_pmu = &ultra3_pmu; | ||
1023 | return true; | ||
1024 | } | ||
1025 | if (!strcmp(sparc_pmu_type, "niagara")) { | ||
1026 | sparc_pmu = &niagara1_pmu; | ||
530 | return true; | 1027 | return true; |
531 | } | 1028 | } |
532 | if (!strcmp(sparc_pmu_type, "niagara2")) { | 1029 | if (!strcmp(sparc_pmu_type, "niagara2")) { |