Diffstat (limited to 'arch/sparc/kernel/perf_event.c')
 -rw-r--r--  arch/sparc/kernel/perf_event.c | 556
 1 file changed, 556 insertions, 0 deletions
diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c
new file mode 100644
index 000000000000..2d6a1b10c81d
--- /dev/null
+++ b/arch/sparc/kernel/perf_event.c
@@ -0,0 +1,556 @@
/* Performance event support for sparc64.
 *
 * Copyright (C) 2009 David S. Miller <davem@davemloft.net>
 *
 * This code is based almost entirely upon the x86 perf event
 * code, which is:
 *
 *  Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
 *  Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
 *  Copyright (C) 2009 Jaswinder Singh Rajput
 *  Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
 *  Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
 */

#include <linux/perf_event.h>
#include <linux/kprobes.h>
#include <linux/kernel.h>
#include <linux/kdebug.h>
#include <linux/mutex.h>

#include <asm/cpudata.h>
#include <asm/atomic.h>
#include <asm/nmi.h>
#include <asm/pcr.h>

/* Sparc64 chips have two performance counters, 32-bits each, with
 * overflow interrupts generated on transition from 0xffffffff to 0.
 * The counters are accessed in one go using a 64-bit register.
 *
 * Both counters are controlled using a single control register.  The
 * only way to stop all sampling is to clear all of the context (user,
 * supervisor, hypervisor) sampling enable bits.  But these bits apply
 * to both counters, thus the two counters can't be enabled/disabled
 * individually.
 *
 * The control register has two event fields, one for each of the two
 * counters.  It's thus nearly impossible to have one counter going
 * while keeping the other one stopped.  Therefore it is possible to
 * get overflow interrupts for counters not currently "in use" and
 * that condition must be checked in the overflow interrupt handler.
 *
 * So we use a hack, in that we program inactive counters with the
 * "sw_count0" and "sw_count1" events.  These count how many times
 * the instruction "sethi %hi(0xfc000), %g0" is executed.  It's an
 * unusual way to encode a NOP and therefore will not trigger in
 * normal code.
 */

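/* For example, with the ultra3i settings below the event field is 6 bits
 * wide (event_mask 0x3f) and sits at bit 11 for the upper counter and at
 * bit 4 for the lower one, so a single write to the PCR programs both
 * counters' event fields at once.
 */
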
#define MAX_HWEVENTS			2
#define MAX_PERIOD			((1UL << 32) - 1)

#define PIC_UPPER_INDEX			0
#define PIC_LOWER_INDEX			1

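/* Per-cpu bookkeeping: which counter slots are allocated (used_mask),
 * which events are actively counting (active_mask), and whether the
 * PCR enable bits are currently set.
 */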
struct cpu_hw_events {
	struct perf_event	*events[MAX_HWEVENTS];
	unsigned long		used_mask[BITS_TO_LONGS(MAX_HWEVENTS)];
	unsigned long		active_mask[BITS_TO_LONGS(MAX_HWEVENTS)];
	int enabled;
};
DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { .enabled = 1, };

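/* An event's raw hardware encoding plus a mask of which counter(s)
 * (upper and/or lower PIC) can count it.
 */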
struct perf_event_map {
	u16	encoding;
	u8	pic_mask;
#define PIC_NONE	0x00
#define PIC_UPPER	0x01
#define PIC_LOWER	0x02
};

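/* Per-chip PMU description: the generic event map plus the shifts,
 * masks, and control bits needed to build PCR values for that chip.
 */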
struct sparc_pmu {
	const struct perf_event_map	*(*event_map)(int);
	int				max_events;
	int				upper_shift;
	int				lower_shift;
	int				event_mask;
	int				hv_bit;
	int				irq_bit;
	int				upper_nop;
	int				lower_nop;
};

static const struct perf_event_map ultra3i_perfmon_event_map[] = {
	[PERF_COUNT_HW_CPU_CYCLES] = { 0x0000, PIC_UPPER | PIC_LOWER },
	[PERF_COUNT_HW_INSTRUCTIONS] = { 0x0001, PIC_UPPER | PIC_LOWER },
	[PERF_COUNT_HW_CACHE_REFERENCES] = { 0x0009, PIC_LOWER },
	[PERF_COUNT_HW_CACHE_MISSES] = { 0x0009, PIC_UPPER },
};

static const struct perf_event_map *ultra3i_event_map(int event_id)
{
	return &ultra3i_perfmon_event_map[event_id];
}

static const struct sparc_pmu ultra3i_pmu = {
	.event_map	= ultra3i_event_map,
	.max_events	= ARRAY_SIZE(ultra3i_perfmon_event_map),
	.upper_shift	= 11,
	.lower_shift	= 4,
	.event_mask	= 0x3f,
	.upper_nop	= 0x1c,
	.lower_nop	= 0x14,
};

static const struct perf_event_map niagara2_perfmon_event_map[] = {
	[PERF_COUNT_HW_CPU_CYCLES] = { 0x02ff, PIC_UPPER | PIC_LOWER },
	[PERF_COUNT_HW_INSTRUCTIONS] = { 0x02ff, PIC_UPPER | PIC_LOWER },
	[PERF_COUNT_HW_CACHE_REFERENCES] = { 0x0208, PIC_UPPER | PIC_LOWER },
	[PERF_COUNT_HW_CACHE_MISSES] = { 0x0302, PIC_UPPER | PIC_LOWER },
	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = { 0x0201, PIC_UPPER | PIC_LOWER },
	[PERF_COUNT_HW_BRANCH_MISSES] = { 0x0202, PIC_UPPER | PIC_LOWER },
};

static const struct perf_event_map *niagara2_event_map(int event_id)
{
	return &niagara2_perfmon_event_map[event_id];
}

static const struct sparc_pmu niagara2_pmu = {
	.event_map	= niagara2_event_map,
	.max_events	= ARRAY_SIZE(niagara2_perfmon_event_map),
	.upper_shift	= 19,
	.lower_shift	= 6,
	.event_mask	= 0xfff,
	.hv_bit		= 0x8,
	.irq_bit	= 0x03,
	.upper_nop	= 0x220,
	.lower_nop	= 0x220,
};

static const struct sparc_pmu *sparc_pmu __read_mostly;

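/* Shift an event encoding into the PCR field that controls the
 * requested counter (upper or lower).
 */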
static u64 event_encoding(u64 event_id, int idx)
{
	if (idx == PIC_UPPER_INDEX)
		event_id <<= sparc_pmu->upper_shift;
	else
		event_id <<= sparc_pmu->lower_shift;
	return event_id;
}

static u64 mask_for_index(int idx)
{
	return event_encoding(sparc_pmu->event_mask, idx);
}

static u64 nop_for_index(int idx)
{
	return event_encoding(idx == PIC_UPPER_INDEX ?
			      sparc_pmu->upper_nop :
			      sparc_pmu->lower_nop, idx);
}

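/* Install hwc->config into this counter's PCR event field; the other
 * counter's field is left untouched.  sparc_pmu_disable_event() instead
 * programs the counter with the do-nothing NOP event described above.
 */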
static inline void sparc_pmu_enable_event(struct hw_perf_event *hwc,
					  int idx)
{
	u64 val, mask = mask_for_index(idx);

	val = pcr_ops->read();
	pcr_ops->write((val & ~mask) | hwc->config);
}

static inline void sparc_pmu_disable_event(struct hw_perf_event *hwc,
					   int idx)
{
	u64 mask = mask_for_index(idx);
	u64 nop = nop_for_index(idx);
	u64 val = pcr_ops->read();

	pcr_ops->write((val & ~mask) | nop);
}

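/* Re-enable counting by OR-ing every active event's enable bits
 * (stashed in config_base) back into the PCR; hw_perf_disable()
 * clears all of the context trace and interrupt enable bits.
 */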
void hw_perf_enable(void)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	u64 val;
	int i;

	if (cpuc->enabled)
		return;

	cpuc->enabled = 1;
	barrier();

	val = pcr_ops->read();

	for (i = 0; i < MAX_HWEVENTS; i++) {
		struct perf_event *cp = cpuc->events[i];
		struct hw_perf_event *hwc;

		if (!cp)
			continue;
		hwc = &cp->hw;
		val |= hwc->config_base;
	}

	pcr_ops->write(val);
}

void hw_perf_disable(void)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	u64 val;

	if (!cpuc->enabled)
		return;

	cpuc->enabled = 0;

	val = pcr_ops->read();
	val &= ~(PCR_UTRACE | PCR_STRACE |
		 sparc_pmu->hv_bit | sparc_pmu->irq_bit);
	pcr_ops->write(val);
}

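/* Read one 32-bit counter out of the combined 64-bit %pic register. */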
static u32 read_pmc(int idx)
{
	u64 val;

	read_pic(val);
	if (idx == PIC_UPPER_INDEX)
		val >>= 32;

	return val & 0xffffffff;
}

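/* Write one 32-bit counter without disturbing the other half of %pic. */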
static void write_pmc(int idx, u64 val)
{
	u64 shift, mask, pic;

	shift = 0;
	if (idx == PIC_UPPER_INDEX)
		shift = 32;

	mask = ((u64) 0xffffffff) << shift;
	val <<= shift;

	read_pic(pic);
	pic &= ~mask;
	pic |= val;
	write_pic(pic);
}

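/* Program the counter with the two's complement of the remaining sample
 * period, so that it overflows (wraps from 0xffffffff to 0) after 'left'
 * more events.
 */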
static int sparc_perf_event_set_period(struct perf_event *event,
				       struct hw_perf_event *hwc, int idx)
{
	s64 left = atomic64_read(&hwc->period_left);
	s64 period = hwc->sample_period;
	int ret = 0;

	if (unlikely(left <= -period)) {
		left = period;
		atomic64_set(&hwc->period_left, left);
		hwc->last_period = period;
		ret = 1;
	}

	if (unlikely(left <= 0)) {
		left += period;
		atomic64_set(&hwc->period_left, left);
		hwc->last_period = period;
		ret = 1;
	}
	if (left > MAX_PERIOD)
		left = MAX_PERIOD;

	atomic64_set(&hwc->prev_count, (u64)-left);

	write_pmc(idx, (u64)(-left) & 0xffffffff);

	perf_event_update_userpage(event);

	return ret;
}

static int sparc_pmu_enable(struct perf_event *event)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	struct hw_perf_event *hwc = &event->hw;
	int idx = hwc->idx;

	if (test_and_set_bit(idx, cpuc->used_mask))
		return -EAGAIN;

	sparc_pmu_disable_event(hwc, idx);

	cpuc->events[idx] = event;
	set_bit(idx, cpuc->active_mask);

	sparc_perf_event_set_period(event, hwc, idx);
	sparc_pmu_enable_event(hwc, idx);
	perf_event_update_userpage(event);
	return 0;
}

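/* Fold the hardware counter's movement since the last read into
 * event->count and period_left.  The cmpxchg loop makes this safe
 * against an NMI updating the same event concurrently.
 */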
static u64 sparc_perf_event_update(struct perf_event *event,
				   struct hw_perf_event *hwc, int idx)
{
	int shift = 64 - 32;
	u64 prev_raw_count, new_raw_count;
	s64 delta;

again:
	prev_raw_count = atomic64_read(&hwc->prev_count);
	new_raw_count = read_pmc(idx);

	if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count,
			     new_raw_count) != prev_raw_count)
		goto again;

	delta = (new_raw_count << shift) - (prev_raw_count << shift);
	delta >>= shift;

	atomic64_add(delta, &event->count);
	atomic64_sub(delta, &hwc->period_left);

	return new_raw_count;
}

static void sparc_pmu_disable(struct perf_event *event)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	struct hw_perf_event *hwc = &event->hw;
	int idx = hwc->idx;

	clear_bit(idx, cpuc->active_mask);
	sparc_pmu_disable_event(hwc, idx);

	barrier();

	sparc_perf_event_update(event, hwc, idx);
	cpuc->events[idx] = NULL;
	clear_bit(idx, cpuc->used_mask);

	perf_event_update_userpage(event);
}

static void sparc_pmu_read(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	sparc_perf_event_update(event, hwc, hwc->idx);
}

static void sparc_pmu_unthrottle(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	sparc_pmu_enable_event(hwc, hwc->idx);
}

static atomic_t active_events = ATOMIC_INIT(0);
static DEFINE_MUTEX(pmc_grab_mutex);

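/* The performance counters are shared with the NMI watchdog: the first
 * active event shuts the watchdog down, and perf_event_release_pmc()
 * re-arms it once the last event goes away.
 */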
void perf_event_grab_pmc(void)
{
	if (atomic_inc_not_zero(&active_events))
		return;

	mutex_lock(&pmc_grab_mutex);
	if (atomic_read(&active_events) == 0) {
		if (atomic_read(&nmi_active) > 0) {
			on_each_cpu(stop_nmi_watchdog, NULL, 1);
			BUG_ON(atomic_read(&nmi_active) != 0);
		}
		atomic_inc(&active_events);
	}
	mutex_unlock(&pmc_grab_mutex);
}

void perf_event_release_pmc(void)
{
	if (atomic_dec_and_mutex_lock(&active_events, &pmc_grab_mutex)) {
		if (atomic_read(&nmi_active) == 0)
			on_each_cpu(start_nmi_watchdog, NULL, 1);
		mutex_unlock(&pmc_grab_mutex);
	}
}

static void hw_perf_event_destroy(struct perf_event *event)
{
	perf_event_release_pmc();
}

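/* Validate the request, translate the generic hardware event into this
 * chip's encoding, and pick which of the two counters it will use.
 */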
static int __hw_perf_event_init(struct perf_event *event)
{
	struct perf_event_attr *attr = &event->attr;
	struct hw_perf_event *hwc = &event->hw;
	const struct perf_event_map *pmap;
	u64 enc;

	if (atomic_read(&nmi_active) < 0)
		return -ENODEV;

	if (attr->type != PERF_TYPE_HARDWARE)
		return -EOPNOTSUPP;

	if (attr->config >= sparc_pmu->max_events)
		return -EINVAL;

	perf_event_grab_pmc();
	event->destroy = hw_perf_event_destroy;

	/* We save the enable bits in the config_base.  So to
	 * turn off sampling just write 'config', and to enable
	 * things write 'config | config_base'.
	 */
	hwc->config_base = sparc_pmu->irq_bit;
	if (!attr->exclude_user)
		hwc->config_base |= PCR_UTRACE;
	if (!attr->exclude_kernel)
		hwc->config_base |= PCR_STRACE;
	if (!attr->exclude_hv)
		hwc->config_base |= sparc_pmu->hv_bit;

	if (!hwc->sample_period) {
		hwc->sample_period = MAX_PERIOD;
		hwc->last_period = hwc->sample_period;
		atomic64_set(&hwc->period_left, hwc->sample_period);
	}

	pmap = sparc_pmu->event_map(attr->config);

	enc = pmap->encoding;
	if (pmap->pic_mask & PIC_UPPER) {
		hwc->idx = PIC_UPPER_INDEX;
		enc <<= sparc_pmu->upper_shift;
	} else {
		hwc->idx = PIC_LOWER_INDEX;
		enc <<= sparc_pmu->lower_shift;
	}

	hwc->config |= enc;
	return 0;
}

static const struct pmu pmu = {
	.enable		= sparc_pmu_enable,
	.disable	= sparc_pmu_disable,
	.read		= sparc_pmu_read,
	.unthrottle	= sparc_pmu_unthrottle,
};

const struct pmu *hw_perf_event_init(struct perf_event *event)
{
	int err = __hw_perf_event_init(event);

	if (err)
		return ERR_PTR(err);
	return &pmu;
}

void perf_event_print_debug(void)
{
	unsigned long flags;
	u64 pcr, pic;
	int cpu;

	if (!sparc_pmu)
		return;

	local_irq_save(flags);

	cpu = smp_processor_id();

	pcr = pcr_ops->read();
	read_pic(pic);

	pr_info("\n");
	pr_info("CPU#%d: PCR[%016llx] PIC[%016llx]\n",
		cpu, pcr, pic);

	local_irq_restore(flags);
}

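/* Counter overflow arrives as an NMI.  Update every active counter and
 * skip those whose value still has bit 31 set (they have not wrapped,
 * so the interrupt came from the other counter or from an inactive one
 * counting the NOP event).  For real overflows, reprogram the period
 * and push a sample, disabling the counter if perf_event_overflow()
 * says the event should be throttled.
 */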
static int __kprobes perf_event_nmi_handler(struct notifier_block *self,
					    unsigned long cmd, void *__args)
{
	struct die_args *args = __args;
	struct perf_sample_data data;
	struct cpu_hw_events *cpuc;
	struct pt_regs *regs;
	int idx;

	if (!atomic_read(&active_events))
		return NOTIFY_DONE;

	switch (cmd) {
	case DIE_NMI:
		break;

	default:
		return NOTIFY_DONE;
	}

	regs = args->regs;

	data.addr = 0;

	cpuc = &__get_cpu_var(cpu_hw_events);
	for (idx = 0; idx < MAX_HWEVENTS; idx++) {
		struct perf_event *event = cpuc->events[idx];
		struct hw_perf_event *hwc;
		u64 val;

		if (!test_bit(idx, cpuc->active_mask))
			continue;
		hwc = &event->hw;
		val = sparc_perf_event_update(event, hwc, idx);
		if (val & (1ULL << 31))
			continue;

		data.period = event->hw.last_period;
		if (!sparc_perf_event_set_period(event, hwc, idx))
			continue;

		if (perf_event_overflow(event, 1, &data, regs))
			sparc_pmu_disable_event(hwc, idx);
	}

	return NOTIFY_STOP;
}

static __read_mostly struct notifier_block perf_event_nmi_notifier = {
	.notifier_call		= perf_event_nmi_handler,
};

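/* Match the cpu's sparc_pmu_type string against the chips this driver
 * knows how to drive.
 */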
static bool __init supported_pmu(void)
{
	if (!strcmp(sparc_pmu_type, "ultra3i")) {
		sparc_pmu = &ultra3i_pmu;
		return true;
	}
	if (!strcmp(sparc_pmu_type, "niagara2")) {
		sparc_pmu = &niagara2_pmu;
		return true;
	}
	return false;
}

void __init init_hw_perf_events(void)
{
	pr_info("Performance events: ");

	if (!supported_pmu()) {
		pr_cont("No support for PMU type '%s'\n", sparc_pmu_type);
		return;
	}

	pr_cont("Supported PMU type is '%s'\n", sparc_pmu_type);

	/* All sparc64 PMUs currently have 2 events.  But this simple
	 * driver only supports one active event at a time.
	 */
	perf_max_events = 1;

	register_die_notifier(&perf_event_nmi_notifier);
}