author     James Hogan <james.hogan@imgtec.com>    2012-10-05 11:54:55 -0400
committer  James Hogan <james.hogan@imgtec.com>    2013-03-02 15:09:54 -0500
commit     903b20ad6810e05bc5f7cc038257e80463e71001
tree       592087306a334c55c00aba52ba1246da68f5f636
parent     5633004cc2498ff50a5b88d415d3746ff0c301f2
metag: Perf
Add Perf support for metag.

Signed-off-by: James Hogan <james.hogan@imgtec.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
-rw-r--r--  arch/metag/Kconfig                   |    1
-rw-r--r--  arch/metag/include/asm/perf_event.h  |    4
-rw-r--r--  arch/metag/kernel/Makefile           |    2
-rw-r--r--  arch/metag/kernel/perf/Makefile      |    3
-rw-r--r--  arch/metag/kernel/perf/perf_event.c  |  861
-rw-r--r--  arch/metag/kernel/perf/perf_event.h  |  106
-rw-r--r--  arch/metag/kernel/perf_callchain.c   |   96
7 files changed, 1073 insertions(+), 0 deletions(-)
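
For a sense of how this is exercised from user space: once the patch is applied the counters are reachable through the generic perf_event_open() interface. The sketch below is illustrative only (not part of the patch; the loop and event choice are arbitrary) and counts retired instructions, which the metag backend maps onto the dedicated instruction counter:

/*
 * Illustrative only -- not part of this patch.  Counts instructions for a
 * short busy loop through the generic perf_event_open() syscall, which the
 * new metag backend maps onto the hardware instruction counter.
 */
#include <linux/perf_event.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	struct perf_event_attr attr;
	long long count;
	volatile int i;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.config = PERF_COUNT_HW_INSTRUCTIONS;
	attr.disabled = 1;

	/* No glibc wrapper exists for perf_event_open(). */
	fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
	if (fd < 0) {
		perror("perf_event_open");
		return 1;
	}

	ioctl(fd, PERF_EVENT_IOC_RESET, 0);
	ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
	for (i = 0; i < 1000000; i++)
		;
	ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);

	if (read(fd, &count, sizeof(count)) == sizeof(count))
		printf("instructions: %lld\n", count);
	close(fd);
	return 0;
}

On cores that predate the overflow interrupt the driver still accepts counting events like this one, but rejects sampling configurations (see _hw_perf_event_init() below).
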
diff --git a/arch/metag/Kconfig b/arch/metag/Kconfig
index f786e6e09700..47972025818f 100644
--- a/arch/metag/Kconfig
+++ b/arch/metag/Kconfig
@@ -22,6 +22,7 @@ config METAG
 	select HAVE_MEMBLOCK
 	select HAVE_MEMBLOCK_NODE_MAP
 	select HAVE_MOD_ARCH_SPECIFIC
+	select HAVE_PERF_EVENTS
 	select HAVE_SYSCALL_TRACEPOINTS
 	select IRQ_DOMAIN
 	select MODULES_USE_ELF_RELA
diff --git a/arch/metag/include/asm/perf_event.h b/arch/metag/include/asm/perf_event.h
new file mode 100644
index 000000000000..105bbff0149f
--- /dev/null
+++ b/arch/metag/include/asm/perf_event.h
@@ -0,0 +1,4 @@
+#ifndef __ASM_METAG_PERF_EVENT_H
+#define __ASM_METAG_PERF_EVENT_H
+
+#endif /* __ASM_METAG_PERF_EVENT_H */
diff --git a/arch/metag/kernel/Makefile b/arch/metag/kernel/Makefile
index e985d0ca618c..a5e4ba6fd20a 100644
--- a/arch/metag/kernel/Makefile
+++ b/arch/metag/kernel/Makefile
@@ -25,6 +25,8 @@ obj-y += topology.o
 obj-y += traps.o
 obj-y += user_gateway.o
 
+obj-$(CONFIG_PERF_EVENTS) += perf/
+
 obj-$(CONFIG_METAG_COREMEM) += coremem.o
 obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
 obj-$(CONFIG_FUNCTION_TRACER) += ftrace_stub.o
diff --git a/arch/metag/kernel/perf/Makefile b/arch/metag/kernel/perf/Makefile
new file mode 100644
index 000000000000..b158cb27208d
--- /dev/null
+++ b/arch/metag/kernel/perf/Makefile
@@ -0,0 +1,3 @@
+# Makefile for performance event core
+
+obj-y += perf_event.o
diff --git a/arch/metag/kernel/perf/perf_event.c b/arch/metag/kernel/perf/perf_event.c
new file mode 100644
index 000000000000..a876d5ff3897
--- /dev/null
+++ b/arch/metag/kernel/perf/perf_event.c
@@ -0,0 +1,861 @@
1/*
2 * Meta performance counter support.
3 * Copyright (C) 2012 Imagination Technologies Ltd
4 *
5 * This code is based on the sh pmu code:
6 * Copyright (C) 2009 Paul Mundt
7 *
8 * and on the arm pmu code:
9 * Copyright (C) 2009 picoChip Designs, Ltd., James Iles
10 * Copyright (C) 2010 ARM Ltd., Will Deacon <will.deacon@arm.com>
11 *
12 * This file is subject to the terms and conditions of the GNU General Public
13 * License. See the file "COPYING" in the main directory of this archive
14 * for more details.
15 */
16
17#include <linux/atomic.h>
18#include <linux/export.h>
19#include <linux/init.h>
20#include <linux/irqchip/metag.h>
21#include <linux/perf_event.h>
22#include <linux/slab.h>
23
24#include <asm/core_reg.h>
25#include <asm/hwthread.h>
26#include <asm/io.h>
27#include <asm/irq.h>
28
29#include "perf_event.h"
30
31static int _hw_perf_event_init(struct perf_event *);
32static void _hw_perf_event_destroy(struct perf_event *);
33
34/* Determines which core type we are */
35static struct metag_pmu *metag_pmu __read_mostly;
36
37/* Processor specific data */
38static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events);
39
40/* PMU admin */
41const char *perf_pmu_name(void)
42{
43 if (metag_pmu)
44 return metag_pmu->pmu.name;
45
46 return NULL;
47}
48EXPORT_SYMBOL_GPL(perf_pmu_name);
49
50int perf_num_counters(void)
51{
52 if (metag_pmu)
53 return metag_pmu->max_events;
54
55 return 0;
56}
57EXPORT_SYMBOL_GPL(perf_num_counters);
58
59static inline int metag_pmu_initialised(void)
60{
61 return !!metag_pmu;
62}
63
64static void release_pmu_hardware(void)
65{
66 int irq;
67 unsigned int version = (metag_pmu->version &
68 (METAC_ID_MINOR_BITS | METAC_ID_REV_BITS)) >>
69 METAC_ID_REV_S;
70
71 /* Early cores don't have overflow interrupts */
72 if (version < 0x0104)
73 return;
74
75 irq = internal_irq_map(17);
76 if (irq >= 0)
77 free_irq(irq, (void *)1);
78
79 irq = internal_irq_map(16);
80 if (irq >= 0)
81 free_irq(irq, (void *)0);
82}
83
84static int reserve_pmu_hardware(void)
85{
86 int err = 0, irq[2];
87 unsigned int version = (metag_pmu->version &
88 (METAC_ID_MINOR_BITS | METAC_ID_REV_BITS)) >>
89 METAC_ID_REV_S;
90
91 /* Early cores don't have overflow interrupts */
92 if (version < 0x0104)
93 goto out;
94
95 /*
96 * Bit 16 on HWSTATMETA is the interrupt for performance counter 0;
97 * similarly, 17 is the interrupt for performance counter 1.
98 * We can't (yet) interrupt on the cycle counter, because it's a
 99 * register; however, it holds a 32-bit value as opposed to 24-bit.
100 */
101 irq[0] = internal_irq_map(16);
102 if (irq[0] < 0) {
103 pr_err("unable to map internal IRQ %d\n", 16);
104 goto out;
105 }
106 err = request_irq(irq[0], metag_pmu->handle_irq, IRQF_NOBALANCING,
107 "metagpmu0", (void *)0);
108 if (err) {
109 pr_err("unable to request IRQ%d for metag PMU counters\n",
110 irq[0]);
111 goto out;
112 }
113
114 irq[1] = internal_irq_map(17);
115 if (irq[1] < 0) {
116 pr_err("unable to map internal IRQ %d\n", 17);
117 goto out_irq1;
118 }
119 err = request_irq(irq[1], metag_pmu->handle_irq, IRQF_NOBALANCING,
120 "metagpmu1", (void *)1);
121 if (err) {
122 pr_err("unable to request IRQ%d for metag PMU counters\n",
123 irq[1]);
124 goto out_irq1;
125 }
126
127 return 0;
128
129out_irq1:
130 free_irq(irq[0], (void *)0);
131out:
132 return err;
133}
134
135/* PMU operations */
136static void metag_pmu_enable(struct pmu *pmu)
137{
138}
139
140static void metag_pmu_disable(struct pmu *pmu)
141{
142}
143
144static int metag_pmu_event_init(struct perf_event *event)
145{
146 int err = 0;
147 atomic_t *active_events = &metag_pmu->active_events;
148
149 if (!metag_pmu_initialised()) {
150 err = -ENODEV;
151 goto out;
152 }
153
154 if (has_branch_stack(event))
155 return -EOPNOTSUPP;
156
157 event->destroy = _hw_perf_event_destroy;
158
159 if (!atomic_inc_not_zero(active_events)) {
160 mutex_lock(&metag_pmu->reserve_mutex);
161 if (atomic_read(active_events) == 0)
162 err = reserve_pmu_hardware();
163
164 if (!err)
165 atomic_inc(active_events);
166
167 mutex_unlock(&metag_pmu->reserve_mutex);
168 }
169
170 /* Hardware and caches counters */
171 switch (event->attr.type) {
172 case PERF_TYPE_HARDWARE:
173 case PERF_TYPE_HW_CACHE:
174 err = _hw_perf_event_init(event);
175 break;
176
177 default:
178 return -ENOENT;
179 }
180
181 if (err)
182 event->destroy(event);
183
184out:
185 return err;
186}
187
188void metag_pmu_event_update(struct perf_event *event,
189 struct hw_perf_event *hwc, int idx)
190{
191 u64 prev_raw_count, new_raw_count;
192 s64 delta;
193
194 /*
195 * If this counter is chained, it may be that the previous counter
196 * value has been changed beneath us.
197 *
198 * To get around this, we read and exchange the new raw count, then
199 * add the delta (new - prev) to the generic counter atomically.
200 *
201 * Without interrupts, this is the simplest approach.
202 */
203again:
204 prev_raw_count = local64_read(&hwc->prev_count);
205 new_raw_count = metag_pmu->read(idx);
206
207 if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
208 new_raw_count) != prev_raw_count)
209 goto again;
210
211 /*
212 * Calculate the delta and add it to the counter.
213 */
214 delta = new_raw_count - prev_raw_count;
215
216 local64_add(delta, &event->count);
217}
218
219int metag_pmu_event_set_period(struct perf_event *event,
220 struct hw_perf_event *hwc, int idx)
221{
222 s64 left = local64_read(&hwc->period_left);
223 s64 period = hwc->sample_period;
224 int ret = 0;
225
226 if (unlikely(left <= -period)) {
227 left = period;
228 local64_set(&hwc->period_left, left);
229 hwc->last_period = period;
230 ret = 1;
231 }
232
233 if (unlikely(left <= 0)) {
234 left += period;
235 local64_set(&hwc->period_left, left);
236 hwc->last_period = period;
237 ret = 1;
238 }
239
240 if (left > (s64)metag_pmu->max_period)
241 left = metag_pmu->max_period;
242
243 if (metag_pmu->write)
244 metag_pmu->write(idx, (u64)(-left) & MAX_PERIOD);
245
246 perf_event_update_userpage(event);
247
248 return ret;
249}
250
251static void metag_pmu_start(struct perf_event *event, int flags)
252{
253 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
254 struct hw_perf_event *hwc = &event->hw;
255 int idx = hwc->idx;
256
257 if (WARN_ON_ONCE(idx == -1))
258 return;
259
260 /*
261 * We always have to reprogram the period, so ignore PERF_EF_RELOAD.
262 */
263 if (flags & PERF_EF_RELOAD)
264 WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
265
266 hwc->state = 0;
267
268 /*
269 * Reset the period.
270 * Some counters can't be stopped (i.e. are core global), so when the
271 * counter was 'stopped' we merely disabled the IRQ. If we don't reset
272 * the period, then we'll either: a) get an overflow too soon;
273 * or b) too late if the overflow happened since disabling.
274 * Obviously, this has little bearing on cores without the overflow
275 * interrupt, as the performance counter resets to zero on write
276 * anyway.
277 */
278 if (metag_pmu->max_period)
279 metag_pmu_event_set_period(event, hwc, hwc->idx);
280 cpuc->events[idx] = event;
281 metag_pmu->enable(hwc, idx);
282}
283
284static void metag_pmu_stop(struct perf_event *event, int flags)
285{
286 struct hw_perf_event *hwc = &event->hw;
287
288 /*
289 * We should always update the counter on stop; see comment above
290 * why.
291 */
292 if (!(hwc->state & PERF_HES_STOPPED)) {
293 metag_pmu_event_update(event, hwc, hwc->idx);
294 metag_pmu->disable(hwc, hwc->idx);
295 hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
296 }
297}
298
299static int metag_pmu_add(struct perf_event *event, int flags)
300{
301 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
302 struct hw_perf_event *hwc = &event->hw;
303 int idx = 0, ret = 0;
304
305 perf_pmu_disable(event->pmu);
306
307 /* check whether we're counting instructions */
308 if (hwc->config == 0x100) {
309 if (__test_and_set_bit(METAG_INST_COUNTER,
310 cpuc->used_mask)) {
311 ret = -EAGAIN;
312 goto out;
313 }
314 idx = METAG_INST_COUNTER;
315 } else {
316 /* Check whether we have a spare counter */
317 idx = find_first_zero_bit(cpuc->used_mask,
318 atomic_read(&metag_pmu->active_events));
319 if (idx >= METAG_INST_COUNTER) {
320 ret = -EAGAIN;
321 goto out;
322 }
323
324 __set_bit(idx, cpuc->used_mask);
325 }
326 hwc->idx = idx;
327
328 /* Make sure the counter is disabled */
329 metag_pmu->disable(hwc, idx);
330
331 hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
332 if (flags & PERF_EF_START)
333 metag_pmu_start(event, PERF_EF_RELOAD);
334
335 perf_event_update_userpage(event);
336out:
337 perf_pmu_enable(event->pmu);
338 return ret;
339}
340
341static void metag_pmu_del(struct perf_event *event, int flags)
342{
343 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
344 struct hw_perf_event *hwc = &event->hw;
345 int idx = hwc->idx;
346
347 WARN_ON(idx < 0);
348 metag_pmu_stop(event, PERF_EF_UPDATE);
349 cpuc->events[idx] = NULL;
350 __clear_bit(idx, cpuc->used_mask);
351
352 perf_event_update_userpage(event);
353}
354
355static void metag_pmu_read(struct perf_event *event)
356{
357 struct hw_perf_event *hwc = &event->hw;
358
359 /* Don't read disabled counters! */
360 if (hwc->idx < 0)
361 return;
362
363 metag_pmu_event_update(event, hwc, hwc->idx);
364}
365
366static struct pmu pmu = {
367 .pmu_enable = metag_pmu_enable,
368 .pmu_disable = metag_pmu_disable,
369
370 .event_init = metag_pmu_event_init,
371
372 .add = metag_pmu_add,
373 .del = metag_pmu_del,
374 .start = metag_pmu_start,
375 .stop = metag_pmu_stop,
376 .read = metag_pmu_read,
377};
378
379/* Core counter specific functions */
380static const int metag_general_events[] = {
381 [PERF_COUNT_HW_CPU_CYCLES] = 0x03,
382 [PERF_COUNT_HW_INSTRUCTIONS] = 0x100,
383 [PERF_COUNT_HW_CACHE_REFERENCES] = -1,
384 [PERF_COUNT_HW_CACHE_MISSES] = -1,
385 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = -1,
386 [PERF_COUNT_HW_BRANCH_MISSES] = -1,
387 [PERF_COUNT_HW_BUS_CYCLES] = -1,
388 [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = -1,
389 [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = -1,
390 [PERF_COUNT_HW_REF_CPU_CYCLES] = -1,
391};
392
393static const int metag_pmu_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
394 [C(L1D)] = {
395 [C(OP_READ)] = {
396 [C(RESULT_ACCESS)] = 0x08,
397 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
398 },
399 [C(OP_WRITE)] = {
400 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
401 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
402 },
403 [C(OP_PREFETCH)] = {
404 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
405 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
406 },
407 },
408 [C(L1I)] = {
409 [C(OP_READ)] = {
410 [C(RESULT_ACCESS)] = 0x09,
411 [C(RESULT_MISS)] = 0x0a,
412 },
413 [C(OP_WRITE)] = {
414 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
415 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
416 },
417 [C(OP_PREFETCH)] = {
418 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
419 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
420 },
421 },
422 [C(LL)] = {
423 [C(OP_READ)] = {
424 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
425 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
426 },
427 [C(OP_WRITE)] = {
428 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
429 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
430 },
431 [C(OP_PREFETCH)] = {
432 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
433 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
434 },
435 },
436 [C(DTLB)] = {
437 [C(OP_READ)] = {
438 [C(RESULT_ACCESS)] = 0xd0,
439 [C(RESULT_MISS)] = 0xd2,
440 },
441 [C(OP_WRITE)] = {
442 [C(RESULT_ACCESS)] = 0xd4,
443 [C(RESULT_MISS)] = 0xd5,
444 },
445 [C(OP_PREFETCH)] = {
446 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
447 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
448 },
449 },
450 [C(ITLB)] = {
451 [C(OP_READ)] = {
452 [C(RESULT_ACCESS)] = 0xd1,
453 [C(RESULT_MISS)] = 0xd3,
454 },
455 [C(OP_WRITE)] = {
456 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
457 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
458 },
459 [C(OP_PREFETCH)] = {
460 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
461 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
462 },
463 },
464 [C(BPU)] = {
465 [C(OP_READ)] = {
466 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
467 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
468 },
469 [C(OP_WRITE)] = {
470 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
471 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
472 },
473 [C(OP_PREFETCH)] = {
474 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
475 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
476 },
477 },
478 [C(NODE)] = {
479 [C(OP_READ)] = {
480 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
481 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
482 },
483 [C(OP_WRITE)] = {
484 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
485 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
486 },
487 [C(OP_PREFETCH)] = {
488 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
489 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
490 },
491 },
492};
493
494
495static void _hw_perf_event_destroy(struct perf_event *event)
496{
497 atomic_t *active_events = &metag_pmu->active_events;
498 struct mutex *pmu_mutex = &metag_pmu->reserve_mutex;
499
500 if (atomic_dec_and_mutex_lock(active_events, pmu_mutex)) {
501 release_pmu_hardware();
502 mutex_unlock(pmu_mutex);
503 }
504}
505
506static int _hw_perf_cache_event(int config, int *evp)
507{
508 unsigned long type, op, result;
509 int ev;
510
511 if (!metag_pmu->cache_events)
512 return -EINVAL;
513
514 /* Unpack config */
515 type = config & 0xff;
516 op = (config >> 8) & 0xff;
517 result = (config >> 16) & 0xff;
518
519 if (type >= PERF_COUNT_HW_CACHE_MAX ||
520 op >= PERF_COUNT_HW_CACHE_OP_MAX ||
521 result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
522 return -EINVAL;
523
524 ev = (*metag_pmu->cache_events)[type][op][result];
525 if (ev == 0)
526 return -EOPNOTSUPP;
527 if (ev == -1)
528 return -EINVAL;
529 *evp = ev;
530 return 0;
531}
532
533static int _hw_perf_event_init(struct perf_event *event)
534{
535 struct perf_event_attr *attr = &event->attr;
536 struct hw_perf_event *hwc = &event->hw;
537 int mapping = 0, err;
538
539 switch (attr->type) {
540 case PERF_TYPE_HARDWARE:
541 if (attr->config >= PERF_COUNT_HW_MAX)
542 return -EINVAL;
543
544 mapping = metag_pmu->event_map(attr->config);
545 break;
546
547 case PERF_TYPE_HW_CACHE:
548 err = _hw_perf_cache_event(attr->config, &mapping);
549 if (err)
550 return err;
551 break;
552 }
553
554 /* Return early if the event is unsupported */
555 if (mapping == -1)
556 return -EINVAL;
557
558 /*
559 * Early cores have "limited" counters - they have no overflow
560 * interrupts - and so are unable to do sampling without extra work
561 * and timer assistance.
562 */
563 if (metag_pmu->max_period == 0) {
564 if (hwc->sample_period)
565 return -EINVAL;
566 }
567
568 /*
569 * Don't assign an index until the event is placed into the hardware.
570 * -1 signifies that we're still deciding where to put it. On SMP
571 * systems each core has its own set of counters, so we can't do any
572 * constraint checking yet.
573 */
574 hwc->idx = -1;
575
576 /* Store the event encoding */
577 hwc->config |= (unsigned long)mapping;
578
579 /*
580 * For non-sampling runs, limit the sample_period to half of the
581 * counter width. This way, the new counter value should be less
582 * likely to overtake the previous one (unless there are IRQ latency
583 * issues...)
584 */
585 if (metag_pmu->max_period) {
586 if (!hwc->sample_period) {
587 hwc->sample_period = metag_pmu->max_period >> 1;
588 hwc->last_period = hwc->sample_period;
589 local64_set(&hwc->period_left, hwc->sample_period);
590 }
591 }
592
593 return 0;
594}
595
596static void metag_pmu_enable_counter(struct hw_perf_event *event, int idx)
597{
598 struct cpu_hw_events *events = &__get_cpu_var(cpu_hw_events);
599 unsigned int config = event->config;
600 unsigned int tmp = config & 0xf0;
601 unsigned long flags;
602
603 raw_spin_lock_irqsave(&events->pmu_lock, flags);
604
605 /*
606 * Check if we're enabling the instruction counter (index of
607 * MAX_HWEVENTS - 1)
608 */
609 if (METAG_INST_COUNTER == idx) {
610 WARN_ONCE((config != 0x100),
611 "invalid configuration (%d) for counter (%d)\n",
612 config, idx);
613
614 /* Reset the cycle count */
615 __core_reg_set(TXTACTCYC, 0);
616 goto unlock;
617 }
618
619 /* Check for a core internal or performance channel event. */
620 if (tmp) {
621 void *perf_addr = (void *)PERF_COUNT(idx);
622
623 /*
624 * Anything other than a cycle count will write the low-
625 * nibble to the correct counter register.
626 */
627 switch (tmp) {
628 case 0xd0:
629 perf_addr = (void *)PERF_ICORE(idx);
630 break;
631
632 case 0xf0:
633 perf_addr = (void *)PERF_CHAN(idx);
634 break;
635 }
636
637 metag_out32((tmp & 0x0f), perf_addr);
638
639 /*
 640 * Now we use the high nibble as the performance event to
 641 * count.
642 */
643 config = tmp >> 4;
644 }
645
646 /*
647 * Enabled counters start from 0. Early cores clear the count on
648 * write but newer cores don't, so we make sure that the count is
649 * set to 0.
650 */
651 tmp = ((config & 0xf) << 28) |
652 ((1 << 24) << cpu_2_hwthread_id[get_cpu()]);
653 metag_out32(tmp, PERF_COUNT(idx));
654unlock:
655 raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
656}
657
658static void metag_pmu_disable_counter(struct hw_perf_event *event, int idx)
659{
660 struct cpu_hw_events *events = &__get_cpu_var(cpu_hw_events);
661 unsigned int tmp = 0;
662 unsigned long flags;
663
664 /*
665 * The cycle counter can't be disabled per se, as it's a hardware
666 * thread register which is always counting. We merely return if this
667 * is the counter we're attempting to disable.
668 */
669 if (METAG_INST_COUNTER == idx)
670 return;
671
672 /*
673 * The counter value _should_ have been read prior to disabling,
674 * as if we're running on an early core then the value gets reset to
675 * 0, and any read after that would be useless. On the newer cores,
676 * however, it's better to read-modify-update this for purposes of
677 * the overflow interrupt.
678 * Here we remove the thread id AND the event nibble (there are at
679 * least two events that count events that are core global and ignore
680 * the thread id mask). This only works because we don't mix thread
681 * performance counts, and event 0x00 requires a thread id mask!
682 */
683 raw_spin_lock_irqsave(&events->pmu_lock, flags);
684
685 tmp = metag_in32(PERF_COUNT(idx));
686 tmp &= 0x00ffffff;
687 metag_out32(tmp, PERF_COUNT(idx));
688
689 raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
690}
691
692static u64 metag_pmu_read_counter(int idx)
693{
694 u32 tmp = 0;
695
696 /* The act of reading the cycle counter also clears it */
697 if (METAG_INST_COUNTER == idx) {
698 __core_reg_swap(TXTACTCYC, tmp);
699 goto out;
700 }
701
702 tmp = metag_in32(PERF_COUNT(idx)) & 0x00ffffff;
703out:
704 return tmp;
705}
706
707static void metag_pmu_write_counter(int idx, u32 val)
708{
709 struct cpu_hw_events *events = &__get_cpu_var(cpu_hw_events);
710 u32 tmp = 0;
711 unsigned long flags;
712
713 /*
714 * This _shouldn't_ happen, but if it does, then we can just
715 * ignore the write, as the register is read-only and clear-on-write.
716 */
717 if (METAG_INST_COUNTER == idx)
718 return;
719
720 /*
721 * We'll keep the thread mask and event id, and just update the
 722 * counter itself. Also, we should bound the value to 24 bits.
723 */
724 raw_spin_lock_irqsave(&events->pmu_lock, flags);
725
726 val &= 0x00ffffff;
727 tmp = metag_in32(PERF_COUNT(idx)) & 0xff000000;
728 val |= tmp;
729 metag_out32(val, PERF_COUNT(idx));
730
731 raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
732}
733
734static int metag_pmu_event_map(int idx)
735{
736 return metag_general_events[idx];
737}
738
739static irqreturn_t metag_pmu_counter_overflow(int irq, void *dev)
740{
741 int idx = (int)dev;
742 struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
743 struct perf_event *event = cpuhw->events[idx];
744 struct hw_perf_event *hwc = &event->hw;
745 struct pt_regs *regs = get_irq_regs();
746 struct perf_sample_data sampledata;
747 unsigned long flags;
748 u32 counter = 0;
749
750 /*
751 * We need to stop the core temporarily from generating another
752 * interrupt while we disable this counter. However, we don't want
753 * to flag the counter as free
754 */
755 __global_lock2(flags);
756 counter = metag_in32(PERF_COUNT(idx));
757 metag_out32((counter & 0x00ffffff), PERF_COUNT(idx));
758 __global_unlock2(flags);
759
760 /* Update the counts and reset the sample period */
761 metag_pmu_event_update(event, hwc, idx);
762 perf_sample_data_init(&sampledata, 0, hwc->last_period);
763 metag_pmu_event_set_period(event, hwc, idx);
764
765 /*
766 * Enable the counter again once core overflow processing has
767 * completed.
768 */
769 if (!perf_event_overflow(event, &sampledata, regs))
770 metag_out32(counter, PERF_COUNT(idx));
771
772 return IRQ_HANDLED;
773}
774
775static struct metag_pmu _metag_pmu = {
776 .handle_irq = metag_pmu_counter_overflow,
777 .enable = metag_pmu_enable_counter,
778 .disable = metag_pmu_disable_counter,
779 .read = metag_pmu_read_counter,
780 .write = metag_pmu_write_counter,
781 .event_map = metag_pmu_event_map,
782 .cache_events = &metag_pmu_cache_events,
783 .max_period = MAX_PERIOD,
784 .max_events = MAX_HWEVENTS,
785};
786
787/* PMU CPU hotplug notifier */
788static int __cpuinit metag_pmu_cpu_notify(struct notifier_block *b,
789 unsigned long action, void *hcpu)
790{
791 unsigned int cpu = (unsigned int)hcpu;
792 struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
793
794 if ((action & ~CPU_TASKS_FROZEN) != CPU_STARTING)
795 return NOTIFY_DONE;
796
797 memset(cpuc, 0, sizeof(struct cpu_hw_events));
798 raw_spin_lock_init(&cpuc->pmu_lock);
799
800 return NOTIFY_OK;
801}
802
803static struct notifier_block __cpuinitdata metag_pmu_notifier = {
804 .notifier_call = metag_pmu_cpu_notify,
805};
806
807/* PMU Initialisation */
808static int __init init_hw_perf_events(void)
809{
810 int ret = 0, cpu;
811 u32 version = *(u32 *)METAC_ID;
812 int major = (version & METAC_ID_MAJOR_BITS) >> METAC_ID_MAJOR_S;
813 int min_rev = (version & (METAC_ID_MINOR_BITS | METAC_ID_REV_BITS))
814 >> METAC_ID_REV_S;
815
816 /* Not a Meta 2 core, then not supported */
817 if (0x02 > major) {
818 pr_info("no hardware counter support available\n");
819 goto out;
820 } else if (0x02 == major) {
821 metag_pmu = &_metag_pmu;
822
823 if (min_rev < 0x0104) {
824 /*
825 * A core without overflow interrupts, and clear-on-
826 * write counters.
827 */
828 metag_pmu->handle_irq = NULL;
829 metag_pmu->write = NULL;
830 metag_pmu->max_period = 0;
831 }
832
833 metag_pmu->name = "Meta 2";
834 metag_pmu->version = version;
835 metag_pmu->pmu = pmu;
836 }
837
838 pr_info("enabled with %s PMU driver, %d counters available\n",
839 metag_pmu->name, metag_pmu->max_events);
840
841 /* Initialise the active events and reservation mutex */
842 atomic_set(&metag_pmu->active_events, 0);
843 mutex_init(&metag_pmu->reserve_mutex);
844
845 /* Clear the counters */
846 metag_out32(0, PERF_COUNT(0));
847 metag_out32(0, PERF_COUNT(1));
848
849 for_each_possible_cpu(cpu) {
850 struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
851
852 memset(cpuc, 0, sizeof(struct cpu_hw_events));
853 raw_spin_lock_init(&cpuc->pmu_lock);
854 }
855
856 register_cpu_notifier(&metag_pmu_notifier);
857 ret = perf_pmu_register(&pmu, (char *)metag_pmu->name, PERF_TYPE_RAW);
858out:
859 return ret;
860}
861early_initcall(init_hw_perf_events);
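
The period programming above relies on the 24-bit counter wrapping: metag_pmu_event_set_period() writes (u64)(-left) & MAX_PERIOD, so the 0xffffff -> 0 transition, and hence the overflow interrupt, arrives after exactly 'left' further events. A stand-alone sketch of just that arithmetic, assuming nothing beyond the 24-bit width (no hardware access; the value of 'left' is arbitrary):

/*
 * Stand-alone check of the 24-bit period arithmetic used by
 * metag_pmu_event_set_period(): programming (-left) & MAX_PERIOD makes the
 * counter hit the 0xffffff -> 0 transition after exactly 'left' events.
 */
#include <stdio.h>
#include <stdint.h>

#define MAX_PERIOD	((1UL << 24) - 1)

int main(void)
{
	uint32_t left = 1000;			/* events until the next sample */
	uint32_t start = (uint32_t)(-left) & MAX_PERIOD;
	uint32_t counter = start;
	unsigned long events = 0;

	/* Count until the 24-bit register wraps around to zero. */
	do {
		counter = (counter + 1) & MAX_PERIOD;
		events++;
	} while (counter != 0);

	printf("programmed 0x%06x, overflow after %lu events\n",
	       (unsigned int)start, events);	/* prints 1000 */
	return 0;
}

The same narrow width is why _hw_perf_event_init() defaults sample_period to max_period >> 1 for plain counting runs, keeping the new count from overtaking the previous one between updates.
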
diff --git a/arch/metag/kernel/perf/perf_event.h b/arch/metag/kernel/perf/perf_event.h
new file mode 100644
index 000000000000..fd10a1345b67
--- /dev/null
+++ b/arch/metag/kernel/perf/perf_event.h
@@ -0,0 +1,106 @@
1/*
2 * Meta performance counter support.
3 * Copyright (C) 2012 Imagination Technologies Ltd
4 *
5 * This file is subject to the terms and conditions of the GNU General Public
6 * License. See the file "COPYING" in the main directory of this archive
7 * for more details.
8 */
9
10#ifndef METAG_PERF_EVENT_H_
11#define METAG_PERF_EVENT_H_
12
13#include <linux/kernel.h>
14#include <linux/interrupt.h>
15#include <linux/perf_event.h>
16
17/* For performance counter definitions */
18#include <asm/metag_mem.h>
19
20/*
21 * The Meta core has two performance counters, with 24-bit resolution. Newer
22 * cores generate an overflow interrupt on transition from 0xffffff to 0.
23 *
24 * Each counter consists of the counter id, hardware thread id, and the count
25 * itself; each counter can be assigned to multiple hardware threads at any
26 * one time, with the returned count being an aggregate of events. A small
27 * number of events are thread global, i.e. they count the aggregate of all
28 * threads' events, regardless of the thread selected.
29 *
30 * Newer cores can store an arbitrary 24-bit number in the counter, whereas
31 * older cores will clear the counter bits on write.
32 *
33 * We also have a pseudo-counter in the form of the thread active cycles
34 * counter (which, incidentally, is also bound to
35 */
36
37#define MAX_HWEVENTS 3
38#define MAX_PERIOD ((1UL << 24) - 1)
39#define METAG_INST_COUNTER (MAX_HWEVENTS - 1)
40
41/**
42 * struct cpu_hw_events - a processor core's performance events
43 * @events: an array of perf_events active for a given index.
44 * @used_mask: a bitmap of in-use counters.
45 * @pmu_lock: a perf counter lock
46 *
47 * This is a per-cpu/core structure that maintains a record of its
48 * performance counters' state.
49 */
50struct cpu_hw_events {
51 struct perf_event *events[MAX_HWEVENTS];
52 unsigned long used_mask[BITS_TO_LONGS(MAX_HWEVENTS)];
53 raw_spinlock_t pmu_lock;
54};
55
56/**
57 * struct metag_pmu - the Meta PMU structure
58 * @pmu: core pmu structure
59 * @name: pmu name
60 * @version: core version
61 * @handle_irq: overflow interrupt handler
62 * @enable: enable a counter
63 * @disable: disable a counter
64 * @read: read the value of a counter
65 * @write: write a value to a counter
66 * @event_map: kernel event to counter event id map
67 * @cache_events: kernel cache counter to core cache counter map
68 * @max_period: maximum value of the counter before overflow
69 * @max_events: maximum number of counters available at any one time
70 * @active_events: number of active counters
71 * @reserve_mutex: counter reservation mutex
72 *
73 * This describes the main functionality and data used by the performance
74 * event core.
75 */
76struct metag_pmu {
77 struct pmu pmu;
78 const char *name;
79 u32 version;
80 irqreturn_t (*handle_irq)(int irq_num, void *dev);
81 void (*enable)(struct hw_perf_event *evt, int idx);
82 void (*disable)(struct hw_perf_event *evt, int idx);
83 u64 (*read)(int idx);
84 void (*write)(int idx, u32 val);
85 int (*event_map)(int idx);
86 const int (*cache_events)[PERF_COUNT_HW_CACHE_MAX]
87 [PERF_COUNT_HW_CACHE_OP_MAX]
88 [PERF_COUNT_HW_CACHE_RESULT_MAX];
89 u32 max_period;
90 int max_events;
91 atomic_t active_events;
92 struct mutex reserve_mutex;
93};
94
95/* Convenience macros for accessing the perf counters */
96/* Define some convenience accessors */
97#define PERF_COUNT(x) (PERF_COUNT0 + (sizeof(u64) * (x)))
98#define PERF_ICORE(x) (PERF_ICORE0 + (sizeof(u64) * (x)))
99#define PERF_CHAN(x) (PERF_CHAN0 + (sizeof(u64) * (x)))
100
101/* Cache index macros */
102#define C(x) PERF_COUNT_HW_CACHE_##x
103#define CACHE_OP_UNSUPPORTED 0xfffe
104#define CACHE_OP_NONSENSE 0xffff
105
106#endif
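
For reference, the cache_events table declared here (and filled in by metag_pmu_cache_events in perf_event.c) is indexed with the generic PERF_TYPE_HW_CACHE config encoding that _hw_perf_cache_event() unpacks: cache id in bits 0-7, operation in bits 8-15, result in bits 16-23. A small illustrative sketch (not part of the patch) packing an L1I read-miss request, which that table maps to hardware event 0x0a:

/*
 * Sketch of the generic PERF_TYPE_HW_CACHE config encoding that
 * _hw_perf_cache_event() unpacks: cache id | (op << 8) | (result << 16).
 * Illustrative only; not part of the patch.
 */
#include <linux/perf_event.h>
#include <stdio.h>

int main(void)
{
	unsigned long config = PERF_COUNT_HW_CACHE_L1I |
			       (PERF_COUNT_HW_CACHE_OP_READ << 8) |
			       (PERF_COUNT_HW_CACHE_RESULT_MISS << 16);

	/* attr.type would be PERF_TYPE_HW_CACHE; config comes out as 0x10001. */
	printf("attr.config = 0x%lx\n", config);
	return 0;
}
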
diff --git a/arch/metag/kernel/perf_callchain.c b/arch/metag/kernel/perf_callchain.c
new file mode 100644
index 000000000000..315633461a94
--- /dev/null
+++ b/arch/metag/kernel/perf_callchain.c
@@ -0,0 +1,96 @@
1/*
2 * Perf callchain handling code.
3 *
4 * Based on the ARM perf implementation.
5 */
6
7#include <linux/kernel.h>
8#include <linux/sched.h>
9#include <linux/perf_event.h>
10#include <linux/uaccess.h>
11#include <asm/ptrace.h>
12#include <asm/stacktrace.h>
13
14static bool is_valid_call(unsigned long calladdr)
15{
16 unsigned int callinsn;
17
18 /* Check the possible return address is aligned. */
19 if (!(calladdr & 0x3)) {
20 if (!get_user(callinsn, (unsigned int *)calladdr)) {
21 /* Check for CALLR or SWAP PC,D1RtP. */
22 if ((callinsn & 0xff000000) == 0xab000000 ||
23 callinsn == 0xa3200aa0)
24 return true;
25 }
26 }
27 return false;
28}
29
30static struct metag_frame __user *
31user_backtrace(struct metag_frame __user *user_frame,
32 struct perf_callchain_entry *entry)
33{
34 struct metag_frame frame;
35 unsigned long calladdr;
36
37 /* We cannot rely on having frame pointers in user code. */
38 while (1) {
39 /* Also check accessibility of one struct frame beyond */
40 if (!access_ok(VERIFY_READ, user_frame, sizeof(frame)))
41 return 0;
42 if (__copy_from_user_inatomic(&frame, user_frame,
43 sizeof(frame)))
44 return 0;
45
46 --user_frame;
47
48 calladdr = frame.lr - 4;
49 if (is_valid_call(calladdr)) {
50 perf_callchain_store(entry, calladdr);
51 return user_frame;
52 }
53 }
54
55 return 0;
56}
57
58void
59perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
60{
61 unsigned long sp = regs->ctx.AX[0].U0;
62 struct metag_frame __user *frame;
63
64 frame = (struct metag_frame __user *)sp;
65
66 --frame;
67
68 while ((entry->nr < PERF_MAX_STACK_DEPTH) && frame)
69 frame = user_backtrace(frame, entry);
70}
71
72/*
73 * Gets called by walk_stackframe() for every stackframe. This will be called
 74 * whilst unwinding the stackframe and is like a subroutine return so we use
75 * the PC.
76 */
77static int
78callchain_trace(struct stackframe *fr,
79 void *data)
80{
81 struct perf_callchain_entry *entry = data;
82 perf_callchain_store(entry, fr->pc);
83 return 0;
84}
85
86void
87perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
88{
89 struct stackframe fr;
90
91 fr.fp = regs->ctx.AX[1].U0;
92 fr.sp = regs->ctx.AX[0].U0;
93 fr.lr = regs->ctx.DX[4].U1;
94 fr.pc = regs->ctx.CurrPC;
95 walk_stackframe(&fr, callchain_trace, entry);
96}
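
These two hooks only run for sampling events that request callchains, so in practice they are limited to cores new enough to have the counter overflow interrupt (older cores have sampling rejected in _hw_perf_event_init()). A minimal illustrative attribute setup (not from this patch) showing the sample_type that triggers them:

/*
 * Sketch of a sampling event that requests callchains, which is what drives
 * perf_callchain_user()/perf_callchain_kernel() above.  Illustrative only;
 * a real consumer would mmap() the fd and parse PERF_RECORD_SAMPLE records
 * from the ring buffer.
 */
#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	struct perf_event_attr attr;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.config = PERF_COUNT_HW_CPU_CYCLES;
	attr.sample_period = 100000;
	attr.sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_CALLCHAIN;

	fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
	if (fd < 0) {
		perror("perf_event_open");
		return 1;
	}
	close(fd);
	return 0;
}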