author     James Hogan <james.hogan@imgtec.com>   2012-10-05 11:54:55 -0400
committer  James Hogan <james.hogan@imgtec.com>   2013-03-02 15:09:54 -0500
commit     903b20ad6810e05bc5f7cc038257e80463e71001 (patch)
tree       592087306a334c55c00aba52ba1246da68f5f636
parent     5633004cc2498ff50a5b88d415d3746ff0c301f2 (diff)
metag: Perf
Add Perf support for metag.
Signed-off-by: James Hogan <james.hogan@imgtec.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
-rw-r--r--  arch/metag/Kconfig                   |   1
-rw-r--r--  arch/metag/include/asm/perf_event.h  |   4
-rw-r--r--  arch/metag/kernel/Makefile           |   2
-rw-r--r--  arch/metag/kernel/perf/Makefile      |   3
-rw-r--r--  arch/metag/kernel/perf/perf_event.c  | 861
-rw-r--r--  arch/metag/kernel/perf/perf_event.h  | 106
-rw-r--r--  arch/metag/kernel/perf_callchain.c   |  96
7 files changed, 1073 insertions(+), 0 deletions(-)
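[Editor's note, not part of the patch: a minimal user-space sketch of what this driver enables — counting PERF_COUNT_HW_INSTRUCTIONS through the generic perf_event_open() syscall. The workload in the middle is a placeholder; everything else uses the standard perf ABI.]

```c
/* Hedged illustration only -- not part of this commit. */
#include <linux/perf_event.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <string.h>
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	struct perf_event_attr attr;
	uint64_t count;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_HARDWARE;           /* handled by metag_pmu_event_init() */
	attr.config = PERF_COUNT_HW_INSTRUCTIONS; /* maps to raw event 0x100 in this patch */
	attr.disabled = 1;

	fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
	if (fd < 0)
		return 1;

	ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
	/* ... workload under measurement ... */
	ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);

	read(fd, &count, sizeof(count));
	printf("instructions: %llu\n", (unsigned long long)count);
	close(fd);
	return 0;
}
```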
diff --git a/arch/metag/Kconfig b/arch/metag/Kconfig
index f786e6e09700..47972025818f 100644
--- a/arch/metag/Kconfig
+++ b/arch/metag/Kconfig
@@ -22,6 +22,7 @@ config METAG
 	select HAVE_MEMBLOCK
 	select HAVE_MEMBLOCK_NODE_MAP
 	select HAVE_MOD_ARCH_SPECIFIC
+	select HAVE_PERF_EVENTS
 	select HAVE_SYSCALL_TRACEPOINTS
 	select IRQ_DOMAIN
 	select MODULES_USE_ELF_RELA
diff --git a/arch/metag/include/asm/perf_event.h b/arch/metag/include/asm/perf_event.h
new file mode 100644
index 000000000000..105bbff0149f
--- /dev/null
+++ b/arch/metag/include/asm/perf_event.h
@@ -0,0 +1,4 @@
+#ifndef __ASM_METAG_PERF_EVENT_H
+#define __ASM_METAG_PERF_EVENT_H
+
+#endif /* __ASM_METAG_PERF_EVENT_H */
diff --git a/arch/metag/kernel/Makefile b/arch/metag/kernel/Makefile
index e985d0ca618c..a5e4ba6fd20a 100644
--- a/arch/metag/kernel/Makefile
+++ b/arch/metag/kernel/Makefile
@@ -25,6 +25,8 @@ obj-y += topology.o
 obj-y += traps.o
 obj-y += user_gateway.o
 
+obj-$(CONFIG_PERF_EVENTS) += perf/
+
 obj-$(CONFIG_METAG_COREMEM) += coremem.o
 obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
 obj-$(CONFIG_FUNCTION_TRACER) += ftrace_stub.o
diff --git a/arch/metag/kernel/perf/Makefile b/arch/metag/kernel/perf/Makefile
new file mode 100644
index 000000000000..b158cb27208d
--- /dev/null
+++ b/arch/metag/kernel/perf/Makefile
@@ -0,0 +1,3 @@
+# Makefile for performance event core
+
+obj-y += perf_event.o
diff --git a/arch/metag/kernel/perf/perf_event.c b/arch/metag/kernel/perf/perf_event.c
new file mode 100644
index 000000000000..a876d5ff3897
--- /dev/null
+++ b/arch/metag/kernel/perf/perf_event.c
@@ -0,0 +1,861 @@
+/*
+ * Meta performance counter support.
+ *  Copyright (C) 2012 Imagination Technologies Ltd
+ *
+ * This code is based on the sh pmu code:
+ *  Copyright (C) 2009 Paul Mundt
+ *
+ * and on the arm pmu code:
+ *  Copyright (C) 2009 picoChip Designs, Ltd., James Iles
+ *  Copyright (C) 2010 ARM Ltd., Will Deacon <will.deacon@arm.com>
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+
+#include <linux/atomic.h>
+#include <linux/export.h>
+#include <linux/init.h>
+#include <linux/irqchip/metag.h>
+#include <linux/perf_event.h>
+#include <linux/slab.h>
+
+#include <asm/core_reg.h>
+#include <asm/hwthread.h>
+#include <asm/io.h>
+#include <asm/irq.h>
+
+#include "perf_event.h"
+
+static int _hw_perf_event_init(struct perf_event *);
+static void _hw_perf_event_destroy(struct perf_event *);
+
+/* Determines which core type we are */
+static struct metag_pmu *metag_pmu __read_mostly;
+
+/* Processor specific data */
+static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events);
+
+/* PMU admin */
+const char *perf_pmu_name(void)
+{
+	if (metag_pmu)
+		return metag_pmu->pmu.name;
+
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(perf_pmu_name);
+
+int perf_num_counters(void)
+{
+	if (metag_pmu)
+		return metag_pmu->max_events;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(perf_num_counters);
+
+static inline int metag_pmu_initialised(void)
+{
+	return !!metag_pmu;
+}
+
+static void release_pmu_hardware(void)
+{
+	int irq;
+	unsigned int version = (metag_pmu->version &
+			(METAC_ID_MINOR_BITS | METAC_ID_REV_BITS)) >>
+			METAC_ID_REV_S;
+
+	/* Early cores don't have overflow interrupts */
+	if (version < 0x0104)
+		return;
+
+	irq = internal_irq_map(17);
+	if (irq >= 0)
+		free_irq(irq, (void *)1);
+
+	irq = internal_irq_map(16);
+	if (irq >= 0)
+		free_irq(irq, (void *)0);
+}
+
+static int reserve_pmu_hardware(void)
+{
+	int err = 0, irq[2];
+	unsigned int version = (metag_pmu->version &
+			(METAC_ID_MINOR_BITS | METAC_ID_REV_BITS)) >>
+			METAC_ID_REV_S;
+
+	/* Early cores don't have overflow interrupts */
+	if (version < 0x0104)
+		goto out;
+
+	/*
+	 * Bit 16 on HWSTATMETA is the interrupt for performance counter 0;
+	 * similarly, 17 is the interrupt for performance counter 1.
+	 * We can't (yet) interrupt on the cycle counter, because it's a
+	 * register; it does, however, hold a 32-bit value as opposed to 24-bit.
+	 */
+	irq[0] = internal_irq_map(16);
+	if (irq[0] < 0) {
+		pr_err("unable to map internal IRQ %d\n", 16);
+		goto out;
+	}
+	err = request_irq(irq[0], metag_pmu->handle_irq, IRQF_NOBALANCING,
+			"metagpmu0", (void *)0);
+	if (err) {
+		pr_err("unable to request IRQ%d for metag PMU counters\n",
+				irq[0]);
+		goto out;
+	}
+
+	irq[1] = internal_irq_map(17);
+	if (irq[1] < 0) {
+		pr_err("unable to map internal IRQ %d\n", 17);
+		goto out_irq1;
+	}
+	err = request_irq(irq[1], metag_pmu->handle_irq, IRQF_NOBALANCING,
+			"metagpmu1", (void *)1);
+	if (err) {
+		pr_err("unable to request IRQ%d for metag PMU counters\n",
+				irq[1]);
+		goto out_irq1;
+	}
+
+	return 0;
+
+out_irq1:
+	free_irq(irq[0], (void *)0);
+out:
+	return err;
+}
+
+/* PMU operations */
+static void metag_pmu_enable(struct pmu *pmu)
+{
+}
+
+static void metag_pmu_disable(struct pmu *pmu)
+{
+}
+
+static int metag_pmu_event_init(struct perf_event *event)
+{
+	int err = 0;
+	atomic_t *active_events = &metag_pmu->active_events;
+
+	if (!metag_pmu_initialised()) {
+		err = -ENODEV;
+		goto out;
+	}
+
+	if (has_branch_stack(event))
+		return -EOPNOTSUPP;
+
+	event->destroy = _hw_perf_event_destroy;
+
+	if (!atomic_inc_not_zero(active_events)) {
+		mutex_lock(&metag_pmu->reserve_mutex);
+		if (atomic_read(active_events) == 0)
+			err = reserve_pmu_hardware();
+
+		if (!err)
+			atomic_inc(active_events);
+
+		mutex_unlock(&metag_pmu->reserve_mutex);
+	}
+
+	/* Hardware and caches counters */
+	switch (event->attr.type) {
+	case PERF_TYPE_HARDWARE:
+	case PERF_TYPE_HW_CACHE:
+		err = _hw_perf_event_init(event);
+		break;
+
+	default:
+		return -ENOENT;
+	}
+
+	if (err)
+		event->destroy(event);
+
+out:
+	return err;
+}
+
+void metag_pmu_event_update(struct perf_event *event,
+		struct hw_perf_event *hwc, int idx)
+{
+	u64 prev_raw_count, new_raw_count;
+	s64 delta;
+
+	/*
+	 * If this counter is chained, it may be that the previous counter
+	 * value has been changed beneath us.
+	 *
+	 * To get around this, we read and exchange the new raw count, then
+	 * add the delta (new - prev) to the generic counter atomically.
+	 *
+	 * Without interrupts, this is the simplest approach.
+	 */
+again:
+	prev_raw_count = local64_read(&hwc->prev_count);
+	new_raw_count = metag_pmu->read(idx);
+
+	if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
+			new_raw_count) != prev_raw_count)
+		goto again;
+
+	/*
+	 * Calculate the delta and add it to the counter.
+	 */
+	delta = new_raw_count - prev_raw_count;
+
+	local64_add(delta, &event->count);
+}
+
+int metag_pmu_event_set_period(struct perf_event *event,
+		struct hw_perf_event *hwc, int idx)
+{
+	s64 left = local64_read(&hwc->period_left);
+	s64 period = hwc->sample_period;
+	int ret = 0;
+
+	if (unlikely(left <= -period)) {
+		left = period;
+		local64_set(&hwc->period_left, left);
+		hwc->last_period = period;
+		ret = 1;
+	}
+
+	if (unlikely(left <= 0)) {
+		left += period;
+		local64_set(&hwc->period_left, left);
+		hwc->last_period = period;
+		ret = 1;
+	}
+
+	if (left > (s64)metag_pmu->max_period)
+		left = metag_pmu->max_period;
+
+	if (metag_pmu->write)
+		metag_pmu->write(idx, (u64)(-left) & MAX_PERIOD);
+
+	perf_event_update_userpage(event);
+
+	return ret;
+}
+
+static void metag_pmu_start(struct perf_event *event, int flags)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->idx;
+
+	if (WARN_ON_ONCE(idx == -1))
+		return;
+
+	/*
+	 * We always have to reprogram the period, so ignore PERF_EF_RELOAD.
+	 */
+	if (flags & PERF_EF_RELOAD)
+		WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
+
+	hwc->state = 0;
+
+	/*
+	 * Reset the period.
+	 * Some counters can't be stopped (i.e. are core global), so when the
+	 * counter was 'stopped' we merely disabled the IRQ. If we don't reset
+	 * the period, then we'll either: a) get an overflow too soon;
+	 * or b) get one too late if the overflow happened since disabling.
+	 * Obviously, this has little bearing on cores without the overflow
+	 * interrupt, as the performance counter resets to zero on write
+	 * anyway.
+	 */
+	if (metag_pmu->max_period)
+		metag_pmu_event_set_period(event, hwc, hwc->idx);
+	cpuc->events[idx] = event;
+	metag_pmu->enable(hwc, idx);
+}
+
+static void metag_pmu_stop(struct perf_event *event, int flags)
+{
+	struct hw_perf_event *hwc = &event->hw;
+
+	/*
+	 * We should always update the counter on stop; see comment above
+	 * why.
+	 */
+	if (!(hwc->state & PERF_HES_STOPPED)) {
+		metag_pmu_event_update(event, hwc, hwc->idx);
+		metag_pmu->disable(hwc, hwc->idx);
+		hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
+	}
+}
+
+static int metag_pmu_add(struct perf_event *event, int flags)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = 0, ret = 0;
+
+	perf_pmu_disable(event->pmu);
+
+	/* check whether we're counting instructions */
+	if (hwc->config == 0x100) {
+		if (__test_and_set_bit(METAG_INST_COUNTER,
+				cpuc->used_mask)) {
+			ret = -EAGAIN;
+			goto out;
+		}
+		idx = METAG_INST_COUNTER;
+	} else {
+		/* Check whether we have a spare counter */
+		idx = find_first_zero_bit(cpuc->used_mask,
+				atomic_read(&metag_pmu->active_events));
+		if (idx >= METAG_INST_COUNTER) {
+			ret = -EAGAIN;
+			goto out;
+		}
+
+		__set_bit(idx, cpuc->used_mask);
+	}
+	hwc->idx = idx;
+
+	/* Make sure the counter is disabled */
+	metag_pmu->disable(hwc, idx);
+
+	hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
+	if (flags & PERF_EF_START)
+		metag_pmu_start(event, PERF_EF_RELOAD);
+
+	perf_event_update_userpage(event);
+out:
+	perf_pmu_enable(event->pmu);
+	return ret;
+}
+
+static void metag_pmu_del(struct perf_event *event, int flags)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->idx;
+
+	WARN_ON(idx < 0);
+	metag_pmu_stop(event, PERF_EF_UPDATE);
+	cpuc->events[idx] = NULL;
+	__clear_bit(idx, cpuc->used_mask);
+
+	perf_event_update_userpage(event);
+}
+
+static void metag_pmu_read(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+
+	/* Don't read disabled counters! */
+	if (hwc->idx < 0)
+		return;
+
+	metag_pmu_event_update(event, hwc, hwc->idx);
+}
+
+static struct pmu pmu = {
+	.pmu_enable	= metag_pmu_enable,
+	.pmu_disable	= metag_pmu_disable,
+
+	.event_init	= metag_pmu_event_init,
+
+	.add		= metag_pmu_add,
+	.del		= metag_pmu_del,
+	.start		= metag_pmu_start,
+	.stop		= metag_pmu_stop,
+	.read		= metag_pmu_read,
+};
+
+/* Core counter specific functions */
+static const int metag_general_events[] = {
+	[PERF_COUNT_HW_CPU_CYCLES] = 0x03,
+	[PERF_COUNT_HW_INSTRUCTIONS] = 0x100,
+	[PERF_COUNT_HW_CACHE_REFERENCES] = -1,
+	[PERF_COUNT_HW_CACHE_MISSES] = -1,
+	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = -1,
+	[PERF_COUNT_HW_BRANCH_MISSES] = -1,
+	[PERF_COUNT_HW_BUS_CYCLES] = -1,
+	[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = -1,
+	[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = -1,
+	[PERF_COUNT_HW_REF_CPU_CYCLES] = -1,
+};
+
+static const int metag_pmu_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
+	[C(L1D)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)] = 0x08,
+			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+		},
+	},
+	[C(L1I)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)] = 0x09,
+			[C(RESULT_MISS)] = 0x0a,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+		},
+	},
+	[C(LL)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+		},
+	},
+	[C(DTLB)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)] = 0xd0,
+			[C(RESULT_MISS)] = 0xd2,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)] = 0xd4,
+			[C(RESULT_MISS)] = 0xd5,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+		},
+	},
+	[C(ITLB)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)] = 0xd1,
+			[C(RESULT_MISS)] = 0xd3,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+		},
+	},
+	[C(BPU)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+		},
+	},
+	[C(NODE)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+		},
+	},
+};
+
+
+static void _hw_perf_event_destroy(struct perf_event *event)
+{
+	atomic_t *active_events = &metag_pmu->active_events;
+	struct mutex *pmu_mutex = &metag_pmu->reserve_mutex;
+
+	if (atomic_dec_and_mutex_lock(active_events, pmu_mutex)) {
+		release_pmu_hardware();
+		mutex_unlock(pmu_mutex);
+	}
+}
+
+static int _hw_perf_cache_event(int config, int *evp)
+{
+	unsigned long type, op, result;
+	int ev;
+
+	if (!metag_pmu->cache_events)
+		return -EINVAL;
+
+	/* Unpack config */
+	type = config & 0xff;
+	op = (config >> 8) & 0xff;
+	result = (config >> 16) & 0xff;
+
+	if (type >= PERF_COUNT_HW_CACHE_MAX ||
+			op >= PERF_COUNT_HW_CACHE_OP_MAX ||
+			result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
+		return -EINVAL;
+
+	ev = (*metag_pmu->cache_events)[type][op][result];
+	if (ev == 0)
+		return -EOPNOTSUPP;
+	if (ev == -1)
+		return -EINVAL;
+	*evp = ev;
+	return 0;
+}
+
+static int _hw_perf_event_init(struct perf_event *event)
+{
+	struct perf_event_attr *attr = &event->attr;
+	struct hw_perf_event *hwc = &event->hw;
+	int mapping = 0, err;
+
+	switch (attr->type) {
+	case PERF_TYPE_HARDWARE:
+		if (attr->config >= PERF_COUNT_HW_MAX)
+			return -EINVAL;
+
+		mapping = metag_pmu->event_map(attr->config);
+		break;
+
+	case PERF_TYPE_HW_CACHE:
+		err = _hw_perf_cache_event(attr->config, &mapping);
+		if (err)
+			return err;
+		break;
+	}
+
+	/* Return early if the event is unsupported */
+	if (mapping == -1)
+		return -EINVAL;
+
+	/*
+	 * Early cores have "limited" counters - they have no overflow
+	 * interrupts - and so are unable to do sampling without extra work
+	 * and timer assistance.
+	 */
+	if (metag_pmu->max_period == 0) {
+		if (hwc->sample_period)
+			return -EINVAL;
+	}
+
+	/*
+	 * Don't assign an index until the event is placed into the hardware.
+	 * -1 signifies that we're still deciding where to put it. On SMP
+	 * systems each core has its own set of counters, so we can't do any
+	 * constraint checking yet.
+	 */
+	hwc->idx = -1;
+
+	/* Store the event encoding */
+	hwc->config |= (unsigned long)mapping;
+
+	/*
+	 * For non-sampling runs, limit the sample_period to half of the
+	 * counter width. This way, the new counter value should be less
+	 * likely to overtake the previous one (unless there are IRQ latency
+	 * issues...)
+	 */
+	if (metag_pmu->max_period) {
+		if (!hwc->sample_period) {
+			hwc->sample_period = metag_pmu->max_period >> 1;
+			hwc->last_period = hwc->sample_period;
+			local64_set(&hwc->period_left, hwc->sample_period);
+		}
+	}
+
+	return 0;
+}
+
+static void metag_pmu_enable_counter(struct hw_perf_event *event, int idx)
+{
+	struct cpu_hw_events *events = &__get_cpu_var(cpu_hw_events);
+	unsigned int config = event->config;
+	unsigned int tmp = config & 0xf0;
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
+
+	/*
+	 * Check if we're enabling the instruction counter (index of
+	 * MAX_HWEVENTS - 1)
+	 */
+	if (METAG_INST_COUNTER == idx) {
+		WARN_ONCE((config != 0x100),
+			"invalid configuration (%d) for counter (%d)\n",
+			config, idx);
+
+		/* Reset the cycle count */
+		__core_reg_set(TXTACTCYC, 0);
+		goto unlock;
+	}
+
+	/* Check for a core internal or performance channel event. */
+	if (tmp) {
+		void *perf_addr = (void *)PERF_COUNT(idx);
+
+		/*
+		 * Anything other than a cycle count will write the low
+		 * nibble to the correct counter register.
+		 */
+		switch (tmp) {
+		case 0xd0:
+			perf_addr = (void *)PERF_ICORE(idx);
+			break;
+
+		case 0xf0:
+			perf_addr = (void *)PERF_CHAN(idx);
+			break;
+		}
+
+		metag_out32((tmp & 0x0f), perf_addr);
+
+		/*
+		 * Now we use the high nibble as the performance event to
+		 * count.
+		 */
+		config = tmp >> 4;
+	}
+
+	/*
+	 * Enabled counters start from 0. Early cores clear the count on
+	 * write but newer cores don't, so we make sure that the count is
+	 * set to 0.
+	 */
+	tmp = ((config & 0xf) << 28) |
+			((1 << 24) << cpu_2_hwthread_id[get_cpu()]);
+	metag_out32(tmp, PERF_COUNT(idx));
+unlock:
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
+}
+
+static void metag_pmu_disable_counter(struct hw_perf_event *event, int idx)
+{
+	struct cpu_hw_events *events = &__get_cpu_var(cpu_hw_events);
+	unsigned int tmp = 0;
+	unsigned long flags;
+
+	/*
+	 * The cycle counter can't be disabled per se, as it's a hardware
+	 * thread register which is always counting. We merely return if this
+	 * is the counter we're attempting to disable.
+	 */
+	if (METAG_INST_COUNTER == idx)
+		return;
+
+	/*
+	 * The counter value _should_ have been read prior to disabling,
+	 * as if we're running on an early core then the value gets reset to
+	 * 0, and any read after that would be useless. On the newer cores,
+	 * however, it's better to read-modify-update this for purposes of
+	 * the overflow interrupt.
+	 * Here we remove the thread id AND the event nibble (there are at
+	 * least two events that count events that are core global and ignore
+	 * the thread id mask). This only works because we don't mix thread
+	 * performance counts, and event 0x00 requires a thread id mask!
+	 */
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
+
+	tmp = metag_in32(PERF_COUNT(idx));
+	tmp &= 0x00ffffff;
+	metag_out32(tmp, PERF_COUNT(idx));
+
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
+}
+
+static u64 metag_pmu_read_counter(int idx)
+{
+	u32 tmp = 0;
+
+	/* The act of reading the cycle counter also clears it */
+	if (METAG_INST_COUNTER == idx) {
+		__core_reg_swap(TXTACTCYC, tmp);
+		goto out;
+	}
+
+	tmp = metag_in32(PERF_COUNT(idx)) & 0x00ffffff;
+out:
+	return tmp;
+}
+
+static void metag_pmu_write_counter(int idx, u32 val)
+{
+	struct cpu_hw_events *events = &__get_cpu_var(cpu_hw_events);
+	u32 tmp = 0;
+	unsigned long flags;
+
+	/*
+	 * This _shouldn't_ happen, but if it does, then we can just
+	 * ignore the write, as the register is read-only and clear-on-write.
+	 */
+	if (METAG_INST_COUNTER == idx)
+		return;
+
+	/*
+	 * We'll keep the thread mask and event id, and just update the
+	 * counter itself. Also, we should bound the value to 24 bits.
+	 */
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
+
+	val &= 0x00ffffff;
+	tmp = metag_in32(PERF_COUNT(idx)) & 0xff000000;
+	val |= tmp;
+	metag_out32(val, PERF_COUNT(idx));
+
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
+}
+
+static int metag_pmu_event_map(int idx)
+{
+	return metag_general_events[idx];
+}
+
+static irqreturn_t metag_pmu_counter_overflow(int irq, void *dev)
+{
+	int idx = (int)dev;
+	struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
+	struct perf_event *event = cpuhw->events[idx];
+	struct hw_perf_event *hwc = &event->hw;
+	struct pt_regs *regs = get_irq_regs();
+	struct perf_sample_data sampledata;
+	unsigned long flags;
+	u32 counter = 0;
+
+	/*
+	 * We need to stop the core temporarily from generating another
+	 * interrupt while we disable this counter. However, we don't want
+	 * to flag the counter as free.
+	 */
+	__global_lock2(flags);
+	counter = metag_in32(PERF_COUNT(idx));
+	metag_out32((counter & 0x00ffffff), PERF_COUNT(idx));
+	__global_unlock2(flags);
+
+	/* Update the counts and reset the sample period */
+	metag_pmu_event_update(event, hwc, idx);
+	perf_sample_data_init(&sampledata, 0, hwc->last_period);
+	metag_pmu_event_set_period(event, hwc, idx);
+
+	/*
+	 * Enable the counter again once core overflow processing has
+	 * completed.
+	 */
+	if (!perf_event_overflow(event, &sampledata, regs))
+		metag_out32(counter, PERF_COUNT(idx));
+
+	return IRQ_HANDLED;
+}
+
+static struct metag_pmu _metag_pmu = {
+	.handle_irq	= metag_pmu_counter_overflow,
+	.enable		= metag_pmu_enable_counter,
+	.disable	= metag_pmu_disable_counter,
+	.read		= metag_pmu_read_counter,
+	.write		= metag_pmu_write_counter,
+	.event_map	= metag_pmu_event_map,
+	.cache_events	= &metag_pmu_cache_events,
+	.max_period	= MAX_PERIOD,
+	.max_events	= MAX_HWEVENTS,
+};
+
+/* PMU CPU hotplug notifier */
+static int __cpuinit metag_pmu_cpu_notify(struct notifier_block *b,
+		unsigned long action, void *hcpu)
+{
+	unsigned int cpu = (unsigned int)hcpu;
+	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
+
+	if ((action & ~CPU_TASKS_FROZEN) != CPU_STARTING)
+		return NOTIFY_DONE;
+
+	memset(cpuc, 0, sizeof(struct cpu_hw_events));
+	raw_spin_lock_init(&cpuc->pmu_lock);
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block __cpuinitdata metag_pmu_notifier = {
+	.notifier_call = metag_pmu_cpu_notify,
+};
+
+/* PMU Initialisation */
+static int __init init_hw_perf_events(void)
+{
+	int ret = 0, cpu;
+	u32 version = *(u32 *)METAC_ID;
+	int major = (version & METAC_ID_MAJOR_BITS) >> METAC_ID_MAJOR_S;
+	int min_rev = (version & (METAC_ID_MINOR_BITS | METAC_ID_REV_BITS))
+			>> METAC_ID_REV_S;
+
+	/* Not a Meta 2 core, then not supported */
+	if (0x02 > major) {
+		pr_info("no hardware counter support available\n");
+		goto out;
+	} else if (0x02 == major) {
+		metag_pmu = &_metag_pmu;
+
+		if (min_rev < 0x0104) {
+			/*
+			 * A core without overflow interrupts, and clear-on-
+			 * write counters.
+			 */
+			metag_pmu->handle_irq = NULL;
+			metag_pmu->write = NULL;
+			metag_pmu->max_period = 0;
+		}
+
+		metag_pmu->name = "Meta 2";
+		metag_pmu->version = version;
+		metag_pmu->pmu = pmu;
+	}
+
+	pr_info("enabled with %s PMU driver, %d counters available\n",
+			metag_pmu->name, metag_pmu->max_events);
+
+	/* Initialise the active events and reservation mutex */
+	atomic_set(&metag_pmu->active_events, 0);
+	mutex_init(&metag_pmu->reserve_mutex);
+
+	/* Clear the counters */
+	metag_out32(0, PERF_COUNT(0));
+	metag_out32(0, PERF_COUNT(1));
+
+	for_each_possible_cpu(cpu) {
+		struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
+
+		memset(cpuc, 0, sizeof(struct cpu_hw_events));
+		raw_spin_lock_init(&cpuc->pmu_lock);
+	}
+
+	register_cpu_notifier(&metag_pmu_notifier);
+	ret = perf_pmu_register(&pmu, (char *)metag_pmu->name, PERF_TYPE_RAW);
+out:
+	return ret;
+}
+early_initcall(init_hw_perf_events);
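[Editor's note, not part of the patch: a worked illustration of the 24-bit period arithmetic in metag_pmu_event_set_period() above — writing (u64)(-left) & MAX_PERIOD programs the counter so it wraps after `left` more events. The standalone helper below only mimics that computation on the host; its names are mine, not the driver's.]

```c
/* Hypothetical host-side sketch of the period programming, not kernel code. */
#include <stdint.h>
#include <stdio.h>

#define MAX_PERIOD ((1UL << 24) - 1)	/* 24-bit counters, as in perf_event.h */

/* Counter start value such that it overflows after 'left' more events. */
static uint32_t period_to_start_value(int64_t left)
{
	if (left > (int64_t)MAX_PERIOD)
		left = MAX_PERIOD;
	return (uint64_t)(-left) & MAX_PERIOD;
}

int main(void)
{
	/* Asking for 1000 events leaves the counter 1000 short of the wrap. */
	uint32_t start = period_to_start_value(1000);

	printf("start=0x%06x, events to overflow=%lu\n",
	       start, (unsigned long)(MAX_PERIOD + 1 - start));
	return 0;
}
```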
diff --git a/arch/metag/kernel/perf/perf_event.h b/arch/metag/kernel/perf/perf_event.h
new file mode 100644
index 000000000000..fd10a1345b67
--- /dev/null
+++ b/arch/metag/kernel/perf/perf_event.h
@@ -0,0 +1,106 @@
+/*
+ * Meta performance counter support.
+ *  Copyright (C) 2012 Imagination Technologies Ltd
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+
+#ifndef METAG_PERF_EVENT_H_
+#define METAG_PERF_EVENT_H_
+
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/perf_event.h>
+
+/* For performance counter definitions */
+#include <asm/metag_mem.h>
+
+/*
+ * The Meta core has two performance counters, with 24-bit resolution. Newer
+ * cores generate an overflow interrupt on transition from 0xffffff to 0.
+ *
+ * Each counter consists of the counter id, hardware thread id, and the count
+ * itself; each counter can be assigned to multiple hardware threads at any
+ * one time, with the returned count being an aggregate of events. A small
+ * number of events are thread global, i.e. they count the aggregate of all
+ * threads' events, regardless of the thread selected.
+ *
+ * Newer cores can store an arbitrary 24-bit number in the counter, whereas
+ * older cores will clear the counter bits on write.
+ *
+ * We also have a pseudo-counter in the form of the thread active cycles
+ * counter (which, incidentally, is also bound to
+ */
+
+#define MAX_HWEVENTS		3
+#define MAX_PERIOD		((1UL << 24) - 1)
+#define METAG_INST_COUNTER	(MAX_HWEVENTS - 1)
+
+/**
+ * struct cpu_hw_events - a processor core's performance events
+ * @events:	an array of perf_events active for a given index.
+ * @used_mask:	a bitmap of in-use counters.
+ * @pmu_lock:	a perf counter lock
+ *
+ * This is a per-cpu/core structure that maintains a record of its
+ * performance counters' state.
+ */
+struct cpu_hw_events {
+	struct perf_event	*events[MAX_HWEVENTS];
+	unsigned long		used_mask[BITS_TO_LONGS(MAX_HWEVENTS)];
+	raw_spinlock_t		pmu_lock;
+};
+
+/**
+ * struct metag_pmu - the Meta PMU structure
+ * @pmu:		core pmu structure
+ * @name:		pmu name
+ * @version:		core version
+ * @handle_irq:		overflow interrupt handler
+ * @enable:		enable a counter
+ * @disable:		disable a counter
+ * @read:		read the value of a counter
+ * @write:		write a value to a counter
+ * @event_map:		kernel event to counter event id map
+ * @cache_events:	kernel cache counter to core cache counter map
+ * @max_period:		maximum value of the counter before overflow
+ * @max_events:		maximum number of counters available at any one time
+ * @active_events:	number of active counters
+ * @reserve_mutex:	counter reservation mutex
+ *
+ * This describes the main functionality and data used by the performance
+ * event core.
+ */
+struct metag_pmu {
+	struct pmu	pmu;
+	const char	*name;
+	u32		version;
+	irqreturn_t	(*handle_irq)(int irq_num, void *dev);
+	void		(*enable)(struct hw_perf_event *evt, int idx);
+	void		(*disable)(struct hw_perf_event *evt, int idx);
+	u64		(*read)(int idx);
+	void		(*write)(int idx, u32 val);
+	int		(*event_map)(int idx);
+	const int	(*cache_events)[PERF_COUNT_HW_CACHE_MAX]
+				[PERF_COUNT_HW_CACHE_OP_MAX]
+				[PERF_COUNT_HW_CACHE_RESULT_MAX];
+	u32		max_period;
+	int		max_events;
+	atomic_t	active_events;
+	struct mutex	reserve_mutex;
+};
+
+/* Convenience macros for accessing the perf counters */
+/* Define some convenience accessors */
+#define PERF_COUNT(x)	(PERF_COUNT0 + (sizeof(u64) * (x)))
+#define PERF_ICORE(x)	(PERF_ICORE0 + (sizeof(u64) * (x)))
+#define PERF_CHAN(x)	(PERF_CHAN0 + (sizeof(u64) * (x)))
+
+/* Cache index macros */
+#define C(x) PERF_COUNT_HW_CACHE_##x
+#define CACHE_OP_UNSUPPORTED	0xfffe
+#define CACHE_OP_NONSENSE	0xffff
+
+#endif
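[Editor's note, not part of the patch: a short sketch of how a PERF_TYPE_HW_CACHE config word is packed by user space and then unpacked by _hw_perf_cache_event() — cache id in bits 0-7, operation in bits 8-15, result in bits 16-23, per the generic perf ABI. The enum values below are stand-ins for the generic PERF_COUNT_HW_CACHE_* constants.]

```c
/* Illustrative only; mirrors the unpacking done in _hw_perf_cache_event(). */
#include <stdio.h>

enum { CACHE_L1D = 0, CACHE_OP_READ = 0, CACHE_RESULT_ACCESS = 0 };

int main(void)
{
	/* Pack: type | (op << 8) | (result << 16), as user space would. */
	unsigned int config = CACHE_L1D | (CACHE_OP_READ << 8) |
			      (CACHE_RESULT_ACCESS << 16);

	/* Unpack, exactly as the driver does. */
	unsigned int type = config & 0xff;
	unsigned int op = (config >> 8) & 0xff;
	unsigned int result = (config >> 16) & 0xff;

	/* An L1D read access maps to raw event 0x08 in metag_pmu_cache_events. */
	printf("type=%u op=%u result=%u\n", type, op, result);
	return 0;
}
```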
diff --git a/arch/metag/kernel/perf_callchain.c b/arch/metag/kernel/perf_callchain.c
new file mode 100644
index 000000000000..315633461a94
--- /dev/null
+++ b/arch/metag/kernel/perf_callchain.c
@@ -0,0 +1,96 @@
+/*
+ * Perf callchain handling code.
+ *
+ * Based on the ARM perf implementation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/perf_event.h>
+#include <linux/uaccess.h>
+#include <asm/ptrace.h>
+#include <asm/stacktrace.h>
+
+static bool is_valid_call(unsigned long calladdr)
+{
+	unsigned int callinsn;
+
+	/* Check the possible return address is aligned. */
+	if (!(calladdr & 0x3)) {
+		if (!get_user(callinsn, (unsigned int *)calladdr)) {
+			/* Check for CALLR or SWAP PC,D1RtP. */
+			if ((callinsn & 0xff000000) == 0xab000000 ||
+			    callinsn == 0xa3200aa0)
+				return true;
+		}
+	}
+	return false;
+}
+
+static struct metag_frame __user *
+user_backtrace(struct metag_frame __user *user_frame,
+	       struct perf_callchain_entry *entry)
+{
+	struct metag_frame frame;
+	unsigned long calladdr;
+
+	/* We cannot rely on having frame pointers in user code. */
+	while (1) {
+		/* Also check accessibility of one struct frame beyond */
+		if (!access_ok(VERIFY_READ, user_frame, sizeof(frame)))
+			return 0;
+		if (__copy_from_user_inatomic(&frame, user_frame,
+					      sizeof(frame)))
+			return 0;
+
+		--user_frame;
+
+		calladdr = frame.lr - 4;
+		if (is_valid_call(calladdr)) {
+			perf_callchain_store(entry, calladdr);
+			return user_frame;
+		}
+	}
+
+	return 0;
+}
+
+void
+perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
+{
+	unsigned long sp = regs->ctx.AX[0].U0;
+	struct metag_frame __user *frame;
+
+	frame = (struct metag_frame __user *)sp;
+
+	--frame;
+
+	while ((entry->nr < PERF_MAX_STACK_DEPTH) && frame)
+		frame = user_backtrace(frame, entry);
+}
+
+/*
+ * Gets called by walk_stackframe() for every stackframe. This will be called
+ * whilst unwinding the stackframe and is like a subroutine return so we use
+ * the PC.
+ */
+static int
+callchain_trace(struct stackframe *fr,
+		void *data)
+{
+	struct perf_callchain_entry *entry = data;
+	perf_callchain_store(entry, fr->pc);
+	return 0;
+}
+
+void
+perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
+{
+	struct stackframe fr;
+
+	fr.fp = regs->ctx.AX[1].U0;
+	fr.sp = regs->ctx.AX[0].U0;
+	fr.lr = regs->ctx.DX[4].U1;
+	fr.pc = regs->ctx.CurrPC;
+	walk_stackframe(&fr, callchain_trace, entry);
+}
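[Editor's note, not part of the patch: a hedged user-space sketch of requesting the callchains that perf_callchain_user()/perf_callchain_kernel() produce — a sampled event with PERF_SAMPLE_CALLCHAIN set. Ring-buffer handling is elided; the attribute fields are from the generic perf ABI, and the period value is arbitrary.]

```c
/* Illustration only: configure a sampling event that records callchains. */
#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <string.h>
#include <unistd.h>

int open_callchain_event(void)
{
	struct perf_event_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.config = PERF_COUNT_HW_CPU_CYCLES;	/* raw event 0x03 on Meta 2 */
	attr.sample_period = 100000;		/* sampling needs overflow IRQs (core >= 0x0104) */
	attr.sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_CALLCHAIN;

	/* On overflow the kernel walks stacks via the perf_callchain_*() hooks above. */
	return syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
}
```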