author	Mike Frysinger <vapier@gentoo.org>	2011-05-06 11:47:52 -0400
committer	Mike Frysinger <vapier@gentoo.org>	2011-05-25 08:24:09 -0400
commit	7db79172908990c3ea540fcc6819330d273f9f1c (patch)
tree	b81e71dbda26a19ca0396cb0a7b98fd513104b5c /arch
parent	93f1742c631a87f02622e6a4570e65479f598672 (diff)
Blackfin: initial perf_event support
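This hooks the Blackfin performance counters into the standard perf tooling
for counting usage; for example (illustrative invocation only, assuming a
perf tool built for this target; ./my_prog is a placeholder):

	$ perf stat -e instructions,branches,branch-misses ./my_prog

The counters have no overflow interrupt, so sampling (perf record) is
rejected at event init time and only counting is supported.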
Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Diffstat (limited to 'arch')
-rw-r--r--	arch/blackfin/Kconfig	2
-rw-r--r--	arch/blackfin/include/asm/bfin_pfmon.h	44
-rw-r--r--	arch/blackfin/include/asm/perf_event.h	1
-rw-r--r--	arch/blackfin/kernel/Makefile	1
-rw-r--r--	arch/blackfin/kernel/perf_event.c	498
5 files changed, 546 insertions, 0 deletions
diff --git a/arch/blackfin/Kconfig b/arch/blackfin/Kconfig
index 8addb1220b4f..a18180f2d007 100644
--- a/arch/blackfin/Kconfig
+++ b/arch/blackfin/Kconfig
@@ -24,11 +24,13 @@ config BLACKFIN
 	select HAVE_FUNCTION_TRACER
 	select HAVE_FUNCTION_TRACE_MCOUNT_TEST
 	select HAVE_IDE
+	select HAVE_IRQ_WORK
 	select HAVE_KERNEL_GZIP if RAMKERNEL
 	select HAVE_KERNEL_BZIP2 if RAMKERNEL
 	select HAVE_KERNEL_LZMA if RAMKERNEL
 	select HAVE_KERNEL_LZO if RAMKERNEL
 	select HAVE_OPROFILE
+	select HAVE_PERF_EVENTS
 	select ARCH_WANT_OPTIONAL_GPIOLIB
 	select HAVE_GENERIC_HARDIRQS
 	select GENERIC_ATOMIC64
diff --git a/arch/blackfin/include/asm/bfin_pfmon.h b/arch/blackfin/include/asm/bfin_pfmon.h
new file mode 100644
index 000000000000..accd47e2db40
--- /dev/null
+++ b/arch/blackfin/include/asm/bfin_pfmon.h
@@ -0,0 +1,44 @@
+/*
+ * Blackfin Performance Monitor definitions
+ *
+ * Copyright 2005-2011 Analog Devices Inc.
+ *
+ * Licensed under the ADI BSD license or GPL-2 (or later).
+ */
+
+#ifndef __ASM_BFIN_PFMON_H__
+#define __ASM_BFIN_PFMON_H__
+
+/* PFCTL Masks */
+#define PFMON_MASK 0xff
+#define PFCEN_MASK 0x3
+#define PFCEN_DISABLE 0x0
+#define PFCEN_ENABLE_USER 0x1
+#define PFCEN_ENABLE_SUPV 0x2
+#define PFCEN_ENABLE_ALL (PFCEN_ENABLE_USER | PFCEN_ENABLE_SUPV)
+
+#define PFPWR_P 0
+#define PEMUSW0_P 2
+#define PFCEN0_P 3
+#define PFMON0_P 5
+#define PEMUSW1_P 13
+#define PFCEN1_P 14
+#define PFMON1_P 16
+#define PFCNT0_P 24
+#define PFCNT1_P 25
+
+#define PFPWR (1 << PFPWR_P)
+#define PEMUSW(n, x) ((x) << ((n) ? PEMUSW1_P : PEMUSW0_P))
+#define PEMUSW0 PEMUSW(0, 1)
+#define PEMUSW1 PEMUSW(1, 1)
+#define PFCEN(n, x) ((x) << ((n) ? PFCEN1_P : PFCEN0_P))
+#define PFCEN0 PFCEN(0, PFCEN_MASK)
+#define PFCEN1 PFCEN(1, PFCEN_MASK)
+#define PFCNT(n, x) ((x) << ((n) ? PFCNT1_P : PFCNT0_P))
+#define PFCNT0 PFCNT(0, 1)
+#define PFCNT1 PFCNT(1, 1)
+#define PFMON(n, x) ((x) << ((n) ? PFMON1_P : PFMON0_P))
+#define PFMON0 PFMON(0, PFMON_MASK)
+#define PFMON1 PFMON(1, PFMON_MASK)
+
+#endif
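For review purposes, a minimal host-side sketch of how the masks above
compose into a PFCTL value. This block is not part of the patch; the
relevant macros are copied in so it builds on any host.

	#include <stdio.h>

	/* Copies of the bfin_pfmon.h bit positions and macros used below. */
	#define PFCEN0_P	3
	#define PFMON0_P	5
	#define PFCEN1_P	14
	#define PFMON1_P	16
	#define PFCNT0_P	24
	#define PFCNT1_P	25
	#define PFPWR		0x1
	#define PFCEN_ENABLE_ALL	0x3
	#define PFCEN(n, x)	((x) << ((n) ? PFCEN1_P : PFCEN0_P))
	#define PFCNT(n, x)	((x) << ((n) ? PFCNT1_P : PFCNT0_P))
	#define PFMON(n, x)	((x) << ((n) ? PFMON1_P : PFMON0_P))

	int main(void)
	{
		/* Counter 0: event 0x0D ("insns committed"), an 'o'-class
		 * event (PFCNTx = 1), counting user + supervisor mode. */
		unsigned int pfctl = PFPWR
				   | PFMON(0, 0x0D)		/* 0x0d << 5 = 0x1a0 */
				   | PFCEN(0, PFCEN_ENABLE_ALL)	/* 0x3 << 3 = 0x18 */
				   | PFCNT(0, 1);		/* 1 << 24 */

		printf("PFCTL = %#010x\n", pfctl);	/* prints 0x010001b9 */
		return 0;
	}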
diff --git a/arch/blackfin/include/asm/perf_event.h b/arch/blackfin/include/asm/perf_event.h
new file mode 100644
index 000000000000..3d2b1716322f
--- /dev/null
+++ b/arch/blackfin/include/asm/perf_event.h
@@ -0,0 +1 @@
+#define MAX_HWEVENTS 2
diff --git a/arch/blackfin/kernel/Makefile b/arch/blackfin/kernel/Makefile
index 18ba6abd66db..d550b24d9e9b 100644
--- a/arch/blackfin/kernel/Makefile
+++ b/arch/blackfin/kernel/Makefile
@@ -33,6 +33,7 @@ obj-$(CONFIG_EARLY_PRINTK) += shadow_console.o
 obj-$(CONFIG_STACKTRACE) += stacktrace.o
 obj-$(CONFIG_DEBUG_VERBOSE) += trace.o
 obj-$(CONFIG_BFIN_PSEUDODBG_INSNS) += pseudodbg.o
+obj-$(CONFIG_PERF_EVENTS) += perf_event.o
 
 # the kgdb test puts code into L2 and without linker
 # relaxation, we need to force long calls to/from it
diff --git a/arch/blackfin/kernel/perf_event.c b/arch/blackfin/kernel/perf_event.c
new file mode 100644
index 000000000000..04300f29c0e7
--- /dev/null
+++ b/arch/blackfin/kernel/perf_event.c
@@ -0,0 +1,498 @@
+/*
+ * Blackfin performance counters
+ *
+ * Copyright 2011 Analog Devices Inc.
+ *
+ * Ripped from SuperH version:
+ *
+ *  Copyright (C) 2009 Paul Mundt
+ *
+ * Heavily based on the x86 and PowerPC implementations.
+ *
+ * x86:
+ *  Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
+ *  Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
+ *  Copyright (C) 2009 Jaswinder Singh Rajput
+ *  Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
+ *  Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
+ *  Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com>
+ *
+ * ppc:
+ *  Copyright 2008-2009 Paul Mackerras, IBM Corporation.
+ *
+ * Licensed under the GPL-2 or later.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/perf_event.h>
+#include <asm/bfin_pfmon.h>
+
+/*
+ * We have two counters, and each counter can support an event type.
+ * The 'o' is PFCNTx=1 and 's' is PFCNTx=0
+ *
+ * 0x04 o pc invariant branches
+ * 0x06 o mispredicted branches
+ * 0x09 o predicted branches taken
+ * 0x0B o EXCPT insn
+ * 0x0C o CSYNC/SSYNC insn
+ * 0x0D o Insns committed
+ * 0x0E o Interrupts taken
+ * 0x0F o Misaligned address exceptions
+ * 0x80 o Code memory fetches stalled due to DMA
+ * 0x83 o 64bit insn fetches delivered
+ * 0x9A o data cache fills (bank a)
+ * 0x9B o data cache fills (bank b)
+ * 0x9C o data cache lines evicted (bank a)
+ * 0x9D o data cache lines evicted (bank b)
+ * 0x9E o data cache high priority fills
+ * 0x9F o data cache low priority fills
+ * 0x00 s loop 0 iterations
+ * 0x01 s loop 1 iterations
+ * 0x0A s CSYNC/SSYNC stalls
+ * 0x10 s DAG read/after write hazards
+ * 0x13 s RAW data hazards
+ * 0x81 s code TAG stalls
+ * 0x82 s code fill stalls
+ * 0x90 s processor to memory stalls
+ * 0x91 s data memory stalls not hidden by 0x90
+ * 0x92 s data store buffer full stalls
+ * 0x93 s data memory write buffer full stalls due to high->low priority
+ * 0x95 s data memory fill buffer stalls
+ * 0x96 s data TAG collision stalls
+ * 0x97 s data collision stalls
+ * 0x98 s data stalls
+ * 0x99 s data stalls sent to processor
+ */
+
+static const int event_map[] = {
+	/* use CYCLES cpu register */
+	[PERF_COUNT_HW_CPU_CYCLES]          = -1,
+	[PERF_COUNT_HW_INSTRUCTIONS]        = 0x0D,
+	[PERF_COUNT_HW_CACHE_REFERENCES]    = -1,
+	[PERF_COUNT_HW_CACHE_MISSES]        = 0x83,
+	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x09,
+	[PERF_COUNT_HW_BRANCH_MISSES]       = 0x06,
+	[PERF_COUNT_HW_BUS_CYCLES]          = -1,
+};
+
+#define C(x) PERF_COUNT_HW_CACHE_##x
+
+static const int cache_events[PERF_COUNT_HW_CACHE_MAX]
+				[PERF_COUNT_HW_CACHE_OP_MAX]
+				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+	[C(L1D)] = {	/* Data bank A */
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)] = 0,
+			[C(RESULT_MISS)  ] = 0x9A,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)] = 0,
+			[C(RESULT_MISS)  ] = 0,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)] = 0,
+			[C(RESULT_MISS)  ] = 0,
+		},
+	},
+
+	[C(L1I)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)] = 0,
+			[C(RESULT_MISS)  ] = 0x83,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)] = -1,
+			[C(RESULT_MISS)  ] = -1,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)] = 0,
+			[C(RESULT_MISS)  ] = 0,
+		},
+	},
+
+	[C(LL)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)] = -1,
+			[C(RESULT_MISS)  ] = -1,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)] = -1,
+			[C(RESULT_MISS)  ] = -1,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)] = -1,
+			[C(RESULT_MISS)  ] = -1,
+		},
+	},
+
+	[C(DTLB)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)] = -1,
+			[C(RESULT_MISS)  ] = -1,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)] = -1,
+			[C(RESULT_MISS)  ] = -1,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)] = -1,
+			[C(RESULT_MISS)  ] = -1,
+		},
+	},
+
+	[C(ITLB)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)] = -1,
+			[C(RESULT_MISS)  ] = -1,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)] = -1,
+			[C(RESULT_MISS)  ] = -1,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)] = -1,
+			[C(RESULT_MISS)  ] = -1,
+		},
+	},
+
+	[C(BPU)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)] = -1,
+			[C(RESULT_MISS)  ] = -1,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)] = -1,
+			[C(RESULT_MISS)  ] = -1,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)] = -1,
+			[C(RESULT_MISS)  ] = -1,
+		},
+	},
+};
+
+const char *perf_pmu_name(void)
+{
+	return "bfin";
+}
+EXPORT_SYMBOL(perf_pmu_name);
+
+int perf_num_counters(void)
+{
+	return ARRAY_SIZE(event_map);
+}
+EXPORT_SYMBOL(perf_num_counters);
+
+static u64 bfin_pfmon_read(int idx)
+{
+	return bfin_read32(PFCNTR0 + (idx * 4));
+}
+
+static void bfin_pfmon_disable(struct hw_perf_event *hwc, int idx)
+{
+	bfin_write_PFCTL(bfin_read_PFCTL() & ~PFCEN(idx, PFCEN_MASK));
+}
+
+static void bfin_pfmon_enable(struct hw_perf_event *hwc, int idx)
+{
+	u32 val, mask;
+
+	val = PFPWR;
+	if (idx) {
+		mask = ~(PFCNT1 | PFMON1 | PFCEN1 | PEMUSW1);
+		/* The packed config is for event0, so shift it to event1 slots */
+		val |= (hwc->config << (PFMON1_P - PFMON0_P));
+		val |= (hwc->config & PFCNT0) << (PFCNT1_P - PFCNT0_P);
+		bfin_write_PFCNTR1(0);
+	} else {
+		mask = ~(PFCNT0 | PFMON0 | PFCEN0 | PEMUSW0);
+		val |= hwc->config;
+		bfin_write_PFCNTR0(0);
+	}
+
+	bfin_write_PFCTL((bfin_read_PFCTL() & mask) | val);
+}
+
+static void bfin_pfmon_disable_all(void)
+{
+	bfin_write_PFCTL(bfin_read_PFCTL() & ~PFPWR);
+}
+
+static void bfin_pfmon_enable_all(void)
+{
+	bfin_write_PFCTL(bfin_read_PFCTL() | PFPWR);
+}
+
+struct cpu_hw_events {
+	struct perf_event *events[MAX_HWEVENTS];
+	unsigned long used_mask[BITS_TO_LONGS(MAX_HWEVENTS)];
+};
+DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events);
+
+static int hw_perf_cache_event(int config, int *evp)
+{
+	unsigned long type, op, result;
+	int ev;
+
+	/* unpack config */
+	type = config & 0xff;
+	op = (config >> 8) & 0xff;
+	result = (config >> 16) & 0xff;
+
+	if (type >= PERF_COUNT_HW_CACHE_MAX ||
+	    op >= PERF_COUNT_HW_CACHE_OP_MAX ||
+	    result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
+		return -EINVAL;
+
+	ev = cache_events[type][op][result];
+	if (ev == 0)
+		return -EOPNOTSUPP;
+	if (ev == -1)
+		return -EINVAL;
+	*evp = ev;
+	return 0;
+}
+
+static void bfin_perf_event_update(struct perf_event *event,
+				   struct hw_perf_event *hwc, int idx)
+{
+	u64 prev_raw_count, new_raw_count;
+	s64 delta;
+	int shift = 0;
+
+	/*
+	 * Depending on the counter configuration, they may or may not
+	 * be chained, in which case the previous counter value can be
+	 * updated underneath us if the lower-half overflows.
+	 *
+	 * Our tactic to handle this is to first atomically read and
+	 * exchange a new raw count - then add that new-prev delta
+	 * count to the generic counter atomically.
+	 *
+	 * As there is no interrupt associated with the overflow events,
+	 * this is the simplest approach for maintaining consistency.
+	 */
+again:
+	prev_raw_count = local64_read(&hwc->prev_count);
+	new_raw_count = bfin_pfmon_read(idx);
+
+	if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
+			    new_raw_count) != prev_raw_count)
+		goto again;
+
+	/*
+	 * Now we have the new raw value and have updated the prev
+	 * timestamp already. We can now calculate the elapsed delta
+	 * (counter-)time and add that to the generic counter.
+	 *
+	 * Careful, not all hw sign-extends above the physical width
+	 * of the count.
+	 */
+	delta = (new_raw_count << shift) - (prev_raw_count << shift);
+	delta >>= shift;
+
+	local64_add(delta, &event->count);
+}
+
+static void bfin_pmu_stop(struct perf_event *event, int flags)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->idx;
+
+	if (!(event->hw.state & PERF_HES_STOPPED)) {
+		bfin_pfmon_disable(hwc, idx);
+		cpuc->events[idx] = NULL;
+		event->hw.state |= PERF_HES_STOPPED;
+	}
+
+	if ((flags & PERF_EF_UPDATE) && !(event->hw.state & PERF_HES_UPTODATE)) {
+		bfin_perf_event_update(event, &event->hw, idx);
+		event->hw.state |= PERF_HES_UPTODATE;
+	}
+}
+
+static void bfin_pmu_start(struct perf_event *event, int flags)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->idx;
+
+	if (WARN_ON_ONCE(idx == -1))
+		return;
+
+	if (flags & PERF_EF_RELOAD)
+		WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
+
+	cpuc->events[idx] = event;
+	event->hw.state = 0;
+	bfin_pfmon_enable(hwc, idx);
+}
+
+static void bfin_pmu_del(struct perf_event *event, int flags)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+
+	bfin_pmu_stop(event, PERF_EF_UPDATE);
+	__clear_bit(event->hw.idx, cpuc->used_mask);
+
+	perf_event_update_userpage(event);
+}
+
+static int bfin_pmu_add(struct perf_event *event, int flags)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->idx;
+	int ret = -EAGAIN;
+
+	perf_pmu_disable(event->pmu);
+
+	if (__test_and_set_bit(idx, cpuc->used_mask)) {
+		idx = find_first_zero_bit(cpuc->used_mask, MAX_HWEVENTS);
+		if (idx == MAX_HWEVENTS)
+			goto out;
+
+		__set_bit(idx, cpuc->used_mask);
+		hwc->idx = idx;
+	}
+
+	bfin_pfmon_disable(hwc, idx);
+
+	event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
+	if (flags & PERF_EF_START)
+		bfin_pmu_start(event, PERF_EF_RELOAD);
+
+	perf_event_update_userpage(event);
+	ret = 0;
+out:
+	perf_pmu_enable(event->pmu);
+	return ret;
+}
+
+static void bfin_pmu_read(struct perf_event *event)
+{
+	bfin_perf_event_update(event, &event->hw, event->hw.idx);
+}
+
+static int bfin_pmu_event_init(struct perf_event *event)
+{
+	struct perf_event_attr *attr = &event->attr;
+	struct hw_perf_event *hwc = &event->hw;
+	int config = -1;
+	int ret;
+
+	if (attr->exclude_hv || attr->exclude_idle)
+		return -EPERM;
+
+	/*
+	 * All of the on-chip counters are "limited", in that they have
+	 * no interrupts, and are therefore unable to do sampling without
+	 * further work and timer assistance.
+	 */
+	if (hwc->sample_period)
+		return -EINVAL;
+
+	ret = 0;
+	switch (attr->type) {
+	case PERF_TYPE_RAW:
+		config = PFMON(0, attr->config & PFMON_MASK) |
+			PFCNT(0, !(attr->config & 0x100));
+		break;
+	case PERF_TYPE_HW_CACHE:
+		ret = hw_perf_cache_event(attr->config, &config);
+		break;
+	case PERF_TYPE_HARDWARE:
+		if (attr->config >= ARRAY_SIZE(event_map))
+			return -EINVAL;
+
+		config = event_map[attr->config];
+		break;
+	}
+
+	if (config == -1)
+		return -EINVAL;
+
+	if (!attr->exclude_kernel)
+		config |= PFCEN(0, PFCEN_ENABLE_SUPV);
+	if (!attr->exclude_user)
+		config |= PFCEN(0, PFCEN_ENABLE_USER);
+
+	hwc->config |= config;
+
+	return ret;
+}
+
+static void bfin_pmu_enable(struct pmu *pmu)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	struct perf_event *event;
+	struct hw_perf_event *hwc;
+	int i;
+
+	for (i = 0; i < MAX_HWEVENTS; ++i) {
+		event = cpuc->events[i];
+		if (!event)
+			continue;
+		hwc = &event->hw;
+		bfin_pfmon_enable(hwc, hwc->idx);
+	}
+
+	bfin_pfmon_enable_all();
+}
+
+static void bfin_pmu_disable(struct pmu *pmu)
+{
+	bfin_pfmon_disable_all();
+}
+
+static struct pmu pmu = {
+	.pmu_enable  = bfin_pmu_enable,
+	.pmu_disable = bfin_pmu_disable,
+	.event_init  = bfin_pmu_event_init,
+	.add         = bfin_pmu_add,
+	.del         = bfin_pmu_del,
+	.start       = bfin_pmu_start,
+	.stop        = bfin_pmu_stop,
+	.read        = bfin_pmu_read,
+};
+
+static void bfin_pmu_setup(int cpu)
+{
+	struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu);
+
+	memset(cpuhw, 0, sizeof(struct cpu_hw_events));
+}
+
+static int __cpuinit
+bfin_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
+{
+	unsigned int cpu = (long)hcpu;
+
+	switch (action & ~CPU_TASKS_FROZEN) {
+	case CPU_UP_PREPARE:
+		bfin_write_PFCTL(0);
+		bfin_pmu_setup(cpu);
+		break;
+
+	default:
+		break;
+	}
+
+	return NOTIFY_OK;
+}
+
+static int __init bfin_pmu_init(void)
+{
+	int ret;
+
+	ret = perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);
+	if (!ret)
+		perf_cpu_notifier(bfin_pmu_notifier);
+
+	return ret;
+}
+early_initcall(bfin_pmu_init);
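
Usage note: per bfin_pmu_event_init() above, the PERF_TYPE_RAW encoding puts
the PFMON event number in bits 7:0, and bit 8 selects the 's' event class
(PFCNTx = 0) instead of the default 'o' class. Illustrative invocations
(./my_prog is a placeholder):

	$ perf stat -e instructions ./my_prog	# event 0x0D via event_map
	$ perf stat -e r06 ./my_prog		# raw 'o' event 0x06: mispredicted branches
	$ perf stat -e r113 ./my_prog		# raw 's' event 0x13: RAW data hazards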