author		Linus Torvalds <torvalds@linux-foundation.org>	2012-03-21 21:55:10 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2012-03-21 21:55:10 -0400
commit		5375871d432ae9fc581014ac117b96aaee3cd0c7 (patch)
tree		be98e8255b0f927fb920fb532a598b93fa140dbe /arch/powerpc/perf
parent		b57cb7231b2ce52d3dda14a7b417ae125fb2eb97 (diff)
parent		dfbc2d75c1bd47c3186fa91f1655ea2f3825b0ec (diff)
Merge branch 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc
Pull powerpc merge from Benjamin Herrenschmidt:
"Here's the powerpc batch for this merge window. It is going to be a
bit more nasty than usual as in touching things outside of
arch/powerpc mostly due to the big iSeriesectomy :-) We finally got
rid of the bugger (legacy iSeries support) which was a PITA to
maintain and that nobody really used anymore.
Here are some of the highlights:
- Legacy iSeries is gone. Thanks Stephen! There are still some bits
and pieces remaining if you do a grep -ir series arch/powerpc, but
they are harmless and will be removed in the next few weeks
hopefully.
- The 'fadump' functionality (Firmware Assisted Dump) replaces the
previous (equivalent) "pHyp assisted dump"... it's a rewrite of a
mechanism to get the hypervisor to do crash dumps on pSeries, the
new implementation hopefully being much more reliable. Thanks
Mahesh Salgaonkar.
- The "EEH" code (pSeries PCI error handling & recovery) got a big
spring cleaning, motivated by the need to be able to implement a
new backend for it on top of a new, different type of firmware.
The work isn't complete yet, but a good chunk of the cleanups is
there. Note that this adds a field to struct device_node which is
not very nice and which Grant objects to. I will have a patch soon
that moves that to a powerpc private data structure (hopefully
before rc1) and we'll improve things further later on (hopefully
getting rid of the need for that pointer completely). Thanks Gavin
Shan.
- I dug into our exception & interrupt handling code to improve the
way we do lazy interrupt handling (and make it work properly with
"edge" triggered interrupt sources), and while at it found & fixed
a wagon of issues in those areas, including adding support for page
fault retry & fatal signals on page faults.
- Your usual random batch of small fixes & updates, including a bunch
of new embedded boards, both Freescale and APM based ones, etc..."
I fixed up some conflicts with the generalized irq-domain changes from
Grant Likely, hopefully correctly.
* 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc: (141 commits)
powerpc/ps3: Do not adjust the wrapper load address
powerpc: Remove the rest of the legacy iSeries include files
powerpc: Remove the remaining CONFIG_PPC_ISERIES pieces
init: Remove CONFIG_PPC_ISERIES
powerpc: Remove FW_FEATURE ISERIES from arch code
tty/hvc_vio: FW_FEATURE_ISERIES is no longer selectable
powerpc/spufs: Fix double unlocks
powerpc/5200: convert mpc5200 to use of_platform_populate()
powerpc/mpc5200: add options to mpc5200_defconfig
powerpc/mpc52xx: add a4m072 board support
powerpc/mpc5200: update mpc5200_defconfig to fit for charon board
Documentation/powerpc/mpc52xx.txt: Checkpatch cleanup
powerpc/44x: Add additional device support for APM821xx SoC and Bluestone board
powerpc/44x: Add support PCI-E for APM821xx SoC and Bluestone board
MAINTAINERS: Update PowerPC 4xx tree
powerpc/44x: The bug fixed support for APM821xx SoC and Bluestone board
powerpc: document the FSL MPIC message register binding
powerpc: add support for MPIC message register API
powerpc/fsl: Added aliased MSIIR register address to MSI node in dts
powerpc/85xx: mpc8548cds - add 36-bit dts
...
Diffstat (limited to 'arch/powerpc/perf')
-rw-r--r--	arch/powerpc/perf/Makefile	14
-rw-r--r--	arch/powerpc/perf/callchain.c	492
-rw-r--r--	arch/powerpc/perf/core-book3s.c	1448
-rw-r--r--	arch/powerpc/perf/core-fsl-emb.c	688
-rw-r--r--	arch/powerpc/perf/e500-pmu.c	134
-rw-r--r--	arch/powerpc/perf/mpc7450-pmu.c	422
-rw-r--r--	arch/powerpc/perf/power4-pmu.c	621
-rw-r--r--	arch/powerpc/perf/power5+-pmu.c	690
-rw-r--r--	arch/powerpc/perf/power5-pmu.c	629
-rw-r--r--	arch/powerpc/perf/power6-pmu.c	552
-rw-r--r--	arch/powerpc/perf/power7-pmu.c	379
-rw-r--r--	arch/powerpc/perf/ppc970-pmu.c	502
12 files changed, 6571 insertions, 0 deletions
diff --git a/arch/powerpc/perf/Makefile b/arch/powerpc/perf/Makefile
new file mode 100644
index 000000000000..af3fac23768c
--- /dev/null
+++ b/arch/powerpc/perf/Makefile
@@ -0,0 +1,14 @@
subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror

obj-$(CONFIG_PERF_EVENTS)	+= callchain.o

obj-$(CONFIG_PPC_PERF_CTRS)	+= core-book3s.o
obj64-$(CONFIG_PPC_PERF_CTRS)	+= power4-pmu.o ppc970-pmu.o power5-pmu.o \
				   power5+-pmu.o power6-pmu.o power7-pmu.o
obj32-$(CONFIG_PPC_PERF_CTRS)	+= mpc7450-pmu.o

obj-$(CONFIG_FSL_EMB_PERF_EVENT) += core-fsl-emb.o
obj-$(CONFIG_FSL_EMB_PERF_EVENT_E500) += e500-pmu.o

obj-$(CONFIG_PPC64)		+= $(obj64-y)
obj-$(CONFIG_PPC32)		+= $(obj32-y)
diff --git a/arch/powerpc/perf/callchain.c b/arch/powerpc/perf/callchain.c
new file mode 100644
index 000000000000..e8a18d1cc7c9
--- /dev/null
+++ b/arch/powerpc/perf/callchain.c
@@ -0,0 +1,492 @@
/*
 * Performance counter callchain support - powerpc architecture code
 *
 * Copyright © 2009 Paul Mackerras, IBM Corporation.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/perf_event.h>
#include <linux/percpu.h>
#include <linux/uaccess.h>
#include <linux/mm.h>
#include <asm/ptrace.h>
#include <asm/pgtable.h>
#include <asm/sigcontext.h>
#include <asm/ucontext.h>
#include <asm/vdso.h>
#ifdef CONFIG_PPC64
#include "../kernel/ppc32.h"
#endif


/*
 * Is sp valid as the address of the next kernel stack frame after prev_sp?
 * The next frame may be in a different stack area but should not go
 * back down in the same stack area.
 */
static int valid_next_sp(unsigned long sp, unsigned long prev_sp)
{
        if (sp & 0xf)
                return 0;               /* must be 16-byte aligned */
        if (!validate_sp(sp, current, STACK_FRAME_OVERHEAD))
                return 0;
        if (sp >= prev_sp + STACK_FRAME_OVERHEAD)
                return 1;
        /*
         * sp could decrease when we jump off an interrupt stack
         * back to the regular process stack.
         */
        if ((sp & ~(THREAD_SIZE - 1)) != (prev_sp & ~(THREAD_SIZE - 1)))
                return 1;
        return 0;
}
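
/*
 * Illustration (not part of this patch): when prev_sp is on an
 * interrupt stack and sp is back down on the process stack, the two
 * fall in different THREAD_SIZE-aligned regions, so the masked
 * comparison above accepts the downward jump even though sp < prev_sp.
 */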

void
perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
{
        unsigned long sp, next_sp;
        unsigned long next_ip;
        unsigned long lr;
        long level = 0;
        unsigned long *fp;

        lr = regs->link;
        sp = regs->gpr[1];
        perf_callchain_store(entry, regs->nip);

        if (!validate_sp(sp, current, STACK_FRAME_OVERHEAD))
                return;

        for (;;) {
                fp = (unsigned long *) sp;
                next_sp = fp[0];

                if (next_sp == sp + STACK_INT_FRAME_SIZE &&
                    fp[STACK_FRAME_MARKER] == STACK_FRAME_REGS_MARKER) {
                        /*
                         * This looks like an interrupt frame for an
                         * interrupt that occurred in the kernel
                         */
                        regs = (struct pt_regs *)(sp + STACK_FRAME_OVERHEAD);
                        next_ip = regs->nip;
                        lr = regs->link;
                        level = 0;
                        perf_callchain_store(entry, PERF_CONTEXT_KERNEL);

                } else {
                        if (level == 0)
                                next_ip = lr;
                        else
                                next_ip = fp[STACK_FRAME_LR_SAVE];

                        /*
                         * We can't tell which of the first two addresses
                         * we get are valid, but we can filter out the
                         * obviously bogus ones here.  We replace them
                         * with 0 rather than removing them entirely so
                         * that userspace can tell which is which.
                         */
                        if ((level == 1 && next_ip == lr) ||
                            (level <= 1 && !kernel_text_address(next_ip)))
                                next_ip = 0;

                        ++level;
                }

                perf_callchain_store(entry, next_ip);
                if (!valid_next_sp(next_sp, sp))
                        return;
                sp = next_sp;
        }
}

#ifdef CONFIG_PPC64
/*
 * On 64-bit we don't want to invoke hash_page on user addresses from
 * interrupt context, so if the access faults, we read the page tables
 * to find which page (if any) is mapped and access it directly.
 */
static int read_user_stack_slow(void __user *ptr, void *ret, int nb)
{
        pgd_t *pgdir;
        pte_t *ptep, pte;
        unsigned shift;
        unsigned long addr = (unsigned long) ptr;
        unsigned long offset;
        unsigned long pfn;
        void *kaddr;

        pgdir = current->mm->pgd;
        if (!pgdir)
                return -EFAULT;

        ptep = find_linux_pte_or_hugepte(pgdir, addr, &shift);
        if (!shift)
                shift = PAGE_SHIFT;

        /* align address to page boundary */
        offset = addr & ((1UL << shift) - 1);
        addr -= offset;

        if (ptep == NULL)
                return -EFAULT;
        pte = *ptep;
        if (!pte_present(pte) || !(pte_val(pte) & _PAGE_USER))
                return -EFAULT;
        pfn = pte_pfn(pte);
        if (!page_is_ram(pfn))
                return -EFAULT;

        /* no highmem to worry about here */
        kaddr = pfn_to_kaddr(pfn);
        memcpy(ret, kaddr + offset, nb);
        return 0;
}

static int read_user_stack_64(unsigned long __user *ptr, unsigned long *ret)
{
        if ((unsigned long)ptr > TASK_SIZE - sizeof(unsigned long) ||
            ((unsigned long)ptr & 7))
                return -EFAULT;

        pagefault_disable();
        if (!__get_user_inatomic(*ret, ptr)) {
                pagefault_enable();
                return 0;
        }
        pagefault_enable();

        return read_user_stack_slow(ptr, ret, 8);
}

static int read_user_stack_32(unsigned int __user *ptr, unsigned int *ret)
{
        if ((unsigned long)ptr > TASK_SIZE - sizeof(unsigned int) ||
            ((unsigned long)ptr & 3))
                return -EFAULT;

        pagefault_disable();
        if (!__get_user_inatomic(*ret, ptr)) {
                pagefault_enable();
                return 0;
        }
        pagefault_enable();

        return read_user_stack_slow(ptr, ret, 4);
}

static inline int valid_user_sp(unsigned long sp, int is_64)
{
        if (!sp || (sp & 7) || sp > (is_64 ? TASK_SIZE : 0x100000000UL) - 32)
                return 0;
        return 1;
}

/*
 * 64-bit user processes use the same stack frame for RT and non-RT signals.
 */
struct signal_frame_64 {
        char dummy[__SIGNAL_FRAMESIZE];
        struct ucontext uc;
        unsigned long unused[2];
        unsigned int tramp[6];
        struct siginfo *pinfo;
        void *puc;
        struct siginfo info;
        char abigap[288];
};

static int is_sigreturn_64_address(unsigned long nip, unsigned long fp)
{
        if (nip == fp + offsetof(struct signal_frame_64, tramp))
                return 1;
        if (vdso64_rt_sigtramp && current->mm->context.vdso_base &&
            nip == current->mm->context.vdso_base + vdso64_rt_sigtramp)
                return 1;
        return 0;
}

/*
 * Do some sanity checking on the signal frame pointed to by sp.
 * We check the pinfo and puc pointers in the frame.
 */
static int sane_signal_64_frame(unsigned long sp)
{
        struct signal_frame_64 __user *sf;
        unsigned long pinfo, puc;

        sf = (struct signal_frame_64 __user *) sp;
        if (read_user_stack_64((unsigned long __user *) &sf->pinfo, &pinfo) ||
            read_user_stack_64((unsigned long __user *) &sf->puc, &puc))
                return 0;
        return pinfo == (unsigned long) &sf->info &&
                puc == (unsigned long) &sf->uc;
}

static void perf_callchain_user_64(struct perf_callchain_entry *entry,
                                   struct pt_regs *regs)
{
        unsigned long sp, next_sp;
        unsigned long next_ip;
        unsigned long lr;
        long level = 0;
        struct signal_frame_64 __user *sigframe;
        unsigned long __user *fp, *uregs;

        next_ip = regs->nip;
        lr = regs->link;
        sp = regs->gpr[1];
        perf_callchain_store(entry, next_ip);

        for (;;) {
                fp = (unsigned long __user *) sp;
                if (!valid_user_sp(sp, 1) || read_user_stack_64(fp, &next_sp))
                        return;
                if (level > 0 && read_user_stack_64(&fp[2], &next_ip))
                        return;

                /*
                 * Note: the next_sp - sp >= signal frame size check
                 * is true when next_sp < sp, which can happen when
                 * transitioning from an alternate signal stack to the
                 * normal stack.
                 */
                if (next_sp - sp >= sizeof(struct signal_frame_64) &&
                    (is_sigreturn_64_address(next_ip, sp) ||
                     (level <= 1 && is_sigreturn_64_address(lr, sp))) &&
                    sane_signal_64_frame(sp)) {
                        /*
                         * This looks like a signal frame
                         */
                        sigframe = (struct signal_frame_64 __user *) sp;
                        uregs = sigframe->uc.uc_mcontext.gp_regs;
                        if (read_user_stack_64(&uregs[PT_NIP], &next_ip) ||
                            read_user_stack_64(&uregs[PT_LNK], &lr) ||
                            read_user_stack_64(&uregs[PT_R1], &sp))
                                return;
                        level = 0;
                        perf_callchain_store(entry, PERF_CONTEXT_USER);
                        perf_callchain_store(entry, next_ip);
                        continue;
                }

                if (level == 0)
                        next_ip = lr;
                perf_callchain_store(entry, next_ip);
                ++level;
                sp = next_sp;
        }
}

static inline int current_is_64bit(void)
{
        /*
         * We can't use test_thread_flag() here because we may be on an
         * interrupt stack, and the thread flags don't get copied over
         * from the thread_info on the main stack to the interrupt stack.
         */
        return !test_ti_thread_flag(task_thread_info(current), TIF_32BIT);
}

#else /* CONFIG_PPC64 */
/*
 * On 32-bit we just access the address and let hash_page create a
 * HPTE if necessary, so there is no need to fall back to reading
 * the page tables.  Since this is called at interrupt level,
 * do_page_fault() won't treat a DSI as a page fault.
 */
static int read_user_stack_32(unsigned int __user *ptr, unsigned int *ret)
{
        int rc;

        if ((unsigned long)ptr > TASK_SIZE - sizeof(unsigned int) ||
            ((unsigned long)ptr & 3))
                return -EFAULT;

        pagefault_disable();
        rc = __get_user_inatomic(*ret, ptr);
        pagefault_enable();

        return rc;
}

static inline void perf_callchain_user_64(struct perf_callchain_entry *entry,
                                          struct pt_regs *regs)
{
}

static inline int current_is_64bit(void)
{
        return 0;
}

static inline int valid_user_sp(unsigned long sp, int is_64)
{
        if (!sp || (sp & 7) || sp > TASK_SIZE - 32)
                return 0;
        return 1;
}

#define __SIGNAL_FRAMESIZE32 __SIGNAL_FRAMESIZE
#define sigcontext32 sigcontext
#define mcontext32 mcontext
#define ucontext32 ucontext
#define compat_siginfo_t struct siginfo

#endif /* CONFIG_PPC64 */

/*
 * Layout for non-RT signal frames
 */
struct signal_frame_32 {
        char dummy[__SIGNAL_FRAMESIZE32];
        struct sigcontext32 sctx;
        struct mcontext32 mctx;
        int abigap[56];
};

/*
 * Layout for RT signal frames
 */
struct rt_signal_frame_32 {
        char dummy[__SIGNAL_FRAMESIZE32 + 16];
        compat_siginfo_t info;
        struct ucontext32 uc;
        int abigap[56];
};

static int is_sigreturn_32_address(unsigned int nip, unsigned int fp)
{
        if (nip == fp + offsetof(struct signal_frame_32, mctx.mc_pad))
                return 1;
        if (vdso32_sigtramp && current->mm->context.vdso_base &&
            nip == current->mm->context.vdso_base + vdso32_sigtramp)
                return 1;
        return 0;
}

static int is_rt_sigreturn_32_address(unsigned int nip, unsigned int fp)
{
        if (nip == fp + offsetof(struct rt_signal_frame_32,
                                 uc.uc_mcontext.mc_pad))
                return 1;
        if (vdso32_rt_sigtramp && current->mm->context.vdso_base &&
            nip == current->mm->context.vdso_base + vdso32_rt_sigtramp)
                return 1;
        return 0;
}

static int sane_signal_32_frame(unsigned int sp)
{
        struct signal_frame_32 __user *sf;
        unsigned int regs;

        sf = (struct signal_frame_32 __user *) (unsigned long) sp;
        if (read_user_stack_32((unsigned int __user *) &sf->sctx.regs, &regs))
                return 0;
        return regs == (unsigned long) &sf->mctx;
}

static int sane_rt_signal_32_frame(unsigned int sp)
{
        struct rt_signal_frame_32 __user *sf;
        unsigned int regs;

        sf = (struct rt_signal_frame_32 __user *) (unsigned long) sp;
        if (read_user_stack_32((unsigned int __user *) &sf->uc.uc_regs, &regs))
                return 0;
        return regs == (unsigned long) &sf->uc.uc_mcontext;
}

static unsigned int __user *signal_frame_32_regs(unsigned int sp,
                                unsigned int next_sp, unsigned int next_ip)
{
        struct mcontext32 __user *mctx = NULL;
        struct signal_frame_32 __user *sf;
        struct rt_signal_frame_32 __user *rt_sf;

        /*
         * Note: the next_sp - sp >= signal frame size check
         * is true when next_sp < sp, for example, when
         * transitioning from an alternate signal stack to the
         * normal stack.
         */
        if (next_sp - sp >= sizeof(struct signal_frame_32) &&
            is_sigreturn_32_address(next_ip, sp) &&
            sane_signal_32_frame(sp)) {
                sf = (struct signal_frame_32 __user *) (unsigned long) sp;
                mctx = &sf->mctx;
        }

        if (!mctx && next_sp - sp >= sizeof(struct rt_signal_frame_32) &&
            is_rt_sigreturn_32_address(next_ip, sp) &&
            sane_rt_signal_32_frame(sp)) {
                rt_sf = (struct rt_signal_frame_32 __user *) (unsigned long) sp;
                mctx = &rt_sf->uc.uc_mcontext;
        }

        if (!mctx)
                return NULL;
        return mctx->mc_gregs;
}

static void perf_callchain_user_32(struct perf_callchain_entry *entry,
                                   struct pt_regs *regs)
{
        unsigned int sp, next_sp;
        unsigned int next_ip;
        unsigned int lr;
        long level = 0;
        unsigned int __user *fp, *uregs;

        next_ip = regs->nip;
        lr = regs->link;
        sp = regs->gpr[1];
        perf_callchain_store(entry, next_ip);

        while (entry->nr < PERF_MAX_STACK_DEPTH) {
                fp = (unsigned int __user *) (unsigned long) sp;
                if (!valid_user_sp(sp, 0) || read_user_stack_32(fp, &next_sp))
                        return;
                if (level > 0 && read_user_stack_32(&fp[1], &next_ip))
                        return;

                uregs = signal_frame_32_regs(sp, next_sp, next_ip);
                if (!uregs && level <= 1)
                        uregs = signal_frame_32_regs(sp, next_sp, lr);
                if (uregs) {
                        /*
                         * This looks like a signal frame, so restart
                         * the stack trace with the values in it.
                         */
                        if (read_user_stack_32(&uregs[PT_NIP], &next_ip) ||
                            read_user_stack_32(&uregs[PT_LNK], &lr) ||
                            read_user_stack_32(&uregs[PT_R1], &sp))
                                return;
                        level = 0;
                        perf_callchain_store(entry, PERF_CONTEXT_USER);
                        perf_callchain_store(entry, next_ip);
                        continue;
                }

                if (level == 0)
                        next_ip = lr;
                perf_callchain_store(entry, next_ip);
                ++level;
                sp = next_sp;
        }
}

void
perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
{
        if (current_is_64bit())
                perf_callchain_user_64(entry, regs);
        else
                perf_callchain_user_32(entry, regs);
}
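
All three walkers above follow the same ELF ABI back-chain convention: word 0
of each frame points at the caller's frame, and the saved LR sits at fp[2] on
64-bit (fp[1] on 32-bit). As a rough user-space sketch of the same 64-bit walk
(an illustration only, not part of this patch; it assumes a ppc64 ELF v1 frame
layout and GCC inline asm, and walk_backchain is a hypothetical helper):

#include <stdio.h>
#include <stdint.h>

/* Print the saved LR of every frame on our own back chain. */
static void walk_backchain(void)
{
        uintptr_t sp, next_sp, next_ip;
        int level = 0;

        /* GPR1 is the stack pointer on powerpc. */
        asm volatile("mr %0,1" : "=r" (sp));

        for (;;) {
                uintptr_t *fp = (uintptr_t *)sp;

                next_sp = fp[0];        /* back chain */
                if (!next_sp)
                        break;          /* top of the chain */
                if (level > 0) {
                        /* LR save doubleword: a PC inside this frame's owner;
                         * like the kernel walker, skip it at level 0 where
                         * only the live LR/NIP are meaningful. */
                        next_ip = fp[2];
                        printf("frame %#lx: ip %#lx\n",
                               (unsigned long)sp, (unsigned long)next_ip);
                }
                ++level;
                sp = next_sp;
        }
}

int main(void)
{
        walk_backchain();
        return 0;
}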
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
new file mode 100644
index 000000000000..c2e27ede07ec
--- /dev/null
+++ b/arch/powerpc/perf/core-book3s.c
@@ -0,0 +1,1448 @@
/*
 * Performance event support - powerpc architecture code
 *
 * Copyright 2008-2009 Paul Mackerras, IBM Corporation.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/perf_event.h>
#include <linux/percpu.h>
#include <linux/hardirq.h>
#include <asm/reg.h>
#include <asm/pmc.h>
#include <asm/machdep.h>
#include <asm/firmware.h>
#include <asm/ptrace.h>

struct cpu_hw_events {
        int n_events;
        int n_percpu;
        int disabled;
        int n_added;
        int n_limited;
        u8 pmcs_enabled;
        struct perf_event *event[MAX_HWEVENTS];
        u64 events[MAX_HWEVENTS];
        unsigned int flags[MAX_HWEVENTS];
        unsigned long mmcr[3];
        struct perf_event *limited_counter[MAX_LIMITED_HWCOUNTERS];
        u8 limited_hwidx[MAX_LIMITED_HWCOUNTERS];
        u64 alternatives[MAX_HWEVENTS][MAX_EVENT_ALTERNATIVES];
        unsigned long amasks[MAX_HWEVENTS][MAX_EVENT_ALTERNATIVES];
        unsigned long avalues[MAX_HWEVENTS][MAX_EVENT_ALTERNATIVES];

        unsigned int group_flag;
        int n_txn_start;
};
DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events);

struct power_pmu *ppmu;

/*
 * Normally, to ignore kernel events we set the FCS (freeze counters
 * in supervisor mode) bit in MMCR0, but if the kernel runs with the
 * hypervisor bit set in the MSR, or if we are running on a processor
 * where the hypervisor bit is forced to 1 (as on Apple G5 processors),
 * then we need to use the FCHV bit to ignore kernel events.
 */
static unsigned int freeze_events_kernel = MMCR0_FCS;

/*
 * 32-bit doesn't have MMCRA but does have an MMCR2,
 * and a few other names are different.
 */
#ifdef CONFIG_PPC32

#define MMCR0_FCHV 0
#define MMCR0_PMCjCE MMCR0_PMCnCE

#define SPRN_MMCRA SPRN_MMCR2
#define MMCRA_SAMPLE_ENABLE 0

static inline unsigned long perf_ip_adjust(struct pt_regs *regs)
{
        return 0;
}
static inline void perf_get_data_addr(struct pt_regs *regs, u64 *addrp) { }
static inline u32 perf_get_misc_flags(struct pt_regs *regs)
{
        return 0;
}
static inline void perf_read_regs(struct pt_regs *regs) { }
static inline int perf_intr_is_nmi(struct pt_regs *regs)
{
        return 0;
}

#endif /* CONFIG_PPC32 */

/*
 * Things that are specific to 64-bit implementations.
 */
#ifdef CONFIG_PPC64

static inline unsigned long perf_ip_adjust(struct pt_regs *regs)
{
        unsigned long mmcra = regs->dsisr;

        if ((mmcra & MMCRA_SAMPLE_ENABLE) && !(ppmu->flags & PPMU_ALT_SIPR)) {
                unsigned long slot = (mmcra & MMCRA_SLOT) >> MMCRA_SLOT_SHIFT;
                if (slot > 1)
                        return 4 * (slot - 1);
        }
        return 0;
}
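
/*
 * Illustration (not part of this patch): with continuous sampling
 * enabled, SIAR points at the first instruction of the sampled group
 * and the MMCRA slot field identifies the instruction within it, so
 * e.g. slot 3 gives an adjustment of 4 * (3 - 1) = 8 bytes, i.e. two
 * 4-byte instructions past SIAR.
 */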

/*
 * The user wants a data address recorded.
 * If we're not doing instruction sampling, give them the SDAR
 * (sampled data address).  If we are doing instruction sampling, then
 * only give them the SDAR if it corresponds to the instruction
 * pointed to by SIAR; this is indicated by the [POWER6_]MMCRA_SDSYNC
 * bit in MMCRA.
 */
static inline void perf_get_data_addr(struct pt_regs *regs, u64 *addrp)
{
        unsigned long mmcra = regs->dsisr;
        unsigned long sdsync = (ppmu->flags & PPMU_ALT_SIPR) ?
                POWER6_MMCRA_SDSYNC : MMCRA_SDSYNC;

        if (!(mmcra & MMCRA_SAMPLE_ENABLE) || (mmcra & sdsync))
                *addrp = mfspr(SPRN_SDAR);
}

static inline u32 perf_get_misc_flags(struct pt_regs *regs)
{
        unsigned long mmcra = regs->dsisr;
        unsigned long sihv = MMCRA_SIHV;
        unsigned long sipr = MMCRA_SIPR;

        if (TRAP(regs) != 0xf00)
                return 0;       /* not a PMU interrupt */

        if (ppmu->flags & PPMU_ALT_SIPR) {
                sihv = POWER6_MMCRA_SIHV;
                sipr = POWER6_MMCRA_SIPR;
        }

        /* PR has priority over HV, so order below is important */
        if (mmcra & sipr)
                return PERF_RECORD_MISC_USER;
        if ((mmcra & sihv) && (freeze_events_kernel != MMCR0_FCHV))
                return PERF_RECORD_MISC_HYPERVISOR;
        return PERF_RECORD_MISC_KERNEL;
}

/*
 * Overload regs->dsisr to store MMCRA so we only need to read it once
 * on each interrupt.
 */
static inline void perf_read_regs(struct pt_regs *regs)
{
        regs->dsisr = mfspr(SPRN_MMCRA);
}

/*
 * If interrupts were soft-disabled when a PMU interrupt occurs, treat
 * it as an NMI.
 */
static inline int perf_intr_is_nmi(struct pt_regs *regs)
{
        return !regs->softe;
}

#endif /* CONFIG_PPC64 */

static void perf_event_interrupt(struct pt_regs *regs);

void perf_event_print_debug(void)
{
}

/*
 * Read one performance monitor counter (PMC).
 */
static unsigned long read_pmc(int idx)
{
        unsigned long val;

        switch (idx) {
        case 1:
                val = mfspr(SPRN_PMC1);
                break;
        case 2:
                val = mfspr(SPRN_PMC2);
                break;
        case 3:
                val = mfspr(SPRN_PMC3);
                break;
        case 4:
                val = mfspr(SPRN_PMC4);
                break;
        case 5:
                val = mfspr(SPRN_PMC5);
                break;
        case 6:
                val = mfspr(SPRN_PMC6);
                break;
#ifdef CONFIG_PPC64
        case 7:
                val = mfspr(SPRN_PMC7);
                break;
        case 8:
                val = mfspr(SPRN_PMC8);
                break;
#endif /* CONFIG_PPC64 */
        default:
                printk(KERN_ERR "oops trying to read PMC%d\n", idx);
                val = 0;
        }
        return val;
}

/*
 * Write one PMC.
 */
static void write_pmc(int idx, unsigned long val)
{
        switch (idx) {
        case 1:
                mtspr(SPRN_PMC1, val);
                break;
        case 2:
                mtspr(SPRN_PMC2, val);
                break;
        case 3:
                mtspr(SPRN_PMC3, val);
                break;
        case 4:
                mtspr(SPRN_PMC4, val);
                break;
        case 5:
                mtspr(SPRN_PMC5, val);
                break;
        case 6:
                mtspr(SPRN_PMC6, val);
                break;
#ifdef CONFIG_PPC64
        case 7:
                mtspr(SPRN_PMC7, val);
                break;
        case 8:
                mtspr(SPRN_PMC8, val);
                break;
#endif /* CONFIG_PPC64 */
        default:
                printk(KERN_ERR "oops trying to write PMC%d\n", idx);
        }
}

/*
 * Check if a set of events can all go on the PMU at once.
 * If they can't, this will look at alternative codes for the events
 * and see if any combination of alternative codes is feasible.
 * The feasible set is returned in event_id[].
 */
static int power_check_constraints(struct cpu_hw_events *cpuhw,
                                   u64 event_id[], unsigned int cflags[],
                                   int n_ev)
{
        unsigned long mask, value, nv;
        unsigned long smasks[MAX_HWEVENTS], svalues[MAX_HWEVENTS];
        int n_alt[MAX_HWEVENTS], choice[MAX_HWEVENTS];
        int i, j;
        unsigned long addf = ppmu->add_fields;
        unsigned long tadd = ppmu->test_adder;

        if (n_ev > ppmu->n_counter)
                return -1;

        /* First see if the events will go on as-is */
        for (i = 0; i < n_ev; ++i) {
                if ((cflags[i] & PPMU_LIMITED_PMC_REQD)
                    && !ppmu->limited_pmc_event(event_id[i])) {
                        ppmu->get_alternatives(event_id[i], cflags[i],
                                               cpuhw->alternatives[i]);
                        event_id[i] = cpuhw->alternatives[i][0];
                }
                if (ppmu->get_constraint(event_id[i], &cpuhw->amasks[i][0],
                                         &cpuhw->avalues[i][0]))
                        return -1;
        }
        value = mask = 0;
        for (i = 0; i < n_ev; ++i) {
                nv = (value | cpuhw->avalues[i][0]) +
                        (value & cpuhw->avalues[i][0] & addf);
                if ((((nv + tadd) ^ value) & mask) != 0 ||
                    (((nv + tadd) ^ cpuhw->avalues[i][0]) &
                     cpuhw->amasks[i][0]) != 0)
                        break;
                value = nv;
                mask |= cpuhw->amasks[i][0];
        }
        if (i == n_ev)
                return 0;       /* all OK */

        /* doesn't work, gather alternatives... */
        if (!ppmu->get_alternatives)
                return -1;
        for (i = 0; i < n_ev; ++i) {
                choice[i] = 0;
                n_alt[i] = ppmu->get_alternatives(event_id[i], cflags[i],
                                                  cpuhw->alternatives[i]);
                for (j = 1; j < n_alt[i]; ++j)
                        ppmu->get_constraint(cpuhw->alternatives[i][j],
                                             &cpuhw->amasks[i][j],
                                             &cpuhw->avalues[i][j]);
        }

        /* enumerate all possibilities and see if any will work */
        i = 0;
        j = -1;
        value = mask = nv = 0;
        while (i < n_ev) {
                if (j >= 0) {
                        /* we're backtracking, restore context */
                        value = svalues[i];
                        mask = smasks[i];
                        j = choice[i];
                }
                /*
                 * See if any alternative k for event_id i,
                 * where k > j, will satisfy the constraints.
                 */
                while (++j < n_alt[i]) {
                        nv = (value | cpuhw->avalues[i][j]) +
                                (value & cpuhw->avalues[i][j] & addf);
                        if ((((nv + tadd) ^ value) & mask) == 0 &&
                            (((nv + tadd) ^ cpuhw->avalues[i][j])
                             & cpuhw->amasks[i][j]) == 0)
                                break;
                }
                if (j >= n_alt[i]) {
                        /*
                         * No feasible alternative, backtrack
                         * to event_id i-1 and continue enumerating its
                         * alternatives from where we got up to.
                         */
                        if (--i < 0)
                                return -1;
                } else {
                        /*
                         * Found a feasible alternative for event_id i,
                         * remember where we got up to with this event_id,
                         * go on to the next event_id, and start with
                         * the first alternative for it.
                         */
                        choice[i] = j;
                        svalues[i] = value;
                        smasks[i] = mask;
                        value = nv;
                        mask |= cpuhw->amasks[i][j];
                        ++i;
                        j = -1;
                }
        }

        /* OK, we have a feasible combination, tell the caller the solution */
        for (i = 0; i < n_ev; ++i)
                event_id[i] = cpuhw->alternatives[i][choice[i]];
        return 0;
}
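
/*
 * Illustration (not part of this patch): each event's constraint is a
 * (mask, value) pair of bit fields describing the PMU resources it
 * needs; an event fits if its value agrees with the accumulated value
 * everywhere under the accumulated mask.  add_fields/test_adder make
 * selected fields behave as small counters: summing a field and then
 * adding test_adder carries into the bits covered by mask once too
 * many events share that resource, which is what trips the checks
 * above and drives the backtracking over alternative event codes.
 */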

/*
 * Check if newly-added events have consistent settings for
 * exclude_{user,kernel,hv} with each other and any previously
 * added events.
 */
static int check_excludes(struct perf_event **ctrs, unsigned int cflags[],
                          int n_prev, int n_new)
{
        int eu = 0, ek = 0, eh = 0;
        int i, n, first;
        struct perf_event *event;

        n = n_prev + n_new;
        if (n <= 1)
                return 0;

        first = 1;
        for (i = 0; i < n; ++i) {
                if (cflags[i] & PPMU_LIMITED_PMC_OK) {
                        cflags[i] &= ~PPMU_LIMITED_PMC_REQD;
                        continue;
                }
                event = ctrs[i];
                if (first) {
                        eu = event->attr.exclude_user;
                        ek = event->attr.exclude_kernel;
                        eh = event->attr.exclude_hv;
                        first = 0;
                } else if (event->attr.exclude_user != eu ||
                           event->attr.exclude_kernel != ek ||
                           event->attr.exclude_hv != eh) {
                        return -EAGAIN;
                }
        }

        if (eu || ek || eh)
                for (i = 0; i < n; ++i)
                        if (cflags[i] & PPMU_LIMITED_PMC_OK)
                                cflags[i] |= PPMU_LIMITED_PMC_REQD;

        return 0;
}

static u64 check_and_compute_delta(u64 prev, u64 val)
{
        u64 delta = (val - prev) & 0xfffffffful;

        /*
         * POWER7 can roll back counter values; if the new value is smaller
         * than the previous value, the delta and the counter will have
         * bogus values unless we actually rolled the counter over.  If a
         * counter is rolled back, it will be smaller, but within 256, which
         * is the maximum number of events to roll back at once.  If we
         * detect a rollback, return 0.  This can lead to a small loss of
         * precision in the counters.
         */
        if (prev > val && (prev - val) < 256)
                delta = 0;

        return delta;
}
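
/*
 * Illustration (not part of this patch): the PMCs are 32 bits, so a
 * normal wrap still gives the right delta, e.g. prev = 0xfffffff0,
 * val = 0x10 yields (val - prev) & 0xffffffff = 0x20.  A POWER7
 * rollback instead shows up as prev > val with prev - val < 256,
 * e.g. prev = 0x1000, val = 0xfff, and is reported as delta 0.
 */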

static void power_pmu_read(struct perf_event *event)
{
        s64 val, delta, prev;

        if (event->hw.state & PERF_HES_STOPPED)
                return;

        if (!event->hw.idx)
                return;
        /*
         * Performance monitor interrupts come even when interrupts
         * are soft-disabled, as long as interrupts are hard-enabled.
         * Therefore we treat them like NMIs.
         */
        do {
                prev = local64_read(&event->hw.prev_count);
                barrier();
                val = read_pmc(event->hw.idx);
                delta = check_and_compute_delta(prev, val);
                if (!delta)
                        return;
        } while (local64_cmpxchg(&event->hw.prev_count, prev, val) != prev);

        local64_add(delta, &event->count);
        local64_sub(delta, &event->hw.period_left);
}

/*
 * On some machines, PMC5 and PMC6 can't be written, don't respect
 * the freeze conditions, and don't generate interrupts.  This tells
 * us if `event' is using such a PMC.
 */
static int is_limited_pmc(int pmcnum)
{
        return (ppmu->flags & PPMU_LIMITED_PMC5_6)
                && (pmcnum == 5 || pmcnum == 6);
}

static void freeze_limited_counters(struct cpu_hw_events *cpuhw,
                                    unsigned long pmc5, unsigned long pmc6)
{
        struct perf_event *event;
        u64 val, prev, delta;
        int i;

        for (i = 0; i < cpuhw->n_limited; ++i) {
                event = cpuhw->limited_counter[i];
                if (!event->hw.idx)
                        continue;
                val = (event->hw.idx == 5) ? pmc5 : pmc6;
                prev = local64_read(&event->hw.prev_count);
                event->hw.idx = 0;
                delta = check_and_compute_delta(prev, val);
                if (delta)
                        local64_add(delta, &event->count);
        }
}

static void thaw_limited_counters(struct cpu_hw_events *cpuhw,
                                  unsigned long pmc5, unsigned long pmc6)
{
        struct perf_event *event;
        u64 val, prev;
        int i;

        for (i = 0; i < cpuhw->n_limited; ++i) {
                event = cpuhw->limited_counter[i];
                event->hw.idx = cpuhw->limited_hwidx[i];
                val = (event->hw.idx == 5) ? pmc5 : pmc6;
                prev = local64_read(&event->hw.prev_count);
                if (check_and_compute_delta(prev, val))
                        local64_set(&event->hw.prev_count, val);
                perf_event_update_userpage(event);
        }
}

/*
 * Since limited events don't respect the freeze conditions, we
 * have to read them immediately after freezing or unfreezing the
 * other events.  We try to keep the values from the limited
 * events as consistent as possible by keeping the delay (in
 * cycles and instructions) between freezing/unfreezing and reading
 * the limited events as small and consistent as possible.
 * Therefore, if any limited events are in use, we read them
 * both, and always in the same order, to minimize variability,
 * and do it inside the same asm that writes MMCR0.
 */
static void write_mmcr0(struct cpu_hw_events *cpuhw, unsigned long mmcr0)
{
        unsigned long pmc5, pmc6;

        if (!cpuhw->n_limited) {
                mtspr(SPRN_MMCR0, mmcr0);
                return;
        }

        /*
         * Write MMCR0, then read PMC5 and PMC6 immediately.
         * To ensure we don't get a performance monitor interrupt
         * between writing MMCR0 and freezing/thawing the limited
         * events, we first write MMCR0 with the event overflow
         * interrupt enable bits turned off.
         */
        asm volatile("mtspr %3,%2; mfspr %0,%4; mfspr %1,%5"
                     : "=&r" (pmc5), "=&r" (pmc6)
                     : "r" (mmcr0 & ~(MMCR0_PMC1CE | MMCR0_PMCjCE)),
                       "i" (SPRN_MMCR0),
                       "i" (SPRN_PMC5), "i" (SPRN_PMC6));

        if (mmcr0 & MMCR0_FC)
                freeze_limited_counters(cpuhw, pmc5, pmc6);
        else
                thaw_limited_counters(cpuhw, pmc5, pmc6);

        /*
         * Write the full MMCR0 including the event overflow interrupt
         * enable bits, if necessary.
         */
        if (mmcr0 & (MMCR0_PMC1CE | MMCR0_PMCjCE))
                mtspr(SPRN_MMCR0, mmcr0);
}

/*
 * Disable all events to prevent PMU interrupts and to allow
 * events to be added or removed.
 */
static void power_pmu_disable(struct pmu *pmu)
{
        struct cpu_hw_events *cpuhw;
        unsigned long flags;

        if (!ppmu)
                return;
        local_irq_save(flags);
        cpuhw = &__get_cpu_var(cpu_hw_events);

        if (!cpuhw->disabled) {
                cpuhw->disabled = 1;
                cpuhw->n_added = 0;

                /*
                 * Check if we ever enabled the PMU on this cpu.
                 */
                if (!cpuhw->pmcs_enabled) {
                        ppc_enable_pmcs();
                        cpuhw->pmcs_enabled = 1;
                }

                /*
                 * Disable instruction sampling if it was enabled
                 */
                if (cpuhw->mmcr[2] & MMCRA_SAMPLE_ENABLE) {
                        mtspr(SPRN_MMCRA,
                              cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE);
                        mb();
                }

                /*
                 * Set the 'freeze counters' bit.
                 * The barrier is to make sure the mtspr has been
                 * executed and the PMU has frozen the events
                 * before we return.
                 */
                write_mmcr0(cpuhw, mfspr(SPRN_MMCR0) | MMCR0_FC);
                mb();
        }
        local_irq_restore(flags);
}

/*
 * Re-enable all events if disable == 0.
 * If we were previously disabled and events were added, then
 * put the new config on the PMU.
 */
static void power_pmu_enable(struct pmu *pmu)
{
        struct perf_event *event;
        struct cpu_hw_events *cpuhw;
        unsigned long flags;
        long i;
        unsigned long val;
        s64 left;
        unsigned int hwc_index[MAX_HWEVENTS];
        int n_lim;
        int idx;

        if (!ppmu)
                return;
        local_irq_save(flags);
        cpuhw = &__get_cpu_var(cpu_hw_events);
        if (!cpuhw->disabled) {
                local_irq_restore(flags);
                return;
        }
        cpuhw->disabled = 0;

        /*
         * If we didn't change anything, or only removed events,
         * no need to recalculate MMCR* settings and reset the PMCs.
         * Just reenable the PMU with the current MMCR* settings
         * (possibly updated for removal of events).
         */
        if (!cpuhw->n_added) {
                mtspr(SPRN_MMCRA, cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE);
                mtspr(SPRN_MMCR1, cpuhw->mmcr[1]);
                if (cpuhw->n_events == 0)
                        ppc_set_pmu_inuse(0);
                goto out_enable;
        }

        /*
         * Compute MMCR* values for the new set of events
         */
        if (ppmu->compute_mmcr(cpuhw->events, cpuhw->n_events, hwc_index,
                               cpuhw->mmcr)) {
                /* shouldn't ever get here */
                printk(KERN_ERR "oops compute_mmcr failed\n");
                goto out;
        }

        /*
         * Add in MMCR0 freeze bits corresponding to the
         * attr.exclude_* bits for the first event.
         * We have already checked that all events have the
         * same values for these bits as the first event.
         */
        event = cpuhw->event[0];
        if (event->attr.exclude_user)
                cpuhw->mmcr[0] |= MMCR0_FCP;
        if (event->attr.exclude_kernel)
                cpuhw->mmcr[0] |= freeze_events_kernel;
        if (event->attr.exclude_hv)
                cpuhw->mmcr[0] |= MMCR0_FCHV;

        /*
         * Write the new configuration to MMCR* with the freeze
         * bit set and set the hardware events to their initial values.
         * Then unfreeze the events.
         */
        ppc_set_pmu_inuse(1);
        mtspr(SPRN_MMCRA, cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE);
        mtspr(SPRN_MMCR1, cpuhw->mmcr[1]);
        mtspr(SPRN_MMCR0, (cpuhw->mmcr[0] & ~(MMCR0_PMC1CE | MMCR0_PMCjCE))
              | MMCR0_FC);

        /*
         * Read off any pre-existing events that need to move
         * to another PMC.
         */
        for (i = 0; i < cpuhw->n_events; ++i) {
                event = cpuhw->event[i];
                if (event->hw.idx && event->hw.idx != hwc_index[i] + 1) {
                        power_pmu_read(event);
                        write_pmc(event->hw.idx, 0);
                        event->hw.idx = 0;
                }
        }

        /*
         * Initialize the PMCs for all the new and moved events.
         */
        cpuhw->n_limited = n_lim = 0;
        for (i = 0; i < cpuhw->n_events; ++i) {
                event = cpuhw->event[i];
                if (event->hw.idx)
                        continue;
                idx = hwc_index[i] + 1;
                if (is_limited_pmc(idx)) {
                        cpuhw->limited_counter[n_lim] = event;
                        cpuhw->limited_hwidx[n_lim] = idx;
                        ++n_lim;
                        continue;
                }
                val = 0;
                if (event->hw.sample_period) {
                        left = local64_read(&event->hw.period_left);
                        if (left < 0x80000000L)
                                val = 0x80000000L - left;
                }
                local64_set(&event->hw.prev_count, val);
                event->hw.idx = idx;
                if (event->hw.state & PERF_HES_STOPPED)
                        val = 0;
                write_pmc(idx, val);
                perf_event_update_userpage(event);
        }
        cpuhw->n_limited = n_lim;
        cpuhw->mmcr[0] |= MMCR0_PMXE | MMCR0_FCECE;

 out_enable:
        mb();
        write_mmcr0(cpuhw, cpuhw->mmcr[0]);

        /*
         * Enable instruction sampling if necessary
         */
        if (cpuhw->mmcr[2] & MMCRA_SAMPLE_ENABLE) {
                mb();
                mtspr(SPRN_MMCRA, cpuhw->mmcr[2]);
        }

 out:
        local_irq_restore(flags);
}

static int collect_events(struct perf_event *group, int max_count,
                          struct perf_event *ctrs[], u64 *events,
                          unsigned int *flags)
{
        int n = 0;
        struct perf_event *event;

        if (!is_software_event(group)) {
                if (n >= max_count)
                        return -1;
                ctrs[n] = group;
                flags[n] = group->hw.event_base;
                events[n++] = group->hw.config;
        }
        list_for_each_entry(event, &group->sibling_list, group_entry) {
                if (!is_software_event(event) &&
                    event->state != PERF_EVENT_STATE_OFF) {
                        if (n >= max_count)
                                return -1;
                        ctrs[n] = event;
                        flags[n] = event->hw.event_base;
                        events[n++] = event->hw.config;
                }
        }
        return n;
}

753 | * Add a event to the PMU. | ||
754 | * If all events are not already frozen, then we disable and | ||
755 | * re-enable the PMU in order to get hw_perf_enable to do the | ||
756 | * actual work of reconfiguring the PMU. | ||
757 | */ | ||
758 | static int power_pmu_add(struct perf_event *event, int ef_flags) | ||
759 | { | ||
760 | struct cpu_hw_events *cpuhw; | ||
761 | unsigned long flags; | ||
762 | int n0; | ||
763 | int ret = -EAGAIN; | ||
764 | |||
765 | local_irq_save(flags); | ||
766 | perf_pmu_disable(event->pmu); | ||
767 | |||
768 | /* | ||
769 | * Add the event to the list (if there is room) | ||
770 | * and check whether the total set is still feasible. | ||
771 | */ | ||
772 | cpuhw = &__get_cpu_var(cpu_hw_events); | ||
773 | n0 = cpuhw->n_events; | ||
774 | if (n0 >= ppmu->n_counter) | ||
775 | goto out; | ||
776 | cpuhw->event[n0] = event; | ||
777 | cpuhw->events[n0] = event->hw.config; | ||
778 | cpuhw->flags[n0] = event->hw.event_base; | ||
779 | |||
780 | if (!(ef_flags & PERF_EF_START)) | ||
781 | event->hw.state = PERF_HES_STOPPED | PERF_HES_UPTODATE; | ||
782 | |||
783 | /* | ||
784 | * If group events scheduling transaction was started, | ||
785 | * skip the schedulability test here, it will be performed | ||
786 | * at commit time(->commit_txn) as a whole | ||
787 | */ | ||
788 | if (cpuhw->group_flag & PERF_EVENT_TXN) | ||
789 | goto nocheck; | ||
790 | |||
791 | if (check_excludes(cpuhw->event, cpuhw->flags, n0, 1)) | ||
792 | goto out; | ||
793 | if (power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n0 + 1)) | ||
794 | goto out; | ||
795 | event->hw.config = cpuhw->events[n0]; | ||
796 | |||
797 | nocheck: | ||
798 | ++cpuhw->n_events; | ||
799 | ++cpuhw->n_added; | ||
800 | |||
801 | ret = 0; | ||
802 | out: | ||
803 | perf_pmu_enable(event->pmu); | ||
804 | local_irq_restore(flags); | ||
805 | return ret; | ||
806 | } | ||
807 | |||
/*
 * Remove an event from the PMU.
 */
static void power_pmu_del(struct perf_event *event, int ef_flags)
{
        struct cpu_hw_events *cpuhw;
        long i;
        unsigned long flags;

        local_irq_save(flags);
        perf_pmu_disable(event->pmu);

        power_pmu_read(event);

        cpuhw = &__get_cpu_var(cpu_hw_events);
        for (i = 0; i < cpuhw->n_events; ++i) {
                if (event == cpuhw->event[i]) {
                        while (++i < cpuhw->n_events) {
                                cpuhw->event[i-1] = cpuhw->event[i];
                                cpuhw->events[i-1] = cpuhw->events[i];
                                cpuhw->flags[i-1] = cpuhw->flags[i];
                        }
                        --cpuhw->n_events;
                        ppmu->disable_pmc(event->hw.idx - 1, cpuhw->mmcr);
                        if (event->hw.idx) {
                                write_pmc(event->hw.idx, 0);
                                event->hw.idx = 0;
                        }
                        perf_event_update_userpage(event);
                        break;
                }
        }
        for (i = 0; i < cpuhw->n_limited; ++i)
                if (event == cpuhw->limited_counter[i])
                        break;
        if (i < cpuhw->n_limited) {
                while (++i < cpuhw->n_limited) {
                        cpuhw->limited_counter[i-1] = cpuhw->limited_counter[i];
                        cpuhw->limited_hwidx[i-1] = cpuhw->limited_hwidx[i];
                }
                --cpuhw->n_limited;
        }
        if (cpuhw->n_events == 0) {
                /* disable exceptions if no events are running */
                cpuhw->mmcr[0] &= ~(MMCR0_PMXE | MMCR0_FCECE);
        }

        perf_pmu_enable(event->pmu);
        local_irq_restore(flags);
}

/*
 * POWER-PMU does not support disabling individual counters, hence
 * program their cycle counter to their max value and ignore the interrupts.
 */

static void power_pmu_start(struct perf_event *event, int ef_flags)
{
        unsigned long flags;
        s64 left;
        unsigned long val;

        if (!event->hw.idx || !event->hw.sample_period)
                return;

        if (!(event->hw.state & PERF_HES_STOPPED))
                return;

        if (ef_flags & PERF_EF_RELOAD)
                WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));

        local_irq_save(flags);
        perf_pmu_disable(event->pmu);

        event->hw.state = 0;
        left = local64_read(&event->hw.period_left);

        val = 0;
        if (left < 0x80000000L)
                val = 0x80000000L - left;

        write_pmc(event->hw.idx, val);

        perf_event_update_userpage(event);
        perf_pmu_enable(event->pmu);
        local_irq_restore(flags);
}

static void power_pmu_stop(struct perf_event *event, int ef_flags)
{
        unsigned long flags;

        if (!event->hw.idx || !event->hw.sample_period)
                return;

        if (event->hw.state & PERF_HES_STOPPED)
                return;

        local_irq_save(flags);
        perf_pmu_disable(event->pmu);

        power_pmu_read(event);
        event->hw.state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
        write_pmc(event->hw.idx, 0);

        perf_event_update_userpage(event);
        perf_pmu_enable(event->pmu);
        local_irq_restore(flags);
}

/*
 * Start group events scheduling transaction
 * Set the flag to make pmu::enable() not perform the
 * schedulability test, it will be performed at commit time
 */
void power_pmu_start_txn(struct pmu *pmu)
{
        struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);

        perf_pmu_disable(pmu);
        cpuhw->group_flag |= PERF_EVENT_TXN;
        cpuhw->n_txn_start = cpuhw->n_events;
}

/*
 * Stop group events scheduling transaction
 * Clear the flag and pmu::enable() will perform the
 * schedulability test.
 */
void power_pmu_cancel_txn(struct pmu *pmu)
{
        struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);

        cpuhw->group_flag &= ~PERF_EVENT_TXN;
        perf_pmu_enable(pmu);
}

/*
 * Commit group events scheduling transaction
 * Perform the group schedulability test as a whole
 * Return 0 if success
 */
int power_pmu_commit_txn(struct pmu *pmu)
{
        struct cpu_hw_events *cpuhw;
        long i, n;

        if (!ppmu)
                return -EAGAIN;
        cpuhw = &__get_cpu_var(cpu_hw_events);
        n = cpuhw->n_events;
        if (check_excludes(cpuhw->event, cpuhw->flags, 0, n))
                return -EAGAIN;
        i = power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n);
        if (i < 0)
                return -EAGAIN;

        for (i = cpuhw->n_txn_start; i < n; ++i)
                cpuhw->event[i]->hw.config = cpuhw->events[i];

        cpuhw->group_flag &= ~PERF_EVENT_TXN;
        perf_pmu_enable(pmu);
        return 0;
}

/*
 * Return 1 if we might be able to put the event on a limited PMC,
 * or 0 if not.
 * An event can only go on a limited PMC if it counts something
 * that a limited PMC can count, doesn't require interrupts, and
 * doesn't exclude any processor mode.
 */
980 | static int can_go_on_limited_pmc(struct perf_event *event, u64 ev, | ||
981 | unsigned int flags) | ||
982 | { | ||
983 | int n; | ||
984 | u64 alt[MAX_EVENT_ALTERNATIVES]; | ||
985 | |||
986 | if (event->attr.exclude_user | ||
987 | || event->attr.exclude_kernel | ||
988 | || event->attr.exclude_hv | ||
989 | || event->attr.sample_period) | ||
990 | return 0; | ||
991 | |||
992 | if (ppmu->limited_pmc_event(ev)) | ||
993 | return 1; | ||
994 | |||
995 | /* | ||
996 | * The requested event_id isn't on a limited PMC already; | ||
997 | * see if any alternative code goes on a limited PMC. | ||
998 | */ | ||
999 | if (!ppmu->get_alternatives) | ||
1000 | return 0; | ||
1001 | |||
1002 | flags |= PPMU_LIMITED_PMC_OK | PPMU_LIMITED_PMC_REQD; | ||
1003 | n = ppmu->get_alternatives(ev, flags, alt); | ||
1004 | |||
1005 | return n > 0; | ||
1006 | } | ||
1007 | |||
1008 | /* | ||
1009 | * Find an alternative event_id that goes on a normal PMC, if possible, | ||
1010 | * and return the event_id code, or 0 if there is no such alternative. | ||
1011 | * (Note: event_id code 0 is "don't count" on all machines.) | ||
1012 | */ | ||
1013 | static u64 normal_pmc_alternative(u64 ev, unsigned long flags) | ||
1014 | { | ||
1015 | u64 alt[MAX_EVENT_ALTERNATIVES]; | ||
1016 | int n; | ||
1017 | |||
1018 | flags &= ~(PPMU_LIMITED_PMC_OK | PPMU_LIMITED_PMC_REQD); | ||
1019 | n = ppmu->get_alternatives(ev, flags, alt); | ||
1020 | if (!n) | ||
1021 | return 0; | ||
1022 | return alt[0]; | ||
1023 | } | ||
1024 | |||
1025 | /* Number of perf_events counting hardware events */ | ||
1026 | static atomic_t num_events; | ||
1027 | /* Used to avoid races in calling reserve/release_pmc_hardware */ | ||
1028 | static DEFINE_MUTEX(pmc_reserve_mutex); | ||
1029 | |||
1030 | /* | ||
1031 | * Release the PMU if this is the last perf_event. | ||
1032 | */ | ||
1033 | static void hw_perf_event_destroy(struct perf_event *event) | ||
1034 | { | ||
1035 | if (!atomic_add_unless(&num_events, -1, 1)) { | ||
1036 | mutex_lock(&pmc_reserve_mutex); | ||
1037 | if (atomic_dec_return(&num_events) == 0) | ||
1038 | release_pmc_hardware(); | ||
1039 | mutex_unlock(&pmc_reserve_mutex); | ||
1040 | } | ||
1041 | } | ||
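The atomic-plus-mutex dance above, paired with the reservation in power_pmu_event_init() below, keeps the common path lock-free and takes the mutex only when the reference count crosses the 0/1 boundary. A hedged sketch of the pattern in isolation, with reserve_hardware()/release_hardware() as hypothetical stand-ins:

static atomic_t refcnt;
static DEFINE_MUTEX(refcnt_mutex);

static int get_hardware(void)
{
	int err = 0;

	/* Fast path: count was already nonzero, hardware is reserved. */
	if (!atomic_inc_not_zero(&refcnt)) {
		mutex_lock(&refcnt_mutex);
		if (atomic_read(&refcnt) == 0 && reserve_hardware())
			err = -EBUSY;		/* hypothetical reservation failed */
		else
			atomic_inc(&refcnt);
		mutex_unlock(&refcnt_mutex);
	}
	return err;
}

static void put_hardware(void)
{
	/* Fast path: decrement unless this would drop the count to zero. */
	if (!atomic_add_unless(&refcnt, -1, 1)) {
		mutex_lock(&refcnt_mutex);
		if (atomic_dec_return(&refcnt) == 0)
			release_hardware();	/* hypothetical: last user gone */
		mutex_unlock(&refcnt_mutex);
	}
}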
1042 | |||
1043 | /* | ||
1044 | * Translate a generic cache event_id config to a raw event_id code. | ||
1045 | */ | ||
1046 | static int hw_perf_cache_event(u64 config, u64 *eventp) | ||
1047 | { | ||
1048 | unsigned long type, op, result; | ||
1049 | int ev; | ||
1050 | |||
1051 | if (!ppmu->cache_events) | ||
1052 | return -EINVAL; | ||
1053 | |||
1054 | /* unpack config */ | ||
1055 | type = config & 0xff; | ||
1056 | op = (config >> 8) & 0xff; | ||
1057 | result = (config >> 16) & 0xff; | ||
1058 | |||
1059 | if (type >= PERF_COUNT_HW_CACHE_MAX || | ||
1060 | op >= PERF_COUNT_HW_CACHE_OP_MAX || | ||
1061 | result >= PERF_COUNT_HW_CACHE_RESULT_MAX) | ||
1062 | return -EINVAL; | ||
1063 | |||
1064 | ev = (*ppmu->cache_events)[type][op][result]; | ||
1065 | if (ev == 0) | ||
1066 | return -EOPNOTSUPP; | ||
1067 | if (ev == -1) | ||
1068 | return -EINVAL; | ||
1069 | *eventp = ev; | ||
1070 | return 0; | ||
1071 | } | ||
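The three 8-bit fields unpacked above are the ones the generic perf ABI packs into attr.config for PERF_TYPE_HW_CACHE events; for example, an L1-data read-miss event is built as:

/* Example config for an L1D read-miss cache event. */
u64 config = PERF_COUNT_HW_CACHE_L1D |			/* type,   bits 0-7   */
	     (PERF_COUNT_HW_CACHE_OP_READ << 8) |	/* op,     bits 8-15  */
	     (PERF_COUNT_HW_CACHE_RESULT_MISS << 16);	/* result, bits 16-23 */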
1072 | |||
1073 | static int power_pmu_event_init(struct perf_event *event) | ||
1074 | { | ||
1075 | u64 ev; | ||
1076 | unsigned long flags; | ||
1077 | struct perf_event *ctrs[MAX_HWEVENTS]; | ||
1078 | u64 events[MAX_HWEVENTS]; | ||
1079 | unsigned int cflags[MAX_HWEVENTS]; | ||
1080 | int n; | ||
1081 | int err; | ||
1082 | struct cpu_hw_events *cpuhw; | ||
1083 | |||
1084 | if (!ppmu) | ||
1085 | return -ENOENT; | ||
1086 | |||
1087 | /* does not support taken branch sampling */ | ||
1088 | if (has_branch_stack(event)) | ||
1089 | return -EOPNOTSUPP; | ||
1090 | |||
1091 | switch (event->attr.type) { | ||
1092 | case PERF_TYPE_HARDWARE: | ||
1093 | ev = event->attr.config; | ||
1094 | if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0) | ||
1095 | return -EOPNOTSUPP; | ||
1096 | ev = ppmu->generic_events[ev]; | ||
1097 | break; | ||
1098 | case PERF_TYPE_HW_CACHE: | ||
1099 | err = hw_perf_cache_event(event->attr.config, &ev); | ||
1100 | if (err) | ||
1101 | return err; | ||
1102 | break; | ||
1103 | case PERF_TYPE_RAW: | ||
1104 | ev = event->attr.config; | ||
1105 | break; | ||
1106 | default: | ||
1107 | return -ENOENT; | ||
1108 | } | ||
1109 | |||
1110 | event->hw.config_base = ev; | ||
1111 | event->hw.idx = 0; | ||
1112 | |||
1113 | /* | ||
1114 | * If we are not running on a hypervisor, force the | ||
1115 | * exclude_hv bit to 0 so that we don't care what | ||
1116 | * the user set it to. | ||
1117 | */ | ||
1118 | if (!firmware_has_feature(FW_FEATURE_LPAR)) | ||
1119 | event->attr.exclude_hv = 0; | ||
1120 | |||
1121 | /* | ||
1122 | * If this is a per-task event, then we can use | ||
1123 | * PM_RUN_* events interchangeably with their non RUN_* | ||
1124 | * equivalents, e.g. PM_RUN_CYC instead of PM_CYC. | ||
1125 | * XXX we should check if the task is an idle task. | ||
1126 | */ | ||
1127 | flags = 0; | ||
1128 | if (event->attach_state & PERF_ATTACH_TASK) | ||
1129 | flags |= PPMU_ONLY_COUNT_RUN; | ||
1130 | |||
1131 | /* | ||
1132 | * If this machine has limited events, check whether this | ||
1133 | * event_id could go on a limited event. | ||
1134 | */ | ||
1135 | if (ppmu->flags & PPMU_LIMITED_PMC5_6) { | ||
1136 | if (can_go_on_limited_pmc(event, ev, flags)) { | ||
1137 | flags |= PPMU_LIMITED_PMC_OK; | ||
1138 | } else if (ppmu->limited_pmc_event(ev)) { | ||
1139 | /* | ||
1140 | * The requested event_id is on a limited PMC, | ||
1141 | * but we can't use a limited PMC; see if any | ||
1142 | * alternative goes on a normal PMC. | ||
1143 | */ | ||
1144 | ev = normal_pmc_alternative(ev, flags); | ||
1145 | if (!ev) | ||
1146 | return -EINVAL; | ||
1147 | } | ||
1148 | } | ||
1149 | |||
1150 | /* | ||
1151 | * If this is in a group, check if it can go on with all the | ||
1152 | * other hardware events in the group. We assume the event | ||
1153 | * hasn't been linked into its leader's sibling list at this point. | ||
1154 | */ | ||
1155 | n = 0; | ||
1156 | if (event->group_leader != event) { | ||
1157 | n = collect_events(event->group_leader, ppmu->n_counter - 1, | ||
1158 | ctrs, events, cflags); | ||
1159 | if (n < 0) | ||
1160 | return -EINVAL; | ||
1161 | } | ||
1162 | events[n] = ev; | ||
1163 | ctrs[n] = event; | ||
1164 | cflags[n] = flags; | ||
1165 | if (check_excludes(ctrs, cflags, n, 1)) | ||
1166 | return -EINVAL; | ||
1167 | |||
1168 | cpuhw = &get_cpu_var(cpu_hw_events); | ||
1169 | err = power_check_constraints(cpuhw, events, cflags, n + 1); | ||
1170 | put_cpu_var(cpu_hw_events); | ||
1171 | if (err) | ||
1172 | return -EINVAL; | ||
1173 | |||
1174 | event->hw.config = events[n]; | ||
1175 | event->hw.event_base = cflags[n]; | ||
1176 | event->hw.last_period = event->hw.sample_period; | ||
1177 | local64_set(&event->hw.period_left, event->hw.last_period); | ||
1178 | |||
1179 | /* | ||
1180 | * See if we need to reserve the PMU. | ||
1181 | * If no events are currently in use, then we have to take a | ||
1182 | * mutex to ensure that we don't race with another task doing | ||
1183 | * reserve_pmc_hardware or release_pmc_hardware. | ||
1184 | */ | ||
1185 | err = 0; | ||
1186 | if (!atomic_inc_not_zero(&num_events)) { | ||
1187 | mutex_lock(&pmc_reserve_mutex); | ||
1188 | if (atomic_read(&num_events) == 0 && | ||
1189 | reserve_pmc_hardware(perf_event_interrupt)) | ||
1190 | err = -EBUSY; | ||
1191 | else | ||
1192 | atomic_inc(&num_events); | ||
1193 | mutex_unlock(&pmc_reserve_mutex); | ||
1194 | } | ||
1195 | event->destroy = hw_perf_event_destroy; | ||
1196 | |||
1197 | return err; | ||
1198 | } | ||
1199 | |||
1200 | static int power_pmu_event_idx(struct perf_event *event) | ||
1201 | { | ||
1202 | return event->hw.idx; | ||
1203 | } | ||
1204 | |||
1205 | struct pmu power_pmu = { | ||
1206 | .pmu_enable = power_pmu_enable, | ||
1207 | .pmu_disable = power_pmu_disable, | ||
1208 | .event_init = power_pmu_event_init, | ||
1209 | .add = power_pmu_add, | ||
1210 | .del = power_pmu_del, | ||
1211 | .start = power_pmu_start, | ||
1212 | .stop = power_pmu_stop, | ||
1213 | .read = power_pmu_read, | ||
1214 | .start_txn = power_pmu_start_txn, | ||
1215 | .cancel_txn = power_pmu_cancel_txn, | ||
1216 | .commit_txn = power_pmu_commit_txn, | ||
1217 | .event_idx = power_pmu_event_idx, | ||
1218 | }; | ||
1219 | |||
1220 | /* | ||
1221 | * A counter has overflowed; update its count and record | ||
1222 | * things if requested. Note that interrupts are hard-disabled | ||
1223 | * here so there is no possibility of being interrupted. | ||
1224 | */ | ||
1225 | static void record_and_restart(struct perf_event *event, unsigned long val, | ||
1226 | struct pt_regs *regs) | ||
1227 | { | ||
1228 | u64 period = event->hw.sample_period; | ||
1229 | s64 prev, delta, left; | ||
1230 | int record = 0; | ||
1231 | |||
1232 | if (event->hw.state & PERF_HES_STOPPED) { | ||
1233 | write_pmc(event->hw.idx, 0); | ||
1234 | return; | ||
1235 | } | ||
1236 | |||
1237 | /* we don't have to worry about interrupts here */ | ||
1238 | prev = local64_read(&event->hw.prev_count); | ||
1239 | delta = check_and_compute_delta(prev, val); | ||
1240 | local64_add(delta, &event->count); | ||
1241 | |||
1242 | /* | ||
1243 | * See if the total period for this event has expired, | ||
1244 | * and update for the next period. | ||
1245 | */ | ||
1246 | val = 0; | ||
1247 | left = local64_read(&event->hw.period_left) - delta; | ||
1248 | if (period) { | ||
1249 | if (left <= 0) { | ||
1250 | left += period; | ||
1251 | if (left <= 0) | ||
1252 | left = period; | ||
1253 | record = 1; | ||
1254 | event->hw.last_period = event->hw.sample_period; | ||
1255 | } | ||
1256 | if (left < 0x80000000LL) | ||
1257 | val = 0x80000000LL - left; | ||
1258 | } | ||
1259 | |||
1260 | write_pmc(event->hw.idx, val); | ||
1261 | local64_set(&event->hw.prev_count, val); | ||
1262 | local64_set(&event->hw.period_left, left); | ||
1263 | perf_event_update_userpage(event); | ||
1264 | |||
1265 | /* | ||
1266 | * Finally record data if requested. | ||
1267 | */ | ||
1268 | if (record) { | ||
1269 | struct perf_sample_data data; | ||
1270 | |||
1271 | perf_sample_data_init(&data, ~0ULL); | ||
1272 | data.period = event->hw.last_period; | ||
1273 | |||
1274 | if (event->attr.sample_type & PERF_SAMPLE_ADDR) | ||
1275 | perf_get_data_addr(regs, &data.addr); | ||
1276 | |||
1277 | if (perf_event_overflow(event, &data, regs)) | ||
1278 | power_pmu_stop(event, 0); | ||
1279 | } | ||
1280 | } | ||
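To make the reload arithmetic above concrete:

/*
 * Worked example: the PMC raises its exception when the count reaches
 * 0x80000000 (bit 31 set).  With left = 100000 events remaining in the
 * period, the counter is seeded with
 *
 *     val = 0x80000000 - 100000 = 0x7ffe7960
 *
 * so exactly 100000 increments later it hits 0x80000000 and interrupts
 * again.  If left >= 0x80000000, val stays 0 and the PMC simply counts
 * for a while without interrupting.
 */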
1281 | |||
1282 | /* | ||
1283 | * Called from generic code to get the misc flags (i.e. processor mode) | ||
1284 | * for an event_id. | ||
1285 | */ | ||
1286 | unsigned long perf_misc_flags(struct pt_regs *regs) | ||
1287 | { | ||
1288 | u32 flags = perf_get_misc_flags(regs); | ||
1289 | |||
1290 | if (flags) | ||
1291 | return flags; | ||
1292 | return user_mode(regs) ? PERF_RECORD_MISC_USER : | ||
1293 | PERF_RECORD_MISC_KERNEL; | ||
1294 | } | ||
1295 | |||
1296 | /* | ||
1297 | * Called from generic code to get the instruction pointer | ||
1298 | * for an event_id. | ||
1299 | */ | ||
1300 | unsigned long perf_instruction_pointer(struct pt_regs *regs) | ||
1301 | { | ||
1302 | unsigned long ip; | ||
1303 | |||
1304 | if (TRAP(regs) != 0xf00) | ||
1305 | return regs->nip; /* not a PMU interrupt */ | ||
1306 | |||
1307 | ip = mfspr(SPRN_SIAR) + perf_ip_adjust(regs); | ||
1308 | return ip; | ||
1309 | } | ||
1310 | |||
1311 | static bool pmc_overflow(unsigned long val) | ||
1312 | { | ||
1313 | if ((int)val < 0) | ||
1314 | return true; | ||
1315 | |||
1316 | /* | ||
1317 | * Events on POWER7 can roll back if a speculative event doesn't | ||
1318 | * eventually complete. Unfortunately in some rare cases they will | ||
1319 | * raise a performance monitor exception. We need to catch this to | ||
1320 | * ensure we reset the PMC. In all cases the PMC will be 256 or less | ||
1321 | * cycles from overflow. | ||
1322 | * | ||
1323 | * We only do this if the first pass fails to find any overflowing | ||
1324 | * PMCs because a user might set a period of less than 256 and we | ||
1325 | * don't want to mistakenly reset them. | ||
1326 | */ | ||
1327 | if (__is_processor(PV_POWER7) && ((0x80000000 - val) <= 256)) | ||
1328 | return true; | ||
1329 | |||
1330 | return false; | ||
1331 | } | ||
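A quick worked instance of the POWER7 rollback test:

/*
 * Example: val = 0x7fffff80 reads as positive, so the first test
 * misses, but 0x80000000 - 0x7fffff80 = 128 <= 256, so on POWER7
 * pmc_overflow() still reports the counter as overflowed.
 */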
1332 | |||
1333 | /* | ||
1334 | * Performance monitor interrupt handling. | ||
1335 | */ | ||
1336 | static void perf_event_interrupt(struct pt_regs *regs) | ||
1337 | { | ||
1338 | int i; | ||
1339 | struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); | ||
1340 | struct perf_event *event; | ||
1341 | unsigned long val; | ||
1342 | int found = 0; | ||
1343 | int nmi; | ||
1344 | |||
1345 | if (cpuhw->n_limited) | ||
1346 | freeze_limited_counters(cpuhw, mfspr(SPRN_PMC5), | ||
1347 | mfspr(SPRN_PMC6)); | ||
1348 | |||
1349 | perf_read_regs(regs); | ||
1350 | |||
1351 | nmi = perf_intr_is_nmi(regs); | ||
1352 | if (nmi) | ||
1353 | nmi_enter(); | ||
1354 | else | ||
1355 | irq_enter(); | ||
1356 | |||
1357 | for (i = 0; i < cpuhw->n_events; ++i) { | ||
1358 | event = cpuhw->event[i]; | ||
1359 | if (!event->hw.idx || is_limited_pmc(event->hw.idx)) | ||
1360 | continue; | ||
1361 | val = read_pmc(event->hw.idx); | ||
1362 | if ((int)val < 0) { | ||
1363 | /* event has overflowed */ | ||
1364 | found = 1; | ||
1365 | record_and_restart(event, val, regs); | ||
1366 | } | ||
1367 | } | ||
1368 | |||
1369 | /* | ||
1370 | * In case we didn't find and reset the event that caused | ||
1371 | * the interrupt, scan all events and reset any that are | ||
1372 | * negative, to avoid getting continual interrupts. | ||
1373 | * Any that we processed in the previous loop will not be negative. | ||
1374 | */ | ||
1375 | if (!found) { | ||
1376 | for (i = 0; i < ppmu->n_counter; ++i) { | ||
1377 | if (is_limited_pmc(i + 1)) | ||
1378 | continue; | ||
1379 | val = read_pmc(i + 1); | ||
1380 | if (pmc_overflow(val)) | ||
1381 | write_pmc(i + 1, 0); | ||
1382 | } | ||
1383 | } | ||
1384 | |||
1385 | /* | ||
1386 | * Reset MMCR0 to its normal value. This will set PMXE and | ||
1387 | * clear FC (freeze counters) and PMAO (perf mon alert occurred) | ||
1388 | * and thus allow interrupts to occur again. | ||
1389 | * XXX might want to use MSR.PM to keep the events frozen until | ||
1390 | * we get back out of this interrupt. | ||
1391 | */ | ||
1392 | write_mmcr0(cpuhw, cpuhw->mmcr[0]); | ||
1393 | |||
1394 | if (nmi) | ||
1395 | nmi_exit(); | ||
1396 | else | ||
1397 | irq_exit(); | ||
1398 | } | ||
1399 | |||
1400 | static void power_pmu_setup(int cpu) | ||
1401 | { | ||
1402 | struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu); | ||
1403 | |||
1404 | if (!ppmu) | ||
1405 | return; | ||
1406 | memset(cpuhw, 0, sizeof(*cpuhw)); | ||
1407 | cpuhw->mmcr[0] = MMCR0_FC; | ||
1408 | } | ||
1409 | |||
1410 | static int __cpuinit | ||
1411 | power_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu) | ||
1412 | { | ||
1413 | unsigned int cpu = (long)hcpu; | ||
1414 | |||
1415 | switch (action & ~CPU_TASKS_FROZEN) { | ||
1416 | case CPU_UP_PREPARE: | ||
1417 | power_pmu_setup(cpu); | ||
1418 | break; | ||
1419 | |||
1420 | default: | ||
1421 | break; | ||
1422 | } | ||
1423 | |||
1424 | return NOTIFY_OK; | ||
1425 | } | ||
1426 | |||
1427 | int __cpuinit register_power_pmu(struct power_pmu *pmu) | ||
1428 | { | ||
1429 | if (ppmu) | ||
1430 | return -EBUSY; /* something's already registered */ | ||
1431 | |||
1432 | ppmu = pmu; | ||
1433 | pr_info("%s performance monitor hardware support registered\n", | ||
1434 | pmu->name); | ||
1435 | |||
1436 | #ifdef MSR_HV | ||
1437 | /* | ||
1438 | * Use FCHV to ignore kernel events if MSR.HV is set. | ||
1439 | */ | ||
1440 | if (mfmsr() & MSR_HV) | ||
1441 | freeze_events_kernel = MMCR0_FCHV; | ||
1442 | #endif /* MSR_HV */ | ||
1443 | |||
1444 | perf_pmu_register(&power_pmu, "cpu", PERF_TYPE_RAW); | ||
1445 | perf_cpu_notifier(power_pmu_notifier); | ||
1446 | |||
1447 | return 0; | ||
1448 | } | ||
diff --git a/arch/powerpc/perf/core-fsl-emb.c b/arch/powerpc/perf/core-fsl-emb.c
new file mode 100644
index 000000000000..0a6d2a9d569c
--- /dev/null
+++ b/arch/powerpc/perf/core-fsl-emb.c
@@ -0,0 +1,688 @@ | |||
1 | /* | ||
2 | * Performance event support - Freescale Embedded Performance Monitor | ||
3 | * | ||
4 | * Copyright 2008-2009 Paul Mackerras, IBM Corporation. | ||
5 | * Copyright 2010 Freescale Semiconductor, Inc. | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or | ||
8 | * modify it under the terms of the GNU General Public License | ||
9 | * as published by the Free Software Foundation; either version | ||
10 | * 2 of the License, or (at your option) any later version. | ||
11 | */ | ||
12 | #include <linux/kernel.h> | ||
13 | #include <linux/sched.h> | ||
14 | #include <linux/perf_event.h> | ||
15 | #include <linux/percpu.h> | ||
16 | #include <linux/hardirq.h> | ||
17 | #include <asm/reg_fsl_emb.h> | ||
18 | #include <asm/pmc.h> | ||
19 | #include <asm/machdep.h> | ||
20 | #include <asm/firmware.h> | ||
21 | #include <asm/ptrace.h> | ||
22 | |||
23 | struct cpu_hw_events { | ||
24 | int n_events; | ||
25 | int disabled; | ||
26 | u8 pmcs_enabled; | ||
27 | struct perf_event *event[MAX_HWEVENTS]; | ||
28 | }; | ||
29 | static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events); | ||
30 | |||
31 | static struct fsl_emb_pmu *ppmu; | ||
32 | |||
33 | /* Number of perf_events counting hardware events */ | ||
34 | static atomic_t num_events; | ||
35 | /* Used to avoid races in calling reserve/release_pmc_hardware */ | ||
36 | static DEFINE_MUTEX(pmc_reserve_mutex); | ||
37 | |||
38 | /* | ||
39 | * If interrupts were soft-disabled when a PMU interrupt occurs, treat | ||
40 | * it as an NMI. | ||
41 | */ | ||
42 | static inline int perf_intr_is_nmi(struct pt_regs *regs) | ||
43 | { | ||
44 | #ifdef __powerpc64__ | ||
45 | return !regs->softe; | ||
46 | #else | ||
47 | return 0; | ||
48 | #endif | ||
49 | } | ||
50 | |||
51 | static void perf_event_interrupt(struct pt_regs *regs); | ||
52 | |||
53 | /* | ||
54 | * Read one performance monitor counter (PMC). | ||
55 | */ | ||
56 | static unsigned long read_pmc(int idx) | ||
57 | { | ||
58 | unsigned long val; | ||
59 | |||
60 | switch (idx) { | ||
61 | case 0: | ||
62 | val = mfpmr(PMRN_PMC0); | ||
63 | break; | ||
64 | case 1: | ||
65 | val = mfpmr(PMRN_PMC1); | ||
66 | break; | ||
67 | case 2: | ||
68 | val = mfpmr(PMRN_PMC2); | ||
69 | break; | ||
70 | case 3: | ||
71 | val = mfpmr(PMRN_PMC3); | ||
72 | break; | ||
73 | default: | ||
74 | printk(KERN_ERR "oops trying to read PMC%d\n", idx); | ||
75 | val = 0; | ||
76 | } | ||
77 | return val; | ||
78 | } | ||
79 | |||
80 | /* | ||
81 | * Write one PMC. | ||
82 | */ | ||
83 | static void write_pmc(int idx, unsigned long val) | ||
84 | { | ||
85 | switch (idx) { | ||
86 | case 0: | ||
87 | mtpmr(PMRN_PMC0, val); | ||
88 | break; | ||
89 | case 1: | ||
90 | mtpmr(PMRN_PMC1, val); | ||
91 | break; | ||
92 | case 2: | ||
93 | mtpmr(PMRN_PMC2, val); | ||
94 | break; | ||
95 | case 3: | ||
96 | mtpmr(PMRN_PMC3, val); | ||
97 | break; | ||
98 | default: | ||
99 | printk(KERN_ERR "oops trying to write PMC%d\n", idx); | ||
100 | } | ||
101 | |||
102 | isync(); | ||
103 | } | ||
104 | |||
105 | /* | ||
106 | * Write one local control A register | ||
107 | */ | ||
108 | static void write_pmlca(int idx, unsigned long val) | ||
109 | { | ||
110 | switch (idx) { | ||
111 | case 0: | ||
112 | mtpmr(PMRN_PMLCA0, val); | ||
113 | break; | ||
114 | case 1: | ||
115 | mtpmr(PMRN_PMLCA1, val); | ||
116 | break; | ||
117 | case 2: | ||
118 | mtpmr(PMRN_PMLCA2, val); | ||
119 | break; | ||
120 | case 3: | ||
121 | mtpmr(PMRN_PMLCA3, val); | ||
122 | break; | ||
123 | default: | ||
124 | printk(KERN_ERR "oops trying to write PMLCA%d\n", idx); | ||
125 | } | ||
126 | |||
127 | isync(); | ||
128 | } | ||
129 | |||
130 | /* | ||
131 | * Write one local control B register | ||
132 | */ | ||
133 | static void write_pmlcb(int idx, unsigned long val) | ||
134 | { | ||
135 | switch (idx) { | ||
136 | case 0: | ||
137 | mtpmr(PMRN_PMLCB0, val); | ||
138 | break; | ||
139 | case 1: | ||
140 | mtpmr(PMRN_PMLCB1, val); | ||
141 | break; | ||
142 | case 2: | ||
143 | mtpmr(PMRN_PMLCB2, val); | ||
144 | break; | ||
145 | case 3: | ||
146 | mtpmr(PMRN_PMLCB3, val); | ||
147 | break; | ||
148 | default: | ||
149 | printk(KERN_ERR "oops trying to write PMLCB%d\n", idx); | ||
150 | } | ||
151 | |||
152 | isync(); | ||
153 | } | ||
154 | |||
155 | static void fsl_emb_pmu_read(struct perf_event *event) | ||
156 | { | ||
157 | s64 val, delta, prev; | ||
158 | |||
159 | if (event->hw.state & PERF_HES_STOPPED) | ||
160 | return; | ||
161 | |||
162 | /* | ||
163 | * Performance monitor interrupts come even when interrupts | ||
164 | * are soft-disabled, as long as interrupts are hard-enabled, | ||
165 | * so one can race with this read; retry until the update wins. | ||
166 | */ | ||
167 | do { | ||
168 | prev = local64_read(&event->hw.prev_count); | ||
169 | barrier(); | ||
170 | val = read_pmc(event->hw.idx); | ||
171 | } while (local64_cmpxchg(&event->hw.prev_count, prev, val) != prev); | ||
172 | |||
173 | /* The counters are only 32 bits wide */ | ||
174 | delta = (val - prev) & 0xfffffffful; | ||
175 | local64_add(delta, &event->count); | ||
176 | local64_sub(delta, &event->hw.period_left); | ||
177 | } | ||
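The masked subtraction above handles counter wraparound between two reads:

/*
 * Example: prev = 0xfffffff0, val = 0x00000010 after a wrap.
 * (val - prev) & 0xfffffffful = 0x20, i.e. 32 events elapsed,
 * even though val < prev as plain integers.
 */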
178 | |||
179 | /* | ||
180 | * Disable all events to prevent PMU interrupts and to allow | ||
181 | * events to be added or removed. | ||
182 | */ | ||
183 | static void fsl_emb_pmu_disable(struct pmu *pmu) | ||
184 | { | ||
185 | struct cpu_hw_events *cpuhw; | ||
186 | unsigned long flags; | ||
187 | |||
188 | local_irq_save(flags); | ||
189 | cpuhw = &__get_cpu_var(cpu_hw_events); | ||
190 | |||
191 | if (!cpuhw->disabled) { | ||
192 | cpuhw->disabled = 1; | ||
193 | |||
194 | /* | ||
195 | * Check if we ever enabled the PMU on this cpu. | ||
196 | */ | ||
197 | if (!cpuhw->pmcs_enabled) { | ||
198 | ppc_enable_pmcs(); | ||
199 | cpuhw->pmcs_enabled = 1; | ||
200 | } | ||
201 | |||
202 | if (atomic_read(&num_events)) { | ||
203 | /* | ||
204 | * Set the 'freeze all counters' bit, and disable | ||
205 | * interrupts. The barrier is to make sure the | ||
206 | * mtpmr has been executed and the PMU has frozen | ||
207 | * the events before we return. | ||
208 | */ | ||
209 | |||
210 | mtpmr(PMRN_PMGC0, PMGC0_FAC); | ||
211 | isync(); | ||
212 | } | ||
213 | } | ||
214 | local_irq_restore(flags); | ||
215 | } | ||
216 | |||
217 | /* | ||
218 | * Re-enable all events. | ||
219 | * If we were previously disabled and events were added, then | ||
220 | * put the new config on the PMU. | ||
221 | */ | ||
222 | static void fsl_emb_pmu_enable(struct pmu *pmu) | ||
223 | { | ||
224 | struct cpu_hw_events *cpuhw; | ||
225 | unsigned long flags; | ||
226 | |||
227 | local_irq_save(flags); | ||
228 | cpuhw = &__get_cpu_var(cpu_hw_events); | ||
229 | if (!cpuhw->disabled) | ||
230 | goto out; | ||
231 | |||
232 | cpuhw->disabled = 0; | ||
233 | ppc_set_pmu_inuse(cpuhw->n_events != 0); | ||
234 | |||
235 | if (cpuhw->n_events > 0) { | ||
236 | mtpmr(PMRN_PMGC0, PMGC0_PMIE | PMGC0_FCECE); | ||
237 | isync(); | ||
238 | } | ||
239 | |||
240 | out: | ||
241 | local_irq_restore(flags); | ||
242 | } | ||
243 | |||
244 | static int collect_events(struct perf_event *group, int max_count, | ||
245 | struct perf_event *ctrs[]) | ||
246 | { | ||
247 | int n = 0; | ||
248 | struct perf_event *event; | ||
249 | |||
250 | if (!is_software_event(group)) { | ||
251 | if (n >= max_count) | ||
252 | return -1; | ||
253 | ctrs[n] = group; | ||
254 | n++; | ||
255 | } | ||
256 | list_for_each_entry(event, &group->sibling_list, group_entry) { | ||
257 | if (!is_software_event(event) && | ||
258 | event->state != PERF_EVENT_STATE_OFF) { | ||
259 | if (n >= max_count) | ||
260 | return -1; | ||
261 | ctrs[n] = event; | ||
262 | n++; | ||
263 | } | ||
264 | } | ||
265 | return n; | ||
266 | } | ||
267 | |||
268 | /* context locked on entry */ | ||
269 | static int fsl_emb_pmu_add(struct perf_event *event, int flags) | ||
270 | { | ||
271 | struct cpu_hw_events *cpuhw; | ||
272 | int ret = -EAGAIN; | ||
273 | int num_counters = ppmu->n_counter; | ||
274 | u64 val; | ||
275 | int i; | ||
276 | |||
277 | perf_pmu_disable(event->pmu); | ||
278 | cpuhw = &get_cpu_var(cpu_hw_events); | ||
279 | |||
280 | if (event->hw.config & FSL_EMB_EVENT_RESTRICTED) | ||
281 | num_counters = ppmu->n_restricted; | ||
282 | |||
283 | /* | ||
284 | * Allocate counters from top-down, so that restricted-capable | ||
285 | * counters are kept free as long as possible. | ||
286 | */ | ||
287 | for (i = num_counters - 1; i >= 0; i--) { | ||
288 | if (cpuhw->event[i]) | ||
289 | continue; | ||
290 | |||
291 | break; | ||
292 | } | ||
293 | |||
294 | if (i < 0) | ||
295 | goto out; | ||
296 | |||
297 | event->hw.idx = i; | ||
298 | cpuhw->event[i] = event; | ||
299 | ++cpuhw->n_events; | ||
300 | |||
301 | val = 0; | ||
302 | if (event->hw.sample_period) { | ||
303 | s64 left = local64_read(&event->hw.period_left); | ||
304 | if (left < 0x80000000L) | ||
305 | val = 0x80000000L - left; | ||
306 | } | ||
307 | local64_set(&event->hw.prev_count, val); | ||
308 | |||
309 | if (!(flags & PERF_EF_START)) { | ||
310 | event->hw.state = PERF_HES_STOPPED | PERF_HES_UPTODATE; | ||
311 | val = 0; | ||
312 | } | ||
313 | |||
314 | write_pmc(i, val); | ||
315 | perf_event_update_userpage(event); | ||
316 | |||
317 | write_pmlcb(i, event->hw.config >> 32); | ||
318 | write_pmlca(i, event->hw.config_base); | ||
319 | |||
320 | ret = 0; | ||
321 | out: | ||
322 | put_cpu_var(cpu_hw_events); | ||
323 | perf_pmu_enable(event->pmu); | ||
324 | return ret; | ||
325 | } | ||
326 | |||
327 | /* context locked on entry */ | ||
328 | static void fsl_emb_pmu_del(struct perf_event *event, int flags) | ||
329 | { | ||
330 | struct cpu_hw_events *cpuhw; | ||
331 | int i = event->hw.idx; | ||
332 | |||
333 | perf_pmu_disable(event->pmu); | ||
334 | if (i < 0) | ||
335 | goto out; | ||
336 | |||
337 | fsl_emb_pmu_read(event); | ||
338 | |||
339 | cpuhw = &get_cpu_var(cpu_hw_events); | ||
340 | |||
341 | WARN_ON(event != cpuhw->event[event->hw.idx]); | ||
342 | |||
343 | write_pmlca(i, 0); | ||
344 | write_pmlcb(i, 0); | ||
345 | write_pmc(i, 0); | ||
346 | |||
347 | cpuhw->event[i] = NULL; | ||
348 | event->hw.idx = -1; | ||
349 | |||
350 | /* | ||
351 | * TODO: if at least one restricted event exists, and we | ||
352 | * just freed up a non-restricted-capable counter, and | ||
353 | * there is a restricted-capable counter occupied by | ||
354 | * a non-restricted event, migrate that event to the | ||
355 | * vacated counter. | ||
356 | */ | ||
357 | |||
358 | cpuhw->n_events--; | ||
359 | |||
360 | out: | ||
361 | perf_pmu_enable(event->pmu); | ||
362 | put_cpu_var(cpu_hw_events); | ||
363 | } | ||
364 | |||
365 | static void fsl_emb_pmu_start(struct perf_event *event, int ef_flags) | ||
366 | { | ||
367 | unsigned long flags; | ||
368 | s64 left; | ||
369 | |||
370 | if (event->hw.idx < 0 || !event->hw.sample_period) | ||
371 | return; | ||
372 | |||
373 | if (!(event->hw.state & PERF_HES_STOPPED)) | ||
374 | return; | ||
375 | |||
376 | if (ef_flags & PERF_EF_RELOAD) | ||
377 | WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE)); | ||
378 | |||
379 | local_irq_save(flags); | ||
380 | perf_pmu_disable(event->pmu); | ||
381 | |||
382 | event->hw.state = 0; | ||
383 | left = local64_read(&event->hw.period_left); | ||
384 | write_pmc(event->hw.idx, left); | ||
385 | |||
386 | perf_event_update_userpage(event); | ||
387 | perf_pmu_enable(event->pmu); | ||
388 | local_irq_restore(flags); | ||
389 | } | ||
390 | |||
391 | static void fsl_emb_pmu_stop(struct perf_event *event, int ef_flags) | ||
392 | { | ||
393 | unsigned long flags; | ||
394 | |||
395 | if (event->hw.idx < 0 || !event->hw.sample_period) | ||
396 | return; | ||
397 | |||
398 | if (event->hw.state & PERF_HES_STOPPED) | ||
399 | return; | ||
400 | |||
401 | local_irq_save(flags); | ||
402 | perf_pmu_disable(event->pmu); | ||
403 | |||
404 | fsl_emb_pmu_read(event); | ||
405 | event->hw.state |= PERF_HES_STOPPED | PERF_HES_UPTODATE; | ||
406 | write_pmc(event->hw.idx, 0); | ||
407 | |||
408 | perf_event_update_userpage(event); | ||
409 | perf_pmu_enable(event->pmu); | ||
410 | local_irq_restore(flags); | ||
411 | } | ||
412 | |||
413 | /* | ||
414 | * Release the PMU if this is the last perf_event. | ||
415 | */ | ||
416 | static void hw_perf_event_destroy(struct perf_event *event) | ||
417 | { | ||
418 | if (!atomic_add_unless(&num_events, -1, 1)) { | ||
419 | mutex_lock(&pmc_reserve_mutex); | ||
420 | if (atomic_dec_return(&num_events) == 0) | ||
421 | release_pmc_hardware(); | ||
422 | mutex_unlock(&pmc_reserve_mutex); | ||
423 | } | ||
424 | } | ||
425 | |||
426 | /* | ||
427 | * Translate a generic cache event_id config to a raw event_id code. | ||
428 | */ | ||
429 | static int hw_perf_cache_event(u64 config, u64 *eventp) | ||
430 | { | ||
431 | unsigned long type, op, result; | ||
432 | int ev; | ||
433 | |||
434 | if (!ppmu->cache_events) | ||
435 | return -EINVAL; | ||
436 | |||
437 | /* unpack config */ | ||
438 | type = config & 0xff; | ||
439 | op = (config >> 8) & 0xff; | ||
440 | result = (config >> 16) & 0xff; | ||
441 | |||
442 | if (type >= PERF_COUNT_HW_CACHE_MAX || | ||
443 | op >= PERF_COUNT_HW_CACHE_OP_MAX || | ||
444 | result >= PERF_COUNT_HW_CACHE_RESULT_MAX) | ||
445 | return -EINVAL; | ||
446 | |||
447 | ev = (*ppmu->cache_events)[type][op][result]; | ||
448 | if (ev == 0) | ||
449 | return -EOPNOTSUPP; | ||
450 | if (ev == -1) | ||
451 | return -EINVAL; | ||
452 | *eventp = ev; | ||
453 | return 0; | ||
454 | } | ||
455 | |||
456 | static int fsl_emb_pmu_event_init(struct perf_event *event) | ||
457 | { | ||
458 | u64 ev; | ||
459 | struct perf_event *events[MAX_HWEVENTS]; | ||
460 | int n; | ||
461 | int err; | ||
462 | int num_restricted; | ||
463 | int i; | ||
464 | |||
465 | switch (event->attr.type) { | ||
466 | case PERF_TYPE_HARDWARE: | ||
467 | ev = event->attr.config; | ||
468 | if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0) | ||
469 | return -EOPNOTSUPP; | ||
470 | ev = ppmu->generic_events[ev]; | ||
471 | break; | ||
472 | |||
473 | case PERF_TYPE_HW_CACHE: | ||
474 | err = hw_perf_cache_event(event->attr.config, &ev); | ||
475 | if (err) | ||
476 | return err; | ||
477 | break; | ||
478 | |||
479 | case PERF_TYPE_RAW: | ||
480 | ev = event->attr.config; | ||
481 | break; | ||
482 | |||
483 | default: | ||
484 | return -ENOENT; | ||
485 | } | ||
486 | |||
487 | event->hw.config = ppmu->xlate_event(ev); | ||
488 | if (!(event->hw.config & FSL_EMB_EVENT_VALID)) | ||
489 | return -EINVAL; | ||
490 | |||
491 | /* | ||
492 | * If this is in a group, check if it can go on with all the | ||
493 | * other hardware events in the group. We assume the event | ||
494 | * hasn't been linked into its leader's sibling list at this point. | ||
495 | */ | ||
496 | n = 0; | ||
497 | if (event->group_leader != event) { | ||
498 | n = collect_events(event->group_leader, | ||
499 | ppmu->n_counter - 1, events); | ||
500 | if (n < 0) | ||
501 | return -EINVAL; | ||
502 | } | ||
503 | |||
504 | if (event->hw.config & FSL_EMB_EVENT_RESTRICTED) { | ||
505 | num_restricted = 0; | ||
506 | for (i = 0; i < n; i++) { | ||
507 | if (events[i]->hw.config & FSL_EMB_EVENT_RESTRICTED) | ||
508 | num_restricted++; | ||
509 | } | ||
510 | |||
511 | if (num_restricted >= ppmu->n_restricted) | ||
512 | return -EINVAL; | ||
513 | } | ||
514 | |||
515 | event->hw.idx = -1; | ||
516 | |||
517 | event->hw.config_base = PMLCA_CE | PMLCA_FCM1 | | ||
518 | (u32)((ev << 16) & PMLCA_EVENT_MASK); | ||
519 | |||
520 | if (event->attr.exclude_user) | ||
521 | event->hw.config_base |= PMLCA_FCU; | ||
522 | if (event->attr.exclude_kernel) | ||
523 | event->hw.config_base |= PMLCA_FCS; | ||
524 | if (event->attr.exclude_idle) | ||
525 | return -ENOTSUPP; | ||
526 | |||
527 | event->hw.last_period = event->hw.sample_period; | ||
528 | local64_set(&event->hw.period_left, event->hw.last_period); | ||
529 | |||
530 | /* | ||
531 | * See if we need to reserve the PMU. | ||
532 | * If no events are currently in use, then we have to take a | ||
533 | * mutex to ensure that we don't race with another task doing | ||
534 | * reserve_pmc_hardware or release_pmc_hardware. | ||
535 | */ | ||
536 | err = 0; | ||
537 | if (!atomic_inc_not_zero(&num_events)) { | ||
538 | mutex_lock(&pmc_reserve_mutex); | ||
539 | if (atomic_read(&num_events) == 0 && | ||
540 | reserve_pmc_hardware(perf_event_interrupt)) | ||
541 | err = -EBUSY; | ||
542 | else | ||
543 | atomic_inc(&num_events); | ||
544 | mutex_unlock(&pmc_reserve_mutex); | ||
545 | |||
546 | mtpmr(PMRN_PMGC0, PMGC0_FAC); | ||
547 | isync(); | ||
548 | } | ||
549 | event->destroy = hw_perf_event_destroy; | ||
550 | |||
551 | return err; | ||
552 | } | ||
553 | |||
554 | static struct pmu fsl_emb_pmu = { | ||
555 | .pmu_enable = fsl_emb_pmu_enable, | ||
556 | .pmu_disable = fsl_emb_pmu_disable, | ||
557 | .event_init = fsl_emb_pmu_event_init, | ||
558 | .add = fsl_emb_pmu_add, | ||
559 | .del = fsl_emb_pmu_del, | ||
560 | .start = fsl_emb_pmu_start, | ||
561 | .stop = fsl_emb_pmu_stop, | ||
562 | .read = fsl_emb_pmu_read, | ||
563 | }; | ||
564 | |||
565 | /* | ||
566 | * A counter has overflowed; update its count and record | ||
567 | * things if requested. Note that interrupts are hard-disabled | ||
568 | * here so there is no possibility of being interrupted. | ||
569 | */ | ||
570 | static void record_and_restart(struct perf_event *event, unsigned long val, | ||
571 | struct pt_regs *regs) | ||
572 | { | ||
573 | u64 period = event->hw.sample_period; | ||
574 | s64 prev, delta, left; | ||
575 | int record = 0; | ||
576 | |||
577 | if (event->hw.state & PERF_HES_STOPPED) { | ||
578 | write_pmc(event->hw.idx, 0); | ||
579 | return; | ||
580 | } | ||
581 | |||
582 | /* we don't have to worry about interrupts here */ | ||
583 | prev = local64_read(&event->hw.prev_count); | ||
584 | delta = (val - prev) & 0xfffffffful; | ||
585 | local64_add(delta, &event->count); | ||
586 | |||
587 | /* | ||
588 | * See if the total period for this event has expired, | ||
589 | * and update for the next period. | ||
590 | */ | ||
591 | val = 0; | ||
592 | left = local64_read(&event->hw.period_left) - delta; | ||
593 | if (period) { | ||
594 | if (left <= 0) { | ||
595 | left += period; | ||
596 | if (left <= 0) | ||
597 | left = period; | ||
598 | record = 1; | ||
599 | event->hw.last_period = event->hw.sample_period; | ||
600 | } | ||
601 | if (left < 0x80000000LL) | ||
602 | val = 0x80000000LL - left; | ||
603 | } | ||
604 | |||
605 | write_pmc(event->hw.idx, val); | ||
606 | local64_set(&event->hw.prev_count, val); | ||
607 | local64_set(&event->hw.period_left, left); | ||
608 | perf_event_update_userpage(event); | ||
609 | |||
610 | /* | ||
611 | * Finally record data if requested. | ||
612 | */ | ||
613 | if (record) { | ||
614 | struct perf_sample_data data; | ||
615 | |||
616 | perf_sample_data_init(&data, 0); | ||
617 | data.period = event->hw.last_period; | ||
618 | |||
619 | if (perf_event_overflow(event, &data, regs)) | ||
620 | fsl_emb_pmu_stop(event, 0); | ||
621 | } | ||
622 | } | ||
623 | |||
624 | static void perf_event_interrupt(struct pt_regs *regs) | ||
625 | { | ||
626 | int i; | ||
627 | struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); | ||
628 | struct perf_event *event; | ||
629 | unsigned long val; | ||
630 | int found = 0; | ||
631 | int nmi; | ||
632 | |||
633 | nmi = perf_intr_is_nmi(regs); | ||
634 | if (nmi) | ||
635 | nmi_enter(); | ||
636 | else | ||
637 | irq_enter(); | ||
638 | |||
639 | for (i = 0; i < ppmu->n_counter; ++i) { | ||
640 | event = cpuhw->event[i]; | ||
641 | |||
642 | val = read_pmc(i); | ||
643 | if ((int)val < 0) { | ||
644 | if (event) { | ||
645 | /* event has overflowed */ | ||
646 | found = 1; | ||
647 | record_and_restart(event, val, regs); | ||
648 | } else { | ||
649 | /* | ||
650 | * Disabled counter is negative, | ||
651 | * reset it just in case. | ||
652 | */ | ||
653 | write_pmc(i, 0); | ||
654 | } | ||
655 | } | ||
656 | } | ||
657 | |||
658 | /* PMM will keep counters frozen until we return from the interrupt. */ | ||
659 | mtmsr(mfmsr() | MSR_PMM); | ||
660 | mtpmr(PMRN_PMGC0, PMGC0_PMIE | PMGC0_FCECE); | ||
661 | isync(); | ||
662 | |||
663 | if (nmi) | ||
664 | nmi_exit(); | ||
665 | else | ||
666 | irq_exit(); | ||
667 | } | ||
668 | |||
669 | void hw_perf_event_setup(int cpu) | ||
670 | { | ||
671 | struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu); | ||
672 | |||
673 | memset(cpuhw, 0, sizeof(*cpuhw)); | ||
674 | } | ||
675 | |||
676 | int register_fsl_emb_pmu(struct fsl_emb_pmu *pmu) | ||
677 | { | ||
678 | if (ppmu) | ||
679 | return -EBUSY; /* something's already registered */ | ||
680 | |||
681 | ppmu = pmu; | ||
682 | pr_info("%s performance monitor hardware support registered\n", | ||
683 | pmu->name); | ||
684 | |||
685 | perf_pmu_register(&fsl_emb_pmu, "cpu", PERF_TYPE_RAW); | ||
686 | |||
687 | return 0; | ||
688 | } | ||
diff --git a/arch/powerpc/perf/e500-pmu.c b/arch/powerpc/perf/e500-pmu.c
new file mode 100644
index 000000000000..cb2e2949c8d1
--- /dev/null
+++ b/arch/powerpc/perf/e500-pmu.c
@@ -0,0 +1,134 @@ | |||
1 | /* | ||
2 | * Performance counter support for e500 family processors. | ||
3 | * | ||
4 | * Copyright 2008-2009 Paul Mackerras, IBM Corporation. | ||
5 | * Copyright 2010 Freescale Semiconductor, Inc. | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or | ||
8 | * modify it under the terms of the GNU General Public License | ||
9 | * as published by the Free Software Foundation; either version | ||
10 | * 2 of the License, or (at your option) any later version. | ||
11 | */ | ||
12 | #include <linux/string.h> | ||
13 | #include <linux/perf_event.h> | ||
14 | #include <asm/reg.h> | ||
15 | #include <asm/cputable.h> | ||
16 | |||
17 | /* | ||
18 | * Map of generic hardware event types to hardware events | ||
19 | * Zero if unsupported | ||
20 | */ | ||
21 | static int e500_generic_events[] = { | ||
22 | [PERF_COUNT_HW_CPU_CYCLES] = 1, | ||
23 | [PERF_COUNT_HW_INSTRUCTIONS] = 2, | ||
24 | [PERF_COUNT_HW_CACHE_MISSES] = 41, /* Data L1 cache reloads */ | ||
25 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 12, | ||
26 | [PERF_COUNT_HW_BRANCH_MISSES] = 15, | ||
27 | }; | ||
28 | |||
29 | #define C(x) PERF_COUNT_HW_CACHE_##x | ||
30 | |||
31 | /* | ||
32 | * Table of generalized cache-related events. | ||
33 | * 0 means not supported, -1 means nonsensical, other values | ||
34 | * are event codes. | ||
35 | */ | ||
36 | static int e500_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { | ||
37 | /* | ||
38 | * D-cache misses are not split into read/write/prefetch; | ||
39 | * use raw event 41. | ||
40 | */ | ||
41 | [C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
42 | [C(OP_READ)] = { 27, 0 }, | ||
43 | [C(OP_WRITE)] = { 28, 0 }, | ||
44 | [C(OP_PREFETCH)] = { 29, 0 }, | ||
45 | }, | ||
46 | [C(L1I)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
47 | [C(OP_READ)] = { 2, 60 }, | ||
48 | [C(OP_WRITE)] = { -1, -1 }, | ||
49 | [C(OP_PREFETCH)] = { 0, 0 }, | ||
50 | }, | ||
51 | /* | ||
52 | * Assuming LL means L2, it's not a good match for this model. | ||
53 | * It allocates only on L1 castout or explicit prefetch, and | ||
54 | * does not have separate read/write events (but it does have | ||
55 | * separate instruction/data events). | ||
56 | */ | ||
57 | [C(LL)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
58 | [C(OP_READ)] = { 0, 0 }, | ||
59 | [C(OP_WRITE)] = { 0, 0 }, | ||
60 | [C(OP_PREFETCH)] = { 0, 0 }, | ||
61 | }, | ||
62 | /* | ||
63 | * There are data/instruction MMU misses, but that's a miss on | ||
64 | * the chip's internal level-one TLB which is probably not | ||
65 | * what the user wants. Instead, unified level-two TLB misses | ||
66 | * are reported here. | ||
67 | */ | ||
68 | [C(DTLB)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
69 | [C(OP_READ)] = { 26, 66 }, | ||
70 | [C(OP_WRITE)] = { -1, -1 }, | ||
71 | [C(OP_PREFETCH)] = { -1, -1 }, | ||
72 | }, | ||
73 | [C(BPU)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
74 | [C(OP_READ)] = { 12, 15 }, | ||
75 | [C(OP_WRITE)] = { -1, -1 }, | ||
76 | [C(OP_PREFETCH)] = { -1, -1 }, | ||
77 | }, | ||
78 | [C(NODE)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
79 | [C(OP_READ)] = { -1, -1 }, | ||
80 | [C(OP_WRITE)] = { -1, -1 }, | ||
81 | [C(OP_PREFETCH)] = { -1, -1 }, | ||
82 | }, | ||
83 | }; | ||
84 | |||
85 | static int num_events = 128; | ||
86 | |||
87 | /* Upper half of event id is PMLCb, for threshold events */ | ||
88 | static u64 e500_xlate_event(u64 event_id) | ||
89 | { | ||
90 | u32 event_low = (u32)event_id; | ||
91 | u64 ret; | ||
92 | |||
93 | if (event_low >= num_events) | ||
94 | return 0; | ||
95 | |||
96 | ret = FSL_EMB_EVENT_VALID; | ||
97 | |||
98 | if (event_low >= 76 && event_low <= 81) { | ||
99 | ret |= FSL_EMB_EVENT_RESTRICTED; | ||
100 | ret |= event_id & | ||
101 | (FSL_EMB_EVENT_THRESHMUL | FSL_EMB_EVENT_THRESH); | ||
102 | } else if (event_id & | ||
103 | (FSL_EMB_EVENT_THRESHMUL | FSL_EMB_EVENT_THRESH)) { | ||
104 | /* Threshold requested on non-threshold event */ | ||
105 | return 0; | ||
106 | } | ||
107 | |||
108 | return ret; | ||
109 | } | ||
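Two examples of how the translation above behaves, assuming a plain e500 where num_events is 128:

/*
 * event 78:  falls in the restricted range 76-81, so the result is
 *            FSL_EMB_EVENT_VALID | FSL_EMB_EVENT_RESTRICTED plus any
 *            threshold bits carried in the upper half of the id.
 * event 200: >= num_events, so 0 is returned and
 *            fsl_emb_pmu_event_init() rejects the event with -EINVAL.
 */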
110 | |||
111 | static struct fsl_emb_pmu e500_pmu = { | ||
112 | .name = "e500 family", | ||
113 | .n_counter = 4, | ||
114 | .n_restricted = 2, | ||
115 | .xlate_event = e500_xlate_event, | ||
116 | .n_generic = ARRAY_SIZE(e500_generic_events), | ||
117 | .generic_events = e500_generic_events, | ||
118 | .cache_events = &e500_cache_events, | ||
119 | }; | ||
120 | |||
121 | static int init_e500_pmu(void) | ||
122 | { | ||
123 | if (!cur_cpu_spec->oprofile_cpu_type) | ||
124 | return -ENODEV; | ||
125 | |||
126 | if (!strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc/e500mc")) | ||
127 | num_events = 256; | ||
128 | else if (strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc/e500")) | ||
129 | return -ENODEV; | ||
130 | |||
131 | return register_fsl_emb_pmu(&e500_pmu); | ||
132 | } | ||
133 | |||
134 | early_initcall(init_e500_pmu); | ||
diff --git a/arch/powerpc/perf/mpc7450-pmu.c b/arch/powerpc/perf/mpc7450-pmu.c
new file mode 100644
index 000000000000..fe21b515ca44
--- /dev/null
+++ b/arch/powerpc/perf/mpc7450-pmu.c
@@ -0,0 +1,422 @@ | |||
1 | /* | ||
2 | * Performance counter support for MPC7450-family processors. | ||
3 | * | ||
4 | * Copyright 2008-2009 Paul Mackerras, IBM Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the License, or (at your option) any later version. | ||
10 | */ | ||
11 | #include <linux/string.h> | ||
12 | #include <linux/perf_event.h> | ||
13 | #include <asm/reg.h> | ||
14 | #include <asm/cputable.h> | ||
15 | |||
16 | #define N_COUNTER 6 /* Number of hardware counters */ | ||
17 | #define MAX_ALT 3 /* Maximum number of event alternative codes */ | ||
18 | |||
19 | /* | ||
20 | * Bits in event code for MPC7450 family | ||
21 | */ | ||
22 | #define PM_THRMULT_MSKS 0x40000 | ||
23 | #define PM_THRESH_SH 12 | ||
24 | #define PM_THRESH_MSK 0x3f | ||
25 | #define PM_PMC_SH 8 | ||
26 | #define PM_PMC_MSK 7 | ||
27 | #define PM_PMCSEL_MSK 0x7f | ||
28 | |||
29 | /* | ||
30 | * Classify events according to how specific their PMC requirements are. | ||
31 | * Result is: | ||
32 | * 0: can go on any PMC | ||
33 | * 1: can go on PMCs 1-4 | ||
34 | * 2: can go on PMCs 1,2,4 | ||
35 | * 3: can go on PMCs 1 or 2 | ||
36 | * 4: can only go on one PMC | ||
37 | * -1: event code is invalid | ||
38 | */ | ||
39 | #define N_CLASSES 5 | ||
40 | |||
41 | static int mpc7450_classify_event(u32 event) | ||
42 | { | ||
43 | int pmc; | ||
44 | |||
45 | pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; | ||
46 | if (pmc) { | ||
47 | if (pmc > N_COUNTER) | ||
48 | return -1; | ||
49 | return 4; | ||
50 | } | ||
51 | event &= PM_PMCSEL_MSK; | ||
52 | if (event <= 1) | ||
53 | return 0; | ||
54 | if (event <= 7) | ||
55 | return 1; | ||
56 | if (event <= 13) | ||
57 | return 2; | ||
58 | if (event <= 22) | ||
59 | return 3; | ||
60 | return -1; | ||
61 | } | ||
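Two worked examples of the classification above:

/*
 * 0x217 (PM_L1_DCACHE_MISS): pmc = (0x217 >> 8) & 7 = 2, so the event
 *     is hard-wired to PMC2 and lands in class 4.
 * 0x002: the PMC field is 0 and PMCSEL = 2 <= 7, so the event can go
 *     on any of PMC1-PMC4 and lands in class 1.
 */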
62 | |||
63 | /* | ||
64 | * Events using threshold and possible threshold scale: | ||
65 | * code scale? name | ||
66 | * 11e N PM_INSTQ_EXCEED_CYC | ||
67 | * 11f N PM_ALTV_IQ_EXCEED_CYC | ||
68 | * 128 Y PM_DTLB_SEARCH_EXCEED_CYC | ||
69 | * 12b Y PM_LD_MISS_EXCEED_L1_CYC | ||
70 | * 220 N PM_CQ_EXCEED_CYC | ||
71 | * 30c N PM_GPR_RB_EXCEED_CYC | ||
72 | * 30d ? PM_FPR_IQ_EXCEED_CYC ? | ||
73 | * 311 Y PM_ITLB_SEARCH_EXCEED | ||
74 | * 410 N PM_GPR_IQ_EXCEED_CYC | ||
75 | */ | ||
76 | |||
77 | /* | ||
78 | * Return use of threshold and threshold scale bits: | ||
79 | * 0 = uses neither, 1 = uses threshold, 2 = uses both | ||
80 | */ | ||
81 | static int mpc7450_threshold_use(u32 event) | ||
82 | { | ||
83 | int pmc, sel; | ||
84 | |||
85 | pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; | ||
86 | sel = event & PM_PMCSEL_MSK; | ||
87 | switch (pmc) { | ||
88 | case 1: | ||
89 | if (sel == 0x1e || sel == 0x1f) | ||
90 | return 1; | ||
91 | if (sel == 0x28 || sel == 0x2b) | ||
92 | return 2; | ||
93 | break; | ||
94 | case 2: | ||
95 | if (sel == 0x20) | ||
96 | return 1; | ||
97 | break; | ||
98 | case 3: | ||
99 | if (sel == 0xc || sel == 0xd) | ||
100 | return 1; | ||
101 | if (sel == 0x11) | ||
102 | return 2; | ||
103 | break; | ||
104 | case 4: | ||
105 | if (sel == 0x10) | ||
106 | return 1; | ||
107 | break; | ||
108 | } | ||
109 | return 0; | ||
110 | } | ||
111 | |||
112 | /* | ||
113 | * Layout of constraint bits: | ||
114 | * 33222222222211111111110000000000 | ||
115 | * 10987654321098765432109876543210 | ||
116 | *  |<    ><  > < > < ><><><><><><> | ||
117 | *  TS TV   G4   G3  G2P6P5P4P3P2P1 | ||
118 | * | ||
119 | * P1 - P6 | ||
120 | * 0 - 11: Count of events needing PMC1 .. PMC6 | ||
121 | * | ||
122 | * G2 | ||
123 | * 12 - 14: Count of events needing PMC1 or PMC2 | ||
124 | * | ||
125 | * G3 | ||
126 | * 16 - 18: Count of events needing PMC1, PMC2 or PMC4 | ||
127 | * | ||
128 | * G4 | ||
129 | * 20 - 23: Count of events needing PMC1, PMC2, PMC3 or PMC4 | ||
130 | * | ||
131 | * TV | ||
132 | * 24 - 29: Threshold value requested | ||
133 | * | ||
134 | * TS | ||
135 | * 30: Threshold scale value requested | ||
136 | */ | ||
137 | |||
138 | static u32 pmcbits[N_COUNTER][2] = { | ||
139 | { 0x00844002, 0x00111001 }, /* PMC1 mask, value: P1,G2,G3,G4 */ | ||
140 | { 0x00844008, 0x00111004 }, /* PMC2: P2,G2,G3,G4 */ | ||
141 | { 0x00800020, 0x00100010 }, /* PMC3: P3,G4 */ | ||
142 | { 0x00840080, 0x00110040 }, /* PMC4: P4,G3,G4 */ | ||
143 | { 0x00000200, 0x00000100 }, /* PMC5: P5 */ | ||
144 | { 0x00000800, 0x00000400 } /* PMC6: P6 */ | ||
145 | }; | ||
146 | |||
147 | static u32 classbits[N_CLASSES - 1][2] = { | ||
148 | { 0x00000000, 0x00000000 }, /* class 0: no constraint */ | ||
149 | { 0x00800000, 0x00100000 }, /* class 1: G4 */ | ||
150 | { 0x00040000, 0x00010000 }, /* class 2: G3 */ | ||
151 | { 0x00004000, 0x00001000 }, /* class 3: G2 */ | ||
152 | }; | ||
153 | |||
154 | static int mpc7450_get_constraint(u64 event, unsigned long *maskp, | ||
155 | unsigned long *valp) | ||
156 | { | ||
157 | int pmc, class; | ||
158 | u32 mask, value; | ||
159 | int thresh, tuse; | ||
160 | |||
161 | class = mpc7450_classify_event(event); | ||
162 | if (class < 0) | ||
163 | return -1; | ||
164 | if (class == 4) { | ||
165 | pmc = ((unsigned int)event >> PM_PMC_SH) & PM_PMC_MSK; | ||
166 | mask = pmcbits[pmc - 1][0]; | ||
167 | value = pmcbits[pmc - 1][1]; | ||
168 | } else { | ||
169 | mask = classbits[class][0]; | ||
170 | value = classbits[class][1]; | ||
171 | } | ||
172 | |||
173 | tuse = mpc7450_threshold_use(event); | ||
174 | if (tuse) { | ||
175 | thresh = ((unsigned int)event >> PM_THRESH_SH) & PM_THRESH_MSK; | ||
176 | mask |= 0x3f << 24; | ||
177 | value |= thresh << 24; | ||
178 | if (tuse == 2) { | ||
179 | mask |= 0x40000000; | ||
180 | if ((unsigned int)event & PM_THRMULT_MSKS) | ||
181 | value |= 0x40000000; | ||
182 | } | ||
183 | } | ||
184 | |||
185 | *maskp = mask; | ||
186 | *valp = value; | ||
187 | return 0; | ||
188 | } | ||
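A worked example of the constraint encoding, using event 0x217 (fixed to PMC2, class 4):

/*
 * mask  = pmcbits[1][0] = 0x00844008
 * value = pmcbits[1][1] = 0x00111004
 *
 * The value adds one to the P2 field and to the G2/G3/G4 group counts;
 * roughly, the scheduler sums the values of all candidate events and
 * the mask bits catch a carry out of each small count field, so two
 * events that both demand PMC2 are rejected as a pair.
 */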
189 | |||
190 | static const unsigned int event_alternatives[][MAX_ALT] = { | ||
191 | { 0x217, 0x317 }, /* PM_L1_DCACHE_MISS */ | ||
192 | { 0x418, 0x50f, 0x60f }, /* PM_SNOOP_RETRY */ | ||
193 | { 0x502, 0x602 }, /* PM_L2_HIT */ | ||
194 | { 0x503, 0x603 }, /* PM_L3_HIT */ | ||
195 | { 0x504, 0x604 }, /* PM_L2_ICACHE_MISS */ | ||
196 | { 0x505, 0x605 }, /* PM_L3_ICACHE_MISS */ | ||
197 | { 0x506, 0x606 }, /* PM_L2_DCACHE_MISS */ | ||
198 | { 0x507, 0x607 }, /* PM_L3_DCACHE_MISS */ | ||
199 | { 0x50a, 0x623 }, /* PM_LD_HIT_L3 */ | ||
200 | { 0x50b, 0x624 }, /* PM_ST_HIT_L3 */ | ||
201 | { 0x50d, 0x60d }, /* PM_L2_TOUCH_HIT */ | ||
202 | { 0x50e, 0x60e }, /* PM_L3_TOUCH_HIT */ | ||
203 | { 0x512, 0x612 }, /* PM_INT_LOCAL */ | ||
204 | { 0x513, 0x61d }, /* PM_L2_MISS */ | ||
205 | { 0x514, 0x61e }, /* PM_L3_MISS */ | ||
206 | }; | ||
207 | |||
208 | /* | ||
209 | * Scan the alternatives table for a match and return the | ||
210 | * index into the alternatives table if found, else -1. | ||
211 | */ | ||
212 | static int find_alternative(u32 event) | ||
213 | { | ||
214 | int i, j; | ||
215 | |||
216 | for (i = 0; i < ARRAY_SIZE(event_alternatives); ++i) { | ||
217 | if (event < event_alternatives[i][0]) | ||
218 | break; | ||
219 | for (j = 0; j < MAX_ALT && event_alternatives[i][j]; ++j) | ||
220 | if (event == event_alternatives[i][j]) | ||
221 | return i; | ||
222 | } | ||
223 | return -1; | ||
224 | } | ||
225 | |||
226 | static int mpc7450_get_alternatives(u64 event, unsigned int flags, u64 alt[]) | ||
227 | { | ||
228 | int i, j, nalt = 1; | ||
229 | u32 ae; | ||
230 | |||
231 | alt[0] = event; | ||
232 | nalt = 1; | ||
233 | i = find_alternative((u32)event); | ||
234 | if (i >= 0) { | ||
235 | for (j = 0; j < MAX_ALT; ++j) { | ||
236 | ae = event_alternatives[i][j]; | ||
237 | if (ae && ae != (u32)event) | ||
238 | alt[nalt++] = ae; | ||
239 | } | ||
240 | } | ||
241 | return nalt; | ||
242 | } | ||
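For instance, looking up PM_L1_DCACHE_MISS in the table above:

/*
 * Example: event 0x217 matches the row { 0x217, 0x317 }, so
 * mpc7450_get_alternatives() returns nalt = 2 with
 * alt[] = { 0x217, 0x317 }, giving the scheduler a choice of
 * two equivalent event codes.
 */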
243 | |||
244 | /* | ||
245 | * Bitmaps of which PMCs each class can use for classes 0 - 3. | ||
246 | * Bit i is set if PMC i+1 is usable. | ||
247 | */ | ||
248 | static const u8 classmap[N_CLASSES] = { | ||
249 | 0x3f, 0x0f, 0x0b, 0x03, 0 | ||
250 | }; | ||
251 | |||
252 | /* Bit position and width of each PMCSEL field */ | ||
253 | static const int pmcsel_shift[N_COUNTER] = { | ||
254 | 6, 0, 27, 22, 17, 11 | ||
255 | }; | ||
256 | static const u32 pmcsel_mask[N_COUNTER] = { | ||
257 | 0x7f, 0x3f, 0x1f, 0x1f, 0x1f, 0x3f | ||
258 | }; | ||
259 | |||
260 | /* | ||
261 | * Compute MMCR0/1/2 values for a set of events. | ||
262 | */ | ||
263 | static int mpc7450_compute_mmcr(u64 event[], int n_ev, | ||
264 | unsigned int hwc[], unsigned long mmcr[]) | ||
265 | { | ||
266 | u8 event_index[N_CLASSES][N_COUNTER]; | ||
267 | int n_classevent[N_CLASSES]; | ||
268 | int i, j, class, tuse; | ||
269 | u32 pmc_inuse = 0, pmc_avail; | ||
270 | u32 mmcr0 = 0, mmcr1 = 0, mmcr2 = 0; | ||
271 | u32 ev, pmc, thresh; | ||
272 | |||
273 | if (n_ev > N_COUNTER) | ||
274 | return -1; | ||
275 | |||
276 | /* First pass: count usage in each class */ | ||
277 | for (i = 0; i < N_CLASSES; ++i) | ||
278 | n_classevent[i] = 0; | ||
279 | for (i = 0; i < n_ev; ++i) { | ||
280 | class = mpc7450_classify_event(event[i]); | ||
281 | if (class < 0) | ||
282 | return -1; | ||
283 | j = n_classevent[class]++; | ||
284 | event_index[class][j] = i; | ||
285 | } | ||
286 | |||
287 | /* Second pass: allocate PMCs from most specific event to least */ | ||
288 | for (class = N_CLASSES - 1; class >= 0; --class) { | ||
289 | for (i = 0; i < n_classevent[class]; ++i) { | ||
290 | ev = event[event_index[class][i]]; | ||
291 | if (class == 4) { | ||
292 | pmc = (ev >> PM_PMC_SH) & PM_PMC_MSK; | ||
293 | if (pmc_inuse & (1 << (pmc - 1))) | ||
294 | return -1; | ||
295 | } else { | ||
296 | /* Find a suitable PMC */ | ||
297 | pmc_avail = classmap[class] & ~pmc_inuse; | ||
298 | if (!pmc_avail) | ||
299 | return -1; | ||
300 | pmc = ffs(pmc_avail); | ||
301 | } | ||
302 | pmc_inuse |= 1 << (pmc - 1); | ||
303 | |||
304 | tuse = mpc7450_threshold_use(ev); | ||
305 | if (tuse) { | ||
306 | thresh = (ev >> PM_THRESH_SH) & PM_THRESH_MSK; | ||
307 | mmcr0 |= thresh << 16; | ||
308 | if (tuse == 2 && (ev & PM_THRMULT_MSKS)) | ||
309 | mmcr2 = 0x80000000; | ||
310 | } | ||
311 | ev &= pmcsel_mask[pmc - 1]; | ||
312 | ev <<= pmcsel_shift[pmc - 1]; | ||
313 | if (pmc <= 2) | ||
314 | mmcr0 |= ev; | ||
315 | else | ||
316 | mmcr1 |= ev; | ||
317 | hwc[event_index[class][i]] = pmc - 1; | ||
318 | } | ||
319 | } | ||
320 | |||
321 | if (pmc_inuse & 1) | ||
322 | mmcr0 |= MMCR0_PMC1CE; | ||
323 | if (pmc_inuse & 0x3e) | ||
324 | mmcr0 |= MMCR0_PMCnCE; | ||
325 | |||
326 | /* Return MMCRx values */ | ||
327 | mmcr[0] = mmcr0; | ||
328 | mmcr[1] = mmcr1; | ||
329 | mmcr[2] = mmcr2; | ||
330 | return 0; | ||
331 | } | ||
332 | |||
333 | /* | ||
334 | * Disable counting by a PMC. | ||
335 | * Note that the pmc argument is 0-based here, not 1-based. | ||
336 | */ | ||
337 | static void mpc7450_disable_pmc(unsigned int pmc, unsigned long mmcr[]) | ||
338 | { | ||
339 | if (pmc <= 1) | ||
340 | mmcr[0] &= ~(pmcsel_mask[pmc] << pmcsel_shift[pmc]); | ||
341 | else | ||
342 | mmcr[1] &= ~(pmcsel_mask[pmc] << pmcsel_shift[pmc]); | ||
343 | } | ||
344 | |||
345 | static int mpc7450_generic_events[] = { | ||
346 | [PERF_COUNT_HW_CPU_CYCLES] = 1, | ||
347 | [PERF_COUNT_HW_INSTRUCTIONS] = 2, | ||
348 | [PERF_COUNT_HW_CACHE_MISSES] = 0x217, /* PM_L1_DCACHE_MISS */ | ||
349 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x122, /* PM_BR_CMPL */ | ||
350 | [PERF_COUNT_HW_BRANCH_MISSES] = 0x41c, /* PM_BR_MPRED */ | ||
351 | }; | ||
352 | |||
353 | #define C(x) PERF_COUNT_HW_CACHE_##x | ||
354 | |||
355 | /* | ||
356 | * Table of generalized cache-related events. | ||
357 | * 0 means not supported, -1 means nonsensical, other values | ||
358 | * are event codes. | ||
359 | */ | ||
360 | static int mpc7450_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { | ||
361 | [C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
362 | [C(OP_READ)] = { 0, 0x225 }, | ||
363 | [C(OP_WRITE)] = { 0, 0x227 }, | ||
364 | [C(OP_PREFETCH)] = { 0, 0 }, | ||
365 | }, | ||
366 | [C(L1I)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
367 | [C(OP_READ)] = { 0x129, 0x115 }, | ||
368 | [C(OP_WRITE)] = { -1, -1 }, | ||
369 | [C(OP_PREFETCH)] = { 0x634, 0 }, | ||
370 | }, | ||
371 | [C(LL)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
372 | [C(OP_READ)] = { 0, 0 }, | ||
373 | [C(OP_WRITE)] = { 0, 0 }, | ||
374 | [C(OP_PREFETCH)] = { 0, 0 }, | ||
375 | }, | ||
376 | [C(DTLB)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
377 | [C(OP_READ)] = { 0, 0x312 }, | ||
378 | [C(OP_WRITE)] = { -1, -1 }, | ||
379 | [C(OP_PREFETCH)] = { -1, -1 }, | ||
380 | }, | ||
381 | [C(ITLB)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
382 | [C(OP_READ)] = { 0, 0x223 }, | ||
383 | [C(OP_WRITE)] = { -1, -1 }, | ||
384 | [C(OP_PREFETCH)] = { -1, -1 }, | ||
385 | }, | ||
386 | [C(BPU)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
387 | [C(OP_READ)] = { 0x122, 0x41c }, | ||
388 | [C(OP_WRITE)] = { -1, -1 }, | ||
389 | [C(OP_PREFETCH)] = { -1, -1 }, | ||
390 | }, | ||
391 | [C(NODE)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
392 | [C(OP_READ)] = { -1, -1 }, | ||
393 | [C(OP_WRITE)] = { -1, -1 }, | ||
394 | [C(OP_PREFETCH)] = { -1, -1 }, | ||
395 | }, | ||
396 | }; | ||
397 | |||
398 | struct power_pmu mpc7450_pmu = { | ||
399 | .name = "MPC7450 family", | ||
400 | .n_counter = N_COUNTER, | ||
401 | .max_alternatives = MAX_ALT, | ||
402 | .add_fields = 0x00111555ul, | ||
403 | .test_adder = 0x00301000ul, | ||
404 | .compute_mmcr = mpc7450_compute_mmcr, | ||
405 | .get_constraint = mpc7450_get_constraint, | ||
406 | .get_alternatives = mpc7450_get_alternatives, | ||
407 | .disable_pmc = mpc7450_disable_pmc, | ||
408 | .n_generic = ARRAY_SIZE(mpc7450_generic_events), | ||
409 | .generic_events = mpc7450_generic_events, | ||
410 | .cache_events = &mpc7450_cache_events, | ||
411 | }; | ||
412 | |||
413 | static int __init init_mpc7450_pmu(void) | ||
414 | { | ||
415 | if (!cur_cpu_spec->oprofile_cpu_type || | ||
416 | strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc/7450")) | ||
417 | return -ENODEV; | ||
418 | |||
419 | return register_power_pmu(&mpc7450_pmu); | ||
420 | } | ||
421 | |||
422 | early_initcall(init_mpc7450_pmu); | ||
diff --git a/arch/powerpc/perf/power4-pmu.c b/arch/powerpc/perf/power4-pmu.c
new file mode 100644
index 000000000000..b4f1dda4d089
--- /dev/null
+++ b/arch/powerpc/perf/power4-pmu.c
@@ -0,0 +1,621 @@ | |||
1 | /* | ||
2 | * Performance counter support for POWER4 (GP) and POWER4+ (GQ) processors. | ||
3 | * | ||
4 | * Copyright 2009 Paul Mackerras, IBM Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the License, or (at your option) any later version. | ||
10 | */ | ||
11 | #include <linux/kernel.h> | ||
12 | #include <linux/perf_event.h> | ||
13 | #include <linux/string.h> | ||
14 | #include <asm/reg.h> | ||
15 | #include <asm/cputable.h> | ||
16 | |||
17 | /* | ||
18 | * Bits in event code for POWER4 | ||
19 | */ | ||
20 | #define PM_PMC_SH 12 /* PMC number (1-based) for direct events */ | ||
21 | #define PM_PMC_MSK 0xf | ||
22 | #define PM_UNIT_SH 8 /* TTMMUX number and setting - unit select */ | ||
23 | #define PM_UNIT_MSK 0xf | ||
24 | #define PM_LOWER_SH 6 | ||
25 | #define PM_LOWER_MSK 1 | ||
26 | #define PM_LOWER_MSKS 0x40 | ||
27 | #define PM_BYTE_SH 4 /* Byte number of event bus to use */ | ||
28 | #define PM_BYTE_MSK 3 | ||
29 | #define PM_PMCSEL_MSK 7 | ||
30 | |||
31 | /* | ||
32 | * Unit code values | ||
33 | */ | ||
34 | #define PM_FPU 1 | ||
35 | #define PM_ISU1 2 | ||
36 | #define PM_IFU 3 | ||
37 | #define PM_IDU0 4 | ||
38 | #define PM_ISU1_ALT 6 | ||
39 | #define PM_ISU2 7 | ||
40 | #define PM_IFU_ALT 8 | ||
41 | #define PM_LSU0 9 | ||
42 | #define PM_LSU1 0xc | ||
43 | #define PM_GPS 0xf | ||
44 | |||
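To make the field layout concrete, here is how the load-miss code 0x3c10 (PM_LD_MISS_L1, used in the generic and cache tables below) decodes under these shifts; this is worked arithmetic only, not code from the patch:

	u64 ev   = 0x3c10;
	int pmc  = (ev >> PM_PMC_SH) & PM_PMC_MSK;	/* 3: pinned to PMC3 */
	int unit = (ev >> PM_UNIT_SH) & PM_UNIT_MSK;	/* 0xc: PM_LSU1 */
	int byte = (ev >> PM_BYTE_SH) & PM_BYTE_MSK;	/* 1: event bus byte 1 */
	int psel = ev & PM_PMCSEL_MSK;			/* 0 */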
45 | /* | ||
46 | * Bits in MMCR0 for POWER4 | ||
47 | */ | ||
48 | #define MMCR0_PMC1SEL_SH 8 | ||
49 | #define MMCR0_PMC2SEL_SH 1 | ||
50 | #define MMCR_PMCSEL_MSK 0x1f | ||
51 | |||
52 | /* | ||
53 | * Bits in MMCR1 for POWER4 | ||
54 | */ | ||
55 | #define MMCR1_TTM0SEL_SH 62 | ||
56 | #define MMCR1_TTC0SEL_SH 61 | ||
57 | #define MMCR1_TTM1SEL_SH 59 | ||
58 | #define MMCR1_TTC1SEL_SH 58 | ||
59 | #define MMCR1_TTM2SEL_SH 56 | ||
60 | #define MMCR1_TTC2SEL_SH 55 | ||
61 | #define MMCR1_TTM3SEL_SH 53 | ||
62 | #define MMCR1_TTC3SEL_SH 52 | ||
63 | #define MMCR1_TTMSEL_MSK 3 | ||
64 | #define MMCR1_TD_CP_DBG0SEL_SH 50 | ||
65 | #define MMCR1_TD_CP_DBG1SEL_SH 48 | ||
66 | #define MMCR1_TD_CP_DBG2SEL_SH 46 | ||
67 | #define MMCR1_TD_CP_DBG3SEL_SH 44 | ||
68 | #define MMCR1_DEBUG0SEL_SH 43 | ||
69 | #define MMCR1_DEBUG1SEL_SH 42 | ||
70 | #define MMCR1_DEBUG2SEL_SH 41 | ||
71 | #define MMCR1_DEBUG3SEL_SH 40 | ||
72 | #define MMCR1_PMC1_ADDER_SEL_SH 39 | ||
73 | #define MMCR1_PMC2_ADDER_SEL_SH 38 | ||
74 | #define MMCR1_PMC6_ADDER_SEL_SH 37 | ||
75 | #define MMCR1_PMC5_ADDER_SEL_SH 36 | ||
76 | #define MMCR1_PMC8_ADDER_SEL_SH 35 | ||
77 | #define MMCR1_PMC7_ADDER_SEL_SH 34 | ||
78 | #define MMCR1_PMC3_ADDER_SEL_SH 33 | ||
79 | #define MMCR1_PMC4_ADDER_SEL_SH 32 | ||
80 | #define MMCR1_PMC3SEL_SH 27 | ||
81 | #define MMCR1_PMC4SEL_SH 22 | ||
82 | #define MMCR1_PMC5SEL_SH 17 | ||
83 | #define MMCR1_PMC6SEL_SH 12 | ||
84 | #define MMCR1_PMC7SEL_SH 7 | ||
85 | #define MMCR1_PMC8SEL_SH 2 /* note bit 0 is in MMCRA for GP */ | ||
86 | |||
87 | static short mmcr1_adder_bits[8] = { | ||
88 | MMCR1_PMC1_ADDER_SEL_SH, | ||
89 | MMCR1_PMC2_ADDER_SEL_SH, | ||
90 | MMCR1_PMC3_ADDER_SEL_SH, | ||
91 | MMCR1_PMC4_ADDER_SEL_SH, | ||
92 | MMCR1_PMC5_ADDER_SEL_SH, | ||
93 | MMCR1_PMC6_ADDER_SEL_SH, | ||
94 | MMCR1_PMC7_ADDER_SEL_SH, | ||
95 | MMCR1_PMC8_ADDER_SEL_SH | ||
96 | }; | ||
97 | |||
98 | /* | ||
99 | * Bits in MMCRA | ||
100 | */ | ||
101 | #define MMCRA_PMC8SEL0_SH 17 /* PMC8SEL bit 0 for GP */ | ||
102 | |||
103 | /* | ||
104 | * Layout of constraint bits: | ||
105 | * 6666555555555544444444443333333333222222222211111111110000000000 | ||
106 | * 3210987654321098765432109876543210987654321098765432109876543210 | ||
107 | * |[ >[ >[ >|||[ >[ >< >< >< >< ><><><><><><><><> | ||
108 | * | UC1 UC2 UC3 ||| PS1 PS2 B0 B1 B2 B3 P1P2P3P4P5P6P7P8 | ||
109 | * \SMPL ||\TTC3SEL | ||
110 | * |\TTC_IFU_SEL | ||
111 | * \TTM2SEL0 | ||
112 | * | ||
113 | * SMPL - SAMPLE_ENABLE constraint | ||
114 | * 56: SAMPLE_ENABLE value 0x0100_0000_0000_0000 | ||
115 | * | ||
116 | * UC1 - unit constraint 1: can't have all three of FPU/ISU1/IDU0|ISU2 | ||
117 | * 55: UC1 error 0x0080_0000_0000_0000 | ||
118 | * 54: FPU events needed 0x0040_0000_0000_0000 | ||
119 | * 53: ISU1 events needed 0x0020_0000_0000_0000 | ||
120 | * 52: IDU0|ISU2 events needed 0x0010_0000_0000_0000 | ||
121 | * | ||
122 | * UC2 - unit constraint 2: can't have all three of FPU/IFU/LSU0 | ||
123 | * 51: UC2 error 0x0008_0000_0000_0000 | ||
124 | * 50: FPU events needed 0x0004_0000_0000_0000 | ||
125 | * 49: IFU events needed 0x0002_0000_0000_0000 | ||
126 | * 48: LSU0 events needed 0x0001_0000_0000_0000 | ||
127 | * | ||
128 | * UC3 - unit constraint 3: can't have all four of LSU0/IFU/IDU0|ISU2/ISU1 | ||
129 | * 47: UC3 error 0x8000_0000_0000 | ||
130 | * 46: LSU0 events needed 0x4000_0000_0000 | ||
131 | * 45: IFU events needed 0x2000_0000_0000 | ||
132 | * 44: IDU0|ISU2 events needed 0x1000_0000_0000 | ||
133 | * 43: ISU1 events needed 0x0800_0000_0000 | ||
134 | * | ||
135 | * TTM2SEL0 | ||
136 | * 42: 0 = IDU0 events needed | ||
137 | * 1 = ISU2 events needed 0x0400_0000_0000 | ||
138 | * | ||
139 | * TTC_IFU_SEL | ||
140 | * 41: 0 = IFU.U events needed | ||
141 | * 1 = IFU.L events needed 0x0200_0000_0000 | ||
142 | * | ||
143 | * TTC3SEL | ||
144 | * 40: 0 = LSU1.U events needed | ||
145 | * 1 = LSU1.L events needed 0x0100_0000_0000 | ||
146 | * | ||
147 | * PS1 | ||
148 | * 39: PS1 error 0x0080_0000_0000 | ||
149 | * 36-38: count of events needing PMC1/2/5/6 0x0070_0000_0000 | ||
150 | * | ||
151 | * PS2 | ||
152 | * 35: PS2 error 0x0008_0000_0000 | ||
153 | * 32-34: count of events needing PMC3/4/7/8 0x0007_0000_0000 | ||
154 | * | ||
155 | * B0 | ||
156 | * 28-31: Byte 0 event source 0xf000_0000 | ||
157 | * 1 = FPU | ||
158 | * 2 = ISU1 | ||
159 | * 3 = IFU | ||
160 | * 4 = IDU0 | ||
161 | * 7 = ISU2 | ||
162 | * 9 = LSU0 | ||
163 | * c = LSU1 | ||
164 | * f = GPS | ||
165 | * | ||
166 | * B1, B2, B3 | ||
167 | * 24-27, 20-23, 16-19: Byte 1, 2, 3 event sources | ||
168 | * | ||
169 | * P8 | ||
170 | * 15: P8 error 0x8000 | ||
171 | * 14-15: Count of events needing PMC8 | ||
172 | * | ||
173 | * P1..P7 | ||
174 | * 0-13: Count of events needing PMC1..PMC7 | ||
175 | * | ||
176 | * Note: this doesn't allow events using IFU.U to be combined with events | ||
177 | * using IFU.L, though that is feasible (using TTM0 and TTM2). However | ||
178 | * there are no listed events for IFU.L (they are debug events not | ||
179 | * verified for performance monitoring) so this shouldn't cause a | ||
180 | * problem. | ||
181 | */ | ||
182 | |||
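The per-PMC fields at the bottom of the layout behave as two-bit adders: each event that must use PMCn contributes 1 to its field, and a second claim on the same PMC carries into the bit that the event's own mask tests. A minimal illustration of the arithmetic (the real checker is in the generic code; this is not it):

	/* two events both pinned to PMC1, per p4_get_constraint below */
	unsigned long v_a = 1ul << 0, v_b = 1ul << 0;	/* value = 1 << sh */
	unsigned long err = 2ul << 0;			/* mask  = 2 << sh */
	int conflict = ((v_a + v_b) & err) != 0;	/* 1 + 1 = 2: reject */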
183 | static struct unitinfo { | ||
184 | unsigned long value, mask; | ||
185 | int unit; | ||
186 | int lowerbit; | ||
187 | } p4_unitinfo[16] = { | ||
188 | [PM_FPU] = { 0x44000000000000ul, 0x88000000000000ul, PM_FPU, 0 }, | ||
189 | [PM_ISU1] = { 0x20080000000000ul, 0x88000000000000ul, PM_ISU1, 0 }, | ||
190 | [PM_ISU1_ALT] = | ||
191 | { 0x20080000000000ul, 0x88000000000000ul, PM_ISU1, 0 }, | ||
192 | [PM_IFU] = { 0x02200000000000ul, 0x08820000000000ul, PM_IFU, 41 }, | ||
193 | [PM_IFU_ALT] = | ||
194 | { 0x02200000000000ul, 0x08820000000000ul, PM_IFU, 41 }, | ||
195 | [PM_IDU0] = { 0x10100000000000ul, 0x80840000000000ul, PM_IDU0, 1 }, | ||
196 | [PM_ISU2] = { 0x10140000000000ul, 0x80840000000000ul, PM_ISU2, 0 }, | ||
197 | [PM_LSU0] = { 0x01400000000000ul, 0x08800000000000ul, PM_LSU0, 0 }, | ||
198 | [PM_LSU1] = { 0x00000000000000ul, 0x00010000000000ul, PM_LSU1, 40 }, | ||
199 | [PM_GPS] = { 0x00000000000000ul, 0x00000000000000ul, PM_GPS, 0 } | ||
200 | }; | ||
201 | |||
202 | static unsigned char direct_marked_event[8] = { | ||
203 | (1<<2) | (1<<3), /* PMC1: PM_MRK_GRP_DISP, PM_MRK_ST_CMPL */ | ||
204 | (1<<3) | (1<<5), /* PMC2: PM_THRESH_TIMEO, PM_MRK_BRU_FIN */ | ||
205 | (1<<3), /* PMC3: PM_MRK_ST_CMPL_INT */ | ||
206 | (1<<4) | (1<<5), /* PMC4: PM_MRK_GRP_CMPL, PM_MRK_CRU_FIN */ | ||
207 | (1<<4) | (1<<5), /* PMC5: PM_MRK_GRP_TIMEO */ | ||
208 | (1<<3) | (1<<4) | (1<<5), | ||
209 | /* PMC6: PM_MRK_ST_GPS, PM_MRK_FXU_FIN, PM_MRK_GRP_ISSUED */ | ||
210 | (1<<4) | (1<<5), /* PMC7: PM_MRK_FPU_FIN, PM_MRK_INST_FIN */ | ||
211 | (1<<4), /* PMC8: PM_MRK_LSU_FIN */ | ||
212 | }; | ||
213 | |||
214 | /* | ||
215 | * Returns 1 if event counts things relating to marked instructions | ||
216 | * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not. | ||
217 | */ | ||
218 | static int p4_marked_instr_event(u64 event) | ||
219 | { | ||
220 | int pmc, psel, unit, byte, bit; | ||
221 | unsigned int mask; | ||
222 | |||
223 | pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; | ||
224 | psel = event & PM_PMCSEL_MSK; | ||
225 | if (pmc) { | ||
226 | if (direct_marked_event[pmc - 1] & (1 << psel)) | ||
227 | return 1; | ||
228 | if (psel == 0) /* add events */ | ||
229 | bit = (pmc <= 4)? pmc - 1: 8 - pmc; | ||
230 | else if (psel == 6) /* decode events */ | ||
231 | bit = 4; | ||
232 | else | ||
233 | return 0; | ||
234 | } else | ||
235 | bit = psel; | ||
236 | |||
237 | byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK; | ||
238 | unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK; | ||
239 | mask = 0; | ||
240 | switch (unit) { | ||
241 | case PM_LSU1: | ||
242 | if (event & PM_LOWER_MSKS) | ||
243 | mask = 1 << 28; /* byte 7 bit 4 */ | ||
244 | else | ||
245 | mask = 6 << 24; /* byte 3 bits 1 and 2 */ | ||
246 | break; | ||
247 | case PM_LSU0: | ||
248 | /* byte 3, bit 3; byte 2 bits 0,2,3,4,5; byte 1 */ | ||
249 | mask = 0x083dff00; | ||
250 | } | ||
251 | return (mask >> (byte * 8 + bit)) & 1; | ||
252 | } | ||
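As one worked instance of the final test: a PM_LSU0 bus event on byte 1 with bit 0 indexes bit 8 of the 0x083dff00 bitmap, which is set, so the event counts as marked:

	/* byte = 1, bit = 0, unit = PM_LSU0 */
	int marked = (0x083dff00 >> (1 * 8 + 0)) & 1;	/* == 1 */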
253 | |||
254 | static int p4_get_constraint(u64 event, unsigned long *maskp, | ||
255 | unsigned long *valp) | ||
256 | { | ||
257 | int pmc, byte, unit, lower, sh; | ||
258 | unsigned long mask = 0, value = 0; | ||
259 | int grp = -1; | ||
260 | |||
261 | pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; | ||
262 | if (pmc) { | ||
263 | if (pmc > 8) | ||
264 | return -1; | ||
265 | sh = (pmc - 1) * 2; | ||
266 | mask |= 2 << sh; | ||
267 | value |= 1 << sh; | ||
268 | grp = ((pmc - 1) >> 1) & 1; | ||
269 | } | ||
270 | unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK; | ||
271 | byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK; | ||
272 | if (unit) { | ||
273 | lower = (event >> PM_LOWER_SH) & PM_LOWER_MSK; | ||
274 | |||
275 | /* | ||
276 | * Bus events on bytes 0 and 2 can be counted | ||
277 | * on PMC1/2/5/6; bytes 1 and 3 on PMC3/4/7/8. | ||
278 | */ | ||
279 | if (!pmc) | ||
280 | grp = byte & 1; | ||
281 | |||
282 | if (!p4_unitinfo[unit].unit) | ||
283 | return -1; | ||
284 | mask |= p4_unitinfo[unit].mask; | ||
285 | value |= p4_unitinfo[unit].value; | ||
286 | sh = p4_unitinfo[unit].lowerbit; | ||
287 | if (sh > 1) | ||
288 | value |= (unsigned long)lower << sh; | ||
289 | else if (lower != sh) | ||
290 | return -1; | ||
291 | unit = p4_unitinfo[unit].unit; | ||
292 | |||
293 | /* Set byte lane select field */ | ||
294 | mask |= 0xfULL << (28 - 4 * byte); | ||
295 | value |= (unsigned long)unit << (28 - 4 * byte); | ||
296 | } | ||
297 | if (grp == 0) { | ||
298 | /* increment PMC1/2/5/6 field */ | ||
299 | mask |= 0x8000000000ull; | ||
300 | value |= 0x1000000000ull; | ||
301 | } else { | ||
302 | /* increment PMC3/4/7/8 field */ | ||
303 | mask |= 0x800000000ull; | ||
304 | value |= 0x100000000ull; | ||
305 | } | ||
306 | |||
307 | /* Marked instruction events need sample_enable set */ | ||
308 | if (p4_marked_instr_event(event)) { | ||
309 | mask |= 1ull << 56; | ||
310 | value |= 1ull << 56; | ||
311 | } | ||
312 | |||
313 | /* PMCSEL=6 decode events on byte 2 need sample_enable clear */ | ||
314 | if (pmc && (event & PM_PMCSEL_MSK) == 6 && byte == 2) | ||
315 | mask |= 1ull << 56; | ||
316 | |||
317 | *maskp = mask; | ||
318 | *valp = value; | ||
319 | return 0; | ||
320 | } | ||
321 | |||
322 | static unsigned int ppc_inst_cmpl[] = { | ||
323 | 0x1001, 0x4001, 0x6001, 0x7001, 0x8001 | ||
324 | }; | ||
325 | |||
326 | static int p4_get_alternatives(u64 event, unsigned int flags, u64 alt[]) | ||
327 | { | ||
328 | int i, j, na; | ||
329 | |||
330 | alt[0] = event; | ||
331 | na = 1; | ||
332 | |||
333 | /* 2 possibilities for PM_GRP_DISP_REJECT */ | ||
334 | if (event == 0x8003 || event == 0x0224) { | ||
335 | alt[1] = event ^ (0x8003 ^ 0x0224); | ||
336 | return 2; | ||
337 | } | ||
338 | |||
339 | /* 2 possibilities for PM_ST_MISS_L1 */ | ||
340 | if (event == 0x0c13 || event == 0x0c23) { | ||
341 | alt[1] = event ^ (0x0c13 ^ 0x0c23); | ||
342 | return 2; | ||
343 | } | ||
344 | |||
345 | /* several possibilities for PM_INST_CMPL */ | ||
346 | for (i = 0; i < ARRAY_SIZE(ppc_inst_cmpl); ++i) { | ||
347 | if (event == ppc_inst_cmpl[i]) { | ||
348 | for (j = 0; j < ARRAY_SIZE(ppc_inst_cmpl); ++j) | ||
349 | if (j != i) | ||
350 | alt[na++] = ppc_inst_cmpl[j]; | ||
351 | break; | ||
352 | } | ||
353 | } | ||
354 | |||
355 | return na; | ||
356 | } | ||
357 | |||
358 | static int p4_compute_mmcr(u64 event[], int n_ev, | ||
359 | unsigned int hwc[], unsigned long mmcr[]) | ||
360 | { | ||
361 | unsigned long mmcr0 = 0, mmcr1 = 0, mmcra = 0; | ||
362 | unsigned int pmc, unit, byte, psel, lower; | ||
363 | unsigned int ttm, grp; | ||
364 | unsigned int pmc_inuse = 0; | ||
365 | unsigned int pmc_grp_use[2]; | ||
366 | unsigned char busbyte[4]; | ||
367 | unsigned char unituse[16]; | ||
368 | unsigned int unitlower = 0; | ||
369 | int i; | ||
370 | |||
371 | if (n_ev > 8) | ||
372 | return -1; | ||
373 | |||
374 | /* First pass to count resource use */ | ||
375 | pmc_grp_use[0] = pmc_grp_use[1] = 0; | ||
376 | memset(busbyte, 0, sizeof(busbyte)); | ||
377 | memset(unituse, 0, sizeof(unituse)); | ||
378 | for (i = 0; i < n_ev; ++i) { | ||
379 | pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK; | ||
380 | if (pmc) { | ||
381 | if (pmc_inuse & (1 << (pmc - 1))) | ||
382 | return -1; | ||
383 | pmc_inuse |= 1 << (pmc - 1); | ||
384 | /* count 1/2/5/6 vs 3/4/7/8 use */ | ||
385 | ++pmc_grp_use[((pmc - 1) >> 1) & 1]; | ||
386 | } | ||
387 | unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK; | ||
388 | byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK; | ||
389 | lower = (event[i] >> PM_LOWER_SH) & PM_LOWER_MSK; | ||
390 | if (unit) { | ||
391 | if (!pmc) | ||
392 | ++pmc_grp_use[byte & 1]; | ||
393 | if (unit == 6 || unit == 8) | ||
394 | /* map alt ISU1/IFU codes: 6->2, 8->3 */ | ||
395 | unit = (unit >> 1) - 1; | ||
396 | if (busbyte[byte] && busbyte[byte] != unit) | ||
397 | return -1; | ||
398 | busbyte[byte] = unit; | ||
399 | lower <<= unit; | ||
400 | if (unituse[unit] && lower != (unitlower & lower)) | ||
401 | return -1; | ||
402 | unituse[unit] = 1; | ||
403 | unitlower |= lower; | ||
404 | } | ||
405 | } | ||
406 | if (pmc_grp_use[0] > 4 || pmc_grp_use[1] > 4) | ||
407 | return -1; | ||
408 | |||
409 | /* | ||
410 | * Assign resources and set multiplexer selects. | ||
411 | * | ||
412 | * Units 1,2,3 are on TTM0, 4,6,7 on TTM1, 8,10 on TTM2. | ||
413 | * Each TTMx can only select one unit, but since | ||
414 | * units 2 and 6 are both ISU1, and 3 and 8 are both IFU, | ||
415 | * we have some choices. | ||
416 | */ | ||
417 | if (unituse[2] & (unituse[1] | (unituse[3] & unituse[9]))) { | ||
418 | unituse[6] = 1; /* Move 2 to 6 */ | ||
419 | unituse[2] = 0; | ||
420 | } | ||
421 | if (unituse[3] & (unituse[1] | unituse[2])) { | ||
422 | unituse[8] = 1; /* Move 3 to 8 */ | ||
423 | unituse[3] = 0; | ||
424 | unitlower = (unitlower & ~8) | ((unitlower & 8) << 5); | ||
425 | } | ||
426 | /* Check only one unit per TTMx */ | ||
427 | if (unituse[1] + unituse[2] + unituse[3] > 1 || | ||
428 | unituse[4] + unituse[6] + unituse[7] > 1 || | ||
429 | unituse[8] + unituse[9] > 1 || | ||
430 | (unituse[5] | unituse[10] | unituse[11] | | ||
431 | unituse[13] | unituse[14])) | ||
432 | return -1; | ||
433 | |||
434 | /* Set TTMxSEL fields. Note, units 1-3 => TTM0SEL codes 0-2 */ | ||
435 | mmcr1 |= (unsigned long)(unituse[3] * 2 + unituse[2]) | ||
436 | << MMCR1_TTM0SEL_SH; | ||
437 | mmcr1 |= (unsigned long)(unituse[7] * 3 + unituse[6] * 2) | ||
438 | << MMCR1_TTM1SEL_SH; | ||
439 | mmcr1 |= (unsigned long)unituse[9] << MMCR1_TTM2SEL_SH; | ||
440 | |||
441 | /* Set TTCxSEL fields. */ | ||
442 | if (unitlower & 0xe) | ||
443 | mmcr1 |= 1ull << MMCR1_TTC0SEL_SH; | ||
444 | if (unitlower & 0xf0) | ||
445 | mmcr1 |= 1ull << MMCR1_TTC1SEL_SH; | ||
446 | if (unitlower & 0xf00) | ||
447 | mmcr1 |= 1ull << MMCR1_TTC2SEL_SH; | ||
448 | if (unitlower & 0x7000) | ||
449 | mmcr1 |= 1ull << MMCR1_TTC3SEL_SH; | ||
450 | |||
451 | /* Set byte lane select fields. */ | ||
452 | for (byte = 0; byte < 4; ++byte) { | ||
453 | unit = busbyte[byte]; | ||
454 | if (!unit) | ||
455 | continue; | ||
456 | if (unit == 0xf) { | ||
457 | /* special case for GPS */ | ||
458 | mmcr1 |= 1ull << (MMCR1_DEBUG0SEL_SH - byte); | ||
459 | } else { | ||
460 | if (!unituse[unit]) | ||
461 | ttm = unit - 1; /* 2->1, 3->2 */ | ||
462 | else | ||
463 | ttm = unit >> 2; | ||
464 | mmcr1 |= (unsigned long)ttm | ||
465 | << (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte); | ||
466 | } | ||
467 | } | ||
468 | |||
469 | /* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */ | ||
470 | for (i = 0; i < n_ev; ++i) { | ||
471 | pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK; | ||
472 | unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK; | ||
473 | byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK; | ||
474 | psel = event[i] & PM_PMCSEL_MSK; | ||
475 | if (!pmc) { | ||
476 | /* Bus event or 00xxx direct event (off or cycles) */ | ||
477 | if (unit) | ||
478 | psel |= 0x10 | ((byte & 2) << 2); | ||
479 | for (pmc = 0; pmc < 8; ++pmc) { | ||
480 | if (pmc_inuse & (1 << pmc)) | ||
481 | continue; | ||
482 | grp = (pmc >> 1) & 1; | ||
483 | if (unit) { | ||
484 | if (grp == (byte & 1)) | ||
485 | break; | ||
486 | } else if (pmc_grp_use[grp] < 4) { | ||
487 | ++pmc_grp_use[grp]; | ||
488 | break; | ||
489 | } | ||
490 | } | ||
491 | pmc_inuse |= 1 << pmc; | ||
492 | } else { | ||
493 | /* Direct event */ | ||
494 | --pmc; | ||
495 | if (psel == 0 && (byte & 2)) | ||
496 | /* add events on higher-numbered bus */ | ||
497 | mmcr1 |= 1ull << mmcr1_adder_bits[pmc]; | ||
498 | else if (psel == 6 && byte == 3) | ||
499 | /* seem to need to set sample_enable here */ | ||
500 | mmcra |= MMCRA_SAMPLE_ENABLE; | ||
501 | psel |= 8; | ||
502 | } | ||
503 | if (pmc <= 1) | ||
504 | mmcr0 |= psel << (MMCR0_PMC1SEL_SH - 7 * pmc); | ||
505 | else | ||
506 | mmcr1 |= psel << (MMCR1_PMC3SEL_SH - 5 * (pmc - 2)); | ||
507 | if (pmc == 7) /* PMC8 */ | ||
508 | mmcra |= (psel & 1) << MMCRA_PMC8SEL0_SH; | ||
509 | hwc[i] = pmc; | ||
510 | if (p4_marked_instr_event(event[i])) | ||
511 | mmcra |= MMCRA_SAMPLE_ENABLE; | ||
512 | } | ||
513 | |||
514 | if (pmc_inuse & 1) | ||
515 | mmcr0 |= MMCR0_PMC1CE; | ||
516 | if (pmc_inuse & 0xfe) | ||
517 | mmcr0 |= MMCR0_PMCjCE; | ||
518 | |||
519 | mmcra |= 0x2000; /* mark only one IOP per PPC instruction */ | ||
520 | |||
521 | /* Return MMCRx values */ | ||
522 | mmcr[0] = mmcr0; | ||
523 | mmcr[1] = mmcr1; | ||
524 | mmcr[2] = mmcra; | ||
525 | return 0; | ||
526 | } | ||
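A hedged sketch of how a caller might drive this function (the real scheduler is the generic powerpc perf core; the event codes below come from p4_generic_events, everything else is illustrative):

	u64 evs[2] = { 7, 0x1001 };	/* PM_CYC, PM_INST_CMPL */
	unsigned int hwc[2];
	unsigned long mmcr[3];

	if (p4_compute_mmcr(evs, 2, hwc, mmcr) == 0) {
		/* mmcr[0..2] are the MMCR0/MMCR1/MMCRA images to load;
		   hwc[i] is the 0-based PMC assigned to event i */
	}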
527 | |||
528 | static void p4_disable_pmc(unsigned int pmc, unsigned long mmcr[]) | ||
529 | { | ||
530 | /* | ||
531 | * Setting the PMCxSEL field to 0 disables PMC x. | ||
532 | * (Note that pmc is 0-based here, not 1-based.) | ||
533 | */ | ||
534 | if (pmc <= 1) { | ||
535 | mmcr[0] &= ~(0x1fUL << (MMCR0_PMC1SEL_SH - 7 * pmc)); | ||
536 | } else { | ||
537 | mmcr[1] &= ~(0x1fUL << (MMCR1_PMC3SEL_SH - 5 * (pmc - 2))); | ||
538 | if (pmc == 7) | ||
539 | mmcr[2] &= ~(1UL << MMCRA_PMC8SEL0_SH); | ||
540 | } | ||
541 | } | ||
542 | |||
543 | static int p4_generic_events[] = { | ||
544 | [PERF_COUNT_HW_CPU_CYCLES] = 7, | ||
545 | [PERF_COUNT_HW_INSTRUCTIONS] = 0x1001, | ||
546 | [PERF_COUNT_HW_CACHE_REFERENCES] = 0x8c10, /* PM_LD_REF_L1 */ | ||
547 | [PERF_COUNT_HW_CACHE_MISSES] = 0x3c10, /* PM_LD_MISS_L1 */ | ||
548 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x330, /* PM_BR_ISSUED */ | ||
549 | [PERF_COUNT_HW_BRANCH_MISSES] = 0x331, /* PM_BR_MPRED_CR */ | ||
550 | }; | ||
551 | |||
552 | #define C(x) PERF_COUNT_HW_CACHE_##x | ||
553 | |||
554 | /* | ||
555 | * Table of generalized cache-related events. | ||
556 | * 0 means not supported, -1 means nonsensical, other values | ||
557 | * are event codes. | ||
558 | */ | ||
559 | static int power4_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { | ||
560 | [C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
561 | [C(OP_READ)] = { 0x8c10, 0x3c10 }, | ||
562 | [C(OP_WRITE)] = { 0x7c10, 0xc13 }, | ||
563 | [C(OP_PREFETCH)] = { 0xc35, 0 }, | ||
564 | }, | ||
565 | [C(L1I)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
566 | [C(OP_READ)] = { 0, 0 }, | ||
567 | [C(OP_WRITE)] = { -1, -1 }, | ||
568 | [C(OP_PREFETCH)] = { 0, 0 }, | ||
569 | }, | ||
570 | [C(LL)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
571 | [C(OP_READ)] = { 0, 0 }, | ||
572 | [C(OP_WRITE)] = { 0, 0 }, | ||
573 | [C(OP_PREFETCH)] = { 0xc34, 0 }, | ||
574 | }, | ||
575 | [C(DTLB)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
576 | [C(OP_READ)] = { 0, 0x904 }, | ||
577 | [C(OP_WRITE)] = { -1, -1 }, | ||
578 | [C(OP_PREFETCH)] = { -1, -1 }, | ||
579 | }, | ||
580 | [C(ITLB)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
581 | [C(OP_READ)] = { 0, 0x900 }, | ||
582 | [C(OP_WRITE)] = { -1, -1 }, | ||
583 | [C(OP_PREFETCH)] = { -1, -1 }, | ||
584 | }, | ||
585 | [C(BPU)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
586 | [C(OP_READ)] = { 0x330, 0x331 }, | ||
587 | [C(OP_WRITE)] = { -1, -1 }, | ||
588 | [C(OP_PREFETCH)] = { -1, -1 }, | ||
589 | }, | ||
590 | [C(NODE)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
591 | [C(OP_READ)] = { -1, -1 }, | ||
592 | [C(OP_WRITE)] = { -1, -1 }, | ||
593 | [C(OP_PREFETCH)] = { -1, -1 }, | ||
594 | }, | ||
595 | }; | ||
596 | |||
597 | static struct power_pmu power4_pmu = { | ||
598 | .name = "POWER4/4+", | ||
599 | .n_counter = 8, | ||
600 | .max_alternatives = 5, | ||
601 | .add_fields = 0x0000001100005555ul, | ||
602 | .test_adder = 0x0011083300000000ul, | ||
603 | .compute_mmcr = p4_compute_mmcr, | ||
604 | .get_constraint = p4_get_constraint, | ||
605 | .get_alternatives = p4_get_alternatives, | ||
606 | .disable_pmc = p4_disable_pmc, | ||
607 | .n_generic = ARRAY_SIZE(p4_generic_events), | ||
608 | .generic_events = p4_generic_events, | ||
609 | .cache_events = &power4_cache_events, | ||
610 | }; | ||
611 | |||
612 | static int __init init_power4_pmu(void) | ||
613 | { | ||
614 | if (!cur_cpu_spec->oprofile_cpu_type || | ||
615 | strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power4")) | ||
616 | return -ENODEV; | ||
617 | |||
618 | return register_power_pmu(&power4_pmu); | ||
619 | } | ||
620 | |||
621 | early_initcall(init_power4_pmu); | ||
diff --git a/arch/powerpc/perf/power5+-pmu.c b/arch/powerpc/perf/power5+-pmu.c
new file mode 100644
index 000000000000..a8757baa28f3
--- /dev/null
+++ b/arch/powerpc/perf/power5+-pmu.c
@@ -0,0 +1,690 @@
1 | /* | ||
2 | * Performance counter support for POWER5+/++ (not POWER5) processors. | ||
3 | * | ||
4 | * Copyright 2009 Paul Mackerras, IBM Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the License, or (at your option) any later version. | ||
10 | */ | ||
11 | #include <linux/kernel.h> | ||
12 | #include <linux/perf_event.h> | ||
13 | #include <linux/string.h> | ||
14 | #include <asm/reg.h> | ||
15 | #include <asm/cputable.h> | ||
16 | |||
17 | /* | ||
18 | * Bits in event code for POWER5+ (POWER5 GS) and POWER5++ (POWER5 GS DD3) | ||
19 | */ | ||
20 | #define PM_PMC_SH 20 /* PMC number (1-based) for direct events */ | ||
21 | #define PM_PMC_MSK 0xf | ||
22 | #define PM_PMC_MSKS (PM_PMC_MSK << PM_PMC_SH) | ||
23 | #define PM_UNIT_SH 16 /* TTMMUX number and setting - unit select */ | ||
24 | #define PM_UNIT_MSK 0xf | ||
25 | #define PM_BYTE_SH 12 /* Byte number of event bus to use */ | ||
26 | #define PM_BYTE_MSK 7 | ||
27 | #define PM_GRS_SH 8 /* Storage subsystem mux select */ | ||
28 | #define PM_GRS_MSK 7 | ||
29 | #define PM_BUSEVENT_MSK 0x80 /* Set if event uses event bus */ | ||
30 | #define PM_PMCSEL_MSK 0x7f | ||
31 | |||
32 | /* Values in PM_UNIT field */ | ||
33 | #define PM_FPU 0 | ||
34 | #define PM_ISU0 1 | ||
35 | #define PM_IFU 2 | ||
36 | #define PM_ISU1 3 | ||
37 | #define PM_IDU 4 | ||
38 | #define PM_ISU0_ALT 6 | ||
39 | #define PM_GRS 7 | ||
40 | #define PM_LSU0 8 | ||
41 | #define PM_LSU1 0xc | ||
42 | #define PM_LASTUNIT 0xc | ||
43 | |||
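For a concrete decode under this wider layout, take the generic L1 load-miss code 0x3c1088 (PM_LD_MISS_L1 in the tables below); again, worked arithmetic only:

	u64 ev    = 0x3c1088;
	int pmc   = (ev >> PM_PMC_SH) & PM_PMC_MSK;	/* 3 */
	int unit  = (ev >> PM_UNIT_SH) & PM_UNIT_MSK;	/* 0xc: PM_LSU1 */
	int byte  = (ev >> PM_BYTE_SH) & PM_BYTE_MSK;	/* 1 */
	int isbus = ev & PM_BUSEVENT_MSK;		/* nonzero: event bus */
	int psel  = ev & PM_PMCSEL_MSK;			/* 0x08 */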
44 | /* | ||
45 | * Bits in MMCR1 for POWER5+ | ||
46 | */ | ||
47 | #define MMCR1_TTM0SEL_SH 62 | ||
48 | #define MMCR1_TTM1SEL_SH 60 | ||
49 | #define MMCR1_TTM2SEL_SH 58 | ||
50 | #define MMCR1_TTM3SEL_SH 56 | ||
51 | #define MMCR1_TTMSEL_MSK 3 | ||
52 | #define MMCR1_TD_CP_DBG0SEL_SH 54 | ||
53 | #define MMCR1_TD_CP_DBG1SEL_SH 52 | ||
54 | #define MMCR1_TD_CP_DBG2SEL_SH 50 | ||
55 | #define MMCR1_TD_CP_DBG3SEL_SH 48 | ||
56 | #define MMCR1_GRS_L2SEL_SH 46 | ||
57 | #define MMCR1_GRS_L2SEL_MSK 3 | ||
58 | #define MMCR1_GRS_L3SEL_SH 44 | ||
59 | #define MMCR1_GRS_L3SEL_MSK 3 | ||
60 | #define MMCR1_GRS_MCSEL_SH 41 | ||
61 | #define MMCR1_GRS_MCSEL_MSK 7 | ||
62 | #define MMCR1_GRS_FABSEL_SH 39 | ||
63 | #define MMCR1_GRS_FABSEL_MSK 3 | ||
64 | #define MMCR1_PMC1_ADDER_SEL_SH 35 | ||
65 | #define MMCR1_PMC2_ADDER_SEL_SH 34 | ||
66 | #define MMCR1_PMC3_ADDER_SEL_SH 33 | ||
67 | #define MMCR1_PMC4_ADDER_SEL_SH 32 | ||
68 | #define MMCR1_PMC1SEL_SH 25 | ||
69 | #define MMCR1_PMC2SEL_SH 17 | ||
70 | #define MMCR1_PMC3SEL_SH 9 | ||
71 | #define MMCR1_PMC4SEL_SH 1 | ||
72 | #define MMCR1_PMCSEL_SH(n) (MMCR1_PMC1SEL_SH - (n) * 8) | ||
73 | #define MMCR1_PMCSEL_MSK 0x7f | ||
74 | |||
75 | /* | ||
76 | * Layout of constraint bits: | ||
77 | * 6666555555555544444444443333333333222222222211111111110000000000 | ||
78 | * 3210987654321098765432109876543210987654321098765432109876543210 | ||
79 | * [ ><><>< ><> <><>[ > < >< >< >< ><><><><><><> | ||
80 | * NC G0G1G2 G3 T0T1 UC B0 B1 B2 B3 P6P5P4P3P2P1 | ||
81 | * | ||
82 | * NC - number of counters | ||
83 | * 51: NC error 0x0008_0000_0000_0000 | ||
84 | * 48-50: number of events needing PMC1-4 0x0007_0000_0000_0000 | ||
85 | * | ||
86 | * G0..G3 - GRS mux constraints | ||
87 | * 46-47: GRS_L2SEL value | ||
88 | * 44-45: GRS_L3SEL value | ||
89 | * 41-43: GRS_MCSEL value | ||
90 | * 39-40: GRS_FABSEL value | ||
91 | * Note that these match up with their bit positions in MMCR1 | ||
92 | * | ||
93 | * T0 - TTM0 constraint | ||
94 | * 36-37: TTM0SEL value (0=FPU, 2=IFU, 3=ISU1) 0x30_0000_0000 | ||
95 | * | ||
96 | * T1 - TTM1 constraint | ||
97 | * 34-35: TTM1SEL value (0=IDU, 3=GRS) 0x0c_0000_0000 | ||
98 | * | ||
99 | * UC - unit constraint: can't have all three of FPU|IFU|ISU1, ISU0, IDU|GRS | ||
100 | * 33: UC3 error 0x02_0000_0000 | ||
101 | * 32: FPU|IFU|ISU1 events needed 0x01_0000_0000 | ||
102 | * 31: ISU0 events needed 0x00_8000_0000 | ||
103 | * 30: IDU|GRS events needed 0x00_4000_0000 | ||
104 | * | ||
105 | * B0 | ||
106 | * 24-27: Byte 0 event source 0x0f00_0000 | ||
107 | * Encoding as for the event code | ||
108 | * | ||
109 | * B1, B2, B3 | ||
110 | * 20-23, 16-19, 12-15: Byte 1, 2, 3 event sources | ||
111 | * | ||
112 | * P6 | ||
113 | * 11: P6 error 0x800 | ||
114 | * 10-11: Count of events needing PMC6 | ||
115 | * | ||
116 | * P1..P5 | ||
117 | * 0-9: Count of events needing PMC1..PMC5 | ||
118 | */ | ||
119 | |||
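One worked instance of the byte-source fields: a PM_LSU0 (unit 8) event on byte 1 claims the B1 nibble, so a second event can share byte 1 only if it names the same unit. The mask/value contribution computed by power5p_get_constraint below is:

	/* byte = 1: B1 occupies constraint bits 20-23 */
	unsigned long mask  = 0xfUL << (24 - 4 * 1);		      /* 0x00f00000 */
	unsigned long value = (unsigned long)PM_LSU0 << (24 - 4 * 1); /* 0x00800000 */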
120 | static const int grsel_shift[8] = { | ||
121 | MMCR1_GRS_L2SEL_SH, MMCR1_GRS_L2SEL_SH, MMCR1_GRS_L2SEL_SH, | ||
122 | MMCR1_GRS_L3SEL_SH, MMCR1_GRS_L3SEL_SH, MMCR1_GRS_L3SEL_SH, | ||
123 | MMCR1_GRS_MCSEL_SH, MMCR1_GRS_FABSEL_SH | ||
124 | }; | ||
125 | |||
126 | /* Masks and values for using events from the various units */ | ||
127 | static unsigned long unit_cons[PM_LASTUNIT+1][2] = { | ||
128 | [PM_FPU] = { 0x3200000000ul, 0x0100000000ul }, | ||
129 | [PM_ISU0] = { 0x0200000000ul, 0x0080000000ul }, | ||
130 | [PM_ISU1] = { 0x3200000000ul, 0x3100000000ul }, | ||
131 | [PM_IFU] = { 0x3200000000ul, 0x2100000000ul }, | ||
132 | [PM_IDU] = { 0x0e00000000ul, 0x0040000000ul }, | ||
133 | [PM_GRS] = { 0x0e00000000ul, 0x0c40000000ul }, | ||
134 | }; | ||
135 | |||
136 | static int power5p_get_constraint(u64 event, unsigned long *maskp, | ||
137 | unsigned long *valp) | ||
138 | { | ||
139 | int pmc, byte, unit, sh; | ||
140 | int bit, fmask; | ||
141 | unsigned long mask = 0, value = 0; | ||
142 | |||
143 | pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; | ||
144 | if (pmc) { | ||
145 | if (pmc > 6) | ||
146 | return -1; | ||
147 | sh = (pmc - 1) * 2; | ||
148 | mask |= 2 << sh; | ||
149 | value |= 1 << sh; | ||
150 | if (pmc >= 5 && !(event == 0x500009 || event == 0x600005)) | ||
151 | return -1; | ||
152 | } | ||
153 | if (event & PM_BUSEVENT_MSK) { | ||
154 | unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK; | ||
155 | if (unit > PM_LASTUNIT) | ||
156 | return -1; | ||
157 | if (unit == PM_ISU0_ALT) | ||
158 | unit = PM_ISU0; | ||
159 | mask |= unit_cons[unit][0]; | ||
160 | value |= unit_cons[unit][1]; | ||
161 | byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK; | ||
162 | if (byte >= 4) { | ||
163 | if (unit != PM_LSU1) | ||
164 | return -1; | ||
165 | /* Map LSU1 low word (bytes 4-7) to unit LSU1+1 */ | ||
166 | ++unit; | ||
167 | byte &= 3; | ||
168 | } | ||
169 | if (unit == PM_GRS) { | ||
170 | bit = event & 7; | ||
171 | fmask = (bit == 6)? 7: 3; | ||
172 | sh = grsel_shift[bit]; | ||
173 | mask |= (unsigned long)fmask << sh; | ||
174 | value |= (unsigned long)((event >> PM_GRS_SH) & fmask) | ||
175 | << sh; | ||
176 | } | ||
177 | /* Set byte lane select field */ | ||
178 | mask |= 0xfUL << (24 - 4 * byte); | ||
179 | value |= (unsigned long)unit << (24 - 4 * byte); | ||
180 | } | ||
181 | if (pmc < 5) { | ||
182 | /* need a counter from PMC1-4 set */ | ||
183 | mask |= 0x8000000000000ul; | ||
184 | value |= 0x1000000000000ul; | ||
185 | } | ||
186 | *maskp = mask; | ||
187 | *valp = value; | ||
188 | return 0; | ||
189 | } | ||
190 | |||
191 | static int power5p_limited_pmc_event(u64 event) | ||
192 | { | ||
193 | int pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; | ||
194 | |||
195 | return pmc == 5 || pmc == 6; | ||
196 | } | ||
197 | |||
198 | #define MAX_ALT 3 /* at most 3 alternatives for any event */ | ||
199 | |||
200 | static const unsigned int event_alternatives[][MAX_ALT] = { | ||
201 | { 0x100c0, 0x40001f }, /* PM_GCT_FULL_CYC */ | ||
202 | { 0x120e4, 0x400002 }, /* PM_GRP_DISP_REJECT */ | ||
203 | { 0x230e2, 0x323087 }, /* PM_BR_PRED_CR */ | ||
204 | { 0x230e3, 0x223087, 0x3230a0 }, /* PM_BR_PRED_TA */ | ||
205 | { 0x410c7, 0x441084 }, /* PM_THRD_L2MISS_BOTH_CYC */ | ||
206 | { 0x800c4, 0xc20e0 }, /* PM_DTLB_MISS */ | ||
207 | { 0xc50c6, 0xc60e0 }, /* PM_MRK_DTLB_MISS */ | ||
208 | { 0x100005, 0x600005 }, /* PM_RUN_CYC */ | ||
209 | { 0x100009, 0x200009 }, /* PM_INST_CMPL */ | ||
210 | { 0x200015, 0x300015 }, /* PM_LSU_LMQ_SRQ_EMPTY_CYC */ | ||
211 | { 0x300009, 0x400009 }, /* PM_INST_DISP */ | ||
212 | }; | ||
213 | |||
214 | /* | ||
215 | * Scan the alternatives table for a match and return the | ||
216 | * index into the alternatives table if found, else -1. | ||
217 | */ | ||
218 | static int find_alternative(unsigned int event) | ||
219 | { | ||
220 | int i, j; | ||
221 | |||
222 | for (i = 0; i < ARRAY_SIZE(event_alternatives); ++i) { | ||
223 | if (event < event_alternatives[i][0]) | ||
224 | break; | ||
225 | for (j = 0; j < MAX_ALT && event_alternatives[i][j]; ++j) | ||
226 | if (event == event_alternatives[i][j]) | ||
227 | return i; | ||
228 | } | ||
229 | return -1; | ||
230 | } | ||
231 | |||
232 | static const unsigned char bytedecode_alternatives[4][4] = { | ||
233 | /* PMC 1 */ { 0x21, 0x23, 0x25, 0x27 }, | ||
234 | /* PMC 2 */ { 0x07, 0x17, 0x0e, 0x1e }, | ||
235 | /* PMC 3 */ { 0x20, 0x22, 0x24, 0x26 }, | ||
236 | /* PMC 4 */ { 0x07, 0x17, 0x0e, 0x1e } | ||
237 | }; | ||
238 | |||
239 | /* | ||
240 | * Some direct events for decodes of event bus byte 3 have alternative | ||
241 | * PMCSEL values on other counters. This returns the alternative | ||
242 | * event code for those that do, or -1 otherwise. This also handles | ||
243 | * alternative PMCSEL values for add events. | ||
244 | */ | ||
245 | static s64 find_alternative_bdecode(u64 event) | ||
246 | { | ||
247 | int pmc, altpmc, pp, j; | ||
248 | |||
249 | pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; | ||
250 | if (pmc == 0 || pmc > 4) | ||
251 | return -1; | ||
252 | altpmc = 5 - pmc; /* 1 <-> 4, 2 <-> 3 */ | ||
253 | pp = event & PM_PMCSEL_MSK; | ||
254 | for (j = 0; j < 4; ++j) { | ||
255 | if (bytedecode_alternatives[pmc - 1][j] == pp) { | ||
256 | return (event & ~(PM_PMC_MSKS | PM_PMCSEL_MSK)) | | ||
257 | (altpmc << PM_PMC_SH) | | ||
258 | bytedecode_alternatives[altpmc - 1][j]; | ||
259 | } | ||
260 | } | ||
261 | |||
262 | /* new decode alternatives for power5+ */ | ||
263 | if (pmc == 1 && (pp == 0x0d || pp == 0x0e)) | ||
264 | return event + (2 << PM_PMC_SH) + (0x2e - 0x0d); | ||
265 | if (pmc == 3 && (pp == 0x2e || pp == 0x2f)) | ||
266 | return event - (2 << PM_PMC_SH) - (0x2e - 0x0d); | ||
267 | |||
268 | /* alternative add event encodings */ | ||
269 | if (pp == 0x10 || pp == 0x28) | ||
270 | return ((event ^ (0x10 ^ 0x28)) & ~PM_PMC_MSKS) | | ||
271 | (altpmc << PM_PMC_SH); | ||
272 | |||
273 | return -1; | ||
274 | } | ||
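For example, a byte-decode event on PMC1 with PMCSEL 0x21 has the equivalent decode 0x07 on PMC4 (same column of bytedecode_alternatives), so the function swaps both fields; the event value here is made up purely to exercise the lookup:

	u64 ev  = (1 << PM_PMC_SH) | 0x21;	/* pmc = 1, psel = 0x21 */
	s64 alt = find_alternative_bdecode(ev);	/* (4 << PM_PMC_SH) | 0x07 */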
275 | |||
276 | static int power5p_get_alternatives(u64 event, unsigned int flags, u64 alt[]) | ||
277 | { | ||
278 | int i, j, nalt = 1; | ||
279 | int nlim; | ||
280 | s64 ae; | ||
281 | |||
282 | alt[0] = event; | ||
283 | nalt = 1; | ||
284 | nlim = power5p_limited_pmc_event(event); | ||
285 | i = find_alternative(event); | ||
286 | if (i >= 0) { | ||
287 | for (j = 0; j < MAX_ALT; ++j) { | ||
288 | ae = event_alternatives[i][j]; | ||
289 | if (ae && ae != event) | ||
290 | alt[nalt++] = ae; | ||
291 | nlim += power5p_limited_pmc_event(ae); | ||
292 | } | ||
293 | } else { | ||
294 | ae = find_alternative_bdecode(event); | ||
295 | if (ae > 0) | ||
296 | alt[nalt++] = ae; | ||
297 | } | ||
298 | |||
299 | if (flags & PPMU_ONLY_COUNT_RUN) { | ||
300 | /* | ||
301 | * We're only counting in RUN state, | ||
302 | * so PM_CYC is equivalent to PM_RUN_CYC | ||
303 | * and PM_INST_CMPL to PM_RUN_INST_CMPL. | ||
304 | * This doesn't include alternatives that don't provide | ||
305 | * any extra flexibility in assigning PMCs (e.g. | ||
306 | * 0x100005 for PM_RUN_CYC vs. 0xf for PM_CYC). | ||
307 | * Note that even with these additional alternatives | ||
308 | * we never end up with more than 3 alternatives for any event. | ||
309 | */ | ||
310 | j = nalt; | ||
311 | for (i = 0; i < nalt; ++i) { | ||
312 | switch (alt[i]) { | ||
313 | case 0xf: /* PM_CYC */ | ||
314 | alt[j++] = 0x600005; /* PM_RUN_CYC */ | ||
315 | ++nlim; | ||
316 | break; | ||
317 | case 0x600005: /* PM_RUN_CYC */ | ||
318 | alt[j++] = 0xf; | ||
319 | break; | ||
320 | case 0x100009: /* PM_INST_CMPL */ | ||
321 | alt[j++] = 0x500009; /* PM_RUN_INST_CMPL */ | ||
322 | ++nlim; | ||
323 | break; | ||
324 | case 0x500009: /* PM_RUN_INST_CMPL */ | ||
325 | alt[j++] = 0x100009; /* PM_INST_CMPL */ | ||
326 | alt[j++] = 0x200009; | ||
327 | break; | ||
328 | } | ||
329 | } | ||
330 | nalt = j; | ||
331 | } | ||
332 | |||
333 | if (!(flags & PPMU_LIMITED_PMC_OK) && nlim) { | ||
334 | /* remove the limited PMC events */ | ||
335 | j = 0; | ||
336 | for (i = 0; i < nalt; ++i) { | ||
337 | if (!power5p_limited_pmc_event(alt[i])) { | ||
338 | alt[j] = alt[i]; | ||
339 | ++j; | ||
340 | } | ||
341 | } | ||
342 | nalt = j; | ||
343 | } else if ((flags & PPMU_LIMITED_PMC_REQD) && nlim < nalt) { | ||
344 | /* remove all but the limited PMC events */ | ||
345 | j = 0; | ||
346 | for (i = 0; i < nalt; ++i) { | ||
347 | if (power5p_limited_pmc_event(alt[i])) { | ||
348 | alt[j] = alt[i]; | ||
349 | ++j; | ||
350 | } | ||
351 | } | ||
352 | nalt = j; | ||
353 | } | ||
354 | |||
355 | return nalt; | ||
356 | } | ||
357 | |||
358 | /* | ||
359 | * Map of which direct events on which PMCs are marked instruction events. | ||
360 | * Indexed by PMCSEL value, bit i (LE) set if PMC i is a marked event. | ||
361 | * Bit 0 is set if it is marked for all PMCs. | ||
362 | * The 0x80 bit indicates a byte decode PMCSEL value. | ||
363 | */ | ||
364 | static unsigned char direct_event_is_marked[0x28] = { | ||
365 | 0, /* 00 */ | ||
366 | 0x1f, /* 01 PM_IOPS_CMPL */ | ||
367 | 0x2, /* 02 PM_MRK_GRP_DISP */ | ||
368 | 0xe, /* 03 PM_MRK_ST_CMPL, PM_MRK_ST_GPS, PM_MRK_ST_CMPL_INT */ | ||
369 | 0, /* 04 */ | ||
370 | 0x1c, /* 05 PM_MRK_BRU_FIN, PM_MRK_INST_FIN, PM_MRK_CRU_FIN */ | ||
371 | 0x80, /* 06 */ | ||
372 | 0x80, /* 07 */ | ||
373 | 0, 0, 0,/* 08 - 0a */ | ||
374 | 0x18, /* 0b PM_THRESH_TIMEO, PM_MRK_GRP_TIMEO */ | ||
375 | 0, /* 0c */ | ||
376 | 0x80, /* 0d */ | ||
377 | 0x80, /* 0e */ | ||
378 | 0, /* 0f */ | ||
379 | 0, /* 10 */ | ||
380 | 0x14, /* 11 PM_MRK_GRP_BR_REDIR, PM_MRK_GRP_IC_MISS */ | ||
381 | 0, /* 12 */ | ||
382 | 0x10, /* 13 PM_MRK_GRP_CMPL */ | ||
383 | 0x1f, /* 14 PM_GRP_MRK, PM_MRK_{FXU,FPU,LSU}_FIN */ | ||
384 | 0x2, /* 15 PM_MRK_GRP_ISSUED */ | ||
385 | 0x80, /* 16 */ | ||
386 | 0x80, /* 17 */ | ||
387 | 0, 0, 0, 0, 0, | ||
388 | 0x80, /* 1d */ | ||
389 | 0x80, /* 1e */ | ||
390 | 0, /* 1f */ | ||
391 | 0x80, /* 20 */ | ||
392 | 0x80, /* 21 */ | ||
393 | 0x80, /* 22 */ | ||
394 | 0x80, /* 23 */ | ||
395 | 0x80, /* 24 */ | ||
396 | 0x80, /* 25 */ | ||
397 | 0x80, /* 26 */ | ||
398 | 0x80, /* 27 */ | ||
399 | }; | ||
400 | |||
401 | /* | ||
402 | * Returns 1 if event counts things relating to marked instructions | ||
403 | * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not. | ||
404 | */ | ||
405 | static int power5p_marked_instr_event(u64 event) | ||
406 | { | ||
407 | int pmc, psel; | ||
408 | int bit, byte, unit; | ||
409 | u32 mask; | ||
410 | |||
411 | pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; | ||
412 | psel = event & PM_PMCSEL_MSK; | ||
413 | if (pmc >= 5) | ||
414 | return 0; | ||
415 | |||
416 | bit = -1; | ||
417 | if (psel < sizeof(direct_event_is_marked)) { | ||
418 | if (direct_event_is_marked[psel] & (1 << pmc)) | ||
419 | return 1; | ||
420 | if (direct_event_is_marked[psel] & 0x80) | ||
421 | bit = 4; | ||
422 | else if (psel == 0x08) | ||
423 | bit = pmc - 1; | ||
424 | else if (psel == 0x10) | ||
425 | bit = 4 - pmc; | ||
426 | else if (psel == 0x1b && (pmc == 1 || pmc == 3)) | ||
427 | bit = 4; | ||
428 | } else if ((psel & 0x48) == 0x40) { | ||
429 | bit = psel & 7; | ||
430 | } else if (psel == 0x28) { | ||
431 | bit = pmc - 1; | ||
432 | } else if (pmc == 3 && (psel == 0x2e || psel == 0x2f)) { | ||
433 | bit = 4; | ||
434 | } | ||
435 | |||
436 | if (!(event & PM_BUSEVENT_MSK) || bit == -1) | ||
437 | return 0; | ||
438 | |||
439 | byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK; | ||
440 | unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK; | ||
441 | if (unit == PM_LSU0) { | ||
442 | /* byte 1 bits 0-7, byte 2 bits 0,2-4,6 */ | ||
443 | mask = 0x5dff00; | ||
444 | } else if (unit == PM_LSU1 && byte >= 4) { | ||
445 | byte -= 4; | ||
446 | /* byte 5 bits 6-7, byte 6 bits 0,4, byte 7 bits 0-4,6 */ | ||
447 | mask = 0x5f11c000; | ||
448 | } else | ||
449 | return 0; | ||
450 | |||
451 | return (mask >> (byte * 8 + bit)) & 1; | ||
452 | } | ||
453 | |||
454 | static int power5p_compute_mmcr(u64 event[], int n_ev, | ||
455 | unsigned int hwc[], unsigned long mmcr[]) | ||
456 | { | ||
457 | unsigned long mmcr1 = 0; | ||
458 | unsigned long mmcra = 0; | ||
459 | unsigned int pmc, unit, byte, psel; | ||
460 | unsigned int ttm; | ||
461 | int i, isbus, bit, grsel; | ||
462 | unsigned int pmc_inuse = 0; | ||
463 | unsigned char busbyte[4]; | ||
464 | unsigned char unituse[16]; | ||
465 | int ttmuse; | ||
466 | |||
467 | if (n_ev > 6) | ||
468 | return -1; | ||
469 | |||
470 | /* First pass to count resource use */ | ||
471 | memset(busbyte, 0, sizeof(busbyte)); | ||
472 | memset(unituse, 0, sizeof(unituse)); | ||
473 | for (i = 0; i < n_ev; ++i) { | ||
474 | pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK; | ||
475 | if (pmc) { | ||
476 | if (pmc > 6) | ||
477 | return -1; | ||
478 | if (pmc_inuse & (1 << (pmc - 1))) | ||
479 | return -1; | ||
480 | pmc_inuse |= 1 << (pmc - 1); | ||
481 | } | ||
482 | if (event[i] & PM_BUSEVENT_MSK) { | ||
483 | unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK; | ||
484 | byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK; | ||
485 | if (unit > PM_LASTUNIT) | ||
486 | return -1; | ||
487 | if (unit == PM_ISU0_ALT) | ||
488 | unit = PM_ISU0; | ||
489 | if (byte >= 4) { | ||
490 | if (unit != PM_LSU1) | ||
491 | return -1; | ||
492 | ++unit; | ||
493 | byte &= 3; | ||
494 | } | ||
495 | if (busbyte[byte] && busbyte[byte] != unit) | ||
496 | return -1; | ||
497 | busbyte[byte] = unit; | ||
498 | unituse[unit] = 1; | ||
499 | } | ||
500 | } | ||
501 | |||
502 | /* | ||
503 | * Assign resources and set multiplexer selects. | ||
504 | * | ||
505 | * PM_ISU0 can go either on TTM0 or TTM1, but that's the only | ||
506 | * choice we have to deal with. | ||
507 | */ | ||
508 | if (unituse[PM_ISU0] & | ||
509 | (unituse[PM_FPU] | unituse[PM_IFU] | unituse[PM_ISU1])) { | ||
510 | unituse[PM_ISU0_ALT] = 1; /* move ISU to TTM1 */ | ||
511 | unituse[PM_ISU0] = 0; | ||
512 | } | ||
513 | /* Set TTM[01]SEL fields. */ | ||
514 | ttmuse = 0; | ||
515 | for (i = PM_FPU; i <= PM_ISU1; ++i) { | ||
516 | if (!unituse[i]) | ||
517 | continue; | ||
518 | if (ttmuse++) | ||
519 | return -1; | ||
520 | mmcr1 |= (unsigned long)i << MMCR1_TTM0SEL_SH; | ||
521 | } | ||
522 | ttmuse = 0; | ||
523 | for (; i <= PM_GRS; ++i) { | ||
524 | if (!unituse[i]) | ||
525 | continue; | ||
526 | if (ttmuse++) | ||
527 | return -1; | ||
528 | mmcr1 |= (unsigned long)(i & 3) << MMCR1_TTM1SEL_SH; | ||
529 | } | ||
530 | if (ttmuse > 1) | ||
531 | return -1; | ||
532 | |||
533 | /* Set byte lane select fields, TTM[23]SEL and GRS_*SEL. */ | ||
534 | for (byte = 0; byte < 4; ++byte) { | ||
535 | unit = busbyte[byte]; | ||
536 | if (!unit) | ||
537 | continue; | ||
538 | if (unit == PM_ISU0 && unituse[PM_ISU0_ALT]) { | ||
539 | /* get ISU0 through TTM1 rather than TTM0 */ | ||
540 | unit = PM_ISU0_ALT; | ||
541 | } else if (unit == PM_LSU1 + 1) { | ||
542 | /* select lower word of LSU1 for this byte */ | ||
543 | mmcr1 |= 1ul << (MMCR1_TTM3SEL_SH + 3 - byte); | ||
544 | } | ||
545 | ttm = unit >> 2; | ||
546 | mmcr1 |= (unsigned long)ttm | ||
547 | << (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte); | ||
548 | } | ||
549 | |||
550 | /* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */ | ||
551 | for (i = 0; i < n_ev; ++i) { | ||
552 | pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK; | ||
553 | unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK; | ||
554 | byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK; | ||
555 | psel = event[i] & PM_PMCSEL_MSK; | ||
556 | isbus = event[i] & PM_BUSEVENT_MSK; | ||
557 | if (!pmc) { | ||
558 | /* Bus event or any-PMC direct event */ | ||
559 | for (pmc = 0; pmc < 4; ++pmc) { | ||
560 | if (!(pmc_inuse & (1 << pmc))) | ||
561 | break; | ||
562 | } | ||
563 | if (pmc >= 4) | ||
564 | return -1; | ||
565 | pmc_inuse |= 1 << pmc; | ||
566 | } else if (pmc <= 4) { | ||
567 | /* Direct event */ | ||
568 | --pmc; | ||
569 | if (isbus && (byte & 2) && | ||
570 | (psel == 8 || psel == 0x10 || psel == 0x28)) | ||
571 | /* add events on higher-numbered bus */ | ||
572 | mmcr1 |= 1ul << (MMCR1_PMC1_ADDER_SEL_SH - pmc); | ||
573 | } else { | ||
574 | /* Instructions or run cycles on PMC5/6 */ | ||
575 | --pmc; | ||
576 | } | ||
577 | if (isbus && unit == PM_GRS) { | ||
578 | bit = psel & 7; | ||
579 | grsel = (event[i] >> PM_GRS_SH) & PM_GRS_MSK; | ||
580 | mmcr1 |= (unsigned long)grsel << grsel_shift[bit]; | ||
581 | } | ||
582 | if (power5p_marked_instr_event(event[i])) | ||
583 | mmcra |= MMCRA_SAMPLE_ENABLE; | ||
584 | if ((psel & 0x58) == 0x40 && (byte & 1) != ((pmc >> 1) & 1)) | ||
585 | /* select alternate byte lane */ | ||
586 | psel |= 0x10; | ||
587 | if (pmc <= 3) | ||
588 | mmcr1 |= psel << MMCR1_PMCSEL_SH(pmc); | ||
589 | hwc[i] = pmc; | ||
590 | } | ||
591 | |||
592 | /* Return MMCRx values */ | ||
593 | mmcr[0] = 0; | ||
594 | if (pmc_inuse & 1) | ||
595 | mmcr[0] = MMCR0_PMC1CE; | ||
596 | if (pmc_inuse & 0x3e) | ||
597 | mmcr[0] |= MMCR0_PMCjCE; | ||
598 | mmcr[1] = mmcr1; | ||
599 | mmcr[2] = mmcra; | ||
600 | return 0; | ||
601 | } | ||
602 | |||
603 | static void power5p_disable_pmc(unsigned int pmc, unsigned long mmcr[]) | ||
604 | { | ||
605 | if (pmc <= 3) | ||
606 | mmcr[1] &= ~(0x7fUL << MMCR1_PMCSEL_SH(pmc)); | ||
607 | } | ||
608 | |||
609 | static int power5p_generic_events[] = { | ||
610 | [PERF_COUNT_HW_CPU_CYCLES] = 0xf, | ||
611 | [PERF_COUNT_HW_INSTRUCTIONS] = 0x100009, | ||
612 | [PERF_COUNT_HW_CACHE_REFERENCES] = 0x1c10a8, /* LD_REF_L1 */ | ||
613 | [PERF_COUNT_HW_CACHE_MISSES] = 0x3c1088, /* LD_MISS_L1 */ | ||
614 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x230e4, /* BR_ISSUED */ | ||
615 | [PERF_COUNT_HW_BRANCH_MISSES] = 0x230e5, /* BR_MPRED_CR */ | ||
616 | }; | ||
617 | |||
618 | #define C(x) PERF_COUNT_HW_CACHE_##x | ||
619 | |||
620 | /* | ||
621 | * Table of generalized cache-related events. | ||
622 | * 0 means not supported, -1 means nonsensical, other values | ||
623 | * are event codes. | ||
624 | */ | ||
625 | static int power5p_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { | ||
626 | [C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
627 | [C(OP_READ)] = { 0x1c10a8, 0x3c1088 }, | ||
628 | [C(OP_WRITE)] = { 0x2c10a8, 0xc10c3 }, | ||
629 | [C(OP_PREFETCH)] = { 0xc70e7, -1 }, | ||
630 | }, | ||
631 | [C(L1I)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
632 | [C(OP_READ)] = { 0, 0 }, | ||
633 | [C(OP_WRITE)] = { -1, -1 }, | ||
634 | [C(OP_PREFETCH)] = { 0, 0 }, | ||
635 | }, | ||
636 | [C(LL)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
637 | [C(OP_READ)] = { 0, 0 }, | ||
638 | [C(OP_WRITE)] = { 0, 0 }, | ||
639 | [C(OP_PREFETCH)] = { 0xc50c3, 0 }, | ||
640 | }, | ||
641 | [C(DTLB)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
642 | [C(OP_READ)] = { 0xc20e4, 0x800c4 }, | ||
643 | [C(OP_WRITE)] = { -1, -1 }, | ||
644 | [C(OP_PREFETCH)] = { -1, -1 }, | ||
645 | }, | ||
646 | [C(ITLB)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
647 | [C(OP_READ)] = { 0, 0x800c0 }, | ||
648 | [C(OP_WRITE)] = { -1, -1 }, | ||
649 | [C(OP_PREFETCH)] = { -1, -1 }, | ||
650 | }, | ||
651 | [C(BPU)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
652 | [C(OP_READ)] = { 0x230e4, 0x230e5 }, | ||
653 | [C(OP_WRITE)] = { -1, -1 }, | ||
654 | [C(OP_PREFETCH)] = { -1, -1 }, | ||
655 | }, | ||
656 | [C(NODE)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
657 | [C(OP_READ)] = { -1, -1 }, | ||
658 | [C(OP_WRITE)] = { -1, -1 }, | ||
659 | [C(OP_PREFETCH)] = { -1, -1 }, | ||
660 | }, | ||
661 | }; | ||
662 | |||
663 | static struct power_pmu power5p_pmu = { | ||
664 | .name = "POWER5+/++", | ||
665 | .n_counter = 6, | ||
666 | .max_alternatives = MAX_ALT, | ||
667 | .add_fields = 0x7000000000055ul, | ||
668 | .test_adder = 0x3000040000000ul, | ||
669 | .compute_mmcr = power5p_compute_mmcr, | ||
670 | .get_constraint = power5p_get_constraint, | ||
671 | .get_alternatives = power5p_get_alternatives, | ||
672 | .disable_pmc = power5p_disable_pmc, | ||
673 | .limited_pmc_event = power5p_limited_pmc_event, | ||
674 | .flags = PPMU_LIMITED_PMC5_6, | ||
675 | .n_generic = ARRAY_SIZE(power5p_generic_events), | ||
676 | .generic_events = power5p_generic_events, | ||
677 | .cache_events = &power5p_cache_events, | ||
678 | }; | ||
679 | |||
680 | static int __init init_power5p_pmu(void) | ||
681 | { | ||
682 | if (!cur_cpu_spec->oprofile_cpu_type || | ||
683 | (strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power5+") | ||
684 | && strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power5++"))) | ||
685 | return -ENODEV; | ||
686 | |||
687 | return register_power_pmu(&power5p_pmu); | ||
688 | } | ||
689 | |||
690 | early_initcall(init_power5p_pmu); | ||
diff --git a/arch/powerpc/perf/power5-pmu.c b/arch/powerpc/perf/power5-pmu.c
new file mode 100644
index 000000000000..e7f06eb7a861
--- /dev/null
+++ b/arch/powerpc/perf/power5-pmu.c
@@ -0,0 +1,629 @@
1 | /* | ||
2 | * Performance counter support for POWER5 (not POWER5++) processors. | ||
3 | * | ||
4 | * Copyright 2009 Paul Mackerras, IBM Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the License, or (at your option) any later version. | ||
10 | */ | ||
11 | #include <linux/kernel.h> | ||
12 | #include <linux/perf_event.h> | ||
13 | #include <linux/string.h> | ||
14 | #include <asm/reg.h> | ||
15 | #include <asm/cputable.h> | ||
16 | |||
17 | /* | ||
18 | * Bits in event code for POWER5 (not POWER5++) | ||
19 | */ | ||
20 | #define PM_PMC_SH 20 /* PMC number (1-based) for direct events */ | ||
21 | #define PM_PMC_MSK 0xf | ||
22 | #define PM_PMC_MSKS (PM_PMC_MSK << PM_PMC_SH) | ||
23 | #define PM_UNIT_SH 16 /* TTMMUX number and setting - unit select */ | ||
24 | #define PM_UNIT_MSK 0xf | ||
25 | #define PM_BYTE_SH 12 /* Byte number of event bus to use */ | ||
26 | #define PM_BYTE_MSK 7 | ||
27 | #define PM_GRS_SH 8 /* Storage subsystem mux select */ | ||
28 | #define PM_GRS_MSK 7 | ||
29 | #define PM_BUSEVENT_MSK 0x80 /* Set if event uses event bus */ | ||
30 | #define PM_PMCSEL_MSK 0x7f | ||
31 | |||
32 | /* Values in PM_UNIT field */ | ||
33 | #define PM_FPU 0 | ||
34 | #define PM_ISU0 1 | ||
35 | #define PM_IFU 2 | ||
36 | #define PM_ISU1 3 | ||
37 | #define PM_IDU 4 | ||
38 | #define PM_ISU0_ALT 6 | ||
39 | #define PM_GRS 7 | ||
40 | #define PM_LSU0 8 | ||
41 | #define PM_LSU1 0xc | ||
42 | #define PM_LASTUNIT 0xc | ||
43 | |||
44 | /* | ||
45 | * Bits in MMCR1 for POWER5 | ||
46 | */ | ||
47 | #define MMCR1_TTM0SEL_SH 62 | ||
48 | #define MMCR1_TTM1SEL_SH 60 | ||
49 | #define MMCR1_TTM2SEL_SH 58 | ||
50 | #define MMCR1_TTM3SEL_SH 56 | ||
51 | #define MMCR1_TTMSEL_MSK 3 | ||
52 | #define MMCR1_TD_CP_DBG0SEL_SH 54 | ||
53 | #define MMCR1_TD_CP_DBG1SEL_SH 52 | ||
54 | #define MMCR1_TD_CP_DBG2SEL_SH 50 | ||
55 | #define MMCR1_TD_CP_DBG3SEL_SH 48 | ||
56 | #define MMCR1_GRS_L2SEL_SH 46 | ||
57 | #define MMCR1_GRS_L2SEL_MSK 3 | ||
58 | #define MMCR1_GRS_L3SEL_SH 44 | ||
59 | #define MMCR1_GRS_L3SEL_MSK 3 | ||
60 | #define MMCR1_GRS_MCSEL_SH 41 | ||
61 | #define MMCR1_GRS_MCSEL_MSK 7 | ||
62 | #define MMCR1_GRS_FABSEL_SH 39 | ||
63 | #define MMCR1_GRS_FABSEL_MSK 3 | ||
64 | #define MMCR1_PMC1_ADDER_SEL_SH 35 | ||
65 | #define MMCR1_PMC2_ADDER_SEL_SH 34 | ||
66 | #define MMCR1_PMC3_ADDER_SEL_SH 33 | ||
67 | #define MMCR1_PMC4_ADDER_SEL_SH 32 | ||
68 | #define MMCR1_PMC1SEL_SH 25 | ||
69 | #define MMCR1_PMC2SEL_SH 17 | ||
70 | #define MMCR1_PMC3SEL_SH 9 | ||
71 | #define MMCR1_PMC4SEL_SH 1 | ||
72 | #define MMCR1_PMCSEL_SH(n) (MMCR1_PMC1SEL_SH - (n) * 8) | ||
73 | #define MMCR1_PMCSEL_MSK 0x7f | ||
74 | |||
75 | /* | ||
76 | * Layout of constraint bits: | ||
77 | * 6666555555555544444444443333333333222222222211111111110000000000 | ||
78 | * 3210987654321098765432109876543210987654321098765432109876543210 | ||
79 | * <><>[ ><><>< ><> [ >[ >[ >< >< >< >< ><><><><><><> | ||
80 | * T0T1 NC G0G1G2 G3 UC PS1PS2 B0 B1 B2 B3 P6P5P4P3P2P1 | ||
81 | * | ||
82 | * T0 - TTM0 constraint | ||
83 | * 54-55: TTM0SEL value (0=FPU, 2=IFU, 3=ISU1) 0xc0_0000_0000_0000 | ||
84 | * | ||
85 | * T1 - TTM1 constraint | ||
86 | * 52-53: TTM1SEL value (0=IDU, 3=GRS) 0x30_0000_0000_0000 | ||
87 | * | ||
88 | * NC - number of counters | ||
89 | * 51: NC error 0x0008_0000_0000_0000 | ||
90 | * 48-50: number of events needing PMC1-4 0x0007_0000_0000_0000 | ||
91 | * | ||
92 | * G0..G3 - GRS mux constraints | ||
93 | * 46-47: GRS_L2SEL value | ||
94 | * 44-45: GRS_L3SEL value | ||
95 | * 41-43: GRS_MCSEL value | ||
96 | * 39-40: GRS_FABSEL value | ||
97 | * Note that these match up with their bit positions in MMCR1 | ||
98 | * | ||
99 | * UC - unit constraint: can't have all three of FPU|IFU|ISU1, ISU0, IDU|GRS | ||
100 | * 37: UC3 error 0x20_0000_0000 | ||
101 | * 36: FPU|IFU|ISU1 events needed 0x10_0000_0000 | ||
102 | * 35: ISU0 events needed 0x08_0000_0000 | ||
103 | * 34: IDU|GRS events needed 0x04_0000_0000 | ||
104 | * | ||
105 | * PS1 | ||
106 | * 33: PS1 error 0x2_0000_0000 | ||
107 | * 31-32: count of events needing PMC1/2 0x1_8000_0000 | ||
108 | * | ||
109 | * PS2 | ||
110 | * 30: PS2 error 0x4000_0000 | ||
111 | * 28-29: count of events needing PMC3/4 0x3000_0000 | ||
112 | * | ||
113 | * B0 | ||
114 | * 24-27: Byte 0 event source 0x0f00_0000 | ||
115 | * Encoding as for the event code | ||
116 | * | ||
117 | * B1, B2, B3 | ||
118 | * 20-23, 16-19, 12-15: Byte 1, 2, 3 event sources | ||
119 | * | ||
120 | * P1..P6 | ||
121 | * 0-11: Count of events needing PMC1..PMC6 | ||
122 | */ | ||
123 | |||
124 | static const int grsel_shift[8] = { | ||
125 | MMCR1_GRS_L2SEL_SH, MMCR1_GRS_L2SEL_SH, MMCR1_GRS_L2SEL_SH, | ||
126 | MMCR1_GRS_L3SEL_SH, MMCR1_GRS_L3SEL_SH, MMCR1_GRS_L3SEL_SH, | ||
127 | MMCR1_GRS_MCSEL_SH, MMCR1_GRS_FABSEL_SH | ||
128 | }; | ||
129 | |||
130 | /* Masks and values for using events from the various units */ | ||
131 | static unsigned long unit_cons[PM_LASTUNIT+1][2] = { | ||
132 | [PM_FPU] = { 0xc0002000000000ul, 0x00001000000000ul }, | ||
133 | [PM_ISU0] = { 0x00002000000000ul, 0x00000800000000ul }, | ||
134 | [PM_ISU1] = { 0xc0002000000000ul, 0xc0001000000000ul }, | ||
135 | [PM_IFU] = { 0xc0002000000000ul, 0x80001000000000ul }, | ||
136 | [PM_IDU] = { 0x30002000000000ul, 0x00000400000000ul }, | ||
137 | [PM_GRS] = { 0x30002000000000ul, 0x30000400000000ul }, | ||
138 | }; | ||
139 | |||
140 | static int power5_get_constraint(u64 event, unsigned long *maskp, | ||
141 | unsigned long *valp) | ||
142 | { | ||
143 | int pmc, byte, unit, sh; | ||
144 | int bit, fmask; | ||
145 | unsigned long mask = 0, value = 0; | ||
146 | int grp = -1; | ||
147 | |||
148 | pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; | ||
149 | if (pmc) { | ||
150 | if (pmc > 6) | ||
151 | return -1; | ||
152 | sh = (pmc - 1) * 2; | ||
153 | mask |= 2 << sh; | ||
154 | value |= 1 << sh; | ||
155 | if (pmc <= 4) | ||
156 | grp = (pmc - 1) >> 1; | ||
157 | else if (event != 0x500009 && event != 0x600005) | ||
158 | return -1; | ||
159 | } | ||
160 | if (event & PM_BUSEVENT_MSK) { | ||
161 | unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK; | ||
162 | if (unit > PM_LASTUNIT) | ||
163 | return -1; | ||
164 | if (unit == PM_ISU0_ALT) | ||
165 | unit = PM_ISU0; | ||
166 | mask |= unit_cons[unit][0]; | ||
167 | value |= unit_cons[unit][1]; | ||
168 | byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK; | ||
169 | if (byte >= 4) { | ||
170 | if (unit != PM_LSU1) | ||
171 | return -1; | ||
172 | /* Map LSU1 low word (bytes 4-7) to unit LSU1+1 */ | ||
173 | ++unit; | ||
174 | byte &= 3; | ||
175 | } | ||
176 | if (unit == PM_GRS) { | ||
177 | bit = event & 7; | ||
178 | fmask = (bit == 6)? 7: 3; | ||
179 | sh = grsel_shift[bit]; | ||
180 | mask |= (unsigned long)fmask << sh; | ||
181 | value |= (unsigned long)((event >> PM_GRS_SH) & fmask) | ||
182 | << sh; | ||
183 | } | ||
184 | /* | ||
185 | * Bus events on bytes 0 and 2 can be counted | ||
186 | * on PMC1/2; bytes 1 and 3 on PMC3/4. | ||
187 | */ | ||
188 | if (!pmc) | ||
189 | grp = byte & 1; | ||
190 | /* Set byte lane select field */ | ||
191 | mask |= 0xfUL << (24 - 4 * byte); | ||
192 | value |= (unsigned long)unit << (24 - 4 * byte); | ||
193 | } | ||
194 | if (grp == 0) { | ||
195 | /* increment PMC1/2 field */ | ||
196 | mask |= 0x200000000ul; | ||
197 | value |= 0x080000000ul; | ||
198 | } else if (grp == 1) { | ||
199 | /* increment PMC3/4 field */ | ||
200 | mask |= 0x40000000ul; | ||
201 | value |= 0x10000000ul; | ||
202 | } | ||
203 | if (pmc < 5) { | ||
204 | /* need a counter from PMC1-4 set */ | ||
205 | mask |= 0x8000000000000ul; | ||
206 | value |= 0x1000000000000ul; | ||
207 | } | ||
208 | *maskp = mask; | ||
209 | *valp = value; | ||
210 | return 0; | ||
211 | } | ||
212 | |||
213 | #define MAX_ALT 3 /* at most 3 alternatives for any event */ | ||
214 | |||
215 | static const unsigned int event_alternatives[][MAX_ALT] = { | ||
216 | { 0x120e4, 0x400002 }, /* PM_GRP_DISP_REJECT */ | ||
217 | { 0x410c7, 0x441084 }, /* PM_THRD_L2MISS_BOTH_CYC */ | ||
218 | { 0x100005, 0x600005 }, /* PM_RUN_CYC */ | ||
219 | { 0x100009, 0x200009, 0x500009 }, /* PM_INST_CMPL */ | ||
220 | { 0x300009, 0x400009 }, /* PM_INST_DISP */ | ||
221 | }; | ||
222 | |||
223 | /* | ||
224 | * Scan the alternatives table for a match and return the | ||
225 | * index into the alternatives table if found, else -1. | ||
226 | */ | ||
227 | static int find_alternative(u64 event) | ||
228 | { | ||
229 | int i, j; | ||
230 | |||
231 | for (i = 0; i < ARRAY_SIZE(event_alternatives); ++i) { | ||
232 | if (event < event_alternatives[i][0]) | ||
233 | break; | ||
234 | for (j = 0; j < MAX_ALT && event_alternatives[i][j]; ++j) | ||
235 | if (event == event_alternatives[i][j]) | ||
236 | return i; | ||
237 | } | ||
238 | return -1; | ||
239 | } | ||
240 | |||
241 | static const unsigned char bytedecode_alternatives[4][4] = { | ||
242 | /* PMC 1 */ { 0x21, 0x23, 0x25, 0x27 }, | ||
243 | /* PMC 2 */ { 0x07, 0x17, 0x0e, 0x1e }, | ||
244 | /* PMC 3 */ { 0x20, 0x22, 0x24, 0x26 }, | ||
245 | /* PMC 4 */ { 0x07, 0x17, 0x0e, 0x1e } | ||
246 | }; | ||
247 | |||
248 | /* | ||
249 | * Some direct events for decodes of event bus byte 3 have alternative | ||
250 | * PMCSEL values on other counters. This returns the alternative | ||
251 | * event code for those that do, or -1 otherwise. | ||
252 | */ | ||
253 | static s64 find_alternative_bdecode(u64 event) | ||
254 | { | ||
255 | int pmc, altpmc, pp, j; | ||
256 | |||
257 | pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; | ||
258 | if (pmc == 0 || pmc > 4) | ||
259 | return -1; | ||
260 | altpmc = 5 - pmc; /* 1 <-> 4, 2 <-> 3 */ | ||
261 | pp = event & PM_PMCSEL_MSK; | ||
262 | for (j = 0; j < 4; ++j) { | ||
263 | if (bytedecode_alternatives[pmc - 1][j] == pp) { | ||
264 | return (event & ~(PM_PMC_MSKS | PM_PMCSEL_MSK)) | | ||
265 | (altpmc << PM_PMC_SH) | | ||
266 | bytedecode_alternatives[altpmc - 1][j]; | ||
267 | } | ||
268 | } | ||
269 | return -1; | ||
270 | } | ||
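/*
 * Editor's illustrative sketch, not part of the original patch: one
 * concrete remapping through find_alternative_bdecode().  With
 * PM_PMC_SH = 20 and PM_PMCSEL_MSK = 0x7f, event 0x100023 (PMC1,
 * PMCSEL 0x23, row 0 column 1 above) should come back as 0x400017
 * (PMC4, PMCSEL 0x17, row 3 column 1).  The guard macro and function
 * name are hypothetical.
 */
#ifdef POWER5_PMU_SELFTEST
static void __init power5_check_bdecode_example(void)
{
        u64 ev = (1ull << PM_PMC_SH) | 0x23;    /* 0x100023 */
        s64 want = (4ll << PM_PMC_SH) | 0x17;   /* 0x400017 */

        WARN_ON(find_alternative_bdecode(ev) != want);
}
#endif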
271 | |||
272 | static int power5_get_alternatives(u64 event, unsigned int flags, u64 alt[]) | ||
273 | { | ||
274 | int i, j, nalt = 1; | ||
275 | s64 ae; | ||
276 | |||
277 | alt[0] = event; | ||
278 | nalt = 1; | ||
279 | i = find_alternative(event); | ||
280 | if (i >= 0) { | ||
281 | for (j = 0; j < MAX_ALT; ++j) { | ||
282 | ae = event_alternatives[i][j]; | ||
283 | if (ae && ae != event) | ||
284 | alt[nalt++] = ae; | ||
285 | } | ||
286 | } else { | ||
287 | ae = find_alternative_bdecode(event); | ||
288 | if (ae > 0) | ||
289 | alt[nalt++] = ae; | ||
290 | } | ||
291 | return nalt; | ||
292 | } | ||
293 | |||
294 | /* | ||
295 | * Map of which direct events on which PMCs are marked instruction events. | ||
296 | * Indexed by PMCSEL value, bit i (LE) set if PMC i is a marked event. | ||
297 | * Bit 0 is set if it is marked for all PMCs. | ||
298 | * The 0x80 bit indicates a byte decode PMCSEL value. | ||
299 | */ | ||
300 | static unsigned char direct_event_is_marked[0x28] = { | ||
301 | 0, /* 00 */ | ||
302 | 0x1f, /* 01 PM_IOPS_CMPL */ | ||
303 | 0x2, /* 02 PM_MRK_GRP_DISP */ | ||
304 | 0xe, /* 03 PM_MRK_ST_CMPL, PM_MRK_ST_GPS, PM_MRK_ST_CMPL_INT */ | ||
305 | 0, /* 04 */ | ||
306 | 0x1c, /* 05 PM_MRK_BRU_FIN, PM_MRK_INST_FIN, PM_MRK_CRU_FIN */ | ||
307 | 0x80, /* 06 */ | ||
308 | 0x80, /* 07 */ | ||
309 | 0, 0, 0, /* 08 - 0a */ | ||
310 | 0x18, /* 0b PM_THRESH_TIMEO, PM_MRK_GRP_TIMEO */ | ||
311 | 0, /* 0c */ | ||
312 | 0x80, /* 0d */ | ||
313 | 0x80, /* 0e */ | ||
314 | 0, /* 0f */ | ||
315 | 0, /* 10 */ | ||
316 | 0x14, /* 11 PM_MRK_GRP_BR_REDIR, PM_MRK_GRP_IC_MISS */ | ||
317 | 0, /* 12 */ | ||
318 | 0x10, /* 13 PM_MRK_GRP_CMPL */ | ||
319 | 0x1f, /* 14 PM_GRP_MRK, PM_MRK_{FXU,FPU,LSU}_FIN */ | ||
320 | 0x2, /* 15 PM_MRK_GRP_ISSUED */ | ||
321 | 0x80, /* 16 */ | ||
322 | 0x80, /* 17 */ | ||
323 | 0, 0, 0, 0, 0, /* 18 - 1c */ | ||
324 | 0x80, /* 1d */ | ||
325 | 0x80, /* 1e */ | ||
326 | 0, /* 1f */ | ||
327 | 0x80, /* 20 */ | ||
328 | 0x80, /* 21 */ | ||
329 | 0x80, /* 22 */ | ||
330 | 0x80, /* 23 */ | ||
331 | 0x80, /* 24 */ | ||
332 | 0x80, /* 25 */ | ||
333 | 0x80, /* 26 */ | ||
334 | 0x80, /* 27 */ | ||
335 | }; | ||
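/*
 * Editor's illustrative sketch, not part of the original patch: how
 * the table above is consulted.  direct_event_is_marked[0x05] == 0x1c
 * has bits 2-4 set, so PMCSEL 0x05 is a marked event on PMC 2, 3 or 4
 * (PM_MRK_BRU_FIN and friends) but not on PMC 1.  The helper name is
 * hypothetical; it just isolates the lookup performed below.
 */
static inline int power5_psel_marked_on_pmc(int psel, int pmc)
{
        if (psel >= (int)sizeof(direct_event_is_marked))
                return 0;
        return (direct_event_is_marked[psel] & (1 << pmc)) != 0;
}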
336 | |||
337 | /* | ||
338 | * Returns 1 if event counts things relating to marked instructions | ||
339 | * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not. | ||
340 | */ | ||
341 | static int power5_marked_instr_event(u64 event) | ||
342 | { | ||
343 | int pmc, psel; | ||
344 | int bit, byte, unit; | ||
345 | u32 mask; | ||
346 | |||
347 | pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; | ||
348 | psel = event & PM_PMCSEL_MSK; | ||
349 | if (pmc >= 5) | ||
350 | return 0; | ||
351 | |||
352 | bit = -1; | ||
353 | if (psel < sizeof(direct_event_is_marked)) { | ||
354 | if (direct_event_is_marked[psel] & (1 << pmc)) | ||
355 | return 1; | ||
356 | if (direct_event_is_marked[psel] & 0x80) | ||
357 | bit = 4; | ||
358 | else if (psel == 0x08) | ||
359 | bit = pmc - 1; | ||
360 | else if (psel == 0x10) | ||
361 | bit = 4 - pmc; | ||
362 | else if (psel == 0x1b && (pmc == 1 || pmc == 3)) | ||
363 | bit = 4; | ||
364 | } else if ((psel & 0x58) == 0x40) | ||
365 | bit = psel & 7; | ||
366 | |||
367 | if (!(event & PM_BUSEVENT_MSK)) | ||
368 | return 0; | ||
369 | |||
370 | byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK; | ||
371 | unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK; | ||
372 | if (unit == PM_LSU0) { | ||
373 | /* byte 1 bits 0-7, byte 2 bits 0,2-4,6 */ | ||
374 | mask = 0x5dff00; | ||
375 | } else if (unit == PM_LSU1 && byte >= 4) { | ||
376 | byte -= 4; | ||
377 | /* byte 4 bits 1,3,5,7, byte 5 bits 6-7, byte 7 bits 0-4,6 */ | ||
378 | mask = 0x5f00c0aa; | ||
379 | } else | ||
380 | return 0; | ||
381 | |||
382 | return (mask >> (byte * 8 + bit)) & 1; | ||
383 | } | ||
384 | |||
385 | static int power5_compute_mmcr(u64 event[], int n_ev, | ||
386 | unsigned int hwc[], unsigned long mmcr[]) | ||
387 | { | ||
388 | unsigned long mmcr1 = 0; | ||
389 | unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS; | ||
390 | unsigned int pmc, unit, byte, psel; | ||
391 | unsigned int ttm, grp; | ||
392 | int i, isbus, bit, grsel; | ||
393 | unsigned int pmc_inuse = 0; | ||
394 | unsigned int pmc_grp_use[2]; | ||
395 | unsigned char busbyte[4]; | ||
396 | unsigned char unituse[16]; | ||
397 | int ttmuse; | ||
398 | |||
399 | if (n_ev > 6) | ||
400 | return -1; | ||
401 | |||
402 | /* First pass to count resource use */ | ||
403 | pmc_grp_use[0] = pmc_grp_use[1] = 0; | ||
404 | memset(busbyte, 0, sizeof(busbyte)); | ||
405 | memset(unituse, 0, sizeof(unituse)); | ||
406 | for (i = 0; i < n_ev; ++i) { | ||
407 | pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK; | ||
408 | if (pmc) { | ||
409 | if (pmc > 6) | ||
410 | return -1; | ||
411 | if (pmc_inuse & (1 << (pmc - 1))) | ||
412 | return -1; | ||
413 | pmc_inuse |= 1 << (pmc - 1); | ||
414 | /* count 1/2 vs 3/4 use */ | ||
415 | if (pmc <= 4) | ||
416 | ++pmc_grp_use[(pmc - 1) >> 1]; | ||
417 | } | ||
418 | if (event[i] & PM_BUSEVENT_MSK) { | ||
419 | unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK; | ||
420 | byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK; | ||
421 | if (unit > PM_LASTUNIT) | ||
422 | return -1; | ||
423 | if (unit == PM_ISU0_ALT) | ||
424 | unit = PM_ISU0; | ||
425 | if (byte >= 4) { | ||
426 | if (unit != PM_LSU1) | ||
427 | return -1; | ||
428 | ++unit; | ||
429 | byte &= 3; | ||
430 | } | ||
431 | if (!pmc) | ||
432 | ++pmc_grp_use[byte & 1]; | ||
433 | if (busbyte[byte] && busbyte[byte] != unit) | ||
434 | return -1; | ||
435 | busbyte[byte] = unit; | ||
436 | unituse[unit] = 1; | ||
437 | } | ||
438 | } | ||
439 | if (pmc_grp_use[0] > 2 || pmc_grp_use[1] > 2) | ||
440 | return -1; | ||
441 | |||
442 | /* | ||
443 | * Assign resources and set multiplexer selects. | ||
444 | * | ||
445 | * PM_ISU0 can go either on TTM0 or TTM1, but that's the only | ||
446 | * choice we have to deal with. | ||
447 | */ | ||
448 | if (unituse[PM_ISU0] & | ||
449 | (unituse[PM_FPU] | unituse[PM_IFU] | unituse[PM_ISU1])) { | ||
450 | unituse[PM_ISU0_ALT] = 1; /* move ISU to TTM1 */ | ||
451 | unituse[PM_ISU0] = 0; | ||
452 | } | ||
453 | /* Set TTM[01]SEL fields. */ | ||
454 | ttmuse = 0; | ||
455 | for (i = PM_FPU; i <= PM_ISU1; ++i) { | ||
456 | if (!unituse[i]) | ||
457 | continue; | ||
458 | if (ttmuse++) | ||
459 | return -1; | ||
460 | mmcr1 |= (unsigned long)i << MMCR1_TTM0SEL_SH; | ||
461 | } | ||
462 | ttmuse = 0; | ||
463 | for (; i <= PM_GRS; ++i) { | ||
464 | if (!unituse[i]) | ||
465 | continue; | ||
466 | if (ttmuse++) | ||
467 | return -1; | ||
468 | mmcr1 |= (unsigned long)(i & 3) << MMCR1_TTM1SEL_SH; | ||
469 | } | ||
470 | if (ttmuse > 1) | ||
471 | return -1; | ||
472 | |||
473 | /* Set byte lane select fields, TTM[23]SEL and GRS_*SEL. */ | ||
474 | for (byte = 0; byte < 4; ++byte) { | ||
475 | unit = busbyte[byte]; | ||
476 | if (!unit) | ||
477 | continue; | ||
478 | if (unit == PM_ISU0 && unituse[PM_ISU0_ALT]) { | ||
479 | /* get ISU0 through TTM1 rather than TTM0 */ | ||
480 | unit = PM_ISU0_ALT; | ||
481 | } else if (unit == PM_LSU1 + 1) { | ||
482 | /* select lower word of LSU1 for this byte */ | ||
483 | mmcr1 |= 1ul << (MMCR1_TTM3SEL_SH + 3 - byte); | ||
484 | } | ||
485 | ttm = unit >> 2; | ||
486 | mmcr1 |= (unsigned long)ttm | ||
487 | << (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte); | ||
488 | } | ||
489 | |||
490 | /* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */ | ||
491 | for (i = 0; i < n_ev; ++i) { | ||
492 | pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK; | ||
493 | unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK; | ||
494 | byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK; | ||
495 | psel = event[i] & PM_PMCSEL_MSK; | ||
496 | isbus = event[i] & PM_BUSEVENT_MSK; | ||
497 | if (!pmc) { | ||
498 | /* Bus event or any-PMC direct event */ | ||
499 | for (pmc = 0; pmc < 4; ++pmc) { | ||
500 | if (pmc_inuse & (1 << pmc)) | ||
501 | continue; | ||
502 | grp = (pmc >> 1) & 1; | ||
503 | if (isbus) { | ||
504 | if (grp == (byte & 1)) | ||
505 | break; | ||
506 | } else if (pmc_grp_use[grp] < 2) { | ||
507 | ++pmc_grp_use[grp]; | ||
508 | break; | ||
509 | } | ||
510 | } | ||
511 | pmc_inuse |= 1 << pmc; | ||
512 | } else if (pmc <= 4) { | ||
513 | /* Direct event */ | ||
514 | --pmc; | ||
515 | if ((psel == 8 || psel == 0x10) && isbus && (byte & 2)) | ||
516 | /* add events on higher-numbered bus */ | ||
517 | mmcr1 |= 1ul << (MMCR1_PMC1_ADDER_SEL_SH - pmc); | ||
518 | } else { | ||
519 | /* Instructions or run cycles on PMC5/6 */ | ||
520 | --pmc; | ||
521 | } | ||
522 | if (isbus && unit == PM_GRS) { | ||
523 | bit = psel & 7; | ||
524 | grsel = (event[i] >> PM_GRS_SH) & PM_GRS_MSK; | ||
525 | mmcr1 |= (unsigned long)grsel << grsel_shift[bit]; | ||
526 | } | ||
527 | if (power5_marked_instr_event(event[i])) | ||
528 | mmcra |= MMCRA_SAMPLE_ENABLE; | ||
529 | if (pmc <= 3) | ||
530 | mmcr1 |= psel << MMCR1_PMCSEL_SH(pmc); | ||
531 | hwc[i] = pmc; | ||
532 | } | ||
533 | |||
534 | /* Return MMCRx values */ | ||
535 | mmcr[0] = 0; | ||
536 | if (pmc_inuse & 1) | ||
537 | mmcr[0] = MMCR0_PMC1CE; | ||
538 | if (pmc_inuse & 0x3e) | ||
539 | mmcr[0] |= MMCR0_PMCjCE; | ||
540 | mmcr[1] = mmcr1; | ||
541 | mmcr[2] = mmcra; | ||
542 | return 0; | ||
543 | } | ||
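/*
 * Editor's illustrative sketch, not part of the original patch, of
 * how the core code drives power5_compute_mmcr().  PM_INST_CMPL
 * (0x100009) pins PMC1, so the any-PMC PM_CYC (0xf) gets steered to
 * PMC2 (hwc == 1, zero-based) by the second pass above.  The guard
 * macro is hypothetical.
 */
#ifdef POWER5_PMU_SELFTEST
static void __init power5_mmcr_example(void)
{
        u64 ev[2] = { 0xf, 0x100009 };  /* PM_CYC, PM_INST_CMPL */
        unsigned int hwc[2];
        unsigned long mmcr[3];

        if (!power5_compute_mmcr(ev, 2, hwc, mmcr))
                pr_debug("hwc=%u,%u MMCR0=%lx MMCR1=%lx MMCRA=%lx\n",
                         hwc[0], hwc[1], mmcr[0], mmcr[1], mmcr[2]);
}
#endif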
544 | |||
545 | static void power5_disable_pmc(unsigned int pmc, unsigned long mmcr[]) | ||
546 | { | ||
547 | if (pmc <= 3) | ||
548 | mmcr[1] &= ~(0x7fUL << MMCR1_PMCSEL_SH(pmc)); | ||
549 | } | ||
550 | |||
551 | static int power5_generic_events[] = { | ||
552 | [PERF_COUNT_HW_CPU_CYCLES] = 0xf, | ||
553 | [PERF_COUNT_HW_INSTRUCTIONS] = 0x100009, | ||
554 | [PERF_COUNT_HW_CACHE_REFERENCES] = 0x4c1090, /* LD_REF_L1 */ | ||
555 | [PERF_COUNT_HW_CACHE_MISSES] = 0x3c1088, /* LD_MISS_L1 */ | ||
556 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x230e4, /* BR_ISSUED */ | ||
557 | [PERF_COUNT_HW_BRANCH_MISSES] = 0x230e5, /* BR_MPRED_CR */ | ||
558 | }; | ||
559 | |||
560 | #define C(x) PERF_COUNT_HW_CACHE_##x | ||
561 | |||
562 | /* | ||
563 | * Table of generalized cache-related events. | ||
564 | * 0 means not supported, -1 means nonsensical, other values | ||
565 | * are event codes. | ||
566 | */ | ||
567 | static int power5_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { | ||
568 | [C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
569 | [C(OP_READ)] = { 0x4c1090, 0x3c1088 }, | ||
570 | [C(OP_WRITE)] = { 0x3c1090, 0xc10c3 }, | ||
571 | [C(OP_PREFETCH)] = { 0xc70e7, 0 }, | ||
572 | }, | ||
573 | [C(L1I)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
574 | [C(OP_READ)] = { 0, 0 }, | ||
575 | [C(OP_WRITE)] = { -1, -1 }, | ||
576 | [C(OP_PREFETCH)] = { 0, 0 }, | ||
577 | }, | ||
578 | [C(LL)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
579 | [C(OP_READ)] = { 0, 0x3c309b }, | ||
580 | [C(OP_WRITE)] = { 0, 0 }, | ||
581 | [C(OP_PREFETCH)] = { 0xc50c3, 0 }, | ||
582 | }, | ||
583 | [C(DTLB)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
584 | [C(OP_READ)] = { 0x2c4090, 0x800c4 }, | ||
585 | [C(OP_WRITE)] = { -1, -1 }, | ||
586 | [C(OP_PREFETCH)] = { -1, -1 }, | ||
587 | }, | ||
588 | [C(ITLB)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
589 | [C(OP_READ)] = { 0, 0x800c0 }, | ||
590 | [C(OP_WRITE)] = { -1, -1 }, | ||
591 | [C(OP_PREFETCH)] = { -1, -1 }, | ||
592 | }, | ||
593 | [C(BPU)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
594 | [C(OP_READ)] = { 0x230e4, 0x230e5 }, | ||
595 | [C(OP_WRITE)] = { -1, -1 }, | ||
596 | [C(OP_PREFETCH)] = { -1, -1 }, | ||
597 | }, | ||
598 | [C(NODE)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
599 | [C(OP_READ)] = { -1, -1 }, | ||
600 | [C(OP_WRITE)] = { -1, -1 }, | ||
601 | [C(OP_PREFETCH)] = { -1, -1 }, | ||
602 | }, | ||
603 | }; | ||
604 | |||
605 | static struct power_pmu power5_pmu = { | ||
606 | .name = "POWER5", | ||
607 | .n_counter = 6, | ||
608 | .max_alternatives = MAX_ALT, | ||
609 | .add_fields = 0x7000090000555ul, | ||
610 | .test_adder = 0x3000490000000ul, | ||
611 | .compute_mmcr = power5_compute_mmcr, | ||
612 | .get_constraint = power5_get_constraint, | ||
613 | .get_alternatives = power5_get_alternatives, | ||
614 | .disable_pmc = power5_disable_pmc, | ||
615 | .n_generic = ARRAY_SIZE(power5_generic_events), | ||
616 | .generic_events = power5_generic_events, | ||
617 | .cache_events = &power5_cache_events, | ||
618 | }; | ||
619 | |||
620 | static int __init init_power5_pmu(void) | ||
621 | { | ||
622 | if (!cur_cpu_spec->oprofile_cpu_type || | ||
623 | strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power5")) | ||
624 | return -ENODEV; | ||
625 | |||
626 | return register_power_pmu(&power5_pmu); | ||
627 | } | ||
628 | |||
629 | early_initcall(init_power5_pmu); | ||
diff --git a/arch/powerpc/perf/power6-pmu.c b/arch/powerpc/perf/power6-pmu.c
new file mode 100644
index 000000000000..31128e086fed
--- /dev/null
+++ b/arch/powerpc/perf/power6-pmu.c
@@ -0,0 +1,552 @@
1 | /* | ||
2 | * Performance counter support for POWER6 processors. | ||
3 | * | ||
4 | * Copyright 2008-2009 Paul Mackerras, IBM Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the License, or (at your option) any later version. | ||
10 | */ | ||
11 | #include <linux/kernel.h> | ||
12 | #include <linux/perf_event.h> | ||
13 | #include <linux/string.h> | ||
14 | #include <asm/reg.h> | ||
15 | #include <asm/cputable.h> | ||
16 | |||
17 | /* | ||
18 | * Bits in event code for POWER6 | ||
19 | */ | ||
20 | #define PM_PMC_SH 20 /* PMC number (1-based) for direct events */ | ||
21 | #define PM_PMC_MSK 0x7 | ||
22 | #define PM_PMC_MSKS (PM_PMC_MSK << PM_PMC_SH) | ||
23 | #define PM_UNIT_SH 16 /* Unit event comes from (TTMxSEL encoding) */ | ||
24 | #define PM_UNIT_MSK 0xf | ||
25 | #define PM_UNIT_MSKS (PM_UNIT_MSK << PM_UNIT_SH) | ||
26 | #define PM_LLAV 0x8000 /* Load lookahead match value */ | ||
27 | #define PM_LLA 0x4000 /* Load lookahead match enable */ | ||
28 | #define PM_BYTE_SH 12 /* Byte of event bus to use */ | ||
29 | #define PM_BYTE_MSK 3 | ||
30 | #define PM_SUBUNIT_SH 8 /* Subunit event comes from (NEST_SEL enc.) */ | ||
31 | #define PM_SUBUNIT_MSK 7 | ||
32 | #define PM_SUBUNIT_MSKS (PM_SUBUNIT_MSK << PM_SUBUNIT_SH) | ||
33 | #define PM_PMCSEL_MSK 0xff /* PMCxSEL value */ | ||
34 | #define PM_BUSEVENT_MSK 0xf3700 | ||
35 | |||
36 | /* | ||
37 | * Bits in MMCR1 for POWER6 | ||
38 | */ | ||
39 | #define MMCR1_TTM0SEL_SH 60 | ||
40 | #define MMCR1_TTMSEL_SH(n) (MMCR1_TTM0SEL_SH - (n) * 4) | ||
41 | #define MMCR1_TTMSEL_MSK 0xf | ||
42 | #define MMCR1_TTMSEL(m, n) (((m) >> MMCR1_TTMSEL_SH(n)) & MMCR1_TTMSEL_MSK) | ||
43 | #define MMCR1_NESTSEL_SH 45 | ||
44 | #define MMCR1_NESTSEL_MSK 0x7 | ||
45 | #define MMCR1_NESTSEL(m) (((m) >> MMCR1_NESTSEL_SH) & MMCR1_NESTSEL_MSK) | ||
46 | #define MMCR1_PMC1_LLA (1ul << 44) | ||
47 | #define MMCR1_PMC1_LLA_VALUE (1ul << 39) | ||
48 | #define MMCR1_PMC1_ADDR_SEL (1ul << 35) | ||
49 | #define MMCR1_PMC1SEL_SH 24 | ||
50 | #define MMCR1_PMCSEL_SH(n) (MMCR1_PMC1SEL_SH - (n) * 8) | ||
51 | #define MMCR1_PMCSEL_MSK 0xff | ||
52 | |||
53 | /* | ||
54 | * Map of which direct events on which PMCs are marked instruction events. | ||
55 | * Indexed by PMCSEL value >> 1. | ||
56 | * Bottom 4 bits are a map of which PMCs are interesting, | ||
57 | * top 4 bits say what sort of event: | ||
58 | * 0 = direct marked event, | ||
59 | * 1 = byte decode event, | ||
60 | * 4 = add/and event (PMC1 -> bits 0 & 4), | ||
61 | * 5 = add/and event (PMC1 -> bits 1 & 5), | ||
62 | * 6 = add/and event (PMC1 -> bits 2 & 6), | ||
63 | * 7 = add/and event (PMC1 -> bits 3 & 7). | ||
64 | */ | ||
65 | static unsigned char direct_event_is_marked[0x60 >> 1] = { | ||
66 | 0, /* 00 */ | ||
67 | 0, /* 02 */ | ||
68 | 0, /* 04 */ | ||
69 | 0x07, /* 06 PM_MRK_ST_CMPL, PM_MRK_ST_GPS, PM_MRK_ST_CMPL_INT */ | ||
70 | 0x04, /* 08 PM_MRK_DFU_FIN */ | ||
71 | 0x06, /* 0a PM_MRK_IFU_FIN, PM_MRK_INST_FIN */ | ||
72 | 0, /* 0c */ | ||
73 | 0, /* 0e */ | ||
74 | 0x02, /* 10 PM_MRK_INST_DISP */ | ||
75 | 0x08, /* 12 PM_MRK_LSU_DERAT_MISS */ | ||
76 | 0, /* 14 */ | ||
77 | 0, /* 16 */ | ||
78 | 0x0c, /* 18 PM_THRESH_TIMEO, PM_MRK_INST_FIN */ | ||
79 | 0x0f, /* 1a PM_MRK_INST_DISP, PM_MRK_{FXU,FPU,LSU}_FIN */ | ||
80 | 0x01, /* 1c PM_MRK_INST_ISSUED */ | ||
81 | 0, /* 1e */ | ||
82 | 0, /* 20 */ | ||
83 | 0, /* 22 */ | ||
84 | 0, /* 24 */ | ||
85 | 0, /* 26 */ | ||
86 | 0x15, /* 28 PM_MRK_DATA_FROM_L2MISS, PM_MRK_DATA_FROM_L3MISS */ | ||
87 | 0, /* 2a */ | ||
88 | 0, /* 2c */ | ||
89 | 0, /* 2e */ | ||
90 | 0x4f, /* 30 */ | ||
91 | 0x7f, /* 32 */ | ||
92 | 0x4f, /* 34 */ | ||
93 | 0x5f, /* 36 */ | ||
94 | 0x6f, /* 38 */ | ||
95 | 0x4f, /* 3a */ | ||
96 | 0, /* 3c */ | ||
97 | 0x08, /* 3e PM_MRK_INST_TIMEO */ | ||
98 | 0x1f, /* 40 */ | ||
99 | 0x1f, /* 42 */ | ||
100 | 0x1f, /* 44 */ | ||
101 | 0x1f, /* 46 */ | ||
102 | 0x1f, /* 48 */ | ||
103 | 0x1f, /* 4a */ | ||
104 | 0x1f, /* 4c */ | ||
105 | 0x1f, /* 4e */ | ||
106 | 0, /* 50 */ | ||
107 | 0x05, /* 52 PM_MRK_BR_TAKEN, PM_MRK_BR_MPRED */ | ||
108 | 0x1c, /* 54 PM_MRK_PTEG_FROM_L3MISS, PM_MRK_PTEG_FROM_L2MISS */ | ||
109 | 0x02, /* 56 PM_MRK_LD_MISS_L1 */ | ||
110 | 0, /* 58 */ | ||
111 | 0, /* 5a */ | ||
112 | 0, /* 5c */ | ||
113 | 0, /* 5e */ | ||
114 | }; | ||
115 | |||
116 | /* | ||
117 | * Masks showing for each unit which bits are marked events. | ||
118 | * These masks are in LE order, i.e. 0x00000001 is byte 0, bit 0. | ||
119 | */ | ||
120 | static u32 marked_bus_events[16] = { | ||
121 | 0x01000000, /* direct events set 1: byte 3 bit 0 */ | ||
122 | 0x00010000, /* direct events set 2: byte 2 bit 0 */ | ||
123 | 0, 0, 0, 0, /* IDU, IFU, nest: nothing */ | ||
124 | 0x00000088, /* VMX set 1: byte 0 bits 3, 7 */ | ||
125 | 0x000000c0, /* VMX set 2: byte 0 bits 4-7 */ | ||
126 | 0x04010000, /* LSU set 1: byte 2 bit 0, byte 3 bit 2 */ | ||
127 | 0xff010000u, /* LSU set 2: byte 2 bit 0, all of byte 3 */ | ||
128 | 0, /* LSU set 3 */ | ||
129 | 0x00000010, /* VMX set 3: byte 0 bit 4 */ | ||
130 | 0, /* BFP set 1 */ | ||
131 | 0x00000022, /* BFP set 2: byte 0 bits 1, 5 */ | ||
132 | 0, 0 | ||
133 | }; | ||
134 | |||
135 | /* | ||
136 | * Returns 1 if event counts things relating to marked instructions | ||
137 | * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not. | ||
138 | */ | ||
139 | static int power6_marked_instr_event(u64 event) | ||
140 | { | ||
141 | int pmc, psel, ptype; | ||
142 | int bit, byte, unit; | ||
143 | u32 mask; | ||
144 | |||
145 | pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; | ||
146 | psel = (event & PM_PMCSEL_MSK) >> 1; /* drop edge/level bit */ | ||
147 | if (pmc >= 5) | ||
148 | return 0; | ||
149 | |||
150 | bit = -1; | ||
151 | if (psel < sizeof(direct_event_is_marked)) { | ||
152 | ptype = direct_event_is_marked[psel]; | ||
153 | if (pmc == 0 || !(ptype & (1 << (pmc - 1)))) | ||
154 | return 0; | ||
155 | ptype >>= 4; | ||
156 | if (ptype == 0) | ||
157 | return 1; | ||
158 | if (ptype == 1) | ||
159 | bit = 0; | ||
160 | else | ||
161 | bit = ptype ^ (pmc - 1); | ||
162 | } else if ((psel & 0x48) == 0x40) | ||
163 | bit = psel & 7; | ||
164 | |||
165 | if (!(event & PM_BUSEVENT_MSK) || bit == -1) | ||
166 | return 0; | ||
167 | |||
168 | byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK; | ||
169 | unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK; | ||
170 | mask = marked_bus_events[unit]; | ||
171 | return (mask >> (byte * 8 + bit)) & 1; | ||
172 | } | ||
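/*
 * Editor's illustrative sketch, not part of the original patch:
 * reading the tables above.  PMCSEL 0x06 indexes
 * direct_event_is_marked[3] == 0x07, a direct marked event (type
 * nibble 0) valid on PMC1-3, so an event such as PM_MRK_ST_CMPL on
 * PMC1 is recognised without consulting marked_bus_events[].  For an
 * add/and entry such as 0x4f at index 0x18 (PMCSEL 0x30), the bus bit
 * becomes ptype ^ (pmc - 1) and marked_bus_events[unit] decides.
 * The guard macro is hypothetical.
 */
#ifdef POWER6_PMU_SELFTEST
static void __init power6_marked_example(void)
{
        /* PMC1, PMCSEL 0x06: direct marked event */
        WARN_ON(power6_marked_instr_event((1ull << PM_PMC_SH) | 0x06) != 1);
}
#endif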
173 | |||
174 | /* | ||
175 | * Assign PMC numbers and compute MMCR1 value for a set of events | ||
176 | */ | ||
177 | static int p6_compute_mmcr(u64 event[], int n_ev, | ||
178 | unsigned int hwc[], unsigned long mmcr[]) | ||
179 | { | ||
180 | unsigned long mmcr1 = 0; | ||
181 | unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS; | ||
182 | int i; | ||
183 | unsigned int pmc, ev, b, u, s, psel; | ||
184 | unsigned int ttmset = 0; | ||
185 | unsigned int pmc_inuse = 0; | ||
186 | |||
187 | if (n_ev > 6) | ||
188 | return -1; | ||
189 | for (i = 0; i < n_ev; ++i) { | ||
190 | pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK; | ||
191 | if (pmc) { | ||
192 | if (pmc_inuse & (1 << (pmc - 1))) | ||
193 | return -1; /* collision! */ | ||
194 | pmc_inuse |= 1 << (pmc - 1); | ||
195 | } | ||
196 | } | ||
197 | for (i = 0; i < n_ev; ++i) { | ||
198 | ev = event[i]; | ||
199 | pmc = (ev >> PM_PMC_SH) & PM_PMC_MSK; | ||
200 | if (pmc) { | ||
201 | --pmc; | ||
202 | } else { | ||
203 | /* can go on any PMC; find a free one */ | ||
204 | for (pmc = 0; pmc < 4; ++pmc) | ||
205 | if (!(pmc_inuse & (1 << pmc))) | ||
206 | break; | ||
207 | if (pmc >= 4) | ||
208 | return -1; | ||
209 | pmc_inuse |= 1 << pmc; | ||
210 | } | ||
211 | hwc[i] = pmc; | ||
212 | psel = ev & PM_PMCSEL_MSK; | ||
213 | if (ev & PM_BUSEVENT_MSK) { | ||
214 | /* this event uses the event bus */ | ||
215 | b = (ev >> PM_BYTE_SH) & PM_BYTE_MSK; | ||
216 | u = (ev >> PM_UNIT_SH) & PM_UNIT_MSK; | ||
217 | /* check for conflict on this byte of event bus */ | ||
218 | if ((ttmset & (1 << b)) && MMCR1_TTMSEL(mmcr1, b) != u) | ||
219 | return -1; | ||
220 | mmcr1 |= (unsigned long)u << MMCR1_TTMSEL_SH(b); | ||
221 | ttmset |= 1 << b; | ||
222 | if (u == 5) { | ||
223 | /* Nest events have a further mux */ | ||
224 | s = (ev >> PM_SUBUNIT_SH) & PM_SUBUNIT_MSK; | ||
225 | if ((ttmset & 0x10) && | ||
226 | MMCR1_NESTSEL(mmcr1) != s) | ||
227 | return -1; | ||
228 | ttmset |= 0x10; | ||
229 | mmcr1 |= (unsigned long)s << MMCR1_NESTSEL_SH; | ||
230 | } | ||
231 | if (0x30 <= psel && psel <= 0x3d) { | ||
232 | /* these need the PMCx_ADDR_SEL bits */ | ||
233 | if (b >= 2) | ||
234 | mmcr1 |= MMCR1_PMC1_ADDR_SEL >> pmc; | ||
235 | } | ||
236 | /* bus select values are different for PMC3/4 */ | ||
237 | if (pmc >= 2 && (psel & 0x90) == 0x80) | ||
238 | psel ^= 0x20; | ||
239 | } | ||
240 | if (ev & PM_LLA) { | ||
241 | mmcr1 |= MMCR1_PMC1_LLA >> pmc; | ||
242 | if (ev & PM_LLAV) | ||
243 | mmcr1 |= MMCR1_PMC1_LLA_VALUE >> pmc; | ||
244 | } | ||
245 | if (power6_marked_instr_event(event[i])) | ||
246 | mmcra |= MMCRA_SAMPLE_ENABLE; | ||
247 | if (pmc < 4) | ||
248 | mmcr1 |= (unsigned long)psel << MMCR1_PMCSEL_SH(pmc); | ||
249 | } | ||
250 | mmcr[0] = 0; | ||
251 | if (pmc_inuse & 1) | ||
252 | mmcr[0] = MMCR0_PMC1CE; | ||
253 | if (pmc_inuse & 0xe) | ||
254 | mmcr[0] |= MMCR0_PMCjCE; | ||
255 | mmcr[1] = mmcr1; | ||
256 | mmcr[2] = mmcra; | ||
257 | return 0; | ||
258 | } | ||
259 | |||
260 | /* | ||
261 | * Layout of constraint bits: | ||
262 | * | ||
263 | * 0-1 add field: number of uses of PMC1 (max 1) | ||
264 | * 2-3, 4-5, 6-7, 8-9, 10-11: ditto for PMC2, 3, 4, 5, 6 | ||
265 | * 12-15 add field: number of uses of PMC1-4 (max 4) | ||
266 | * 16-19 select field: unit on byte 0 of event bus | ||
267 | * 20-23, 24-27, 28-31 ditto for bytes 1, 2, 3 | ||
268 | * 32-34 select field: nest (subunit) event selector | ||
269 | */ | ||
270 | static int p6_get_constraint(u64 event, unsigned long *maskp, | ||
271 | unsigned long *valp) | ||
272 | { | ||
273 | int pmc, byte, sh, subunit; | ||
274 | unsigned long mask = 0, value = 0; | ||
275 | |||
276 | pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; | ||
277 | if (pmc) { | ||
278 | if (pmc > 4 && !(event == 0x500009 || event == 0x600005)) | ||
279 | return -1; | ||
280 | sh = (pmc - 1) * 2; | ||
281 | mask |= 2 << sh; | ||
282 | value |= 1 << sh; | ||
283 | } | ||
284 | if (event & PM_BUSEVENT_MSK) { | ||
285 | byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK; | ||
286 | sh = byte * 4 + (16 - PM_UNIT_SH); | ||
287 | mask |= PM_UNIT_MSKS << sh; | ||
288 | value |= (unsigned long)(event & PM_UNIT_MSKS) << sh; | ||
289 | if ((event & PM_UNIT_MSKS) == (5 << PM_UNIT_SH)) { | ||
290 | subunit = (event >> PM_SUBUNIT_SH) & PM_SUBUNIT_MSK; | ||
291 | mask |= (unsigned long)PM_SUBUNIT_MSK << 32; | ||
292 | value |= (unsigned long)subunit << 32; | ||
293 | } | ||
294 | } | ||
295 | if (pmc <= 4) { | ||
296 | mask |= 0x8000; /* add field for count of PMC1-4 uses */ | ||
297 | value |= 0x1000; | ||
298 | } | ||
299 | *maskp = mask; | ||
300 | *valp = value; | ||
301 | return 0; | ||
302 | } | ||
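/*
 * Editor's illustrative sketch, not part of the original patch: one
 * pass through the encoding documented above.  PM_LD_REF_L1
 * (0x280030) is PMC2 (bits 2-3), a bus event with unit 8 on byte 0
 * (bits 16-19), and needs a PMC1-4 counter (adder at bits 12-15).
 * The guard macro is hypothetical.
 */
#ifdef POWER6_PMU_SELFTEST
static void __init power6_constraint_example(void)
{
        unsigned long mask, value;

        if (!p6_get_constraint(0x280030, &mask, &value))
                WARN_ON(mask != 0xf8008 || value != 0x81004);
}
#endif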
303 | |||
304 | static int p6_limited_pmc_event(u64 event) | ||
305 | { | ||
306 | int pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; | ||
307 | |||
308 | return pmc == 5 || pmc == 6; | ||
309 | } | ||
310 | |||
311 | #define MAX_ALT 4 /* at most 4 alternatives for any event */ | ||
312 | |||
313 | static const unsigned int event_alternatives[][MAX_ALT] = { | ||
314 | { 0x0130e8, 0x2000f6, 0x3000fc }, /* PM_PTEG_RELOAD_VALID */ | ||
315 | { 0x080080, 0x10000d, 0x30000c, 0x4000f0 }, /* PM_LD_MISS_L1 */ | ||
316 | { 0x080088, 0x200054, 0x3000f0 }, /* PM_ST_MISS_L1 */ | ||
317 | { 0x10000a, 0x2000f4, 0x600005 }, /* PM_RUN_CYC */ | ||
318 | { 0x10000b, 0x2000f5 }, /* PM_RUN_COUNT */ | ||
319 | { 0x10000e, 0x400010 }, /* PM_PURR */ | ||
320 | { 0x100010, 0x4000f8 }, /* PM_FLUSH */ | ||
321 | { 0x10001a, 0x200010 }, /* PM_MRK_INST_DISP */ | ||
322 | { 0x100026, 0x3000f8 }, /* PM_TB_BIT_TRANS */ | ||
323 | { 0x100054, 0x2000f0 }, /* PM_ST_FIN */ | ||
324 | { 0x100056, 0x2000fc }, /* PM_L1_ICACHE_MISS */ | ||
325 | { 0x1000f0, 0x40000a }, /* PM_INST_IMC_MATCH_CMPL */ | ||
326 | { 0x1000f8, 0x200008 }, /* PM_GCT_EMPTY_CYC */ | ||
327 | { 0x1000fc, 0x400006 }, /* PM_LSU_DERAT_MISS_CYC */ | ||
328 | { 0x20000e, 0x400007 }, /* PM_LSU_DERAT_MISS */ | ||
329 | { 0x200012, 0x300012 }, /* PM_INST_DISP */ | ||
330 | { 0x2000f2, 0x3000f2 }, /* PM_INST_DISP */ | ||
331 | { 0x2000f8, 0x300010 }, /* PM_EXT_INT */ | ||
332 | { 0x2000fe, 0x300056 }, /* PM_DATA_FROM_L2MISS */ | ||
333 | { 0x2d0030, 0x30001a }, /* PM_MRK_FPU_FIN */ | ||
334 | { 0x30000a, 0x400018 }, /* PM_MRK_INST_FIN */ | ||
335 | { 0x3000f6, 0x40000e }, /* PM_L1_DCACHE_RELOAD_VALID */ | ||
336 | { 0x3000fe, 0x400056 }, /* PM_DATA_FROM_L3MISS */ | ||
337 | }; | ||
338 | |||
339 | /* | ||
340 | * This could be made more efficient with a binary search on | ||
341 | * a presorted list, if necessary | ||
342 | */ | ||
343 | static int find_alternatives_list(u64 event) | ||
344 | { | ||
345 | int i, j; | ||
346 | unsigned int alt; | ||
347 | |||
348 | for (i = 0; i < ARRAY_SIZE(event_alternatives); ++i) { | ||
349 | if (event < event_alternatives[i][0]) | ||
350 | return -1; | ||
351 | for (j = 0; j < MAX_ALT; ++j) { | ||
352 | alt = event_alternatives[i][j]; | ||
353 | if (!alt || event < alt) | ||
354 | break; | ||
355 | if (event == alt) | ||
356 | return i; | ||
357 | } | ||
358 | } | ||
359 | return -1; | ||
360 | } | ||
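/*
 * Editor's sketch, not part of the original patch, of the presorted
 * binary search the comment above alludes to.  It assumes a
 * flattened index with one entry per alternative code, kept sorted
 * by code; the entries shown are only an illustrative subset of
 * event_alternatives[], and all names here are hypothetical.
 */
#ifdef POWER6_PMU_SELFTEST
struct p6_alt_index {
        unsigned int code;
        int row;                /* index into event_alternatives[] */
};

static const struct p6_alt_index p6_alt_codes[] = {
        { 0x0130e8, 0 }, { 0x080080, 1 }, { 0x10000d, 1 },
        { 0x2000f6, 0 }, { 0x3000fc, 0 }, { 0x4000f0, 1 },
        /* remaining codes would follow, kept sorted */
};

static int find_alternatives_list_bsearch(u64 event)
{
        int lo = 0, hi = ARRAY_SIZE(p6_alt_codes) - 1;

        while (lo <= hi) {
                int mid = (lo + hi) / 2;

                if (event == p6_alt_codes[mid].code)
                        return p6_alt_codes[mid].row;
                if (event < p6_alt_codes[mid].code)
                        hi = mid - 1;
                else
                        lo = mid + 1;
        }
        return -1;
}
#endif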
361 | |||
362 | static int p6_get_alternatives(u64 event, unsigned int flags, u64 alt[]) | ||
363 | { | ||
364 | int i, j, nlim; | ||
365 | unsigned int psel, pmc; | ||
366 | unsigned int nalt = 1; | ||
367 | u64 aevent; | ||
368 | |||
369 | alt[0] = event; | ||
370 | nlim = p6_limited_pmc_event(event); | ||
371 | |||
372 | /* check the alternatives table */ | ||
373 | i = find_alternatives_list(event); | ||
374 | if (i >= 0) { | ||
375 | /* copy out alternatives from list */ | ||
376 | for (j = 0; j < MAX_ALT; ++j) { | ||
377 | aevent = event_alternatives[i][j]; | ||
378 | if (!aevent) | ||
379 | break; | ||
380 | if (aevent != event) | ||
381 | alt[nalt++] = aevent; | ||
382 | nlim += p6_limited_pmc_event(aevent); | ||
383 | } | ||
384 | |||
385 | } else { | ||
386 | /* Check for alternative ways of computing sum events */ | ||
387 | /* PMCSEL 0x32 counter N == PMCSEL 0x34 counter 5-N */ | ||
388 | psel = event & (PM_PMCSEL_MSK & ~1); /* ignore edge bit */ | ||
389 | pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; | ||
390 | if (pmc && (psel == 0x32 || psel == 0x34)) | ||
391 | alt[nalt++] = ((event ^ 0x6) & ~PM_PMC_MSKS) | | ||
392 | ((5 - pmc) << PM_PMC_SH); | ||
393 | |||
394 | /* PMCSEL 0x38 counter N == PMCSEL 0x3a counter N+/-2 */ | ||
395 | if (pmc && (psel == 0x38 || psel == 0x3a)) | ||
396 | alt[nalt++] = ((event ^ 0x2) & ~PM_PMC_MSKS) | | ||
397 | ((pmc > 2? pmc - 2: pmc + 2) << PM_PMC_SH); | ||
398 | } | ||
399 | |||
400 | if (flags & PPMU_ONLY_COUNT_RUN) { | ||
401 | /* | ||
402 | * We're only counting in RUN state, | ||
403 | * so PM_CYC is equivalent to PM_RUN_CYC, | ||
404 | * PM_INST_CMPL === PM_RUN_INST_CMPL, PM_PURR === PM_RUN_PURR. | ||
405 | * This doesn't include alternatives that don't provide | ||
406 | * any extra flexibility in assigning PMCs (e.g. | ||
407 | * 0x10000a for PM_RUN_CYC vs. 0x1e for PM_CYC). | ||
408 | * Note that even with these additional alternatives | ||
409 | * we never end up with more than 4 alternatives for any event. | ||
410 | */ | ||
411 | j = nalt; | ||
412 | for (i = 0; i < nalt; ++i) { | ||
413 | switch (alt[i]) { | ||
414 | case 0x1e: /* PM_CYC */ | ||
415 | alt[j++] = 0x600005; /* PM_RUN_CYC */ | ||
416 | ++nlim; | ||
417 | break; | ||
418 | case 0x10000a: /* PM_RUN_CYC */ | ||
419 | alt[j++] = 0x1e; /* PM_CYC */ | ||
420 | break; | ||
421 | case 2: /* PM_INST_CMPL */ | ||
422 | alt[j++] = 0x500009; /* PM_RUN_INST_CMPL */ | ||
423 | ++nlim; | ||
424 | break; | ||
425 | case 0x500009: /* PM_RUN_INST_CMPL */ | ||
426 | alt[j++] = 2; /* PM_INST_CMPL */ | ||
427 | break; | ||
428 | case 0x10000e: /* PM_PURR */ | ||
429 | alt[j++] = 0x4000f4; /* PM_RUN_PURR */ | ||
430 | break; | ||
431 | case 0x4000f4: /* PM_RUN_PURR */ | ||
432 | alt[j++] = 0x10000e; /* PM_PURR */ | ||
433 | break; | ||
434 | } | ||
435 | } | ||
436 | nalt = j; | ||
437 | } | ||
438 | |||
439 | if (!(flags & PPMU_LIMITED_PMC_OK) && nlim) { | ||
440 | /* remove the limited PMC events */ | ||
441 | j = 0; | ||
442 | for (i = 0; i < nalt; ++i) { | ||
443 | if (!p6_limited_pmc_event(alt[i])) { | ||
444 | alt[j] = alt[i]; | ||
445 | ++j; | ||
446 | } | ||
447 | } | ||
448 | nalt = j; | ||
449 | } else if ((flags & PPMU_LIMITED_PMC_REQD) && nlim < nalt) { | ||
450 | /* remove all but the limited PMC events */ | ||
451 | j = 0; | ||
452 | for (i = 0; i < nalt; ++i) { | ||
453 | if (p6_limited_pmc_event(alt[i])) { | ||
454 | alt[j] = alt[i]; | ||
455 | ++j; | ||
456 | } | ||
457 | } | ||
458 | nalt = j; | ||
459 | } | ||
460 | |||
461 | return nalt; | ||
462 | } | ||
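/*
 * Editor's illustrative sketch, not part of the original patch: the
 * effect of the flag handling above.  With PPMU_ONLY_COUNT_RUN |
 * PPMU_LIMITED_PMC_OK, PM_CYC (0x1e) expands to { 0x1e, 0x600005 }
 * (PM_RUN_CYC); adding PPMU_LIMITED_PMC_REQD on top keeps only the
 * PMC5/6-limited 0x600005, and without PPMU_LIMITED_PMC_OK the
 * limited alternative is dropped again.  The guard macro is
 * hypothetical.
 */
#ifdef POWER6_PMU_SELFTEST
static void __init power6_alt_example(void)
{
        u64 alt[MAX_ALT];
        int n;

        n = p6_get_alternatives(0x1e,
                        PPMU_ONLY_COUNT_RUN | PPMU_LIMITED_PMC_OK, alt);
        WARN_ON(n != 2 || alt[0] != 0x1e || alt[1] != 0x600005);
}
#endif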
463 | |||
464 | static void p6_disable_pmc(unsigned int pmc, unsigned long mmcr[]) | ||
465 | { | ||
466 | /* Set PMCxSEL to 0 to disable PMCx */ | ||
467 | if (pmc <= 3) | ||
468 | mmcr[1] &= ~(0xffUL << MMCR1_PMCSEL_SH(pmc)); | ||
469 | } | ||
470 | |||
471 | static int power6_generic_events[] = { | ||
472 | [PERF_COUNT_HW_CPU_CYCLES] = 0x1e, | ||
473 | [PERF_COUNT_HW_INSTRUCTIONS] = 2, | ||
474 | [PERF_COUNT_HW_CACHE_REFERENCES] = 0x280030, /* LD_REF_L1 */ | ||
475 | [PERF_COUNT_HW_CACHE_MISSES] = 0x30000c, /* LD_MISS_L1 */ | ||
476 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x410a0, /* BR_PRED */ | ||
477 | [PERF_COUNT_HW_BRANCH_MISSES] = 0x400052, /* BR_MPRED */ | ||
478 | }; | ||
479 | |||
480 | #define C(x) PERF_COUNT_HW_CACHE_##x | ||
481 | |||
482 | /* | ||
483 | * Table of generalized cache-related events. | ||
484 | * 0 means not supported, -1 means nonsensical, other values | ||
485 | * are event codes. | ||
486 | * The "DTLB" and "ITLB" events relate to the DERAT and IERAT. | ||
487 | */ | ||
488 | static int power6_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { | ||
489 | [C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
490 | [C(OP_READ)] = { 0x280030, 0x80080 }, | ||
491 | [C(OP_WRITE)] = { 0x180032, 0x80088 }, | ||
492 | [C(OP_PREFETCH)] = { 0x810a4, 0 }, | ||
493 | }, | ||
494 | [C(L1I)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
495 | [C(OP_READ)] = { 0, 0x100056 }, | ||
496 | [C(OP_WRITE)] = { -1, -1 }, | ||
497 | [C(OP_PREFETCH)] = { 0x4008c, 0 }, | ||
498 | }, | ||
499 | [C(LL)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
500 | [C(OP_READ)] = { 0x150730, 0x250532 }, | ||
501 | [C(OP_WRITE)] = { 0x250432, 0x150432 }, | ||
502 | [C(OP_PREFETCH)] = { 0x810a6, 0 }, | ||
503 | }, | ||
504 | [C(DTLB)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
505 | [C(OP_READ)] = { 0, 0x20000e }, | ||
506 | [C(OP_WRITE)] = { -1, -1 }, | ||
507 | [C(OP_PREFETCH)] = { -1, -1 }, | ||
508 | }, | ||
509 | [C(ITLB)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
510 | [C(OP_READ)] = { 0, 0x420ce }, | ||
511 | [C(OP_WRITE)] = { -1, -1 }, | ||
512 | [C(OP_PREFETCH)] = { -1, -1 }, | ||
513 | }, | ||
514 | [C(BPU)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
515 | [C(OP_READ)] = { 0x430e6, 0x400052 }, | ||
516 | [C(OP_WRITE)] = { -1, -1 }, | ||
517 | [C(OP_PREFETCH)] = { -1, -1 }, | ||
518 | }, | ||
519 | [C(NODE)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
520 | [C(OP_READ)] = { -1, -1 }, | ||
521 | [C(OP_WRITE)] = { -1, -1 }, | ||
522 | [C(OP_PREFETCH)] = { -1, -1 }, | ||
523 | }, | ||
524 | }; | ||
525 | |||
526 | static struct power_pmu power6_pmu = { | ||
527 | .name = "POWER6", | ||
528 | .n_counter = 6, | ||
529 | .max_alternatives = MAX_ALT, | ||
530 | .add_fields = 0x1555, | ||
531 | .test_adder = 0x3000, | ||
532 | .compute_mmcr = p6_compute_mmcr, | ||
533 | .get_constraint = p6_get_constraint, | ||
534 | .get_alternatives = p6_get_alternatives, | ||
535 | .disable_pmc = p6_disable_pmc, | ||
536 | .limited_pmc_event = p6_limited_pmc_event, | ||
537 | .flags = PPMU_LIMITED_PMC5_6 | PPMU_ALT_SIPR, | ||
538 | .n_generic = ARRAY_SIZE(power6_generic_events), | ||
539 | .generic_events = power6_generic_events, | ||
540 | .cache_events = &power6_cache_events, | ||
541 | }; | ||
542 | |||
543 | static int __init init_power6_pmu(void) | ||
544 | { | ||
545 | if (!cur_cpu_spec->oprofile_cpu_type || | ||
546 | strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power6")) | ||
547 | return -ENODEV; | ||
548 | |||
549 | return register_power_pmu(&power6_pmu); | ||
550 | } | ||
551 | |||
552 | early_initcall(init_power6_pmu); | ||
diff --git a/arch/powerpc/perf/power7-pmu.c b/arch/powerpc/perf/power7-pmu.c
new file mode 100644
index 000000000000..1251e4d7e262
--- /dev/null
+++ b/arch/powerpc/perf/power7-pmu.c
@@ -0,0 +1,379 @@
1 | /* | ||
2 | * Performance counter support for POWER7 processors. | ||
3 | * | ||
4 | * Copyright 2009 Paul Mackerras, IBM Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the License, or (at your option) any later version. | ||
10 | */ | ||
11 | #include <linux/kernel.h> | ||
12 | #include <linux/perf_event.h> | ||
13 | #include <linux/string.h> | ||
14 | #include <asm/reg.h> | ||
15 | #include <asm/cputable.h> | ||
16 | |||
17 | /* | ||
18 | * Bits in event code for POWER7 | ||
19 | */ | ||
20 | #define PM_PMC_SH 16 /* PMC number (1-based) for direct events */ | ||
21 | #define PM_PMC_MSK 0xf | ||
22 | #define PM_PMC_MSKS (PM_PMC_MSK << PM_PMC_SH) | ||
23 | #define PM_UNIT_SH 12 /* TTMMUX number and setting - unit select */ | ||
24 | #define PM_UNIT_MSK 0xf | ||
25 | #define PM_COMBINE_SH 11 /* Combined event bit */ | ||
26 | #define PM_COMBINE_MSK 1 | ||
27 | #define PM_COMBINE_MSKS 0x800 | ||
28 | #define PM_L2SEL_SH 8 /* L2 event select */ | ||
29 | #define PM_L2SEL_MSK 7 | ||
30 | #define PM_PMCSEL_MSK 0xff | ||
31 | |||
32 | /* | ||
33 | * Bits in MMCR1 for POWER7 | ||
34 | */ | ||
35 | #define MMCR1_TTM0SEL_SH 60 | ||
36 | #define MMCR1_TTM1SEL_SH 56 | ||
37 | #define MMCR1_TTM2SEL_SH 52 | ||
38 | #define MMCR1_TTM3SEL_SH 48 | ||
39 | #define MMCR1_TTMSEL_MSK 0xf | ||
40 | #define MMCR1_L2SEL_SH 45 | ||
41 | #define MMCR1_L2SEL_MSK 7 | ||
42 | #define MMCR1_PMC1_COMBINE_SH 35 | ||
43 | #define MMCR1_PMC2_COMBINE_SH 34 | ||
44 | #define MMCR1_PMC3_COMBINE_SH 33 | ||
45 | #define MMCR1_PMC4_COMBINE_SH 32 | ||
46 | #define MMCR1_PMC1SEL_SH 24 | ||
47 | #define MMCR1_PMC2SEL_SH 16 | ||
48 | #define MMCR1_PMC3SEL_SH 8 | ||
49 | #define MMCR1_PMC4SEL_SH 0 | ||
50 | #define MMCR1_PMCSEL_SH(n) (MMCR1_PMC1SEL_SH - (n) * 8) | ||
51 | #define MMCR1_PMCSEL_MSK 0xff | ||
52 | |||
53 | /* | ||
54 | * Layout of constraint bits: | ||
55 | * 6666555555555544444444443333333333222222222211111111110000000000 | ||
56 | * 3210987654321098765432109876543210987654321098765432109876543210 | ||
57 | * [ ><><><><><><> | ||
58 | * NC P6P5P4P3P2P1 | ||
59 | * | ||
60 | * NC - number of counters | ||
61 | * 15: NC error 0x8000 | ||
62 | * 12-14: number of events needing PMC1-4 0x7000 | ||
63 | * | ||
64 | * P6 | ||
65 | * 11: P6 error 0x800 | ||
66 | * 10-11: Count of events needing PMC6 | ||
67 | * | ||
68 | * P1..P5 | ||
69 | * 0-9: Count of events needing PMC1..PMC5 | ||
70 | */ | ||
71 | |||
72 | static int power7_get_constraint(u64 event, unsigned long *maskp, | ||
73 | unsigned long *valp) | ||
74 | { | ||
75 | int pmc, sh; | ||
76 | unsigned long mask = 0, value = 0; | ||
77 | |||
78 | pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; | ||
79 | if (pmc) { | ||
80 | if (pmc > 6) | ||
81 | return -1; | ||
82 | sh = (pmc - 1) * 2; | ||
83 | mask |= 2 << sh; | ||
84 | value |= 1 << sh; | ||
85 | if (pmc >= 5 && !(event == 0x500fa || event == 0x600f4)) | ||
86 | return -1; | ||
87 | } | ||
88 | if (pmc < 5) { | ||
89 | /* need a counter from PMC1-4 set */ | ||
90 | mask |= 0x8000; | ||
91 | value |= 0x1000; | ||
92 | } | ||
93 | *maskp = mask; | ||
94 | *valp = value; | ||
95 | return 0; | ||
96 | } | ||
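/*
 * Editor's illustrative sketch, not part of the original patch: two
 * encodings under the layout documented above.  PM_RUN_CYC (0x600f4)
 * only loads the PMC6 pair at bits 10-11; PM_CYC (0x1e) has no fixed
 * PMC, so it only bumps the PMC1-4 adder at bits 12-15.  The guard
 * macro is hypothetical.
 */
#ifdef POWER7_PMU_SELFTEST
static void __init power7_constraint_example(void)
{
        unsigned long mask, value;

        if (!power7_get_constraint(0x600f4, &mask, &value))
                WARN_ON(mask != 0x800 || value != 0x400);
        if (!power7_get_constraint(0x1e, &mask, &value))
                WARN_ON(mask != 0x8000 || value != 0x1000);
}
#endif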
97 | |||
98 | #define MAX_ALT 2 /* at most 2 alternatives for any event */ | ||
99 | |||
100 | static const unsigned int event_alternatives[][MAX_ALT] = { | ||
101 | { 0x200f2, 0x300f2 }, /* PM_INST_DISP */ | ||
102 | { 0x200f4, 0x600f4 }, /* PM_RUN_CYC */ | ||
103 | { 0x400fa, 0x500fa }, /* PM_RUN_INST_CMPL */ | ||
104 | }; | ||
105 | |||
106 | /* | ||
107 | * Scan the alternatives table for a match and return the | ||
108 | * index into the alternatives table if found, else -1. | ||
109 | */ | ||
110 | static int find_alternative(u64 event) | ||
111 | { | ||
112 | int i, j; | ||
113 | |||
114 | for (i = 0; i < ARRAY_SIZE(event_alternatives); ++i) { | ||
115 | if (event < event_alternatives[i][0]) | ||
116 | break; | ||
117 | for (j = 0; j < MAX_ALT && event_alternatives[i][j]; ++j) | ||
118 | if (event == event_alternatives[i][j]) | ||
119 | return i; | ||
120 | } | ||
121 | return -1; | ||
122 | } | ||
123 | |||
124 | static s64 find_alternative_decode(u64 event) | ||
125 | { | ||
126 | int pmc, psel; | ||
127 | |||
128 | /* this only handles the 4x decode events */ | ||
129 | pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; | ||
130 | psel = event & PM_PMCSEL_MSK; | ||
131 | if ((pmc == 2 || pmc == 4) && (psel & ~7) == 0x40) | ||
132 | return event - (1 << PM_PMC_SH) + 8; | ||
133 | if ((pmc == 1 || pmc == 3) && (psel & ~7) == 0x48) | ||
134 | return event + (1 << PM_PMC_SH) - 8; | ||
135 | return -1; | ||
136 | } | ||
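/*
 * Editor's illustrative sketch, not part of the original patch: the
 * 4x decode remapping above is its own inverse.  PMC2/PMCSEL 0x40
 * (0x20040) maps to PMC1/PMCSEL 0x48 (0x10048) and back.  The guard
 * macro is hypothetical.
 */
#ifdef POWER7_PMU_SELFTEST
static void __init power7_decode_example(void)
{
        WARN_ON(find_alternative_decode(0x20040) != 0x10048);
        WARN_ON(find_alternative_decode(0x10048) != 0x20040);
}
#endif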
137 | |||
138 | static int power7_get_alternatives(u64 event, unsigned int flags, u64 alt[]) | ||
139 | { | ||
140 | int i, j, nalt = 1; | ||
141 | s64 ae; | ||
142 | |||
143 | alt[0] = event; | ||
144 | nalt = 1; | ||
145 | i = find_alternative(event); | ||
146 | if (i >= 0) { | ||
147 | for (j = 0; j < MAX_ALT; ++j) { | ||
148 | ae = event_alternatives[i][j]; | ||
149 | if (ae && ae != event) | ||
150 | alt[nalt++] = ae; | ||
151 | } | ||
152 | } else { | ||
153 | ae = find_alternative_decode(event); | ||
154 | if (ae > 0) | ||
155 | alt[nalt++] = ae; | ||
156 | } | ||
157 | |||
158 | if (flags & PPMU_ONLY_COUNT_RUN) { | ||
159 | /* | ||
160 | * We're only counting in RUN state, | ||
161 | * so PM_CYC is equivalent to PM_RUN_CYC | ||
162 | * and PM_INST_CMPL === PM_RUN_INST_CMPL. | ||
163 | * This doesn't include alternatives that don't provide | ||
164 | * any extra flexibility in assigning PMCs. | ||
165 | */ | ||
166 | j = nalt; | ||
167 | for (i = 0; i < nalt; ++i) { | ||
168 | switch (alt[i]) { | ||
169 | case 0x1e: /* PM_CYC */ | ||
170 | alt[j++] = 0x600f4; /* PM_RUN_CYC */ | ||
171 | break; | ||
172 | case 0x600f4: /* PM_RUN_CYC */ | ||
173 | alt[j++] = 0x1e; | ||
174 | break; | ||
175 | case 0x2: /* PM_PPC_CMPL */ | ||
176 | alt[j++] = 0x500fa; /* PM_RUN_INST_CMPL */ | ||
177 | break; | ||
178 | case 0x500fa: /* PM_RUN_INST_CMPL */ | ||
179 | alt[j++] = 0x2; /* PM_PPC_CMPL */ | ||
180 | break; | ||
181 | } | ||
182 | } | ||
183 | nalt = j; | ||
184 | } | ||
185 | |||
186 | return nalt; | ||
187 | } | ||
188 | |||
189 | /* | ||
190 | * Returns 1 if event counts things relating to marked instructions | ||
191 | * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not. | ||
192 | */ | ||
193 | static int power7_marked_instr_event(u64 event) | ||
194 | { | ||
195 | int pmc, psel; | ||
196 | int unit; | ||
197 | |||
198 | pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; | ||
199 | unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK; | ||
200 | psel = event & PM_PMCSEL_MSK & ~1; /* trim off edge/level bit */ | ||
201 | if (pmc >= 5) | ||
202 | return 0; | ||
203 | |||
204 | switch (psel >> 4) { | ||
205 | case 2: | ||
206 | return pmc == 2 || pmc == 4; | ||
207 | case 3: | ||
208 | if (psel == 0x3c) | ||
209 | return pmc == 1; | ||
210 | if (psel == 0x3e) | ||
211 | return pmc != 2; | ||
212 | return 1; | ||
213 | case 4: | ||
214 | case 5: | ||
215 | return unit == 0xd; | ||
216 | case 6: | ||
217 | if (psel == 0x64) | ||
218 | return pmc >= 3; | ||
219 | case 8: | ||
220 | return unit == 0xd; | ||
221 | } | ||
222 | return 0; | ||
223 | } | ||
224 | |||
225 | static int power7_compute_mmcr(u64 event[], int n_ev, | ||
226 | unsigned int hwc[], unsigned long mmcr[]) | ||
227 | { | ||
228 | unsigned long mmcr1 = 0; | ||
229 | unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS; | ||
230 | unsigned int pmc, unit, combine, l2sel, psel; | ||
231 | unsigned int pmc_inuse = 0; | ||
232 | int i; | ||
233 | |||
234 | /* First pass to count resource use */ | ||
235 | for (i = 0; i < n_ev; ++i) { | ||
236 | pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK; | ||
237 | if (pmc) { | ||
238 | if (pmc > 6) | ||
239 | return -1; | ||
240 | if (pmc_inuse & (1 << (pmc - 1))) | ||
241 | return -1; | ||
242 | pmc_inuse |= 1 << (pmc - 1); | ||
243 | } | ||
244 | } | ||
245 | |||
246 | /* Second pass: assign PMCs, set all MMCR1 fields */ | ||
247 | for (i = 0; i < n_ev; ++i) { | ||
248 | pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK; | ||
249 | unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK; | ||
250 | combine = (event[i] >> PM_COMBINE_SH) & PM_COMBINE_MSK; | ||
251 | l2sel = (event[i] >> PM_L2SEL_SH) & PM_L2SEL_MSK; | ||
252 | psel = event[i] & PM_PMCSEL_MSK; | ||
253 | if (!pmc) { | ||
254 | /* Bus event or any-PMC direct event */ | ||
255 | for (pmc = 0; pmc < 4; ++pmc) { | ||
256 | if (!(pmc_inuse & (1 << pmc))) | ||
257 | break; | ||
258 | } | ||
259 | if (pmc >= 4) | ||
260 | return -1; | ||
261 | pmc_inuse |= 1 << pmc; | ||
262 | } else { | ||
263 | /* Direct or decoded event */ | ||
264 | --pmc; | ||
265 | } | ||
266 | if (pmc <= 3) { | ||
267 | mmcr1 |= (unsigned long) unit | ||
268 | << (MMCR1_TTM0SEL_SH - 4 * pmc); | ||
269 | mmcr1 |= (unsigned long) combine | ||
270 | << (MMCR1_PMC1_COMBINE_SH - pmc); | ||
271 | mmcr1 |= psel << MMCR1_PMCSEL_SH(pmc); | ||
272 | if (unit == 6) /* L2 events */ | ||
273 | mmcr1 |= (unsigned long) l2sel | ||
274 | << MMCR1_L2SEL_SH; | ||
275 | } | ||
276 | if (power7_marked_instr_event(event[i])) | ||
277 | mmcra |= MMCRA_SAMPLE_ENABLE; | ||
278 | hwc[i] = pmc; | ||
279 | } | ||
280 | |||
281 | /* Return MMCRx values */ | ||
282 | mmcr[0] = 0; | ||
283 | if (pmc_inuse & 1) | ||
284 | mmcr[0] = MMCR0_PMC1CE; | ||
285 | if (pmc_inuse & 0x3e) | ||
286 | mmcr[0] |= MMCR0_PMCjCE; | ||
287 | mmcr[1] = mmcr1; | ||
288 | mmcr[2] = mmcra; | ||
289 | return 0; | ||
290 | } | ||
291 | |||
292 | static void power7_disable_pmc(unsigned int pmc, unsigned long mmcr[]) | ||
293 | { | ||
294 | if (pmc <= 3) | ||
295 | mmcr[1] &= ~(0xffUL << MMCR1_PMCSEL_SH(pmc)); | ||
296 | } | ||
297 | |||
298 | static int power7_generic_events[] = { | ||
299 | [PERF_COUNT_HW_CPU_CYCLES] = 0x1e, | ||
300 | [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x100f8, /* GCT_NOSLOT_CYC */ | ||
301 | [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x4000a, /* CMPLU_STALL */ | ||
302 | [PERF_COUNT_HW_INSTRUCTIONS] = 2, | ||
303 | [PERF_COUNT_HW_CACHE_REFERENCES] = 0xc880, /* LD_REF_L1_LSU*/ | ||
304 | [PERF_COUNT_HW_CACHE_MISSES] = 0x400f0, /* LD_MISS_L1 */ | ||
305 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x10068, /* BRU_FIN */ | ||
306 | [PERF_COUNT_HW_BRANCH_MISSES] = 0x400f6, /* BR_MPRED */ | ||
307 | }; | ||
308 | |||
309 | #define C(x) PERF_COUNT_HW_CACHE_##x | ||
310 | |||
311 | /* | ||
312 | * Table of generalized cache-related events. | ||
313 | * 0 means not supported, -1 means nonsensical, other values | ||
314 | * are event codes. | ||
315 | */ | ||
316 | static int power7_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { | ||
317 | [C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
318 | [C(OP_READ)] = { 0xc880, 0x400f0 }, | ||
319 | [C(OP_WRITE)] = { 0, 0x300f0 }, | ||
320 | [C(OP_PREFETCH)] = { 0xd8b8, 0 }, | ||
321 | }, | ||
322 | [C(L1I)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
323 | [C(OP_READ)] = { 0, 0x200fc }, | ||
324 | [C(OP_WRITE)] = { -1, -1 }, | ||
325 | [C(OP_PREFETCH)] = { 0x408a, 0 }, | ||
326 | }, | ||
327 | [C(LL)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
328 | [C(OP_READ)] = { 0x16080, 0x26080 }, | ||
329 | [C(OP_WRITE)] = { 0x16082, 0x26082 }, | ||
330 | [C(OP_PREFETCH)] = { 0, 0 }, | ||
331 | }, | ||
332 | [C(DTLB)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
333 | [C(OP_READ)] = { 0, 0x300fc }, | ||
334 | [C(OP_WRITE)] = { -1, -1 }, | ||
335 | [C(OP_PREFETCH)] = { -1, -1 }, | ||
336 | }, | ||
337 | [C(ITLB)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
338 | [C(OP_READ)] = { 0, 0x400fc }, | ||
339 | [C(OP_WRITE)] = { -1, -1 }, | ||
340 | [C(OP_PREFETCH)] = { -1, -1 }, | ||
341 | }, | ||
342 | [C(BPU)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
343 | [C(OP_READ)] = { 0x10068, 0x400f6 }, | ||
344 | [C(OP_WRITE)] = { -1, -1 }, | ||
345 | [C(OP_PREFETCH)] = { -1, -1 }, | ||
346 | }, | ||
347 | [C(NODE)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
348 | [C(OP_READ)] = { -1, -1 }, | ||
349 | [C(OP_WRITE)] = { -1, -1 }, | ||
350 | [C(OP_PREFETCH)] = { -1, -1 }, | ||
351 | }, | ||
352 | }; | ||
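/*
 * Editor's illustrative sketch, not part of the original patch: the
 * perf core packs a PERF_TYPE_HW_CACHE config as
 * id | (op << 8) | (result << 16), so an L1D read-miss request
 * resolves through the table above to 0x400f0 (LD_MISS_L1).  The
 * guard macro is hypothetical.
 */
#ifdef POWER7_PMU_SELFTEST
static void __init power7_cache_example(void)
{
        WARN_ON(power7_cache_events[C(L1D)][C(OP_READ)][C(RESULT_MISS)]
                != 0x400f0);
}
#endif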
353 | |||
354 | static struct power_pmu power7_pmu = { | ||
355 | .name = "POWER7", | ||
356 | .n_counter = 6, | ||
357 | .max_alternatives = MAX_ALT + 1, | ||
358 | .add_fields = 0x1555ul, | ||
359 | .test_adder = 0x3000ul, | ||
360 | .compute_mmcr = power7_compute_mmcr, | ||
361 | .get_constraint = power7_get_constraint, | ||
362 | .get_alternatives = power7_get_alternatives, | ||
363 | .disable_pmc = power7_disable_pmc, | ||
364 | .flags = PPMU_ALT_SIPR, | ||
365 | .n_generic = ARRAY_SIZE(power7_generic_events), | ||
366 | .generic_events = power7_generic_events, | ||
367 | .cache_events = &power7_cache_events, | ||
368 | }; | ||
369 | |||
370 | static int __init init_power7_pmu(void) | ||
371 | { | ||
372 | if (!cur_cpu_spec->oprofile_cpu_type || | ||
373 | strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power7")) | ||
374 | return -ENODEV; | ||
375 | |||
376 | return register_power_pmu(&power7_pmu); | ||
377 | } | ||
378 | |||
379 | early_initcall(init_power7_pmu); | ||
diff --git a/arch/powerpc/perf/ppc970-pmu.c b/arch/powerpc/perf/ppc970-pmu.c
new file mode 100644
index 000000000000..111eb25bb0b6
--- /dev/null
+++ b/arch/powerpc/perf/ppc970-pmu.c
@@ -0,0 +1,502 @@
1 | /* | ||
2 | * Performance counter support for PPC970-family processors. | ||
3 | * | ||
4 | * Copyright 2008-2009 Paul Mackerras, IBM Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the License, or (at your option) any later version. | ||
10 | */ | ||
11 | #include <linux/string.h> | ||
12 | #include <linux/perf_event.h> | ||
13 | #include <asm/reg.h> | ||
14 | #include <asm/cputable.h> | ||
15 | |||
16 | /* | ||
17 | * Bits in event code for PPC970 | ||
18 | */ | ||
19 | #define PM_PMC_SH 12 /* PMC number (1-based) for direct events */ | ||
20 | #define PM_PMC_MSK 0xf | ||
21 | #define PM_UNIT_SH 8 /* TTMMUX number and setting - unit select */ | ||
22 | #define PM_UNIT_MSK 0xf | ||
23 | #define PM_SPCSEL_SH 6 | ||
24 | #define PM_SPCSEL_MSK 3 | ||
25 | #define PM_BYTE_SH 4 /* Byte number of event bus to use */ | ||
26 | #define PM_BYTE_MSK 3 | ||
27 | #define PM_PMCSEL_MSK 0xf | ||
28 | |||
29 | /* Values in PM_UNIT field */ | ||
30 | #define PM_NONE 0 | ||
31 | #define PM_FPU 1 | ||
32 | #define PM_VPU 2 | ||
33 | #define PM_ISU 3 | ||
34 | #define PM_IFU 4 | ||
35 | #define PM_IDU 5 | ||
36 | #define PM_STS 6 | ||
37 | #define PM_LSU0 7 | ||
38 | #define PM_LSU1U 8 | ||
39 | #define PM_LSU1L 9 | ||
40 | #define PM_LASTUNIT 9 | ||
41 | |||
42 | /* | ||
43 | * Bits in MMCR0 for PPC970 | ||
44 | */ | ||
45 | #define MMCR0_PMC1SEL_SH 8 | ||
46 | #define MMCR0_PMC2SEL_SH 1 | ||
47 | #define MMCR_PMCSEL_MSK 0x1f | ||
48 | |||
49 | /* | ||
50 | * Bits in MMCR1 for PPC970 | ||
51 | */ | ||
52 | #define MMCR1_TTM0SEL_SH 62 | ||
53 | #define MMCR1_TTM1SEL_SH 59 | ||
54 | #define MMCR1_TTM3SEL_SH 53 | ||
55 | #define MMCR1_TTMSEL_MSK 3 | ||
56 | #define MMCR1_TD_CP_DBG0SEL_SH 50 | ||
57 | #define MMCR1_TD_CP_DBG1SEL_SH 48 | ||
58 | #define MMCR1_TD_CP_DBG2SEL_SH 46 | ||
59 | #define MMCR1_TD_CP_DBG3SEL_SH 44 | ||
60 | #define MMCR1_PMC1_ADDER_SEL_SH 39 | ||
61 | #define MMCR1_PMC2_ADDER_SEL_SH 38 | ||
62 | #define MMCR1_PMC6_ADDER_SEL_SH 37 | ||
63 | #define MMCR1_PMC5_ADDER_SEL_SH 36 | ||
64 | #define MMCR1_PMC8_ADDER_SEL_SH 35 | ||
65 | #define MMCR1_PMC7_ADDER_SEL_SH 34 | ||
66 | #define MMCR1_PMC3_ADDER_SEL_SH 33 | ||
67 | #define MMCR1_PMC4_ADDER_SEL_SH 32 | ||
68 | #define MMCR1_PMC3SEL_SH 27 | ||
69 | #define MMCR1_PMC4SEL_SH 22 | ||
70 | #define MMCR1_PMC5SEL_SH 17 | ||
71 | #define MMCR1_PMC6SEL_SH 12 | ||
72 | #define MMCR1_PMC7SEL_SH 7 | ||
73 | #define MMCR1_PMC8SEL_SH 2 | ||
74 | |||
75 | static short mmcr1_adder_bits[8] = { | ||
76 | MMCR1_PMC1_ADDER_SEL_SH, | ||
77 | MMCR1_PMC2_ADDER_SEL_SH, | ||
78 | MMCR1_PMC3_ADDER_SEL_SH, | ||
79 | MMCR1_PMC4_ADDER_SEL_SH, | ||
80 | MMCR1_PMC5_ADDER_SEL_SH, | ||
81 | MMCR1_PMC6_ADDER_SEL_SH, | ||
82 | MMCR1_PMC7_ADDER_SEL_SH, | ||
83 | MMCR1_PMC8_ADDER_SEL_SH | ||
84 | }; | ||
85 | |||
86 | /* | ||
87 | * Layout of constraint bits: | ||
88 | * 6666555555555544444444443333333333222222222211111111110000000000 | ||
89 | * 3210987654321098765432109876543210987654321098765432109876543210 | ||
90 | * <><><>[ >[ >[ >< >< >< >< ><><><><><><><><> | ||
91 | * SPT0T1 UC PS1 PS2 B0 B1 B2 B3 P1P2P3P4P5P6P7P8 | ||
92 | * | ||
93 | * SP - SPCSEL constraint | ||
94 | * 48-49: SPCSEL value 0x3_0000_0000_0000 | ||
95 | * | ||
96 | * T0 - TTM0 constraint | ||
97 | * 46-47: TTM0SEL value (0=FPU, 2=IFU, 3=VPU) 0xC000_0000_0000 | ||
98 | * | ||
99 | * T1 - TTM1 constraint | ||
100 | * 44-45: TTM1SEL value (0=IDU, 3=STS) 0x3000_0000_0000 | ||
101 | * | ||
102 | * UC - unit constraint: can't have all three of FPU|IFU|VPU, ISU, IDU|STS | ||
103 | * 43: UC3 error 0x0800_0000_0000 | ||
104 | * 42: FPU|IFU|VPU events needed 0x0400_0000_0000 | ||
105 | * 41: ISU events needed 0x0200_0000_0000 | ||
106 | * 40: IDU|STS events needed 0x0100_0000_0000 | ||
107 | * | ||
108 | * PS1 | ||
109 | * 39: PS1 error 0x0080_0000_0000 | ||
110 | * 36-38: count of events needing PMC1/2/5/6 0x0070_0000_0000 | ||
111 | * | ||
112 | * PS2 | ||
113 | * 35: PS2 error 0x0008_0000_0000 | ||
114 | * 32-34: count of events needing PMC3/4/7/8 0x0007_0000_0000 | ||
115 | * | ||
116 | * B0 | ||
117 | * 28-31: Byte 0 event source 0xf000_0000 | ||
118 | * Encoding as for the event code | ||
119 | * | ||
120 | * B1, B2, B3 | ||
121 | * 24-27, 20-23, 16-19: Byte 1, 2, 3 event sources | ||
122 | * | ||
123 | * P1 | ||
124 | * 15: P1 error 0x8000 | ||
125 | * 14-15: Count of events needing PMC1 | ||
126 | * | ||
127 | * P2..P8 | ||
128 | * 0-13: Count of events needing PMC2..PMC8 | ||
129 | */ | ||
130 | |||
131 | static unsigned char direct_marked_event[8] = { | ||
132 | (1<<2) | (1<<3), /* PMC1: PM_MRK_GRP_DISP, PM_MRK_ST_CMPL */ | ||
133 | (1<<3) | (1<<5), /* PMC2: PM_THRESH_TIMEO, PM_MRK_BRU_FIN */ | ||
134 | (1<<3) | (1<<5), /* PMC3: PM_MRK_ST_CMPL_INT, PM_MRK_VMX_FIN */ | ||
135 | (1<<4) | (1<<5), /* PMC4: PM_MRK_GRP_CMPL, PM_MRK_CRU_FIN */ | ||
136 | (1<<4) | (1<<5), /* PMC5: PM_GRP_MRK, PM_MRK_GRP_TIMEO */ | ||
137 | (1<<3) | (1<<4) | (1<<5), | ||
138 | /* PMC6: PM_MRK_ST_STS, PM_MRK_FXU_FIN, PM_MRK_GRP_ISSUED */ | ||
139 | (1<<4) | (1<<5), /* PMC7: PM_MRK_FPU_FIN, PM_MRK_INST_FIN */ | ||
140 | (1<<4) /* PMC8: PM_MRK_LSU_FIN */ | ||
141 | }; | ||
142 | |||
143 | /* | ||
144 | * Returns 1 if event counts things relating to marked instructions | ||
145 | * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not. | ||
146 | */ | ||
147 | static int p970_marked_instr_event(u64 event) | ||
148 | { | ||
149 | int pmc, psel, unit, byte, bit; | ||
150 | unsigned int mask; | ||
151 | |||
152 | pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; | ||
153 | psel = event & PM_PMCSEL_MSK; | ||
154 | if (pmc) { | ||
155 | if (direct_marked_event[pmc - 1] & (1 << psel)) | ||
156 | return 1; | ||
157 | if (psel == 0) /* add events */ | ||
158 | bit = (pmc <= 4)? pmc - 1: 8 - pmc; | ||
159 | else if (psel == 7 || psel == 13) /* decode events */ | ||
160 | bit = 4; | ||
161 | else | ||
162 | return 0; | ||
163 | } else | ||
164 | bit = psel; | ||
165 | |||
166 | byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK; | ||
167 | unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK; | ||
168 | mask = 0; | ||
169 | switch (unit) { | ||
170 | case PM_VPU: | ||
171 | mask = 0x4c; /* byte 0 bits 2,3,6 */ | ||
172 | break; | ||
173 | case PM_LSU0: | ||
174 | /* byte 2 bits 0,2,3,4,6; all of byte 1 */ | ||
175 | mask = 0x085dff00; | ||
176 | break; | ||
177 | case PM_LSU1L: | ||
178 | mask = 0x50 << 24; /* byte 3 bits 4,6 */ | ||
179 | break; | ||
180 | } | ||
181 | return (mask >> (byte * 8 + bit)) & 1; | ||
182 | } | ||

/* Masks and values for using events from the various units */
static unsigned long unit_cons[PM_LASTUNIT+1][2] = {
	[PM_FPU] = {   0xc80000000000ull,  0x040000000000ull },
	[PM_VPU] = {   0xc80000000000ull,  0xc40000000000ull },
	[PM_ISU] = {   0x080000000000ull,  0x020000000000ull },
	[PM_IFU] = {   0xc80000000000ull,  0x840000000000ull },
	[PM_IDU] = {   0x380000000000ull,  0x010000000000ull },
	[PM_STS] = {   0x380000000000ull,  0x310000000000ull },
};

static int p970_get_constraint(u64 event, unsigned long *maskp,
			       unsigned long *valp)
{
	int pmc, byte, unit, sh, spcsel;
	unsigned long mask = 0, value = 0;
	int grp = -1;

	pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
	if (pmc) {
		if (pmc > 8)
			return -1;
		sh = (pmc - 1) * 2;
		mask |= 2 << sh;
		value |= 1 << sh;
		grp = ((pmc - 1) >> 1) & 1;
	}
	unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
	if (unit) {
		if (unit > PM_LASTUNIT)
			return -1;
		mask |= unit_cons[unit][0];
		value |= unit_cons[unit][1];
		byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
		/*
		 * Bus events on bytes 0 and 2 can be counted
		 * on PMC1/2/5/6; bytes 1 and 3 on PMC3/4/7/8.
		 */
		if (!pmc)
			grp = byte & 1;
		/* Set byte lane select field */
		mask |= 0xfULL << (28 - 4 * byte);
		value |= (unsigned long)unit << (28 - 4 * byte);
	}
	if (grp == 0) {
		/* increment PMC1/2/5/6 field */
		mask |= 0x8000000000ull;
		value |= 0x1000000000ull;
	} else if (grp == 1) {
		/* increment PMC3/4/7/8 field */
		mask |= 0x800000000ull;
		value |= 0x100000000ull;
	}
	spcsel = (event >> PM_SPCSEL_SH) & PM_SPCSEL_MSK;
	if (spcsel) {
		mask |= 3ull << 48;
		value |= (unsigned long)spcsel << 48;
	}
	*maskp = mask;
	*valp = value;
	return 0;
}
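/*
 * Minimal usage sketch for the constraint machinery above -- kept
 * under #if 0 since it is illustrative only and not part of the
 * driver.  The event code is one of the LSU-empty codes used further
 * down; anything else here is just for show.
 */
#if 0
static void p970_constraint_example(void)
{
	unsigned long mask, value;
	u64 ev = 0x2002;	/* LSU empty on PMC2 */

	if (p970_get_constraint(ev, &mask, &value) == 0)
		pr_debug("event %llx: mask %lx value %lx\n",
			 (unsigned long long)ev, mask, value);
}
#endif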

static int p970_get_alternatives(u64 event, unsigned int flags, u64 alt[])
{
	alt[0] = event;

	/* 2 alternatives for LSU empty */
	if (event == 0x2002 || event == 0x3002) {
		alt[1] = event ^ 0x1000;
		return 2;
	}

	return 1;
}
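/*
 * In other words: assuming the PMC field layout defined earlier in
 * this file, the LSU-empty event has two encodings, one per PMC, and
 * XORing with 0x1000 flips between them: alt[] = { 0x2002, 0x3002 }.
 * Every other event keeps only the single encoding it was given.
 */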

static int p970_compute_mmcr(u64 event[], int n_ev,
			     unsigned int hwc[], unsigned long mmcr[])
{
	unsigned long mmcr0 = 0, mmcr1 = 0, mmcra = 0;
	unsigned int pmc, unit, byte, psel;
	unsigned int ttm, grp;
	unsigned int pmc_inuse = 0;
	unsigned int pmc_grp_use[2];
	unsigned char busbyte[4];
	unsigned char unituse[16];
	/* TTM mux setting per unit: bit 2 = unit lives on TTM1;
	 * the remaining bits hold the (pre-shifted) TTMxSEL value. */
	unsigned char unitmap[] = { 0, 0<<3, 3<<3, 1<<3, 2<<3, 0|4, 3|4 };
	unsigned char ttmuse[2];
	unsigned char pmcsel[8];
	int i;
	int spcsel;

	if (n_ev > 8)
		return -1;

	/* First pass to count resource use */
	pmc_grp_use[0] = pmc_grp_use[1] = 0;
	memset(busbyte, 0, sizeof(busbyte));
	memset(unituse, 0, sizeof(unituse));
	for (i = 0; i < n_ev; ++i) {
		pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
		if (pmc) {
			if (pmc_inuse & (1 << (pmc - 1)))
				return -1;
			pmc_inuse |= 1 << (pmc - 1);
			/* count 1/2/5/6 vs 3/4/7/8 use */
			++pmc_grp_use[((pmc - 1) >> 1) & 1];
		}
		unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
		byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
		if (unit) {
			if (unit > PM_LASTUNIT)
				return -1;
			if (!pmc)
				++pmc_grp_use[byte & 1];
			if (busbyte[byte] && busbyte[byte] != unit)
				return -1;
			busbyte[byte] = unit;
			unituse[unit] = 1;
		}
	}
	if (pmc_grp_use[0] > 4 || pmc_grp_use[1] > 4)
		return -1;

	/*
	 * Assign resources and set multiplexer selects.
	 *
	 * PM_ISU can go either on TTM0 or TTM1, but that's the only
	 * choice we have to deal with.
	 */
	if (unituse[PM_ISU] &
	    (unituse[PM_FPU] | unituse[PM_IFU] | unituse[PM_VPU]))
		unitmap[PM_ISU] = 2 | 4;	/* move ISU to TTM1 */
	/* Set TTM[01]SEL fields. */
	ttmuse[0] = ttmuse[1] = 0;
	for (i = PM_FPU; i <= PM_STS; ++i) {
		if (!unituse[i])
			continue;
		ttm = unitmap[i];
		++ttmuse[(ttm >> 2) & 1];
		mmcr1 |= (unsigned long)(ttm & ~4) << MMCR1_TTM1SEL_SH;
	}
	/* Check only one unit per TTMx */
	if (ttmuse[0] > 1 || ttmuse[1] > 1)
		return -1;

	/* Set byte lane select fields and TTM3SEL. */
	for (byte = 0; byte < 4; ++byte) {
		unit = busbyte[byte];
		if (!unit)
			continue;
		if (unit <= PM_STS)
			ttm = (unitmap[unit] >> 2) & 1;
		else if (unit == PM_LSU0)
			ttm = 2;
		else {
			ttm = 3;
			if (unit == PM_LSU1L && byte >= 2)
				mmcr1 |= 1ull << (MMCR1_TTM3SEL_SH + 3 - byte);
		}
		mmcr1 |= (unsigned long)ttm
			<< (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte);
	}

	/* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */
	memset(pmcsel, 0x8, sizeof(pmcsel));	/* 8 means don't count */
	for (i = 0; i < n_ev; ++i) {
		pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
		unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
		byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
		psel = event[i] & PM_PMCSEL_MSK;
		if (!pmc) {
			/* Bus event or any-PMC direct event */
			if (unit)
				psel |= 0x10 | ((byte & 2) << 2);
			else
				psel |= 8;
			for (pmc = 0; pmc < 8; ++pmc) {
				if (pmc_inuse & (1 << pmc))
					continue;
				grp = (pmc >> 1) & 1;
				if (unit) {
					if (grp == (byte & 1))
						break;
				} else if (pmc_grp_use[grp] < 4) {
					++pmc_grp_use[grp];
					break;
				}
			}
			pmc_inuse |= 1 << pmc;
		} else {
			/* Direct event */
			--pmc;
			if (psel == 0 && (byte & 2))
				/* add events on higher-numbered bus */
				mmcr1 |= 1ull << mmcr1_adder_bits[pmc];
		}
		pmcsel[pmc] = psel;
		hwc[i] = pmc;
		spcsel = (event[i] >> PM_SPCSEL_SH) & PM_SPCSEL_MSK;
		mmcr1 |= spcsel;
		if (p970_marked_instr_event(event[i]))
			mmcra |= MMCRA_SAMPLE_ENABLE;
	}
	for (pmc = 0; pmc < 2; ++pmc)
		mmcr0 |= pmcsel[pmc] << (MMCR0_PMC1SEL_SH - 7 * pmc);
	for (; pmc < 8; ++pmc)
		mmcr1 |= (unsigned long)pmcsel[pmc]
			<< (MMCR1_PMC3SEL_SH - 5 * (pmc - 2));
	if (pmc_inuse & 1)
		mmcr0 |= MMCR0_PMC1CE;
	if (pmc_inuse & 0xfe)
		mmcr0 |= MMCR0_PMCjCE;

	mmcra |= 0x2000;	/* mark only one IOP per PPC instruction */

	/* Return MMCRx values */
	mmcr[0] = mmcr0;
	mmcr[1] = mmcr1;
	mmcr[2] = mmcra;
	return 0;
}
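/*
 * A minimal usage sketch (illustrative only, hence #if 0): scheduling
 * the cycles (7) and instructions (1) generic events from the table
 * below through the two-pass allocator above.  Both are any-PMC direct
 * events with no unit field, so they land on the first free counters
 * (PMC1 and PMC2 here).
 */
#if 0
static void p970_compute_mmcr_example(void)
{
	u64 events[2] = { 7, 1 };	/* cycles, instructions */
	unsigned int hwc[2];
	unsigned long mmcr[3];

	if (p970_compute_mmcr(events, 2, hwc, mmcr) == 0)
		pr_debug("PMC%u/PMC%u: MMCR0 %lx MMCR1 %lx MMCRA %lx\n",
			 hwc[0] + 1, hwc[1] + 1, mmcr[0], mmcr[1], mmcr[2]);
}
#endif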

static void p970_disable_pmc(unsigned int pmc, unsigned long mmcr[])
{
	int shift, i;

	if (pmc <= 1) {
		shift = MMCR0_PMC1SEL_SH - 7 * pmc;
		i = 0;
	} else {
		shift = MMCR1_PMC3SEL_SH - 5 * (pmc - 2);
		i = 1;
	}
	/*
	 * Setting the PMCxSEL field to 0x08 disables PMC x.
	 */
	mmcr[i] = (mmcr[i] & ~(0x1fUL << shift)) | (0x08UL << shift);
}
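/*
 * For example (pmc is zero-based here): disabling PMC1 (pmc == 0)
 * rewrites the PMC1SEL field in MMCR0, while disabling PMC5 (pmc == 4)
 * rewrites the select field in MMCR1 at shift MMCR1_PMC3SEL_SH - 10 --
 * in both cases by masking out the old select and writing the
 * "don't count" value 0x08.
 */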

static int ppc970_generic_events[] = {
	[PERF_COUNT_HW_CPU_CYCLES]		= 7,
	[PERF_COUNT_HW_INSTRUCTIONS]		= 1,
	[PERF_COUNT_HW_CACHE_REFERENCES]	= 0x8810, /* PM_LD_REF_L1 */
	[PERF_COUNT_HW_CACHE_MISSES]		= 0x3810, /* PM_LD_MISS_L1 */
	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x431,  /* PM_BR_ISSUED */
	[PERF_COUNT_HW_BRANCH_MISSES]		= 0x327,  /* PM_GRP_BR_MPRED */
};

#define C(x)	PERF_COUNT_HW_CACHE_##x

/*
 * Table of generalized cache-related events.
 * 0 means not supported, -1 means nonsensical, other values
 * are event codes.
 */
static int ppc970_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
	[C(L1D)] = {		/*	RESULT_ACCESS	RESULT_MISS */
		[C(OP_READ)] = {	0x8810,		0x3810	},
		[C(OP_WRITE)] = {	0x7810,		0x813	},
		[C(OP_PREFETCH)] = {	0x731,		0	},
	},
	[C(L1I)] = {		/*	RESULT_ACCESS	RESULT_MISS */
		[C(OP_READ)] = {	0,		0	},
		[C(OP_WRITE)] = {	-1,		-1	},
		[C(OP_PREFETCH)] = {	0,		0	},
	},
	[C(LL)] = {		/*	RESULT_ACCESS	RESULT_MISS */
		[C(OP_READ)] = {	0,		0	},
		[C(OP_WRITE)] = {	0,		0	},
		[C(OP_PREFETCH)] = {	0x733,		0	},
	},
	[C(DTLB)] = {		/*	RESULT_ACCESS	RESULT_MISS */
		[C(OP_READ)] = {	0,		0x704	},
		[C(OP_WRITE)] = {	-1,		-1	},
		[C(OP_PREFETCH)] = {	-1,		-1	},
	},
	[C(ITLB)] = {		/*	RESULT_ACCESS	RESULT_MISS */
		[C(OP_READ)] = {	0,		0x700	},
		[C(OP_WRITE)] = {	-1,		-1	},
		[C(OP_PREFETCH)] = {	-1,		-1	},
	},
	[C(BPU)] = {		/*	RESULT_ACCESS	RESULT_MISS */
		[C(OP_READ)] = {	0x431,		0x327	},
		[C(OP_WRITE)] = {	-1,		-1	},
		[C(OP_PREFETCH)] = {	-1,		-1	},
	},
	[C(NODE)] = {		/*	RESULT_ACCESS	RESULT_MISS */
		[C(OP_READ)] = {	-1,		-1	},
		[C(OP_WRITE)] = {	-1,		-1	},
		[C(OP_PREFETCH)] = {	-1,		-1	},
	},
};
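/*
 * Example lookup: ppc970_cache_events[C(L1D)][C(OP_READ)][C(RESULT_MISS)]
 * yields 0x3810 (PM_LD_MISS_L1), the same raw code the generic
 * PERF_COUNT_HW_CACHE_MISSES event maps to above.
 */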

static struct power_pmu ppc970_pmu = {
	.name			= "PPC970/FX/MP",
	.n_counter		= 8,
	.max_alternatives	= 2,
	.add_fields		= 0x001100005555ull,
	.test_adder		= 0x013300000000ull,
	.compute_mmcr		= p970_compute_mmcr,
	.get_constraint		= p970_get_constraint,
	.get_alternatives	= p970_get_alternatives,
	.disable_pmc		= p970_disable_pmc,
	.n_generic		= ARRAY_SIZE(ppc970_generic_events),
	.generic_events		= ppc970_generic_events,
	.cache_events		= &ppc970_cache_events,
};

static int __init init_ppc970_pmu(void)
{
	if (!cur_cpu_spec->oprofile_cpu_type ||
	    (strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/970")
	     && strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/970MP")))
		return -ENODEV;

	return register_power_pmu(&ppc970_pmu);
}

early_initcall(init_ppc970_pmu);