diff options
author | Paul Mundt <lethal@linux-sh.org> | 2009-10-28 04:57:54 -0400 |
---|---|---|
committer | Paul Mundt <lethal@linux-sh.org> | 2009-10-28 04:57:54 -0400 |
commit | ac44e6694755744fe96442919da1f2c7e87a2a61 (patch) | |
tree | 155ecdb56348513eb434df87edfccc779c40cc1a /arch/sh/kernel/cpu | |
parent | 3714a9a026bba09a58e7cf06e0c23c67da6841c2 (diff) |
sh: perf events: Add preliminary support for SH-4A counters.
This adds in preliminary support for the SH-4A performance counters.
Presently only the first 2 counters are supported, as these are the ones
of the most interest to the perf tool and end users. Counter chaining is
not presently handled, so these are simply implemented as 32-bit
counters.
This also establishes a perf event support framework for other hardware
counters, which the existing SH-4 oprofile code will migrate over to as
the SH-4A support evolves.
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
Diffstat (limited to 'arch/sh/kernel/cpu')
-rw-r--r-- | arch/sh/kernel/cpu/sh4a/Makefile | 1 | ||||
-rw-r--r-- | arch/sh/kernel/cpu/sh4a/perf_event.c | 231 |
2 files changed, 232 insertions, 0 deletions
diff --git a/arch/sh/kernel/cpu/sh4a/Makefile b/arch/sh/kernel/cpu/sh4a/Makefile index 490d5dc9e372..33bab477d2e2 100644 --- a/arch/sh/kernel/cpu/sh4a/Makefile +++ b/arch/sh/kernel/cpu/sh4a/Makefile | |||
@@ -44,3 +44,4 @@ pinmux-$(CONFIG_CPU_SUBTYPE_SH7786) := pinmux-sh7786.o | |||
44 | obj-y += $(clock-y) | 44 | obj-y += $(clock-y) |
45 | obj-$(CONFIG_SMP) += $(smp-y) | 45 | obj-$(CONFIG_SMP) += $(smp-y) |
46 | obj-$(CONFIG_GENERIC_GPIO) += $(pinmux-y) | 46 | obj-$(CONFIG_GENERIC_GPIO) += $(pinmux-y) |
47 | obj-$(CONFIG_PERF_EVENTS) += perf_event.o | ||
diff --git a/arch/sh/kernel/cpu/sh4a/perf_event.c b/arch/sh/kernel/cpu/sh4a/perf_event.c new file mode 100644 index 000000000000..d0938345799f --- /dev/null +++ b/arch/sh/kernel/cpu/sh4a/perf_event.c | |||
@@ -0,0 +1,231 @@ | |||
1 | /* | ||
2 | * Performance events support for SH-4A performance counters | ||
3 | * | ||
4 | * Copyright (C) 2009 Paul Mundt | ||
5 | * | ||
6 | * This file is subject to the terms and conditions of the GNU General Public | ||
7 | * License. See the file "COPYING" in the main directory of this archive | ||
8 | * for more details. | ||
9 | */ | ||
10 | #include <linux/kernel.h> | ||
11 | #include <linux/init.h> | ||
12 | #include <linux/io.h> | ||
13 | #include <linux/irq.h> | ||
14 | #include <linux/perf_event.h> | ||
15 | #include <asm/processor.h> | ||
16 | |||
/*
 * Per-counter register addresses. Consecutive counters are spaced
 * sizeof(u32) bytes apart. The index is parenthesized so that expression
 * arguments such as PPC_CCBR(i + 1) expand correctly.
 */
#define PPC_CCBR(idx)	(0xff200800 + (sizeof(u32) * (idx)))
#define PPC_PMCTR(idx)	(0xfc100000 + (sizeof(u32) * (idx)))

/*
 * CCBR fields. CCBR_CIT_MASK covers the 11-bit field starting at bit 6,
 * which is where sh4a_pmu_enable() places the event code. CCBR_DUC is the
 * bit toggled to start and stop a counter.
 */
#define CCBR_CIT_MASK	(0x7ff << 6)
#define CCBR_DUC	(1 << 3)
#define CCBR_CMDS	(1 << 1)
#define CCBR_PPCE	(1 << 0)

/* Single PMCAT register shared by all counters. */
#define PPC_PMCAT	0xfc100080

/*
 * PMCAT bits, one group per counter 0-3. Only PMCAT_CLR0 and PMCAT_CLR1
 * are used in this file; the others are defined for completeness.
 */
#define PMCAT_OVF3	(1 << 27)
#define PMCAT_CNN3	(1 << 26)
#define PMCAT_CLR3	(1 << 25)
#define PMCAT_OVF2	(1 << 19)
#define PMCAT_CLR2	(1 << 17)
#define PMCAT_OVF1	(1 << 11)
#define PMCAT_CNN1	(1 << 10)
#define PMCAT_CLR1	(1 << 9)
#define PMCAT_OVF0	(1 << 3)
#define PMCAT_CLR0	(1 << 1)
37 | |||
38 | static struct sh_pmu sh4a_pmu; | ||
39 | |||
/*
 * Reserved bits used by hardware emulators. Their read values vary, but
 * they must always be written back as 0, so this mask is cleared from any
 * value before it is written to PMCAT.
 */
#define PMCAT_EMU_CLR_MASK	((1 << 0) | (1 << 8) | (1 << 16) | (1 << 24))
45 | |||
46 | static const int sh4a_general_events[] = { | ||
47 | [PERF_COUNT_HW_CPU_CYCLES] = 0x0000, | ||
48 | [PERF_COUNT_HW_INSTRUCTIONS] = 0x0202, | ||
49 | [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0029, /* I-cache */ | ||
50 | [PERF_COUNT_HW_CACHE_MISSES] = 0x002a, /* I-cache */ | ||
51 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x0204, | ||
52 | [PERF_COUNT_HW_BRANCH_MISSES] = -1, | ||
53 | [PERF_COUNT_HW_BUS_CYCLES] = -1, | ||
54 | }; | ||
55 | |||
56 | #define C(x) PERF_COUNT_HW_CACHE_##x | ||
57 | |||
58 | static const int sh4a_cache_events | ||
59 | [PERF_COUNT_HW_CACHE_MAX] | ||
60 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
61 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | ||
62 | { | ||
63 | [ C(L1D) ] = { | ||
64 | [ C(OP_READ) ] = { | ||
65 | [ C(RESULT_ACCESS) ] = 0x0031, | ||
66 | [ C(RESULT_MISS) ] = 0x0032, | ||
67 | }, | ||
68 | [ C(OP_WRITE) ] = { | ||
69 | [ C(RESULT_ACCESS) ] = 0x0039, | ||
70 | [ C(RESULT_MISS) ] = 0x003a, | ||
71 | }, | ||
72 | [ C(OP_PREFETCH) ] = { | ||
73 | [ C(RESULT_ACCESS) ] = 0, | ||
74 | [ C(RESULT_MISS) ] = 0, | ||
75 | }, | ||
76 | }, | ||
77 | |||
78 | [ C(L1I) ] = { | ||
79 | [ C(OP_READ) ] = { | ||
80 | [ C(RESULT_ACCESS) ] = 0x0029, | ||
81 | [ C(RESULT_MISS) ] = 0x002a, | ||
82 | }, | ||
83 | [ C(OP_WRITE) ] = { | ||
84 | [ C(RESULT_ACCESS) ] = -1, | ||
85 | [ C(RESULT_MISS) ] = -1, | ||
86 | }, | ||
87 | [ C(OP_PREFETCH) ] = { | ||
88 | [ C(RESULT_ACCESS) ] = 0, | ||
89 | [ C(RESULT_MISS) ] = 0, | ||
90 | }, | ||
91 | }, | ||
92 | |||
93 | [ C(LL) ] = { | ||
94 | [ C(OP_READ) ] = { | ||
95 | [ C(RESULT_ACCESS) ] = 0x0030, | ||
96 | [ C(RESULT_MISS) ] = 0, | ||
97 | }, | ||
98 | [ C(OP_WRITE) ] = { | ||
99 | [ C(RESULT_ACCESS) ] = 0x0038, | ||
100 | [ C(RESULT_MISS) ] = 0, | ||
101 | }, | ||
102 | [ C(OP_PREFETCH) ] = { | ||
103 | [ C(RESULT_ACCESS) ] = 0, | ||
104 | [ C(RESULT_MISS) ] = 0, | ||
105 | }, | ||
106 | }, | ||
107 | |||
108 | [ C(DTLB) ] = { | ||
109 | [ C(OP_READ) ] = { | ||
110 | [ C(RESULT_ACCESS) ] = 0x0222, | ||
111 | [ C(RESULT_MISS) ] = 0x0220, | ||
112 | }, | ||
113 | [ C(OP_WRITE) ] = { | ||
114 | [ C(RESULT_ACCESS) ] = 0, | ||
115 | [ C(RESULT_MISS) ] = 0, | ||
116 | }, | ||
117 | [ C(OP_PREFETCH) ] = { | ||
118 | [ C(RESULT_ACCESS) ] = 0, | ||
119 | [ C(RESULT_MISS) ] = 0, | ||
120 | }, | ||
121 | }, | ||
122 | |||
123 | [ C(ITLB) ] = { | ||
124 | [ C(OP_READ) ] = { | ||
125 | [ C(RESULT_ACCESS) ] = 0, | ||
126 | [ C(RESULT_MISS) ] = 0x02a0, | ||
127 | }, | ||
128 | [ C(OP_WRITE) ] = { | ||
129 | [ C(RESULT_ACCESS) ] = -1, | ||
130 | [ C(RESULT_MISS) ] = -1, | ||
131 | }, | ||
132 | [ C(OP_PREFETCH) ] = { | ||
133 | [ C(RESULT_ACCESS) ] = -1, | ||
134 | [ C(RESULT_MISS) ] = -1, | ||
135 | }, | ||
136 | }, | ||
137 | |||
138 | [ C(BPU) ] = { | ||
139 | [ C(OP_READ) ] = { | ||
140 | [ C(RESULT_ACCESS) ] = -1, | ||
141 | [ C(RESULT_MISS) ] = -1, | ||
142 | }, | ||
143 | [ C(OP_WRITE) ] = { | ||
144 | [ C(RESULT_ACCESS) ] = -1, | ||
145 | [ C(RESULT_MISS) ] = -1, | ||
146 | }, | ||
147 | [ C(OP_PREFETCH) ] = { | ||
148 | [ C(RESULT_ACCESS) ] = -1, | ||
149 | [ C(RESULT_MISS) ] = -1, | ||
150 | }, | ||
151 | }, | ||
152 | }; | ||
153 | |||
154 | static int sh4a_event_map(int event) | ||
155 | { | ||
156 | return sh4a_general_events[event]; | ||
157 | } | ||
158 | |||
159 | static u64 sh4a_pmu_read(int idx) | ||
160 | { | ||
161 | return __raw_readl(PPC_PMCTR(idx)); | ||
162 | } | ||
163 | |||
164 | static void sh4a_pmu_disable(struct hw_perf_event *hwc, int idx) | ||
165 | { | ||
166 | unsigned int tmp; | ||
167 | |||
168 | tmp = __raw_readl(PPC_CCBR(idx)); | ||
169 | tmp &= ~(CCBR_CIT_MASK | CCBR_DUC); | ||
170 | __raw_writel(tmp, PPC_CCBR(idx)); | ||
171 | } | ||
172 | |||
173 | static void sh4a_pmu_enable(struct hw_perf_event *hwc, int idx) | ||
174 | { | ||
175 | unsigned int tmp; | ||
176 | |||
177 | tmp = __raw_readl(PPC_PMCAT); | ||
178 | tmp &= ~PMCAT_EMU_CLR_MASK; | ||
179 | tmp |= idx ? PMCAT_CLR1 : PMCAT_CLR0; | ||
180 | __raw_writel(tmp, PPC_PMCAT); | ||
181 | |||
182 | tmp = __raw_readl(PPC_CCBR(idx)); | ||
183 | tmp |= (hwc->config << 6) | CCBR_CMDS | CCBR_PPCE; | ||
184 | __raw_writel(tmp, PPC_CCBR(idx)); | ||
185 | |||
186 | __raw_writel(__raw_readl(PPC_CCBR(idx)) | CCBR_DUC, PPC_CCBR(idx)); | ||
187 | } | ||
188 | |||
189 | static void sh4a_pmu_disable_all(void) | ||
190 | { | ||
191 | int i; | ||
192 | |||
193 | for (i = 0; i < sh4a_pmu.num_events; i++) | ||
194 | __raw_writel(__raw_readl(PPC_CCBR(i)) & ~CCBR_DUC, PPC_CCBR(i)); | ||
195 | } | ||
196 | |||
197 | static void sh4a_pmu_enable_all(void) | ||
198 | { | ||
199 | int i; | ||
200 | |||
201 | for (i = 0; i < sh4a_pmu.num_events; i++) | ||
202 | __raw_writel(__raw_readl(PPC_CCBR(i)) | CCBR_DUC, PPC_CCBR(i)); | ||
203 | } | ||
204 | |||
205 | static struct sh_pmu sh4a_pmu = { | ||
206 | .name = "SH-4A", | ||
207 | .num_events = 2, | ||
208 | .event_map = sh4a_event_map, | ||
209 | .max_events = ARRAY_SIZE(sh4a_general_events), | ||
210 | .raw_event_mask = 0x3ff, | ||
211 | .cache_events = &sh4a_cache_events, | ||
212 | .read = sh4a_pmu_read, | ||
213 | .disable = sh4a_pmu_disable, | ||
214 | .enable = sh4a_pmu_enable, | ||
215 | .disable_all = sh4a_pmu_disable_all, | ||
216 | .enable_all = sh4a_pmu_enable_all, | ||
217 | }; | ||
218 | |||
219 | static int __init sh4a_pmu_init(void) | ||
220 | { | ||
221 | /* | ||
222 | * Make sure this CPU actually has perf counters. | ||
223 | */ | ||
224 | if (!(boot_cpu_data.flags & CPU_HAS_PERF_COUNTER)) { | ||
225 | pr_notice("HW perf events unsupported, software events only.\n"); | ||
226 | return -ENODEV; | ||
227 | } | ||
228 | |||
229 | return register_sh_pmu(&sh4a_pmu); | ||
230 | } | ||
231 | arch_initcall(sh4a_pmu_init); | ||