diff options
-rw-r--r-- | arch/sh/kernel/cpu/sh4/Makefile | 5 | ||||
-rw-r--r-- | arch/sh/kernel/cpu/sh4/perf_event.c | 253 |
2 files changed, 258 insertions, 0 deletions
diff --git a/arch/sh/kernel/cpu/sh4/Makefile b/arch/sh/kernel/cpu/sh4/Makefile index 203b18347b83..c39c1235db91 100644 --- a/arch/sh/kernel/cpu/sh4/Makefile +++ b/arch/sh/kernel/cpu/sh4/Makefile | |||
@@ -9,6 +9,11 @@ obj-$(CONFIG_HIBERNATION) += $(addprefix ../sh3/, swsusp.o) | |||
9 | obj-$(CONFIG_SH_FPU) += fpu.o softfloat.o | 9 | obj-$(CONFIG_SH_FPU) += fpu.o softfloat.o |
10 | obj-$(CONFIG_SH_STORE_QUEUES) += sq.o | 10 | obj-$(CONFIG_SH_STORE_QUEUES) += sq.o |
11 | 11 | ||
12 | # Perf events | ||
13 | obj-$(CONFIG_CPU_SUBTYPE_SH7750) += perf_event.o | ||
14 | obj-$(CONFIG_CPU_SUBTYPE_SH7750S) += perf_event.o | ||
15 | obj-$(CONFIG_CPU_SUBTYPE_SH7091) += perf_event.o | ||
16 | |||
12 | # CPU subtype setup | 17 | # CPU subtype setup |
13 | obj-$(CONFIG_CPU_SUBTYPE_SH7750) += setup-sh7750.o | 18 | obj-$(CONFIG_CPU_SUBTYPE_SH7750) += setup-sh7750.o |
14 | obj-$(CONFIG_CPU_SUBTYPE_SH7750R) += setup-sh7750.o | 19 | obj-$(CONFIG_CPU_SUBTYPE_SH7750R) += setup-sh7750.o |
diff --git a/arch/sh/kernel/cpu/sh4/perf_event.c b/arch/sh/kernel/cpu/sh4/perf_event.c new file mode 100644 index 000000000000..7f9ecc9c2d02 --- /dev/null +++ b/arch/sh/kernel/cpu/sh4/perf_event.c | |||
@@ -0,0 +1,253 @@ | |||
1 | /* | ||
2 | * Performance events support for SH7750-style performance counters | ||
3 | * | ||
4 | * Copyright (C) 2009 Paul Mundt | ||
5 | * | ||
6 | * This file is subject to the terms and conditions of the GNU General Public | ||
7 | * License. See the file "COPYING" in the main directory of this archive | ||
8 | * for more details. | ||
9 | */ | ||
10 | #include <linux/kernel.h> | ||
11 | #include <linux/init.h> | ||
12 | #include <linux/io.h> | ||
13 | #include <linux/irq.h> | ||
14 | #include <linux/perf_event.h> | ||
15 | #include <asm/processor.h> | ||
16 | |||
17 | #define PM_CR_BASE 0xff000084 /* 16-bit */ | ||
18 | #define PM_CTR_BASE 0xff100004 /* 32-bit */ | ||
19 | |||
20 | #define PMCR(n) (PM_CR_BASE + ((n) * 0x04)) | ||
21 | #define PMCTRH(n) (PM_CTR_BASE + 0x00 + ((n) * 0x08)) | ||
22 | #define PMCTRL(n) (PM_CTR_BASE + 0x04 + ((n) * 0x08)) | ||
23 | |||
24 | #define PMCR_PMM_MASK 0x0000003f | ||
25 | |||
26 | #define PMCR_CLKF 0x00000100 | ||
27 | #define PMCR_PMCLR 0x00002000 | ||
28 | #define PMCR_PMST 0x00004000 | ||
29 | #define PMCR_PMEN 0x00008000 | ||
30 | |||
31 | static struct sh_pmu sh7750_pmu; | ||
32 | |||
33 | /* | ||
34 | * There are a number of events supported by each counter (33 in total). | ||
35 | * Since we have 2 counters, each counter will take the event code as it | ||
36 | * corresponds to the PMCR PMM setting. Each counter can be configured | ||
37 | * independently. | ||
38 | * | ||
39 | * Event Code Description | ||
40 | * ---------- ----------- | ||
41 | * | ||
42 | * 0x01 Operand read access | ||
43 | * 0x02 Operand write access | ||
44 | * 0x03 UTLB miss | ||
45 | * 0x04 Operand cache read miss | ||
46 | * 0x05 Operand cache write miss | ||
47 | * 0x06 Instruction fetch (w/ cache) | ||
48 | * 0x07 Instruction TLB miss | ||
49 | * 0x08 Instruction cache miss | ||
50 | * 0x09 All operand accesses | ||
51 | * 0x0a All instruction accesses | ||
52 | * 0x0b OC RAM operand access | ||
53 | * 0x0d On-chip I/O space access | ||
54 | * 0x0e Operand access (r/w) | ||
55 | * 0x0f Operand cache miss (r/w) | ||
56 | * 0x10 Branch instruction | ||
57 | * 0x11 Branch taken | ||
58 | * 0x12 BSR/BSRF/JSR | ||
59 | * 0x13 Instruction execution | ||
60 | * 0x14 Instruction execution in parallel | ||
61 | * 0x15 FPU Instruction execution | ||
62 | * 0x16 Interrupt | ||
63 | * 0x17 NMI | ||
64 | * 0x18 trapa instruction execution | ||
65 | * 0x19 UBCA match | ||
66 | * 0x1a UBCB match | ||
67 | * 0x21 Instruction cache fill | ||
68 | * 0x22 Operand cache fill | ||
69 | * 0x23 Elapsed time | ||
70 | * 0x24 Pipeline freeze by I-cache miss | ||
71 | * 0x25 Pipeline freeze by D-cache miss | ||
72 | * 0x27 Pipeline freeze by branch instruction | ||
73 | * 0x28 Pipeline freeze by CPU register | ||
74 | * 0x29 Pipeline freeze by FPU | ||
75 | */ | ||
76 | |||
77 | static const int sh7750_general_events[] = { | ||
78 | [PERF_COUNT_HW_CPU_CYCLES] = 0x0023, | ||
79 | [PERF_COUNT_HW_INSTRUCTIONS] = 0x000a, | ||
80 | [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0006, /* I-cache */ | ||
81 | [PERF_COUNT_HW_CACHE_MISSES] = 0x0008, /* I-cache */ | ||
82 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x0010, | ||
83 | [PERF_COUNT_HW_BRANCH_MISSES] = -1, | ||
84 | [PERF_COUNT_HW_BUS_CYCLES] = -1, | ||
85 | }; | ||
86 | |||
87 | #define C(x) PERF_COUNT_HW_CACHE_##x | ||
88 | |||
89 | static const int sh7750_cache_events | ||
90 | [PERF_COUNT_HW_CACHE_MAX] | ||
91 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
92 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | ||
93 | { | ||
94 | [ C(L1D) ] = { | ||
95 | [ C(OP_READ) ] = { | ||
96 | [ C(RESULT_ACCESS) ] = 0x0001, | ||
97 | [ C(RESULT_MISS) ] = 0x0004, | ||
98 | }, | ||
99 | [ C(OP_WRITE) ] = { | ||
100 | [ C(RESULT_ACCESS) ] = 0x0002, | ||
101 | [ C(RESULT_MISS) ] = 0x0005, | ||
102 | }, | ||
103 | [ C(OP_PREFETCH) ] = { | ||
104 | [ C(RESULT_ACCESS) ] = 0, | ||
105 | [ C(RESULT_MISS) ] = 0, | ||
106 | }, | ||
107 | }, | ||
108 | |||
109 | [ C(L1I) ] = { | ||
110 | [ C(OP_READ) ] = { | ||
111 | [ C(RESULT_ACCESS) ] = 0x0006, | ||
112 | [ C(RESULT_MISS) ] = 0x0008, | ||
113 | }, | ||
114 | [ C(OP_WRITE) ] = { | ||
115 | [ C(RESULT_ACCESS) ] = -1, | ||
116 | [ C(RESULT_MISS) ] = -1, | ||
117 | }, | ||
118 | [ C(OP_PREFETCH) ] = { | ||
119 | [ C(RESULT_ACCESS) ] = 0, | ||
120 | [ C(RESULT_MISS) ] = 0, | ||
121 | }, | ||
122 | }, | ||
123 | |||
124 | [ C(LL) ] = { | ||
125 | [ C(OP_READ) ] = { | ||
126 | [ C(RESULT_ACCESS) ] = 0, | ||
127 | [ C(RESULT_MISS) ] = 0, | ||
128 | }, | ||
129 | [ C(OP_WRITE) ] = { | ||
130 | [ C(RESULT_ACCESS) ] = 0, | ||
131 | [ C(RESULT_MISS) ] = 0, | ||
132 | }, | ||
133 | [ C(OP_PREFETCH) ] = { | ||
134 | [ C(RESULT_ACCESS) ] = 0, | ||
135 | [ C(RESULT_MISS) ] = 0, | ||
136 | }, | ||
137 | }, | ||
138 | |||
139 | [ C(DTLB) ] = { | ||
140 | [ C(OP_READ) ] = { | ||
141 | [ C(RESULT_ACCESS) ] = 0, | ||
142 | [ C(RESULT_MISS) ] = 0x0003, | ||
143 | }, | ||
144 | [ C(OP_WRITE) ] = { | ||
145 | [ C(RESULT_ACCESS) ] = 0, | ||
146 | [ C(RESULT_MISS) ] = 0, | ||
147 | }, | ||
148 | [ C(OP_PREFETCH) ] = { | ||
149 | [ C(RESULT_ACCESS) ] = 0, | ||
150 | [ C(RESULT_MISS) ] = 0, | ||
151 | }, | ||
152 | }, | ||
153 | |||
154 | [ C(ITLB) ] = { | ||
155 | [ C(OP_READ) ] = { | ||
156 | [ C(RESULT_ACCESS) ] = 0, | ||
157 | [ C(RESULT_MISS) ] = 0x0007, | ||
158 | }, | ||
159 | [ C(OP_WRITE) ] = { | ||
160 | [ C(RESULT_ACCESS) ] = -1, | ||
161 | [ C(RESULT_MISS) ] = -1, | ||
162 | }, | ||
163 | [ C(OP_PREFETCH) ] = { | ||
164 | [ C(RESULT_ACCESS) ] = -1, | ||
165 | [ C(RESULT_MISS) ] = -1, | ||
166 | }, | ||
167 | }, | ||
168 | |||
169 | [ C(BPU) ] = { | ||
170 | [ C(OP_READ) ] = { | ||
171 | [ C(RESULT_ACCESS) ] = -1, | ||
172 | [ C(RESULT_MISS) ] = -1, | ||
173 | }, | ||
174 | [ C(OP_WRITE) ] = { | ||
175 | [ C(RESULT_ACCESS) ] = -1, | ||
176 | [ C(RESULT_MISS) ] = -1, | ||
177 | }, | ||
178 | [ C(OP_PREFETCH) ] = { | ||
179 | [ C(RESULT_ACCESS) ] = -1, | ||
180 | [ C(RESULT_MISS) ] = -1, | ||
181 | }, | ||
182 | }, | ||
183 | }; | ||
184 | |||
185 | static int sh7750_event_map(int event) | ||
186 | { | ||
187 | return sh7750_general_events[event]; | ||
188 | } | ||
189 | |||
190 | static u64 sh7750_pmu_read(int idx) | ||
191 | { | ||
192 | return (u64)((u64)(__raw_readl(PMCTRH(idx)) & 0xffff) << 32) | | ||
193 | __raw_readl(PMCTRL(idx)); | ||
194 | } | ||
195 | |||
196 | static void sh7750_pmu_disable(struct hw_perf_event *hwc, int idx) | ||
197 | { | ||
198 | unsigned int tmp; | ||
199 | |||
200 | tmp = __raw_readw(PMCR(idx)); | ||
201 | tmp &= ~(PMCR_PMM_MASK | PMCR_PMEN); | ||
202 | __raw_writew(tmp, PMCR(idx)); | ||
203 | } | ||
204 | |||
205 | static void sh7750_pmu_enable(struct hw_perf_event *hwc, int idx) | ||
206 | { | ||
207 | __raw_writew(__raw_readw(PMCR(idx)) | PMCR_PMCLR, PMCR(idx)); | ||
208 | __raw_writew(hwc->config | PMCR_PMEN | PMCR_PMST, PMCR(idx)); | ||
209 | } | ||
210 | |||
211 | static void sh7750_pmu_disable_all(void) | ||
212 | { | ||
213 | int i; | ||
214 | |||
215 | for (i = 0; i < sh7750_pmu.num_events; i++) | ||
216 | __raw_writew(__raw_readw(PMCR(i)) & ~PMCR_PMEN, PMCR(i)); | ||
217 | } | ||
218 | |||
219 | static void sh7750_pmu_enable_all(void) | ||
220 | { | ||
221 | int i; | ||
222 | |||
223 | for (i = 0; i < sh7750_pmu.num_events; i++) | ||
224 | __raw_writew(__raw_readw(PMCR(i)) | PMCR_PMEN, PMCR(i)); | ||
225 | } | ||
226 | |||
227 | static struct sh_pmu sh7750_pmu = { | ||
228 | .name = "SH7750", | ||
229 | .num_events = 2, | ||
230 | .event_map = sh7750_event_map, | ||
231 | .max_events = ARRAY_SIZE(sh7750_general_events), | ||
232 | .raw_event_mask = PMCR_PMM_MASK, | ||
233 | .cache_events = &sh7750_cache_events, | ||
234 | .read = sh7750_pmu_read, | ||
235 | .disable = sh7750_pmu_disable, | ||
236 | .enable = sh7750_pmu_enable, | ||
237 | .disable_all = sh7750_pmu_disable_all, | ||
238 | .enable_all = sh7750_pmu_enable_all, | ||
239 | }; | ||
240 | |||
241 | static int __init sh7750_pmu_init(void) | ||
242 | { | ||
243 | /* | ||
244 | * Make sure this CPU actually has perf counters. | ||
245 | */ | ||
246 | if (!(boot_cpu_data.flags & CPU_HAS_PERF_COUNTER)) { | ||
247 | pr_notice("HW perf events unsupported, software events only.\n"); | ||
248 | return -ENODEV; | ||
249 | } | ||
250 | |||
251 | return register_sh_pmu(&sh7750_pmu); | ||
252 | } | ||
253 | arch_initcall(sh7750_pmu_init); | ||