 arch/sh/include/asm/perf_event.h     |  31 +++-
 arch/sh/kernel/Makefile              |   1 +
 arch/sh/kernel/cpu/sh4a/Makefile     |   1 +
 arch/sh/kernel/cpu/sh4a/perf_event.c | 231 +++++++++++++
 arch/sh/kernel/perf_event.c          | 314 ++++++++++++++++++
 5 files changed, 576 insertions(+), 2 deletions(-)
diff --git a/arch/sh/include/asm/perf_event.h b/arch/sh/include/asm/perf_event.h
index 11a302297ab7..3d0c9f36d150 100644
--- a/arch/sh/include/asm/perf_event.h
+++ b/arch/sh/include/asm/perf_event.h
@@ -1,8 +1,35 @@
 #ifndef __ASM_SH_PERF_EVENT_H
 #define __ASM_SH_PERF_EVENT_H
 
-/* SH only supports software events through this interface. */
-static inline void set_perf_event_pending(void) {}
+struct hw_perf_event;
+
+#define MAX_HWEVENTS	2
+
+struct sh_pmu {
+	const char	*name;
+	unsigned int	num_events;
+	void		(*disable_all)(void);
+	void		(*enable_all)(void);
+	void		(*enable)(struct hw_perf_event *, int);
+	void		(*disable)(struct hw_perf_event *, int);
+	u64		(*read)(int);
+	int		(*event_map)(int);
+	unsigned int	max_events;
+	unsigned long	raw_event_mask;
+	const int	(*cache_events)[PERF_COUNT_HW_CACHE_MAX]
+				       [PERF_COUNT_HW_CACHE_OP_MAX]
+				       [PERF_COUNT_HW_CACHE_RESULT_MAX];
+};
+
+/* arch/sh/kernel/perf_event.c */
+extern int register_sh_pmu(struct sh_pmu *);
+extern int reserve_pmc_hardware(void);
+extern void release_pmc_hardware(void);
+
+static inline void set_perf_event_pending(void)
+{
+	/* Nothing to see here, move along. */
+}
 
 #define PERF_EVENT_INDEX_OFFSET	0
 
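The header above is the whole contract between the common framework and a CPU backend: fill in a struct sh_pmu and hand it to register_sh_pmu(). A minimal sketch of that shape, with hypothetical "mycpu" names and all counter access stubbed out (the SH-4A file below is the real instance; cache_events is left NULL, which the core treats as "no cache events"):

#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/perf_event.h>
#include <asm/perf_event.h>

static u64 mycpu_pmu_read(int idx)
{
	return 0;	/* stub: would read hardware counter 'idx' */
}

static void mycpu_pmu_enable(struct hw_perf_event *hwc, int idx) { }
static void mycpu_pmu_disable(struct hw_perf_event *hwc, int idx) { }
static void mycpu_pmu_enable_all(void) { }
static void mycpu_pmu_disable_all(void) { }

static int mycpu_event_map(int event)
{
	return -1;	/* stub: -1 marks a generic event as unsupported */
}

static struct sh_pmu mycpu_pmu = {
	.name		= "mycpu",
	.num_events	= 2,	/* must not exceed MAX_HWEVENTS */
	.event_map	= mycpu_event_map,
	.max_events	= 0,	/* number of entries behind event_map */
	.raw_event_mask	= 0x3ff,
	.read		= mycpu_pmu_read,
	.enable		= mycpu_pmu_enable,
	.disable	= mycpu_pmu_disable,
	.enable_all	= mycpu_pmu_enable_all,
	.disable_all	= mycpu_pmu_disable_all,
};

static int __init mycpu_pmu_init(void)
{
	return register_sh_pmu(&mycpu_pmu);
}
arch_initcall(mycpu_pmu_init);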
diff --git a/arch/sh/kernel/Makefile b/arch/sh/kernel/Makefile
index 097ae5ceb0e3..0a67bafce425 100644
--- a/arch/sh/kernel/Makefile
+++ b/arch/sh/kernel/Makefile
@@ -39,6 +39,7 @@ obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o
 obj-$(CONFIG_DUMP_CODE)		+= disassemble.o
 obj-$(CONFIG_HIBERNATION)	+= swsusp.o
 obj-$(CONFIG_DWARF_UNWINDER)	+= dwarf.o
+obj-$(CONFIG_PERF_EVENTS)	+= perf_event.o
 
 obj-$(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST)	+= localtimer.o
 
diff --git a/arch/sh/kernel/cpu/sh4a/Makefile b/arch/sh/kernel/cpu/sh4a/Makefile
index 490d5dc9e372..33bab477d2e2 100644
--- a/arch/sh/kernel/cpu/sh4a/Makefile
+++ b/arch/sh/kernel/cpu/sh4a/Makefile
@@ -44,3 +44,4 @@ pinmux-$(CONFIG_CPU_SUBTYPE_SH7786) := pinmux-sh7786.o
 obj-y				+= $(clock-y)
 obj-$(CONFIG_SMP)		+= $(smp-y)
 obj-$(CONFIG_GENERIC_GPIO)	+= $(pinmux-y)
+obj-$(CONFIG_PERF_EVENTS)	+= perf_event.o
diff --git a/arch/sh/kernel/cpu/sh4a/perf_event.c b/arch/sh/kernel/cpu/sh4a/perf_event.c
new file mode 100644
index 000000000000..d0938345799f
--- /dev/null
+++ b/arch/sh/kernel/cpu/sh4a/perf_event.c
@@ -0,0 +1,231 @@
+/*
+ * Performance events support for SH-4A performance counters
+ *
+ * Copyright (C) 2009 Paul Mundt
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/irq.h>
+#include <linux/perf_event.h>
+#include <asm/processor.h>
+
+#define PPC_CCBR(idx)	(0xff200800 + (sizeof(u32) * idx))
+#define PPC_PMCTR(idx)	(0xfc100000 + (sizeof(u32) * idx))
+
+#define CCBR_CIT_MASK	(0x7ff << 6)
+#define CCBR_DUC	(1 << 3)
+#define CCBR_CMDS	(1 << 1)
+#define CCBR_PPCE	(1 << 0)
+
+#define PPC_PMCAT	0xfc100080
+
+#define PMCAT_OVF3	(1 << 27)
+#define PMCAT_CNN3	(1 << 26)
+#define PMCAT_CLR3	(1 << 25)
+#define PMCAT_OVF2	(1 << 19)
+#define PMCAT_CLR2	(1 << 17)
+#define PMCAT_OVF1	(1 << 11)
+#define PMCAT_CNN1	(1 << 10)
+#define PMCAT_CLR1	(1 << 9)
+#define PMCAT_OVF0	(1 << 3)
+#define PMCAT_CLR0	(1 << 1)
+
+static struct sh_pmu sh4a_pmu;
+
+/*
+ * Special reserved bits used by hardware emulators, read values will
+ * vary, but writes must always be 0.
+ */
+#define PMCAT_EMU_CLR_MASK	((1 << 24) | (1 << 16) | (1 << 8) | (1 << 0))
+
+static const int sh4a_general_events[] = {
+	[PERF_COUNT_HW_CPU_CYCLES]		= 0x0000,
+	[PERF_COUNT_HW_INSTRUCTIONS]		= 0x0202,
+	[PERF_COUNT_HW_CACHE_REFERENCES]	= 0x0029,	/* I-cache */
+	[PERF_COUNT_HW_CACHE_MISSES]		= 0x002a,	/* I-cache */
+	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x0204,
+	[PERF_COUNT_HW_BRANCH_MISSES]		= -1,
+	[PERF_COUNT_HW_BUS_CYCLES]		= -1,
+};
+
+#define C(x)	PERF_COUNT_HW_CACHE_##x
+
+static const int sh4a_cache_events
+			[PERF_COUNT_HW_CACHE_MAX]
+			[PERF_COUNT_HW_CACHE_OP_MAX]
+			[PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+	[ C(L1D) ] = {
+		[ C(OP_READ) ] = {
+			[ C(RESULT_ACCESS) ] = 0x0031,
+			[ C(RESULT_MISS)   ] = 0x0032,
+		},
+		[ C(OP_WRITE) ] = {
+			[ C(RESULT_ACCESS) ] = 0x0039,
+			[ C(RESULT_MISS)   ] = 0x003a,
+		},
+		[ C(OP_PREFETCH) ] = {
+			[ C(RESULT_ACCESS) ] = 0,
+			[ C(RESULT_MISS)   ] = 0,
+		},
+	},
+
+	[ C(L1I) ] = {
+		[ C(OP_READ) ] = {
+			[ C(RESULT_ACCESS) ] = 0x0029,
+			[ C(RESULT_MISS)   ] = 0x002a,
+		},
+		[ C(OP_WRITE) ] = {
+			[ C(RESULT_ACCESS) ] = -1,
+			[ C(RESULT_MISS)   ] = -1,
+		},
+		[ C(OP_PREFETCH) ] = {
+			[ C(RESULT_ACCESS) ] = 0,
+			[ C(RESULT_MISS)   ] = 0,
+		},
+	},
+
+	[ C(LL) ] = {
+		[ C(OP_READ) ] = {
+			[ C(RESULT_ACCESS) ] = 0x0030,
+			[ C(RESULT_MISS)   ] = 0,
+		},
+		[ C(OP_WRITE) ] = {
+			[ C(RESULT_ACCESS) ] = 0x0038,
+			[ C(RESULT_MISS)   ] = 0,
+		},
+		[ C(OP_PREFETCH) ] = {
+			[ C(RESULT_ACCESS) ] = 0,
+			[ C(RESULT_MISS)   ] = 0,
+		},
+	},
+
+	[ C(DTLB) ] = {
+		[ C(OP_READ) ] = {
+			[ C(RESULT_ACCESS) ] = 0x0222,
+			[ C(RESULT_MISS)   ] = 0x0220,
+		},
+		[ C(OP_WRITE) ] = {
+			[ C(RESULT_ACCESS) ] = 0,
+			[ C(RESULT_MISS)   ] = 0,
+		},
+		[ C(OP_PREFETCH) ] = {
+			[ C(RESULT_ACCESS) ] = 0,
+			[ C(RESULT_MISS)   ] = 0,
+		},
+	},
+
+	[ C(ITLB) ] = {
+		[ C(OP_READ) ] = {
+			[ C(RESULT_ACCESS) ] = 0,
+			[ C(RESULT_MISS)   ] = 0x02a0,
+		},
+		[ C(OP_WRITE) ] = {
+			[ C(RESULT_ACCESS) ] = -1,
+			[ C(RESULT_MISS)   ] = -1,
+		},
+		[ C(OP_PREFETCH) ] = {
+			[ C(RESULT_ACCESS) ] = -1,
+			[ C(RESULT_MISS)   ] = -1,
+		},
+	},
+
+	[ C(BPU) ] = {
+		[ C(OP_READ) ] = {
+			[ C(RESULT_ACCESS) ] = -1,
+			[ C(RESULT_MISS)   ] = -1,
+		},
+		[ C(OP_WRITE) ] = {
+			[ C(RESULT_ACCESS) ] = -1,
+			[ C(RESULT_MISS)   ] = -1,
+		},
+		[ C(OP_PREFETCH) ] = {
+			[ C(RESULT_ACCESS) ] = -1,
+			[ C(RESULT_MISS)   ] = -1,
+		},
+	},
+};
+
+static int sh4a_event_map(int event)
+{
+	return sh4a_general_events[event];
+}
+
+static u64 sh4a_pmu_read(int idx)
+{
+	return __raw_readl(PPC_PMCTR(idx));
+}
+
+static void sh4a_pmu_disable(struct hw_perf_event *hwc, int idx)
+{
+	unsigned int tmp;
+
+	tmp = __raw_readl(PPC_CCBR(idx));
+	tmp &= ~(CCBR_CIT_MASK | CCBR_DUC);
+	__raw_writel(tmp, PPC_CCBR(idx));
+}
+
+static void sh4a_pmu_enable(struct hw_perf_event *hwc, int idx)
+{
+	unsigned int tmp;
+
+	tmp = __raw_readl(PPC_PMCAT);
+	tmp &= ~PMCAT_EMU_CLR_MASK;
+	tmp |= idx ? PMCAT_CLR1 : PMCAT_CLR0;
+	__raw_writel(tmp, PPC_PMCAT);
+
+	tmp = __raw_readl(PPC_CCBR(idx));
+	tmp |= (hwc->config << 6) | CCBR_CMDS | CCBR_PPCE;
+	__raw_writel(tmp, PPC_CCBR(idx));
+
+	__raw_writel(__raw_readl(PPC_CCBR(idx)) | CCBR_DUC, PPC_CCBR(idx));
+}
+
+static void sh4a_pmu_disable_all(void)
+{
+	int i;
+
+	for (i = 0; i < sh4a_pmu.num_events; i++)
+		__raw_writel(__raw_readl(PPC_CCBR(i)) & ~CCBR_DUC, PPC_CCBR(i));
+}
+
+static void sh4a_pmu_enable_all(void)
+{
+	int i;
+
+	for (i = 0; i < sh4a_pmu.num_events; i++)
+		__raw_writel(__raw_readl(PPC_CCBR(i)) | CCBR_DUC, PPC_CCBR(i));
+}
+
+static struct sh_pmu sh4a_pmu = {
+	.name		= "SH-4A",
+	.num_events	= 2,
+	.event_map	= sh4a_event_map,
+	.max_events	= ARRAY_SIZE(sh4a_general_events),
+	.raw_event_mask	= 0x3ff,
+	.cache_events	= &sh4a_cache_events,
+	.read		= sh4a_pmu_read,
+	.disable	= sh4a_pmu_disable,
+	.enable		= sh4a_pmu_enable,
+	.disable_all	= sh4a_pmu_disable_all,
+	.enable_all	= sh4a_pmu_enable_all,
+};
+
+static int __init sh4a_pmu_init(void)
+{
+	/*
+	 * Make sure this CPU actually has perf counters.
+	 */
+	if (!(boot_cpu_data.flags & CPU_HAS_PERF_COUNTER)) {
+		pr_notice("HW perf events unsupported, software events only.\n");
+		return -ENODEV;
+	}
+
+	return register_sh_pmu(&sh4a_pmu);
+}
+arch_initcall(sh4a_pmu_init);
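How an event code lands in the counter control register: sh4a_pmu_enable() shifts hwc->config into the CIT field of CCBR (bits 6 and up, per CCBR_CIT_MASK), sets CMDS and PPCE, and flips DUC last to start counting. A standalone arithmetic check of that encoding (userspace C, constants copied from the #defines above; nothing here touches hardware):

#include <stdint.h>
#include <stdio.h>

#define CCBR_CIT_MASK	(0x7ff << 6)
#define CCBR_DUC	(1 << 3)
#define CCBR_CMDS	(1 << 1)
#define CCBR_PPCE	(1 << 0)

int main(void)
{
	uint32_t config = 0x0032;	/* L1D read miss, from sh4a_cache_events */
	uint32_t ccbr = (config << 6) | CCBR_CMDS | CCBR_PPCE;

	ccbr |= CCBR_DUC;		/* the enable path sets this last */

	printf("CCBR = %#010x, CIT = %#x\n",
	       ccbr, (ccbr & CCBR_CIT_MASK) >> 6);	/* 0x00000c8b, 0x32 */
	return 0;
}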
diff --git a/arch/sh/kernel/perf_event.c b/arch/sh/kernel/perf_event.c
new file mode 100644
index 000000000000..d1510702f201
--- /dev/null
+++ b/arch/sh/kernel/perf_event.c
@@ -0,0 +1,314 @@
+/*
+ * Performance event support framework for SuperH hardware counters.
+ *
+ * Copyright (C) 2009 Paul Mundt
+ *
+ * Heavily based on the x86 and PowerPC implementations.
+ *
+ * x86:
+ *  Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
+ *  Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
+ *  Copyright (C) 2009 Jaswinder Singh Rajput
+ *  Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
+ *  Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
+ *  Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com>
+ *
+ * ppc:
+ *  Copyright 2008-2009 Paul Mackerras, IBM Corporation.
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/irq.h>
+#include <linux/perf_event.h>
+#include <asm/processor.h>
+
+struct cpu_hw_events {
+	struct perf_event	*events[MAX_HWEVENTS];
+	unsigned long		used_mask[BITS_TO_LONGS(MAX_HWEVENTS)];
+	unsigned long		active_mask[BITS_TO_LONGS(MAX_HWEVENTS)];
+};
+
+DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events);
+
+static struct sh_pmu *sh_pmu __read_mostly;
+
+/* Number of perf_events counting hardware events */
+static atomic_t num_events;
+/* Used to avoid races in calling reserve/release_pmc_hardware */
+static DEFINE_MUTEX(pmc_reserve_mutex);
+
+/*
+ * Stub these out for now, do something more profound later.
+ */
+int reserve_pmc_hardware(void)
+{
+	return 0;
+}
+
+void release_pmc_hardware(void)
+{
+}
+
+static inline int sh_pmu_initialized(void)
+{
+	return !!sh_pmu;
+}
+
+/*
+ * Release the PMU if this is the last perf_event.
+ */
+static void hw_perf_event_destroy(struct perf_event *event)
+{
+	if (!atomic_add_unless(&num_events, -1, 1)) {
+		mutex_lock(&pmc_reserve_mutex);
+		if (atomic_dec_return(&num_events) == 0)
+			release_pmc_hardware();
+		mutex_unlock(&pmc_reserve_mutex);
+	}
+}
+
+static int hw_perf_cache_event(int config, int *evp)
+{
+	unsigned long type, op, result;
+	int ev;
+
+	if (!sh_pmu->cache_events)
+		return -EINVAL;
+
+	/* unpack config */
+	type = config & 0xff;
+	op = (config >> 8) & 0xff;
+	result = (config >> 16) & 0xff;
+
+	if (type >= PERF_COUNT_HW_CACHE_MAX ||
+	    op >= PERF_COUNT_HW_CACHE_OP_MAX ||
+	    result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
+		return -EINVAL;
+
+	ev = (*sh_pmu->cache_events)[type][op][result];
+	if (ev == 0)
+		return -EOPNOTSUPP;
+	if (ev == -1)
+		return -EINVAL;
+	*evp = ev;
+	return 0;
+}
+
+static int __hw_perf_event_init(struct perf_event *event)
+{
+	struct perf_event_attr *attr = &event->attr;
+	struct hw_perf_event *hwc = &event->hw;
+	int config;
+	int err;
+
+	if (!sh_pmu_initialized())
+		return -ENODEV;
+
+	/*
+	 * All of the on-chip counters are "limited", in that they have
+	 * no interrupts, and are therefore unable to do sampling without
+	 * further work and timer assistance.
+	 */
+	if (hwc->sample_period)
+		return -EINVAL;
+
+	/*
+	 * See if we need to reserve the counter.
+	 *
+	 * If no events are currently in use, then we have to take a
+	 * mutex to ensure that we don't race with another task doing
+	 * reserve_pmc_hardware or release_pmc_hardware.
+	 */
+	err = 0;
+	if (!atomic_inc_not_zero(&num_events)) {
+		mutex_lock(&pmc_reserve_mutex);
+		if (atomic_read(&num_events) == 0 &&
+		    reserve_pmc_hardware())
+			err = -EBUSY;
+		else
+			atomic_inc(&num_events);
+		mutex_unlock(&pmc_reserve_mutex);
+	}
+
+	if (err)
+		return err;
+
+	event->destroy = hw_perf_event_destroy;
+
+	switch (attr->type) {
+	case PERF_TYPE_RAW:
+		config = attr->config & sh_pmu->raw_event_mask;
+		break;
+	case PERF_TYPE_HW_CACHE:
+		err = hw_perf_cache_event(attr->config, &config);
+		if (err)
+			return err;
+		break;
+	case PERF_TYPE_HARDWARE:
+		if (attr->config >= sh_pmu->max_events)
+			return -EINVAL;
+
+		config = sh_pmu->event_map(attr->config);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (config == -1)
+		return -EINVAL;
+
+	hwc->config |= config;
+
+	return 0;
+}
+
+static void sh_perf_event_update(struct perf_event *event,
+				   struct hw_perf_event *hwc, int idx)
+{
+	u64 prev_raw_count, new_raw_count;
+	s64 delta;
+	int shift = 0;
+
+	/*
+	 * Depending on the counter configuration, they may or may not
+	 * be chained, in which case the previous counter value can be
+	 * updated underneath us if the lower-half overflows.
+	 *
+	 * Our tactic to handle this is to first atomically read and
+	 * exchange a new raw count - then add that new-prev delta
+	 * count to the generic counter atomically.
+	 *
+	 * As there is no interrupt associated with the overflow events,
+	 * this is the simplest approach for maintaining consistency.
+	 */
+again:
+	prev_raw_count = atomic64_read(&hwc->prev_count);
+	new_raw_count = sh_pmu->read(idx);
+
+	if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count,
+			     new_raw_count) != prev_raw_count)
+		goto again;
+
+	/*
+	 * Now we have the new raw value and have updated the prev
+	 * timestamp already. We can now calculate the elapsed delta
+	 * (counter-)time and add that to the generic counter.
+	 *
+	 * Careful, not all hw sign-extends above the physical width
+	 * of the count.
+	 */
+	delta = (new_raw_count << shift) - (prev_raw_count << shift);
+	delta >>= shift;
+
+	atomic64_add(delta, &event->count);
+}
+
+static void sh_pmu_disable(struct perf_event *event)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->idx;
+
+	clear_bit(idx, cpuc->active_mask);
+	sh_pmu->disable(hwc, idx);
+
+	barrier();
+
+	sh_perf_event_update(event, &event->hw, idx);
+
+	cpuc->events[idx] = NULL;
+	clear_bit(idx, cpuc->used_mask);
+
+	perf_event_update_userpage(event);
+}
+
+static int sh_pmu_enable(struct perf_event *event)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->idx;
+
+	if (test_and_set_bit(idx, cpuc->used_mask)) {
+		idx = find_first_zero_bit(cpuc->used_mask, sh_pmu->num_events);
+		if (idx == sh_pmu->num_events)
+			return -EAGAIN;
+
+		set_bit(idx, cpuc->used_mask);
+		hwc->idx = idx;
+	}
+
+	sh_pmu->disable(hwc, idx);
+
+	cpuc->events[idx] = event;
+	set_bit(idx, cpuc->active_mask);
+
+	sh_pmu->enable(hwc, idx);
+
+	perf_event_update_userpage(event);
+
+	return 0;
+}
+
+static void sh_pmu_read(struct perf_event *event)
+{
+	sh_perf_event_update(event, &event->hw, event->hw.idx);
+}
+
+static const struct pmu pmu = {
+	.enable		= sh_pmu_enable,
+	.disable	= sh_pmu_disable,
+	.read		= sh_pmu_read,
+};
+
+const struct pmu *hw_perf_event_init(struct perf_event *event)
+{
+	int err = __hw_perf_event_init(event);
+	if (unlikely(err)) {
+		if (event->destroy)
+			event->destroy(event);
+		return ERR_PTR(err);
+	}
+
+	return &pmu;
+}
+
+void hw_perf_event_setup(int cpu)
+{
+	struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu);
+
+	memset(cpuhw, 0, sizeof(struct cpu_hw_events));
+}
+
+void hw_perf_enable(void)
+{
+	if (!sh_pmu_initialized())
+		return;
+
+	sh_pmu->enable_all();
+}
+
+void hw_perf_disable(void)
+{
+	if (!sh_pmu_initialized())
+		return;
+
+	sh_pmu->disable_all();
+}
+
+int register_sh_pmu(struct sh_pmu *pmu)
+{
+	if (sh_pmu)
+		return -EBUSY;
+	sh_pmu = pmu;
+
+	pr_info("Performance Events: %s support registered\n", pmu->name);
+
+	WARN_ON(pmu->num_events > MAX_HWEVENTS);
+
+	return 0;
+}
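For reference, the config value that hw_perf_cache_event() unpacks follows the generic perf ABI packing: one byte each for cache id, op, and result, in successively higher bytes. For example, requesting the L1D read-miss event, which the SH-4A tables above map to hardware code 0x0032:

#include <linux/perf_event.h>	/* userspace copy of the perf ABI enums */
#include <stdio.h>

int main(void)
{
	/* cache id in byte 0, op in byte 1, result in byte 2 */
	unsigned long config = PERF_COUNT_HW_CACHE_L1D |
			       (PERF_COUNT_HW_CACHE_OP_READ << 8) |
			       (PERF_COUNT_HW_CACHE_RESULT_MISS << 16);

	/* would be used with attr.type = PERF_TYPE_HW_CACHE */
	printf("attr.config = %#lx\n", config);	/* 0x10000 */
	return 0;
}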