aboutsummaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
author: Paul Mundt <lethal@linux-sh.org> 2009-10-28 04:57:54 -0400
committer: Paul Mundt <lethal@linux-sh.org> 2009-10-28 04:57:54 -0400
commit: ac44e6694755744fe96442919da1f2c7e87a2a61 (patch)
tree: 155ecdb56348513eb434df87edfccc779c40cc1a /arch
parent: 3714a9a026bba09a58e7cf06e0c23c67da6841c2 (diff)
sh: perf events: Add preliminary support for SH-4A counters.
This adds in preliminary support for the SH-4A performance counters. Presently only the first 2 counters are supported, as these are the ones of the most interest to the perf tool and end users. Counter chaining is not presently handled, so these are simply implemented as 32-bit counters. This also establishes a perf event support framework for other hardware counters, which the existing SH-4 oprofile code will migrate over to as the SH-4A support evolves. Signed-off-by: Paul Mundt <lethal@linux-sh.org>
Diffstat (limited to 'arch')
-rw-r--r-- arch/sh/include/asm/perf_event.h 31
-rw-r--r-- arch/sh/kernel/Makefile 1
-rw-r--r-- arch/sh/kernel/cpu/sh4a/Makefile 1
-rw-r--r-- arch/sh/kernel/cpu/sh4a/perf_event.c 231
-rw-r--r-- arch/sh/kernel/perf_event.c 314
5 files changed, 576 insertions, 2 deletions
diff --git a/arch/sh/include/asm/perf_event.h b/arch/sh/include/asm/perf_event.h
index 11a302297ab7..3d0c9f36d150 100644
--- a/arch/sh/include/asm/perf_event.h
+++ b/arch/sh/include/asm/perf_event.h
@@ -1,8 +1,35 @@
1#ifndef __ASM_SH_PERF_EVENT_H 1#ifndef __ASM_SH_PERF_EVENT_H
2#define __ASM_SH_PERF_EVENT_H 2#define __ASM_SH_PERF_EVENT_H
3 3
4/* SH only supports software events through this interface. */ 4struct hw_perf_event;
5static inline void set_perf_event_pending(void) {} 5
6#define MAX_HWEVENTS 2
7
/*
 * Description of one hardware counter implementation, handed to
 * register_sh_pmu() by CPU-specific code.
 */
8struct sh_pmu {
/* Printed by register_sh_pmu() when the PMU is registered. */
9 const char *name;
/* Upper bound used when searching for a free counter index. */
10 unsigned int num_events;
/* Called from hw_perf_disable() / hw_perf_enable(). */
11 void (*disable_all)(void);
12 void (*enable_all)(void);
/* Start / stop a single counter; second argument is the counter index. */
13 void (*enable)(struct hw_perf_event *, int);
14 void (*disable)(struct hw_perf_event *, int);
/* Return the raw value of the counter with the given index. */
15 u64 (*read)(int);
/* Map a PERF_TYPE_HARDWARE config value to an event code; -1 is rejected. */
16 int (*event_map)(int);
/* PERF_TYPE_HARDWARE config values >= max_events are rejected. */
17 unsigned int max_events;
/* ANDed with attr->config for PERF_TYPE_RAW events. */
18 unsigned long raw_event_mask;
/*
 * Optional table for PERF_TYPE_HW_CACHE events, indexed
 * [cache type][operation][result]; may be NULL.
 */
19 const int (*cache_events)[PERF_COUNT_HW_CACHE_MAX]
20 [PERF_COUNT_HW_CACHE_OP_MAX]
21 [PERF_COUNT_HW_CACHE_RESULT_MAX];
22};
23
24/* arch/sh/kernel/perf_event.c */
25extern int register_sh_pmu(struct sh_pmu *);
26extern int reserve_pmc_hardware(void);
27extern void release_pmc_hardware(void);
28
29static inline void set_perf_event_pending(void)
30{
31 /* Nothing to see here, move along. */
32}
6 33
7#define PERF_EVENT_INDEX_OFFSET 0 34#define PERF_EVENT_INDEX_OFFSET 0
8 35
diff --git a/arch/sh/kernel/Makefile b/arch/sh/kernel/Makefile
index 097ae5ceb0e3..0a67bafce425 100644
--- a/arch/sh/kernel/Makefile
+++ b/arch/sh/kernel/Makefile
@@ -39,6 +39,7 @@ obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o
39obj-$(CONFIG_DUMP_CODE) += disassemble.o 39obj-$(CONFIG_DUMP_CODE) += disassemble.o
40obj-$(CONFIG_HIBERNATION) += swsusp.o 40obj-$(CONFIG_HIBERNATION) += swsusp.o
41obj-$(CONFIG_DWARF_UNWINDER) += dwarf.o 41obj-$(CONFIG_DWARF_UNWINDER) += dwarf.o
42obj-$(CONFIG_PERF_EVENTS) += perf_event.o
42 43
43obj-$(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) += localtimer.o 44obj-$(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) += localtimer.o
44 45
diff --git a/arch/sh/kernel/cpu/sh4a/Makefile b/arch/sh/kernel/cpu/sh4a/Makefile
index 490d5dc9e372..33bab477d2e2 100644
--- a/arch/sh/kernel/cpu/sh4a/Makefile
+++ b/arch/sh/kernel/cpu/sh4a/Makefile
@@ -44,3 +44,4 @@ pinmux-$(CONFIG_CPU_SUBTYPE_SH7786) := pinmux-sh7786.o
44obj-y += $(clock-y) 44obj-y += $(clock-y)
45obj-$(CONFIG_SMP) += $(smp-y) 45obj-$(CONFIG_SMP) += $(smp-y)
46obj-$(CONFIG_GENERIC_GPIO) += $(pinmux-y) 46obj-$(CONFIG_GENERIC_GPIO) += $(pinmux-y)
47obj-$(CONFIG_PERF_EVENTS) += perf_event.o
diff --git a/arch/sh/kernel/cpu/sh4a/perf_event.c b/arch/sh/kernel/cpu/sh4a/perf_event.c
new file mode 100644
index 000000000000..d0938345799f
--- /dev/null
+++ b/arch/sh/kernel/cpu/sh4a/perf_event.c
@@ -0,0 +1,231 @@
1/*
2 * Performance events support for SH-4A performance counters
3 *
4 * Copyright (C) 2009 Paul Mundt
5 *
6 * This file is subject to the terms and conditions of the GNU General Public
7 * License. See the file "COPYING" in the main directory of this archive
8 * for more details.
9 */
10#include <linux/kernel.h>
11#include <linux/init.h>
12#include <linux/io.h>
13#include <linux/irq.h>
14#include <linux/perf_event.h>
15#include <asm/processor.h>
16
/*
 * Register addresses for counter 'idx'; the registers of consecutive
 * counters are sizeof(u32) bytes apart. The argument is parenthesized so
 * that expressions such as PPC_CCBR(i + 1) expand correctly.
 */
#define PPC_CCBR(idx)	(0xff200800 + (sizeof(u32) * (idx)))
#define PPC_PMCTR(idx)	(0xfc100000 + (sizeof(u32) * (idx)))
19
20#define CCBR_CIT_MASK (0x7ff << 6)
21#define CCBR_DUC (1 << 3)
22#define CCBR_CMDS (1 << 1)
23#define CCBR_PPCE (1 << 0)
24
25#define PPC_PMCAT 0xfc100080
26
27#define PMCAT_OVF3 (1 << 27)
28#define PMCAT_CNN3 (1 << 26)
29#define PMCAT_CLR3 (1 << 25)
30#define PMCAT_OVF2 (1 << 19)
31#define PMCAT_CLR2 (1 << 17)
32#define PMCAT_OVF1 (1 << 11)
33#define PMCAT_CNN1 (1 << 10)
34#define PMCAT_CLR1 (1 << 9)
35#define PMCAT_OVF0 (1 << 3)
36#define PMCAT_CLR0 (1 << 1)
37
38static struct sh_pmu sh4a_pmu;
39
40/*
41 * Special reserved bits used by hardware emulators, read values will
42 * vary, but writes must always be 0.
43 */
44#define PMCAT_EMU_CLR_MASK ((1 << 24) | (1 << 16) | (1 << 8) | (1 << 0))
45
/*
 * Event codes for the generic PERF_COUNT_HW_* events, indexed by the
 * generic event ID and returned unchanged by sh4a_event_map(). A value of
 * -1 makes __hw_perf_event_init() fail the event with -EINVAL.
 */
46static const int sh4a_general_events[] = {
47 [PERF_COUNT_HW_CPU_CYCLES] = 0x0000,
48 [PERF_COUNT_HW_INSTRUCTIONS] = 0x0202,
49 [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0029, /* I-cache */
50 [PERF_COUNT_HW_CACHE_MISSES] = 0x002a, /* I-cache */
51 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x0204,
52 [PERF_COUNT_HW_BRANCH_MISSES] = -1,
53 [PERF_COUNT_HW_BUS_CYCLES] = -1,
54};
55
56#define C(x) PERF_COUNT_HW_CACHE_##x
57
/*
 * Event codes for PERF_TYPE_HW_CACHE events, indexed as
 * [cache type][operation][result].
 *
 * hw_perf_cache_event() gives two values a special meaning:
 *    0  - lookup fails with -EOPNOTSUPP
 *   -1  - lookup fails with -EINVAL
 * Any other value is used as the event code.
 */
58static const int sh4a_cache_events
59 [PERF_COUNT_HW_CACHE_MAX]
60 [PERF_COUNT_HW_CACHE_OP_MAX]
61 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
62{
63 [ C(L1D) ] = {
64 [ C(OP_READ) ] = {
65 [ C(RESULT_ACCESS) ] = 0x0031,
66 [ C(RESULT_MISS) ] = 0x0032,
67 },
68 [ C(OP_WRITE) ] = {
69 [ C(RESULT_ACCESS) ] = 0x0039,
70 [ C(RESULT_MISS) ] = 0x003a,
71 },
72 [ C(OP_PREFETCH) ] = {
73 [ C(RESULT_ACCESS) ] = 0,
74 [ C(RESULT_MISS) ] = 0,
75 },
76 },
77
78 [ C(L1I) ] = {
79 [ C(OP_READ) ] = {
80 [ C(RESULT_ACCESS) ] = 0x0029,
81 [ C(RESULT_MISS) ] = 0x002a,
82 },
83 [ C(OP_WRITE) ] = {
84 [ C(RESULT_ACCESS) ] = -1,
85 [ C(RESULT_MISS) ] = -1,
86 },
87 [ C(OP_PREFETCH) ] = {
88 [ C(RESULT_ACCESS) ] = 0,
89 [ C(RESULT_MISS) ] = 0,
90 },
91 },
92
93 [ C(LL) ] = {
94 [ C(OP_READ) ] = {
95 [ C(RESULT_ACCESS) ] = 0x0030,
96 [ C(RESULT_MISS) ] = 0,
97 },
98 [ C(OP_WRITE) ] = {
99 [ C(RESULT_ACCESS) ] = 0x0038,
100 [ C(RESULT_MISS) ] = 0,
101 },
102 [ C(OP_PREFETCH) ] = {
103 [ C(RESULT_ACCESS) ] = 0,
104 [ C(RESULT_MISS) ] = 0,
105 },
106 },
107
108 [ C(DTLB) ] = {
109 [ C(OP_READ) ] = {
110 [ C(RESULT_ACCESS) ] = 0x0222,
111 [ C(RESULT_MISS) ] = 0x0220,
112 },
113 [ C(OP_WRITE) ] = {
114 [ C(RESULT_ACCESS) ] = 0,
115 [ C(RESULT_MISS) ] = 0,
116 },
117 [ C(OP_PREFETCH) ] = {
118 [ C(RESULT_ACCESS) ] = 0,
119 [ C(RESULT_MISS) ] = 0,
120 },
121 },
122
123 [ C(ITLB) ] = {
124 [ C(OP_READ) ] = {
125 [ C(RESULT_ACCESS) ] = 0,
126 [ C(RESULT_MISS) ] = 0x02a0,
127 },
128 [ C(OP_WRITE) ] = {
129 [ C(RESULT_ACCESS) ] = -1,
130 [ C(RESULT_MISS) ] = -1,
131 },
132 [ C(OP_PREFETCH) ] = {
133 [ C(RESULT_ACCESS) ] = -1,
134 [ C(RESULT_MISS) ] = -1,
135 },
136 },
137
138 [ C(BPU) ] = {
139 [ C(OP_READ) ] = {
140 [ C(RESULT_ACCESS) ] = -1,
141 [ C(RESULT_MISS) ] = -1,
142 },
143 [ C(OP_WRITE) ] = {
144 [ C(RESULT_ACCESS) ] = -1,
145 [ C(RESULT_MISS) ] = -1,
146 },
147 [ C(OP_PREFETCH) ] = {
148 [ C(RESULT_ACCESS) ] = -1,
149 [ C(RESULT_MISS) ] = -1,
150 },
151 },
152};
153
154static int sh4a_event_map(int event)
155{
156 return sh4a_general_events[event];
157}
158
159static u64 sh4a_pmu_read(int idx)
160{
161 return __raw_readl(PPC_PMCTR(idx));
162}
163
164static void sh4a_pmu_disable(struct hw_perf_event *hwc, int idx)
165{
166 unsigned int tmp;
167
168 tmp = __raw_readl(PPC_CCBR(idx));
169 tmp &= ~(CCBR_CIT_MASK | CCBR_DUC);
170 __raw_writel(tmp, PPC_CCBR(idx));
171}
172
173static void sh4a_pmu_enable(struct hw_perf_event *hwc, int idx)
174{
175 unsigned int tmp;
176
177 tmp = __raw_readl(PPC_PMCAT);
178 tmp &= ~PMCAT_EMU_CLR_MASK;
179 tmp |= idx ? PMCAT_CLR1 : PMCAT_CLR0;
180 __raw_writel(tmp, PPC_PMCAT);
181
182 tmp = __raw_readl(PPC_CCBR(idx));
183 tmp |= (hwc->config << 6) | CCBR_CMDS | CCBR_PPCE;
184 __raw_writel(tmp, PPC_CCBR(idx));
185
186 __raw_writel(__raw_readl(PPC_CCBR(idx)) | CCBR_DUC, PPC_CCBR(idx));
187}
188
189static void sh4a_pmu_disable_all(void)
190{
191 int i;
192
193 for (i = 0; i < sh4a_pmu.num_events; i++)
194 __raw_writel(__raw_readl(PPC_CCBR(i)) & ~CCBR_DUC, PPC_CCBR(i));
195}
196
197static void sh4a_pmu_enable_all(void)
198{
199 int i;
200
201 for (i = 0; i < sh4a_pmu.num_events; i++)
202 __raw_writel(__raw_readl(PPC_CCBR(i)) | CCBR_DUC, PPC_CCBR(i));
203}
204
205static struct sh_pmu sh4a_pmu = {
206 .name = "SH-4A",
207 .num_events = 2,
208 .event_map = sh4a_event_map,
209 .max_events = ARRAY_SIZE(sh4a_general_events),
210 .raw_event_mask = 0x3ff,
211 .cache_events = &sh4a_cache_events,
212 .read = sh4a_pmu_read,
213 .disable = sh4a_pmu_disable,
214 .enable = sh4a_pmu_enable,
215 .disable_all = sh4a_pmu_disable_all,
216 .enable_all = sh4a_pmu_enable_all,
217};
218
219static int __init sh4a_pmu_init(void)
220{
221 /*
222 * Make sure this CPU actually has perf counters.
223 */
224 if (!(boot_cpu_data.flags & CPU_HAS_PERF_COUNTER)) {
225 pr_notice("HW perf events unsupported, software events only.\n");
226 return -ENODEV;
227 }
228
229 return register_sh_pmu(&sh4a_pmu);
230}
231arch_initcall(sh4a_pmu_init);
diff --git a/arch/sh/kernel/perf_event.c b/arch/sh/kernel/perf_event.c
new file mode 100644
index 000000000000..d1510702f201
--- /dev/null
+++ b/arch/sh/kernel/perf_event.c
@@ -0,0 +1,314 @@
1/*
2 * Performance event support framework for SuperH hardware counters.
3 *
4 * Copyright (C) 2009 Paul Mundt
5 *
6 * Heavily based on the x86 and PowerPC implementations.
7 *
8 * x86:
9 * Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
10 * Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
11 * Copyright (C) 2009 Jaswinder Singh Rajput
12 * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
13 * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
14 * Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com>
15 *
16 * ppc:
17 * Copyright 2008-2009 Paul Mackerras, IBM Corporation.
18 *
19 * This file is subject to the terms and conditions of the GNU General Public
20 * License. See the file "COPYING" in the main directory of this archive
21 * for more details.
22 */
23#include <linux/kernel.h>
24#include <linux/init.h>
25#include <linux/io.h>
26#include <linux/irq.h>
27#include <linux/perf_event.h>
28#include <asm/processor.h>
29
/*
 * Counter bookkeeping; one instance exists per CPU (see the
 * DEFINE_PER_CPU below).
 */
30struct cpu_hw_events {
/* Event occupying each counter index; set in sh_pmu_enable(), NULLed in sh_pmu_disable(). */
31 struct perf_event *events[MAX_HWEVENTS];
/* Bit set while a counter index is claimed by an event. */
32 unsigned long used_mask[BITS_TO_LONGS(MAX_HWEVENTS)];
/* Bit set just before the hardware enable, cleared just before the hardware disable. */
33 unsigned long active_mask[BITS_TO_LONGS(MAX_HWEVENTS)];
34};
35
36DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events);
37
38static struct sh_pmu *sh_pmu __read_mostly;
39
40/* Number of perf_events counting hardware events */
41static atomic_t num_events;
42/* Used to avoid races in calling reserve/release_pmc_hardware */
43static DEFINE_MUTEX(pmc_reserve_mutex);
44
45/*
46 * Stub these out for now, do something more profound later.
47 */
/*
 * Placeholder: claims nothing and always reports success (0).
 * __hw_perf_event_init() treats a non-zero return as -EBUSY.
 */
48int reserve_pmc_hardware(void)
49{
50 return 0;
51}
52
/*
 * Placeholder counterpart of reserve_pmc_hardware(); currently a no-op.
 * Called by hw_perf_event_destroy() when the event count drops to zero.
 */
53void release_pmc_hardware(void)
54{
55}
56
57static inline int sh_pmu_initialized(void)
58{
59 return !!sh_pmu;
60}
61
62/*
63 * Release the PMU if this is the last perf_event.
64 */
65static void hw_perf_event_destroy(struct perf_event *event)
66{
67 if (!atomic_add_unless(&num_events, -1, 1)) {
68 mutex_lock(&pmc_reserve_mutex);
69 if (atomic_dec_return(&num_events) == 0)
70 release_pmc_hardware();
71 mutex_unlock(&pmc_reserve_mutex);
72 }
73}
74
75static int hw_perf_cache_event(int config, int *evp)
76{
77 unsigned long type, op, result;
78 int ev;
79
80 if (!sh_pmu->cache_events)
81 return -EINVAL;
82
83 /* unpack config */
84 type = config & 0xff;
85 op = (config >> 8) & 0xff;
86 result = (config >> 16) & 0xff;
87
88 if (type >= PERF_COUNT_HW_CACHE_MAX ||
89 op >= PERF_COUNT_HW_CACHE_OP_MAX ||
90 result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
91 return -EINVAL;
92
93 ev = (*sh_pmu->cache_events)[type][op][result];
94 if (ev == 0)
95 return -EOPNOTSUPP;
96 if (ev == -1)
97 return -EINVAL;
98 *evp = ev;
99 return 0;
100}
101
102static int __hw_perf_event_init(struct perf_event *event)
103{
104 struct perf_event_attr *attr = &event->attr;
105 struct hw_perf_event *hwc = &event->hw;
106 int config;
107 int err;
108
109 if (!sh_pmu_initialized())
110 return -ENODEV;
111
112 /*
113 * All of the on-chip counters are "limited", in that they have
114 * no interrupts, and are therefore unable to do sampling without
115 * further work and timer assistance.
116 */
117 if (hwc->sample_period)
118 return -EINVAL;
119
120 /*
121 * See if we need to reserve the counter.
122 *
123 * If no events are currently in use, then we have to take a
124 * mutex to ensure that we don't race with another task doing
125 * reserve_pmc_hardware or release_pmc_hardware.
126 */
127 err = 0;
128 if (!atomic_inc_not_zero(&num_events)) {
129 mutex_lock(&pmc_reserve_mutex);
130 if (atomic_read(&num_events) == 0 &&
131 reserve_pmc_hardware())
132 err = -EBUSY;
133 else
134 atomic_inc(&num_events);
135 mutex_unlock(&pmc_reserve_mutex);
136 }
137
138 if (err)
139 return err;
140
141 event->destroy = hw_perf_event_destroy;
142
143 switch (attr->type) {
144 case PERF_TYPE_RAW:
145 config = attr->config & sh_pmu->raw_event_mask;
146 break;
147 case PERF_TYPE_HW_CACHE:
148 err = hw_perf_cache_event(attr->config, &config);
149 if (err)
150 return err;
151 break;
152 case PERF_TYPE_HARDWARE:
153 if (attr->config >= sh_pmu->max_events)
154 return -EINVAL;
155
156 config = sh_pmu->event_map(attr->config);
157 break;
158 default:
159 return -EINVAL;
160 }
161
162 if (config == -1)
163 return -EINVAL;
164
165 hwc->config |= config;
166
167 return 0;
168}
169
170static void sh_perf_event_update(struct perf_event *event,
171 struct hw_perf_event *hwc, int idx)
172{
173 u64 prev_raw_count, new_raw_count;
174 s64 delta;
175 int shift = 0;
176
177 /*
178 * Depending on the counter configuration, they may or may not
179 * be chained, in which case the previous counter value can be
180 * updated underneath us if the lower-half overflows.
181 *
182 * Our tactic to handle this is to first atomically read and
183 * exchange a new raw count - then add that new-prev delta
184 * count to the generic counter atomically.
185 *
186 * As there is no interrupt associated with the overflow events,
187 * this is the simplest approach for maintaining consistency.
188 */
189again:
190 prev_raw_count = atomic64_read(&hwc->prev_count);
191 new_raw_count = sh_pmu->read(idx);
192
193 if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count,
194 new_raw_count) != prev_raw_count)
195 goto again;
196
197 /*
198 * Now we have the new raw value and have updated the prev
199 * timestamp already. We can now calculate the elapsed delta
200 * (counter-)time and add that to the generic counter.
201 *
202 * Careful, not all hw sign-extends above the physical width
203 * of the count.
204 */
205 delta = (new_raw_count << shift) - (prev_raw_count << shift);
206 delta >>= shift;
207
208 atomic64_add(delta, &event->count);
209}
210
211static void sh_pmu_disable(struct perf_event *event)
212{
213 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
214 struct hw_perf_event *hwc = &event->hw;
215 int idx = hwc->idx;
216
217 clear_bit(idx, cpuc->active_mask);
218 sh_pmu->disable(hwc, idx);
219
220 barrier();
221
222 sh_perf_event_update(event, &event->hw, idx);
223
224 cpuc->events[idx] = NULL;
225 clear_bit(idx, cpuc->used_mask);
226
227 perf_event_update_userpage(event);
228}
229
230static int sh_pmu_enable(struct perf_event *event)
231{
232 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
233 struct hw_perf_event *hwc = &event->hw;
234 int idx = hwc->idx;
235
236 if (test_and_set_bit(idx, cpuc->used_mask)) {
237 idx = find_first_zero_bit(cpuc->used_mask, sh_pmu->num_events);
238 if (idx == sh_pmu->num_events)
239 return -EAGAIN;
240
241 set_bit(idx, cpuc->used_mask);
242 hwc->idx = idx;
243 }
244
245 sh_pmu->disable(hwc, idx);
246
247 cpuc->events[idx] = event;
248 set_bit(idx, cpuc->active_mask);
249
250 sh_pmu->enable(hwc, idx);
251
252 perf_event_update_userpage(event);
253
254 return 0;
255}
256
257static void sh_pmu_read(struct perf_event *event)
258{
259 sh_perf_event_update(event, &event->hw, event->hw.idx);
260}
261
262static const struct pmu pmu = {
263 .enable = sh_pmu_enable,
264 .disable = sh_pmu_disable,
265 .read = sh_pmu_read,
266};
267
268const struct pmu *hw_perf_event_init(struct perf_event *event)
269{
270 int err = __hw_perf_event_init(event);
271 if (unlikely(err)) {
272 if (event->destroy)
273 event->destroy(event);
274 return ERR_PTR(err);
275 }
276
277 return &pmu;
278}
279
280void hw_perf_event_setup(int cpu)
281{
282 struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu);
283
284 memset(cpuhw, 0, sizeof(struct cpu_hw_events));
285}
286
287void hw_perf_enable(void)
288{
289 if (!sh_pmu_initialized())
290 return;
291
292 sh_pmu->enable_all();
293}
294
295void hw_perf_disable(void)
296{
297 if (!sh_pmu_initialized())
298 return;
299
300 sh_pmu->disable_all();
301}
302
303int register_sh_pmu(struct sh_pmu *pmu)
304{
305 if (sh_pmu)
306 return -EBUSY;
307 sh_pmu = pmu;
308
309 pr_info("Performance Events: %s support registered\n", pmu->name);
310
311 WARN_ON(pmu->num_events >= MAX_HWEVENTS);
312
313 return 0;
314}