diff options
author | Zhigang Lu <zlu@tilera.com> | 2014-01-27 21:03:50 -0500 |
---|---|---|
committer | Chris Metcalf <cmetcalf@tilera.com> | 2014-03-07 11:19:48 -0500 |
commit | 8d61dd7d3e374eb52a174ab04169b04e3d9d729f (patch) | |
tree | 0312a9743c802bc329ebcb6ec7952727a5adc204 /arch/tile | |
parent | ba67823163c963de7f1f2d87526c9c87f3a3ea0b (diff) |
tile/perf: Support perf_events on tilegx and tilepro
Add perf support for tile architecture.
Signed-off-by: Zhigang Lu <zlu@tilera.com>
Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
Diffstat (limited to 'arch/tile')
-rw-r--r-- | arch/tile/Kconfig | 2 | ||||
-rw-r--r-- | arch/tile/include/asm/perf_event.h | 22 | ||||
-rw-r--r-- | arch/tile/kernel/Makefile | 1 | ||||
-rw-r--r-- | arch/tile/kernel/irq.c | 18 | ||||
-rw-r--r-- | arch/tile/kernel/perf_event.c | 1005 |
5 files changed, 1048 insertions, 0 deletions
diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig index 3067b15e80d6..31c8c6223995 100644 --- a/arch/tile/Kconfig +++ b/arch/tile/Kconfig | |||
@@ -3,6 +3,8 @@ | |||
3 | 3 | ||
4 | config TILE | 4 | config TILE |
5 | def_bool y | 5 | def_bool y |
6 | select HAVE_PERF_EVENTS | ||
7 | select USE_PMC if PERF_EVENTS | ||
6 | select HAVE_DMA_ATTRS | 8 | select HAVE_DMA_ATTRS |
7 | select HAVE_DMA_API_DEBUG | 9 | select HAVE_DMA_API_DEBUG |
8 | select HAVE_KVM if !TILEGX | 10 | select HAVE_KVM if !TILEGX |
diff --git a/arch/tile/include/asm/perf_event.h b/arch/tile/include/asm/perf_event.h new file mode 100644 index 000000000000..59c5b164e5b6 --- /dev/null +++ b/arch/tile/include/asm/perf_event.h | |||
@@ -0,0 +1,22 @@ | |||
1 | /* | ||
2 | * Copyright 2014 Tilera Corporation. All Rights Reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of the GNU General Public License | ||
6 | * as published by the Free Software Foundation, version 2. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, but | ||
9 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or | ||
11 | * NON INFRINGEMENT. See the GNU General Public License for | ||
12 | * more details. | ||
13 | */ | ||
14 | |||
15 | #ifndef _ASM_TILE_PERF_EVENT_H | ||
16 | #define _ASM_TILE_PERF_EVENT_H | ||
17 | |||
18 | #include <linux/percpu.h> | ||
19 | DECLARE_PER_CPU(u64, perf_irqs); | ||
20 | |||
21 | unsigned long handle_syscall_link_address(void); | ||
22 | #endif /* _ASM_TILE_PERF_EVENT_H */ | ||
diff --git a/arch/tile/kernel/Makefile b/arch/tile/kernel/Makefile index 71d835365c73..21f77bf68c69 100644 --- a/arch/tile/kernel/Makefile +++ b/arch/tile/kernel/Makefile | |||
@@ -25,6 +25,7 @@ obj-$(CONFIG_PCI) += pci_gx.o | |||
25 | else | 25 | else |
26 | obj-$(CONFIG_PCI) += pci.o | 26 | obj-$(CONFIG_PCI) += pci.o |
27 | endif | 27 | endif |
28 | obj-$(CONFIG_PERF_EVENTS) += perf_event.o | ||
28 | obj-$(CONFIG_USE_PMC) += pmc.o | 29 | obj-$(CONFIG_USE_PMC) += pmc.o |
29 | obj-$(CONFIG_TILE_USB) += usb.o | 30 | obj-$(CONFIG_TILE_USB) += usb.o |
30 | obj-$(CONFIG_TILE_HVGLUE_TRACE) += hvglue_trace.o | 31 | obj-$(CONFIG_TILE_HVGLUE_TRACE) += hvglue_trace.o |
diff --git a/arch/tile/kernel/irq.c b/arch/tile/kernel/irq.c index 0586fdb9352d..906a76bdb31d 100644 --- a/arch/tile/kernel/irq.c +++ b/arch/tile/kernel/irq.c | |||
@@ -21,6 +21,7 @@ | |||
21 | #include <hv/drv_pcie_rc_intf.h> | 21 | #include <hv/drv_pcie_rc_intf.h> |
22 | #include <arch/spr_def.h> | 22 | #include <arch/spr_def.h> |
23 | #include <asm/traps.h> | 23 | #include <asm/traps.h> |
24 | #include <linux/perf_event.h> | ||
24 | 25 | ||
25 | /* Bit-flag stored in irq_desc->chip_data to indicate HW-cleared irqs. */ | 26 | /* Bit-flag stored in irq_desc->chip_data to indicate HW-cleared irqs. */ |
26 | #define IS_HW_CLEARED 1 | 27 | #define IS_HW_CLEARED 1 |
@@ -261,6 +262,23 @@ void ack_bad_irq(unsigned int irq) | |||
261 | } | 262 | } |
262 | 263 | ||
263 | /* | 264 | /* |
265 | * /proc/interrupts printing: | ||
266 | */ | ||
267 | int arch_show_interrupts(struct seq_file *p, int prec) | ||
268 | { | ||
269 | #ifdef CONFIG_PERF_EVENTS | ||
270 | int i; | ||
271 | |||
272 | seq_printf(p, "%*s: ", prec, "PMI"); | ||
273 | |||
274 | for_each_online_cpu(i) | ||
275 | seq_printf(p, "%10llu ", per_cpu(perf_irqs, i)); | ||
276 | seq_puts(p, " perf_events\n"); | ||
277 | #endif | ||
278 | return 0; | ||
279 | } | ||
280 | |||
281 | /* | ||
264 | * Generic, controller-independent functions: | 282 | * Generic, controller-independent functions: |
265 | */ | 283 | */ |
266 | 284 | ||
diff --git a/arch/tile/kernel/perf_event.c b/arch/tile/kernel/perf_event.c new file mode 100644 index 000000000000..2bf6c9c135c1 --- /dev/null +++ b/arch/tile/kernel/perf_event.c | |||
@@ -0,0 +1,1005 @@ | |||
1 | /* | ||
2 | * Copyright 2014 Tilera Corporation. All Rights Reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of the GNU General Public License | ||
6 | * as published by the Free Software Foundation, version 2. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, but | ||
9 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or | ||
11 | * NON INFRINGEMENT. See the GNU General Public License for | ||
12 | * more details. | ||
13 | * | ||
14 | * | ||
15 | * Perf_events support for Tile processor. | ||
16 | * | ||
17 | * This code is based upon the x86 perf event | ||
18 | * code, which is: | ||
19 | * | ||
20 | * Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de> | ||
21 | * Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar | ||
22 | * Copyright (C) 2009 Jaswinder Singh Rajput | ||
23 | * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter | ||
24 | * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com> | ||
25 | * Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com> | ||
26 | * Copyright (C) 2009 Google, Inc., Stephane Eranian | ||
27 | */ | ||
28 | |||
29 | #include <linux/kprobes.h> | ||
30 | #include <linux/kernel.h> | ||
31 | #include <linux/kdebug.h> | ||
32 | #include <linux/mutex.h> | ||
33 | #include <linux/bitmap.h> | ||
34 | #include <linux/irq.h> | ||
35 | #include <linux/interrupt.h> | ||
36 | #include <linux/perf_event.h> | ||
37 | #include <linux/atomic.h> | ||
38 | #include <asm/traps.h> | ||
39 | #include <asm/stack.h> | ||
40 | #include <asm/pmc.h> | ||
41 | #include <hv/hypervisor.h> | ||
42 | |||
43 | #define TILE_MAX_COUNTERS 4 | ||
44 | |||
45 | #define PERF_COUNT_0_IDX 0 | ||
46 | #define PERF_COUNT_1_IDX 1 | ||
47 | #define AUX_PERF_COUNT_0_IDX 2 | ||
48 | #define AUX_PERF_COUNT_1_IDX 3 | ||
49 | |||
50 | struct cpu_hw_events { | ||
51 | int n_events; | ||
52 | struct perf_event *events[TILE_MAX_COUNTERS]; /* counter order */ | ||
53 | struct perf_event *event_list[TILE_MAX_COUNTERS]; /* enabled | ||
54 | order */ | ||
55 | int assign[TILE_MAX_COUNTERS]; | ||
56 | unsigned long active_mask[BITS_TO_LONGS(TILE_MAX_COUNTERS)]; | ||
57 | unsigned long used_mask; | ||
58 | }; | ||
59 | |||
60 | /* TILE arch specific performance monitor unit */ | ||
61 | struct tile_pmu { | ||
62 | const char *name; | ||
63 | int version; | ||
64 | const int *hw_events; /* generic hw events table */ | ||
65 | /* generic hw cache events table */ | ||
66 | const int (*cache_events)[PERF_COUNT_HW_CACHE_MAX] | ||
67 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
68 | [PERF_COUNT_HW_CACHE_RESULT_MAX]; | ||
69 | int (*map_hw_event)(u64); /*method used to map | ||
70 | hw events */ | ||
71 | int (*map_cache_event)(u64); /*method used to map | ||
72 | cache events */ | ||
73 | |||
74 | u64 max_period; /* max sampling period */ | ||
75 | u64 cntval_mask; /* counter width mask */ | ||
76 | int cntval_bits; /* counter width */ | ||
77 | int max_events; /* max generic hw events | ||
78 | in map */ | ||
79 | int num_counters; /* number base + aux counters */ | ||
80 | int num_base_counters; /* number base counters */ | ||
81 | }; | ||
82 | |||
83 | DEFINE_PER_CPU(u64, perf_irqs); | ||
84 | static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events); | ||
85 | |||
86 | #define TILE_OP_UNSUPP (-1) | ||
87 | |||
88 | #ifndef __tilegx__ | ||
89 | /* TILEPro hardware events map */ | ||
90 | static const int tile_hw_event_map[] = { | ||
91 | [PERF_COUNT_HW_CPU_CYCLES] = 0x01, /* ONE */ | ||
92 | [PERF_COUNT_HW_INSTRUCTIONS] = 0x06, /* MP_BUNDLE_RETIRED */ | ||
93 | [PERF_COUNT_HW_CACHE_REFERENCES] = TILE_OP_UNSUPP, | ||
94 | [PERF_COUNT_HW_CACHE_MISSES] = TILE_OP_UNSUPP, | ||
95 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x16, /* | ||
96 | MP_CONDITIONAL_BRANCH_ISSUED */ | ||
97 | [PERF_COUNT_HW_BRANCH_MISSES] = 0x14, /* | ||
98 | MP_CONDITIONAL_BRANCH_MISSPREDICT */ | ||
99 | [PERF_COUNT_HW_BUS_CYCLES] = TILE_OP_UNSUPP, | ||
100 | }; | ||
101 | #else | ||
102 | /* TILEGx hardware events map */ | ||
103 | static const int tile_hw_event_map[] = { | ||
104 | [PERF_COUNT_HW_CPU_CYCLES] = 0x181, /* ONE */ | ||
105 | [PERF_COUNT_HW_INSTRUCTIONS] = 0xdb, /* INSTRUCTION_BUNDLE */ | ||
106 | [PERF_COUNT_HW_CACHE_REFERENCES] = TILE_OP_UNSUPP, | ||
107 | [PERF_COUNT_HW_CACHE_MISSES] = TILE_OP_UNSUPP, | ||
108 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0xd9, /* | ||
109 | COND_BRANCH_PRED_CORRECT */ | ||
110 | [PERF_COUNT_HW_BRANCH_MISSES] = 0xda, /* | ||
111 | COND_BRANCH_PRED_INCORRECT */ | ||
112 | [PERF_COUNT_HW_BUS_CYCLES] = TILE_OP_UNSUPP, | ||
113 | }; | ||
114 | #endif | ||
115 | |||
116 | #define C(x) PERF_COUNT_HW_CACHE_##x | ||
117 | |||
118 | /* | ||
119 | * Generalized hw caching related hw_event table, filled | ||
120 | * in on a per model basis. A value of -1 means | ||
121 | * 'not supported', any other value means the | ||
122 | * raw hw_event ID. | ||
123 | */ | ||
124 | #ifndef __tilegx__ | ||
125 | /* TILEPro hardware cache event map */ | ||
126 | static const int tile_cache_event_map[PERF_COUNT_HW_CACHE_MAX] | ||
127 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
128 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = { | ||
129 | [C(L1D)] = { | ||
130 | [C(OP_READ)] = { | ||
131 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
132 | [C(RESULT_MISS)] = 0x21, /* RD_MISS */ | ||
133 | }, | ||
134 | [C(OP_WRITE)] = { | ||
135 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
136 | [C(RESULT_MISS)] = 0x22, /* WR_MISS */ | ||
137 | }, | ||
138 | [C(OP_PREFETCH)] = { | ||
139 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
140 | [C(RESULT_MISS)] = TILE_OP_UNSUPP, | ||
141 | }, | ||
142 | }, | ||
143 | [C(L1I)] = { | ||
144 | [C(OP_READ)] = { | ||
145 | [C(RESULT_ACCESS)] = 0x12, /* MP_ICACHE_HIT_ISSUED */ | ||
146 | [C(RESULT_MISS)] = TILE_OP_UNSUPP, | ||
147 | }, | ||
148 | [C(OP_WRITE)] = { | ||
149 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
150 | [C(RESULT_MISS)] = TILE_OP_UNSUPP, | ||
151 | }, | ||
152 | [C(OP_PREFETCH)] = { | ||
153 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
154 | [C(RESULT_MISS)] = TILE_OP_UNSUPP, | ||
155 | }, | ||
156 | }, | ||
157 | [C(LL)] = { | ||
158 | [C(OP_READ)] = { | ||
159 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
160 | [C(RESULT_MISS)] = TILE_OP_UNSUPP, | ||
161 | }, | ||
162 | [C(OP_WRITE)] = { | ||
163 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
164 | [C(RESULT_MISS)] = TILE_OP_UNSUPP, | ||
165 | }, | ||
166 | [C(OP_PREFETCH)] = { | ||
167 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
168 | [C(RESULT_MISS)] = TILE_OP_UNSUPP, | ||
169 | }, | ||
170 | }, | ||
171 | [C(DTLB)] = { | ||
172 | [C(OP_READ)] = { | ||
173 | [C(RESULT_ACCESS)] = 0x1d, /* TLB_CNT */ | ||
174 | [C(RESULT_MISS)] = 0x20, /* TLB_EXCEPTION */ | ||
175 | }, | ||
176 | [C(OP_WRITE)] = { | ||
177 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
178 | [C(RESULT_MISS)] = TILE_OP_UNSUPP, | ||
179 | }, | ||
180 | [C(OP_PREFETCH)] = { | ||
181 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
182 | [C(RESULT_MISS)] = TILE_OP_UNSUPP, | ||
183 | }, | ||
184 | }, | ||
185 | [C(ITLB)] = { | ||
186 | [C(OP_READ)] = { | ||
187 | [C(RESULT_ACCESS)] = 0x13, /* MP_ITLB_HIT_ISSUED */ | ||
188 | [C(RESULT_MISS)] = TILE_OP_UNSUPP, | ||
189 | }, | ||
190 | [C(OP_WRITE)] = { | ||
191 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
192 | [C(RESULT_MISS)] = TILE_OP_UNSUPP, | ||
193 | }, | ||
194 | [C(OP_PREFETCH)] = { | ||
195 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
196 | [C(RESULT_MISS)] = TILE_OP_UNSUPP, | ||
197 | }, | ||
198 | }, | ||
199 | [C(BPU)] = { | ||
200 | [C(OP_READ)] = { | ||
201 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
202 | [C(RESULT_MISS)] = TILE_OP_UNSUPP, | ||
203 | }, | ||
204 | [C(OP_WRITE)] = { | ||
205 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
206 | [C(RESULT_MISS)] = TILE_OP_UNSUPP, | ||
207 | }, | ||
208 | [C(OP_PREFETCH)] = { | ||
209 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
210 | [C(RESULT_MISS)] = TILE_OP_UNSUPP, | ||
211 | }, | ||
212 | }, | ||
213 | }; | ||
214 | #else | ||
215 | /* TILEGx hardware cache event map */ | ||
216 | static const int tile_cache_event_map[PERF_COUNT_HW_CACHE_MAX] | ||
217 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
218 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = { | ||
219 | [C(L1D)] = { | ||
220 | /* | ||
221 | * Like some other architectures (e.g. ARM), the performance | ||
222 | * counters don't differentiate between read and write | ||
223 | * accesses/misses, so this isn't strictly correct, but it's the | ||
224 | * best we can do. Writes and reads get combined. | ||
225 | */ | ||
226 | [C(OP_READ)] = { | ||
227 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
228 | [C(RESULT_MISS)] = 0x44, /* RD_MISS */ | ||
229 | }, | ||
230 | [C(OP_WRITE)] = { | ||
231 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
232 | [C(RESULT_MISS)] = 0x45, /* WR_MISS */ | ||
233 | }, | ||
234 | [C(OP_PREFETCH)] = { | ||
235 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
236 | [C(RESULT_MISS)] = TILE_OP_UNSUPP, | ||
237 | }, | ||
238 | }, | ||
239 | [C(L1I)] = { | ||
240 | [C(OP_READ)] = { | ||
241 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
242 | [C(RESULT_MISS)] = TILE_OP_UNSUPP, | ||
243 | }, | ||
244 | [C(OP_WRITE)] = { | ||
245 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
246 | [C(RESULT_MISS)] = TILE_OP_UNSUPP, | ||
247 | }, | ||
248 | [C(OP_PREFETCH)] = { | ||
249 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
250 | [C(RESULT_MISS)] = TILE_OP_UNSUPP, | ||
251 | }, | ||
252 | }, | ||
253 | [C(LL)] = { | ||
254 | [C(OP_READ)] = { | ||
255 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
256 | [C(RESULT_MISS)] = TILE_OP_UNSUPP, | ||
257 | }, | ||
258 | [C(OP_WRITE)] = { | ||
259 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
260 | [C(RESULT_MISS)] = TILE_OP_UNSUPP, | ||
261 | }, | ||
262 | [C(OP_PREFETCH)] = { | ||
263 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
264 | [C(RESULT_MISS)] = TILE_OP_UNSUPP, | ||
265 | }, | ||
266 | }, | ||
267 | [C(DTLB)] = { | ||
268 | [C(OP_READ)] = { | ||
269 | [C(RESULT_ACCESS)] = 0x40, /* TLB_CNT */ | ||
270 | [C(RESULT_MISS)] = 0x43, /* TLB_EXCEPTION */ | ||
271 | }, | ||
272 | [C(OP_WRITE)] = { | ||
273 | [C(RESULT_ACCESS)] = 0x40, /* TLB_CNT */ | ||
274 | [C(RESULT_MISS)] = 0x43, /* TLB_EXCEPTION */ | ||
275 | }, | ||
276 | [C(OP_PREFETCH)] = { | ||
277 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
278 | [C(RESULT_MISS)] = TILE_OP_UNSUPP, | ||
279 | }, | ||
280 | }, | ||
281 | [C(ITLB)] = { | ||
282 | [C(OP_READ)] = { | ||
283 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
284 | [C(RESULT_MISS)] = 0xd4, /* ITLB_MISS_INT */ | ||
285 | }, | ||
286 | [C(OP_WRITE)] = { | ||
287 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
288 | [C(RESULT_MISS)] = 0xd4, /* ITLB_MISS_INT */ | ||
289 | }, | ||
290 | [C(OP_PREFETCH)] = { | ||
291 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
292 | [C(RESULT_MISS)] = TILE_OP_UNSUPP, | ||
293 | }, | ||
294 | }, | ||
295 | [C(BPU)] = { | ||
296 | [C(OP_READ)] = { | ||
297 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
298 | [C(RESULT_MISS)] = TILE_OP_UNSUPP, | ||
299 | }, | ||
300 | [C(OP_WRITE)] = { | ||
301 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
302 | [C(RESULT_MISS)] = TILE_OP_UNSUPP, | ||
303 | }, | ||
304 | [C(OP_PREFETCH)] = { | ||
305 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
306 | [C(RESULT_MISS)] = TILE_OP_UNSUPP, | ||
307 | }, | ||
308 | }, | ||
309 | }; | ||
310 | #endif | ||
311 | |||
312 | static atomic_t tile_active_events; | ||
313 | static DEFINE_MUTEX(perf_intr_reserve_mutex); | ||
314 | |||
315 | static int tile_map_hw_event(u64 config); | ||
316 | static int tile_map_cache_event(u64 config); | ||
317 | |||
318 | static int tile_pmu_handle_irq(struct pt_regs *regs, int fault); | ||
319 | |||
320 | /* | ||
321 | * To avoid new_raw_count getting larger than prev_raw_count | ||
322 | * in tile_perf_event_update(), we limit the value of max_period to 2^31 - 1. | ||
323 | */ | ||
324 | static const struct tile_pmu tilepmu = { | ||
325 | #ifndef __tilegx__ | ||
326 | .name = "tilepro", | ||
327 | #else | ||
328 | .name = "tilegx", | ||
329 | #endif | ||
330 | .max_events = ARRAY_SIZE(tile_hw_event_map), | ||
331 | .map_hw_event = tile_map_hw_event, | ||
332 | .hw_events = tile_hw_event_map, | ||
333 | .map_cache_event = tile_map_cache_event, | ||
334 | .cache_events = &tile_cache_event_map, | ||
335 | .cntval_bits = 32, | ||
336 | .cntval_mask = (1ULL << 32) - 1, | ||
337 | .max_period = (1ULL << 31) - 1, | ||
338 | .num_counters = TILE_MAX_COUNTERS, | ||
339 | .num_base_counters = TILE_BASE_COUNTERS, | ||
340 | }; | ||
341 | |||
342 | static const struct tile_pmu *tile_pmu __read_mostly; | ||
343 | |||
344 | /* | ||
345 | * Check whether perf event is enabled. | ||
346 | */ | ||
347 | int tile_perf_enabled(void) | ||
348 | { | ||
349 | return atomic_read(&tile_active_events) != 0; | ||
350 | } | ||
351 | |||
352 | /* | ||
353 | * Read Performance Counters. | ||
354 | */ | ||
355 | static inline u64 read_counter(int idx) | ||
356 | { | ||
357 | u64 val = 0; | ||
358 | |||
359 | /* __insn_mfspr() only takes an immediate argument */ | ||
360 | switch (idx) { | ||
361 | case PERF_COUNT_0_IDX: | ||
362 | val = __insn_mfspr(SPR_PERF_COUNT_0); | ||
363 | break; | ||
364 | case PERF_COUNT_1_IDX: | ||
365 | val = __insn_mfspr(SPR_PERF_COUNT_1); | ||
366 | break; | ||
367 | case AUX_PERF_COUNT_0_IDX: | ||
368 | val = __insn_mfspr(SPR_AUX_PERF_COUNT_0); | ||
369 | break; | ||
370 | case AUX_PERF_COUNT_1_IDX: | ||
371 | val = __insn_mfspr(SPR_AUX_PERF_COUNT_1); | ||
372 | break; | ||
373 | default: | ||
374 | WARN_ON_ONCE(idx > AUX_PERF_COUNT_1_IDX || | ||
375 | idx < PERF_COUNT_0_IDX); | ||
376 | } | ||
377 | |||
378 | return val; | ||
379 | } | ||
380 | |||
381 | /* | ||
382 | * Write Performance Counters. | ||
383 | */ | ||
384 | static inline void write_counter(int idx, u64 value) | ||
385 | { | ||
386 | /* __insn_mtspr() only takes an immediate argument */ | ||
387 | switch (idx) { | ||
388 | case PERF_COUNT_0_IDX: | ||
389 | __insn_mtspr(SPR_PERF_COUNT_0, value); | ||
390 | break; | ||
391 | case PERF_COUNT_1_IDX: | ||
392 | __insn_mtspr(SPR_PERF_COUNT_1, value); | ||
393 | break; | ||
394 | case AUX_PERF_COUNT_0_IDX: | ||
395 | __insn_mtspr(SPR_AUX_PERF_COUNT_0, value); | ||
396 | break; | ||
397 | case AUX_PERF_COUNT_1_IDX: | ||
398 | __insn_mtspr(SPR_AUX_PERF_COUNT_1, value); | ||
399 | break; | ||
400 | default: | ||
401 | WARN_ON_ONCE(idx > AUX_PERF_COUNT_1_IDX || | ||
402 | idx < PERF_COUNT_0_IDX); | ||
403 | } | ||
404 | } | ||
405 | |||
406 | /* | ||
407 | * Enable performance event by setting | ||
408 | * Performance Counter Control registers. | ||
409 | */ | ||
410 | static inline void tile_pmu_enable_event(struct perf_event *event) | ||
411 | { | ||
412 | struct hw_perf_event *hwc = &event->hw; | ||
413 | unsigned long cfg, mask; | ||
414 | int shift, idx = hwc->idx; | ||
415 | |||
416 | /* | ||
417 | * prevent early activation from tile_pmu_start() in hw_perf_enable | ||
418 | */ | ||
419 | |||
420 | if (WARN_ON_ONCE(idx == -1)) | ||
421 | return; | ||
422 | |||
423 | if (idx < tile_pmu->num_base_counters) | ||
424 | cfg = __insn_mfspr(SPR_PERF_COUNT_CTL); | ||
425 | else | ||
426 | cfg = __insn_mfspr(SPR_AUX_PERF_COUNT_CTL); | ||
427 | |||
428 | switch (idx) { | ||
429 | case PERF_COUNT_0_IDX: | ||
430 | case AUX_PERF_COUNT_0_IDX: | ||
431 | mask = TILE_EVENT_MASK; | ||
432 | shift = 0; | ||
433 | break; | ||
434 | case PERF_COUNT_1_IDX: | ||
435 | case AUX_PERF_COUNT_1_IDX: | ||
436 | mask = TILE_EVENT_MASK << 16; | ||
437 | shift = 16; | ||
438 | break; | ||
439 | default: | ||
440 | WARN_ON_ONCE(idx < PERF_COUNT_0_IDX || | ||
441 | idx > AUX_PERF_COUNT_1_IDX); | ||
442 | return; | ||
443 | } | ||
444 | |||
445 | /* Clear mask bits to enable the event. */ | ||
446 | cfg &= ~mask; | ||
447 | cfg |= hwc->config << shift; | ||
448 | |||
449 | if (idx < tile_pmu->num_base_counters) | ||
450 | __insn_mtspr(SPR_PERF_COUNT_CTL, cfg); | ||
451 | else | ||
452 | __insn_mtspr(SPR_AUX_PERF_COUNT_CTL, cfg); | ||
453 | } | ||
454 | |||
455 | /* | ||
456 | * Disable performance event by clearing | ||
457 | * Performance Counter Control registers. | ||
458 | */ | ||
459 | static inline void tile_pmu_disable_event(struct perf_event *event) | ||
460 | { | ||
461 | struct hw_perf_event *hwc = &event->hw; | ||
462 | unsigned long cfg, mask; | ||
463 | int idx = hwc->idx; | ||
464 | |||
465 | if (idx == -1) | ||
466 | return; | ||
467 | |||
468 | if (idx < tile_pmu->num_base_counters) | ||
469 | cfg = __insn_mfspr(SPR_PERF_COUNT_CTL); | ||
470 | else | ||
471 | cfg = __insn_mfspr(SPR_AUX_PERF_COUNT_CTL); | ||
472 | |||
473 | switch (idx) { | ||
474 | case PERF_COUNT_0_IDX: | ||
475 | case AUX_PERF_COUNT_0_IDX: | ||
476 | mask = TILE_PLM_MASK; | ||
477 | break; | ||
478 | case PERF_COUNT_1_IDX: | ||
479 | case AUX_PERF_COUNT_1_IDX: | ||
480 | mask = TILE_PLM_MASK << 16; | ||
481 | break; | ||
482 | default: | ||
483 | WARN_ON_ONCE(idx < PERF_COUNT_0_IDX || | ||
484 | idx > AUX_PERF_COUNT_1_IDX); | ||
485 | return; | ||
486 | } | ||
487 | |||
488 | /* Set mask bits to disable the event. */ | ||
489 | cfg |= mask; | ||
490 | |||
491 | if (idx < tile_pmu->num_base_counters) | ||
492 | __insn_mtspr(SPR_PERF_COUNT_CTL, cfg); | ||
493 | else | ||
494 | __insn_mtspr(SPR_AUX_PERF_COUNT_CTL, cfg); | ||
495 | } | ||
496 | |||
497 | /* | ||
498 | * Propagate event elapsed time into the generic event. | ||
499 | * Can only be executed on the CPU where the event is active. | ||
500 | * Returns the delta events processed. | ||
501 | */ | ||
502 | static u64 tile_perf_event_update(struct perf_event *event) | ||
503 | { | ||
504 | struct hw_perf_event *hwc = &event->hw; | ||
505 | int shift = 64 - tile_pmu->cntval_bits; | ||
506 | u64 prev_raw_count, new_raw_count; | ||
507 | u64 oldval; | ||
508 | int idx = hwc->idx; | ||
509 | u64 delta; | ||
510 | |||
511 | /* | ||
512 | * Careful: an NMI might modify the previous event value. | ||
513 | * | ||
514 | * Our tactic to handle this is to first atomically read and | ||
515 | * exchange a new raw count - then add that new-prev delta | ||
516 | * count to the generic event atomically: | ||
517 | */ | ||
518 | again: | ||
519 | prev_raw_count = local64_read(&hwc->prev_count); | ||
520 | new_raw_count = read_counter(idx); | ||
521 | |||
522 | oldval = local64_cmpxchg(&hwc->prev_count, prev_raw_count, | ||
523 | new_raw_count); | ||
524 | if (oldval != prev_raw_count) | ||
525 | goto again; | ||
526 | |||
527 | /* | ||
528 | * Now we have the new raw value and have updated the prev | ||
529 | * timestamp already. We can now calculate the elapsed delta | ||
530 | * (event-)time and add that to the generic event. | ||
531 | * | ||
532 | * Careful, not all hw sign-extends above the physical width | ||
533 | * of the count. | ||
534 | */ | ||
535 | delta = (new_raw_count << shift) - (prev_raw_count << shift); | ||
536 | delta >>= shift; | ||
537 | |||
538 | local64_add(delta, &event->count); | ||
539 | local64_sub(delta, &hwc->period_left); | ||
540 | |||
541 | return new_raw_count; | ||
542 | } | ||
543 | |||
544 | /* | ||
545 | * Set the next IRQ period, based on the hwc->period_left value. | ||
546 | * To be called with the event disabled in hw: | ||
547 | */ | ||
548 | static int tile_event_set_period(struct perf_event *event) | ||
549 | { | ||
550 | struct hw_perf_event *hwc = &event->hw; | ||
551 | int idx = hwc->idx; | ||
552 | s64 left = local64_read(&hwc->period_left); | ||
553 | s64 period = hwc->sample_period; | ||
554 | int ret = 0; | ||
555 | |||
556 | /* | ||
557 | * If we are way outside a reasonable range then just skip forward: | ||
558 | */ | ||
559 | if (unlikely(left <= -period)) { | ||
560 | left = period; | ||
561 | local64_set(&hwc->period_left, left); | ||
562 | hwc->last_period = period; | ||
563 | ret = 1; | ||
564 | } | ||
565 | |||
566 | if (unlikely(left <= 0)) { | ||
567 | left += period; | ||
568 | local64_set(&hwc->period_left, left); | ||
569 | hwc->last_period = period; | ||
570 | ret = 1; | ||
571 | } | ||
572 | if (left > tile_pmu->max_period) | ||
573 | left = tile_pmu->max_period; | ||
574 | |||
575 | /* | ||
576 | * The hw event starts counting from this event offset, | ||
577 | * mark it to be able to extract future deltas: | ||
578 | */ | ||
579 | local64_set(&hwc->prev_count, (u64)-left); | ||
580 | |||
581 | write_counter(idx, (u64)(-left) & tile_pmu->cntval_mask); | ||
582 | |||
583 | perf_event_update_userpage(event); | ||
584 | |||
585 | return ret; | ||
586 | } | ||
587 | |||
588 | /* | ||
589 | * Stop the event but do not release the PMU counter | ||
590 | */ | ||
591 | static void tile_pmu_stop(struct perf_event *event, int flags) | ||
592 | { | ||
593 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
594 | struct hw_perf_event *hwc = &event->hw; | ||
595 | int idx = hwc->idx; | ||
596 | |||
597 | if (__test_and_clear_bit(idx, cpuc->active_mask)) { | ||
598 | tile_pmu_disable_event(event); | ||
599 | cpuc->events[hwc->idx] = NULL; | ||
600 | WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED); | ||
601 | hwc->state |= PERF_HES_STOPPED; | ||
602 | } | ||
603 | |||
604 | if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) { | ||
605 | /* | ||
606 | * Drain the remaining delta count out of an event | ||
607 | * that we are disabling: | ||
608 | */ | ||
609 | tile_perf_event_update(event); | ||
610 | hwc->state |= PERF_HES_UPTODATE; | ||
611 | } | ||
612 | } | ||
613 | |||
614 | /* | ||
615 | * Start an event (without re-assigning counter) | ||
616 | */ | ||
617 | static void tile_pmu_start(struct perf_event *event, int flags) | ||
618 | { | ||
619 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
620 | int idx = event->hw.idx; | ||
621 | |||
622 | if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED))) | ||
623 | return; | ||
624 | |||
625 | if (WARN_ON_ONCE(idx == -1)) | ||
626 | return; | ||
627 | |||
628 | if (flags & PERF_EF_RELOAD) { | ||
629 | WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE)); | ||
630 | tile_event_set_period(event); | ||
631 | } | ||
632 | |||
633 | event->hw.state = 0; | ||
634 | |||
635 | cpuc->events[idx] = event; | ||
636 | __set_bit(idx, cpuc->active_mask); | ||
637 | |||
638 | unmask_pmc_interrupts(); | ||
639 | |||
640 | tile_pmu_enable_event(event); | ||
641 | |||
642 | perf_event_update_userpage(event); | ||
643 | } | ||
644 | |||
645 | /* | ||
646 | * Add a single event to the PMU. | ||
647 | * | ||
648 | * The event is added to the group of enabled events | ||
649 | * but only if it can be scehduled with existing events. | ||
650 | */ | ||
651 | static int tile_pmu_add(struct perf_event *event, int flags) | ||
652 | { | ||
653 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
654 | struct hw_perf_event *hwc; | ||
655 | unsigned long mask; | ||
656 | int b, max_cnt; | ||
657 | |||
658 | hwc = &event->hw; | ||
659 | |||
660 | /* | ||
661 | * We are full. | ||
662 | */ | ||
663 | if (cpuc->n_events == tile_pmu->num_counters) | ||
664 | return -ENOSPC; | ||
665 | |||
666 | cpuc->event_list[cpuc->n_events] = event; | ||
667 | cpuc->n_events++; | ||
668 | |||
669 | hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED; | ||
670 | if (!(flags & PERF_EF_START)) | ||
671 | hwc->state |= PERF_HES_ARCH; | ||
672 | |||
673 | /* | ||
674 | * Find first empty counter. | ||
675 | */ | ||
676 | max_cnt = tile_pmu->num_counters; | ||
677 | mask = ~cpuc->used_mask; | ||
678 | |||
679 | /* Find next free counter. */ | ||
680 | b = find_next_bit(&mask, max_cnt, 0); | ||
681 | |||
682 | /* Should not happen. */ | ||
683 | if (WARN_ON_ONCE(b == max_cnt)) | ||
684 | return -ENOSPC; | ||
685 | |||
686 | /* | ||
687 | * Assign counter to event. | ||
688 | */ | ||
689 | event->hw.idx = b; | ||
690 | __set_bit(b, &cpuc->used_mask); | ||
691 | |||
692 | /* | ||
693 | * Start if requested. | ||
694 | */ | ||
695 | if (flags & PERF_EF_START) | ||
696 | tile_pmu_start(event, PERF_EF_RELOAD); | ||
697 | |||
698 | return 0; | ||
699 | } | ||
700 | |||
701 | /* | ||
702 | * Delete a single event from the PMU. | ||
703 | * | ||
704 | * The event is deleted from the group of enabled events. | ||
705 | * If it is the last event, disable PMU interrupt. | ||
706 | */ | ||
707 | static void tile_pmu_del(struct perf_event *event, int flags) | ||
708 | { | ||
709 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
710 | int i; | ||
711 | |||
712 | /* | ||
713 | * Remove event from list, compact list if necessary. | ||
714 | */ | ||
715 | for (i = 0; i < cpuc->n_events; i++) { | ||
716 | if (cpuc->event_list[i] == event) { | ||
717 | while (++i < cpuc->n_events) | ||
718 | cpuc->event_list[i-1] = cpuc->event_list[i]; | ||
719 | --cpuc->n_events; | ||
720 | cpuc->events[event->hw.idx] = NULL; | ||
721 | __clear_bit(event->hw.idx, &cpuc->used_mask); | ||
722 | tile_pmu_stop(event, PERF_EF_UPDATE); | ||
723 | break; | ||
724 | } | ||
725 | } | ||
726 | /* | ||
727 | * If there are no events left, then mask PMU interrupt. | ||
728 | */ | ||
729 | if (cpuc->n_events == 0) | ||
730 | mask_pmc_interrupts(); | ||
731 | perf_event_update_userpage(event); | ||
732 | } | ||
733 | |||
734 | /* | ||
735 | * Propagate event elapsed time into the event. | ||
736 | */ | ||
737 | static inline void tile_pmu_read(struct perf_event *event) | ||
738 | { | ||
739 | tile_perf_event_update(event); | ||
740 | } | ||
741 | |||
742 | /* | ||
743 | * Map generic events to Tile PMU. | ||
744 | */ | ||
745 | static int tile_map_hw_event(u64 config) | ||
746 | { | ||
747 | if (config >= tile_pmu->max_events) | ||
748 | return -EINVAL; | ||
749 | return tile_pmu->hw_events[config]; | ||
750 | } | ||
751 | |||
752 | /* | ||
753 | * Map generic hardware cache events to Tile PMU. | ||
754 | */ | ||
755 | static int tile_map_cache_event(u64 config) | ||
756 | { | ||
757 | unsigned int cache_type, cache_op, cache_result; | ||
758 | int code; | ||
759 | |||
760 | if (!tile_pmu->cache_events) | ||
761 | return -ENOENT; | ||
762 | |||
763 | cache_type = (config >> 0) & 0xff; | ||
764 | if (cache_type >= PERF_COUNT_HW_CACHE_MAX) | ||
765 | return -EINVAL; | ||
766 | |||
767 | cache_op = (config >> 8) & 0xff; | ||
768 | if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX) | ||
769 | return -EINVAL; | ||
770 | |||
771 | cache_result = (config >> 16) & 0xff; | ||
772 | if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX) | ||
773 | return -EINVAL; | ||
774 | |||
775 | code = (*tile_pmu->cache_events)[cache_type][cache_op][cache_result]; | ||
776 | if (code == TILE_OP_UNSUPP) | ||
777 | return -EINVAL; | ||
778 | |||
779 | return code; | ||
780 | } | ||
781 | |||
782 | static void tile_event_destroy(struct perf_event *event) | ||
783 | { | ||
784 | if (atomic_dec_return(&tile_active_events) == 0) | ||
785 | release_pmc_hardware(); | ||
786 | } | ||
787 | |||
788 | static int __tile_event_init(struct perf_event *event) | ||
789 | { | ||
790 | struct perf_event_attr *attr = &event->attr; | ||
791 | struct hw_perf_event *hwc = &event->hw; | ||
792 | int code; | ||
793 | |||
794 | switch (attr->type) { | ||
795 | case PERF_TYPE_HARDWARE: | ||
796 | code = tile_pmu->map_hw_event(attr->config); | ||
797 | break; | ||
798 | case PERF_TYPE_HW_CACHE: | ||
799 | code = tile_pmu->map_cache_event(attr->config); | ||
800 | break; | ||
801 | case PERF_TYPE_RAW: | ||
802 | code = attr->config & TILE_EVENT_MASK; | ||
803 | break; | ||
804 | default: | ||
805 | /* Should not happen. */ | ||
806 | return -EOPNOTSUPP; | ||
807 | } | ||
808 | |||
809 | if (code < 0) | ||
810 | return code; | ||
811 | |||
812 | hwc->config = code; | ||
813 | hwc->idx = -1; | ||
814 | |||
815 | if (attr->exclude_user) | ||
816 | hwc->config |= TILE_CTL_EXCL_USER; | ||
817 | |||
818 | if (attr->exclude_kernel) | ||
819 | hwc->config |= TILE_CTL_EXCL_KERNEL; | ||
820 | |||
821 | if (attr->exclude_hv) | ||
822 | hwc->config |= TILE_CTL_EXCL_HV; | ||
823 | |||
824 | if (!hwc->sample_period) { | ||
825 | hwc->sample_period = tile_pmu->max_period; | ||
826 | hwc->last_period = hwc->sample_period; | ||
827 | local64_set(&hwc->period_left, hwc->sample_period); | ||
828 | } | ||
829 | event->destroy = tile_event_destroy; | ||
830 | return 0; | ||
831 | } | ||
832 | |||
833 | static int tile_event_init(struct perf_event *event) | ||
834 | { | ||
835 | int err = 0; | ||
836 | perf_irq_t old_irq_handler = NULL; | ||
837 | |||
838 | if (atomic_inc_return(&tile_active_events) == 1) | ||
839 | old_irq_handler = reserve_pmc_hardware(tile_pmu_handle_irq); | ||
840 | |||
841 | if (old_irq_handler) { | ||
842 | pr_warn("PMC hardware busy (reserved by oprofile)\n"); | ||
843 | |||
844 | atomic_dec(&tile_active_events); | ||
845 | return -EBUSY; | ||
846 | } | ||
847 | |||
848 | switch (event->attr.type) { | ||
849 | case PERF_TYPE_RAW: | ||
850 | case PERF_TYPE_HARDWARE: | ||
851 | case PERF_TYPE_HW_CACHE: | ||
852 | break; | ||
853 | |||
854 | default: | ||
855 | return -ENOENT; | ||
856 | } | ||
857 | |||
858 | err = __tile_event_init(event); | ||
859 | if (err) { | ||
860 | if (event->destroy) | ||
861 | event->destroy(event); | ||
862 | } | ||
863 | return err; | ||
864 | } | ||
865 | |||
866 | static struct pmu tilera_pmu = { | ||
867 | .event_init = tile_event_init, | ||
868 | .add = tile_pmu_add, | ||
869 | .del = tile_pmu_del, | ||
870 | |||
871 | .start = tile_pmu_start, | ||
872 | .stop = tile_pmu_stop, | ||
873 | |||
874 | .read = tile_pmu_read, | ||
875 | }; | ||
876 | |||
877 | /* | ||
878 | * PMU's IRQ handler, PMU has 2 interrupts, they share the same handler. | ||
879 | */ | ||
880 | int tile_pmu_handle_irq(struct pt_regs *regs, int fault) | ||
881 | { | ||
882 | struct perf_sample_data data; | ||
883 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
884 | struct perf_event *event; | ||
885 | struct hw_perf_event *hwc; | ||
886 | u64 val; | ||
887 | unsigned long status; | ||
888 | int bit; | ||
889 | |||
890 | __get_cpu_var(perf_irqs)++; | ||
891 | |||
892 | if (!atomic_read(&tile_active_events)) | ||
893 | return 0; | ||
894 | |||
895 | status = pmc_get_overflow(); | ||
896 | pmc_ack_overflow(status); | ||
897 | |||
898 | for_each_set_bit(bit, &status, tile_pmu->num_counters) { | ||
899 | |||
900 | event = cpuc->events[bit]; | ||
901 | |||
902 | if (!event) | ||
903 | continue; | ||
904 | |||
905 | if (!test_bit(bit, cpuc->active_mask)) | ||
906 | continue; | ||
907 | |||
908 | hwc = &event->hw; | ||
909 | |||
910 | val = tile_perf_event_update(event); | ||
911 | if (val & (1ULL << (tile_pmu->cntval_bits - 1))) | ||
912 | continue; | ||
913 | |||
914 | perf_sample_data_init(&data, 0, event->hw.last_period); | ||
915 | if (!tile_event_set_period(event)) | ||
916 | continue; | ||
917 | |||
918 | if (perf_event_overflow(event, &data, regs)) | ||
919 | tile_pmu_stop(event, 0); | ||
920 | } | ||
921 | |||
922 | return 0; | ||
923 | } | ||
924 | |||
925 | static bool __init supported_pmu(void) | ||
926 | { | ||
927 | tile_pmu = &tilepmu; | ||
928 | return true; | ||
929 | } | ||
930 | |||
931 | int __init init_hw_perf_events(void) | ||
932 | { | ||
933 | supported_pmu(); | ||
934 | perf_pmu_register(&tilera_pmu, "cpu", PERF_TYPE_RAW); | ||
935 | return 0; | ||
936 | } | ||
937 | arch_initcall(init_hw_perf_events); | ||
938 | |||
939 | /* Callchain handling code. */ | ||
940 | |||
941 | /* | ||
942 | * Tile specific backtracing code for perf_events. | ||
943 | */ | ||
944 | static inline void perf_callchain(struct perf_callchain_entry *entry, | ||
945 | struct pt_regs *regs) | ||
946 | { | ||
947 | struct KBacktraceIterator kbt; | ||
948 | unsigned int i; | ||
949 | |||
950 | /* | ||
951 | * Get the address just after the "jalr" instruction that | ||
952 | * jumps to the handler for a syscall. When we find this | ||
953 | * address in a backtrace, we silently ignore it, which gives | ||
954 | * us a one-step backtrace connection from the sys_xxx() | ||
955 | * function in the kernel to the xxx() function in libc. | ||
956 | * Otherwise, we lose the ability to properly attribute time | ||
957 | * from the libc calls to the kernel implementations, since | ||
958 | * oprofile only considers PCs from backtraces a pair at a time. | ||
959 | */ | ||
960 | unsigned long handle_syscall_pc = handle_syscall_link_address(); | ||
961 | |||
962 | KBacktraceIterator_init(&kbt, NULL, regs); | ||
963 | kbt.profile = 1; | ||
964 | |||
965 | /* | ||
966 | * The sample for the pc is already recorded. Now we are adding the | ||
967 | * address of the callsites on the stack. Our iterator starts | ||
968 | * with the frame of the (already sampled) call site. If our | ||
969 | * iterator contained a "return address" field, we could have just | ||
970 | * used it and wouldn't have needed to skip the first | ||
971 | * frame. That's in effect what the arm and x86 versions do. | ||
972 | * Instead we peel off the first iteration to get the equivalent | ||
973 | * behavior. | ||
974 | */ | ||
975 | |||
976 | if (KBacktraceIterator_end(&kbt)) | ||
977 | return; | ||
978 | KBacktraceIterator_next(&kbt); | ||
979 | |||
980 | /* | ||
981 | * Set stack depth to 16 for user and kernel space respectively, that | ||
982 | * is, total 32 stack frames. | ||
983 | */ | ||
984 | for (i = 0; i < 16; ++i) { | ||
985 | unsigned long pc; | ||
986 | if (KBacktraceIterator_end(&kbt)) | ||
987 | break; | ||
988 | pc = kbt.it.pc; | ||
989 | if (pc != handle_syscall_pc) | ||
990 | perf_callchain_store(entry, pc); | ||
991 | KBacktraceIterator_next(&kbt); | ||
992 | } | ||
993 | } | ||
994 | |||
/*
 * User-side callchain entry point; delegates to the shared
 * perf_callchain() walker with the supplied registers.
 */
void perf_callchain_user(struct perf_callchain_entry *entry,
		    struct pt_regs *regs)
{
	perf_callchain(entry, regs);
}
1000 | |||
/*
 * Kernel-side callchain entry point; delegates to the shared
 * perf_callchain() walker with the supplied registers.
 */
void perf_callchain_kernel(struct perf_callchain_entry *entry,
		      struct pt_regs *regs)
{
	perf_callchain(entry, regs);
}