author		Mark Rutland <mark.rutland@arm.com>	2015-07-06 07:23:53 -0400
committer	Will Deacon <will.deacon@arm.com>	2015-07-31 10:01:14 -0400
commit		fa8ad7889d83bcf0a6cdbf6d3622f3ec019cde14 (patch)
tree		424ea935a8d5f40d099359351cf5074f50b8852f /drivers/perf
parent		bc1e3c4687df62a1f2ba1b6be11efbeb76145366 (diff)
arm: perf: factor arm_pmu core out to drivers
To enable sharing of the arm_pmu code with arm64, this patch factors it
out to drivers/perf/. A new drivers/perf directory is added for
performance monitor drivers to live under.
MAINTAINERS is updated accordingly. Files added previously without a
corresponding MAINTAINERS update (perf_regs.c, perf_callchain.c, and
perf_event.h) are also added.
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Linus Walleij <linus.walleij@linaro.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
[will: augmented Kconfig help slightly]
Signed-off-by: Will Deacon <will.deacon@arm.com>
Diffstat (limited to 'drivers/perf')
-rw-r--r--	drivers/perf/Kconfig	15
-rw-r--r--	drivers/perf/Makefile	1
-rw-r--r--	drivers/perf/arm_pmu.c	921
3 files changed, 937 insertions, 0 deletions
diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig
new file mode 100644
index 000000000000..d9de36ee165d
--- /dev/null
+++ b/drivers/perf/Kconfig
@@ -0,0 +1,15 @@
#
# Performance Monitor Drivers
#

menu "Performance monitor support"

config ARM_PMU
	depends on PERF_EVENTS && ARM
	bool "ARM PMU framework"
	default y
	help
	  Say y if you want to use CPU performance monitors on ARM-based
	  systems.

endmenu
diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile
new file mode 100644
index 000000000000..acd2397ded94
--- /dev/null
+++ b/drivers/perf/Makefile
@@ -0,0 +1 @@
obj-$(CONFIG_ARM_PMU) += arm_pmu.o
diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c
new file mode 100644
index 000000000000..2365a32a595e
--- /dev/null
+++ b/drivers/perf/arm_pmu.c
@@ -0,0 +1,921 @@
#undef DEBUG

/*
 * ARM performance counter support.
 *
 * Copyright (C) 2009 picoChip Designs, Ltd., Jamie Iles
 * Copyright (C) 2010 ARM Ltd., Will Deacon <will.deacon@arm.com>
 *
 * This code is based on the sparc64 perf event code, which is in turn based
 * on the x86 code.
 */
#define pr_fmt(fmt) "hw perfevents: " fmt

#include <linux/bitmap.h>
#include <linux/cpumask.h>
#include <linux/export.h>
#include <linux/kernel.h>
#include <linux/of_device.h>
#include <linux/perf/arm_pmu.h>
#include <linux/platform_device.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/irq.h>
#include <linux/irqdesc.h>

#include <asm/cputype.h>
#include <asm/irq_regs.h>

static int
armpmu_map_cache_event(const unsigned (*cache_map)
				      [PERF_COUNT_HW_CACHE_MAX]
				      [PERF_COUNT_HW_CACHE_OP_MAX]
				      [PERF_COUNT_HW_CACHE_RESULT_MAX],
		       u64 config)
{
	unsigned int cache_type, cache_op, cache_result, ret;

	cache_type = (config >> 0) & 0xff;
	if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
		return -EINVAL;

	cache_op = (config >> 8) & 0xff;
	if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
		return -EINVAL;

	cache_result = (config >> 16) & 0xff;
	if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
		return -EINVAL;

	ret = (int)(*cache_map)[cache_type][cache_op][cache_result];

	if (ret == CACHE_OP_UNSUPPORTED)
		return -ENOENT;

	return ret;
}

static int
armpmu_map_hw_event(const unsigned (*event_map)[PERF_COUNT_HW_MAX], u64 config)
{
	int mapping;

	if (config >= PERF_COUNT_HW_MAX)
		return -EINVAL;

	mapping = (*event_map)[config];
	return mapping == HW_OP_UNSUPPORTED ? -ENOENT : mapping;
}

static int
armpmu_map_raw_event(u32 raw_event_mask, u64 config)
{
	return (int)(config & raw_event_mask);
}

int
armpmu_map_event(struct perf_event *event,
		 const unsigned (*event_map)[PERF_COUNT_HW_MAX],
		 const unsigned (*cache_map)
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX],
		 u32 raw_event_mask)
{
	u64 config = event->attr.config;
	int type = event->attr.type;

	if (type == event->pmu->type)
		return armpmu_map_raw_event(raw_event_mask, config);

	switch (type) {
	case PERF_TYPE_HARDWARE:
		return armpmu_map_hw_event(event_map, config);
	case PERF_TYPE_HW_CACHE:
		return armpmu_map_cache_event(cache_map, config);
	case PERF_TYPE_RAW:
		return armpmu_map_raw_event(raw_event_mask, config);
	}

	return -ENOENT;
}

int armpmu_event_set_period(struct perf_event *event)
{
	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
	struct hw_perf_event *hwc = &event->hw;
	s64 left = local64_read(&hwc->period_left);
	s64 period = hwc->sample_period;
	int ret = 0;

	if (unlikely(left <= -period)) {
		left = period;
		local64_set(&hwc->period_left, left);
		hwc->last_period = period;
		ret = 1;
	}

	if (unlikely(left <= 0)) {
		left += period;
		local64_set(&hwc->period_left, left);
		hwc->last_period = period;
		ret = 1;
	}

	/*
	 * Limit the maximum period to prevent the counter value
	 * from overtaking the one we are about to program. In
	 * effect we are reducing max_period to account for
	 * interrupt latency (and we are being very conservative).
	 */
	if (left > (armpmu->max_period >> 1))
		left = armpmu->max_period >> 1;

	local64_set(&hwc->prev_count, (u64)-left);

	armpmu->write_counter(event, (u64)(-left) & 0xffffffff);

	perf_event_update_userpage(event);

	return ret;
}

u64 armpmu_event_update(struct perf_event *event)
{
	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
	struct hw_perf_event *hwc = &event->hw;
	u64 delta, prev_raw_count, new_raw_count;

again:
	prev_raw_count = local64_read(&hwc->prev_count);
	new_raw_count = armpmu->read_counter(event);

	if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
			    new_raw_count) != prev_raw_count)
		goto again;

	delta = (new_raw_count - prev_raw_count) & armpmu->max_period;

	local64_add(delta, &event->count);
	local64_sub(delta, &hwc->period_left);

	return new_raw_count;
}

static void
armpmu_read(struct perf_event *event)
{
	armpmu_event_update(event);
}

static void
armpmu_stop(struct perf_event *event, int flags)
{
	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
	struct hw_perf_event *hwc = &event->hw;

	/*
	 * ARM pmu always has to update the counter, so ignore
	 * PERF_EF_UPDATE, see comments in armpmu_start().
	 */
	if (!(hwc->state & PERF_HES_STOPPED)) {
		armpmu->disable(event);
		armpmu_event_update(event);
		hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
	}
}

static void armpmu_start(struct perf_event *event, int flags)
{
	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
	struct hw_perf_event *hwc = &event->hw;

	/*
	 * ARM pmu always has to reprogram the period, so ignore
	 * PERF_EF_RELOAD, see the comment below.
	 */
	if (flags & PERF_EF_RELOAD)
		WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));

	hwc->state = 0;
	/*
	 * Set the period again. Some counters can't be stopped, so when we
	 * were stopped we simply disabled the IRQ source and the counter
	 * may have been left counting. If we don't do this step then we may
	 * get an interrupt too soon or *way* too late if the overflow has
	 * happened since disabling.
	 */
	armpmu_event_set_period(event);
	armpmu->enable(event);
}

static void
armpmu_del(struct perf_event *event, int flags)
{
	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
	struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events);
	struct hw_perf_event *hwc = &event->hw;
	int idx = hwc->idx;

	armpmu_stop(event, PERF_EF_UPDATE);
	hw_events->events[idx] = NULL;
	clear_bit(idx, hw_events->used_mask);
	if (armpmu->clear_event_idx)
		armpmu->clear_event_idx(hw_events, event);

	perf_event_update_userpage(event);
}

static int
armpmu_add(struct perf_event *event, int flags)
{
	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
	struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events);
	struct hw_perf_event *hwc = &event->hw;
	int idx;
	int err = 0;

	/* An event following a process won't be stopped earlier */
	if (!cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus))
		return -ENOENT;

	perf_pmu_disable(event->pmu);

	/* If we don't have a space for the counter then finish early. */
	idx = armpmu->get_event_idx(hw_events, event);
	if (idx < 0) {
		err = idx;
		goto out;
	}

	/*
	 * If there is an event in the counter we are going to use then make
	 * sure it is disabled.
	 */
	event->hw.idx = idx;
	armpmu->disable(event);
	hw_events->events[idx] = event;

	hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
	if (flags & PERF_EF_START)
		armpmu_start(event, PERF_EF_RELOAD);

	/* Propagate our changes to the userspace mapping. */
	perf_event_update_userpage(event);

out:
	perf_pmu_enable(event->pmu);
	return err;
}

static int
validate_event(struct pmu *pmu, struct pmu_hw_events *hw_events,
	       struct perf_event *event)
{
	struct arm_pmu *armpmu;

	if (is_software_event(event))
		return 1;

	/*
	 * Reject groups spanning multiple HW PMUs (e.g. CPU + CCI). The
	 * core perf code won't check that the pmu->ctx == leader->ctx
	 * until after pmu->event_init(event).
	 */
	if (event->pmu != pmu)
		return 0;

	if (event->state < PERF_EVENT_STATE_OFF)
		return 1;

	if (event->state == PERF_EVENT_STATE_OFF && !event->attr.enable_on_exec)
		return 1;

	armpmu = to_arm_pmu(event->pmu);
	return armpmu->get_event_idx(hw_events, event) >= 0;
}

static int
validate_group(struct perf_event *event)
{
	struct perf_event *sibling, *leader = event->group_leader;
	struct pmu_hw_events fake_pmu;

	/*
	 * Initialise the fake PMU. We only need to populate the
	 * used_mask for the purposes of validation.
	 */
	memset(&fake_pmu.used_mask, 0, sizeof(fake_pmu.used_mask));

	if (!validate_event(event->pmu, &fake_pmu, leader))
		return -EINVAL;

	list_for_each_entry(sibling, &leader->sibling_list, group_entry) {
		if (!validate_event(event->pmu, &fake_pmu, sibling))
			return -EINVAL;
	}

	if (!validate_event(event->pmu, &fake_pmu, event))
		return -EINVAL;

	return 0;
}

static irqreturn_t armpmu_dispatch_irq(int irq, void *dev)
{
	struct arm_pmu *armpmu;
	struct platform_device *plat_device;
	struct arm_pmu_platdata *plat;
	int ret;
	u64 start_clock, finish_clock;

	/*
	 * we request the IRQ with a (possibly percpu) struct arm_pmu**, but
	 * the handlers expect a struct arm_pmu*. The percpu_irq framework will
	 * do any necessary shifting, we just need to perform the first
	 * dereference.
	 */
	armpmu = *(void **)dev;
	plat_device = armpmu->plat_device;
	plat = dev_get_platdata(&plat_device->dev);

	start_clock = sched_clock();
	if (plat && plat->handle_irq)
		ret = plat->handle_irq(irq, armpmu, armpmu->handle_irq);
	else
		ret = armpmu->handle_irq(irq, armpmu);
	finish_clock = sched_clock();

	perf_sample_event_took(finish_clock - start_clock);
	return ret;
}

static void
armpmu_release_hardware(struct arm_pmu *armpmu)
{
	armpmu->free_irq(armpmu);
}

static int
armpmu_reserve_hardware(struct arm_pmu *armpmu)
{
	int err = armpmu->request_irq(armpmu, armpmu_dispatch_irq);
	if (err) {
		armpmu_release_hardware(armpmu);
		return err;
	}

	return 0;
}

static void
hw_perf_event_destroy(struct perf_event *event)
{
	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
	atomic_t *active_events = &armpmu->active_events;
	struct mutex *pmu_reserve_mutex = &armpmu->reserve_mutex;

	if (atomic_dec_and_mutex_lock(active_events, pmu_reserve_mutex)) {
		armpmu_release_hardware(armpmu);
		mutex_unlock(pmu_reserve_mutex);
	}
}

static int
event_requires_mode_exclusion(struct perf_event_attr *attr)
{
	return attr->exclude_idle || attr->exclude_user ||
	       attr->exclude_kernel || attr->exclude_hv;
}

static int
__hw_perf_event_init(struct perf_event *event)
{
	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
	struct hw_perf_event *hwc = &event->hw;
	int mapping;

	mapping = armpmu->map_event(event);

	if (mapping < 0) {
		pr_debug("event %x:%llx not supported\n", event->attr.type,
			 event->attr.config);
		return mapping;
	}

	/*
	 * We don't assign an index until we actually place the event onto
	 * hardware. Use -1 to signify that we haven't decided where to put it
	 * yet. For SMP systems, each core has its own PMU so we can't do any
	 * clever allocation or constraints checking at this point.
	 */
	hwc->idx		= -1;
	hwc->config_base	= 0;
	hwc->config		= 0;
	hwc->event_base		= 0;

	/*
	 * Check whether we need to exclude the counter from certain modes.
	 */
	if ((!armpmu->set_event_filter ||
	     armpmu->set_event_filter(hwc, &event->attr)) &&
	     event_requires_mode_exclusion(&event->attr)) {
		pr_debug("ARM performance counters do not support "
			 "mode exclusion\n");
		return -EOPNOTSUPP;
	}

	/*
	 * Store the event encoding into the config_base field.
	 */
	hwc->config_base |= (unsigned long)mapping;

	if (!is_sampling_event(event)) {
		/*
		 * For non-sampling runs, limit the sample_period to half
		 * of the counter width. That way, the new counter value
		 * is far less likely to overtake the previous one unless
		 * you have some serious IRQ latency issues.
		 */
		hwc->sample_period  = armpmu->max_period >> 1;
		hwc->last_period    = hwc->sample_period;
		local64_set(&hwc->period_left, hwc->sample_period);
	}

	if (event->group_leader != event) {
		if (validate_group(event) != 0)
			return -EINVAL;
	}

	return 0;
}

static int armpmu_event_init(struct perf_event *event)
{
	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
	int err = 0;
	atomic_t *active_events = &armpmu->active_events;

	/*
	 * Reject CPU-affine events for CPUs that are of a different class to
	 * that which this PMU handles. Process-following events (where
	 * event->cpu == -1) can be migrated between CPUs, and thus we have to
	 * reject them later (in armpmu_add) if they're scheduled on a
	 * different class of CPU.
	 */
	if (event->cpu != -1 &&
		!cpumask_test_cpu(event->cpu, &armpmu->supported_cpus))
		return -ENOENT;

	/* does not support taken branch sampling */
	if (has_branch_stack(event))
		return -EOPNOTSUPP;

	if (armpmu->map_event(event) == -ENOENT)
		return -ENOENT;

	event->destroy = hw_perf_event_destroy;

	if (!atomic_inc_not_zero(active_events)) {
		mutex_lock(&armpmu->reserve_mutex);
		if (atomic_read(active_events) == 0)
			err = armpmu_reserve_hardware(armpmu);

		if (!err)
			atomic_inc(active_events);
		mutex_unlock(&armpmu->reserve_mutex);
	}

	if (err)
		return err;

	err = __hw_perf_event_init(event);
	if (err)
		hw_perf_event_destroy(event);

	return err;
}

static void armpmu_enable(struct pmu *pmu)
{
	struct arm_pmu *armpmu = to_arm_pmu(pmu);
	struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events);
	int enabled = bitmap_weight(hw_events->used_mask, armpmu->num_events);

	/* For task-bound events we may be called on other CPUs */
	if (!cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus))
		return;

	if (enabled)
		armpmu->start(armpmu);
}

static void armpmu_disable(struct pmu *pmu)
{
	struct arm_pmu *armpmu = to_arm_pmu(pmu);

	/* For task-bound events we may be called on other CPUs */
	if (!cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus))
		return;

	armpmu->stop(armpmu);
}

/*
 * In heterogeneous systems, events are specific to a particular
 * microarchitecture, and aren't suitable for another. Thus, only match CPUs of
 * the same microarchitecture.
 */
static int armpmu_filter_match(struct perf_event *event)
{
	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
	unsigned int cpu = smp_processor_id();
	return cpumask_test_cpu(cpu, &armpmu->supported_cpus);
}

static void armpmu_init(struct arm_pmu *armpmu)
{
	atomic_set(&armpmu->active_events, 0);
	mutex_init(&armpmu->reserve_mutex);

	armpmu->pmu = (struct pmu) {
		.pmu_enable	= armpmu_enable,
		.pmu_disable	= armpmu_disable,
		.event_init	= armpmu_event_init,
		.add		= armpmu_add,
		.del		= armpmu_del,
		.start		= armpmu_start,
		.stop		= armpmu_stop,
		.read		= armpmu_read,
		.filter_match	= armpmu_filter_match,
	};
}

int armpmu_register(struct arm_pmu *armpmu, int type)
{
	armpmu_init(armpmu);
	pr_info("enabled with %s PMU driver, %d counters available\n",
			armpmu->name, armpmu->num_events);
	return perf_pmu_register(&armpmu->pmu, armpmu->name, type);
}

/* Set at runtime when we know what CPU type we are. */
static struct arm_pmu *__oprofile_cpu_pmu;

/*
 * Despite the names, these two functions are CPU-specific and are used
 * by the OProfile/perf code.
 */
const char *perf_pmu_name(void)
{
	if (!__oprofile_cpu_pmu)
		return NULL;

	return __oprofile_cpu_pmu->name;
}
EXPORT_SYMBOL_GPL(perf_pmu_name);

int perf_num_counters(void)
{
	int max_events = 0;

	if (__oprofile_cpu_pmu != NULL)
		max_events = __oprofile_cpu_pmu->num_events;

	return max_events;
}
EXPORT_SYMBOL_GPL(perf_num_counters);

static void cpu_pmu_enable_percpu_irq(void *data)
{
	int irq = *(int *)data;

	enable_percpu_irq(irq, IRQ_TYPE_NONE);
}

static void cpu_pmu_disable_percpu_irq(void *data)
{
	int irq = *(int *)data;

	disable_percpu_irq(irq);
}

static void cpu_pmu_free_irq(struct arm_pmu *cpu_pmu)
{
	int i, irq, irqs;
	struct platform_device *pmu_device = cpu_pmu->plat_device;
	struct pmu_hw_events __percpu *hw_events = cpu_pmu->hw_events;

	irqs = min(pmu_device->num_resources, num_possible_cpus());

	irq = platform_get_irq(pmu_device, 0);
	if (irq >= 0 && irq_is_percpu(irq)) {
		on_each_cpu(cpu_pmu_disable_percpu_irq, &irq, 1);
		free_percpu_irq(irq, &hw_events->percpu_pmu);
	} else {
		for (i = 0; i < irqs; ++i) {
			int cpu = i;

			if (cpu_pmu->irq_affinity)
				cpu = cpu_pmu->irq_affinity[i];

			if (!cpumask_test_and_clear_cpu(cpu, &cpu_pmu->active_irqs))
				continue;
			irq = platform_get_irq(pmu_device, i);
			if (irq >= 0)
				free_irq(irq, per_cpu_ptr(&hw_events->percpu_pmu, cpu));
		}
	}
}

static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler)
{
	int i, err, irq, irqs;
	struct platform_device *pmu_device = cpu_pmu->plat_device;
	struct pmu_hw_events __percpu *hw_events = cpu_pmu->hw_events;

	if (!pmu_device)
		return -ENODEV;

	irqs = min(pmu_device->num_resources, num_possible_cpus());
	if (irqs < 1) {
		pr_warn_once("perf/ARM: No irqs for PMU defined, sampling events not supported\n");
		return 0;
	}

	irq = platform_get_irq(pmu_device, 0);
	if (irq >= 0 && irq_is_percpu(irq)) {
		err = request_percpu_irq(irq, handler, "arm-pmu",
					 &hw_events->percpu_pmu);
		if (err) {
			pr_err("unable to request IRQ%d for ARM PMU counters\n",
				irq);
			return err;
		}
		on_each_cpu(cpu_pmu_enable_percpu_irq, &irq, 1);
	} else {
		for (i = 0; i < irqs; ++i) {
			int cpu = i;

			err = 0;
			irq = platform_get_irq(pmu_device, i);
			if (irq < 0)
				continue;

			if (cpu_pmu->irq_affinity)
				cpu = cpu_pmu->irq_affinity[i];

			/*
			 * If we have a single PMU interrupt that we can't shift,
			 * assume that we're running on a uniprocessor machine and
			 * continue. Otherwise, continue without this interrupt.
			 */
			if (irq_set_affinity(irq, cpumask_of(cpu)) && irqs > 1) {
				pr_warn("unable to set irq affinity (irq=%d, cpu=%u)\n",
					irq, cpu);
				continue;
			}

			err = request_irq(irq, handler,
					  IRQF_NOBALANCING | IRQF_NO_THREAD, "arm-pmu",
					  per_cpu_ptr(&hw_events->percpu_pmu, cpu));
			if (err) {
				pr_err("unable to request IRQ%d for ARM PMU counters\n",
					irq);
				return err;
			}

			cpumask_set_cpu(cpu, &cpu_pmu->active_irqs);
		}
	}

	return 0;
}

/*
 * PMU hardware loses all context when a CPU goes offline.
 * When a CPU is hotplugged back in, since some hardware registers are
 * UNKNOWN at reset, the PMU must be explicitly reset to avoid reading
 * junk values out of them.
 */
static int cpu_pmu_notify(struct notifier_block *b, unsigned long action,
			  void *hcpu)
{
	int cpu = (unsigned long)hcpu;
	struct arm_pmu *pmu = container_of(b, struct arm_pmu, hotplug_nb);

	if ((action & ~CPU_TASKS_FROZEN) != CPU_STARTING)
		return NOTIFY_DONE;

	if (!cpumask_test_cpu(cpu, &pmu->supported_cpus))
		return NOTIFY_DONE;

	if (pmu->reset)
		pmu->reset(pmu);
	else
		return NOTIFY_DONE;

	return NOTIFY_OK;
}

static int cpu_pmu_init(struct arm_pmu *cpu_pmu)
{
	int err;
	int cpu;
	struct pmu_hw_events __percpu *cpu_hw_events;

	cpu_hw_events = alloc_percpu(struct pmu_hw_events);
	if (!cpu_hw_events)
		return -ENOMEM;

	cpu_pmu->hotplug_nb.notifier_call = cpu_pmu_notify;
	err = register_cpu_notifier(&cpu_pmu->hotplug_nb);
	if (err)
		goto out_hw_events;

	for_each_possible_cpu(cpu) {
		struct pmu_hw_events *events = per_cpu_ptr(cpu_hw_events, cpu);
		raw_spin_lock_init(&events->pmu_lock);
		events->percpu_pmu = cpu_pmu;
	}

	cpu_pmu->hw_events	= cpu_hw_events;
	cpu_pmu->request_irq	= cpu_pmu_request_irq;
	cpu_pmu->free_irq	= cpu_pmu_free_irq;

	/* Ensure the PMU has sane values out of reset. */
	if (cpu_pmu->reset)
		on_each_cpu_mask(&cpu_pmu->supported_cpus, cpu_pmu->reset,
			 cpu_pmu, 1);

	/* If no interrupts available, set the corresponding capability flag */
	if (!platform_get_irq(cpu_pmu->plat_device, 0))
		cpu_pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;

	return 0;

out_hw_events:
	free_percpu(cpu_hw_events);
	return err;
}

static void cpu_pmu_destroy(struct arm_pmu *cpu_pmu)
{
	unregister_cpu_notifier(&cpu_pmu->hotplug_nb);
	free_percpu(cpu_pmu->hw_events);
}

/*
 * CPU PMU identification and probing.
 */
static int probe_current_pmu(struct arm_pmu *pmu,
			     const struct pmu_probe_info *info)
{
	int cpu = get_cpu();
	unsigned int cpuid = read_cpuid_id();
	int ret = -ENODEV;

	pr_info("probing PMU on CPU %d\n", cpu);

	for (; info->init != NULL; info++) {
		if ((cpuid & info->mask) != info->cpuid)
			continue;
		ret = info->init(pmu);
		break;
	}

	put_cpu();
	return ret;
}

static int of_pmu_irq_cfg(struct arm_pmu *pmu)
{
	int *irqs, i = 0;
	bool using_spi = false;
	struct platform_device *pdev = pmu->plat_device;

	irqs = kcalloc(pdev->num_resources, sizeof(*irqs), GFP_KERNEL);
	if (!irqs)
		return -ENOMEM;

	do {
		struct device_node *dn;
		int cpu, irq;

		/* See if we have an affinity entry */
		dn = of_parse_phandle(pdev->dev.of_node, "interrupt-affinity", i);
		if (!dn)
			break;

		/* Check the IRQ type and prohibit a mix of PPIs and SPIs */
		irq = platform_get_irq(pdev, i);
		if (irq >= 0) {
			bool spi = !irq_is_percpu(irq);

			if (i > 0 && spi != using_spi) {
				pr_err("PPI/SPI IRQ type mismatch for %s!\n",
					dn->name);
				kfree(irqs);
				return -EINVAL;
			}

			using_spi = spi;
		}

		/* Now look up the logical CPU number */
		for_each_possible_cpu(cpu)
			if (dn == of_cpu_device_node_get(cpu))
				break;

		if (cpu >= nr_cpu_ids) {
			pr_warn("Failed to find logical CPU for %s\n",
				dn->name);
			of_node_put(dn);
			cpumask_setall(&pmu->supported_cpus);
			break;
		}
		of_node_put(dn);

		/* For SPIs, we need to track the affinity per IRQ */
		if (using_spi) {
			if (i >= pdev->num_resources)
				break;

			irqs[i] = cpu;
		}

		/* Keep track of the CPUs containing this PMU type */
		cpumask_set_cpu(cpu, &pmu->supported_cpus);
		i++;
	} while (1);

	/* If we didn't manage to parse anything, claim to support all CPUs */
	if (cpumask_weight(&pmu->supported_cpus) == 0)
		cpumask_setall(&pmu->supported_cpus);

	/* If we matched up the IRQ affinities, use them to route the SPIs */
	if (using_spi && i == pdev->num_resources)
		pmu->irq_affinity = irqs;
	else
		kfree(irqs);

	return 0;
}

int arm_pmu_device_probe(struct platform_device *pdev,
			 const struct of_device_id *of_table,
			 const struct pmu_probe_info *probe_table)
{
	const struct of_device_id *of_id;
	const int (*init_fn)(struct arm_pmu *);
	struct device_node *node = pdev->dev.of_node;
	struct arm_pmu *pmu;
	int ret = -ENODEV;

	pmu = kzalloc(sizeof(struct arm_pmu), GFP_KERNEL);
	if (!pmu) {
		pr_info("failed to allocate PMU device!\n");
		return -ENOMEM;
	}

	if (!__oprofile_cpu_pmu)
		__oprofile_cpu_pmu = pmu;

	pmu->plat_device = pdev;

	if (node && (of_id = of_match_node(of_table, pdev->dev.of_node))) {
		init_fn = of_id->data;

		ret = of_pmu_irq_cfg(pmu);
		if (!ret)
			ret = init_fn(pmu);
	} else {
		ret = probe_current_pmu(pmu, probe_table);
		cpumask_setall(&pmu->supported_cpus);
	}

	if (ret) {
		pr_info("failed to probe PMU!\n");
		goto out_free;
	}

	ret = cpu_pmu_init(pmu);
	if (ret)
		goto out_free;

	ret = armpmu_register(pmu, -1);
	if (ret)
		goto out_destroy;

	return 0;

out_destroy:
	cpu_pmu_destroy(pmu);
out_free:
	pr_info("failed to register PMU devices!\n");
	kfree(pmu);
	return ret;
}
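
The factored-out framework is consumed by per-implementation PMU drivers through arm_pmu_device_probe() and armpmu_register(). The following is a minimal sketch, not part of this patch, of how a hypothetical driver might wire itself up; every "example_*" name is a placeholder, and a real driver would also fill in the hardware callbacks that arm_pmu.c invokes (handle_irq, enable, disable, read_counter, write_counter, get_event_idx, start, stop, map_event, ...).

/* Hypothetical example driver built on the arm_pmu framework (sketch only). */
#include <linux/init.h>
#include <linux/of.h>
#include <linux/perf/arm_pmu.h>
#include <linux/platform_device.h>

static int example_pmu_init(struct arm_pmu *pmu)
{
	/* Placeholder values; the callbacks above would be set here too. */
	pmu->name	= "example_pmu";
	pmu->num_events	= 4;			/* placeholder */
	pmu->max_period	= (1ULL << 32) - 1;	/* 32-bit counters */
	return 0;
}

static const struct of_device_id example_pmu_of_ids[] = {
	{ .compatible = "example,pmu", .data = example_pmu_init },
	{ /* sentinel */ },
};

/* Fallback CPUID-based probing; { cpuid, mask, init } entries go here. */
static const struct pmu_probe_info example_pmu_probe_table[] = {
	{ /* sentinel */ },
};

static int example_pmu_probe(struct platform_device *pdev)
{
	return arm_pmu_device_probe(pdev, example_pmu_of_ids,
				    example_pmu_probe_table);
}

static struct platform_driver example_pmu_driver = {
	.driver		= {
		.name		= "example-pmu",
		.of_match_table	= example_pmu_of_ids,
	},
	.probe		= example_pmu_probe,
};

static int __init example_pmu_register_driver(void)
{
	return platform_driver_register(&example_pmu_driver);
}
device_initcall(example_pmu_register_driver);

Note how the of_device_id .data member carries the per-implementation init function, matching the of_id->data dereference in arm_pmu_device_probe() above, while the pmu_probe_info table serves the non-DT CPUID matching path in probe_current_pmu().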