author	Suzuki K Poulose <suzuki.poulose@arm.com>	2018-01-02 06:25:33 -0500
committer	Will Deacon <will.deacon@arm.com>	2018-01-02 11:43:12 -0500
commit	7520fa99246dade7ab6dde1573a146beed632abd (patch)
tree	06a4763f3370fb4fb2365924540330a4ee156bcc /drivers/perf
parent	9249dee611d6624bc9044fdf3877ace67d6143ab (diff)
perf: ARM DynamIQ Shared Unit PMU support
Add support for the Cluster PMU part of the ARM DynamIQ Shared Unit (DSU).
The DSU integrates one or more cores with an L3 memory system, control
logic, and external interfaces to form a multicore cluster. The PMU allows
counting the various events related to the L3, SCU, etc., along with
providing a cycle counter.

The PMU can be accessed via system registers, which are common to the cores
in the same cluster. The PMU registers mostly follow the semantics of the
ARMv8 PMU, with the exception that the counters record cluster-wide events.

This driver is mostly based on the ARMv8 and CCI PMU drivers. The driver
only supports ARM64 at the moment. It can be extended to support ARM32 by
providing register accessors like we do in
arch/arm64/include/asm/arm_dsu_pmu.h.

Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Reviewed-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
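For reference, a minimal user-space sketch of opening one of these counters
with perf_event_open(2). The instance name arm_dsu_0 (the first instance
produced by the driver's naming scheme) and the choice of CPU 0 are
assumptions about the target system; event code 0x11 is the cycles event
defined in the driver below:

/* Sketch: count DSU cycles (event 0x11) on CPU 0 for one second.
 * Assumes the first instance is named arm_dsu_0 and that CPU 0 is one of
 * the CPUs associated with the cluster. */
#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	struct perf_event_attr attr;
	uint64_t count;
	FILE *f;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.config = 0x11;	/* DSU_PMU_EVT_CYCLES */

	/* The PMU type id is assigned dynamically; read it from sysfs. */
	f = fopen("/sys/bus/event_source/devices/arm_dsu_0/type", "r");
	if (!f || fscanf(f, "%u", &attr.type) != 1) {
		perror("reading PMU type");
		return 1;
	}
	fclose(f);

	/* Per-task events are rejected by the driver, so pass pid = -1 and
	 * bind the event to a CPU that belongs to the cluster. */
	fd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, 0);
	if (fd < 0) {
		perror("perf_event_open");
		return 1;
	}

	sleep(1);
	if (read(fd, &count, sizeof(count)) == sizeof(count))
		printf("DSU cycles: %llu\n", (unsigned long long)count);
	close(fd);
	return 0;
}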
Diffstat (limited to 'drivers/perf')
-rw-r--r--	drivers/perf/Kconfig		9
-rw-r--r--	drivers/perf/Makefile		1
-rw-r--r--	drivers/perf/arm_dsu_pmu.c	843
3 files changed, 853 insertions, 0 deletions
diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig
index b8f44b068fc6..da5724cd89cf 100644
--- a/drivers/perf/Kconfig
+++ b/drivers/perf/Kconfig
@@ -17,6 +17,15 @@ config ARM_PMU_ACPI
 	depends on ARM_PMU && ACPI
 	def_bool y
 
+config ARM_DSU_PMU
+	tristate "ARM DynamIQ Shared Unit (DSU) PMU"
+	depends on ARM64
+	help
+	  Provides support for performance monitor unit in ARM DynamIQ Shared
+	  Unit (DSU). The DSU integrates one or more cores with an L3 memory
+	  system, control logic. The PMU allows counting various events related
+	  to DSU.
+
 config HISI_PMU
 	bool "HiSilicon SoC PMU"
 	depends on ARM64 && ACPI
diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile
index 710a0135bd61..c2f27419bdf0 100644
--- a/drivers/perf/Makefile
+++ b/drivers/perf/Makefile
@@ -1,4 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_ARM_DSU_PMU) += arm_dsu_pmu.o
 obj-$(CONFIG_ARM_PMU) += arm_pmu.o arm_pmu_platform.o
 obj-$(CONFIG_ARM_PMU_ACPI) += arm_pmu_acpi.o
 obj-$(CONFIG_HISI_PMU) += hisilicon/
diff --git a/drivers/perf/arm_dsu_pmu.c b/drivers/perf/arm_dsu_pmu.c
new file mode 100644
index 000000000000..37c0526c93d5
--- /dev/null
+++ b/drivers/perf/arm_dsu_pmu.c
@@ -0,0 +1,843 @@
1/*
2 * ARM DynamIQ Shared Unit (DSU) PMU driver
3 *
4 * Copyright (C) ARM Limited, 2017.
5 *
6 * Based on ARM CCI-PMU, ARMv8 PMU-v3 drivers.
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * version 2 as published by the Free Software Foundation.
11 */
12
13#define PMUNAME "arm_dsu"
14#define DRVNAME PMUNAME "_pmu"
15#define pr_fmt(fmt) DRVNAME ": " fmt
16
17#include <linux/bitmap.h>
18#include <linux/bitops.h>
19#include <linux/bug.h>
20#include <linux/cpumask.h>
21#include <linux/device.h>
22#include <linux/interrupt.h>
23#include <linux/kernel.h>
24#include <linux/module.h>
25#include <linux/of_device.h>
26#include <linux/perf_event.h>
27#include <linux/platform_device.h>
28#include <linux/spinlock.h>
29#include <linux/smp.h>
30#include <linux/sysfs.h>
31#include <linux/types.h>
32
33#include <asm/arm_dsu_pmu.h>
34#include <asm/local64.h>
35
36/* PMU event codes */
37#define DSU_PMU_EVT_CYCLES 0x11
38#define DSU_PMU_EVT_CHAIN 0x1e
39
40#define DSU_PMU_MAX_COMMON_EVENTS 0x40
41
42#define DSU_PMU_MAX_HW_CNTRS 32
43#define DSU_PMU_HW_COUNTER_MASK (DSU_PMU_MAX_HW_CNTRS - 1)
44
45#define CLUSTERPMCR_E BIT(0)
46#define CLUSTERPMCR_P BIT(1)
47#define CLUSTERPMCR_C BIT(2)
48#define CLUSTERPMCR_N_SHIFT 11
49#define CLUSTERPMCR_N_MASK 0x1f
50#define CLUSTERPMCR_IDCODE_SHIFT 16
51#define CLUSTERPMCR_IDCODE_MASK 0xff
52#define CLUSTERPMCR_IMP_SHIFT 24
53#define CLUSTERPMCR_IMP_MASK 0xff
54#define CLUSTERPMCR_RES_MASK 0x7e8
55#define CLUSTERPMCR_RES_VAL 0x40
56
57#define DSU_ACTIVE_CPU_MASK 0x0
58#define DSU_ASSOCIATED_CPU_MASK 0x1
59
60/*
61 * We use the index of the counters as they appear in the counter
62 * bit maps in the PMU registers (e.g CLUSTERPMSELR).
63 * i.e,
64 * counter 0 - Bit 0
65 * counter 1 - Bit 1
66 * ...
67 * Cycle counter - Bit 31
68 */
69#define DSU_PMU_IDX_CYCLE_COUNTER 31
70
71/* All event counters are 32bit, with a 64bit Cycle counter */
72#define DSU_PMU_COUNTER_WIDTH(idx) \
73 (((idx) == DSU_PMU_IDX_CYCLE_COUNTER) ? 64 : 32)
74
75#define DSU_PMU_COUNTER_MASK(idx) \
76 GENMASK_ULL((DSU_PMU_COUNTER_WIDTH((idx)) - 1), 0)
77
78#define DSU_EXT_ATTR(_name, _func, _config) \
79 (&((struct dev_ext_attribute[]) { \
80 { \
81 .attr = __ATTR(_name, 0444, _func, NULL), \
82 .var = (void *)_config \
83 } \
84 })[0].attr.attr)
85
86#define DSU_EVENT_ATTR(_name, _config) \
87 DSU_EXT_ATTR(_name, dsu_pmu_sysfs_event_show, (unsigned long)_config)
88
89#define DSU_FORMAT_ATTR(_name, _config) \
90 DSU_EXT_ATTR(_name, dsu_pmu_sysfs_format_show, (char *)_config)
91
92#define DSU_CPUMASK_ATTR(_name, _config) \
93 DSU_EXT_ATTR(_name, dsu_pmu_cpumask_show, (unsigned long)_config)
94
95struct dsu_hw_events {
96 DECLARE_BITMAP(used_mask, DSU_PMU_MAX_HW_CNTRS);
97 struct perf_event *events[DSU_PMU_MAX_HW_CNTRS];
98};
99
100/*
101 * struct dsu_pmu - DSU PMU descriptor
102 *
103 * @pmu_lock : Protects accesses to DSU PMU register from normal vs
104 * interrupt handler contexts.
105 * @hw_events : Holds the event counter state.
106 * @associated_cpus : CPUs attached to the DSU.
107 * @active_cpu : CPU to which the PMU is bound for accesses.
108 * @cpuhp_node : Node for CPU hotplug notifier link.
109 * @num_counters : Number of event counters implemented by the PMU,
110 * excluding the cycle counter.
111 * @irq : Interrupt line for counter overflow.
112 * @cpmceid_bitmap : Bitmap for the availability of architected common
113 * events (event_code < 0x40).
114 */
115struct dsu_pmu {
116 struct pmu pmu;
117 struct device *dev;
118 raw_spinlock_t pmu_lock;
119 struct dsu_hw_events hw_events;
120 cpumask_t associated_cpus;
121 cpumask_t active_cpu;
122 struct hlist_node cpuhp_node;
123	s8 num_counters;
124 int irq;
125 DECLARE_BITMAP(cpmceid_bitmap, DSU_PMU_MAX_COMMON_EVENTS);
126};
127
128static unsigned long dsu_pmu_cpuhp_state;
129
130static inline struct dsu_pmu *to_dsu_pmu(struct pmu *pmu)
131{
132 return container_of(pmu, struct dsu_pmu, pmu);
133}
134
135static ssize_t dsu_pmu_sysfs_event_show(struct device *dev,
136 struct device_attribute *attr,
137 char *buf)
138{
139 struct dev_ext_attribute *eattr = container_of(attr,
140 struct dev_ext_attribute, attr);
141 return snprintf(buf, PAGE_SIZE, "event=0x%lx\n",
142 (unsigned long)eattr->var);
143}
144
145static ssize_t dsu_pmu_sysfs_format_show(struct device *dev,
146 struct device_attribute *attr,
147 char *buf)
148{
149 struct dev_ext_attribute *eattr = container_of(attr,
150 struct dev_ext_attribute, attr);
151 return snprintf(buf, PAGE_SIZE, "%s\n", (char *)eattr->var);
152}
153
154static ssize_t dsu_pmu_cpumask_show(struct device *dev,
155 struct device_attribute *attr,
156 char *buf)
157{
158 struct pmu *pmu = dev_get_drvdata(dev);
159 struct dsu_pmu *dsu_pmu = to_dsu_pmu(pmu);
160 struct dev_ext_attribute *eattr = container_of(attr,
161 struct dev_ext_attribute, attr);
162 unsigned long mask_id = (unsigned long)eattr->var;
163 const cpumask_t *cpumask;
164
165 switch (mask_id) {
166 case DSU_ACTIVE_CPU_MASK:
167 cpumask = &dsu_pmu->active_cpu;
168 break;
169 case DSU_ASSOCIATED_CPU_MASK:
170 cpumask = &dsu_pmu->associated_cpus;
171 break;
172 default:
173 return 0;
174 }
175 return cpumap_print_to_pagebuf(true, buf, cpumask);
176}
177
178static struct attribute *dsu_pmu_format_attrs[] = {
179 DSU_FORMAT_ATTR(event, "config:0-31"),
180 NULL,
181};
182
183static const struct attribute_group dsu_pmu_format_attr_group = {
184 .name = "format",
185 .attrs = dsu_pmu_format_attrs,
186};
187
188static struct attribute *dsu_pmu_event_attrs[] = {
189 DSU_EVENT_ATTR(cycles, 0x11),
190 DSU_EVENT_ATTR(bus_access, 0x19),
191 DSU_EVENT_ATTR(memory_error, 0x1a),
192 DSU_EVENT_ATTR(bus_cycles, 0x1d),
193 DSU_EVENT_ATTR(l3d_cache_allocate, 0x29),
194 DSU_EVENT_ATTR(l3d_cache_refill, 0x2a),
195 DSU_EVENT_ATTR(l3d_cache, 0x2b),
196 DSU_EVENT_ATTR(l3d_cache_wb, 0x2c),
197 NULL,
198};
199
200static umode_t
201dsu_pmu_event_attr_is_visible(struct kobject *kobj, struct attribute *attr,
202 int unused)
203{
204 struct pmu *pmu = dev_get_drvdata(kobj_to_dev(kobj));
205 struct dsu_pmu *dsu_pmu = to_dsu_pmu(pmu);
206 struct dev_ext_attribute *eattr = container_of(attr,
207 struct dev_ext_attribute, attr.attr);
208 unsigned long evt = (unsigned long)eattr->var;
209
210 return test_bit(evt, dsu_pmu->cpmceid_bitmap) ? attr->mode : 0;
211}
212
213static const struct attribute_group dsu_pmu_events_attr_group = {
214 .name = "events",
215 .attrs = dsu_pmu_event_attrs,
216 .is_visible = dsu_pmu_event_attr_is_visible,
217};
218
219static struct attribute *dsu_pmu_cpumask_attrs[] = {
220 DSU_CPUMASK_ATTR(cpumask, DSU_ACTIVE_CPU_MASK),
221 DSU_CPUMASK_ATTR(associated_cpus, DSU_ASSOCIATED_CPU_MASK),
222 NULL,
223};
224
225static const struct attribute_group dsu_pmu_cpumask_attr_group = {
226 .attrs = dsu_pmu_cpumask_attrs,
227};
228
229static const struct attribute_group *dsu_pmu_attr_groups[] = {
230 &dsu_pmu_cpumask_attr_group,
231 &dsu_pmu_events_attr_group,
232 &dsu_pmu_format_attr_group,
233 NULL,
234};
235
236static int dsu_pmu_get_online_cpu_any_but(struct dsu_pmu *dsu_pmu, int cpu)
237{
238 struct cpumask online_supported;
239
240 cpumask_and(&online_supported,
241 &dsu_pmu->associated_cpus, cpu_online_mask);
242 return cpumask_any_but(&online_supported, cpu);
243}
244
245static inline bool dsu_pmu_counter_valid(struct dsu_pmu *dsu_pmu, u32 idx)
246{
247 return (idx < dsu_pmu->num_counters) ||
248 (idx == DSU_PMU_IDX_CYCLE_COUNTER);
249}
250
251static inline u64 dsu_pmu_read_counter(struct perf_event *event)
252{
253 u64 val;
254 unsigned long flags;
255 struct dsu_pmu *dsu_pmu = to_dsu_pmu(event->pmu);
256 int idx = event->hw.idx;
257
258 if (WARN_ON(!cpumask_test_cpu(smp_processor_id(),
259 &dsu_pmu->associated_cpus)))
260 return 0;
261
262 if (!dsu_pmu_counter_valid(dsu_pmu, idx)) {
263 dev_err(event->pmu->dev,
264 "Trying reading invalid counter %d\n", idx);
265 return 0;
266 }
267
268 raw_spin_lock_irqsave(&dsu_pmu->pmu_lock, flags);
269 if (idx == DSU_PMU_IDX_CYCLE_COUNTER)
270 val = __dsu_pmu_read_pmccntr();
271 else
272 val = __dsu_pmu_read_counter(idx);
273 raw_spin_unlock_irqrestore(&dsu_pmu->pmu_lock, flags);
274
275 return val;
276}
277
278static void dsu_pmu_write_counter(struct perf_event *event, u64 val)
279{
280 unsigned long flags;
281 struct dsu_pmu *dsu_pmu = to_dsu_pmu(event->pmu);
282 int idx = event->hw.idx;
283
284 if (WARN_ON(!cpumask_test_cpu(smp_processor_id(),
285 &dsu_pmu->associated_cpus)))
286 return;
287
288 if (!dsu_pmu_counter_valid(dsu_pmu, idx)) {
289 dev_err(event->pmu->dev,
290 "writing to invalid counter %d\n", idx);
291 return;
292 }
293
294 raw_spin_lock_irqsave(&dsu_pmu->pmu_lock, flags);
295 if (idx == DSU_PMU_IDX_CYCLE_COUNTER)
296 __dsu_pmu_write_pmccntr(val);
297 else
298 __dsu_pmu_write_counter(idx, val);
299 raw_spin_unlock_irqrestore(&dsu_pmu->pmu_lock, flags);
300}
301
302static int dsu_pmu_get_event_idx(struct dsu_hw_events *hw_events,
303 struct perf_event *event)
304{
305 int idx;
306 unsigned long evtype = event->attr.config;
307 struct dsu_pmu *dsu_pmu = to_dsu_pmu(event->pmu);
308 unsigned long *used_mask = hw_events->used_mask;
309
310 if (evtype == DSU_PMU_EVT_CYCLES) {
311 if (test_and_set_bit(DSU_PMU_IDX_CYCLE_COUNTER, used_mask))
312 return -EAGAIN;
313 return DSU_PMU_IDX_CYCLE_COUNTER;
314 }
315
316 idx = find_first_zero_bit(used_mask, dsu_pmu->num_counters);
317 if (idx >= dsu_pmu->num_counters)
318 return -EAGAIN;
319 set_bit(idx, hw_events->used_mask);
320 return idx;
321}
322
323static void dsu_pmu_enable_counter(struct dsu_pmu *dsu_pmu, int idx)
324{
325 __dsu_pmu_counter_interrupt_enable(idx);
326 __dsu_pmu_enable_counter(idx);
327}
328
329static void dsu_pmu_disable_counter(struct dsu_pmu *dsu_pmu, int idx)
330{
331 __dsu_pmu_disable_counter(idx);
332 __dsu_pmu_counter_interrupt_disable(idx);
333}
334
335static inline void dsu_pmu_set_event(struct dsu_pmu *dsu_pmu,
336 struct perf_event *event)
337{
338 int idx = event->hw.idx;
339 unsigned long flags;
340
341 if (!dsu_pmu_counter_valid(dsu_pmu, idx)) {
342 dev_err(event->pmu->dev,
343 "Trying to set invalid counter %d\n", idx);
344 return;
345 }
346
347 raw_spin_lock_irqsave(&dsu_pmu->pmu_lock, flags);
348 __dsu_pmu_set_event(idx, event->hw.config_base);
349 raw_spin_unlock_irqrestore(&dsu_pmu->pmu_lock, flags);
350}
351
352static void dsu_pmu_event_update(struct perf_event *event)
353{
354 struct hw_perf_event *hwc = &event->hw;
355 u64 delta, prev_count, new_count;
356
357 do {
358 /* We may also be called from the irq handler */
359 prev_count = local64_read(&hwc->prev_count);
360 new_count = dsu_pmu_read_counter(event);
361 } while (local64_cmpxchg(&hwc->prev_count, prev_count, new_count) !=
362 prev_count);
363 delta = (new_count - prev_count) & DSU_PMU_COUNTER_MASK(hwc->idx);
364 local64_add(delta, &event->count);
365}
366
367static void dsu_pmu_read(struct perf_event *event)
368{
369 dsu_pmu_event_update(event);
370}
371
372static inline u32 dsu_pmu_get_reset_overflow(void)
373{
374 return __dsu_pmu_get_reset_overflow();
375}
376
377/**
378 * dsu_pmu_set_event_period: Set the period for the counter.
379 *
380 * All DSU PMU event counters, except the cycle counter are 32bit
381 * counters. To handle cases of extreme interrupt latency, we program
382 * the counter with half of the max count for the counters.
383 */
384static void dsu_pmu_set_event_period(struct perf_event *event)
385{
386 int idx = event->hw.idx;
387 u64 val = DSU_PMU_COUNTER_MASK(idx) >> 1;
388
389 local64_set(&event->hw.prev_count, val);
390 dsu_pmu_write_counter(event, val);
391}
392
393static irqreturn_t dsu_pmu_handle_irq(int irq_num, void *dev)
394{
395 int i;
396 bool handled = false;
397 struct dsu_pmu *dsu_pmu = dev;
398 struct dsu_hw_events *hw_events = &dsu_pmu->hw_events;
399 unsigned long overflow;
400
401 overflow = dsu_pmu_get_reset_overflow();
402 if (!overflow)
403 return IRQ_NONE;
404
405 for_each_set_bit(i, &overflow, DSU_PMU_MAX_HW_CNTRS) {
406 struct perf_event *event = hw_events->events[i];
407
408 if (!event)
409 continue;
410 dsu_pmu_event_update(event);
411 dsu_pmu_set_event_period(event);
412 handled = true;
413 }
414
415 return IRQ_RETVAL(handled);
416}
417
418static void dsu_pmu_start(struct perf_event *event, int pmu_flags)
419{
420 struct dsu_pmu *dsu_pmu = to_dsu_pmu(event->pmu);
421
422 /* We always reprogram the counter */
423 if (pmu_flags & PERF_EF_RELOAD)
424 WARN_ON(!(event->hw.state & PERF_HES_UPTODATE));
425 dsu_pmu_set_event_period(event);
426 if (event->hw.idx != DSU_PMU_IDX_CYCLE_COUNTER)
427 dsu_pmu_set_event(dsu_pmu, event);
428 event->hw.state = 0;
429 dsu_pmu_enable_counter(dsu_pmu, event->hw.idx);
430}
431
432static void dsu_pmu_stop(struct perf_event *event, int pmu_flags)
433{
434 struct dsu_pmu *dsu_pmu = to_dsu_pmu(event->pmu);
435
436 if (event->hw.state & PERF_HES_STOPPED)
437 return;
438 dsu_pmu_disable_counter(dsu_pmu, event->hw.idx);
439 dsu_pmu_event_update(event);
440 event->hw.state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
441}
442
443static int dsu_pmu_add(struct perf_event *event, int flags)
444{
445 struct dsu_pmu *dsu_pmu = to_dsu_pmu(event->pmu);
446 struct dsu_hw_events *hw_events = &dsu_pmu->hw_events;
447 struct hw_perf_event *hwc = &event->hw;
448 int idx;
449
450 if (WARN_ON_ONCE(!cpumask_test_cpu(smp_processor_id(),
451 &dsu_pmu->associated_cpus)))
452 return -ENOENT;
453
454 idx = dsu_pmu_get_event_idx(hw_events, event);
455 if (idx < 0)
456 return idx;
457
458 hwc->idx = idx;
459 hw_events->events[idx] = event;
460 hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
461
462 if (flags & PERF_EF_START)
463 dsu_pmu_start(event, PERF_EF_RELOAD);
464
465 perf_event_update_userpage(event);
466 return 0;
467}
468
469static void dsu_pmu_del(struct perf_event *event, int flags)
470{
471 struct dsu_pmu *dsu_pmu = to_dsu_pmu(event->pmu);
472 struct dsu_hw_events *hw_events = &dsu_pmu->hw_events;
473 struct hw_perf_event *hwc = &event->hw;
474 int idx = hwc->idx;
475
476 dsu_pmu_stop(event, PERF_EF_UPDATE);
477 hw_events->events[idx] = NULL;
478 clear_bit(idx, hw_events->used_mask);
479 perf_event_update_userpage(event);
480}
481
482static void dsu_pmu_enable(struct pmu *pmu)
483{
484 u32 pmcr;
485 unsigned long flags;
486 struct dsu_pmu *dsu_pmu = to_dsu_pmu(pmu);
487
488 /* If no counters are added, skip enabling the PMU */
489 if (bitmap_empty(dsu_pmu->hw_events.used_mask, DSU_PMU_MAX_HW_CNTRS))
490 return;
491
492 raw_spin_lock_irqsave(&dsu_pmu->pmu_lock, flags);
493 pmcr = __dsu_pmu_read_pmcr();
494 pmcr |= CLUSTERPMCR_E;
495 __dsu_pmu_write_pmcr(pmcr);
496 raw_spin_unlock_irqrestore(&dsu_pmu->pmu_lock, flags);
497}
498
499static void dsu_pmu_disable(struct pmu *pmu)
500{
501 u32 pmcr;
502 unsigned long flags;
503 struct dsu_pmu *dsu_pmu = to_dsu_pmu(pmu);
504
505 raw_spin_lock_irqsave(&dsu_pmu->pmu_lock, flags);
506 pmcr = __dsu_pmu_read_pmcr();
507 pmcr &= ~CLUSTERPMCR_E;
508 __dsu_pmu_write_pmcr(pmcr);
509 raw_spin_unlock_irqrestore(&dsu_pmu->pmu_lock, flags);
510}
511
512static bool dsu_pmu_validate_event(struct pmu *pmu,
513 struct dsu_hw_events *hw_events,
514 struct perf_event *event)
515{
516 if (is_software_event(event))
517 return true;
518 /* Reject groups spanning multiple HW PMUs. */
519 if (event->pmu != pmu)
520 return false;
521 return dsu_pmu_get_event_idx(hw_events, event) >= 0;
522}
523
524/*
525 * Make sure the group of events can be scheduled at once
526 * on the PMU.
527 */
528static bool dsu_pmu_validate_group(struct perf_event *event)
529{
530 struct perf_event *sibling, *leader = event->group_leader;
531 struct dsu_hw_events fake_hw;
532
533 if (event->group_leader == event)
534 return true;
535
536 memset(fake_hw.used_mask, 0, sizeof(fake_hw.used_mask));
537 if (!dsu_pmu_validate_event(event->pmu, &fake_hw, leader))
538 return false;
539 list_for_each_entry(sibling, &leader->sibling_list, group_entry) {
540 if (!dsu_pmu_validate_event(event->pmu, &fake_hw, sibling))
541 return false;
542 }
543 return dsu_pmu_validate_event(event->pmu, &fake_hw, event);
544}
545
546static int dsu_pmu_event_init(struct perf_event *event)
547{
548 struct dsu_pmu *dsu_pmu = to_dsu_pmu(event->pmu);
549
550 if (event->attr.type != event->pmu->type)
551 return -ENOENT;
552
553 /* We don't support sampling */
554 if (is_sampling_event(event)) {
555 dev_dbg(dsu_pmu->pmu.dev, "Can't support sampling events\n");
556 return -EOPNOTSUPP;
557 }
558
559 /* We cannot support task bound events */
560 if (event->cpu < 0 || event->attach_state & PERF_ATTACH_TASK) {
561 dev_dbg(dsu_pmu->pmu.dev, "Can't support per-task counters\n");
562 return -EINVAL;
563 }
564
565 if (has_branch_stack(event) ||
566 event->attr.exclude_user ||
567 event->attr.exclude_kernel ||
568 event->attr.exclude_hv ||
569 event->attr.exclude_idle ||
570 event->attr.exclude_host ||
571 event->attr.exclude_guest) {
572 dev_dbg(dsu_pmu->pmu.dev, "Can't support filtering\n");
573 return -EINVAL;
574 }
575
576 if (!cpumask_test_cpu(event->cpu, &dsu_pmu->associated_cpus)) {
577 dev_dbg(dsu_pmu->pmu.dev,
578 "Requested cpu is not associated with the DSU\n");
579 return -EINVAL;
580 }
581 /*
582 * Choose the current active CPU to read the events. We don't want
583 * to migrate the event contexts, irq handling etc to the requested
584 * CPU. As long as the requested CPU is within the same DSU, we
585 * are fine.
586 */
587 event->cpu = cpumask_first(&dsu_pmu->active_cpu);
588 if (event->cpu >= nr_cpu_ids)
589 return -EINVAL;
590 if (!dsu_pmu_validate_group(event))
591 return -EINVAL;
592
593 event->hw.config_base = event->attr.config;
594 return 0;
595}
596
597static struct dsu_pmu *dsu_pmu_alloc(struct platform_device *pdev)
598{
599 struct dsu_pmu *dsu_pmu;
600
601 dsu_pmu = devm_kzalloc(&pdev->dev, sizeof(*dsu_pmu), GFP_KERNEL);
602 if (!dsu_pmu)
603 return ERR_PTR(-ENOMEM);
604
605 raw_spin_lock_init(&dsu_pmu->pmu_lock);
606 /*
607 * Initialise the number of counters to -1, until we probe
608 * the real number on a connected CPU.
609 */
610 dsu_pmu->num_counters = -1;
611 return dsu_pmu;
612}
613
614/**
615 * dsu_pmu_dt_get_cpus: Get the list of CPUs in the cluster.
616 */
617static int dsu_pmu_dt_get_cpus(struct device_node *dev, cpumask_t *mask)
618{
619 int i = 0, n, cpu;
620 struct device_node *cpu_node;
621
622 n = of_count_phandle_with_args(dev, "cpus", NULL);
623 if (n <= 0)
624 return -ENODEV;
625 for (; i < n; i++) {
626 cpu_node = of_parse_phandle(dev, "cpus", i);
627 if (!cpu_node)
628 break;
629 cpu = of_cpu_node_to_id(cpu_node);
630 of_node_put(cpu_node);
631 /*
632 * We have to ignore the failures here and continue scanning
633 * the list to handle cases where the nr_cpus could be capped
634 * in the running kernel.
635 */
636 if (cpu < 0)
637 continue;
638 cpumask_set_cpu(cpu, mask);
639 }
640 return 0;
641}
642
643/*
644 * dsu_pmu_probe_pmu: Probe the PMU details on a CPU in the cluster.
645 */
646static void dsu_pmu_probe_pmu(struct dsu_pmu *dsu_pmu)
647{
648 u64 num_counters;
649 u32 cpmceid[2];
650
651 num_counters = (__dsu_pmu_read_pmcr() >> CLUSTERPMCR_N_SHIFT) &
652 CLUSTERPMCR_N_MASK;
653 /* We can only support up to 31 independent counters */
654 if (WARN_ON(num_counters > 31))
655 num_counters = 31;
656 dsu_pmu->num_counters = num_counters;
657 if (!dsu_pmu->num_counters)
658 return;
659 cpmceid[0] = __dsu_pmu_read_pmceid(0);
660 cpmceid[1] = __dsu_pmu_read_pmceid(1);
661 bitmap_from_u32array(dsu_pmu->cpmceid_bitmap,
662 DSU_PMU_MAX_COMMON_EVENTS,
663 cpmceid,
664 ARRAY_SIZE(cpmceid));
665}
666
667static void dsu_pmu_set_active_cpu(int cpu, struct dsu_pmu *dsu_pmu)
668{
669 cpumask_set_cpu(cpu, &dsu_pmu->active_cpu);
670 if (irq_set_affinity_hint(dsu_pmu->irq, &dsu_pmu->active_cpu))
671 pr_warn("Failed to set irq affinity to %d\n", cpu);
672}
673
674/*
675 * dsu_pmu_init_pmu: Initialise the DSU PMU configurations if
676 * we haven't done it already.
677 */
678static void dsu_pmu_init_pmu(struct dsu_pmu *dsu_pmu)
679{
680 if (dsu_pmu->num_counters == -1)
681 dsu_pmu_probe_pmu(dsu_pmu);
682 /* Reset the interrupt overflow mask */
683 dsu_pmu_get_reset_overflow();
684}
685
686static int dsu_pmu_device_probe(struct platform_device *pdev)
687{
688 int irq, rc;
689 struct dsu_pmu *dsu_pmu;
690 char *name;
691 static atomic_t pmu_idx = ATOMIC_INIT(-1);
692
693 dsu_pmu = dsu_pmu_alloc(pdev);
694 if (IS_ERR(dsu_pmu))
695 return PTR_ERR(dsu_pmu);
696
697 rc = dsu_pmu_dt_get_cpus(pdev->dev.of_node, &dsu_pmu->associated_cpus);
698 if (rc) {
699 dev_warn(&pdev->dev, "Failed to parse the CPUs\n");
700 return rc;
701 }
702
703 irq = platform_get_irq(pdev, 0);
704 if (irq < 0) {
705 dev_warn(&pdev->dev, "Failed to find IRQ\n");
706 return -EINVAL;
707 }
708
709 name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "%s_%d",
710 PMUNAME, atomic_inc_return(&pmu_idx));
711 if (!name)
712 return -ENOMEM;
713 rc = devm_request_irq(&pdev->dev, irq, dsu_pmu_handle_irq,
714 IRQF_NOBALANCING, name, dsu_pmu);
715 if (rc) {
716 dev_warn(&pdev->dev, "Failed to request IRQ %d\n", irq);
717 return rc;
718 }
719
720 dsu_pmu->irq = irq;
721 platform_set_drvdata(pdev, dsu_pmu);
722 rc = cpuhp_state_add_instance(dsu_pmu_cpuhp_state,
723 &dsu_pmu->cpuhp_node);
724 if (rc)
725 return rc;
726
727 dsu_pmu->pmu = (struct pmu) {
728 .task_ctx_nr = perf_invalid_context,
729 .module = THIS_MODULE,
730 .pmu_enable = dsu_pmu_enable,
731 .pmu_disable = dsu_pmu_disable,
732 .event_init = dsu_pmu_event_init,
733 .add = dsu_pmu_add,
734 .del = dsu_pmu_del,
735 .start = dsu_pmu_start,
736 .stop = dsu_pmu_stop,
737 .read = dsu_pmu_read,
738
739 .attr_groups = dsu_pmu_attr_groups,
740 };
741
742 rc = perf_pmu_register(&dsu_pmu->pmu, name, -1);
743 if (rc) {
744 cpuhp_state_remove_instance(dsu_pmu_cpuhp_state,
745 &dsu_pmu->cpuhp_node);
746 irq_set_affinity_hint(dsu_pmu->irq, NULL);
747 }
748
749 return rc;
750}
751
752static int dsu_pmu_device_remove(struct platform_device *pdev)
753{
754 struct dsu_pmu *dsu_pmu = platform_get_drvdata(pdev);
755
756 perf_pmu_unregister(&dsu_pmu->pmu);
757 cpuhp_state_remove_instance(dsu_pmu_cpuhp_state, &dsu_pmu->cpuhp_node);
758 irq_set_affinity_hint(dsu_pmu->irq, NULL);
759
760 return 0;
761}
762
763static const struct of_device_id dsu_pmu_of_match[] = {
764 { .compatible = "arm,dsu-pmu", },
765 {},
766};
767
768static struct platform_driver dsu_pmu_driver = {
769 .driver = {
770 .name = DRVNAME,
771 .of_match_table = of_match_ptr(dsu_pmu_of_match),
772 },
773 .probe = dsu_pmu_device_probe,
774 .remove = dsu_pmu_device_remove,
775};
776
777static int dsu_pmu_cpu_online(unsigned int cpu, struct hlist_node *node)
778{
779 struct dsu_pmu *dsu_pmu = hlist_entry_safe(node, struct dsu_pmu,
780 cpuhp_node);
781
782 if (!cpumask_test_cpu(cpu, &dsu_pmu->associated_cpus))
783 return 0;
784
785 /* If the PMU is already managed, there is nothing to do */
786 if (!cpumask_empty(&dsu_pmu->active_cpu))
787 return 0;
788
789 dsu_pmu_init_pmu(dsu_pmu);
790 dsu_pmu_set_active_cpu(cpu, dsu_pmu);
791
792 return 0;
793}
794
795static int dsu_pmu_cpu_teardown(unsigned int cpu, struct hlist_node *node)
796{
797 int dst;
798 struct dsu_pmu *dsu_pmu = hlist_entry_safe(node, struct dsu_pmu,
799 cpuhp_node);
800
801 if (!cpumask_test_and_clear_cpu(cpu, &dsu_pmu->active_cpu))
802 return 0;
803
804 dst = dsu_pmu_get_online_cpu_any_but(dsu_pmu, cpu);
805 /* If there are no active CPUs in the DSU, leave IRQ disabled */
806 if (dst >= nr_cpu_ids) {
807 irq_set_affinity_hint(dsu_pmu->irq, NULL);
808 return 0;
809 }
810
811 perf_pmu_migrate_context(&dsu_pmu->pmu, cpu, dst);
812 dsu_pmu_set_active_cpu(dst, dsu_pmu);
813
814 return 0;
815}
816
817static int __init dsu_pmu_init(void)
818{
819 int ret;
820
821 ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
822 DRVNAME,
823 dsu_pmu_cpu_online,
824 dsu_pmu_cpu_teardown);
825 if (ret < 0)
826 return ret;
827 dsu_pmu_cpuhp_state = ret;
828 return platform_driver_register(&dsu_pmu_driver);
829}
830
831static void __exit dsu_pmu_exit(void)
832{
833 platform_driver_unregister(&dsu_pmu_driver);
834 cpuhp_remove_multi_state(dsu_pmu_cpuhp_state);
835}
836
837module_init(dsu_pmu_init);
838module_exit(dsu_pmu_exit);
839
840MODULE_DEVICE_TABLE(of, dsu_pmu_of_match);
841MODULE_DESCRIPTION("Perf driver for ARM DynamIQ Shared Unit");
842MODULE_AUTHOR("Suzuki K Poulose <suzuki.poulose@arm.com>");
843MODULE_LICENSE("GPL v2");