authorNeil Leeder <nleeder@codeaurora.org>2019-03-26 11:17:51 -0400
committerWill Deacon <will.deacon@arm.com>2019-04-04 11:49:21 -0400
commit7d839b4b9e00645e49345d6ce5dfa8edf53c1a21 (patch)
tree38271bc1daf4fe24b78978e5f4ee6fa5f9fde43d
parent24e516049360eda85cf3fe9903221d43886c2689 (diff)
perf/smmuv3: Add arm64 smmuv3 pmu driver
Add a new driver to support the SMMUv3 PMU and hook it into the perf
events framework.

Each SMMU node may have multiple PMUs associated with it, each of which
may support different events.

SMMUv3 PMCG devices are named as smmuv3_pmcg_<phys_addr_page>, where
<phys_addr_page> is the physical page address of the SMMU PMCG wrapped
to a 4K boundary. For example, the PMCG at 0xff88840000 is named
smmuv3_pmcg_ff88840.

Filtering by stream id is done by specifying filtering parameters with
the event. The options are:

  filter_enable    - 0 = no filtering, 1 = filtering enabled
  filter_span      - 0 = exact match, 1 = pattern match
  filter_stream_id - pattern to filter against

Example:

  perf stat -e smmuv3_pmcg_ff88840/transaction,filter_enable=1,
  filter_span=1,filter_stream_id=0x42/ -a netperf

This applies filter pattern 0x42 to transaction events, which means
events matching stream ids 0x42 and 0x43 are counted, as only the upper
StreamID bits are required to match the given filter. Further filtering
information is available in the SMMU documentation.

SMMU events are not attributable to a CPU, so task mode and sampling
are not supported.

Signed-off-by: Neil Leeder <nleeder@codeaurora.org>
Signed-off-by: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
Reviewed-by: Robin Murphy <robin.murphy@arm.com>
[will: fold in review feedback from Robin]
[will: rewrite Kconfig text and allow building as a module]
Signed-off-by: Will Deacon <will.deacon@arm.com>
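Editor's illustration (not part of the patch): the span-matching rule above
means the lowest clear bit of the programmed pattern decides how many
low-order StreamID bits are ignored. A minimal user-space sketch, assuming
only the pattern semantics described in the driver's header comment:

  #include <stdint.h>
  #include <stdio.h>

  /*
   * Illustrative only: decode a filter_span pattern. The lowest clear bit
   * of the pattern (bit Y-1) marks how many low-order StreamID bits (Y)
   * are ignored when matching; an all-ones pattern matches any StreamID.
   */
  static unsigned int span_ignored_bits(uint32_t pattern)
  {
          unsigned int y = 0;

          while (y < 32 && (pattern & (1u << y)))
                  y++;

          return (y == 32) ? 32 : y + 1;
  }

  int main(void)
  {
          uint32_t pattern = 0x42;        /* as in the example above */
          unsigned int y = span_ignored_bits(pattern);
          uint64_t span = 1ull << y;
          uint32_t first = pattern & (uint32_t)~(span - 1);

          /* Prints: pattern 0x42 matches StreamIDs 0x42..0x43 */
          printf("pattern 0x%x matches StreamIDs 0x%x..0x%llx\n",
                 pattern, first, (unsigned long long)first + span - 1);

          return 0;
  }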
-rw-r--r--   drivers/perf/Kconfig             9
-rw-r--r--   drivers/perf/Makefile            1
-rw-r--r--   drivers/perf/arm_smmuv3_pmu.c    773
3 files changed, 783 insertions, 0 deletions
diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig
index af9bc178495d..a94e586a58b2 100644
--- a/drivers/perf/Kconfig
+++ b/drivers/perf/Kconfig
@@ -52,6 +52,15 @@ config ARM_PMU_ACPI
 	depends on ARM_PMU && ACPI
 	def_bool y
 
+config ARM_SMMU_V3_PMU
+	tristate "ARM SMMUv3 Performance Monitors Extension"
+	depends on ARM64 && ACPI && ARM_SMMU_V3
+	help
+	  Provides support for the ARM SMMUv3 Performance Monitor Counter
+	  Groups (PMCG), which provide monitoring of transactions passing
+	  through the SMMU and allow the resulting information to be filtered
+	  based on the Stream ID of the corresponding master.
+
 config ARM_DSU_PMU
 	tristate "ARM DynamIQ Shared Unit (DSU) PMU"
 	depends on ARM64
diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile
index 909f27fd9db3..30489941f3d6 100644
--- a/drivers/perf/Makefile
+++ b/drivers/perf/Makefile
@@ -4,6 +4,7 @@ obj-$(CONFIG_ARM_CCN) += arm-ccn.o
 obj-$(CONFIG_ARM_DSU_PMU) += arm_dsu_pmu.o
 obj-$(CONFIG_ARM_PMU) += arm_pmu.o arm_pmu_platform.o
 obj-$(CONFIG_ARM_PMU_ACPI) += arm_pmu_acpi.o
+obj-$(CONFIG_ARM_SMMU_V3_PMU) += arm_smmuv3_pmu.o
 obj-$(CONFIG_HISI_PMU) += hisilicon/
 obj-$(CONFIG_QCOM_L2_PMU) += qcom_l2_pmu.o
 obj-$(CONFIG_QCOM_L3_PMU) += qcom_l3_pmu.o
diff --git a/drivers/perf/arm_smmuv3_pmu.c b/drivers/perf/arm_smmuv3_pmu.c
new file mode 100644
index 000000000000..a6d2e3ce94df
--- /dev/null
+++ b/drivers/perf/arm_smmuv3_pmu.c
@@ -0,0 +1,773 @@
1// SPDX-License-Identifier: GPL-2.0
2
3/*
4 * This driver adds support for perf events to use the Performance
5 * Monitor Counter Groups (PMCG) associated with an SMMUv3 node
6 * to monitor that node.
7 *
8 * SMMUv3 PMCG devices are named as smmuv3_pmcg_<phys_addr_page> where
9 * <phys_addr_page> is the physical page address of the SMMU PMCG wrapped
10 * to 4K boundary. For example, the PMCG at 0xff88840000 is named
11 * smmuv3_pmcg_ff88840
12 *
13 * Filtering by stream id is done by specifying filtering parameters
14 * with the event. options are:
15 * filter_enable - 0 = no filtering, 1 = filtering enabled
16 * filter_span - 0 = exact match, 1 = pattern match
17 * filter_stream_id - pattern to filter against
18 *
19 * To match a partial StreamID where the X most-significant bits must match
20 * but the Y least-significant bits might differ, STREAMID is programmed
21 * with a value that contains:
22 * STREAMID[Y - 1] == 0.
23 * STREAMID[Y - 2:0] == 1 (where Y > 1).
24 * The remainder of implemented bits of STREAMID (X bits, from bit Y upwards)
25 * contain a value to match from the corresponding bits of event StreamID.
26 *
27 * Example: perf stat -e smmuv3_pmcg_ff88840/transaction,filter_enable=1,
28 * filter_span=1,filter_stream_id=0x42/ -a netperf
29 * Applies filter pattern 0x42 to transaction events, which means events
30 * matching stream ids 0x42 and 0x43 are counted. Further filtering
31 * information is available in the SMMU documentation.
32 *
33 * SMMU events are not attributable to a CPU, so task mode and sampling
34 * are not supported.
35 */
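/*
 * Editor's note (illustrative, not in the original file): applying the
 * rule above, a pattern of 0x45 (0b1000101) has its lowest clear bit at
 * bit 1, so Y = 2 and StreamIDs 0x44-0x47 all match it.
 */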
36
37#include <linux/acpi.h>
38#include <linux/bitfield.h>
39#include <linux/bitops.h>
40#include <linux/cpuhotplug.h>
41#include <linux/cpumask.h>
42#include <linux/device.h>
43#include <linux/errno.h>
44#include <linux/interrupt.h>
45#include <linux/irq.h>
46#include <linux/kernel.h>
47#include <linux/list.h>
48#include <linux/msi.h>
49#include <linux/perf_event.h>
50#include <linux/platform_device.h>
51#include <linux/smp.h>
52#include <linux/sysfs.h>
53#include <linux/types.h>
54
55#define SMMU_PMCG_EVCNTR0 0x0
56#define SMMU_PMCG_EVCNTR(n, stride) (SMMU_PMCG_EVCNTR0 + (n) * (stride))
57#define SMMU_PMCG_EVTYPER0 0x400
58#define SMMU_PMCG_EVTYPER(n) (SMMU_PMCG_EVTYPER0 + (n) * 4)
59#define SMMU_PMCG_SID_SPAN_SHIFT 29
60#define SMMU_PMCG_SMR0 0xA00
61#define SMMU_PMCG_SMR(n) (SMMU_PMCG_SMR0 + (n) * 4)
62#define SMMU_PMCG_CNTENSET0 0xC00
63#define SMMU_PMCG_CNTENCLR0 0xC20
64#define SMMU_PMCG_INTENSET0 0xC40
65#define SMMU_PMCG_INTENCLR0 0xC60
66#define SMMU_PMCG_OVSCLR0 0xC80
67#define SMMU_PMCG_OVSSET0 0xCC0
68#define SMMU_PMCG_CFGR 0xE00
69#define SMMU_PMCG_CFGR_SID_FILTER_TYPE BIT(23)
70#define SMMU_PMCG_CFGR_RELOC_CTRS BIT(20)
71#define SMMU_PMCG_CFGR_SIZE GENMASK(13, 8)
72#define SMMU_PMCG_CFGR_NCTR GENMASK(5, 0)
73#define SMMU_PMCG_CR 0xE04
74#define SMMU_PMCG_CR_ENABLE BIT(0)
75#define SMMU_PMCG_CEID0 0xE20
76#define SMMU_PMCG_CEID1 0xE28
77#define SMMU_PMCG_IRQ_CTRL 0xE50
78#define SMMU_PMCG_IRQ_CTRL_IRQEN BIT(0)
79#define SMMU_PMCG_IRQ_CFG0 0xE58
80
81#define SMMU_PMCG_DEFAULT_FILTER_SPAN 1
82#define SMMU_PMCG_DEFAULT_FILTER_SID GENMASK(31, 0)
83
84#define SMMU_PMCG_MAX_COUNTERS 64
85#define SMMU_PMCG_ARCH_MAX_EVENTS 128
86
87#define SMMU_PMCG_PA_SHIFT 12
88
89static int cpuhp_state_num;
90
91struct smmu_pmu {
92 struct hlist_node node;
93 struct perf_event *events[SMMU_PMCG_MAX_COUNTERS];
94 DECLARE_BITMAP(used_counters, SMMU_PMCG_MAX_COUNTERS);
95 DECLARE_BITMAP(supported_events, SMMU_PMCG_ARCH_MAX_EVENTS);
96 unsigned int irq;
97 unsigned int on_cpu;
98 struct pmu pmu;
99 unsigned int num_counters;
100 struct device *dev;
101 void __iomem *reg_base;
102 void __iomem *reloc_base;
103 u64 counter_mask;
104 bool global_filter;
105 u32 global_filter_span;
106 u32 global_filter_sid;
107};
108
109#define to_smmu_pmu(p) (container_of(p, struct smmu_pmu, pmu))
110
111#define SMMU_PMU_EVENT_ATTR_EXTRACTOR(_name, _config, _start, _end) \
112 static inline u32 get_##_name(struct perf_event *event) \
113 { \
114 return FIELD_GET(GENMASK_ULL(_end, _start), \
115 event->attr._config); \
116 } \
117
118SMMU_PMU_EVENT_ATTR_EXTRACTOR(event, config, 0, 15);
119SMMU_PMU_EVENT_ATTR_EXTRACTOR(filter_stream_id, config1, 0, 31);
120SMMU_PMU_EVENT_ATTR_EXTRACTOR(filter_span, config1, 32, 32);
121SMMU_PMU_EVENT_ATTR_EXTRACTOR(filter_enable, config1, 33, 33);
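/*
 * Editor's note (illustrative): for a perf invocation with
 * filter_enable=1,filter_span=1,filter_stream_id=0x42, the perf tool
 * encodes attr.config1 as (1ULL << 33) | (1ULL << 32) | 0x42 (see the
 * "format" attributes below), and the extractors above recover each
 * field with FIELD_GET().
 */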
122
123static inline void smmu_pmu_enable(struct pmu *pmu)
124{
125 struct smmu_pmu *smmu_pmu = to_smmu_pmu(pmu);
126
127 writel(SMMU_PMCG_IRQ_CTRL_IRQEN,
128 smmu_pmu->reg_base + SMMU_PMCG_IRQ_CTRL);
129 writel(SMMU_PMCG_CR_ENABLE, smmu_pmu->reg_base + SMMU_PMCG_CR);
130}
131
132static inline void smmu_pmu_disable(struct pmu *pmu)
133{
134 struct smmu_pmu *smmu_pmu = to_smmu_pmu(pmu);
135
136 writel(0, smmu_pmu->reg_base + SMMU_PMCG_CR);
137 writel(0, smmu_pmu->reg_base + SMMU_PMCG_IRQ_CTRL);
138}
139
140static inline void smmu_pmu_counter_set_value(struct smmu_pmu *smmu_pmu,
141 u32 idx, u64 value)
142{
143 if (smmu_pmu->counter_mask & BIT(32))
144 writeq(value, smmu_pmu->reloc_base + SMMU_PMCG_EVCNTR(idx, 8));
145 else
146 writel(value, smmu_pmu->reloc_base + SMMU_PMCG_EVCNTR(idx, 4));
147}
148
149static inline u64 smmu_pmu_counter_get_value(struct smmu_pmu *smmu_pmu, u32 idx)
150{
151 u64 value;
152
153 if (smmu_pmu->counter_mask & BIT(32))
154 value = readq(smmu_pmu->reloc_base + SMMU_PMCG_EVCNTR(idx, 8));
155 else
156 value = readl(smmu_pmu->reloc_base + SMMU_PMCG_EVCNTR(idx, 4));
157
158 return value;
159}
160
161static inline void smmu_pmu_counter_enable(struct smmu_pmu *smmu_pmu, u32 idx)
162{
163 writeq(BIT(idx), smmu_pmu->reg_base + SMMU_PMCG_CNTENSET0);
164}
165
166static inline void smmu_pmu_counter_disable(struct smmu_pmu *smmu_pmu, u32 idx)
167{
168 writeq(BIT(idx), smmu_pmu->reg_base + SMMU_PMCG_CNTENCLR0);
169}
170
171static inline void smmu_pmu_interrupt_enable(struct smmu_pmu *smmu_pmu, u32 idx)
172{
173 writeq(BIT(idx), smmu_pmu->reg_base + SMMU_PMCG_INTENSET0);
174}
175
176static inline void smmu_pmu_interrupt_disable(struct smmu_pmu *smmu_pmu,
177 u32 idx)
178{
179 writeq(BIT(idx), smmu_pmu->reg_base + SMMU_PMCG_INTENCLR0);
180}
181
182static inline void smmu_pmu_set_evtyper(struct smmu_pmu *smmu_pmu, u32 idx,
183 u32 val)
184{
185 writel(val, smmu_pmu->reg_base + SMMU_PMCG_EVTYPER(idx));
186}
187
188static inline void smmu_pmu_set_smr(struct smmu_pmu *smmu_pmu, u32 idx, u32 val)
189{
190 writel(val, smmu_pmu->reg_base + SMMU_PMCG_SMR(idx));
191}
192
193static void smmu_pmu_event_update(struct perf_event *event)
194{
195 struct hw_perf_event *hwc = &event->hw;
196 struct smmu_pmu *smmu_pmu = to_smmu_pmu(event->pmu);
197 u64 delta, prev, now;
198 u32 idx = hwc->idx;
199
200 do {
201 prev = local64_read(&hwc->prev_count);
202 now = smmu_pmu_counter_get_value(smmu_pmu, idx);
203 } while (local64_cmpxchg(&hwc->prev_count, prev, now) != prev);
204
205 /* handle overflow. */
206 delta = now - prev;
207 delta &= smmu_pmu->counter_mask;
208
209 local64_add(delta, &event->count);
210}
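/*
 * Editor's note (illustrative): with 32-bit counters, prev = 0xfffffff0
 * and now = 0x10 give now - prev = 0xffffffff00000020 in 64-bit
 * arithmetic; masking with counter_mask (0xffffffff) recovers the 0x20
 * counts that elapsed across the wrap.
 */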
211
212static void smmu_pmu_set_period(struct smmu_pmu *smmu_pmu,
213 struct hw_perf_event *hwc)
214{
215 u32 idx = hwc->idx;
216 u64 new;
217
218 /*
219 * We limit the max period to half the max counter value of the counter
220 * size, so that even in the case of extreme interrupt latency the
221 * counter will (hopefully) not wrap past its initial value.
222 */
223 new = smmu_pmu->counter_mask >> 1;
224
225 local64_set(&hwc->prev_count, new);
226 smmu_pmu_counter_set_value(smmu_pmu, idx, new);
227}
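/*
 * Editor's note (illustrative): with 32-bit counters, counter_mask >> 1
 * is 0x7fffffff, so each counter is (re)started halfway through its
 * range and can advance roughly 2^31 times before it wraps and raises
 * the overflow interrupt.
 */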
228
229static void smmu_pmu_set_event_filter(struct perf_event *event,
230 int idx, u32 span, u32 sid)
231{
232 struct smmu_pmu *smmu_pmu = to_smmu_pmu(event->pmu);
233 u32 evtyper;
234
235 evtyper = get_event(event) | span << SMMU_PMCG_SID_SPAN_SHIFT;
236 smmu_pmu_set_evtyper(smmu_pmu, idx, evtyper);
237 smmu_pmu_set_smr(smmu_pmu, idx, sid);
238}
239
240static int smmu_pmu_apply_event_filter(struct smmu_pmu *smmu_pmu,
241 struct perf_event *event, int idx)
242{
243 u32 span, sid;
244 unsigned int num_ctrs = smmu_pmu->num_counters;
245 bool filter_en = !!get_filter_enable(event);
246
247 span = filter_en ? get_filter_span(event) :
248 SMMU_PMCG_DEFAULT_FILTER_SPAN;
249 sid = filter_en ? get_filter_stream_id(event) :
250 SMMU_PMCG_DEFAULT_FILTER_SID;
251
252 /* Support individual filter settings */
253 if (!smmu_pmu->global_filter) {
254 smmu_pmu_set_event_filter(event, idx, span, sid);
255 return 0;
256 }
257
258 /* Requested settings same as current global settings */
259 if (span == smmu_pmu->global_filter_span &&
260 sid == smmu_pmu->global_filter_sid)
261 return 0;
262
263 if (!bitmap_empty(smmu_pmu->used_counters, num_ctrs))
264 return -EAGAIN;
265
266 smmu_pmu_set_event_filter(event, 0, span, sid);
267 smmu_pmu->global_filter_span = span;
268 smmu_pmu->global_filter_sid = sid;
269 return 0;
270}
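/*
 * Editor's note (illustrative): when SMMU_PMCG_CFGR.SID_FILTER_TYPE is
 * set, counter 0's EVTYPER/SMR filter applies to every counter, so an
 * event requesting a different span/stream_id pair than the current
 * global one is refused with -EAGAIN while any counter is in use.
 */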
271
272static int smmu_pmu_get_event_idx(struct smmu_pmu *smmu_pmu,
273 struct perf_event *event)
274{
275 int idx, err;
276 unsigned int num_ctrs = smmu_pmu->num_counters;
277
278 idx = find_first_zero_bit(smmu_pmu->used_counters, num_ctrs);
279 if (idx == num_ctrs)
280 /* The counters are all in use. */
281 return -EAGAIN;
282
283 err = smmu_pmu_apply_event_filter(smmu_pmu, event, idx);
284 if (err)
285 return err;
286
287 set_bit(idx, smmu_pmu->used_counters);
288
289 return idx;
290}
291
292/*
293 * Implementation of abstract pmu functionality required by
294 * the core perf events code.
295 */
296
297static int smmu_pmu_event_init(struct perf_event *event)
298{
299 struct hw_perf_event *hwc = &event->hw;
300 struct smmu_pmu *smmu_pmu = to_smmu_pmu(event->pmu);
301 struct device *dev = smmu_pmu->dev;
302 struct perf_event *sibling;
303 u16 event_id;
304
305 if (event->attr.type != event->pmu->type)
306 return -ENOENT;
307
308 if (hwc->sample_period) {
309 dev_dbg(dev, "Sampling not supported\n");
310 return -EOPNOTSUPP;
311 }
312
313 if (event->cpu < 0) {
314 dev_dbg(dev, "Per-task mode not supported\n");
315 return -EOPNOTSUPP;
316 }
317
318 /* Verify specified event is supported on this PMU */
319 event_id = get_event(event);
320 if (event_id < SMMU_PMCG_ARCH_MAX_EVENTS &&
321 (!test_bit(event_id, smmu_pmu->supported_events))) {
322 dev_dbg(dev, "Invalid event %d for this PMU\n", event_id);
323 return -EINVAL;
324 }
325
326 /* Don't allow groups with mixed PMUs, except for s/w events */
327 if (event->group_leader->pmu != event->pmu &&
328 !is_software_event(event->group_leader)) {
329 dev_dbg(dev, "Can't create mixed PMU group\n");
330 return -EINVAL;
331 }
332
333 for_each_sibling_event(sibling, event->group_leader) {
334 if (sibling->pmu != event->pmu &&
335 !is_software_event(sibling)) {
336 dev_dbg(dev, "Can't create mixed PMU group\n");
337 return -EINVAL;
338 }
339 }
340
341 hwc->idx = -1;
342
343 /*
344 * Ensure all events are on the same cpu so all events are in the
345 * same cpu context, to avoid races on pmu_enable etc.
346 */
347 event->cpu = smmu_pmu->on_cpu;
348
349 return 0;
350}
351
352static void smmu_pmu_event_start(struct perf_event *event, int flags)
353{
354 struct smmu_pmu *smmu_pmu = to_smmu_pmu(event->pmu);
355 struct hw_perf_event *hwc = &event->hw;
356 int idx = hwc->idx;
357
358 hwc->state = 0;
359
360 smmu_pmu_set_period(smmu_pmu, hwc);
361
362 smmu_pmu_counter_enable(smmu_pmu, idx);
363}
364
365static void smmu_pmu_event_stop(struct perf_event *event, int flags)
366{
367 struct smmu_pmu *smmu_pmu = to_smmu_pmu(event->pmu);
368 struct hw_perf_event *hwc = &event->hw;
369 int idx = hwc->idx;
370
371 if (hwc->state & PERF_HES_STOPPED)
372 return;
373
374 smmu_pmu_counter_disable(smmu_pmu, idx);
375 /* As the counter gets updated on _start, ignore PERF_EF_UPDATE */
376 smmu_pmu_event_update(event);
377 hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
378}
379
380static int smmu_pmu_event_add(struct perf_event *event, int flags)
381{
382 struct hw_perf_event *hwc = &event->hw;
383 int idx;
384 struct smmu_pmu *smmu_pmu = to_smmu_pmu(event->pmu);
385
386 idx = smmu_pmu_get_event_idx(smmu_pmu, event);
387 if (idx < 0)
388 return idx;
389
390 hwc->idx = idx;
391 hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
392 smmu_pmu->events[idx] = event;
393 local64_set(&hwc->prev_count, 0);
394
395 smmu_pmu_interrupt_enable(smmu_pmu, idx);
396
397 if (flags & PERF_EF_START)
398 smmu_pmu_event_start(event, flags);
399
400 /* Propagate changes to the userspace mapping. */
401 perf_event_update_userpage(event);
402
403 return 0;
404}
405
406static void smmu_pmu_event_del(struct perf_event *event, int flags)
407{
408 struct hw_perf_event *hwc = &event->hw;
409 struct smmu_pmu *smmu_pmu = to_smmu_pmu(event->pmu);
410 int idx = hwc->idx;
411
412 smmu_pmu_event_stop(event, flags | PERF_EF_UPDATE);
413 smmu_pmu_interrupt_disable(smmu_pmu, idx);
414 smmu_pmu->events[idx] = NULL;
415 clear_bit(idx, smmu_pmu->used_counters);
416
417 perf_event_update_userpage(event);
418}
419
420static void smmu_pmu_event_read(struct perf_event *event)
421{
422 smmu_pmu_event_update(event);
423}
424
425/* cpumask */
426
427static ssize_t smmu_pmu_cpumask_show(struct device *dev,
428 struct device_attribute *attr,
429 char *buf)
430{
431 struct smmu_pmu *smmu_pmu = to_smmu_pmu(dev_get_drvdata(dev));
432
433 return cpumap_print_to_pagebuf(true, buf, cpumask_of(smmu_pmu->on_cpu));
434}
435
436static struct device_attribute smmu_pmu_cpumask_attr =
437 __ATTR(cpumask, 0444, smmu_pmu_cpumask_show, NULL);
438
439static struct attribute *smmu_pmu_cpumask_attrs[] = {
440 &smmu_pmu_cpumask_attr.attr,
441 NULL
442};
443
444static struct attribute_group smmu_pmu_cpumask_group = {
445 .attrs = smmu_pmu_cpumask_attrs,
446};
447
448/* Events */
449
450static ssize_t smmu_pmu_event_show(struct device *dev,
451 struct device_attribute *attr, char *page)
452{
453 struct perf_pmu_events_attr *pmu_attr;
454
455 pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr);
456
457 return sprintf(page, "event=0x%02llx\n", pmu_attr->id);
458}
459
460#define SMMU_EVENT_ATTR(name, config) \
461 PMU_EVENT_ATTR(name, smmu_event_attr_##name, \
462 config, smmu_pmu_event_show)
463SMMU_EVENT_ATTR(cycles, 0);
464SMMU_EVENT_ATTR(transaction, 1);
465SMMU_EVENT_ATTR(tlb_miss, 2);
466SMMU_EVENT_ATTR(config_cache_miss, 3);
467SMMU_EVENT_ATTR(trans_table_walk_access, 4);
468SMMU_EVENT_ATTR(config_struct_access, 5);
469SMMU_EVENT_ATTR(pcie_ats_trans_rq, 6);
470SMMU_EVENT_ATTR(pcie_ats_trans_passed, 7);
471
472static struct attribute *smmu_pmu_events[] = {
473 &smmu_event_attr_cycles.attr.attr,
474 &smmu_event_attr_transaction.attr.attr,
475 &smmu_event_attr_tlb_miss.attr.attr,
476 &smmu_event_attr_config_cache_miss.attr.attr,
477 &smmu_event_attr_trans_table_walk_access.attr.attr,
478 &smmu_event_attr_config_struct_access.attr.attr,
479 &smmu_event_attr_pcie_ats_trans_rq.attr.attr,
480 &smmu_event_attr_pcie_ats_trans_passed.attr.attr,
481 NULL
482};
483
484static umode_t smmu_pmu_event_is_visible(struct kobject *kobj,
485 struct attribute *attr, int unused)
486{
487 struct device *dev = kobj_to_dev(kobj);
488 struct smmu_pmu *smmu_pmu = to_smmu_pmu(dev_get_drvdata(dev));
489 struct perf_pmu_events_attr *pmu_attr;
490
491 pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr.attr);
492
493 if (test_bit(pmu_attr->id, smmu_pmu->supported_events))
494 return attr->mode;
495
496 return 0;
497}
498
499static struct attribute_group smmu_pmu_events_group = {
500 .name = "events",
501 .attrs = smmu_pmu_events,
502 .is_visible = smmu_pmu_event_is_visible,
503};
504
505/* Formats */
506PMU_FORMAT_ATTR(event, "config:0-15");
507PMU_FORMAT_ATTR(filter_stream_id, "config1:0-31");
508PMU_FORMAT_ATTR(filter_span, "config1:32");
509PMU_FORMAT_ATTR(filter_enable, "config1:33");
510
511static struct attribute *smmu_pmu_formats[] = {
512 &format_attr_event.attr,
513 &format_attr_filter_stream_id.attr,
514 &format_attr_filter_span.attr,
515 &format_attr_filter_enable.attr,
516 NULL
517};
518
519static struct attribute_group smmu_pmu_format_group = {
520 .name = "format",
521 .attrs = smmu_pmu_formats,
522};
523
524static const struct attribute_group *smmu_pmu_attr_grps[] = {
525 &smmu_pmu_cpumask_group,
526 &smmu_pmu_events_group,
527 &smmu_pmu_format_group,
528 NULL
529};
530
531/*
532 * Generic device handlers
533 */
534
535static int smmu_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
536{
537 struct smmu_pmu *smmu_pmu;
538 unsigned int target;
539
540 smmu_pmu = hlist_entry_safe(node, struct smmu_pmu, node);
541 if (cpu != smmu_pmu->on_cpu)
542 return 0;
543
544 target = cpumask_any_but(cpu_online_mask, cpu);
545 if (target >= nr_cpu_ids)
546 return 0;
547
548 perf_pmu_migrate_context(&smmu_pmu->pmu, cpu, target);
549 smmu_pmu->on_cpu = target;
550 WARN_ON(irq_set_affinity_hint(smmu_pmu->irq, cpumask_of(target)));
551
552 return 0;
553}
554
555static irqreturn_t smmu_pmu_handle_irq(int irq_num, void *data)
556{
557 struct smmu_pmu *smmu_pmu = data;
558 u64 ovsr;
559 unsigned int idx;
560
561 ovsr = readq(smmu_pmu->reloc_base + SMMU_PMCG_OVSSET0);
562 if (!ovsr)
563 return IRQ_NONE;
564
565 writeq(ovsr, smmu_pmu->reloc_base + SMMU_PMCG_OVSCLR0);
566
567 for_each_set_bit(idx, (unsigned long *)&ovsr, smmu_pmu->num_counters) {
568 struct perf_event *event = smmu_pmu->events[idx];
569 struct hw_perf_event *hwc;
570
571 if (WARN_ON_ONCE(!event))
572 continue;
573
574 smmu_pmu_event_update(event);
575 hwc = &event->hw;
576
577 smmu_pmu_set_period(smmu_pmu, hwc);
578 }
579
580 return IRQ_HANDLED;
581}
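/*
 * Editor's note (illustrative): OVSSET0 carries one overflow bit per
 * counter, so a single interrupt may report several counters; each one
 * is folded into its event count and then re-primed to half range.
 */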
582
583static int smmu_pmu_setup_irq(struct smmu_pmu *pmu)
584{
585 unsigned long flags = IRQF_NOBALANCING | IRQF_SHARED | IRQF_NO_THREAD;
586 int irq, ret = -ENXIO;
587
588 irq = pmu->irq;
589 if (irq)
590 ret = devm_request_irq(pmu->dev, irq, smmu_pmu_handle_irq,
591 flags, "smmuv3-pmu", pmu);
592 return ret;
593}
594
595static void smmu_pmu_reset(struct smmu_pmu *smmu_pmu)
596{
597 u64 counter_present_mask = GENMASK_ULL(smmu_pmu->num_counters - 1, 0);
598
599 smmu_pmu_disable(&smmu_pmu->pmu);
600
601 /* Disable counter and interrupt */
602 writeq_relaxed(counter_present_mask,
603 smmu_pmu->reg_base + SMMU_PMCG_CNTENCLR0);
604 writeq_relaxed(counter_present_mask,
605 smmu_pmu->reg_base + SMMU_PMCG_INTENCLR0);
606 writeq_relaxed(counter_present_mask,
607 smmu_pmu->reloc_base + SMMU_PMCG_OVSCLR0);
608}
609
610static int smmu_pmu_probe(struct platform_device *pdev)
611{
612 struct smmu_pmu *smmu_pmu;
613 struct resource *res_0, *res_1;
614 u32 cfgr, reg_size;
615 u64 ceid_64[2];
616 int irq, err;
617 char *name;
618 struct device *dev = &pdev->dev;
619
620 smmu_pmu = devm_kzalloc(dev, sizeof(*smmu_pmu), GFP_KERNEL);
621 if (!smmu_pmu)
622 return -ENOMEM;
623
624 smmu_pmu->dev = dev;
625 platform_set_drvdata(pdev, smmu_pmu);
626
627 smmu_pmu->pmu = (struct pmu) {
628 .task_ctx_nr = perf_invalid_context,
629 .pmu_enable = smmu_pmu_enable,
630 .pmu_disable = smmu_pmu_disable,
631 .event_init = smmu_pmu_event_init,
632 .add = smmu_pmu_event_add,
633 .del = smmu_pmu_event_del,
634 .start = smmu_pmu_event_start,
635 .stop = smmu_pmu_event_stop,
636 .read = smmu_pmu_event_read,
637 .attr_groups = smmu_pmu_attr_grps,
638 .capabilities = PERF_PMU_CAP_NO_EXCLUDE,
639 };
640
641 res_0 = platform_get_resource(pdev, IORESOURCE_MEM, 0);
642 smmu_pmu->reg_base = devm_ioremap_resource(dev, res_0);
643 if (IS_ERR(smmu_pmu->reg_base))
644 return PTR_ERR(smmu_pmu->reg_base);
645
646 cfgr = readl_relaxed(smmu_pmu->reg_base + SMMU_PMCG_CFGR);
647
648 /* Determine if page 1 is present */
649 if (cfgr & SMMU_PMCG_CFGR_RELOC_CTRS) {
650 res_1 = platform_get_resource(pdev, IORESOURCE_MEM, 1);
651 smmu_pmu->reloc_base = devm_ioremap_resource(dev, res_1);
652 if (IS_ERR(smmu_pmu->reloc_base))
653 return PTR_ERR(smmu_pmu->reloc_base);
654 } else {
655 smmu_pmu->reloc_base = smmu_pmu->reg_base;
656 }
657
658 irq = platform_get_irq(pdev, 0);
659 if (irq > 0)
660 smmu_pmu->irq = irq;
661
662 ceid_64[0] = readq_relaxed(smmu_pmu->reg_base + SMMU_PMCG_CEID0);
663 ceid_64[1] = readq_relaxed(smmu_pmu->reg_base + SMMU_PMCG_CEID1);
664 bitmap_from_arr32(smmu_pmu->supported_events, (u32 *)ceid_64,
665 SMMU_PMCG_ARCH_MAX_EVENTS);
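/*
 * Editor's note (illustrative): CEID0/CEID1 form a 128-bit bitmap of
 * implemented event IDs; event_init and the sysfs is_visible hook both
 * consult supported_events to reject or hide unimplemented events.
 */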
666
667 smmu_pmu->num_counters = FIELD_GET(SMMU_PMCG_CFGR_NCTR, cfgr) + 1;
668
669 smmu_pmu->global_filter = !!(cfgr & SMMU_PMCG_CFGR_SID_FILTER_TYPE);
670
671 reg_size = FIELD_GET(SMMU_PMCG_CFGR_SIZE, cfgr);
672 smmu_pmu->counter_mask = GENMASK_ULL(reg_size, 0);
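/*
 * Editor's note (illustrative): a CFGR.SIZE field of 31 describes 32-bit
 * counters, giving counter_mask = GENMASK_ULL(31, 0); larger counters set
 * bit 32 of the mask, which the counter read/write helpers test to choose
 * readq/writeq over readl/writel.
 */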
673
674 smmu_pmu_reset(smmu_pmu);
675
676 err = smmu_pmu_setup_irq(smmu_pmu);
677 if (err) {
678 dev_err(dev, "Setup irq failed, PMU @%pa\n", &res_0->start);
679 return err;
680 }
681
682 name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "smmuv3_pmcg_%llx",
683 (res_0->start) >> SMMU_PMCG_PA_SHIFT);
684 if (!name) {
685 dev_err(dev, "Create name failed, PMU @%pa\n", &res_0->start);
686 return -EINVAL;
687 }
688
689 /* Pick one CPU to be the preferred one to use */
690 smmu_pmu->on_cpu = raw_smp_processor_id();
691 WARN_ON(irq_set_affinity_hint(smmu_pmu->irq,
692 cpumask_of(smmu_pmu->on_cpu)));
693
694 err = cpuhp_state_add_instance_nocalls(cpuhp_state_num,
695 &smmu_pmu->node);
696 if (err) {
697 dev_err(dev, "Error %d registering hotplug, PMU @%pa\n",
698 err, &res_0->start);
699 goto out_cpuhp_err;
700 }
701
702 err = perf_pmu_register(&smmu_pmu->pmu, name, -1);
703 if (err) {
704 dev_err(dev, "Error %d registering PMU @%pa\n",
705 err, &res_0->start);
706 goto out_unregister;
707 }
708
709 dev_info(dev, "Registered PMU @ %pa using %d counters with %s filter settings\n",
710 &res_0->start, smmu_pmu->num_counters,
711 smmu_pmu->global_filter ? "Global(Counter0)" :
712 "Individual");
713
714 return 0;
715
716out_unregister:
717 cpuhp_state_remove_instance_nocalls(cpuhp_state_num, &smmu_pmu->node);
718out_cpuhp_err:
719 put_cpu();
720 return err;
721}
722
723static int smmu_pmu_remove(struct platform_device *pdev)
724{
725 struct smmu_pmu *smmu_pmu = platform_get_drvdata(pdev);
726
727 perf_pmu_unregister(&smmu_pmu->pmu);
728 cpuhp_state_remove_instance_nocalls(cpuhp_state_num, &smmu_pmu->node);
729
730 return 0;
731}
732
733static void smmu_pmu_shutdown(struct platform_device *pdev)
734{
735 struct smmu_pmu *smmu_pmu = platform_get_drvdata(pdev);
736
737 smmu_pmu_disable(&smmu_pmu->pmu);
738}
739
740static struct platform_driver smmu_pmu_driver = {
741 .driver = {
742 .name = "arm-smmu-v3-pmcg",
743 },
744 .probe = smmu_pmu_probe,
745 .remove = smmu_pmu_remove,
746 .shutdown = smmu_pmu_shutdown,
747};
748
749static int __init arm_smmu_pmu_init(void)
750{
751 cpuhp_state_num = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
752 "perf/arm/pmcg:online",
753 NULL,
754 smmu_pmu_offline_cpu);
755 if (cpuhp_state_num < 0)
756 return cpuhp_state_num;
757
758 return platform_driver_register(&smmu_pmu_driver);
759}
760module_init(arm_smmu_pmu_init);
761
762static void __exit arm_smmu_pmu_exit(void)
763{
764 platform_driver_unregister(&smmu_pmu_driver);
765 cpuhp_remove_multi_state(cpuhp_state_num);
766}
767
768module_exit(arm_smmu_pmu_exit);
769
770MODULE_DESCRIPTION("PMU driver for ARM SMMUv3 Performance Monitors Extension");
771MODULE_AUTHOR("Neil Leeder <nleeder@codeaurora.org>");
772MODULE_AUTHOR("Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>");
773MODULE_LICENSE("GPL v2");