author    Robin Murphy <robin.murphy@arm.com>    2018-02-15 13:51:42 -0500
committer Arnd Bergmann <arnd@arndb.de>          2018-03-06 11:26:17 -0500
commit    3de6be7a3dd8934e59d85fc60a170d4ab2f0a0f2 (patch)
tree      57526a619d1fe827b191906ae2e455eb963afef8
parent    1888d3ddc3d6a2511be86045cfb2e7ea5fc67c44 (diff)
drivers/bus: Split Arm CCI driver
The arm-cci driver is really two entirely separate drivers; one for MCPM
port control and the other for the performance monitors. Since they are
already relatively self-contained, let's take the plunge and move the PMU
parts out to drivers/perf where they belong these days.

For non-MCPM systems this leaves a small dependency on the remaining "bus"
stub for initial probing and discovery, but we end up with something that
still fits the general pattern of its fellow system PMU drivers to ease
future maintenance. Moving code to a new file also offers a perfect excuse
to modernise the license/copyright headers and clean up some funky
linewraps on the way.

Cc: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Reviewed-by: Suzuki Poulose <suzuki.poulose@arm.com>
Acked-by: Punit Agrawal <punit.agrawal@arm.com>
Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
-rw-r--r--  drivers/bus/Kconfig    |   28
-rw-r--r--  drivers/bus/arm-cci.c  | 1745
-rw-r--r--  drivers/perf/Kconfig   |   26
-rw-r--r--  drivers/perf/Makefile  |    1
-rw-r--r--  drivers/perf/arm-cci.c | 1747
5 files changed, 1776 insertions, 1771 deletions
diff --git a/drivers/bus/Kconfig b/drivers/bus/Kconfig
index 116446c42c6b..39ddb63be993 100644
--- a/drivers/bus/Kconfig
+++ b/drivers/bus/Kconfig
@@ -8,25 +8,10 @@ menu "Bus devices"
8   8   config ARM_CCI
9   9    bool
10  10
11config ARM_CCI_PMU
12 bool
13 select ARM_CCI
14
15  11  config ARM_CCI400_COMMON
16  12   bool
17  13   select ARM_CCI
18  14
19config ARM_CCI400_PMU
20 bool "ARM CCI400 PMU support"
21 depends on (ARM && CPU_V7) || ARM64
22 depends on PERF_EVENTS
23 select ARM_CCI400_COMMON
24 select ARM_CCI_PMU
25 help
26 Support for PMU events monitoring on the ARM CCI-400 (cache coherent
27 interconnect). CCI-400 supports counting events related to the
28 connected slave/master interfaces.
29
30  15  config ARM_CCI400_PORT_CTRL
31  16   bool
32  17   depends on ARM && OF && CPU_V7
@@ -35,19 +20,6 @@ config ARM_CCI400_PORT_CTRL
35  20   Low level power management driver for CCI400 cache coherent
36  21   interconnect for ARM platforms.
37  22
38config ARM_CCI5xx_PMU
39 bool "ARM CCI-500/CCI-550 PMU support"
40 depends on (ARM && CPU_V7) || ARM64
41 depends on PERF_EVENTS
42 select ARM_CCI_PMU
43 help
44 Support for PMU events monitoring on the ARM CCI-500/CCI-550 cache
45 coherent interconnects. Both of them provide 8 independent event counters,
46 which can count events pertaining to the slave/master interfaces as well
47 as the internal events to the CCI.
48
49 If unsure, say Y
50
51  23  config BRCMSTB_GISB_ARB
52  24   bool "Broadcom STB GISB bus arbiter"
53  25   depends on ARM || ARM64 || MIPS
diff --git a/drivers/bus/arm-cci.c b/drivers/bus/arm-cci.c
index 5426c04fe24b..503c1789dd02 100644
--- a/drivers/bus/arm-cci.c
+++ b/drivers/bus/arm-cci.c
@@ -16,20 +16,17 @@
16  16
17  17  #include <linux/arm-cci.h>
18  18  #include <linux/io.h>
19#include <linux/interrupt.h>
20  19  #include <linux/module.h>
21  20  #include <linux/of_address.h>
22#include <linux/of_irq.h>
23  21  #include <linux/of_platform.h>
24#include <linux/perf_event.h>
25  22  #include <linux/platform_device.h>
26  23  #include <linux/slab.h>
27#include <linux/spinlock.h>
28  24
29  25  #include <asm/cacheflush.h>
30  26  #include <asm/smp_plat.h>
31  27
32      static void __iomem *cci_ctrl_base;
    28  /* Referenced read-only by the PMU driver; see drivers/perf/arm-cci.c */
    29  void __iomem *cci_ctrl_base;
33  30  static unsigned long cci_ctrl_phys;
34  31
35  32  #ifdef CONFIG_ARM_CCI400_PORT_CTRL
@@ -59,1716 +56,7 @@ static const struct of_device_id arm_cci_matches[] = {
59  56   {},
60  57  };
61  58
62#ifdef CONFIG_ARM_CCI_PMU
63
64  59  #define DRIVER_NAME "ARM-CCI"
65#define DRIVER_NAME_PMU DRIVER_NAME " PMU"
66
67#define CCI_PMCR 0x0100
68#define CCI_PID2 0x0fe8
69
70#define CCI_PMCR_CEN 0x00000001
71#define CCI_PMCR_NCNT_MASK 0x0000f800
72#define CCI_PMCR_NCNT_SHIFT 11
73
74#define CCI_PID2_REV_MASK 0xf0
75#define CCI_PID2_REV_SHIFT 4
76
77#define CCI_PMU_EVT_SEL 0x000
78#define CCI_PMU_CNTR 0x004
79#define CCI_PMU_CNTR_CTRL 0x008
80#define CCI_PMU_OVRFLW 0x00c
81
82#define CCI_PMU_OVRFLW_FLAG 1
83
84#define CCI_PMU_CNTR_SIZE(model) ((model)->cntr_size)
85#define CCI_PMU_CNTR_BASE(model, idx) ((idx) * CCI_PMU_CNTR_SIZE(model))
86#define CCI_PMU_CNTR_MASK ((1ULL << 32) -1)
87#define CCI_PMU_CNTR_LAST(cci_pmu) (cci_pmu->num_cntrs - 1)
88
89#define CCI_PMU_MAX_HW_CNTRS(model) \
90 ((model)->num_hw_cntrs + (model)->fixed_hw_cntrs)
91
92/* Types of interfaces that can generate events */
93enum {
94 CCI_IF_SLAVE,
95 CCI_IF_MASTER,
96#ifdef CONFIG_ARM_CCI5xx_PMU
97 CCI_IF_GLOBAL,
98#endif
99 CCI_IF_MAX,
100};
101
102struct event_range {
103 u32 min;
104 u32 max;
105};
106
107struct cci_pmu_hw_events {
108 struct perf_event **events;
109 unsigned long *used_mask;
110 raw_spinlock_t pmu_lock;
111};
112
113struct cci_pmu;
114/*
115 * struct cci_pmu_model:
116 * @fixed_hw_cntrs - Number of fixed event counters
117 * @num_hw_cntrs - Maximum number of programmable event counters
118 * @cntr_size - Size of an event counter mapping
119 */
120struct cci_pmu_model {
121 char *name;
122 u32 fixed_hw_cntrs;
123 u32 num_hw_cntrs;
124 u32 cntr_size;
125 struct attribute **format_attrs;
126 struct attribute **event_attrs;
127 struct event_range event_ranges[CCI_IF_MAX];
128 int (*validate_hw_event)(struct cci_pmu *, unsigned long);
129 int (*get_event_idx)(struct cci_pmu *, struct cci_pmu_hw_events *, unsigned long);
130 void (*write_counters)(struct cci_pmu *, unsigned long *);
131};
132
133static struct cci_pmu_model cci_pmu_models[];
134
135struct cci_pmu {
136 void __iomem *base;
137 struct pmu pmu;
138 int nr_irqs;
139 int *irqs;
140 unsigned long active_irqs;
141 const struct cci_pmu_model *model;
142 struct cci_pmu_hw_events hw_events;
143 struct platform_device *plat_device;
144 int num_cntrs;
145 atomic_t active_events;
146 struct mutex reserve_mutex;
147 struct hlist_node node;
148 cpumask_t cpus;
149};
150
151#define to_cci_pmu(c) (container_of(c, struct cci_pmu, pmu))
152
153enum cci_models {
154#ifdef CONFIG_ARM_CCI400_PMU
155 CCI400_R0,
156 CCI400_R1,
157#endif
158#ifdef CONFIG_ARM_CCI5xx_PMU
159 CCI500_R0,
160 CCI550_R0,
161#endif
162 CCI_MODEL_MAX
163};
164
165static void pmu_write_counters(struct cci_pmu *cci_pmu,
166 unsigned long *mask);
167static ssize_t cci_pmu_format_show(struct device *dev,
168 struct device_attribute *attr, char *buf);
169static ssize_t cci_pmu_event_show(struct device *dev,
170 struct device_attribute *attr, char *buf);
171
172#define CCI_EXT_ATTR_ENTRY(_name, _func, _config) \
173 &((struct dev_ext_attribute[]) { \
174 { __ATTR(_name, S_IRUGO, _func, NULL), (void *)_config } \
175 })[0].attr.attr
176
177#define CCI_FORMAT_EXT_ATTR_ENTRY(_name, _config) \
178 CCI_EXT_ATTR_ENTRY(_name, cci_pmu_format_show, (char *)_config)
179#define CCI_EVENT_EXT_ATTR_ENTRY(_name, _config) \
180 CCI_EXT_ATTR_ENTRY(_name, cci_pmu_event_show, (unsigned long)_config)
181
182/* CCI400 PMU Specific definitions */
183
184#ifdef CONFIG_ARM_CCI400_PMU
185
186/* Port ids */
187#define CCI400_PORT_S0 0
188#define CCI400_PORT_S1 1
189#define CCI400_PORT_S2 2
190#define CCI400_PORT_S3 3
191#define CCI400_PORT_S4 4
192#define CCI400_PORT_M0 5
193#define CCI400_PORT_M1 6
194#define CCI400_PORT_M2 7
195
196#define CCI400_R1_PX 5
197
198/*
199 * Instead of an event id to monitor CCI cycles, a dedicated counter is
200 * provided. Use 0xff to represent CCI cycles and hope that no future revisions
201 * make use of this event in hardware.
202 */
203enum cci400_perf_events {
204 CCI400_PMU_CYCLES = 0xff
205};
206
207#define CCI400_PMU_CYCLE_CNTR_IDX 0
208#define CCI400_PMU_CNTR0_IDX 1
209
210/*
211 * CCI PMU event id is an 8-bit value made of two parts - bits 7:5 for one of 8
212 * ports and bits 4:0 are event codes. There are different event codes
213 * associated with each port type.
214 *
215 * Additionally, the range of events associated with the port types changed
216 * between Rev0 and Rev1.
217 *
218 * The constants below define the range of valid codes for each port type for
219 * the different revisions and are used to validate the event to be monitored.
220 */
221
222#define CCI400_PMU_EVENT_MASK 0xffUL
223#define CCI400_PMU_EVENT_SOURCE_SHIFT 5
224#define CCI400_PMU_EVENT_SOURCE_MASK 0x7
225#define CCI400_PMU_EVENT_CODE_SHIFT 0
226#define CCI400_PMU_EVENT_CODE_MASK 0x1f
227#define CCI400_PMU_EVENT_SOURCE(event) \
228 ((event >> CCI400_PMU_EVENT_SOURCE_SHIFT) & \
229 CCI400_PMU_EVENT_SOURCE_MASK)
230#define CCI400_PMU_EVENT_CODE(event) \
231 ((event >> CCI400_PMU_EVENT_CODE_SHIFT) & CCI400_PMU_EVENT_CODE_MASK)
232
233#define CCI400_R0_SLAVE_PORT_MIN_EV 0x00
234#define CCI400_R0_SLAVE_PORT_MAX_EV 0x13
235#define CCI400_R0_MASTER_PORT_MIN_EV 0x14
236#define CCI400_R0_MASTER_PORT_MAX_EV 0x1a
237
238#define CCI400_R1_SLAVE_PORT_MIN_EV 0x00
239#define CCI400_R1_SLAVE_PORT_MAX_EV 0x14
240#define CCI400_R1_MASTER_PORT_MIN_EV 0x00
241#define CCI400_R1_MASTER_PORT_MAX_EV 0x11
242
243#define CCI400_CYCLE_EVENT_EXT_ATTR_ENTRY(_name, _config) \
244 CCI_EXT_ATTR_ENTRY(_name, cci400_pmu_cycle_event_show, \
245 (unsigned long)_config)
246
247static ssize_t cci400_pmu_cycle_event_show(struct device *dev,
248 struct device_attribute *attr, char *buf);
249
250static struct attribute *cci400_pmu_format_attrs[] = {
251 CCI_FORMAT_EXT_ATTR_ENTRY(event, "config:0-4"),
252 CCI_FORMAT_EXT_ATTR_ENTRY(source, "config:5-7"),
253 NULL
254};
255
256static struct attribute *cci400_r0_pmu_event_attrs[] = {
257 /* Slave events */
258 CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_any, 0x0),
259 CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_device, 0x01),
260 CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_normal_or_nonshareable, 0x2),
261 CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_inner_or_outershareable, 0x3),
262 CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_cache_maintenance, 0x4),
263 CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_mem_barrier, 0x5),
264 CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_sync_barrier, 0x6),
265 CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_dvm_msg, 0x7),
266 CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_dvm_msg_sync, 0x8),
267 CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_stall_tt_full, 0x9),
268 CCI_EVENT_EXT_ATTR_ENTRY(si_r_data_last_hs_snoop, 0xA),
269 CCI_EVENT_EXT_ATTR_ENTRY(si_r_data_stall_rvalids_h_rready_l, 0xB),
270 CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_hs_any, 0xC),
271 CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_hs_device, 0xD),
272 CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_hs_normal_or_nonshareable, 0xE),
273 CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_hs_inner_or_outershare_wback_wclean, 0xF),
274 CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_hs_write_unique, 0x10),
275 CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_hs_write_line_unique, 0x11),
276 CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_hs_evict, 0x12),
277 CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_stall_tt_full, 0x13),
278 /* Master events */
279 CCI_EVENT_EXT_ATTR_ENTRY(mi_retry_speculative_fetch, 0x14),
280 CCI_EVENT_EXT_ATTR_ENTRY(mi_rrq_stall_addr_hazard, 0x15),
281 CCI_EVENT_EXT_ATTR_ENTRY(mi_rrq_stall_id_hazard, 0x16),
282 CCI_EVENT_EXT_ATTR_ENTRY(mi_rrq_stall_tt_full, 0x17),
283 CCI_EVENT_EXT_ATTR_ENTRY(mi_rrq_stall_barrier_hazard, 0x18),
284 CCI_EVENT_EXT_ATTR_ENTRY(mi_wrq_stall_barrier_hazard, 0x19),
285 CCI_EVENT_EXT_ATTR_ENTRY(mi_wrq_stall_tt_full, 0x1A),
286 /* Special event for cycles counter */
287 CCI400_CYCLE_EVENT_EXT_ATTR_ENTRY(cycles, 0xff),
288 NULL
289};
290
291static struct attribute *cci400_r1_pmu_event_attrs[] = {
292 /* Slave events */
293 CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_any, 0x0),
294 CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_device, 0x01),
295 CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_normal_or_nonshareable, 0x2),
296 CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_inner_or_outershareable, 0x3),
297 CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_cache_maintenance, 0x4),
298 CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_mem_barrier, 0x5),
299 CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_sync_barrier, 0x6),
300 CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_dvm_msg, 0x7),
301 CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_dvm_msg_sync, 0x8),
302 CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_stall_tt_full, 0x9),
303 CCI_EVENT_EXT_ATTR_ENTRY(si_r_data_last_hs_snoop, 0xA),
304 CCI_EVENT_EXT_ATTR_ENTRY(si_r_data_stall_rvalids_h_rready_l, 0xB),
305 CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_hs_any, 0xC),
306 CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_hs_device, 0xD),
307 CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_hs_normal_or_nonshareable, 0xE),
308 CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_hs_inner_or_outershare_wback_wclean, 0xF),
309 CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_hs_write_unique, 0x10),
310 CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_hs_write_line_unique, 0x11),
311 CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_hs_evict, 0x12),
312 CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_stall_tt_full, 0x13),
313 CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_stall_slave_id_hazard, 0x14),
314 /* Master events */
315 CCI_EVENT_EXT_ATTR_ENTRY(mi_retry_speculative_fetch, 0x0),
316 CCI_EVENT_EXT_ATTR_ENTRY(mi_stall_cycle_addr_hazard, 0x1),
317 CCI_EVENT_EXT_ATTR_ENTRY(mi_rrq_stall_master_id_hazard, 0x2),
318 CCI_EVENT_EXT_ATTR_ENTRY(mi_rrq_stall_hi_prio_rtq_full, 0x3),
319 CCI_EVENT_EXT_ATTR_ENTRY(mi_rrq_stall_barrier_hazard, 0x4),
320 CCI_EVENT_EXT_ATTR_ENTRY(mi_wrq_stall_barrier_hazard, 0x5),
321 CCI_EVENT_EXT_ATTR_ENTRY(mi_wrq_stall_wtq_full, 0x6),
322 CCI_EVENT_EXT_ATTR_ENTRY(mi_rrq_stall_low_prio_rtq_full, 0x7),
323 CCI_EVENT_EXT_ATTR_ENTRY(mi_rrq_stall_mid_prio_rtq_full, 0x8),
324 CCI_EVENT_EXT_ATTR_ENTRY(mi_rrq_stall_qvn_vn0, 0x9),
325 CCI_EVENT_EXT_ATTR_ENTRY(mi_rrq_stall_qvn_vn1, 0xA),
326 CCI_EVENT_EXT_ATTR_ENTRY(mi_rrq_stall_qvn_vn2, 0xB),
327 CCI_EVENT_EXT_ATTR_ENTRY(mi_rrq_stall_qvn_vn3, 0xC),
328 CCI_EVENT_EXT_ATTR_ENTRY(mi_wrq_stall_qvn_vn0, 0xD),
329 CCI_EVENT_EXT_ATTR_ENTRY(mi_wrq_stall_qvn_vn1, 0xE),
330 CCI_EVENT_EXT_ATTR_ENTRY(mi_wrq_stall_qvn_vn2, 0xF),
331 CCI_EVENT_EXT_ATTR_ENTRY(mi_wrq_stall_qvn_vn3, 0x10),
332 CCI_EVENT_EXT_ATTR_ENTRY(mi_wrq_unique_or_line_unique_addr_hazard, 0x11),
333 /* Special event for cycles counter */
334 CCI400_CYCLE_EVENT_EXT_ATTR_ENTRY(cycles, 0xff),
335 NULL
336};
337
338static ssize_t cci400_pmu_cycle_event_show(struct device *dev,
339 struct device_attribute *attr, char *buf)
340{
341 struct dev_ext_attribute *eattr = container_of(attr,
342 struct dev_ext_attribute, attr);
343 return snprintf(buf, PAGE_SIZE, "config=0x%lx\n", (unsigned long)eattr->var);
344}
345
346static int cci400_get_event_idx(struct cci_pmu *cci_pmu,
347 struct cci_pmu_hw_events *hw,
348 unsigned long cci_event)
349{
350 int idx;
351
352 /* cycles event idx is fixed */
353 if (cci_event == CCI400_PMU_CYCLES) {
354 if (test_and_set_bit(CCI400_PMU_CYCLE_CNTR_IDX, hw->used_mask))
355 return -EAGAIN;
356
357 return CCI400_PMU_CYCLE_CNTR_IDX;
358 }
359
360 for (idx = CCI400_PMU_CNTR0_IDX; idx <= CCI_PMU_CNTR_LAST(cci_pmu); ++idx)
361 if (!test_and_set_bit(idx, hw->used_mask))
362 return idx;
363
364 /* No counters available */
365 return -EAGAIN;
366}
367
368static int cci400_validate_hw_event(struct cci_pmu *cci_pmu, unsigned long hw_event)
369{
370 u8 ev_source = CCI400_PMU_EVENT_SOURCE(hw_event);
371 u8 ev_code = CCI400_PMU_EVENT_CODE(hw_event);
372 int if_type;
373
374 if (hw_event & ~CCI400_PMU_EVENT_MASK)
375 return -ENOENT;
376
377 if (hw_event == CCI400_PMU_CYCLES)
378 return hw_event;
379
380 switch (ev_source) {
381 case CCI400_PORT_S0:
382 case CCI400_PORT_S1:
383 case CCI400_PORT_S2:
384 case CCI400_PORT_S3:
385 case CCI400_PORT_S4:
386 /* Slave Interface */
387 if_type = CCI_IF_SLAVE;
388 break;
389 case CCI400_PORT_M0:
390 case CCI400_PORT_M1:
391 case CCI400_PORT_M2:
392 /* Master Interface */
393 if_type = CCI_IF_MASTER;
394 break;
395 default:
396 return -ENOENT;
397 }
398
399 if (ev_code >= cci_pmu->model->event_ranges[if_type].min &&
400 ev_code <= cci_pmu->model->event_ranges[if_type].max)
401 return hw_event;
402
403 return -ENOENT;
404}
405
406static int probe_cci400_revision(void)
407{
408 int rev;
409 rev = readl_relaxed(cci_ctrl_base + CCI_PID2) & CCI_PID2_REV_MASK;
410 rev >>= CCI_PID2_REV_SHIFT;
411
412 if (rev < CCI400_R1_PX)
413 return CCI400_R0;
414 else
415 return CCI400_R1;
416}
417
418static const struct cci_pmu_model *probe_cci_model(struct platform_device *pdev)
419{
420 if (platform_has_secure_cci_access())
421 return &cci_pmu_models[probe_cci400_revision()];
422 return NULL;
423}
424#else /* !CONFIG_ARM_CCI400_PMU */
425static inline struct cci_pmu_model *probe_cci_model(struct platform_device *pdev)
426{
427 return NULL;
428}
429#endif /* CONFIG_ARM_CCI400_PMU */
430
431#ifdef CONFIG_ARM_CCI5xx_PMU
432
433/*
434 * CCI5xx PMU event id is an 9-bit value made of two parts.
435 * bits [8:5] - Source for the event
436 * bits [4:0] - Event code (specific to type of interface)
437 *
438 *
439 */
440
441/* Port ids */
442#define CCI5xx_PORT_S0 0x0
443#define CCI5xx_PORT_S1 0x1
444#define CCI5xx_PORT_S2 0x2
445#define CCI5xx_PORT_S3 0x3
446#define CCI5xx_PORT_S4 0x4
447#define CCI5xx_PORT_S5 0x5
448#define CCI5xx_PORT_S6 0x6
449
450#define CCI5xx_PORT_M0 0x8
451#define CCI5xx_PORT_M1 0x9
452#define CCI5xx_PORT_M2 0xa
453#define CCI5xx_PORT_M3 0xb
454#define CCI5xx_PORT_M4 0xc
455#define CCI5xx_PORT_M5 0xd
456#define CCI5xx_PORT_M6 0xe
457
458#define CCI5xx_PORT_GLOBAL 0xf
459
460#define CCI5xx_PMU_EVENT_MASK 0x1ffUL
461#define CCI5xx_PMU_EVENT_SOURCE_SHIFT 0x5
462#define CCI5xx_PMU_EVENT_SOURCE_MASK 0xf
463#define CCI5xx_PMU_EVENT_CODE_SHIFT 0x0
464#define CCI5xx_PMU_EVENT_CODE_MASK 0x1f
465
466#define CCI5xx_PMU_EVENT_SOURCE(event) \
467 ((event >> CCI5xx_PMU_EVENT_SOURCE_SHIFT) & CCI5xx_PMU_EVENT_SOURCE_MASK)
468#define CCI5xx_PMU_EVENT_CODE(event) \
469 ((event >> CCI5xx_PMU_EVENT_CODE_SHIFT) & CCI5xx_PMU_EVENT_CODE_MASK)
470
471#define CCI5xx_SLAVE_PORT_MIN_EV 0x00
472#define CCI5xx_SLAVE_PORT_MAX_EV 0x1f
473#define CCI5xx_MASTER_PORT_MIN_EV 0x00
474#define CCI5xx_MASTER_PORT_MAX_EV 0x06
475#define CCI5xx_GLOBAL_PORT_MIN_EV 0x00
476#define CCI5xx_GLOBAL_PORT_MAX_EV 0x0f
477
478
479#define CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(_name, _config) \
480 CCI_EXT_ATTR_ENTRY(_name, cci5xx_pmu_global_event_show, \
481 (unsigned long) _config)
482
483static ssize_t cci5xx_pmu_global_event_show(struct device *dev,
484 struct device_attribute *attr, char *buf);
485
486static struct attribute *cci5xx_pmu_format_attrs[] = {
487 CCI_FORMAT_EXT_ATTR_ENTRY(event, "config:0-4"),
488 CCI_FORMAT_EXT_ATTR_ENTRY(source, "config:5-8"),
489 NULL,
490};
491
492static struct attribute *cci5xx_pmu_event_attrs[] = {
493 /* Slave events */
494 CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_arvalid, 0x0),
495 CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_dev, 0x1),
496 CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_nonshareable, 0x2),
497 CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_shareable_non_alloc, 0x3),
498 CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_shareable_alloc, 0x4),
499 CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_invalidate, 0x5),
500 CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_cache_maint, 0x6),
501 CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_dvm_msg, 0x7),
502 CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_rval, 0x8),
503 CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_rlast_snoop, 0x9),
504 CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_hs_awalid, 0xA),
505 CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_dev, 0xB),
506 CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_non_shareable, 0xC),
507 CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_share_wb, 0xD),
508 CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_share_wlu, 0xE),
509 CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_share_wunique, 0xF),
510 CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_evict, 0x10),
511 CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_wrevict, 0x11),
512 CCI_EVENT_EXT_ATTR_ENTRY(si_w_data_beat, 0x12),
513 CCI_EVENT_EXT_ATTR_ENTRY(si_srq_acvalid, 0x13),
514 CCI_EVENT_EXT_ATTR_ENTRY(si_srq_read, 0x14),
515 CCI_EVENT_EXT_ATTR_ENTRY(si_srq_clean, 0x15),
516 CCI_EVENT_EXT_ATTR_ENTRY(si_srq_data_transfer_low, 0x16),
517 CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_stall_arvalid, 0x17),
518 CCI_EVENT_EXT_ATTR_ENTRY(si_r_data_stall, 0x18),
519 CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_stall, 0x19),
520 CCI_EVENT_EXT_ATTR_ENTRY(si_w_data_stall, 0x1A),
521 CCI_EVENT_EXT_ATTR_ENTRY(si_w_resp_stall, 0x1B),
522 CCI_EVENT_EXT_ATTR_ENTRY(si_srq_stall, 0x1C),
523 CCI_EVENT_EXT_ATTR_ENTRY(si_s_data_stall, 0x1D),
524 CCI_EVENT_EXT_ATTR_ENTRY(si_rq_stall_ot_limit, 0x1E),
525 CCI_EVENT_EXT_ATTR_ENTRY(si_r_stall_arbit, 0x1F),
526
527 /* Master events */
528 CCI_EVENT_EXT_ATTR_ENTRY(mi_r_data_beat_any, 0x0),
529 CCI_EVENT_EXT_ATTR_ENTRY(mi_w_data_beat_any, 0x1),
530 CCI_EVENT_EXT_ATTR_ENTRY(mi_rrq_stall, 0x2),
531 CCI_EVENT_EXT_ATTR_ENTRY(mi_r_data_stall, 0x3),
532 CCI_EVENT_EXT_ATTR_ENTRY(mi_wrq_stall, 0x4),
533 CCI_EVENT_EXT_ATTR_ENTRY(mi_w_data_stall, 0x5),
534 CCI_EVENT_EXT_ATTR_ENTRY(mi_w_resp_stall, 0x6),
535
536 /* Global events */
537 CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_filter_bank_0_1, 0x0),
538 CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_filter_bank_2_3, 0x1),
539 CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_filter_bank_4_5, 0x2),
540 CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_filter_bank_6_7, 0x3),
541 CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_miss_filter_bank_0_1, 0x4),
542 CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_miss_filter_bank_2_3, 0x5),
543 CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_miss_filter_bank_4_5, 0x6),
544 CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_miss_filter_bank_6_7, 0x7),
545 CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_back_invalidation, 0x8),
546 CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_stall_alloc_busy, 0x9),
547 CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_stall_tt_full, 0xA),
548 CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_wrq, 0xB),
549 CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_cd_hs, 0xC),
550 CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_rq_stall_addr_hazard, 0xD),
551 CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_rq_stall_tt_full, 0xE),
552 CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_rq_tzmp1_prot, 0xF),
553 NULL
554};
555
556static ssize_t cci5xx_pmu_global_event_show(struct device *dev,
557 struct device_attribute *attr, char *buf)
558{
559 struct dev_ext_attribute *eattr = container_of(attr,
560 struct dev_ext_attribute, attr);
561 /* Global events have single fixed source code */
562 return snprintf(buf, PAGE_SIZE, "event=0x%lx,source=0x%x\n",
563 (unsigned long)eattr->var, CCI5xx_PORT_GLOBAL);
564}
565
566/*
567 * CCI500 provides 8 independent event counters that can count
568 * any of the events available.
569 * CCI500 PMU event source ids
570 * 0x0-0x6 - Slave interfaces
571 * 0x8-0xD - Master interfaces
572 * 0xf - Global Events
573 * 0x7,0xe - Reserved
574 */
575static int cci500_validate_hw_event(struct cci_pmu *cci_pmu,
576 unsigned long hw_event)
577{
578 u32 ev_source = CCI5xx_PMU_EVENT_SOURCE(hw_event);
579 u32 ev_code = CCI5xx_PMU_EVENT_CODE(hw_event);
580 int if_type;
581
582 if (hw_event & ~CCI5xx_PMU_EVENT_MASK)
583 return -ENOENT;
584
585 switch (ev_source) {
586 case CCI5xx_PORT_S0:
587 case CCI5xx_PORT_S1:
588 case CCI5xx_PORT_S2:
589 case CCI5xx_PORT_S3:
590 case CCI5xx_PORT_S4:
591 case CCI5xx_PORT_S5:
592 case CCI5xx_PORT_S6:
593 if_type = CCI_IF_SLAVE;
594 break;
595 case CCI5xx_PORT_M0:
596 case CCI5xx_PORT_M1:
597 case CCI5xx_PORT_M2:
598 case CCI5xx_PORT_M3:
599 case CCI5xx_PORT_M4:
600 case CCI5xx_PORT_M5:
601 if_type = CCI_IF_MASTER;
602 break;
603 case CCI5xx_PORT_GLOBAL:
604 if_type = CCI_IF_GLOBAL;
605 break;
606 default:
607 return -ENOENT;
608 }
609
610 if (ev_code >= cci_pmu->model->event_ranges[if_type].min &&
611 ev_code <= cci_pmu->model->event_ranges[if_type].max)
612 return hw_event;
613
614 return -ENOENT;
615}
616
617/*
618 * CCI550 provides 8 independent event counters that can count
619 * any of the events available.
620 * CCI550 PMU event source ids
621 * 0x0-0x6 - Slave interfaces
622 * 0x8-0xe - Master interfaces
623 * 0xf - Global Events
624 * 0x7 - Reserved
625 */
626static int cci550_validate_hw_event(struct cci_pmu *cci_pmu,
627 unsigned long hw_event)
628{
629 u32 ev_source = CCI5xx_PMU_EVENT_SOURCE(hw_event);
630 u32 ev_code = CCI5xx_PMU_EVENT_CODE(hw_event);
631 int if_type;
632
633 if (hw_event & ~CCI5xx_PMU_EVENT_MASK)
634 return -ENOENT;
635
636 switch (ev_source) {
637 case CCI5xx_PORT_S0:
638 case CCI5xx_PORT_S1:
639 case CCI5xx_PORT_S2:
640 case CCI5xx_PORT_S3:
641 case CCI5xx_PORT_S4:
642 case CCI5xx_PORT_S5:
643 case CCI5xx_PORT_S6:
644 if_type = CCI_IF_SLAVE;
645 break;
646 case CCI5xx_PORT_M0:
647 case CCI5xx_PORT_M1:
648 case CCI5xx_PORT_M2:
649 case CCI5xx_PORT_M3:
650 case CCI5xx_PORT_M4:
651 case CCI5xx_PORT_M5:
652 case CCI5xx_PORT_M6:
653 if_type = CCI_IF_MASTER;
654 break;
655 case CCI5xx_PORT_GLOBAL:
656 if_type = CCI_IF_GLOBAL;
657 break;
658 default:
659 return -ENOENT;
660 }
661
662 if (ev_code >= cci_pmu->model->event_ranges[if_type].min &&
663 ev_code <= cci_pmu->model->event_ranges[if_type].max)
664 return hw_event;
665
666 return -ENOENT;
667}
668
669#endif /* CONFIG_ARM_CCI5xx_PMU */
670
671/*
672 * Program the CCI PMU counters which have PERF_HES_ARCH set
673 * with the event period and mark them ready before we enable
674 * PMU.
675 */
676static void cci_pmu_sync_counters(struct cci_pmu *cci_pmu)
677{
678 int i;
679 struct cci_pmu_hw_events *cci_hw = &cci_pmu->hw_events;
680
681 DECLARE_BITMAP(mask, cci_pmu->num_cntrs);
682
683 bitmap_zero(mask, cci_pmu->num_cntrs);
684 for_each_set_bit(i, cci_pmu->hw_events.used_mask, cci_pmu->num_cntrs) {
685 struct perf_event *event = cci_hw->events[i];
686
687 if (WARN_ON(!event))
688 continue;
689
690 /* Leave the events which are not counting */
691 if (event->hw.state & PERF_HES_STOPPED)
692 continue;
693 if (event->hw.state & PERF_HES_ARCH) {
694 set_bit(i, mask);
695 event->hw.state &= ~PERF_HES_ARCH;
696 }
697 }
698
699 pmu_write_counters(cci_pmu, mask);
700}
701
702/* Should be called with cci_pmu->hw_events->pmu_lock held */
703static void __cci_pmu_enable_nosync(struct cci_pmu *cci_pmu)
704{
705 u32 val;
706
707 /* Enable all the PMU counters. */
708 val = readl_relaxed(cci_ctrl_base + CCI_PMCR) | CCI_PMCR_CEN;
709 writel(val, cci_ctrl_base + CCI_PMCR);
710}
711
712/* Should be called with cci_pmu->hw_events->pmu_lock held */
713static void __cci_pmu_enable_sync(struct cci_pmu *cci_pmu)
714{
715 cci_pmu_sync_counters(cci_pmu);
716 __cci_pmu_enable_nosync(cci_pmu);
717}
718
719/* Should be called with cci_pmu->hw_events->pmu_lock held */
720static void __cci_pmu_disable(void)
721{
722 u32 val;
723
724 /* Disable all the PMU counters. */
725 val = readl_relaxed(cci_ctrl_base + CCI_PMCR) & ~CCI_PMCR_CEN;
726 writel(val, cci_ctrl_base + CCI_PMCR);
727}
728
729static ssize_t cci_pmu_format_show(struct device *dev,
730 struct device_attribute *attr, char *buf)
731{
732 struct dev_ext_attribute *eattr = container_of(attr,
733 struct dev_ext_attribute, attr);
734 return snprintf(buf, PAGE_SIZE, "%s\n", (char *)eattr->var);
735}
736
737static ssize_t cci_pmu_event_show(struct device *dev,
738 struct device_attribute *attr, char *buf)
739{
740 struct dev_ext_attribute *eattr = container_of(attr,
741 struct dev_ext_attribute, attr);
742 /* source parameter is mandatory for normal PMU events */
743 return snprintf(buf, PAGE_SIZE, "source=?,event=0x%lx\n",
744 (unsigned long)eattr->var);
745}
746
747static int pmu_is_valid_counter(struct cci_pmu *cci_pmu, int idx)
748{
749 return 0 <= idx && idx <= CCI_PMU_CNTR_LAST(cci_pmu);
750}
751
752static u32 pmu_read_register(struct cci_pmu *cci_pmu, int idx, unsigned int offset)
753{
754 return readl_relaxed(cci_pmu->base +
755 CCI_PMU_CNTR_BASE(cci_pmu->model, idx) + offset);
756}
757
758static void pmu_write_register(struct cci_pmu *cci_pmu, u32 value,
759 int idx, unsigned int offset)
760{
761 writel_relaxed(value, cci_pmu->base +
762 CCI_PMU_CNTR_BASE(cci_pmu->model, idx) + offset);
763}
764
765static void pmu_disable_counter(struct cci_pmu *cci_pmu, int idx)
766{
767 pmu_write_register(cci_pmu, 0, idx, CCI_PMU_CNTR_CTRL);
768}
769
770static void pmu_enable_counter(struct cci_pmu *cci_pmu, int idx)
771{
772 pmu_write_register(cci_pmu, 1, idx, CCI_PMU_CNTR_CTRL);
773}
774
775static bool __maybe_unused
776pmu_counter_is_enabled(struct cci_pmu *cci_pmu, int idx)
777{
778 return (pmu_read_register(cci_pmu, idx, CCI_PMU_CNTR_CTRL) & 0x1) != 0;
779}
780
781static void pmu_set_event(struct cci_pmu *cci_pmu, int idx, unsigned long event)
782{
783 pmu_write_register(cci_pmu, event, idx, CCI_PMU_EVT_SEL);
784}
785
786/*
787 * For all counters on the CCI-PMU, disable any 'enabled' counters,
788 * saving the changed counters in the mask, so that we can restore
789 * it later using pmu_restore_counters. The mask is private to the
790 * caller. We cannot rely on the used_mask maintained by the CCI_PMU
791 * as it only tells us if the counter is assigned to perf_event or not.
792 * The state of the perf_event cannot be locked by the PMU layer, hence
793 * we check the individual counter status (which can be locked by
794 * cci_pm->hw_events->pmu_lock).
795 *
796 * @mask should be initialised to empty by the caller.
797 */
798static void __maybe_unused
799pmu_save_counters(struct cci_pmu *cci_pmu, unsigned long *mask)
800{
801 int i;
802
803 for (i = 0; i < cci_pmu->num_cntrs; i++) {
804 if (pmu_counter_is_enabled(cci_pmu, i)) {
805 set_bit(i, mask);
806 pmu_disable_counter(cci_pmu, i);
807 }
808 }
809}
810
811/*
812 * Restore the status of the counters. Reversal of the pmu_save_counters().
813 * For each counter set in the mask, enable the counter back.
814 */
815static void __maybe_unused
816pmu_restore_counters(struct cci_pmu *cci_pmu, unsigned long *mask)
817{
818 int i;
819
820 for_each_set_bit(i, mask, cci_pmu->num_cntrs)
821 pmu_enable_counter(cci_pmu, i);
822}
823
824/*
825 * Returns the number of programmable counters actually implemented
826 * by the cci
827 */
828static u32 pmu_get_max_counters(void)
829{
830 return (readl_relaxed(cci_ctrl_base + CCI_PMCR) &
831 CCI_PMCR_NCNT_MASK) >> CCI_PMCR_NCNT_SHIFT;
832}
833
834static int pmu_get_event_idx(struct cci_pmu_hw_events *hw, struct perf_event *event)
835{
836 struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu);
837 unsigned long cci_event = event->hw.config_base;
838 int idx;
839
840 if (cci_pmu->model->get_event_idx)
841 return cci_pmu->model->get_event_idx(cci_pmu, hw, cci_event);
842
843 /* Generic code to find an unused idx from the mask */
844 for(idx = 0; idx <= CCI_PMU_CNTR_LAST(cci_pmu); idx++)
845 if (!test_and_set_bit(idx, hw->used_mask))
846 return idx;
847
848 /* No counters available */
849 return -EAGAIN;
850}
851
852static int pmu_map_event(struct perf_event *event)
853{
854 struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu);
855
856 if (event->attr.type < PERF_TYPE_MAX ||
857 !cci_pmu->model->validate_hw_event)
858 return -ENOENT;
859
860 return cci_pmu->model->validate_hw_event(cci_pmu, event->attr.config);
861}
862
863static int pmu_request_irq(struct cci_pmu *cci_pmu, irq_handler_t handler)
864{
865 int i;
866 struct platform_device *pmu_device = cci_pmu->plat_device;
867
868 if (unlikely(!pmu_device))
869 return -ENODEV;
870
871 if (cci_pmu->nr_irqs < 1) {
872 dev_err(&pmu_device->dev, "no irqs for CCI PMUs defined\n");
873 return -ENODEV;
874 }
875
876 /*
877 * Register all available CCI PMU interrupts. In the interrupt handler
878 * we iterate over the counters checking for interrupt source (the
879 * overflowing counter) and clear it.
880 *
881 * This should allow handling of non-unique interrupt for the counters.
882 */
883 for (i = 0; i < cci_pmu->nr_irqs; i++) {
884 int err = request_irq(cci_pmu->irqs[i], handler, IRQF_SHARED,
885 "arm-cci-pmu", cci_pmu);
886 if (err) {
887 dev_err(&pmu_device->dev, "unable to request IRQ%d for ARM CCI PMU counters\n",
888 cci_pmu->irqs[i]);
889 return err;
890 }
891
892 set_bit(i, &cci_pmu->active_irqs);
893 }
894
895 return 0;
896}
897
898static void pmu_free_irq(struct cci_pmu *cci_pmu)
899{
900 int i;
901
902 for (i = 0; i < cci_pmu->nr_irqs; i++) {
903 if (!test_and_clear_bit(i, &cci_pmu->active_irqs))
904 continue;
905
906 free_irq(cci_pmu->irqs[i], cci_pmu);
907 }
908}
909
910static u32 pmu_read_counter(struct perf_event *event)
911{
912 struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu);
913 struct hw_perf_event *hw_counter = &event->hw;
914 int idx = hw_counter->idx;
915 u32 value;
916
917 if (unlikely(!pmu_is_valid_counter(cci_pmu, idx))) {
918 dev_err(&cci_pmu->plat_device->dev, "Invalid CCI PMU counter %d\n", idx);
919 return 0;
920 }
921 value = pmu_read_register(cci_pmu, idx, CCI_PMU_CNTR);
922
923 return value;
924}
925
926static void pmu_write_counter(struct cci_pmu *cci_pmu, u32 value, int idx)
927{
928 pmu_write_register(cci_pmu, value, idx, CCI_PMU_CNTR);
929}
930
931static void __pmu_write_counters(struct cci_pmu *cci_pmu, unsigned long *mask)
932{
933 int i;
934 struct cci_pmu_hw_events *cci_hw = &cci_pmu->hw_events;
935
936 for_each_set_bit(i, mask, cci_pmu->num_cntrs) {
937 struct perf_event *event = cci_hw->events[i];
938
939 if (WARN_ON(!event))
940 continue;
941 pmu_write_counter(cci_pmu, local64_read(&event->hw.prev_count), i);
942 }
943}
944
945static void pmu_write_counters(struct cci_pmu *cci_pmu, unsigned long *mask)
946{
947 if (cci_pmu->model->write_counters)
948 cci_pmu->model->write_counters(cci_pmu, mask);
949 else
950 __pmu_write_counters(cci_pmu, mask);
951}
952
953#ifdef CONFIG_ARM_CCI5xx_PMU
954
955/*
956 * CCI-500/CCI-550 has advanced power saving policies, which could gate the
957 * clocks to the PMU counters, which makes the writes to them ineffective.
958 * The only way to write to those counters is when the global counters
959 * are enabled and the particular counter is enabled.
960 *
961 * So we do the following :
962 *
963 * 1) Disable all the PMU counters, saving their current state
964 * 2) Enable the global PMU profiling, now that all counters are
965 * disabled.
966 *
967 * For each counter to be programmed, repeat steps 3-7:
968 *
969 * 3) Write an invalid event code to the event control register for the
970 counter, so that the counters are not modified.
971 * 4) Enable the counter control for the counter.
972 * 5) Set the counter value
973 * 6) Disable the counter
974 * 7) Restore the event in the target counter
975 *
976 * 8) Disable the global PMU.
977 * 9) Restore the status of the rest of the counters.
978 *
979 * We choose an event which for CCI-5xx is guaranteed not to count.
980 * We use the highest possible event code (0x1f) for the master interface 0.
981 */
982#define CCI5xx_INVALID_EVENT ((CCI5xx_PORT_M0 << CCI5xx_PMU_EVENT_SOURCE_SHIFT) | \
983 (CCI5xx_PMU_EVENT_CODE_MASK << CCI5xx_PMU_EVENT_CODE_SHIFT))
984static void cci5xx_pmu_write_counters(struct cci_pmu *cci_pmu, unsigned long *mask)
985{
986 int i;
987 DECLARE_BITMAP(saved_mask, cci_pmu->num_cntrs);
988
989 bitmap_zero(saved_mask, cci_pmu->num_cntrs);
990 pmu_save_counters(cci_pmu, saved_mask);
991
992 /*
993 * Now that all the counters are disabled, we can safely turn the PMU on,
994 * without syncing the status of the counters
995 */
996 __cci_pmu_enable_nosync(cci_pmu);
997
998 for_each_set_bit(i, mask, cci_pmu->num_cntrs) {
999 struct perf_event *event = cci_pmu->hw_events.events[i];
1000
1001 if (WARN_ON(!event))
1002 continue;
1003
1004 pmu_set_event(cci_pmu, i, CCI5xx_INVALID_EVENT);
1005 pmu_enable_counter(cci_pmu, i);
1006 pmu_write_counter(cci_pmu, local64_read(&event->hw.prev_count), i);
1007 pmu_disable_counter(cci_pmu, i);
1008 pmu_set_event(cci_pmu, i, event->hw.config_base);
1009 }
1010
1011 __cci_pmu_disable();
1012
1013 pmu_restore_counters(cci_pmu, saved_mask);
1014}
1015
1016#endif /* CONFIG_ARM_CCI5xx_PMU */
1017
1018static u64 pmu_event_update(struct perf_event *event)
1019{
1020 struct hw_perf_event *hwc = &event->hw;
1021 u64 delta, prev_raw_count, new_raw_count;
1022
1023 do {
1024 prev_raw_count = local64_read(&hwc->prev_count);
1025 new_raw_count = pmu_read_counter(event);
1026 } while (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
1027 new_raw_count) != prev_raw_count);
1028
1029 delta = (new_raw_count - prev_raw_count) & CCI_PMU_CNTR_MASK;
1030
1031 local64_add(delta, &event->count);
1032
1033 return new_raw_count;
1034}
1035
1036static void pmu_read(struct perf_event *event)
1037{
1038 pmu_event_update(event);
1039}
1040
1041static void pmu_event_set_period(struct perf_event *event)
1042{
1043 struct hw_perf_event *hwc = &event->hw;
1044 /*
1045 * The CCI PMU counters have a period of 2^32. To account for the
1046 * possiblity of extreme interrupt latency we program for a period of
1047 * half that. Hopefully we can handle the interrupt before another 2^31
1048 * events occur and the counter overtakes its previous value.
1049 */
1050 u64 val = 1ULL << 31;
1051 local64_set(&hwc->prev_count, val);
1052
1053 /*
1054 * CCI PMU uses PERF_HES_ARCH to keep track of the counters, whose
1055 * values needs to be sync-ed with the s/w state before the PMU is
1056 * enabled.
1057 * Mark this counter for sync.
1058 */
1059 hwc->state |= PERF_HES_ARCH;
1060}
1061
1062static irqreturn_t pmu_handle_irq(int irq_num, void *dev)
1063{
1064 unsigned long flags;
1065 struct cci_pmu *cci_pmu = dev;
1066 struct cci_pmu_hw_events *events = &cci_pmu->hw_events;
1067 int idx, handled = IRQ_NONE;
1068
1069 raw_spin_lock_irqsave(&events->pmu_lock, flags);
1070
1071 /* Disable the PMU while we walk through the counters */
1072 __cci_pmu_disable();
1073 /*
1074 * Iterate over counters and update the corresponding perf events.
1075 * This should work regardless of whether we have per-counter overflow
1076 * interrupt or a combined overflow interrupt.
1077 */
1078 for (idx = 0; idx <= CCI_PMU_CNTR_LAST(cci_pmu); idx++) {
1079 struct perf_event *event = events->events[idx];
1080
1081 if (!event)
1082 continue;
1083
1084 /* Did this counter overflow? */
1085 if (!(pmu_read_register(cci_pmu, idx, CCI_PMU_OVRFLW) &
1086 CCI_PMU_OVRFLW_FLAG))
1087 continue;
1088
1089 pmu_write_register(cci_pmu, CCI_PMU_OVRFLW_FLAG, idx,
1090 CCI_PMU_OVRFLW);
1091
1092 pmu_event_update(event);
1093 pmu_event_set_period(event);
1094 handled = IRQ_HANDLED;
1095 }
1096
1097 /* Enable the PMU and sync possibly overflowed counters */
1098 __cci_pmu_enable_sync(cci_pmu);
1099 raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
1100
1101 return IRQ_RETVAL(handled);
1102}
1103
1104static int cci_pmu_get_hw(struct cci_pmu *cci_pmu)
1105{
1106 int ret = pmu_request_irq(cci_pmu, pmu_handle_irq);
1107 if (ret) {
1108 pmu_free_irq(cci_pmu);
1109 return ret;
1110 }
1111 return 0;
1112}
1113
1114static void cci_pmu_put_hw(struct cci_pmu *cci_pmu)
1115{
1116 pmu_free_irq(cci_pmu);
1117}
1118
1119static void hw_perf_event_destroy(struct perf_event *event)
1120{
1121 struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu);
1122 atomic_t *active_events = &cci_pmu->active_events;
1123 struct mutex *reserve_mutex = &cci_pmu->reserve_mutex;
1124
1125 if (atomic_dec_and_mutex_lock(active_events, reserve_mutex)) {
1126 cci_pmu_put_hw(cci_pmu);
1127 mutex_unlock(reserve_mutex);
1128 }
1129}
1130
1131static void cci_pmu_enable(struct pmu *pmu)
1132{
1133 struct cci_pmu *cci_pmu = to_cci_pmu(pmu);
1134 struct cci_pmu_hw_events *hw_events = &cci_pmu->hw_events;
1135 int enabled = bitmap_weight(hw_events->used_mask, cci_pmu->num_cntrs);
1136 unsigned long flags;
1137
1138 if (!enabled)
1139 return;
1140
1141 raw_spin_lock_irqsave(&hw_events->pmu_lock, flags);
1142 __cci_pmu_enable_sync(cci_pmu);
1143 raw_spin_unlock_irqrestore(&hw_events->pmu_lock, flags);
1144
1145}
1146
1147static void cci_pmu_disable(struct pmu *pmu)
1148{
1149 struct cci_pmu *cci_pmu = to_cci_pmu(pmu);
1150 struct cci_pmu_hw_events *hw_events = &cci_pmu->hw_events;
1151 unsigned long flags;
1152
1153 raw_spin_lock_irqsave(&hw_events->pmu_lock, flags);
1154 __cci_pmu_disable();
1155 raw_spin_unlock_irqrestore(&hw_events->pmu_lock, flags);
1156}
1157
1158/*
1159 * Check if the idx represents a non-programmable counter.
1160 * All the fixed event counters are mapped before the programmable
1161 * counters.
1162 */
1163static bool pmu_fixed_hw_idx(struct cci_pmu *cci_pmu, int idx)
1164{
1165 return (idx >= 0) && (idx < cci_pmu->model->fixed_hw_cntrs);
1166}
1167
1168static void cci_pmu_start(struct perf_event *event, int pmu_flags)
1169{
1170 struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu);
1171 struct cci_pmu_hw_events *hw_events = &cci_pmu->hw_events;
1172 struct hw_perf_event *hwc = &event->hw;
1173 int idx = hwc->idx;
1174 unsigned long flags;
1175
1176 /*
1177 * To handle interrupt latency, we always reprogram the period
1178 * regardlesss of PERF_EF_RELOAD.
1179 */
1180 if (pmu_flags & PERF_EF_RELOAD)
1181 WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
1182
1183 hwc->state = 0;
1184
1185 if (unlikely(!pmu_is_valid_counter(cci_pmu, idx))) {
1186 dev_err(&cci_pmu->plat_device->dev, "Invalid CCI PMU counter %d\n", idx);
1187 return;
1188 }
1189
1190 raw_spin_lock_irqsave(&hw_events->pmu_lock, flags);
1191
1192 /* Configure the counter unless you are counting a fixed event */
1193 if (!pmu_fixed_hw_idx(cci_pmu, idx))
1194 pmu_set_event(cci_pmu, idx, hwc->config_base);
1195
1196 pmu_event_set_period(event);
1197 pmu_enable_counter(cci_pmu, idx);
1198
1199 raw_spin_unlock_irqrestore(&hw_events->pmu_lock, flags);
1200}
1201
1202static void cci_pmu_stop(struct perf_event *event, int pmu_flags)
1203{
1204 struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu);
1205 struct hw_perf_event *hwc = &event->hw;
1206 int idx = hwc->idx;
1207
1208 if (hwc->state & PERF_HES_STOPPED)
1209 return;
1210
1211 if (unlikely(!pmu_is_valid_counter(cci_pmu, idx))) {
1212 dev_err(&cci_pmu->plat_device->dev, "Invalid CCI PMU counter %d\n", idx);
1213 return;
1214 }
1215
1216 /*
1217 * We always reprogram the counter, so ignore PERF_EF_UPDATE. See
1218 * cci_pmu_start()
1219 */
1220 pmu_disable_counter(cci_pmu, idx);
1221 pmu_event_update(event);
1222 hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
1223}
1224
1225static int cci_pmu_add(struct perf_event *event, int flags)
1226{
1227 struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu);
1228 struct cci_pmu_hw_events *hw_events = &cci_pmu->hw_events;
1229 struct hw_perf_event *hwc = &event->hw;
1230 int idx;
1231 int err = 0;
1232
1233 perf_pmu_disable(event->pmu);
1234
1235 /* If we don't have a space for the counter then finish early. */
1236 idx = pmu_get_event_idx(hw_events, event);
1237 if (idx < 0) {
1238 err = idx;
1239 goto out;
1240 }
1241
1242 event->hw.idx = idx;
1243 hw_events->events[idx] = event;
1244
1245 hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
1246 if (flags & PERF_EF_START)
1247 cci_pmu_start(event, PERF_EF_RELOAD);
1248
1249 /* Propagate our changes to the userspace mapping. */
1250 perf_event_update_userpage(event);
1251
1252out:
1253 perf_pmu_enable(event->pmu);
1254 return err;
1255}
1256
1257static void cci_pmu_del(struct perf_event *event, int flags)
1258{
1259 struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu);
1260 struct cci_pmu_hw_events *hw_events = &cci_pmu->hw_events;
1261 struct hw_perf_event *hwc = &event->hw;
1262 int idx = hwc->idx;
1263
1264 cci_pmu_stop(event, PERF_EF_UPDATE);
1265 hw_events->events[idx] = NULL;
1266 clear_bit(idx, hw_events->used_mask);
1267
1268 perf_event_update_userpage(event);
1269}
1270
1271static int
1272validate_event(struct pmu *cci_pmu,
1273 struct cci_pmu_hw_events *hw_events,
1274 struct perf_event *event)
1275{
1276 if (is_software_event(event))
1277 return 1;
1278
1279 /*
1280 * Reject groups spanning multiple HW PMUs (e.g. CPU + CCI). The
1281 * core perf code won't check that the pmu->ctx == leader->ctx
1282 * until after pmu->event_init(event).
1283 */
1284 if (event->pmu != cci_pmu)
1285 return 0;
1286
1287 if (event->state < PERF_EVENT_STATE_OFF)
1288 return 1;
1289
1290 if (event->state == PERF_EVENT_STATE_OFF && !event->attr.enable_on_exec)
1291 return 1;
1292
1293 return pmu_get_event_idx(hw_events, event) >= 0;
1294}
1295
1296static int
1297validate_group(struct perf_event *event)
1298{
1299 struct perf_event *sibling, *leader = event->group_leader;
1300 struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu);
1301 unsigned long mask[BITS_TO_LONGS(cci_pmu->num_cntrs)];
1302 struct cci_pmu_hw_events fake_pmu = {
1303 /*
1304 * Initialise the fake PMU. We only need to populate the
1305 * used_mask for the purposes of validation.
1306 */
1307 .used_mask = mask,
1308 };
1309 memset(mask, 0, BITS_TO_LONGS(cci_pmu->num_cntrs) * sizeof(unsigned long));
1310
1311 if (!validate_event(event->pmu, &fake_pmu, leader))
1312 return -EINVAL;
1313
1314 list_for_each_entry(sibling, &leader->sibling_list, group_entry) {
1315 if (!validate_event(event->pmu, &fake_pmu, sibling))
1316 return -EINVAL;
1317 }
1318
1319 if (!validate_event(event->pmu, &fake_pmu, event))
1320 return -EINVAL;
1321
1322 return 0;
1323}
1324
1325static int
1326__hw_perf_event_init(struct perf_event *event)
1327{
1328 struct hw_perf_event *hwc = &event->hw;
1329 int mapping;
1330
1331 mapping = pmu_map_event(event);
1332
1333 if (mapping < 0) {
1334 pr_debug("event %x:%llx not supported\n", event->attr.type,
1335 event->attr.config);
1336 return mapping;
1337 }
1338
1339 /*
1340 * We don't assign an index until we actually place the event onto
1341 * hardware. Use -1 to signify that we haven't decided where to put it
1342 * yet.
1343 */
1344 hwc->idx = -1;
1345 hwc->config_base = 0;
1346 hwc->config = 0;
1347 hwc->event_base = 0;
1348
1349 /*
1350 * Store the event encoding into the config_base field.
1351 */
1352 hwc->config_base |= (unsigned long)mapping;
1353
1354 /*
1355 * Limit the sample_period to half of the counter width. That way, the
1356 * new counter value is far less likely to overtake the previous one
1357 * unless you have some serious IRQ latency issues.
1358 */
1359 hwc->sample_period = CCI_PMU_CNTR_MASK >> 1;
1360 hwc->last_period = hwc->sample_period;
1361 local64_set(&hwc->period_left, hwc->sample_period);
1362
1363 if (event->group_leader != event) {
1364 if (validate_group(event) != 0)
1365 return -EINVAL;
1366 }
1367
1368 return 0;
1369}
1370
1371static int cci_pmu_event_init(struct perf_event *event)
1372{
1373 struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu);
1374 atomic_t *active_events = &cci_pmu->active_events;
1375 int err = 0;
1376 int cpu;
1377
1378 if (event->attr.type != event->pmu->type)
1379 return -ENOENT;
1380
1381 /* Shared by all CPUs, no meaningful state to sample */
1382 if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
1383 return -EOPNOTSUPP;
1384
1385 /* We have no filtering of any kind */
1386 if (event->attr.exclude_user ||
1387 event->attr.exclude_kernel ||
1388 event->attr.exclude_hv ||
1389 event->attr.exclude_idle ||
1390 event->attr.exclude_host ||
1391 event->attr.exclude_guest)
1392 return -EINVAL;
1393
1394 /*
1395 * Following the example set by other "uncore" PMUs, we accept any CPU
1396 * and rewrite its affinity dynamically rather than having perf core
1397 * handle cpu == -1 and pid == -1 for this case.
1398 *
1399 * The perf core will pin online CPUs for the duration of this call and
1400 * the event being installed into its context, so the PMU's CPU can't
1401 * change under our feet.
1402 */
1403 cpu = cpumask_first(&cci_pmu->cpus);
1404 if (event->cpu < 0 || cpu < 0)
1405 return -EINVAL;
1406 event->cpu = cpu;
1407
1408 event->destroy = hw_perf_event_destroy;
1409 if (!atomic_inc_not_zero(active_events)) {
1410 mutex_lock(&cci_pmu->reserve_mutex);
1411 if (atomic_read(active_events) == 0)
1412 err = cci_pmu_get_hw(cci_pmu);
1413 if (!err)
1414 atomic_inc(active_events);
1415 mutex_unlock(&cci_pmu->reserve_mutex);
1416 }
1417 if (err)
1418 return err;
1419
1420 err = __hw_perf_event_init(event);
1421 if (err)
1422 hw_perf_event_destroy(event);
1423
1424 return err;
1425}
1426
1427static ssize_t pmu_cpumask_attr_show(struct device *dev,
1428 struct device_attribute *attr, char *buf)
1429{
1430 struct pmu *pmu = dev_get_drvdata(dev);
1431 struct cci_pmu *cci_pmu = to_cci_pmu(pmu);
1432
1433 int n = scnprintf(buf, PAGE_SIZE - 1, "%*pbl",
1434 cpumask_pr_args(&cci_pmu->cpus));
1435 buf[n++] = '\n';
1436 buf[n] = '\0';
1437 return n;
1438}
1439
1440static struct device_attribute pmu_cpumask_attr =
1441 __ATTR(cpumask, S_IRUGO, pmu_cpumask_attr_show, NULL);
1442
1443static struct attribute *pmu_attrs[] = {
1444 &pmu_cpumask_attr.attr,
1445 NULL,
1446};
1447
1448static struct attribute_group pmu_attr_group = {
1449 .attrs = pmu_attrs,
1450};
1451
1452static struct attribute_group pmu_format_attr_group = {
1453 .name = "format",
1454 .attrs = NULL, /* Filled in cci_pmu_init_attrs */
1455};
1456
1457static struct attribute_group pmu_event_attr_group = {
1458 .name = "events",
1459 .attrs = NULL, /* Filled in cci_pmu_init_attrs */
1460};
1461
1462static const struct attribute_group *pmu_attr_groups[] = {
1463 &pmu_attr_group,
1464 &pmu_format_attr_group,
1465 &pmu_event_attr_group,
1466 NULL
1467};
1468
1469static int cci_pmu_init(struct cci_pmu *cci_pmu, struct platform_device *pdev)
1470{
1471 const struct cci_pmu_model *model = cci_pmu->model;
1472 char *name = model->name;
1473 u32 num_cntrs;
1474
1475 pmu_event_attr_group.attrs = model->event_attrs;
1476 pmu_format_attr_group.attrs = model->format_attrs;
1477
1478 cci_pmu->pmu = (struct pmu) {
1479 .name = cci_pmu->model->name,
1480 .task_ctx_nr = perf_invalid_context,
1481 .pmu_enable = cci_pmu_enable,
1482 .pmu_disable = cci_pmu_disable,
1483 .event_init = cci_pmu_event_init,
1484 .add = cci_pmu_add,
1485 .del = cci_pmu_del,
1486 .start = cci_pmu_start,
1487 .stop = cci_pmu_stop,
1488 .read = pmu_read,
1489 .attr_groups = pmu_attr_groups,
1490 };
1491
1492 cci_pmu->plat_device = pdev;
1493 num_cntrs = pmu_get_max_counters();
1494 if (num_cntrs > cci_pmu->model->num_hw_cntrs) {
1495 dev_warn(&pdev->dev,
1496 "PMU implements more counters(%d) than supported by"
1497 " the model(%d), truncated.",
1498 num_cntrs, cci_pmu->model->num_hw_cntrs);
1499 num_cntrs = cci_pmu->model->num_hw_cntrs;
1500 }
1501 cci_pmu->num_cntrs = num_cntrs + cci_pmu->model->fixed_hw_cntrs;
1502
1503 return perf_pmu_register(&cci_pmu->pmu, name, -1);
1504}
1505
1506static int cci_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
1507{
1508 struct cci_pmu *cci_pmu = hlist_entry_safe(node, struct cci_pmu, node);
1509 unsigned int target;
1510
1511 if (!cpumask_test_and_clear_cpu(cpu, &cci_pmu->cpus))
1512 return 0;
1513 target = cpumask_any_but(cpu_online_mask, cpu);
1514 if (target >= nr_cpu_ids)
1515 return 0;
1516 /*
1517 * TODO: migrate context once core races on event->ctx have
1518 * been fixed.
1519 */
1520 cpumask_set_cpu(target, &cci_pmu->cpus);
1521 return 0;
1522}
1523
1524static struct cci_pmu_model cci_pmu_models[] = {
1525#ifdef CONFIG_ARM_CCI400_PMU
1526 [CCI400_R0] = {
1527 .name = "CCI_400",
1528 .fixed_hw_cntrs = 1, /* Cycle counter */
1529 .num_hw_cntrs = 4,
1530 .cntr_size = SZ_4K,
1531 .format_attrs = cci400_pmu_format_attrs,
1532 .event_attrs = cci400_r0_pmu_event_attrs,
1533 .event_ranges = {
1534 [CCI_IF_SLAVE] = {
1535 CCI400_R0_SLAVE_PORT_MIN_EV,
1536 CCI400_R0_SLAVE_PORT_MAX_EV,
1537 },
1538 [CCI_IF_MASTER] = {
1539 CCI400_R0_MASTER_PORT_MIN_EV,
1540 CCI400_R0_MASTER_PORT_MAX_EV,
1541 },
1542 },
1543 .validate_hw_event = cci400_validate_hw_event,
1544 .get_event_idx = cci400_get_event_idx,
1545 },
1546 [CCI400_R1] = {
1547 .name = "CCI_400_r1",
1548 .fixed_hw_cntrs = 1, /* Cycle counter */
1549 .num_hw_cntrs = 4,
1550 .cntr_size = SZ_4K,
1551 .format_attrs = cci400_pmu_format_attrs,
1552 .event_attrs = cci400_r1_pmu_event_attrs,
1553 .event_ranges = {
1554 [CCI_IF_SLAVE] = {
1555 CCI400_R1_SLAVE_PORT_MIN_EV,
1556 CCI400_R1_SLAVE_PORT_MAX_EV,
1557 },
1558 [CCI_IF_MASTER] = {
1559 CCI400_R1_MASTER_PORT_MIN_EV,
1560 CCI400_R1_MASTER_PORT_MAX_EV,
1561 },
1562 },
1563 .validate_hw_event = cci400_validate_hw_event,
1564 .get_event_idx = cci400_get_event_idx,
1565 },
1566#endif
1567#ifdef CONFIG_ARM_CCI5xx_PMU
1568 [CCI500_R0] = {
1569 .name = "CCI_500",
1570 .fixed_hw_cntrs = 0,
1571 .num_hw_cntrs = 8,
1572 .cntr_size = SZ_64K,
1573 .format_attrs = cci5xx_pmu_format_attrs,
1574 .event_attrs = cci5xx_pmu_event_attrs,
1575 .event_ranges = {
1576 [CCI_IF_SLAVE] = {
1577 CCI5xx_SLAVE_PORT_MIN_EV,
1578 CCI5xx_SLAVE_PORT_MAX_EV,
1579 },
1580 [CCI_IF_MASTER] = {
1581 CCI5xx_MASTER_PORT_MIN_EV,
1582 CCI5xx_MASTER_PORT_MAX_EV,
1583 },
1584 [CCI_IF_GLOBAL] = {
1585 CCI5xx_GLOBAL_PORT_MIN_EV,
1586 CCI5xx_GLOBAL_PORT_MAX_EV,
1587 },
1588 },
1589 .validate_hw_event = cci500_validate_hw_event,
1590 .write_counters = cci5xx_pmu_write_counters,
1591 },
1592 [CCI550_R0] = {
1593 .name = "CCI_550",
1594 .fixed_hw_cntrs = 0,
1595 .num_hw_cntrs = 8,
1596 .cntr_size = SZ_64K,
1597 .format_attrs = cci5xx_pmu_format_attrs,
1598 .event_attrs = cci5xx_pmu_event_attrs,
1599 .event_ranges = {
1600 [CCI_IF_SLAVE] = {
1601 CCI5xx_SLAVE_PORT_MIN_EV,
1602 CCI5xx_SLAVE_PORT_MAX_EV,
1603 },
1604 [CCI_IF_MASTER] = {
1605 CCI5xx_MASTER_PORT_MIN_EV,
1606 CCI5xx_MASTER_PORT_MAX_EV,
1607 },
1608 [CCI_IF_GLOBAL] = {
1609 CCI5xx_GLOBAL_PORT_MIN_EV,
1610 CCI5xx_GLOBAL_PORT_MAX_EV,
1611 },
1612 },
1613 .validate_hw_event = cci550_validate_hw_event,
1614 .write_counters = cci5xx_pmu_write_counters,
1615 },
1616#endif
1617};
1618
1619static const struct of_device_id arm_cci_pmu_matches[] = {
1620#ifdef CONFIG_ARM_CCI400_PMU
1621 {
1622 .compatible = "arm,cci-400-pmu",
1623 .data = NULL,
1624 },
1625 {
1626 .compatible = "arm,cci-400-pmu,r0",
1627 .data = &cci_pmu_models[CCI400_R0],
1628 },
1629 {
1630 .compatible = "arm,cci-400-pmu,r1",
1631 .data = &cci_pmu_models[CCI400_R1],
1632 },
1633#endif
1634#ifdef CONFIG_ARM_CCI5xx_PMU
1635 {
1636 .compatible = "arm,cci-500-pmu,r0",
1637 .data = &cci_pmu_models[CCI500_R0],
1638 },
1639 {
1640 .compatible = "arm,cci-550-pmu,r0",
1641 .data = &cci_pmu_models[CCI550_R0],
1642 },
1643#endif
1644 {},
1645};
1646
1647static inline const struct cci_pmu_model *get_cci_model(struct platform_device *pdev)
1648{
1649 const struct of_device_id *match = of_match_node(arm_cci_pmu_matches,
1650 pdev->dev.of_node);
1651 if (!match)
1652 return NULL;
1653 if (match->data)
1654 return match->data;
1655
1656 dev_warn(&pdev->dev, "DEPRECATED compatible property,"
1657 "requires secure access to CCI registers");
1658 return probe_cci_model(pdev);
1659}
1660
1661static bool is_duplicate_irq(int irq, int *irqs, int nr_irqs)
1662{
1663 int i;
1664
1665 for (i = 0; i < nr_irqs; i++)
1666 if (irq == irqs[i])
1667 return true;
1668
1669 return false;
1670}
1671
1672static struct cci_pmu *cci_pmu_alloc(struct platform_device *pdev)
1673{
1674 struct cci_pmu *cci_pmu;
1675 const struct cci_pmu_model *model;
1676
1677 /*
1678 * All allocations are devm_* hence we don't have to free
1679 * them explicitly on an error, as it would end up in driver
1680 * detach.
1681 */
1682 model = get_cci_model(pdev);
1683 if (!model) {
1684 dev_warn(&pdev->dev, "CCI PMU version not supported\n");
1685 return ERR_PTR(-ENODEV);
1686 }
1687
1688 cci_pmu = devm_kzalloc(&pdev->dev, sizeof(*cci_pmu), GFP_KERNEL);
1689 if (!cci_pmu)
1690 return ERR_PTR(-ENOMEM);
1691
1692 cci_pmu->model = model;
1693 cci_pmu->irqs = devm_kcalloc(&pdev->dev, CCI_PMU_MAX_HW_CNTRS(model),
1694 sizeof(*cci_pmu->irqs), GFP_KERNEL);
1695 if (!cci_pmu->irqs)
1696 return ERR_PTR(-ENOMEM);
1697 cci_pmu->hw_events.events = devm_kcalloc(&pdev->dev,
1698 CCI_PMU_MAX_HW_CNTRS(model),
1699 sizeof(*cci_pmu->hw_events.events),
1700 GFP_KERNEL);
1701 if (!cci_pmu->hw_events.events)
1702 return ERR_PTR(-ENOMEM);
1703 cci_pmu->hw_events.used_mask = devm_kcalloc(&pdev->dev,
1704 BITS_TO_LONGS(CCI_PMU_MAX_HW_CNTRS(model)),
1705 sizeof(*cci_pmu->hw_events.used_mask),
1706 GFP_KERNEL);
1707 if (!cci_pmu->hw_events.used_mask)
1708 return ERR_PTR(-ENOMEM);
1709
1710 return cci_pmu;
1711}
1712
1713
1714static int cci_pmu_probe(struct platform_device *pdev)
1715{
1716 struct resource *res;
1717 struct cci_pmu *cci_pmu;
1718 int i, ret, irq;
1719
1720 cci_pmu = cci_pmu_alloc(pdev);
1721 if (IS_ERR(cci_pmu))
1722 return PTR_ERR(cci_pmu);
1723
1724 res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
1725 cci_pmu->base = devm_ioremap_resource(&pdev->dev, res);
1726 if (IS_ERR(cci_pmu->base))
1727 return -ENOMEM;
1728
1729 /*
1730 * CCI PMU has one overflow interrupt per counter; but some may be tied
1731 * together to a common interrupt.
1732 */
1733 cci_pmu->nr_irqs = 0;
1734 for (i = 0; i < CCI_PMU_MAX_HW_CNTRS(cci_pmu->model); i++) {
1735 irq = platform_get_irq(pdev, i);
1736 if (irq < 0)
1737 break;
1738
1739 if (is_duplicate_irq(irq, cci_pmu->irqs, cci_pmu->nr_irqs))
1740 continue;
1741
1742 cci_pmu->irqs[cci_pmu->nr_irqs++] = irq;
1743 }
1744
1745 /*
1746 * Ensure that the device tree has as many interrupts as the number
1747 * of counters.
1748 */
1749 if (i < CCI_PMU_MAX_HW_CNTRS(cci_pmu->model)) {
1750 dev_warn(&pdev->dev, "In-correct number of interrupts: %d, should be %d\n",
1751 i, CCI_PMU_MAX_HW_CNTRS(cci_pmu->model));
1752 return -EINVAL;
1753 }
1754
1755 raw_spin_lock_init(&cci_pmu->hw_events.pmu_lock);
1756 mutex_init(&cci_pmu->reserve_mutex);
1757 atomic_set(&cci_pmu->active_events, 0);
1758 cpumask_set_cpu(get_cpu(), &cci_pmu->cpus);
1759
1760 ret = cci_pmu_init(cci_pmu, pdev);
1761 if (ret) {
1762 put_cpu();
1763 return ret;
1764 }
1765
1766 cpuhp_state_add_instance_nocalls(CPUHP_AP_PERF_ARM_CCI_ONLINE,
1767 &cci_pmu->node);
1768 put_cpu();
1769 pr_info("ARM %s PMU driver probed", cci_pmu->model->name);
1770 return 0;
1771}
1772 60
1773static int cci_platform_probe(struct platform_device *pdev) 61static int cci_platform_probe(struct platform_device *pdev)
1774{ 62{
@@ -1778,14 +66,6 @@ static int cci_platform_probe(struct platform_device *pdev)
1778 return of_platform_populate(pdev->dev.of_node, NULL, NULL, &pdev->dev); 66 return of_platform_populate(pdev->dev.of_node, NULL, NULL, &pdev->dev);
1779} 67}
1780 68
1781static struct platform_driver cci_pmu_driver = {
1782 .driver = {
1783 .name = DRIVER_NAME_PMU,
1784 .of_match_table = arm_cci_pmu_matches,
1785 },
1786 .probe = cci_pmu_probe,
1787};
1788
1789static struct platform_driver cci_platform_driver = { 69static struct platform_driver cci_platform_driver = {
1790 .driver = { 70 .driver = {
1791 .name = DRIVER_NAME, 71 .name = DRIVER_NAME,
@@ -1796,30 +76,9 @@ static struct platform_driver cci_platform_driver = {
1796 76
1797static int __init cci_platform_init(void) 77static int __init cci_platform_init(void)
1798{ 78{
1799 int ret;
1800
1801 ret = cpuhp_setup_state_multi(CPUHP_AP_PERF_ARM_CCI_ONLINE,
1802 "perf/arm/cci:online", NULL,
1803 cci_pmu_offline_cpu);
1804 if (ret)
1805 return ret;
1806
1807 ret = platform_driver_register(&cci_pmu_driver);
1808 if (ret)
1809 return ret;
1810
1811 return platform_driver_register(&cci_platform_driver); 79 return platform_driver_register(&cci_platform_driver);
1812} 80}
1813 81
1814#else /* !CONFIG_ARM_CCI_PMU */
1815
1816static int __init cci_platform_init(void)
1817{
1818 return 0;
1819}
1820
1821#endif /* CONFIG_ARM_CCI_PMU */
1822
1823#ifdef CONFIG_ARM_CCI400_PORT_CTRL 82#ifdef CONFIG_ARM_CCI400_PORT_CTRL
1824 83
1825#define CCI_PORT_CTRL 0x0 84#define CCI_PORT_CTRL 0x0
diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig
index 331b6d992b5a..28bb5a029558 100644
--- a/drivers/perf/Kconfig
+++ b/drivers/perf/Kconfig
@@ -5,6 +5,32 @@
5menu "Performance monitor support" 5menu "Performance monitor support"
6 depends on PERF_EVENTS 6 depends on PERF_EVENTS
7 7
8config ARM_CCI_PMU
9 bool
10 select ARM_CCI
11
12config ARM_CCI400_PMU
13 bool "ARM CCI400 PMU support"
14 depends on (ARM && CPU_V7) || ARM64
15 select ARM_CCI400_COMMON
16 select ARM_CCI_PMU
17 help
18 Support for PMU events monitoring on the ARM CCI-400 (cache coherent
19 interconnect). CCI-400 supports counting events related to the
20 connected slave/master interfaces.
21
22config ARM_CCI5xx_PMU
23 bool "ARM CCI-500/CCI-550 PMU support"
24 depends on (ARM && CPU_V7) || ARM64
25 select ARM_CCI_PMU
26 help
27 Support for PMU events monitoring on the ARM CCI-500/CCI-550 cache
28 coherent interconnects. Both of them provide 8 independent event counters,
29 which can count events pertaining to the slave/master interfaces as well
30 as the internal events to the CCI.
31
32 If unsure, say Y
33
8config ARM_CCN 34config ARM_CCN
9 tristate "ARM CCN driver support" 35 tristate "ARM CCN driver support"
10 depends on ARM || ARM64 36 depends on ARM || ARM64
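
As an illustrative sketch (not part of this patch), a platform defconfig picking up the relocated drivers would now enable them under "Performance monitor support"; ARM_CCI_PMU, ARM_CCI400_COMMON and ARM_CCI are pulled in automatically by the select statements above:

    # Illustration only: enabling the relocated PMU drivers in a defconfig
    CONFIG_PERF_EVENTS=y
    CONFIG_ARM_CCI400_PMU=y
    CONFIG_ARM_CCI5xx_PMU=y
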
diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile
index 5004abee0f3a..b3902bd37d53 100644
--- a/drivers/perf/Makefile
+++ b/drivers/perf/Makefile
@@ -1,4 +1,5 @@
1# SPDX-License-Identifier: GPL-2.0 1# SPDX-License-Identifier: GPL-2.0
2obj-$(CONFIG_ARM_CCI_PMU) += arm-cci.o
2obj-$(CONFIG_ARM_CCN) += arm-ccn.o 3obj-$(CONFIG_ARM_CCN) += arm-ccn.o
3obj-$(CONFIG_ARM_DSU_PMU) += arm_dsu_pmu.o 4obj-$(CONFIG_ARM_DSU_PMU) += arm_dsu_pmu.o
4obj-$(CONFIG_ARM_PMU) += arm_pmu.o arm_pmu_platform.o 5obj-$(CONFIG_ARM_PMU) += arm_pmu.o arm_pmu_platform.o
diff --git a/drivers/perf/arm-cci.c b/drivers/perf/arm-cci.c
new file mode 100644
index 000000000000..d5f8c750fd41
--- /dev/null
+++ b/drivers/perf/arm-cci.c
@@ -0,0 +1,1747 @@
1// SPDX-License-Identifier: GPL-2.0
2// CCI Cache Coherent Interconnect PMU driver
3// Copyright (C) 2013-2018 Arm Ltd.
4// Author: Punit Agrawal <punit.agrawal@arm.com>, Suzuki Poulose <suzuki.poulose@arm.com>
5
6#include <linux/arm-cci.h>
7#include <linux/io.h>
8#include <linux/interrupt.h>
9#include <linux/module.h>
10#include <linux/of_address.h>
11#include <linux/of_irq.h>
12#include <linux/of_platform.h>
13#include <linux/perf_event.h>
14#include <linux/platform_device.h>
15#include <linux/slab.h>
16#include <linux/spinlock.h>
17
18extern void __iomem *const cci_ctrl_base;
19
20#define DRIVER_NAME "ARM-CCI PMU"
21
22#define CCI_PMCR 0x0100
23#define CCI_PID2 0x0fe8
24
25#define CCI_PMCR_CEN 0x00000001
26#define CCI_PMCR_NCNT_MASK 0x0000f800
27#define CCI_PMCR_NCNT_SHIFT 11
28
29#define CCI_PID2_REV_MASK 0xf0
30#define CCI_PID2_REV_SHIFT 4
31
32#define CCI_PMU_EVT_SEL 0x000
33#define CCI_PMU_CNTR 0x004
34#define CCI_PMU_CNTR_CTRL 0x008
35#define CCI_PMU_OVRFLW 0x00c
36
37#define CCI_PMU_OVRFLW_FLAG 1
38
39#define CCI_PMU_CNTR_SIZE(model) ((model)->cntr_size)
40#define CCI_PMU_CNTR_BASE(model, idx) ((idx) * CCI_PMU_CNTR_SIZE(model))
41 #define CCI_PMU_CNTR_MASK		((1ULL << 32) - 1)
42#define CCI_PMU_CNTR_LAST(cci_pmu) (cci_pmu->num_cntrs - 1)
43
44#define CCI_PMU_MAX_HW_CNTRS(model) \
45 ((model)->num_hw_cntrs + (model)->fixed_hw_cntrs)
46
47/* Types of interfaces that can generate events */
48enum {
49 CCI_IF_SLAVE,
50 CCI_IF_MASTER,
51#ifdef CONFIG_ARM_CCI5xx_PMU
52 CCI_IF_GLOBAL,
53#endif
54 CCI_IF_MAX,
55};
56
57struct event_range {
58 u32 min;
59 u32 max;
60};
61
62struct cci_pmu_hw_events {
63 struct perf_event **events;
64 unsigned long *used_mask;
65 raw_spinlock_t pmu_lock;
66};
67
68struct cci_pmu;
69/*
70 * struct cci_pmu_model:
71 * @fixed_hw_cntrs - Number of fixed event counters
72 * @num_hw_cntrs - Maximum number of programmable event counters
73 * @cntr_size - Size of an event counter mapping
74 */
75struct cci_pmu_model {
76 char *name;
77 u32 fixed_hw_cntrs;
78 u32 num_hw_cntrs;
79 u32 cntr_size;
80 struct attribute **format_attrs;
81 struct attribute **event_attrs;
82 struct event_range event_ranges[CCI_IF_MAX];
83 int (*validate_hw_event)(struct cci_pmu *, unsigned long);
84 int (*get_event_idx)(struct cci_pmu *, struct cci_pmu_hw_events *, unsigned long);
85 void (*write_counters)(struct cci_pmu *, unsigned long *);
86};
87
88static struct cci_pmu_model cci_pmu_models[];
89
90struct cci_pmu {
91 void __iomem *base;
92 struct pmu pmu;
93 int nr_irqs;
94 int *irqs;
95 unsigned long active_irqs;
96 const struct cci_pmu_model *model;
97 struct cci_pmu_hw_events hw_events;
98 struct platform_device *plat_device;
99 int num_cntrs;
100 atomic_t active_events;
101 struct mutex reserve_mutex;
102 struct hlist_node node;
103 cpumask_t cpus;
104};
105
106#define to_cci_pmu(c) (container_of(c, struct cci_pmu, pmu))
107
108enum cci_models {
109#ifdef CONFIG_ARM_CCI400_PMU
110 CCI400_R0,
111 CCI400_R1,
112#endif
113#ifdef CONFIG_ARM_CCI5xx_PMU
114 CCI500_R0,
115 CCI550_R0,
116#endif
117 CCI_MODEL_MAX
118};
119
120static void pmu_write_counters(struct cci_pmu *cci_pmu,
121 unsigned long *mask);
122static ssize_t cci_pmu_format_show(struct device *dev,
123 struct device_attribute *attr, char *buf);
124static ssize_t cci_pmu_event_show(struct device *dev,
125 struct device_attribute *attr, char *buf);
126
127#define CCI_EXT_ATTR_ENTRY(_name, _func, _config) \
128 &((struct dev_ext_attribute[]) { \
129 { __ATTR(_name, S_IRUGO, _func, NULL), (void *)_config } \
130 })[0].attr.attr
131
132#define CCI_FORMAT_EXT_ATTR_ENTRY(_name, _config) \
133 CCI_EXT_ATTR_ENTRY(_name, cci_pmu_format_show, (char *)_config)
134#define CCI_EVENT_EXT_ATTR_ENTRY(_name, _config) \
135 CCI_EXT_ATTR_ENTRY(_name, cci_pmu_event_show, (unsigned long)_config)
136
137/* CCI400 PMU Specific definitions */
138
139#ifdef CONFIG_ARM_CCI400_PMU
140
141/* Port ids */
142#define CCI400_PORT_S0 0
143#define CCI400_PORT_S1 1
144#define CCI400_PORT_S2 2
145#define CCI400_PORT_S3 3
146#define CCI400_PORT_S4 4
147#define CCI400_PORT_M0 5
148#define CCI400_PORT_M1 6
149#define CCI400_PORT_M2 7
150
151#define CCI400_R1_PX 5
152
153/*
154 * Instead of an event id to monitor CCI cycles, a dedicated counter is
155 * provided. Use 0xff to represent CCI cycles and hope that no future revisions
156 * make use of this event in hardware.
157 */
158enum cci400_perf_events {
159 CCI400_PMU_CYCLES = 0xff
160};
161
162#define CCI400_PMU_CYCLE_CNTR_IDX 0
163#define CCI400_PMU_CNTR0_IDX 1
164
165/*
166 * CCI PMU event id is an 8-bit value made of two parts - bits 7:5 for one of 8
167 * ports and bits 4:0 are event codes. There are different event codes
168 * associated with each port type.
169 *
170 * Additionally, the range of events associated with the port types changed
171 * between Rev0 and Rev1.
172 *
173 * The constants below define the range of valid codes for each port type for
174 * the different revisions and are used to validate the event to be monitored.
175 */
176
177#define CCI400_PMU_EVENT_MASK 0xffUL
178#define CCI400_PMU_EVENT_SOURCE_SHIFT 5
179#define CCI400_PMU_EVENT_SOURCE_MASK 0x7
180#define CCI400_PMU_EVENT_CODE_SHIFT 0
181#define CCI400_PMU_EVENT_CODE_MASK 0x1f
182#define CCI400_PMU_EVENT_SOURCE(event) \
183 ((event >> CCI400_PMU_EVENT_SOURCE_SHIFT) & \
184 CCI400_PMU_EVENT_SOURCE_MASK)
185#define CCI400_PMU_EVENT_CODE(event) \
186 ((event >> CCI400_PMU_EVENT_CODE_SHIFT) & CCI400_PMU_EVENT_CODE_MASK)
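
As a worked illustration of the encoding above (editorial sketch, not part of the patch; the helper name is hypothetical), event id 0x65 splits into source 0x3 (slave port S3) and code 0x05:

/* Hypothetical example only -- not in the driver. */
static void __maybe_unused cci400_decode_example(void)
{
	unsigned long hw_event = 0x65;			/* 0b011_00101 */
	u8 source = CCI400_PMU_EVENT_SOURCE(hw_event);	/* (0x65 >> 5) & 0x7 = 0x3 */
	u8 code = CCI400_PMU_EVENT_CODE(hw_event);	/* 0x65 & 0x1f = 0x05 */

	pr_debug("source=%u code=0x%x\n", source, code);
}
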
187
188#define CCI400_R0_SLAVE_PORT_MIN_EV 0x00
189#define CCI400_R0_SLAVE_PORT_MAX_EV 0x13
190#define CCI400_R0_MASTER_PORT_MIN_EV 0x14
191#define CCI400_R0_MASTER_PORT_MAX_EV 0x1a
192
193#define CCI400_R1_SLAVE_PORT_MIN_EV 0x00
194#define CCI400_R1_SLAVE_PORT_MAX_EV 0x14
195#define CCI400_R1_MASTER_PORT_MIN_EV 0x00
196#define CCI400_R1_MASTER_PORT_MAX_EV 0x11
197
198#define CCI400_CYCLE_EVENT_EXT_ATTR_ENTRY(_name, _config) \
199 CCI_EXT_ATTR_ENTRY(_name, cci400_pmu_cycle_event_show, \
200 (unsigned long)_config)
201
202static ssize_t cci400_pmu_cycle_event_show(struct device *dev,
203 struct device_attribute *attr, char *buf);
204
205static struct attribute *cci400_pmu_format_attrs[] = {
206 CCI_FORMAT_EXT_ATTR_ENTRY(event, "config:0-4"),
207 CCI_FORMAT_EXT_ATTR_ENTRY(source, "config:5-7"),
208 NULL
209};
210
211static struct attribute *cci400_r0_pmu_event_attrs[] = {
212 /* Slave events */
213 CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_any, 0x0),
214 CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_device, 0x01),
215 CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_normal_or_nonshareable, 0x2),
216 CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_inner_or_outershareable, 0x3),
217 CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_cache_maintenance, 0x4),
218 CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_mem_barrier, 0x5),
219 CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_sync_barrier, 0x6),
220 CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_dvm_msg, 0x7),
221 CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_dvm_msg_sync, 0x8),
222 CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_stall_tt_full, 0x9),
223 CCI_EVENT_EXT_ATTR_ENTRY(si_r_data_last_hs_snoop, 0xA),
224 CCI_EVENT_EXT_ATTR_ENTRY(si_r_data_stall_rvalids_h_rready_l, 0xB),
225 CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_hs_any, 0xC),
226 CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_hs_device, 0xD),
227 CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_hs_normal_or_nonshareable, 0xE),
228 CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_hs_inner_or_outershare_wback_wclean, 0xF),
229 CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_hs_write_unique, 0x10),
230 CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_hs_write_line_unique, 0x11),
231 CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_hs_evict, 0x12),
232 CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_stall_tt_full, 0x13),
233 /* Master events */
234 CCI_EVENT_EXT_ATTR_ENTRY(mi_retry_speculative_fetch, 0x14),
235 CCI_EVENT_EXT_ATTR_ENTRY(mi_rrq_stall_addr_hazard, 0x15),
236 CCI_EVENT_EXT_ATTR_ENTRY(mi_rrq_stall_id_hazard, 0x16),
237 CCI_EVENT_EXT_ATTR_ENTRY(mi_rrq_stall_tt_full, 0x17),
238 CCI_EVENT_EXT_ATTR_ENTRY(mi_rrq_stall_barrier_hazard, 0x18),
239 CCI_EVENT_EXT_ATTR_ENTRY(mi_wrq_stall_barrier_hazard, 0x19),
240 CCI_EVENT_EXT_ATTR_ENTRY(mi_wrq_stall_tt_full, 0x1A),
241 /* Special event for cycles counter */
242 CCI400_CYCLE_EVENT_EXT_ATTR_ENTRY(cycles, 0xff),
243 NULL
244};
245
246static struct attribute *cci400_r1_pmu_event_attrs[] = {
247 /* Slave events */
248 CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_any, 0x0),
249 CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_device, 0x01),
250 CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_normal_or_nonshareable, 0x2),
251 CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_inner_or_outershareable, 0x3),
252 CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_cache_maintenance, 0x4),
253 CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_mem_barrier, 0x5),
254 CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_sync_barrier, 0x6),
255 CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_dvm_msg, 0x7),
256 CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_dvm_msg_sync, 0x8),
257 CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_stall_tt_full, 0x9),
258 CCI_EVENT_EXT_ATTR_ENTRY(si_r_data_last_hs_snoop, 0xA),
259 CCI_EVENT_EXT_ATTR_ENTRY(si_r_data_stall_rvalids_h_rready_l, 0xB),
260 CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_hs_any, 0xC),
261 CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_hs_device, 0xD),
262 CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_hs_normal_or_nonshareable, 0xE),
263 CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_hs_inner_or_outershare_wback_wclean, 0xF),
264 CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_hs_write_unique, 0x10),
265 CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_hs_write_line_unique, 0x11),
266 CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_hs_evict, 0x12),
267 CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_stall_tt_full, 0x13),
268 CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_stall_slave_id_hazard, 0x14),
269 /* Master events */
270 CCI_EVENT_EXT_ATTR_ENTRY(mi_retry_speculative_fetch, 0x0),
271 CCI_EVENT_EXT_ATTR_ENTRY(mi_stall_cycle_addr_hazard, 0x1),
272 CCI_EVENT_EXT_ATTR_ENTRY(mi_rrq_stall_master_id_hazard, 0x2),
273 CCI_EVENT_EXT_ATTR_ENTRY(mi_rrq_stall_hi_prio_rtq_full, 0x3),
274 CCI_EVENT_EXT_ATTR_ENTRY(mi_rrq_stall_barrier_hazard, 0x4),
275 CCI_EVENT_EXT_ATTR_ENTRY(mi_wrq_stall_barrier_hazard, 0x5),
276 CCI_EVENT_EXT_ATTR_ENTRY(mi_wrq_stall_wtq_full, 0x6),
277 CCI_EVENT_EXT_ATTR_ENTRY(mi_rrq_stall_low_prio_rtq_full, 0x7),
278 CCI_EVENT_EXT_ATTR_ENTRY(mi_rrq_stall_mid_prio_rtq_full, 0x8),
279 CCI_EVENT_EXT_ATTR_ENTRY(mi_rrq_stall_qvn_vn0, 0x9),
280 CCI_EVENT_EXT_ATTR_ENTRY(mi_rrq_stall_qvn_vn1, 0xA),
281 CCI_EVENT_EXT_ATTR_ENTRY(mi_rrq_stall_qvn_vn2, 0xB),
282 CCI_EVENT_EXT_ATTR_ENTRY(mi_rrq_stall_qvn_vn3, 0xC),
283 CCI_EVENT_EXT_ATTR_ENTRY(mi_wrq_stall_qvn_vn0, 0xD),
284 CCI_EVENT_EXT_ATTR_ENTRY(mi_wrq_stall_qvn_vn1, 0xE),
285 CCI_EVENT_EXT_ATTR_ENTRY(mi_wrq_stall_qvn_vn2, 0xF),
286 CCI_EVENT_EXT_ATTR_ENTRY(mi_wrq_stall_qvn_vn3, 0x10),
287 CCI_EVENT_EXT_ATTR_ENTRY(mi_wrq_unique_or_line_unique_addr_hazard, 0x11),
288 /* Special event for cycles counter */
289 CCI400_CYCLE_EVENT_EXT_ATTR_ENTRY(cycles, 0xff),
290 NULL
291};
292
293static ssize_t cci400_pmu_cycle_event_show(struct device *dev,
294 struct device_attribute *attr, char *buf)
295{
296 struct dev_ext_attribute *eattr = container_of(attr,
297 struct dev_ext_attribute, attr);
298 return snprintf(buf, PAGE_SIZE, "config=0x%lx\n", (unsigned long)eattr->var);
299}
300
301static int cci400_get_event_idx(struct cci_pmu *cci_pmu,
302 struct cci_pmu_hw_events *hw,
303 unsigned long cci_event)
304{
305 int idx;
306
307 /* cycles event idx is fixed */
308 if (cci_event == CCI400_PMU_CYCLES) {
309 if (test_and_set_bit(CCI400_PMU_CYCLE_CNTR_IDX, hw->used_mask))
310 return -EAGAIN;
311
312 return CCI400_PMU_CYCLE_CNTR_IDX;
313 }
314
315 for (idx = CCI400_PMU_CNTR0_IDX; idx <= CCI_PMU_CNTR_LAST(cci_pmu); ++idx)
316 if (!test_and_set_bit(idx, hw->used_mask))
317 return idx;
318
319 /* No counters available */
320 return -EAGAIN;
321}
322
323static int cci400_validate_hw_event(struct cci_pmu *cci_pmu, unsigned long hw_event)
324{
325 u8 ev_source = CCI400_PMU_EVENT_SOURCE(hw_event);
326 u8 ev_code = CCI400_PMU_EVENT_CODE(hw_event);
327 int if_type;
328
329 if (hw_event & ~CCI400_PMU_EVENT_MASK)
330 return -ENOENT;
331
332 if (hw_event == CCI400_PMU_CYCLES)
333 return hw_event;
334
335 switch (ev_source) {
336 case CCI400_PORT_S0:
337 case CCI400_PORT_S1:
338 case CCI400_PORT_S2:
339 case CCI400_PORT_S3:
340 case CCI400_PORT_S4:
341 /* Slave Interface */
342 if_type = CCI_IF_SLAVE;
343 break;
344 case CCI400_PORT_M0:
345 case CCI400_PORT_M1:
346 case CCI400_PORT_M2:
347 /* Master Interface */
348 if_type = CCI_IF_MASTER;
349 break;
350 default:
351 return -ENOENT;
352 }
353
354 if (ev_code >= cci_pmu->model->event_ranges[if_type].min &&
355 ev_code <= cci_pmu->model->event_ranges[if_type].max)
356 return hw_event;
357
358 return -ENOENT;
359}
360
361static int probe_cci400_revision(void)
362{
363 int rev;
364 rev = readl_relaxed(cci_ctrl_base + CCI_PID2) & CCI_PID2_REV_MASK;
365 rev >>= CCI_PID2_REV_SHIFT;
366
367 if (rev < CCI400_R1_PX)
368 return CCI400_R0;
369 else
370 return CCI400_R1;
371}
372
373static const struct cci_pmu_model *probe_cci_model(struct platform_device *pdev)
374{
375 if (platform_has_secure_cci_access())
376 return &cci_pmu_models[probe_cci400_revision()];
377 return NULL;
378}
379#else /* !CONFIG_ARM_CCI400_PMU */
380static inline struct cci_pmu_model *probe_cci_model(struct platform_device *pdev)
381{
382 return NULL;
383}
384#endif /* CONFIG_ARM_CCI400_PMU */
385
386#ifdef CONFIG_ARM_CCI5xx_PMU
387
388/*
389 * CCI5xx PMU event id is a 9-bit value made of two parts.
390 * bits [8:5] - Source for the event
391 * bits [4:0] - Event code (specific to type of interface)
392 *
393 *
394 */
395
396/* Port ids */
397#define CCI5xx_PORT_S0 0x0
398#define CCI5xx_PORT_S1 0x1
399#define CCI5xx_PORT_S2 0x2
400#define CCI5xx_PORT_S3 0x3
401#define CCI5xx_PORT_S4 0x4
402#define CCI5xx_PORT_S5 0x5
403#define CCI5xx_PORT_S6 0x6
404
405#define CCI5xx_PORT_M0 0x8
406#define CCI5xx_PORT_M1 0x9
407#define CCI5xx_PORT_M2 0xa
408#define CCI5xx_PORT_M3 0xb
409#define CCI5xx_PORT_M4 0xc
410#define CCI5xx_PORT_M5 0xd
411#define CCI5xx_PORT_M6 0xe
412
413#define CCI5xx_PORT_GLOBAL 0xf
414
415#define CCI5xx_PMU_EVENT_MASK 0x1ffUL
416#define CCI5xx_PMU_EVENT_SOURCE_SHIFT 0x5
417#define CCI5xx_PMU_EVENT_SOURCE_MASK 0xf
418#define CCI5xx_PMU_EVENT_CODE_SHIFT 0x0
419#define CCI5xx_PMU_EVENT_CODE_MASK 0x1f
420
421#define CCI5xx_PMU_EVENT_SOURCE(event) \
422 ((event >> CCI5xx_PMU_EVENT_SOURCE_SHIFT) & CCI5xx_PMU_EVENT_SOURCE_MASK)
423#define CCI5xx_PMU_EVENT_CODE(event) \
424 ((event >> CCI5xx_PMU_EVENT_CODE_SHIFT) & CCI5xx_PMU_EVENT_CODE_MASK)
425
426#define CCI5xx_SLAVE_PORT_MIN_EV 0x00
427#define CCI5xx_SLAVE_PORT_MAX_EV 0x1f
428#define CCI5xx_MASTER_PORT_MIN_EV 0x00
429#define CCI5xx_MASTER_PORT_MAX_EV 0x06
430#define CCI5xx_GLOBAL_PORT_MIN_EV 0x00
431#define CCI5xx_GLOBAL_PORT_MAX_EV 0x0f
432
433
434#define CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(_name, _config) \
435 CCI_EXT_ATTR_ENTRY(_name, cci5xx_pmu_global_event_show, \
436 (unsigned long) _config)
437
438static ssize_t cci5xx_pmu_global_event_show(struct device *dev,
439 struct device_attribute *attr, char *buf);
440
441static struct attribute *cci5xx_pmu_format_attrs[] = {
442 CCI_FORMAT_EXT_ATTR_ENTRY(event, "config:0-4"),
443 CCI_FORMAT_EXT_ATTR_ENTRY(source, "config:5-8"),
444 NULL,
445};
446
447static struct attribute *cci5xx_pmu_event_attrs[] = {
448 /* Slave events */
449 CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_arvalid, 0x0),
450 CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_dev, 0x1),
451 CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_nonshareable, 0x2),
452 CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_shareable_non_alloc, 0x3),
453 CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_shareable_alloc, 0x4),
454 CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_invalidate, 0x5),
455 CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_cache_maint, 0x6),
456 CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_dvm_msg, 0x7),
457 CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_rval, 0x8),
458 CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_rlast_snoop, 0x9),
459 CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_hs_awalid, 0xA),
460 CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_dev, 0xB),
461 CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_non_shareable, 0xC),
462 CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_share_wb, 0xD),
463 CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_share_wlu, 0xE),
464 CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_share_wunique, 0xF),
465 CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_evict, 0x10),
466 CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_wrevict, 0x11),
467 CCI_EVENT_EXT_ATTR_ENTRY(si_w_data_beat, 0x12),
468 CCI_EVENT_EXT_ATTR_ENTRY(si_srq_acvalid, 0x13),
469 CCI_EVENT_EXT_ATTR_ENTRY(si_srq_read, 0x14),
470 CCI_EVENT_EXT_ATTR_ENTRY(si_srq_clean, 0x15),
471 CCI_EVENT_EXT_ATTR_ENTRY(si_srq_data_transfer_low, 0x16),
472 CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_stall_arvalid, 0x17),
473 CCI_EVENT_EXT_ATTR_ENTRY(si_r_data_stall, 0x18),
474 CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_stall, 0x19),
475 CCI_EVENT_EXT_ATTR_ENTRY(si_w_data_stall, 0x1A),
476 CCI_EVENT_EXT_ATTR_ENTRY(si_w_resp_stall, 0x1B),
477 CCI_EVENT_EXT_ATTR_ENTRY(si_srq_stall, 0x1C),
478 CCI_EVENT_EXT_ATTR_ENTRY(si_s_data_stall, 0x1D),
479 CCI_EVENT_EXT_ATTR_ENTRY(si_rq_stall_ot_limit, 0x1E),
480 CCI_EVENT_EXT_ATTR_ENTRY(si_r_stall_arbit, 0x1F),
481
482 /* Master events */
483 CCI_EVENT_EXT_ATTR_ENTRY(mi_r_data_beat_any, 0x0),
484 CCI_EVENT_EXT_ATTR_ENTRY(mi_w_data_beat_any, 0x1),
485 CCI_EVENT_EXT_ATTR_ENTRY(mi_rrq_stall, 0x2),
486 CCI_EVENT_EXT_ATTR_ENTRY(mi_r_data_stall, 0x3),
487 CCI_EVENT_EXT_ATTR_ENTRY(mi_wrq_stall, 0x4),
488 CCI_EVENT_EXT_ATTR_ENTRY(mi_w_data_stall, 0x5),
489 CCI_EVENT_EXT_ATTR_ENTRY(mi_w_resp_stall, 0x6),
490
491 /* Global events */
492 CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_filter_bank_0_1, 0x0),
493 CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_filter_bank_2_3, 0x1),
494 CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_filter_bank_4_5, 0x2),
495 CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_filter_bank_6_7, 0x3),
496 CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_miss_filter_bank_0_1, 0x4),
497 CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_miss_filter_bank_2_3, 0x5),
498 CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_miss_filter_bank_4_5, 0x6),
499 CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_miss_filter_bank_6_7, 0x7),
500 CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_back_invalidation, 0x8),
501 CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_stall_alloc_busy, 0x9),
502 CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_stall_tt_full, 0xA),
503 CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_wrq, 0xB),
504 CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_cd_hs, 0xC),
505 CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_rq_stall_addr_hazard, 0xD),
506 CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_rq_stall_tt_full, 0xE),
507 CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_rq_tzmp1_prot, 0xF),
508 NULL
509};
510
511static ssize_t cci5xx_pmu_global_event_show(struct device *dev,
512 struct device_attribute *attr, char *buf)
513{
514 struct dev_ext_attribute *eattr = container_of(attr,
515 struct dev_ext_attribute, attr);
516 /* Global events have single fixed source code */
517 return snprintf(buf, PAGE_SIZE, "event=0x%lx,source=0x%x\n",
518 (unsigned long)eattr->var, CCI5xx_PORT_GLOBAL);
519}
520
521/*
522 * CCI500 provides 8 independent event counters that can count
523 * any of the events available.
524 * CCI500 PMU event source ids
525 * 0x0-0x6 - Slave interfaces
526 * 0x8-0xD - Master interfaces
527 * 0xf - Global Events
528 * 0x7,0xe - Reserved
529 */
530static int cci500_validate_hw_event(struct cci_pmu *cci_pmu,
531 unsigned long hw_event)
532{
533 u32 ev_source = CCI5xx_PMU_EVENT_SOURCE(hw_event);
534 u32 ev_code = CCI5xx_PMU_EVENT_CODE(hw_event);
535 int if_type;
536
537 if (hw_event & ~CCI5xx_PMU_EVENT_MASK)
538 return -ENOENT;
539
540 switch (ev_source) {
541 case CCI5xx_PORT_S0:
542 case CCI5xx_PORT_S1:
543 case CCI5xx_PORT_S2:
544 case CCI5xx_PORT_S3:
545 case CCI5xx_PORT_S4:
546 case CCI5xx_PORT_S5:
547 case CCI5xx_PORT_S6:
548 if_type = CCI_IF_SLAVE;
549 break;
550 case CCI5xx_PORT_M0:
551 case CCI5xx_PORT_M1:
552 case CCI5xx_PORT_M2:
553 case CCI5xx_PORT_M3:
554 case CCI5xx_PORT_M4:
555 case CCI5xx_PORT_M5:
556 if_type = CCI_IF_MASTER;
557 break;
558 case CCI5xx_PORT_GLOBAL:
559 if_type = CCI_IF_GLOBAL;
560 break;
561 default:
562 return -ENOENT;
563 }
564
565 if (ev_code >= cci_pmu->model->event_ranges[if_type].min &&
566 ev_code <= cci_pmu->model->event_ranges[if_type].max)
567 return hw_event;
568
569 return -ENOENT;
570}
571
572/*
573 * CCI550 provides 8 independent event counters that can count
574 * any of the events available.
575 * CCI550 PMU event source ids
576 * 0x0-0x6 - Slave interfaces
577 * 0x8-0xe - Master interfaces
578 * 0xf - Global Events
579 * 0x7 - Reserved
580 */
581static int cci550_validate_hw_event(struct cci_pmu *cci_pmu,
582 unsigned long hw_event)
583{
584 u32 ev_source = CCI5xx_PMU_EVENT_SOURCE(hw_event);
585 u32 ev_code = CCI5xx_PMU_EVENT_CODE(hw_event);
586 int if_type;
587
588 if (hw_event & ~CCI5xx_PMU_EVENT_MASK)
589 return -ENOENT;
590
591 switch (ev_source) {
592 case CCI5xx_PORT_S0:
593 case CCI5xx_PORT_S1:
594 case CCI5xx_PORT_S2:
595 case CCI5xx_PORT_S3:
596 case CCI5xx_PORT_S4:
597 case CCI5xx_PORT_S5:
598 case CCI5xx_PORT_S6:
599 if_type = CCI_IF_SLAVE;
600 break;
601 case CCI5xx_PORT_M0:
602 case CCI5xx_PORT_M1:
603 case CCI5xx_PORT_M2:
604 case CCI5xx_PORT_M3:
605 case CCI5xx_PORT_M4:
606 case CCI5xx_PORT_M5:
607 case CCI5xx_PORT_M6:
608 if_type = CCI_IF_MASTER;
609 break;
610 case CCI5xx_PORT_GLOBAL:
611 if_type = CCI_IF_GLOBAL;
612 break;
613 default:
614 return -ENOENT;
615 }
616
617 if (ev_code >= cci_pmu->model->event_ranges[if_type].min &&
618 ev_code <= cci_pmu->model->event_ranges[if_type].max)
619 return hw_event;
620
621 return -ENOENT;
622}
623
624#endif /* CONFIG_ARM_CCI5xx_PMU */
625
626/*
627 * Program the CCI PMU counters which have PERF_HES_ARCH set
628 * with the event period and mark them ready before we enable
629 * PMU.
630 */
631static void cci_pmu_sync_counters(struct cci_pmu *cci_pmu)
632{
633 int i;
634 struct cci_pmu_hw_events *cci_hw = &cci_pmu->hw_events;
635
636 DECLARE_BITMAP(mask, cci_pmu->num_cntrs);
637
638 bitmap_zero(mask, cci_pmu->num_cntrs);
639 for_each_set_bit(i, cci_pmu->hw_events.used_mask, cci_pmu->num_cntrs) {
640 struct perf_event *event = cci_hw->events[i];
641
642 if (WARN_ON(!event))
643 continue;
644
645 /* Leave the events which are not counting */
646 if (event->hw.state & PERF_HES_STOPPED)
647 continue;
648 if (event->hw.state & PERF_HES_ARCH) {
649 set_bit(i, mask);
650 event->hw.state &= ~PERF_HES_ARCH;
651 }
652 }
653
654 pmu_write_counters(cci_pmu, mask);
655}
656
657/* Should be called with cci_pmu->hw_events->pmu_lock held */
658static void __cci_pmu_enable_nosync(struct cci_pmu *cci_pmu)
659{
660 u32 val;
661
662 /* Enable all the PMU counters. */
663 val = readl_relaxed(cci_ctrl_base + CCI_PMCR) | CCI_PMCR_CEN;
664 writel(val, cci_ctrl_base + CCI_PMCR);
665}
666
667/* Should be called with cci_pmu->hw_events->pmu_lock held */
668static void __cci_pmu_enable_sync(struct cci_pmu *cci_pmu)
669{
670 cci_pmu_sync_counters(cci_pmu);
671 __cci_pmu_enable_nosync(cci_pmu);
672}
673
674/* Should be called with cci_pmu->hw_events->pmu_lock held */
675static void __cci_pmu_disable(void)
676{
677 u32 val;
678
679 /* Disable all the PMU counters. */
680 val = readl_relaxed(cci_ctrl_base + CCI_PMCR) & ~CCI_PMCR_CEN;
681 writel(val, cci_ctrl_base + CCI_PMCR);
682}
683
684static ssize_t cci_pmu_format_show(struct device *dev,
685 struct device_attribute *attr, char *buf)
686{
687 struct dev_ext_attribute *eattr = container_of(attr,
688 struct dev_ext_attribute, attr);
689 return snprintf(buf, PAGE_SIZE, "%s\n", (char *)eattr->var);
690}
691
692static ssize_t cci_pmu_event_show(struct device *dev,
693 struct device_attribute *attr, char *buf)
694{
695 struct dev_ext_attribute *eattr = container_of(attr,
696 struct dev_ext_attribute, attr);
697 /* source parameter is mandatory for normal PMU events */
698 return snprintf(buf, PAGE_SIZE, "source=?,event=0x%lx\n",
699 (unsigned long)eattr->var);
700}
701
702static int pmu_is_valid_counter(struct cci_pmu *cci_pmu, int idx)
703{
704 return 0 <= idx && idx <= CCI_PMU_CNTR_LAST(cci_pmu);
705}
706
707static u32 pmu_read_register(struct cci_pmu *cci_pmu, int idx, unsigned int offset)
708{
709 return readl_relaxed(cci_pmu->base +
710 CCI_PMU_CNTR_BASE(cci_pmu->model, idx) + offset);
711}
712
713static void pmu_write_register(struct cci_pmu *cci_pmu, u32 value,
714 int idx, unsigned int offset)
715{
716 writel_relaxed(value, cci_pmu->base +
717 CCI_PMU_CNTR_BASE(cci_pmu->model, idx) + offset);
718}
719
720static void pmu_disable_counter(struct cci_pmu *cci_pmu, int idx)
721{
722 pmu_write_register(cci_pmu, 0, idx, CCI_PMU_CNTR_CTRL);
723}
724
725static void pmu_enable_counter(struct cci_pmu *cci_pmu, int idx)
726{
727 pmu_write_register(cci_pmu, 1, idx, CCI_PMU_CNTR_CTRL);
728}
729
730static bool __maybe_unused
731pmu_counter_is_enabled(struct cci_pmu *cci_pmu, int idx)
732{
733 return (pmu_read_register(cci_pmu, idx, CCI_PMU_CNTR_CTRL) & 0x1) != 0;
734}
735
736static void pmu_set_event(struct cci_pmu *cci_pmu, int idx, unsigned long event)
737{
738 pmu_write_register(cci_pmu, event, idx, CCI_PMU_EVT_SEL);
739}
740
741/*
742 * For all counters on the CCI-PMU, disable any 'enabled' counters,
743 * saving the changed counters in the mask, so that we can restore
744 * it later using pmu_restore_counters. The mask is private to the
745 * caller. We cannot rely on the used_mask maintained by the CCI_PMU
746 * as it only tells us if the counter is assigned to perf_event or not.
747 * The state of the perf_event cannot be locked by the PMU layer, hence
748 * we check the individual counter status (which can be locked by
749 * cci_pmu->hw_events->pmu_lock).
750 *
751 * @mask should be initialised to empty by the caller.
752 */
753static void __maybe_unused
754pmu_save_counters(struct cci_pmu *cci_pmu, unsigned long *mask)
755{
756 int i;
757
758 for (i = 0; i < cci_pmu->num_cntrs; i++) {
759 if (pmu_counter_is_enabled(cci_pmu, i)) {
760 set_bit(i, mask);
761 pmu_disable_counter(cci_pmu, i);
762 }
763 }
764}
765
766/*
767 * Restore the status of the counters. Reversal of the pmu_save_counters().
768 * For each counter set in the mask, enable the counter back.
769 */
770static void __maybe_unused
771pmu_restore_counters(struct cci_pmu *cci_pmu, unsigned long *mask)
772{
773 int i;
774
775 for_each_set_bit(i, mask, cci_pmu->num_cntrs)
776 pmu_enable_counter(cci_pmu, i);
777}
778
779/*
780 * Returns the number of programmable counters actually implemented
781 * by the cci
782 */
783static u32 pmu_get_max_counters(void)
784{
785 return (readl_relaxed(cci_ctrl_base + CCI_PMCR) &
786 CCI_PMCR_NCNT_MASK) >> CCI_PMCR_NCNT_SHIFT;
787}
788
789static int pmu_get_event_idx(struct cci_pmu_hw_events *hw, struct perf_event *event)
790{
791 struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu);
792 unsigned long cci_event = event->hw.config_base;
793 int idx;
794
795 if (cci_pmu->model->get_event_idx)
796 return cci_pmu->model->get_event_idx(cci_pmu, hw, cci_event);
797
798 /* Generic code to find an unused idx from the mask */
799	for (idx = 0; idx <= CCI_PMU_CNTR_LAST(cci_pmu); idx++)
800 if (!test_and_set_bit(idx, hw->used_mask))
801 return idx;
802
803 /* No counters available */
804 return -EAGAIN;
805}
806
807static int pmu_map_event(struct perf_event *event)
808{
809 struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu);
810
811 if (event->attr.type < PERF_TYPE_MAX ||
812 !cci_pmu->model->validate_hw_event)
813 return -ENOENT;
814
815 return cci_pmu->model->validate_hw_event(cci_pmu, event->attr.config);
816}
817
818static int pmu_request_irq(struct cci_pmu *cci_pmu, irq_handler_t handler)
819{
820 int i;
821 struct platform_device *pmu_device = cci_pmu->plat_device;
822
823 if (unlikely(!pmu_device))
824 return -ENODEV;
825
826 if (cci_pmu->nr_irqs < 1) {
827 dev_err(&pmu_device->dev, "no irqs for CCI PMUs defined\n");
828 return -ENODEV;
829 }
830
831 /*
832 * Register all available CCI PMU interrupts. In the interrupt handler
833 * we iterate over the counters checking for interrupt source (the
834 * overflowing counter) and clear it.
835 *
836 * This should allow handling of non-unique interrupt for the counters.
837 */
838 for (i = 0; i < cci_pmu->nr_irqs; i++) {
839 int err = request_irq(cci_pmu->irqs[i], handler, IRQF_SHARED,
840 "arm-cci-pmu", cci_pmu);
841 if (err) {
842 dev_err(&pmu_device->dev, "unable to request IRQ%d for ARM CCI PMU counters\n",
843 cci_pmu->irqs[i]);
844 return err;
845 }
846
847 set_bit(i, &cci_pmu->active_irqs);
848 }
849
850 return 0;
851}
852
853static void pmu_free_irq(struct cci_pmu *cci_pmu)
854{
855 int i;
856
857 for (i = 0; i < cci_pmu->nr_irqs; i++) {
858 if (!test_and_clear_bit(i, &cci_pmu->active_irqs))
859 continue;
860
861 free_irq(cci_pmu->irqs[i], cci_pmu);
862 }
863}
864
865static u32 pmu_read_counter(struct perf_event *event)
866{
867 struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu);
868 struct hw_perf_event *hw_counter = &event->hw;
869 int idx = hw_counter->idx;
870 u32 value;
871
872 if (unlikely(!pmu_is_valid_counter(cci_pmu, idx))) {
873 dev_err(&cci_pmu->plat_device->dev, "Invalid CCI PMU counter %d\n", idx);
874 return 0;
875 }
876 value = pmu_read_register(cci_pmu, idx, CCI_PMU_CNTR);
877
878 return value;
879}
880
881static void pmu_write_counter(struct cci_pmu *cci_pmu, u32 value, int idx)
882{
883 pmu_write_register(cci_pmu, value, idx, CCI_PMU_CNTR);
884}
885
886static void __pmu_write_counters(struct cci_pmu *cci_pmu, unsigned long *mask)
887{
888 int i;
889 struct cci_pmu_hw_events *cci_hw = &cci_pmu->hw_events;
890
891 for_each_set_bit(i, mask, cci_pmu->num_cntrs) {
892 struct perf_event *event = cci_hw->events[i];
893
894 if (WARN_ON(!event))
895 continue;
896 pmu_write_counter(cci_pmu, local64_read(&event->hw.prev_count), i);
897 }
898}
899
900static void pmu_write_counters(struct cci_pmu *cci_pmu, unsigned long *mask)
901{
902 if (cci_pmu->model->write_counters)
903 cci_pmu->model->write_counters(cci_pmu, mask);
904 else
905 __pmu_write_counters(cci_pmu, mask);
906}
907
908#ifdef CONFIG_ARM_CCI5xx_PMU
909
910/*
911 * CCI-500/CCI-550 has advanced power saving policies, which could gate the
912 * clocks to the PMU counters, which makes the writes to them ineffective.
913 * The only way to write to those counters is when the global counters
914 * are enabled and the particular counter is enabled.
915 *
916 * So we do the following :
917 *
918 * 1) Disable all the PMU counters, saving their current state
919 * 2) Enable the global PMU profiling, now that all counters are
920 * disabled.
921 *
922 * For each counter to be programmed, repeat steps 3-7:
923 *
924 * 3) Write an invalid event code to the event control register for the
925 *    counter, so that the counters are not modified.
926 * 4) Enable the counter control for the counter.
927 * 5) Set the counter value
928 * 6) Disable the counter
929 * 7) Restore the event in the target counter
930 *
931 * 8) Disable the global PMU.
932 * 9) Restore the status of the rest of the counters.
933 *
934 * We choose an event which for CCI-5xx is guaranteed not to count.
935 * We use the highest possible event code (0x1f) for the master interface 0.
936 */
937#define CCI5xx_INVALID_EVENT ((CCI5xx_PORT_M0 << CCI5xx_PMU_EVENT_SOURCE_SHIFT) | \
938 (CCI5xx_PMU_EVENT_CODE_MASK << CCI5xx_PMU_EVENT_CODE_SHIFT))
939static void cci5xx_pmu_write_counters(struct cci_pmu *cci_pmu, unsigned long *mask)
940{
941 int i;
942 DECLARE_BITMAP(saved_mask, cci_pmu->num_cntrs);
943
944 bitmap_zero(saved_mask, cci_pmu->num_cntrs);
945 pmu_save_counters(cci_pmu, saved_mask);
946
947 /*
948 * Now that all the counters are disabled, we can safely turn the PMU on,
949 * without syncing the status of the counters
950 */
951 __cci_pmu_enable_nosync(cci_pmu);
952
953 for_each_set_bit(i, mask, cci_pmu->num_cntrs) {
954 struct perf_event *event = cci_pmu->hw_events.events[i];
955
956 if (WARN_ON(!event))
957 continue;
958
959 pmu_set_event(cci_pmu, i, CCI5xx_INVALID_EVENT);
960 pmu_enable_counter(cci_pmu, i);
961 pmu_write_counter(cci_pmu, local64_read(&event->hw.prev_count), i);
962 pmu_disable_counter(cci_pmu, i);
963 pmu_set_event(cci_pmu, i, event->hw.config_base);
964 }
965
966 __cci_pmu_disable();
967
968 pmu_restore_counters(cci_pmu, saved_mask);
969}
970
971#endif /* CONFIG_ARM_CCI5xx_PMU */
972
973static u64 pmu_event_update(struct perf_event *event)
974{
975 struct hw_perf_event *hwc = &event->hw;
976 u64 delta, prev_raw_count, new_raw_count;
977
978 do {
979 prev_raw_count = local64_read(&hwc->prev_count);
980 new_raw_count = pmu_read_counter(event);
981 } while (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
982 new_raw_count) != prev_raw_count);
983
984 delta = (new_raw_count - prev_raw_count) & CCI_PMU_CNTR_MASK;
985
986 local64_add(delta, &event->count);
987
988 return new_raw_count;
989}
990
991static void pmu_read(struct perf_event *event)
992{
993 pmu_event_update(event);
994}
995
996static void pmu_event_set_period(struct perf_event *event)
997{
998 struct hw_perf_event *hwc = &event->hw;
999 /*
1000 * The CCI PMU counters have a period of 2^32. To account for the
1001 * possibility of extreme interrupt latency we program for a period of
1002 * half that. Hopefully we can handle the interrupt before another 2^31
1003 * events occur and the counter overtakes its previous value.
1004 */
1005 u64 val = 1ULL << 31;
1006 local64_set(&hwc->prev_count, val);
1007
1008 /*
1009 * CCI PMU uses PERF_HES_ARCH to keep track of the counters, whose
1010 * values need to be synced with the s/w state before the PMU is
1011 * enabled.
1012 * Mark this counter for sync.
1013 */
1014 hwc->state |= PERF_HES_ARCH;
1015}
1016
1017static irqreturn_t pmu_handle_irq(int irq_num, void *dev)
1018{
1019 unsigned long flags;
1020 struct cci_pmu *cci_pmu = dev;
1021 struct cci_pmu_hw_events *events = &cci_pmu->hw_events;
1022 int idx, handled = IRQ_NONE;
1023
1024 raw_spin_lock_irqsave(&events->pmu_lock, flags);
1025
1026 /* Disable the PMU while we walk through the counters */
1027 __cci_pmu_disable();
1028 /*
1029 * Iterate over counters and update the corresponding perf events.
1030 * This should work regardless of whether we have per-counter overflow
1031 * interrupt or a combined overflow interrupt.
1032 */
1033 for (idx = 0; idx <= CCI_PMU_CNTR_LAST(cci_pmu); idx++) {
1034 struct perf_event *event = events->events[idx];
1035
1036 if (!event)
1037 continue;
1038
1039 /* Did this counter overflow? */
1040 if (!(pmu_read_register(cci_pmu, idx, CCI_PMU_OVRFLW) &
1041 CCI_PMU_OVRFLW_FLAG))
1042 continue;
1043
1044 pmu_write_register(cci_pmu, CCI_PMU_OVRFLW_FLAG, idx,
1045 CCI_PMU_OVRFLW);
1046
1047 pmu_event_update(event);
1048 pmu_event_set_period(event);
1049 handled = IRQ_HANDLED;
1050 }
1051
1052 /* Enable the PMU and sync possibly overflowed counters */
1053 __cci_pmu_enable_sync(cci_pmu);
1054 raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
1055
1056 return IRQ_RETVAL(handled);
1057}
1058
1059static int cci_pmu_get_hw(struct cci_pmu *cci_pmu)
1060{
1061 int ret = pmu_request_irq(cci_pmu, pmu_handle_irq);
1062 if (ret) {
1063 pmu_free_irq(cci_pmu);
1064 return ret;
1065 }
1066 return 0;
1067}
1068
1069static void cci_pmu_put_hw(struct cci_pmu *cci_pmu)
1070{
1071 pmu_free_irq(cci_pmu);
1072}
1073
1074static void hw_perf_event_destroy(struct perf_event *event)
1075{
1076 struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu);
1077 atomic_t *active_events = &cci_pmu->active_events;
1078 struct mutex *reserve_mutex = &cci_pmu->reserve_mutex;
1079
1080 if (atomic_dec_and_mutex_lock(active_events, reserve_mutex)) {
1081 cci_pmu_put_hw(cci_pmu);
1082 mutex_unlock(reserve_mutex);
1083 }
1084}
1085
1086static void cci_pmu_enable(struct pmu *pmu)
1087{
1088 struct cci_pmu *cci_pmu = to_cci_pmu(pmu);
1089 struct cci_pmu_hw_events *hw_events = &cci_pmu->hw_events;
1090 int enabled = bitmap_weight(hw_events->used_mask, cci_pmu->num_cntrs);
1091 unsigned long flags;
1092
1093 if (!enabled)
1094 return;
1095
1096 raw_spin_lock_irqsave(&hw_events->pmu_lock, flags);
1097 __cci_pmu_enable_sync(cci_pmu);
1098 raw_spin_unlock_irqrestore(&hw_events->pmu_lock, flags);
1099
1100}
1101
1102static void cci_pmu_disable(struct pmu *pmu)
1103{
1104 struct cci_pmu *cci_pmu = to_cci_pmu(pmu);
1105 struct cci_pmu_hw_events *hw_events = &cci_pmu->hw_events;
1106 unsigned long flags;
1107
1108 raw_spin_lock_irqsave(&hw_events->pmu_lock, flags);
1109 __cci_pmu_disable();
1110 raw_spin_unlock_irqrestore(&hw_events->pmu_lock, flags);
1111}
1112
1113/*
1114 * Check if the idx represents a non-programmable counter.
1115 * All the fixed event counters are mapped before the programmable
1116 * counters.
1117 */
1118static bool pmu_fixed_hw_idx(struct cci_pmu *cci_pmu, int idx)
1119{
1120 return (idx >= 0) && (idx < cci_pmu->model->fixed_hw_cntrs);
1121}
1122
1123static void cci_pmu_start(struct perf_event *event, int pmu_flags)
1124{
1125 struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu);
1126 struct cci_pmu_hw_events *hw_events = &cci_pmu->hw_events;
1127 struct hw_perf_event *hwc = &event->hw;
1128 int idx = hwc->idx;
1129 unsigned long flags;
1130
1131 /*
1132 * To handle interrupt latency, we always reprogram the period
1133	 * regardless of PERF_EF_RELOAD.
1134 */
1135 if (pmu_flags & PERF_EF_RELOAD)
1136 WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
1137
1138 hwc->state = 0;
1139
1140 if (unlikely(!pmu_is_valid_counter(cci_pmu, idx))) {
1141 dev_err(&cci_pmu->plat_device->dev, "Invalid CCI PMU counter %d\n", idx);
1142 return;
1143 }
1144
1145 raw_spin_lock_irqsave(&hw_events->pmu_lock, flags);
1146
1147 /* Configure the counter unless you are counting a fixed event */
1148 if (!pmu_fixed_hw_idx(cci_pmu, idx))
1149 pmu_set_event(cci_pmu, idx, hwc->config_base);
1150
1151 pmu_event_set_period(event);
1152 pmu_enable_counter(cci_pmu, idx);
1153
1154 raw_spin_unlock_irqrestore(&hw_events->pmu_lock, flags);
1155}
1156
1157static void cci_pmu_stop(struct perf_event *event, int pmu_flags)
1158{
1159 struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu);
1160 struct hw_perf_event *hwc = &event->hw;
1161 int idx = hwc->idx;
1162
1163 if (hwc->state & PERF_HES_STOPPED)
1164 return;
1165
1166 if (unlikely(!pmu_is_valid_counter(cci_pmu, idx))) {
1167 dev_err(&cci_pmu->plat_device->dev, "Invalid CCI PMU counter %d\n", idx);
1168 return;
1169 }
1170
1171 /*
1172 * We always reprogram the counter, so ignore PERF_EF_UPDATE. See
1173 * cci_pmu_start()
1174 */
1175 pmu_disable_counter(cci_pmu, idx);
1176 pmu_event_update(event);
1177 hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
1178}
1179
1180static int cci_pmu_add(struct perf_event *event, int flags)
1181{
1182 struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu);
1183 struct cci_pmu_hw_events *hw_events = &cci_pmu->hw_events;
1184 struct hw_perf_event *hwc = &event->hw;
1185 int idx;
1186 int err = 0;
1187
1188 perf_pmu_disable(event->pmu);
1189
1190	/* If we don't have space for the counter then finish early. */
1191 idx = pmu_get_event_idx(hw_events, event);
1192 if (idx < 0) {
1193 err = idx;
1194 goto out;
1195 }
1196
1197 event->hw.idx = idx;
1198 hw_events->events[idx] = event;
1199
1200 hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
1201 if (flags & PERF_EF_START)
1202 cci_pmu_start(event, PERF_EF_RELOAD);
1203
1204 /* Propagate our changes to the userspace mapping. */
1205 perf_event_update_userpage(event);
1206
1207out:
1208 perf_pmu_enable(event->pmu);
1209 return err;
1210}
1211
1212static void cci_pmu_del(struct perf_event *event, int flags)
1213{
1214 struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu);
1215 struct cci_pmu_hw_events *hw_events = &cci_pmu->hw_events;
1216 struct hw_perf_event *hwc = &event->hw;
1217 int idx = hwc->idx;
1218
1219 cci_pmu_stop(event, PERF_EF_UPDATE);
1220 hw_events->events[idx] = NULL;
1221 clear_bit(idx, hw_events->used_mask);
1222
1223 perf_event_update_userpage(event);
1224}
1225
1226static int validate_event(struct pmu *cci_pmu,
1227 struct cci_pmu_hw_events *hw_events,
1228 struct perf_event *event)
1229{
1230 if (is_software_event(event))
1231 return 1;
1232
1233 /*
1234 * Reject groups spanning multiple HW PMUs (e.g. CPU + CCI). The
1235 * core perf code won't check that the pmu->ctx == leader->ctx
1236 * until after pmu->event_init(event).
1237 */
1238 if (event->pmu != cci_pmu)
1239 return 0;
1240
1241 if (event->state < PERF_EVENT_STATE_OFF)
1242 return 1;
1243
1244 if (event->state == PERF_EVENT_STATE_OFF && !event->attr.enable_on_exec)
1245 return 1;
1246
1247 return pmu_get_event_idx(hw_events, event) >= 0;
1248}
1249
1250static int validate_group(struct perf_event *event)
1251{
1252 struct perf_event *sibling, *leader = event->group_leader;
1253 struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu);
1254 unsigned long mask[BITS_TO_LONGS(cci_pmu->num_cntrs)];
1255 struct cci_pmu_hw_events fake_pmu = {
1256 /*
1257 * Initialise the fake PMU. We only need to populate the
1258 * used_mask for the purposes of validation.
1259 */
1260 .used_mask = mask,
1261 };
1262 memset(mask, 0, BITS_TO_LONGS(cci_pmu->num_cntrs) * sizeof(unsigned long));
1263
1264 if (!validate_event(event->pmu, &fake_pmu, leader))
1265 return -EINVAL;
1266
1267 list_for_each_entry(sibling, &leader->sibling_list, group_entry) {
1268 if (!validate_event(event->pmu, &fake_pmu, sibling))
1269 return -EINVAL;
1270 }
1271
1272 if (!validate_event(event->pmu, &fake_pmu, event))
1273 return -EINVAL;
1274
1275 return 0;
1276}
1277
1278static int __hw_perf_event_init(struct perf_event *event)
1279{
1280 struct hw_perf_event *hwc = &event->hw;
1281 int mapping;
1282
1283 mapping = pmu_map_event(event);
1284
1285 if (mapping < 0) {
1286 pr_debug("event %x:%llx not supported\n", event->attr.type,
1287 event->attr.config);
1288 return mapping;
1289 }
1290
1291 /*
1292 * We don't assign an index until we actually place the event onto
1293 * hardware. Use -1 to signify that we haven't decided where to put it
1294 * yet.
1295 */
1296 hwc->idx = -1;
1297 hwc->config_base = 0;
1298 hwc->config = 0;
1299 hwc->event_base = 0;
1300
1301 /*
1302 * Store the event encoding into the config_base field.
1303 */
1304 hwc->config_base |= (unsigned long)mapping;
1305
1306 /*
1307 * Limit the sample_period to half of the counter width. That way, the
1308 * new counter value is far less likely to overtake the previous one
1309 * unless you have some serious IRQ latency issues.
1310 */
1311 hwc->sample_period = CCI_PMU_CNTR_MASK >> 1;
1312 hwc->last_period = hwc->sample_period;
1313 local64_set(&hwc->period_left, hwc->sample_period);
1314
1315 if (event->group_leader != event) {
1316 if (validate_group(event) != 0)
1317 return -EINVAL;
1318 }
1319
1320 return 0;
1321}
1322
1323static int cci_pmu_event_init(struct perf_event *event)
1324{
1325 struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu);
1326 atomic_t *active_events = &cci_pmu->active_events;
1327 int err = 0;
1328 int cpu;
1329
1330 if (event->attr.type != event->pmu->type)
1331 return -ENOENT;
1332
1333 /* Shared by all CPUs, no meaningful state to sample */
1334 if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
1335 return -EOPNOTSUPP;
1336
1337 /* We have no filtering of any kind */
1338 if (event->attr.exclude_user ||
1339 event->attr.exclude_kernel ||
1340 event->attr.exclude_hv ||
1341 event->attr.exclude_idle ||
1342 event->attr.exclude_host ||
1343 event->attr.exclude_guest)
1344 return -EINVAL;
1345
1346 /*
1347 * Following the example set by other "uncore" PMUs, we accept any CPU
1348 * and rewrite its affinity dynamically rather than having perf core
1349 * handle cpu == -1 and pid == -1 for this case.
1350 *
1351 * The perf core will pin online CPUs for the duration of this call and
1352 * the event being installed into its context, so the PMU's CPU can't
1353 * change under our feet.
1354 */
1355 cpu = cpumask_first(&cci_pmu->cpus);
1356 if (event->cpu < 0 || cpu < 0)
1357 return -EINVAL;
1358 event->cpu = cpu;
1359
1360 event->destroy = hw_perf_event_destroy;
1361 if (!atomic_inc_not_zero(active_events)) {
1362 mutex_lock(&cci_pmu->reserve_mutex);
1363 if (atomic_read(active_events) == 0)
1364 err = cci_pmu_get_hw(cci_pmu);
1365 if (!err)
1366 atomic_inc(active_events);
1367 mutex_unlock(&cci_pmu->reserve_mutex);
1368 }
1369 if (err)
1370 return err;
1371
1372 err = __hw_perf_event_init(event);
1373 if (err)
1374 hw_perf_event_destroy(event);
1375
1376 return err;
1377}
1378
1379static ssize_t pmu_cpumask_attr_show(struct device *dev,
1380 struct device_attribute *attr, char *buf)
1381{
1382 struct pmu *pmu = dev_get_drvdata(dev);
1383 struct cci_pmu *cci_pmu = to_cci_pmu(pmu);
1384
1385 int n = scnprintf(buf, PAGE_SIZE - 1, "%*pbl",
1386 cpumask_pr_args(&cci_pmu->cpus));
1387 buf[n++] = '\n';
1388 buf[n] = '\0';
1389 return n;
1390}
1391
1392static struct device_attribute pmu_cpumask_attr =
1393 __ATTR(cpumask, S_IRUGO, pmu_cpumask_attr_show, NULL);
1394
1395static struct attribute *pmu_attrs[] = {
1396 &pmu_cpumask_attr.attr,
1397 NULL,
1398};
1399
1400static struct attribute_group pmu_attr_group = {
1401 .attrs = pmu_attrs,
1402};
1403
1404static struct attribute_group pmu_format_attr_group = {
1405 .name = "format",
1406 .attrs = NULL, /* Filled in cci_pmu_init_attrs */
1407};
1408
1409static struct attribute_group pmu_event_attr_group = {
1410 .name = "events",
1411 .attrs = NULL, /* Filled in cci_pmu_init_attrs */
1412};
1413
1414static const struct attribute_group *pmu_attr_groups[] = {
1415 &pmu_attr_group,
1416 &pmu_format_attr_group,
1417 &pmu_event_attr_group,
1418 NULL
1419};
1420
1421static int cci_pmu_init(struct cci_pmu *cci_pmu, struct platform_device *pdev)
1422{
1423 const struct cci_pmu_model *model = cci_pmu->model;
1424 char *name = model->name;
1425 u32 num_cntrs;
1426
1427 pmu_event_attr_group.attrs = model->event_attrs;
1428 pmu_format_attr_group.attrs = model->format_attrs;
1429
1430 cci_pmu->pmu = (struct pmu) {
1431 .name = cci_pmu->model->name,
1432 .task_ctx_nr = perf_invalid_context,
1433 .pmu_enable = cci_pmu_enable,
1434 .pmu_disable = cci_pmu_disable,
1435 .event_init = cci_pmu_event_init,
1436 .add = cci_pmu_add,
1437 .del = cci_pmu_del,
1438 .start = cci_pmu_start,
1439 .stop = cci_pmu_stop,
1440 .read = pmu_read,
1441 .attr_groups = pmu_attr_groups,
1442 };
1443
1444 cci_pmu->plat_device = pdev;
1445 num_cntrs = pmu_get_max_counters();
1446 if (num_cntrs > cci_pmu->model->num_hw_cntrs) {
1447 dev_warn(&pdev->dev,
1448			 "PMU implements more counters (%d) than supported by"
1449			 " the model (%d), truncated.\n",
1450 num_cntrs, cci_pmu->model->num_hw_cntrs);
1451 num_cntrs = cci_pmu->model->num_hw_cntrs;
1452 }
1453 cci_pmu->num_cntrs = num_cntrs + cci_pmu->model->fixed_hw_cntrs;
1454
1455 return perf_pmu_register(&cci_pmu->pmu, name, -1);
1456}
1457
1458static int cci_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
1459{
1460 struct cci_pmu *cci_pmu = hlist_entry_safe(node, struct cci_pmu, node);
1461 unsigned int target;
1462
1463 if (!cpumask_test_and_clear_cpu(cpu, &cci_pmu->cpus))
1464 return 0;
1465 target = cpumask_any_but(cpu_online_mask, cpu);
1466 if (target >= nr_cpu_ids)
1467 return 0;
1468 /*
1469 * TODO: migrate context once core races on event->ctx have
1470 * been fixed.
1471 */
1472 cpumask_set_cpu(target, &cci_pmu->cpus);
1473 return 0;
1474}
1475
1476static struct cci_pmu_model cci_pmu_models[] = {
1477#ifdef CONFIG_ARM_CCI400_PMU
1478 [CCI400_R0] = {
1479 .name = "CCI_400",
1480 .fixed_hw_cntrs = 1, /* Cycle counter */
1481 .num_hw_cntrs = 4,
1482 .cntr_size = SZ_4K,
1483 .format_attrs = cci400_pmu_format_attrs,
1484 .event_attrs = cci400_r0_pmu_event_attrs,
1485 .event_ranges = {
1486 [CCI_IF_SLAVE] = {
1487 CCI400_R0_SLAVE_PORT_MIN_EV,
1488 CCI400_R0_SLAVE_PORT_MAX_EV,
1489 },
1490 [CCI_IF_MASTER] = {
1491 CCI400_R0_MASTER_PORT_MIN_EV,
1492 CCI400_R0_MASTER_PORT_MAX_EV,
1493 },
1494 },
1495 .validate_hw_event = cci400_validate_hw_event,
1496 .get_event_idx = cci400_get_event_idx,
1497 },
1498 [CCI400_R1] = {
1499 .name = "CCI_400_r1",
1500 .fixed_hw_cntrs = 1, /* Cycle counter */
1501 .num_hw_cntrs = 4,
1502 .cntr_size = SZ_4K,
1503 .format_attrs = cci400_pmu_format_attrs,
1504 .event_attrs = cci400_r1_pmu_event_attrs,
1505 .event_ranges = {
1506 [CCI_IF_SLAVE] = {
1507 CCI400_R1_SLAVE_PORT_MIN_EV,
1508 CCI400_R1_SLAVE_PORT_MAX_EV,
1509 },
1510 [CCI_IF_MASTER] = {
1511 CCI400_R1_MASTER_PORT_MIN_EV,
1512 CCI400_R1_MASTER_PORT_MAX_EV,
1513 },
1514 },
1515 .validate_hw_event = cci400_validate_hw_event,
1516 .get_event_idx = cci400_get_event_idx,
1517 },
1518#endif
1519#ifdef CONFIG_ARM_CCI5xx_PMU
1520 [CCI500_R0] = {
1521 .name = "CCI_500",
1522 .fixed_hw_cntrs = 0,
1523 .num_hw_cntrs = 8,
1524 .cntr_size = SZ_64K,
1525 .format_attrs = cci5xx_pmu_format_attrs,
1526 .event_attrs = cci5xx_pmu_event_attrs,
1527 .event_ranges = {
1528 [CCI_IF_SLAVE] = {
1529 CCI5xx_SLAVE_PORT_MIN_EV,
1530 CCI5xx_SLAVE_PORT_MAX_EV,
1531 },
1532 [CCI_IF_MASTER] = {
1533 CCI5xx_MASTER_PORT_MIN_EV,
1534 CCI5xx_MASTER_PORT_MAX_EV,
1535 },
1536 [CCI_IF_GLOBAL] = {
1537 CCI5xx_GLOBAL_PORT_MIN_EV,
1538 CCI5xx_GLOBAL_PORT_MAX_EV,
1539 },
1540 },
1541 .validate_hw_event = cci500_validate_hw_event,
1542 .write_counters = cci5xx_pmu_write_counters,
1543 },
1544 [CCI550_R0] = {
1545 .name = "CCI_550",
1546 .fixed_hw_cntrs = 0,
1547 .num_hw_cntrs = 8,
1548 .cntr_size = SZ_64K,
1549 .format_attrs = cci5xx_pmu_format_attrs,
1550 .event_attrs = cci5xx_pmu_event_attrs,
1551 .event_ranges = {
1552 [CCI_IF_SLAVE] = {
1553 CCI5xx_SLAVE_PORT_MIN_EV,
1554 CCI5xx_SLAVE_PORT_MAX_EV,
1555 },
1556 [CCI_IF_MASTER] = {
1557 CCI5xx_MASTER_PORT_MIN_EV,
1558 CCI5xx_MASTER_PORT_MAX_EV,
1559 },
1560 [CCI_IF_GLOBAL] = {
1561 CCI5xx_GLOBAL_PORT_MIN_EV,
1562 CCI5xx_GLOBAL_PORT_MAX_EV,
1563 },
1564 },
1565 .validate_hw_event = cci550_validate_hw_event,
1566 .write_counters = cci5xx_pmu_write_counters,
1567 },
1568#endif
1569};
1570
1571static const struct of_device_id arm_cci_pmu_matches[] = {
1572#ifdef CONFIG_ARM_CCI400_PMU
1573 {
1574 .compatible = "arm,cci-400-pmu",
1575 .data = NULL,
1576 },
1577 {
1578 .compatible = "arm,cci-400-pmu,r0",
1579 .data = &cci_pmu_models[CCI400_R0],
1580 },
1581 {
1582 .compatible = "arm,cci-400-pmu,r1",
1583 .data = &cci_pmu_models[CCI400_R1],
1584 },
1585#endif
1586#ifdef CONFIG_ARM_CCI5xx_PMU
1587 {
1588 .compatible = "arm,cci-500-pmu,r0",
1589 .data = &cci_pmu_models[CCI500_R0],
1590 },
1591 {
1592 .compatible = "arm,cci-550-pmu,r0",
1593 .data = &cci_pmu_models[CCI550_R0],
1594 },
1595#endif
1596 {},
1597};
1598
1599static inline const struct cci_pmu_model *get_cci_model(struct platform_device *pdev)
1600{
1601 const struct of_device_id *match = of_match_node(arm_cci_pmu_matches,
1602 pdev->dev.of_node);
1603 if (!match)
1604 return NULL;
1605 if (match->data)
1606 return match->data;
1607
1608	dev_warn(&pdev->dev, "DEPRECATED compatible property, "
1609		 "requires secure access to CCI registers\n");
1610 return probe_cci_model(pdev);
1611}
1612
1613static bool is_duplicate_irq(int irq, int *irqs, int nr_irqs)
1614{
1615 int i;
1616
1617 for (i = 0; i < nr_irqs; i++)
1618 if (irq == irqs[i])
1619 return true;
1620
1621 return false;
1622}
1623
1624static struct cci_pmu *cci_pmu_alloc(struct platform_device *pdev)
1625{
1626 struct cci_pmu *cci_pmu;
1627 const struct cci_pmu_model *model;
1628
1629 /*
1630 * All allocations are devm_* hence we don't have to free
1631 * them explicitly on an error, as it would end up in driver
1632 * detach.
1633 */
1634 model = get_cci_model(pdev);
1635 if (!model) {
1636 dev_warn(&pdev->dev, "CCI PMU version not supported\n");
1637 return ERR_PTR(-ENODEV);
1638 }
1639
1640 cci_pmu = devm_kzalloc(&pdev->dev, sizeof(*cci_pmu), GFP_KERNEL);
1641 if (!cci_pmu)
1642 return ERR_PTR(-ENOMEM);
1643
1644 cci_pmu->model = model;
1645 cci_pmu->irqs = devm_kcalloc(&pdev->dev, CCI_PMU_MAX_HW_CNTRS(model),
1646 sizeof(*cci_pmu->irqs), GFP_KERNEL);
1647 if (!cci_pmu->irqs)
1648 return ERR_PTR(-ENOMEM);
1649 cci_pmu->hw_events.events = devm_kcalloc(&pdev->dev,
1650 CCI_PMU_MAX_HW_CNTRS(model),
1651 sizeof(*cci_pmu->hw_events.events),
1652 GFP_KERNEL);
1653 if (!cci_pmu->hw_events.events)
1654 return ERR_PTR(-ENOMEM);
1655 cci_pmu->hw_events.used_mask = devm_kcalloc(&pdev->dev,
1656 BITS_TO_LONGS(CCI_PMU_MAX_HW_CNTRS(model)),
1657 sizeof(*cci_pmu->hw_events.used_mask),
1658 GFP_KERNEL);
1659 if (!cci_pmu->hw_events.used_mask)
1660 return ERR_PTR(-ENOMEM);
1661
1662 return cci_pmu;
1663}
1664
1665static int cci_pmu_probe(struct platform_device *pdev)
1666{
1667 struct resource *res;
1668 struct cci_pmu *cci_pmu;
1669 int i, ret, irq;
1670
1671 cci_pmu = cci_pmu_alloc(pdev);
1672 if (IS_ERR(cci_pmu))
1673 return PTR_ERR(cci_pmu);
1674
1675 res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
1676 cci_pmu->base = devm_ioremap_resource(&pdev->dev, res);
1677 if (IS_ERR(cci_pmu->base))
1678 return -ENOMEM;
1679
1680 /*
1681 * CCI PMU has one overflow interrupt per counter; but some may be tied
1682 * together to a common interrupt.
1683 */
1684 cci_pmu->nr_irqs = 0;
1685 for (i = 0; i < CCI_PMU_MAX_HW_CNTRS(cci_pmu->model); i++) {
1686 irq = platform_get_irq(pdev, i);
1687 if (irq < 0)
1688 break;
1689
1690 if (is_duplicate_irq(irq, cci_pmu->irqs, cci_pmu->nr_irqs))
1691 continue;
1692
1693 cci_pmu->irqs[cci_pmu->nr_irqs++] = irq;
1694 }
1695
1696 /*
1697 * Ensure that the device tree has as many interrupts as the number
1698 * of counters.
1699 */
1700 if (i < CCI_PMU_MAX_HW_CNTRS(cci_pmu->model)) {
1701		dev_warn(&pdev->dev, "Incorrect number of interrupts: %d, should be %d\n",
1702 i, CCI_PMU_MAX_HW_CNTRS(cci_pmu->model));
1703 return -EINVAL;
1704 }
1705
1706 raw_spin_lock_init(&cci_pmu->hw_events.pmu_lock);
1707 mutex_init(&cci_pmu->reserve_mutex);
1708 atomic_set(&cci_pmu->active_events, 0);
1709 cpumask_set_cpu(get_cpu(), &cci_pmu->cpus);
1710
1711 ret = cci_pmu_init(cci_pmu, pdev);
1712 if (ret) {
1713 put_cpu();
1714 return ret;
1715 }
1716
1717 cpuhp_state_add_instance_nocalls(CPUHP_AP_PERF_ARM_CCI_ONLINE,
1718 &cci_pmu->node);
1719 put_cpu();
1720	pr_info("ARM %s PMU driver probed\n", cci_pmu->model->name);
1721 return 0;
1722}
1723
1724static struct platform_driver cci_pmu_driver = {
1725 .driver = {
1726 .name = DRIVER_NAME,
1727 .of_match_table = arm_cci_pmu_matches,
1728 },
1729 .probe = cci_pmu_probe,
1730};
1731
1732static int __init cci_platform_init(void)
1733{
1734 int ret;
1735
1736 ret = cpuhp_setup_state_multi(CPUHP_AP_PERF_ARM_CCI_ONLINE,
1737 "perf/arm/cci:online", NULL,
1738 cci_pmu_offline_cpu);
1739 if (ret)
1740 return ret;
1741
1742 return platform_driver_register(&cci_pmu_driver);
1743}
1744
1745device_initcall(cci_platform_init);
1746MODULE_LICENSE("GPL");
1747MODULE_DESCRIPTION("ARM CCI PMU support");