author		Will Deacon <will.deacon@arm.com>	2018-07-27 09:39:04 -0400
committer	Will Deacon <will.deacon@arm.com>	2018-07-27 09:39:04 -0400
commit		ba70ffa7d20d771ae47a1597799da84980aafe15 (patch)
tree		a25f393d7c97683673f12396369a8a6f95e136dc /drivers/perf
parent		c5157101e7793b42a56e07368c7f4cb73fb58008 (diff)
parent		809092dc3e606f3508b53baa624b27bfff8f0e7f (diff)
Merge branch 'for-next/perf' of git://git.kernel.org/pub/scm/linux/kernel/git/will/linux into aarch64/for-next/core
Pull in arm perf updates, including support for 64-bit (chained) event
counters and some non-critical fixes for some of the system PMU drivers.

Signed-off-by: Will Deacon <will.deacon@arm.com>
Diffstat (limited to 'drivers/perf')
-rw-r--r--	drivers/perf/arm-cci.c				38
-rw-r--r--	drivers/perf/arm-ccn.c				14
-rw-r--r--	drivers/perf/arm_pmu.c				38
-rw-r--r--	drivers/perf/arm_pmu_platform.c			2
-rw-r--r--	drivers/perf/hisilicon/hisi_uncore_pmu.c	12
5 files changed, 59 insertions, 45 deletions
diff --git a/drivers/perf/arm-cci.c b/drivers/perf/arm-cci.c
index 0d09d8e669cd..1bfeb160c5b1 100644
--- a/drivers/perf/arm-cci.c
+++ b/drivers/perf/arm-cci.c
@@ -53,6 +53,16 @@ enum {
 	CCI_IF_MAX,
 };
 
+#define NUM_HW_CNTRS_CII_4XX	4
+#define NUM_HW_CNTRS_CII_5XX	8
+#define NUM_HW_CNTRS_MAX	NUM_HW_CNTRS_CII_5XX
+
+#define FIXED_HW_CNTRS_CII_4XX	1
+#define FIXED_HW_CNTRS_CII_5XX	0
+#define FIXED_HW_CNTRS_MAX	FIXED_HW_CNTRS_CII_4XX
+
+#define HW_CNTRS_MAX	(NUM_HW_CNTRS_MAX + FIXED_HW_CNTRS_MAX)
+
 struct event_range {
 	u32 min;
 	u32 max;
@@ -633,8 +643,7 @@ static void cci_pmu_sync_counters(struct cci_pmu *cci_pmu)
 {
 	int i;
 	struct cci_pmu_hw_events *cci_hw = &cci_pmu->hw_events;
-
-	DECLARE_BITMAP(mask, cci_pmu->num_cntrs);
+	DECLARE_BITMAP(mask, HW_CNTRS_MAX);
 
 	bitmap_zero(mask, cci_pmu->num_cntrs);
 	for_each_set_bit(i, cci_pmu->hw_events.used_mask, cci_pmu->num_cntrs) {
@@ -940,7 +949,7 @@ static void pmu_write_counters(struct cci_pmu *cci_pmu, unsigned long *mask)
 static void cci5xx_pmu_write_counters(struct cci_pmu *cci_pmu, unsigned long *mask)
 {
 	int i;
-	DECLARE_BITMAP(saved_mask, cci_pmu->num_cntrs);
+	DECLARE_BITMAP(saved_mask, HW_CNTRS_MAX);
 
 	bitmap_zero(saved_mask, cci_pmu->num_cntrs);
 	pmu_save_counters(cci_pmu, saved_mask);
@@ -1245,7 +1254,7 @@ static int validate_group(struct perf_event *event)
 {
 	struct perf_event *sibling, *leader = event->group_leader;
 	struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu);
-	unsigned long mask[BITS_TO_LONGS(cci_pmu->num_cntrs)];
+	unsigned long mask[BITS_TO_LONGS(HW_CNTRS_MAX)];
 	struct cci_pmu_hw_events fake_pmu = {
 		/*
 		 * Initialise the fake PMU. We only need to populate the
@@ -1403,6 +1412,11 @@ static int cci_pmu_init(struct cci_pmu *cci_pmu, struct platform_device *pdev)
 	char *name = model->name;
 	u32 num_cntrs;
 
+	if (WARN_ON(model->num_hw_cntrs > NUM_HW_CNTRS_MAX))
+		return -EINVAL;
+	if (WARN_ON(model->fixed_hw_cntrs > FIXED_HW_CNTRS_MAX))
+		return -EINVAL;
+
 	pmu_event_attr_group.attrs = model->event_attrs;
 	pmu_format_attr_group.attrs = model->format_attrs;
 
@@ -1455,8 +1469,8 @@ static __maybe_unused struct cci_pmu_model cci_pmu_models[] = {
 #ifdef CONFIG_ARM_CCI400_PMU
 	[CCI400_R0] = {
 		.name = "CCI_400",
-		.fixed_hw_cntrs = 1,	/* Cycle counter */
-		.num_hw_cntrs = 4,
+		.fixed_hw_cntrs = FIXED_HW_CNTRS_CII_4XX, /* Cycle counter */
+		.num_hw_cntrs = NUM_HW_CNTRS_CII_4XX,
 		.cntr_size = SZ_4K,
 		.format_attrs = cci400_pmu_format_attrs,
 		.event_attrs = cci400_r0_pmu_event_attrs,
@@ -1475,8 +1489,8 @@ static __maybe_unused struct cci_pmu_model cci_pmu_models[] = {
 	},
 	[CCI400_R1] = {
 		.name = "CCI_400_r1",
-		.fixed_hw_cntrs = 1,	/* Cycle counter */
-		.num_hw_cntrs = 4,
+		.fixed_hw_cntrs = FIXED_HW_CNTRS_CII_4XX, /* Cycle counter */
+		.num_hw_cntrs = NUM_HW_CNTRS_CII_4XX,
 		.cntr_size = SZ_4K,
 		.format_attrs = cci400_pmu_format_attrs,
 		.event_attrs = cci400_r1_pmu_event_attrs,
@@ -1497,8 +1511,8 @@ static __maybe_unused struct cci_pmu_model cci_pmu_models[] = {
 #ifdef CONFIG_ARM_CCI5xx_PMU
 	[CCI500_R0] = {
 		.name = "CCI_500",
-		.fixed_hw_cntrs = 0,
-		.num_hw_cntrs = 8,
+		.fixed_hw_cntrs = FIXED_HW_CNTRS_CII_5XX,
+		.num_hw_cntrs = NUM_HW_CNTRS_CII_5XX,
 		.cntr_size = SZ_64K,
 		.format_attrs = cci5xx_pmu_format_attrs,
 		.event_attrs = cci5xx_pmu_event_attrs,
@@ -1521,8 +1535,8 @@ static __maybe_unused struct cci_pmu_model cci_pmu_models[] = {
 	},
 	[CCI550_R0] = {
 		.name = "CCI_550",
-		.fixed_hw_cntrs = 0,
-		.num_hw_cntrs = 8,
+		.fixed_hw_cntrs = FIXED_HW_CNTRS_CII_5XX,
+		.num_hw_cntrs = NUM_HW_CNTRS_CII_5XX,
 		.cntr_size = SZ_64K,
 		.format_attrs = cci5xx_pmu_format_attrs,
 		.event_attrs = cci5xx_pmu_event_attrs,
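
The arm-cci.c changes above swap variable-length arrays (bitmaps sized by the
runtime value cci_pmu->num_cntrs) for fixed-size arrays bounded by
HW_CNTRS_MAX; the new WARN_ON() checks in cci_pmu_init() enforce that no model
ever claims more counters than that bound. A minimal userspace sketch of the
pattern, using simplified stand-ins (not the kernel's exact definitions) for
DECLARE_BITMAP() and BITS_TO_LONGS():

#include <limits.h>
#include <string.h>

/* Simplified stand-ins for the kernel macros used in the patch. */
#define BITS_PER_LONG		(sizeof(unsigned long) * CHAR_BIT)
#define BITS_TO_LONGS(nr)	(((nr) + BITS_PER_LONG - 1) / BITS_PER_LONG)
#define DECLARE_BITMAP(name, bits) unsigned long name[BITS_TO_LONGS(bits)]

#define NUM_HW_CNTRS_MAX	8	/* CCI-5xx has the most event counters */
#define FIXED_HW_CNTRS_MAX	1	/* CCI-4xx adds one fixed cycle counter */
#define HW_CNTRS_MAX		(NUM_HW_CNTRS_MAX + FIXED_HW_CNTRS_MAX)

static void sync_counters_sketch(unsigned int num_cntrs)
{
	/* Fixed-size array: the bound is a compile-time constant, so this
	 * is a plain stack array instead of a VLA sized by num_cntrs. */
	DECLARE_BITMAP(mask, HW_CNTRS_MAX);

	memset(mask, 0, sizeof(mask));	/* bitmap_zero() equivalent */

	/* Only bits [0, num_cntrs) would ever be set or tested. */
	(void)num_cntrs;
}

int main(void)
{
	sync_counters_sketch(5);	/* e.g. a CCI-400: 4 + 1 counters */
	return 0;
}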
diff --git a/drivers/perf/arm-ccn.c b/drivers/perf/arm-ccn.c
index b416ee18e6bb..4b15c36f4631 100644
--- a/drivers/perf/arm-ccn.c
+++ b/drivers/perf/arm-ccn.c
@@ -1485,17 +1485,9 @@ static int arm_ccn_probe(struct platform_device *pdev)
 	platform_set_drvdata(pdev, ccn);
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!res)
-		return -EINVAL;
-
-	if (!devm_request_mem_region(ccn->dev, res->start,
-				     resource_size(res), pdev->name))
-		return -EBUSY;
-
-	ccn->base = devm_ioremap(ccn->dev, res->start,
-				 resource_size(res));
-	if (!ccn->base)
-		return -EFAULT;
+	ccn->base = devm_ioremap_resource(ccn->dev, res);
+	if (IS_ERR(ccn->base))
+		return PTR_ERR(ccn->base);
 
 	res = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
 	if (!res)
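
The arm-ccn.c hunk is a straight conversion to devm_ioremap_resource(), which
folds the NULL-resource check, devm_request_mem_region() and devm_ioremap()
(and their three distinct error returns) into a single call that yields an
ERR_PTR on failure. A condensed sketch of the resulting probe() idiom;
example_probe() is a hypothetical stand-in for arm_ccn_probe():

#include <linux/err.h>
#include <linux/io.h>
#include <linux/platform_device.h>

static int example_probe(struct platform_device *pdev)
{
	struct resource *res;
	void __iomem *base;

	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
	/* Validates res, requests the region and maps it in one step. */
	base = devm_ioremap_resource(&pdev->dev, res);
	if (IS_ERR(base))
		return PTR_ERR(base);

	return 0;
}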
diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c
index a6347d487635..7f01f6f60b87 100644
--- a/drivers/perf/arm_pmu.c
+++ b/drivers/perf/arm_pmu.c
@@ -28,6 +28,14 @@
 static DEFINE_PER_CPU(struct arm_pmu *, cpu_armpmu);
 static DEFINE_PER_CPU(int, cpu_irq);
 
+static inline u64 arm_pmu_event_max_period(struct perf_event *event)
+{
+	if (event->hw.flags & ARMPMU_EVT_64BIT)
+		return GENMASK_ULL(63, 0);
+	else
+		return GENMASK_ULL(31, 0);
+}
+
 static int
 armpmu_map_cache_event(const unsigned (*cache_map)
 				      [PERF_COUNT_HW_CACHE_MAX]
@@ -114,8 +122,10 @@ int armpmu_event_set_period(struct perf_event *event)
 	struct hw_perf_event *hwc = &event->hw;
 	s64 left = local64_read(&hwc->period_left);
 	s64 period = hwc->sample_period;
+	u64 max_period;
 	int ret = 0;
 
+	max_period = arm_pmu_event_max_period(event);
 	if (unlikely(left <= -period)) {
 		left = period;
 		local64_set(&hwc->period_left, left);
@@ -136,12 +146,12 @@ int armpmu_event_set_period(struct perf_event *event)
 	 * effect we are reducing max_period to account for
 	 * interrupt latency (and we are being very conservative).
 	 */
-	if (left > (armpmu->max_period >> 1))
-		left = armpmu->max_period >> 1;
+	if (left > (max_period >> 1))
+		left = (max_period >> 1);
 
 	local64_set(&hwc->prev_count, (u64)-left);
 
-	armpmu->write_counter(event, (u64)(-left) & 0xffffffff);
+	armpmu->write_counter(event, (u64)(-left) & max_period);
 
 	perf_event_update_userpage(event);
 
@@ -153,6 +163,7 @@ u64 armpmu_event_update(struct perf_event *event)
 	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
 	struct hw_perf_event *hwc = &event->hw;
 	u64 delta, prev_raw_count, new_raw_count;
+	u64 max_period = arm_pmu_event_max_period(event);
 
 again:
 	prev_raw_count = local64_read(&hwc->prev_count);
@@ -162,7 +173,7 @@ again:
 			     new_raw_count) != prev_raw_count)
 		goto again;
 
-	delta = (new_raw_count - prev_raw_count) & armpmu->max_period;
+	delta = (new_raw_count - prev_raw_count) & max_period;
 
 	local64_add(delta, &event->count);
 	local64_sub(delta, &hwc->period_left);
@@ -227,11 +238,10 @@ armpmu_del(struct perf_event *event, int flags)
 
 	armpmu_stop(event, PERF_EF_UPDATE);
 	hw_events->events[idx] = NULL;
-	clear_bit(idx, hw_events->used_mask);
-	if (armpmu->clear_event_idx)
-		armpmu->clear_event_idx(hw_events, event);
-
+	armpmu->clear_event_idx(hw_events, event);
 	perf_event_update_userpage(event);
+	/* Clear the allocated counter */
+	hwc->idx = -1;
 }
 
 static int
@@ -360,6 +370,7 @@ __hw_perf_event_init(struct perf_event *event)
 	struct hw_perf_event *hwc = &event->hw;
 	int mapping;
 
+	hwc->flags = 0;
 	mapping = armpmu->map_event(event);
 
 	if (mapping < 0) {
@@ -402,7 +413,7 @@ __hw_perf_event_init(struct perf_event *event)
 		 * is far less likely to overtake the previous one unless
 		 * you have some serious IRQ latency issues.
 		 */
-		hwc->sample_period  = armpmu->max_period >> 1;
+		hwc->sample_period  = arm_pmu_event_max_period(event) >> 1;
 		hwc->last_period    = hwc->sample_period;
 		local64_set(&hwc->period_left, hwc->sample_period);
 	}
@@ -654,14 +665,9 @@ static void cpu_pm_pmu_setup(struct arm_pmu *armpmu, unsigned long cmd)
 	int idx;
 
 	for (idx = 0; idx < armpmu->num_events; idx++) {
-		/*
-		 * If the counter is not used skip it, there is no
-		 * need of stopping/restarting it.
-		 */
-		if (!test_bit(idx, hw_events->used_mask))
-			continue;
-
 		event = hw_events->events[idx];
+		if (!event)
+			continue;
 
 		switch (cmd) {
 		case CPU_PM_ENTER:
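
The arm_pmu.c changes hang off the new arm_pmu_event_max_period() helper:
events flagged ARMPMU_EVT_64BIT get a full 64-bit period, all others keep
32 bits, and the hard-coded 0xffffffff masks disappear. A standalone userspace
sketch of the wrap-safe delta arithmetic this feeds in armpmu_event_update(),
with made-up counter values and a simplified re-statement of GENMASK_ULL():

#include <stdint.h>
#include <stdio.h>

/* Simplified re-statement of the kernel's GENMASK_ULL(). */
#define GENMASK_ULL(h, l) \
	((~0ULL << (l)) & (~0ULL >> (63 - (h))))

int main(void)
{
	uint64_t max_period = GENMASK_ULL(31, 0); /* 32-bit event counter */
	uint64_t prev = 0xfffffff0;	/* last value read, near overflow */
	uint64_t new  = 0x00000010;	/* current value, after the wrap */

	/* Same expression as the kernel: the mask makes the subtraction
	 * modulo 2^32, so the wrap yields a small, correct delta. */
	uint64_t delta = (new - prev) & max_period;

	printf("delta = %llu\n", (unsigned long long)delta);	/* 32 */
	return 0;
}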
diff --git a/drivers/perf/arm_pmu_platform.c b/drivers/perf/arm_pmu_platform.c
index 971ff336494a..96075cecb0ae 100644
--- a/drivers/perf/arm_pmu_platform.c
+++ b/drivers/perf/arm_pmu_platform.c
@@ -160,7 +160,7 @@ static int pmu_parse_irqs(struct arm_pmu *pmu)
 static int armpmu_request_irqs(struct arm_pmu *armpmu)
 {
 	struct pmu_hw_events __percpu *hw_events = armpmu->hw_events;
-	int cpu, err;
+	int cpu, err = 0;
 
 	for_each_cpu(cpu, &armpmu->supported_cpus) {
 		int irq = per_cpu(hw_events->irq, cpu);
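
The arm_pmu_platform.c one-liner fixes a potential use of an uninitialized
variable: if armpmu->supported_cpus is empty, or nothing in the loop ever
assigns err, armpmu_request_irqs() would return garbage. A reduced userspace
illustration of the hazard, not the driver code; request_one() is a
hypothetical stand-in for the per-IRQ request:

#include <stdio.h>

/* Hypothetical stand-in for the per-IRQ request; always succeeds here. */
static int request_one(int irq) { (void)irq; return 0; }

static int request_all_sketch(const int *irqs, int nr)
{
	int i, err = 0;	/* the fix: was "int i, err;" */

	for (i = 0; i < nr; i++) {
		if (!irqs[i])
			continue;	/* err is never assigned here */

		err = request_one(irqs[i]);
		if (err)
			break;
	}
	return err;	/* well-defined even if nothing was requested */
}

int main(void)
{
	int none[2] = { 0, 0 };	/* no valid IRQs: loop assigns nothing */

	printf("err = %d\n", request_all_sketch(none, 2)); /* 0, not garbage */
	return 0;
}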
diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pmu.c
index 44df61397a38..9efd2413240c 100644
--- a/drivers/perf/hisilicon/hisi_uncore_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_pmu.c
@@ -350,19 +350,21 @@ void hisi_uncore_pmu_disable(struct pmu *pmu)
 
 /*
  * Read Super CPU cluster and CPU cluster ID from MPIDR_EL1.
- * If multi-threading is supported, SCCL_ID is in MPIDR[aff3] and CCL_ID
- * is in MPIDR[aff2]; if not, SCCL_ID is in MPIDR[aff2] and CCL_ID is
- * in MPIDR[aff1]. If this changes in future, this shall be updated.
+ * If multi-threading is supported, CCL_ID is the low 3-bits in MPIDR[Aff2]
+ * and SCCL_ID is the upper 5-bits of Aff2 field; if not, SCCL_ID
+ * is in MPIDR[Aff2] and CCL_ID is in MPIDR[Aff1].
  */
 static void hisi_read_sccl_and_ccl_id(int *sccl_id, int *ccl_id)
 {
 	u64 mpidr = read_cpuid_mpidr();
 
 	if (mpidr & MPIDR_MT_BITMASK) {
+		int aff2 = MPIDR_AFFINITY_LEVEL(mpidr, 2);
+
 		if (sccl_id)
-			*sccl_id = MPIDR_AFFINITY_LEVEL(mpidr, 3);
+			*sccl_id = aff2 >> 3;
 		if (ccl_id)
-			*ccl_id = MPIDR_AFFINITY_LEVEL(mpidr, 2);
+			*ccl_id = aff2 & 0x7;
 	} else {
 		if (sccl_id)
 			*sccl_id = MPIDR_AFFINITY_LEVEL(mpidr, 2);
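
The hisi_uncore_pmu.c change follows its updated comment: in the
multi-threading case Aff2 is a packed field, with SCCL_ID in its upper 5 bits
and CCL_ID in its low 3 bits, rather than SCCL_ID living in Aff3. A worked
example of the new split, using an illustrative Aff2 value rather than one
from real hardware:

#include <stdio.h>

int main(void)
{
	unsigned int aff2 = 0x2b;		/* example MPIDR_EL1.Aff2 = 0b101011 */
	unsigned int sccl_id = aff2 >> 3;	/* upper 5 bits: 0b101 -> 5 */
	unsigned int ccl_id  = aff2 & 0x7;	/* low 3 bits:  0b011 -> 3 */

	printf("sccl_id=%u ccl_id=%u\n", sccl_id, ccl_id);
	return 0;
}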