diff options
author | Will Deacon <will.deacon@arm.com> | 2018-07-27 09:39:04 -0400 |
---|---|---|
committer | Will Deacon <will.deacon@arm.com> | 2018-07-27 09:39:04 -0400 |
commit | ba70ffa7d20d771ae47a1597799da84980aafe15 (patch) | |
tree | a25f393d7c97683673f12396369a8a6f95e136dc /drivers/perf | |
parent | c5157101e7793b42a56e07368c7f4cb73fb58008 (diff) | |
parent | 809092dc3e606f3508b53baa624b27bfff8f0e7f (diff) |
Merge branch 'for-next/perf' of git://git.kernel.org/pub/scm/linux/kernel/git/will/linux into aarch64/for-next/core
Pull in arm perf updates, including support for 64-bit (chained) event
counters and some non-critical fixes for some of the system PMU drivers.
Signed-off-by: Will Deacon <will.deacon@arm.com>
Diffstat (limited to 'drivers/perf')
-rw-r--r-- | drivers/perf/arm-cci.c | 38 | ||||
-rw-r--r-- | drivers/perf/arm-ccn.c | 14 | ||||
-rw-r--r-- | drivers/perf/arm_pmu.c | 38 | ||||
-rw-r--r-- | drivers/perf/arm_pmu_platform.c | 2 | ||||
-rw-r--r-- | drivers/perf/hisilicon/hisi_uncore_pmu.c | 12 |
5 files changed, 59 insertions(+), 45 deletions(-)
diff --git a/drivers/perf/arm-cci.c b/drivers/perf/arm-cci.c index 0d09d8e669cd..1bfeb160c5b1 100644 --- a/drivers/perf/arm-cci.c +++ b/drivers/perf/arm-cci.c | |||
@@ -53,6 +53,16 @@ enum { | |||
53 | CCI_IF_MAX, | 53 | CCI_IF_MAX, |
54 | }; | 54 | }; |
55 | 55 | ||
56 | #define NUM_HW_CNTRS_CII_4XX 4 | ||
57 | #define NUM_HW_CNTRS_CII_5XX 8 | ||
58 | #define NUM_HW_CNTRS_MAX NUM_HW_CNTRS_CII_5XX | ||
59 | |||
60 | #define FIXED_HW_CNTRS_CII_4XX 1 | ||
61 | #define FIXED_HW_CNTRS_CII_5XX 0 | ||
62 | #define FIXED_HW_CNTRS_MAX FIXED_HW_CNTRS_CII_4XX | ||
63 | |||
64 | #define HW_CNTRS_MAX (NUM_HW_CNTRS_MAX + FIXED_HW_CNTRS_MAX) | ||
65 | |||
56 | struct event_range { | 66 | struct event_range { |
57 | u32 min; | 67 | u32 min; |
58 | u32 max; | 68 | u32 max; |
@@ -633,8 +643,7 @@ static void cci_pmu_sync_counters(struct cci_pmu *cci_pmu) | |||
633 | { | 643 | { |
634 | int i; | 644 | int i; |
635 | struct cci_pmu_hw_events *cci_hw = &cci_pmu->hw_events; | 645 | struct cci_pmu_hw_events *cci_hw = &cci_pmu->hw_events; |
636 | 646 | DECLARE_BITMAP(mask, HW_CNTRS_MAX); | |
637 | DECLARE_BITMAP(mask, cci_pmu->num_cntrs); | ||
638 | 647 | ||
639 | bitmap_zero(mask, cci_pmu->num_cntrs); | 648 | bitmap_zero(mask, cci_pmu->num_cntrs); |
640 | for_each_set_bit(i, cci_pmu->hw_events.used_mask, cci_pmu->num_cntrs) { | 649 | for_each_set_bit(i, cci_pmu->hw_events.used_mask, cci_pmu->num_cntrs) { |
@@ -940,7 +949,7 @@ static void pmu_write_counters(struct cci_pmu *cci_pmu, unsigned long *mask) | |||
940 | static void cci5xx_pmu_write_counters(struct cci_pmu *cci_pmu, unsigned long *mask) | 949 | static void cci5xx_pmu_write_counters(struct cci_pmu *cci_pmu, unsigned long *mask) |
941 | { | 950 | { |
942 | int i; | 951 | int i; |
943 | DECLARE_BITMAP(saved_mask, cci_pmu->num_cntrs); | 952 | DECLARE_BITMAP(saved_mask, HW_CNTRS_MAX); |
944 | 953 | ||
945 | bitmap_zero(saved_mask, cci_pmu->num_cntrs); | 954 | bitmap_zero(saved_mask, cci_pmu->num_cntrs); |
946 | pmu_save_counters(cci_pmu, saved_mask); | 955 | pmu_save_counters(cci_pmu, saved_mask); |
@@ -1245,7 +1254,7 @@ static int validate_group(struct perf_event *event) | |||
1245 | { | 1254 | { |
1246 | struct perf_event *sibling, *leader = event->group_leader; | 1255 | struct perf_event *sibling, *leader = event->group_leader; |
1247 | struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu); | 1256 | struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu); |
1248 | unsigned long mask[BITS_TO_LONGS(cci_pmu->num_cntrs)]; | 1257 | unsigned long mask[BITS_TO_LONGS(HW_CNTRS_MAX)]; |
1249 | struct cci_pmu_hw_events fake_pmu = { | 1258 | struct cci_pmu_hw_events fake_pmu = { |
1250 | /* | 1259 | /* |
1251 | * Initialise the fake PMU. We only need to populate the | 1260 | * Initialise the fake PMU. We only need to populate the |
@@ -1403,6 +1412,11 @@ static int cci_pmu_init(struct cci_pmu *cci_pmu, struct platform_device *pdev) | |||
1403 | char *name = model->name; | 1412 | char *name = model->name; |
1404 | u32 num_cntrs; | 1413 | u32 num_cntrs; |
1405 | 1414 | ||
1415 | if (WARN_ON(model->num_hw_cntrs > NUM_HW_CNTRS_MAX)) | ||
1416 | return -EINVAL; | ||
1417 | if (WARN_ON(model->fixed_hw_cntrs > FIXED_HW_CNTRS_MAX)) | ||
1418 | return -EINVAL; | ||
1419 | |||
1406 | pmu_event_attr_group.attrs = model->event_attrs; | 1420 | pmu_event_attr_group.attrs = model->event_attrs; |
1407 | pmu_format_attr_group.attrs = model->format_attrs; | 1421 | pmu_format_attr_group.attrs = model->format_attrs; |
1408 | 1422 | ||
@@ -1455,8 +1469,8 @@ static __maybe_unused struct cci_pmu_model cci_pmu_models[] = { | |||
1455 | #ifdef CONFIG_ARM_CCI400_PMU | 1469 | #ifdef CONFIG_ARM_CCI400_PMU |
1456 | [CCI400_R0] = { | 1470 | [CCI400_R0] = { |
1457 | .name = "CCI_400", | 1471 | .name = "CCI_400", |
1458 | .fixed_hw_cntrs = 1, /* Cycle counter */ | 1472 | .fixed_hw_cntrs = FIXED_HW_CNTRS_CII_4XX, /* Cycle counter */ |
1459 | .num_hw_cntrs = 4, | 1473 | .num_hw_cntrs = NUM_HW_CNTRS_CII_4XX, |
1460 | .cntr_size = SZ_4K, | 1474 | .cntr_size = SZ_4K, |
1461 | .format_attrs = cci400_pmu_format_attrs, | 1475 | .format_attrs = cci400_pmu_format_attrs, |
1462 | .event_attrs = cci400_r0_pmu_event_attrs, | 1476 | .event_attrs = cci400_r0_pmu_event_attrs, |
@@ -1475,8 +1489,8 @@ static __maybe_unused struct cci_pmu_model cci_pmu_models[] = { | |||
1475 | }, | 1489 | }, |
1476 | [CCI400_R1] = { | 1490 | [CCI400_R1] = { |
1477 | .name = "CCI_400_r1", | 1491 | .name = "CCI_400_r1", |
1478 | .fixed_hw_cntrs = 1, /* Cycle counter */ | 1492 | .fixed_hw_cntrs = FIXED_HW_CNTRS_CII_4XX, /* Cycle counter */ |
1479 | .num_hw_cntrs = 4, | 1493 | .num_hw_cntrs = NUM_HW_CNTRS_CII_4XX, |
1480 | .cntr_size = SZ_4K, | 1494 | .cntr_size = SZ_4K, |
1481 | .format_attrs = cci400_pmu_format_attrs, | 1495 | .format_attrs = cci400_pmu_format_attrs, |
1482 | .event_attrs = cci400_r1_pmu_event_attrs, | 1496 | .event_attrs = cci400_r1_pmu_event_attrs, |
@@ -1497,8 +1511,8 @@ static __maybe_unused struct cci_pmu_model cci_pmu_models[] = { | |||
1497 | #ifdef CONFIG_ARM_CCI5xx_PMU | 1511 | #ifdef CONFIG_ARM_CCI5xx_PMU |
1498 | [CCI500_R0] = { | 1512 | [CCI500_R0] = { |
1499 | .name = "CCI_500", | 1513 | .name = "CCI_500", |
1500 | .fixed_hw_cntrs = 0, | 1514 | .fixed_hw_cntrs = FIXED_HW_CNTRS_CII_5XX, |
1501 | .num_hw_cntrs = 8, | 1515 | .num_hw_cntrs = NUM_HW_CNTRS_CII_5XX, |
1502 | .cntr_size = SZ_64K, | 1516 | .cntr_size = SZ_64K, |
1503 | .format_attrs = cci5xx_pmu_format_attrs, | 1517 | .format_attrs = cci5xx_pmu_format_attrs, |
1504 | .event_attrs = cci5xx_pmu_event_attrs, | 1518 | .event_attrs = cci5xx_pmu_event_attrs, |
@@ -1521,8 +1535,8 @@ static __maybe_unused struct cci_pmu_model cci_pmu_models[] = { | |||
1521 | }, | 1535 | }, |
1522 | [CCI550_R0] = { | 1536 | [CCI550_R0] = { |
1523 | .name = "CCI_550", | 1537 | .name = "CCI_550", |
1524 | .fixed_hw_cntrs = 0, | 1538 | .fixed_hw_cntrs = FIXED_HW_CNTRS_CII_5XX, |
1525 | .num_hw_cntrs = 8, | 1539 | .num_hw_cntrs = NUM_HW_CNTRS_CII_5XX, |
1526 | .cntr_size = SZ_64K, | 1540 | .cntr_size = SZ_64K, |
1527 | .format_attrs = cci5xx_pmu_format_attrs, | 1541 | .format_attrs = cci5xx_pmu_format_attrs, |
1528 | .event_attrs = cci5xx_pmu_event_attrs, | 1542 | .event_attrs = cci5xx_pmu_event_attrs, |
diff --git a/drivers/perf/arm-ccn.c b/drivers/perf/arm-ccn.c index b416ee18e6bb..4b15c36f4631 100644 --- a/drivers/perf/arm-ccn.c +++ b/drivers/perf/arm-ccn.c | |||
@@ -1485,17 +1485,9 @@ static int arm_ccn_probe(struct platform_device *pdev) | |||
1485 | platform_set_drvdata(pdev, ccn); | 1485 | platform_set_drvdata(pdev, ccn); |
1486 | 1486 | ||
1487 | res = platform_get_resource(pdev, IORESOURCE_MEM, 0); | 1487 | res = platform_get_resource(pdev, IORESOURCE_MEM, 0); |
1488 | if (!res) | 1488 | ccn->base = devm_ioremap_resource(ccn->dev, res); |
1489 | return -EINVAL; | 1489 | if (IS_ERR(ccn->base)) |
1490 | 1490 | return PTR_ERR(ccn->base); | |
1491 | if (!devm_request_mem_region(ccn->dev, res->start, | ||
1492 | resource_size(res), pdev->name)) | ||
1493 | return -EBUSY; | ||
1494 | |||
1495 | ccn->base = devm_ioremap(ccn->dev, res->start, | ||
1496 | resource_size(res)); | ||
1497 | if (!ccn->base) | ||
1498 | return -EFAULT; | ||
1499 | 1491 | ||
1500 | res = platform_get_resource(pdev, IORESOURCE_IRQ, 0); | 1492 | res = platform_get_resource(pdev, IORESOURCE_IRQ, 0); |
1501 | if (!res) | 1493 | if (!res) |
diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index a6347d487635..7f01f6f60b87 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c | |||
@@ -28,6 +28,14 @@ | |||
28 | static DEFINE_PER_CPU(struct arm_pmu *, cpu_armpmu); | 28 | static DEFINE_PER_CPU(struct arm_pmu *, cpu_armpmu); |
29 | static DEFINE_PER_CPU(int, cpu_irq); | 29 | static DEFINE_PER_CPU(int, cpu_irq); |
30 | 30 | ||
31 | static inline u64 arm_pmu_event_max_period(struct perf_event *event) | ||
32 | { | ||
33 | if (event->hw.flags & ARMPMU_EVT_64BIT) | ||
34 | return GENMASK_ULL(63, 0); | ||
35 | else | ||
36 | return GENMASK_ULL(31, 0); | ||
37 | } | ||
38 | |||
31 | static int | 39 | static int |
32 | armpmu_map_cache_event(const unsigned (*cache_map) | 40 | armpmu_map_cache_event(const unsigned (*cache_map) |
33 | [PERF_COUNT_HW_CACHE_MAX] | 41 | [PERF_COUNT_HW_CACHE_MAX] |
@@ -114,8 +122,10 @@ int armpmu_event_set_period(struct perf_event *event) | |||
114 | struct hw_perf_event *hwc = &event->hw; | 122 | struct hw_perf_event *hwc = &event->hw; |
115 | s64 left = local64_read(&hwc->period_left); | 123 | s64 left = local64_read(&hwc->period_left); |
116 | s64 period = hwc->sample_period; | 124 | s64 period = hwc->sample_period; |
125 | u64 max_period; | ||
117 | int ret = 0; | 126 | int ret = 0; |
118 | 127 | ||
128 | max_period = arm_pmu_event_max_period(event); | ||
119 | if (unlikely(left <= -period)) { | 129 | if (unlikely(left <= -period)) { |
120 | left = period; | 130 | left = period; |
121 | local64_set(&hwc->period_left, left); | 131 | local64_set(&hwc->period_left, left); |
@@ -136,12 +146,12 @@ int armpmu_event_set_period(struct perf_event *event) | |||
136 | * effect we are reducing max_period to account for | 146 | * effect we are reducing max_period to account for |
137 | * interrupt latency (and we are being very conservative). | 147 | * interrupt latency (and we are being very conservative). |
138 | */ | 148 | */ |
139 | if (left > (armpmu->max_period >> 1)) | 149 | if (left > (max_period >> 1)) |
140 | left = armpmu->max_period >> 1; | 150 | left = (max_period >> 1); |
141 | 151 | ||
142 | local64_set(&hwc->prev_count, (u64)-left); | 152 | local64_set(&hwc->prev_count, (u64)-left); |
143 | 153 | ||
144 | armpmu->write_counter(event, (u64)(-left) & 0xffffffff); | 154 | armpmu->write_counter(event, (u64)(-left) & max_period); |
145 | 155 | ||
146 | perf_event_update_userpage(event); | 156 | perf_event_update_userpage(event); |
147 | 157 | ||
@@ -153,6 +163,7 @@ u64 armpmu_event_update(struct perf_event *event) | |||
153 | struct arm_pmu *armpmu = to_arm_pmu(event->pmu); | 163 | struct arm_pmu *armpmu = to_arm_pmu(event->pmu); |
154 | struct hw_perf_event *hwc = &event->hw; | 164 | struct hw_perf_event *hwc = &event->hw; |
155 | u64 delta, prev_raw_count, new_raw_count; | 165 | u64 delta, prev_raw_count, new_raw_count; |
166 | u64 max_period = arm_pmu_event_max_period(event); | ||
156 | 167 | ||
157 | again: | 168 | again: |
158 | prev_raw_count = local64_read(&hwc->prev_count); | 169 | prev_raw_count = local64_read(&hwc->prev_count); |
@@ -162,7 +173,7 @@ again: | |||
162 | new_raw_count) != prev_raw_count) | 173 | new_raw_count) != prev_raw_count) |
163 | goto again; | 174 | goto again; |
164 | 175 | ||
165 | delta = (new_raw_count - prev_raw_count) & armpmu->max_period; | 176 | delta = (new_raw_count - prev_raw_count) & max_period; |
166 | 177 | ||
167 | local64_add(delta, &event->count); | 178 | local64_add(delta, &event->count); |
168 | local64_sub(delta, &hwc->period_left); | 179 | local64_sub(delta, &hwc->period_left); |
@@ -227,11 +238,10 @@ armpmu_del(struct perf_event *event, int flags) | |||
227 | 238 | ||
228 | armpmu_stop(event, PERF_EF_UPDATE); | 239 | armpmu_stop(event, PERF_EF_UPDATE); |
229 | hw_events->events[idx] = NULL; | 240 | hw_events->events[idx] = NULL; |
230 | clear_bit(idx, hw_events->used_mask); | 241 | armpmu->clear_event_idx(hw_events, event); |
231 | if (armpmu->clear_event_idx) | ||
232 | armpmu->clear_event_idx(hw_events, event); | ||
233 | |||
234 | perf_event_update_userpage(event); | 242 | perf_event_update_userpage(event); |
243 | /* Clear the allocated counter */ | ||
244 | hwc->idx = -1; | ||
235 | } | 245 | } |
236 | 246 | ||
237 | static int | 247 | static int |
@@ -360,6 +370,7 @@ __hw_perf_event_init(struct perf_event *event) | |||
360 | struct hw_perf_event *hwc = &event->hw; | 370 | struct hw_perf_event *hwc = &event->hw; |
361 | int mapping; | 371 | int mapping; |
362 | 372 | ||
373 | hwc->flags = 0; | ||
363 | mapping = armpmu->map_event(event); | 374 | mapping = armpmu->map_event(event); |
364 | 375 | ||
365 | if (mapping < 0) { | 376 | if (mapping < 0) { |
@@ -402,7 +413,7 @@ __hw_perf_event_init(struct perf_event *event) | |||
402 | * is far less likely to overtake the previous one unless | 413 | * is far less likely to overtake the previous one unless |
403 | * you have some serious IRQ latency issues. | 414 | * you have some serious IRQ latency issues. |
404 | */ | 415 | */ |
405 | hwc->sample_period = armpmu->max_period >> 1; | 416 | hwc->sample_period = arm_pmu_event_max_period(event) >> 1; |
406 | hwc->last_period = hwc->sample_period; | 417 | hwc->last_period = hwc->sample_period; |
407 | local64_set(&hwc->period_left, hwc->sample_period); | 418 | local64_set(&hwc->period_left, hwc->sample_period); |
408 | } | 419 | } |
@@ -654,14 +665,9 @@ static void cpu_pm_pmu_setup(struct arm_pmu *armpmu, unsigned long cmd) | |||
654 | int idx; | 665 | int idx; |
655 | 666 | ||
656 | for (idx = 0; idx < armpmu->num_events; idx++) { | 667 | for (idx = 0; idx < armpmu->num_events; idx++) { |
657 | /* | ||
658 | * If the counter is not used skip it, there is no | ||
659 | * need of stopping/restarting it. | ||
660 | */ | ||
661 | if (!test_bit(idx, hw_events->used_mask)) | ||
662 | continue; | ||
663 | |||
664 | event = hw_events->events[idx]; | 668 | event = hw_events->events[idx]; |
669 | if (!event) | ||
670 | continue; | ||
665 | 671 | ||
666 | switch (cmd) { | 672 | switch (cmd) { |
667 | case CPU_PM_ENTER: | 673 | case CPU_PM_ENTER: |
diff --git a/drivers/perf/arm_pmu_platform.c b/drivers/perf/arm_pmu_platform.c index 971ff336494a..96075cecb0ae 100644 --- a/drivers/perf/arm_pmu_platform.c +++ b/drivers/perf/arm_pmu_platform.c | |||
@@ -160,7 +160,7 @@ static int pmu_parse_irqs(struct arm_pmu *pmu) | |||
160 | static int armpmu_request_irqs(struct arm_pmu *armpmu) | 160 | static int armpmu_request_irqs(struct arm_pmu *armpmu) |
161 | { | 161 | { |
162 | struct pmu_hw_events __percpu *hw_events = armpmu->hw_events; | 162 | struct pmu_hw_events __percpu *hw_events = armpmu->hw_events; |
163 | int cpu, err; | 163 | int cpu, err = 0; |
164 | 164 | ||
165 | for_each_cpu(cpu, &armpmu->supported_cpus) { | 165 | for_each_cpu(cpu, &armpmu->supported_cpus) { |
166 | int irq = per_cpu(hw_events->irq, cpu); | 166 | int irq = per_cpu(hw_events->irq, cpu); |
diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pmu.c index 44df61397a38..9efd2413240c 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.c | |||
@@ -350,19 +350,21 @@ void hisi_uncore_pmu_disable(struct pmu *pmu) | |||
350 | 350 | ||
351 | /* | 351 | /* |
352 | * Read Super CPU cluster and CPU cluster ID from MPIDR_EL1. | 352 | * Read Super CPU cluster and CPU cluster ID from MPIDR_EL1. |
353 | * If multi-threading is supported, SCCL_ID is in MPIDR[aff3] and CCL_ID | 353 | * If multi-threading is supported, CCL_ID is the low 3-bits in MPIDR[Aff2] |
354 | * is in MPIDR[aff2]; if not, SCCL_ID is in MPIDR[aff2] and CCL_ID is | 354 | * and SCCL_ID is the upper 5-bits of Aff2 field; if not, SCCL_ID |
355 | * in MPIDR[aff1]. If this changes in future, this shall be updated. | 355 | * is in MPIDR[Aff2] and CCL_ID is in MPIDR[Aff1]. |
356 | */ | 356 | */ |
357 | static void hisi_read_sccl_and_ccl_id(int *sccl_id, int *ccl_id) | 357 | static void hisi_read_sccl_and_ccl_id(int *sccl_id, int *ccl_id) |
358 | { | 358 | { |
359 | u64 mpidr = read_cpuid_mpidr(); | 359 | u64 mpidr = read_cpuid_mpidr(); |
360 | 360 | ||
361 | if (mpidr & MPIDR_MT_BITMASK) { | 361 | if (mpidr & MPIDR_MT_BITMASK) { |
362 | int aff2 = MPIDR_AFFINITY_LEVEL(mpidr, 2); | ||
363 | |||
362 | if (sccl_id) | 364 | if (sccl_id) |
363 | *sccl_id = MPIDR_AFFINITY_LEVEL(mpidr, 3); | 365 | *sccl_id = aff2 >> 3; |
364 | if (ccl_id) | 366 | if (ccl_id) |
365 | *ccl_id = MPIDR_AFFINITY_LEVEL(mpidr, 2); | 367 | *ccl_id = aff2 & 0x7; |
366 | } else { | 368 | } else { |
367 | if (sccl_id) | 369 | if (sccl_id) |
368 | *sccl_id = MPIDR_AFFINITY_LEVEL(mpidr, 2); | 370 | *sccl_id = MPIDR_AFFINITY_LEVEL(mpidr, 2); |