aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/arm64/silicon-errata.txt1
-rw-r--r--arch/arm64/kernel/perf_event.c4
-rw-r--r--drivers/acpi/arm64/iort.c131
-rw-r--r--drivers/perf/Kconfig9
-rw-r--r--drivers/perf/Makefile1
-rw-r--r--drivers/perf/arm-cci.c21
-rw-r--r--drivers/perf/arm-ccn.c25
-rw-r--r--drivers/perf/arm_smmuv3_pmu.c865
-rw-r--r--include/linux/acpi_iort.h8
9 files changed, 1018 insertions, 47 deletions
diff --git a/Documentation/arm64/silicon-errata.txt b/Documentation/arm64/silicon-errata.txt
index d5a124d7e242..68d9b74fd751 100644
--- a/Documentation/arm64/silicon-errata.txt
+++ b/Documentation/arm64/silicon-errata.txt
@@ -78,6 +78,7 @@ stable kernels.
78| Hisilicon | Hip0{5,6,7} | #161010101 | HISILICON_ERRATUM_161010101 | 78| Hisilicon | Hip0{5,6,7} | #161010101 | HISILICON_ERRATUM_161010101 |
79| Hisilicon | Hip0{6,7} | #161010701 | N/A | 79| Hisilicon | Hip0{6,7} | #161010701 | N/A |
80| Hisilicon | Hip07 | #161600802 | HISILICON_ERRATUM_161600802 | 80| Hisilicon | Hip07 | #161600802 | HISILICON_ERRATUM_161600802 |
81| Hisilicon | Hip08 SMMU PMCG | #162001800 | N/A |
81| | | | | 82| | | | |
82| Qualcomm Tech. | Kryo/Falkor v1 | E1003 | QCOM_FALKOR_ERRATUM_1003 | 83| Qualcomm Tech. | Kryo/Falkor v1 | E1003 | QCOM_FALKOR_ERRATUM_1003 |
83| Qualcomm Tech. | Falkor v1 | E1009 | QCOM_FALKOR_ERRATUM_1009 | 84| Qualcomm Tech. | Falkor v1 | E1009 | QCOM_FALKOR_ERRATUM_1009 |
diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index 4addb38bc250..6164d389eed6 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -431,7 +431,7 @@ static inline u64 armv8pmu_read_hw_counter(struct perf_event *event)
431 return val; 431 return val;
432} 432}
433 433
434static inline u64 armv8pmu_read_counter(struct perf_event *event) 434static u64 armv8pmu_read_counter(struct perf_event *event)
435{ 435{
436 struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); 436 struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
437 struct hw_perf_event *hwc = &event->hw; 437 struct hw_perf_event *hwc = &event->hw;
@@ -468,7 +468,7 @@ static inline void armv8pmu_write_hw_counter(struct perf_event *event,
468 } 468 }
469} 469}
470 470
471static inline void armv8pmu_write_counter(struct perf_event *event, u64 value) 471static void armv8pmu_write_counter(struct perf_event *event, u64 value)
472{ 472{
473 struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); 473 struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
474 struct hw_perf_event *hwc = &event->hw; 474 struct hw_perf_event *hwc = &event->hw;
diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c
index a46c2c162c03..adbf7cbedf80 100644
--- a/drivers/acpi/arm64/iort.c
+++ b/drivers/acpi/arm64/iort.c
@@ -356,7 +356,8 @@ static struct acpi_iort_node *iort_node_get_id(struct acpi_iort_node *node,
356 if (map->flags & ACPI_IORT_ID_SINGLE_MAPPING) { 356 if (map->flags & ACPI_IORT_ID_SINGLE_MAPPING) {
357 if (node->type == ACPI_IORT_NODE_NAMED_COMPONENT || 357 if (node->type == ACPI_IORT_NODE_NAMED_COMPONENT ||
358 node->type == ACPI_IORT_NODE_PCI_ROOT_COMPLEX || 358 node->type == ACPI_IORT_NODE_PCI_ROOT_COMPLEX ||
359 node->type == ACPI_IORT_NODE_SMMU_V3) { 359 node->type == ACPI_IORT_NODE_SMMU_V3 ||
360 node->type == ACPI_IORT_NODE_PMCG) {
360 *id_out = map->output_base; 361 *id_out = map->output_base;
361 return parent; 362 return parent;
362 } 363 }
@@ -394,6 +395,8 @@ static int iort_get_id_mapping_index(struct acpi_iort_node *node)
394 } 395 }
395 396
396 return smmu->id_mapping_index; 397 return smmu->id_mapping_index;
398 case ACPI_IORT_NODE_PMCG:
399 return 0;
397 default: 400 default:
398 return -EINVAL; 401 return -EINVAL;
399 } 402 }
@@ -1218,14 +1221,23 @@ static void __init arm_smmu_v3_init_resources(struct resource *res,
1218 } 1221 }
1219} 1222}
1220 1223
1221static bool __init arm_smmu_v3_is_coherent(struct acpi_iort_node *node) 1224static void __init arm_smmu_v3_dma_configure(struct device *dev,
1225 struct acpi_iort_node *node)
1222{ 1226{
1223 struct acpi_iort_smmu_v3 *smmu; 1227 struct acpi_iort_smmu_v3 *smmu;
1228 enum dev_dma_attr attr;
1224 1229
1225 /* Retrieve SMMUv3 specific data */ 1230 /* Retrieve SMMUv3 specific data */
1226 smmu = (struct acpi_iort_smmu_v3 *)node->node_data; 1231 smmu = (struct acpi_iort_smmu_v3 *)node->node_data;
1227 1232
1228 return smmu->flags & ACPI_IORT_SMMU_V3_COHACC_OVERRIDE; 1233 attr = (smmu->flags & ACPI_IORT_SMMU_V3_COHACC_OVERRIDE) ?
1234 DEV_DMA_COHERENT : DEV_DMA_NON_COHERENT;
1235
1236 /* We expect the dma masks to be equivalent for all SMMUv3 set-ups */
1237 dev->dma_mask = &dev->coherent_dma_mask;
1238
1239 /* Configure DMA for the page table walker */
1240 acpi_dma_configure(dev, attr);
1229} 1241}
1230 1242
1231#if defined(CONFIG_ACPI_NUMA) 1243#if defined(CONFIG_ACPI_NUMA)
@@ -1307,30 +1319,96 @@ static void __init arm_smmu_init_resources(struct resource *res,
1307 } 1319 }
1308} 1320}
1309 1321
1310static bool __init arm_smmu_is_coherent(struct acpi_iort_node *node) 1322static void __init arm_smmu_dma_configure(struct device *dev,
1323 struct acpi_iort_node *node)
1311{ 1324{
1312 struct acpi_iort_smmu *smmu; 1325 struct acpi_iort_smmu *smmu;
1326 enum dev_dma_attr attr;
1313 1327
1314 /* Retrieve SMMU specific data */ 1328 /* Retrieve SMMU specific data */
1315 smmu = (struct acpi_iort_smmu *)node->node_data; 1329 smmu = (struct acpi_iort_smmu *)node->node_data;
1316 1330
1317 return smmu->flags & ACPI_IORT_SMMU_COHERENT_WALK; 1331 attr = (smmu->flags & ACPI_IORT_SMMU_COHERENT_WALK) ?
1332 DEV_DMA_COHERENT : DEV_DMA_NON_COHERENT;
1333
1334 /* We expect the dma masks to be equivalent for SMMU set-ups */
1335 dev->dma_mask = &dev->coherent_dma_mask;
1336
1337 /* Configure DMA for the page table walker */
1338 acpi_dma_configure(dev, attr);
1339}
1340
1341static int __init arm_smmu_v3_pmcg_count_resources(struct acpi_iort_node *node)
1342{
1343 struct acpi_iort_pmcg *pmcg;
1344
1345 /* Retrieve PMCG specific data */
1346 pmcg = (struct acpi_iort_pmcg *)node->node_data;
1347
1348 /*
1349 * There are always 2 memory resources.
1350 * If the overflow_gsiv is present then add that for a total of 3.
1351 */
1352 return pmcg->overflow_gsiv ? 3 : 2;
1353}
1354
1355static void __init arm_smmu_v3_pmcg_init_resources(struct resource *res,
1356 struct acpi_iort_node *node)
1357{
1358 struct acpi_iort_pmcg *pmcg;
1359
1360 /* Retrieve PMCG specific data */
1361 pmcg = (struct acpi_iort_pmcg *)node->node_data;
1362
1363 res[0].start = pmcg->page0_base_address;
1364 res[0].end = pmcg->page0_base_address + SZ_4K - 1;
1365 res[0].flags = IORESOURCE_MEM;
1366 res[1].start = pmcg->page1_base_address;
1367 res[1].end = pmcg->page1_base_address + SZ_4K - 1;
1368 res[1].flags = IORESOURCE_MEM;
1369
1370 if (pmcg->overflow_gsiv)
1371 acpi_iort_register_irq(pmcg->overflow_gsiv, "overflow",
1372 ACPI_EDGE_SENSITIVE, &res[2]);
1373}
1374
1375static struct acpi_platform_list pmcg_plat_info[] __initdata = {
1376 /* HiSilicon Hip08 Platform */
1377 {"HISI ", "HIP08 ", 0, ACPI_SIG_IORT, greater_than_or_equal,
1378 "Erratum #162001800", IORT_SMMU_V3_PMCG_HISI_HIP08},
1379 { }
1380};
1381
1382static int __init arm_smmu_v3_pmcg_add_platdata(struct platform_device *pdev)
1383{
1384 u32 model;
1385 int idx;
1386
1387 idx = acpi_match_platform_list(pmcg_plat_info);
1388 if (idx >= 0)
1389 model = pmcg_plat_info[idx].data;
1390 else
1391 model = IORT_SMMU_V3_PMCG_GENERIC;
1392
1393 return platform_device_add_data(pdev, &model, sizeof(model));
1318} 1394}
1319 1395
1320struct iort_dev_config { 1396struct iort_dev_config {
1321 const char *name; 1397 const char *name;
1322 int (*dev_init)(struct acpi_iort_node *node); 1398 int (*dev_init)(struct acpi_iort_node *node);
1323 bool (*dev_is_coherent)(struct acpi_iort_node *node); 1399 void (*dev_dma_configure)(struct device *dev,
1400 struct acpi_iort_node *node);
1324 int (*dev_count_resources)(struct acpi_iort_node *node); 1401 int (*dev_count_resources)(struct acpi_iort_node *node);
1325 void (*dev_init_resources)(struct resource *res, 1402 void (*dev_init_resources)(struct resource *res,
1326 struct acpi_iort_node *node); 1403 struct acpi_iort_node *node);
1327 int (*dev_set_proximity)(struct device *dev, 1404 int (*dev_set_proximity)(struct device *dev,
1328 struct acpi_iort_node *node); 1405 struct acpi_iort_node *node);
1406 int (*dev_add_platdata)(struct platform_device *pdev);
1329}; 1407};
1330 1408
1331static const struct iort_dev_config iort_arm_smmu_v3_cfg __initconst = { 1409static const struct iort_dev_config iort_arm_smmu_v3_cfg __initconst = {
1332 .name = "arm-smmu-v3", 1410 .name = "arm-smmu-v3",
1333 .dev_is_coherent = arm_smmu_v3_is_coherent, 1411 .dev_dma_configure = arm_smmu_v3_dma_configure,
1334 .dev_count_resources = arm_smmu_v3_count_resources, 1412 .dev_count_resources = arm_smmu_v3_count_resources,
1335 .dev_init_resources = arm_smmu_v3_init_resources, 1413 .dev_init_resources = arm_smmu_v3_init_resources,
1336 .dev_set_proximity = arm_smmu_v3_set_proximity, 1414 .dev_set_proximity = arm_smmu_v3_set_proximity,
@@ -1338,9 +1416,16 @@ static const struct iort_dev_config iort_arm_smmu_v3_cfg __initconst = {
1338 1416
1339static const struct iort_dev_config iort_arm_smmu_cfg __initconst = { 1417static const struct iort_dev_config iort_arm_smmu_cfg __initconst = {
1340 .name = "arm-smmu", 1418 .name = "arm-smmu",
1341 .dev_is_coherent = arm_smmu_is_coherent, 1419 .dev_dma_configure = arm_smmu_dma_configure,
1342 .dev_count_resources = arm_smmu_count_resources, 1420 .dev_count_resources = arm_smmu_count_resources,
1343 .dev_init_resources = arm_smmu_init_resources 1421 .dev_init_resources = arm_smmu_init_resources,
1422};
1423
1424static const struct iort_dev_config iort_arm_smmu_v3_pmcg_cfg __initconst = {
1425 .name = "arm-smmu-v3-pmcg",
1426 .dev_count_resources = arm_smmu_v3_pmcg_count_resources,
1427 .dev_init_resources = arm_smmu_v3_pmcg_init_resources,
1428 .dev_add_platdata = arm_smmu_v3_pmcg_add_platdata,
1344}; 1429};
1345 1430
1346static __init const struct iort_dev_config *iort_get_dev_cfg( 1431static __init const struct iort_dev_config *iort_get_dev_cfg(
@@ -1351,6 +1436,8 @@ static __init const struct iort_dev_config *iort_get_dev_cfg(
1351 return &iort_arm_smmu_v3_cfg; 1436 return &iort_arm_smmu_v3_cfg;
1352 case ACPI_IORT_NODE_SMMU: 1437 case ACPI_IORT_NODE_SMMU:
1353 return &iort_arm_smmu_cfg; 1438 return &iort_arm_smmu_cfg;
1439 case ACPI_IORT_NODE_PMCG:
1440 return &iort_arm_smmu_v3_pmcg_cfg;
1354 default: 1441 default:
1355 return NULL; 1442 return NULL;
1356 } 1443 }
@@ -1368,7 +1455,6 @@ static int __init iort_add_platform_device(struct acpi_iort_node *node,
1368 struct fwnode_handle *fwnode; 1455 struct fwnode_handle *fwnode;
1369 struct platform_device *pdev; 1456 struct platform_device *pdev;
1370 struct resource *r; 1457 struct resource *r;
1371 enum dev_dma_attr attr;
1372 int ret, count; 1458 int ret, count;
1373 1459
1374 pdev = platform_device_alloc(ops->name, PLATFORM_DEVID_AUTO); 1460 pdev = platform_device_alloc(ops->name, PLATFORM_DEVID_AUTO);
@@ -1402,19 +1488,19 @@ static int __init iort_add_platform_device(struct acpi_iort_node *node,
1402 goto dev_put; 1488 goto dev_put;
1403 1489
1404 /* 1490 /*
1405 * Add a copy of IORT node pointer to platform_data to 1491 * Platform devices based on PMCG nodes uses platform_data to
1406 * be used to retrieve IORT data information. 1492 * pass the hardware model info to the driver. For others, add
1493 * a copy of IORT node pointer to platform_data to be used to
1494 * retrieve IORT data information.
1407 */ 1495 */
1408 ret = platform_device_add_data(pdev, &node, sizeof(node)); 1496 if (ops->dev_add_platdata)
1497 ret = ops->dev_add_platdata(pdev);
1498 else
1499 ret = platform_device_add_data(pdev, &node, sizeof(node));
1500
1409 if (ret) 1501 if (ret)
1410 goto dev_put; 1502 goto dev_put;
1411 1503
1412 /*
1413 * We expect the dma masks to be equivalent for
1414 * all SMMUs set-ups
1415 */
1416 pdev->dev.dma_mask = &pdev->dev.coherent_dma_mask;
1417
1418 fwnode = iort_get_fwnode(node); 1504 fwnode = iort_get_fwnode(node);
1419 1505
1420 if (!fwnode) { 1506 if (!fwnode) {
@@ -1424,11 +1510,8 @@ static int __init iort_add_platform_device(struct acpi_iort_node *node,
1424 1510
1425 pdev->dev.fwnode = fwnode; 1511 pdev->dev.fwnode = fwnode;
1426 1512
1427 attr = ops->dev_is_coherent && ops->dev_is_coherent(node) ? 1513 if (ops->dev_dma_configure)
1428 DEV_DMA_COHERENT : DEV_DMA_NON_COHERENT; 1514 ops->dev_dma_configure(&pdev->dev, node);
1429
1430 /* Configure DMA for the page table walker */
1431 acpi_dma_configure(&pdev->dev, attr);
1432 1515
1433 iort_set_device_domain(&pdev->dev, node); 1516 iort_set_device_domain(&pdev->dev, node);
1434 1517
diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig
index af9bc178495d..a94e586a58b2 100644
--- a/drivers/perf/Kconfig
+++ b/drivers/perf/Kconfig
@@ -52,6 +52,15 @@ config ARM_PMU_ACPI
52 depends on ARM_PMU && ACPI 52 depends on ARM_PMU && ACPI
53 def_bool y 53 def_bool y
54 54
55config ARM_SMMU_V3_PMU
56 tristate "ARM SMMUv3 Performance Monitors Extension"
57 depends on ARM64 && ACPI && ARM_SMMU_V3
58 help
59 Provides support for the ARM SMMUv3 Performance Monitor Counter
60 Groups (PMCG), which provide monitoring of transactions passing
61 through the SMMU and allow the resulting information to be filtered
62 based on the Stream ID of the corresponding master.
63
55config ARM_DSU_PMU 64config ARM_DSU_PMU
56 tristate "ARM DynamIQ Shared Unit (DSU) PMU" 65 tristate "ARM DynamIQ Shared Unit (DSU) PMU"
57 depends on ARM64 66 depends on ARM64
diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile
index 909f27fd9db3..30489941f3d6 100644
--- a/drivers/perf/Makefile
+++ b/drivers/perf/Makefile
@@ -4,6 +4,7 @@ obj-$(CONFIG_ARM_CCN) += arm-ccn.o
4obj-$(CONFIG_ARM_DSU_PMU) += arm_dsu_pmu.o 4obj-$(CONFIG_ARM_DSU_PMU) += arm_dsu_pmu.o
5obj-$(CONFIG_ARM_PMU) += arm_pmu.o arm_pmu_platform.o 5obj-$(CONFIG_ARM_PMU) += arm_pmu.o arm_pmu_platform.o
6obj-$(CONFIG_ARM_PMU_ACPI) += arm_pmu_acpi.o 6obj-$(CONFIG_ARM_PMU_ACPI) += arm_pmu_acpi.o
7obj-$(CONFIG_ARM_SMMU_V3_PMU) += arm_smmuv3_pmu.o
7obj-$(CONFIG_HISI_PMU) += hisilicon/ 8obj-$(CONFIG_HISI_PMU) += hisilicon/
8obj-$(CONFIG_QCOM_L2_PMU) += qcom_l2_pmu.o 9obj-$(CONFIG_QCOM_L2_PMU) += qcom_l2_pmu.o
9obj-$(CONFIG_QCOM_L3_PMU) += qcom_l3_pmu.o 10obj-$(CONFIG_QCOM_L3_PMU) += qcom_l3_pmu.o
diff --git a/drivers/perf/arm-cci.c b/drivers/perf/arm-cci.c
index bfd03e023308..8f8606b9bc9e 100644
--- a/drivers/perf/arm-cci.c
+++ b/drivers/perf/arm-cci.c
@@ -1684,21 +1684,24 @@ static int cci_pmu_probe(struct platform_device *pdev)
1684 raw_spin_lock_init(&cci_pmu->hw_events.pmu_lock); 1684 raw_spin_lock_init(&cci_pmu->hw_events.pmu_lock);
1685 mutex_init(&cci_pmu->reserve_mutex); 1685 mutex_init(&cci_pmu->reserve_mutex);
1686 atomic_set(&cci_pmu->active_events, 0); 1686 atomic_set(&cci_pmu->active_events, 0);
1687 cci_pmu->cpu = get_cpu();
1688
1689 ret = cci_pmu_init(cci_pmu, pdev);
1690 if (ret) {
1691 put_cpu();
1692 return ret;
1693 }
1694 1687
1688 cci_pmu->cpu = raw_smp_processor_id();
1689 g_cci_pmu = cci_pmu;
1695 cpuhp_setup_state_nocalls(CPUHP_AP_PERF_ARM_CCI_ONLINE, 1690 cpuhp_setup_state_nocalls(CPUHP_AP_PERF_ARM_CCI_ONLINE,
1696 "perf/arm/cci:online", NULL, 1691 "perf/arm/cci:online", NULL,
1697 cci_pmu_offline_cpu); 1692 cci_pmu_offline_cpu);
1698 put_cpu(); 1693
1699 g_cci_pmu = cci_pmu; 1694 ret = cci_pmu_init(cci_pmu, pdev);
1695 if (ret)
1696 goto error_pmu_init;
1697
1700 pr_info("ARM %s PMU driver probed", cci_pmu->model->name); 1698 pr_info("ARM %s PMU driver probed", cci_pmu->model->name);
1701 return 0; 1699 return 0;
1700
1701error_pmu_init:
1702 cpuhp_remove_state(CPUHP_AP_PERF_ARM_CCI_ONLINE);
1703 g_cci_pmu = NULL;
1704 return ret;
1702} 1705}
1703 1706
1704static int cci_pmu_remove(struct platform_device *pdev) 1707static int cci_pmu_remove(struct platform_device *pdev)
diff --git a/drivers/perf/arm-ccn.c b/drivers/perf/arm-ccn.c
index 2ae76026e947..0bb52d9bdcf7 100644
--- a/drivers/perf/arm-ccn.c
+++ b/drivers/perf/arm-ccn.c
@@ -167,7 +167,7 @@ struct arm_ccn_dt {
167 167
168 struct hrtimer hrtimer; 168 struct hrtimer hrtimer;
169 169
170 cpumask_t cpu; 170 unsigned int cpu;
171 struct hlist_node node; 171 struct hlist_node node;
172 172
173 struct pmu pmu; 173 struct pmu pmu;
@@ -559,7 +559,7 @@ static ssize_t arm_ccn_pmu_cpumask_show(struct device *dev,
559{ 559{
560 struct arm_ccn *ccn = pmu_to_arm_ccn(dev_get_drvdata(dev)); 560 struct arm_ccn *ccn = pmu_to_arm_ccn(dev_get_drvdata(dev));
561 561
562 return cpumap_print_to_pagebuf(true, buf, &ccn->dt.cpu); 562 return cpumap_print_to_pagebuf(true, buf, cpumask_of(ccn->dt.cpu));
563} 563}
564 564
565static struct device_attribute arm_ccn_pmu_cpumask_attr = 565static struct device_attribute arm_ccn_pmu_cpumask_attr =
@@ -759,7 +759,7 @@ static int arm_ccn_pmu_event_init(struct perf_event *event)
759 * mitigate this, we enforce CPU assignment to one, selected 759 * mitigate this, we enforce CPU assignment to one, selected
760 * processor (the one described in the "cpumask" attribute). 760 * processor (the one described in the "cpumask" attribute).
761 */ 761 */
762 event->cpu = cpumask_first(&ccn->dt.cpu); 762 event->cpu = ccn->dt.cpu;
763 763
764 node_xp = CCN_CONFIG_NODE(event->attr.config); 764 node_xp = CCN_CONFIG_NODE(event->attr.config);
765 type = CCN_CONFIG_TYPE(event->attr.config); 765 type = CCN_CONFIG_TYPE(event->attr.config);
@@ -1215,15 +1215,15 @@ static int arm_ccn_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
1215 struct arm_ccn *ccn = container_of(dt, struct arm_ccn, dt); 1215 struct arm_ccn *ccn = container_of(dt, struct arm_ccn, dt);
1216 unsigned int target; 1216 unsigned int target;
1217 1217
1218 if (!cpumask_test_and_clear_cpu(cpu, &dt->cpu)) 1218 if (cpu != dt->cpu)
1219 return 0; 1219 return 0;
1220 target = cpumask_any_but(cpu_online_mask, cpu); 1220 target = cpumask_any_but(cpu_online_mask, cpu);
1221 if (target >= nr_cpu_ids) 1221 if (target >= nr_cpu_ids)
1222 return 0; 1222 return 0;
1223 perf_pmu_migrate_context(&dt->pmu, cpu, target); 1223 perf_pmu_migrate_context(&dt->pmu, cpu, target);
1224 cpumask_set_cpu(target, &dt->cpu); 1224 dt->cpu = target;
1225 if (ccn->irq) 1225 if (ccn->irq)
1226 WARN_ON(irq_set_affinity_hint(ccn->irq, &dt->cpu) != 0); 1226 WARN_ON(irq_set_affinity_hint(ccn->irq, cpumask_of(dt->cpu)));
1227 return 0; 1227 return 0;
1228} 1228}
1229 1229
@@ -1299,29 +1299,30 @@ static int arm_ccn_pmu_init(struct arm_ccn *ccn)
1299 } 1299 }
1300 1300
1301 /* Pick one CPU which we will use to collect data from CCN... */ 1301 /* Pick one CPU which we will use to collect data from CCN... */
1302 cpumask_set_cpu(get_cpu(), &ccn->dt.cpu); 1302 ccn->dt.cpu = raw_smp_processor_id();
1303 1303
1304 /* Also make sure that the overflow interrupt is handled by this CPU */ 1304 /* Also make sure that the overflow interrupt is handled by this CPU */
1305 if (ccn->irq) { 1305 if (ccn->irq) {
1306 err = irq_set_affinity_hint(ccn->irq, &ccn->dt.cpu); 1306 err = irq_set_affinity_hint(ccn->irq, cpumask_of(ccn->dt.cpu));
1307 if (err) { 1307 if (err) {
1308 dev_err(ccn->dev, "Failed to set interrupt affinity!\n"); 1308 dev_err(ccn->dev, "Failed to set interrupt affinity!\n");
1309 goto error_set_affinity; 1309 goto error_set_affinity;
1310 } 1310 }
1311 } 1311 }
1312 1312
1313 cpuhp_state_add_instance_nocalls(CPUHP_AP_PERF_ARM_CCN_ONLINE,
1314 &ccn->dt.node);
1315
1313 err = perf_pmu_register(&ccn->dt.pmu, name, -1); 1316 err = perf_pmu_register(&ccn->dt.pmu, name, -1);
1314 if (err) 1317 if (err)
1315 goto error_pmu_register; 1318 goto error_pmu_register;
1316 1319
1317 cpuhp_state_add_instance_nocalls(CPUHP_AP_PERF_ARM_CCN_ONLINE,
1318 &ccn->dt.node);
1319 put_cpu();
1320 return 0; 1320 return 0;
1321 1321
1322error_pmu_register: 1322error_pmu_register:
1323 cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_CCN_ONLINE,
1324 &ccn->dt.node);
1323error_set_affinity: 1325error_set_affinity:
1324 put_cpu();
1325error_choose_name: 1326error_choose_name:
1326 ida_simple_remove(&arm_ccn_pmu_ida, ccn->dt.id); 1327 ida_simple_remove(&arm_ccn_pmu_ida, ccn->dt.id);
1327 for (i = 0; i < ccn->num_xps; i++) 1328 for (i = 0; i < ccn->num_xps; i++)
diff --git a/drivers/perf/arm_smmuv3_pmu.c b/drivers/perf/arm_smmuv3_pmu.c
new file mode 100644
index 000000000000..da71c741cb46
--- /dev/null
+++ b/drivers/perf/arm_smmuv3_pmu.c
@@ -0,0 +1,865 @@
1// SPDX-License-Identifier: GPL-2.0
2
3/*
4 * This driver adds support for perf events to use the Performance
5 * Monitor Counter Groups (PMCG) associated with an SMMUv3 node
6 * to monitor that node.
7 *
8 * SMMUv3 PMCG devices are named as smmuv3_pmcg_<phys_addr_page> where
9 * <phys_addr_page> is the physical page address of the SMMU PMCG wrapped
10 * to 4K boundary. For example, the PMCG at 0xff88840000 is named
11 * smmuv3_pmcg_ff88840
12 *
13 * Filtering by stream id is done by specifying filtering parameters
14 * with the event. options are:
15 * filter_enable - 0 = no filtering, 1 = filtering enabled
16 * filter_span - 0 = exact match, 1 = pattern match
17 * filter_stream_id - pattern to filter against
18 *
19 * To match a partial StreamID where the X most-significant bits must match
20 * but the Y least-significant bits might differ, STREAMID is programmed
21 * with a value that contains:
22 * STREAMID[Y - 1] == 0.
23 * STREAMID[Y - 2:0] == 1 (where Y > 1).
24 * The remainder of implemented bits of STREAMID (X bits, from bit Y upwards)
25 * contain a value to match from the corresponding bits of event StreamID.
26 *
27 * Example: perf stat -e smmuv3_pmcg_ff88840/transaction,filter_enable=1,
28 * filter_span=1,filter_stream_id=0x42/ -a netperf
29 * Applies filter pattern 0x42 to transaction events, which means events
30 * matching stream ids 0x42 and 0x43 are counted. Further filtering
31 * information is available in the SMMU documentation.
32 *
33 * SMMU events are not attributable to a CPU, so task mode and sampling
34 * are not supported.
35 */
36
37#include <linux/acpi.h>
38#include <linux/acpi_iort.h>
39#include <linux/bitfield.h>
40#include <linux/bitops.h>
41#include <linux/cpuhotplug.h>
42#include <linux/cpumask.h>
43#include <linux/device.h>
44#include <linux/errno.h>
45#include <linux/interrupt.h>
46#include <linux/irq.h>
47#include <linux/kernel.h>
48#include <linux/list.h>
49#include <linux/msi.h>
50#include <linux/perf_event.h>
51#include <linux/platform_device.h>
52#include <linux/smp.h>
53#include <linux/sysfs.h>
54#include <linux/types.h>
55
56#define SMMU_PMCG_EVCNTR0 0x0
57#define SMMU_PMCG_EVCNTR(n, stride) (SMMU_PMCG_EVCNTR0 + (n) * (stride))
58#define SMMU_PMCG_EVTYPER0 0x400
59#define SMMU_PMCG_EVTYPER(n) (SMMU_PMCG_EVTYPER0 + (n) * 4)
60#define SMMU_PMCG_SID_SPAN_SHIFT 29
61#define SMMU_PMCG_SMR0 0xA00
62#define SMMU_PMCG_SMR(n) (SMMU_PMCG_SMR0 + (n) * 4)
63#define SMMU_PMCG_CNTENSET0 0xC00
64#define SMMU_PMCG_CNTENCLR0 0xC20
65#define SMMU_PMCG_INTENSET0 0xC40
66#define SMMU_PMCG_INTENCLR0 0xC60
67#define SMMU_PMCG_OVSCLR0 0xC80
68#define SMMU_PMCG_OVSSET0 0xCC0
69#define SMMU_PMCG_CFGR 0xE00
70#define SMMU_PMCG_CFGR_SID_FILTER_TYPE BIT(23)
71#define SMMU_PMCG_CFGR_MSI BIT(21)
72#define SMMU_PMCG_CFGR_RELOC_CTRS BIT(20)
73#define SMMU_PMCG_CFGR_SIZE GENMASK(13, 8)
74#define SMMU_PMCG_CFGR_NCTR GENMASK(5, 0)
75#define SMMU_PMCG_CR 0xE04
76#define SMMU_PMCG_CR_ENABLE BIT(0)
77#define SMMU_PMCG_CEID0 0xE20
78#define SMMU_PMCG_CEID1 0xE28
79#define SMMU_PMCG_IRQ_CTRL 0xE50
80#define SMMU_PMCG_IRQ_CTRL_IRQEN BIT(0)
81#define SMMU_PMCG_IRQ_CFG0 0xE58
82#define SMMU_PMCG_IRQ_CFG1 0xE60
83#define SMMU_PMCG_IRQ_CFG2 0xE64
84
85/* MSI config fields */
86#define MSI_CFG0_ADDR_MASK GENMASK_ULL(51, 2)
87#define MSI_CFG2_MEMATTR_DEVICE_nGnRE 0x1
88
89#define SMMU_PMCG_DEFAULT_FILTER_SPAN 1
90#define SMMU_PMCG_DEFAULT_FILTER_SID GENMASK(31, 0)
91
92#define SMMU_PMCG_MAX_COUNTERS 64
93#define SMMU_PMCG_ARCH_MAX_EVENTS 128
94
95#define SMMU_PMCG_PA_SHIFT 12
96
97#define SMMU_PMCG_EVCNTR_RDONLY BIT(0)
98
99static int cpuhp_state_num;
100
101struct smmu_pmu {
102 struct hlist_node node;
103 struct perf_event *events[SMMU_PMCG_MAX_COUNTERS];
104 DECLARE_BITMAP(used_counters, SMMU_PMCG_MAX_COUNTERS);
105 DECLARE_BITMAP(supported_events, SMMU_PMCG_ARCH_MAX_EVENTS);
106 unsigned int irq;
107 unsigned int on_cpu;
108 struct pmu pmu;
109 unsigned int num_counters;
110 struct device *dev;
111 void __iomem *reg_base;
112 void __iomem *reloc_base;
113 u64 counter_mask;
114 u32 options;
115 bool global_filter;
116 u32 global_filter_span;
117 u32 global_filter_sid;
118};
119
120#define to_smmu_pmu(p) (container_of(p, struct smmu_pmu, pmu))
121
122#define SMMU_PMU_EVENT_ATTR_EXTRACTOR(_name, _config, _start, _end) \
123 static inline u32 get_##_name(struct perf_event *event) \
124 { \
125 return FIELD_GET(GENMASK_ULL(_end, _start), \
126 event->attr._config); \
127 } \
128
129SMMU_PMU_EVENT_ATTR_EXTRACTOR(event, config, 0, 15);
130SMMU_PMU_EVENT_ATTR_EXTRACTOR(filter_stream_id, config1, 0, 31);
131SMMU_PMU_EVENT_ATTR_EXTRACTOR(filter_span, config1, 32, 32);
132SMMU_PMU_EVENT_ATTR_EXTRACTOR(filter_enable, config1, 33, 33);
133
134static inline void smmu_pmu_enable(struct pmu *pmu)
135{
136 struct smmu_pmu *smmu_pmu = to_smmu_pmu(pmu);
137
138 writel(SMMU_PMCG_IRQ_CTRL_IRQEN,
139 smmu_pmu->reg_base + SMMU_PMCG_IRQ_CTRL);
140 writel(SMMU_PMCG_CR_ENABLE, smmu_pmu->reg_base + SMMU_PMCG_CR);
141}
142
143static inline void smmu_pmu_disable(struct pmu *pmu)
144{
145 struct smmu_pmu *smmu_pmu = to_smmu_pmu(pmu);
146
147 writel(0, smmu_pmu->reg_base + SMMU_PMCG_CR);
148 writel(0, smmu_pmu->reg_base + SMMU_PMCG_IRQ_CTRL);
149}
150
151static inline void smmu_pmu_counter_set_value(struct smmu_pmu *smmu_pmu,
152 u32 idx, u64 value)
153{
154 if (smmu_pmu->counter_mask & BIT(32))
155 writeq(value, smmu_pmu->reloc_base + SMMU_PMCG_EVCNTR(idx, 8));
156 else
157 writel(value, smmu_pmu->reloc_base + SMMU_PMCG_EVCNTR(idx, 4));
158}
159
160static inline u64 smmu_pmu_counter_get_value(struct smmu_pmu *smmu_pmu, u32 idx)
161{
162 u64 value;
163
164 if (smmu_pmu->counter_mask & BIT(32))
165 value = readq(smmu_pmu->reloc_base + SMMU_PMCG_EVCNTR(idx, 8));
166 else
167 value = readl(smmu_pmu->reloc_base + SMMU_PMCG_EVCNTR(idx, 4));
168
169 return value;
170}
171
172static inline void smmu_pmu_counter_enable(struct smmu_pmu *smmu_pmu, u32 idx)
173{
174 writeq(BIT(idx), smmu_pmu->reg_base + SMMU_PMCG_CNTENSET0);
175}
176
177static inline void smmu_pmu_counter_disable(struct smmu_pmu *smmu_pmu, u32 idx)
178{
179 writeq(BIT(idx), smmu_pmu->reg_base + SMMU_PMCG_CNTENCLR0);
180}
181
182static inline void smmu_pmu_interrupt_enable(struct smmu_pmu *smmu_pmu, u32 idx)
183{
184 writeq(BIT(idx), smmu_pmu->reg_base + SMMU_PMCG_INTENSET0);
185}
186
187static inline void smmu_pmu_interrupt_disable(struct smmu_pmu *smmu_pmu,
188 u32 idx)
189{
190 writeq(BIT(idx), smmu_pmu->reg_base + SMMU_PMCG_INTENCLR0);
191}
192
193static inline void smmu_pmu_set_evtyper(struct smmu_pmu *smmu_pmu, u32 idx,
194 u32 val)
195{
196 writel(val, smmu_pmu->reg_base + SMMU_PMCG_EVTYPER(idx));
197}
198
199static inline void smmu_pmu_set_smr(struct smmu_pmu *smmu_pmu, u32 idx, u32 val)
200{
201 writel(val, smmu_pmu->reg_base + SMMU_PMCG_SMR(idx));
202}
203
204static void smmu_pmu_event_update(struct perf_event *event)
205{
206 struct hw_perf_event *hwc = &event->hw;
207 struct smmu_pmu *smmu_pmu = to_smmu_pmu(event->pmu);
208 u64 delta, prev, now;
209 u32 idx = hwc->idx;
210
211 do {
212 prev = local64_read(&hwc->prev_count);
213 now = smmu_pmu_counter_get_value(smmu_pmu, idx);
214 } while (local64_cmpxchg(&hwc->prev_count, prev, now) != prev);
215
216 /* handle overflow. */
217 delta = now - prev;
218 delta &= smmu_pmu->counter_mask;
219
220 local64_add(delta, &event->count);
221}
222
223static void smmu_pmu_set_period(struct smmu_pmu *smmu_pmu,
224 struct hw_perf_event *hwc)
225{
226 u32 idx = hwc->idx;
227 u64 new;
228
229 if (smmu_pmu->options & SMMU_PMCG_EVCNTR_RDONLY) {
230 /*
231 * On platforms that require this quirk, if the counter starts
232 * at < half_counter value and wraps, the current logic of
233 * handling the overflow may not work. It is expected that,
234 * those platforms will have full 64 counter bits implemented
235 * so that such a possibility is remote(eg: HiSilicon HIP08).
236 */
237 new = smmu_pmu_counter_get_value(smmu_pmu, idx);
238 } else {
239 /*
240 * We limit the max period to half the max counter value
241 * of the counter size, so that even in the case of extreme
242 * interrupt latency the counter will (hopefully) not wrap
243 * past its initial value.
244 */
245 new = smmu_pmu->counter_mask >> 1;
246 smmu_pmu_counter_set_value(smmu_pmu, idx, new);
247 }
248
249 local64_set(&hwc->prev_count, new);
250}
251
252static void smmu_pmu_set_event_filter(struct perf_event *event,
253 int idx, u32 span, u32 sid)
254{
255 struct smmu_pmu *smmu_pmu = to_smmu_pmu(event->pmu);
256 u32 evtyper;
257
258 evtyper = get_event(event) | span << SMMU_PMCG_SID_SPAN_SHIFT;
259 smmu_pmu_set_evtyper(smmu_pmu, idx, evtyper);
260 smmu_pmu_set_smr(smmu_pmu, idx, sid);
261}
262
263static int smmu_pmu_apply_event_filter(struct smmu_pmu *smmu_pmu,
264 struct perf_event *event, int idx)
265{
266 u32 span, sid;
267 unsigned int num_ctrs = smmu_pmu->num_counters;
268 bool filter_en = !!get_filter_enable(event);
269
270 span = filter_en ? get_filter_span(event) :
271 SMMU_PMCG_DEFAULT_FILTER_SPAN;
272 sid = filter_en ? get_filter_stream_id(event) :
273 SMMU_PMCG_DEFAULT_FILTER_SID;
274
275 /* Support individual filter settings */
276 if (!smmu_pmu->global_filter) {
277 smmu_pmu_set_event_filter(event, idx, span, sid);
278 return 0;
279 }
280
281 /* Requested settings same as current global settings*/
282 if (span == smmu_pmu->global_filter_span &&
283 sid == smmu_pmu->global_filter_sid)
284 return 0;
285
286 if (!bitmap_empty(smmu_pmu->used_counters, num_ctrs))
287 return -EAGAIN;
288
289 smmu_pmu_set_event_filter(event, 0, span, sid);
290 smmu_pmu->global_filter_span = span;
291 smmu_pmu->global_filter_sid = sid;
292 return 0;
293}
294
295static int smmu_pmu_get_event_idx(struct smmu_pmu *smmu_pmu,
296 struct perf_event *event)
297{
298 int idx, err;
299 unsigned int num_ctrs = smmu_pmu->num_counters;
300
301 idx = find_first_zero_bit(smmu_pmu->used_counters, num_ctrs);
302 if (idx == num_ctrs)
303 /* The counters are all in use. */
304 return -EAGAIN;
305
306 err = smmu_pmu_apply_event_filter(smmu_pmu, event, idx);
307 if (err)
308 return err;
309
310 set_bit(idx, smmu_pmu->used_counters);
311
312 return idx;
313}
314
315/*
316 * Implementation of abstract pmu functionality required by
317 * the core perf events code.
318 */
319
/*
 * pmu::event_init — validate a newly-created event.
 *
 * Rejects sampling and per-task use, checks the event ID against the
 * hardware's advertised event bitmap, refuses mixed-PMU groups, and
 * forces the event onto this PMU's nominated CPU (uncore-style PMU:
 * all events must share one CPU context).
 */
static int smmu_pmu_event_init(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	struct smmu_pmu *smmu_pmu = to_smmu_pmu(event->pmu);
	struct device *dev = smmu_pmu->dev;
	struct perf_event *sibling;
	u16 event_id;

	if (event->attr.type != event->pmu->type)
		return -ENOENT;

	if (hwc->sample_period) {
		dev_dbg(dev, "Sampling not supported\n");
		return -EOPNOTSUPP;
	}

	if (event->cpu < 0) {
		dev_dbg(dev, "Per-task mode not supported\n");
		return -EOPNOTSUPP;
	}

	/*
	 * Verify specified event is supported on this PMU.  IDs at or
	 * above SMMU_PMCG_ARCH_MAX_EVENTS are implementation defined and
	 * are let through unchecked.
	 */
	event_id = get_event(event);
	if (event_id < SMMU_PMCG_ARCH_MAX_EVENTS &&
	    (!test_bit(event_id, smmu_pmu->supported_events))) {
		dev_dbg(dev, "Invalid event %d for this PMU\n", event_id);
		return -EINVAL;
	}

	/* Don't allow groups with mixed PMUs, except for s/w events */
	if (event->group_leader->pmu != event->pmu &&
	    !is_software_event(event->group_leader)) {
		dev_dbg(dev, "Can't create mixed PMU group\n");
		return -EINVAL;
	}

	for_each_sibling_event(sibling, event->group_leader) {
		if (sibling->pmu != event->pmu &&
		    !is_software_event(sibling)) {
			dev_dbg(dev, "Can't create mixed PMU group\n");
			return -EINVAL;
		}
	}

	/* No counter assigned yet; pmu::add picks one */
	hwc->idx = -1;

	/*
	 * Ensure all events are on the same cpu so all events are in the
	 * same cpu context, to avoid races on pmu_enable etc.
	 */
	event->cpu = smmu_pmu->on_cpu;

	return 0;
}
374
/*
 * pmu::start — program the initial counter value/period, then enable
 * the hardware counter.
 */
static void smmu_pmu_event_start(struct perf_event *event, int flags)
{
	struct smmu_pmu *smmu_pmu = to_smmu_pmu(event->pmu);
	struct hw_perf_event *hwc = &event->hw;
	int idx = hwc->idx;

	hwc->state = 0;

	smmu_pmu_set_period(smmu_pmu, hwc);

	smmu_pmu_counter_enable(smmu_pmu, idx);
}
387
/*
 * pmu::stop — disable the hardware counter and fold its final value
 * into the event count.  Idempotent: a second stop is a no-op.
 */
static void smmu_pmu_event_stop(struct perf_event *event, int flags)
{
	struct smmu_pmu *smmu_pmu = to_smmu_pmu(event->pmu);
	struct hw_perf_event *hwc = &event->hw;
	int idx = hwc->idx;

	if (hwc->state & PERF_HES_STOPPED)
		return;

	smmu_pmu_counter_disable(smmu_pmu, idx);
	/* As the counter gets updated on _start, ignore PERF_EF_UPDATE */
	smmu_pmu_event_update(event);
	hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
}
402
/*
 * pmu::add — claim a counter for the event, enable its overflow
 * interrupt, and start counting if PERF_EF_START is set.
 */
static int smmu_pmu_event_add(struct perf_event *event, int flags)
{
	struct hw_perf_event *hwc = &event->hw;
	int idx;
	struct smmu_pmu *smmu_pmu = to_smmu_pmu(event->pmu);

	idx = smmu_pmu_get_event_idx(smmu_pmu, event);
	if (idx < 0)
		return idx;

	hwc->idx = idx;
	hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
	smmu_pmu->events[idx] = event;
	/* Fresh counter: no previously observed hardware value */
	local64_set(&hwc->prev_count, 0);

	smmu_pmu_interrupt_enable(smmu_pmu, idx);

	if (flags & PERF_EF_START)
		smmu_pmu_event_start(event, flags);

	/* Propagate changes to the userspace mapping. */
	perf_event_update_userpage(event);

	return 0;
}
428
/*
 * pmu::del — stop the event (updating its count), mask its overflow
 * interrupt and release the hardware counter.
 */
static void smmu_pmu_event_del(struct perf_event *event, int flags)
{
	struct hw_perf_event *hwc = &event->hw;
	struct smmu_pmu *smmu_pmu = to_smmu_pmu(event->pmu);
	int idx = hwc->idx;

	smmu_pmu_event_stop(event, flags | PERF_EF_UPDATE);
	smmu_pmu_interrupt_disable(smmu_pmu, idx);
	smmu_pmu->events[idx] = NULL;
	clear_bit(idx, smmu_pmu->used_counters);

	perf_event_update_userpage(event);
}
442
/* pmu::read — fold the current hardware counter value into the event. */
static void smmu_pmu_event_read(struct perf_event *event)
{
	smmu_pmu_event_update(event);
}
447
/* cpumask */

/*
 * sysfs "cpumask" attribute: reports the single CPU all events of this
 * PMU are bound to, so userspace tools open events on the right CPU.
 */
static ssize_t smmu_pmu_cpumask_show(struct device *dev,
				     struct device_attribute *attr,
				     char *buf)
{
	struct smmu_pmu *smmu_pmu = to_smmu_pmu(dev_get_drvdata(dev));

	return cpumap_print_to_pagebuf(true, buf, cpumask_of(smmu_pmu->on_cpu));
}

static struct device_attribute smmu_pmu_cpumask_attr =
	__ATTR(cpumask, 0444, smmu_pmu_cpumask_show, NULL);

static struct attribute *smmu_pmu_cpumask_attrs[] = {
	&smmu_pmu_cpumask_attr.attr,
	NULL
};

static struct attribute_group smmu_pmu_cpumask_group = {
	.attrs = smmu_pmu_cpumask_attrs,
};
470
471/* Events */
472
473static ssize_t smmu_pmu_event_show(struct device *dev,
474 struct device_attribute *attr, char *page)
475{
476 struct perf_pmu_events_attr *pmu_attr;
477
478 pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr);
479
480 return sprintf(page, "event=0x%02llx\n", pmu_attr->id);
481}
482
/* Declare one sysfs event attribute per architected event ID. */
#define SMMU_EVENT_ATTR(name, config) \
	PMU_EVENT_ATTR(name, smmu_event_attr_##name, \
		       config, smmu_pmu_event_show)
SMMU_EVENT_ATTR(cycles, 0);
SMMU_EVENT_ATTR(transaction, 1);
SMMU_EVENT_ATTR(tlb_miss, 2);
SMMU_EVENT_ATTR(config_cache_miss, 3);
SMMU_EVENT_ATTR(trans_table_walk_access, 4);
SMMU_EVENT_ATTR(config_struct_access, 5);
SMMU_EVENT_ATTR(pcie_ats_trans_rq, 6);
SMMU_EVENT_ATTR(pcie_ats_trans_passed, 7);

/* All architected events; is_visible filters out unsupported ones. */
static struct attribute *smmu_pmu_events[] = {
	&smmu_event_attr_cycles.attr.attr,
	&smmu_event_attr_transaction.attr.attr,
	&smmu_event_attr_tlb_miss.attr.attr,
	&smmu_event_attr_config_cache_miss.attr.attr,
	&smmu_event_attr_trans_table_walk_access.attr.attr,
	&smmu_event_attr_config_struct_access.attr.attr,
	&smmu_event_attr_pcie_ats_trans_rq.attr.attr,
	&smmu_event_attr_pcie_ats_trans_passed.attr.attr,
	NULL
};
506
507static umode_t smmu_pmu_event_is_visible(struct kobject *kobj,
508 struct attribute *attr, int unused)
509{
510 struct device *dev = kobj_to_dev(kobj);
511 struct smmu_pmu *smmu_pmu = to_smmu_pmu(dev_get_drvdata(dev));
512 struct perf_pmu_events_attr *pmu_attr;
513
514 pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr.attr);
515
516 if (test_bit(pmu_attr->id, smmu_pmu->supported_events))
517 return attr->mode;
518
519 return 0;
520}
521
static struct attribute_group smmu_pmu_events_group = {
	.name = "events",
	.attrs = smmu_pmu_events,
	/* hide events the hardware does not advertise */
	.is_visible = smmu_pmu_event_is_visible,
};

/* Formats */
/* config: event ID; config1: stream-ID filter (SID, span, enable) */
PMU_FORMAT_ATTR(event,		   "config:0-15");
PMU_FORMAT_ATTR(filter_stream_id,  "config1:0-31");
PMU_FORMAT_ATTR(filter_span,	   "config1:32");
PMU_FORMAT_ATTR(filter_enable,	   "config1:33");

static struct attribute *smmu_pmu_formats[] = {
	&format_attr_event.attr,
	&format_attr_filter_stream_id.attr,
	&format_attr_filter_span.attr,
	&format_attr_filter_enable.attr,
	NULL
};

static struct attribute_group smmu_pmu_format_group = {
	.name = "format",
	.attrs = smmu_pmu_formats,
};

/* Top-level sysfs groups attached to the registered PMU device. */
static const struct attribute_group *smmu_pmu_attr_grps[] = {
	&smmu_pmu_cpumask_group,
	&smmu_pmu_events_group,
	&smmu_pmu_format_group,
	NULL
};
553
554/*
555 * Generic device handlers
556 */
557
/*
 * CPU hotplug callback: when the CPU owning this PMU goes offline,
 * migrate the perf context and the IRQ affinity hint to another
 * online CPU.
 */
static int smmu_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
{
	struct smmu_pmu *smmu_pmu;
	unsigned int target;

	smmu_pmu = hlist_entry_safe(node, struct smmu_pmu, node);
	if (cpu != smmu_pmu->on_cpu)
		return 0;	/* not our CPU; nothing to do */

	target = cpumask_any_but(cpu_online_mask, cpu);
	if (target >= nr_cpu_ids)
		return 0;	/* no other CPU left online */

	perf_pmu_migrate_context(&smmu_pmu->pmu, cpu, target);
	smmu_pmu->on_cpu = target;
	WARN_ON(irq_set_affinity_hint(smmu_pmu->irq, cpumask_of(target)));

	return 0;
}
577
/*
 * Overflow interrupt handler: acknowledge all pending overflow bits,
 * then update each overflowed event's count and re-arm its period.
 */
static irqreturn_t smmu_pmu_handle_irq(int irq_num, void *data)
{
	struct smmu_pmu *smmu_pmu = data;
	u64 ovsr;
	unsigned int idx;

	ovsr = readq(smmu_pmu->reloc_base + SMMU_PMCG_OVSSET0);
	if (!ovsr)
		return IRQ_NONE;	/* not ours (line may be shared) */

	/* Clear the bits we observed before servicing them */
	writeq(ovsr, smmu_pmu->reloc_base + SMMU_PMCG_OVSCLR0);

	for_each_set_bit(idx, (unsigned long *)&ovsr, smmu_pmu->num_counters) {
		struct perf_event *event = smmu_pmu->events[idx];
		struct hw_perf_event *hwc;

		/* Overflow on a counter we never armed should not happen */
		if (WARN_ON_ONCE(!event))
			continue;

		smmu_pmu_event_update(event);
		hwc = &event->hw;

		smmu_pmu_set_period(smmu_pmu, hwc);
	}

	return IRQ_HANDLED;
}
605
/* devm teardown action: release the platform MSIs allocated for the PMCG. */
static void smmu_pmu_free_msis(void *data)
{
	struct device *dev = data;

	platform_msi_domain_free_irqs(dev);
}
612
/*
 * platform-MSI write callback: program the PMCG's MSI doorbell address,
 * payload and memory attribute registers from the composed message.
 */
static void smmu_pmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
{
	phys_addr_t doorbell;
	struct device *dev = msi_desc_to_dev(desc);
	struct smmu_pmu *pmu = dev_get_drvdata(dev);

	doorbell = (((u64)msg->address_hi) << 32) | msg->address_lo;
	doorbell &= MSI_CFG0_ADDR_MASK;

	writeq_relaxed(doorbell, pmu->reg_base + SMMU_PMCG_IRQ_CFG0);
	writel_relaxed(msg->data, pmu->reg_base + SMMU_PMCG_IRQ_CFG1);
	writel_relaxed(MSI_CFG2_MEMATTR_DEVICE_nGnRE,
		       pmu->reg_base + SMMU_PMCG_IRQ_CFG2);
}
627
628static void smmu_pmu_setup_msi(struct smmu_pmu *pmu)
629{
630 struct msi_desc *desc;
631 struct device *dev = pmu->dev;
632 int ret;
633
634 /* Clear MSI address reg */
635 writeq_relaxed(0, pmu->reg_base + SMMU_PMCG_IRQ_CFG0);
636
637 /* MSI supported or not */
638 if (!(readl(pmu->reg_base + SMMU_PMCG_CFGR) & SMMU_PMCG_CFGR_MSI))
639 return;
640
641 ret = platform_msi_domain_alloc_irqs(dev, 1, smmu_pmu_write_msi_msg);
642 if (ret) {
643 dev_warn(dev, "failed to allocate MSIs\n");
644 return;
645 }
646
647 desc = first_msi_entry(dev);
648 if (desc)
649 pmu->irq = desc->irq;
650
651 /* Add callback to free MSIs on teardown */
652 devm_add_action(dev, smmu_pmu_free_msis, dev);
653}
654
/*
 * Wire up the overflow interrupt: prefer an MSI when the PMCG supports
 * it, otherwise use the wired IRQ discovered at probe time.  Returns
 * -ENXIO when no interrupt is available at all.
 */
static int smmu_pmu_setup_irq(struct smmu_pmu *pmu)
{
	unsigned long flags = IRQF_NOBALANCING | IRQF_SHARED | IRQF_NO_THREAD;
	int irq, ret = -ENXIO;

	smmu_pmu_setup_msi(pmu);

	irq = pmu->irq;
	if (irq)
		ret = devm_request_irq(pmu->dev, irq, smmu_pmu_handle_irq,
				       flags, "smmuv3-pmu", pmu);
	return ret;
}
668
/*
 * Put the PMCG into a known quiescent state: global disable, all
 * counters and their interrupts off, all overflow status cleared.
 */
static void smmu_pmu_reset(struct smmu_pmu *smmu_pmu)
{
	u64 counter_present_mask = GENMASK_ULL(smmu_pmu->num_counters - 1, 0);

	smmu_pmu_disable(&smmu_pmu->pmu);

	/* Disable counter and interrupt */
	writeq_relaxed(counter_present_mask,
		       smmu_pmu->reg_base + SMMU_PMCG_CNTENCLR0);
	writeq_relaxed(counter_present_mask,
		       smmu_pmu->reg_base + SMMU_PMCG_INTENCLR0);
	writeq_relaxed(counter_present_mask,
		       smmu_pmu->reloc_base + SMMU_PMCG_OVSCLR0);
}
683
/*
 * Apply model-specific quirks.  The IORT code passes a PMCG model
 * identifier (IORT_SMMU_V3_PMCG_*) down as platform data; unknown
 * models get no options.
 */
static void smmu_pmu_get_acpi_options(struct smmu_pmu *smmu_pmu)
{
	u32 model;

	model = *(u32 *)dev_get_platdata(smmu_pmu->dev);

	switch (model) {
	case IORT_SMMU_V3_PMCG_HISI_HIP08:
		/* HiSilicon Erratum 162001800 */
		smmu_pmu->options |= SMMU_PMCG_EVCNTR_RDONLY;
		break;
	}

	dev_notice(smmu_pmu->dev, "option mask 0x%x\n", smmu_pmu->options);
}
699
700static int smmu_pmu_probe(struct platform_device *pdev)
701{
702 struct smmu_pmu *smmu_pmu;
703 struct resource *res_0, *res_1;
704 u32 cfgr, reg_size;
705 u64 ceid_64[2];
706 int irq, err;
707 char *name;
708 struct device *dev = &pdev->dev;
709
710 smmu_pmu = devm_kzalloc(dev, sizeof(*smmu_pmu), GFP_KERNEL);
711 if (!smmu_pmu)
712 return -ENOMEM;
713
714 smmu_pmu->dev = dev;
715 platform_set_drvdata(pdev, smmu_pmu);
716
717 smmu_pmu->pmu = (struct pmu) {
718 .task_ctx_nr = perf_invalid_context,
719 .pmu_enable = smmu_pmu_enable,
720 .pmu_disable = smmu_pmu_disable,
721 .event_init = smmu_pmu_event_init,
722 .add = smmu_pmu_event_add,
723 .del = smmu_pmu_event_del,
724 .start = smmu_pmu_event_start,
725 .stop = smmu_pmu_event_stop,
726 .read = smmu_pmu_event_read,
727 .attr_groups = smmu_pmu_attr_grps,
728 .capabilities = PERF_PMU_CAP_NO_EXCLUDE,
729 };
730
731 res_0 = platform_get_resource(pdev, IORESOURCE_MEM, 0);
732 smmu_pmu->reg_base = devm_ioremap_resource(dev, res_0);
733 if (IS_ERR(smmu_pmu->reg_base))
734 return PTR_ERR(smmu_pmu->reg_base);
735
736 cfgr = readl_relaxed(smmu_pmu->reg_base + SMMU_PMCG_CFGR);
737
738 /* Determine if page 1 is present */
739 if (cfgr & SMMU_PMCG_CFGR_RELOC_CTRS) {
740 res_1 = platform_get_resource(pdev, IORESOURCE_MEM, 1);
741 smmu_pmu->reloc_base = devm_ioremap_resource(dev, res_1);
742 if (IS_ERR(smmu_pmu->reloc_base))
743 return PTR_ERR(smmu_pmu->reloc_base);
744 } else {
745 smmu_pmu->reloc_base = smmu_pmu->reg_base;
746 }
747
748 irq = platform_get_irq(pdev, 0);
749 if (irq > 0)
750 smmu_pmu->irq = irq;
751
752 ceid_64[0] = readq_relaxed(smmu_pmu->reg_base + SMMU_PMCG_CEID0);
753 ceid_64[1] = readq_relaxed(smmu_pmu->reg_base + SMMU_PMCG_CEID1);
754 bitmap_from_arr32(smmu_pmu->supported_events, (u32 *)ceid_64,
755 SMMU_PMCG_ARCH_MAX_EVENTS);
756
757 smmu_pmu->num_counters = FIELD_GET(SMMU_PMCG_CFGR_NCTR, cfgr) + 1;
758
759 smmu_pmu->global_filter = !!(cfgr & SMMU_PMCG_CFGR_SID_FILTER_TYPE);
760
761 reg_size = FIELD_GET(SMMU_PMCG_CFGR_SIZE, cfgr);
762 smmu_pmu->counter_mask = GENMASK_ULL(reg_size, 0);
763
764 smmu_pmu_reset(smmu_pmu);
765
766 err = smmu_pmu_setup_irq(smmu_pmu);
767 if (err) {
768 dev_err(dev, "Setup irq failed, PMU @%pa\n", &res_0->start);
769 return err;
770 }
771
772 name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "smmuv3_pmcg_%llx",
773 (res_0->start) >> SMMU_PMCG_PA_SHIFT);
774 if (!name) {
775 dev_err(dev, "Create name failed, PMU @%pa\n", &res_0->start);
776 return -EINVAL;
777 }
778
779 smmu_pmu_get_acpi_options(smmu_pmu);
780
781 /* Pick one CPU to be the preferred one to use */
782 smmu_pmu->on_cpu = raw_smp_processor_id();
783 WARN_ON(irq_set_affinity_hint(smmu_pmu->irq,
784 cpumask_of(smmu_pmu->on_cpu)));
785
786 err = cpuhp_state_add_instance_nocalls(cpuhp_state_num,
787 &smmu_pmu->node);
788 if (err) {
789 dev_err(dev, "Error %d registering hotplug, PMU @%pa\n",
790 err, &res_0->start);
791 goto out_cpuhp_err;
792 }
793
794 err = perf_pmu_register(&smmu_pmu->pmu, name, -1);
795 if (err) {
796 dev_err(dev, "Error %d registering PMU @%pa\n",
797 err, &res_0->start);
798 goto out_unregister;
799 }
800
801 dev_info(dev, "Registered PMU @ %pa using %d counters with %s filter settings\n",
802 &res_0->start, smmu_pmu->num_counters,
803 smmu_pmu->global_filter ? "Global(Counter0)" :
804 "Individual");
805
806 return 0;
807
808out_unregister:
809 cpuhp_state_remove_instance_nocalls(cpuhp_state_num, &smmu_pmu->node);
810out_cpuhp_err:
811 put_cpu();
812 return err;
813}
814
815static int smmu_pmu_remove(struct platform_device *pdev)
816{
817 struct smmu_pmu *smmu_pmu = platform_get_drvdata(pdev);
818
819 perf_pmu_unregister(&smmu_pmu->pmu);
820 cpuhp_state_remove_instance_nocalls(cpuhp_state_num, &smmu_pmu->node);
821
822 return 0;
823}
824
/* Stop the hardware counting across kexec/reboot. */
static void smmu_pmu_shutdown(struct platform_device *pdev)
{
	struct smmu_pmu *smmu_pmu = platform_get_drvdata(pdev);

	smmu_pmu_disable(&smmu_pmu->pmu);
}
831
/* Bound to the "arm-smmu-v3-pmcg" platform devices created by IORT. */
static struct platform_driver smmu_pmu_driver = {
	.driver = {
		.name = "arm-smmu-v3-pmcg",
	},
	.probe = smmu_pmu_probe,
	.remove = smmu_pmu_remove,
	.shutdown = smmu_pmu_shutdown,
};
840
841static int __init arm_smmu_pmu_init(void)
842{
843 cpuhp_state_num = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
844 "perf/arm/pmcg:online",
845 NULL,
846 smmu_pmu_offline_cpu);
847 if (cpuhp_state_num < 0)
848 return cpuhp_state_num;
849
850 return platform_driver_register(&smmu_pmu_driver);
851}
852module_init(arm_smmu_pmu_init);
853
/* Module exit: unregister the driver and drop the cpuhp state. */
static void __exit arm_smmu_pmu_exit(void)
{
	platform_driver_unregister(&smmu_pmu_driver);
	cpuhp_remove_multi_state(cpuhp_state_num);
}

module_exit(arm_smmu_pmu_exit);

MODULE_DESCRIPTION("PMU driver for ARM SMMUv3 Performance Monitors Extension");
MODULE_AUTHOR("Neil Leeder <nleeder@codeaurora.org>");
MODULE_AUTHOR("Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>");
MODULE_LICENSE("GPL v2");
diff --git a/include/linux/acpi_iort.h b/include/linux/acpi_iort.h
index 38cd77b39a64..723e4dfa1c14 100644
--- a/include/linux/acpi_iort.h
+++ b/include/linux/acpi_iort.h
@@ -26,6 +26,14 @@
26#define IORT_IRQ_MASK(irq) (irq & 0xffffffffULL) 26#define IORT_IRQ_MASK(irq) (irq & 0xffffffffULL)
27#define IORT_IRQ_TRIGGER_MASK(irq) ((irq >> 32) & 0xffffffffULL) 27#define IORT_IRQ_TRIGGER_MASK(irq) ((irq >> 32) & 0xffffffffULL)
28 28
29/*
30 * PMCG model identifiers for use in smmu pmu driver. Please note
31 * that this is purely for the use of software and has nothing to
32 * do with hardware or with IORT specification.
33 */
34#define IORT_SMMU_V3_PMCG_GENERIC 0x00000000 /* Generic SMMUv3 PMCG */
35#define IORT_SMMU_V3_PMCG_HISI_HIP08 0x00000001 /* HiSilicon HIP08 PMCG */
36
29int iort_register_domain_token(int trans_id, phys_addr_t base, 37int iort_register_domain_token(int trans_id, phys_addr_t base,
30 struct fwnode_handle *fw_node); 38 struct fwnode_handle *fw_node);
31void iort_deregister_domain_token(int trans_id); 39void iort_deregister_domain_token(int trans_id);