author		Joerg Roedel <jroedel@suse.de>	2018-10-01 11:28:13 -0400
committer	Joerg Roedel <jroedel@suse.de>	2018-10-01 11:28:13 -0400
commit		6f20a97e0979cfc1878ebfd85a6ee69a962e546f (patch)
tree		e3074bde20ec8f0541e84f718aa3f26c8a5aa71d
parent		17b57b1883c1285f3d0dc2266e8f79286a7bef38 (diff)
parent		44f6876a00e83df5fd28681502b19b0f51e4a3c6 (diff)
Merge branch 'for-joerg/arm-smmu/updates' of git://git.kernel.org/pub/scm/linux/kernel/git/will/linux into arm/smmu
-rw-r--r--  Documentation/admin-guide/kernel-parameters.txt   12
-rw-r--r--  drivers/iommu/arm-smmu-v3.c                       115
-rw-r--r--  drivers/iommu/arm-smmu.c                           99
-rw-r--r--  drivers/iommu/dma-iommu.c                          32
-rw-r--r--  drivers/iommu/io-pgtable-arm-v7s.c                 11
-rw-r--r--  drivers/iommu/io-pgtable-arm.c                     23
-rw-r--r--  drivers/iommu/io-pgtable.h                          5
-rw-r--r--  drivers/iommu/iommu.c                              14
-rw-r--r--  include/linux/iommu.h                               1
9 files changed, 243 insertions, 69 deletions
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 92eb1f42240d..7c992b6d27fa 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -1749,6 +1749,18 @@
 		nobypass	[PPC/POWERNV]
 			Disable IOMMU bypass, using IOMMU for PCI devices.
 
+	iommu.strict=	[ARM64] Configure TLB invalidation behaviour
+			Format: { "0" | "1" }
+			0 - Lazy mode.
+			  Request that DMA unmap operations use deferred
+			  invalidation of hardware TLBs, for increased
+			  throughput at the cost of reduced device isolation.
+			  Will fall back to strict mode if not supported by
+			  the relevant IOMMU driver.
+			1 - Strict mode (default).
+			  DMA unmap operations invalidate IOMMU hardware TLBs
+			  synchronously.
+
 	iommu.passthrough=
 			[ARM64] Configure DMA to bypass the IOMMU by default.
 			Format: { "0" | "1" }
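
Not part of the diff: the practical difference between the two settings is when IOTLB entries for an unmapped buffer are invalidated. A minimal sketch of the unmap path in both modes, using existing kernel helpers (iommu_unmap_fast(), iommu_tlb_sync(), free_iova_fast(), queue_iova()); the wrapper function and its strict flag are hypothetical:

#include <linux/iommu.h>
#include <linux/iova.h>

/* Hypothetical wrapper: what iommu.strict= chooses between on DMA unmap */
static void unmap_one(struct iommu_domain *dom, struct iova_domain *iovad,
		      unsigned long iova, size_t size, bool strict)
{
	/* Clear the IO page tables; no TLB maintenance has happened yet */
	iommu_unmap_fast(dom, iova, size);

	if (strict) {
		/* "1" (default): invalidate the IOTLB before returning */
		iommu_tlb_sync(dom);
		free_iova_fast(iovad, iova_pfn(iovad, iova),
			       size >> iova_shift(iovad));
	} else {
		/*
		 * "0" (lazy): defer invalidation; the IOVA is only reused
		 * after a batched flush, trading a window in which the
		 * device could still reach the old mapping for throughput.
		 */
		queue_iova(iovad, iova_pfn(iovad, iova),
			   size >> iova_shift(iovad), 0);
	}
}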
diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index 5059d09f3202..db402e8b068b 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -567,7 +567,8 @@ struct arm_smmu_device {
 
 	int				gerr_irq;
 	int				combined_irq;
-	atomic_t			sync_nr;
+	u32				sync_nr;
+	u8				prev_cmd_opcode;
 
 	unsigned long			ias;	/* IPA */
 	unsigned long			oas;	/* PA */
@@ -611,6 +612,7 @@ struct arm_smmu_domain {
 	struct mutex			init_mutex; /* Protects smmu pointer */
 
 	struct io_pgtable_ops		*pgtbl_ops;
+	bool				non_strict;
 
 	enum arm_smmu_domain_stage	stage;
 	union {
@@ -708,7 +710,7 @@ static void queue_inc_prod(struct arm_smmu_queue *q)
 }
 
 /*
- * Wait for the SMMU to consume items. If drain is true, wait until the queue
+ * Wait for the SMMU to consume items. If sync is true, wait until the queue
  * is empty. Otherwise, wait until there is at least one free slot.
  */
 static int queue_poll_cons(struct arm_smmu_queue *q, bool sync, bool wfe)
@@ -901,6 +903,8 @@ static void arm_smmu_cmdq_insert_cmd(struct arm_smmu_device *smmu, u64 *cmd)
 	struct arm_smmu_queue *q = &smmu->cmdq.q;
 	bool wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
 
+	smmu->prev_cmd_opcode = FIELD_GET(CMDQ_0_OP, cmd[0]);
+
 	while (queue_insert_raw(q, cmd) == -ENOSPC) {
 		if (queue_poll_cons(q, false, wfe))
 			dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
@@ -948,15 +952,21 @@ static int __arm_smmu_cmdq_issue_sync_msi(struct arm_smmu_device *smmu)
 	struct arm_smmu_cmdq_ent ent = {
 		.opcode = CMDQ_OP_CMD_SYNC,
 		.sync	= {
-			.msidata = atomic_inc_return_relaxed(&smmu->sync_nr),
 			.msiaddr = virt_to_phys(&smmu->sync_count),
 		},
 	};
 
-	arm_smmu_cmdq_build_cmd(cmd, &ent);
-
 	spin_lock_irqsave(&smmu->cmdq.lock, flags);
-	arm_smmu_cmdq_insert_cmd(smmu, cmd);
+
+	/* Piggy-back on the previous command if it's a SYNC */
+	if (smmu->prev_cmd_opcode == CMDQ_OP_CMD_SYNC) {
+		ent.sync.msidata = smmu->sync_nr;
+	} else {
+		ent.sync.msidata = ++smmu->sync_nr;
+		arm_smmu_cmdq_build_cmd(cmd, &ent);
+		arm_smmu_cmdq_insert_cmd(smmu, cmd);
+	}
+
 	spin_unlock_irqrestore(&smmu->cmdq.lock, flags);
 
 	return __arm_smmu_sync_poll_msi(smmu, ent.sync.msidata);
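
Aside (not part of the patch): this works because, with MSI-based completion, every CMD_SYNC writes its msidata to the same sync_count location. If the most recently queued command is already a CMD_SYNC, a new caller can wait on that command's sequence number instead of inserting another one, and sync_nr no longer needs to be atomic because it is only advanced under cmdq.lock. A reduced sketch of the pattern with stand-in names (sync_queue, OP_SYNC and enqueue_sync_cmd are not the driver's API):

#include <linux/spinlock.h>
#include <linux/types.h>

enum { OP_SYNC = 0x46 };		/* stand-in for CMDQ_OP_CMD_SYNC */

struct sync_queue {
	spinlock_t	lock;
	u32		sync_seq;	/* sequence of the last queued SYNC */
	u8		prev_opcode;	/* opcode of the last queued command */
};

static void enqueue_sync_cmd(struct sync_queue *q, u32 seq)
{
	/* stand-in: build a CMD_SYNC carrying 'seq' and insert it */
	q->prev_opcode = OP_SYNC;
}

static u32 issue_sync(struct sync_queue *q)
{
	unsigned long flags;
	u32 seq;

	spin_lock_irqsave(&q->lock, flags);
	if (q->prev_opcode == OP_SYNC) {
		seq = q->sync_seq;	/* piggy-back on the SYNC already queued */
	} else {
		seq = ++q->sync_seq;	/* serialised by the lock, no atomic needed */
		enqueue_sync_cmd(q, seq);
	}
	spin_unlock_irqrestore(&q->lock, flags);

	return seq;			/* caller polls the MSI write-back for this value */
}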
@@ -1398,6 +1408,12 @@ static void arm_smmu_tlb_inv_context(void *cookie)
 		cmd.tlbi.vmid	= smmu_domain->s2_cfg.vmid;
 	}
 
+	/*
+	 * NOTE: when io-pgtable is in non-strict mode, we may get here with
+	 * PTEs previously cleared by unmaps on the current CPU not yet visible
+	 * to the SMMU. We are relying on the DSB implicit in queue_inc_prod()
+	 * to guarantee those are observed before the TLBI. Do be careful, 007.
+	 */
 	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
 	__arm_smmu_tlb_sync(smmu);
 }
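
Aside (not part of the patch): the ordering argument in the comment rests on the fact that, on arm64, writel() (unlike writel_relaxed()) orders all prior stores to Normal memory before the MMIO write. queue_inc_prod() publishes the new producer index with such a write, so PTEs cleared earlier on this CPU are visible to the SMMU by the time it can consume the TLBI. Illustrative sketch only; the helper name is hypothetical and the real code is split across queue_insert_raw() and queue_inc_prod():

static void publish_cmd(struct arm_smmu_queue *q, u64 *cmd)
{
	queue_insert_raw(q, cmd);	/* copy the TLBI command into the queue */
	/*
	 * writel() implies a DSB on arm64: Normal-memory stores made before
	 * this point - including PTEs zeroed by earlier, non-strict unmaps
	 * on this CPU - are observable by the SMMU before it can see the
	 * updated producer index and execute the TLBI.
	 */
	writel(q->prod, q->prod_reg);
}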
@@ -1624,6 +1640,9 @@ static int arm_smmu_domain_finalise(struct iommu_domain *domain)
 	if (smmu->features & ARM_SMMU_FEAT_COHERENCY)
 		pgtbl_cfg.quirks = IO_PGTABLE_QUIRK_NO_DMA;
 
+	if (smmu_domain->non_strict)
+		pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;
+
 	pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
 	if (!pgtbl_ops)
 		return -ENOMEM;
@@ -1772,6 +1791,14 @@ arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size)
 	return ops->unmap(ops, iova, size);
 }
 
+static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
+{
+	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
+
+	if (smmu_domain->smmu)
+		arm_smmu_tlb_inv_context(smmu_domain);
+}
+
 static void arm_smmu_iotlb_sync(struct iommu_domain *domain)
 {
 	struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
@@ -1917,15 +1944,27 @@ static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
 {
 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
 
-	if (domain->type != IOMMU_DOMAIN_UNMANAGED)
-		return -EINVAL;
-
-	switch (attr) {
-	case DOMAIN_ATTR_NESTING:
-		*(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
-		return 0;
+	switch (domain->type) {
+	case IOMMU_DOMAIN_UNMANAGED:
+		switch (attr) {
+		case DOMAIN_ATTR_NESTING:
+			*(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
+			return 0;
+		default:
+			return -ENODEV;
+		}
+		break;
+	case IOMMU_DOMAIN_DMA:
+		switch (attr) {
+		case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
+			*(int *)data = smmu_domain->non_strict;
+			return 0;
+		default:
+			return -ENODEV;
+		}
+		break;
 	default:
-		return -ENODEV;
+		return -EINVAL;
 	}
 }
 
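
Aside (not part of the patch): from the core's point of view the new attribute is driven through the existing iommu_domain_get_attr()/iommu_domain_set_attr() calls, and only DMA domains accept it; anything else reports -ENODEV (unknown attribute) or -EINVAL (wrong domain type), matching the switch above. A minimal caller-side sketch (the function name is hypothetical):

#include <linux/iommu.h>
#include <linux/printk.h>

static void try_enable_flush_queue(struct iommu_domain *dom)
{
	int attr = 1;

	if (iommu_domain_set_attr(dom, DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE, &attr))
		return;		/* driver or domain type doesn't support it */

	if (!iommu_domain_get_attr(dom, DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE, &attr))
		pr_info("lazy TLB invalidation %s\n", attr ? "enabled" : "disabled");
}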
@@ -1935,26 +1974,37 @@ static int arm_smmu_domain_set_attr(struct iommu_domain *domain,
 	int ret = 0;
 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
 
-	if (domain->type != IOMMU_DOMAIN_UNMANAGED)
-		return -EINVAL;
-
 	mutex_lock(&smmu_domain->init_mutex);
 
-	switch (attr) {
-	case DOMAIN_ATTR_NESTING:
-		if (smmu_domain->smmu) {
-			ret = -EPERM;
-			goto out_unlock;
+	switch (domain->type) {
+	case IOMMU_DOMAIN_UNMANAGED:
+		switch (attr) {
+		case DOMAIN_ATTR_NESTING:
+			if (smmu_domain->smmu) {
+				ret = -EPERM;
+				goto out_unlock;
+			}
+
+			if (*(int *)data)
+				smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
+			else
+				smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
+			break;
+		default:
+			ret = -ENODEV;
+		}
+		break;
+	case IOMMU_DOMAIN_DMA:
+		switch(attr) {
+		case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
+			smmu_domain->non_strict = *(int *)data;
+			break;
+		default:
+			ret = -ENODEV;
 		}
-
-		if (*(int *)data)
-			smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
-		else
-			smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
-
 		break;
 	default:
-		ret = -ENODEV;
+		ret = -EINVAL;
 	}
 
 out_unlock:
@@ -1999,7 +2049,7 @@ static struct iommu_ops arm_smmu_ops = {
 	.attach_dev		= arm_smmu_attach_dev,
 	.map			= arm_smmu_map,
 	.unmap			= arm_smmu_unmap,
-	.flush_iotlb_all	= arm_smmu_iotlb_sync,
+	.flush_iotlb_all	= arm_smmu_flush_iotlb_all,
 	.iotlb_sync		= arm_smmu_iotlb_sync,
 	.iova_to_phys		= arm_smmu_iova_to_phys,
 	.add_device		= arm_smmu_add_device,
@@ -2180,7 +2230,6 @@ static int arm_smmu_init_structures(struct arm_smmu_device *smmu)
 {
 	int ret;
 
-	atomic_set(&smmu->sync_nr, 0);
 	ret = arm_smmu_init_queues(smmu);
 	if (ret)
 		return ret;
@@ -2353,8 +2402,8 @@ static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
 	irq = smmu->combined_irq;
 	if (irq) {
 		/*
-		 * Cavium ThunderX2 implementation doesn't not support unique
-		 * irq lines. Use single irq line for all the SMMUv3 interrupts.
+		 * Cavium ThunderX2 implementation doesn't support unique irq
+		 * lines. Use a single irq line for all the SMMUv3 interrupts.
 		 */
 		ret = devm_request_threaded_irq(smmu->dev, irq,
 					arm_smmu_combined_irq_handler,
diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index fd1b80ef9490..1030027cbcc6 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -246,6 +246,7 @@ struct arm_smmu_domain {
 	const struct iommu_gather_ops	*tlb_ops;
 	struct arm_smmu_cfg		cfg;
 	enum arm_smmu_domain_stage	stage;
+	bool				non_strict;
 	struct mutex			init_mutex; /* Protects smmu pointer */
 	spinlock_t			cb_lock; /* Serialises ATS1* ops and TLB syncs */
 	struct iommu_domain		domain;
@@ -447,7 +448,11 @@ static void arm_smmu_tlb_inv_context_s1(void *cookie)
 	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
 	void __iomem *base = ARM_SMMU_CB(smmu_domain->smmu, cfg->cbndx);
 
-	writel_relaxed(cfg->asid, base + ARM_SMMU_CB_S1_TLBIASID);
+	/*
+	 * NOTE: this is not a relaxed write; it needs to guarantee that PTEs
+	 * cleared by the current CPU are visible to the SMMU before the TLBI.
+	 */
+	writel(cfg->asid, base + ARM_SMMU_CB_S1_TLBIASID);
 	arm_smmu_tlb_sync_context(cookie);
 }
 
@@ -457,7 +462,8 @@ static void arm_smmu_tlb_inv_context_s2(void *cookie)
 	struct arm_smmu_device *smmu = smmu_domain->smmu;
 	void __iomem *base = ARM_SMMU_GR0(smmu);
 
-	writel_relaxed(smmu_domain->cfg.vmid, base + ARM_SMMU_GR0_TLBIVMID);
+	/* NOTE: see above */
+	writel(smmu_domain->cfg.vmid, base + ARM_SMMU_GR0_TLBIVMID);
 	arm_smmu_tlb_sync_global(smmu);
 }
 
@@ -469,6 +475,9 @@ static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
 	bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
 	void __iomem *reg = ARM_SMMU_CB(smmu_domain->smmu, cfg->cbndx);
 
+	if (smmu_domain->smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
+		wmb();
+
 	if (stage1) {
 		reg += leaf ? ARM_SMMU_CB_S1_TLBIVAL : ARM_SMMU_CB_S1_TLBIVA;
 
@@ -510,6 +519,9 @@ static void arm_smmu_tlb_inv_vmid_nosync(unsigned long iova, size_t size,
 	struct arm_smmu_domain *smmu_domain = cookie;
 	void __iomem *base = ARM_SMMU_GR0(smmu_domain->smmu);
 
+	if (smmu_domain->smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
+		wmb();
+
 	writel_relaxed(smmu_domain->cfg.vmid, base + ARM_SMMU_GR0_TLBIVMID);
 }
 
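
Aside (not part of the patch): the wmb() additions pair with the writel() changes earlier in this file. The range-invalidation helpers keep using relaxed MMIO writes for performance, but when the SMMU has coherent table walks the driver also sets IO_PGTABLE_QUIRK_NO_DMA, so the page-table code performs no DMA cache maintenance and hence provides no implicit barrier; the explicit wmb() is what makes the cleared PTEs visible before the TLBI register write. A condensed sketch of the pattern (tlbi_reg stands in for the TLBIVA/TLBIVMID register the real helpers target):

static void inv_range_example(struct arm_smmu_domain *smmu_domain,
			      void __iomem *tlbi_reg, u32 arg)
{
	if (smmu_domain->smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
		wmb();	/* order PTE updates before the TLBI ... */

	/* ... so the MMIO write itself can stay relaxed */
	writel_relaxed(arg, tlbi_reg);
}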
@@ -863,6 +875,9 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
 	if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
 		pgtbl_cfg.quirks = IO_PGTABLE_QUIRK_NO_DMA;
 
+	if (smmu_domain->non_strict)
+		pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;
+
 	smmu_domain->smmu = smmu;
 	pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
 	if (!pgtbl_ops) {
@@ -1252,6 +1267,14 @@ static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
 	return ops->unmap(ops, iova, size);
 }
 
+static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
+{
+	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
+
+	if (smmu_domain->tlb_ops)
+		smmu_domain->tlb_ops->tlb_flush_all(smmu_domain);
+}
+
 static void arm_smmu_iotlb_sync(struct iommu_domain *domain)
 {
 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
@@ -1470,15 +1493,27 @@ static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
 {
 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
 
-	if (domain->type != IOMMU_DOMAIN_UNMANAGED)
-		return -EINVAL;
-
-	switch (attr) {
-	case DOMAIN_ATTR_NESTING:
-		*(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
-		return 0;
+	switch(domain->type) {
+	case IOMMU_DOMAIN_UNMANAGED:
+		switch (attr) {
+		case DOMAIN_ATTR_NESTING:
+			*(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
+			return 0;
+		default:
+			return -ENODEV;
+		}
+		break;
+	case IOMMU_DOMAIN_DMA:
+		switch (attr) {
+		case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
+			*(int *)data = smmu_domain->non_strict;
+			return 0;
+		default:
+			return -ENODEV;
+		}
+		break;
 	default:
-		return -ENODEV;
+		return -EINVAL;
 	}
 }
 
@@ -1488,28 +1523,38 @@ static int arm_smmu_domain_set_attr(struct iommu_domain *domain,
 	int ret = 0;
 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
 
-	if (domain->type != IOMMU_DOMAIN_UNMANAGED)
-		return -EINVAL;
-
 	mutex_lock(&smmu_domain->init_mutex);
 
-	switch (attr) {
-	case DOMAIN_ATTR_NESTING:
-		if (smmu_domain->smmu) {
-			ret = -EPERM;
-			goto out_unlock;
+	switch(domain->type) {
+	case IOMMU_DOMAIN_UNMANAGED:
+		switch (attr) {
+		case DOMAIN_ATTR_NESTING:
+			if (smmu_domain->smmu) {
+				ret = -EPERM;
+				goto out_unlock;
+			}
+
+			if (*(int *)data)
+				smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
+			else
+				smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
+			break;
+		default:
+			ret = -ENODEV;
+		}
+		break;
+	case IOMMU_DOMAIN_DMA:
+		switch (attr) {
+		case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
+			smmu_domain->non_strict = *(int *)data;
+			break;
+		default:
+			ret = -ENODEV;
 		}
-
-		if (*(int *)data)
-			smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
-		else
-			smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
-
 		break;
 	default:
-		ret = -ENODEV;
+		ret = -EINVAL;
 	}
-
 out_unlock:
 	mutex_unlock(&smmu_domain->init_mutex);
 	return ret;
@@ -1562,7 +1607,7 @@ static struct iommu_ops arm_smmu_ops = {
 	.attach_dev		= arm_smmu_attach_dev,
 	.map			= arm_smmu_map,
 	.unmap			= arm_smmu_unmap,
-	.flush_iotlb_all	= arm_smmu_iotlb_sync,
+	.flush_iotlb_all	= arm_smmu_flush_iotlb_all,
 	.iotlb_sync		= arm_smmu_iotlb_sync,
 	.iova_to_phys		= arm_smmu_iova_to_phys,
 	.add_device		= arm_smmu_add_device,
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 511ff9a1d6d9..cc1bf786cfac 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -55,6 +55,9 @@ struct iommu_dma_cookie {
 	};
 	struct list_head		msi_page_list;
 	spinlock_t			msi_lock;
+
+	/* Domain for flush queue callback; NULL if flush queue not in use */
+	struct iommu_domain		*fq_domain;
 };
 
 static inline size_t cookie_msi_granule(struct iommu_dma_cookie *cookie)
@@ -257,6 +260,20 @@ static int iova_reserve_iommu_regions(struct device *dev,
 	return ret;
 }
 
+static void iommu_dma_flush_iotlb_all(struct iova_domain *iovad)
+{
+	struct iommu_dma_cookie *cookie;
+	struct iommu_domain *domain;
+
+	cookie = container_of(iovad, struct iommu_dma_cookie, iovad);
+	domain = cookie->fq_domain;
+	/*
+	 * The IOMMU driver supporting DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE
+	 * implies that ops->flush_iotlb_all must be non-NULL.
+	 */
+	domain->ops->flush_iotlb_all(domain);
+}
+
 /**
  * iommu_dma_init_domain - Initialise a DMA mapping domain
  * @domain: IOMMU domain previously prepared by iommu_get_dma_cookie()
@@ -275,6 +292,7 @@ int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base,
 	struct iommu_dma_cookie *cookie = domain->iova_cookie;
 	struct iova_domain *iovad = &cookie->iovad;
 	unsigned long order, base_pfn, end_pfn;
+	int attr;
 
 	if (!cookie || cookie->type != IOMMU_DMA_IOVA_COOKIE)
 		return -EINVAL;
@@ -308,6 +326,13 @@ int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base,
 	}
 
 	init_iova_domain(iovad, 1UL << order, base_pfn);
+
+	if (!cookie->fq_domain && !iommu_domain_get_attr(domain,
+			DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE, &attr) && attr) {
+		cookie->fq_domain = domain;
+		init_iova_flush_queue(iovad, iommu_dma_flush_iotlb_all, NULL);
+	}
+
 	if (!dev)
 		return 0;
 
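
Aside (not part of the patch): the comment in iommu_dma_flush_iotlb_all() above captures the contract this series establishes: an IOMMU driver may only accept DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE if it also implements .flush_iotlb_all, since that is the callback the flush queue ends up invoking. A sketch of the driver-side pieces for a hypothetical "foo" driver (mirroring the arm-smmu changes in this merge; foo_domain state and to_foo_domain() are assumed helpers):

static void foo_flush_iotlb_all(struct iommu_domain *domain)
{
	/* full IOTLB invalidation; run when the flush queue is drained */
}

static int foo_domain_set_attr(struct iommu_domain *domain,
			       enum iommu_attr attr, void *data)
{
	if (domain->type != IOMMU_DOMAIN_DMA ||
	    attr != DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE)
		return -ENODEV;

	/* remember the choice; later, skip per-unmap TLB syncs if set */
	to_foo_domain(domain)->non_strict = *(int *)data;
	return 0;
}

/* wired up alongside the usual callbacks */
static const struct iommu_ops foo_ops = {
	.flush_iotlb_all	= foo_flush_iotlb_all,
	.domain_set_attr	= foo_domain_set_attr,
	/* .domain_get_attr should report non_strict back, as in the drivers above */
};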
@@ -393,6 +418,9 @@ static void iommu_dma_free_iova(struct iommu_dma_cookie *cookie,
 	/* The MSI case is only ever cleaning up its most recent allocation */
 	if (cookie->type == IOMMU_DMA_MSI_COOKIE)
 		cookie->msi_iova -= size;
+	else if (cookie->fq_domain)	/* non-strict mode */
+		queue_iova(iovad, iova_pfn(iovad, iova),
+				size >> iova_shift(iovad), 0);
 	else
 		free_iova_fast(iovad, iova_pfn(iovad, iova),
 				size >> iova_shift(iovad));
@@ -408,7 +436,9 @@ static void __iommu_dma_unmap(struct iommu_domain *domain, dma_addr_t dma_addr,
 	dma_addr -= iova_off;
 	size = iova_align(iovad, size + iova_off);
 
-	WARN_ON(iommu_unmap(domain, dma_addr, size) != size);
+	WARN_ON(iommu_unmap_fast(domain, dma_addr, size) != size);
+	if (!cookie->fq_domain)
+		iommu_tlb_sync(domain);
 	iommu_dma_free_iova(cookie, dma_addr, size);
 }
 
diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c
index b5948ba6b3b3..445c3bde0480 100644
--- a/drivers/iommu/io-pgtable-arm-v7s.c
+++ b/drivers/iommu/io-pgtable-arm-v7s.c
@@ -587,6 +587,7 @@ static size_t arm_v7s_split_blk_unmap(struct arm_v7s_io_pgtable *data,
 	}
 
 	io_pgtable_tlb_add_flush(&data->iop, iova, size, size, true);
+	io_pgtable_tlb_sync(&data->iop);
 	return size;
 }
 
@@ -642,6 +643,13 @@ static size_t __arm_v7s_unmap(struct arm_v7s_io_pgtable *data,
 			io_pgtable_tlb_sync(iop);
 			ptep = iopte_deref(pte[i], lvl);
 			__arm_v7s_free_table(ptep, lvl + 1, data);
+		} else if (iop->cfg.quirks & IO_PGTABLE_QUIRK_NON_STRICT) {
+			/*
+			 * Order the PTE update against queueing the IOVA, to
+			 * guarantee that a flush callback from a different CPU
+			 * has observed it before the TLBIALL can be issued.
+			 */
+			smp_wmb();
 		} else {
 			io_pgtable_tlb_add_flush(iop, iova, blk_size,
 						 blk_size, true);
@@ -712,7 +720,8 @@ static struct io_pgtable *arm_v7s_alloc_pgtable(struct io_pgtable_cfg *cfg,
 			    IO_PGTABLE_QUIRK_NO_PERMS |
 			    IO_PGTABLE_QUIRK_TLBI_ON_MAP |
 			    IO_PGTABLE_QUIRK_ARM_MTK_4GB |
-			    IO_PGTABLE_QUIRK_NO_DMA))
+			    IO_PGTABLE_QUIRK_NO_DMA |
+			    IO_PGTABLE_QUIRK_NON_STRICT))
 		return NULL;
 
 	/* If ARM_MTK_4GB is enabled, the NO_PERMS is also expected. */
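
Aside (not part of the patch): the smp_wmb() in the two page-table formats (here and in io-pgtable-arm.c below) is the producer half of a pairing with the flush-queue consumer. The unmapping CPU clears the PTE and then publishes the IOVA via queue_iova(); some other CPU later pops that IOVA and issues the TLBIALL through flush_iotlb_all(). The barrier guarantees the cleared PTE is observed before the IOVA can be seen in the queue, and therefore before the TLBI. The two sides, reduced to a sketch (illustrative only; queue and dequeue details are elided):

/* CPU A: non-strict unmap */
static void producer_side(u32 *ptep /* leaf PTE */, struct iova_domain *iovad,
			  unsigned long pfn, unsigned long pages)
{
	WRITE_ONCE(*ptep, 0);			/* clear the leaf PTE        */
	smp_wmb();				/* order the clear before... */
	queue_iova(iovad, pfn, pages, 0);	/* ...publishing the IOVA    */
}

/* CPU B: flush-queue drain, possibly much later */
static void consumer_side(struct iommu_domain *domain)
{
	/* the queued IOVAs are dequeued first, then: */
	domain->ops->flush_iotlb_all(domain);	/* TLBIALL after the PTE clear */
}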
diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
index 88641b4560bc..237cacd4a62b 100644
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@ -574,13 +574,13 @@ static size_t arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data,
 			return 0;
 
 		tablep = iopte_deref(pte, data);
+	} else if (unmap_idx >= 0) {
+		io_pgtable_tlb_add_flush(&data->iop, iova, size, size, true);
+		io_pgtable_tlb_sync(&data->iop);
+		return size;
 	}
 
-	if (unmap_idx < 0)
-		return __arm_lpae_unmap(data, iova, size, lvl, tablep);
-
-	io_pgtable_tlb_add_flush(&data->iop, iova, size, size, true);
-	return size;
+	return __arm_lpae_unmap(data, iova, size, lvl, tablep);
 }
 
 static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
@@ -610,6 +610,13 @@ static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
 			io_pgtable_tlb_sync(iop);
 			ptep = iopte_deref(pte, data);
 			__arm_lpae_free_pgtable(data, lvl + 1, ptep);
+		} else if (iop->cfg.quirks & IO_PGTABLE_QUIRK_NON_STRICT) {
+			/*
+			 * Order the PTE update against queueing the IOVA, to
+			 * guarantee that a flush callback from a different CPU
+			 * has observed it before the TLBIALL can be issued.
+			 */
+			smp_wmb();
 		} else {
 			io_pgtable_tlb_add_flush(iop, iova, size, size, true);
 		}
@@ -772,7 +779,8 @@ arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie)
 	u64 reg;
 	struct arm_lpae_io_pgtable *data;
 
-	if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS | IO_PGTABLE_QUIRK_NO_DMA))
+	if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS | IO_PGTABLE_QUIRK_NO_DMA |
+			    IO_PGTABLE_QUIRK_NON_STRICT))
 		return NULL;
 
 	data = arm_lpae_alloc_pgtable(cfg);
@@ -864,7 +872,8 @@ arm_64_lpae_alloc_pgtable_s2(struct io_pgtable_cfg *cfg, void *cookie)
 	struct arm_lpae_io_pgtable *data;
 
 	/* The NS quirk doesn't apply at stage 2 */
-	if (cfg->quirks & ~IO_PGTABLE_QUIRK_NO_DMA)
+	if (cfg->quirks & ~(IO_PGTABLE_QUIRK_NO_DMA |
+			    IO_PGTABLE_QUIRK_NON_STRICT))
 		return NULL;
 
 	data = arm_lpae_alloc_pgtable(cfg);
diff --git a/drivers/iommu/io-pgtable.h b/drivers/iommu/io-pgtable.h
index 2df79093cad9..47d5ae559329 100644
--- a/drivers/iommu/io-pgtable.h
+++ b/drivers/iommu/io-pgtable.h
@@ -71,12 +71,17 @@ struct io_pgtable_cfg {
 	 *	be accessed by a fully cache-coherent IOMMU or CPU (e.g. for a
 	 *	software-emulated IOMMU), such that pagetable updates need not
 	 *	be treated as explicit DMA data.
+	 *
+	 * IO_PGTABLE_QUIRK_NON_STRICT: Skip issuing synchronous leaf TLBIs
+	 *	on unmap, for DMA domains using the flush queue mechanism for
+	 *	delayed invalidation.
 	 */
 	#define IO_PGTABLE_QUIRK_ARM_NS		BIT(0)
 	#define IO_PGTABLE_QUIRK_NO_PERMS	BIT(1)
 	#define IO_PGTABLE_QUIRK_TLBI_ON_MAP	BIT(2)
 	#define IO_PGTABLE_QUIRK_ARM_MTK_4GB	BIT(3)
 	#define IO_PGTABLE_QUIRK_NO_DMA		BIT(4)
+	#define IO_PGTABLE_QUIRK_NON_STRICT	BIT(5)
 	unsigned long			quirks;
 	unsigned long			pgsize_bitmap;
 	unsigned int			ias;
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 8c15c5980299..2b6dad2aa9f1 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -41,6 +41,7 @@ static unsigned int iommu_def_domain_type = IOMMU_DOMAIN_IDENTITY;
 #else
 static unsigned int iommu_def_domain_type = IOMMU_DOMAIN_DMA;
 #endif
+static bool iommu_dma_strict __read_mostly = true;
 
 struct iommu_callback_data {
 	const struct iommu_ops *ops;
@@ -131,6 +132,12 @@ static int __init iommu_set_def_domain_type(char *str)
 }
 early_param("iommu.passthrough", iommu_set_def_domain_type);
 
+static int __init iommu_dma_setup(char *str)
+{
+	return kstrtobool(str, &iommu_dma_strict);
+}
+early_param("iommu.strict", iommu_dma_setup);
+
 static ssize_t iommu_group_attr_show(struct kobject *kobj,
 				     struct attribute *__attr, char *buf)
 {
@@ -1072,6 +1079,13 @@ struct iommu_group *iommu_group_get_for_dev(struct device *dev)
 		group->default_domain = dom;
 		if (!group->domain)
 			group->domain = dom;
+
+		if (dom && !iommu_dma_strict) {
+			int attr = 1;
+			iommu_domain_set_attr(dom,
+					DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE,
+					&attr);
+		}
 	}
 
 	ret = iommu_group_add_device(group, dev);
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 87994c265bf5..decabe8e8dbe 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -124,6 +124,7 @@ enum iommu_attr {
 	DOMAIN_ATTR_FSL_PAMU_ENABLE,
 	DOMAIN_ATTR_FSL_PAMUV1,
 	DOMAIN_ATTR_NESTING,	/* two stages of translation */
+	DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE,
 	DOMAIN_ATTR_MAX,
 };
 