author	Linus Torvalds <torvalds@linux-foundation.org>	2017-11-14 19:43:27 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2017-11-14 19:43:27 -0500
commit	2cd83ba5bede2f72cc6c79a19a1bddf576b50e88 (patch)
tree	6a02f6f93f90f3fea419c3a283ced0543b603fd4
parent	670ffccb2f9183eb6cb32fe92257aea52b3f8a7d (diff)
parent	56f19441da39e5f27824bcbdf3f60980414b5bd0 (diff)
Merge tag 'iommu-v4.15-rc1' of git://github.com/awilliam/linux-vfio
Pull IOMMU updates from Alex Williamson:
 "As Joerg mentioned[1], he's out on paternity leave through the end of
  the year and I'm filling in for him in the interim:

   - Enforce MSI multiple IRQ alignment in AMD IOMMU

   - VT-d PASID error handling fixes

   - Add r8a7795 IPMMU support

   - Manage runtime PM links on exynos at {add,remove}_device callbacks

   - Fix Mediatek driver name to avoid conflict

   - Add terminate support to qcom fault handler

   - 64-bit IOVA optimizations

   - Simplify IOVA domain destruction, better use of rcache, and skip
     anchor nodes on copy

   - Convert to IOMMU TLB sync API in io-pgtable-arm{-v7s}

   - Drop command queue lock when waiting for CMD_SYNC completion on
     ARM SMMU implementations supporting MSI to cacheable memory

   - ipmmu-vmsa cleanup inspired by missed IOTLB sync callbacks

   - Fix sleeping lock with preemption disabled for RT

   - Dual MMU support for TI DRA7xx DSPs

   - Optional flush option on IOVA allocation avoiding overhead when
     caller can try other options

  [1] https://lkml.org/lkml/2017/10/22/72"

* tag 'iommu-v4.15-rc1' of git://github.com/awilliam/linux-vfio: (54 commits)
  iommu/iova: Use raw_cpu_ptr() instead of get_cpu_ptr() for ->fq
  iommu/mediatek: Fix driver name
  iommu/ipmmu-vmsa: Hook up r8a7795 DT matching code
  iommu/ipmmu-vmsa: Allow two bit SL0
  iommu/ipmmu-vmsa: Make IMBUSCTR setup optional
  iommu/ipmmu-vmsa: Write IMCTR twice
  iommu/ipmmu-vmsa: IPMMU device is 40-bit bus master
  iommu/ipmmu-vmsa: Make use of IOMMU_OF_DECLARE()
  iommu/ipmmu-vmsa: Enable multi context support
  iommu/ipmmu-vmsa: Add optional root device feature
  iommu/ipmmu-vmsa: Introduce features, break out alias
  iommu/ipmmu-vmsa: Unify ipmmu_ops
  iommu/ipmmu-vmsa: Clean up struct ipmmu_vmsa_iommu_priv
  iommu/ipmmu-vmsa: Simplify group allocation
  iommu/ipmmu-vmsa: Unify domain alloc/free
  iommu/ipmmu-vmsa: Fix return value check in ipmmu_find_group_dma()
  iommu/vt-d: Clear pasid table entry when memory unbound
  iommu/vt-d: Clear Page Request Overflow fault bit
  iommu/vt-d: Missing checks for pasid tables if allocation fails
  iommu/amd: Limit the IOVA page range to the specified addresses
  ...
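The IOVA API change that runs through most of the hunks below is visible in the callers: init_iova_domain() drops its pfn_32bit argument, and alloc_iova_fast() gains a flush_rcache flag so a caller with a fallback can skip the expensive rcache flush-and-retry. A minimal caller-side sketch (not from this merge; 'iovad', 'order', 'pages' and 'dma_mask' are assumed to exist in the caller):

	/* Sketch of the updated IOVA calls as the drivers below now use them */
	init_iova_domain(&iovad, 1UL << order, base_pfn);	/* no pfn_32bit limit */

	/* First try below 4GB without flushing the per-CPU rcaches... */
	pfn = alloc_iova_fast(&iovad, pages, IOVA_PFN(DMA_BIT_MASK(32)), false);
	if (!pfn)	/* ...then retry over the full mask, flushing if needed */
		pfn = alloc_iova_fast(&iovad, pages, IOVA_PFN(dma_mask), true);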
-rw-r--r--	drivers/gpu/drm/tegra/drm.c	3
-rw-r--r--	drivers/gpu/host1x/dev.c	3
-rw-r--r--	drivers/iommu/amd_iommu.c	43
-rw-r--r--	drivers/iommu/arm-smmu-v3.c	214
-rw-r--r--	drivers/iommu/arm-smmu.c	31
-rw-r--r--	drivers/iommu/dma-iommu.c	24
-rw-r--r--	drivers/iommu/dmar.c	10
-rw-r--r--	drivers/iommu/exynos-iommu.c	23
-rw-r--r--	drivers/iommu/intel-iommu.c	28
-rw-r--r--	drivers/iommu/intel-svm.c	4
-rw-r--r--	drivers/iommu/io-pgtable-arm-v7s.c	7
-rw-r--r--	drivers/iommu/io-pgtable-arm.c	7
-rw-r--r--	drivers/iommu/iova.c	220
-rw-r--r--	drivers/iommu/ipmmu-vmsa.c	527
-rw-r--r--	drivers/iommu/mtk_iommu.c	7
-rw-r--r--	drivers/iommu/mtk_iommu_v1.c	2
-rw-r--r--	drivers/iommu/omap-iommu.c	375
-rw-r--r--	drivers/iommu/omap-iommu.h	30
-rw-r--r--	drivers/iommu/qcom_iommu.c	33
-rw-r--r--	drivers/misc/mic/scif/scif_rma.c	3
-rw-r--r--	include/linux/dmar.h	1
-rw-r--r--	include/linux/intel-iommu.h	1
-rw-r--r--	include/linux/iova.h	14
23 files changed, 983 insertions, 627 deletions
diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
index 597d563d636a..b822e484b7e5 100644
--- a/drivers/gpu/drm/tegra/drm.c
+++ b/drivers/gpu/drm/tegra/drm.c
@@ -155,8 +155,7 @@ static int tegra_drm_load(struct drm_device *drm, unsigned long flags)
 
 		order = __ffs(tegra->domain->pgsize_bitmap);
 		init_iova_domain(&tegra->carveout.domain, 1UL << order,
-				 carveout_start >> order,
-				 carveout_end >> order);
+				 carveout_start >> order);
 
 		tegra->carveout.shift = iova_shift(&tegra->carveout.domain);
 		tegra->carveout.limit = carveout_end >> tegra->carveout.shift;
diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c
index 7f22c5c37660..5267c62e8896 100644
--- a/drivers/gpu/host1x/dev.c
+++ b/drivers/gpu/host1x/dev.c
@@ -198,8 +198,7 @@ static int host1x_probe(struct platform_device *pdev)
 
 		order = __ffs(host->domain->pgsize_bitmap);
 		init_iova_domain(&host->iova, 1UL << order,
-				 geometry->aperture_start >> order,
-				 geometry->aperture_end >> order);
+				 geometry->aperture_start >> order);
 		host->iova_end = geometry->aperture_end;
 	}
 
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 9c848e36f209..7d5eb004091d 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -63,7 +63,6 @@
 /* IO virtual address start page frame number */
 #define IOVA_START_PFN		(1)
 #define IOVA_PFN(addr)		((addr) >> PAGE_SHIFT)
-#define DMA_32BIT_PFN		IOVA_PFN(DMA_BIT_MASK(32))
 
 /* Reserved IOVA ranges */
 #define MSI_RANGE_START		(0xfee00000)
@@ -1547,10 +1546,11 @@ static unsigned long dma_ops_alloc_iova(struct device *dev,
 
 	if (dma_mask > DMA_BIT_MASK(32))
 		pfn = alloc_iova_fast(&dma_dom->iovad, pages,
-				      IOVA_PFN(DMA_BIT_MASK(32)));
+				      IOVA_PFN(DMA_BIT_MASK(32)), false);
 
 	if (!pfn)
-		pfn = alloc_iova_fast(&dma_dom->iovad, pages, IOVA_PFN(dma_mask));
+		pfn = alloc_iova_fast(&dma_dom->iovad, pages,
+				      IOVA_PFN(dma_mask), true);
 
 	return (pfn << PAGE_SHIFT);
 }
@@ -1788,8 +1788,7 @@ static struct dma_ops_domain *dma_ops_domain_alloc(void)
 	if (!dma_dom->domain.pt_root)
 		goto free_dma_dom;
 
-	init_iova_domain(&dma_dom->iovad, PAGE_SIZE,
-			 IOVA_START_PFN, DMA_32BIT_PFN);
+	init_iova_domain(&dma_dom->iovad, PAGE_SIZE, IOVA_START_PFN);
 
 	if (init_iova_flush_queue(&dma_dom->iovad, iova_domain_flush_tlb, NULL))
 		goto free_dma_dom;
@@ -2383,11 +2382,9 @@ static void __unmap_single(struct dma_ops_domain *dma_dom,
 			   size_t size,
 			   int dir)
 {
-	dma_addr_t flush_addr;
 	dma_addr_t i, start;
 	unsigned int pages;
 
-	flush_addr = dma_addr;
 	pages = iommu_num_pages(dma_addr, size, PAGE_SIZE);
 	dma_addr &= PAGE_MASK;
 	start = dma_addr;
@@ -2696,8 +2693,7 @@ static int init_reserved_iova_ranges(void)
 	struct pci_dev *pdev = NULL;
 	struct iova *val;
 
-	init_iova_domain(&reserved_iova_ranges, PAGE_SIZE,
-			 IOVA_START_PFN, DMA_32BIT_PFN);
+	init_iova_domain(&reserved_iova_ranges, PAGE_SIZE, IOVA_START_PFN);
 
 	lockdep_set_class(&reserved_iova_ranges.iova_rbtree_lock,
 			  &reserved_rbtree_key);
@@ -3155,7 +3151,7 @@ static void amd_iommu_apply_resv_region(struct device *dev,
 	unsigned long start, end;
 
 	start = IOVA_PFN(region->start);
-	end   = IOVA_PFN(region->start + region->length);
+	end   = IOVA_PFN(region->start + region->length - 1);
 
 	WARN_ON_ONCE(reserve_iova(&dma_dom->iovad, start, end) == NULL);
 }
@@ -3663,11 +3659,11 @@ out_unlock:
 	return table;
 }
 
-static int alloc_irq_index(u16 devid, int count)
+static int alloc_irq_index(u16 devid, int count, bool align)
 {
 	struct irq_remap_table *table;
+	int index, c, alignment = 1;
 	unsigned long flags;
-	int index, c;
 	struct amd_iommu *iommu = amd_iommu_rlookup_table[devid];
 
 	if (!iommu)
@@ -3677,16 +3673,21 @@ static int alloc_irq_index(u16 devid, int count)
 	if (!table)
 		return -ENODEV;
 
+	if (align)
+		alignment = roundup_pow_of_two(count);
+
 	spin_lock_irqsave(&table->lock, flags);
 
 	/* Scan table for free entries */
-	for (c = 0, index = table->min_index;
-	     index < MAX_IRQS_PER_TABLE;
-	     ++index) {
-		if (!iommu->irte_ops->is_allocated(table, index))
+	for (index = ALIGN(table->min_index, alignment), c = 0;
+	     index < MAX_IRQS_PER_TABLE;) {
+		if (!iommu->irte_ops->is_allocated(table, index)) {
 			c += 1;
-		else
+		} else {
 			c = 0;
+			index = ALIGN(index + 1, alignment);
+			continue;
+		}
 
 		if (c == count) {
 			for (; c != 0; --c)
@@ -3695,6 +3696,8 @@ static int alloc_irq_index(u16 devid, int count)
 			index -= count - 1;
 			goto out;
 		}
+
+		index++;
 	}
 
 	index = -ENOSPC;
@@ -4099,7 +4102,9 @@ static int irq_remapping_alloc(struct irq_domain *domain, unsigned int virq,
 		else
 			ret = -ENOMEM;
 	} else {
-		index = alloc_irq_index(devid, nr_irqs);
+		bool align = (info->type == X86_IRQ_ALLOC_TYPE_MSI);
+
+		index = alloc_irq_index(devid, nr_irqs, align);
 	}
 	if (index < 0) {
 		pr_warn("Failed to allocate IRTE\n");
diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index e67ba6c40faf..f122071688fd 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -316,6 +316,7 @@
 #define ARM64_TCR_TBI0_MASK		0x1UL
 
 #define CTXDESC_CD_0_AA64		(1UL << 41)
+#define CTXDESC_CD_0_S			(1UL << 44)
 #define CTXDESC_CD_0_R			(1UL << 45)
 #define CTXDESC_CD_0_A			(1UL << 46)
 #define CTXDESC_CD_0_ASET_SHIFT		47
@@ -377,7 +378,16 @@
 
 #define CMDQ_SYNC_0_CS_SHIFT		12
 #define CMDQ_SYNC_0_CS_NONE		(0UL << CMDQ_SYNC_0_CS_SHIFT)
+#define CMDQ_SYNC_0_CS_IRQ		(1UL << CMDQ_SYNC_0_CS_SHIFT)
 #define CMDQ_SYNC_0_CS_SEV		(2UL << CMDQ_SYNC_0_CS_SHIFT)
+#define CMDQ_SYNC_0_MSH_SHIFT		22
+#define CMDQ_SYNC_0_MSH_ISH		(3UL << CMDQ_SYNC_0_MSH_SHIFT)
+#define CMDQ_SYNC_0_MSIATTR_SHIFT	24
+#define CMDQ_SYNC_0_MSIATTR_OIWB	(0xfUL << CMDQ_SYNC_0_MSIATTR_SHIFT)
+#define CMDQ_SYNC_0_MSIDATA_SHIFT	32
+#define CMDQ_SYNC_0_MSIDATA_MASK	0xffffffffUL
+#define CMDQ_SYNC_1_MSIADDR_SHIFT	0
+#define CMDQ_SYNC_1_MSIADDR_MASK	0xffffffffffffcUL
 
 /* Event queue */
 #define EVTQ_ENT_DWORDS			4
@@ -408,20 +418,12 @@
 
 /* High-level queue structures */
 #define ARM_SMMU_POLL_TIMEOUT_US	100
-#define ARM_SMMU_CMDQ_DRAIN_TIMEOUT_US	1000000 /* 1s! */
+#define ARM_SMMU_CMDQ_SYNC_TIMEOUT_US	1000000 /* 1s! */
+#define ARM_SMMU_CMDQ_SYNC_SPIN_COUNT	10
 
 #define MSI_IOVA_BASE			0x8000000
 #define MSI_IOVA_LENGTH			0x100000
 
-/* Until ACPICA headers cover IORT rev. C */
-#ifndef ACPI_IORT_SMMU_HISILICON_HI161X
-#define ACPI_IORT_SMMU_HISILICON_HI161X	0x1
-#endif
-
-#ifndef ACPI_IORT_SMMU_V3_CAVIUM_CN99XX
-#define ACPI_IORT_SMMU_V3_CAVIUM_CN99XX	0x2
-#endif
-
 static bool disable_bypass;
 module_param_named(disable_bypass, disable_bypass, bool, S_IRUGO);
 MODULE_PARM_DESC(disable_bypass,
@@ -504,6 +506,10 @@ struct arm_smmu_cmdq_ent {
 		} pri;
 
 		#define CMDQ_OP_CMD_SYNC	0x46
+		struct {
+			u32			msidata;
+			u64			msiaddr;
+		} sync;
 	};
 };
 
@@ -604,6 +610,7 @@ struct arm_smmu_device {
 #define ARM_SMMU_FEAT_TRANS_S2		(1 << 10)
 #define ARM_SMMU_FEAT_STALLS		(1 << 11)
 #define ARM_SMMU_FEAT_HYP		(1 << 12)
+#define ARM_SMMU_FEAT_STALL_FORCE	(1 << 13)
 	u32				features;
 
 #define ARM_SMMU_OPT_SKIP_PREFETCH	(1 << 0)
@@ -616,6 +623,7 @@ struct arm_smmu_device {
 
 	int				gerr_irq;
 	int				combined_irq;
+	atomic_t			sync_nr;
 
 	unsigned long			ias; /* IPA */
 	unsigned long			oas; /* PA */
@@ -634,6 +642,8 @@ struct arm_smmu_device {
 
 	struct arm_smmu_strtab_cfg	strtab_cfg;
 
+	u32				sync_count;
+
 	/* IOMMU core code handle */
 	struct iommu_device		iommu;
 };
@@ -757,26 +767,29 @@ static void queue_inc_prod(struct arm_smmu_queue *q)
  * Wait for the SMMU to consume items. If drain is true, wait until the queue
  * is empty. Otherwise, wait until there is at least one free slot.
  */
-static int queue_poll_cons(struct arm_smmu_queue *q, bool drain, bool wfe)
+static int queue_poll_cons(struct arm_smmu_queue *q, bool sync, bool wfe)
 {
 	ktime_t timeout;
-	unsigned int delay = 1;
+	unsigned int delay = 1, spin_cnt = 0;
 
-	/* Wait longer if it's queue drain */
-	timeout = ktime_add_us(ktime_get(), drain ?
-					    ARM_SMMU_CMDQ_DRAIN_TIMEOUT_US :
+	/* Wait longer if it's a CMD_SYNC */
+	timeout = ktime_add_us(ktime_get(), sync ?
+					    ARM_SMMU_CMDQ_SYNC_TIMEOUT_US :
 					    ARM_SMMU_POLL_TIMEOUT_US);
 
-	while (queue_sync_cons(q), (drain ? !queue_empty(q) : queue_full(q))) {
+	while (queue_sync_cons(q), (sync ? !queue_empty(q) : queue_full(q))) {
 		if (ktime_compare(ktime_get(), timeout) > 0)
 			return -ETIMEDOUT;
 
 		if (wfe) {
 			wfe();
-		} else {
+		} else if (++spin_cnt < ARM_SMMU_CMDQ_SYNC_SPIN_COUNT) {
 			cpu_relax();
+			continue;
+		} else {
 			udelay(delay);
 			delay *= 2;
+			spin_cnt = 0;
 		}
 	}
 
@@ -878,7 +891,13 @@ static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
 		}
 		break;
 	case CMDQ_OP_CMD_SYNC:
-		cmd[0] |= CMDQ_SYNC_0_CS_SEV;
+		if (ent->sync.msiaddr)
+			cmd[0] |= CMDQ_SYNC_0_CS_IRQ;
+		else
+			cmd[0] |= CMDQ_SYNC_0_CS_SEV;
+		cmd[0] |= CMDQ_SYNC_0_MSH_ISH | CMDQ_SYNC_0_MSIATTR_OIWB;
+		cmd[0] |= (u64)ent->sync.msidata << CMDQ_SYNC_0_MSIDATA_SHIFT;
+		cmd[1] |= ent->sync.msiaddr & CMDQ_SYNC_1_MSIADDR_MASK;
 		break;
 	default:
 		return -ENOENT;
@@ -936,13 +955,22 @@ static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
 	queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
 }
 
+static void arm_smmu_cmdq_insert_cmd(struct arm_smmu_device *smmu, u64 *cmd)
+{
+	struct arm_smmu_queue *q = &smmu->cmdq.q;
+	bool wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
+
+	while (queue_insert_raw(q, cmd) == -ENOSPC) {
+		if (queue_poll_cons(q, false, wfe))
+			dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
+	}
+}
+
 static void arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
 				    struct arm_smmu_cmdq_ent *ent)
 {
 	u64 cmd[CMDQ_ENT_DWORDS];
 	unsigned long flags;
-	bool wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
-	struct arm_smmu_queue *q = &smmu->cmdq.q;
 
 	if (arm_smmu_cmdq_build_cmd(cmd, ent)) {
 		dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
@@ -951,14 +979,76 @@ static void arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
 	}
 
 	spin_lock_irqsave(&smmu->cmdq.lock, flags);
-	while (queue_insert_raw(q, cmd) == -ENOSPC) {
-		if (queue_poll_cons(q, false, wfe))
-			dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
-	}
+	arm_smmu_cmdq_insert_cmd(smmu, cmd);
+	spin_unlock_irqrestore(&smmu->cmdq.lock, flags);
+}
 
-	if (ent->opcode == CMDQ_OP_CMD_SYNC && queue_poll_cons(q, true, wfe))
-		dev_err_ratelimited(smmu->dev, "CMD_SYNC timeout\n");
+/*
+ * The difference between val and sync_idx is bounded by the maximum size of
+ * a queue at 2^20 entries, so 32 bits is plenty for wrap-safe arithmetic.
+ */
+static int __arm_smmu_sync_poll_msi(struct arm_smmu_device *smmu, u32 sync_idx)
+{
+	ktime_t timeout;
+	u32 val;
+
+	timeout = ktime_add_us(ktime_get(), ARM_SMMU_CMDQ_SYNC_TIMEOUT_US);
+	val = smp_cond_load_acquire(&smmu->sync_count,
+				    (int)(VAL - sync_idx) >= 0 ||
+				    !ktime_before(ktime_get(), timeout));
+
+	return (int)(val - sync_idx) < 0 ? -ETIMEDOUT : 0;
+}
+
+static int __arm_smmu_cmdq_issue_sync_msi(struct arm_smmu_device *smmu)
+{
+	u64 cmd[CMDQ_ENT_DWORDS];
+	unsigned long flags;
+	struct arm_smmu_cmdq_ent ent = {
+		.opcode = CMDQ_OP_CMD_SYNC,
+		.sync	= {
+			.msidata = atomic_inc_return_relaxed(&smmu->sync_nr),
+			.msiaddr = virt_to_phys(&smmu->sync_count),
+		},
+	};
+
+	arm_smmu_cmdq_build_cmd(cmd, &ent);
+
+	spin_lock_irqsave(&smmu->cmdq.lock, flags);
+	arm_smmu_cmdq_insert_cmd(smmu, cmd);
+	spin_unlock_irqrestore(&smmu->cmdq.lock, flags);
+
+	return __arm_smmu_sync_poll_msi(smmu, ent.sync.msidata);
+}
+
+static int __arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu)
+{
+	u64 cmd[CMDQ_ENT_DWORDS];
+	unsigned long flags;
+	bool wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
+	struct arm_smmu_cmdq_ent ent = { .opcode = CMDQ_OP_CMD_SYNC };
+	int ret;
+
+	arm_smmu_cmdq_build_cmd(cmd, &ent);
+
+	spin_lock_irqsave(&smmu->cmdq.lock, flags);
+	arm_smmu_cmdq_insert_cmd(smmu, cmd);
+	ret = queue_poll_cons(&smmu->cmdq.q, true, wfe);
 	spin_unlock_irqrestore(&smmu->cmdq.lock, flags);
+
+	return ret;
+}
+
+static void arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu)
+{
+	int ret;
+	bool msi = (smmu->features & ARM_SMMU_FEAT_MSI) &&
+		   (smmu->features & ARM_SMMU_FEAT_COHERENCY);
+
+	ret = msi ? __arm_smmu_cmdq_issue_sync_msi(smmu)
+		  : __arm_smmu_cmdq_issue_sync(smmu);
+	if (ret)
+		dev_err_ratelimited(smmu->dev, "CMD_SYNC timeout\n");
 }
 
 /* Context descriptor manipulation functions */
@@ -996,6 +1086,11 @@ static void arm_smmu_write_ctx_desc(struct arm_smmu_device *smmu,
 	      CTXDESC_CD_0_R | CTXDESC_CD_0_A | CTXDESC_CD_0_ASET_PRIVATE |
 	      CTXDESC_CD_0_AA64 | (u64)cfg->cd.asid << CTXDESC_CD_0_ASID_SHIFT |
 	      CTXDESC_CD_0_V;
+
+	/* STALL_MODEL==0b10 && CD.S==0 is ILLEGAL */
+	if (smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
+		val |= CTXDESC_CD_0_S;
+
 	cfg->cdptr[0] = cpu_to_le64(val);
 
 	val = cfg->cd.ttbr & CTXDESC_CD_1_TTB0_MASK << CTXDESC_CD_1_TTB0_SHIFT;
@@ -1029,8 +1124,7 @@ static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu, u32 sid)
 	};
 
 	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
-	cmd.opcode = CMDQ_OP_CMD_SYNC;
-	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
+	arm_smmu_cmdq_issue_sync(smmu);
 }
 
 static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid,
@@ -1094,7 +1188,11 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid,
 		dst[1] = cpu_to_le64(STRTAB_STE_1_SHCFG_INCOMING
 				     << STRTAB_STE_1_SHCFG_SHIFT);
 		dst[2] = 0; /* Nuke the VMID */
-		if (ste_live)
+		/*
+		 * The SMMU can perform negative caching, so we must sync
+		 * the STE regardless of whether the old value was live.
+		 */
+		if (smmu)
 			arm_smmu_sync_ste_for_sid(smmu, sid);
 		return;
 	}
@@ -1112,7 +1210,8 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid,
 #endif
 			 STRTAB_STE_1_STRW_NSEL1 << STRTAB_STE_1_STRW_SHIFT);
 
-		if (smmu->features & ARM_SMMU_FEAT_STALLS)
+		if (smmu->features & ARM_SMMU_FEAT_STALLS &&
+		   !(smmu->features & ARM_SMMU_FEAT_STALL_FORCE))
 			dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);
 
 		val |= (ste->s1_cfg->cdptr_dma & STRTAB_STE_0_S1CTXPTR_MASK
@@ -1275,12 +1374,6 @@ static irqreturn_t arm_smmu_priq_thread(int irq, void *dev)
 	return IRQ_HANDLED;
 }
 
-static irqreturn_t arm_smmu_cmdq_sync_handler(int irq, void *dev)
-{
-	/* We don't actually use CMD_SYNC interrupts for anything */
-	return IRQ_HANDLED;
-}
-
 static int arm_smmu_device_disable(struct arm_smmu_device *smmu);
 
 static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
@@ -1313,10 +1406,8 @@ static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
 	if (active & GERROR_MSI_EVTQ_ABT_ERR)
 		dev_warn(smmu->dev, "EVTQ MSI write aborted\n");
 
-	if (active & GERROR_MSI_CMDQ_ABT_ERR) {
+	if (active & GERROR_MSI_CMDQ_ABT_ERR)
 		dev_warn(smmu->dev, "CMDQ MSI write aborted\n");
-		arm_smmu_cmdq_sync_handler(irq, smmu->dev);
-	}
 
 	if (active & GERROR_PRIQ_ABT_ERR)
 		dev_err(smmu->dev, "PRIQ write aborted -- events may have been lost\n");
@@ -1345,17 +1436,13 @@ static irqreturn_t arm_smmu_combined_irq_thread(int irq, void *dev)
 static irqreturn_t arm_smmu_combined_irq_handler(int irq, void *dev)
 {
 	arm_smmu_gerror_handler(irq, dev);
-	arm_smmu_cmdq_sync_handler(irq, dev);
 	return IRQ_WAKE_THREAD;
 }
 
 /* IO_PGTABLE API */
 static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu)
 {
-	struct arm_smmu_cmdq_ent cmd;
-
-	cmd.opcode = CMDQ_OP_CMD_SYNC;
-	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
+	arm_smmu_cmdq_issue_sync(smmu);
 }
 
 static void arm_smmu_tlb_sync(void *cookie)
@@ -1743,6 +1830,14 @@ arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size)
 	return ops->unmap(ops, iova, size);
 }
 
+static void arm_smmu_iotlb_sync(struct iommu_domain *domain)
+{
+	struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
+
+	if (smmu)
+		__arm_smmu_tlb_sync(smmu);
+}
+
 static phys_addr_t
 arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
 {
@@ -1963,6 +2058,8 @@ static struct iommu_ops arm_smmu_ops = {
 	.map			= arm_smmu_map,
 	.unmap			= arm_smmu_unmap,
 	.map_sg			= default_iommu_map_sg,
+	.flush_iotlb_all	= arm_smmu_iotlb_sync,
+	.iotlb_sync		= arm_smmu_iotlb_sync,
 	.iova_to_phys		= arm_smmu_iova_to_phys,
 	.add_device		= arm_smmu_add_device,
 	.remove_device		= arm_smmu_remove_device,
@@ -2147,6 +2244,7 @@ static int arm_smmu_init_structures(struct arm_smmu_device *smmu)
 {
 	int ret;
 
+	atomic_set(&smmu->sync_nr, 0);
 	ret = arm_smmu_init_queues(smmu);
 	if (ret)
 		return ret;
@@ -2265,15 +2363,6 @@ static void arm_smmu_setup_unique_irqs(struct arm_smmu_device *smmu)
 			dev_warn(smmu->dev, "failed to enable evtq irq\n");
 	}
 
-	irq = smmu->cmdq.q.irq;
-	if (irq) {
-		ret = devm_request_irq(smmu->dev, irq,
-				       arm_smmu_cmdq_sync_handler, 0,
-				       "arm-smmu-v3-cmdq-sync", smmu);
-		if (ret < 0)
-			dev_warn(smmu->dev, "failed to enable cmdq-sync irq\n");
-	}
-
 	irq = smmu->gerr_irq;
 	if (irq) {
 		ret = devm_request_irq(smmu->dev, irq, arm_smmu_gerror_handler,
@@ -2399,8 +2488,7 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
 	/* Invalidate any cached configuration */
 	cmd.opcode = CMDQ_OP_CFGI_ALL;
 	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
-	cmd.opcode = CMDQ_OP_CMD_SYNC;
-	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
+	arm_smmu_cmdq_issue_sync(smmu);
 
 	/* Invalidate any stale TLB entries */
 	if (smmu->features & ARM_SMMU_FEAT_HYP) {
@@ -2410,8 +2498,7 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
 
 	cmd.opcode = CMDQ_OP_TLBI_NSNH_ALL;
 	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
-	cmd.opcode = CMDQ_OP_CMD_SYNC;
-	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
+	arm_smmu_cmdq_issue_sync(smmu);
 
 	/* Event queue */
 	writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE);
@@ -2532,13 +2619,14 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
 	 * register, but warn on mismatch.
 	 */
 	if (!!(reg & IDR0_COHACC) != coherent)
-		dev_warn(smmu->dev, "IDR0.COHACC overridden by dma-coherent property (%s)\n",
+		dev_warn(smmu->dev, "IDR0.COHACC overridden by FW configuration (%s)\n",
			 coherent ? "true" : "false");
 
 	switch (reg & IDR0_STALL_MODEL_MASK << IDR0_STALL_MODEL_SHIFT) {
-	case IDR0_STALL_MODEL_STALL:
-		/* Fallthrough */
 	case IDR0_STALL_MODEL_FORCE:
+		smmu->features |= ARM_SMMU_FEAT_STALL_FORCE;
+		/* Fallthrough */
+	case IDR0_STALL_MODEL_STALL:
 		smmu->features |= ARM_SMMU_FEAT_STALLS;
 	}
 
@@ -2665,7 +2753,7 @@ static void acpi_smmu_get_options(u32 model, struct arm_smmu_device *smmu)
 	case ACPI_IORT_SMMU_V3_CAVIUM_CN99XX:
 		smmu->options |= ARM_SMMU_OPT_PAGE0_REGS_ONLY;
 		break;
-	case ACPI_IORT_SMMU_HISILICON_HI161X:
+	case ACPI_IORT_SMMU_V3_HISILICON_HI161X:
 		smmu->options |= ARM_SMMU_OPT_SKIP_PREFETCH;
 		break;
 	}
@@ -2783,10 +2871,6 @@ static int arm_smmu_device_probe(struct platform_device *pdev)
 	if (irq > 0)
 		smmu->priq.q.irq = irq;
 
-	irq = platform_get_irq_byname(pdev, "cmdq-sync");
-	if (irq > 0)
-		smmu->cmdq.q.irq = irq;
-
 	irq = platform_get_irq_byname(pdev, "gerror");
 	if (irq > 0)
 		smmu->gerr_irq = irq;
diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index 3bdb799d3b4b..78d4c6b8f1ba 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -59,6 +59,7 @@
 #define ARM_MMU500_ACTLR_CPRE		(1 << 1)
 
 #define ARM_MMU500_ACR_CACHE_LOCK	(1 << 26)
+#define ARM_MMU500_ACR_S2CRB_TLBEN	(1 << 10)
 #define ARM_MMU500_ACR_SMTNMB_TLBEN	(1 << 8)
 
 #define TLB_LOOP_TIMEOUT		1000000	/* 1s! */
@@ -119,14 +120,6 @@ enum arm_smmu_implementation {
 	CAVIUM_SMMUV2,
 };
 
-/* Until ACPICA headers cover IORT rev. C */
-#ifndef ACPI_IORT_SMMU_CORELINK_MMU401
-#define ACPI_IORT_SMMU_CORELINK_MMU401	0x4
-#endif
-#ifndef ACPI_IORT_SMMU_CAVIUM_THUNDERX
-#define ACPI_IORT_SMMU_CAVIUM_THUNDERX	0x5
-#endif
-
 struct arm_smmu_s2cr {
 	struct iommu_group		*group;
 	int				count;
@@ -250,6 +243,7 @@ enum arm_smmu_domain_stage {
 struct arm_smmu_domain {
 	struct arm_smmu_device		*smmu;
 	struct io_pgtable_ops		*pgtbl_ops;
+	const struct iommu_gather_ops	*tlb_ops;
 	struct arm_smmu_cfg		cfg;
 	enum arm_smmu_domain_stage	stage;
 	struct mutex			init_mutex; /* Protects smmu pointer */
@@ -735,7 +729,6 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
 	enum io_pgtable_fmt fmt;
 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
 	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
-	const struct iommu_gather_ops *tlb_ops;
 
 	mutex_lock(&smmu_domain->init_mutex);
 	if (smmu_domain->smmu)
@@ -813,7 +806,7 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
 			ias = min(ias, 32UL);
 			oas = min(oas, 32UL);
 		}
-		tlb_ops = &arm_smmu_s1_tlb_ops;
+		smmu_domain->tlb_ops = &arm_smmu_s1_tlb_ops;
 		break;
 	case ARM_SMMU_DOMAIN_NESTED:
 		/*
@@ -833,9 +826,9 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
 			oas = min(oas, 40UL);
 		}
 		if (smmu->version == ARM_SMMU_V2)
-			tlb_ops = &arm_smmu_s2_tlb_ops_v2;
+			smmu_domain->tlb_ops = &arm_smmu_s2_tlb_ops_v2;
 		else
-			tlb_ops = &arm_smmu_s2_tlb_ops_v1;
+			smmu_domain->tlb_ops = &arm_smmu_s2_tlb_ops_v1;
 		break;
 	default:
 		ret = -EINVAL;
@@ -863,7 +856,7 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
 		.pgsize_bitmap	= smmu->pgsize_bitmap,
 		.ias		= ias,
 		.oas		= oas,
-		.tlb		= tlb_ops,
+		.tlb		= smmu_domain->tlb_ops,
 		.iommu_dev	= smmu->dev,
 	};
 
@@ -1259,6 +1252,14 @@ static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
 	return ops->unmap(ops, iova, size);
 }
 
+static void arm_smmu_iotlb_sync(struct iommu_domain *domain)
+{
+	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
+
+	if (smmu_domain->tlb_ops)
+		smmu_domain->tlb_ops->tlb_sync(smmu_domain);
+}
+
 static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
 					      dma_addr_t iova)
 {
@@ -1562,6 +1563,8 @@ static struct iommu_ops arm_smmu_ops = {
 	.map			= arm_smmu_map,
 	.unmap			= arm_smmu_unmap,
 	.map_sg			= default_iommu_map_sg,
+	.flush_iotlb_all	= arm_smmu_iotlb_sync,
+	.iotlb_sync		= arm_smmu_iotlb_sync,
 	.iova_to_phys		= arm_smmu_iova_to_phys,
 	.add_device		= arm_smmu_add_device,
 	.remove_device		= arm_smmu_remove_device,
@@ -1606,7 +1609,7 @@ static void arm_smmu_device_reset(struct arm_smmu_device *smmu)
 		 * Allow unmatched Stream IDs to allocate bypass
 		 * TLB entries for reduced latency.
 		 */
-		reg |= ARM_MMU500_ACR_SMTNMB_TLBEN;
+		reg |= ARM_MMU500_ACR_SMTNMB_TLBEN | ARM_MMU500_ACR_S2CRB_TLBEN;
 		writel_relaxed(reg, gr0_base + ARM_SMMU_GR0_sACR);
 	}
 
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 9d1cebe7f6cb..25914d36c5ac 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -292,18 +292,7 @@ int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base,
 		/* ...then finally give it a kicking to make sure it fits */
 		base_pfn = max_t(unsigned long, base_pfn,
 				domain->geometry.aperture_start >> order);
-		end_pfn = min_t(unsigned long, end_pfn,
-				domain->geometry.aperture_end >> order);
 	}
-	/*
-	 * PCI devices may have larger DMA masks, but still prefer allocating
-	 * within a 32-bit mask to avoid DAC addressing. Such limitations don't
-	 * apply to the typical platform device, so for those we may as well
-	 * leave the cache limit at the top of their range to save an rb_last()
-	 * traversal on every allocation.
-	 */
-	if (dev && dev_is_pci(dev))
-		end_pfn &= DMA_BIT_MASK(32) >> order;
 
 	/* start_pfn is always nonzero for an already-initialised domain */
 	if (iovad->start_pfn) {
@@ -312,16 +301,11 @@ int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base,
 			pr_warn("Incompatible range for DMA domain\n");
 			return -EFAULT;
 		}
-		/*
-		 * If we have devices with different DMA masks, move the free
-		 * area cache limit down for the benefit of the smaller one.
-		 */
-		iovad->dma_32bit_pfn = min(end_pfn + 1, iovad->dma_32bit_pfn);
 
 		return 0;
 	}
 
-	init_iova_domain(iovad, 1UL << order, base_pfn, end_pfn);
+	init_iova_domain(iovad, 1UL << order, base_pfn);
 	if (!dev)
 		return 0;
 
@@ -386,10 +370,12 @@ static dma_addr_t iommu_dma_alloc_iova(struct iommu_domain *domain,
 
 	/* Try to get PCI devices a SAC address */
 	if (dma_limit > DMA_BIT_MASK(32) && dev_is_pci(dev))
-		iova = alloc_iova_fast(iovad, iova_len, DMA_BIT_MASK(32) >> shift);
+		iova = alloc_iova_fast(iovad, iova_len,
+				       DMA_BIT_MASK(32) >> shift, false);
 
 	if (!iova)
-		iova = alloc_iova_fast(iovad, iova_len, dma_limit >> shift);
+		iova = alloc_iova_fast(iovad, iova_len, dma_limit >> shift,
+				       true);
 
 	return (dma_addr_t)iova << shift;
 }
diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c
index 57c920c1372d..9a7ffd13c7f0 100644
--- a/drivers/iommu/dmar.c
+++ b/drivers/iommu/dmar.c
@@ -801,13 +801,16 @@ int __init dmar_dev_scope_init(void)
 				dmar_free_pci_notify_info(info);
 			}
 		}
-
-		bus_register_notifier(&pci_bus_type, &dmar_pci_bus_nb);
 	}
 
 	return dmar_dev_scope_status;
 }
 
+void dmar_register_bus_notifier(void)
+{
+	bus_register_notifier(&pci_bus_type, &dmar_pci_bus_nb);
+}
+
 
 int __init dmar_table_init(void)
 {
@@ -1676,7 +1679,8 @@ irqreturn_t dmar_fault(int irq, void *dev_id)
 		raw_spin_lock_irqsave(&iommu->register_lock, flag);
 	}
 
-	writel(DMA_FSTS_PFO | DMA_FSTS_PPF, iommu->reg + DMAR_FSTS_REG);
+	writel(DMA_FSTS_PFO | DMA_FSTS_PPF | DMA_FSTS_PRO,
+	       iommu->reg + DMAR_FSTS_REG);
 
 unlock_exit:
 	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c
index 25c2c75f5332..79c45650f8de 100644
--- a/drivers/iommu/exynos-iommu.c
+++ b/drivers/iommu/exynos-iommu.c
@@ -263,6 +263,7 @@ struct exynos_iommu_domain {
 struct sysmmu_drvdata {
 	struct device *sysmmu;		/* SYSMMU controller device */
 	struct device *master;		/* master device (owner) */
+	struct device_link *link;	/* runtime PM link to master */
 	void __iomem *sfrbase;		/* our registers */
 	struct clk *clk;		/* SYSMMU's clock */
 	struct clk *aclk;		/* SYSMMU's aclk clock */
@@ -1250,6 +1251,8 @@ static struct iommu_group *get_device_iommu_group(struct device *dev)
 
 static int exynos_iommu_add_device(struct device *dev)
 {
+	struct exynos_iommu_owner *owner = dev->archdata.iommu;
+	struct sysmmu_drvdata *data;
 	struct iommu_group *group;
 
 	if (!has_sysmmu(dev))
@@ -1260,6 +1263,15 @@ static int exynos_iommu_add_device(struct device *dev)
 	if (IS_ERR(group))
 		return PTR_ERR(group);
 
+	list_for_each_entry(data, &owner->controllers, owner_node) {
+		/*
+		 * SYSMMU will be runtime activated via device link
+		 * (dependency) to its master device, so there are no
+		 * direct calls to pm_runtime_get/put in this driver.
+		 */
+		data->link = device_link_add(dev, data->sysmmu,
+					     DL_FLAG_PM_RUNTIME);
+	}
 	iommu_group_put(group);
 
 	return 0;
@@ -1268,6 +1280,7 @@ static int exynos_iommu_add_device(struct device *dev)
 static void exynos_iommu_remove_device(struct device *dev)
 {
 	struct exynos_iommu_owner *owner = dev->archdata.iommu;
+	struct sysmmu_drvdata *data;
 
 	if (!has_sysmmu(dev))
 		return;
@@ -1283,6 +1296,9 @@ static void exynos_iommu_remove_device(struct device *dev)
 		}
 	}
 	iommu_group_remove_device(dev);
+
+	list_for_each_entry(data, &owner->controllers, owner_node)
+		device_link_del(data->link);
 }
 
 static int exynos_iommu_of_xlate(struct device *dev,
@@ -1316,13 +1332,6 @@ static int exynos_iommu_of_xlate(struct device *dev,
 	list_add_tail(&data->owner_node, &owner->controllers);
 	data->master = dev;
 
-	/*
-	 * SYSMMU will be runtime activated via device link (dependency) to its
-	 * master device, so there are no direct calls to pm_runtime_get/put
-	 * in this driver.
-	 */
-	device_link_add(dev, data->sysmmu, DL_FLAG_PM_RUNTIME);
-
 	return 0;
 }
 
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 6784a05dd6b2..a0babdbf7146 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -82,8 +82,6 @@
 #define IOVA_START_PFN		(1)
 
 #define IOVA_PFN(addr)		((addr) >> PAGE_SHIFT)
-#define DMA_32BIT_PFN		IOVA_PFN(DMA_BIT_MASK(32))
-#define DMA_64BIT_PFN		IOVA_PFN(DMA_BIT_MASK(64))
 
 /* page table handling */
 #define LEVEL_STRIDE		(9)
@@ -1878,8 +1876,7 @@ static int dmar_init_reserved_ranges(void)
 	struct iova *iova;
 	int i;
 
-	init_iova_domain(&reserved_iova_list, VTD_PAGE_SIZE, IOVA_START_PFN,
-			DMA_32BIT_PFN);
+	init_iova_domain(&reserved_iova_list, VTD_PAGE_SIZE, IOVA_START_PFN);
 
 	lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
 		&reserved_rbtree_key);
@@ -1938,8 +1935,7 @@ static int domain_init(struct dmar_domain *domain, struct intel_iommu *iommu,
 	unsigned long sagaw;
 	int err;
 
-	init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN,
-			DMA_32BIT_PFN);
+	init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN);
 
 	err = init_iova_flush_queue(&domain->iovad,
 				    iommu_flush_iova, iova_entry_free);
@@ -2058,7 +2054,7 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
 	if (context_copied(context)) {
 		u16 did_old = context_domain_id(context);
 
-		if (did_old >= 0 && did_old < cap_ndoms(iommu->cap)) {
+		if (did_old < cap_ndoms(iommu->cap)) {
 			iommu->flush.flush_context(iommu, did_old,
 						   (((u16)bus) << 8) | devfn,
 						   DMA_CCMD_MASK_NOBIT,
@@ -3473,11 +3469,12 @@ static unsigned long intel_alloc_iova(struct device *dev,
 		 * from higher range
 		 */
 		iova_pfn = alloc_iova_fast(&domain->iovad, nrpages,
-					   IOVA_PFN(DMA_BIT_MASK(32)));
+					   IOVA_PFN(DMA_BIT_MASK(32)), false);
 		if (iova_pfn)
 			return iova_pfn;
 	}
-	iova_pfn = alloc_iova_fast(&domain->iovad, nrpages, IOVA_PFN(dma_mask));
+	iova_pfn = alloc_iova_fast(&domain->iovad, nrpages,
+				   IOVA_PFN(dma_mask), true);
 	if (unlikely(!iova_pfn)) {
 		pr_err("Allocating %ld-page iova for %s failed",
 		       nrpages, dev_name(dev));
@@ -4752,6 +4749,16 @@ int __init intel_iommu_init(void)
 		goto out_free_dmar;
 	}
 
+	up_write(&dmar_global_lock);
+
+	/*
+	 * The bus notifier takes the dmar_global_lock, so lockdep will
+	 * complain later when we register it under the lock.
+	 */
+	dmar_register_bus_notifier();
+
+	down_write(&dmar_global_lock);
+
 	if (no_iommu || dmar_disabled) {
 		/*
 		 * We exit the function here to ensure IOMMU's remapping and
@@ -4897,8 +4904,7 @@ static int md_domain_init(struct dmar_domain *domain, int guest_width)
 {
 	int adjust_width;
 
-	init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN,
-			DMA_32BIT_PFN);
+	init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN);
 	domain_reserve_special_ranges(domain);
 
 	/* calculate AGAW */
diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c
index f6697e55c2d4..ed1cf7c5a43b 100644
--- a/drivers/iommu/intel-svm.c
+++ b/drivers/iommu/intel-svm.c
@@ -292,7 +292,7 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_
 	int pasid_max;
 	int ret;
 
-	if (WARN_ON(!iommu))
+	if (WARN_ON(!iommu || !iommu->pasid_table))
 		return -EINVAL;
 
 	if (dev_is_pci(dev)) {
@@ -458,6 +458,8 @@ int intel_svm_unbind_mm(struct device *dev, int pasid)
 		kfree_rcu(sdev, rcu);
 
 		if (list_empty(&svm->devs)) {
+			svm->iommu->pasid_table[svm->pasid].val = 0;
+			wmb();
 
 			idr_remove(&svm->iommu->pasid_idr, svm->pasid);
 			if (svm->mm)
diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c
index 6961fc393f0b..2ca08dc9331c 100644
--- a/drivers/iommu/io-pgtable-arm-v7s.c
+++ b/drivers/iommu/io-pgtable-arm-v7s.c
@@ -660,16 +660,11 @@ static int arm_v7s_unmap(struct io_pgtable_ops *ops, unsigned long iova,
 			 size_t size)
 {
 	struct arm_v7s_io_pgtable *data = io_pgtable_ops_to_data(ops);
-	size_t unmapped;
 
 	if (WARN_ON(upper_32_bits(iova)))
 		return 0;
 
-	unmapped = __arm_v7s_unmap(data, iova, size, 1, data->pgd);
-	if (unmapped)
-		io_pgtable_tlb_sync(&data->iop);
-
-	return unmapped;
+	return __arm_v7s_unmap(data, iova, size, 1, data->pgd);
 }
 
 static phys_addr_t arm_v7s_iova_to_phys(struct io_pgtable_ops *ops,
diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
index e8018a308868..51e5c43caed1 100644
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@ -609,7 +609,6 @@ static int __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
 static int arm_lpae_unmap(struct io_pgtable_ops *ops, unsigned long iova,
 			  size_t size)
 {
-	size_t unmapped;
 	struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
 	arm_lpae_iopte *ptep = data->pgd;
 	int lvl = ARM_LPAE_START_LVL(data);
@@ -617,11 +616,7 @@ static int arm_lpae_unmap(struct io_pgtable_ops *ops, unsigned long iova,
 	if (WARN_ON(iova >= (1ULL << data->iop.cfg.ias)))
 		return 0;
 
-	unmapped = __arm_lpae_unmap(data, iova, size, lvl, ptep);
-	if (unmapped)
-		io_pgtable_tlb_sync(&data->iop);
-
-	return unmapped;
+	return __arm_lpae_unmap(data, iova, size, lvl, ptep);
 }
 
 static phys_addr_t arm_lpae_iova_to_phys(struct io_pgtable_ops *ops,
diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c
index 33edfa794ae9..466aaa8ba841 100644
--- a/drivers/iommu/iova.c
+++ b/drivers/iommu/iova.c
@@ -24,6 +24,9 @@
24#include <linux/bitops.h> 24#include <linux/bitops.h>
25#include <linux/cpu.h> 25#include <linux/cpu.h>
26 26
27/* The anchor node sits above the top of the usable address space */
28#define IOVA_ANCHOR ~0UL
29
27static bool iova_rcache_insert(struct iova_domain *iovad, 30static bool iova_rcache_insert(struct iova_domain *iovad,
28 unsigned long pfn, 31 unsigned long pfn,
29 unsigned long size); 32 unsigned long size);
@@ -37,7 +40,7 @@ static void fq_flush_timeout(unsigned long data);
37 40
38void 41void
39init_iova_domain(struct iova_domain *iovad, unsigned long granule, 42init_iova_domain(struct iova_domain *iovad, unsigned long granule,
40 unsigned long start_pfn, unsigned long pfn_32bit) 43 unsigned long start_pfn)
41{ 44{
42 /* 45 /*
43 * IOVA granularity will normally be equal to the smallest 46 * IOVA granularity will normally be equal to the smallest
@@ -48,12 +51,16 @@ init_iova_domain(struct iova_domain *iovad, unsigned long granule,
48 51
49 spin_lock_init(&iovad->iova_rbtree_lock); 52 spin_lock_init(&iovad->iova_rbtree_lock);
50 iovad->rbroot = RB_ROOT; 53 iovad->rbroot = RB_ROOT;
51 iovad->cached32_node = NULL; 54 iovad->cached_node = &iovad->anchor.node;
55 iovad->cached32_node = &iovad->anchor.node;
52 iovad->granule = granule; 56 iovad->granule = granule;
53 iovad->start_pfn = start_pfn; 57 iovad->start_pfn = start_pfn;
54 iovad->dma_32bit_pfn = pfn_32bit + 1; 58 iovad->dma_32bit_pfn = 1UL << (32 - iova_shift(iovad));
55 iovad->flush_cb = NULL; 59 iovad->flush_cb = NULL;
56 iovad->fq = NULL; 60 iovad->fq = NULL;
61 iovad->anchor.pfn_lo = iovad->anchor.pfn_hi = IOVA_ANCHOR;
62 rb_link_node(&iovad->anchor.node, NULL, &iovad->rbroot.rb_node);
63 rb_insert_color(&iovad->anchor.node, &iovad->rbroot);
57 init_iova_rcaches(iovad); 64 init_iova_rcaches(iovad);
58} 65}
59EXPORT_SYMBOL_GPL(init_iova_domain); 66EXPORT_SYMBOL_GPL(init_iova_domain);
@@ -108,50 +115,36 @@ int init_iova_flush_queue(struct iova_domain *iovad,
108EXPORT_SYMBOL_GPL(init_iova_flush_queue); 115EXPORT_SYMBOL_GPL(init_iova_flush_queue);
109 116
110static struct rb_node * 117static struct rb_node *
111__get_cached_rbnode(struct iova_domain *iovad, unsigned long *limit_pfn) 118__get_cached_rbnode(struct iova_domain *iovad, unsigned long limit_pfn)
112{ 119{
113 if ((*limit_pfn > iovad->dma_32bit_pfn) || 120 if (limit_pfn <= iovad->dma_32bit_pfn)
114 (iovad->cached32_node == NULL)) 121 return iovad->cached32_node;
115 return rb_last(&iovad->rbroot); 122
116 else { 123 return iovad->cached_node;
117 struct rb_node *prev_node = rb_prev(iovad->cached32_node);
118 struct iova *curr_iova =
119 rb_entry(iovad->cached32_node, struct iova, node);
120 *limit_pfn = curr_iova->pfn_lo;
121 return prev_node;
122 }
123} 124}
124 125
125static void 126static void
126__cached_rbnode_insert_update(struct iova_domain *iovad, 127__cached_rbnode_insert_update(struct iova_domain *iovad, struct iova *new)
127 unsigned long limit_pfn, struct iova *new)
128{ 128{
129 if (limit_pfn != iovad->dma_32bit_pfn) 129 if (new->pfn_hi < iovad->dma_32bit_pfn)
130 return; 130 iovad->cached32_node = &new->node;
131 iovad->cached32_node = &new->node; 131 else
132 iovad->cached_node = &new->node;
132} 133}
133 134
134static void 135static void
135__cached_rbnode_delete_update(struct iova_domain *iovad, struct iova *free) 136__cached_rbnode_delete_update(struct iova_domain *iovad, struct iova *free)
136{ 137{
137 struct iova *cached_iova; 138 struct iova *cached_iova;
138 struct rb_node *curr;
139 139
140 if (!iovad->cached32_node) 140 cached_iova = rb_entry(iovad->cached32_node, struct iova, node);
141 return; 141 if (free->pfn_hi < iovad->dma_32bit_pfn &&
142 curr = iovad->cached32_node; 142 free->pfn_lo >= cached_iova->pfn_lo)
143 cached_iova = rb_entry(curr, struct iova, node); 143 iovad->cached32_node = rb_next(&free->node);
144
145 if (free->pfn_lo >= cached_iova->pfn_lo) {
146 struct rb_node *node = rb_next(&free->node);
147 struct iova *iova = rb_entry(node, struct iova, node);
148 144
149 /* only cache if it's below 32bit pfn */ 145 cached_iova = rb_entry(iovad->cached_node, struct iova, node);
150 if (node && iova->pfn_lo < iovad->dma_32bit_pfn) 146 if (free->pfn_lo >= cached_iova->pfn_lo)
151 iovad->cached32_node = node; 147 iovad->cached_node = rb_next(&free->node);
152 else
153 iovad->cached32_node = NULL;
154 }
155} 148}
156 149
157/* Insert the iova into domain rbtree by holding writer lock */ 150/* Insert the iova into domain rbtree by holding writer lock */
@@ -182,63 +175,43 @@ iova_insert_rbtree(struct rb_root *root, struct iova *iova,
182 rb_insert_color(&iova->node, root); 175 rb_insert_color(&iova->node, root);
183} 176}
184 177
185/*
186 * Computes the padding size required, to make the start address
187 * naturally aligned on the power-of-two order of its size
188 */
189static unsigned int
190iova_get_pad_size(unsigned int size, unsigned int limit_pfn)
191{
192 return (limit_pfn - size) & (__roundup_pow_of_two(size) - 1);
193}
194
195static int __alloc_and_insert_iova_range(struct iova_domain *iovad, 178static int __alloc_and_insert_iova_range(struct iova_domain *iovad,
196 unsigned long size, unsigned long limit_pfn, 179 unsigned long size, unsigned long limit_pfn,
197 struct iova *new, bool size_aligned) 180 struct iova *new, bool size_aligned)
198{ 181{
199 struct rb_node *prev, *curr = NULL; 182 struct rb_node *curr, *prev;
183 struct iova *curr_iova;
200 unsigned long flags; 184 unsigned long flags;
201 unsigned long saved_pfn; 185 unsigned long new_pfn;
202 unsigned int pad_size = 0; 186 unsigned long align_mask = ~0UL;
187
188 if (size_aligned)
189 align_mask <<= fls_long(size - 1);
203 190
204 /* Walk the tree backwards */ 191 /* Walk the tree backwards */
205 spin_lock_irqsave(&iovad->iova_rbtree_lock, flags); 192 spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
206 saved_pfn = limit_pfn; 193 curr = __get_cached_rbnode(iovad, limit_pfn);
207 curr = __get_cached_rbnode(iovad, &limit_pfn); 194 curr_iova = rb_entry(curr, struct iova, node);
208 prev = curr; 195 do {
209 while (curr) { 196 limit_pfn = min(limit_pfn, curr_iova->pfn_lo);
210 struct iova *curr_iova = rb_entry(curr, struct iova, node); 197 new_pfn = (limit_pfn - size) & align_mask;
211
212 if (limit_pfn <= curr_iova->pfn_lo) {
213 goto move_left;
214 } else if (limit_pfn > curr_iova->pfn_hi) {
215 if (size_aligned)
216 pad_size = iova_get_pad_size(size, limit_pfn);
217 if ((curr_iova->pfn_hi + size + pad_size) < limit_pfn)
218 break; /* found a free slot */
219 }
220 limit_pfn = curr_iova->pfn_lo;
221move_left:
222 prev = curr; 198 prev = curr;
223 curr = rb_prev(curr); 199 curr = rb_prev(curr);
224 } 200 curr_iova = rb_entry(curr, struct iova, node);
201 } while (curr && new_pfn <= curr_iova->pfn_hi);
225 202
226 if (!curr) { 203 if (limit_pfn < size || new_pfn < iovad->start_pfn) {
227 if (size_aligned) 204 spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
228 pad_size = iova_get_pad_size(size, limit_pfn); 205 return -ENOMEM;
229 if ((iovad->start_pfn + size + pad_size) > limit_pfn) {
230 spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
231 return -ENOMEM;
232 }
233 } 206 }
234 207
235 /* pfn_lo will point to size aligned address if size_aligned is set */ 208 /* pfn_lo will point to size aligned address if size_aligned is set */
236 new->pfn_lo = limit_pfn - (size + pad_size); 209 new->pfn_lo = new_pfn;
237 new->pfn_hi = new->pfn_lo + size - 1; 210 new->pfn_hi = new->pfn_lo + size - 1;
238 211
239 /* If we have 'prev', it's a valid place to start the insertion. */ 212 /* If we have 'prev', it's a valid place to start the insertion. */
240 iova_insert_rbtree(&iovad->rbroot, new, prev); 213 iova_insert_rbtree(&iovad->rbroot, new, prev);
241 __cached_rbnode_insert_update(iovad, saved_pfn, new); 214 __cached_rbnode_insert_update(iovad, new);
242 215
243 spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); 216 spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
244 217
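[Editor's sketch] The rewritten allocator above drops iova_get_pad_size() and instead builds a power-of-two alignment mask once, then computes a single aligned candidate per step, new_pfn = (limit_pfn - size) & align_mask, while walking the tree right to left until the candidate no longer overlaps the previous allocation. A compilable user-space sketch of just that arithmetic, with fls_long() modeled on a GCC/Clang builtin (assumption: size_aligned case only):

#include <stdio.h>

/* Highest set bit, 1-based, matching what the kernel's fls_long() reports. */
static unsigned int fls_long(unsigned long x)
{
	return x ? 8 * sizeof(long) - __builtin_clzl(x) : 0;
}

/*
 * Candidate base for a size-aligned allocation that must end below
 * limit_pfn: round (limit_pfn - size) down to the allocation's own
 * power-of-two order, as __alloc_and_insert_iova_range() now does.
 */
static unsigned long candidate_pfn(unsigned long limit_pfn, unsigned long size)
{
	unsigned long align_mask = ~0UL << fls_long(size - 1);

	return (limit_pfn - size) & align_mask;
}

int main(void)
{
	/* An 8-page request below pfn 0x1003 lands on the 8-aligned 0xff8. */
	printf("candidate pfn 0x%lx\n", candidate_pfn(0x1003, 8));
	return 0;
}
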
@@ -258,7 +231,8 @@ EXPORT_SYMBOL(alloc_iova_mem);
258 231
259void free_iova_mem(struct iova *iova) 232void free_iova_mem(struct iova *iova)
260{ 233{
261 kmem_cache_free(iova_cache, iova); 234 if (iova->pfn_lo != IOVA_ANCHOR)
235 kmem_cache_free(iova_cache, iova);
262} 236}
263EXPORT_SYMBOL(free_iova_mem); 237EXPORT_SYMBOL(free_iova_mem);
264 238
@@ -342,15 +316,12 @@ private_find_iova(struct iova_domain *iovad, unsigned long pfn)
342 while (node) { 316 while (node) {
343 struct iova *iova = rb_entry(node, struct iova, node); 317 struct iova *iova = rb_entry(node, struct iova, node);
344 318
345 /* If pfn falls within iova's range, return iova */
346 if ((pfn >= iova->pfn_lo) && (pfn <= iova->pfn_hi)) {
347 return iova;
348 }
349
350 if (pfn < iova->pfn_lo) 319 if (pfn < iova->pfn_lo)
351 node = node->rb_left; 320 node = node->rb_left;
352 else if (pfn > iova->pfn_lo) 321 else if (pfn > iova->pfn_hi)
353 node = node->rb_right; 322 node = node->rb_right;
323 else
324 return iova; /* pfn falls within iova's range */
354 } 325 }
355 326
356 return NULL; 327 return NULL;
@@ -424,18 +395,19 @@ EXPORT_SYMBOL_GPL(free_iova);
424 * @iovad: - iova domain in question 395 * @iovad: - iova domain in question
425 * @size: - size of page frames to allocate 396 * @size: - size of page frames to allocate
426 * @limit_pfn: - max limit address 397 * @limit_pfn: - max limit address
398 * @flush_rcache: - set to flush rcache on regular allocation failure
427 * This function tries to satisfy an iova allocation from the rcache, 399 * This function tries to satisfy an iova allocation from the rcache,
428 * and falls back to regular allocation on failure. 400 * and falls back to regular allocation on failure. If regular allocation
401 * fails too and the flush_rcache flag is set then the rcache will be flushed.
429*/ 402*/
430unsigned long 403unsigned long
431alloc_iova_fast(struct iova_domain *iovad, unsigned long size, 404alloc_iova_fast(struct iova_domain *iovad, unsigned long size,
432 unsigned long limit_pfn) 405 unsigned long limit_pfn, bool flush_rcache)
433{ 406{
434 bool flushed_rcache = false;
435 unsigned long iova_pfn; 407 unsigned long iova_pfn;
436 struct iova *new_iova; 408 struct iova *new_iova;
437 409
438 iova_pfn = iova_rcache_get(iovad, size, limit_pfn); 410 iova_pfn = iova_rcache_get(iovad, size, limit_pfn + 1);
439 if (iova_pfn) 411 if (iova_pfn)
440 return iova_pfn; 412 return iova_pfn;
441 413
@@ -444,11 +416,11 @@ retry:
444 if (!new_iova) { 416 if (!new_iova) {
445 unsigned int cpu; 417 unsigned int cpu;
446 418
447 if (flushed_rcache) 419 if (!flush_rcache)
448 return 0; 420 return 0;
449 421
450 /* Try replenishing IOVAs by flushing rcache. */ 422 /* Try replenishing IOVAs by flushing rcache. */
451 flushed_rcache = true; 423 flush_rcache = false;
452 for_each_online_cpu(cpu) 424 for_each_online_cpu(cpu)
453 free_cpu_cached_iovas(cpu, iovad); 425 free_cpu_cached_iovas(cpu, iovad);
454 goto retry; 426 goto retry;
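[Editor's sketch] The internal flushed_rcache flag becomes the caller-supplied flush_rcache parameter: callers with a cheaper fallback can now skip the global per-CPU cache purge entirely, and the flag doubles as the "retry at most once" state because it is cleared before the retry. A small stand-alone model of that control flow with a stubbed allocator (the helper names are invented for the example):

#include <stdbool.h>
#include <stdio.h>

static bool caches_flushed;

static long try_alloc(void)
{
	/* Stub: pretend the allocation only succeeds once caches are empty. */
	return caches_flushed ? 0x2000 : 0;
}

static void flush_all_caches(void)
{
	caches_flushed = true;
}

static long alloc_fast(bool flush_rcache)
{
	long pfn;

retry:
	pfn = try_alloc();
	if (!pfn) {
		if (!flush_rcache)
			return 0;
		/* Flush once, then clear the flag so we retry at most once. */
		flush_rcache = false;
		flush_all_caches();
		goto retry;
	}
	return pfn;
}

int main(void)
{
	printf("no flush: %ld, with flush: %ld\n",
	       alloc_fast(false), alloc_fast(true));
	return 0;
}
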
@@ -570,7 +542,7 @@ void queue_iova(struct iova_domain *iovad,
570 unsigned long pfn, unsigned long pages, 542 unsigned long pfn, unsigned long pages,
571 unsigned long data) 543 unsigned long data)
572{ 544{
573 struct iova_fq *fq = get_cpu_ptr(iovad->fq); 545 struct iova_fq *fq = raw_cpu_ptr(iovad->fq);
574 unsigned long flags; 546 unsigned long flags;
575 unsigned idx; 547 unsigned idx;
576 548
@@ -600,8 +572,6 @@ void queue_iova(struct iova_domain *iovad,
600 if (atomic_cmpxchg(&iovad->fq_timer_on, 0, 1) == 0) 572 if (atomic_cmpxchg(&iovad->fq_timer_on, 0, 1) == 0)
601 mod_timer(&iovad->fq_timer, 573 mod_timer(&iovad->fq_timer,
602 jiffies + msecs_to_jiffies(IOVA_FQ_TIMEOUT)); 574 jiffies + msecs_to_jiffies(IOVA_FQ_TIMEOUT));
603
604 put_cpu_ptr(iovad->fq);
605} 575}
606EXPORT_SYMBOL_GPL(queue_iova); 576EXPORT_SYMBOL_GPL(queue_iova);
607 577
@@ -612,21 +582,12 @@ EXPORT_SYMBOL_GPL(queue_iova);
612 */ 582 */
613void put_iova_domain(struct iova_domain *iovad) 583void put_iova_domain(struct iova_domain *iovad)
614{ 584{
615 struct rb_node *node; 585 struct iova *iova, *tmp;
616 unsigned long flags;
617 586
618 free_iova_flush_queue(iovad); 587 free_iova_flush_queue(iovad);
619 free_iova_rcaches(iovad); 588 free_iova_rcaches(iovad);
620 spin_lock_irqsave(&iovad->iova_rbtree_lock, flags); 589 rbtree_postorder_for_each_entry_safe(iova, tmp, &iovad->rbroot, node)
621 node = rb_first(&iovad->rbroot);
622 while (node) {
623 struct iova *iova = rb_entry(node, struct iova, node);
624
625 rb_erase(node, &iovad->rbroot);
626 free_iova_mem(iova); 590 free_iova_mem(iova);
627 node = rb_first(&iovad->rbroot);
628 }
629 spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
630} 591}
631EXPORT_SYMBOL_GPL(put_iova_domain); 592EXPORT_SYMBOL_GPL(put_iova_domain);
632 593
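[Editor's sketch] put_iova_domain() no longer erases nodes one at a time under the rbtree lock; since the domain must be unused by the time it is destroyed, it walks the whole tree post-order and frees every entry (free_iova_mem() quietly skips the IOVA_ANCHOR placeholder). The same children-before-parent property on an explicit binary tree, as a tiny user-space sketch:

#include <stdio.h>
#include <stdlib.h>

struct node { struct node *left, *right; };

/*
 * Post-order teardown: both children are released before the parent, so no
 * pointer is read after the node it lives in has been freed - the property
 * rbtree_postorder_for_each_entry_safe() relies on.
 */
static void destroy(struct node *n)
{
	if (!n)
		return;
	destroy(n->left);
	destroy(n->right);
	free(n);
}

int main(void)
{
	struct node *root = calloc(1, sizeof(*root));

	root->left = calloc(1, sizeof(*root));
	root->right = calloc(1, sizeof(*root));
	destroy(root);
	printf("domain torn down\n");
	return 0;
}
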
@@ -695,6 +656,10 @@ reserve_iova(struct iova_domain *iovad,
695 struct iova *iova; 656 struct iova *iova;
696 unsigned int overlap = 0; 657 unsigned int overlap = 0;
697 658
659 /* Don't allow nonsensical pfns */
660 if (WARN_ON((pfn_hi | pfn_lo) > (ULLONG_MAX >> iova_shift(iovad))))
661 return NULL;
662
698 spin_lock_irqsave(&iovad->iova_rbtree_lock, flags); 663 spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
699 for (node = rb_first(&iovad->rbroot); node; node = rb_next(node)) { 664 for (node = rb_first(&iovad->rbroot); node; node = rb_next(node)) {
700 if (__is_range_overlap(node, pfn_lo, pfn_hi)) { 665 if (__is_range_overlap(node, pfn_lo, pfn_hi)) {
@@ -738,6 +703,9 @@ copy_reserved_iova(struct iova_domain *from, struct iova_domain *to)
738 struct iova *iova = rb_entry(node, struct iova, node); 703 struct iova *iova = rb_entry(node, struct iova, node);
739 struct iova *new_iova; 704 struct iova *new_iova;
740 705
706 if (iova->pfn_lo == IOVA_ANCHOR)
707 continue;
708
741 new_iova = reserve_iova(to, iova->pfn_lo, iova->pfn_hi); 709 new_iova = reserve_iova(to, iova->pfn_lo, iova->pfn_hi);
742 if (!new_iova) 710 if (!new_iova)
743 printk(KERN_ERR "Reserve iova range %lx@%lx failed\n", 711 printk(KERN_ERR "Reserve iova range %lx@%lx failed\n",
@@ -855,12 +823,21 @@ static bool iova_magazine_empty(struct iova_magazine *mag)
855static unsigned long iova_magazine_pop(struct iova_magazine *mag, 823static unsigned long iova_magazine_pop(struct iova_magazine *mag,
856 unsigned long limit_pfn) 824 unsigned long limit_pfn)
857{ 825{
826 int i;
827 unsigned long pfn;
828
858 BUG_ON(iova_magazine_empty(mag)); 829 BUG_ON(iova_magazine_empty(mag));
859 830
860 if (mag->pfns[mag->size - 1] >= limit_pfn) 831 /* Only fall back to the rbtree if we have no suitable pfns at all */
861 return 0; 832 for (i = mag->size - 1; mag->pfns[i] > limit_pfn; i--)
833 if (i == 0)
834 return 0;
862 835
863 return mag->pfns[--mag->size]; 836 /* Swap it to pop it */
837 pfn = mag->pfns[i];
838 mag->pfns[i] = mag->pfns[--mag->size];
839
840 return pfn;
864} 841}
865 842
866static void iova_magazine_push(struct iova_magazine *mag, unsigned long pfn) 843static void iova_magazine_push(struct iova_magazine *mag, unsigned long pfn)
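[Editor's sketch] iova_magazine_pop() used to give up whenever the top-of-stack pfn violated limit_pfn; the version above scans down for the first pfn that fits and swaps it with the top, so the magazine stays densely packed and only truly hopeless requests fall back to the rbtree. A stand-alone model of that swap-to-pop with a plain array in place of the kernel struct:

#include <stdio.h>

#define MAG_SIZE 8

struct magazine { unsigned long pfns[MAG_SIZE]; int size; };

/* Pop any entry <= limit_pfn; 0 means "fall back to the rbtree".          */
/* As in the kernel, the caller guarantees the magazine is not empty.      */
static unsigned long magazine_pop(struct magazine *mag, unsigned long limit_pfn)
{
	unsigned long pfn;
	int i;

	for (i = mag->size - 1; mag->pfns[i] > limit_pfn; i--)
		if (i == 0)
			return 0;

	/* Swap the match with the last slot so later pops stay O(1). */
	pfn = mag->pfns[i];
	mag->pfns[i] = mag->pfns[--mag->size];
	return pfn;
}

int main(void)
{
	struct magazine mag = { { 0x10, 0x800, 0x20, 0x900 }, 4 };

	/* The top entry (0x900) is too high, but 0x20 still fits the limit. */
	printf("popped 0x%lx, %d left\n", magazine_pop(&mag, 0x100), mag.size);
	return 0;
}
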
@@ -1011,27 +988,7 @@ static unsigned long iova_rcache_get(struct iova_domain *iovad,
1011 if (log_size >= IOVA_RANGE_CACHE_MAX_SIZE) 988 if (log_size >= IOVA_RANGE_CACHE_MAX_SIZE)
1012 return 0; 989 return 0;
1013 990
1014 return __iova_rcache_get(&iovad->rcaches[log_size], limit_pfn); 991 return __iova_rcache_get(&iovad->rcaches[log_size], limit_pfn - size);
1015}
1016
1017/*
1018 * Free a cpu's rcache.
1019 */
1020static void free_cpu_iova_rcache(unsigned int cpu, struct iova_domain *iovad,
1021 struct iova_rcache *rcache)
1022{
1023 struct iova_cpu_rcache *cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu);
1024 unsigned long flags;
1025
1026 spin_lock_irqsave(&cpu_rcache->lock, flags);
1027
1028 iova_magazine_free_pfns(cpu_rcache->loaded, iovad);
1029 iova_magazine_free(cpu_rcache->loaded);
1030
1031 iova_magazine_free_pfns(cpu_rcache->prev, iovad);
1032 iova_magazine_free(cpu_rcache->prev);
1033
1034 spin_unlock_irqrestore(&cpu_rcache->lock, flags);
1035} 992}
1036 993
1037/* 994/*
@@ -1040,21 +997,20 @@ static void free_cpu_iova_rcache(unsigned int cpu, struct iova_domain *iovad,
1040static void free_iova_rcaches(struct iova_domain *iovad) 997static void free_iova_rcaches(struct iova_domain *iovad)
1041{ 998{
1042 struct iova_rcache *rcache; 999 struct iova_rcache *rcache;
1043 unsigned long flags; 1000 struct iova_cpu_rcache *cpu_rcache;
1044 unsigned int cpu; 1001 unsigned int cpu;
1045 int i, j; 1002 int i, j;
1046 1003
1047 for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) { 1004 for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
1048 rcache = &iovad->rcaches[i]; 1005 rcache = &iovad->rcaches[i];
1049 for_each_possible_cpu(cpu) 1006 for_each_possible_cpu(cpu) {
1050 free_cpu_iova_rcache(cpu, iovad, rcache); 1007 cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu);
1051 spin_lock_irqsave(&rcache->lock, flags); 1008 iova_magazine_free(cpu_rcache->loaded);
1009 iova_magazine_free(cpu_rcache->prev);
1010 }
1052 free_percpu(rcache->cpu_rcaches); 1011 free_percpu(rcache->cpu_rcaches);
1053 for (j = 0; j < rcache->depot_size; ++j) { 1012 for (j = 0; j < rcache->depot_size; ++j)
1054 iova_magazine_free_pfns(rcache->depot[j], iovad);
1055 iova_magazine_free(rcache->depot[j]); 1013 iova_magazine_free(rcache->depot[j]);
1056 }
1057 spin_unlock_irqrestore(&rcache->lock, flags);
1058 } 1014 }
1059} 1015}
1060 1016
diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c
index 195d6e93ac71..8dce3a9de9d8 100644
--- a/drivers/iommu/ipmmu-vmsa.c
+++ b/drivers/iommu/ipmmu-vmsa.c
@@ -19,30 +19,49 @@
19#include <linux/iommu.h> 19#include <linux/iommu.h>
20#include <linux/module.h> 20#include <linux/module.h>
21#include <linux/of.h> 21#include <linux/of.h>
22#include <linux/of_device.h>
23#include <linux/of_iommu.h>
22#include <linux/of_platform.h> 24#include <linux/of_platform.h>
23#include <linux/platform_device.h> 25#include <linux/platform_device.h>
24#include <linux/sizes.h> 26#include <linux/sizes.h>
25#include <linux/slab.h> 27#include <linux/slab.h>
28#include <linux/sys_soc.h>
26 29
27#if defined(CONFIG_ARM) && !defined(CONFIG_IOMMU_DMA) 30#if defined(CONFIG_ARM) && !defined(CONFIG_IOMMU_DMA)
28#include <asm/dma-iommu.h> 31#include <asm/dma-iommu.h>
29#include <asm/pgalloc.h> 32#include <asm/pgalloc.h>
33#else
34#define arm_iommu_create_mapping(...) NULL
35#define arm_iommu_attach_device(...) -ENODEV
36#define arm_iommu_release_mapping(...) do {} while (0)
37#define arm_iommu_detach_device(...) do {} while (0)
30#endif 38#endif
31 39
32#include "io-pgtable.h" 40#include "io-pgtable.h"
33 41
34#define IPMMU_CTX_MAX 1 42#define IPMMU_CTX_MAX 8
43
44struct ipmmu_features {
45 bool use_ns_alias_offset;
46 bool has_cache_leaf_nodes;
47 unsigned int number_of_contexts;
48 bool setup_imbuscr;
49 bool twobit_imttbcr_sl0;
50};
35 51
36struct ipmmu_vmsa_device { 52struct ipmmu_vmsa_device {
37 struct device *dev; 53 struct device *dev;
38 void __iomem *base; 54 void __iomem *base;
39 struct iommu_device iommu; 55 struct iommu_device iommu;
40 56 struct ipmmu_vmsa_device *root;
57 const struct ipmmu_features *features;
41 unsigned int num_utlbs; 58 unsigned int num_utlbs;
59 unsigned int num_ctx;
42 spinlock_t lock; /* Protects ctx and domains[] */ 60 spinlock_t lock; /* Protects ctx and domains[] */
43 DECLARE_BITMAP(ctx, IPMMU_CTX_MAX); 61 DECLARE_BITMAP(ctx, IPMMU_CTX_MAX);
44 struct ipmmu_vmsa_domain *domains[IPMMU_CTX_MAX]; 62 struct ipmmu_vmsa_domain *domains[IPMMU_CTX_MAX];
45 63
64 struct iommu_group *group;
46 struct dma_iommu_mapping *mapping; 65 struct dma_iommu_mapping *mapping;
47}; 66};
48 67
@@ -57,18 +76,12 @@ struct ipmmu_vmsa_domain {
57 spinlock_t lock; /* Protects mappings */ 76 spinlock_t lock; /* Protects mappings */
58}; 77};
59 78
60struct ipmmu_vmsa_iommu_priv {
61 struct ipmmu_vmsa_device *mmu;
62 struct device *dev;
63 struct list_head list;
64};
65
66static struct ipmmu_vmsa_domain *to_vmsa_domain(struct iommu_domain *dom) 79static struct ipmmu_vmsa_domain *to_vmsa_domain(struct iommu_domain *dom)
67{ 80{
68 return container_of(dom, struct ipmmu_vmsa_domain, io_domain); 81 return container_of(dom, struct ipmmu_vmsa_domain, io_domain);
69} 82}
70 83
71static struct ipmmu_vmsa_iommu_priv *to_priv(struct device *dev) 84static struct ipmmu_vmsa_device *to_ipmmu(struct device *dev)
72{ 85{
73 return dev->iommu_fwspec ? dev->iommu_fwspec->iommu_priv : NULL; 86 return dev->iommu_fwspec ? dev->iommu_fwspec->iommu_priv : NULL;
74} 87}
@@ -133,6 +146,10 @@ static struct ipmmu_vmsa_iommu_priv *to_priv(struct device *dev)
133#define IMTTBCR_TSZ0_MASK (7 << 0) 146#define IMTTBCR_TSZ0_MASK (7 << 0)
134#define IMTTBCR_TSZ0_SHIFT O 147#define IMTTBCR_TSZ0_SHIFT O
135 148
149#define IMTTBCR_SL0_TWOBIT_LVL_3 (0 << 6)
150#define IMTTBCR_SL0_TWOBIT_LVL_2 (1 << 6)
151#define IMTTBCR_SL0_TWOBIT_LVL_1 (2 << 6)
152
136#define IMBUSCR 0x000c 153#define IMBUSCR 0x000c
137#define IMBUSCR_DVM (1 << 2) 154#define IMBUSCR_DVM (1 << 2)
138#define IMBUSCR_BUSSEL_SYS (0 << 0) 155#define IMBUSCR_BUSSEL_SYS (0 << 0)
@@ -194,6 +211,36 @@ static struct ipmmu_vmsa_iommu_priv *to_priv(struct device *dev)
194#define IMUASID_ASID0_SHIFT 0 211#define IMUASID_ASID0_SHIFT 0
195 212
196/* ----------------------------------------------------------------------------- 213/* -----------------------------------------------------------------------------
214 * Root device handling
215 */
216
217static struct platform_driver ipmmu_driver;
218
219static bool ipmmu_is_root(struct ipmmu_vmsa_device *mmu)
220{
221 return mmu->root == mmu;
222}
223
224static int __ipmmu_check_device(struct device *dev, void *data)
225{
226 struct ipmmu_vmsa_device *mmu = dev_get_drvdata(dev);
227 struct ipmmu_vmsa_device **rootp = data;
228
229 if (ipmmu_is_root(mmu))
230 *rootp = mmu;
231
232 return 0;
233}
234
235static struct ipmmu_vmsa_device *ipmmu_find_root(void)
236{
237 struct ipmmu_vmsa_device *root = NULL;
238
239 return driver_for_each_device(&ipmmu_driver.driver, NULL, &root,
240 __ipmmu_check_device) == 0 ? root : NULL;
241}
242
243/* -----------------------------------------------------------------------------
197 * Read/Write Access 244 * Read/Write Access
198 */ 245 */
199 246
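[Editor's sketch] On R-Car Gen3 the cache (leaf) IPMMUs sit in front of a single root IPMMU-MM, and ipmmu_find_root() walks every device already bound to this driver looking for the instance whose ->root points at itself; a leaf that probes before the root defers. A rough user-space model of that discovery, with a static array standing in for driver_for_each_device() (instance names are illustrative only):

#include <stdio.h>

struct ipmmu { const char *name; struct ipmmu *root; };

/* Leaf instances are created with root == NULL until the root is found. */
static struct ipmmu mmus[] = {
	{ "ipmmu-vc0", NULL },
	{ "ipmmu-mm",  NULL },	/* set to point at itself below */
	{ "ipmmu-ds1", NULL },
};

static struct ipmmu *find_root(void)
{
	unsigned int i;

	for (i = 0; i < sizeof(mmus) / sizeof(mmus[0]); i++)
		if (mmus[i].root == &mmus[i])
			return &mmus[i];
	return NULL;	/* not registered yet: the caller would defer probing */
}

int main(void)
{
	mmus[1].root = &mmus[1];	/* the root registers itself first */
	printf("root is %s\n", find_root() ? find_root()->name : "missing");
	return 0;
}
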
@@ -208,15 +255,29 @@ static void ipmmu_write(struct ipmmu_vmsa_device *mmu, unsigned int offset,
208 iowrite32(data, mmu->base + offset); 255 iowrite32(data, mmu->base + offset);
209} 256}
210 257
211static u32 ipmmu_ctx_read(struct ipmmu_vmsa_domain *domain, unsigned int reg) 258static u32 ipmmu_ctx_read_root(struct ipmmu_vmsa_domain *domain,
259 unsigned int reg)
212{ 260{
213 return ipmmu_read(domain->mmu, domain->context_id * IM_CTX_SIZE + reg); 261 return ipmmu_read(domain->mmu->root,
262 domain->context_id * IM_CTX_SIZE + reg);
214} 263}
215 264
216static void ipmmu_ctx_write(struct ipmmu_vmsa_domain *domain, unsigned int reg, 265static void ipmmu_ctx_write_root(struct ipmmu_vmsa_domain *domain,
217 u32 data) 266 unsigned int reg, u32 data)
218{ 267{
219 ipmmu_write(domain->mmu, domain->context_id * IM_CTX_SIZE + reg, data); 268 ipmmu_write(domain->mmu->root,
269 domain->context_id * IM_CTX_SIZE + reg, data);
270}
271
272static void ipmmu_ctx_write_all(struct ipmmu_vmsa_domain *domain,
273 unsigned int reg, u32 data)
274{
275 if (domain->mmu != domain->mmu->root)
276 ipmmu_write(domain->mmu,
277 domain->context_id * IM_CTX_SIZE + reg, data);
278
279 ipmmu_write(domain->mmu->root,
280 domain->context_id * IM_CTX_SIZE + reg, data);
220} 281}
221 282
222/* ----------------------------------------------------------------------------- 283/* -----------------------------------------------------------------------------
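[Editor's sketch] With multi-instance support, context state lives in the root IPMMU: reads always go to the root, plain writes go to the root, and ipmmu_ctx_write_all() additionally mirrors the write into the leaf's own register block (used for IMCTR, which both units implement). A user-space model of that register routing with two fake register banks (types and values are invented for the example):

#include <stdio.h>

#define NR_REGS 16

struct mmu { unsigned int regs[NR_REGS]; struct mmu *root; };

static unsigned int ctx_read_root(struct mmu *m, unsigned int reg)
{
	return m->root->regs[reg];
}

static void ctx_write_root(struct mmu *m, unsigned int reg, unsigned int val)
{
	m->root->regs[reg] = val;
}

/* Registers such as IMCTR must be written on both the leaf and the root. */
static void ctx_write_all(struct mmu *m, unsigned int reg, unsigned int val)
{
	if (m != m->root)
		m->regs[reg] = val;
	ctx_write_root(m, reg, val);
}

int main(void)
{
	struct mmu root = { .root = &root };
	struct mmu leaf = { .root = &root };

	ctx_write_all(&leaf, 0, 0x7);	/* e.g. INTEN | FLUSH | MMUEN */
	printf("leaf=%x root=%x read-back=%x\n",
	       leaf.regs[0], root.regs[0], ctx_read_root(&leaf, 0));
	return 0;
}
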
@@ -228,7 +289,7 @@ static void ipmmu_tlb_sync(struct ipmmu_vmsa_domain *domain)
228{ 289{
229 unsigned int count = 0; 290 unsigned int count = 0;
230 291
231 while (ipmmu_ctx_read(domain, IMCTR) & IMCTR_FLUSH) { 292 while (ipmmu_ctx_read_root(domain, IMCTR) & IMCTR_FLUSH) {
232 cpu_relax(); 293 cpu_relax();
233 if (++count == TLB_LOOP_TIMEOUT) { 294 if (++count == TLB_LOOP_TIMEOUT) {
234 dev_err_ratelimited(domain->mmu->dev, 295 dev_err_ratelimited(domain->mmu->dev,
@@ -243,9 +304,9 @@ static void ipmmu_tlb_invalidate(struct ipmmu_vmsa_domain *domain)
243{ 304{
244 u32 reg; 305 u32 reg;
245 306
246 reg = ipmmu_ctx_read(domain, IMCTR); 307 reg = ipmmu_ctx_read_root(domain, IMCTR);
247 reg |= IMCTR_FLUSH; 308 reg |= IMCTR_FLUSH;
248 ipmmu_ctx_write(domain, IMCTR, reg); 309 ipmmu_ctx_write_all(domain, IMCTR, reg);
249 310
250 ipmmu_tlb_sync(domain); 311 ipmmu_tlb_sync(domain);
251} 312}
@@ -313,11 +374,12 @@ static int ipmmu_domain_allocate_context(struct ipmmu_vmsa_device *mmu,
313 374
314 spin_lock_irqsave(&mmu->lock, flags); 375 spin_lock_irqsave(&mmu->lock, flags);
315 376
316 ret = find_first_zero_bit(mmu->ctx, IPMMU_CTX_MAX); 377 ret = find_first_zero_bit(mmu->ctx, mmu->num_ctx);
317 if (ret != IPMMU_CTX_MAX) { 378 if (ret != mmu->num_ctx) {
318 mmu->domains[ret] = domain; 379 mmu->domains[ret] = domain;
319 set_bit(ret, mmu->ctx); 380 set_bit(ret, mmu->ctx);
320 } 381 } else
382 ret = -EBUSY;
321 383
322 spin_unlock_irqrestore(&mmu->lock, flags); 384 spin_unlock_irqrestore(&mmu->lock, flags);
323 385
@@ -340,6 +402,7 @@ static void ipmmu_domain_free_context(struct ipmmu_vmsa_device *mmu,
340static int ipmmu_domain_init_context(struct ipmmu_vmsa_domain *domain) 402static int ipmmu_domain_init_context(struct ipmmu_vmsa_domain *domain)
341{ 403{
342 u64 ttbr; 404 u64 ttbr;
405 u32 tmp;
343 int ret; 406 int ret;
344 407
345 /* 408 /*
@@ -364,51 +427,59 @@ static int ipmmu_domain_init_context(struct ipmmu_vmsa_domain *domain)
364 * TODO: Add support for coherent walk through CCI with DVM and remove 427 * TODO: Add support for coherent walk through CCI with DVM and remove
365 * cache handling. For now, delegate it to the io-pgtable code. 428 * cache handling. For now, delegate it to the io-pgtable code.
366 */ 429 */
367 domain->cfg.iommu_dev = domain->mmu->dev; 430 domain->cfg.iommu_dev = domain->mmu->root->dev;
368 431
369 /* 432 /*
370 * Find an unused context. 433 * Find an unused context.
371 */ 434 */
372 ret = ipmmu_domain_allocate_context(domain->mmu, domain); 435 ret = ipmmu_domain_allocate_context(domain->mmu->root, domain);
373 if (ret == IPMMU_CTX_MAX) 436 if (ret < 0)
374 return -EBUSY; 437 return ret;
375 438
376 domain->context_id = ret; 439 domain->context_id = ret;
377 440
378 domain->iop = alloc_io_pgtable_ops(ARM_32_LPAE_S1, &domain->cfg, 441 domain->iop = alloc_io_pgtable_ops(ARM_32_LPAE_S1, &domain->cfg,
379 domain); 442 domain);
380 if (!domain->iop) { 443 if (!domain->iop) {
381 ipmmu_domain_free_context(domain->mmu, domain->context_id); 444 ipmmu_domain_free_context(domain->mmu->root,
445 domain->context_id);
382 return -EINVAL; 446 return -EINVAL;
383 } 447 }
384 448
385 /* TTBR0 */ 449 /* TTBR0 */
386 ttbr = domain->cfg.arm_lpae_s1_cfg.ttbr[0]; 450 ttbr = domain->cfg.arm_lpae_s1_cfg.ttbr[0];
387 ipmmu_ctx_write(domain, IMTTLBR0, ttbr); 451 ipmmu_ctx_write_root(domain, IMTTLBR0, ttbr);
388 ipmmu_ctx_write(domain, IMTTUBR0, ttbr >> 32); 452 ipmmu_ctx_write_root(domain, IMTTUBR0, ttbr >> 32);
389 453
390 /* 454 /*
391 * TTBCR 455 * TTBCR
392 * We use long descriptors with inner-shareable WBWA tables and allocate 456 * We use long descriptors with inner-shareable WBWA tables and allocate
393 * the whole 32-bit VA space to TTBR0. 457 * the whole 32-bit VA space to TTBR0.
394 */ 458 */
395 ipmmu_ctx_write(domain, IMTTBCR, IMTTBCR_EAE | 459 if (domain->mmu->features->twobit_imttbcr_sl0)
396 IMTTBCR_SH0_INNER_SHAREABLE | IMTTBCR_ORGN0_WB_WA | 460 tmp = IMTTBCR_SL0_TWOBIT_LVL_1;
397 IMTTBCR_IRGN0_WB_WA | IMTTBCR_SL0_LVL_1); 461 else
462 tmp = IMTTBCR_SL0_LVL_1;
463
464 ipmmu_ctx_write_root(domain, IMTTBCR, IMTTBCR_EAE |
465 IMTTBCR_SH0_INNER_SHAREABLE | IMTTBCR_ORGN0_WB_WA |
466 IMTTBCR_IRGN0_WB_WA | tmp);
398 467
399 /* MAIR0 */ 468 /* MAIR0 */
400 ipmmu_ctx_write(domain, IMMAIR0, domain->cfg.arm_lpae_s1_cfg.mair[0]); 469 ipmmu_ctx_write_root(domain, IMMAIR0,
470 domain->cfg.arm_lpae_s1_cfg.mair[0]);
401 471
402 /* IMBUSCR */ 472 /* IMBUSCR */
403 ipmmu_ctx_write(domain, IMBUSCR, 473 if (domain->mmu->features->setup_imbuscr)
404 ipmmu_ctx_read(domain, IMBUSCR) & 474 ipmmu_ctx_write_root(domain, IMBUSCR,
405 ~(IMBUSCR_DVM | IMBUSCR_BUSSEL_MASK)); 475 ipmmu_ctx_read_root(domain, IMBUSCR) &
476 ~(IMBUSCR_DVM | IMBUSCR_BUSSEL_MASK));
406 477
407 /* 478 /*
408 * IMSTR 479 * IMSTR
409 * Clear all interrupt flags. 480 * Clear all interrupt flags.
410 */ 481 */
411 ipmmu_ctx_write(domain, IMSTR, ipmmu_ctx_read(domain, IMSTR)); 482 ipmmu_ctx_write_root(domain, IMSTR, ipmmu_ctx_read_root(domain, IMSTR));
412 483
413 /* 484 /*
414 * IMCTR 485 * IMCTR
@@ -417,7 +488,8 @@ static int ipmmu_domain_init_context(struct ipmmu_vmsa_domain *domain)
417 * software management as we have no use for it. Flush the TLB as 488 * software management as we have no use for it. Flush the TLB as
418 * required when modifying the context registers. 489 * required when modifying the context registers.
419 */ 490 */
420 ipmmu_ctx_write(domain, IMCTR, IMCTR_INTEN | IMCTR_FLUSH | IMCTR_MMUEN); 491 ipmmu_ctx_write_all(domain, IMCTR,
492 IMCTR_INTEN | IMCTR_FLUSH | IMCTR_MMUEN);
421 493
422 return 0; 494 return 0;
423} 495}
@@ -430,9 +502,9 @@ static void ipmmu_domain_destroy_context(struct ipmmu_vmsa_domain *domain)
430 * 502 *
431 * TODO: Is TLB flush really needed ? 503 * TODO: Is TLB flush really needed ?
432 */ 504 */
433 ipmmu_ctx_write(domain, IMCTR, IMCTR_FLUSH); 505 ipmmu_ctx_write_all(domain, IMCTR, IMCTR_FLUSH);
434 ipmmu_tlb_sync(domain); 506 ipmmu_tlb_sync(domain);
435 ipmmu_domain_free_context(domain->mmu, domain->context_id); 507 ipmmu_domain_free_context(domain->mmu->root, domain->context_id);
436} 508}
437 509
438/* ----------------------------------------------------------------------------- 510/* -----------------------------------------------------------------------------
@@ -446,11 +518,11 @@ static irqreturn_t ipmmu_domain_irq(struct ipmmu_vmsa_domain *domain)
446 u32 status; 518 u32 status;
447 u32 iova; 519 u32 iova;
448 520
449 status = ipmmu_ctx_read(domain, IMSTR); 521 status = ipmmu_ctx_read_root(domain, IMSTR);
450 if (!(status & err_mask)) 522 if (!(status & err_mask))
451 return IRQ_NONE; 523 return IRQ_NONE;
452 524
453 iova = ipmmu_ctx_read(domain, IMEAR); 525 iova = ipmmu_ctx_read_root(domain, IMEAR);
454 526
455 /* 527 /*
456 * Clear the error status flags. Unlike traditional interrupt flag 528 * Clear the error status flags. Unlike traditional interrupt flag
@@ -458,7 +530,7 @@ static irqreturn_t ipmmu_domain_irq(struct ipmmu_vmsa_domain *domain)
458 * seems to require 0. The error address register must be read before, 530 * seems to require 0. The error address register must be read before,
459 * otherwise its value will be 0. 531 * otherwise its value will be 0.
460 */ 532 */
461 ipmmu_ctx_write(domain, IMSTR, 0); 533 ipmmu_ctx_write_root(domain, IMSTR, 0);
462 534
463 /* Log fatal errors. */ 535 /* Log fatal errors. */
464 if (status & IMSTR_MHIT) 536 if (status & IMSTR_MHIT)
@@ -499,7 +571,7 @@ static irqreturn_t ipmmu_irq(int irq, void *dev)
499 /* 571 /*
500 * Check interrupts for all active contexts. 572 * Check interrupts for all active contexts.
501 */ 573 */
502 for (i = 0; i < IPMMU_CTX_MAX; i++) { 574 for (i = 0; i < mmu->num_ctx; i++) {
503 if (!mmu->domains[i]) 575 if (!mmu->domains[i])
504 continue; 576 continue;
505 if (ipmmu_domain_irq(mmu->domains[i]) == IRQ_HANDLED) 577 if (ipmmu_domain_irq(mmu->domains[i]) == IRQ_HANDLED)
@@ -528,6 +600,27 @@ static struct iommu_domain *__ipmmu_domain_alloc(unsigned type)
528 return &domain->io_domain; 600 return &domain->io_domain;
529} 601}
530 602
603static struct iommu_domain *ipmmu_domain_alloc(unsigned type)
604{
605 struct iommu_domain *io_domain = NULL;
606
607 switch (type) {
608 case IOMMU_DOMAIN_UNMANAGED:
609 io_domain = __ipmmu_domain_alloc(type);
610 break;
611
612 case IOMMU_DOMAIN_DMA:
613 io_domain = __ipmmu_domain_alloc(type);
614 if (io_domain && iommu_get_dma_cookie(io_domain)) {
615 kfree(io_domain);
616 io_domain = NULL;
617 }
618 break;
619 }
620
621 return io_domain;
622}
623
531static void ipmmu_domain_free(struct iommu_domain *io_domain) 624static void ipmmu_domain_free(struct iommu_domain *io_domain)
532{ 625{
533 struct ipmmu_vmsa_domain *domain = to_vmsa_domain(io_domain); 626 struct ipmmu_vmsa_domain *domain = to_vmsa_domain(io_domain);
@@ -536,6 +629,7 @@ static void ipmmu_domain_free(struct iommu_domain *io_domain)
536 * Free the domain resources. We assume that all devices have already 629 * Free the domain resources. We assume that all devices have already
537 * been detached. 630 * been detached.
538 */ 631 */
632 iommu_put_dma_cookie(io_domain);
539 ipmmu_domain_destroy_context(domain); 633 ipmmu_domain_destroy_context(domain);
540 free_io_pgtable_ops(domain->iop); 634 free_io_pgtable_ops(domain->iop);
541 kfree(domain); 635 kfree(domain);
@@ -544,15 +638,14 @@ static void ipmmu_domain_free(struct iommu_domain *io_domain)
544static int ipmmu_attach_device(struct iommu_domain *io_domain, 638static int ipmmu_attach_device(struct iommu_domain *io_domain,
545 struct device *dev) 639 struct device *dev)
546{ 640{
547 struct ipmmu_vmsa_iommu_priv *priv = to_priv(dev);
548 struct iommu_fwspec *fwspec = dev->iommu_fwspec; 641 struct iommu_fwspec *fwspec = dev->iommu_fwspec;
549 struct ipmmu_vmsa_device *mmu = priv->mmu; 642 struct ipmmu_vmsa_device *mmu = to_ipmmu(dev);
550 struct ipmmu_vmsa_domain *domain = to_vmsa_domain(io_domain); 643 struct ipmmu_vmsa_domain *domain = to_vmsa_domain(io_domain);
551 unsigned long flags; 644 unsigned long flags;
552 unsigned int i; 645 unsigned int i;
553 int ret = 0; 646 int ret = 0;
554 647
555 if (!priv || !priv->mmu) { 648 if (!mmu) {
556 dev_err(dev, "Cannot attach to IPMMU\n"); 649 dev_err(dev, "Cannot attach to IPMMU\n");
557 return -ENXIO; 650 return -ENXIO;
558 } 651 }
@@ -563,6 +656,13 @@ static int ipmmu_attach_device(struct iommu_domain *io_domain,
563 /* The domain hasn't been used yet, initialize it. */ 656 /* The domain hasn't been used yet, initialize it. */
564 domain->mmu = mmu; 657 domain->mmu = mmu;
565 ret = ipmmu_domain_init_context(domain); 658 ret = ipmmu_domain_init_context(domain);
659 if (ret < 0) {
660 dev_err(dev, "Unable to initialize IPMMU context\n");
661 domain->mmu = NULL;
662 } else {
663 dev_info(dev, "Using IPMMU context %u\n",
664 domain->context_id);
665 }
566 } else if (domain->mmu != mmu) { 666 } else if (domain->mmu != mmu) {
567 /* 667 /*
568 * Something is wrong, we can't attach two devices using 668 * Something is wrong, we can't attach two devices using
@@ -619,6 +719,14 @@ static size_t ipmmu_unmap(struct iommu_domain *io_domain, unsigned long iova,
619 return domain->iop->unmap(domain->iop, iova, size); 719 return domain->iop->unmap(domain->iop, iova, size);
620} 720}
621 721
722static void ipmmu_iotlb_sync(struct iommu_domain *io_domain)
723{
724 struct ipmmu_vmsa_domain *domain = to_vmsa_domain(io_domain);
725
726 if (domain->mmu)
727 ipmmu_tlb_flush_all(domain);
728}
729
622static phys_addr_t ipmmu_iova_to_phys(struct iommu_domain *io_domain, 730static phys_addr_t ipmmu_iova_to_phys(struct iommu_domain *io_domain,
623 dma_addr_t iova) 731 dma_addr_t iova)
624{ 732{
@@ -633,62 +741,53 @@ static int ipmmu_init_platform_device(struct device *dev,
633 struct of_phandle_args *args) 741 struct of_phandle_args *args)
634{ 742{
635 struct platform_device *ipmmu_pdev; 743 struct platform_device *ipmmu_pdev;
636 struct ipmmu_vmsa_iommu_priv *priv;
637 744
638 ipmmu_pdev = of_find_device_by_node(args->np); 745 ipmmu_pdev = of_find_device_by_node(args->np);
639 if (!ipmmu_pdev) 746 if (!ipmmu_pdev)
640 return -ENODEV; 747 return -ENODEV;
641 748
642 priv = kzalloc(sizeof(*priv), GFP_KERNEL); 749 dev->iommu_fwspec->iommu_priv = platform_get_drvdata(ipmmu_pdev);
643 if (!priv)
644 return -ENOMEM;
645
646 priv->mmu = platform_get_drvdata(ipmmu_pdev);
647 priv->dev = dev;
648 dev->iommu_fwspec->iommu_priv = priv;
649 return 0; 750 return 0;
650} 751}
651 752
753static bool ipmmu_slave_whitelist(struct device *dev)
754{
755 /* By default, do not allow use of IPMMU */
756 return false;
757}
758
759static const struct soc_device_attribute soc_r8a7795[] = {
760 { .soc_id = "r8a7795", },
761 { /* sentinel */ }
762};
763
652static int ipmmu_of_xlate(struct device *dev, 764static int ipmmu_of_xlate(struct device *dev,
653 struct of_phandle_args *spec) 765 struct of_phandle_args *spec)
654{ 766{
767 /* For R-Car Gen3 use a white list to opt-in slave devices */
768 if (soc_device_match(soc_r8a7795) && !ipmmu_slave_whitelist(dev))
769 return -ENODEV;
770
655 iommu_fwspec_add_ids(dev, spec->args, 1); 771 iommu_fwspec_add_ids(dev, spec->args, 1);
656 772
657 /* Initialize once - xlate() will call multiple times */ 773 /* Initialize once - xlate() will call multiple times */
658 if (to_priv(dev)) 774 if (to_ipmmu(dev))
659 return 0; 775 return 0;
660 776
661 return ipmmu_init_platform_device(dev, spec); 777 return ipmmu_init_platform_device(dev, spec);
662} 778}
663 779
664#if defined(CONFIG_ARM) && !defined(CONFIG_IOMMU_DMA) 780static int ipmmu_init_arm_mapping(struct device *dev)
665
666static struct iommu_domain *ipmmu_domain_alloc(unsigned type)
667{ 781{
668 if (type != IOMMU_DOMAIN_UNMANAGED) 782 struct ipmmu_vmsa_device *mmu = to_ipmmu(dev);
669 return NULL;
670
671 return __ipmmu_domain_alloc(type);
672}
673
674static int ipmmu_add_device(struct device *dev)
675{
676 struct ipmmu_vmsa_device *mmu = NULL;
677 struct iommu_group *group; 783 struct iommu_group *group;
678 int ret; 784 int ret;
679 785
680 /*
681 * Only let through devices that have been verified in xlate()
682 */
683 if (!to_priv(dev))
684 return -ENODEV;
685
686 /* Create a device group and add the device to it. */ 786 /* Create a device group and add the device to it. */
687 group = iommu_group_alloc(); 787 group = iommu_group_alloc();
688 if (IS_ERR(group)) { 788 if (IS_ERR(group)) {
689 dev_err(dev, "Failed to allocate IOMMU group\n"); 789 dev_err(dev, "Failed to allocate IOMMU group\n");
690 ret = PTR_ERR(group); 790 return PTR_ERR(group);
691 goto error;
692 } 791 }
693 792
694 ret = iommu_group_add_device(group, dev); 793 ret = iommu_group_add_device(group, dev);
@@ -696,8 +795,7 @@ static int ipmmu_add_device(struct device *dev)
696 795
697 if (ret < 0) { 796 if (ret < 0) {
698 dev_err(dev, "Failed to add device to IPMMU group\n"); 797 dev_err(dev, "Failed to add device to IPMMU group\n");
699 group = NULL; 798 return ret;
700 goto error;
701 } 799 }
702 800
703 /* 801 /*
@@ -709,7 +807,6 @@ static int ipmmu_add_device(struct device *dev)
709 * - Make the mapping size configurable ? We currently use a 2GB mapping 807 * - Make the mapping size configurable ? We currently use a 2GB mapping
710 * at a 1GB offset to ensure that NULL VAs will fault. 808 * at a 1GB offset to ensure that NULL VAs will fault.
711 */ 809 */
712 mmu = to_priv(dev)->mmu;
713 if (!mmu->mapping) { 810 if (!mmu->mapping) {
714 struct dma_iommu_mapping *mapping; 811 struct dma_iommu_mapping *mapping;
715 812
@@ -734,159 +831,73 @@ static int ipmmu_add_device(struct device *dev)
734 return 0; 831 return 0;
735 832
736error: 833error:
737 if (mmu) 834 iommu_group_remove_device(dev);
835 if (mmu->mapping)
738 arm_iommu_release_mapping(mmu->mapping); 836 arm_iommu_release_mapping(mmu->mapping);
739 837
740 if (!IS_ERR_OR_NULL(group))
741 iommu_group_remove_device(dev);
742
743 return ret; 838 return ret;
744} 839}
745 840
746static void ipmmu_remove_device(struct device *dev) 841static int ipmmu_add_device(struct device *dev)
747{
748 arm_iommu_detach_device(dev);
749 iommu_group_remove_device(dev);
750}
751
752static const struct iommu_ops ipmmu_ops = {
753 .domain_alloc = ipmmu_domain_alloc,
754 .domain_free = ipmmu_domain_free,
755 .attach_dev = ipmmu_attach_device,
756 .detach_dev = ipmmu_detach_device,
757 .map = ipmmu_map,
758 .unmap = ipmmu_unmap,
759 .map_sg = default_iommu_map_sg,
760 .iova_to_phys = ipmmu_iova_to_phys,
761 .add_device = ipmmu_add_device,
762 .remove_device = ipmmu_remove_device,
763 .pgsize_bitmap = SZ_1G | SZ_2M | SZ_4K,
764 .of_xlate = ipmmu_of_xlate,
765};
766
767#endif /* !CONFIG_ARM && CONFIG_IOMMU_DMA */
768
769#ifdef CONFIG_IOMMU_DMA
770
771static DEFINE_SPINLOCK(ipmmu_slave_devices_lock);
772static LIST_HEAD(ipmmu_slave_devices);
773
774static struct iommu_domain *ipmmu_domain_alloc_dma(unsigned type)
775{
776 struct iommu_domain *io_domain = NULL;
777
778 switch (type) {
779 case IOMMU_DOMAIN_UNMANAGED:
780 io_domain = __ipmmu_domain_alloc(type);
781 break;
782
783 case IOMMU_DOMAIN_DMA:
784 io_domain = __ipmmu_domain_alloc(type);
785 if (io_domain)
786 iommu_get_dma_cookie(io_domain);
787 break;
788 }
789
790 return io_domain;
791}
792
793static void ipmmu_domain_free_dma(struct iommu_domain *io_domain)
794{
795 switch (io_domain->type) {
796 case IOMMU_DOMAIN_DMA:
797 iommu_put_dma_cookie(io_domain);
798 /* fall-through */
799 default:
800 ipmmu_domain_free(io_domain);
801 break;
802 }
803}
804
805static int ipmmu_add_device_dma(struct device *dev)
806{ 842{
807 struct iommu_group *group; 843 struct iommu_group *group;
808 844
809 /* 845 /*
810 * Only let through devices that have been verified in xlate() 846 * Only let through devices that have been verified in xlate()
811 */ 847 */
812 if (!to_priv(dev)) 848 if (!to_ipmmu(dev))
813 return -ENODEV; 849 return -ENODEV;
814 850
851 if (IS_ENABLED(CONFIG_ARM) && !IS_ENABLED(CONFIG_IOMMU_DMA))
852 return ipmmu_init_arm_mapping(dev);
853
815 group = iommu_group_get_for_dev(dev); 854 group = iommu_group_get_for_dev(dev);
816 if (IS_ERR(group)) 855 if (IS_ERR(group))
817 return PTR_ERR(group); 856 return PTR_ERR(group);
818 857
819 spin_lock(&ipmmu_slave_devices_lock); 858 iommu_group_put(group);
820 list_add(&to_priv(dev)->list, &ipmmu_slave_devices);
821 spin_unlock(&ipmmu_slave_devices_lock);
822 return 0; 859 return 0;
823} 860}
824 861
825static void ipmmu_remove_device_dma(struct device *dev) 862static void ipmmu_remove_device(struct device *dev)
826{ 863{
827 struct ipmmu_vmsa_iommu_priv *priv = to_priv(dev); 864 arm_iommu_detach_device(dev);
828
829 spin_lock(&ipmmu_slave_devices_lock);
830 list_del(&priv->list);
831 spin_unlock(&ipmmu_slave_devices_lock);
832
833 iommu_group_remove_device(dev); 865 iommu_group_remove_device(dev);
834} 866}
835 867
836static struct device *ipmmu_find_sibling_device(struct device *dev) 868static struct iommu_group *ipmmu_find_group(struct device *dev)
837{
838 struct ipmmu_vmsa_iommu_priv *priv = to_priv(dev);
839 struct ipmmu_vmsa_iommu_priv *sibling_priv = NULL;
840 bool found = false;
841
842 spin_lock(&ipmmu_slave_devices_lock);
843
844 list_for_each_entry(sibling_priv, &ipmmu_slave_devices, list) {
845 if (priv == sibling_priv)
846 continue;
847 if (sibling_priv->mmu == priv->mmu) {
848 found = true;
849 break;
850 }
851 }
852
853 spin_unlock(&ipmmu_slave_devices_lock);
854
855 return found ? sibling_priv->dev : NULL;
856}
857
858static struct iommu_group *ipmmu_find_group_dma(struct device *dev)
859{ 869{
870 struct ipmmu_vmsa_device *mmu = to_ipmmu(dev);
860 struct iommu_group *group; 871 struct iommu_group *group;
861 struct device *sibling;
862 872
863 sibling = ipmmu_find_sibling_device(dev); 873 if (mmu->group)
864 if (sibling) 874 return iommu_group_ref_get(mmu->group);
865 group = iommu_group_get(sibling); 875
866 if (!sibling || IS_ERR(group)) 876 group = iommu_group_alloc();
867 group = generic_device_group(dev); 877 if (!IS_ERR(group))
878 mmu->group = group;
868 879
869 return group; 880 return group;
870} 881}
871 882
872static const struct iommu_ops ipmmu_ops = { 883static const struct iommu_ops ipmmu_ops = {
873 .domain_alloc = ipmmu_domain_alloc_dma, 884 .domain_alloc = ipmmu_domain_alloc,
874 .domain_free = ipmmu_domain_free_dma, 885 .domain_free = ipmmu_domain_free,
875 .attach_dev = ipmmu_attach_device, 886 .attach_dev = ipmmu_attach_device,
876 .detach_dev = ipmmu_detach_device, 887 .detach_dev = ipmmu_detach_device,
877 .map = ipmmu_map, 888 .map = ipmmu_map,
878 .unmap = ipmmu_unmap, 889 .unmap = ipmmu_unmap,
890 .flush_iotlb_all = ipmmu_iotlb_sync,
891 .iotlb_sync = ipmmu_iotlb_sync,
879 .map_sg = default_iommu_map_sg, 892 .map_sg = default_iommu_map_sg,
880 .iova_to_phys = ipmmu_iova_to_phys, 893 .iova_to_phys = ipmmu_iova_to_phys,
881 .add_device = ipmmu_add_device_dma, 894 .add_device = ipmmu_add_device,
882 .remove_device = ipmmu_remove_device_dma, 895 .remove_device = ipmmu_remove_device,
883 .device_group = ipmmu_find_group_dma, 896 .device_group = ipmmu_find_group,
884 .pgsize_bitmap = SZ_1G | SZ_2M | SZ_4K, 897 .pgsize_bitmap = SZ_1G | SZ_2M | SZ_4K,
885 .of_xlate = ipmmu_of_xlate, 898 .of_xlate = ipmmu_of_xlate,
886}; 899};
887 900
888#endif /* CONFIG_IOMMU_DMA */
889
890/* ----------------------------------------------------------------------------- 901/* -----------------------------------------------------------------------------
891 * Probe/remove and init 902 * Probe/remove and init
892 */ 903 */
@@ -896,10 +907,40 @@ static void ipmmu_device_reset(struct ipmmu_vmsa_device *mmu)
896 unsigned int i; 907 unsigned int i;
897 908
898 /* Disable all contexts. */ 909 /* Disable all contexts. */
899 for (i = 0; i < 4; ++i) 910 for (i = 0; i < mmu->num_ctx; ++i)
900 ipmmu_write(mmu, i * IM_CTX_SIZE + IMCTR, 0); 911 ipmmu_write(mmu, i * IM_CTX_SIZE + IMCTR, 0);
901} 912}
902 913
914static const struct ipmmu_features ipmmu_features_default = {
915 .use_ns_alias_offset = true,
916 .has_cache_leaf_nodes = false,
917 .number_of_contexts = 1, /* software only tested with one context */
918 .setup_imbuscr = true,
919 .twobit_imttbcr_sl0 = false,
920};
921
922static const struct ipmmu_features ipmmu_features_r8a7795 = {
923 .use_ns_alias_offset = false,
924 .has_cache_leaf_nodes = true,
925 .number_of_contexts = 8,
926 .setup_imbuscr = false,
927 .twobit_imttbcr_sl0 = true,
928};
929
930static const struct of_device_id ipmmu_of_ids[] = {
931 {
932 .compatible = "renesas,ipmmu-vmsa",
933 .data = &ipmmu_features_default,
934 }, {
935 .compatible = "renesas,ipmmu-r8a7795",
936 .data = &ipmmu_features_r8a7795,
937 }, {
938 /* Terminator */
939 },
940};
941
942MODULE_DEVICE_TABLE(of, ipmmu_of_ids);
943
903static int ipmmu_probe(struct platform_device *pdev) 944static int ipmmu_probe(struct platform_device *pdev)
904{ 945{
905 struct ipmmu_vmsa_device *mmu; 946 struct ipmmu_vmsa_device *mmu;
@@ -917,6 +958,8 @@ static int ipmmu_probe(struct platform_device *pdev)
917 mmu->num_utlbs = 32; 958 mmu->num_utlbs = 32;
918 spin_lock_init(&mmu->lock); 959 spin_lock_init(&mmu->lock);
919 bitmap_zero(mmu->ctx, IPMMU_CTX_MAX); 960 bitmap_zero(mmu->ctx, IPMMU_CTX_MAX);
961 mmu->features = of_device_get_match_data(&pdev->dev);
962 dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(40));
920 963
921 /* Map I/O memory and request IRQ. */ 964 /* Map I/O memory and request IRQ. */
922 res = platform_get_resource(pdev, IORESOURCE_MEM, 0); 965 res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
@@ -936,34 +979,71 @@ static int ipmmu_probe(struct platform_device *pdev)
936 * Offset the registers base unconditionally to point to the non-secure 979 * Offset the registers base unconditionally to point to the non-secure
937 * alias space for now. 980 * alias space for now.
938 */ 981 */
939 mmu->base += IM_NS_ALIAS_OFFSET; 982 if (mmu->features->use_ns_alias_offset)
983 mmu->base += IM_NS_ALIAS_OFFSET;
984
985 mmu->num_ctx = min_t(unsigned int, IPMMU_CTX_MAX,
986 mmu->features->number_of_contexts);
940 987
941 irq = platform_get_irq(pdev, 0); 988 irq = platform_get_irq(pdev, 0);
942 if (irq < 0) {
943 dev_err(&pdev->dev, "no IRQ found\n");
944 return irq;
945 }
946 989
947 ret = devm_request_irq(&pdev->dev, irq, ipmmu_irq, 0, 990 /*
948 dev_name(&pdev->dev), mmu); 991 * Determine if this IPMMU instance is a root device by checking for
949 if (ret < 0) { 992 * the lack of has_cache_leaf_nodes flag or renesas,ipmmu-main property.
950 dev_err(&pdev->dev, "failed to request IRQ %d\n", irq); 993 */
951 return ret; 994 if (!mmu->features->has_cache_leaf_nodes ||
952 } 995 !of_find_property(pdev->dev.of_node, "renesas,ipmmu-main", NULL))
996 mmu->root = mmu;
997 else
998 mmu->root = ipmmu_find_root();
953 999
954 ipmmu_device_reset(mmu); 1000 /*
1001 * Wait until the root device has been registered for sure.
1002 */
1003 if (!mmu->root)
1004 return -EPROBE_DEFER;
1005
1006 /* Root devices have mandatory IRQs */
1007 if (ipmmu_is_root(mmu)) {
1008 if (irq < 0) {
1009 dev_err(&pdev->dev, "no IRQ found\n");
1010 return irq;
1011 }
955 1012
956 ret = iommu_device_sysfs_add(&mmu->iommu, &pdev->dev, NULL, 1013 ret = devm_request_irq(&pdev->dev, irq, ipmmu_irq, 0,
957 dev_name(&pdev->dev)); 1014 dev_name(&pdev->dev), mmu);
958 if (ret) 1015 if (ret < 0) {
959 return ret; 1016 dev_err(&pdev->dev, "failed to request IRQ %d\n", irq);
1017 return ret;
1018 }
960 1019
961 iommu_device_set_ops(&mmu->iommu, &ipmmu_ops); 1020 ipmmu_device_reset(mmu);
962 iommu_device_set_fwnode(&mmu->iommu, &pdev->dev.of_node->fwnode); 1021 }
963 1022
964 ret = iommu_device_register(&mmu->iommu); 1023 /*
965 if (ret) 1024 * Register the IPMMU to the IOMMU subsystem in the following cases:
966 return ret; 1025 * - R-Car Gen2 IPMMU (all devices registered)
1026 * - R-Car Gen3 IPMMU (leaf devices only - skip root IPMMU-MM device)
1027 */
1028 if (!mmu->features->has_cache_leaf_nodes || !ipmmu_is_root(mmu)) {
1029 ret = iommu_device_sysfs_add(&mmu->iommu, &pdev->dev, NULL,
1030 dev_name(&pdev->dev));
1031 if (ret)
1032 return ret;
1033
1034 iommu_device_set_ops(&mmu->iommu, &ipmmu_ops);
1035 iommu_device_set_fwnode(&mmu->iommu,
1036 &pdev->dev.of_node->fwnode);
1037
1038 ret = iommu_device_register(&mmu->iommu);
1039 if (ret)
1040 return ret;
1041
1042#if defined(CONFIG_IOMMU_DMA)
1043 if (!iommu_present(&platform_bus_type))
1044 bus_set_iommu(&platform_bus_type, &ipmmu_ops);
1045#endif
1046 }
967 1047
968 /* 1048 /*
969 * We can't create the ARM mapping here as it requires the bus to have 1049 * We can't create the ARM mapping here as it requires the bus to have
@@ -983,20 +1063,13 @@ static int ipmmu_remove(struct platform_device *pdev)
983 iommu_device_sysfs_remove(&mmu->iommu); 1063 iommu_device_sysfs_remove(&mmu->iommu);
984 iommu_device_unregister(&mmu->iommu); 1064 iommu_device_unregister(&mmu->iommu);
985 1065
986#if defined(CONFIG_ARM) && !defined(CONFIG_IOMMU_DMA)
987 arm_iommu_release_mapping(mmu->mapping); 1066 arm_iommu_release_mapping(mmu->mapping);
988#endif
989 1067
990 ipmmu_device_reset(mmu); 1068 ipmmu_device_reset(mmu);
991 1069
992 return 0; 1070 return 0;
993} 1071}
994 1072
995static const struct of_device_id ipmmu_of_ids[] = {
996 { .compatible = "renesas,ipmmu-vmsa", },
997 { }
998};
999
1000static struct platform_driver ipmmu_driver = { 1073static struct platform_driver ipmmu_driver = {
1001 .driver = { 1074 .driver = {
1002 .name = "ipmmu-vmsa", 1075 .name = "ipmmu-vmsa",
@@ -1008,15 +1081,22 @@ static struct platform_driver ipmmu_driver = {
1008 1081
1009static int __init ipmmu_init(void) 1082static int __init ipmmu_init(void)
1010{ 1083{
1084 static bool setup_done;
1011 int ret; 1085 int ret;
1012 1086
1087 if (setup_done)
1088 return 0;
1089
1013 ret = platform_driver_register(&ipmmu_driver); 1090 ret = platform_driver_register(&ipmmu_driver);
1014 if (ret < 0) 1091 if (ret < 0)
1015 return ret; 1092 return ret;
1016 1093
1094#if defined(CONFIG_ARM) && !defined(CONFIG_IOMMU_DMA)
1017 if (!iommu_present(&platform_bus_type)) 1095 if (!iommu_present(&platform_bus_type))
1018 bus_set_iommu(&platform_bus_type, &ipmmu_ops); 1096 bus_set_iommu(&platform_bus_type, &ipmmu_ops);
1097#endif
1019 1098
1099 setup_done = true;
1020 return 0; 1100 return 0;
1021} 1101}
1022 1102
@@ -1028,6 +1108,19 @@ static void __exit ipmmu_exit(void)
1028subsys_initcall(ipmmu_init); 1108subsys_initcall(ipmmu_init);
1029module_exit(ipmmu_exit); 1109module_exit(ipmmu_exit);
1030 1110
1111#ifdef CONFIG_IOMMU_DMA
1112static int __init ipmmu_vmsa_iommu_of_setup(struct device_node *np)
1113{
1114 ipmmu_init();
1115 return 0;
1116}
1117
1118IOMMU_OF_DECLARE(ipmmu_vmsa_iommu_of, "renesas,ipmmu-vmsa",
1119 ipmmu_vmsa_iommu_of_setup);
1120IOMMU_OF_DECLARE(ipmmu_r8a7795_iommu_of, "renesas,ipmmu-r8a7795",
1121 ipmmu_vmsa_iommu_of_setup);
1122#endif
1123
1031MODULE_DESCRIPTION("IOMMU API for Renesas VMSA-compatible IPMMU"); 1124MODULE_DESCRIPTION("IOMMU API for Renesas VMSA-compatible IPMMU");
1032MODULE_AUTHOR("Laurent Pinchart <laurent.pinchart@ideasonboard.com>"); 1125MODULE_AUTHOR("Laurent Pinchart <laurent.pinchart@ideasonboard.com>");
1033MODULE_LICENSE("GPL v2"); 1126MODULE_LICENSE("GPL v2");
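[Editor's sketch] A recurring pattern in the ipmmu-vmsa rework above: the two #ifdef'd iommu_ops/add_device implementations collapse into one, no-op stand-ins are defined for the arm_iommu_* helpers when they do not exist, and the remaining code branches with IS_ENABLED() so both paths always compile and the dead one is discarded by the compiler. A minimal illustration of that shape outside the kernel (the config macro and helper are made up for the example):

#include <stdio.h>

/* Pretend configuration: flip to 1 to take the legacy mapping path. */
#define CONFIG_LEGACY_MAPPING 0

#if CONFIG_LEGACY_MAPPING
static int legacy_attach(const char *dev) { printf("legacy %s\n", dev); return 0; }
#else
/* Stub keeps the call site compiling even though the path is dead code. */
#define legacy_attach(dev) (-1)
#endif

static int add_device(const char *dev)
{
	/*
	 * The condition is a compile-time constant, so the unused branch is
	 * dropped by the optimizer while the code still has to parse.
	 */
	if (CONFIG_LEGACY_MAPPING)
		return legacy_attach(dev);

	printf("generic group setup for %s\n", dev);
	return 0;
}

int main(void)
{
	return add_device("example-master") ? 1 : 0;
}
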
diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index 16d33ac19db0..f227d73e7bf6 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -392,6 +392,11 @@ static size_t mtk_iommu_unmap(struct iommu_domain *domain,
392 return unmapsz; 392 return unmapsz;
393} 393}
394 394
395static void mtk_iommu_iotlb_sync(struct iommu_domain *domain)
396{
397 mtk_iommu_tlb_sync(mtk_iommu_get_m4u_data());
398}
399
395static phys_addr_t mtk_iommu_iova_to_phys(struct iommu_domain *domain, 400static phys_addr_t mtk_iommu_iova_to_phys(struct iommu_domain *domain,
396 dma_addr_t iova) 401 dma_addr_t iova)
397{ 402{
@@ -491,6 +496,8 @@ static struct iommu_ops mtk_iommu_ops = {
491 .map = mtk_iommu_map, 496 .map = mtk_iommu_map,
492 .unmap = mtk_iommu_unmap, 497 .unmap = mtk_iommu_unmap,
493 .map_sg = default_iommu_map_sg, 498 .map_sg = default_iommu_map_sg,
499 .flush_iotlb_all = mtk_iommu_iotlb_sync,
500 .iotlb_sync = mtk_iommu_iotlb_sync,
494 .iova_to_phys = mtk_iommu_iova_to_phys, 501 .iova_to_phys = mtk_iommu_iova_to_phys,
495 .add_device = mtk_iommu_add_device, 502 .add_device = mtk_iommu_add_device,
496 .remove_device = mtk_iommu_remove_device, 503 .remove_device = mtk_iommu_remove_device,
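[Editor's sketch] The Mediatek hunk above is part of the series-wide move to the deferred IOMMU TLB sync API: ->unmap stops flushing hardware on its own, and the core or DMA layer calls ->iotlb_sync (or ->flush_iotlb_all) once per batch. The payoff is easiest to see with a toy ops table and a caller that unmaps several ranges before syncing (stub types, not the real iommu_ops structure):

#include <stdio.h>

static int tlb_flushes;

struct toy_ops {
	void (*unmap)(unsigned long iova, unsigned long size);
	void (*iotlb_sync)(void);
};

static void toy_unmap(unsigned long iova, unsigned long size)
{
	/* Tear down page-table entries only; no hardware flush here. */
	(void)iova; (void)size;
}

static void toy_iotlb_sync(void)
{
	tlb_flushes++;	/* one expensive hardware sync for the whole batch */
}

static const struct toy_ops ops = { toy_unmap, toy_iotlb_sync };

int main(void)
{
	unsigned long iova;

	for (iova = 0; iova < 8 * 0x1000; iova += 0x1000)
		ops.unmap(iova, 0x1000);
	ops.iotlb_sync();

	printf("8 unmaps, %d TLB sync(s)\n", tlb_flushes);
	return 0;
}
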
diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c
index bc1efbfb9ddf..542930cd183d 100644
--- a/drivers/iommu/mtk_iommu_v1.c
+++ b/drivers/iommu/mtk_iommu_v1.c
@@ -708,7 +708,7 @@ static struct platform_driver mtk_iommu_driver = {
708 .probe = mtk_iommu_probe, 708 .probe = mtk_iommu_probe,
709 .remove = mtk_iommu_remove, 709 .remove = mtk_iommu_remove,
710 .driver = { 710 .driver = {
711 .name = "mtk-iommu", 711 .name = "mtk-iommu-v1",
712 .of_match_table = mtk_iommu_of_ids, 712 .of_match_table = mtk_iommu_of_ids,
713 .pm = &mtk_iommu_pm_ops, 713 .pm = &mtk_iommu_pm_ops,
714 } 714 }
diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c
index bd67e1b2c64e..e135ab830ebf 100644
--- a/drivers/iommu/omap-iommu.c
+++ b/drivers/iommu/omap-iommu.c
@@ -2,6 +2,7 @@
2 * omap iommu: tlb and pagetable primitives 2 * omap iommu: tlb and pagetable primitives
3 * 3 *
4 * Copyright (C) 2008-2010 Nokia Corporation 4 * Copyright (C) 2008-2010 Nokia Corporation
5 * Copyright (C) 2013-2017 Texas Instruments Incorporated - http://www.ti.com/
5 * 6 *
6 * Written by Hiroshi DOYU <Hiroshi.DOYU@nokia.com>, 7 * Written by Hiroshi DOYU <Hiroshi.DOYU@nokia.com>,
7 * Paul Mundt and Toshihiro Kobayashi 8 * Paul Mundt and Toshihiro Kobayashi
@@ -71,13 +72,23 @@ static struct omap_iommu_domain *to_omap_domain(struct iommu_domain *dom)
71 **/ 72 **/
72void omap_iommu_save_ctx(struct device *dev) 73void omap_iommu_save_ctx(struct device *dev)
73{ 74{
74 struct omap_iommu *obj = dev_to_omap_iommu(dev); 75 struct omap_iommu_arch_data *arch_data = dev->archdata.iommu;
75 u32 *p = obj->ctx; 76 struct omap_iommu *obj;
77 u32 *p;
76 int i; 78 int i;
77 79
78 for (i = 0; i < (MMU_REG_SIZE / sizeof(u32)); i++) { 80 if (!arch_data)
79 p[i] = iommu_read_reg(obj, i * sizeof(u32)); 81 return;
80 dev_dbg(obj->dev, "%s\t[%02d] %08x\n", __func__, i, p[i]); 82
83 while (arch_data->iommu_dev) {
84 obj = arch_data->iommu_dev;
85 p = obj->ctx;
86 for (i = 0; i < (MMU_REG_SIZE / sizeof(u32)); i++) {
87 p[i] = iommu_read_reg(obj, i * sizeof(u32));
88 dev_dbg(obj->dev, "%s\t[%02d] %08x\n", __func__, i,
89 p[i]);
90 }
91 arch_data++;
81 } 92 }
82} 93}
83EXPORT_SYMBOL_GPL(omap_iommu_save_ctx); 94EXPORT_SYMBOL_GPL(omap_iommu_save_ctx);
@@ -88,13 +99,23 @@ EXPORT_SYMBOL_GPL(omap_iommu_save_ctx);
88 **/ 99 **/
89void omap_iommu_restore_ctx(struct device *dev) 100void omap_iommu_restore_ctx(struct device *dev)
90{ 101{
91 struct omap_iommu *obj = dev_to_omap_iommu(dev); 102 struct omap_iommu_arch_data *arch_data = dev->archdata.iommu;
92 u32 *p = obj->ctx; 103 struct omap_iommu *obj;
104 u32 *p;
93 int i; 105 int i;
94 106
95 for (i = 0; i < (MMU_REG_SIZE / sizeof(u32)); i++) { 107 if (!arch_data)
96 iommu_write_reg(obj, p[i], i * sizeof(u32)); 108 return;
97 dev_dbg(obj->dev, "%s\t[%02d] %08x\n", __func__, i, p[i]); 109
110 while (arch_data->iommu_dev) {
111 obj = arch_data->iommu_dev;
112 p = obj->ctx;
113 for (i = 0; i < (MMU_REG_SIZE / sizeof(u32)); i++) {
114 iommu_write_reg(obj, p[i], i * sizeof(u32));
115 dev_dbg(obj->dev, "%s\t[%02d] %08x\n", __func__, i,
116 p[i]);
117 }
118 arch_data++;
98 } 119 }
99} 120}
100EXPORT_SYMBOL_GPL(omap_iommu_restore_ctx); 121EXPORT_SYMBOL_GPL(omap_iommu_restore_ctx);
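[Editor's sketch] With dual-MMU DSPs, dev->archdata.iommu now points to an array of omap_iommu_arch_data entries terminated by one whose iommu_dev is NULL, so the save/restore helpers above loop until they hit the sentinel instead of assuming a single IOMMU. A stand-alone walk over such a sentinel-terminated array (names are invented for the example):

#include <stdio.h>
#include <stddef.h>

struct toy_iommu { const char *name; };
struct arch_data { struct toy_iommu *iommu_dev; };

static struct toy_iommu mmu0 = { "mmu0" }, mmu1 = { "mmu1" };

/* Two real entries plus the NULL sentinel the walkers stop on. */
static struct arch_data dev_archdata[] = {
	{ &mmu0 }, { &mmu1 }, { NULL },
};

static void save_ctx(struct arch_data *arch_data)
{
	if (!arch_data)
		return;

	while (arch_data->iommu_dev) {
		printf("saving context of %s\n", arch_data->iommu_dev->name);
		arch_data++;
	}
}

int main(void)
{
	save_ctx(dev_archdata);
	return 0;
}
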
@@ -805,7 +826,7 @@ static irqreturn_t iommu_fault_handler(int irq, void *data)
805 struct iommu_domain *domain = obj->domain; 826 struct iommu_domain *domain = obj->domain;
806 struct omap_iommu_domain *omap_domain = to_omap_domain(domain); 827 struct omap_iommu_domain *omap_domain = to_omap_domain(domain);
807 828
808 if (!omap_domain->iommu_dev) 829 if (!omap_domain->dev)
809 return IRQ_NONE; 830 return IRQ_NONE;
810 831
811 errs = iommu_report_fault(obj, &da); 832 errs = iommu_report_fault(obj, &da);
@@ -893,6 +914,24 @@ static void omap_iommu_detach(struct omap_iommu *obj)
893 dev_dbg(obj->dev, "%s: %s\n", __func__, obj->name); 914 dev_dbg(obj->dev, "%s: %s\n", __func__, obj->name);
894} 915}
895 916
917static bool omap_iommu_can_register(struct platform_device *pdev)
918{
919 struct device_node *np = pdev->dev.of_node;
920
921 if (!of_device_is_compatible(np, "ti,dra7-dsp-iommu"))
922 return true;
923
924 /*
925 * restrict IOMMU core registration only for processor-port MDMA MMUs
926 * on DRA7 DSPs
927 */
928 if ((!strcmp(dev_name(&pdev->dev), "40d01000.mmu")) ||
929 (!strcmp(dev_name(&pdev->dev), "41501000.mmu")))
930 return true;
931
932 return false;
933}
934
896static int omap_iommu_dra7_get_dsp_system_cfg(struct platform_device *pdev, 935static int omap_iommu_dra7_get_dsp_system_cfg(struct platform_device *pdev,
897 struct omap_iommu *obj) 936 struct omap_iommu *obj)
898{ 937{
@@ -984,19 +1023,22 @@ static int omap_iommu_probe(struct platform_device *pdev)
984 return err; 1023 return err;
985 platform_set_drvdata(pdev, obj); 1024 platform_set_drvdata(pdev, obj);
986 1025
987 obj->group = iommu_group_alloc(); 1026 if (omap_iommu_can_register(pdev)) {
988 if (IS_ERR(obj->group)) 1027 obj->group = iommu_group_alloc();
989 return PTR_ERR(obj->group); 1028 if (IS_ERR(obj->group))
1029 return PTR_ERR(obj->group);
990 1030
991 err = iommu_device_sysfs_add(&obj->iommu, obj->dev, NULL, obj->name); 1031 err = iommu_device_sysfs_add(&obj->iommu, obj->dev, NULL,
992 if (err) 1032 obj->name);
993 goto out_group; 1033 if (err)
1034 goto out_group;
994 1035
995 iommu_device_set_ops(&obj->iommu, &omap_iommu_ops); 1036 iommu_device_set_ops(&obj->iommu, &omap_iommu_ops);
996 1037
997 err = iommu_device_register(&obj->iommu); 1038 err = iommu_device_register(&obj->iommu);
998 if (err) 1039 if (err)
999 goto out_sysfs; 1040 goto out_sysfs;
1041 }
1000 1042
1001 pm_runtime_irq_safe(obj->dev); 1043 pm_runtime_irq_safe(obj->dev);
1002 pm_runtime_enable(obj->dev); 1044 pm_runtime_enable(obj->dev);
@@ -1018,11 +1060,13 @@ static int omap_iommu_remove(struct platform_device *pdev)
1018{ 1060{
1019 struct omap_iommu *obj = platform_get_drvdata(pdev); 1061 struct omap_iommu *obj = platform_get_drvdata(pdev);
1020 1062
1021 iommu_group_put(obj->group); 1063 if (obj->group) {
1022 obj->group = NULL; 1064 iommu_group_put(obj->group);
1065 obj->group = NULL;
1023 1066
1024 iommu_device_sysfs_remove(&obj->iommu); 1067 iommu_device_sysfs_remove(&obj->iommu);
1025 iommu_device_unregister(&obj->iommu); 1068 iommu_device_unregister(&obj->iommu);
1069 }
1026 1070
1027 omap_iommu_debugfs_remove(obj); 1071 omap_iommu_debugfs_remove(obj);
1028 1072
@@ -1068,11 +1112,13 @@ static int omap_iommu_map(struct iommu_domain *domain, unsigned long da,
1068 phys_addr_t pa, size_t bytes, int prot) 1112 phys_addr_t pa, size_t bytes, int prot)
1069{ 1113{
1070 struct omap_iommu_domain *omap_domain = to_omap_domain(domain); 1114 struct omap_iommu_domain *omap_domain = to_omap_domain(domain);
1071 struct omap_iommu *oiommu = omap_domain->iommu_dev; 1115 struct device *dev = omap_domain->dev;
1072 struct device *dev = oiommu->dev; 1116 struct omap_iommu_device *iommu;
1117 struct omap_iommu *oiommu;
1073 struct iotlb_entry e; 1118 struct iotlb_entry e;
1074 int omap_pgsz; 1119 int omap_pgsz;
1075 u32 ret; 1120 u32 ret = -EINVAL;
1121 int i;
1076 1122
1077 omap_pgsz = bytes_to_iopgsz(bytes); 1123 omap_pgsz = bytes_to_iopgsz(bytes);
1078 if (omap_pgsz < 0) { 1124 if (omap_pgsz < 0) {
@@ -1084,9 +1130,24 @@ static int omap_iommu_map(struct iommu_domain *domain, unsigned long da,
1084 1130
1085 iotlb_init_entry(&e, da, pa, omap_pgsz); 1131 iotlb_init_entry(&e, da, pa, omap_pgsz);
1086 1132
1087 ret = omap_iopgtable_store_entry(oiommu, &e); 1133 iommu = omap_domain->iommus;
1088 if (ret) 1134 for (i = 0; i < omap_domain->num_iommus; i++, iommu++) {
1089 dev_err(dev, "omap_iopgtable_store_entry failed: %d\n", ret); 1135 oiommu = iommu->iommu_dev;
1136 ret = omap_iopgtable_store_entry(oiommu, &e);
1137 if (ret) {
1138 dev_err(dev, "omap_iopgtable_store_entry failed: %d\n",
1139 ret);
1140 break;
1141 }
1142 }
1143
1144 if (ret) {
1145 while (i--) {
1146 iommu--;
1147 oiommu = iommu->iommu_dev;
1148 iopgtable_clear_entry(oiommu, da);
1149 }
1150 }
1090 1151
1091 return ret; 1152 return ret;
1092} 1153}
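[Editor's sketch] omap_iommu_map() above now mirror-programs the same entry into every IOMMU attached to the domain, and if one of them fails, the entries already written are cleared in reverse order so the hardware instances never diverge. The generic apply-to-all-or-roll-back shape, with stubbed program/clear helpers and a simulated failure:

#include <stdio.h>

#define NUM_MMUS 2

static int programmed[NUM_MMUS];

static int program_entry(int mmu, unsigned long da)
{
	if (mmu == 1)
		return -1;	/* simulate a failure on the second MMU */
	programmed[mmu] = 1;
	(void)da;
	return 0;
}

static void clear_entry(int mmu, unsigned long da)
{
	programmed[mmu] = 0;
	(void)da;
}

static int map_mirrored(unsigned long da)
{
	int i, ret = 0;

	for (i = 0; i < NUM_MMUS; i++) {
		ret = program_entry(i, da);
		if (ret)
			break;
	}

	if (ret) {
		/* Undo everything that did succeed, newest first. */
		while (i--)
			clear_entry(i, da);
	}
	return ret;
}

int main(void)
{
	printf("map %s, mmu0 entry %s\n",
	       map_mirrored(0x1000) ? "failed" : "ok",
	       programmed[0] ? "left behind" : "rolled back");
	return 0;
}
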
@@ -1095,12 +1156,90 @@ static size_t omap_iommu_unmap(struct iommu_domain *domain, unsigned long da,
1095 size_t size) 1156 size_t size)
1096{ 1157{
1097 struct omap_iommu_domain *omap_domain = to_omap_domain(domain); 1158 struct omap_iommu_domain *omap_domain = to_omap_domain(domain);
1098 struct omap_iommu *oiommu = omap_domain->iommu_dev; 1159 struct device *dev = omap_domain->dev;
1099 struct device *dev = oiommu->dev; 1160 struct omap_iommu_device *iommu;
1161 struct omap_iommu *oiommu;
1162 bool error = false;
1163 size_t bytes = 0;
1164 int i;
1100 1165
1101 dev_dbg(dev, "unmapping da 0x%lx size %u\n", da, size); 1166 dev_dbg(dev, "unmapping da 0x%lx size %u\n", da, size);
1102 1167
1103 return iopgtable_clear_entry(oiommu, da); 1168 iommu = omap_domain->iommus;
1169 for (i = 0; i < omap_domain->num_iommus; i++, iommu++) {
1170 oiommu = iommu->iommu_dev;
1171 bytes = iopgtable_clear_entry(oiommu, da);
1172 if (!bytes)
1173 error = true;
1174 }
1175
1176 /*
1177 * simplify return - we are only checking if any of the iommus
1178 * reported an error, but not if all of them are unmapping the
1179 * same number of entries. This should not occur due to the
1180 * mirror programming.
1181 */
1182 return error ? 0 : bytes;
1183}
1184
1185static int omap_iommu_count(struct device *dev)
1186{
1187 struct omap_iommu_arch_data *arch_data = dev->archdata.iommu;
1188 int count = 0;
1189
1190 while (arch_data->iommu_dev) {
1191 count++;
1192 arch_data++;
1193 }
1194
1195 return count;
1196}
1197
1198/* caller should call cleanup if this function fails */
1199static int omap_iommu_attach_init(struct device *dev,
1200 struct omap_iommu_domain *odomain)
1201{
1202 struct omap_iommu_device *iommu;
1203 int i;
1204
1205 odomain->num_iommus = omap_iommu_count(dev);
1206 if (!odomain->num_iommus)
1207 return -EINVAL;
1208
1209 odomain->iommus = kcalloc(odomain->num_iommus, sizeof(*iommu),
1210 GFP_ATOMIC);
1211 if (!odomain->iommus)
1212 return -ENOMEM;
1213
1214 iommu = odomain->iommus;
1215 for (i = 0; i < odomain->num_iommus; i++, iommu++) {
1216 iommu->pgtable = kzalloc(IOPGD_TABLE_SIZE, GFP_ATOMIC);
1217 if (!iommu->pgtable)
1218 return -ENOMEM;
1219
1220 /*
1221 * should never fail, but please keep this around to ensure
1222 * we keep the hardware happy
1223 */
1224 if (WARN_ON(!IS_ALIGNED((long)iommu->pgtable,
1225 IOPGD_TABLE_SIZE)))
1226 return -EINVAL;
1227 }
1228
1229 return 0;
1230}
1231
1232static void omap_iommu_detach_fini(struct omap_iommu_domain *odomain)
1233{
1234 int i;
1235 struct omap_iommu_device *iommu = odomain->iommus;
1236
1237 for (i = 0; iommu && i < odomain->num_iommus; i++, iommu++)
1238 kfree(iommu->pgtable);
1239
1240 kfree(odomain->iommus);
1241 odomain->num_iommus = 0;
1242 odomain->iommus = NULL;
1104} 1243}
1105 1244
1106static int 1245static int
@@ -1108,8 +1247,10 @@ omap_iommu_attach_dev(struct iommu_domain *domain, struct device *dev)
1108{ 1247{
1109 struct omap_iommu_domain *omap_domain = to_omap_domain(domain); 1248 struct omap_iommu_domain *omap_domain = to_omap_domain(domain);
1110 struct omap_iommu_arch_data *arch_data = dev->archdata.iommu; 1249 struct omap_iommu_arch_data *arch_data = dev->archdata.iommu;
1250 struct omap_iommu_device *iommu;
1111 struct omap_iommu *oiommu; 1251 struct omap_iommu *oiommu;
1112 int ret = 0; 1252 int ret = 0;
1253 int i;
1113 1254
1114 if (!arch_data || !arch_data->iommu_dev) { 1255 if (!arch_data || !arch_data->iommu_dev) {
1115 dev_err(dev, "device doesn't have an associated iommu\n"); 1256 dev_err(dev, "device doesn't have an associated iommu\n");
@@ -1118,26 +1259,49 @@ omap_iommu_attach_dev(struct iommu_domain *domain, struct device *dev)
1118 1259
1119 spin_lock(&omap_domain->lock); 1260 spin_lock(&omap_domain->lock);
1120 1261
1121 /* only a single device is supported per domain for now */ 1262 /* only a single client device can be attached to a domain */
1122 if (omap_domain->iommu_dev) { 1263 if (omap_domain->dev) {
1123 dev_err(dev, "iommu domain is already attached\n"); 1264 dev_err(dev, "iommu domain is already attached\n");
1124 ret = -EBUSY; 1265 ret = -EBUSY;
1125 goto out; 1266 goto out;
1126 } 1267 }
1127 1268
1128 oiommu = arch_data->iommu_dev; 1269 ret = omap_iommu_attach_init(dev, omap_domain);
1129
1130 /* get a handle to and enable the omap iommu */
1131 ret = omap_iommu_attach(oiommu, omap_domain->pgtable);
1132 if (ret) { 1270 if (ret) {
1133 dev_err(dev, "can't get omap iommu: %d\n", ret); 1271 dev_err(dev, "failed to allocate required iommu data %d\n",
1134 goto out; 1272 ret);
1273 goto init_fail;
1274 }
1275
1276 iommu = omap_domain->iommus;
1277 for (i = 0; i < omap_domain->num_iommus; i++, iommu++, arch_data++) {
1278 /* configure and enable the omap iommu */
1279 oiommu = arch_data->iommu_dev;
1280 ret = omap_iommu_attach(oiommu, iommu->pgtable);
1281 if (ret) {
1282 dev_err(dev, "can't get omap iommu: %d\n", ret);
1283 goto attach_fail;
1284 }
1285
1286 oiommu->domain = domain;
1287 iommu->iommu_dev = oiommu;
1135 } 1288 }
1136 1289
1137 omap_domain->iommu_dev = oiommu;
1138 omap_domain->dev = dev; 1290 omap_domain->dev = dev;
1139 oiommu->domain = domain;
1140 1291
1292 goto out;
1293
1294attach_fail:
1295 while (i--) {
1296 iommu--;
1297 arch_data--;
1298 oiommu = iommu->iommu_dev;
1299 omap_iommu_detach(oiommu);
1300 iommu->iommu_dev = NULL;
1301 oiommu->domain = NULL;
1302 }
1303init_fail:
1304 omap_iommu_detach_fini(omap_domain);
1141out: 1305out:
1142 spin_unlock(&omap_domain->lock); 1306 spin_unlock(&omap_domain->lock);
1143 return ret; 1307 return ret;
@@ -1146,21 +1310,40 @@ out:
1146static void _omap_iommu_detach_dev(struct omap_iommu_domain *omap_domain, 1310static void _omap_iommu_detach_dev(struct omap_iommu_domain *omap_domain,
1147 struct device *dev) 1311 struct device *dev)
1148{ 1312{
1149 struct omap_iommu *oiommu = dev_to_omap_iommu(dev); 1313 struct omap_iommu_arch_data *arch_data = dev->archdata.iommu;
1314 struct omap_iommu_device *iommu = omap_domain->iommus;
1315 struct omap_iommu *oiommu;
1316 int i;
1317
1318 if (!omap_domain->dev) {
1319 dev_err(dev, "domain has no attached device\n");
1320 return;
1321 }
1150 1322
1151 /* only a single device is supported per domain for now */ 1323 /* only a single device is supported per domain for now */
1152 if (omap_domain->iommu_dev != oiommu) { 1324 if (omap_domain->dev != dev) {
1153 dev_err(dev, "invalid iommu device\n"); 1325 dev_err(dev, "invalid attached device\n");
1154 return; 1326 return;
1155 } 1327 }
1156 1328
1157 iopgtable_clear_entry_all(oiommu); 1329 /*
1330 * cleanup in the reverse order of attachment - this addresses
1331 * any h/w dependencies between multiple instances, if any
1332 */
1333 iommu += (omap_domain->num_iommus - 1);
1334 arch_data += (omap_domain->num_iommus - 1);
1335 for (i = 0; i < omap_domain->num_iommus; i++, iommu--, arch_data--) {
1336 oiommu = iommu->iommu_dev;
1337 iopgtable_clear_entry_all(oiommu);
1338
1339 omap_iommu_detach(oiommu);
1340 iommu->iommu_dev = NULL;
1341 oiommu->domain = NULL;
1342 }
1158 1343
1159 omap_iommu_detach(oiommu); 1344 omap_iommu_detach_fini(omap_domain);
1160 1345
1161 omap_domain->iommu_dev = NULL;
1162 omap_domain->dev = NULL; 1346 omap_domain->dev = NULL;
1163 oiommu->domain = NULL;
1164} 1347}
1165 1348
1166static void omap_iommu_detach_dev(struct iommu_domain *domain, 1349static void omap_iommu_detach_dev(struct iommu_domain *domain,
@@ -1182,18 +1365,7 @@ static struct iommu_domain *omap_iommu_domain_alloc(unsigned type)
1182 1365
1183 omap_domain = kzalloc(sizeof(*omap_domain), GFP_KERNEL); 1366 omap_domain = kzalloc(sizeof(*omap_domain), GFP_KERNEL);
1184 if (!omap_domain) 1367 if (!omap_domain)
1185 goto out; 1368 return NULL;
1186
1187 omap_domain->pgtable = kzalloc(IOPGD_TABLE_SIZE, GFP_KERNEL);
1188 if (!omap_domain->pgtable)
1189 goto fail_nomem;
1190
1191 /*
1192 * should never fail, but please keep this around to ensure
1193 * we keep the hardware happy
1194 */
1195 if (WARN_ON(!IS_ALIGNED((long)omap_domain->pgtable, IOPGD_TABLE_SIZE)))
1196 goto fail_align;
1197 1369
1198 spin_lock_init(&omap_domain->lock); 1370 spin_lock_init(&omap_domain->lock);
1199 1371
@@ -1202,13 +1374,6 @@ static struct iommu_domain *omap_iommu_domain_alloc(unsigned type)
1202 omap_domain->domain.geometry.force_aperture = true; 1374 omap_domain->domain.geometry.force_aperture = true;
1203 1375
1204 return &omap_domain->domain; 1376 return &omap_domain->domain;
1205
1206fail_align:
1207 kfree(omap_domain->pgtable);
1208fail_nomem:
1209 kfree(omap_domain);
1210out:
1211 return NULL;
1212} 1377}
1213 1378
1214static void omap_iommu_domain_free(struct iommu_domain *domain) 1379static void omap_iommu_domain_free(struct iommu_domain *domain)
@@ -1219,10 +1384,9 @@ static void omap_iommu_domain_free(struct iommu_domain *domain)
1219 * An iommu device is still attached 1384 * An iommu device is still attached
1220 * (currently, only one device can be attached) ? 1385 * (currently, only one device can be attached) ?
1221 */ 1386 */
1222 if (omap_domain->iommu_dev) 1387 if (omap_domain->dev)
1223 _omap_iommu_detach_dev(omap_domain, omap_domain->dev); 1388 _omap_iommu_detach_dev(omap_domain, omap_domain->dev);
1224 1389
1225 kfree(omap_domain->pgtable);
1226 kfree(omap_domain); 1390 kfree(omap_domain);
1227} 1391}
1228 1392
@@ -1230,11 +1394,16 @@ static phys_addr_t omap_iommu_iova_to_phys(struct iommu_domain *domain,
1230 dma_addr_t da) 1394 dma_addr_t da)
1231{ 1395{
1232 struct omap_iommu_domain *omap_domain = to_omap_domain(domain); 1396 struct omap_iommu_domain *omap_domain = to_omap_domain(domain);
1233 struct omap_iommu *oiommu = omap_domain->iommu_dev; 1397 struct omap_iommu_device *iommu = omap_domain->iommus;
1398 struct omap_iommu *oiommu = iommu->iommu_dev;
1234 struct device *dev = oiommu->dev; 1399 struct device *dev = oiommu->dev;
1235 u32 *pgd, *pte; 1400 u32 *pgd, *pte;
1236 phys_addr_t ret = 0; 1401 phys_addr_t ret = 0;
1237 1402
1403 /*
1404 * all the iommus within the domain will have identical programming,
1405 * so perform the lookup using just the first iommu
1406 */
1238 iopgtable_lookup_entry(oiommu, da, &pgd, &pte); 1407 iopgtable_lookup_entry(oiommu, da, &pgd, &pte);
1239 1408
1240 if (pte) { 1409 if (pte) {
@@ -1260,11 +1429,12 @@ static phys_addr_t omap_iommu_iova_to_phys(struct iommu_domain *domain,
1260 1429
1261static int omap_iommu_add_device(struct device *dev) 1430static int omap_iommu_add_device(struct device *dev)
1262{ 1431{
1263 struct omap_iommu_arch_data *arch_data; 1432 struct omap_iommu_arch_data *arch_data, *tmp;
1264 struct omap_iommu *oiommu; 1433 struct omap_iommu *oiommu;
1265 struct iommu_group *group; 1434 struct iommu_group *group;
1266 struct device_node *np; 1435 struct device_node *np;
1267 struct platform_device *pdev; 1436 struct platform_device *pdev;
1437 int num_iommus, i;
1268 int ret; 1438 int ret;
1269 1439
1270 /* 1440 /*
@@ -1276,36 +1446,57 @@ static int omap_iommu_add_device(struct device *dev)
1276 if (!dev->of_node) 1446 if (!dev->of_node)
1277 return 0; 1447 return 0;
1278 1448
1279 np = of_parse_phandle(dev->of_node, "iommus", 0); 1449 /*
1280 if (!np) 1450 * retrieve the count of IOMMU nodes using phandle size as element size
1451 * since #iommu-cells = 0 for OMAP
1452 */
1453 num_iommus = of_property_count_elems_of_size(dev->of_node, "iommus",
1454 sizeof(phandle));
1455 if (num_iommus < 0)
1281 return 0; 1456 return 0;
1282 1457
1283 pdev = of_find_device_by_node(np); 1458 arch_data = kzalloc((num_iommus + 1) * sizeof(*arch_data), GFP_KERNEL);
1284 if (WARN_ON(!pdev)) { 1459 if (!arch_data)
1285 of_node_put(np); 1460 return -ENOMEM;
1286 return -EINVAL;
1287 }
1288 1461
1289 oiommu = platform_get_drvdata(pdev); 1462 for (i = 0, tmp = arch_data; i < num_iommus; i++, tmp++) {
1290 if (!oiommu) { 1463 np = of_parse_phandle(dev->of_node, "iommus", i);
1291 of_node_put(np); 1464 if (!np) {
1292 return -EINVAL; 1465 kfree(arch_data);
1293 } 1466 return -EINVAL;
1467 }
1468
1469 pdev = of_find_device_by_node(np);
1470 if (WARN_ON(!pdev)) {
1471 of_node_put(np);
1472 kfree(arch_data);
1473 return -EINVAL;
1474 }
1475
1476 oiommu = platform_get_drvdata(pdev);
1477 if (!oiommu) {
1478 of_node_put(np);
1479 kfree(arch_data);
1480 return -EINVAL;
1481 }
1482
1483 tmp->iommu_dev = oiommu;
1294 1484
1295 arch_data = kzalloc(sizeof(*arch_data), GFP_KERNEL);
1296 if (!arch_data) {
1297 of_node_put(np); 1485 of_node_put(np);
1298 return -ENOMEM;
1299 } 1486 }
1300 1487
1488 /*
1489 * use the first IOMMU alone for the sysfs device linking.
1490 * TODO: Evaluate if a single iommu_group needs to be
1491 * maintained for both IOMMUs
1492 */
1493 oiommu = arch_data->iommu_dev;
1301 ret = iommu_device_link(&oiommu->iommu, dev); 1494 ret = iommu_device_link(&oiommu->iommu, dev);
1302 if (ret) { 1495 if (ret) {
1303 kfree(arch_data); 1496 kfree(arch_data);
1304 of_node_put(np);
1305 return ret; 1497 return ret;
1306 } 1498 }
1307 1499
1308 arch_data->iommu_dev = oiommu;
1309 dev->archdata.iommu = arch_data; 1500 dev->archdata.iommu = arch_data;
1310 1501
1311 /* 1502 /*
@@ -1321,8 +1512,6 @@ static int omap_iommu_add_device(struct device *dev)
1321 } 1512 }
1322 iommu_group_put(group); 1513 iommu_group_put(group);
1323 1514
1324 of_node_put(np);
1325
1326 return 0; 1515 return 0;
1327} 1516}
1328 1517
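
The per-device bookkeeping above relies on two conventions worth spelling out: the number of MMUs is derived from the length of the client's "iommus" phandle list (valid because OMAP MMU nodes use #iommu-cells = <0>, so each list element is exactly one phandle), and the arch_data array is allocated with one extra zeroed element so omap_iommu_count() can walk it until it reaches a NULL iommu_dev sentinel. A small hedged sketch of the counting call; the node labels in the comment are illustrative:

#include <linux/device.h>
#include <linux/of.h>

/*
 * For a client node carrying:  iommus = <&mmu0_dsp>, <&mmu1_dsp>;
 * this returns 2, since each element of the property is one phandle.
 */
static int example_count_client_iommus(struct device *dev)
{
	return of_property_count_elems_of_size(dev->of_node, "iommus",
					       sizeof(phandle));
}
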
diff --git a/drivers/iommu/omap-iommu.h b/drivers/iommu/omap-iommu.h
index a675af29a6ec..1703159ef5af 100644
--- a/drivers/iommu/omap-iommu.h
+++ b/drivers/iommu/omap-iommu.h
@@ -29,17 +29,26 @@ struct iotlb_entry {
29}; 29};
30 30
31/** 31/**
32 * struct omap_iommu_device - omap iommu device data
33 * @pgtable: page table used by an omap iommu attached to a domain
34 * @iommu_dev: pointer to store an omap iommu instance attached to a domain
35 */
36struct omap_iommu_device {
37 u32 *pgtable;
38 struct omap_iommu *iommu_dev;
39};
40
41/**
32 * struct omap_iommu_domain - omap iommu domain 42 * struct omap_iommu_domain - omap iommu domain
33 * @pgtable: the page table 43 * @num_iommus: number of iommus in this domain
34 * @iommu_dev: an omap iommu device attached to this domain. only a single 44 * @iommus: omap iommu device data for all iommus in this domain
35 * iommu device can be attached for now.
36 * @dev: Device using this domain. 45 * @dev: Device using this domain.
37 * @lock: domain lock, should be taken when attaching/detaching 46 * @lock: domain lock, should be taken when attaching/detaching
38 * @domain: generic domain handle used by iommu core code 47 * @domain: generic domain handle used by iommu core code
39 */ 48 */
40struct omap_iommu_domain { 49struct omap_iommu_domain {
41 u32 *pgtable; 50 u32 num_iommus;
42 struct omap_iommu *iommu_dev; 51 struct omap_iommu_device *iommus;
43 struct device *dev; 52 struct device *dev;
44 spinlock_t lock; 53 spinlock_t lock;
45 struct iommu_domain domain; 54 struct iommu_domain domain;
@@ -97,17 +106,6 @@ struct iotlb_lock {
97 short vict; 106 short vict;
98}; 107};
99 108
100/**
101 * dev_to_omap_iommu() - retrieves an omap iommu object from a user device
102 * @dev: iommu client device
103 */
104static inline struct omap_iommu *dev_to_omap_iommu(struct device *dev)
105{
106 struct omap_iommu_arch_data *arch_data = dev->archdata.iommu;
107
108 return arch_data->iommu_dev;
109}
110
111/* 109/*
112 * MMU Register offsets 110 * MMU Register offsets
113 */ 111 */
diff --git a/drivers/iommu/qcom_iommu.c b/drivers/iommu/qcom_iommu.c
index c8a587d034b0..e07f02d00c68 100644
--- a/drivers/iommu/qcom_iommu.c
+++ b/drivers/iommu/qcom_iommu.c
@@ -66,6 +66,7 @@ struct qcom_iommu_ctx {
66 void __iomem *base; 66 void __iomem *base;
67 bool secure_init; 67 bool secure_init;
68 u8 asid; /* asid and ctx bank # are 1:1 */ 68 u8 asid; /* asid and ctx bank # are 1:1 */
69 struct iommu_domain *domain;
69}; 70};
70 71
71struct qcom_iommu_domain { 72struct qcom_iommu_domain {
@@ -194,12 +195,15 @@ static irqreturn_t qcom_iommu_fault(int irq, void *dev)
194 fsynr = iommu_readl(ctx, ARM_SMMU_CB_FSYNR0); 195 fsynr = iommu_readl(ctx, ARM_SMMU_CB_FSYNR0);
195 iova = iommu_readq(ctx, ARM_SMMU_CB_FAR); 196 iova = iommu_readq(ctx, ARM_SMMU_CB_FAR);
196 197
197 dev_err_ratelimited(ctx->dev, 198 if (!report_iommu_fault(ctx->domain, ctx->dev, iova, 0)) {
198 "Unhandled context fault: fsr=0x%x, " 199 dev_err_ratelimited(ctx->dev,
199 "iova=0x%016llx, fsynr=0x%x, cb=%d\n", 200 "Unhandled context fault: fsr=0x%x, "
200 fsr, iova, fsynr, ctx->asid); 201 "iova=0x%016llx, fsynr=0x%x, cb=%d\n",
202 fsr, iova, fsynr, ctx->asid);
203 }
201 204
202 iommu_writel(ctx, ARM_SMMU_CB_FSR, fsr); 205 iommu_writel(ctx, ARM_SMMU_CB_FSR, fsr);
206 iommu_writel(ctx, ARM_SMMU_CB_RESUME, RESUME_TERMINATE);
203 207
204 return IRQ_HANDLED; 208 return IRQ_HANDLED;
205} 209}
@@ -274,12 +278,14 @@ static int qcom_iommu_init_domain(struct iommu_domain *domain,
274 278
275 /* SCTLR */ 279 /* SCTLR */
276 reg = SCTLR_CFIE | SCTLR_CFRE | SCTLR_AFE | SCTLR_TRE | 280 reg = SCTLR_CFIE | SCTLR_CFRE | SCTLR_AFE | SCTLR_TRE |
277 SCTLR_M | SCTLR_S1_ASIDPNE; 281 SCTLR_M | SCTLR_S1_ASIDPNE | SCTLR_CFCFG;
278 282
279 if (IS_ENABLED(CONFIG_BIG_ENDIAN)) 283 if (IS_ENABLED(CONFIG_BIG_ENDIAN))
280 reg |= SCTLR_E; 284 reg |= SCTLR_E;
281 285
282 iommu_writel(ctx, ARM_SMMU_CB_SCTLR, reg); 286 iommu_writel(ctx, ARM_SMMU_CB_SCTLR, reg);
287
288 ctx->domain = domain;
283 } 289 }
284 290
285 mutex_unlock(&qcom_domain->init_mutex); 291 mutex_unlock(&qcom_domain->init_mutex);
@@ -395,6 +401,8 @@ static void qcom_iommu_detach_dev(struct iommu_domain *domain, struct device *de
395 401
396 /* Disable the context bank: */ 402 /* Disable the context bank: */
397 iommu_writel(ctx, ARM_SMMU_CB_SCTLR, 0); 403 iommu_writel(ctx, ARM_SMMU_CB_SCTLR, 0);
404
405 ctx->domain = NULL;
398 } 406 }
399 pm_runtime_put_sync(qcom_iommu->dev); 407 pm_runtime_put_sync(qcom_iommu->dev);
400 408
@@ -443,6 +451,19 @@ static size_t qcom_iommu_unmap(struct iommu_domain *domain, unsigned long iova,
443 return ret; 451 return ret;
444} 452}
445 453
454static void qcom_iommu_iotlb_sync(struct iommu_domain *domain)
455{
456 struct qcom_iommu_domain *qcom_domain = to_qcom_iommu_domain(domain);
457 struct io_pgtable *pgtable = container_of(qcom_domain->pgtbl_ops,
458 struct io_pgtable, ops);
459 if (!qcom_domain->pgtbl_ops)
460 return;
461
462 pm_runtime_get_sync(qcom_domain->iommu->dev);
463 qcom_iommu_tlb_sync(pgtable->cookie);
464 pm_runtime_put_sync(qcom_domain->iommu->dev);
465}
466
446static phys_addr_t qcom_iommu_iova_to_phys(struct iommu_domain *domain, 467static phys_addr_t qcom_iommu_iova_to_phys(struct iommu_domain *domain,
447 dma_addr_t iova) 468 dma_addr_t iova)
448{ 469{
@@ -570,6 +591,8 @@ static const struct iommu_ops qcom_iommu_ops = {
570 .map = qcom_iommu_map, 591 .map = qcom_iommu_map,
571 .unmap = qcom_iommu_unmap, 592 .unmap = qcom_iommu_unmap,
572 .map_sg = default_iommu_map_sg, 593 .map_sg = default_iommu_map_sg,
594 .flush_iotlb_all = qcom_iommu_iotlb_sync,
595 .iotlb_sync = qcom_iommu_iotlb_sync,
573 .iova_to_phys = qcom_iommu_iova_to_phys, 596 .iova_to_phys = qcom_iommu_iova_to_phys,
574 .add_device = qcom_iommu_add_device, 597 .add_device = qcom_iommu_add_device,
575 .remove_device = qcom_iommu_remove_device, 598 .remove_device = qcom_iommu_remove_device,
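
With SCTLR_CFCFG set, a context fault now stalls the offending transaction, report_iommu_fault() gives a consumer-registered handler first crack at it, and the RESUME_TERMINATE write then retires the faulting access instead of leaving the context bank wedged. A sketch of how a consumer driver might hook in through the generic fault-handler API; the handler body and names are illustrative:

#include <linux/device.h>
#include <linux/iommu.h>

static int example_fault_handler(struct iommu_domain *domain,
				 struct device *dev, unsigned long iova,
				 int flags, void *token)
{
	/* token is whatever the consumer passed when registering */
	dev_warn_ratelimited(dev, "context fault at iova %#lx, flags %#x\n",
			     iova, flags);

	/*
	 * returning 0 marks the fault handled, so qcom_iommu_fault()
	 * skips its own error print before terminating the access
	 */
	return 0;
}

/* typically installed right after attaching the domain */
static void example_install_handler(struct iommu_domain *domain)
{
	iommu_set_fault_handler(domain, example_fault_handler, NULL);
}
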
diff --git a/drivers/misc/mic/scif/scif_rma.c b/drivers/misc/mic/scif/scif_rma.c
index 329727e00e97..c824329f7012 100644
--- a/drivers/misc/mic/scif/scif_rma.c
+++ b/drivers/misc/mic/scif/scif_rma.c
@@ -39,8 +39,7 @@ void scif_rma_ep_init(struct scif_endpt *ep)
39 struct scif_endpt_rma_info *rma = &ep->rma_info; 39 struct scif_endpt_rma_info *rma = &ep->rma_info;
40 40
41 mutex_init(&rma->rma_lock); 41 mutex_init(&rma->rma_lock);
42 init_iova_domain(&rma->iovad, PAGE_SIZE, SCIF_IOVA_START_PFN, 42 init_iova_domain(&rma->iovad, PAGE_SIZE, SCIF_IOVA_START_PFN);
43 SCIF_DMA_64BIT_PFN);
44 spin_lock_init(&rma->tc_lock); 43 spin_lock_init(&rma->tc_lock);
45 mutex_init(&rma->mmn_lock); 44 mutex_init(&rma->mmn_lock);
46 INIT_LIST_HEAD(&rma->reg_list); 45 INIT_LIST_HEAD(&rma->reg_list);
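
This is the caller-side fallout of the iova rework further below: init_iova_domain() no longer takes a 32-bit PFN hint, since the allocator now tracks the 32-bit boundary internally via its cached nodes and anchor entry, and callers express limits per allocation instead. A hedged sketch of the updated usage for a driver managing its own iova_domain; the granule and limits are example values:

#include <linux/dma-mapping.h>
#include <linux/iova.h>
#include <linux/sizes.h>

static void example_iova_lifecycle(struct iova_domain *iovad)
{
	struct iova *iova;

	/* granule and lowest allocatable pfn; no 32-bit pfn hint anymore */
	init_iova_domain(iovad, SZ_4K, 1);

	/* the limit is now given per allocation */
	iova = alloc_iova(iovad, 16, DMA_BIT_MASK(32) >> iova_shift(iovad),
			  true);
	if (iova)
		__free_iova(iovad, iova);

	put_iova_domain(iovad);
}
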
diff --git a/include/linux/dmar.h b/include/linux/dmar.h
index e8ffba1052d3..e2433bc50210 100644
--- a/include/linux/dmar.h
+++ b/include/linux/dmar.h
@@ -112,6 +112,7 @@ static inline bool dmar_rcu_check(void)
112 112
113extern int dmar_table_init(void); 113extern int dmar_table_init(void);
114extern int dmar_dev_scope_init(void); 114extern int dmar_dev_scope_init(void);
115extern void dmar_register_bus_notifier(void);
115extern int dmar_parse_dev_scope(void *start, void *end, int *cnt, 116extern int dmar_parse_dev_scope(void *start, void *end, int *cnt,
116 struct dmar_dev_scope **devices, u16 segment); 117 struct dmar_dev_scope **devices, u16 segment);
117extern void *dmar_alloc_dev_scope(void *start, void *end, int *cnt); 118extern void *dmar_alloc_dev_scope(void *start, void *end, int *cnt);
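
The new dmar_register_bus_notifier() declaration lets callers choose the point at which DMAR starts watching PCI bus events, rather than having that happen as a side effect of dmar_dev_scope_init(). It presumably wraps the ordinary bus-notifier mechanism; a generic sketch of that pattern follows, with an illustrative notifier body rather than the actual dmar code:

#include <linux/device.h>
#include <linux/notifier.h>
#include <linux/pci.h>

static int example_bus_notify(struct notifier_block *nb,
			      unsigned long action, void *data)
{
	struct device *dev = data;

	if (action == BUS_NOTIFY_ADD_DEVICE)
		dev_info(dev, "new device on the PCI bus\n");

	return NOTIFY_OK;
}

static struct notifier_block example_nb = {
	.notifier_call = example_bus_notify,
};

static void example_register_bus_notifier(void)
{
	bus_register_notifier(&pci_bus_type, &example_nb);
}
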
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index 485a5b48f038..f3274d9f46a2 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -212,6 +212,7 @@
212#define DMA_FSTS_IQE (1 << 4) 212#define DMA_FSTS_IQE (1 << 4)
213#define DMA_FSTS_ICE (1 << 5) 213#define DMA_FSTS_ICE (1 << 5)
214#define DMA_FSTS_ITE (1 << 6) 214#define DMA_FSTS_ITE (1 << 6)
215#define DMA_FSTS_PRO (1 << 7)
215#define dma_fsts_fault_record_index(s) (((s) >> 8) & 0xff) 216#define dma_fsts_fault_record_index(s) (((s) >> 8) & 0xff)
216 217
217/* FRCD_REG, 32 bits access */ 218/* FRCD_REG, 32 bits access */
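
DMA_FSTS_PRO is the Page Request Overflow bit of the VT-d fault status register, added so the page-request path can acknowledge an overflowed queue; like the other DMA_FSTS_* bits it is write-one-to-clear. A hedged sketch of that acknowledgement idiom only (the real handling lives in the VT-d page-request interrupt thread):

#include <linux/intel-iommu.h>
#include <linux/io.h>

static void example_ack_pr_overflow(struct intel_iommu *iommu)
{
	u32 fsts = readl(iommu->reg + DMAR_FSTS_REG);

	if (fsts & DMA_FSTS_PRO)
		writel(DMA_FSTS_PRO, iommu->reg + DMAR_FSTS_REG);
}
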
diff --git a/include/linux/iova.h b/include/linux/iova.h
index d179b9bf7814..928442dda565 100644
--- a/include/linux/iova.h
+++ b/include/linux/iova.h
@@ -70,10 +70,12 @@ struct iova_fq {
70struct iova_domain { 70struct iova_domain {
71 spinlock_t iova_rbtree_lock; /* Lock to protect update of rbtree */ 71 spinlock_t iova_rbtree_lock; /* Lock to protect update of rbtree */
72 struct rb_root rbroot; /* iova domain rbtree root */ 72 struct rb_root rbroot; /* iova domain rbtree root */
73 struct rb_node *cached32_node; /* Save last alloced node */ 73 struct rb_node *cached_node; /* Save last alloced node */
74 struct rb_node *cached32_node; /* Save last 32-bit alloced node */
74 unsigned long granule; /* pfn granularity for this domain */ 75 unsigned long granule; /* pfn granularity for this domain */
75 unsigned long start_pfn; /* Lower limit for this domain */ 76 unsigned long start_pfn; /* Lower limit for this domain */
76 unsigned long dma_32bit_pfn; 77 unsigned long dma_32bit_pfn;
78 struct iova anchor; /* rbtree lookup anchor */
77 struct iova_rcache rcaches[IOVA_RANGE_CACHE_MAX_SIZE]; /* IOVA range caches */ 79 struct iova_rcache rcaches[IOVA_RANGE_CACHE_MAX_SIZE]; /* IOVA range caches */
78 80
79 iova_flush_cb flush_cb; /* Call-Back function to flush IOMMU 81 iova_flush_cb flush_cb; /* Call-Back function to flush IOMMU
@@ -148,12 +150,12 @@ void queue_iova(struct iova_domain *iovad,
148 unsigned long pfn, unsigned long pages, 150 unsigned long pfn, unsigned long pages,
149 unsigned long data); 151 unsigned long data);
150unsigned long alloc_iova_fast(struct iova_domain *iovad, unsigned long size, 152unsigned long alloc_iova_fast(struct iova_domain *iovad, unsigned long size,
151 unsigned long limit_pfn); 153 unsigned long limit_pfn, bool flush_rcache);
152struct iova *reserve_iova(struct iova_domain *iovad, unsigned long pfn_lo, 154struct iova *reserve_iova(struct iova_domain *iovad, unsigned long pfn_lo,
153 unsigned long pfn_hi); 155 unsigned long pfn_hi);
154void copy_reserved_iova(struct iova_domain *from, struct iova_domain *to); 156void copy_reserved_iova(struct iova_domain *from, struct iova_domain *to);
155void init_iova_domain(struct iova_domain *iovad, unsigned long granule, 157void init_iova_domain(struct iova_domain *iovad, unsigned long granule,
156 unsigned long start_pfn, unsigned long pfn_32bit); 158 unsigned long start_pfn);
157int init_iova_flush_queue(struct iova_domain *iovad, 159int init_iova_flush_queue(struct iova_domain *iovad,
158 iova_flush_cb flush_cb, iova_entry_dtor entry_dtor); 160 iova_flush_cb flush_cb, iova_entry_dtor entry_dtor);
159struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn); 161struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn);
@@ -210,7 +212,8 @@ static inline void queue_iova(struct iova_domain *iovad,
210 212
211static inline unsigned long alloc_iova_fast(struct iova_domain *iovad, 213static inline unsigned long alloc_iova_fast(struct iova_domain *iovad,
212 unsigned long size, 214 unsigned long size,
213 unsigned long limit_pfn) 215 unsigned long limit_pfn,
216 bool flush_rcache)
214{ 217{
215 return 0; 218 return 0;
216} 219}
@@ -229,8 +232,7 @@ static inline void copy_reserved_iova(struct iova_domain *from,
229 232
230static inline void init_iova_domain(struct iova_domain *iovad, 233static inline void init_iova_domain(struct iova_domain *iovad,
231 unsigned long granule, 234 unsigned long granule,
232 unsigned long start_pfn, 235 unsigned long start_pfn)
233 unsigned long pfn_32bit)
234{ 236{
235} 237}
236 238
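
The flush_rcache flag added to alloc_iova_fast() lets a caller with its own fallback skip the expensive step of flushing every CPU's rcache and retrying. A sketch of the intended pattern, mirroring what the DMA-IOMMU layer does: try a preferred 32-bit window without flushing, then retry at the full limit with the flush enabled. The helper name is illustrative:

#include <linux/dma-mapping.h>
#include <linux/iova.h>

static unsigned long example_alloc_pfn(struct iova_domain *iovad,
				       unsigned long nr_pages,
				       unsigned long limit_pfn)
{
	unsigned long pfn_32bit = DMA_BIT_MASK(32) >> iova_shift(iovad);
	unsigned long pfn = 0;

	/* cheap attempt first: failure here is recoverable, so don't flush */
	if (limit_pfn > pfn_32bit)
		pfn = alloc_iova_fast(iovad, nr_pages, pfn_32bit, false);

	/* last resort: let the allocator flush the rcaches and retry */
	if (!pfn)
		pfn = alloc_iova_fast(iovad, nr_pages, limit_pfn, true);

	return pfn;
}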