aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Will Deacon <will.deacon@arm.com> 2014-11-14 12:17:54 -0500
committer Will Deacon <will.deacon@arm.com> 2015-01-19 09:46:45 -0500
commit 518f7136244c167538f732691be589959310b295 (patch)
tree 6cf1ecefa2e905ea847240e54103c07bd98b905a
parent c896c132b01895fd1445d178e36155b671c6f9ee (diff)
iommu/arm-smmu: make use of generic LPAE allocator
The ARM SMMU can walk LPAE page tables, so make use of the generic allocator.

Signed-off-by: Will Deacon <will.deacon@arm.com>
-rw-r--r-- arch/arm64/Kconfig 1
-rw-r--r-- drivers/iommu/Kconfig 6
-rw-r--r-- drivers/iommu/arm-smmu.c 886
3 files changed, 266 insertions, 627 deletions
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index b1f9a20a3677..528c3fd2d4c1 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -349,7 +349,6 @@ config ARM64_VA_BITS_42
349 349
350config ARM64_VA_BITS_48 350config ARM64_VA_BITS_48
351 bool "48-bit" 351 bool "48-bit"
352 depends on !ARM_SMMU
353 352
354endchoice 353endchoice
355 354
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index 9fd9909867cd..87060ad6829d 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -330,13 +330,13 @@ config SPAPR_TCE_IOMMU
330 330
331config ARM_SMMU 331config ARM_SMMU
332 bool "ARM Ltd. System MMU (SMMU) Support" 332 bool "ARM Ltd. System MMU (SMMU) Support"
333 depends on ARM64 || (ARM_LPAE && OF) 333 depends on ARM64 || ARM
334 select IOMMU_API 334 select IOMMU_API
335 select IOMMU_IO_PGTABLE_LPAE
335 select ARM_DMA_USE_IOMMU if ARM 336 select ARM_DMA_USE_IOMMU if ARM
336 help 337 help
337 Support for implementations of the ARM System MMU architecture 338 Support for implementations of the ARM System MMU architecture
338 versions 1 and 2. The driver supports both v7l and v8l table 339 versions 1 and 2.
339 formats with 4k and 64k page sizes.
340 340
341 Say Y here if your SoC includes an IOMMU device implementing 341 Say Y here if your SoC includes an IOMMU device implementing
342 the ARM SMMU architecture. 342 the ARM SMMU architecture.
diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index 6cd47b75286f..919ba433d219 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -23,8 +23,6 @@
23 * - Stream-matching and stream-indexing 23 * - Stream-matching and stream-indexing
24 * - v7/v8 long-descriptor format 24 * - v7/v8 long-descriptor format
25 * - Non-secure access to the SMMU 25 * - Non-secure access to the SMMU
26 * - 4k and 64k pages, with contiguous pte hints.
27 * - Up to 48-bit addressing (dependent on VA_BITS)
28 * - Context fault reporting 26 * - Context fault reporting
29 */ 27 */
30 28
@@ -36,7 +34,6 @@
36#include <linux/interrupt.h> 34#include <linux/interrupt.h>
37#include <linux/io.h> 35#include <linux/io.h>
38#include <linux/iommu.h> 36#include <linux/iommu.h>
39#include <linux/mm.h>
40#include <linux/module.h> 37#include <linux/module.h>
41#include <linux/of.h> 38#include <linux/of.h>
42#include <linux/pci.h> 39#include <linux/pci.h>
@@ -46,7 +43,7 @@
46 43
47#include <linux/amba/bus.h> 44#include <linux/amba/bus.h>
48 45
49#include <asm/pgalloc.h> 46#include "io-pgtable.h"
50 47
51/* Maximum number of stream IDs assigned to a single device */ 48/* Maximum number of stream IDs assigned to a single device */
52#define MAX_MASTER_STREAMIDS MAX_PHANDLE_ARGS 49#define MAX_MASTER_STREAMIDS MAX_PHANDLE_ARGS
@@ -71,40 +68,6 @@
71 ((smmu->options & ARM_SMMU_OPT_SECURE_CFG_ACCESS) \ 68 ((smmu->options & ARM_SMMU_OPT_SECURE_CFG_ACCESS) \
72 ? 0x400 : 0)) 69 ? 0x400 : 0))
73 70
74/* Page table bits */
75#define ARM_SMMU_PTE_XN (((pteval_t)3) << 53)
76#define ARM_SMMU_PTE_CONT (((pteval_t)1) << 52)
77#define ARM_SMMU_PTE_AF (((pteval_t)1) << 10)
78#define ARM_SMMU_PTE_SH_NS (((pteval_t)0) << 8)
79#define ARM_SMMU_PTE_SH_OS (((pteval_t)2) << 8)
80#define ARM_SMMU_PTE_SH_IS (((pteval_t)3) << 8)
81#define ARM_SMMU_PTE_PAGE (((pteval_t)3) << 0)
82
83#if PAGE_SIZE == SZ_4K
84#define ARM_SMMU_PTE_CONT_ENTRIES 16
85#elif PAGE_SIZE == SZ_64K
86#define ARM_SMMU_PTE_CONT_ENTRIES 32
87#else
88#define ARM_SMMU_PTE_CONT_ENTRIES 1
89#endif
90
91#define ARM_SMMU_PTE_CONT_SIZE (PAGE_SIZE * ARM_SMMU_PTE_CONT_ENTRIES)
92#define ARM_SMMU_PTE_CONT_MASK (~(ARM_SMMU_PTE_CONT_SIZE - 1))
93
94/* Stage-1 PTE */
95#define ARM_SMMU_PTE_AP_UNPRIV (((pteval_t)1) << 6)
96#define ARM_SMMU_PTE_AP_RDONLY (((pteval_t)2) << 6)
97#define ARM_SMMU_PTE_ATTRINDX_SHIFT 2
98#define ARM_SMMU_PTE_nG (((pteval_t)1) << 11)
99
100/* Stage-2 PTE */
101#define ARM_SMMU_PTE_HAP_FAULT (((pteval_t)0) << 6)
102#define ARM_SMMU_PTE_HAP_READ (((pteval_t)1) << 6)
103#define ARM_SMMU_PTE_HAP_WRITE (((pteval_t)2) << 6)
104#define ARM_SMMU_PTE_MEMATTR_OIWB (((pteval_t)0xf) << 2)
105#define ARM_SMMU_PTE_MEMATTR_NC (((pteval_t)0x5) << 2)
106#define ARM_SMMU_PTE_MEMATTR_DEV (((pteval_t)0x1) << 2)
107
108/* Configuration registers */ 71/* Configuration registers */
109#define ARM_SMMU_GR0_sCR0 0x0 72#define ARM_SMMU_GR0_sCR0 0x0
110#define sCR0_CLIENTPD (1 << 0) 73#define sCR0_CLIENTPD (1 << 0)
@@ -132,17 +95,11 @@
132#define ARM_SMMU_GR0_sGFSYNR0 0x50 95#define ARM_SMMU_GR0_sGFSYNR0 0x50
133#define ARM_SMMU_GR0_sGFSYNR1 0x54 96#define ARM_SMMU_GR0_sGFSYNR1 0x54
134#define ARM_SMMU_GR0_sGFSYNR2 0x58 97#define ARM_SMMU_GR0_sGFSYNR2 0x58
135#define ARM_SMMU_GR0_PIDR0 0xfe0
136#define ARM_SMMU_GR0_PIDR1 0xfe4
137#define ARM_SMMU_GR0_PIDR2 0xfe8
138 98
139#define ID0_S1TS (1 << 30) 99#define ID0_S1TS (1 << 30)
140#define ID0_S2TS (1 << 29) 100#define ID0_S2TS (1 << 29)
141#define ID0_NTS (1 << 28) 101#define ID0_NTS (1 << 28)
142#define ID0_SMS (1 << 27) 102#define ID0_SMS (1 << 27)
143#define ID0_PTFS_SHIFT 24
144#define ID0_PTFS_MASK 0x2
145#define ID0_PTFS_V8_ONLY 0x2
146#define ID0_CTTW (1 << 14) 103#define ID0_CTTW (1 << 14)
147#define ID0_NUMIRPT_SHIFT 16 104#define ID0_NUMIRPT_SHIFT 16
148#define ID0_NUMIRPT_MASK 0xff 105#define ID0_NUMIRPT_MASK 0xff
@@ -169,9 +126,6 @@
169#define ID2_PTFS_16K (1 << 13) 126#define ID2_PTFS_16K (1 << 13)
170#define ID2_PTFS_64K (1 << 14) 127#define ID2_PTFS_64K (1 << 14)
171 128
172#define PIDR2_ARCH_SHIFT 4
173#define PIDR2_ARCH_MASK 0xf
174
175/* Global TLB invalidation */ 129/* Global TLB invalidation */
176#define ARM_SMMU_GR0_STLBIALL 0x60 130#define ARM_SMMU_GR0_STLBIALL 0x60
177#define ARM_SMMU_GR0_TLBIVMID 0x64 131#define ARM_SMMU_GR0_TLBIVMID 0x64
@@ -231,13 +185,20 @@
231#define ARM_SMMU_CB_TTBCR2 0x10 185#define ARM_SMMU_CB_TTBCR2 0x10
232#define ARM_SMMU_CB_TTBR0_LO 0x20 186#define ARM_SMMU_CB_TTBR0_LO 0x20
233#define ARM_SMMU_CB_TTBR0_HI 0x24 187#define ARM_SMMU_CB_TTBR0_HI 0x24
188#define ARM_SMMU_CB_TTBR1_LO 0x28
189#define ARM_SMMU_CB_TTBR1_HI 0x2c
234#define ARM_SMMU_CB_TTBCR 0x30 190#define ARM_SMMU_CB_TTBCR 0x30
235#define ARM_SMMU_CB_S1_MAIR0 0x38 191#define ARM_SMMU_CB_S1_MAIR0 0x38
192#define ARM_SMMU_CB_S1_MAIR1 0x3c
236#define ARM_SMMU_CB_FSR 0x58 193#define ARM_SMMU_CB_FSR 0x58
237#define ARM_SMMU_CB_FAR_LO 0x60 194#define ARM_SMMU_CB_FAR_LO 0x60
238#define ARM_SMMU_CB_FAR_HI 0x64 195#define ARM_SMMU_CB_FAR_HI 0x64
239#define ARM_SMMU_CB_FSYNR0 0x68 196#define ARM_SMMU_CB_FSYNR0 0x68
197#define ARM_SMMU_CB_S1_TLBIVA 0x600
240#define ARM_SMMU_CB_S1_TLBIASID 0x610 198#define ARM_SMMU_CB_S1_TLBIASID 0x610
199#define ARM_SMMU_CB_S1_TLBIVAL 0x620
200#define ARM_SMMU_CB_S2_TLBIIPAS2 0x630
201#define ARM_SMMU_CB_S2_TLBIIPAS2L 0x638
241 202
242#define SCTLR_S1_ASIDPNE (1 << 12) 203#define SCTLR_S1_ASIDPNE (1 << 12)
243#define SCTLR_CFCFG (1 << 7) 204#define SCTLR_CFCFG (1 << 7)
@@ -252,44 +213,9 @@
252#define RESUME_RETRY (0 << 0) 213#define RESUME_RETRY (0 << 0)
253#define RESUME_TERMINATE (1 << 0) 214#define RESUME_TERMINATE (1 << 0)
254 215
255#define TTBCR_EAE (1 << 31)
256
257#define TTBCR_PASIZE_SHIFT 16
258#define TTBCR_PASIZE_MASK 0x7
259
260#define TTBCR_TG0_4K (0 << 14)
261#define TTBCR_TG0_64K (1 << 14)
262
263#define TTBCR_SH0_SHIFT 12
264#define TTBCR_SH0_MASK 0x3
265#define TTBCR_SH_NS 0
266#define TTBCR_SH_OS 2
267#define TTBCR_SH_IS 3
268
269#define TTBCR_ORGN0_SHIFT 10
270#define TTBCR_IRGN0_SHIFT 8
271#define TTBCR_RGN_MASK 0x3
272#define TTBCR_RGN_NC 0
273#define TTBCR_RGN_WBWA 1
274#define TTBCR_RGN_WT 2
275#define TTBCR_RGN_WB 3
276
277#define TTBCR_SL0_SHIFT 6
278#define TTBCR_SL0_MASK 0x3
279#define TTBCR_SL0_LVL_2 0
280#define TTBCR_SL0_LVL_1 1
281
282#define TTBCR_T1SZ_SHIFT 16
283#define TTBCR_T0SZ_SHIFT 0
284#define TTBCR_SZ_MASK 0xf
285
286#define TTBCR2_SEP_SHIFT 15 216#define TTBCR2_SEP_SHIFT 15
287#define TTBCR2_SEP_MASK 0x7 217#define TTBCR2_SEP_MASK 0x7
288 218
289#define TTBCR2_PASIZE_SHIFT 0
290#define TTBCR2_PASIZE_MASK 0x7
291
292/* Common definitions for PASize and SEP fields */
293#define TTBCR2_ADDR_32 0 219#define TTBCR2_ADDR_32 0
294#define TTBCR2_ADDR_36 1 220#define TTBCR2_ADDR_36 1
295#define TTBCR2_ADDR_40 2 221#define TTBCR2_ADDR_40 2
@@ -297,16 +223,7 @@
297#define TTBCR2_ADDR_44 4 223#define TTBCR2_ADDR_44 4
298#define TTBCR2_ADDR_48 5 224#define TTBCR2_ADDR_48 5
299 225
300#define TTBRn_HI_ASID_SHIFT 16 226#define TTBRn_HI_ASID_SHIFT 16
301
302#define MAIR_ATTR_SHIFT(n) ((n) << 3)
303#define MAIR_ATTR_MASK 0xff
304#define MAIR_ATTR_DEVICE 0x04
305#define MAIR_ATTR_NC 0x44
306#define MAIR_ATTR_WBRWA 0xff
307#define MAIR_ATTR_IDX_NC 0
308#define MAIR_ATTR_IDX_CACHE 1
309#define MAIR_ATTR_IDX_DEV 2
310 227
311#define FSR_MULTI (1 << 31) 228#define FSR_MULTI (1 << 31)
312#define FSR_SS (1 << 30) 229#define FSR_SS (1 << 30)
@@ -380,10 +297,9 @@ struct arm_smmu_device {
380 u32 num_mapping_groups; 297 u32 num_mapping_groups;
381 DECLARE_BITMAP(smr_map, ARM_SMMU_MAX_SMRS); 298 DECLARE_BITMAP(smr_map, ARM_SMMU_MAX_SMRS);
382 299
383 unsigned long s1_input_size; 300 unsigned long va_size;
384 unsigned long s1_output_size; 301 unsigned long ipa_size;
385 unsigned long s2_input_size; 302 unsigned long pa_size;
386 unsigned long s2_output_size;
387 303
388 u32 num_global_irqs; 304 u32 num_global_irqs;
389 u32 num_context_irqs; 305 u32 num_context_irqs;
@@ -397,7 +313,6 @@ struct arm_smmu_cfg {
397 u8 cbndx; 313 u8 cbndx;
398 u8 irptndx; 314 u8 irptndx;
399 u32 cbar; 315 u32 cbar;
400 pgd_t *pgd;
401}; 316};
402#define INVALID_IRPTNDX 0xff 317#define INVALID_IRPTNDX 0xff
403 318
@@ -412,11 +327,15 @@ enum arm_smmu_domain_stage {
412 327
413struct arm_smmu_domain { 328struct arm_smmu_domain {
414 struct arm_smmu_device *smmu; 329 struct arm_smmu_device *smmu;
330 struct io_pgtable_ops *pgtbl_ops;
331 spinlock_t pgtbl_lock;
415 struct arm_smmu_cfg cfg; 332 struct arm_smmu_cfg cfg;
416 enum arm_smmu_domain_stage stage; 333 enum arm_smmu_domain_stage stage;
417 spinlock_t lock; 334 struct mutex init_mutex; /* Protects smmu pointer */
418}; 335};
419 336
337static struct iommu_ops arm_smmu_ops;
338
420static DEFINE_SPINLOCK(arm_smmu_devices_lock); 339static DEFINE_SPINLOCK(arm_smmu_devices_lock);
421static LIST_HEAD(arm_smmu_devices); 340static LIST_HEAD(arm_smmu_devices);
422 341
@@ -597,7 +516,7 @@ static void __arm_smmu_free_bitmap(unsigned long *map, int idx)
597} 516}
598 517
599/* Wait for any pending TLB invalidations to complete */ 518/* Wait for any pending TLB invalidations to complete */
600static void arm_smmu_tlb_sync(struct arm_smmu_device *smmu) 519static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu)
601{ 520{
602 int count = 0; 521 int count = 0;
603 void __iomem *gr0_base = ARM_SMMU_GR0(smmu); 522 void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
@@ -615,12 +534,19 @@ static void arm_smmu_tlb_sync(struct arm_smmu_device *smmu)
615 } 534 }
616} 535}
617 536
618static void arm_smmu_tlb_inv_context(struct arm_smmu_domain *smmu_domain) 537static void arm_smmu_tlb_sync(void *cookie)
538{
539 struct arm_smmu_domain *smmu_domain = cookie;
540 __arm_smmu_tlb_sync(smmu_domain->smmu);
541}
542
543static void arm_smmu_tlb_inv_context(void *cookie)
619{ 544{
545 struct arm_smmu_domain *smmu_domain = cookie;
620 struct arm_smmu_cfg *cfg = &smmu_domain->cfg; 546 struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
621 struct arm_smmu_device *smmu = smmu_domain->smmu; 547 struct arm_smmu_device *smmu = smmu_domain->smmu;
622 void __iomem *base = ARM_SMMU_GR0(smmu);
623 bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS; 548 bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
549 void __iomem *base;
624 550
625 if (stage1) { 551 if (stage1) {
626 base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx); 552 base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx);
@@ -632,9 +558,76 @@ static void arm_smmu_tlb_inv_context(struct arm_smmu_domain *smmu_domain)
632 base + ARM_SMMU_GR0_TLBIVMID); 558 base + ARM_SMMU_GR0_TLBIVMID);
633 } 559 }
634 560
635 arm_smmu_tlb_sync(smmu); 561 __arm_smmu_tlb_sync(smmu);
562}
563
564static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
565 bool leaf, void *cookie)
566{
567 struct arm_smmu_domain *smmu_domain = cookie;
568 struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
569 struct arm_smmu_device *smmu = smmu_domain->smmu;
570 bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
571 void __iomem *reg;
572
573 if (stage1) {
574 reg = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx);
575 reg += leaf ? ARM_SMMU_CB_S1_TLBIVAL : ARM_SMMU_CB_S1_TLBIVA;
576
577 if (!IS_ENABLED(CONFIG_64BIT) || smmu->version == ARM_SMMU_V1) {
578 iova &= ~12UL;
579 iova |= ARM_SMMU_CB_ASID(cfg);
580 writel_relaxed(iova, reg);
581#ifdef CONFIG_64BIT
582 } else {
583 iova >>= 12;
584 iova |= (u64)ARM_SMMU_CB_ASID(cfg) << 48;
585 writeq_relaxed(iova, reg);
586#endif
587 }
588#ifdef CONFIG_64BIT
589 } else if (smmu->version == ARM_SMMU_V2) {
590 reg = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx);
591 reg += leaf ? ARM_SMMU_CB_S2_TLBIIPAS2L :
592 ARM_SMMU_CB_S2_TLBIIPAS2;
593 writeq_relaxed(iova >> 12, reg);
594#endif
595 } else {
596 reg = ARM_SMMU_GR0(smmu) + ARM_SMMU_GR0_TLBIVMID;
597 writel_relaxed(ARM_SMMU_CB_VMID(cfg), reg);
598 }
599}
600
601static void arm_smmu_flush_pgtable(void *addr, size_t size, void *cookie)
602{
603 struct arm_smmu_domain *smmu_domain = cookie;
604 struct arm_smmu_device *smmu = smmu_domain->smmu;
605 unsigned long offset = (unsigned long)addr & ~PAGE_MASK;
606
607
608 /* Ensure new page tables are visible to the hardware walker */
609 if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK) {
610 dsb(ishst);
611 } else {
612 /*
613 * If the SMMU can't walk tables in the CPU caches, treat them
614 * like non-coherent DMA since we need to flush the new entries
615 * all the way out to memory. There's no possibility of
616 * recursion here as the SMMU table walker will not be wired
617 * through another SMMU.
618 */
619 dma_map_page(smmu->dev, virt_to_page(addr), offset, size,
620 DMA_TO_DEVICE);
621 }
636} 622}
637 623
624static struct iommu_gather_ops arm_smmu_gather_ops = {
625 .tlb_flush_all = arm_smmu_tlb_inv_context,
626 .tlb_add_flush = arm_smmu_tlb_inv_range_nosync,
627 .tlb_sync = arm_smmu_tlb_sync,
628 .flush_pgtable = arm_smmu_flush_pgtable,
629};
630
638static irqreturn_t arm_smmu_context_fault(int irq, void *dev) 631static irqreturn_t arm_smmu_context_fault(int irq, void *dev)
639{ 632{
640 int flags, ret; 633 int flags, ret;
@@ -712,29 +705,8 @@ static irqreturn_t arm_smmu_global_fault(int irq, void *dev)
712 return IRQ_HANDLED; 705 return IRQ_HANDLED;
713} 706}
714 707
715static void arm_smmu_flush_pgtable(struct arm_smmu_device *smmu, void *addr, 708static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain,
716 size_t size) 709 struct io_pgtable_cfg *pgtbl_cfg)
717{
718 unsigned long offset = (unsigned long)addr & ~PAGE_MASK;
719
720
721 /* Ensure new page tables are visible to the hardware walker */
722 if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK) {
723 dsb(ishst);
724 } else {
725 /*
726 * If the SMMU can't walk tables in the CPU caches, treat them
727 * like non-coherent DMA since we need to flush the new entries
728 * all the way out to memory. There's no possibility of
729 * recursion here as the SMMU table walker will not be wired
730 * through another SMMU.
731 */
732 dma_map_page(smmu->dev, virt_to_page(addr), offset, size,
733 DMA_TO_DEVICE);
734 }
735}
736
737static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain)
738{ 710{
739 u32 reg; 711 u32 reg;
740 bool stage1; 712 bool stage1;
@@ -771,124 +743,68 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain)
771#else 743#else
772 reg = CBA2R_RW64_32BIT; 744 reg = CBA2R_RW64_32BIT;
773#endif 745#endif
774 writel_relaxed(reg, 746 writel_relaxed(reg, gr1_base + ARM_SMMU_GR1_CBA2R(cfg->cbndx));
775 gr1_base + ARM_SMMU_GR1_CBA2R(cfg->cbndx));
776
777 /* TTBCR2 */
778 switch (smmu->s1_input_size) {
779 case 32:
780 reg = (TTBCR2_ADDR_32 << TTBCR2_SEP_SHIFT);
781 break;
782 case 36:
783 reg = (TTBCR2_ADDR_36 << TTBCR2_SEP_SHIFT);
784 break;
785 case 39:
786 case 40:
787 reg = (TTBCR2_ADDR_40 << TTBCR2_SEP_SHIFT);
788 break;
789 case 42:
790 reg = (TTBCR2_ADDR_42 << TTBCR2_SEP_SHIFT);
791 break;
792 case 44:
793 reg = (TTBCR2_ADDR_44 << TTBCR2_SEP_SHIFT);
794 break;
795 case 48:
796 reg = (TTBCR2_ADDR_48 << TTBCR2_SEP_SHIFT);
797 break;
798 }
799
800 switch (smmu->s1_output_size) {
801 case 32:
802 reg |= (TTBCR2_ADDR_32 << TTBCR2_PASIZE_SHIFT);
803 break;
804 case 36:
805 reg |= (TTBCR2_ADDR_36 << TTBCR2_PASIZE_SHIFT);
806 break;
807 case 39:
808 case 40:
809 reg |= (TTBCR2_ADDR_40 << TTBCR2_PASIZE_SHIFT);
810 break;
811 case 42:
812 reg |= (TTBCR2_ADDR_42 << TTBCR2_PASIZE_SHIFT);
813 break;
814 case 44:
815 reg |= (TTBCR2_ADDR_44 << TTBCR2_PASIZE_SHIFT);
816 break;
817 case 48:
818 reg |= (TTBCR2_ADDR_48 << TTBCR2_PASIZE_SHIFT);
819 break;
820 }
821
822 if (stage1)
823 writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBCR2);
824 } 747 }
825 748
826 /* TTBR0 */ 749 /* TTBRs */
827 arm_smmu_flush_pgtable(smmu, cfg->pgd, 750 if (stage1) {
828 PTRS_PER_PGD * sizeof(pgd_t)); 751 reg = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];
829 reg = __pa(cfg->pgd); 752 writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR0_LO);
830 writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR0_LO); 753 reg = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0] >> 32;
831 reg = (phys_addr_t)__pa(cfg->pgd) >> 32;
832 if (stage1)
833 reg |= ARM_SMMU_CB_ASID(cfg) << TTBRn_HI_ASID_SHIFT; 754 reg |= ARM_SMMU_CB_ASID(cfg) << TTBRn_HI_ASID_SHIFT;
834 writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR0_HI); 755 writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR0_HI);
835
836 /*
837 * TTBCR
838 * We use long descriptor, with inner-shareable WBWA tables in TTBR0.
839 */
840 if (smmu->version > ARM_SMMU_V1) {
841 if (PAGE_SIZE == SZ_4K)
842 reg = TTBCR_TG0_4K;
843 else
844 reg = TTBCR_TG0_64K;
845 756
846 if (!stage1) { 757 reg = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1];
847 reg |= (64 - smmu->s2_input_size) << TTBCR_T0SZ_SHIFT; 758 writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR1_LO);
759 reg = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1] >> 32;
760 reg |= ARM_SMMU_CB_ASID(cfg) << TTBRn_HI_ASID_SHIFT;
761 writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR1_HI);
762 } else {
763 reg = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
764 writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR0_LO);
765 reg = pgtbl_cfg->arm_lpae_s2_cfg.vttbr >> 32;
766 writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR0_HI);
767 }
848 768
849 switch (smmu->s2_output_size) { 769 /* TTBCR */
770 if (stage1) {
771 reg = pgtbl_cfg->arm_lpae_s1_cfg.tcr;
772 writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBCR);
773 if (smmu->version > ARM_SMMU_V1) {
774 reg = pgtbl_cfg->arm_lpae_s1_cfg.tcr >> 32;
775 switch (smmu->va_size) {
850 case 32: 776 case 32:
851 reg |= (TTBCR2_ADDR_32 << TTBCR_PASIZE_SHIFT); 777 reg |= (TTBCR2_ADDR_32 << TTBCR2_SEP_SHIFT);
852 break; 778 break;
853 case 36: 779 case 36:
854 reg |= (TTBCR2_ADDR_36 << TTBCR_PASIZE_SHIFT); 780 reg |= (TTBCR2_ADDR_36 << TTBCR2_SEP_SHIFT);
855 break; 781 break;
856 case 40: 782 case 40:
857 reg |= (TTBCR2_ADDR_40 << TTBCR_PASIZE_SHIFT); 783 reg |= (TTBCR2_ADDR_40 << TTBCR2_SEP_SHIFT);
858 break; 784 break;
859 case 42: 785 case 42:
860 reg |= (TTBCR2_ADDR_42 << TTBCR_PASIZE_SHIFT); 786 reg |= (TTBCR2_ADDR_42 << TTBCR2_SEP_SHIFT);
861 break; 787 break;
862 case 44: 788 case 44:
863 reg |= (TTBCR2_ADDR_44 << TTBCR_PASIZE_SHIFT); 789 reg |= (TTBCR2_ADDR_44 << TTBCR2_SEP_SHIFT);
864 break; 790 break;
865 case 48: 791 case 48:
866 reg |= (TTBCR2_ADDR_48 << TTBCR_PASIZE_SHIFT); 792 reg |= (TTBCR2_ADDR_48 << TTBCR2_SEP_SHIFT);
867 break; 793 break;
868 } 794 }
869 } else { 795 writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBCR2);
870 reg |= (64 - smmu->s1_input_size) << TTBCR_T0SZ_SHIFT;
871 } 796 }
872 } else { 797 } else {
873 reg = 0; 798 reg = pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
799 writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBCR);
874 } 800 }
875 801
876 reg |= TTBCR_EAE | 802 /* MAIRs (stage-1 only) */
877 (TTBCR_SH_IS << TTBCR_SH0_SHIFT) |
878 (TTBCR_RGN_WBWA << TTBCR_ORGN0_SHIFT) |
879 (TTBCR_RGN_WBWA << TTBCR_IRGN0_SHIFT);
880
881 if (!stage1)
882 reg |= (TTBCR_SL0_LVL_1 << TTBCR_SL0_SHIFT);
883
884 writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBCR);
885
886 /* MAIR0 (stage-1 only) */
887 if (stage1) { 803 if (stage1) {
888 reg = (MAIR_ATTR_NC << MAIR_ATTR_SHIFT(MAIR_ATTR_IDX_NC)) | 804 reg = pgtbl_cfg->arm_lpae_s1_cfg.mair[0];
889 (MAIR_ATTR_WBRWA << MAIR_ATTR_SHIFT(MAIR_ATTR_IDX_CACHE)) |
890 (MAIR_ATTR_DEVICE << MAIR_ATTR_SHIFT(MAIR_ATTR_IDX_DEV));
891 writel_relaxed(reg, cb_base + ARM_SMMU_CB_S1_MAIR0); 805 writel_relaxed(reg, cb_base + ARM_SMMU_CB_S1_MAIR0);
806 reg = pgtbl_cfg->arm_lpae_s1_cfg.mair[1];
807 writel_relaxed(reg, cb_base + ARM_SMMU_CB_S1_MAIR1);
892 } 808 }
893 809
894 /* SCTLR */ 810 /* SCTLR */
@@ -905,11 +821,14 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
905 struct arm_smmu_device *smmu) 821 struct arm_smmu_device *smmu)
906{ 822{
907 int irq, start, ret = 0; 823 int irq, start, ret = 0;
908 unsigned long flags; 824 unsigned long ias, oas;
825 struct io_pgtable_ops *pgtbl_ops;
826 struct io_pgtable_cfg pgtbl_cfg;
827 enum io_pgtable_fmt fmt;
909 struct arm_smmu_domain *smmu_domain = domain->priv; 828 struct arm_smmu_domain *smmu_domain = domain->priv;
910 struct arm_smmu_cfg *cfg = &smmu_domain->cfg; 829 struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
911 830
912 spin_lock_irqsave(&smmu_domain->lock, flags); 831 mutex_lock(&smmu_domain->init_mutex);
913 if (smmu_domain->smmu) 832 if (smmu_domain->smmu)
914 goto out_unlock; 833 goto out_unlock;
915 834
@@ -940,6 +859,12 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
940 case ARM_SMMU_DOMAIN_S1: 859 case ARM_SMMU_DOMAIN_S1:
941 cfg->cbar = CBAR_TYPE_S1_TRANS_S2_BYPASS; 860 cfg->cbar = CBAR_TYPE_S1_TRANS_S2_BYPASS;
942 start = smmu->num_s2_context_banks; 861 start = smmu->num_s2_context_banks;
862 ias = smmu->va_size;
863 oas = smmu->ipa_size;
864 if (IS_ENABLED(CONFIG_64BIT))
865 fmt = ARM_64_LPAE_S1;
866 else
867 fmt = ARM_32_LPAE_S1;
943 break; 868 break;
944 case ARM_SMMU_DOMAIN_NESTED: 869 case ARM_SMMU_DOMAIN_NESTED:
945 /* 870 /*
@@ -949,6 +874,12 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
949 case ARM_SMMU_DOMAIN_S2: 874 case ARM_SMMU_DOMAIN_S2:
950 cfg->cbar = CBAR_TYPE_S2_TRANS; 875 cfg->cbar = CBAR_TYPE_S2_TRANS;
951 start = 0; 876 start = 0;
877 ias = smmu->ipa_size;
878 oas = smmu->pa_size;
879 if (IS_ENABLED(CONFIG_64BIT))
880 fmt = ARM_64_LPAE_S2;
881 else
882 fmt = ARM_32_LPAE_S2;
952 break; 883 break;
953 default: 884 default:
954 ret = -EINVAL; 885 ret = -EINVAL;
@@ -968,10 +899,30 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
968 cfg->irptndx = cfg->cbndx; 899 cfg->irptndx = cfg->cbndx;
969 } 900 }
970 901
971 ACCESS_ONCE(smmu_domain->smmu) = smmu; 902 pgtbl_cfg = (struct io_pgtable_cfg) {
972 arm_smmu_init_context_bank(smmu_domain); 903 .pgsize_bitmap = arm_smmu_ops.pgsize_bitmap,
973 spin_unlock_irqrestore(&smmu_domain->lock, flags); 904 .ias = ias,
905 .oas = oas,
906 .tlb = &arm_smmu_gather_ops,
907 };
908
909 smmu_domain->smmu = smmu;
910 pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
911 if (!pgtbl_ops) {
912 ret = -ENOMEM;
913 goto out_clear_smmu;
914 }
915
916 /* Update our support page sizes to reflect the page table format */
917 arm_smmu_ops.pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
918
919 /* Initialise the context bank with our page table cfg */
920 arm_smmu_init_context_bank(smmu_domain, &pgtbl_cfg);
974 921
922 /*
923 * Request context fault interrupt. Do this last to avoid the
924 * handler seeing a half-initialised domain state.
925 */
975 irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx]; 926 irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
976 ret = request_irq(irq, arm_smmu_context_fault, IRQF_SHARED, 927 ret = request_irq(irq, arm_smmu_context_fault, IRQF_SHARED,
977 "arm-smmu-context-fault", domain); 928 "arm-smmu-context-fault", domain);
@@ -981,10 +932,16 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
981 cfg->irptndx = INVALID_IRPTNDX; 932 cfg->irptndx = INVALID_IRPTNDX;
982 } 933 }
983 934
935 mutex_unlock(&smmu_domain->init_mutex);
936
937 /* Publish page table ops for map/unmap */
938 smmu_domain->pgtbl_ops = pgtbl_ops;
984 return 0; 939 return 0;
985 940
941out_clear_smmu:
942 smmu_domain->smmu = NULL;
986out_unlock: 943out_unlock:
987 spin_unlock_irqrestore(&smmu_domain->lock, flags); 944 mutex_unlock(&smmu_domain->init_mutex);
988 return ret; 945 return ret;
989} 946}
990 947
@@ -999,23 +956,27 @@ static void arm_smmu_destroy_domain_context(struct iommu_domain *domain)
999 if (!smmu) 956 if (!smmu)
1000 return; 957 return;
1001 958
1002 /* Disable the context bank and nuke the TLB before freeing it. */ 959 /*
960 * Disable the context bank and free the page tables before freeing
961 * it.
962 */
1003 cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx); 963 cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx);
1004 writel_relaxed(0, cb_base + ARM_SMMU_CB_SCTLR); 964 writel_relaxed(0, cb_base + ARM_SMMU_CB_SCTLR);
1005 arm_smmu_tlb_inv_context(smmu_domain);
1006 965
1007 if (cfg->irptndx != INVALID_IRPTNDX) { 966 if (cfg->irptndx != INVALID_IRPTNDX) {
1008 irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx]; 967 irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
1009 free_irq(irq, domain); 968 free_irq(irq, domain);
1010 } 969 }
1011 970
971 if (smmu_domain->pgtbl_ops)
972 free_io_pgtable_ops(smmu_domain->pgtbl_ops);
973
1012 __arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx); 974 __arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx);
1013} 975}
1014 976
1015static int arm_smmu_domain_init(struct iommu_domain *domain) 977static int arm_smmu_domain_init(struct iommu_domain *domain)
1016{ 978{
1017 struct arm_smmu_domain *smmu_domain; 979 struct arm_smmu_domain *smmu_domain;
1018 pgd_t *pgd;
1019 980
1020 /* 981 /*
1021 * Allocate the domain and initialise some of its data structures. 982 * Allocate the domain and initialise some of its data structures.
@@ -1026,81 +987,10 @@ static int arm_smmu_domain_init(struct iommu_domain *domain)
1026 if (!smmu_domain) 987 if (!smmu_domain)
1027 return -ENOMEM; 988 return -ENOMEM;
1028 989
1029 pgd = kcalloc(PTRS_PER_PGD, sizeof(pgd_t), GFP_KERNEL); 990 mutex_init(&smmu_domain->init_mutex);
1030 if (!pgd) 991 spin_lock_init(&smmu_domain->pgtbl_lock);
1031 goto out_free_domain;
1032 smmu_domain->cfg.pgd = pgd;
1033
1034 spin_lock_init(&smmu_domain->lock);
1035 domain->priv = smmu_domain; 992 domain->priv = smmu_domain;
1036 return 0; 993 return 0;
1037
1038out_free_domain:
1039 kfree(smmu_domain);
1040 return -ENOMEM;
1041}
1042
1043static void arm_smmu_free_ptes(pmd_t *pmd)
1044{
1045 pgtable_t table = pmd_pgtable(*pmd);
1046
1047 __free_page(table);
1048}
1049
1050static void arm_smmu_free_pmds(pud_t *pud)
1051{
1052 int i;
1053 pmd_t *pmd, *pmd_base = pmd_offset(pud, 0);
1054
1055 pmd = pmd_base;
1056 for (i = 0; i < PTRS_PER_PMD; ++i) {
1057 if (pmd_none(*pmd))
1058 continue;
1059
1060 arm_smmu_free_ptes(pmd);
1061 pmd++;
1062 }
1063
1064 pmd_free(NULL, pmd_base);
1065}
1066
1067static void arm_smmu_free_puds(pgd_t *pgd)
1068{
1069 int i;
1070 pud_t *pud, *pud_base = pud_offset(pgd, 0);
1071
1072 pud = pud_base;
1073 for (i = 0; i < PTRS_PER_PUD; ++i) {
1074 if (pud_none(*pud))
1075 continue;
1076
1077 arm_smmu_free_pmds(pud);
1078 pud++;
1079 }
1080
1081 pud_free(NULL, pud_base);
1082}
1083
1084static void arm_smmu_free_pgtables(struct arm_smmu_domain *smmu_domain)
1085{
1086 int i;
1087 struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
1088 pgd_t *pgd, *pgd_base = cfg->pgd;
1089
1090 /*
1091 * Recursively free the page tables for this domain. We don't
1092 * care about speculative TLB filling because the tables should
1093 * not be active in any context bank at this point (SCTLR.M is 0).
1094 */
1095 pgd = pgd_base;
1096 for (i = 0; i < PTRS_PER_PGD; ++i) {
1097 if (pgd_none(*pgd))
1098 continue;
1099 arm_smmu_free_puds(pgd);
1100 pgd++;
1101 }
1102
1103 kfree(pgd_base);
1104} 994}
1105 995
1106static void arm_smmu_domain_destroy(struct iommu_domain *domain) 996static void arm_smmu_domain_destroy(struct iommu_domain *domain)
@@ -1112,7 +1002,6 @@ static void arm_smmu_domain_destroy(struct iommu_domain *domain)
1112 * already been detached. 1002 * already been detached.
1113 */ 1003 */
1114 arm_smmu_destroy_domain_context(domain); 1004 arm_smmu_destroy_domain_context(domain);
1115 arm_smmu_free_pgtables(smmu_domain);
1116 kfree(smmu_domain); 1005 kfree(smmu_domain);
1117} 1006}
1118 1007
@@ -1244,7 +1133,7 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
1244{ 1133{
1245 int ret; 1134 int ret;
1246 struct arm_smmu_domain *smmu_domain = domain->priv; 1135 struct arm_smmu_domain *smmu_domain = domain->priv;
1247 struct arm_smmu_device *smmu, *dom_smmu; 1136 struct arm_smmu_device *smmu;
1248 struct arm_smmu_master_cfg *cfg; 1137 struct arm_smmu_master_cfg *cfg;
1249 1138
1250 smmu = find_smmu_for_device(dev); 1139 smmu = find_smmu_for_device(dev);
@@ -1258,21 +1147,16 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
1258 return -EEXIST; 1147 return -EEXIST;
1259 } 1148 }
1260 1149
1150 /* Ensure that the domain is finalised */
1151 ret = arm_smmu_init_domain_context(domain, smmu);
1152 if (IS_ERR_VALUE(ret))
1153 return ret;
1154
1261 /* 1155 /*
1262 * Sanity check the domain. We don't support domains across 1156 * Sanity check the domain. We don't support domains across
1263 * different SMMUs. 1157 * different SMMUs.
1264 */ 1158 */
1265 dom_smmu = ACCESS_ONCE(smmu_domain->smmu); 1159 if (smmu_domain->smmu != smmu) {
1266 if (!dom_smmu) {
1267 /* Now that we have a master, we can finalise the domain */
1268 ret = arm_smmu_init_domain_context(domain, smmu);
1269 if (IS_ERR_VALUE(ret))
1270 return ret;
1271
1272 dom_smmu = smmu_domain->smmu;
1273 }
1274
1275 if (dom_smmu != smmu) {
1276 dev_err(dev, 1160 dev_err(dev,
1277 "cannot attach to SMMU %s whilst already attached to domain on SMMU %s\n", 1161 "cannot attach to SMMU %s whilst already attached to domain on SMMU %s\n",
1278 dev_name(smmu_domain->smmu->dev), dev_name(smmu->dev)); 1162 dev_name(smmu_domain->smmu->dev), dev_name(smmu->dev));
@@ -1303,293 +1187,55 @@ static void arm_smmu_detach_dev(struct iommu_domain *domain, struct device *dev)
1303 arm_smmu_domain_remove_master(smmu_domain, cfg); 1187 arm_smmu_domain_remove_master(smmu_domain, cfg);
1304} 1188}
1305 1189
1306static bool arm_smmu_pte_is_contiguous_range(unsigned long addr,
1307 unsigned long end)
1308{
1309 return !(addr & ~ARM_SMMU_PTE_CONT_MASK) &&
1310 (addr + ARM_SMMU_PTE_CONT_SIZE <= end);
1311}
1312
1313static int arm_smmu_alloc_init_pte(struct arm_smmu_device *smmu, pmd_t *pmd,
1314 unsigned long addr, unsigned long end,
1315 unsigned long pfn, int prot, int stage)
1316{
1317 pte_t *pte, *start;
1318 pteval_t pteval = ARM_SMMU_PTE_PAGE | ARM_SMMU_PTE_AF;
1319
1320 if (pmd_none(*pmd)) {
1321 /* Allocate a new set of tables */
1322 pgtable_t table = alloc_page(GFP_ATOMIC|__GFP_ZERO);
1323
1324 if (!table)
1325 return -ENOMEM;
1326
1327 arm_smmu_flush_pgtable(smmu, page_address(table), PAGE_SIZE);
1328 pmd_populate(NULL, pmd, table);
1329 arm_smmu_flush_pgtable(smmu, pmd, sizeof(*pmd));
1330 }
1331
1332 if (stage == 1) {
1333 pteval |= ARM_SMMU_PTE_AP_UNPRIV | ARM_SMMU_PTE_nG;
1334 if (!(prot & IOMMU_WRITE) && (prot & IOMMU_READ))
1335 pteval |= ARM_SMMU_PTE_AP_RDONLY;
1336
1337 if (prot & IOMMU_CACHE)
1338 pteval |= (MAIR_ATTR_IDX_CACHE <<
1339 ARM_SMMU_PTE_ATTRINDX_SHIFT);
1340 } else {
1341 pteval |= ARM_SMMU_PTE_HAP_FAULT;
1342 if (prot & IOMMU_READ)
1343 pteval |= ARM_SMMU_PTE_HAP_READ;
1344 if (prot & IOMMU_WRITE)
1345 pteval |= ARM_SMMU_PTE_HAP_WRITE;
1346 if (prot & IOMMU_CACHE)
1347 pteval |= ARM_SMMU_PTE_MEMATTR_OIWB;
1348 else
1349 pteval |= ARM_SMMU_PTE_MEMATTR_NC;
1350 }
1351
1352 if (prot & IOMMU_NOEXEC)
1353 pteval |= ARM_SMMU_PTE_XN;
1354
1355 /* If no access, create a faulting entry to avoid TLB fills */
1356 if (!(prot & (IOMMU_READ | IOMMU_WRITE)))
1357 pteval &= ~ARM_SMMU_PTE_PAGE;
1358
1359 pteval |= ARM_SMMU_PTE_SH_IS;
1360 start = pmd_page_vaddr(*pmd) + pte_index(addr);
1361 pte = start;
1362
1363 /*
1364 * Install the page table entries. This is fairly complicated
1365 * since we attempt to make use of the contiguous hint in the
1366 * ptes where possible. The contiguous hint indicates a series
1367 * of ARM_SMMU_PTE_CONT_ENTRIES ptes mapping a physically
1368 * contiguous region with the following constraints:
1369 *
1370 * - The region start is aligned to ARM_SMMU_PTE_CONT_SIZE
1371 * - Each pte in the region has the contiguous hint bit set
1372 *
1373 * This complicates unmapping (also handled by this code, when
1374 * neither IOMMU_READ or IOMMU_WRITE are set) because it is
1375 * possible, yet highly unlikely, that a client may unmap only
1376 * part of a contiguous range. This requires clearing of the
1377 * contiguous hint bits in the range before installing the new
1378 * faulting entries.
1379 *
1380 * Note that re-mapping an address range without first unmapping
1381 * it is not supported, so TLB invalidation is not required here
1382 * and is instead performed at unmap and domain-init time.
1383 */
1384 do {
1385 int i = 1;
1386
1387 pteval &= ~ARM_SMMU_PTE_CONT;
1388
1389 if (arm_smmu_pte_is_contiguous_range(addr, end)) {
1390 i = ARM_SMMU_PTE_CONT_ENTRIES;
1391 pteval |= ARM_SMMU_PTE_CONT;
1392 } else if (pte_val(*pte) &
1393 (ARM_SMMU_PTE_CONT | ARM_SMMU_PTE_PAGE)) {
1394 int j;
1395 pte_t *cont_start;
1396 unsigned long idx = pte_index(addr);
1397
1398 idx &= ~(ARM_SMMU_PTE_CONT_ENTRIES - 1);
1399 cont_start = pmd_page_vaddr(*pmd) + idx;
1400 for (j = 0; j < ARM_SMMU_PTE_CONT_ENTRIES; ++j)
1401 pte_val(*(cont_start + j)) &=
1402 ~ARM_SMMU_PTE_CONT;
1403
1404 arm_smmu_flush_pgtable(smmu, cont_start,
1405 sizeof(*pte) *
1406 ARM_SMMU_PTE_CONT_ENTRIES);
1407 }
1408
1409 do {
1410 *pte = pfn_pte(pfn, __pgprot(pteval));
1411 } while (pte++, pfn++, addr += PAGE_SIZE, --i);
1412 } while (addr != end);
1413
1414 arm_smmu_flush_pgtable(smmu, start, sizeof(*pte) * (pte - start));
1415 return 0;
1416}
1417
1418static int arm_smmu_alloc_init_pmd(struct arm_smmu_device *smmu, pud_t *pud,
1419 unsigned long addr, unsigned long end,
1420 phys_addr_t phys, int prot, int stage)
1421{
1422 int ret;
1423 pmd_t *pmd;
1424 unsigned long next, pfn = __phys_to_pfn(phys);
1425
1426#ifndef __PAGETABLE_PMD_FOLDED
1427 if (pud_none(*pud)) {
1428 pmd = (pmd_t *)get_zeroed_page(GFP_ATOMIC);
1429 if (!pmd)
1430 return -ENOMEM;
1431
1432 arm_smmu_flush_pgtable(smmu, pmd, PAGE_SIZE);
1433 pud_populate(NULL, pud, pmd);
1434 arm_smmu_flush_pgtable(smmu, pud, sizeof(*pud));
1435
1436 pmd += pmd_index(addr);
1437 } else
1438#endif
1439 pmd = pmd_offset(pud, addr);
1440
1441 do {
1442 next = pmd_addr_end(addr, end);
1443 ret = arm_smmu_alloc_init_pte(smmu, pmd, addr, next, pfn,
1444 prot, stage);
1445 phys += next - addr;
1446 pfn = __phys_to_pfn(phys);
1447 } while (pmd++, addr = next, addr < end);
1448
1449 return ret;
1450}
1451
1452static int arm_smmu_alloc_init_pud(struct arm_smmu_device *smmu, pgd_t *pgd,
1453 unsigned long addr, unsigned long end,
1454 phys_addr_t phys, int prot, int stage)
1455{
1456 int ret = 0;
1457 pud_t *pud;
1458 unsigned long next;
1459
1460#ifndef __PAGETABLE_PUD_FOLDED
1461 if (pgd_none(*pgd)) {
1462 pud = (pud_t *)get_zeroed_page(GFP_ATOMIC);
1463 if (!pud)
1464 return -ENOMEM;
1465
1466 arm_smmu_flush_pgtable(smmu, pud, PAGE_SIZE);
1467 pgd_populate(NULL, pgd, pud);
1468 arm_smmu_flush_pgtable(smmu, pgd, sizeof(*pgd));
1469
1470 pud += pud_index(addr);
1471 } else
1472#endif
1473 pud = pud_offset(pgd, addr);
1474
1475 do {
1476 next = pud_addr_end(addr, end);
1477 ret = arm_smmu_alloc_init_pmd(smmu, pud, addr, next, phys,
1478 prot, stage);
1479 phys += next - addr;
1480 } while (pud++, addr = next, addr < end);
1481
1482 return ret;
1483}
1484
1485static int arm_smmu_handle_mapping(struct arm_smmu_domain *smmu_domain,
1486 unsigned long iova, phys_addr_t paddr,
1487 size_t size, int prot)
1488{
1489 int ret, stage;
1490 unsigned long end;
1491 phys_addr_t input_mask, output_mask;
1492 struct arm_smmu_device *smmu = smmu_domain->smmu;
1493 struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
1494 pgd_t *pgd = cfg->pgd;
1495 unsigned long flags;
1496
1497 if (cfg->cbar == CBAR_TYPE_S2_TRANS) {
1498 stage = 2;
1499 input_mask = (1ULL << smmu->s2_input_size) - 1;
1500 output_mask = (1ULL << smmu->s2_output_size) - 1;
1501 } else {
1502 stage = 1;
1503 input_mask = (1ULL << smmu->s1_input_size) - 1;
1504 output_mask = (1ULL << smmu->s1_output_size) - 1;
1505 }
1506
1507 if (!pgd)
1508 return -EINVAL;
1509
1510 if (size & ~PAGE_MASK)
1511 return -EINVAL;
1512
1513 if ((phys_addr_t)iova & ~input_mask)
1514 return -ERANGE;
1515
1516 if (paddr & ~output_mask)
1517 return -ERANGE;
1518
1519 spin_lock_irqsave(&smmu_domain->lock, flags);
1520 pgd += pgd_index(iova);
1521 end = iova + size;
1522 do {
1523 unsigned long next = pgd_addr_end(iova, end);
1524
1525 ret = arm_smmu_alloc_init_pud(smmu, pgd, iova, next, paddr,
1526 prot, stage);
1527 if (ret)
1528 goto out_unlock;
1529
1530 paddr += next - iova;
1531 iova = next;
1532 } while (pgd++, iova != end);
1533
1534out_unlock:
1535 spin_unlock_irqrestore(&smmu_domain->lock, flags);
1536
1537 return ret;
1538}
1539
1540static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova, 1190static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
1541 phys_addr_t paddr, size_t size, int prot) 1191 phys_addr_t paddr, size_t size, int prot)
1542{ 1192{
1193 int ret;
1194 unsigned long flags;
1543 struct arm_smmu_domain *smmu_domain = domain->priv; 1195 struct arm_smmu_domain *smmu_domain = domain->priv;
1196 struct io_pgtable_ops *ops= smmu_domain->pgtbl_ops;
1544 1197
1545 if (!smmu_domain) 1198 if (!ops)
1546 return -ENODEV; 1199 return -ENODEV;
1547 1200
1548 return arm_smmu_handle_mapping(smmu_domain, iova, paddr, size, prot); 1201 spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags);
1202 ret = ops->map(ops, iova, paddr, size, prot);
1203 spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags);
1204 return ret;
1549} 1205}
1550 1206
1551static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova, 1207static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
1552 size_t size) 1208 size_t size)
1553{ 1209{
1554 int ret; 1210 size_t ret;
1211 unsigned long flags;
1555 struct arm_smmu_domain *smmu_domain = domain->priv; 1212 struct arm_smmu_domain *smmu_domain = domain->priv;
1213 struct io_pgtable_ops *ops= smmu_domain->pgtbl_ops;
1556 1214
1557 ret = arm_smmu_handle_mapping(smmu_domain, iova, 0, size, 0); 1215 if (!ops)
1558 arm_smmu_tlb_inv_context(smmu_domain); 1216 return 0;
1559 return ret ? 0 : size; 1217
1218 spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags);
1219 ret = ops->unmap(ops, iova, size);
1220 spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags);
1221 return ret;
1560} 1222}
1561 1223
1562static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain, 1224static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain,
1563 dma_addr_t iova) 1225 dma_addr_t iova)
1564{ 1226{
1565 pgd_t *pgdp, pgd; 1227 phys_addr_t ret;
1566 pud_t pud; 1228 unsigned long flags;
1567 pmd_t pmd;
1568 pte_t pte;
1569 struct arm_smmu_domain *smmu_domain = domain->priv; 1229 struct arm_smmu_domain *smmu_domain = domain->priv;
1570 struct arm_smmu_cfg *cfg = &smmu_domain->cfg; 1230 struct io_pgtable_ops *ops= smmu_domain->pgtbl_ops;
1571 1231
1572 pgdp = cfg->pgd; 1232 if (!ops)
1573 if (!pgdp)
1574 return 0; 1233 return 0;
1575 1234
1576 pgd = *(pgdp + pgd_index(iova)); 1235 spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags);
1577 if (pgd_none(pgd)) 1236 ret = ops->iova_to_phys(ops, iova);
1578 return 0; 1237 spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags);
1579 1238 return ret;
1580 pud = *pud_offset(&pgd, iova);
1581 if (pud_none(pud))
1582 return 0;
1583
1584 pmd = *pmd_offset(&pud, iova);
1585 if (pmd_none(pmd))
1586 return 0;
1587
1588 pte = *(pmd_page_vaddr(pmd) + pte_index(iova));
1589 if (pte_none(pte))
1590 return 0;
1591
1592 return __pfn_to_phys(pte_pfn(pte)) | (iova & ~PAGE_MASK);
1593} 1239}
1594 1240
1595static bool arm_smmu_capable(enum iommu_cap cap) 1241static bool arm_smmu_capable(enum iommu_cap cap)
@@ -1698,24 +1344,34 @@ static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
1698static int arm_smmu_domain_set_attr(struct iommu_domain *domain, 1344static int arm_smmu_domain_set_attr(struct iommu_domain *domain,
1699 enum iommu_attr attr, void *data) 1345 enum iommu_attr attr, void *data)
1700{ 1346{
1347 int ret = 0;
1701 struct arm_smmu_domain *smmu_domain = domain->priv; 1348 struct arm_smmu_domain *smmu_domain = domain->priv;
1702 1349
1350 mutex_lock(&smmu_domain->init_mutex);
1351
1703 switch (attr) { 1352 switch (attr) {
1704 case DOMAIN_ATTR_NESTING: 1353 case DOMAIN_ATTR_NESTING:
1705 if (smmu_domain->smmu) 1354 if (smmu_domain->smmu) {
1706 return -EPERM; 1355 ret = -EPERM;
1356 goto out_unlock;
1357 }
1358
1707 if (*(int *)data) 1359 if (*(int *)data)
1708 smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED; 1360 smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
1709 else 1361 else
1710 smmu_domain->stage = ARM_SMMU_DOMAIN_S1; 1362 smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
1711 1363
1712 return 0; 1364 break;
1713 default: 1365 default:
1714 return -ENODEV; 1366 ret = -ENODEV;
1715 } 1367 }
1368
1369out_unlock:
1370 mutex_unlock(&smmu_domain->init_mutex);
1371 return ret;
1716} 1372}
1717 1373
1718static const struct iommu_ops arm_smmu_ops = { 1374static struct iommu_ops arm_smmu_ops = {
1719 .capable = arm_smmu_capable, 1375 .capable = arm_smmu_capable,
1720 .domain_init = arm_smmu_domain_init, 1376 .domain_init = arm_smmu_domain_init,
1721 .domain_destroy = arm_smmu_domain_destroy, 1377 .domain_destroy = arm_smmu_domain_destroy,
@@ -1729,9 +1385,7 @@ static const struct iommu_ops arm_smmu_ops = {
1729 .remove_device = arm_smmu_remove_device, 1385 .remove_device = arm_smmu_remove_device,
1730 .domain_get_attr = arm_smmu_domain_get_attr, 1386 .domain_get_attr = arm_smmu_domain_get_attr,
1731 .domain_set_attr = arm_smmu_domain_set_attr, 1387 .domain_set_attr = arm_smmu_domain_set_attr,
1732 .pgsize_bitmap = (SECTION_SIZE | 1388 .pgsize_bitmap = -1UL, /* Restricted during device attach */
1733 ARM_SMMU_PTE_CONT_SIZE |
1734 PAGE_SIZE),
1735}; 1389};
1736 1390
1737static void arm_smmu_device_reset(struct arm_smmu_device *smmu) 1391static void arm_smmu_device_reset(struct arm_smmu_device *smmu)
@@ -1782,7 +1436,7 @@ static void arm_smmu_device_reset(struct arm_smmu_device *smmu)
1782 reg &= ~(sCR0_BSU_MASK << sCR0_BSU_SHIFT); 1436 reg &= ~(sCR0_BSU_MASK << sCR0_BSU_SHIFT);
1783 1437
1784 /* Push the button */ 1438 /* Push the button */
1785 arm_smmu_tlb_sync(smmu); 1439 __arm_smmu_tlb_sync(smmu);
1786 writel(reg, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0); 1440 writel(reg, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
1787} 1441}
1788 1442
@@ -1816,12 +1470,6 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
1816 1470
1817 /* ID0 */ 1471 /* ID0 */
1818 id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID0); 1472 id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID0);
1819#ifndef CONFIG_64BIT
1820 if (((id >> ID0_PTFS_SHIFT) & ID0_PTFS_MASK) == ID0_PTFS_V8_ONLY) {
1821 dev_err(smmu->dev, "\tno v7 descriptor support!\n");
1822 return -ENODEV;
1823 }
1824#endif
1825 1473
1826 /* Restrict available stages based on module parameter */ 1474 /* Restrict available stages based on module parameter */
1827 if (force_stage == 1) 1475 if (force_stage == 1)
@@ -1894,16 +1542,14 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
1894 smmu->pgshift = (id & ID1_PAGESIZE) ? 16 : 12; 1542 smmu->pgshift = (id & ID1_PAGESIZE) ? 16 : 12;
1895 1543
1896 /* Check for size mismatch of SMMU address space from mapped region */ 1544 /* Check for size mismatch of SMMU address space from mapped region */
1897 size = 1 << 1545 size = 1 << (((id >> ID1_NUMPAGENDXB_SHIFT) & ID1_NUMPAGENDXB_MASK) + 1);
1898 (((id >> ID1_NUMPAGENDXB_SHIFT) & ID1_NUMPAGENDXB_MASK) + 1);
1899 size *= 2 << smmu->pgshift; 1546 size *= 2 << smmu->pgshift;
1900 if (smmu->size != size) 1547 if (smmu->size != size)
1901 dev_warn(smmu->dev, 1548 dev_warn(smmu->dev,
1902 "SMMU address space size (0x%lx) differs from mapped region size (0x%lx)!\n", 1549 "SMMU address space size (0x%lx) differs from mapped region size (0x%lx)!\n",
1903 size, smmu->size); 1550 size, smmu->size);
1904 1551
1905 smmu->num_s2_context_banks = (id >> ID1_NUMS2CB_SHIFT) & 1552 smmu->num_s2_context_banks = (id >> ID1_NUMS2CB_SHIFT) & ID1_NUMS2CB_MASK;
1906 ID1_NUMS2CB_MASK;
1907 smmu->num_context_banks = (id >> ID1_NUMCB_SHIFT) & ID1_NUMCB_MASK; 1553 smmu->num_context_banks = (id >> ID1_NUMCB_SHIFT) & ID1_NUMCB_MASK;
1908 if (smmu->num_s2_context_banks > smmu->num_context_banks) { 1554 if (smmu->num_s2_context_banks > smmu->num_context_banks) {
1909 dev_err(smmu->dev, "impossible number of S2 context banks!\n"); 1555 dev_err(smmu->dev, "impossible number of S2 context banks!\n");
@@ -1915,46 +1561,40 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
1915 /* ID2 */ 1561 /* ID2 */
1916 id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID2); 1562 id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID2);
1917 size = arm_smmu_id_size_to_bits((id >> ID2_IAS_SHIFT) & ID2_IAS_MASK); 1563 size = arm_smmu_id_size_to_bits((id >> ID2_IAS_SHIFT) & ID2_IAS_MASK);
1918 smmu->s1_output_size = min_t(unsigned long, PHYS_MASK_SHIFT, size); 1564 smmu->ipa_size = size;
1919 1565
1920 /* Stage-2 input size limited due to pgd allocation (PTRS_PER_PGD) */ 1566 /* The output mask is also applied for bypass */
1921#ifdef CONFIG_64BIT
1922 smmu->s2_input_size = min_t(unsigned long, VA_BITS, size);
1923#else
1924 smmu->s2_input_size = min(32UL, size);
1925#endif
1926
1927 /* The stage-2 output mask is also applied for bypass */
1928 size = arm_smmu_id_size_to_bits((id >> ID2_OAS_SHIFT) & ID2_OAS_MASK); 1567 size = arm_smmu_id_size_to_bits((id >> ID2_OAS_SHIFT) & ID2_OAS_MASK);
1929 smmu->s2_output_size = min_t(unsigned long, PHYS_MASK_SHIFT, size); 1568 smmu->pa_size = size;
1930 1569
1931 if (smmu->version == ARM_SMMU_V1) { 1570 if (smmu->version == ARM_SMMU_V1) {
1932 smmu->s1_input_size = 32; 1571 smmu->va_size = smmu->ipa_size;
1572 size = SZ_4K | SZ_2M | SZ_1G;
1933 } else { 1573 } else {
1934#ifdef CONFIG_64BIT
1935 size = (id >> ID2_UBS_SHIFT) & ID2_UBS_MASK; 1574 size = (id >> ID2_UBS_SHIFT) & ID2_UBS_MASK;
1936 size = min(VA_BITS, arm_smmu_id_size_to_bits(size)); 1575 smmu->va_size = arm_smmu_id_size_to_bits(size);
1937#else 1576#ifndef CONFIG_64BIT
1938 size = 32; 1577 smmu->va_size = min(32UL, smmu->va_size);
1939#endif 1578#endif
1940 smmu->s1_input_size = size; 1579 size = 0;
1941 1580 if (id & ID2_PTFS_4K)
1942 if ((PAGE_SIZE == SZ_4K && !(id & ID2_PTFS_4K)) || 1581 size |= SZ_4K | SZ_2M | SZ_1G;
1943 (PAGE_SIZE == SZ_64K && !(id & ID2_PTFS_64K)) || 1582 if (id & ID2_PTFS_16K)
1944 (PAGE_SIZE != SZ_4K && PAGE_SIZE != SZ_64K)) { 1583 size |= SZ_16K | SZ_32M;
1945 dev_err(smmu->dev, "CPU page size 0x%lx unsupported\n", 1584 if (id & ID2_PTFS_64K)
1946 PAGE_SIZE); 1585 size |= SZ_64K | SZ_512M;
1947 return -ENODEV;
1948 }
1949 } 1586 }
1950 1587
1588 arm_smmu_ops.pgsize_bitmap &= size;
1589 dev_notice(smmu->dev, "\tSupported page sizes: 0x%08lx\n", size);
1590
1951 if (smmu->features & ARM_SMMU_FEAT_TRANS_S1) 1591 if (smmu->features & ARM_SMMU_FEAT_TRANS_S1)
1952 dev_notice(smmu->dev, "\tStage-1: %lu-bit VA -> %lu-bit IPA\n", 1592 dev_notice(smmu->dev, "\tStage-1: %lu-bit VA -> %lu-bit IPA\n",
1953 smmu->s1_input_size, smmu->s1_output_size); 1593 smmu->va_size, smmu->ipa_size);
1954 1594
1955 if (smmu->features & ARM_SMMU_FEAT_TRANS_S2) 1595 if (smmu->features & ARM_SMMU_FEAT_TRANS_S2)
1956 dev_notice(smmu->dev, "\tStage-2: %lu-bit IPA -> %lu-bit PA\n", 1596 dev_notice(smmu->dev, "\tStage-2: %lu-bit IPA -> %lu-bit PA\n",
1957 smmu->s2_input_size, smmu->s2_output_size); 1597 smmu->ipa_size, smmu->pa_size);
1958 1598
1959 return 0; 1599 return 0;
1960} 1600}