From 026b5ca3b65f878019a8eb0c7a702cd5c20a4104 Mon Sep 17 00:00:00 2001
From: Catalin Marinas
Date: Wed, 1 Sep 2010 14:33:29 +0100
Subject: ARM: 6344/1: Mark CPU_32v6K as dependent on CPU_V7
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CPU_32v6K is selected by CPU_V7 but it only depends on CPU_V6.

Signed-off-by: Catalin Marinas
Acked-by: Uwe Kleine-König
Signed-off-by: Russell King
---
 arch/arm/mm/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/arm/mm')

diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
index 33c3f570aaa0..a0a2928ae4dd 100644
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -398,7 +398,7 @@ config CPU_V6
 # ARMv6k
 config CPU_32v6K
 	bool "Support ARM V6K processor extensions" if !SMP
-	depends on CPU_V6
+	depends on CPU_V6 || CPU_V7
 	default y if SMP && !(ARCH_MX3 || ARCH_OMAP2)
 	help
 	  Say Y here if your ARMv6 processor supports the 'K' extension.
--
cgit v1.2.2

From 2be23c475af8ae4e25f8bab08d815b17593bd547 Mon Sep 17 00:00:00 2001
From: Russell King
Date: Wed, 8 Sep 2010 16:27:56 +0100
Subject: ARM: Ensure PTE modifications via dma_alloc_coherent are visible

Dave Hylands reports:
| We've observed a problem with dma_alloc_writecombine when the system
| is under heavy load (heavy bus traffic).  We've managed to reduce the
| problem to the following snippet, which is run from a kthread in a
| continuous loop:
|
|   void *virtAddr;
|   dma_addr_t physAddr;
|   unsigned int numBytes = 256;
|
|   for (;;) {
|       virtAddr = dma_alloc_writecombine(NULL,
|                                         numBytes, &physAddr, GFP_KERNEL);
|       if (virtAddr == NULL) {
|           printk(KERN_ERR "Running out of memory\n");
|           break;
|       }
|
|       /* access DMA memory allocated */
|       tmp = virtAddr;
|       *tmp = 0x77;
|
|       /* free DMA memory */
|       dma_free_writecombine(NULL,
|                             numBytes, virtAddr, physAddr);
|
|       ...sleep here...
|   }
|
| By itself, the code will run forever with no issues.  However, as we
| increase our bus traffic (typically using DMA) then the *tmp = 0x77
| line will eventually cause a page fault.  If we add a small delay (a
| few microseconds) before the *tmp = 0x77, then we don't see a page
| fault, even under heavy load.

A dsb() is required after modifying the PTE entries to ensure that they
will always be visible.  Add this dsb().

Reported-by: Dave Hylands
Tested-by: Dave Hylands
Signed-off-by: Russell King
---
 arch/arm/mm/dma-mapping.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch/arm/mm')

diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index c704eed63c5d..4bc43e535d3b 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -229,6 +229,8 @@ __dma_alloc_remap(struct page *page, size_t size, gfp_t gfp, pgprot_t prot)
 			}
 		} while (size -= PAGE_SIZE);
 
+		dsb();
+
 		return (void *)c->vm_start;
 	}
 	return NULL;
--
cgit v1.2.2
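The ordering rule this fix enforces deserves a concrete illustration: on ARM, the stores that populate the PTEs can sit in the CPU's write buffer, invisible to the translation table walker, until a DSB drains them. The C sketch below is illustrative only; map_pages_wc() is a made-up stand-in for the kernel's __dma_alloc_remap() loop, while set_pte_ext(), mk_pte() and dsb() are the real primitives used in the patch.

    /*
     * Sketch (not the kernel's actual code): finish every PTE write,
     * then issue a DSB before anyone may dereference the new mapping.
     */
    static void *map_pages_wc(struct page *page, pte_t *pte,
                              void *vaddr, size_t size, pgprot_t prot)
    {
            size_t off;

            for (off = 0; off < size; off += PAGE_SIZE)
                    set_pte_ext(pte++, mk_pte(page++, prot), 0);

            dsb();          /* drain the write buffer: PTEs become visible
                               to the table walker before the first access */

            return vaddr;   /* only now is *vaddr safe under heavy bus load */
    }

Without the barrier, the first store through the returned pointer races the hardware table walker, which is exactly the intermittent fault in the report above.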
From 6491848d1ab246f6d243ddef25085fc1d836ff2c Mon Sep 17 00:00:00 2001
From: Will Deacon
Date: Tue, 14 Sep 2010 09:50:03 +0100
Subject: ARM: 6387/1: errata: check primary part ID in proc-v7.S

Kconfig doesn't have any knowledge of specific v7 cores, so it is
possible to select errata workarounds that may cause unintended
behaviour when executed on a core other than those targeted by the fix.

This patch improves the variant and revision checking in proc-v7.S so
that the primary part number is also considered when applying errata
workarounds.

Acked-by: Catalin Marinas
Signed-off-by: Will Deacon
Signed-off-by: Russell King
---
 arch/arm/mm/proc-v7.S | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

(limited to 'arch/arm/mm')

diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S
index 6a8506d99ee9..1f16f9e3f441 100644
--- a/arch/arm/mm/proc-v7.S
+++ b/arch/arm/mm/proc-v7.S
@@ -204,8 +204,13 @@ __v7_setup:
 	bne	2f
 	and	r5, r0, #0x00f00000		@ variant
 	and	r6, r0, #0x0000000f		@ revision
-	orr	r0, r6, r5, lsr #20-4		@ combine variant and revision
+	orr	r6, r6, r5, lsr #20-4		@ combine variant and revision
+	ubfx	r0, r0, #4, #12			@ primary part number
 
+	/* Cortex-A8 Errata */
+	ldr	r10, =0x00000c08		@ Cortex-A8 primary part number
+	teq	r0, r10
+	bne	2f
 #ifdef CONFIG_ARM_ERRATA_430973
 	teq	r5, #0x00100000			@ only present in r1p*
 	mrceq	p15, 0, r10, c1, c0, 1		@ read aux control register
@@ -213,14 +218,14 @@ __v7_setup:
 	mcreq	p15, 0, r10, c1, c0, 1		@ write aux control register
 #endif
 #ifdef CONFIG_ARM_ERRATA_458693
-	teq	r0, #0x20			@ only present in r2p0
+	teq	r6, #0x20			@ only present in r2p0
 	mrceq	p15, 0, r10, c1, c0, 1		@ read aux control register
 	orreq	r10, r10, #(1 << 5)		@ set L1NEON to 1
 	orreq	r10, r10, #(1 << 9)		@ set PLDNOP to 1
 	mcreq	p15, 0, r10, c1, c0, 1		@ write aux control register
#endif
#ifdef CONFIG_ARM_ERRATA_460075
-	teq	r0, #0x20			@ only present in r2p0
+	teq	r6, #0x20			@ only present in r2p0
 	mrceq	p15, 1, r10, c9, c0, 2		@ read L2 cache aux ctrl register
 	tsteq	r10, #1 << 22
 	orreq	r10, r10, #(1 << 22)		@ set the Write Allocate disable bit
--
cgit v1.2.2
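For reference, the fields shuffled through r0, r5 and r6 above are defined by the ARMv7 Main ID Register: bits [31:24] implementer, [23:20] variant, [15:4] primary part number and [3:0] revision. A self-contained C rendering of the same decoding (the kernel does this in assembly; decode_midr() and struct cpu_id are illustrative names):

    #include <stdint.h>

    struct cpu_id {
            unsigned int implementer;  /* 0x41 is 'A', i.e. ARM Ltd */
            unsigned int part;         /* 0xc08 = Cortex-A8, 0xc09 = Cortex-A9 */
            unsigned int var_rev;      /* (variant << 4) | revision; 0x20 = r2p0 */
    };

    static struct cpu_id decode_midr(uint32_t midr)
    {
            struct cpu_id id;

            id.implementer = midr >> 24;          /* and r10, r0, #0xff000000 */
            id.part = (midr >> 4) & 0xfff;        /* ubfx r0, r0, #4, #12 */
            id.var_rev = (((midr >> 20) & 0xf) << 4) | (midr & 0xf);
            return id;
    }

This is what lets the errata paths compare against 0x00000c08 before touching any Cortex-A8-specific register.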
From 9f05027c7cb3cfe56a31892bd83391138d41a667 Mon Sep 17 00:00:00 2001
From: Will Deacon
Date: Tue, 14 Sep 2010 09:51:43 +0100
Subject: ARM: 6388/1: errata: DMB operation may be faulty

On versions of the Cortex-A9 up to and including r2p2, under rare
circumstances, a DMB instruction between two write operations may not
ensure the correct visibility ordering of the two writes.

This workaround sets a bit in the diagnostic register of the Cortex-A9,
causing the DMB instruction to behave like a DSB, which functions
correctly on the affected cores.

Acked-by: Catalin Marinas
Signed-off-by: Will Deacon
Signed-off-by: Russell King
---
 arch/arm/mm/proc-v7.S | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

(limited to 'arch/arm/mm')

diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S
index 1f16f9e3f441..945f36341fa6 100644
--- a/arch/arm/mm/proc-v7.S
+++ b/arch/arm/mm/proc-v7.S
@@ -201,7 +201,7 @@ __v7_setup:
 	mrc	p15, 0, r0, c0, c0, 0		@ read main ID register
 	and	r10, r0, #0xff000000		@ ARM?
 	teq	r10, #0x41000000
-	bne	2f
+	bne	3f
 	and	r5, r0, #0x00f00000		@ variant
 	and	r6, r0, #0x0000000f		@ revision
 	orr	r6, r6, r5, lsr #20-4		@ combine variant and revision
@@ -231,8 +231,20 @@ __v7_setup:
 	orreq	r10, r10, #(1 << 22)		@ set the Write Allocate disable bit
 	mcreq	p15, 1, r10, c9, c0, 2		@ write the L2 cache aux ctrl register
 #endif
+	b	3f
+
+	/* Cortex-A9 Errata */
+2:	ldr	r10, =0x00000c09		@ Cortex-A9 primary part number
+	teq	r0, r10
+	bne	3f
+#ifdef CONFIG_ARM_ERRATA_742230
+	cmp	r6, #0x22			@ only present up to r2p2
+	mrcle	p15, 0, r10, c15, c0, 1		@ read diagnostic register
+	orrle	r10, r10, #1 << 4		@ set bit #4
+	mcrle	p15, 0, r10, c15, c0, 1		@ write diagnostic register
+#endif
 
-2:	mov	r10, #0
+3:	mov	r10, #0
 #ifdef HARVARD_CACHE
 	mcr	p15, 0, r10, c7, c5, 0		@ I+BTB cache invalidate
 #endif
--
cgit v1.2.2

From a672e99b129e286df2e2697a1b603d82321117f3 Mon Sep 17 00:00:00 2001
From: Will Deacon
Date: Tue, 14 Sep 2010 09:53:02 +0100
Subject: ARM: 6389/1: errata: incorrect hazard handling in the SCU may lead
 to data corruption

On the r2p0, r2p1 and r2p2 versions of the Cortex-A9, data corruption
can occur if a shared cache line is replaced on one CPU as another CPU
is accessing it.

This workaround sets two bits in the diagnostic register of the
Cortex-A9, reducing the linefill issuing capabilities of the processor
and avoiding the erroneous behaviour.

Acked-by: Catalin Marinas
Signed-off-by: Will Deacon
Signed-off-by: Russell King
---
 arch/arm/mm/proc-v7.S | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'arch/arm/mm')

diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S
index 945f36341fa6..080129263eef 100644
--- a/arch/arm/mm/proc-v7.S
+++ b/arch/arm/mm/proc-v7.S
@@ -243,6 +243,15 @@ __v7_setup:
 	orrle	r10, r10, #1 << 4		@ set bit #4
 	mcrle	p15, 0, r10, c15, c0, 1		@ write diagnostic register
 #endif
+#ifdef CONFIG_ARM_ERRATA_742231
+	teq	r6, #0x20			@ present in r2p0
+	teqne	r6, #0x21			@ present in r2p1
+	teqne	r6, #0x22			@ present in r2p2
+	mrceq	p15, 0, r10, c15, c0, 1		@ read diagnostic register
+	orreq	r10, r10, #1 << 12		@ set bit #12
+	orreq	r10, r10, #1 << 22		@ set bit #22
+	mcreq	p15, 0, r10, c15, c0, 1		@ write diagnostic register
+#endif
 
 3:	mov	r10, #0
 #ifdef HARVARD_CACHE
--
cgit v1.2.2
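The two Cortex-A9 workarounds above share one shape: gate on the affected variant/revision encodings, then set bits in the cp15 diagnostic register. A C sketch of the erratum 742231 gate, where read_diag() and write_diag() are made-up stand-ins for the mrc/mcr p15, 0, <Rt>, c15, c0, 1 accessors:

    #include <stdint.h>

    extern uint32_t read_diag(void);     /* stand-in for mrc p15, 0, <Rt>, c15, c0, 1 */
    extern void write_diag(uint32_t v);  /* stand-in for mcr p15, 0, <Rt>, c15, c0, 1 */

    static void a9_erratum_742231(unsigned int var_rev)
    {
            uint32_t diag;

            /* only r2p0, r2p1 and r2p2 are affected */
            if (var_rev != 0x20 && var_rev != 0x21 && var_rev != 0x22)
                    return;

            diag = read_diag();
            diag |= (1 << 12) | (1 << 22);   /* reduce linefill issuing capability */
            write_diag(diag);
    }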
From 14eff1812679c76564b775aa95cdd378965f6cfb Mon Sep 17 00:00:00 2001
From: Daniel Walker
Date: Fri, 17 Sep 2010 16:42:10 +0100
Subject: ARM: 6398/1: add proc info for ARM11MPCore/Cortex-A9 from ARM

Setting the SMP/nAMP mode bits in the auxiliary control register can
cause issues on SMP SoCs not produced by ARM.  That setup is therefore
moved out of the generic __v7_setup path into a new Cortex-A9-specific
__v7_ca9mp_setup entry point, selected by a proc_info entry matching
the Cortex-A9 ID value 0x410fc090.

Acked-by: Catalin Marinas
Signed-off-by: Daniel Walker
Signed-off-by: Russell King
---
 arch/arm/mm/proc-v7.S | 26 +++++++++++++++++++++++++-
 1 file changed, 25 insertions(+), 1 deletion(-)

(limited to 'arch/arm/mm')

diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S
index 080129263eef..7563ff0141bd 100644
--- a/arch/arm/mm/proc-v7.S
+++ b/arch/arm/mm/proc-v7.S
@@ -186,13 +186,14 @@ cpu_v7_name:
 * It is assumed that:
 * - cache type register is implemented
 */
-__v7_setup:
+__v7_ca9mp_setup:
 #ifdef CONFIG_SMP
 	mrc	p15, 0, r0, c1, c0, 1
 	tst	r0, #(1 << 6)			@ SMP/nAMP mode enabled?
 	orreq	r0, r0, #(1 << 6) | (1 << 0)	@ Enable SMP/nAMP mode and
 	mcreq	p15, 0, r0, c1, c0, 1		@ TLB ops broadcasting
 #endif
+__v7_setup:
 	adr	r12, __v7_setup_stack		@ the local stack
 	stmia	r12, {r0-r5, r7, r9, r11, lr}
 	bl	v7_flush_dcache_all
@@ -349,6 +350,29 @@ cpu_elf_name:
 
 	.section ".proc.info.init", #alloc, #execinstr
 
+	.type	__v7_ca9mp_proc_info, #object
+__v7_ca9mp_proc_info:
+	.long	0x410fc090			@ Required ID value
+	.long	0xff0ffff0			@ Mask for ID
+	.long	PMD_TYPE_SECT | \
+		PMD_SECT_AP_WRITE | \
+		PMD_SECT_AP_READ | \
+		PMD_FLAGS
+	.long	PMD_TYPE_SECT | \
+		PMD_SECT_XN | \
+		PMD_SECT_AP_WRITE | \
+		PMD_SECT_AP_READ
+	b	__v7_ca9mp_setup
+	.long	cpu_arch_name
+	.long	cpu_elf_name
+	.long	HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP
+	.long	cpu_v7_name
+	.long	v7_processor_functions
+	.long	v7wbi_tlb_fns
+	.long	v6_user_fns
+	.long	v7_cache_fns
+	.size	__v7_ca9mp_proc_info, . - __v7_ca9mp_proc_info
+
 	/*
 	 * Match any ARMv7 processor core.
 	 */
--
cgit v1.2.2

From d907387c42e9e39261629890e45a08ef4c3ed3fe Mon Sep 17 00:00:00 2001
From: Catalin Marinas
Date: Mon, 13 Sep 2010 16:01:24 +0100
Subject: ARM: 6383/1: Implement phys_mem_access_prot() to avoid attributes
 aliasing

ARMv7 onwards requires that there are no aliases to the same physical
location using different memory types (i.e. Normal vs Strongly
Ordered).  Accessing SO mappings while unaligned accesses are handled
in hardware is also Unpredictable (this affects pgprot_noncached()
mappings in user space).

The /dev/mem driver requires uncached mappings with O_SYNC.  This patch
implements the phys_mem_access_prot() function, which generates
Strongly Ordered memory attributes if !pfn_valid() (independent of
O_SYNC) and Normal Noncacheable (writecombine) if O_SYNC.

Signed-off-by: Catalin Marinas
Signed-off-by: Russell King
---
 arch/arm/mm/mmu.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

(limited to 'arch/arm/mm')

diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index 6e1c4f6a2b3f..a486bd0d97dc 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -15,6 +15,7 @@
 #include <linux/nodemask.h>
 #include <linux/memblock.h>
 #include <linux/sort.h>
+#include <linux/fs.h>
 
 #include <asm/cputype.h>
 #include <asm/sections.h>
@@ -498,6 +499,19 @@ static void __init build_mem_type_table(void)
 	}
 }
 
+#ifdef CONFIG_ARM_DMA_MEM_BUFFERABLE
+pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
+			      unsigned long size, pgprot_t vma_prot)
+{
+	if (!pfn_valid(pfn))
+		return pgprot_noncached(vma_prot);
+	else if (file->f_flags & O_SYNC)
+		return pgprot_writecombine(vma_prot);
+	return vma_prot;
+}
+EXPORT_SYMBOL(phys_mem_access_prot);
+#endif
+
 #define vectors_base()	(vectors_high() ? 0xffff0000 : 0)
 
 static void __init *early_alloc(unsigned long sz)
--
cgit v1.2.2
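From user space, the effect of phys_mem_access_prot() shows up when mmap()ing /dev/mem. A minimal sketch; the physical address 0x80000000 is made up and must be replaced with something valid for the board:

    #include <fcntl.h>
    #include <sys/mman.h>
    #include <unistd.h>

    int main(void)
    {
            /* With O_SYNC, RAM pfns are now mapped Normal Noncacheable
             * (writecombine); pfns with no RAM behind them come back
             * Strongly Ordered whether or not O_SYNC is set. */
            int fd = open("/dev/mem", O_RDWR | O_SYNC);
            if (fd < 0)
                    return 1;

            void *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
                           MAP_SHARED, fd, 0x80000000UL);
            if (p == MAP_FAILED) {
                    close(fd);
                    return 1;
            }

            /* ... access the device or buffer through p ... */

            munmap(p, 4096);
            close(fd);
            return 0;
    }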
From 2f27bf834e1d0a06e83d7458b535891c552271aa Mon Sep 17 00:00:00 2001
From: Nicolas Pitre
Date: Mon, 20 Sep 2010 04:10:43 +0100
Subject: ARM: 6401/1: plug a race in the alignment trap handler

When the policy is to ignore misaligned accesses from user space, the
processor then performs a documented rotation on the accessed data.
This is the result of the access being trapped, and the kernel
disabling the alignment trap before returning to user space again.

In kernel space we always want misaligned accesses to be fixed up.
This is enforced by always re-enabling the alignment trap on every
entry into kernel space from user space.  No such re-enabling is
performed when an exception occurs while already in kernel space, as
the alignment trap is always supposed to be enabled in that case.

There is however a small race window when a misaligned access in user
space is trapped and the alignment trap disabled, but the CPU didn't
return to user space just yet.  Any exception would be entered from
kernel space at that point and the kernel would then execute with the
alignment trap disabled.

Thanks to Maxime Bizon for providing a test module that made this
issue reproducible.

Signed-off-by: Nicolas Pitre
Signed-off-by: Russell King
---
 arch/arm/mm/alignment.c | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

(limited to 'arch/arm/mm')

diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c
index d073b64ae87e..724ba3bce72c 100644
--- a/arch/arm/mm/alignment.c
+++ b/arch/arm/mm/alignment.c
@@ -885,8 +885,23 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
 
 	if (ai_usermode & UM_SIGNAL)
 		force_sig(SIGBUS, current);
-	else
-		set_cr(cr_no_alignment);
+	else {
+		/*
+		 * We're about to disable the alignment trap and return to
+		 * user space.  But if an interrupt occurs before actually
+		 * reaching user space, then the IRQ vector entry code will
+		 * notice that we were still in kernel space and therefore
+		 * the alignment trap won't be re-enabled in that case as it
+		 * is presumed to be always on from kernel space.
+		 * Let's prevent that race by disabling interrupts here (they
+		 * are disabled on the way back to user space anyway in
+		 * entry-common.S) and disable the alignment trap only if
+		 * there is no work pending for this thread.
+		 */
+		raw_local_irq_disable();
+		if (!(current_thread_info()->flags & _TIF_WORK_MASK))
+			set_cr(cr_no_alignment);
+	}
 
 	return 0;
 }
--
cgit v1.2.2
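A trivial user-space trigger for the trap being discussed (a sketch: the cast is formally undefined behaviour in C, which is precisely why the outcome is decided by hardware rotation, kernel fixup or SIGBUS according to the /proc/cpu/alignment policy):

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
            char buf[8] __attribute__((aligned(4)));
            uint32_t v;

            memset(buf, 0xab, sizeof(buf));
            v = *(uint32_t *)(buf + 1);  /* misaligned 32-bit load */
            printf("read 0x%08x\n", (unsigned int)v);
            return 0;
    }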
From f1a2481c0ad3aebd94d11b317c488deaadc25002 Mon Sep 17 00:00:00 2001
From: Santosh Shilimkar
Date: Fri, 24 Sep 2010 07:18:22 +0100
Subject: ARM: 6407/1: mmu: Setup MT_MEMORY and MT_MEMORY_NONCACHED L1 entries

This patch populates the L1 entries for the MT_MEMORY and
MT_MEMORY_NONCACHED types so that, at boot-up, memory outside system
memory can be mapped at page-level granularity.

Previously the mapping was limited to section level, which creates
unnecessary additional mappings for which physical memory may not be
present.  On newer ARM cores with speculation, this is dangerous and
can result in untraceable aborts.

Signed-off-by: Santosh Shilimkar
Signed-off-by: Russell King
---
 arch/arm/mm/mmu.c | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

(limited to 'arch/arm/mm')

diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index a486bd0d97dc..6a3a2d0cd6db 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -247,6 +247,9 @@ static struct mem_type mem_types[] = {
 		.domain    = DOMAIN_USER,
 	},
 	[MT_MEMORY] = {
+		.prot_pte  = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
+				L_PTE_USER | L_PTE_EXEC,
+		.prot_l1   = PMD_TYPE_TABLE,
 		.prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE,
 		.domain    = DOMAIN_KERNEL,
 	},
@@ -255,6 +258,9 @@ static struct mem_type mem_types[] = {
 		.domain    = DOMAIN_KERNEL,
 	},
 	[MT_MEMORY_NONCACHED] = {
+		.prot_pte  = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
+				L_PTE_USER | L_PTE_EXEC | L_PTE_MT_BUFFERABLE,
+		.prot_l1   = PMD_TYPE_TABLE,
 		.prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE,
 		.domain    = DOMAIN_KERNEL,
 	},
@@ -412,9 +418,12 @@ static void __init build_mem_type_table(void)
 	 * Enable CPU-specific coherency if supported.
 	 * (Only available on XSC3 at the moment.)
 	 */
-	if (arch_is_coherent() && cpu_is_xsc3())
+	if (arch_is_coherent() && cpu_is_xsc3()) {
 		mem_types[MT_MEMORY].prot_sect |= PMD_SECT_S;
-
+		mem_types[MT_MEMORY].prot_pte |= L_PTE_SHARED;
+		mem_types[MT_MEMORY_NONCACHED].prot_sect |= PMD_SECT_S;
+		mem_types[MT_MEMORY_NONCACHED].prot_pte |= L_PTE_SHARED;
+	}
 	/*
 	 * ARMv6 and above have extended page tables.
 	 */
@@ -439,7 +448,9 @@ static void __init build_mem_type_table(void)
 		mem_types[MT_DEVICE_CACHED].prot_sect |= PMD_SECT_S;
 		mem_types[MT_DEVICE_CACHED].prot_pte |= L_PTE_SHARED;
 		mem_types[MT_MEMORY].prot_sect |= PMD_SECT_S;
+		mem_types[MT_MEMORY].prot_pte |= L_PTE_SHARED;
 		mem_types[MT_MEMORY_NONCACHED].prot_sect |= PMD_SECT_S;
+		mem_types[MT_MEMORY_NONCACHED].prot_pte |= L_PTE_SHARED;
 #endif
 	}
 
@@ -476,6 +487,8 @@ static void __init build_mem_type_table(void)
 	mem_types[MT_LOW_VECTORS].prot_l1 |= ecc_mask;
 	mem_types[MT_HIGH_VECTORS].prot_l1 |= ecc_mask;
 	mem_types[MT_MEMORY].prot_sect |= ecc_mask | cp->pmd;
+	mem_types[MT_MEMORY].prot_pte |= kern_pgprot;
+	mem_types[MT_MEMORY_NONCACHED].prot_sect |= ecc_mask;
 	mem_types[MT_ROM].prot_sect |= cp->pmd;
 
 	switch (cp->pmd) {
--
cgit v1.2.2
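A back-of-the-envelope illustration of why page granularity matters for small regions outside system memory; the 16KB SRAM size is made up:

    #include <stdio.h>

    #define SECTION_SIZE    (1024 * 1024UL) /* ARM first-level section */
    #define SMALL_PAGE      4096UL

    int main(void)
    {
            unsigned long sram = 16 * 1024; /* hypothetical on-chip SRAM */
            unsigned long sect = (sram + SECTION_SIZE - 1) / SECTION_SIZE * SECTION_SIZE;
            unsigned long page = (sram + SMALL_PAGE - 1) / SMALL_PAGE * SMALL_PAGE;

            printf("section-mapped: %lu KB, %lu KB unbacked\n",
                   sect / 1024, (sect - sram) / 1024);
            printf("page-mapped:    %lu KB, %lu KB unbacked\n",
                   page / 1024, (page - sram) / 1024);
            return 0;
    }

With 1MB sections the CPU gains roughly 1008KB of mapped address space with nothing behind it, and on a speculating core any prefetch into that window can abort untraceably; with page-level mappings nothing beyond the real 16KB is ever mapped.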