about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Russell King <rmk+kernel@arm.linux.org.uk> 2015-06-01 18:44:46 -0400
committer: Russell King <rmk+kernel@arm.linux.org.uk> 2015-07-25 10:28:05 -0400
commit: f81309067ff2d84788316c513a415f6bb8c9171f (patch)
tree: c01d7d99a60329f3a0716f883c887eb388267aa4
parent: d770e558e21961ad6cfdf0ff7df0eb5d7d4f0754 (diff)
ARM: move heavy barrier support out of line
The existing memory barrier macro causes a significant amount of code
to be inserted inline at every call site. For example, in
gpio_set_irq_type(), we have this for mb():

c0344c08: f57ff04e dsb st
c0344c0c: e59f8190 ldr r8, [pc, #400] ; c0344da4 <gpio_set_irq_type+0x230>
c0344c10: e3590004 cmp r9, #4
c0344c14: e5983014 ldr r3, [r8, #20]
c0344c18: 0a000054 beq c0344d70 <gpio_set_irq_type+0x1fc>
c0344c1c: e3530000 cmp r3, #0
c0344c20: 0a000004 beq c0344c38 <gpio_set_irq_type+0xc4>
c0344c24: e50b2030 str r2, [fp, #-48] ; 0xffffffd0
c0344c28: e50bc034 str ip, [fp, #-52] ; 0xffffffcc
c0344c2c: e12fff33 blx r3
c0344c30: e51bc034 ldr ip, [fp, #-52] ; 0xffffffcc
c0344c34: e51b2030 ldr r2, [fp, #-48] ; 0xffffffd0
c0344c38: e5963004 ldr r3, [r6, #4]

Moving the outer_cache_sync() call out of line reduces the impact of
the barrier:

c0344968: f57ff04e dsb st
c034496c: e35a0004 cmp sl, #4
c0344970: e50b2030 str r2, [fp, #-48] ; 0xffffffd0
c0344974: 0a000044 beq c0344a8c <gpio_set_irq_type+0x1b8>
c0344978: ebf363dd bl c001d8f4 <arm_heavy_mb>
c034497c: e5953004 ldr r3, [r5, #4]

This should reduce the cache footprint of this code. Overall, this
results in a reduction of around 20K in the kernel size:

    text    data      bss      dec     hex filename
10773970  667392 10369656 21811018 14ccf4a ../build/imx6/vmlinux-old
10754219  667392 10369656 21791267 14c8223 ../build/imx6/vmlinux-new

Another advantage to this approach is that we can finally resolve the
issue of SoCs which have their own memory barrier requirements within
multiplatform kernels (such as OMAP.) Here, the bus interconnects need
additional handling to ensure that writes become visible in the correct
order (eg, between dma_map() operations, writes to DMA coherent memory,
and MMIO accesses.)

Acked-by: Tony Lindgren <tony@atomide.com>
Acked-by: Richard Woodruff <r-woodruff2@ti.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
-rw-r--r-- arch/arm/include/asm/barrier.h 12
-rw-r--r-- arch/arm/include/asm/outercache.h 17
-rw-r--r-- arch/arm/kernel/irq.c 1
-rw-r--r-- arch/arm/mach-mmp/pm-pxa910.c 1
-rw-r--r-- arch/arm/mach-prima2/pm.c 1
-rw-r--r-- arch/arm/mach-ux500/cache-l2x0.c 1
-rw-r--r-- arch/arm/mm/Kconfig 4
-rw-r--r-- arch/arm/mm/flush.c 11
8 files changed, 28 insertions, 20 deletions
diff --git a/arch/arm/include/asm/barrier.h b/arch/arm/include/asm/barrier.h
index 6c2327e1c732..fea99b0e2087 100644
--- a/arch/arm/include/asm/barrier.h
+++ b/arch/arm/include/asm/barrier.h
@@ -2,7 +2,6 @@
2#define __ASM_BARRIER_H 2#define __ASM_BARRIER_H
3 3
4#ifndef __ASSEMBLY__ 4#ifndef __ASSEMBLY__
5#include <asm/outercache.h>
6 5
7#define nop() __asm__ __volatile__("mov\tr0,r0\t@ nop\n\t"); 6#define nop() __asm__ __volatile__("mov\tr0,r0\t@ nop\n\t");
8 7
@@ -37,12 +36,19 @@
37#define dmb(x) __asm__ __volatile__ ("" : : : "memory") 36#define dmb(x) __asm__ __volatile__ ("" : : : "memory")
38#endif 37#endif
39 38
39#ifdef CONFIG_ARM_HEAVY_MB
40extern void arm_heavy_mb(void);
41#define __arm_heavy_mb(x...) do { dsb(x); arm_heavy_mb(); } while (0)
42#else
43#define __arm_heavy_mb(x...) dsb(x)
44#endif
45
40#ifdef CONFIG_ARCH_HAS_BARRIERS 46#ifdef CONFIG_ARCH_HAS_BARRIERS
41#include <mach/barriers.h> 47#include <mach/barriers.h>
42#elif defined(CONFIG_ARM_DMA_MEM_BUFFERABLE) || defined(CONFIG_SMP) 48#elif defined(CONFIG_ARM_DMA_MEM_BUFFERABLE) || defined(CONFIG_SMP)
43#define mb() do { dsb(); outer_sync(); } while (0) 49#define mb() __arm_heavy_mb()
44#define rmb() dsb() 50#define rmb() dsb()
45#define wmb() do { dsb(st); outer_sync(); } while (0) 51#define wmb() __arm_heavy_mb(st)
46#define dma_rmb() dmb(osh) 52#define dma_rmb() dmb(osh)
47#define dma_wmb() dmb(oshst) 53#define dma_wmb() dmb(oshst)
48#else 54#else
diff --git a/arch/arm/include/asm/outercache.h b/arch/arm/include/asm/outercache.h
index 563b92fc2f41..c2bf24f40177 100644
--- a/arch/arm/include/asm/outercache.h
+++ b/arch/arm/include/asm/outercache.h
@@ -129,21 +129,4 @@ static inline void outer_resume(void) { }
129 129
130#endif 130#endif
131 131
132#ifdef CONFIG_OUTER_CACHE_SYNC
133/**
134 * outer_sync - perform a sync point for outer cache
135 *
136 * Ensure that all outer cache operations are complete and any store
137 * buffers are drained.
138 */
139static inline void outer_sync(void)
140{
141 if (outer_cache.sync)
142 outer_cache.sync();
143}
144#else
145static inline void outer_sync(void)
146{ }
147#endif
148
149#endif /* __ASM_OUTERCACHE_H */ 132#endif /* __ASM_OUTERCACHE_H */
diff --git a/arch/arm/kernel/irq.c b/arch/arm/kernel/irq.c
index 350f188c92d2..b96c8ed1723a 100644
--- a/arch/arm/kernel/irq.c
+++ b/arch/arm/kernel/irq.c
@@ -39,6 +39,7 @@
39#include <linux/export.h> 39#include <linux/export.h>
40 40
41#include <asm/hardware/cache-l2x0.h> 41#include <asm/hardware/cache-l2x0.h>
42#include <asm/outercache.h>
42#include <asm/exception.h> 43#include <asm/exception.h>
43#include <asm/mach/arch.h> 44#include <asm/mach/arch.h>
44#include <asm/mach/irq.h> 45#include <asm/mach/irq.h>
diff --git a/arch/arm/mach-mmp/pm-pxa910.c b/arch/arm/mach-mmp/pm-pxa910.c
index 04c9daf9f8d7..7db5870d127f 100644
--- a/arch/arm/mach-mmp/pm-pxa910.c
+++ b/arch/arm/mach-mmp/pm-pxa910.c
@@ -18,6 +18,7 @@
18#include <linux/io.h> 18#include <linux/io.h>
19#include <linux/irq.h> 19#include <linux/irq.h>
20#include <asm/mach-types.h> 20#include <asm/mach-types.h>
21#include <asm/outercache.h>
21#include <mach/hardware.h> 22#include <mach/hardware.h>
22#include <mach/cputype.h> 23#include <mach/cputype.h>
23#include <mach/addr-map.h> 24#include <mach/addr-map.h>
diff --git a/arch/arm/mach-prima2/pm.c b/arch/arm/mach-prima2/pm.c
index d99d08eeb966..83e94c95e314 100644
--- a/arch/arm/mach-prima2/pm.c
+++ b/arch/arm/mach-prima2/pm.c
@@ -16,6 +16,7 @@
16#include <linux/of_platform.h> 16#include <linux/of_platform.h>
17#include <linux/io.h> 17#include <linux/io.h>
18#include <linux/rtc/sirfsoc_rtciobrg.h> 18#include <linux/rtc/sirfsoc_rtciobrg.h>
19#include <asm/outercache.h>
19#include <asm/suspend.h> 20#include <asm/suspend.h>
20#include <asm/hardware/cache-l2x0.h> 21#include <asm/hardware/cache-l2x0.h>
21 22
diff --git a/arch/arm/mach-ux500/cache-l2x0.c b/arch/arm/mach-ux500/cache-l2x0.c
index 7557bede7ae6..780bd13cd7e3 100644
--- a/arch/arm/mach-ux500/cache-l2x0.c
+++ b/arch/arm/mach-ux500/cache-l2x0.c
@@ -8,6 +8,7 @@
8#include <linux/of.h> 8#include <linux/of.h>
9#include <linux/of_address.h> 9#include <linux/of_address.h>
10 10
11#include <asm/outercache.h>
11#include <asm/hardware/cache-l2x0.h> 12#include <asm/hardware/cache-l2x0.h>
12 13
13#include "db8500-regs.h" 14#include "db8500-regs.h"
diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
index 7c6b976ab8d3..df7537f12469 100644
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -883,6 +883,7 @@ config OUTER_CACHE
883 883
884config OUTER_CACHE_SYNC 884config OUTER_CACHE_SYNC
885 bool 885 bool
886 select ARM_HEAVY_MB
886 help 887 help
887 The outer cache has a outer_cache_fns.sync function pointer 888 The outer cache has a outer_cache_fns.sync function pointer
888 that can be used to drain the write buffer of the outer cache. 889 that can be used to drain the write buffer of the outer cache.
@@ -1031,6 +1032,9 @@ config ARCH_HAS_BARRIERS
1031 This option allows the use of custom mandatory barriers 1032 This option allows the use of custom mandatory barriers
1032 included via the mach/barriers.h file. 1033 included via the mach/barriers.h file.
1033 1034
1035config ARM_HEAVY_MB
1036 bool
1037
1034config ARCH_SUPPORTS_BIG_ENDIAN 1038config ARCH_SUPPORTS_BIG_ENDIAN
1035 bool 1039 bool
1036 help 1040 help
diff --git a/arch/arm/mm/flush.c b/arch/arm/mm/flush.c
index 34b66af516ea..ce6c2960d5ac 100644
--- a/arch/arm/mm/flush.c
+++ b/arch/arm/mm/flush.c
@@ -21,6 +21,17 @@
21 21
22#include "mm.h" 22#include "mm.h"
23 23
24#ifdef CONFIG_ARM_HEAVY_MB
25void arm_heavy_mb(void)
26{
27#ifdef CONFIG_OUTER_CACHE_SYNC
28 if (outer_cache.sync)
29 outer_cache.sync();
30#endif
31}
32EXPORT_SYMBOL(arm_heavy_mb);
33#endif
34
24#ifdef CONFIG_CPU_CACHE_VIPT 35#ifdef CONFIG_CPU_CACHE_VIPT
25 36
26static void flush_pfn_alias(unsigned long pfn, unsigned long vaddr) 37static void flush_pfn_alias(unsigned long pfn, unsigned long vaddr)