author    Alexander Duyck <alexander.h.duyck@redhat.com>    2014-12-11 18:02:06 -0500
committer David S. Miller <davem@davemloft.net>             2014-12-11 21:15:06 -0500
commit    1077fa36f23e259858caf6f269a47393a5aff523 (patch)
tree      569b84cfd3b409f07ce6a10f0166ca78307705e1
parent    8a449718414ff10b9d5559ed3e8e09c7178774f2 (diff)
arch: Add lightweight memory barriers dma_rmb() and dma_wmb()
There are a number of situations where the mandatory barriers rmb() and
wmb() are used to order memory/memory operations in device drivers, and
those barriers are much heavier than they actually need to be.  For
example, in the case of PowerPC wmb() calls the heavy-weight sync
instruction when for coherent memory operations all that is really
needed is an lwsync or eieio instruction.

This commit adds a coherent-only version of the mandatory memory
barriers rmb() and wmb().  In most cases this should result in the
barrier being the same as the SMP barriers for the SMP case; however,
in some cases we use a barrier that is somewhere in between rmb() and
smp_rmb().  For example, on ARM the rmb barriers break down as follows:

  Barrier    Call      Explanation
  ---------  --------  --------------------------------------
  rmb()      dsb()     Data synchronization barrier - system
  dma_rmb()  dmb(osh)  Data memory barrier - outer shareable
  smp_rmb()  dmb(ish)  Data memory barrier - inner shareable

These new barriers are not as safe as the standard rmb() and wmb().
Specifically, they do not guarantee ordering between coherent and
incoherent memories.  The primary use case for these would be to
enforce ordering of reads and writes when accessing coherent memory
that is shared between the CPU and a device.

It may also be noted that there is no dma_mb().  Most architectures
don't provide a good mechanism for performing a coherent-only full
barrier without resorting to the same mechanism used in mb().  As such
there isn't much to be gained in trying to define such a function.

Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Cc: Michael Ellerman <michael@ellerman.id.au>
Cc: Michael Neuling <mikey@neuling.org>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: David Miller <davem@davemloft.net>
Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Alexander Duyck <alexander.h.duyck@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
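As a quick orientation before the diffstat and per-architecture hunks, here is a minimal consumer-side sketch of the pattern these barriers target (hypothetical names throughout -- rx_ring, RING_SIZE, DESC_DONE and process() are illustrative, not from this commit):

	/* Sketch only: poll a DMA receive ring that lives entirely in
	 * coherent (consistent) memory.  dma_rmb() is enough to order
	 * the status read before the payload read; a full rmb() would
	 * additionally order against incoherent MMIO, which is not
	 * needed on this path. */
	while (ACCESS_ONCE(rx_ring[tail].status) & DESC_DONE) {
		dma_rmb();
		process(rx_ring[tail].data);
		tail = (tail + 1) % RING_SIZE;
	}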
-rw-r--r--  Documentation/memory-barriers.txt    42
-rw-r--r--  arch/arm/include/asm/barrier.h        4
-rw-r--r--  arch/arm64/include/asm/barrier.h      3
-rw-r--r--  arch/ia64/include/asm/barrier.h       3
-rw-r--r--  arch/metag/include/asm/barrier.h     14
-rw-r--r--  arch/mips/include/asm/barrier.h       9
-rw-r--r--  arch/powerpc/include/asm/barrier.h   13
-rw-r--r--  arch/s390/include/asm/barrier.h       2
-rw-r--r--  arch/sparc/include/asm/barrier_64.h   3
-rw-r--r--  arch/x86/include/asm/barrier.h       11
-rw-r--r--  arch/x86/um/asm/barrier.h            13
-rw-r--r--  include/asm-generic/barrier.h         8
12 files changed, 99 insertions(+), 26 deletions(-)
diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt
index 7ee2ae6d5451..70a09f8a0383 100644
--- a/Documentation/memory-barriers.txt
+++ b/Documentation/memory-barriers.txt
@@ -1633,6 +1633,48 @@ There are some more advanced barrier functions:
     operations" subsection for information on where to use these.
 
 
+ (*) dma_wmb();
+ (*) dma_rmb();
+
+     These are for use with consistent memory to guarantee the ordering
+     of writes or reads of shared memory accessible to both the CPU and a
+     DMA capable device.
+
+     For example, consider a device driver that shares memory with a device
+     and uses a descriptor status value to indicate if the descriptor belongs
+     to the device or the CPU, and a doorbell to notify it when new
+     descriptors are available:
+
+	if (desc->status != DEVICE_OWN) {
+		/* do not read data until we own descriptor */
+		dma_rmb();
+
+		/* read/modify data */
+		read_data = desc->data;
+		desc->data = write_data;
+
+		/* flush modifications before status update */
+		dma_wmb();
+
+		/* assign ownership */
+		desc->status = DEVICE_OWN;
+
+		/* force memory to sync before notifying device via MMIO */
+		wmb();
+
+		/* notify device of new descriptors */
+		writel(DESC_NOTIFY, doorbell);
+	}
+
+     The dma_rmb() allows us to guarantee the device has released ownership
+     before we read the data from the descriptor, and the dma_wmb() allows
+     us to guarantee the data is written to the descriptor before the device
+     can see it now has ownership.  The wmb() is needed to guarantee that the
+     cache coherent memory writes have completed before attempting a write to
+     the cache incoherent MMIO region.
+
+     See Documentation/DMA-API.txt for more information on consistent memory.
+
 MMIO WRITE BARRIER
 ------------------
 
diff --git a/arch/arm/include/asm/barrier.h b/arch/arm/include/asm/barrier.h
index c6a3e73a6e24..d2f81e6b8c1c 100644
--- a/arch/arm/include/asm/barrier.h
+++ b/arch/arm/include/asm/barrier.h
@@ -43,10 +43,14 @@
 #define mb()		do { dsb(); outer_sync(); } while (0)
 #define rmb()		dsb()
 #define wmb()		do { dsb(st); outer_sync(); } while (0)
+#define dma_rmb()	dmb(osh)
+#define dma_wmb()	dmb(oshst)
 #else
 #define mb()		barrier()
 #define rmb()		barrier()
 #define wmb()		barrier()
+#define dma_rmb()	barrier()
+#define dma_wmb()	barrier()
 #endif
 
 #ifndef CONFIG_SMP
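A note on the ARM hunk: dmb(osh)/dmb(oshst) emit "dmb osh"/"dmb oshst", ordering accesses within the outer shareable domain -- broad enough to cover coherent DMA masters -- while the smp_* variants use the inner shareable domain, which spans only the CPUs. This is the "somewhere in between rmb() and smp_rmb()" case called out in the commit message.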
diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h
index 6389d60574d9..a5abb0062d6e 100644
--- a/arch/arm64/include/asm/barrier.h
+++ b/arch/arm64/include/asm/barrier.h
@@ -32,6 +32,9 @@
 #define rmb()		dsb(ld)
 #define wmb()		dsb(st)
 
+#define dma_rmb()	dmb(oshld)
+#define dma_wmb()	dmb(oshst)
+
 #ifndef CONFIG_SMP
 #define smp_mb()	barrier()
 #define smp_rmb()	barrier()
diff --git a/arch/ia64/include/asm/barrier.h b/arch/ia64/include/asm/barrier.h
index e8fffb03963c..f6769eb2bbf9 100644
--- a/arch/ia64/include/asm/barrier.h
+++ b/arch/ia64/include/asm/barrier.h
@@ -39,6 +39,9 @@
 #define rmb()		mb()
 #define wmb()		mb()
 
+#define dma_rmb()	mb()
+#define dma_wmb()	mb()
+
 #ifdef CONFIG_SMP
 # define smp_mb()	mb()
 #else
diff --git a/arch/metag/include/asm/barrier.h b/arch/metag/include/asm/barrier.h
index 6d8b8c9b7c25..d703d8e26a65 100644
--- a/arch/metag/include/asm/barrier.h
+++ b/arch/metag/include/asm/barrier.h
@@ -4,8 +4,6 @@
 #include <asm/metag_mem.h>
 
 #define nop()		asm volatile ("NOP")
-#define mb()		wmb()
-#define rmb()		barrier()
 
 #ifdef CONFIG_METAG_META21
 
@@ -41,11 +39,13 @@ static inline void wr_fence(void)
 
 #endif /* !CONFIG_METAG_META21 */
 
-static inline void wmb(void)
-{
-	/* flush writes through the write combiner */
-	wr_fence();
-}
+/* flush writes through the write combiner */
+#define mb()		wr_fence()
+#define rmb()		barrier()
+#define wmb()		mb()
+
+#define dma_rmb()	rmb()
+#define dma_wmb()	wmb()
 
 #ifndef CONFIG_SMP
 #define fence()		do { } while (0)
diff --git a/arch/mips/include/asm/barrier.h b/arch/mips/include/asm/barrier.h
index 3d69aa829a76..2b8bbbcb9be0 100644
--- a/arch/mips/include/asm/barrier.h
+++ b/arch/mips/include/asm/barrier.h
@@ -75,20 +75,21 @@
 
 #include <asm/wbflush.h>
 
-#define wmb()		fast_wmb()
-#define rmb()		fast_rmb()
 #define mb()		wbflush()
 #define iob()		wbflush()
 
 #else /* !CONFIG_CPU_HAS_WB */
 
-#define wmb()		fast_wmb()
-#define rmb()		fast_rmb()
 #define mb()		fast_mb()
 #define iob()		fast_iob()
 
 #endif /* !CONFIG_CPU_HAS_WB */
 
+#define wmb()		fast_wmb()
+#define rmb()		fast_rmb()
+#define dma_wmb()	fast_wmb()
+#define dma_rmb()	fast_rmb()
+
 #if defined(CONFIG_WEAK_ORDERING) && defined(CONFIG_SMP)
 # ifdef CONFIG_CPU_CAVIUM_OCTEON
 # define smp_mb()	__sync()
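The mips hunk is largely a cleanup: wmb() and rmb() were defined identically (fast_wmb()/fast_rmb()) in both branches of the CONFIG_CPU_HAS_WB conditional, so they are hoisted out of it alongside the new dma_*() definitions.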
diff --git a/arch/powerpc/include/asm/barrier.h b/arch/powerpc/include/asm/barrier.h
index cb6d66c6e3e1..a3bf5be111ff 100644
--- a/arch/powerpc/include/asm/barrier.h
+++ b/arch/powerpc/include/asm/barrier.h
@@ -36,8 +36,6 @@
 
 #define set_mb(var, value)	do { var = value; mb(); } while (0)
 
-#ifdef CONFIG_SMP
-
 #ifdef __SUBARCH_HAS_LWSYNC
 # define SMPWMB LWSYNC
 #else
@@ -45,12 +43,17 @@
 #endif
 
 #define __lwsync()	__asm__ __volatile__ (stringify_in_c(LWSYNC) : : :"memory")
+#define dma_rmb()	__lwsync()
+#define dma_wmb()	__asm__ __volatile__ (stringify_in_c(SMPWMB) : : :"memory")
+
+#ifdef CONFIG_SMP
+#define smp_lwsync()	__lwsync()
 
 #define smp_mb()	mb()
 #define smp_rmb()	__lwsync()
 #define smp_wmb()	__asm__ __volatile__ (stringify_in_c(SMPWMB) : : :"memory")
 #else
-#define __lwsync()	barrier()
+#define smp_lwsync()	barrier()
 
 #define smp_mb()	barrier()
 #define smp_rmb()	barrier()
@@ -72,7 +75,7 @@
 #define smp_store_release(p, v)					\
 do {									\
 	compiletime_assert_atomic_type(*p);				\
-	__lwsync();							\
+	smp_lwsync();							\
 	ACCESS_ONCE(*p) = (v);						\
 } while (0)
 
@@ -80,7 +83,7 @@ do { \
 ({									\
 	typeof(*p) ___p1 = ACCESS_ONCE(*p);				\
 	compiletime_assert_atomic_type(*p);				\
-	__lwsync();							\
+	smp_lwsync();							\
 	___p1;								\
 })
 
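The powerpc hunks contain the one subtle refactor in the series: previously the !CONFIG_SMP branch redefined __lwsync() itself to barrier(), which would have turned the new dma_rmb() into a plain compiler barrier on UP kernels. Introducing smp_lwsync() keeps that UP weakening for the smp_* users and smp_store_release()/smp_load_acquire(), while dma_rmb()/dma_wmb() always emit the real LWSYNC/SMPWMB instructions -- the device must be ordered against regardless of how many CPUs are configured.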
diff --git a/arch/s390/include/asm/barrier.h b/arch/s390/include/asm/barrier.h
index 33d191d295e4..8d724718ec21 100644
--- a/arch/s390/include/asm/barrier.h
+++ b/arch/s390/include/asm/barrier.h
@@ -24,6 +24,8 @@
 
 #define rmb()				mb()
 #define wmb()				mb()
+#define dma_rmb()			rmb()
+#define dma_wmb()			wmb()
 #define smp_mb()			mb()
 #define smp_rmb()			rmb()
 #define smp_wmb()			wmb()
diff --git a/arch/sparc/include/asm/barrier_64.h b/arch/sparc/include/asm/barrier_64.h
index 6c974c0977ad..76648941fea7 100644
--- a/arch/sparc/include/asm/barrier_64.h
+++ b/arch/sparc/include/asm/barrier_64.h
@@ -37,6 +37,9 @@ do { __asm__ __volatile__("ba,pt %%xcc, 1f\n\t" \
 #define rmb()	__asm__ __volatile__("":::"memory")
 #define wmb()	__asm__ __volatile__("":::"memory")
 
+#define dma_rmb()	rmb()
+#define dma_wmb()	wmb()
+
 #define set_mb(__var, __value) \
 	do { __var = __value; membar_safe("#StoreLoad"); } while(0)
 
diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h
index 5238000285c1..2ab1eb33106e 100644
--- a/arch/x86/include/asm/barrier.h
+++ b/arch/x86/include/asm/barrier.h
@@ -24,13 +24,16 @@
 #define wmb()	asm volatile("sfence" ::: "memory")
 #endif
 
-#ifdef CONFIG_SMP
-#define smp_mb()	mb()
 #ifdef CONFIG_X86_PPRO_FENCE
-# define smp_rmb()	rmb()
+#define dma_rmb()	rmb()
 #else
-# define smp_rmb()	barrier()
+#define dma_rmb()	barrier()
 #endif
+#define dma_wmb()	barrier()
+
+#ifdef CONFIG_SMP
+#define smp_mb()	mb()
+#define smp_rmb()	dma_rmb()
 #define smp_wmb()	barrier()
 #define set_mb(var, value) do { (void)xchg(&var, value); } while (0)
 #else /* !SMP */
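On x86 the refactor lets smp_rmb() simply reuse dma_rmb(): both need a real read fence only on CONFIG_X86_PPRO_FENCE parts and collapse to a compiler barrier() otherwise, and dma_wmb() can always be barrier() because x86 does not reorder stores to cacheable memory against each other.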
diff --git a/arch/x86/um/asm/barrier.h b/arch/x86/um/asm/barrier.h
index d6511d954e2b..2d7d9a1f5b53 100644
--- a/arch/x86/um/asm/barrier.h
+++ b/arch/x86/um/asm/barrier.h
@@ -29,17 +29,18 @@
 
 #endif /* CONFIG_X86_32 */
 
-#ifdef CONFIG_SMP
-
-#define smp_mb() mb()
 #ifdef CONFIG_X86_PPRO_FENCE
-#define smp_rmb() rmb()
+#define dma_rmb() rmb()
 #else /* CONFIG_X86_PPRO_FENCE */
-#define smp_rmb() barrier()
+#define dma_rmb() barrier()
 #endif /* CONFIG_X86_PPRO_FENCE */
+#define dma_wmb() barrier()
 
-#define smp_wmb() barrier()
+#ifdef CONFIG_SMP
 
+#define smp_mb() mb()
+#define smp_rmb() dma_rmb()
+#define smp_wmb() barrier()
 #define set_mb(var, value) do { (void)xchg(&var, value); } while (0)
 
 #else /* CONFIG_SMP */
diff --git a/include/asm-generic/barrier.h b/include/asm-generic/barrier.h
index 1402fa855388..f5c40b0fadc2 100644
--- a/include/asm-generic/barrier.h
+++ b/include/asm-generic/barrier.h
@@ -42,6 +42,14 @@
 #define wmb()	mb()
 #endif
 
+#ifndef dma_rmb
+#define dma_rmb()	rmb()
+#endif
+
+#ifndef dma_wmb
+#define dma_wmb()	wmb()
+#endif
+
 #ifndef read_barrier_depends
 #define read_barrier_depends()	do { } while (0)
 #endif
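With the asm-generic fallbacks above, portable driver code can use the new barriers unconditionally; an architecture with no lighter primitive simply pays the full rmb()/wmb() cost. A minimal transmit-side sketch under that assumption (desc, mapping, len, DESC_OWN, TX_NOTIFY and doorbell are hypothetical driver names, not from this commit):

	/* Sketch only: publish a descriptor held in coherent memory,
	 * then notify the device through incoherent MMIO. */
	desc->addr = cpu_to_le64(mapping);	/* fill payload fields */
	desc->len  = cpu_to_le16(len);
	dma_wmb();				/* fields visible before ownership flips */
	desc->flags = cpu_to_le16(DESC_OWN);	/* hand the descriptor to the device */
	wmb();					/* coherent writes complete before MMIO */
	writel(TX_NOTIFY, doorbell);		/* ring the doorbell */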