aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2014-12-11 21:15:37 -0500
committerDavid S. Miller <davem@davemloft.net>2014-12-11 21:15:37 -0500
commit697766df6b952f09b17eefda8b5ef746acb9c1eb (patch)
treea4962667802529c26231f4768c0233a15f9e9e4c
parentc11a9009ae6a8c42a8cd69d885601e1aa6fbea04 (diff)
parent124b74c18e0e31b24638d256afee7122a994e1b3 (diff)
Merge branch 'dma_mb'
Alexander Duyck says: ==================== arch: Add lightweight memory barriers for coherent memory access These patches introduce two new primitives for synchronizing cache coherent memory writes and reads. These two new primitives are: dma_rmb() dma_wmb() The first patch cleans up some unnecessary overhead related to the definition of read_barrier_depends, smp_read_barrier_depends, and comments related to the barrier. The second patch adds the primitives for the applicable architectures and asm-generic. The third patch adds the barriers to r8169 which turns out to be a good example of where the new barriers might be useful as they have full rmb()/wmb() barriers ordering accesses to the descriptors and the DescOwn bit. The fourth patch adds support for coherent_rmb() to the Intel fm10k, igb, and ixgbe drivers. Testing with the ixgbe driver has shown a processing time reduction of at least 7ns per 64B frame on a Core i7-4930K. This patch series is essentially the v7 for: v4-7: Add lightweight memory barriers for coherent memory access v3: Add lightweight memory barriers fast_rmb() and fast_wmb() v2: Introduce load_acquire() and store_release() v1: Introduce read_acquire() The key changes in this patch series versus the earlier patches are: v7 resubmit: - Added Acked-by: Ben Herrenschmidt from v5 to dma_rmb/wmb patch - No code changes from previous set, still applies cleanly and builds. 
v7: - Dropped test/debug patch that was accidentally slipped in v6: - Replaced "memory based device I/O" with "consistent memory" in docs - Added reference to DMA-API.txt to explain consistent memory v5: - Renamed barriers dma_rmb and dma_wmb - Undid smp_wmb changes in x86 and PowerPC - Defined smp_rmb as __lwsync for SMP case on PowerPC v4: - Renamed barriers coherent_rmb and coherent_wmb - Added smp_lwsync for use in smp_load_acquire/smp_store_release v3: - Moved away from acquire()/store() and instead focused on barriers - Added cleanup of read_barrier_depends - Added change in r8169 to fix cur_tx/DescOwn ordering - Simplified changes to just replacing/moving barriers in r8169 - Added update to documentation with code example v2: - Renamed read_acquire() to be consistent with smp_load_acquire() - Changed barrier used to be consistent with smp_load_acquire() - Updated PowerPC code to use __lwsync based on IBM article - Added store_release() as this is a viable use case for drivers - Added r8169 patch which is able to fully use primitives - Added fm10k/igb/ixgbe patch which is able to test performance ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--Documentation/memory-barriers.txt42
-rw-r--r--arch/alpha/include/asm/barrier.h51
-rw-r--r--arch/arm/include/asm/barrier.h4
-rw-r--r--arch/arm64/include/asm/barrier.h3
-rw-r--r--arch/blackfin/include/asm/barrier.h51
-rw-r--r--arch/ia64/include/asm/barrier.h25
-rw-r--r--arch/metag/include/asm/barrier.h19
-rw-r--r--arch/mips/include/asm/barrier.h61
-rw-r--r--arch/powerpc/include/asm/barrier.h19
-rw-r--r--arch/s390/include/asm/barrier.h7
-rw-r--r--arch/sparc/include/asm/barrier_64.h7
-rw-r--r--arch/x86/include/asm/barrier.h70
-rw-r--r--arch/x86/um/asm/barrier.h20
-rw-r--r--drivers/net/ethernet/intel/fm10k/fm10k_main.c6
-rw-r--r--drivers/net/ethernet/intel/igb/igb_main.c6
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_main.c9
-rw-r--r--drivers/net/ethernet/realtek/r8169.c29
-rw-r--r--include/asm-generic/barrier.h8
18 files changed, 258 insertions, 179 deletions
diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt
index 7ee2ae6d5451..70a09f8a0383 100644
--- a/Documentation/memory-barriers.txt
+++ b/Documentation/memory-barriers.txt
@@ -1633,6 +1633,48 @@ There are some more advanced barrier functions:
1633 operations" subsection for information on where to use these. 1633 operations" subsection for information on where to use these.
1634 1634
1635 1635
1636 (*) dma_wmb();
1637 (*) dma_rmb();
1638
1639 These are for use with consistent memory to guarantee the ordering
1640 of writes or reads of shared memory accessible to both the CPU and a
1641 DMA capable device.
1642
1643 For example, consider a device driver that shares memory with a device
1644 and uses a descriptor status value to indicate if the descriptor belongs
1645 to the device or the CPU, and a doorbell to notify it when new
1646 descriptors are available:
1647
1648 if (desc->status != DEVICE_OWN) {
1649 /* do not read data until we own descriptor */
1650 dma_rmb();
1651
1652 /* read/modify data */
1653 read_data = desc->data;
1654 desc->data = write_data;
1655
1656 /* flush modifications before status update */
1657 dma_wmb();
1658
1659 /* assign ownership */
1660 desc->status = DEVICE_OWN;
1661
1662 /* force memory to sync before notifying device via MMIO */
1663 wmb();
1664
1665 /* notify device of new descriptors */
1666 writel(DESC_NOTIFY, doorbell);
1667 }
1668
1669 The dma_rmb() allows us to guarantee the device has released ownership
1670 before we read the data from the descriptor, and the dma_wmb() allows
1671 us to guarantee the data is written to the descriptor before the device
1672 can see it now has ownership. The wmb() is needed to guarantee that the
1673 cache coherent memory writes have completed before attempting a write to
1674 the cache incoherent MMIO region.
1675
1676 See Documentation/DMA-API.txt for more information on consistent memory.
1677
1636MMIO WRITE BARRIER 1678MMIO WRITE BARRIER
1637------------------ 1679------------------
1638 1680
diff --git a/arch/alpha/include/asm/barrier.h b/arch/alpha/include/asm/barrier.h
index 3832bdb794fe..77516c87255d 100644
--- a/arch/alpha/include/asm/barrier.h
+++ b/arch/alpha/include/asm/barrier.h
@@ -7,6 +7,57 @@
7#define rmb() __asm__ __volatile__("mb": : :"memory") 7#define rmb() __asm__ __volatile__("mb": : :"memory")
8#define wmb() __asm__ __volatile__("wmb": : :"memory") 8#define wmb() __asm__ __volatile__("wmb": : :"memory")
9 9
10/**
11 * read_barrier_depends - Flush all pending reads that subsequent reads
12 * depend on.
13 *
14 * No data-dependent reads from memory-like regions are ever reordered
15 * over this barrier. All reads preceding this primitive are guaranteed
16 * to access memory (but not necessarily other CPUs' caches) before any
17 * reads following this primitive that depend on the data returned by
18 * any of the preceding reads. This primitive is much lighter weight than
19 * rmb() on most CPUs, and is never heavier weight than is
20 * rmb().
21 *
22 * These ordering constraints are respected by both the local CPU
23 * and the compiler.
24 *
25 * Ordering is not guaranteed by anything other than these primitives,
26 * not even by data dependencies. See the documentation for
27 * memory_barrier() for examples and URLs to more information.
28 *
29 * For example, the following code would force ordering (the initial
30 * value of "a" is zero, "b" is one, and "p" is "&a"):
31 *
32 * <programlisting>
33 * CPU 0 CPU 1
34 *
35 * b = 2;
36 * memory_barrier();
37 * p = &b; q = p;
38 * read_barrier_depends();
39 * d = *q;
40 * </programlisting>
41 *
42 * because the read of "*q" depends on the read of "p" and these
43 * two reads are separated by a read_barrier_depends(). However,
44 * the following code, with the same initial values for "a" and "b":
45 *
46 * <programlisting>
47 * CPU 0 CPU 1
48 *
49 * a = 2;
50 * memory_barrier();
51 * b = 3; y = b;
52 * read_barrier_depends();
53 * x = a;
54 * </programlisting>
55 *
56 * does not enforce ordering, since there is no data dependency between
57 * the read of "a" and the read of "b". Therefore, on some CPUs, such
58 * as Alpha, "y" could be set to 3 and "x" to 0. Use rmb()
59 * in cases like this where there are no data dependencies.
60 */
10#define read_barrier_depends() __asm__ __volatile__("mb": : :"memory") 61#define read_barrier_depends() __asm__ __volatile__("mb": : :"memory")
11 62
12#ifdef CONFIG_SMP 63#ifdef CONFIG_SMP
diff --git a/arch/arm/include/asm/barrier.h b/arch/arm/include/asm/barrier.h
index c6a3e73a6e24..d2f81e6b8c1c 100644
--- a/arch/arm/include/asm/barrier.h
+++ b/arch/arm/include/asm/barrier.h
@@ -43,10 +43,14 @@
43#define mb() do { dsb(); outer_sync(); } while (0) 43#define mb() do { dsb(); outer_sync(); } while (0)
44#define rmb() dsb() 44#define rmb() dsb()
45#define wmb() do { dsb(st); outer_sync(); } while (0) 45#define wmb() do { dsb(st); outer_sync(); } while (0)
46#define dma_rmb() dmb(osh)
47#define dma_wmb() dmb(oshst)
46#else 48#else
47#define mb() barrier() 49#define mb() barrier()
48#define rmb() barrier() 50#define rmb() barrier()
49#define wmb() barrier() 51#define wmb() barrier()
52#define dma_rmb() barrier()
53#define dma_wmb() barrier()
50#endif 54#endif
51 55
52#ifndef CONFIG_SMP 56#ifndef CONFIG_SMP
diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h
index 6389d60574d9..a5abb0062d6e 100644
--- a/arch/arm64/include/asm/barrier.h
+++ b/arch/arm64/include/asm/barrier.h
@@ -32,6 +32,9 @@
32#define rmb() dsb(ld) 32#define rmb() dsb(ld)
33#define wmb() dsb(st) 33#define wmb() dsb(st)
34 34
35#define dma_rmb() dmb(oshld)
36#define dma_wmb() dmb(oshst)
37
35#ifndef CONFIG_SMP 38#ifndef CONFIG_SMP
36#define smp_mb() barrier() 39#define smp_mb() barrier()
37#define smp_rmb() barrier() 40#define smp_rmb() barrier()
diff --git a/arch/blackfin/include/asm/barrier.h b/arch/blackfin/include/asm/barrier.h
index 420006877998..dfb66fe88b34 100644
--- a/arch/blackfin/include/asm/barrier.h
+++ b/arch/blackfin/include/asm/barrier.h
@@ -22,6 +22,57 @@
22# define mb() do { barrier(); smp_check_barrier(); smp_mark_barrier(); } while (0) 22# define mb() do { barrier(); smp_check_barrier(); smp_mark_barrier(); } while (0)
23# define rmb() do { barrier(); smp_check_barrier(); } while (0) 23# define rmb() do { barrier(); smp_check_barrier(); } while (0)
24# define wmb() do { barrier(); smp_mark_barrier(); } while (0) 24# define wmb() do { barrier(); smp_mark_barrier(); } while (0)
25/*
26 * read_barrier_depends - Flush all pending reads that subsequent reads
27 * depend on.
28 *
29 * No data-dependent reads from memory-like regions are ever reordered
30 * over this barrier. All reads preceding this primitive are guaranteed
31 * to access memory (but not necessarily other CPUs' caches) before any
32 * reads following this primitive that depend on the data returned by
33 * any of the preceding reads. This primitive is much lighter weight than
34 * rmb() on most CPUs, and is never heavier weight than is
35 * rmb().
36 *
37 * These ordering constraints are respected by both the local CPU
38 * and the compiler.
39 *
40 * Ordering is not guaranteed by anything other than these primitives,
41 * not even by data dependencies. See the documentation for
42 * memory_barrier() for examples and URLs to more information.
43 *
44 * For example, the following code would force ordering (the initial
45 * value of "a" is zero, "b" is one, and "p" is "&a"):
46 *
47 * <programlisting>
48 * CPU 0 CPU 1
49 *
50 * b = 2;
51 * memory_barrier();
52 * p = &b; q = p;
53 * read_barrier_depends();
54 * d = *q;
55 * </programlisting>
56 *
57 * because the read of "*q" depends on the read of "p" and these
58 * two reads are separated by a read_barrier_depends(). However,
59 * the following code, with the same initial values for "a" and "b":
60 *
61 * <programlisting>
62 * CPU 0 CPU 1
63 *
64 * a = 2;
65 * memory_barrier();
66 * b = 3; y = b;
67 * read_barrier_depends();
68 * x = a;
69 * </programlisting>
70 *
71 * does not enforce ordering, since there is no data dependency between
72 * the read of "a" and the read of "b". Therefore, on some CPUs, such
73 * as Alpha, "y" could be set to 3 and "x" to 0. Use rmb()
74 * in cases like this where there are no data dependencies.
75 */
25# define read_barrier_depends() do { barrier(); smp_check_barrier(); } while (0) 76# define read_barrier_depends() do { barrier(); smp_check_barrier(); } while (0)
26#endif 77#endif
27 78
diff --git a/arch/ia64/include/asm/barrier.h b/arch/ia64/include/asm/barrier.h
index a48957c7b445..f6769eb2bbf9 100644
--- a/arch/ia64/include/asm/barrier.h
+++ b/arch/ia64/include/asm/barrier.h
@@ -35,26 +35,25 @@
35 * it's (presumably) much slower than mf and (b) mf.a is supported for 35 * it's (presumably) much slower than mf and (b) mf.a is supported for
36 * sequential memory pages only. 36 * sequential memory pages only.
37 */ 37 */
38#define mb() ia64_mf() 38#define mb() ia64_mf()
39#define rmb() mb() 39#define rmb() mb()
40#define wmb() mb() 40#define wmb() mb()
41#define read_barrier_depends() do { } while(0) 41
42#define dma_rmb() mb()
43#define dma_wmb() mb()
42 44
43#ifdef CONFIG_SMP 45#ifdef CONFIG_SMP
44# define smp_mb() mb() 46# define smp_mb() mb()
45# define smp_rmb() rmb()
46# define smp_wmb() wmb()
47# define smp_read_barrier_depends() read_barrier_depends()
48
49#else 47#else
50
51# define smp_mb() barrier() 48# define smp_mb() barrier()
52# define smp_rmb() barrier()
53# define smp_wmb() barrier()
54# define smp_read_barrier_depends() do { } while(0)
55
56#endif 49#endif
57 50
51#define smp_rmb() smp_mb()
52#define smp_wmb() smp_mb()
53
54#define read_barrier_depends() do { } while (0)
55#define smp_read_barrier_depends() do { } while (0)
56
58#define smp_mb__before_atomic() barrier() 57#define smp_mb__before_atomic() barrier()
59#define smp_mb__after_atomic() barrier() 58#define smp_mb__after_atomic() barrier()
60 59
diff --git a/arch/metag/include/asm/barrier.h b/arch/metag/include/asm/barrier.h
index c7591e80067c..d703d8e26a65 100644
--- a/arch/metag/include/asm/barrier.h
+++ b/arch/metag/include/asm/barrier.h
@@ -4,8 +4,6 @@
4#include <asm/metag_mem.h> 4#include <asm/metag_mem.h>
5 5
6#define nop() asm volatile ("NOP") 6#define nop() asm volatile ("NOP")
7#define mb() wmb()
8#define rmb() barrier()
9 7
10#ifdef CONFIG_METAG_META21 8#ifdef CONFIG_METAG_META21
11 9
@@ -41,13 +39,13 @@ static inline void wr_fence(void)
41 39
42#endif /* !CONFIG_METAG_META21 */ 40#endif /* !CONFIG_METAG_META21 */
43 41
44static inline void wmb(void) 42/* flush writes through the write combiner */
45{ 43#define mb() wr_fence()
46 /* flush writes through the write combiner */ 44#define rmb() barrier()
47 wr_fence(); 45#define wmb() mb()
48}
49 46
50#define read_barrier_depends() do { } while (0) 47#define dma_rmb() rmb()
48#define dma_wmb() wmb()
51 49
52#ifndef CONFIG_SMP 50#ifndef CONFIG_SMP
53#define fence() do { } while (0) 51#define fence() do { } while (0)
@@ -82,7 +80,10 @@ static inline void fence(void)
82#define smp_wmb() barrier() 80#define smp_wmb() barrier()
83#endif 81#endif
84#endif 82#endif
85#define smp_read_barrier_depends() do { } while (0) 83
84#define read_barrier_depends() do { } while (0)
85#define smp_read_barrier_depends() do { } while (0)
86
86#define set_mb(var, value) do { var = value; smp_mb(); } while (0) 87#define set_mb(var, value) do { var = value; smp_mb(); } while (0)
87 88
88#define smp_store_release(p, v) \ 89#define smp_store_release(p, v) \
diff --git a/arch/mips/include/asm/barrier.h b/arch/mips/include/asm/barrier.h
index d0101dd0575e..2b8bbbcb9be0 100644
--- a/arch/mips/include/asm/barrier.h
+++ b/arch/mips/include/asm/barrier.h
@@ -10,58 +10,6 @@
10 10
11#include <asm/addrspace.h> 11#include <asm/addrspace.h>
12 12
13/*
14 * read_barrier_depends - Flush all pending reads that subsequents reads
15 * depend on.
16 *
17 * No data-dependent reads from memory-like regions are ever reordered
18 * over this barrier. All reads preceding this primitive are guaranteed
19 * to access memory (but not necessarily other CPUs' caches) before any
20 * reads following this primitive that depend on the data return by
21 * any of the preceding reads. This primitive is much lighter weight than
22 * rmb() on most CPUs, and is never heavier weight than is
23 * rmb().
24 *
25 * These ordering constraints are respected by both the local CPU
26 * and the compiler.
27 *
28 * Ordering is not guaranteed by anything other than these primitives,
29 * not even by data dependencies. See the documentation for
30 * memory_barrier() for examples and URLs to more information.
31 *
32 * For example, the following code would force ordering (the initial
33 * value of "a" is zero, "b" is one, and "p" is "&a"):
34 *
35 * <programlisting>
36 * CPU 0 CPU 1
37 *
38 * b = 2;
39 * memory_barrier();
40 * p = &b; q = p;
41 * read_barrier_depends();
42 * d = *q;
43 * </programlisting>
44 *
45 * because the read of "*q" depends on the read of "p" and these
46 * two reads are separated by a read_barrier_depends(). However,
47 * the following code, with the same initial values for "a" and "b":
48 *
49 * <programlisting>
50 * CPU 0 CPU 1
51 *
52 * a = 2;
53 * memory_barrier();
54 * b = 3; y = b;
55 * read_barrier_depends();
56 * x = a;
57 * </programlisting>
58 *
59 * does not enforce ordering, since there is no data dependency between
60 * the read of "a" and the read of "b". Therefore, on some CPUs, such
61 * as Alpha, "y" could be set to 3 and "x" to 0. Use rmb()
62 * in cases like this where there are no data dependencies.
63 */
64
65#define read_barrier_depends() do { } while(0) 13#define read_barrier_depends() do { } while(0)
66#define smp_read_barrier_depends() do { } while(0) 14#define smp_read_barrier_depends() do { } while(0)
67 15
@@ -127,20 +75,21 @@
127 75
128#include <asm/wbflush.h> 76#include <asm/wbflush.h>
129 77
130#define wmb() fast_wmb()
131#define rmb() fast_rmb()
132#define mb() wbflush() 78#define mb() wbflush()
133#define iob() wbflush() 79#define iob() wbflush()
134 80
135#else /* !CONFIG_CPU_HAS_WB */ 81#else /* !CONFIG_CPU_HAS_WB */
136 82
137#define wmb() fast_wmb()
138#define rmb() fast_rmb()
139#define mb() fast_mb() 83#define mb() fast_mb()
140#define iob() fast_iob() 84#define iob() fast_iob()
141 85
142#endif /* !CONFIG_CPU_HAS_WB */ 86#endif /* !CONFIG_CPU_HAS_WB */
143 87
88#define wmb() fast_wmb()
89#define rmb() fast_rmb()
90#define dma_wmb() fast_wmb()
91#define dma_rmb() fast_rmb()
92
144#if defined(CONFIG_WEAK_ORDERING) && defined(CONFIG_SMP) 93#if defined(CONFIG_WEAK_ORDERING) && defined(CONFIG_SMP)
145# ifdef CONFIG_CPU_CAVIUM_OCTEON 94# ifdef CONFIG_CPU_CAVIUM_OCTEON
146# define smp_mb() __sync() 95# define smp_mb() __sync()
diff --git a/arch/powerpc/include/asm/barrier.h b/arch/powerpc/include/asm/barrier.h
index bab79a110c7b..a3bf5be111ff 100644
--- a/arch/powerpc/include/asm/barrier.h
+++ b/arch/powerpc/include/asm/barrier.h
@@ -33,12 +33,9 @@
33#define mb() __asm__ __volatile__ ("sync" : : : "memory") 33#define mb() __asm__ __volatile__ ("sync" : : : "memory")
34#define rmb() __asm__ __volatile__ ("sync" : : : "memory") 34#define rmb() __asm__ __volatile__ ("sync" : : : "memory")
35#define wmb() __asm__ __volatile__ ("sync" : : : "memory") 35#define wmb() __asm__ __volatile__ ("sync" : : : "memory")
36#define read_barrier_depends() do { } while(0)
37 36
38#define set_mb(var, value) do { var = value; mb(); } while (0) 37#define set_mb(var, value) do { var = value; mb(); } while (0)
39 38
40#ifdef CONFIG_SMP
41
42#ifdef __SUBARCH_HAS_LWSYNC 39#ifdef __SUBARCH_HAS_LWSYNC
43# define SMPWMB LWSYNC 40# define SMPWMB LWSYNC
44#else 41#else
@@ -46,20 +43,26 @@
46#endif 43#endif
47 44
48#define __lwsync() __asm__ __volatile__ (stringify_in_c(LWSYNC) : : :"memory") 45#define __lwsync() __asm__ __volatile__ (stringify_in_c(LWSYNC) : : :"memory")
46#define dma_rmb() __lwsync()
47#define dma_wmb() __asm__ __volatile__ (stringify_in_c(SMPWMB) : : :"memory")
48
49#ifdef CONFIG_SMP
50#define smp_lwsync() __lwsync()
49 51
50#define smp_mb() mb() 52#define smp_mb() mb()
51#define smp_rmb() __lwsync() 53#define smp_rmb() __lwsync()
52#define smp_wmb() __asm__ __volatile__ (stringify_in_c(SMPWMB) : : :"memory") 54#define smp_wmb() __asm__ __volatile__ (stringify_in_c(SMPWMB) : : :"memory")
53#define smp_read_barrier_depends() read_barrier_depends()
54#else 55#else
55#define __lwsync() barrier() 56#define smp_lwsync() barrier()
56 57
57#define smp_mb() barrier() 58#define smp_mb() barrier()
58#define smp_rmb() barrier() 59#define smp_rmb() barrier()
59#define smp_wmb() barrier() 60#define smp_wmb() barrier()
60#define smp_read_barrier_depends() do { } while(0)
61#endif /* CONFIG_SMP */ 61#endif /* CONFIG_SMP */
62 62
63#define read_barrier_depends() do { } while (0)
64#define smp_read_barrier_depends() do { } while (0)
65
63/* 66/*
64 * This is a barrier which prevents following instructions from being 67 * This is a barrier which prevents following instructions from being
65 * started until the value of the argument x is known. For example, if 68 * started until the value of the argument x is known. For example, if
@@ -72,7 +75,7 @@
72#define smp_store_release(p, v) \ 75#define smp_store_release(p, v) \
73do { \ 76do { \
74 compiletime_assert_atomic_type(*p); \ 77 compiletime_assert_atomic_type(*p); \
75 __lwsync(); \ 78 smp_lwsync(); \
76 ACCESS_ONCE(*p) = (v); \ 79 ACCESS_ONCE(*p) = (v); \
77} while (0) 80} while (0)
78 81
@@ -80,7 +83,7 @@ do { \
80({ \ 83({ \
81 typeof(*p) ___p1 = ACCESS_ONCE(*p); \ 84 typeof(*p) ___p1 = ACCESS_ONCE(*p); \
82 compiletime_assert_atomic_type(*p); \ 85 compiletime_assert_atomic_type(*p); \
83 __lwsync(); \ 86 smp_lwsync(); \
84 ___p1; \ 87 ___p1; \
85}) 88})
86 89
diff --git a/arch/s390/include/asm/barrier.h b/arch/s390/include/asm/barrier.h
index b5dce6544d76..8d724718ec21 100644
--- a/arch/s390/include/asm/barrier.h
+++ b/arch/s390/include/asm/barrier.h
@@ -24,11 +24,14 @@
24 24
25#define rmb() mb() 25#define rmb() mb()
26#define wmb() mb() 26#define wmb() mb()
27#define read_barrier_depends() do { } while(0) 27#define dma_rmb() rmb()
28#define dma_wmb() wmb()
28#define smp_mb() mb() 29#define smp_mb() mb()
29#define smp_rmb() rmb() 30#define smp_rmb() rmb()
30#define smp_wmb() wmb() 31#define smp_wmb() wmb()
31#define smp_read_barrier_depends() read_barrier_depends() 32
33#define read_barrier_depends() do { } while (0)
34#define smp_read_barrier_depends() do { } while (0)
32 35
33#define smp_mb__before_atomic() smp_mb() 36#define smp_mb__before_atomic() smp_mb()
34#define smp_mb__after_atomic() smp_mb() 37#define smp_mb__after_atomic() smp_mb()
diff --git a/arch/sparc/include/asm/barrier_64.h b/arch/sparc/include/asm/barrier_64.h
index 305dcc3dc721..76648941fea7 100644
--- a/arch/sparc/include/asm/barrier_64.h
+++ b/arch/sparc/include/asm/barrier_64.h
@@ -37,7 +37,9 @@ do { __asm__ __volatile__("ba,pt %%xcc, 1f\n\t" \
37#define rmb() __asm__ __volatile__("":::"memory") 37#define rmb() __asm__ __volatile__("":::"memory")
38#define wmb() __asm__ __volatile__("":::"memory") 38#define wmb() __asm__ __volatile__("":::"memory")
39 39
40#define read_barrier_depends() do { } while(0) 40#define dma_rmb() rmb()
41#define dma_wmb() wmb()
42
41#define set_mb(__var, __value) \ 43#define set_mb(__var, __value) \
42 do { __var = __value; membar_safe("#StoreLoad"); } while(0) 44 do { __var = __value; membar_safe("#StoreLoad"); } while(0)
43 45
@@ -51,7 +53,8 @@ do { __asm__ __volatile__("ba,pt %%xcc, 1f\n\t" \
51#define smp_wmb() __asm__ __volatile__("":::"memory") 53#define smp_wmb() __asm__ __volatile__("":::"memory")
52#endif 54#endif
53 55
54#define smp_read_barrier_depends() do { } while(0) 56#define read_barrier_depends() do { } while (0)
57#define smp_read_barrier_depends() do { } while (0)
55 58
56#define smp_store_release(p, v) \ 59#define smp_store_release(p, v) \
57do { \ 60do { \
diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h
index 0f4460b5636d..2ab1eb33106e 100644
--- a/arch/x86/include/asm/barrier.h
+++ b/arch/x86/include/asm/barrier.h
@@ -24,78 +24,28 @@
24#define wmb() asm volatile("sfence" ::: "memory") 24#define wmb() asm volatile("sfence" ::: "memory")
25#endif 25#endif
26 26
27/**
28 * read_barrier_depends - Flush all pending reads that subsequents reads
29 * depend on.
30 *
31 * No data-dependent reads from memory-like regions are ever reordered
32 * over this barrier. All reads preceding this primitive are guaranteed
33 * to access memory (but not necessarily other CPUs' caches) before any
34 * reads following this primitive that depend on the data return by
35 * any of the preceding reads. This primitive is much lighter weight than
36 * rmb() on most CPUs, and is never heavier weight than is
37 * rmb().
38 *
39 * These ordering constraints are respected by both the local CPU
40 * and the compiler.
41 *
42 * Ordering is not guaranteed by anything other than these primitives,
43 * not even by data dependencies. See the documentation for
44 * memory_barrier() for examples and URLs to more information.
45 *
46 * For example, the following code would force ordering (the initial
47 * value of "a" is zero, "b" is one, and "p" is "&a"):
48 *
49 * <programlisting>
50 * CPU 0 CPU 1
51 *
52 * b = 2;
53 * memory_barrier();
54 * p = &b; q = p;
55 * read_barrier_depends();
56 * d = *q;
57 * </programlisting>
58 *
59 * because the read of "*q" depends on the read of "p" and these
60 * two reads are separated by a read_barrier_depends(). However,
61 * the following code, with the same initial values for "a" and "b":
62 *
63 * <programlisting>
64 * CPU 0 CPU 1
65 *
66 * a = 2;
67 * memory_barrier();
68 * b = 3; y = b;
69 * read_barrier_depends();
70 * x = a;
71 * </programlisting>
72 *
73 * does not enforce ordering, since there is no data dependency between
74 * the read of "a" and the read of "b". Therefore, on some CPUs, such
75 * as Alpha, "y" could be set to 3 and "x" to 0. Use rmb()
76 * in cases like this where there are no data dependencies.
77 **/
78
79#define read_barrier_depends() do { } while (0)
80
81#ifdef CONFIG_SMP
82#define smp_mb() mb()
83#ifdef CONFIG_X86_PPRO_FENCE 27#ifdef CONFIG_X86_PPRO_FENCE
84# define smp_rmb() rmb() 28#define dma_rmb() rmb()
85#else 29#else
86# define smp_rmb() barrier() 30#define dma_rmb() barrier()
87#endif 31#endif
32#define dma_wmb() barrier()
33
34#ifdef CONFIG_SMP
35#define smp_mb() mb()
36#define smp_rmb() dma_rmb()
88#define smp_wmb() barrier() 37#define smp_wmb() barrier()
89#define smp_read_barrier_depends() read_barrier_depends()
90#define set_mb(var, value) do { (void)xchg(&var, value); } while (0) 38#define set_mb(var, value) do { (void)xchg(&var, value); } while (0)
91#else /* !SMP */ 39#else /* !SMP */
92#define smp_mb() barrier() 40#define smp_mb() barrier()
93#define smp_rmb() barrier() 41#define smp_rmb() barrier()
94#define smp_wmb() barrier() 42#define smp_wmb() barrier()
95#define smp_read_barrier_depends() do { } while (0)
96#define set_mb(var, value) do { var = value; barrier(); } while (0) 43#define set_mb(var, value) do { var = value; barrier(); } while (0)
97#endif /* SMP */ 44#endif /* SMP */
98 45
46#define read_barrier_depends() do { } while (0)
47#define smp_read_barrier_depends() do { } while (0)
48
99#if defined(CONFIG_X86_PPRO_FENCE) 49#if defined(CONFIG_X86_PPRO_FENCE)
100 50
101/* 51/*
diff --git a/arch/x86/um/asm/barrier.h b/arch/x86/um/asm/barrier.h
index cc04e67bfd05..2d7d9a1f5b53 100644
--- a/arch/x86/um/asm/barrier.h
+++ b/arch/x86/um/asm/barrier.h
@@ -29,20 +29,18 @@
29 29
30#endif /* CONFIG_X86_32 */ 30#endif /* CONFIG_X86_32 */
31 31
32#define read_barrier_depends() do { } while (0)
33
34#ifdef CONFIG_SMP
35
36#define smp_mb() mb()
37#ifdef CONFIG_X86_PPRO_FENCE 32#ifdef CONFIG_X86_PPRO_FENCE
38#define smp_rmb() rmb() 33#define dma_rmb() rmb()
39#else /* CONFIG_X86_PPRO_FENCE */ 34#else /* CONFIG_X86_PPRO_FENCE */
40#define smp_rmb() barrier() 35#define dma_rmb() barrier()
41#endif /* CONFIG_X86_PPRO_FENCE */ 36#endif /* CONFIG_X86_PPRO_FENCE */
37#define dma_wmb() barrier()
42 38
43#define smp_wmb() barrier() 39#ifdef CONFIG_SMP
44 40
45#define smp_read_barrier_depends() read_barrier_depends() 41#define smp_mb() mb()
42#define smp_rmb() dma_rmb()
43#define smp_wmb() barrier()
46#define set_mb(var, value) do { (void)xchg(&var, value); } while (0) 44#define set_mb(var, value) do { (void)xchg(&var, value); } while (0)
47 45
48#else /* CONFIG_SMP */ 46#else /* CONFIG_SMP */
@@ -50,11 +48,13 @@
50#define smp_mb() barrier() 48#define smp_mb() barrier()
51#define smp_rmb() barrier() 49#define smp_rmb() barrier()
52#define smp_wmb() barrier() 50#define smp_wmb() barrier()
53#define smp_read_barrier_depends() do { } while (0)
54#define set_mb(var, value) do { var = value; barrier(); } while (0) 51#define set_mb(var, value) do { var = value; barrier(); } while (0)
55 52
56#endif /* CONFIG_SMP */ 53#endif /* CONFIG_SMP */
57 54
55#define read_barrier_depends() do { } while (0)
56#define smp_read_barrier_depends() do { } while (0)
57
58/* 58/*
59 * Stop RDTSC speculation. This is needed when you need to use RDTSC 59 * Stop RDTSC speculation. This is needed when you need to use RDTSC
60 * (or get_cycles or vread that possibly accesses the TSC) in a defined 60 * (or get_cycles or vread that possibly accesses the TSC) in a defined
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_main.c b/drivers/net/ethernet/intel/fm10k/fm10k_main.c
index ee1ecb146df7..eb088b129bc7 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_main.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_main.c
@@ -615,14 +615,14 @@ static bool fm10k_clean_rx_irq(struct fm10k_q_vector *q_vector,
615 615
616 rx_desc = FM10K_RX_DESC(rx_ring, rx_ring->next_to_clean); 616 rx_desc = FM10K_RX_DESC(rx_ring, rx_ring->next_to_clean);
617 617
618 if (!fm10k_test_staterr(rx_desc, FM10K_RXD_STATUS_DD)) 618 if (!rx_desc->d.staterr)
619 break; 619 break;
620 620
621 /* This memory barrier is needed to keep us from reading 621 /* This memory barrier is needed to keep us from reading
622 * any other fields out of the rx_desc until we know the 622 * any other fields out of the rx_desc until we know the
623 * RXD_STATUS_DD bit is set 623 * descriptor has been written back
624 */ 624 */
625 rmb(); 625 dma_rmb();
626 626
627 /* retrieve a buffer from the ring */ 627 /* retrieve a buffer from the ring */
628 skb = fm10k_fetch_rx_buffer(rx_ring, rx_desc, skb); 628 skb = fm10k_fetch_rx_buffer(rx_ring, rx_desc, skb);
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index 2e526d4904a6..ff59897a9463 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -6910,14 +6910,14 @@ static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget)
6910 6910
6911 rx_desc = IGB_RX_DESC(rx_ring, rx_ring->next_to_clean); 6911 rx_desc = IGB_RX_DESC(rx_ring, rx_ring->next_to_clean);
6912 6912
6913 if (!igb_test_staterr(rx_desc, E1000_RXD_STAT_DD)) 6913 if (!rx_desc->wb.upper.status_error)
6914 break; 6914 break;
6915 6915
6916 /* This memory barrier is needed to keep us from reading 6916 /* This memory barrier is needed to keep us from reading
6917 * any other fields out of the rx_desc until we know the 6917 * any other fields out of the rx_desc until we know the
6918 * RXD_STAT_DD bit is set 6918 * descriptor has been written back
6919 */ 6919 */
6920 rmb(); 6920 dma_rmb();
6921 6921
6922 /* retrieve a buffer from the ring */ 6922 /* retrieve a buffer from the ring */
6923 skb = igb_fetch_rx_buffer(rx_ring, rx_desc, skb); 6923 skb = igb_fetch_rx_buffer(rx_ring, rx_desc, skb);
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 798b05556e1b..2ed2c7de2304 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -2009,15 +2009,14 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
2009 2009
2010 rx_desc = IXGBE_RX_DESC(rx_ring, rx_ring->next_to_clean); 2010 rx_desc = IXGBE_RX_DESC(rx_ring, rx_ring->next_to_clean);
2011 2011
2012 if (!ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_DD)) 2012 if (!rx_desc->wb.upper.status_error)
2013 break; 2013 break;
2014 2014
2015 /* 2015 /* This memory barrier is needed to keep us from reading
2016 * This memory barrier is needed to keep us from reading
2017 * any other fields out of the rx_desc until we know the 2016 * any other fields out of the rx_desc until we know the
2018 * RXD_STAT_DD bit is set 2017 * descriptor has been written back
2019 */ 2018 */
2020 rmb(); 2019 dma_rmb();
2021 2020
2022 /* retrieve a buffer from the ring */ 2021 /* retrieve a buffer from the ring */
2023 skb = ixgbe_fetch_rx_buffer(rx_ring, rx_desc); 2022 skb = ixgbe_fetch_rx_buffer(rx_ring, rx_desc);
diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c
index 3dad7e884952..088136b37ebe 100644
--- a/drivers/net/ethernet/realtek/r8169.c
+++ b/drivers/net/ethernet/realtek/r8169.c
@@ -6605,6 +6605,9 @@ static inline void rtl8169_mark_to_asic(struct RxDesc *desc, u32 rx_buf_sz)
6605{ 6605{
6606 u32 eor = le32_to_cpu(desc->opts1) & RingEnd; 6606 u32 eor = le32_to_cpu(desc->opts1) & RingEnd;
6607 6607
6608 /* Force memory writes to complete before releasing descriptor */
6609 dma_wmb();
6610
6608 desc->opts1 = cpu_to_le32(DescOwn | eor | rx_buf_sz); 6611 desc->opts1 = cpu_to_le32(DescOwn | eor | rx_buf_sz);
6609} 6612}
6610 6613
@@ -6612,7 +6615,6 @@ static inline void rtl8169_map_to_asic(struct RxDesc *desc, dma_addr_t mapping,
6612 u32 rx_buf_sz) 6615 u32 rx_buf_sz)
6613{ 6616{
6614 desc->addr = cpu_to_le64(mapping); 6617 desc->addr = cpu_to_le64(mapping);
6615 wmb();
6616 rtl8169_mark_to_asic(desc, rx_buf_sz); 6618 rtl8169_mark_to_asic(desc, rx_buf_sz);
6617} 6619}
6618 6620
@@ -7073,16 +7075,18 @@ static netdev_tx_t rtl8169_start_xmit(struct sk_buff *skb,
7073 7075
7074 skb_tx_timestamp(skb); 7076 skb_tx_timestamp(skb);
7075 7077
7076 wmb(); 7078 /* Force memory writes to complete before releasing descriptor */
7079 dma_wmb();
7077 7080
7078 /* Anti gcc 2.95.3 bugware (sic) */ 7081 /* Anti gcc 2.95.3 bugware (sic) */
7079 status = opts[0] | len | (RingEnd * !((entry + 1) % NUM_TX_DESC)); 7082 status = opts[0] | len | (RingEnd * !((entry + 1) % NUM_TX_DESC));
7080 txd->opts1 = cpu_to_le32(status); 7083 txd->opts1 = cpu_to_le32(status);
7081 7084
7082 tp->cur_tx += frags + 1; 7085 /* Force all memory writes to complete before notifying device */
7083
7084 wmb(); 7086 wmb();
7085 7087
7088 tp->cur_tx += frags + 1;
7089
7086 RTL_W8(TxPoll, NPQ); 7090 RTL_W8(TxPoll, NPQ);
7087 7091
7088 mmiowb(); 7092 mmiowb();
@@ -7181,11 +7185,16 @@ static void rtl_tx(struct net_device *dev, struct rtl8169_private *tp)
7181 struct ring_info *tx_skb = tp->tx_skb + entry; 7185 struct ring_info *tx_skb = tp->tx_skb + entry;
7182 u32 status; 7186 u32 status;
7183 7187
7184 rmb();
7185 status = le32_to_cpu(tp->TxDescArray[entry].opts1); 7188 status = le32_to_cpu(tp->TxDescArray[entry].opts1);
7186 if (status & DescOwn) 7189 if (status & DescOwn)
7187 break; 7190 break;
7188 7191
7192 /* This barrier is needed to keep us from reading
7193 * any other fields out of the Tx descriptor until
7194 * we know the status of DescOwn
7195 */
7196 dma_rmb();
7197
7189 rtl8169_unmap_tx_skb(&tp->pci_dev->dev, tx_skb, 7198 rtl8169_unmap_tx_skb(&tp->pci_dev->dev, tx_skb,
7190 tp->TxDescArray + entry); 7199 tp->TxDescArray + entry);
7191 if (status & LastFrag) { 7200 if (status & LastFrag) {
@@ -7280,11 +7289,16 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, u32 budget
7280 struct RxDesc *desc = tp->RxDescArray + entry; 7289 struct RxDesc *desc = tp->RxDescArray + entry;
7281 u32 status; 7290 u32 status;
7282 7291
7283 rmb();
7284 status = le32_to_cpu(desc->opts1) & tp->opts1_mask; 7292 status = le32_to_cpu(desc->opts1) & tp->opts1_mask;
7285
7286 if (status & DescOwn) 7293 if (status & DescOwn)
7287 break; 7294 break;
7295
7296 /* This barrier is needed to keep us from reading
7297 * any other fields out of the Rx descriptor until
7298 * we know the status of DescOwn
7299 */
7300 dma_rmb();
7301
7288 if (unlikely(status & RxRES)) { 7302 if (unlikely(status & RxRES)) {
7289 netif_info(tp, rx_err, dev, "Rx ERROR. status = %08x\n", 7303 netif_info(tp, rx_err, dev, "Rx ERROR. status = %08x\n",
7290 status); 7304 status);
@@ -7346,7 +7360,6 @@ process_pkt:
7346 } 7360 }
7347release_descriptor: 7361release_descriptor:
7348 desc->opts2 = 0; 7362 desc->opts2 = 0;
7349 wmb();
7350 rtl8169_mark_to_asic(desc, rx_buf_sz); 7363 rtl8169_mark_to_asic(desc, rx_buf_sz);
7351 } 7364 }
7352 7365
diff --git a/include/asm-generic/barrier.h b/include/asm-generic/barrier.h
index 1402fa855388..f5c40b0fadc2 100644
--- a/include/asm-generic/barrier.h
+++ b/include/asm-generic/barrier.h
@@ -42,6 +42,14 @@
42#define wmb() mb() 42#define wmb() mb()
43#endif 43#endif
44 44
45#ifndef dma_rmb
46#define dma_rmb() rmb()
47#endif
48
49#ifndef dma_wmb
50#define dma_wmb() wmb()
51#endif
52
45#ifndef read_barrier_depends 53#ifndef read_barrier_depends
46#define read_barrier_depends() do { } while (0) 54#define read_barrier_depends() do { } while (0)
47#endif 55#endif