diff options
author | David S. Miller <davem@davemloft.net> | 2014-12-11 21:15:37 -0500 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2014-12-11 21:15:37 -0500 |
commit | 697766df6b952f09b17eefda8b5ef746acb9c1eb (patch) | |
tree | a4962667802529c26231f4768c0233a15f9e9e4c | |
parent | c11a9009ae6a8c42a8cd69d885601e1aa6fbea04 (diff) | |
parent | 124b74c18e0e31b24638d256afee7122a994e1b3 (diff) |
Merge branch 'dma_mb'
Alexander Duyck says:
====================
arch: Add lightweight memory barriers for coherent memory access
These patches introduce two new primitives for synchronizing cache coherent
memory writes and reads. These two new primitives are:
dma_rmb()
dma_wmb()
The first patch cleans up some unnecessary overhead related to the
definition of read_barrier_depends, smp_read_barrier_depends, and comments
related to the barrier.
The second patch adds the primitives for the applicable architectures and
asm-generic.
The third patch adds the barriers to r8169 which turns out to be a good
example of where the new barriers might be useful as they have full
rmb()/wmb() barriers ordering accesses to the descriptors and the DescOwn
bit.
The fourth patch adds support for coherent_rmb() to the Intel fm10k, igb,
and ixgbe drivers. Testing with the ixgbe driver has shown a processing
time reduction of at least 7ns per 64B frame on a Core i7-4930K.
This patch series is essentially the v7 for:
v4-7: Add lightweight memory barriers for coherent memory access
v3: Add lightweight memory barriers fast_rmb() and fast_wmb()
v2: Introduce load_acquire() and store_release()
v1: Introduce read_acquire()
The key changes in this patch series versus the earlier patches are:
v7 resubmit:
- Added Acked-by: Ben Herrenschmidt from v5 to dma_rmb/wmb patch
- No code changes from previous set, still applies cleanly and builds.
v7:
- Dropped test/debug patch that was accidentally slipped in
v6:
- Replaced "memory based device I/O" with "consistent memory" in
docs
- Added reference to DMA-API.txt to explain consistent memory
v5:
- Renamed barriers dma_rmb and dma_wmb
- Undid smp_wmb changes in x86 and PowerPC
- Defined smp_rmb as __lwsync for SMP case on PowerPC
v4:
- Renamed barriers coherent_rmb and coherent_wmb
- Added smp_lwsync for use in smp_load_acquire/smp_store_release
v3:
- Moved away from acquire()/store() and instead focused on barriers
- Added cleanup of read_barrier_depends
- Added change in r8169 to fix cur_tx/DescOwn ordering
- Simplified changes to just replacing/moving barriers in r8169
- Added update to documentation with code example
v2:
- Renamed read_acquire() to be consistent with smp_load_acquire()
- Changed barrier used to be consistent with smp_load_acquire()
- Updated PowerPC code to use __lwsync based on IBM article
- Added store_release() as this is a viable use case for drivers
- Added r8169 patch which is able to fully use primitives
- Added fm10k/igb/ixgbe patch which is able to test performance
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | Documentation/memory-barriers.txt | 42 | ||||
-rw-r--r-- | arch/alpha/include/asm/barrier.h | 51 | ||||
-rw-r--r-- | arch/arm/include/asm/barrier.h | 4 | ||||
-rw-r--r-- | arch/arm64/include/asm/barrier.h | 3 | ||||
-rw-r--r-- | arch/blackfin/include/asm/barrier.h | 51 | ||||
-rw-r--r-- | arch/ia64/include/asm/barrier.h | 25 | ||||
-rw-r--r-- | arch/metag/include/asm/barrier.h | 19 | ||||
-rw-r--r-- | arch/mips/include/asm/barrier.h | 61 | ||||
-rw-r--r-- | arch/powerpc/include/asm/barrier.h | 19 | ||||
-rw-r--r-- | arch/s390/include/asm/barrier.h | 7 | ||||
-rw-r--r-- | arch/sparc/include/asm/barrier_64.h | 7 | ||||
-rw-r--r-- | arch/x86/include/asm/barrier.h | 70 | ||||
-rw-r--r-- | arch/x86/um/asm/barrier.h | 20 | ||||
-rw-r--r-- | drivers/net/ethernet/intel/fm10k/fm10k_main.c | 6 | ||||
-rw-r--r-- | drivers/net/ethernet/intel/igb/igb_main.c | 6 | ||||
-rw-r--r-- | drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 9 | ||||
-rw-r--r-- | drivers/net/ethernet/realtek/r8169.c | 29 | ||||
-rw-r--r-- | include/asm-generic/barrier.h | 8 |
18 files changed, 258 insertions, 179 deletions
diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt index 7ee2ae6d5451..70a09f8a0383 100644 --- a/Documentation/memory-barriers.txt +++ b/Documentation/memory-barriers.txt | |||
@@ -1633,6 +1633,48 @@ There are some more advanced barrier functions: | |||
1633 | operations" subsection for information on where to use these. | 1633 | operations" subsection for information on where to use these. |
1634 | 1634 | ||
1635 | 1635 | ||
1636 | (*) dma_wmb(); | ||
1637 | (*) dma_rmb(); | ||
1638 | |||
1639 | These are for use with consistent memory to guarantee the ordering | ||
1640 | of writes or reads of shared memory accessible to both the CPU and a | ||
1641 | DMA capable device. | ||
1642 | |||
1643 | For example, consider a device driver that shares memory with a device | ||
1644 | and uses a descriptor status value to indicate if the descriptor belongs | ||
1645 | to the device or the CPU, and a doorbell to notify it when new | ||
1646 | descriptors are available: | ||
1647 | |||
1648 | if (desc->status != DEVICE_OWN) { | ||
1649 | /* do not read data until we own descriptor */ | ||
1650 | dma_rmb(); | ||
1651 | |||
1652 | /* read/modify data */ | ||
1653 | read_data = desc->data; | ||
1654 | desc->data = write_data; | ||
1655 | |||
1656 | /* flush modifications before status update */ | ||
1657 | dma_wmb(); | ||
1658 | |||
1659 | /* assign ownership */ | ||
1660 | desc->status = DEVICE_OWN; | ||
1661 | |||
1662 | /* force memory to sync before notifying device via MMIO */ | ||
1663 | wmb(); | ||
1664 | |||
1665 | /* notify device of new descriptors */ | ||
1666 | writel(DESC_NOTIFY, doorbell); | ||
1667 | } | ||
1668 | |||
1669 | The dma_rmb() allows us to guarantee the device has released ownership | ||
1670 | before we read the data from the descriptor, and the dma_wmb() allows | ||
1671 | us to guarantee the data is written to the descriptor before the device | ||
1672 | can see it now has ownership. The wmb() is needed to guarantee that the | ||
1673 | cache coherent memory writes have completed before attempting a write to | ||
1674 | the cache incoherent MMIO region. | ||
1675 | |||
1676 | See Documentation/DMA-API.txt for more information on consistent memory. | ||
1677 | |||
1636 | MMIO WRITE BARRIER | 1678 | MMIO WRITE BARRIER |
1637 | ------------------ | 1679 | ------------------ |
1638 | 1680 | ||
diff --git a/arch/alpha/include/asm/barrier.h b/arch/alpha/include/asm/barrier.h index 3832bdb794fe..77516c87255d 100644 --- a/arch/alpha/include/asm/barrier.h +++ b/arch/alpha/include/asm/barrier.h | |||
@@ -7,6 +7,57 @@ | |||
7 | #define rmb() __asm__ __volatile__("mb": : :"memory") | 7 | #define rmb() __asm__ __volatile__("mb": : :"memory") |
8 | #define wmb() __asm__ __volatile__("wmb": : :"memory") | 8 | #define wmb() __asm__ __volatile__("wmb": : :"memory") |
9 | 9 | ||
10 | /** | ||
11 | * read_barrier_depends - Flush all pending reads that subsequent reads | ||
12 | * depend on. | ||
13 | * | ||
14 | * No data-dependent reads from memory-like regions are ever reordered | ||
15 | * over this barrier. All reads preceding this primitive are guaranteed | ||
16 | * to access memory (but not necessarily other CPUs' caches) before any | ||
17 | * reads following this primitive that depend on the data returned by | ||
18 | * any of the preceding reads. This primitive is much lighter weight than | ||
19 | * rmb() on most CPUs, and is never heavier weight than is | ||
20 | * rmb(). | ||
21 | * | ||
22 | * These ordering constraints are respected by both the local CPU | ||
23 | * and the compiler. | ||
24 | * | ||
25 | * Ordering is not guaranteed by anything other than these primitives, | ||
26 | * not even by data dependencies. See the documentation for | ||
27 | * memory_barrier() for examples and URLs to more information. | ||
28 | * | ||
29 | * For example, the following code would force ordering (the initial | ||
30 | * value of "a" is zero, "b" is one, and "p" is "&a"): | ||
31 | * | ||
32 | * <programlisting> | ||
33 | * CPU 0 CPU 1 | ||
34 | * | ||
35 | * b = 2; | ||
36 | * memory_barrier(); | ||
37 | * p = &b; q = p; | ||
38 | * read_barrier_depends(); | ||
39 | * d = *q; | ||
40 | * </programlisting> | ||
41 | * | ||
42 | * because the read of "*q" depends on the read of "p" and these | ||
43 | * two reads are separated by a read_barrier_depends(). However, | ||
44 | * the following code, with the same initial values for "a" and "b": | ||
45 | * | ||
46 | * <programlisting> | ||
47 | * CPU 0 CPU 1 | ||
48 | * | ||
49 | * a = 2; | ||
50 | * memory_barrier(); | ||
51 | * b = 3; y = b; | ||
52 | * read_barrier_depends(); | ||
53 | * x = a; | ||
54 | * </programlisting> | ||
55 | * | ||
56 | * does not enforce ordering, since there is no data dependency between | ||
57 | * the read of "a" and the read of "b". Therefore, on some CPUs, such | ||
58 | * as Alpha, "y" could be set to 3 and "x" to 0. Use rmb() | ||
59 | * in cases like this where there are no data dependencies. | ||
60 | */ | ||
10 | #define read_barrier_depends() __asm__ __volatile__("mb": : :"memory") | 61 | #define read_barrier_depends() __asm__ __volatile__("mb": : :"memory") |
11 | 62 | ||
12 | #ifdef CONFIG_SMP | 63 | #ifdef CONFIG_SMP |
diff --git a/arch/arm/include/asm/barrier.h b/arch/arm/include/asm/barrier.h index c6a3e73a6e24..d2f81e6b8c1c 100644 --- a/arch/arm/include/asm/barrier.h +++ b/arch/arm/include/asm/barrier.h | |||
@@ -43,10 +43,14 @@ | |||
43 | #define mb() do { dsb(); outer_sync(); } while (0) | 43 | #define mb() do { dsb(); outer_sync(); } while (0) |
44 | #define rmb() dsb() | 44 | #define rmb() dsb() |
45 | #define wmb() do { dsb(st); outer_sync(); } while (0) | 45 | #define wmb() do { dsb(st); outer_sync(); } while (0) |
46 | #define dma_rmb() dmb(osh) | ||
47 | #define dma_wmb() dmb(oshst) | ||
46 | #else | 48 | #else |
47 | #define mb() barrier() | 49 | #define mb() barrier() |
48 | #define rmb() barrier() | 50 | #define rmb() barrier() |
49 | #define wmb() barrier() | 51 | #define wmb() barrier() |
52 | #define dma_rmb() barrier() | ||
53 | #define dma_wmb() barrier() | ||
50 | #endif | 54 | #endif |
51 | 55 | ||
52 | #ifndef CONFIG_SMP | 56 | #ifndef CONFIG_SMP |
diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h index 6389d60574d9..a5abb0062d6e 100644 --- a/arch/arm64/include/asm/barrier.h +++ b/arch/arm64/include/asm/barrier.h | |||
@@ -32,6 +32,9 @@ | |||
32 | #define rmb() dsb(ld) | 32 | #define rmb() dsb(ld) |
33 | #define wmb() dsb(st) | 33 | #define wmb() dsb(st) |
34 | 34 | ||
35 | #define dma_rmb() dmb(oshld) | ||
36 | #define dma_wmb() dmb(oshst) | ||
37 | |||
35 | #ifndef CONFIG_SMP | 38 | #ifndef CONFIG_SMP |
36 | #define smp_mb() barrier() | 39 | #define smp_mb() barrier() |
37 | #define smp_rmb() barrier() | 40 | #define smp_rmb() barrier() |
diff --git a/arch/blackfin/include/asm/barrier.h b/arch/blackfin/include/asm/barrier.h index 420006877998..dfb66fe88b34 100644 --- a/arch/blackfin/include/asm/barrier.h +++ b/arch/blackfin/include/asm/barrier.h | |||
@@ -22,6 +22,57 @@ | |||
22 | # define mb() do { barrier(); smp_check_barrier(); smp_mark_barrier(); } while (0) | 22 | # define mb() do { barrier(); smp_check_barrier(); smp_mark_barrier(); } while (0) |
23 | # define rmb() do { barrier(); smp_check_barrier(); } while (0) | 23 | # define rmb() do { barrier(); smp_check_barrier(); } while (0) |
24 | # define wmb() do { barrier(); smp_mark_barrier(); } while (0) | 24 | # define wmb() do { barrier(); smp_mark_barrier(); } while (0) |
25 | /* | ||
26 | * read_barrier_depends - Flush all pending reads that subsequent reads | ||
27 | * depend on. | ||
28 | * | ||
29 | * No data-dependent reads from memory-like regions are ever reordered | ||
30 | * over this barrier. All reads preceding this primitive are guaranteed | ||
31 | * to access memory (but not necessarily other CPUs' caches) before any | ||
33 | * reads following this primitive that depend on the data returned by | ||
33 | * any of the preceding reads. This primitive is much lighter weight than | ||
34 | * rmb() on most CPUs, and is never heavier weight than is | ||
35 | * rmb(). | ||
36 | * | ||
37 | * These ordering constraints are respected by both the local CPU | ||
38 | * and the compiler. | ||
39 | * | ||
40 | * Ordering is not guaranteed by anything other than these primitives, | ||
41 | * not even by data dependencies. See the documentation for | ||
42 | * memory_barrier() for examples and URLs to more information. | ||
43 | * | ||
44 | * For example, the following code would force ordering (the initial | ||
45 | * value of "a" is zero, "b" is one, and "p" is "&a"): | ||
46 | * | ||
47 | * <programlisting> | ||
48 | * CPU 0 CPU 1 | ||
49 | * | ||
50 | * b = 2; | ||
51 | * memory_barrier(); | ||
52 | * p = &b; q = p; | ||
53 | * read_barrier_depends(); | ||
54 | * d = *q; | ||
55 | * </programlisting> | ||
56 | * | ||
57 | * because the read of "*q" depends on the read of "p" and these | ||
58 | * two reads are separated by a read_barrier_depends(). However, | ||
59 | * the following code, with the same initial values for "a" and "b": | ||
60 | * | ||
61 | * <programlisting> | ||
62 | * CPU 0 CPU 1 | ||
63 | * | ||
64 | * a = 2; | ||
65 | * memory_barrier(); | ||
66 | * b = 3; y = b; | ||
67 | * read_barrier_depends(); | ||
68 | * x = a; | ||
69 | * </programlisting> | ||
70 | * | ||
71 | * does not enforce ordering, since there is no data dependency between | ||
72 | * the read of "a" and the read of "b". Therefore, on some CPUs, such | ||
73 | * as Alpha, "y" could be set to 3 and "x" to 0. Use rmb() | ||
74 | * in cases like this where there are no data dependencies. | ||
75 | */ | ||
25 | # define read_barrier_depends() do { barrier(); smp_check_barrier(); } while (0) | 76 | # define read_barrier_depends() do { barrier(); smp_check_barrier(); } while (0) |
26 | #endif | 77 | #endif |
27 | 78 | ||
diff --git a/arch/ia64/include/asm/barrier.h b/arch/ia64/include/asm/barrier.h index a48957c7b445..f6769eb2bbf9 100644 --- a/arch/ia64/include/asm/barrier.h +++ b/arch/ia64/include/asm/barrier.h | |||
@@ -35,26 +35,25 @@ | |||
35 | * it's (presumably) much slower than mf and (b) mf.a is supported for | 35 | * it's (presumably) much slower than mf and (b) mf.a is supported for |
36 | * sequential memory pages only. | 36 | * sequential memory pages only. |
37 | */ | 37 | */ |
38 | #define mb() ia64_mf() | 38 | #define mb() ia64_mf() |
39 | #define rmb() mb() | 39 | #define rmb() mb() |
40 | #define wmb() mb() | 40 | #define wmb() mb() |
41 | #define read_barrier_depends() do { } while(0) | 41 | |
42 | #define dma_rmb() mb() | ||
43 | #define dma_wmb() mb() | ||
42 | 44 | ||
43 | #ifdef CONFIG_SMP | 45 | #ifdef CONFIG_SMP |
44 | # define smp_mb() mb() | 46 | # define smp_mb() mb() |
45 | # define smp_rmb() rmb() | ||
46 | # define smp_wmb() wmb() | ||
47 | # define smp_read_barrier_depends() read_barrier_depends() | ||
48 | |||
49 | #else | 47 | #else |
50 | |||
51 | # define smp_mb() barrier() | 48 | # define smp_mb() barrier() |
52 | # define smp_rmb() barrier() | ||
53 | # define smp_wmb() barrier() | ||
54 | # define smp_read_barrier_depends() do { } while(0) | ||
55 | |||
56 | #endif | 49 | #endif |
57 | 50 | ||
51 | #define smp_rmb() smp_mb() | ||
52 | #define smp_wmb() smp_mb() | ||
53 | |||
54 | #define read_barrier_depends() do { } while (0) | ||
55 | #define smp_read_barrier_depends() do { } while (0) | ||
56 | |||
58 | #define smp_mb__before_atomic() barrier() | 57 | #define smp_mb__before_atomic() barrier() |
59 | #define smp_mb__after_atomic() barrier() | 58 | #define smp_mb__after_atomic() barrier() |
60 | 59 | ||
diff --git a/arch/metag/include/asm/barrier.h b/arch/metag/include/asm/barrier.h index c7591e80067c..d703d8e26a65 100644 --- a/arch/metag/include/asm/barrier.h +++ b/arch/metag/include/asm/barrier.h | |||
@@ -4,8 +4,6 @@ | |||
4 | #include <asm/metag_mem.h> | 4 | #include <asm/metag_mem.h> |
5 | 5 | ||
6 | #define nop() asm volatile ("NOP") | 6 | #define nop() asm volatile ("NOP") |
7 | #define mb() wmb() | ||
8 | #define rmb() barrier() | ||
9 | 7 | ||
10 | #ifdef CONFIG_METAG_META21 | 8 | #ifdef CONFIG_METAG_META21 |
11 | 9 | ||
@@ -41,13 +39,13 @@ static inline void wr_fence(void) | |||
41 | 39 | ||
42 | #endif /* !CONFIG_METAG_META21 */ | 40 | #endif /* !CONFIG_METAG_META21 */ |
43 | 41 | ||
44 | static inline void wmb(void) | 42 | /* flush writes through the write combiner */ |
45 | { | 43 | #define mb() wr_fence() |
46 | /* flush writes through the write combiner */ | 44 | #define rmb() barrier() |
47 | wr_fence(); | 45 | #define wmb() mb() |
48 | } | ||
49 | 46 | ||
50 | #define read_barrier_depends() do { } while (0) | 47 | #define dma_rmb() rmb() |
48 | #define dma_wmb() wmb() | ||
51 | 49 | ||
52 | #ifndef CONFIG_SMP | 50 | #ifndef CONFIG_SMP |
53 | #define fence() do { } while (0) | 51 | #define fence() do { } while (0) |
@@ -82,7 +80,10 @@ static inline void fence(void) | |||
82 | #define smp_wmb() barrier() | 80 | #define smp_wmb() barrier() |
83 | #endif | 81 | #endif |
84 | #endif | 82 | #endif |
85 | #define smp_read_barrier_depends() do { } while (0) | 83 | |
84 | #define read_barrier_depends() do { } while (0) | ||
85 | #define smp_read_barrier_depends() do { } while (0) | ||
86 | |||
86 | #define set_mb(var, value) do { var = value; smp_mb(); } while (0) | 87 | #define set_mb(var, value) do { var = value; smp_mb(); } while (0) |
87 | 88 | ||
88 | #define smp_store_release(p, v) \ | 89 | #define smp_store_release(p, v) \ |
diff --git a/arch/mips/include/asm/barrier.h b/arch/mips/include/asm/barrier.h index d0101dd0575e..2b8bbbcb9be0 100644 --- a/arch/mips/include/asm/barrier.h +++ b/arch/mips/include/asm/barrier.h | |||
@@ -10,58 +10,6 @@ | |||
10 | 10 | ||
11 | #include <asm/addrspace.h> | 11 | #include <asm/addrspace.h> |
12 | 12 | ||
13 | /* | ||
14 | * read_barrier_depends - Flush all pending reads that subsequents reads | ||
15 | * depend on. | ||
16 | * | ||
17 | * No data-dependent reads from memory-like regions are ever reordered | ||
18 | * over this barrier. All reads preceding this primitive are guaranteed | ||
19 | * to access memory (but not necessarily other CPUs' caches) before any | ||
20 | * reads following this primitive that depend on the data return by | ||
21 | * any of the preceding reads. This primitive is much lighter weight than | ||
22 | * rmb() on most CPUs, and is never heavier weight than is | ||
23 | * rmb(). | ||
24 | * | ||
25 | * These ordering constraints are respected by both the local CPU | ||
26 | * and the compiler. | ||
27 | * | ||
28 | * Ordering is not guaranteed by anything other than these primitives, | ||
29 | * not even by data dependencies. See the documentation for | ||
30 | * memory_barrier() for examples and URLs to more information. | ||
31 | * | ||
32 | * For example, the following code would force ordering (the initial | ||
33 | * value of "a" is zero, "b" is one, and "p" is "&a"): | ||
34 | * | ||
35 | * <programlisting> | ||
36 | * CPU 0 CPU 1 | ||
37 | * | ||
38 | * b = 2; | ||
39 | * memory_barrier(); | ||
40 | * p = &b; q = p; | ||
41 | * read_barrier_depends(); | ||
42 | * d = *q; | ||
43 | * </programlisting> | ||
44 | * | ||
45 | * because the read of "*q" depends on the read of "p" and these | ||
46 | * two reads are separated by a read_barrier_depends(). However, | ||
47 | * the following code, with the same initial values for "a" and "b": | ||
48 | * | ||
49 | * <programlisting> | ||
50 | * CPU 0 CPU 1 | ||
51 | * | ||
52 | * a = 2; | ||
53 | * memory_barrier(); | ||
54 | * b = 3; y = b; | ||
55 | * read_barrier_depends(); | ||
56 | * x = a; | ||
57 | * </programlisting> | ||
58 | * | ||
59 | * does not enforce ordering, since there is no data dependency between | ||
60 | * the read of "a" and the read of "b". Therefore, on some CPUs, such | ||
61 | * as Alpha, "y" could be set to 3 and "x" to 0. Use rmb() | ||
62 | * in cases like this where there are no data dependencies. | ||
63 | */ | ||
64 | |||
65 | #define read_barrier_depends() do { } while(0) | 13 | #define read_barrier_depends() do { } while(0) |
66 | #define smp_read_barrier_depends() do { } while(0) | 14 | #define smp_read_barrier_depends() do { } while(0) |
67 | 15 | ||
@@ -127,20 +75,21 @@ | |||
127 | 75 | ||
128 | #include <asm/wbflush.h> | 76 | #include <asm/wbflush.h> |
129 | 77 | ||
130 | #define wmb() fast_wmb() | ||
131 | #define rmb() fast_rmb() | ||
132 | #define mb() wbflush() | 78 | #define mb() wbflush() |
133 | #define iob() wbflush() | 79 | #define iob() wbflush() |
134 | 80 | ||
135 | #else /* !CONFIG_CPU_HAS_WB */ | 81 | #else /* !CONFIG_CPU_HAS_WB */ |
136 | 82 | ||
137 | #define wmb() fast_wmb() | ||
138 | #define rmb() fast_rmb() | ||
139 | #define mb() fast_mb() | 83 | #define mb() fast_mb() |
140 | #define iob() fast_iob() | 84 | #define iob() fast_iob() |
141 | 85 | ||
142 | #endif /* !CONFIG_CPU_HAS_WB */ | 86 | #endif /* !CONFIG_CPU_HAS_WB */ |
143 | 87 | ||
88 | #define wmb() fast_wmb() | ||
89 | #define rmb() fast_rmb() | ||
90 | #define dma_wmb() fast_wmb() | ||
91 | #define dma_rmb() fast_rmb() | ||
92 | |||
144 | #if defined(CONFIG_WEAK_ORDERING) && defined(CONFIG_SMP) | 93 | #if defined(CONFIG_WEAK_ORDERING) && defined(CONFIG_SMP) |
145 | # ifdef CONFIG_CPU_CAVIUM_OCTEON | 94 | # ifdef CONFIG_CPU_CAVIUM_OCTEON |
146 | # define smp_mb() __sync() | 95 | # define smp_mb() __sync() |
diff --git a/arch/powerpc/include/asm/barrier.h b/arch/powerpc/include/asm/barrier.h index bab79a110c7b..a3bf5be111ff 100644 --- a/arch/powerpc/include/asm/barrier.h +++ b/arch/powerpc/include/asm/barrier.h | |||
@@ -33,12 +33,9 @@ | |||
33 | #define mb() __asm__ __volatile__ ("sync" : : : "memory") | 33 | #define mb() __asm__ __volatile__ ("sync" : : : "memory") |
34 | #define rmb() __asm__ __volatile__ ("sync" : : : "memory") | 34 | #define rmb() __asm__ __volatile__ ("sync" : : : "memory") |
35 | #define wmb() __asm__ __volatile__ ("sync" : : : "memory") | 35 | #define wmb() __asm__ __volatile__ ("sync" : : : "memory") |
36 | #define read_barrier_depends() do { } while(0) | ||
37 | 36 | ||
38 | #define set_mb(var, value) do { var = value; mb(); } while (0) | 37 | #define set_mb(var, value) do { var = value; mb(); } while (0) |
39 | 38 | ||
40 | #ifdef CONFIG_SMP | ||
41 | |||
42 | #ifdef __SUBARCH_HAS_LWSYNC | 39 | #ifdef __SUBARCH_HAS_LWSYNC |
43 | # define SMPWMB LWSYNC | 40 | # define SMPWMB LWSYNC |
44 | #else | 41 | #else |
@@ -46,20 +43,26 @@ | |||
46 | #endif | 43 | #endif |
47 | 44 | ||
48 | #define __lwsync() __asm__ __volatile__ (stringify_in_c(LWSYNC) : : :"memory") | 45 | #define __lwsync() __asm__ __volatile__ (stringify_in_c(LWSYNC) : : :"memory") |
46 | #define dma_rmb() __lwsync() | ||
47 | #define dma_wmb() __asm__ __volatile__ (stringify_in_c(SMPWMB) : : :"memory") | ||
48 | |||
49 | #ifdef CONFIG_SMP | ||
50 | #define smp_lwsync() __lwsync() | ||
49 | 51 | ||
50 | #define smp_mb() mb() | 52 | #define smp_mb() mb() |
51 | #define smp_rmb() __lwsync() | 53 | #define smp_rmb() __lwsync() |
52 | #define smp_wmb() __asm__ __volatile__ (stringify_in_c(SMPWMB) : : :"memory") | 54 | #define smp_wmb() __asm__ __volatile__ (stringify_in_c(SMPWMB) : : :"memory") |
53 | #define smp_read_barrier_depends() read_barrier_depends() | ||
54 | #else | 55 | #else |
55 | #define __lwsync() barrier() | 56 | #define smp_lwsync() barrier() |
56 | 57 | ||
57 | #define smp_mb() barrier() | 58 | #define smp_mb() barrier() |
58 | #define smp_rmb() barrier() | 59 | #define smp_rmb() barrier() |
59 | #define smp_wmb() barrier() | 60 | #define smp_wmb() barrier() |
60 | #define smp_read_barrier_depends() do { } while(0) | ||
61 | #endif /* CONFIG_SMP */ | 61 | #endif /* CONFIG_SMP */ |
62 | 62 | ||
63 | #define read_barrier_depends() do { } while (0) | ||
64 | #define smp_read_barrier_depends() do { } while (0) | ||
65 | |||
63 | /* | 66 | /* |
64 | * This is a barrier which prevents following instructions from being | 67 | * This is a barrier which prevents following instructions from being |
65 | * started until the value of the argument x is known. For example, if | 68 | * started until the value of the argument x is known. For example, if |
@@ -72,7 +75,7 @@ | |||
72 | #define smp_store_release(p, v) \ | 75 | #define smp_store_release(p, v) \ |
73 | do { \ | 76 | do { \ |
74 | compiletime_assert_atomic_type(*p); \ | 77 | compiletime_assert_atomic_type(*p); \ |
75 | __lwsync(); \ | 78 | smp_lwsync(); \ |
76 | ACCESS_ONCE(*p) = (v); \ | 79 | ACCESS_ONCE(*p) = (v); \ |
77 | } while (0) | 80 | } while (0) |
78 | 81 | ||
@@ -80,7 +83,7 @@ do { \ | |||
80 | ({ \ | 83 | ({ \ |
81 | typeof(*p) ___p1 = ACCESS_ONCE(*p); \ | 84 | typeof(*p) ___p1 = ACCESS_ONCE(*p); \ |
82 | compiletime_assert_atomic_type(*p); \ | 85 | compiletime_assert_atomic_type(*p); \ |
83 | __lwsync(); \ | 86 | smp_lwsync(); \ |
84 | ___p1; \ | 87 | ___p1; \ |
85 | }) | 88 | }) |
86 | 89 | ||
diff --git a/arch/s390/include/asm/barrier.h b/arch/s390/include/asm/barrier.h index b5dce6544d76..8d724718ec21 100644 --- a/arch/s390/include/asm/barrier.h +++ b/arch/s390/include/asm/barrier.h | |||
@@ -24,11 +24,14 @@ | |||
24 | 24 | ||
25 | #define rmb() mb() | 25 | #define rmb() mb() |
26 | #define wmb() mb() | 26 | #define wmb() mb() |
27 | #define read_barrier_depends() do { } while(0) | 27 | #define dma_rmb() rmb() |
28 | #define dma_wmb() wmb() | ||
28 | #define smp_mb() mb() | 29 | #define smp_mb() mb() |
29 | #define smp_rmb() rmb() | 30 | #define smp_rmb() rmb() |
30 | #define smp_wmb() wmb() | 31 | #define smp_wmb() wmb() |
31 | #define smp_read_barrier_depends() read_barrier_depends() | 32 | |
33 | #define read_barrier_depends() do { } while (0) | ||
34 | #define smp_read_barrier_depends() do { } while (0) | ||
32 | 35 | ||
33 | #define smp_mb__before_atomic() smp_mb() | 36 | #define smp_mb__before_atomic() smp_mb() |
34 | #define smp_mb__after_atomic() smp_mb() | 37 | #define smp_mb__after_atomic() smp_mb() |
diff --git a/arch/sparc/include/asm/barrier_64.h b/arch/sparc/include/asm/barrier_64.h index 305dcc3dc721..76648941fea7 100644 --- a/arch/sparc/include/asm/barrier_64.h +++ b/arch/sparc/include/asm/barrier_64.h | |||
@@ -37,7 +37,9 @@ do { __asm__ __volatile__("ba,pt %%xcc, 1f\n\t" \ | |||
37 | #define rmb() __asm__ __volatile__("":::"memory") | 37 | #define rmb() __asm__ __volatile__("":::"memory") |
38 | #define wmb() __asm__ __volatile__("":::"memory") | 38 | #define wmb() __asm__ __volatile__("":::"memory") |
39 | 39 | ||
40 | #define read_barrier_depends() do { } while(0) | 40 | #define dma_rmb() rmb() |
41 | #define dma_wmb() wmb() | ||
42 | |||
41 | #define set_mb(__var, __value) \ | 43 | #define set_mb(__var, __value) \ |
42 | do { __var = __value; membar_safe("#StoreLoad"); } while(0) | 44 | do { __var = __value; membar_safe("#StoreLoad"); } while(0) |
43 | 45 | ||
@@ -51,7 +53,8 @@ do { __asm__ __volatile__("ba,pt %%xcc, 1f\n\t" \ | |||
51 | #define smp_wmb() __asm__ __volatile__("":::"memory") | 53 | #define smp_wmb() __asm__ __volatile__("":::"memory") |
52 | #endif | 54 | #endif |
53 | 55 | ||
54 | #define smp_read_barrier_depends() do { } while(0) | 56 | #define read_barrier_depends() do { } while (0) |
57 | #define smp_read_barrier_depends() do { } while (0) | ||
55 | 58 | ||
56 | #define smp_store_release(p, v) \ | 59 | #define smp_store_release(p, v) \ |
57 | do { \ | 60 | do { \ |
diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h index 0f4460b5636d..2ab1eb33106e 100644 --- a/arch/x86/include/asm/barrier.h +++ b/arch/x86/include/asm/barrier.h | |||
@@ -24,78 +24,28 @@ | |||
24 | #define wmb() asm volatile("sfence" ::: "memory") | 24 | #define wmb() asm volatile("sfence" ::: "memory") |
25 | #endif | 25 | #endif |
26 | 26 | ||
27 | /** | ||
28 | * read_barrier_depends - Flush all pending reads that subsequents reads | ||
29 | * depend on. | ||
30 | * | ||
31 | * No data-dependent reads from memory-like regions are ever reordered | ||
32 | * over this barrier. All reads preceding this primitive are guaranteed | ||
33 | * to access memory (but not necessarily other CPUs' caches) before any | ||
34 | * reads following this primitive that depend on the data return by | ||
35 | * any of the preceding reads. This primitive is much lighter weight than | ||
36 | * rmb() on most CPUs, and is never heavier weight than is | ||
37 | * rmb(). | ||
38 | * | ||
39 | * These ordering constraints are respected by both the local CPU | ||
40 | * and the compiler. | ||
41 | * | ||
42 | * Ordering is not guaranteed by anything other than these primitives, | ||
43 | * not even by data dependencies. See the documentation for | ||
44 | * memory_barrier() for examples and URLs to more information. | ||
45 | * | ||
46 | * For example, the following code would force ordering (the initial | ||
47 | * value of "a" is zero, "b" is one, and "p" is "&a"): | ||
48 | * | ||
49 | * <programlisting> | ||
50 | * CPU 0 CPU 1 | ||
51 | * | ||
52 | * b = 2; | ||
53 | * memory_barrier(); | ||
54 | * p = &b; q = p; | ||
55 | * read_barrier_depends(); | ||
56 | * d = *q; | ||
57 | * </programlisting> | ||
58 | * | ||
59 | * because the read of "*q" depends on the read of "p" and these | ||
60 | * two reads are separated by a read_barrier_depends(). However, | ||
61 | * the following code, with the same initial values for "a" and "b": | ||
62 | * | ||
63 | * <programlisting> | ||
64 | * CPU 0 CPU 1 | ||
65 | * | ||
66 | * a = 2; | ||
67 | * memory_barrier(); | ||
68 | * b = 3; y = b; | ||
69 | * read_barrier_depends(); | ||
70 | * x = a; | ||
71 | * </programlisting> | ||
72 | * | ||
73 | * does not enforce ordering, since there is no data dependency between | ||
74 | * the read of "a" and the read of "b". Therefore, on some CPUs, such | ||
75 | * as Alpha, "y" could be set to 3 and "x" to 0. Use rmb() | ||
76 | * in cases like this where there are no data dependencies. | ||
77 | **/ | ||
78 | |||
79 | #define read_barrier_depends() do { } while (0) | ||
80 | |||
81 | #ifdef CONFIG_SMP | ||
82 | #define smp_mb() mb() | ||
83 | #ifdef CONFIG_X86_PPRO_FENCE | 27 | #ifdef CONFIG_X86_PPRO_FENCE |
84 | # define smp_rmb() rmb() | 28 | #define dma_rmb() rmb() |
85 | #else | 29 | #else |
86 | # define smp_rmb() barrier() | 30 | #define dma_rmb() barrier() |
87 | #endif | 31 | #endif |
32 | #define dma_wmb() barrier() | ||
33 | |||
34 | #ifdef CONFIG_SMP | ||
35 | #define smp_mb() mb() | ||
36 | #define smp_rmb() dma_rmb() | ||
88 | #define smp_wmb() barrier() | 37 | #define smp_wmb() barrier() |
89 | #define smp_read_barrier_depends() read_barrier_depends() | ||
90 | #define set_mb(var, value) do { (void)xchg(&var, value); } while (0) | 38 | #define set_mb(var, value) do { (void)xchg(&var, value); } while (0) |
91 | #else /* !SMP */ | 39 | #else /* !SMP */ |
92 | #define smp_mb() barrier() | 40 | #define smp_mb() barrier() |
93 | #define smp_rmb() barrier() | 41 | #define smp_rmb() barrier() |
94 | #define smp_wmb() barrier() | 42 | #define smp_wmb() barrier() |
95 | #define smp_read_barrier_depends() do { } while (0) | ||
96 | #define set_mb(var, value) do { var = value; barrier(); } while (0) | 43 | #define set_mb(var, value) do { var = value; barrier(); } while (0) |
97 | #endif /* SMP */ | 44 | #endif /* SMP */ |
98 | 45 | ||
46 | #define read_barrier_depends() do { } while (0) | ||
47 | #define smp_read_barrier_depends() do { } while (0) | ||
48 | |||
99 | #if defined(CONFIG_X86_PPRO_FENCE) | 49 | #if defined(CONFIG_X86_PPRO_FENCE) |
100 | 50 | ||
101 | /* | 51 | /* |
diff --git a/arch/x86/um/asm/barrier.h b/arch/x86/um/asm/barrier.h index cc04e67bfd05..2d7d9a1f5b53 100644 --- a/arch/x86/um/asm/barrier.h +++ b/arch/x86/um/asm/barrier.h | |||
@@ -29,20 +29,18 @@ | |||
29 | 29 | ||
30 | #endif /* CONFIG_X86_32 */ | 30 | #endif /* CONFIG_X86_32 */ |
31 | 31 | ||
32 | #define read_barrier_depends() do { } while (0) | ||
33 | |||
34 | #ifdef CONFIG_SMP | ||
35 | |||
36 | #define smp_mb() mb() | ||
37 | #ifdef CONFIG_X86_PPRO_FENCE | 32 | #ifdef CONFIG_X86_PPRO_FENCE |
38 | #define smp_rmb() rmb() | 33 | #define dma_rmb() rmb() |
39 | #else /* CONFIG_X86_PPRO_FENCE */ | 34 | #else /* CONFIG_X86_PPRO_FENCE */ |
40 | #define smp_rmb() barrier() | 35 | #define dma_rmb() barrier() |
41 | #endif /* CONFIG_X86_PPRO_FENCE */ | 36 | #endif /* CONFIG_X86_PPRO_FENCE */ |
37 | #define dma_wmb() barrier() | ||
42 | 38 | ||
43 | #define smp_wmb() barrier() | 39 | #ifdef CONFIG_SMP |
44 | 40 | ||
45 | #define smp_read_barrier_depends() read_barrier_depends() | 41 | #define smp_mb() mb() |
42 | #define smp_rmb() dma_rmb() | ||
43 | #define smp_wmb() barrier() | ||
46 | #define set_mb(var, value) do { (void)xchg(&var, value); } while (0) | 44 | #define set_mb(var, value) do { (void)xchg(&var, value); } while (0) |
47 | 45 | ||
48 | #else /* CONFIG_SMP */ | 46 | #else /* CONFIG_SMP */ |
@@ -50,11 +48,13 @@ | |||
50 | #define smp_mb() barrier() | 48 | #define smp_mb() barrier() |
51 | #define smp_rmb() barrier() | 49 | #define smp_rmb() barrier() |
52 | #define smp_wmb() barrier() | 50 | #define smp_wmb() barrier() |
53 | #define smp_read_barrier_depends() do { } while (0) | ||
54 | #define set_mb(var, value) do { var = value; barrier(); } while (0) | 51 | #define set_mb(var, value) do { var = value; barrier(); } while (0) |
55 | 52 | ||
56 | #endif /* CONFIG_SMP */ | 53 | #endif /* CONFIG_SMP */ |
57 | 54 | ||
55 | #define read_barrier_depends() do { } while (0) | ||
56 | #define smp_read_barrier_depends() do { } while (0) | ||
57 | |||
58 | /* | 58 | /* |
59 | * Stop RDTSC speculation. This is needed when you need to use RDTSC | 59 | * Stop RDTSC speculation. This is needed when you need to use RDTSC |
60 | * (or get_cycles or vread that possibly accesses the TSC) in a defined | 60 | * (or get_cycles or vread that possibly accesses the TSC) in a defined |
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_main.c b/drivers/net/ethernet/intel/fm10k/fm10k_main.c index ee1ecb146df7..eb088b129bc7 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_main.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_main.c | |||
@@ -615,14 +615,14 @@ static bool fm10k_clean_rx_irq(struct fm10k_q_vector *q_vector, | |||
615 | 615 | ||
616 | rx_desc = FM10K_RX_DESC(rx_ring, rx_ring->next_to_clean); | 616 | rx_desc = FM10K_RX_DESC(rx_ring, rx_ring->next_to_clean); |
617 | 617 | ||
618 | if (!fm10k_test_staterr(rx_desc, FM10K_RXD_STATUS_DD)) | 618 | if (!rx_desc->d.staterr) |
619 | break; | 619 | break; |
620 | 620 | ||
621 | /* This memory barrier is needed to keep us from reading | 621 | /* This memory barrier is needed to keep us from reading |
622 | * any other fields out of the rx_desc until we know the | 622 | * any other fields out of the rx_desc until we know the |
623 | * RXD_STATUS_DD bit is set | 623 | * descriptor has been written back |
624 | */ | 624 | */ |
625 | rmb(); | 625 | dma_rmb(); |
626 | 626 | ||
627 | /* retrieve a buffer from the ring */ | 627 | /* retrieve a buffer from the ring */ |
628 | skb = fm10k_fetch_rx_buffer(rx_ring, rx_desc, skb); | 628 | skb = fm10k_fetch_rx_buffer(rx_ring, rx_desc, skb); |
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 2e526d4904a6..ff59897a9463 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c | |||
@@ -6910,14 +6910,14 @@ static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget) | |||
6910 | 6910 | ||
6911 | rx_desc = IGB_RX_DESC(rx_ring, rx_ring->next_to_clean); | 6911 | rx_desc = IGB_RX_DESC(rx_ring, rx_ring->next_to_clean); |
6912 | 6912 | ||
6913 | if (!igb_test_staterr(rx_desc, E1000_RXD_STAT_DD)) | 6913 | if (!rx_desc->wb.upper.status_error) |
6914 | break; | 6914 | break; |
6915 | 6915 | ||
6916 | /* This memory barrier is needed to keep us from reading | 6916 | /* This memory barrier is needed to keep us from reading |
6917 | * any other fields out of the rx_desc until we know the | 6917 | * any other fields out of the rx_desc until we know the |
6918 | * RXD_STAT_DD bit is set | 6918 | * descriptor has been written back |
6919 | */ | 6919 | */ |
6920 | rmb(); | 6920 | dma_rmb(); |
6921 | 6921 | ||
6922 | /* retrieve a buffer from the ring */ | 6922 | /* retrieve a buffer from the ring */ |
6923 | skb = igb_fetch_rx_buffer(rx_ring, rx_desc, skb); | 6923 | skb = igb_fetch_rx_buffer(rx_ring, rx_desc, skb); |
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 798b05556e1b..2ed2c7de2304 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | |||
@@ -2009,15 +2009,14 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, | |||
2009 | 2009 | ||
2010 | rx_desc = IXGBE_RX_DESC(rx_ring, rx_ring->next_to_clean); | 2010 | rx_desc = IXGBE_RX_DESC(rx_ring, rx_ring->next_to_clean); |
2011 | 2011 | ||
2012 | if (!ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_DD)) | 2012 | if (!rx_desc->wb.upper.status_error) |
2013 | break; | 2013 | break; |
2014 | 2014 | ||
2015 | /* | 2015 | /* This memory barrier is needed to keep us from reading |
2016 | * This memory barrier is needed to keep us from reading | ||
2017 | * any other fields out of the rx_desc until we know the | 2016 | * any other fields out of the rx_desc until we know the |
2018 | * RXD_STAT_DD bit is set | 2017 | * descriptor has been written back |
2019 | */ | 2018 | */ |
2020 | rmb(); | 2019 | dma_rmb(); |
2021 | 2020 | ||
2022 | /* retrieve a buffer from the ring */ | 2021 | /* retrieve a buffer from the ring */ |
2023 | skb = ixgbe_fetch_rx_buffer(rx_ring, rx_desc); | 2022 | skb = ixgbe_fetch_rx_buffer(rx_ring, rx_desc); |
diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index 3dad7e884952..088136b37ebe 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c | |||
@@ -6605,6 +6605,9 @@ static inline void rtl8169_mark_to_asic(struct RxDesc *desc, u32 rx_buf_sz) | |||
6605 | { | 6605 | { |
6606 | u32 eor = le32_to_cpu(desc->opts1) & RingEnd; | 6606 | u32 eor = le32_to_cpu(desc->opts1) & RingEnd; |
6607 | 6607 | ||
6608 | /* Force memory writes to complete before releasing descriptor */ | ||
6609 | dma_wmb(); | ||
6610 | |||
6608 | desc->opts1 = cpu_to_le32(DescOwn | eor | rx_buf_sz); | 6611 | desc->opts1 = cpu_to_le32(DescOwn | eor | rx_buf_sz); |
6609 | } | 6612 | } |
6610 | 6613 | ||
@@ -6612,7 +6615,6 @@ static inline void rtl8169_map_to_asic(struct RxDesc *desc, dma_addr_t mapping, | |||
6612 | u32 rx_buf_sz) | 6615 | u32 rx_buf_sz) |
6613 | { | 6616 | { |
6614 | desc->addr = cpu_to_le64(mapping); | 6617 | desc->addr = cpu_to_le64(mapping); |
6615 | wmb(); | ||
6616 | rtl8169_mark_to_asic(desc, rx_buf_sz); | 6618 | rtl8169_mark_to_asic(desc, rx_buf_sz); |
6617 | } | 6619 | } |
6618 | 6620 | ||
@@ -7073,16 +7075,18 @@ static netdev_tx_t rtl8169_start_xmit(struct sk_buff *skb, | |||
7073 | 7075 | ||
7074 | skb_tx_timestamp(skb); | 7076 | skb_tx_timestamp(skb); |
7075 | 7077 | ||
7076 | wmb(); | 7078 | /* Force memory writes to complete before releasing descriptor */ |
7079 | dma_wmb(); | ||
7077 | 7080 | ||
7078 | /* Anti gcc 2.95.3 bugware (sic) */ | 7081 | /* Anti gcc 2.95.3 bugware (sic) */ |
7079 | status = opts[0] | len | (RingEnd * !((entry + 1) % NUM_TX_DESC)); | 7082 | status = opts[0] | len | (RingEnd * !((entry + 1) % NUM_TX_DESC)); |
7080 | txd->opts1 = cpu_to_le32(status); | 7083 | txd->opts1 = cpu_to_le32(status); |
7081 | 7084 | ||
7082 | tp->cur_tx += frags + 1; | 7085 | /* Force all memory writes to complete before notifying device */ |
7083 | |||
7084 | wmb(); | 7086 | wmb(); |
7085 | 7087 | ||
7088 | tp->cur_tx += frags + 1; | ||
7089 | |||
7086 | RTL_W8(TxPoll, NPQ); | 7090 | RTL_W8(TxPoll, NPQ); |
7087 | 7091 | ||
7088 | mmiowb(); | 7092 | mmiowb(); |
@@ -7181,11 +7185,16 @@ static void rtl_tx(struct net_device *dev, struct rtl8169_private *tp) | |||
7181 | struct ring_info *tx_skb = tp->tx_skb + entry; | 7185 | struct ring_info *tx_skb = tp->tx_skb + entry; |
7182 | u32 status; | 7186 | u32 status; |
7183 | 7187 | ||
7184 | rmb(); | ||
7185 | status = le32_to_cpu(tp->TxDescArray[entry].opts1); | 7188 | status = le32_to_cpu(tp->TxDescArray[entry].opts1); |
7186 | if (status & DescOwn) | 7189 | if (status & DescOwn) |
7187 | break; | 7190 | break; |
7188 | 7191 | ||
7192 | /* This barrier is needed to keep us from reading | ||
7193 | * any other fields out of the Tx descriptor until | ||
7194 | * we know the status of DescOwn | ||
7195 | */ | ||
7196 | dma_rmb(); | ||
7197 | |||
7189 | rtl8169_unmap_tx_skb(&tp->pci_dev->dev, tx_skb, | 7198 | rtl8169_unmap_tx_skb(&tp->pci_dev->dev, tx_skb, |
7190 | tp->TxDescArray + entry); | 7199 | tp->TxDescArray + entry); |
7191 | if (status & LastFrag) { | 7200 | if (status & LastFrag) { |
@@ -7280,11 +7289,16 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, u32 budget | |||
7280 | struct RxDesc *desc = tp->RxDescArray + entry; | 7289 | struct RxDesc *desc = tp->RxDescArray + entry; |
7281 | u32 status; | 7290 | u32 status; |
7282 | 7291 | ||
7283 | rmb(); | ||
7284 | status = le32_to_cpu(desc->opts1) & tp->opts1_mask; | 7292 | status = le32_to_cpu(desc->opts1) & tp->opts1_mask; |
7285 | |||
7286 | if (status & DescOwn) | 7293 | if (status & DescOwn) |
7287 | break; | 7294 | break; |
7295 | |||
7296 | /* This barrier is needed to keep us from reading | ||
7297 | * any other fields out of the Rx descriptor until | ||
7298 | * we know the status of DescOwn | ||
7299 | */ | ||
7300 | dma_rmb(); | ||
7301 | |||
7288 | if (unlikely(status & RxRES)) { | 7302 | if (unlikely(status & RxRES)) { |
7289 | netif_info(tp, rx_err, dev, "Rx ERROR. status = %08x\n", | 7303 | netif_info(tp, rx_err, dev, "Rx ERROR. status = %08x\n", |
7290 | status); | 7304 | status); |
@@ -7346,7 +7360,6 @@ process_pkt: | |||
7346 | } | 7360 | } |
7347 | release_descriptor: | 7361 | release_descriptor: |
7348 | desc->opts2 = 0; | 7362 | desc->opts2 = 0; |
7349 | wmb(); | ||
7350 | rtl8169_mark_to_asic(desc, rx_buf_sz); | 7363 | rtl8169_mark_to_asic(desc, rx_buf_sz); |
7351 | } | 7364 | } |
7352 | 7365 | ||
diff --git a/include/asm-generic/barrier.h b/include/asm-generic/barrier.h index 1402fa855388..f5c40b0fadc2 100644 --- a/include/asm-generic/barrier.h +++ b/include/asm-generic/barrier.h | |||
@@ -42,6 +42,14 @@ | |||
42 | #define wmb() mb() | 42 | #define wmb() mb() |
43 | #endif | 43 | #endif |
44 | 44 | ||
45 | #ifndef dma_rmb | ||
46 | #define dma_rmb() rmb() | ||
47 | #endif | ||
48 | |||
49 | #ifndef dma_wmb | ||
50 | #define dma_wmb() wmb() | ||
51 | #endif | ||
52 | |||
45 | #ifndef read_barrier_depends | 53 | #ifndef read_barrier_depends |
46 | #define read_barrier_depends() do { } while (0) | 54 | #define read_barrier_depends() do { } while (0) |
47 | #endif | 55 | #endif |