aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAlan Stern <stern@rowland.harvard.edu>2018-02-20 18:25:12 -0500
committerIngo Molnar <mingo@kernel.org>2018-02-21 03:58:16 -0500
commitbf28ae5627442355dbb8d99238da4fb95c2dd4ec (patch)
tree2618fa8006b9b999a4ce34adc53cb1850c70cc0c
parentcac79a39f200ef73ae7fc8a429ce2859ebb118d9 (diff)
tools/memory-model: Remove rb-dep, smp_read_barrier_depends, and lockless_dereference
Since commit 76ebbe78f739 ("locking/barriers: Add implicit smp_read_barrier_depends() to READ_ONCE()") was merged for the 4.15 kernel, it has not been necessary to use smp_read_barrier_depends(). Similarly, commit 59ecbbe7b31c ("locking/barriers: Kill lockless_dereference()") removed lockless_dereference() from the kernel. Since these primitives are no longer part of the kernel, they do not belong in the Linux Kernel Memory Consistency Model. This patch removes them, along with the internal rb-dep relation, and updates the revelant documentation. Signed-off-by: Alan Stern <stern@rowland.harvard.edu> Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com> Acked-by: Peter Zijlstra <peterz@infradead.org> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: akiyks@gmail.com Cc: boqun.feng@gmail.com Cc: dhowells@redhat.com Cc: j.alglave@ucl.ac.uk Cc: linux-arch@vger.kernel.org Cc: luc.maranget@inria.fr Cc: nborisov@suse.com Cc: npiggin@gmail.com Cc: parri.andrea@gmail.com Cc: will.deacon@arm.com Link: http://lkml.kernel.org/r/1519169112-20593-12-git-send-email-paulmck@linux.vnet.ibm.com Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r--tools/memory-model/Documentation/cheatsheet.txt3
-rw-r--r--tools/memory-model/Documentation/explanation.txt81
-rw-r--r--tools/memory-model/linux-kernel.bell1
-rw-r--r--tools/memory-model/linux-kernel.cat7
-rw-r--r--tools/memory-model/linux-kernel.def2
5 files changed, 46 insertions, 48 deletions
diff --git a/tools/memory-model/Documentation/cheatsheet.txt b/tools/memory-model/Documentation/cheatsheet.txt
index 1917712bce99..04e458acd6d4 100644
--- a/tools/memory-model/Documentation/cheatsheet.txt
+++ b/tools/memory-model/Documentation/cheatsheet.txt
@@ -6,8 +6,7 @@
6Store, e.g., WRITE_ONCE() Y Y 6Store, e.g., WRITE_ONCE() Y Y
7Load, e.g., READ_ONCE() Y Y Y 7Load, e.g., READ_ONCE() Y Y Y
8Unsuccessful RMW operation Y Y Y 8Unsuccessful RMW operation Y Y Y
9smp_read_barrier_depends() Y Y Y 9rcu_dereference() Y Y Y Y
10*_dereference() Y Y Y Y
11Successful *_acquire() R Y Y Y Y Y Y 10Successful *_acquire() R Y Y Y Y Y Y
12Successful *_release() C Y Y Y W Y 11Successful *_release() C Y Y Y W Y
13smp_rmb() Y R Y Y R 12smp_rmb() Y R Y Y R
diff --git a/tools/memory-model/Documentation/explanation.txt b/tools/memory-model/Documentation/explanation.txt
index 867e0ea69b6d..dae8b8cb2ad3 100644
--- a/tools/memory-model/Documentation/explanation.txt
+++ b/tools/memory-model/Documentation/explanation.txt
@@ -1,5 +1,5 @@
1Explanation of the Linux-Kernel Memory Model 1Explanation of the Linux-Kernel Memory Consistency Model
2~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 2~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
3 3
4:Author: Alan Stern <stern@rowland.harvard.edu> 4:Author: Alan Stern <stern@rowland.harvard.edu>
5:Created: October 2017 5:Created: October 2017
@@ -35,25 +35,24 @@ Explanation of the Linux-Kernel Memory Model
35INTRODUCTION 35INTRODUCTION
36------------ 36------------
37 37
38The Linux-kernel memory model (LKMM) is rather complex and obscure. 38The Linux-kernel memory consistency model (LKMM) is rather complex and
39This is particularly evident if you read through the linux-kernel.bell 39obscure. This is particularly evident if you read through the
40and linux-kernel.cat files that make up the formal version of the 40linux-kernel.bell and linux-kernel.cat files that make up the formal
41memory model; they are extremely terse and their meanings are far from 41version of the model; they are extremely terse and their meanings are
42clear. 42far from clear.
43 43
44This document describes the ideas underlying the LKMM. It is meant 44This document describes the ideas underlying the LKMM. It is meant
45for people who want to understand how the memory model was designed. 45for people who want to understand how the model was designed. It does
46It does not go into the details of the code in the .bell and .cat 46not go into the details of the code in the .bell and .cat files;
47files; rather, it explains in English what the code expresses 47rather, it explains in English what the code expresses symbolically.
48symbolically.
49 48
50Sections 2 (BACKGROUND) through 5 (ORDERING AND CYCLES) are aimed 49Sections 2 (BACKGROUND) through 5 (ORDERING AND CYCLES) are aimed
51toward beginners; they explain what memory models are and the basic 50toward beginners; they explain what memory consistency models are and
52notions shared by all such models. People already familiar with these 51the basic notions shared by all such models. People already familiar
53concepts can skim or skip over them. Sections 6 (EVENTS) through 12 52with these concepts can skim or skip over them. Sections 6 (EVENTS)
54(THE FROM_READS RELATION) describe the fundamental relations used in 53through 12 (THE FROM_READS RELATION) describe the fundamental
55many memory models. Starting in Section 13 (AN OPERATIONAL MODEL), 54relations used in many models. Starting in Section 13 (AN OPERATIONAL
56the workings of the LKMM itself are covered. 55MODEL), the workings of the LKMM itself are covered.
57 56
58Warning: The code examples in this document are not written in the 57Warning: The code examples in this document are not written in the
59proper format for litmus tests. They don't include a header line, the 58proper format for litmus tests. They don't include a header line, the
@@ -827,8 +826,8 @@ A-cumulative; they only affect the propagation of stores that are
827executed on C before the fence (i.e., those which precede the fence in 826executed on C before the fence (i.e., those which precede the fence in
828program order). 827program order).
829 828
830smp_read_barrier_depends(), rcu_read_lock(), rcu_read_unlock(), and 829read_lock(), rcu_read_unlock(), and synchronize_rcu() fences have
831synchronize_rcu() fences have other properties which we discuss later. 830other properties which we discuss later.
832 831
833 832
834PROPAGATION ORDER RELATION: cumul-fence 833PROPAGATION ORDER RELATION: cumul-fence
@@ -988,8 +987,8 @@ Another possibility, not mentioned earlier but discussed in the next
988section, is: 987section, is:
989 988
990 X and Y are both loads, X ->addr Y (i.e., there is an address 989 X and Y are both loads, X ->addr Y (i.e., there is an address
991 dependency from X to Y), and an smp_read_barrier_depends() 990 dependency from X to Y), and X is a READ_ONCE() or an atomic
992 fence occurs between them. 991 access.
993 992
994Dependencies can also cause instructions to be executed in program 993Dependencies can also cause instructions to be executed in program
995order. This is uncontroversial when the second instruction is a 994order. This is uncontroversial when the second instruction is a
@@ -1015,9 +1014,9 @@ After all, a CPU cannot ask the memory subsystem to load a value from
1015a particular location before it knows what that location is. However, 1014a particular location before it knows what that location is. However,
1016the split-cache design used by Alpha can cause it to behave in a way 1015the split-cache design used by Alpha can cause it to behave in a way
1017that looks as if the loads were executed out of order (see the next 1016that looks as if the loads were executed out of order (see the next
1018section for more details). For this reason, the LKMM does not include 1017section for more details). The kernel includes a workaround for this
1019address dependencies between read events in the ppo relation unless an 1018problem when the loads come from READ_ONCE(), and therefore the LKMM
1020smp_read_barrier_depends() fence is present. 1019includes address dependencies to loads in the ppo relation.
1021 1020
1022On the other hand, dependencies can indirectly affect the ordering of 1021On the other hand, dependencies can indirectly affect the ordering of
1023two loads. This happens when there is a dependency from a load to a 1022two loads. This happens when there is a dependency from a load to a
@@ -1114,11 +1113,12 @@ code such as the following:
1114 int *r1; 1113 int *r1;
1115 int r2; 1114 int r2;
1116 1115
1117 r1 = READ_ONCE(ptr); 1116 r1 = ptr;
1118 r2 = READ_ONCE(*r1); 1117 r2 = READ_ONCE(*r1);
1119 } 1118 }
1120 1119
1121can malfunction on Alpha systems. It is quite possible that r1 = &x 1120can malfunction on Alpha systems (notice that P1 uses an ordinary load
1121to read ptr instead of READ_ONCE()). It is quite possible that r1 = &x
1122and r2 = 0 at the end, in spite of the address dependency. 1122and r2 = 0 at the end, in spite of the address dependency.
1123 1123
1124At first glance this doesn't seem to make sense. We know that the 1124At first glance this doesn't seem to make sense. We know that the
@@ -1141,11 +1141,15 @@ This could not have happened if the local cache had processed the
1141incoming stores in FIFO order. In constrast, other architectures 1141incoming stores in FIFO order. In constrast, other architectures
1142maintain at least the appearance of FIFO order. 1142maintain at least the appearance of FIFO order.
1143 1143
1144In practice, this difficulty is solved by inserting an 1144In practice, this difficulty is solved by inserting a special fence
1145smp_read_barrier_depends() fence between P1's two loads. The effect 1145between P1's two loads when the kernel is compiled for the Alpha
1146of this fence is to cause the CPU not to execute any po-later 1146architecture. In fact, as of version 4.15, the kernel automatically
1147instructions until after the local cache has finished processing all 1147adds this fence (called smp_read_barrier_depends() and defined as
1148the stores it has already received. Thus, if the code was changed to: 1148nothing at all on non-Alpha builds) after every READ_ONCE() and atomic
1149load. The effect of the fence is to cause the CPU not to execute any
1150po-later instructions until after the local cache has finished
1151processing all the stores it has already received. Thus, if the code
1152was changed to:
1149 1153
1150 P1() 1154 P1()
1151 { 1155 {
@@ -1153,13 +1157,15 @@ the stores it has already received. Thus, if the code was changed to:
1153 int r2; 1157 int r2;
1154 1158
1155 r1 = READ_ONCE(ptr); 1159 r1 = READ_ONCE(ptr);
1156 smp_read_barrier_depends();
1157 r2 = READ_ONCE(*r1); 1160 r2 = READ_ONCE(*r1);
1158 } 1161 }
1159 1162
1160then we would never get r1 = &x and r2 = 0. By the time P1 executed 1163then we would never get r1 = &x and r2 = 0. By the time P1 executed
1161its second load, the x = 1 store would already be fully processed by 1164its second load, the x = 1 store would already be fully processed by
1162the local cache and available for satisfying the read request. 1165the local cache and available for satisfying the read request. Thus
1166we have yet another reason why shared data should always be read with
1167READ_ONCE() or another synchronization primitive rather than accessed
1168directly.
1163 1169
1164The LKMM requires that smp_rmb(), acquire fences, and strong fences 1170The LKMM requires that smp_rmb(), acquire fences, and strong fences
1165share this property with smp_read_barrier_depends(): They do not allow 1171share this property with smp_read_barrier_depends(): They do not allow
@@ -1751,11 +1757,10 @@ no further involvement from the CPU. Since the CPU doesn't ever read
1751the value of x, there is nothing for the smp_rmb() fence to act on. 1757the value of x, there is nothing for the smp_rmb() fence to act on.
1752 1758
1753The LKMM defines a few extra synchronization operations in terms of 1759The LKMM defines a few extra synchronization operations in terms of
1754things we have already covered. In particular, rcu_dereference() and 1760things we have already covered. In particular, rcu_dereference() is
1755lockless_dereference() are both treated as a READ_ONCE() followed by 1761treated as READ_ONCE() and rcu_assign_pointer() is treated as
1756smp_read_barrier_depends() -- which also happens to be how they are 1762smp_store_release() -- which is basically how the Linux kernel treats
1757defined in include/linux/rcupdate.h and include/linux/compiler.h, 1763them.
1758respectively.
1759 1764
1760There are a few oddball fences which need special treatment: 1765There are a few oddball fences which need special treatment:
1761smp_mb__before_atomic(), smp_mb__after_atomic(), and 1766smp_mb__before_atomic(), smp_mb__after_atomic(), and
diff --git a/tools/memory-model/linux-kernel.bell b/tools/memory-model/linux-kernel.bell
index 18885ad15de9..432c7cf71b23 100644
--- a/tools/memory-model/linux-kernel.bell
+++ b/tools/memory-model/linux-kernel.bell
@@ -24,7 +24,6 @@ instructions RMW[{'once,'acquire,'release}]
24enum Barriers = 'wmb (*smp_wmb*) || 24enum Barriers = 'wmb (*smp_wmb*) ||
25 'rmb (*smp_rmb*) || 25 'rmb (*smp_rmb*) ||
26 'mb (*smp_mb*) || 26 'mb (*smp_mb*) ||
27 'rb_dep (*smp_read_barrier_depends*) ||
28 'rcu-lock (*rcu_read_lock*) || 27 'rcu-lock (*rcu_read_lock*) ||
29 'rcu-unlock (*rcu_read_unlock*) || 28 'rcu-unlock (*rcu_read_unlock*) ||
30 'sync-rcu (*synchronize_rcu*) || 29 'sync-rcu (*synchronize_rcu*) ||
diff --git a/tools/memory-model/linux-kernel.cat b/tools/memory-model/linux-kernel.cat
index f0d27f813ec2..df97db03b6c2 100644
--- a/tools/memory-model/linux-kernel.cat
+++ b/tools/memory-model/linux-kernel.cat
@@ -25,7 +25,6 @@ include "lock.cat"
25(*******************) 25(*******************)
26 26
27(* Fences *) 27(* Fences *)
28let rb-dep = [R] ; fencerel(Rb_dep) ; [R]
29let rmb = [R \ Noreturn] ; fencerel(Rmb) ; [R \ Noreturn] 28let rmb = [R \ Noreturn] ; fencerel(Rmb) ; [R \ Noreturn]
30let wmb = [W] ; fencerel(Wmb) ; [W] 29let wmb = [W] ; fencerel(Wmb) ; [W]
31let mb = ([M] ; fencerel(Mb) ; [M]) | 30let mb = ([M] ; fencerel(Mb) ; [M]) |
@@ -61,11 +60,9 @@ let dep = addr | data
61let rwdep = (dep | ctrl) ; [W] 60let rwdep = (dep | ctrl) ; [W]
62let overwrite = co | fr 61let overwrite = co | fr
63let to-w = rwdep | (overwrite & int) 62let to-w = rwdep | (overwrite & int)
64let rrdep = addr | (dep ; rfi) 63let to-r = addr | (dep ; rfi) | rfi-rel-acq
65let strong-rrdep = rrdep+ & rb-dep
66let to-r = strong-rrdep | rfi-rel-acq
67let fence = strong-fence | wmb | po-rel | rmb | acq-po 64let fence = strong-fence | wmb | po-rel | rmb | acq-po
68let ppo = rrdep* ; (to-r | to-w | fence) 65let ppo = to-r | to-w | fence
69 66
70(* Propagation: Ordering from release operations and strong fences. *) 67(* Propagation: Ordering from release operations and strong fences. *)
71let A-cumul(r) = rfe? ; r 68let A-cumul(r) = rfe? ; r
diff --git a/tools/memory-model/linux-kernel.def b/tools/memory-model/linux-kernel.def
index f5a1eb04cb64..5dfb9c7f3462 100644
--- a/tools/memory-model/linux-kernel.def
+++ b/tools/memory-model/linux-kernel.def
@@ -13,14 +13,12 @@ WRITE_ONCE(X,V) { __store{once}(X,V); }
13smp_store_release(X,V) { __store{release}(*X,V); } 13smp_store_release(X,V) { __store{release}(*X,V); }
14smp_load_acquire(X) __load{acquire}(*X) 14smp_load_acquire(X) __load{acquire}(*X)
15rcu_assign_pointer(X,V) { __store{release}(X,V); } 15rcu_assign_pointer(X,V) { __store{release}(X,V); }
16lockless_dereference(X) __load{lderef}(X)
17rcu_dereference(X) __load{deref}(X) 16rcu_dereference(X) __load{deref}(X)
18 17
19// Fences 18// Fences
20smp_mb() { __fence{mb} ; } 19smp_mb() { __fence{mb} ; }
21smp_rmb() { __fence{rmb} ; } 20smp_rmb() { __fence{rmb} ; }
22smp_wmb() { __fence{wmb} ; } 21smp_wmb() { __fence{wmb} ; }
23smp_read_barrier_depends() { __fence{rb_dep}; }
24smp_mb__before_atomic() { __fence{before-atomic} ; } 22smp_mb__before_atomic() { __fence{before-atomic} ; }
25smp_mb__after_atomic() { __fence{after-atomic} ; } 23smp_mb__after_atomic() { __fence{after-atomic} ; }
26smp_mb__after_spinlock() { __fence{after-spinlock} ; } 24smp_mb__after_spinlock() { __fence{after-spinlock} ; }