author     Elena Reshetova <elena.reshetova@intel.com>    2019-01-30 06:18:51 -0500
committer  Ingo Molnar <mingo@kernel.org>                 2019-02-04 03:03:31 -0500
commit     47b8f3ab9c49daa824af848f9e02889662d8638f
tree       bd42ab897748eafaacdc5d0bb97256a678cee3d5
parent     412f34a82ccf7dd52f6b197f6450a33f03342523
refcount_t: Add ACQUIRE ordering on success for dec(sub)_and_test() variants
This adds an smp_acquire__after_ctrl_dep() barrier on a successful
decrease of the refcounter value from 1 to 0 for the
refcount_dec(sub)_and_test variants, and therefore gives stronger
memory ordering guarantees than prior versions of these functions.
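
As an illustration, this is the classic put-path pattern the stronger
ordering protects; the struct and function names below are illustrative
only, not part of this patch:

	#include <linux/refcount.h>
	#include <linux/slab.h>

	struct obj {
		refcount_t ref;
		int payload;
	};

	/*
	 * The final put frees the object. RELEASE ordering on the
	 * decrement makes our own prior stores to o->payload visible
	 * before the count drops; the new ACQUIRE ordering on success
	 * keeps kfree() (and any reads of the object) from being
	 * reordered before the decrement that observed zero.
	 */
	static void obj_put(struct obj *o)
	{
		if (refcount_dec_and_test(&o->ref))
			kfree(o);
	}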
Co-developed-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Elena Reshetova <elena.reshetova@intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Andrea Parri <andrea.parri@amarulasolutions.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Deacon <will.deacon@arm.com>
Cc: dvyukov@google.com
Cc: keescook@chromium.org
Cc: stern@rowland.harvard.edu
Link: https://lkml.kernel.org/r/1548847131-27854-2-git-send-email-elena.reshetova@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
 Documentation/core-api/refcount-vs-atomic.rst | 24
 arch/x86/include/asm/refcount.h               | 22
 lib/refcount.c                                | 18
 3 files changed, 52 insertions(+), 12 deletions(-)
diff --git a/Documentation/core-api/refcount-vs-atomic.rst b/Documentation/core-api/refcount-vs-atomic.rst
index 322851bada16..976e85adffe8 100644
--- a/Documentation/core-api/refcount-vs-atomic.rst
+++ b/Documentation/core-api/refcount-vs-atomic.rst
@@ -54,6 +54,13 @@ must propagate to all other CPUs before the release operation
 (A-cumulative property). This is implemented using
 :c:func:`smp_store_release`.
 
+An ACQUIRE memory ordering guarantees that all post loads and
+stores (all po-later instructions) on the same CPU are
+completed after the acquire operation. It also guarantees that all
+po-later stores on the same CPU must propagate to all other CPUs
+after the acquire operation executes. This is implemented using
+:c:func:`smp_acquire__after_ctrl_dep`.
+
 A control dependency (on success) for refcounters guarantees that
 if a reference for an object was successfully obtained (reference
 counter increment or addition happened, function returned true),
@@ -119,13 +126,24 @@ Memory ordering guarantees changes:
 result of obtaining pointer to the object!
 
 
-case 5) - decrement-based RMW ops that return a value
------------------------------------------------------
+case 5) - generic dec/sub decrement-based RMW ops that return a value
+---------------------------------------------------------------------
 
 Function changes:
 
  * :c:func:`atomic_dec_and_test` --> :c:func:`refcount_dec_and_test`
  * :c:func:`atomic_sub_and_test` --> :c:func:`refcount_sub_and_test`
+
+Memory ordering guarantees changes:
+
+ * fully ordered --> RELEASE ordering + ACQUIRE ordering on success
+
+
+case 6) other decrement-based RMW ops that return a value
+---------------------------------------------------------
+
+Function changes:
+
  * no atomic counterpart --> :c:func:`refcount_dec_if_one`
  * ``atomic_add_unless(&var, -1, 1)`` --> ``refcount_dec_not_one(&var)``
 
@@ -136,7 +154,7 @@ Memory ordering guarantees changes:
 .. note:: :c:func:`atomic_add_unless` only provides full order on success.
 
 
-case 6) - lock-based RMW
+case 7) - lock-based RMW
 ------------------------
 
 Function changes:
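
A litmus-style sketch of the documented guarantee may help; the object
and field names here are hypothetical, not from the patch. With only a
control dependency (the old guarantee), po-later stores were ordered
after a successful decrement but po-later loads were not; the ACQUIRE
ordering added above constrains the loads as well:

	struct obj { refcount_t ref; bool dead; };

	/* CPU0: drops its reference after updating the object. */
	obj->dead = true;		/* po-earlier store ...             */
	refcount_dec(&obj->ref);	/* ... ordered before it by RELEASE */

	/* CPU1: happens to perform the final decrement. */
	if (refcount_dec_and_test(&obj->ref)) {
		/*
		 * Pre-patch, this load could be speculated ahead of the
		 * decrement and observe dead == false. With ACQUIRE on
		 * success it must execute after the decrement, so the
		 * RELEASE/ACQUIRE chain guarantees dead == true here.
		 */
		WARN_ON(!obj->dead);
		kfree(obj);
	}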
diff --git a/arch/x86/include/asm/refcount.h b/arch/x86/include/asm/refcount.h
index dbaed55c1c24..232f856e0db0 100644
--- a/arch/x86/include/asm/refcount.h
+++ b/arch/x86/include/asm/refcount.h
@@ -67,16 +67,30 @@ static __always_inline void refcount_dec(refcount_t *r)
 static __always_inline __must_check
 bool refcount_sub_and_test(unsigned int i, refcount_t *r)
 {
-	return GEN_BINARY_SUFFIXED_RMWcc(LOCK_PREFIX "subl",
-					 REFCOUNT_CHECK_LT_ZERO,
-					 r->refs.counter, e, "er", i, "cx");
+	bool ret = GEN_BINARY_SUFFIXED_RMWcc(LOCK_PREFIX "subl",
+					     REFCOUNT_CHECK_LT_ZERO,
+					     r->refs.counter, e, "er", i, "cx");
+
+	if (ret) {
+		smp_acquire__after_ctrl_dep();
+		return true;
+	}
+
+	return false;
 }
 
 static __always_inline __must_check bool refcount_dec_and_test(refcount_t *r)
 {
-	return GEN_UNARY_SUFFIXED_RMWcc(LOCK_PREFIX "decl",
-					REFCOUNT_CHECK_LT_ZERO,
-					r->refs.counter, e, "cx");
+	bool ret = GEN_UNARY_SUFFIXED_RMWcc(LOCK_PREFIX "decl",
+					    REFCOUNT_CHECK_LT_ZERO,
+					    r->refs.counter, e, "cx");
+
+	if (ret) {
+		smp_acquire__after_ctrl_dep();
+		return true;
+	}
+
+	return false;
 }
 
 static __always_inline __must_check
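
Both x86 helpers above follow the same idiom: the RMW's condition-code
result feeds a branch (a control dependency, which by itself only
orders subsequent stores), and smp_acquire__after_ctrl_dep() upgrades
that to a full ACQUIRE. A minimal sketch of the idiom using a plain
atomic, with a hypothetical wrapper name:

	#include <linux/atomic.h>

	static inline bool my_sub_and_test_acquire(int i, atomic_t *v)
	{
		/* RELEASE-ordered RMW, like the refcount_t primitives. */
		if (atomic_sub_return_release(i, v) == 0) {
			/*
			 * Branch plus barrier turn the control dependency
			 * into ACQUIRE, ordering later loads as well.
			 */
			smp_acquire__after_ctrl_dep();
			return true;
		}
		return false;
	}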
diff --git a/lib/refcount.c b/lib/refcount.c
index ebcf8cd49e05..6e904af0fb3e 100644
--- a/lib/refcount.c
+++ b/lib/refcount.c
@@ -33,6 +33,9 @@
  * Note that the allocator is responsible for ordering things between free()
  * and alloc().
  *
+ * The decrements dec_and_test() and sub_and_test() also provide acquire
+ * ordering on success.
+ *
  */
 
 #include <linux/mutex.h>
@@ -164,8 +167,8 @@ EXPORT_SYMBOL(refcount_inc_checked);
  * at UINT_MAX.
  *
  * Provides release memory ordering, such that prior loads and stores are done
- * before, and provides a control dependency such that free() must come after.
- * See the comment on top.
+ * before, and provides an acquire ordering on success such that free()
+ * must come after.
  *
  * Use of this function is not recommended for the normal reference counting
  * use case in which references are taken and released one at a time. In these
@@ -190,7 +193,12 @@ bool refcount_sub_and_test_checked(unsigned int i, refcount_t *r)
 
 	} while (!atomic_try_cmpxchg_release(&r->refs, &val, new));
 
-	return !new;
+	if (!new) {
+		smp_acquire__after_ctrl_dep();
+		return true;
+	}
+	return false;
+
 }
 EXPORT_SYMBOL(refcount_sub_and_test_checked);
 
@@ -202,8 +210,8 @@ EXPORT_SYMBOL(refcount_sub_and_test_checked);
  * decrement when saturated at UINT_MAX.
  *
  * Provides release memory ordering, such that prior loads and stores are done
- * before, and provides a control dependency such that free() must come after.
- * See the comment on top.
+ * before, and provides an acquire ordering on success such that free()
+ * must come after.
  *
  * Return: true if the resulting refcount is 0, false otherwise
  */