about summary refs log tree commit diff stats
path: root/kernel/locking
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2015-04-13 13:27:28 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2015-04-13 13:27:28 -0400
commit cc76ee75a9d3201eeacc576d17fbc1511f673010 (patch)
tree 9505405c270718c491840ae7d0da0521386ce939 /kernel/locking
parent 9c65e12a55fea2da50f4069ec0dc47c50b7bd2bb (diff)
parent 58995a9a5b292458f94a2356b8c878230fa56fe0 (diff)
Merge branch 'locking-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull core locking changes from Ingo Molnar:
 "Main changes:

   - jump label asm preparatory work for PowerPC (Anton Blanchard)

   - rwsem optimizations and cleanups (Davidlohr Bueso)

   - mutex optimizations and cleanups (Jason Low)

   - futex fix (Oleg Nesterov)

   - remove broken atomicity checks from {READ,WRITE}_ONCE() (Peter Zijlstra)"

* 'locking-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  powerpc, jump_label: Include linux/jump_label.h to get HAVE_JUMP_LABEL define
  jump_label: Allow jump labels to be used in assembly
  jump_label: Allow asm/jump_label.h to be included in assembly
  locking/mutex: Further simplify mutex_spin_on_owner()
  locking: Remove atomicy checks from {READ,WRITE}_ONCE
  locking/rtmutex: Rename argument in the rt_mutex_adjust_prio_chain() documentation as well
  locking/rwsem: Fix lock optimistic spinning when owner is not running
  locking: Remove ACCESS_ONCE() usage
  locking/rwsem: Check for active lock before bailing on spinning
  locking/rwsem: Avoid deceiving lock spinners
  locking/rwsem: Set lock ownership ASAP
  locking/rwsem: Document barrier need when waking tasks
  locking/futex: Check PF_KTHREAD rather than !p->mm to filter out kthreads
  locking/mutex: Refactor mutex_spin_on_owner()
  locking/mutex: In mutex_spin_on_owner(), return true when owner changes
Diffstat (limited to 'kernel/locking')
-rw-r--r-- kernel/locking/mcs_spinlock.h 6
-rw-r--r-- kernel/locking/mutex.c 51
-rw-r--r-- kernel/locking/osq_lock.c 14
-rw-r--r-- kernel/locking/rtmutex.c 2
-rw-r--r-- kernel/locking/rwsem-spinlock.c 7
-rw-r--r-- kernel/locking/rwsem-xadd.c 98
-rw-r--r-- kernel/locking/rwsem.c 22
-rw-r--r-- kernel/locking/rwsem.h 20
8 files changed, 118 insertions, 102 deletions
diff --git a/kernel/locking/mcs_spinlock.h b/kernel/locking/mcs_spinlock.h
index d1fe2ba5bac9..75e114bdf3f2 100644
--- a/kernel/locking/mcs_spinlock.h
+++ b/kernel/locking/mcs_spinlock.h
@@ -78,7 +78,7 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
78 */ 78 */
79 return; 79 return;
80 } 80 }
81 ACCESS_ONCE(prev->next) = node; 81 WRITE_ONCE(prev->next, node);
82 82
83 /* Wait until the lock holder passes the lock down. */ 83 /* Wait until the lock holder passes the lock down. */
84 arch_mcs_spin_lock_contended(&node->locked); 84 arch_mcs_spin_lock_contended(&node->locked);
@@ -91,7 +91,7 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
91static inline 91static inline
92void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node) 92void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
93{ 93{
94 struct mcs_spinlock *next = ACCESS_ONCE(node->next); 94 struct mcs_spinlock *next = READ_ONCE(node->next);
95 95
96 if (likely(!next)) { 96 if (likely(!next)) {
97 /* 97 /*
@@ -100,7 +100,7 @@ void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
100 if (likely(cmpxchg(lock, node, NULL) == node)) 100 if (likely(cmpxchg(lock, node, NULL) == node))
101 return; 101 return;
102 /* Wait until the next pointer is set */ 102 /* Wait until the next pointer is set */
103 while (!(next = ACCESS_ONCE(node->next))) 103 while (!(next = READ_ONCE(node->next)))
104 cpu_relax_lowlatency(); 104 cpu_relax_lowlatency();
105 } 105 }
106 106
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index 94674e5919cb..4cccea6b8934 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -25,7 +25,7 @@
25#include <linux/spinlock.h> 25#include <linux/spinlock.h>
26#include <linux/interrupt.h> 26#include <linux/interrupt.h>
27#include <linux/debug_locks.h> 27#include <linux/debug_locks.h>
28#include "mcs_spinlock.h" 28#include <linux/osq_lock.h>
29 29
30/* 30/*
31 * In the DEBUG case we are using the "NULL fastpath" for mutexes, 31 * In the DEBUG case we are using the "NULL fastpath" for mutexes,
@@ -217,44 +217,35 @@ ww_mutex_set_context_slowpath(struct ww_mutex *lock,
217} 217}
218 218
219#ifdef CONFIG_MUTEX_SPIN_ON_OWNER 219#ifdef CONFIG_MUTEX_SPIN_ON_OWNER
220static inline bool owner_running(struct mutex *lock, struct task_struct *owner)
221{
222 if (lock->owner != owner)
223 return false;
224
225 /*
226 * Ensure we emit the owner->on_cpu, dereference _after_ checking
227 * lock->owner still matches owner, if that fails, owner might
228 * point to free()d memory, if it still matches, the rcu_read_lock()
229 * ensures the memory stays valid.
230 */
231 barrier();
232
233 return owner->on_cpu;
234}
235
236/* 220/*
237 * Look out! "owner" is an entirely speculative pointer 221 * Look out! "owner" is an entirely speculative pointer
238 * access and not reliable. 222 * access and not reliable.
239 */ 223 */
240static noinline 224static noinline
241int mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner) 225bool mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner)
242{ 226{
227 bool ret = true;
228
243 rcu_read_lock(); 229 rcu_read_lock();
244 while (owner_running(lock, owner)) { 230 while (lock->owner == owner) {
245 if (need_resched()) 231 /*
232 * Ensure we emit the owner->on_cpu, dereference _after_
233 * checking lock->owner still matches owner. If that fails,
234 * owner might point to freed memory. If it still matches,
235 * the rcu_read_lock() ensures the memory stays valid.
236 */
237 barrier();
238
239 if (!owner->on_cpu || need_resched()) {
240 ret = false;
246 break; 241 break;
242 }
247 243
248 cpu_relax_lowlatency(); 244 cpu_relax_lowlatency();
249 } 245 }
250 rcu_read_unlock(); 246 rcu_read_unlock();
251 247
252 /* 248 return ret;
253 * We break out the loop above on need_resched() and when the
254 * owner changed, which is a sign for heavy contention. Return
255 * success only when lock->owner is NULL.
256 */
257 return lock->owner == NULL;
258} 249}
259 250
260/* 251/*
@@ -269,7 +260,7 @@ static inline int mutex_can_spin_on_owner(struct mutex *lock)
269 return 0; 260 return 0;
270 261
271 rcu_read_lock(); 262 rcu_read_lock();
272 owner = ACCESS_ONCE(lock->owner); 263 owner = READ_ONCE(lock->owner);
273 if (owner) 264 if (owner)
274 retval = owner->on_cpu; 265 retval = owner->on_cpu;
275 rcu_read_unlock(); 266 rcu_read_unlock();
@@ -343,7 +334,7 @@ static bool mutex_optimistic_spin(struct mutex *lock,
343 * As such, when deadlock detection needs to be 334 * As such, when deadlock detection needs to be
344 * performed the optimistic spinning cannot be done. 335 * performed the optimistic spinning cannot be done.
345 */ 336 */
346 if (ACCESS_ONCE(ww->ctx)) 337 if (READ_ONCE(ww->ctx))
347 break; 338 break;
348 } 339 }
349 340
@@ -351,7 +342,7 @@ static bool mutex_optimistic_spin(struct mutex *lock,
351 * If there's an owner, wait for it to either 342 * If there's an owner, wait for it to either
352 * release the lock or go to sleep. 343 * release the lock or go to sleep.
353 */ 344 */
354 owner = ACCESS_ONCE(lock->owner); 345 owner = READ_ONCE(lock->owner);
355 if (owner && !mutex_spin_on_owner(lock, owner)) 346 if (owner && !mutex_spin_on_owner(lock, owner))
356 break; 347 break;
357 348
@@ -490,7 +481,7 @@ static inline int __sched
490__ww_mutex_lock_check_stamp(struct mutex *lock, struct ww_acquire_ctx *ctx) 481__ww_mutex_lock_check_stamp(struct mutex *lock, struct ww_acquire_ctx *ctx)
491{ 482{
492 struct ww_mutex *ww = container_of(lock, struct ww_mutex, base); 483 struct ww_mutex *ww = container_of(lock, struct ww_mutex, base);
493 struct ww_acquire_ctx *hold_ctx = ACCESS_ONCE(ww->ctx); 484 struct ww_acquire_ctx *hold_ctx = READ_ONCE(ww->ctx);
494 485
495 if (!hold_ctx) 486 if (!hold_ctx)
496 return 0; 487 return 0;
diff --git a/kernel/locking/osq_lock.c b/kernel/locking/osq_lock.c
index c112d00341b0..dc85ee23a26f 100644
--- a/kernel/locking/osq_lock.c
+++ b/kernel/locking/osq_lock.c
@@ -98,7 +98,7 @@ bool osq_lock(struct optimistic_spin_queue *lock)
98 98
99 prev = decode_cpu(old); 99 prev = decode_cpu(old);
100 node->prev = prev; 100 node->prev = prev;
101 ACCESS_ONCE(prev->next) = node; 101 WRITE_ONCE(prev->next, node);
102 102
103 /* 103 /*
104 * Normally @prev is untouchable after the above store; because at that 104 * Normally @prev is untouchable after the above store; because at that
@@ -109,7 +109,7 @@ bool osq_lock(struct optimistic_spin_queue *lock)
109 * cmpxchg in an attempt to undo our queueing. 109 * cmpxchg in an attempt to undo our queueing.
110 */ 110 */
111 111
112 while (!ACCESS_ONCE(node->locked)) { 112 while (!READ_ONCE(node->locked)) {
113 /* 113 /*
114 * If we need to reschedule bail... so we can block. 114 * If we need to reschedule bail... so we can block.
115 */ 115 */
@@ -148,7 +148,7 @@ unqueue:
148 * Or we race against a concurrent unqueue()'s step-B, in which 148 * Or we race against a concurrent unqueue()'s step-B, in which
149 * case its step-C will write us a new @node->prev pointer. 149 * case its step-C will write us a new @node->prev pointer.
150 */ 150 */
151 prev = ACCESS_ONCE(node->prev); 151 prev = READ_ONCE(node->prev);
152 } 152 }
153 153
154 /* 154 /*
@@ -170,8 +170,8 @@ unqueue:
170 * it will wait in Step-A. 170 * it will wait in Step-A.
171 */ 171 */
172 172
173 ACCESS_ONCE(next->prev) = prev; 173 WRITE_ONCE(next->prev, prev);
174 ACCESS_ONCE(prev->next) = next; 174 WRITE_ONCE(prev->next, next);
175 175
176 return false; 176 return false;
177} 177}
@@ -193,11 +193,11 @@ void osq_unlock(struct optimistic_spin_queue *lock)
193 node = this_cpu_ptr(&osq_node); 193 node = this_cpu_ptr(&osq_node);
194 next = xchg(&node->next, NULL); 194 next = xchg(&node->next, NULL);
195 if (next) { 195 if (next) {
196 ACCESS_ONCE(next->locked) = 1; 196 WRITE_ONCE(next->locked, 1);
197 return; 197 return;
198 } 198 }
199 199
200 next = osq_wait_next(lock, node, NULL); 200 next = osq_wait_next(lock, node, NULL);
201 if (next) 201 if (next)
202 ACCESS_ONCE(next->locked) = 1; 202 WRITE_ONCE(next->locked, 1);
203} 203}
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index 6357265a31ad..b73279367087 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -349,7 +349,7 @@ static inline struct rt_mutex *task_blocked_on_lock(struct task_struct *p)
349 * 349 *
350 * @task: the task owning the mutex (owner) for which a chain walk is 350 * @task: the task owning the mutex (owner) for which a chain walk is
351 * probably needed 351 * probably needed
352 * @deadlock_detect: do we have to carry out deadlock detection? 352 * @chwalk: do we have to carry out deadlock detection?
353 * @orig_lock: the mutex (can be NULL if we are walking the chain to recheck 353 * @orig_lock: the mutex (can be NULL if we are walking the chain to recheck
354 * things for a task that has just got its priority adjusted, and 354 * things for a task that has just got its priority adjusted, and
355 * is waiting on a mutex) 355 * is waiting on a mutex)
diff --git a/kernel/locking/rwsem-spinlock.c b/kernel/locking/rwsem-spinlock.c
index 2555ae15ec14..3a5048572065 100644
--- a/kernel/locking/rwsem-spinlock.c
+++ b/kernel/locking/rwsem-spinlock.c
@@ -85,6 +85,13 @@ __rwsem_do_wake(struct rw_semaphore *sem, int wakewrite)
85 85
86 list_del(&waiter->list); 86 list_del(&waiter->list);
87 tsk = waiter->task; 87 tsk = waiter->task;
88 /*
89 * Make sure we do not wakeup the next reader before
90 * setting the nil condition to grant the next reader;
91 * otherwise we could miss the wakeup on the other
92 * side and end up sleeping again. See the pairing
93 * in rwsem_down_read_failed().
94 */
88 smp_mb(); 95 smp_mb();
89 waiter->task = NULL; 96 waiter->task = NULL;
90 wake_up_process(tsk); 97 wake_up_process(tsk);
diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c
index 2f7cc4076f50..3417d0172a5d 100644
--- a/kernel/locking/rwsem-xadd.c
+++ b/kernel/locking/rwsem-xadd.c
@@ -14,8 +14,9 @@
14#include <linux/init.h> 14#include <linux/init.h>
15#include <linux/export.h> 15#include <linux/export.h>
16#include <linux/sched/rt.h> 16#include <linux/sched/rt.h>
17#include <linux/osq_lock.h>
17 18
18#include "mcs_spinlock.h" 19#include "rwsem.h"
19 20
20/* 21/*
21 * Guide to the rw_semaphore's count field for common values. 22 * Guide to the rw_semaphore's count field for common values.
@@ -186,6 +187,13 @@ __rwsem_do_wake(struct rw_semaphore *sem, enum rwsem_wake_type wake_type)
186 waiter = list_entry(next, struct rwsem_waiter, list); 187 waiter = list_entry(next, struct rwsem_waiter, list);
187 next = waiter->list.next; 188 next = waiter->list.next;
188 tsk = waiter->task; 189 tsk = waiter->task;
190 /*
191 * Make sure we do not wakeup the next reader before
192 * setting the nil condition to grant the next reader;
193 * otherwise we could miss the wakeup on the other
194 * side and end up sleeping again. See the pairing
195 * in rwsem_down_read_failed().
196 */
189 smp_mb(); 197 smp_mb();
190 waiter->task = NULL; 198 waiter->task = NULL;
191 wake_up_process(tsk); 199 wake_up_process(tsk);
@@ -258,6 +266,7 @@ static inline bool rwsem_try_write_lock(long count, struct rw_semaphore *sem)
258 RWSEM_ACTIVE_WRITE_BIAS) == RWSEM_WAITING_BIAS) { 266 RWSEM_ACTIVE_WRITE_BIAS) == RWSEM_WAITING_BIAS) {
259 if (!list_is_singular(&sem->wait_list)) 267 if (!list_is_singular(&sem->wait_list))
260 rwsem_atomic_update(RWSEM_WAITING_BIAS, sem); 268 rwsem_atomic_update(RWSEM_WAITING_BIAS, sem);
269 rwsem_set_owner(sem);
261 return true; 270 return true;
262 } 271 }
263 272
@@ -270,15 +279,17 @@ static inline bool rwsem_try_write_lock(long count, struct rw_semaphore *sem)
270 */ 279 */
271static inline bool rwsem_try_write_lock_unqueued(struct rw_semaphore *sem) 280static inline bool rwsem_try_write_lock_unqueued(struct rw_semaphore *sem)
272{ 281{
273 long old, count = ACCESS_ONCE(sem->count); 282 long old, count = READ_ONCE(sem->count);
274 283
275 while (true) { 284 while (true) {
276 if (!(count == 0 || count == RWSEM_WAITING_BIAS)) 285 if (!(count == 0 || count == RWSEM_WAITING_BIAS))
277 return false; 286 return false;
278 287
279 old = cmpxchg(&sem->count, count, count + RWSEM_ACTIVE_WRITE_BIAS); 288 old = cmpxchg(&sem->count, count, count + RWSEM_ACTIVE_WRITE_BIAS);
280 if (old == count) 289 if (old == count) {
290 rwsem_set_owner(sem);
281 return true; 291 return true;
292 }
282 293
283 count = old; 294 count = old;
284 } 295 }
@@ -287,60 +298,67 @@ static inline bool rwsem_try_write_lock_unqueued(struct rw_semaphore *sem)
287static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem) 298static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem)
288{ 299{
289 struct task_struct *owner; 300 struct task_struct *owner;
290 bool on_cpu = false; 301 bool ret = true;
291 302
292 if (need_resched()) 303 if (need_resched())
293 return false; 304 return false;
294 305
295 rcu_read_lock(); 306 rcu_read_lock();
296 owner = ACCESS_ONCE(sem->owner); 307 owner = READ_ONCE(sem->owner);
297 if (owner) 308 if (!owner) {
298 on_cpu = owner->on_cpu; 309 long count = READ_ONCE(sem->count);
299 rcu_read_unlock(); 310 /*
300 311 * If sem->owner is not set, yet we have just recently entered the
301 /* 312 * slowpath with the lock being active, then there is a possibility
302 * If sem->owner is not set, yet we have just recently entered the 313 * reader(s) may have the lock. To be safe, bail spinning in these
303 * slowpath, then there is a possibility reader(s) may have the lock. 314 * situations.
304 * To be safe, avoid spinning in these situations. 315 */
305 */ 316 if (count & RWSEM_ACTIVE_MASK)
306 return on_cpu; 317 ret = false;
307} 318 goto done;
308 319 }
309static inline bool owner_running(struct rw_semaphore *sem,
310 struct task_struct *owner)
311{
312 if (sem->owner != owner)
313 return false;
314
315 /*
316 * Ensure we emit the owner->on_cpu, dereference _after_ checking
317 * sem->owner still matches owner, if that fails, owner might
318 * point to free()d memory, if it still matches, the rcu_read_lock()
319 * ensures the memory stays valid.
320 */
321 barrier();
322 320
323 return owner->on_cpu; 321 ret = owner->on_cpu;
322done:
323 rcu_read_unlock();
324 return ret;
324} 325}
325 326
326static noinline 327static noinline
327bool rwsem_spin_on_owner(struct rw_semaphore *sem, struct task_struct *owner) 328bool rwsem_spin_on_owner(struct rw_semaphore *sem, struct task_struct *owner)
328{ 329{
330 long count;
331
329 rcu_read_lock(); 332 rcu_read_lock();
330 while (owner_running(sem, owner)) { 333 while (sem->owner == owner) {
331 if (need_resched()) 334 /*
332 break; 335 * Ensure we emit the owner->on_cpu, dereference _after_
336 * checking sem->owner still matches owner, if that fails,
337 * owner might point to free()d memory, if it still matches,
338 * the rcu_read_lock() ensures the memory stays valid.
339 */
340 barrier();
341
342 /* abort spinning when need_resched or owner is not running */
343 if (!owner->on_cpu || need_resched()) {
344 rcu_read_unlock();
345 return false;
346 }
333 347
334 cpu_relax_lowlatency(); 348 cpu_relax_lowlatency();
335 } 349 }
336 rcu_read_unlock(); 350 rcu_read_unlock();
337 351
352 if (READ_ONCE(sem->owner))
353 return true; /* new owner, continue spinning */
354
338 /* 355 /*
339 * We break out the loop above on need_resched() or when the 356 * When the owner is not set, the lock could be free or
340 * owner changed, which is a sign for heavy contention. Return 357 * held by readers. Check the counter to verify the
341 * success only when sem->owner is NULL. 358 * state.
342 */ 359 */
343 return sem->owner == NULL; 360 count = READ_ONCE(sem->count);
361 return (count == 0 || count == RWSEM_WAITING_BIAS);
344} 362}
345 363
346static bool rwsem_optimistic_spin(struct rw_semaphore *sem) 364static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
@@ -358,7 +376,7 @@ static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
358 goto done; 376 goto done;
359 377
360 while (true) { 378 while (true) {
361 owner = ACCESS_ONCE(sem->owner); 379 owner = READ_ONCE(sem->owner);
362 if (owner && !rwsem_spin_on_owner(sem, owner)) 380 if (owner && !rwsem_spin_on_owner(sem, owner))
363 break; 381 break;
364 382
@@ -432,7 +450,7 @@ struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem)
432 450
433 /* we're now waiting on the lock, but no longer actively locking */ 451 /* we're now waiting on the lock, but no longer actively locking */
434 if (waiting) { 452 if (waiting) {
435 count = ACCESS_ONCE(sem->count); 453 count = READ_ONCE(sem->count);
436 454
437 /* 455 /*
438 * If there were already threads queued before us and there are 456 * If there were already threads queued before us and there are
diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
index e2d3bc7f03b4..205be0ce34de 100644
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -9,29 +9,9 @@
9#include <linux/sched.h> 9#include <linux/sched.h>
10#include <linux/export.h> 10#include <linux/export.h>
11#include <linux/rwsem.h> 11#include <linux/rwsem.h>
12
13#include <linux/atomic.h> 12#include <linux/atomic.h>
14 13
15#ifdef CONFIG_RWSEM_SPIN_ON_OWNER 14#include "rwsem.h"
16static inline void rwsem_set_owner(struct rw_semaphore *sem)
17{
18 sem->owner = current;
19}
20
21static inline void rwsem_clear_owner(struct rw_semaphore *sem)
22{
23 sem->owner = NULL;
24}
25
26#else
27static inline void rwsem_set_owner(struct rw_semaphore *sem)
28{
29}
30
31static inline void rwsem_clear_owner(struct rw_semaphore *sem)
32{
33}
34#endif
35 15
36/* 16/*
37 * lock for reading 17 * lock for reading
diff --git a/kernel/locking/rwsem.h b/kernel/locking/rwsem.h
new file mode 100644
index 000000000000..870ed9a5b426
--- /dev/null
+++ b/kernel/locking/rwsem.h
@@ -0,0 +1,20 @@
1#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
2static inline void rwsem_set_owner(struct rw_semaphore *sem)
3{
4 sem->owner = current;
5}
6
7static inline void rwsem_clear_owner(struct rw_semaphore *sem)
8{
9 sem->owner = NULL;
10}
11
12#else
13static inline void rwsem_set_owner(struct rw_semaphore *sem)
14{
15}
16
17static inline void rwsem_clear_owner(struct rw_semaphore *sem)
18{
19}
20#endif