author    Linus Torvalds <torvalds@linux-foundation.org>    2014-06-12 21:48:15 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>    2014-06-12 21:48:15 -0400
commit    c29deef32e3699e40da3e9e82267610de04e6b54
tree      820ab21fe399225f7341499e461ee793a180d414
parent    f9da455b93f6ba076935b4ef4589f61e529ae046
parent    bd01ec1a13f9a327950c8e3080096446c7804753
Merge branch 'locking-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull more locking changes from Ingo Molnar:
 "This is the second round of locking tree updates for v3.16, offering
  large system scalability improvements:

   - optimistic spinning for rwsems, from Davidlohr Bueso.

   - 'qrwlocks' core code and x86 enablement, from Waiman Long and PeterZ"

* 'locking-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86, locking/rwlocks: Enable qrwlocks on x86
  locking/rwlocks: Introduce 'qrwlocks' - fair, queued rwlocks
  locking/mutexes: Documentation update/rewrite
  locking/rwsem: Fix checkpatch.pl warnings
  locking/rwsem: Fix warnings for CONFIG_RWSEM_GENERIC_SPINLOCK
  locking/rwsem: Support optimistic spinning
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/Kconfig.locks        |   7
-rw-r--r--  kernel/locking/Makefile     |   1
-rw-r--r--  kernel/locking/qrwlock.c    | 133
-rw-r--r--  kernel/locking/rwsem-xadd.c | 225
-rw-r--r--  kernel/locking/rwsem.c      |  31
5 files changed, 367 insertions(+), 30 deletions(-)
diff --git a/kernel/Kconfig.locks b/kernel/Kconfig.locks
index d2b32ac27a39..35536d9c0964 100644
--- a/kernel/Kconfig.locks
+++ b/kernel/Kconfig.locks
@@ -223,3 +223,10 @@ endif
 config MUTEX_SPIN_ON_OWNER
 	def_bool y
 	depends on SMP && !DEBUG_MUTEXES
+
+config ARCH_USE_QUEUE_RWLOCK
+	bool
+
+config QUEUE_RWLOCK
+	def_bool y if ARCH_USE_QUEUE_RWLOCK
+	depends on SMP
diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile
index b8bdcd4785b7..8541bfdfd232 100644
--- a/kernel/locking/Makefile
+++ b/kernel/locking/Makefile
@@ -24,4 +24,5 @@ obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o
 obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o
 obj-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem-xadd.o
 obj-$(CONFIG_PERCPU_RWSEM) += percpu-rwsem.o
+obj-$(CONFIG_QUEUE_RWLOCK) += qrwlock.o
 obj-$(CONFIG_LOCK_TORTURE_TEST) += locktorture.o
diff --git a/kernel/locking/qrwlock.c b/kernel/locking/qrwlock.c
new file mode 100644
index 000000000000..fb5b8ac411a5
--- /dev/null
+++ b/kernel/locking/qrwlock.c
@@ -0,0 +1,133 @@
+/*
+ * Queue read/write lock
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * (C) Copyright 2013-2014 Hewlett-Packard Development Company, L.P.
+ *
+ * Authors: Waiman Long <waiman.long@hp.com>
+ */
+#include <linux/smp.h>
+#include <linux/bug.h>
+#include <linux/cpumask.h>
+#include <linux/percpu.h>
+#include <linux/hardirq.h>
+#include <linux/mutex.h>
+#include <asm/qrwlock.h>
+
+/**
+ * rspin_until_writer_unlock - inc reader count & spin until writer is gone
+ * @lock : Pointer to queue rwlock structure
+ * @writer: Current queue rwlock writer status byte
+ *
+ * In interrupt context or at the head of the queue, the reader will just
+ * increment the reader count & wait until the writer releases the lock.
+ */
+static __always_inline void
+rspin_until_writer_unlock(struct qrwlock *lock, u32 cnts)
+{
+	while ((cnts & _QW_WMASK) == _QW_LOCKED) {
+		arch_mutex_cpu_relax();
+		cnts = smp_load_acquire((u32 *)&lock->cnts);
+	}
+}
+
+/**
+ * queue_read_lock_slowpath - acquire read lock of a queue rwlock
+ * @lock: Pointer to queue rwlock structure
+ */
+void queue_read_lock_slowpath(struct qrwlock *lock)
+{
+	u32 cnts;
+
+	/*
+	 * Readers come here when they cannot get the lock without waiting
+	 */
+	if (unlikely(in_interrupt())) {
+		/*
+		 * Readers in interrupt context will spin until the lock is
+		 * available without waiting in the queue.
+		 */
+		cnts = smp_load_acquire((u32 *)&lock->cnts);
+		rspin_until_writer_unlock(lock, cnts);
+		return;
+	}
+	atomic_sub(_QR_BIAS, &lock->cnts);
+
+	/*
+	 * Put the reader into the wait queue
+	 */
+	arch_spin_lock(&lock->lock);
+
+	/*
+	 * At the head of the wait queue now, wait until the writer state
+	 * goes to 0 and then try to increment the reader count and get
+	 * the lock. It is possible that an incoming writer may steal the
+	 * lock in the interim, so it is necessary to check the writer byte
+	 * to make sure that the write lock isn't taken.
+	 */
+	while (atomic_read(&lock->cnts) & _QW_WMASK)
+		arch_mutex_cpu_relax();
+
+	cnts = atomic_add_return(_QR_BIAS, &lock->cnts) - _QR_BIAS;
+	rspin_until_writer_unlock(lock, cnts);
+
+	/*
+	 * Signal the next one in queue to become queue head
+	 */
+	arch_spin_unlock(&lock->lock);
+}
+EXPORT_SYMBOL(queue_read_lock_slowpath);
+
+/**
+ * queue_write_lock_slowpath - acquire write lock of a queue rwlock
+ * @lock : Pointer to queue rwlock structure
+ */
+void queue_write_lock_slowpath(struct qrwlock *lock)
+{
+	u32 cnts;
+
+	/* Put the writer into the wait queue */
+	arch_spin_lock(&lock->lock);
+
+	/* Try to acquire the lock directly if no reader is present */
+	if (!atomic_read(&lock->cnts) &&
+	    (atomic_cmpxchg(&lock->cnts, 0, _QW_LOCKED) == 0))
+		goto unlock;
+
+	/*
+	 * Set the waiting flag to notify readers that a writer is pending,
+	 * or wait for a previous writer to go away.
+	 */
+	for (;;) {
+		cnts = atomic_read(&lock->cnts);
+		if (!(cnts & _QW_WMASK) &&
+		    (atomic_cmpxchg(&lock->cnts, cnts,
+				    cnts | _QW_WAITING) == cnts))
+			break;
+
+		arch_mutex_cpu_relax();
+	}
+
+	/* When no more readers, set the locked flag */
+	for (;;) {
+		cnts = atomic_read(&lock->cnts);
+		if ((cnts == _QW_WAITING) &&
+		    (atomic_cmpxchg(&lock->cnts, _QW_WAITING,
+				    _QW_LOCKED) == _QW_WAITING))
+			break;
+
+		arch_mutex_cpu_relax();
+	}
+unlock:
+	arch_spin_unlock(&lock->lock);
+}
+EXPORT_SYMBOL(queue_write_lock_slowpath);
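Note: the slowpaths above rely on a lock-word layout defined in the asm-generic qrwlock header, which is added elsewhere in this series and not shown in this diff: the low byte holds the writer state and the upper bits count readers, with _QR_BIAS/_QW_WMASK/_QW_LOCKED as the masks. The following stand-alone, user-space C11 sketch uses hypothetical constant names (not the kernel API) purely to illustrate that assumed layout and the uncontended fast paths.

/*
 * Hypothetical user-space model of the qrwlock word layout assumed above:
 * writer state in the low byte, reader count in the remaining bits.
 * Constants and helpers are illustrative only, not the kernel's header.
 */
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

#define QW_WAITING  0x01u              /* a writer is waiting            */
#define QW_LOCKED   0xffu              /* a writer holds the lock        */
#define QW_WMASK    0xffu              /* writer byte mask               */
#define QR_SHIFT    8                  /* reader count lives above it    */
#define QR_BIAS     (1u << QR_SHIFT)   /* one reader                     */

int main(void)
{
	_Atomic uint32_t cnts = 0;

	/* reader fast path: add a reader, check that no writer holds the lock */
	uint32_t c = atomic_fetch_add(&cnts, QR_BIAS) + QR_BIAS;
	if (!(c & QW_WMASK))
		printf("read lock acquired, readers=%u\n", (unsigned)(c >> QR_SHIFT));

	atomic_fetch_sub(&cnts, QR_BIAS);  /* read unlock */

	/* writer fast path: only succeeds when the whole word is 0 */
	uint32_t expected = 0;
	if (atomic_compare_exchange_strong(&cnts, &expected, QW_LOCKED))
		printf("write lock acquired\n");
	return 0;
}

Fairness comes from the slowpaths: once a reader or writer misses the fast path it serializes on lock->lock, so a steady stream of new readers cannot indefinitely starve a waiting writer (readers in interrupt context are the deliberate exception, as the comments in queue_read_lock_slowpath() note).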
diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c
index b4219ff87b8c..dacc32142fcc 100644
--- a/kernel/locking/rwsem-xadd.c
+++ b/kernel/locking/rwsem-xadd.c
@@ -5,11 +5,17 @@
  *
  * Writer lock-stealing by Alex Shi <alex.shi@intel.com>
  * and Michel Lespinasse <walken@google.com>
+ *
+ * Optimistic spinning by Tim Chen <tim.c.chen@intel.com>
+ * and Davidlohr Bueso <davidlohr@hp.com>. Based on mutexes.
  */
 #include <linux/rwsem.h>
 #include <linux/sched.h>
 #include <linux/init.h>
 #include <linux/export.h>
+#include <linux/sched/rt.h>
+
+#include "mcs_spinlock.h"
 
 /*
  * Guide to the rw_semaphore's count field for common values.
@@ -76,6 +82,10 @@ void __init_rwsem(struct rw_semaphore *sem, const char *name,
 	sem->count = RWSEM_UNLOCKED_VALUE;
 	raw_spin_lock_init(&sem->wait_lock);
 	INIT_LIST_HEAD(&sem->wait_list);
+#ifdef CONFIG_SMP
+	sem->owner = NULL;
+	sem->osq = NULL;
+#endif
 }
 
 EXPORT_SYMBOL(__init_rwsem);
@@ -190,7 +200,7 @@ __rwsem_do_wake(struct rw_semaphore *sem, enum rwsem_wake_type wake_type)
 }
 
 /*
- * wait for the read lock to be granted
+ * Wait for the read lock to be granted
  */
 __visible
 struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem)
@@ -237,64 +247,221 @@ struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem)
 	return sem;
 }
 
+static inline bool rwsem_try_write_lock(long count, struct rw_semaphore *sem)
+{
+	if (!(count & RWSEM_ACTIVE_MASK)) {
+		/* try acquiring the write lock */
+		if (sem->count == RWSEM_WAITING_BIAS &&
+		    cmpxchg(&sem->count, RWSEM_WAITING_BIAS,
+			    RWSEM_ACTIVE_WRITE_BIAS) == RWSEM_WAITING_BIAS) {
+			if (!list_is_singular(&sem->wait_list))
+				rwsem_atomic_update(RWSEM_WAITING_BIAS, sem);
+			return true;
+		}
+	}
+	return false;
+}
+
+#ifdef CONFIG_SMP
 /*
- * wait until we successfully acquire the write lock
+ * Try to acquire write lock before the writer has been put on wait queue.
+ */
+static inline bool rwsem_try_write_lock_unqueued(struct rw_semaphore *sem)
+{
+	long old, count = ACCESS_ONCE(sem->count);
+
+	while (true) {
+		if (!(count == 0 || count == RWSEM_WAITING_BIAS))
+			return false;
+
+		old = cmpxchg(&sem->count, count, count + RWSEM_ACTIVE_WRITE_BIAS);
+		if (old == count)
+			return true;
+
+		count = old;
+	}
+}
+
+static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem)
+{
+	struct task_struct *owner;
+	bool on_cpu = true;
+
+	if (need_resched())
+		return 0;
+
+	rcu_read_lock();
+	owner = ACCESS_ONCE(sem->owner);
+	if (owner)
+		on_cpu = owner->on_cpu;
+	rcu_read_unlock();
+
+	/*
+	 * If sem->owner is not set, the rwsem owner may have
+	 * just acquired it and not set the owner yet or the rwsem
+	 * has been released.
+	 */
+	return on_cpu;
+}
+
+static inline bool owner_running(struct rw_semaphore *sem,
+				 struct task_struct *owner)
+{
+	if (sem->owner != owner)
+		return false;
+
+	/*
+	 * Ensure we emit the owner->on_cpu, dereference _after_ checking
+	 * sem->owner still matches owner, if that fails, owner might
+	 * point to free()d memory, if it still matches, the rcu_read_lock()
+	 * ensures the memory stays valid.
+	 */
+	barrier();
+
+	return owner->on_cpu;
+}
+
+static noinline
+bool rwsem_spin_on_owner(struct rw_semaphore *sem, struct task_struct *owner)
+{
+	rcu_read_lock();
+	while (owner_running(sem, owner)) {
+		if (need_resched())
+			break;
+
+		arch_mutex_cpu_relax();
+	}
+	rcu_read_unlock();
+
+	/*
+	 * We break out the loop above on need_resched() or when the
+	 * owner changed, which is a sign for heavy contention. Return
+	 * success only when sem->owner is NULL.
+	 */
+	return sem->owner == NULL;
+}
+
+static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
+{
+	struct task_struct *owner;
+	bool taken = false;
+
+	preempt_disable();
+
+	/* sem->wait_lock should not be held when doing optimistic spinning */
+	if (!rwsem_can_spin_on_owner(sem))
+		goto done;
+
+	if (!osq_lock(&sem->osq))
+		goto done;
+
+	while (true) {
+		owner = ACCESS_ONCE(sem->owner);
+		if (owner && !rwsem_spin_on_owner(sem, owner))
+			break;
+
+		/* wait_lock will be acquired if write_lock is obtained */
+		if (rwsem_try_write_lock_unqueued(sem)) {
+			taken = true;
+			break;
+		}
+
+		/*
+		 * When there's no owner, we might have preempted between the
+		 * owner acquiring the lock and setting the owner field. If
+		 * we're an RT task that will live-lock because we won't let
+		 * the owner complete.
+		 */
+		if (!owner && (need_resched() || rt_task(current)))
+			break;
+
+		/*
+		 * The cpu_relax() call is a compiler barrier which forces
+		 * everything in this loop to be re-loaded. We don't need
+		 * memory barriers as we'll eventually observe the right
+		 * values at the cost of a few extra spins.
+		 */
+		arch_mutex_cpu_relax();
+	}
+	osq_unlock(&sem->osq);
+done:
+	preempt_enable();
+	return taken;
+}
+
+#else
+static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
+{
+	return false;
+}
+#endif
+
+/*
+ * Wait until we successfully acquire the write lock
  */
 __visible
 struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem)
 {
-	long count, adjustment = -RWSEM_ACTIVE_WRITE_BIAS;
+	long count;
+	bool waiting = true; /* any queued threads before us */
 	struct rwsem_waiter waiter;
-	struct task_struct *tsk = current;
 
-	/* set up my own style of waitqueue */
-	waiter.task = tsk;
+	/* undo write bias from down_write operation, stop active locking */
+	count = rwsem_atomic_update(-RWSEM_ACTIVE_WRITE_BIAS, sem);
+
+	/* do optimistic spinning and steal lock if possible */
+	if (rwsem_optimistic_spin(sem))
+		return sem;
+
+	/*
+	 * Optimistic spinning failed, proceed to the slowpath
+	 * and block until we can acquire the sem.
+	 */
+	waiter.task = current;
 	waiter.type = RWSEM_WAITING_FOR_WRITE;
 
 	raw_spin_lock_irq(&sem->wait_lock);
+
+	/* account for this before adding a new element to the list */
 	if (list_empty(&sem->wait_list))
-		adjustment += RWSEM_WAITING_BIAS;
+		waiting = false;
+
 	list_add_tail(&waiter.list, &sem->wait_list);
 
 	/* we're now waiting on the lock, but no longer actively locking */
-	count = rwsem_atomic_update(adjustment, sem);
+	if (waiting) {
+		count = ACCESS_ONCE(sem->count);
 
-	/* If there were already threads queued before us and there are no
-	 * active writers, the lock must be read owned; so we try to wake
-	 * any read locks that were queued ahead of us. */
-	if (count > RWSEM_WAITING_BIAS &&
-	    adjustment == -RWSEM_ACTIVE_WRITE_BIAS)
-		sem = __rwsem_do_wake(sem, RWSEM_WAKE_READERS);
+		/*
+		 * If there were already threads queued before us and there are
+		 * no active writers, the lock must be read owned; so we try to
+		 * wake any read locks that were queued ahead of us.
+		 */
+		if (count > RWSEM_WAITING_BIAS)
+			sem = __rwsem_do_wake(sem, RWSEM_WAKE_READERS);
+
+	} else
+		count = rwsem_atomic_update(RWSEM_WAITING_BIAS, sem);
 
 	/* wait until we successfully acquire the lock */
-	set_task_state(tsk, TASK_UNINTERRUPTIBLE);
+	set_current_state(TASK_UNINTERRUPTIBLE);
 	while (true) {
-		if (!(count & RWSEM_ACTIVE_MASK)) {
-			/* Try acquiring the write lock. */
-			count = RWSEM_ACTIVE_WRITE_BIAS;
-			if (!list_is_singular(&sem->wait_list))
-				count += RWSEM_WAITING_BIAS;
-
-			if (sem->count == RWSEM_WAITING_BIAS &&
-			    cmpxchg(&sem->count, RWSEM_WAITING_BIAS, count) ==
-							RWSEM_WAITING_BIAS)
-				break;
-		}
-
+		if (rwsem_try_write_lock(count, sem))
+			break;
 		raw_spin_unlock_irq(&sem->wait_lock);
 
 		/* Block until there are no active lockers. */
 		do {
 			schedule();
-			set_task_state(tsk, TASK_UNINTERRUPTIBLE);
+			set_current_state(TASK_UNINTERRUPTIBLE);
 		} while ((count = sem->count) & RWSEM_ACTIVE_MASK);
 
 		raw_spin_lock_irq(&sem->wait_lock);
 	}
+	__set_current_state(TASK_RUNNING);
 
 	list_del(&waiter.list);
 	raw_spin_unlock_irq(&sem->wait_lock);
-	tsk->state = TASK_RUNNING;
 
 	return sem;
 }
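Note: the heart of the new write-side behavior is rwsem_optimistic_spin(): before sleeping, a would-be writer keeps spinning as long as the current owner appears to be running on a CPU, periodically trying to steal the lock with rwsem_try_write_lock_unqueued(), and only then falls back to the wait queue. The following stand-alone, user-space pthreads sketch (hypothetical names, not the kernel sources) shows just that spin-then-block shape; it replaces the owner/on_cpu heuristic with a bounded spin count and ignores the OSQ queue, RCU and preemption handling the kernel needs.

/*
 * Hypothetical user-space sketch of "optimistic spinning before sleeping":
 * a writer busy-polls the lock word for a while (the analogue of spinning
 * while the owner is on_cpu), then parks on a condvar-based slowpath.
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>

struct demo_rwsem {
	_Atomic long    count;      /* 0 = free, -1 = write-locked (demo only) */
	pthread_mutex_t wait_lock;
	pthread_cond_t  wait_cv;
};

static bool try_write_lock(struct demo_rwsem *sem)
{
	long expected = 0;
	return atomic_compare_exchange_strong(&sem->count, &expected, -1);
}

static void down_write_demo(struct demo_rwsem *sem)
{
	/* optimistic spin: bounded, unqueued, mirrors the "steal" attempt */
	for (int spins = 0; spins < 10000; spins++) {
		if (try_write_lock(sem))
			return;
	}

	/* slowpath: block until the lock is released and we can take it */
	pthread_mutex_lock(&sem->wait_lock);
	while (!try_write_lock(sem))
		pthread_cond_wait(&sem->wait_cv, &sem->wait_lock);
	pthread_mutex_unlock(&sem->wait_lock);
}

static void up_write_demo(struct demo_rwsem *sem)
{
	atomic_store(&sem->count, 0);
	pthread_mutex_lock(&sem->wait_lock);
	pthread_cond_broadcast(&sem->wait_cv);
	pthread_mutex_unlock(&sem->wait_lock);
}

The payoff the kernel version is after is the same as in the mutex code it is based on: when writer hold times are short, a spinning waiter avoids two context switches per acquisition, which is where the scalability improvement quoted in the merge message comes from.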
diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
index cfff1435bdfb..42f806de49d4 100644
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -12,6 +12,27 @@
 
 #include <linux/atomic.h>
 
+#if defined(CONFIG_SMP) && defined(CONFIG_RWSEM_XCHGADD_ALGORITHM)
+static inline void rwsem_set_owner(struct rw_semaphore *sem)
+{
+	sem->owner = current;
+}
+
+static inline void rwsem_clear_owner(struct rw_semaphore *sem)
+{
+	sem->owner = NULL;
+}
+
+#else
+static inline void rwsem_set_owner(struct rw_semaphore *sem)
+{
+}
+
+static inline void rwsem_clear_owner(struct rw_semaphore *sem)
+{
+}
+#endif
+
 /*
  * lock for reading
  */
@@ -48,6 +69,7 @@ void __sched down_write(struct rw_semaphore *sem)
 	rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_);
 
 	LOCK_CONTENDED(sem, __down_write_trylock, __down_write);
+	rwsem_set_owner(sem);
 }
 
 EXPORT_SYMBOL(down_write);
@@ -59,8 +81,11 @@ int down_write_trylock(struct rw_semaphore *sem)
 {
 	int ret = __down_write_trylock(sem);
 
-	if (ret == 1)
+	if (ret == 1) {
 		rwsem_acquire(&sem->dep_map, 0, 1, _RET_IP_);
+		rwsem_set_owner(sem);
+	}
+
 	return ret;
 }
 
@@ -85,6 +110,7 @@ void up_write(struct rw_semaphore *sem)
 {
 	rwsem_release(&sem->dep_map, 1, _RET_IP_);
 
+	rwsem_clear_owner(sem);
 	__up_write(sem);
 }
 
@@ -99,6 +125,7 @@ void downgrade_write(struct rw_semaphore *sem)
 	 * lockdep: a downgraded write will live on as a write
 	 * dependency.
 	 */
+	rwsem_clear_owner(sem);
 	__downgrade_write(sem);
 }
 
@@ -122,6 +149,7 @@ void _down_write_nest_lock(struct rw_semaphore *sem, struct lockdep_map *nest)
 	rwsem_acquire_nest(&sem->dep_map, 0, 0, nest, _RET_IP_);
 
 	LOCK_CONTENDED(sem, __down_write_trylock, __down_write);
+	rwsem_set_owner(sem);
 }
 
 EXPORT_SYMBOL(_down_write_nest_lock);
@@ -141,6 +169,7 @@ void down_write_nested(struct rw_semaphore *sem, int subclass)
 	rwsem_acquire(&sem->dep_map, subclass, 0, _RET_IP_);
 
 	LOCK_CONTENDED(sem, __down_write_trylock, __down_write);
+	rwsem_set_owner(sem);
 }
 
 EXPORT_SYMBOL(down_write_nested);
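Note: the rwsem.c changes are the plumbing that makes the spinning in rwsem-xadd.c possible: every write-lock acquisition path publishes the owning task and every release or downgrade clears it, so a spinner that sees a non-NULL owner knows a writer currently holds the semaphore. The stand-alone, user-space C11 sketch below (hypothetical names, no relation to the kernel sources) models just that publish/poll contract; the kernel additionally relies on RCU and task_struct::on_cpu, which are only hinted at here.

/*
 * Hypothetical model of owner tracking: the write locker publishes its
 * identity while it holds the lock, and a contender spins only while that
 * same owner still appears to be running (a flag here; on_cpu under RCU
 * in the kernel).
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>

struct demo_task {
	_Atomic bool on_cpu;
};

struct demo_sem {
	struct demo_task *_Atomic owner;   /* NULL when no writer holds it */
};

static void demo_set_owner(struct demo_sem *sem, struct demo_task *me)
{
	atomic_store(&sem->owner, me);     /* called right after acquiring */
}

static void demo_clear_owner(struct demo_sem *sem)
{
	atomic_store(&sem->owner, NULL);   /* called just before releasing */
}

/* Spin while the same owner still appears to be running on a CPU. */
static bool demo_spin_on_owner(struct demo_sem *sem, struct demo_task *owner)
{
	while (atomic_load(&sem->owner) == owner &&
	       atomic_load(&owner->on_cpu))
		;                           /* cpu_relax() in the kernel */

	/* worth retrying the lock only if it was actually released */
	return atomic_load(&sem->owner) == NULL;
}

This is also why rwsem_clear_owner() is called before __up_write() and __downgrade_write() in the hunks above: once the lock can be taken by someone else, the stale owner pointer must already be gone.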