Diffstat (limited to 'kernel/locking')
-rw-r--r--  kernel/locking/lockdep.c          81
-rw-r--r--  kernel/locking/mcs_spinlock.h      6
-rw-r--r--  kernel/locking/mutex.c            51
-rw-r--r--  kernel/locking/osq_lock.c         14
-rw-r--r--  kernel/locking/rtmutex.c           2
-rw-r--r--  kernel/locking/rwsem-spinlock.c    7
-rw-r--r--  kernel/locking/rwsem-xadd.c       98
-rw-r--r--  kernel/locking/rwsem.c            22
-rw-r--r--  kernel/locking/rwsem.h            20
9 files changed, 173 insertions(+), 128 deletions(-)
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 88d0d4420ad2..ba77ab5f64dd 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -633,7 +633,7 @@ static int count_matching_names(struct lock_class *new_class)
 	if (!new_class->name)
 		return 0;
 
-	list_for_each_entry(class, &all_lock_classes, lock_entry) {
+	list_for_each_entry_rcu(class, &all_lock_classes, lock_entry) {
 		if (new_class->key - new_class->subclass == class->key)
 			return class->name_version;
 		if (class->name && !strcmp(class->name, new_class->name))
@@ -700,10 +700,12 @@ look_up_lock_class(struct lockdep_map *lock, unsigned int subclass)
 	hash_head = classhashentry(key);
 
 	/*
-	 * We can walk the hash lockfree, because the hash only
-	 * grows, and we are careful when adding entries to the end:
+	 * We do an RCU walk of the hash, see lockdep_free_key_range().
 	 */
-	list_for_each_entry(class, hash_head, hash_entry) {
+	if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
+		return NULL;
+
+	list_for_each_entry_rcu(class, hash_head, hash_entry) {
 		if (class->key == key) {
 			/*
 			 * Huh! same key, different name? Did someone trample
@@ -728,7 +730,8 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force)
 	struct lockdep_subclass_key *key;
 	struct list_head *hash_head;
 	struct lock_class *class;
-	unsigned long flags;
+
+	DEBUG_LOCKS_WARN_ON(!irqs_disabled());
 
 	class = look_up_lock_class(lock, subclass);
 	if (likely(class))
@@ -750,28 +753,26 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force)
 	key = lock->key->subkeys + subclass;
 	hash_head = classhashentry(key);
 
-	raw_local_irq_save(flags);
 	if (!graph_lock()) {
-		raw_local_irq_restore(flags);
 		return NULL;
 	}
 	/*
 	 * We have to do the hash-walk again, to avoid races
 	 * with another CPU:
 	 */
-	list_for_each_entry(class, hash_head, hash_entry)
+	list_for_each_entry_rcu(class, hash_head, hash_entry) {
 		if (class->key == key)
 			goto out_unlock_set;
+	}
+
 	/*
 	 * Allocate a new key from the static array, and add it to
 	 * the hash:
 	 */
 	if (nr_lock_classes >= MAX_LOCKDEP_KEYS) {
 		if (!debug_locks_off_graph_unlock()) {
-			raw_local_irq_restore(flags);
 			return NULL;
 		}
-		raw_local_irq_restore(flags);
 
 		print_lockdep_off("BUG: MAX_LOCKDEP_KEYS too low!");
 		dump_stack();
@@ -798,7 +799,6 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force)
 
 	if (verbose(class)) {
 		graph_unlock();
-		raw_local_irq_restore(flags);
 
 		printk("\nnew class %p: %s", class->key, class->name);
 		if (class->name_version > 1)
@@ -806,15 +806,12 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force)
 		printk("\n");
 		dump_stack();
 
-		raw_local_irq_save(flags);
 		if (!graph_lock()) {
-			raw_local_irq_restore(flags);
 			return NULL;
 		}
 	}
 out_unlock_set:
 	graph_unlock();
-	raw_local_irq_restore(flags);
 
 out_set_class_cache:
 	if (!subclass || force)
@@ -870,11 +867,9 @@ static int add_lock_to_list(struct lock_class *class, struct lock_class *this,
 	entry->distance = distance;
 	entry->trace = *trace;
 	/*
-	 * Since we never remove from the dependency list, the list can
-	 * be walked lockless by other CPUs, it's only allocation
-	 * that must be protected by the spinlock. But this also means
-	 * we must make new entries visible only once writes to the
-	 * entry become visible - hence the RCU op:
+	 * Both allocation and removal are done under the graph lock; but
+	 * iteration is under RCU-sched; see look_up_lock_class() and
+	 * lockdep_free_key_range().
 	 */
 	list_add_tail_rcu(&entry->entry, head);
 
@@ -1025,7 +1020,9 @@ static int __bfs(struct lock_list *source_entry,
 		else
 			head = &lock->class->locks_before;
 
-		list_for_each_entry(entry, head, entry) {
+		DEBUG_LOCKS_WARN_ON(!irqs_disabled());
+
+		list_for_each_entry_rcu(entry, head, entry) {
 			if (!lock_accessed(entry)) {
 				unsigned int cq_depth;
 				mark_lock_accessed(entry, lock);
@@ -2022,7 +2019,7 @@ static inline int lookup_chain_cache(struct task_struct *curr,
 	 * We can walk it lock-free, because entries only get added
 	 * to the hash:
 	 */
-	list_for_each_entry(chain, hash_head, entry) {
+	list_for_each_entry_rcu(chain, hash_head, entry) {
 		if (chain->chain_key == chain_key) {
 cache_hit:
 			debug_atomic_inc(chain_lookup_hits);
@@ -2996,8 +2993,18 @@ void lockdep_init_map(struct lockdep_map *lock, const char *name,
 	if (unlikely(!debug_locks))
 		return;
 
-	if (subclass)
+	if (subclass) {
+		unsigned long flags;
+
+		if (DEBUG_LOCKS_WARN_ON(current->lockdep_recursion))
+			return;
+
+		raw_local_irq_save(flags);
+		current->lockdep_recursion = 1;
 		register_lock_class(lock, subclass, 1);
+		current->lockdep_recursion = 0;
+		raw_local_irq_restore(flags);
+	}
 }
 EXPORT_SYMBOL_GPL(lockdep_init_map);
 
@@ -3887,9 +3894,17 @@ static inline int within(const void *addr, void *start, unsigned long size)
 	return addr >= start && addr < start + size;
 }
 
+/*
+ * Used in module.c to remove lock classes from memory that is going to be
+ * freed; and possibly re-used by other modules.
+ *
+ * We will have had one sync_sched() before getting here, so we're guaranteed
+ * nobody will look up these exact classes -- they're properly dead but still
+ * allocated.
+ */
 void lockdep_free_key_range(void *start, unsigned long size)
 {
-	struct lock_class *class, *next;
+	struct lock_class *class;
 	struct list_head *head;
 	unsigned long flags;
 	int i;
@@ -3905,7 +3920,7 @@ void lockdep_free_key_range(void *start, unsigned long size)
 		head = classhash_table + i;
 		if (list_empty(head))
 			continue;
-		list_for_each_entry_safe(class, next, head, hash_entry) {
+		list_for_each_entry_rcu(class, head, hash_entry) {
 			if (within(class->key, start, size))
 				zap_class(class);
 			else if (within(class->name, start, size))
@@ -3916,11 +3931,25 @@ void lockdep_free_key_range(void *start, unsigned long size)
 	if (locked)
 		graph_unlock();
 	raw_local_irq_restore(flags);
+
+	/*
+	 * Wait for any possible iterators from look_up_lock_class() to pass
+	 * before continuing to free the memory they refer to.
+	 *
+	 * sync_sched() is sufficient because the read-side is IRQ disable.
+	 */
+	synchronize_sched();
+
+	/*
+	 * XXX at this point we could return the resources to the pool;
+	 * instead we leak them. We would need to change to bitmap allocators
+	 * instead of the linear allocators we have now.
+	 */
 }
 
 void lockdep_reset_lock(struct lockdep_map *lock)
 {
-	struct lock_class *class, *next;
+	struct lock_class *class;
 	struct list_head *head;
 	unsigned long flags;
 	int i, j;
@@ -3948,7 +3977,7 @@ void lockdep_reset_lock(struct lockdep_map *lock)
 		head = classhash_table + i;
 		if (list_empty(head))
 			continue;
-		list_for_each_entry_safe(class, next, head, hash_entry) {
+		list_for_each_entry_rcu(class, head, hash_entry) {
 			int match = 0;
 
 			for (j = 0; j < NR_LOCKDEP_CACHING_CLASSES; j++)
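
All of the lockdep hunks above follow one pattern: the class-hash, chain-hash and dependency-list walks become list_for_each_entry_rcu() with interrupts disabled acting as the RCU-sched read side, and lockdep_free_key_range() gains a synchronize_sched() so zapped classes cannot be reused while a walker may still be looking at them. A minimal sketch of that pattern, using hypothetical demo_* names rather than the real lockdep structures:

#include <linux/irqflags.h>
#include <linux/kernel.h>
#include <linux/rculist.h>
#include <linux/rcupdate.h>
#include <linux/spinlock.h>

struct demo_class {
	struct list_head	hash_entry;
	const void		*key;
};

static LIST_HEAD(demo_hash);
static DEFINE_SPINLOCK(demo_lock);

/* Reader: walks the chain with IRQs off, which is an RCU-sched read side. */
static struct demo_class *demo_lookup(const void *key)
{
	struct demo_class *class;

	WARN_ON_ONCE(!irqs_disabled());

	list_for_each_entry_rcu(class, &demo_hash, hash_entry) {
		if (class->key == key)
			return class;
	}
	return NULL;
}

/* Writers: add and remove under the lock, using the _rcu list primitives. */
static void demo_add(struct demo_class *class)
{
	spin_lock(&demo_lock);
	list_add_tail_rcu(&class->hash_entry, &demo_hash);
	spin_unlock(&demo_lock);
}

static void demo_zap(struct demo_class *class)
{
	spin_lock(&demo_lock);
	list_del_rcu(&class->hash_entry);
	spin_unlock(&demo_lock);

	/* Wait for every IRQs-disabled walker before the memory is reused. */
	synchronize_sched();
}

Relying on the IRQs-disabled section as the read side is what lets the teardown path get away with a single synchronize_sched() instead of adding rcu_read_lock()/rcu_read_unlock() to every walker.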
diff --git a/kernel/locking/mcs_spinlock.h b/kernel/locking/mcs_spinlock.h
index d1fe2ba5bac9..75e114bdf3f2 100644
--- a/kernel/locking/mcs_spinlock.h
+++ b/kernel/locking/mcs_spinlock.h
@@ -78,7 +78,7 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
 		 */
 		return;
 	}
-	ACCESS_ONCE(prev->next) = node;
+	WRITE_ONCE(prev->next, node);
 
 	/* Wait until the lock holder passes the lock down. */
 	arch_mcs_spin_lock_contended(&node->locked);
@@ -91,7 +91,7 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
 static inline
 void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
 {
-	struct mcs_spinlock *next = ACCESS_ONCE(node->next);
+	struct mcs_spinlock *next = READ_ONCE(node->next);
 
 	if (likely(!next)) {
 		/*
@@ -100,7 +100,7 @@ void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
 		if (likely(cmpxchg(lock, node, NULL) == node))
 			return;
 		/* Wait until the next pointer is set */
-		while (!(next = ACCESS_ONCE(node->next)))
+		while (!(next = READ_ONCE(node->next)))
 			cpu_relax_lowlatency();
 	}
 
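
The mcs_spinlock.h hunks are part of the tree-wide move from ACCESS_ONCE() to READ_ONCE()/WRITE_ONCE(): the new helpers take the location (and, for stores, the value) as arguments instead of being assigned through a cast, and they remain correct for accesses wider than the machine word. A small before/after sketch; the demo_* wrappers are hypothetical and exist only to show the two spellings side by side:

#include <linux/compiler.h>

struct mcs_spinlock;			/* as defined in mcs_spinlock.h */

static inline void demo_publish_next(struct mcs_spinlock **nextp,
				     struct mcs_spinlock *node)
{
	/* old:  ACCESS_ONCE(*nextp) = node;  */
	WRITE_ONCE(*nextp, node);
}

static inline struct mcs_spinlock *demo_fetch_next(struct mcs_spinlock **nextp)
{
	/* old:  return ACCESS_ONCE(*nextp);  */
	return READ_ONCE(*nextp);
}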
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index 94674e5919cb..4cccea6b8934 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -25,7 +25,7 @@
 #include <linux/spinlock.h>
 #include <linux/interrupt.h>
 #include <linux/debug_locks.h>
-#include "mcs_spinlock.h"
+#include <linux/osq_lock.h>
 
 /*
  * In the DEBUG case we are using the "NULL fastpath" for mutexes,
@@ -217,44 +217,35 @@ ww_mutex_set_context_slowpath(struct ww_mutex *lock,
 }
 
 #ifdef CONFIG_MUTEX_SPIN_ON_OWNER
-static inline bool owner_running(struct mutex *lock, struct task_struct *owner)
-{
-	if (lock->owner != owner)
-		return false;
-
-	/*
-	 * Ensure we emit the owner->on_cpu, dereference _after_ checking
-	 * lock->owner still matches owner, if that fails, owner might
-	 * point to free()d memory, if it still matches, the rcu_read_lock()
-	 * ensures the memory stays valid.
-	 */
-	barrier();
-
-	return owner->on_cpu;
-}
-
 /*
  * Look out! "owner" is an entirely speculative pointer
  * access and not reliable.
  */
 static noinline
-int mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner)
+bool mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner)
 {
+	bool ret = true;
+
 	rcu_read_lock();
-	while (owner_running(lock, owner)) {
-		if (need_resched())
+	while (lock->owner == owner) {
+		/*
+		 * Ensure we emit the owner->on_cpu, dereference _after_
+		 * checking lock->owner still matches owner. If that fails,
+		 * owner might point to freed memory. If it still matches,
+		 * the rcu_read_lock() ensures the memory stays valid.
+		 */
+		barrier();
+
+		if (!owner->on_cpu || need_resched()) {
+			ret = false;
 			break;
+		}
 
 		cpu_relax_lowlatency();
 	}
 	rcu_read_unlock();
 
-	/*
-	 * We break out the loop above on need_resched() and when the
-	 * owner changed, which is a sign for heavy contention. Return
-	 * success only when lock->owner is NULL.
-	 */
-	return lock->owner == NULL;
+	return ret;
 }
 
 /*
@@ -269,7 +260,7 @@ static inline int mutex_can_spin_on_owner(struct mutex *lock)
 		return 0;
 
 	rcu_read_lock();
-	owner = ACCESS_ONCE(lock->owner);
+	owner = READ_ONCE(lock->owner);
 	if (owner)
 		retval = owner->on_cpu;
 	rcu_read_unlock();
@@ -343,7 +334,7 @@ static bool mutex_optimistic_spin(struct mutex *lock,
 			 * As such, when deadlock detection needs to be
 			 * performed the optimistic spinning cannot be done.
 			 */
-			if (ACCESS_ONCE(ww->ctx))
+			if (READ_ONCE(ww->ctx))
 				break;
 		}
 
@@ -351,7 +342,7 @@ static bool mutex_optimistic_spin(struct mutex *lock,
 		 * If there's an owner, wait for it to either
 		 * release the lock or go to sleep.
 		 */
-		owner = ACCESS_ONCE(lock->owner);
+		owner = READ_ONCE(lock->owner);
 		if (owner && !mutex_spin_on_owner(lock, owner))
 			break;
 
@@ -490,7 +481,7 @@ static inline int __sched
 __ww_mutex_lock_check_stamp(struct mutex *lock, struct ww_acquire_ctx *ctx)
 {
 	struct ww_mutex *ww = container_of(lock, struct ww_mutex, base);
-	struct ww_acquire_ctx *hold_ctx = ACCESS_ONCE(ww->ctx);
+	struct ww_acquire_ctx *hold_ctx = READ_ONCE(ww->ctx);
 
 	if (!hold_ctx)
 		return 0;
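
The rewritten mutex_spin_on_owner() above folds the old owner_running() helper into the loop and uses the return value to say whether spinning is still worthwhile. The underlying idiom is unchanged: dereference the speculative owner pointer only while lock->owner still matches it, with a compiler barrier between the check and the ->on_cpu load, all under rcu_read_lock(). A stripped-down sketch of that idiom with hypothetical demo_* names (it assumes CONFIG_SMP, where task_struct has an on_cpu field):

#include <linux/compiler.h>
#include <linux/rcupdate.h>
#include <linux/sched.h>

struct demo_lock {
	struct task_struct *owner;	/* set by the lock holder, or NULL */
};

static bool demo_owner_on_cpu(struct demo_lock *lock, struct task_struct *owner)
{
	bool on_cpu = false;

	rcu_read_lock();
	if (lock->owner == owner) {
		/* Order the ->on_cpu load after the owner re-check. */
		barrier();
		on_cpu = owner->on_cpu;
	}
	rcu_read_unlock();

	return on_cpu;
}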
diff --git a/kernel/locking/osq_lock.c b/kernel/locking/osq_lock.c
index c112d00341b0..dc85ee23a26f 100644
--- a/kernel/locking/osq_lock.c
+++ b/kernel/locking/osq_lock.c
@@ -98,7 +98,7 @@ bool osq_lock(struct optimistic_spin_queue *lock)
 
 	prev = decode_cpu(old);
 	node->prev = prev;
-	ACCESS_ONCE(prev->next) = node;
+	WRITE_ONCE(prev->next, node);
 
 	/*
 	 * Normally @prev is untouchable after the above store; because at that
@@ -109,7 +109,7 @@ bool osq_lock(struct optimistic_spin_queue *lock)
 	 * cmpxchg in an attempt to undo our queueing.
 	 */
 
-	while (!ACCESS_ONCE(node->locked)) {
+	while (!READ_ONCE(node->locked)) {
 		/*
 		 * If we need to reschedule bail... so we can block.
 		 */
@@ -148,7 +148,7 @@ unqueue:
 		 * Or we race against a concurrent unqueue()'s step-B, in which
 		 * case its step-C will write us a new @node->prev pointer.
 		 */
-		prev = ACCESS_ONCE(node->prev);
+		prev = READ_ONCE(node->prev);
 	}
 
 	/*
@@ -170,8 +170,8 @@ unqueue:
 	 * it will wait in Step-A.
 	 */
 
-	ACCESS_ONCE(next->prev) = prev;
-	ACCESS_ONCE(prev->next) = next;
+	WRITE_ONCE(next->prev, prev);
+	WRITE_ONCE(prev->next, next);
 
 	return false;
 }
@@ -193,11 +193,11 @@ void osq_unlock(struct optimistic_spin_queue *lock)
 	node = this_cpu_ptr(&osq_node);
 	next = xchg(&node->next, NULL);
 	if (next) {
-		ACCESS_ONCE(next->locked) = 1;
+		WRITE_ONCE(next->locked, 1);
 		return;
 	}
 
 	next = osq_wait_next(lock, node, NULL);
 	if (next)
-		ACCESS_ONCE(next->locked) = 1;
+		WRITE_ONCE(next->locked, 1);
 }
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index 6357265a31ad..b73279367087 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -349,7 +349,7 @@ static inline struct rt_mutex *task_blocked_on_lock(struct task_struct *p)
  *
  * @task:	the task owning the mutex (owner) for which a chain walk is
  *		probably needed
- * @deadlock_detect: do we have to carry out deadlock detection?
+ * @chwalk:	do we have to carry out deadlock detection?
  * @orig_lock:	the mutex (can be NULL if we are walking the chain to recheck
  *		things for a task that has just got its priority adjusted, and
  *		is waiting on a mutex)
diff --git a/kernel/locking/rwsem-spinlock.c b/kernel/locking/rwsem-spinlock.c
index 2555ae15ec14..3a5048572065 100644
--- a/kernel/locking/rwsem-spinlock.c
+++ b/kernel/locking/rwsem-spinlock.c
@@ -85,6 +85,13 @@ __rwsem_do_wake(struct rw_semaphore *sem, int wakewrite)
 
 		list_del(&waiter->list);
 		tsk = waiter->task;
+		/*
+		 * Make sure we do not wakeup the next reader before
+		 * setting the nil condition to grant the next reader;
+		 * otherwise we could miss the wakeup on the other
+		 * side and end up sleeping again. See the pairing
+		 * in rwsem_down_read_failed().
+		 */
 		smp_mb();
 		waiter->task = NULL;
 		wake_up_process(tsk);
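
The comment added above explains why smp_mb() has to sit between dequeuing the waiter and clearing waiter->task: the sleeping side re-checks ->task after setting its task state, and the NULL store is what tells it the lock was granted. A rough sketch of that sleeping side, with hypothetical names modeled on the wait loop in rwsem_down_read_failed():

#include <linux/compiler.h>
#include <linux/sched.h>

struct demo_waiter {
	struct task_struct *task;	/* cleared by the waker to grant the lock */
};

/* Sleep until the waker clears ->task; pairs with the waker's smp_mb(). */
static void demo_wait_for_grant(struct demo_waiter *waiter)
{
	while (true) {
		set_current_state(TASK_UNINTERRUPTIBLE);
		if (!READ_ONCE(waiter->task))
			break;
		schedule();
	}
	__set_current_state(TASK_RUNNING);
}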
diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c
index 2f7cc4076f50..3417d0172a5d 100644
--- a/kernel/locking/rwsem-xadd.c
+++ b/kernel/locking/rwsem-xadd.c
@@ -14,8 +14,9 @@
 #include <linux/init.h>
 #include <linux/export.h>
 #include <linux/sched/rt.h>
+#include <linux/osq_lock.h>
 
-#include "mcs_spinlock.h"
+#include "rwsem.h"
 
 /*
  * Guide to the rw_semaphore's count field for common values.
@@ -186,6 +187,13 @@ __rwsem_do_wake(struct rw_semaphore *sem, enum rwsem_wake_type wake_type)
 		waiter = list_entry(next, struct rwsem_waiter, list);
 		next = waiter->list.next;
 		tsk = waiter->task;
+		/*
+		 * Make sure we do not wakeup the next reader before
+		 * setting the nil condition to grant the next reader;
+		 * otherwise we could miss the wakeup on the other
+		 * side and end up sleeping again. See the pairing
+		 * in rwsem_down_read_failed().
+		 */
 		smp_mb();
 		waiter->task = NULL;
 		wake_up_process(tsk);
@@ -258,6 +266,7 @@ static inline bool rwsem_try_write_lock(long count, struct rw_semaphore *sem)
 		    RWSEM_ACTIVE_WRITE_BIAS) == RWSEM_WAITING_BIAS) {
 		if (!list_is_singular(&sem->wait_list))
 			rwsem_atomic_update(RWSEM_WAITING_BIAS, sem);
+		rwsem_set_owner(sem);
 		return true;
 	}
 
@@ -270,15 +279,17 @@ static inline bool rwsem_try_write_lock(long count, struct rw_semaphore *sem)
  */
 static inline bool rwsem_try_write_lock_unqueued(struct rw_semaphore *sem)
 {
-	long old, count = ACCESS_ONCE(sem->count);
+	long old, count = READ_ONCE(sem->count);
 
 	while (true) {
 		if (!(count == 0 || count == RWSEM_WAITING_BIAS))
 			return false;
 
 		old = cmpxchg(&sem->count, count, count + RWSEM_ACTIVE_WRITE_BIAS);
-		if (old == count)
+		if (old == count) {
+			rwsem_set_owner(sem);
 			return true;
+		}
 
 		count = old;
 	}
@@ -287,60 +298,67 @@ static inline bool rwsem_try_write_lock_unqueued(struct rw_semaphore *sem)
 static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem)
 {
 	struct task_struct *owner;
-	bool on_cpu = false;
+	bool ret = true;
 
 	if (need_resched())
 		return false;
 
 	rcu_read_lock();
-	owner = ACCESS_ONCE(sem->owner);
-	if (owner)
-		on_cpu = owner->on_cpu;
-	rcu_read_unlock();
-
-	/*
-	 * If sem->owner is not set, yet we have just recently entered the
-	 * slowpath, then there is a possibility reader(s) may have the lock.
-	 * To be safe, avoid spinning in these situations.
-	 */
-	return on_cpu;
-}
-
-static inline bool owner_running(struct rw_semaphore *sem,
-				 struct task_struct *owner)
-{
-	if (sem->owner != owner)
-		return false;
-
-	/*
-	 * Ensure we emit the owner->on_cpu, dereference _after_ checking
-	 * sem->owner still matches owner, if that fails, owner might
-	 * point to free()d memory, if it still matches, the rcu_read_lock()
-	 * ensures the memory stays valid.
-	 */
-	barrier();
+	owner = READ_ONCE(sem->owner);
+	if (!owner) {
+		long count = READ_ONCE(sem->count);
+		/*
+		 * If sem->owner is not set, yet we have just recently entered the
+		 * slowpath with the lock being active, then there is a possibility
+		 * reader(s) may have the lock. To be safe, bail spinning in these
+		 * situations.
+		 */
+		if (count & RWSEM_ACTIVE_MASK)
+			ret = false;
+		goto done;
+	}
 
-	return owner->on_cpu;
+	ret = owner->on_cpu;
+done:
+	rcu_read_unlock();
+	return ret;
 }
 
 static noinline
 bool rwsem_spin_on_owner(struct rw_semaphore *sem, struct task_struct *owner)
 {
+	long count;
+
 	rcu_read_lock();
-	while (owner_running(sem, owner)) {
-		if (need_resched())
-			break;
+	while (sem->owner == owner) {
+		/*
+		 * Ensure we emit the owner->on_cpu, dereference _after_
+		 * checking sem->owner still matches owner, if that fails,
+		 * owner might point to free()d memory, if it still matches,
+		 * the rcu_read_lock() ensures the memory stays valid.
+		 */
+		barrier();
+
+		/* abort spinning when need_resched or owner is not running */
+		if (!owner->on_cpu || need_resched()) {
+			rcu_read_unlock();
+			return false;
+		}
 
 		cpu_relax_lowlatency();
 	}
 	rcu_read_unlock();
 
+	if (READ_ONCE(sem->owner))
+		return true; /* new owner, continue spinning */
+
 	/*
-	 * We break out the loop above on need_resched() or when the
-	 * owner changed, which is a sign for heavy contention. Return
-	 * success only when sem->owner is NULL.
+	 * When the owner is not set, the lock could be free or
+	 * held by readers. Check the counter to verify the
+	 * state.
 	 */
-	return sem->owner == NULL;
+	count = READ_ONCE(sem->count);
+	return (count == 0 || count == RWSEM_WAITING_BIAS);
 }
 
 static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
@@ -358,7 +376,7 @@ static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
 		goto done;
 
 	while (true) {
-		owner = ACCESS_ONCE(sem->owner);
+		owner = READ_ONCE(sem->owner);
 		if (owner && !rwsem_spin_on_owner(sem, owner))
 			break;
 
@@ -432,7 +450,7 @@ struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem)
 
 	/* we're now waiting on the lock, but no longer actively locking */
 	if (waiting) {
-		count = ACCESS_ONCE(sem->count);
+		count = READ_ONCE(sem->count);
 
 		/*
 		 * If there were already threads queued before us and there are
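
The reworked rwsem_can_spin_on_owner()/rwsem_spin_on_owner() above have to deal with the fact that readers never set sem->owner: a NULL owner can mean either "the lock is free" or "readers hold it", and only the count can tell those apart. A small sketch of that final decision; the demo_ name is hypothetical, and RWSEM_ACTIVE_MASK/RWSEM_WAITING_BIAS are constants local to rwsem-xadd.c, so code like this would have to live in that file:

#include <linux/compiler.h>
#include <linux/rwsem.h>

/* Keep spinning only if the rwsem looks free or merely has queued waiters. */
static inline bool demo_worth_spinning(struct rw_semaphore *sem)
{
	long count = READ_ONCE(sem->count);

	/* 0: free.  RWSEM_WAITING_BIAS: free, but with sleeping waiters. */
	return count == 0 || count == RWSEM_WAITING_BIAS;
}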
diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
index e2d3bc7f03b4..205be0ce34de 100644
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -9,29 +9,9 @@
 #include <linux/sched.h>
 #include <linux/export.h>
 #include <linux/rwsem.h>
-
 #include <linux/atomic.h>
 
-#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
-static inline void rwsem_set_owner(struct rw_semaphore *sem)
-{
-	sem->owner = current;
-}
-
-static inline void rwsem_clear_owner(struct rw_semaphore *sem)
-{
-	sem->owner = NULL;
-}
-
-#else
-static inline void rwsem_set_owner(struct rw_semaphore *sem)
-{
-}
-
-static inline void rwsem_clear_owner(struct rw_semaphore *sem)
-{
-}
-#endif
+#include "rwsem.h"
 
 /*
  * lock for reading
diff --git a/kernel/locking/rwsem.h b/kernel/locking/rwsem.h
new file mode 100644
index 000000000000..870ed9a5b426
--- /dev/null
+++ b/kernel/locking/rwsem.h
@@ -0,0 +1,20 @@
+#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
+static inline void rwsem_set_owner(struct rw_semaphore *sem)
+{
+	sem->owner = current;
+}
+
+static inline void rwsem_clear_owner(struct rw_semaphore *sem)
+{
+	sem->owner = NULL;
+}
+
+#else
+static inline void rwsem_set_owner(struct rw_semaphore *sem)
+{
+}
+
+static inline void rwsem_clear_owner(struct rw_semaphore *sem)
+{
+}
+#endif
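
The new rwsem.h exists so that rwsem.c and rwsem-xadd.c can share the owner-tracking helpers instead of rwsem.c carrying private copies. A simplified sketch of how the write-lock path in rwsem.c uses them; the demo_ wrappers are hypothetical, and the real down_write()/up_write() additionally handle lockdep annotations and the contended cases:

#include <linux/kernel.h>
#include <linux/rwsem.h>

#include "rwsem.h"

void demo_down_write(struct rw_semaphore *sem)
{
	might_sleep();
	__down_write(sem);		/* architecture/implementation lock path */
	rwsem_set_owner(sem);		/* let optimistic spinners find the writer */
}

void demo_up_write(struct rw_semaphore *sem)
{
	rwsem_clear_owner(sem);		/* clear before the lock is actually released */
	__up_write(sem);
}

Because the owner is set only after the acquire, there is a short window where a writer holds the lock with sem->owner still NULL, which is exactly why rwsem_can_spin_on_owner() above also consults the count.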