Diffstat (limited to 'kernel/locking')
 -rw-r--r--  kernel/locking/lockdep.c        | 81
 -rw-r--r--  kernel/locking/mcs_spinlock.h   |  6
 -rw-r--r--  kernel/locking/mutex.c          | 51
 -rw-r--r--  kernel/locking/osq_lock.c       | 14
 -rw-r--r--  kernel/locking/rtmutex.c        |  2
 -rw-r--r--  kernel/locking/rwsem-spinlock.c |  7
 -rw-r--r--  kernel/locking/rwsem-xadd.c     | 98
 -rw-r--r--  kernel/locking/rwsem.c          | 22
 -rw-r--r--  kernel/locking/rwsem.h          | 20
 9 files changed, 173 insertions(+), 128 deletions(-)
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 88d0d4420ad2..ba77ab5f64dd 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -633,7 +633,7 @@ static int count_matching_names(struct lock_class *new_class)
 	if (!new_class->name)
 		return 0;
 
-	list_for_each_entry(class, &all_lock_classes, lock_entry) {
+	list_for_each_entry_rcu(class, &all_lock_classes, lock_entry) {
 		if (new_class->key - new_class->subclass == class->key)
 			return class->name_version;
 		if (class->name && !strcmp(class->name, new_class->name))
@@ -700,10 +700,12 @@ look_up_lock_class(struct lockdep_map *lock, unsigned int subclass)
 	hash_head = classhashentry(key);
 
 	/*
-	 * We can walk the hash lockfree, because the hash only
-	 * grows, and we are careful when adding entries to the end:
+	 * We do an RCU walk of the hash, see lockdep_free_key_range().
 	 */
-	list_for_each_entry(class, hash_head, hash_entry) {
+	if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
+		return NULL;
+
+	list_for_each_entry_rcu(class, hash_head, hash_entry) {
 		if (class->key == key) {
 			/*
 			 * Huh! same key, different name? Did someone trample
@@ -728,7 +730,8 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force)
 	struct lockdep_subclass_key *key;
 	struct list_head *hash_head;
 	struct lock_class *class;
-	unsigned long flags;
+
+	DEBUG_LOCKS_WARN_ON(!irqs_disabled());
 
 	class = look_up_lock_class(lock, subclass);
 	if (likely(class))
@@ -750,28 +753,26 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force)
 	key = lock->key->subkeys + subclass;
 	hash_head = classhashentry(key);
 
-	raw_local_irq_save(flags);
 	if (!graph_lock()) {
-		raw_local_irq_restore(flags);
 		return NULL;
 	}
 	/*
 	 * We have to do the hash-walk again, to avoid races
 	 * with another CPU:
 	 */
-	list_for_each_entry(class, hash_head, hash_entry)
+	list_for_each_entry_rcu(class, hash_head, hash_entry) {
 		if (class->key == key)
 			goto out_unlock_set;
+	}
+
 	/*
 	 * Allocate a new key from the static array, and add it to
 	 * the hash:
 	 */
 	if (nr_lock_classes >= MAX_LOCKDEP_KEYS) {
 		if (!debug_locks_off_graph_unlock()) {
-			raw_local_irq_restore(flags);
 			return NULL;
 		}
-		raw_local_irq_restore(flags);
 
 		print_lockdep_off("BUG: MAX_LOCKDEP_KEYS too low!");
 		dump_stack();
@@ -798,7 +799,6 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force)
 
 	if (verbose(class)) {
 		graph_unlock();
-		raw_local_irq_restore(flags);
 
 		printk("\nnew class %p: %s", class->key, class->name);
 		if (class->name_version > 1)
@@ -806,15 +806,12 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force)
 		printk("\n");
 		dump_stack();
 
-		raw_local_irq_save(flags);
 		if (!graph_lock()) {
-			raw_local_irq_restore(flags);
 			return NULL;
 		}
 	}
 out_unlock_set:
 	graph_unlock();
-	raw_local_irq_restore(flags);
 
 out_set_class_cache:
 	if (!subclass || force)
@@ -870,11 +867,9 @@ static int add_lock_to_list(struct lock_class *class, struct lock_class *this,
 	entry->distance = distance;
 	entry->trace = *trace;
 	/*
-	 * Since we never remove from the dependency list, the list can
-	 * be walked lockless by other CPUs, it's only allocation
-	 * that must be protected by the spinlock. But this also means
-	 * we must make new entries visible only once writes to the
-	 * entry become visible - hence the RCU op:
+	 * Both allocation and removal are done under the graph lock; but
+	 * iteration is under RCU-sched; see look_up_lock_class() and
+	 * lockdep_free_key_range().
 	 */
 	list_add_tail_rcu(&entry->entry, head);
 
@@ -1025,7 +1020,9 @@ static int __bfs(struct lock_list *source_entry,
 		else
 			head = &lock->class->locks_before;
 
-		list_for_each_entry(entry, head, entry) {
+		DEBUG_LOCKS_WARN_ON(!irqs_disabled());
+
+		list_for_each_entry_rcu(entry, head, entry) {
 			if (!lock_accessed(entry)) {
 				unsigned int cq_depth;
 				mark_lock_accessed(entry, lock);
@@ -2022,7 +2019,7 @@ static inline int lookup_chain_cache(struct task_struct *curr,
 	 * We can walk it lock-free, because entries only get added
 	 * to the hash:
 	 */
-	list_for_each_entry(chain, hash_head, entry) {
+	list_for_each_entry_rcu(chain, hash_head, entry) {
 		if (chain->chain_key == chain_key) {
 cache_hit:
 			debug_atomic_inc(chain_lookup_hits);
@@ -2996,8 +2993,18 @@ void lockdep_init_map(struct lockdep_map *lock, const char *name,
 	if (unlikely(!debug_locks))
 		return;
 
-	if (subclass)
+	if (subclass) {
+		unsigned long flags;
+
+		if (DEBUG_LOCKS_WARN_ON(current->lockdep_recursion))
+			return;
+
+		raw_local_irq_save(flags);
+		current->lockdep_recursion = 1;
 		register_lock_class(lock, subclass, 1);
+		current->lockdep_recursion = 0;
+		raw_local_irq_restore(flags);
+	}
 }
 EXPORT_SYMBOL_GPL(lockdep_init_map);
 
@@ -3887,9 +3894,17 @@ static inline int within(const void *addr, void *start, unsigned long size)
 	return addr >= start && addr < start + size;
 }
 
+/*
+ * Used in module.c to remove lock classes from memory that is going to be
+ * freed; and possibly re-used by other modules.
+ *
+ * We will have had one sync_sched() before getting here, so we're guaranteed
+ * nobody will look up these exact classes -- they're properly dead but still
+ * allocated.
+ */
 void lockdep_free_key_range(void *start, unsigned long size)
 {
-	struct lock_class *class, *next;
+	struct lock_class *class;
 	struct list_head *head;
 	unsigned long flags;
 	int i;
@@ -3905,7 +3920,7 @@ void lockdep_free_key_range(void *start, unsigned long size)
 		head = classhash_table + i;
 		if (list_empty(head))
 			continue;
-		list_for_each_entry_safe(class, next, head, hash_entry) {
+		list_for_each_entry_rcu(class, head, hash_entry) {
 			if (within(class->key, start, size))
 				zap_class(class);
 			else if (within(class->name, start, size))
@@ -3916,11 +3931,25 @@ void lockdep_free_key_range(void *start, unsigned long size)
 	if (locked)
 		graph_unlock();
 	raw_local_irq_restore(flags);
+
+	/*
+	 * Wait for any possible iterators from look_up_lock_class() to pass
+	 * before continuing to free the memory they refer to.
+	 *
+	 * sync_sched() is sufficient because the read-side is IRQ disable.
+	 */
+	synchronize_sched();
+
+	/*
+	 * XXX at this point we could return the resources to the pool;
+	 * instead we leak them. We would need to change to bitmap allocators
+	 * instead of the linear allocators we have now.
+	 */
 }
 
 void lockdep_reset_lock(struct lockdep_map *lock)
 {
-	struct lock_class *class, *next;
+	struct lock_class *class;
 	struct list_head *head;
 	unsigned long flags;
 	int i, j;
@@ -3948,7 +3977,7 @@ void lockdep_reset_lock(struct lockdep_map *lock)
 		head = classhash_table + i;
 		if (list_empty(head))
 			continue;
-		list_for_each_entry_safe(class, next, head, hash_entry) {
+		list_for_each_entry_rcu(class, head, hash_entry) {
 			int match = 0;
 
 			for (j = 0; j < NR_LOCKDEP_CACHING_CLASSES; j++)
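The lockdep hunks above switch the class hash and dependency lists from "grow-only, walked locklessly" to a real publish/iterate/unpublish scheme: writers still serialize on the graph lock, readers iterate with list_for_each_entry_rcu() while IRQs are disabled, and lockdep_free_key_range() waits with synchronize_sched() before the backing memory may be reused. A minimal illustrative sketch of that pattern on a stand-alone list follows; the ex_* names are invented for the example, only the list/RCU primitives are real kernel APIs:

	#include <linux/list.h>
	#include <linux/rcupdate.h>
	#include <linux/spinlock.h>

	struct ex_class {
		const void		*key;
		struct list_head	hash_entry;
	};

	static LIST_HEAD(ex_hash);		/* one bucket, for brevity */
	static DEFINE_SPINLOCK(ex_lock);	/* plays the role of the graph lock */

	/* Reader side: runs with IRQs disabled, which is what sync_sched() relies on. */
	static struct ex_class *ex_lookup(const void *key)
	{
		struct ex_class *class;

		list_for_each_entry_rcu(class, &ex_hash, hash_entry) {
			if (class->key == key)
				return class;
		}
		return NULL;
	}

	/* Writer side: publish under the lock; concurrent lookups may see it at once. */
	static void ex_register(struct ex_class *class)
	{
		spin_lock(&ex_lock);
		list_add_tail_rcu(&class->hash_entry, &ex_hash);
		spin_unlock(&ex_lock);
	}

	/* Teardown: unlink under the lock, then wait out the IRQ-disabled readers. */
	static void ex_zap(struct ex_class *class)
	{
		spin_lock(&ex_lock);
		list_del_rcu(&class->hash_entry);
		spin_unlock(&ex_lock);

		synchronize_sched();	/* every ex_lookup() in flight has finished */
		/* the memory behind @class may now be freed or reused */
	}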
diff --git a/kernel/locking/mcs_spinlock.h b/kernel/locking/mcs_spinlock.h
index d1fe2ba5bac9..75e114bdf3f2 100644
--- a/kernel/locking/mcs_spinlock.h
+++ b/kernel/locking/mcs_spinlock.h
@@ -78,7 +78,7 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
 		 */
 		return;
 	}
-	ACCESS_ONCE(prev->next) = node;
+	WRITE_ONCE(prev->next, node);
 
 	/* Wait until the lock holder passes the lock down. */
 	arch_mcs_spin_lock_contended(&node->locked);
@@ -91,7 +91,7 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
 static inline
 void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
 {
-	struct mcs_spinlock *next = ACCESS_ONCE(node->next);
+	struct mcs_spinlock *next = READ_ONCE(node->next);
 
 	if (likely(!next)) {
 		/*
@@ -100,7 +100,7 @@ void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
 		if (likely(cmpxchg(lock, node, NULL) == node))
 			return;
 		/* Wait until the next pointer is set */
-		while (!(next = ACCESS_ONCE(node->next)))
+		while (!(next = READ_ONCE(node->next)))
			cpu_relax_lowlatency();
 	}
 
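The mcs_spinlock.h hunks are part of the tree-wide ACCESS_ONCE() -> READ_ONCE()/WRITE_ONCE() conversion: the new helpers take the location as an argument instead of being used as an lvalue, and they keep working for non-scalar types. A tiny before/after sketch, with names made up for illustration:

	struct ex_node {
		struct ex_node *next;
	};

	static struct ex_node *ex_shared;

	static struct ex_node *ex_publish(struct ex_node *n)
	{
		/* old style:  ACCESS_ONCE(ex_shared) = n;  v = ACCESS_ONCE(ex_shared); */
		WRITE_ONCE(ex_shared, n);	/* single, non-torn store */
		return READ_ONCE(ex_shared);	/* single, non-torn load */
	}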
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index 94674e5919cb..4cccea6b8934 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -25,7 +25,7 @@
 #include <linux/spinlock.h>
 #include <linux/interrupt.h>
 #include <linux/debug_locks.h>
-#include "mcs_spinlock.h"
+#include <linux/osq_lock.h>
 
 /*
  * In the DEBUG case we are using the "NULL fastpath" for mutexes,
@@ -217,44 +217,35 @@ ww_mutex_set_context_slowpath(struct ww_mutex *lock,
 }
 
 #ifdef CONFIG_MUTEX_SPIN_ON_OWNER
-static inline bool owner_running(struct mutex *lock, struct task_struct *owner)
-{
-	if (lock->owner != owner)
-		return false;
-
-	/*
-	 * Ensure we emit the owner->on_cpu, dereference _after_ checking
-	 * lock->owner still matches owner, if that fails, owner might
-	 * point to free()d memory, if it still matches, the rcu_read_lock()
-	 * ensures the memory stays valid.
-	 */
-	barrier();
-
-	return owner->on_cpu;
-}
-
 /*
  * Look out! "owner" is an entirely speculative pointer
  * access and not reliable.
  */
 static noinline
-int mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner)
+bool mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner)
 {
+	bool ret = true;
+
 	rcu_read_lock();
-	while (owner_running(lock, owner)) {
-		if (need_resched())
+	while (lock->owner == owner) {
+		/*
+		 * Ensure we emit the owner->on_cpu, dereference _after_
+		 * checking lock->owner still matches owner. If that fails,
+		 * owner might point to freed memory. If it still matches,
+		 * the rcu_read_lock() ensures the memory stays valid.
+		 */
+		barrier();
+
+		if (!owner->on_cpu || need_resched()) {
+			ret = false;
 			break;
+		}
 
 		cpu_relax_lowlatency();
 	}
 	rcu_read_unlock();
 
-	/*
-	 * We break out the loop above on need_resched() and when the
-	 * owner changed, which is a sign for heavy contention. Return
-	 * success only when lock->owner is NULL.
-	 */
-	return lock->owner == NULL;
+	return ret;
 }
 
 /*
@@ -269,7 +260,7 @@ static inline int mutex_can_spin_on_owner(struct mutex *lock)
 		return 0;
 
 	rcu_read_lock();
-	owner = ACCESS_ONCE(lock->owner);
+	owner = READ_ONCE(lock->owner);
 	if (owner)
 		retval = owner->on_cpu;
 	rcu_read_unlock();
@@ -343,7 +334,7 @@ static bool mutex_optimistic_spin(struct mutex *lock,
 			 * As such, when deadlock detection needs to be
 			 * performed the optimistic spinning cannot be done.
 			 */
-			if (ACCESS_ONCE(ww->ctx))
+			if (READ_ONCE(ww->ctx))
 				break;
 		}
 
@@ -351,7 +342,7 @@ static bool mutex_optimistic_spin(struct mutex *lock,
 		 * If there's an owner, wait for it to either
 		 * release the lock or go to sleep.
 		 */
-		owner = ACCESS_ONCE(lock->owner);
+		owner = READ_ONCE(lock->owner);
 		if (owner && !mutex_spin_on_owner(lock, owner))
 			break;
 
@@ -490,7 +481,7 @@ static inline int __sched
 __ww_mutex_lock_check_stamp(struct mutex *lock, struct ww_acquire_ctx *ctx)
 {
 	struct ww_mutex *ww = container_of(lock, struct ww_mutex, base);
-	struct ww_acquire_ctx *hold_ctx = ACCESS_ONCE(ww->ctx);
+	struct ww_acquire_ctx *hold_ctx = READ_ONCE(ww->ctx);
 
 	if (!hold_ctx)
 		return 0;
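mutex_spin_on_owner() now returns a bool and folds the old owner_running() helper into its loop: it spins only while lock->owner is unchanged and that owner is running on a CPU, and reports false as soon as the owner sleeps or the spinner must reschedule. A condensed sketch of how a caller consumes that result; this is simplified from mutex_optimistic_spin() in the hunk above and the acquisition step itself is deliberately elided:

	static bool optimistic_spin_sketch(struct mutex *lock)
	{
		while (true) {
			struct task_struct *owner;

			/* If there is an owner, spin only while it stays on a CPU. */
			owner = READ_ONCE(lock->owner);
			if (owner && !mutex_spin_on_owner(lock, owner))
				break;		/* owner went to sleep or we must resched */

			/*
			 * The real code attempts the atomic acquisition here and
			 * returns true on success; that part is elided in this sketch.
			 */

			if (need_resched())
				break;

			cpu_relax_lowlatency();
		}

		return false;	/* fall back to the sleeping slowpath */
	}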
diff --git a/kernel/locking/osq_lock.c b/kernel/locking/osq_lock.c
index c112d00341b0..dc85ee23a26f 100644
--- a/kernel/locking/osq_lock.c
+++ b/kernel/locking/osq_lock.c
@@ -98,7 +98,7 @@ bool osq_lock(struct optimistic_spin_queue *lock)
 
 	prev = decode_cpu(old);
 	node->prev = prev;
-	ACCESS_ONCE(prev->next) = node;
+	WRITE_ONCE(prev->next, node);
 
 	/*
 	 * Normally @prev is untouchable after the above store; because at that
@@ -109,7 +109,7 @@ bool osq_lock(struct optimistic_spin_queue *lock)
 	 * cmpxchg in an attempt to undo our queueing.
 	 */
 
-	while (!ACCESS_ONCE(node->locked)) {
+	while (!READ_ONCE(node->locked)) {
 		/*
 		 * If we need to reschedule bail... so we can block.
 		 */
@@ -148,7 +148,7 @@ unqueue:
 		 * Or we race against a concurrent unqueue()'s step-B, in which
 		 * case its step-C will write us a new @node->prev pointer.
 		 */
-		prev = ACCESS_ONCE(node->prev);
+		prev = READ_ONCE(node->prev);
 	}
 
 	/*
@@ -170,8 +170,8 @@ unqueue:
 	 * it will wait in Step-A.
 	 */
 
-	ACCESS_ONCE(next->prev) = prev;
-	ACCESS_ONCE(prev->next) = next;
+	WRITE_ONCE(next->prev, prev);
+	WRITE_ONCE(prev->next, next);
 
 	return false;
 }
@@ -193,11 +193,11 @@ void osq_unlock(struct optimistic_spin_queue *lock)
 	node = this_cpu_ptr(&osq_node);
 	next = xchg(&node->next, NULL);
 	if (next) {
-		ACCESS_ONCE(next->locked) = 1;
+		WRITE_ONCE(next->locked, 1);
 		return;
 	}
 
 	next = osq_wait_next(lock, node, NULL);
 	if (next)
-		ACCESS_ONCE(next->locked) = 1;
+		WRITE_ONCE(next->locked, 1);
 }
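osq_lock()/osq_unlock() themselves are unchanged here apart from the READ_ONCE()/WRITE_ONCE() conversion. For context, a hedged sketch of how an optimistic spin queue is typically wrapped around a spin-then-acquire loop; the my_* names and the atomic_t-based trylock are invented for the example, the real users are mutex_optimistic_spin() and rwsem_optimistic_spin():

	#include <linux/osq_lock.h>

	struct my_lock {
		atomic_t			val;
		struct optimistic_spin_queue	osq;
	};

	static bool my_trylock(struct my_lock *lock)
	{
		return atomic_cmpxchg(&lock->val, 0, 1) == 0;
	}

	static bool my_optimistic_spin(struct my_lock *lock)
	{
		bool taken = false;

		preempt_disable();

		/* Only one CPU at a time queues up to spin; others give up early. */
		if (!osq_lock(&lock->osq))
			goto out;

		while (!(taken = my_trylock(lock))) {
			if (need_resched())
				break;
			cpu_relax_lowlatency();
		}

		osq_unlock(&lock->osq);
	out:
		preempt_enable();
		return taken;
	}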
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index 6357265a31ad..b73279367087 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -349,7 +349,7 @@ static inline struct rt_mutex *task_blocked_on_lock(struct task_struct *p)
  *
  * @task:	the task owning the mutex (owner) for which a chain walk is
  *		probably needed
- * @deadlock_detect: do we have to carry out deadlock detection?
+ * @chwalk:	do we have to carry out deadlock detection?
  * @orig_lock:	the mutex (can be NULL if we are walking the chain to recheck
  *		things for a task that has just got its priority adjusted, and
  *		is waiting on a mutex)
diff --git a/kernel/locking/rwsem-spinlock.c b/kernel/locking/rwsem-spinlock.c
index 2555ae15ec14..3a5048572065 100644
--- a/kernel/locking/rwsem-spinlock.c
+++ b/kernel/locking/rwsem-spinlock.c
@@ -85,6 +85,13 @@ __rwsem_do_wake(struct rw_semaphore *sem, int wakewrite)
 
 		list_del(&waiter->list);
 		tsk = waiter->task;
+		/*
+		 * Make sure we do not wakeup the next reader before
+		 * setting the nil condition to grant the next reader;
+		 * otherwise we could miss the wakeup on the other
+		 * side and end up sleeping again. See the pairing
+		 * in rwsem_down_read_failed().
+		 */
 		smp_mb();
 		waiter->task = NULL;
 		wake_up_process(tsk);
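The comment added above documents what the smp_mb() orders: the store that clears waiter->task (the "nil condition") must be visible before the wakeup, because the waiter sleeps on exactly that field. Paraphrased from the waiting side (rwsem_down_read_failed() and its rwsem-spinlock counterpart), roughly:

	/* Paraphrase of the waiter side; 'waiter' is the caller's on-stack rwsem_waiter. */
	static void wait_until_granted_sketch(struct rwsem_waiter *waiter)
	{
		struct task_struct *tsk = current;

		while (true) {
			/* set_task_state() implies a barrier before the re-check */
			set_task_state(tsk, TASK_UNINTERRUPTIBLE);
			if (!waiter->task)	/* cleared by the waker after its smp_mb() */
				break;
			schedule();
		}
		__set_task_state(tsk, TASK_RUNNING);
	}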
diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c
index 2f7cc4076f50..3417d0172a5d 100644
--- a/kernel/locking/rwsem-xadd.c
+++ b/kernel/locking/rwsem-xadd.c
@@ -14,8 +14,9 @@
 #include <linux/init.h>
 #include <linux/export.h>
 #include <linux/sched/rt.h>
+#include <linux/osq_lock.h>
 
-#include "mcs_spinlock.h"
+#include "rwsem.h"
 
 /*
  * Guide to the rw_semaphore's count field for common values.
@@ -186,6 +187,13 @@ __rwsem_do_wake(struct rw_semaphore *sem, enum rwsem_wake_type wake_type)
 		waiter = list_entry(next, struct rwsem_waiter, list);
 		next = waiter->list.next;
 		tsk = waiter->task;
+		/*
+		 * Make sure we do not wakeup the next reader before
+		 * setting the nil condition to grant the next reader;
+		 * otherwise we could miss the wakeup on the other
+		 * side and end up sleeping again. See the pairing
+		 * in rwsem_down_read_failed().
+		 */
 		smp_mb();
 		waiter->task = NULL;
 		wake_up_process(tsk);
@@ -258,6 +266,7 @@ static inline bool rwsem_try_write_lock(long count, struct rw_semaphore *sem)
 		    RWSEM_ACTIVE_WRITE_BIAS) == RWSEM_WAITING_BIAS) {
 		if (!list_is_singular(&sem->wait_list))
 			rwsem_atomic_update(RWSEM_WAITING_BIAS, sem);
+		rwsem_set_owner(sem);
 		return true;
 	}
 
@@ -270,15 +279,17 @@ static inline bool rwsem_try_write_lock(long count, struct rw_semaphore *sem)
  */
 static inline bool rwsem_try_write_lock_unqueued(struct rw_semaphore *sem)
 {
-	long old, count = ACCESS_ONCE(sem->count);
+	long old, count = READ_ONCE(sem->count);
 
 	while (true) {
 		if (!(count == 0 || count == RWSEM_WAITING_BIAS))
 			return false;
 
 		old = cmpxchg(&sem->count, count, count + RWSEM_ACTIVE_WRITE_BIAS);
-		if (old == count)
+		if (old == count) {
+			rwsem_set_owner(sem);
 			return true;
+		}
 
 		count = old;
 	}
@@ -287,60 +298,67 @@ static inline bool rwsem_try_write_lock_unqueued(struct rw_semaphore *sem)
 static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem)
 {
 	struct task_struct *owner;
-	bool on_cpu = false;
+	bool ret = true;
 
 	if (need_resched())
 		return false;
 
 	rcu_read_lock();
-	owner = ACCESS_ONCE(sem->owner);
-	if (owner)
-		on_cpu = owner->on_cpu;
-	rcu_read_unlock();
-
-	/*
-	 * If sem->owner is not set, yet we have just recently entered the
-	 * slowpath, then there is a possibility reader(s) may have the lock.
-	 * To be safe, avoid spinning in these situations.
-	 */
-	return on_cpu;
-}
-
-static inline bool owner_running(struct rw_semaphore *sem,
-				 struct task_struct *owner)
-{
-	if (sem->owner != owner)
-		return false;
-
-	/*
-	 * Ensure we emit the owner->on_cpu, dereference _after_ checking
-	 * sem->owner still matches owner, if that fails, owner might
-	 * point to free()d memory, if it still matches, the rcu_read_lock()
-	 * ensures the memory stays valid.
-	 */
-	barrier();
+	owner = READ_ONCE(sem->owner);
+	if (!owner) {
+		long count = READ_ONCE(sem->count);
+		/*
+		 * If sem->owner is not set, yet we have just recently entered the
+		 * slowpath with the lock being active, then there is a possibility
+		 * reader(s) may have the lock. To be safe, bail spinning in these
+		 * situations.
+		 */
+		if (count & RWSEM_ACTIVE_MASK)
+			ret = false;
+		goto done;
+	}
 
-	return owner->on_cpu;
+	ret = owner->on_cpu;
+done:
+	rcu_read_unlock();
+	return ret;
 }
 
 static noinline
 bool rwsem_spin_on_owner(struct rw_semaphore *sem, struct task_struct *owner)
 {
+	long count;
+
 	rcu_read_lock();
-	while (owner_running(sem, owner)) {
-		if (need_resched())
-			break;
+	while (sem->owner == owner) {
+		/*
+		 * Ensure we emit the owner->on_cpu, dereference _after_
+		 * checking sem->owner still matches owner, if that fails,
+		 * owner might point to free()d memory, if it still matches,
+		 * the rcu_read_lock() ensures the memory stays valid.
+		 */
+		barrier();
+
+		/* abort spinning when need_resched or owner is not running */
+		if (!owner->on_cpu || need_resched()) {
+			rcu_read_unlock();
+			return false;
+		}
 
 		cpu_relax_lowlatency();
 	}
 	rcu_read_unlock();
 
+	if (READ_ONCE(sem->owner))
+		return true; /* new owner, continue spinning */
+
 	/*
-	 * We break out the loop above on need_resched() or when the
-	 * owner changed, which is a sign for heavy contention. Return
-	 * success only when sem->owner is NULL.
+	 * When the owner is not set, the lock could be free or
+	 * held by readers. Check the counter to verify the
+	 * state.
 	 */
-	return sem->owner == NULL;
+	count = READ_ONCE(sem->count);
+	return (count == 0 || count == RWSEM_WAITING_BIAS);
 }
 
 static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
@@ -358,7 +376,7 @@ static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
 		goto done;
 
 	while (true) {
-		owner = ACCESS_ONCE(sem->owner);
+		owner = READ_ONCE(sem->owner);
 		if (owner && !rwsem_spin_on_owner(sem, owner))
 			break;
 
@@ -432,7 +450,7 @@ struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem)
 
 	/* we're now waiting on the lock, but no longer actively locking */
 	if (waiting) {
-		count = ACCESS_ONCE(sem->count);
+		count = READ_ONCE(sem->count);
 
 		/*
 		 * If there were already threads queued before us and there are
diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
index e2d3bc7f03b4..205be0ce34de 100644
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -9,29 +9,9 @@
 #include <linux/sched.h>
 #include <linux/export.h>
 #include <linux/rwsem.h>
-
 #include <linux/atomic.h>
 
-#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
-static inline void rwsem_set_owner(struct rw_semaphore *sem)
-{
-	sem->owner = current;
-}
-
-static inline void rwsem_clear_owner(struct rw_semaphore *sem)
-{
-	sem->owner = NULL;
-}
-
-#else
-static inline void rwsem_set_owner(struct rw_semaphore *sem)
-{
-}
-
-static inline void rwsem_clear_owner(struct rw_semaphore *sem)
-{
-}
-#endif
+#include "rwsem.h"
 
 /*
  * lock for reading
diff --git a/kernel/locking/rwsem.h b/kernel/locking/rwsem.h
new file mode 100644
index 000000000000..870ed9a5b426
--- /dev/null
+++ b/kernel/locking/rwsem.h
@@ -0,0 +1,20 @@
+#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
+static inline void rwsem_set_owner(struct rw_semaphore *sem)
+{
+	sem->owner = current;
+}
+
+static inline void rwsem_clear_owner(struct rw_semaphore *sem)
+{
+	sem->owner = NULL;
+}
+
+#else
+static inline void rwsem_set_owner(struct rw_semaphore *sem)
+{
+}
+
+static inline void rwsem_clear_owner(struct rw_semaphore *sem)
+{
+}
+#endif
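The new rwsem.h keeps the owner helpers in one place so that rwsem.c, and now rwsem-xadd.c via rwsem_try_write_lock()/rwsem_try_write_lock_unqueued(), can share them. The rwsem.c call sites are not part of the hunks shown here, so treat the following pairing as an illustrative sketch rather than the exact fast-path code:

	void down_write_sketch(struct rw_semaphore *sem)
	{
		/* acquire the write lock (fast path or rwsem_down_write_failed()) */
		__down_write(sem);
		/* make ourselves visible to optimistic spinners via sem->owner */
		rwsem_set_owner(sem);
	}

	void up_write_sketch(struct rw_semaphore *sem)
	{
		/* drop the owner hint before releasing the count */
		rwsem_clear_owner(sem);
		__up_write(sem);
	}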
