diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2014-03-31 13:59:39 -0400 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2014-03-31 13:59:39 -0400 |
| commit | 462bf234a82ae1ae9d7628f59bc81022591e1348 (patch) | |
| tree | f75eea7864ae7c72c0757d5d090e38f757b5cb2d /kernel/locking | |
| parent | 455c6fdbd219161bd09b1165f11699d6d73de11c (diff) | |
| parent | 6f008e72cd111a119b5d8de8c5438d892aae99eb (diff) | |
Merge branch 'core-locking-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull core locking updates from Ingo Molnar:
"The biggest change is the MCS spinlock generalization changes from Tim
Chen, Peter Zijlstra, Jason Low et al. There's also lockdep
fixes/enhancements from Oleg Nesterov, in particular a false negative
fix related to lockdep_set_novalidate_class() usage"
* 'core-locking-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (22 commits)
locking/mutex: Fix debug checks
locking/mutexes: Add extra reschedule point
locking/mutexes: Introduce cancelable MCS lock for adaptive spinning
locking/mutexes: Unlock the mutex without the wait_lock
locking/mutexes: Modify the way optimistic spinners are queued
locking/mutexes: Return false if task need_resched() in mutex_can_spin_on_owner()
locking: Move mcs_spinlock.h into kernel/locking/
m68k: Skip futex_atomic_cmpxchg_inatomic() test
futex: Allow architectures to skip futex_atomic_cmpxchg_inatomic() test
Revert "sched/wait: Suppress Sparse 'variable shadowing' warning"
lockdep: Change lockdep_set_novalidate_class() to use _and_name
lockdep: Change mark_held_locks() to check hlock->check instead of lockdep_no_validate
lockdep: Don't create the wrong dependency on hlock->check == 0
lockdep: Make held_lock->check and "int check" argument bool
locking/mcs: Allow architecture specific asm files to be used for contended case
locking/mcs: Order the header files in Kbuild of each architecture in alphabetical order
sched/wait: Suppress Sparse 'variable shadowing' warning
hung_task/Documentation: Fix hung_task_warnings description
locking/mcs: Allow architectures to hook in to contended paths
locking/mcs: Micro-optimize the MCS code, add extra comments
...
Diffstat (limited to 'kernel/locking')
| -rw-r--r-- | kernel/locking/Makefile | 2 | ||||
| -rw-r--r-- | kernel/locking/lockdep.c | 17 | ||||
| -rw-r--r-- | kernel/locking/mcs_spinlock.c | 178 | ||||
| -rw-r--r-- | kernel/locking/mcs_spinlock.h | 129 | ||||
| -rw-r--r-- | kernel/locking/mutex-debug.c | 6 | ||||
| -rw-r--r-- | kernel/locking/mutex.c | 94 |
6 files changed, 353 insertions, 73 deletions
diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile index baab8e5e7f66..2a9ee96ecf00 100644 --- a/kernel/locking/Makefile +++ b/kernel/locking/Makefile | |||
| @@ -1,5 +1,5 @@ | |||
| 1 | 1 | ||
| 2 | obj-y += mutex.o semaphore.o rwsem.o lglock.o | 2 | obj-y += mutex.o semaphore.o rwsem.o lglock.o mcs_spinlock.o |
| 3 | 3 | ||
| 4 | ifdef CONFIG_FUNCTION_TRACER | 4 | ifdef CONFIG_FUNCTION_TRACER |
| 5 | CFLAGS_REMOVE_lockdep.o = -pg | 5 | CFLAGS_REMOVE_lockdep.o = -pg |
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index eb8a54783fa0..bf0c6b0dd9c5 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c | |||
| @@ -1936,12 +1936,12 @@ check_prevs_add(struct task_struct *curr, struct held_lock *next) | |||
| 1936 | 1936 | ||
| 1937 | for (;;) { | 1937 | for (;;) { |
| 1938 | int distance = curr->lockdep_depth - depth + 1; | 1938 | int distance = curr->lockdep_depth - depth + 1; |
| 1939 | hlock = curr->held_locks + depth-1; | 1939 | hlock = curr->held_locks + depth - 1; |
| 1940 | /* | 1940 | /* |
| 1941 | * Only non-recursive-read entries get new dependencies | 1941 | * Only non-recursive-read entries get new dependencies |
| 1942 | * added: | 1942 | * added: |
| 1943 | */ | 1943 | */ |
| 1944 | if (hlock->read != 2) { | 1944 | if (hlock->read != 2 && hlock->check) { |
| 1945 | if (!check_prev_add(curr, hlock, next, | 1945 | if (!check_prev_add(curr, hlock, next, |
| 1946 | distance, trylock_loop)) | 1946 | distance, trylock_loop)) |
| 1947 | return 0; | 1947 | return 0; |
| @@ -2098,7 +2098,7 @@ static int validate_chain(struct task_struct *curr, struct lockdep_map *lock, | |||
| 2098 | * (If lookup_chain_cache() returns with 1 it acquires | 2098 | * (If lookup_chain_cache() returns with 1 it acquires |
| 2099 | * graph_lock for us) | 2099 | * graph_lock for us) |
| 2100 | */ | 2100 | */ |
| 2101 | if (!hlock->trylock && (hlock->check == 2) && | 2101 | if (!hlock->trylock && hlock->check && |
| 2102 | lookup_chain_cache(curr, hlock, chain_key)) { | 2102 | lookup_chain_cache(curr, hlock, chain_key)) { |
| 2103 | /* | 2103 | /* |
| 2104 | * Check whether last held lock: | 2104 | * Check whether last held lock: |
| @@ -2517,7 +2517,7 @@ mark_held_locks(struct task_struct *curr, enum mark_type mark) | |||
| 2517 | 2517 | ||
| 2518 | BUG_ON(usage_bit >= LOCK_USAGE_STATES); | 2518 | BUG_ON(usage_bit >= LOCK_USAGE_STATES); |
| 2519 | 2519 | ||
| 2520 | if (hlock_class(hlock)->key == __lockdep_no_validate__.subkeys) | 2520 | if (!hlock->check) |
| 2521 | continue; | 2521 | continue; |
| 2522 | 2522 | ||
| 2523 | if (!mark_lock(curr, hlock, usage_bit)) | 2523 | if (!mark_lock(curr, hlock, usage_bit)) |
| @@ -3055,9 +3055,6 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, | |||
| 3055 | int class_idx; | 3055 | int class_idx; |
| 3056 | u64 chain_key; | 3056 | u64 chain_key; |
| 3057 | 3057 | ||
| 3058 | if (!prove_locking) | ||
| 3059 | check = 1; | ||
| 3060 | |||
| 3061 | if (unlikely(!debug_locks)) | 3058 | if (unlikely(!debug_locks)) |
| 3062 | return 0; | 3059 | return 0; |
| 3063 | 3060 | ||
| @@ -3069,8 +3066,8 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, | |||
| 3069 | if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) | 3066 | if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) |
| 3070 | return 0; | 3067 | return 0; |
| 3071 | 3068 | ||
| 3072 | if (lock->key == &__lockdep_no_validate__) | 3069 | if (!prove_locking || lock->key == &__lockdep_no_validate__) |
| 3073 | check = 1; | 3070 | check = 0; |
| 3074 | 3071 | ||
| 3075 | if (subclass < NR_LOCKDEP_CACHING_CLASSES) | 3072 | if (subclass < NR_LOCKDEP_CACHING_CLASSES) |
| 3076 | class = lock->class_cache[subclass]; | 3073 | class = lock->class_cache[subclass]; |
| @@ -3138,7 +3135,7 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, | |||
| 3138 | hlock->holdtime_stamp = lockstat_clock(); | 3135 | hlock->holdtime_stamp = lockstat_clock(); |
| 3139 | #endif | 3136 | #endif |
| 3140 | 3137 | ||
| 3141 | if (check == 2 && !mark_irqflags(curr, hlock)) | 3138 | if (check && !mark_irqflags(curr, hlock)) |
| 3142 | return 0; | 3139 | return 0; |
| 3143 | 3140 | ||
| 3144 | /* mark it as used: */ | 3141 | /* mark it as used: */ |
diff --git a/kernel/locking/mcs_spinlock.c b/kernel/locking/mcs_spinlock.c new file mode 100644 index 000000000000..838dc9e00669 --- /dev/null +++ b/kernel/locking/mcs_spinlock.c | |||
| @@ -0,0 +1,178 @@ | |||
| 1 | |||
| 2 | #include <linux/percpu.h> | ||
| 3 | #include <linux/mutex.h> | ||
| 4 | #include <linux/sched.h> | ||
| 5 | #include "mcs_spinlock.h" | ||
| 6 | |||
| 7 | #ifdef CONFIG_SMP | ||
| 8 | |||
| 9 | /* | ||
| 10 | * An MCS like lock especially tailored for optimistic spinning for sleeping | ||
| 11 | * lock implementations (mutex, rwsem, etc). | ||
| 12 | * | ||
| 13 | * Using a single mcs node per CPU is safe because sleeping locks should not be | ||
| 14 | * called from interrupt context and we have preemption disabled while | ||
| 15 | * spinning. | ||
| 16 | */ | ||
| 17 | static DEFINE_PER_CPU_SHARED_ALIGNED(struct optimistic_spin_queue, osq_node); | ||
| 18 | |||
| 19 | /* | ||
| 20 | * Get a stable @node->next pointer, either for unlock() or unqueue() purposes. | ||
| 21 | * Can return NULL in case we were the last queued and we updated @lock instead. | ||
| 22 | */ | ||
| 23 | static inline struct optimistic_spin_queue * | ||
| 24 | osq_wait_next(struct optimistic_spin_queue **lock, | ||
| 25 | struct optimistic_spin_queue *node, | ||
| 26 | struct optimistic_spin_queue *prev) | ||
| 27 | { | ||
| 28 | struct optimistic_spin_queue *next = NULL; | ||
| 29 | |||
| 30 | for (;;) { | ||
| 31 | if (*lock == node && cmpxchg(lock, node, prev) == node) { | ||
| 32 | /* | ||
| 33 | * We were the last queued, we moved @lock back. @prev | ||
| 34 | * will now observe @lock and will complete its | ||
| 35 | * unlock()/unqueue(). | ||
| 36 | */ | ||
| 37 | break; | ||
| 38 | } | ||
| 39 | |||
| 40 | /* | ||
| 41 | * We must xchg() the @node->next value, because if we were to | ||
| 42 | * leave it in, a concurrent unlock()/unqueue() from | ||
| 43 | * @node->next might complete Step-A and think its @prev is | ||
| 44 | * still valid. | ||
| 45 | * | ||
| 46 | * If the concurrent unlock()/unqueue() wins the race, we'll | ||
| 47 | * wait for either @lock to point to us, through its Step-B, or | ||
| 48 | * wait for a new @node->next from its Step-C. | ||
| 49 | */ | ||
| 50 | if (node->next) { | ||
| 51 | next = xchg(&node->next, NULL); | ||
| 52 | if (next) | ||
| 53 | break; | ||
| 54 | } | ||
| 55 | |||
| 56 | arch_mutex_cpu_relax(); | ||
| 57 | } | ||
| 58 | |||
| 59 | return next; | ||
| 60 | } | ||
| 61 | |||
| 62 | bool osq_lock(struct optimistic_spin_queue **lock) | ||
| 63 | { | ||
| 64 | struct optimistic_spin_queue *node = this_cpu_ptr(&osq_node); | ||
| 65 | struct optimistic_spin_queue *prev, *next; | ||
| 66 | |||
| 67 | node->locked = 0; | ||
| 68 | node->next = NULL; | ||
| 69 | |||
| 70 | node->prev = prev = xchg(lock, node); | ||
| 71 | if (likely(prev == NULL)) | ||
| 72 | return true; | ||
| 73 | |||
| 74 | ACCESS_ONCE(prev->next) = node; | ||
| 75 | |||
| 76 | /* | ||
| 77 | * Normally @prev is untouchable after the above store; because at that | ||
| 78 | * moment unlock can proceed and wipe the node element from stack. | ||
| 79 | * | ||
| 80 | * However, since our nodes are static per-cpu storage, we're | ||
| 81 | * guaranteed their existence -- this allows us to apply | ||
| 82 | * cmpxchg in an attempt to undo our queueing. | ||
| 83 | */ | ||
| 84 | |||
| 85 | while (!smp_load_acquire(&node->locked)) { | ||
| 86 | /* | ||
| 87 | * If we need to reschedule bail... so we can block. | ||
| 88 | */ | ||
| 89 | if (need_resched()) | ||
| 90 | goto unqueue; | ||
| 91 | |||
| 92 | arch_mutex_cpu_relax(); | ||
| 93 | } | ||
| 94 | return true; | ||
| 95 | |||
| 96 | unqueue: | ||
| 97 | /* | ||
| 98 | * Step - A -- stabilize @prev | ||
| 99 | * | ||
| 100 | * Undo our @prev->next assignment; this will make @prev's | ||
| 101 | * unlock()/unqueue() wait for a next pointer since @lock points to us | ||
| 102 | * (or later). | ||
| 103 | */ | ||
| 104 | |||
| 105 | for (;;) { | ||
| 106 | if (prev->next == node && | ||
| 107 | cmpxchg(&prev->next, node, NULL) == node) | ||
| 108 | break; | ||
| 109 | |||
| 110 | /* | ||
| 111 | * We can only fail the cmpxchg() racing against an unlock(), | ||
| 112 | * in which case we should observe @node->locked becoming | ||
| 113 | * true. | ||
| 114 | */ | ||
| 115 | if (smp_load_acquire(&node->locked)) | ||
| 116 | return true; | ||
| 117 | |||
| 118 | arch_mutex_cpu_relax(); | ||
| 119 | |||
| 120 | /* | ||
| 121 | * Or we race against a concurrent unqueue()'s step-B, in which | ||
| 122 | * case its step-C will write us a new @node->prev pointer. | ||
| 123 | */ | ||
| 124 | prev = ACCESS_ONCE(node->prev); | ||
| 125 | } | ||
| 126 | |||
| 127 | /* | ||
| 128 | * Step - B -- stabilize @next | ||
| 129 | * | ||
| 130 | * Similar to unlock(), wait for @node->next or move @lock from @node | ||
| 131 | * back to @prev. | ||
| 132 | */ | ||
| 133 | |||
| 134 | next = osq_wait_next(lock, node, prev); | ||
| 135 | if (!next) | ||
| 136 | return false; | ||
| 137 | |||
| 138 | /* | ||
| 139 | * Step - C -- unlink | ||
| 140 | * | ||
| 141 | * @prev is stable because it's still waiting for a new @prev->next | ||
| 142 | * pointer, @next is stable because our @node->next pointer is NULL and | ||
| 143 | * it will wait in Step-A. | ||
| 144 | */ | ||
| 145 | |||
| 146 | ACCESS_ONCE(next->prev) = prev; | ||
| 147 | ACCESS_ONCE(prev->next) = next; | ||
| 148 | |||
| 149 | return false; | ||
| 150 | } | ||
| 151 | |||
| 152 | void osq_unlock(struct optimistic_spin_queue **lock) | ||
| 153 | { | ||
| 154 | struct optimistic_spin_queue *node = this_cpu_ptr(&osq_node); | ||
| 155 | struct optimistic_spin_queue *next; | ||
| 156 | |||
| 157 | /* | ||
| 158 | * Fast path for the uncontended case. | ||
| 159 | */ | ||
| 160 | if (likely(cmpxchg(lock, node, NULL) == node)) | ||
| 161 | return; | ||
| 162 | |||
| 163 | /* | ||
| 164 | * Second most likely case. | ||
| 165 | */ | ||
| 166 | next = xchg(&node->next, NULL); | ||
| 167 | if (next) { | ||
| 168 | ACCESS_ONCE(next->locked) = 1; | ||
| 169 | return; | ||
| 170 | } | ||
| 171 | |||
| 172 | next = osq_wait_next(lock, node, NULL); | ||
| 173 | if (next) | ||
| 174 | ACCESS_ONCE(next->locked) = 1; | ||
| 175 | } | ||
| 176 | |||
| 177 | #endif | ||
| 178 | |||
diff --git a/kernel/locking/mcs_spinlock.h b/kernel/locking/mcs_spinlock.h new file mode 100644 index 000000000000..a2dbac4aca6b --- /dev/null +++ b/kernel/locking/mcs_spinlock.h | |||
| @@ -0,0 +1,129 @@ | |||
| 1 | /* | ||
| 2 | * MCS lock defines | ||
| 3 | * | ||
| 4 | * This file contains the main data structure and API definitions of MCS lock. | ||
| 5 | * | ||
| 6 | * The MCS lock (proposed by Mellor-Crummey and Scott) is a simple spin-lock | ||
| 7 | * with the desirable properties of being fair, and with each cpu trying | ||
| 8 | * to acquire the lock spinning on a local variable. | ||
| 9 | * It avoids expensive cache bouncings that common test-and-set spin-lock | ||
| 10 | * implementations incur. | ||
| 11 | */ | ||
| 12 | #ifndef __LINUX_MCS_SPINLOCK_H | ||
| 13 | #define __LINUX_MCS_SPINLOCK_H | ||
| 14 | |||
| 15 | #include <asm/mcs_spinlock.h> | ||
| 16 | |||
| 17 | struct mcs_spinlock { | ||
| 18 | struct mcs_spinlock *next; | ||
| 19 | int locked; /* 1 if lock acquired */ | ||
| 20 | }; | ||
| 21 | |||
| 22 | #ifndef arch_mcs_spin_lock_contended | ||
| 23 | /* | ||
| 24 | * Using smp_load_acquire() provides a memory barrier that ensures | ||
| 25 | * subsequent operations happen after the lock is acquired. | ||
| 26 | */ | ||
| 27 | #define arch_mcs_spin_lock_contended(l) \ | ||
| 28 | do { \ | ||
| 29 | while (!(smp_load_acquire(l))) \ | ||
| 30 | arch_mutex_cpu_relax(); \ | ||
| 31 | } while (0) | ||
| 32 | #endif | ||
| 33 | |||
| 34 | #ifndef arch_mcs_spin_unlock_contended | ||
| 35 | /* | ||
| 36 | * smp_store_release() provides a memory barrier to ensure all | ||
| 37 | * operations in the critical section has been completed before | ||
| 38 | * unlocking. | ||
| 39 | */ | ||
| 40 | #define arch_mcs_spin_unlock_contended(l) \ | ||
| 41 | smp_store_release((l), 1) | ||
| 42 | #endif | ||
| 43 | |||
| 44 | /* | ||
| 45 | * Note: the smp_load_acquire/smp_store_release pair is not | ||
| 46 | * sufficient to form a full memory barrier across | ||
| 47 | * cpus for many architectures (except x86) for mcs_unlock and mcs_lock. | ||
| 48 | * For applications that need a full barrier across multiple cpus | ||
| 49 | * with mcs_unlock and mcs_lock pair, smp_mb__after_unlock_lock() should be | ||
| 50 | * used after mcs_lock. | ||
| 51 | */ | ||
| 52 | |||
| 53 | /* | ||
| 54 | * In order to acquire the lock, the caller should declare a local node and | ||
| 55 | * pass a reference of the node to this function in addition to the lock. | ||
| 56 | * If the lock has already been acquired, then this will proceed to spin | ||
| 57 | * on this node->locked until the previous lock holder sets the node->locked | ||
| 58 | * in mcs_spin_unlock(). | ||
| 59 | * | ||
| 60 | * We don't inline mcs_spin_lock() so that perf can correctly account for the | ||
| 61 | * time spent in this lock function. | ||
| 62 | */ | ||
| 63 | static inline | ||
| 64 | void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node) | ||
| 65 | { | ||
| 66 | struct mcs_spinlock *prev; | ||
| 67 | |||
| 68 | /* Init node */ | ||
| 69 | node->locked = 0; | ||
| 70 | node->next = NULL; | ||
| 71 | |||
| 72 | prev = xchg(lock, node); | ||
| 73 | if (likely(prev == NULL)) { | ||
| 74 | /* | ||
| 75 | * Lock acquired, don't need to set node->locked to 1. Threads | ||
| 76 | * only spin on their own node->locked value for lock acquisition. | ||
| 77 | * However, since this thread can immediately acquire the lock | ||
| 78 | * and does not proceed to spin on its own node->locked, this | ||
| 79 | * value won't be used. If a debug mode is needed to | ||
| 80 | * audit lock status, then set node->locked value here. | ||
| 81 | */ | ||
| 82 | return; | ||
| 83 | } | ||
| 84 | ACCESS_ONCE(prev->next) = node; | ||
| 85 | |||
| 86 | /* Wait until the lock holder passes the lock down. */ | ||
| 87 | arch_mcs_spin_lock_contended(&node->locked); | ||
| 88 | } | ||
| 89 | |||
| 90 | /* | ||
| 91 | * Releases the lock. The caller should pass in the corresponding node that | ||
| 92 | * was used to acquire the lock. | ||
| 93 | */ | ||
| 94 | static inline | ||
| 95 | void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node) | ||
| 96 | { | ||
| 97 | struct mcs_spinlock *next = ACCESS_ONCE(node->next); | ||
| 98 | |||
| 99 | if (likely(!next)) { | ||
| 100 | /* | ||
| 101 | * Release the lock by setting it to NULL | ||
| 102 | */ | ||
| 103 | if (likely(cmpxchg(lock, node, NULL) == node)) | ||
| 104 | return; | ||
| 105 | /* Wait until the next pointer is set */ | ||
| 106 | while (!(next = ACCESS_ONCE(node->next))) | ||
| 107 | arch_mutex_cpu_relax(); | ||
| 108 | } | ||
| 109 | |||
| 110 | /* Pass lock to next waiter. */ | ||
| 111 | arch_mcs_spin_unlock_contended(&next->locked); | ||
| 112 | } | ||
| 113 | |||
| 114 | /* | ||
| 115 | * Cancellable version of the MCS lock above. | ||
| 116 | * | ||
| 117 | * Intended for adaptive spinning of sleeping locks: | ||
| 118 | * mutex_lock()/rwsem_down_{read,write}() etc. | ||
| 119 | */ | ||
| 120 | |||
| 121 | struct optimistic_spin_queue { | ||
| 122 | struct optimistic_spin_queue *next, *prev; | ||
| 123 | int locked; /* 1 if lock acquired */ | ||
| 124 | }; | ||
| 125 | |||
| 126 | extern bool osq_lock(struct optimistic_spin_queue **lock); | ||
| 127 | extern void osq_unlock(struct optimistic_spin_queue **lock); | ||
| 128 | |||
| 129 | #endif /* __LINUX_MCS_SPINLOCK_H */ | ||
diff --git a/kernel/locking/mutex-debug.c b/kernel/locking/mutex-debug.c index faf6f5b53e77..e1191c996c59 100644 --- a/kernel/locking/mutex-debug.c +++ b/kernel/locking/mutex-debug.c | |||
| @@ -83,6 +83,12 @@ void debug_mutex_unlock(struct mutex *lock) | |||
| 83 | 83 | ||
| 84 | DEBUG_LOCKS_WARN_ON(!lock->wait_list.prev && !lock->wait_list.next); | 84 | DEBUG_LOCKS_WARN_ON(!lock->wait_list.prev && !lock->wait_list.next); |
| 85 | mutex_clear_owner(lock); | 85 | mutex_clear_owner(lock); |
| 86 | |||
| 87 | /* | ||
| 88 | * __mutex_slowpath_needs_to_unlock() is explicitly 0 for debug | ||
| 89 | * mutexes so that we can do it here after we've verified state. | ||
| 90 | */ | ||
| 91 | atomic_set(&lock->count, 1); | ||
| 86 | } | 92 | } |
| 87 | 93 | ||
| 88 | void debug_mutex_init(struct mutex *lock, const char *name, | 94 | void debug_mutex_init(struct mutex *lock, const char *name, |
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index 4dd6e4c219de..14fe72cc8ce7 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c | |||
| @@ -25,6 +25,7 @@ | |||
| 25 | #include <linux/spinlock.h> | 25 | #include <linux/spinlock.h> |
| 26 | #include <linux/interrupt.h> | 26 | #include <linux/interrupt.h> |
| 27 | #include <linux/debug_locks.h> | 27 | #include <linux/debug_locks.h> |
| 28 | #include "mcs_spinlock.h" | ||
| 28 | 29 | ||
| 29 | /* | 30 | /* |
| 30 | * In the DEBUG case we are using the "NULL fastpath" for mutexes, | 31 | * In the DEBUG case we are using the "NULL fastpath" for mutexes, |
| @@ -33,6 +34,13 @@ | |||
| 33 | #ifdef CONFIG_DEBUG_MUTEXES | 34 | #ifdef CONFIG_DEBUG_MUTEXES |
| 34 | # include "mutex-debug.h" | 35 | # include "mutex-debug.h" |
| 35 | # include <asm-generic/mutex-null.h> | 36 | # include <asm-generic/mutex-null.h> |
| 37 | /* | ||
| 38 | * Must be 0 for the debug case so we do not do the unlock outside of the | ||
| 39 | * wait_lock region. debug_mutex_unlock() will do the actual unlock in this | ||
| 40 | * case. | ||
| 41 | */ | ||
| 42 | # undef __mutex_slowpath_needs_to_unlock | ||
| 43 | # define __mutex_slowpath_needs_to_unlock() 0 | ||
| 36 | #else | 44 | #else |
| 37 | # include "mutex.h" | 45 | # include "mutex.h" |
| 38 | # include <asm/mutex.h> | 46 | # include <asm/mutex.h> |
| @@ -52,7 +60,7 @@ __mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key) | |||
| 52 | INIT_LIST_HEAD(&lock->wait_list); | 60 | INIT_LIST_HEAD(&lock->wait_list); |
| 53 | mutex_clear_owner(lock); | 61 | mutex_clear_owner(lock); |
| 54 | #ifdef CONFIG_MUTEX_SPIN_ON_OWNER | 62 | #ifdef CONFIG_MUTEX_SPIN_ON_OWNER |
| 55 | lock->spin_mlock = NULL; | 63 | lock->osq = NULL; |
| 56 | #endif | 64 | #endif |
| 57 | 65 | ||
| 58 | debug_mutex_init(lock, name, key); | 66 | debug_mutex_init(lock, name, key); |
| @@ -111,54 +119,7 @@ EXPORT_SYMBOL(mutex_lock); | |||
| 111 | * more or less simultaneously, the spinners need to acquire a MCS lock | 119 | * more or less simultaneously, the spinners need to acquire a MCS lock |
| 112 | * first before spinning on the owner field. | 120 | * first before spinning on the owner field. |
| 113 | * | 121 | * |
| 114 | * We don't inline mspin_lock() so that perf can correctly account for the | ||
| 115 | * time spent in this lock function. | ||
| 116 | */ | 122 | */ |
| 117 | struct mspin_node { | ||
| 118 | struct mspin_node *next ; | ||
| 119 | int locked; /* 1 if lock acquired */ | ||
| 120 | }; | ||
| 121 | #define MLOCK(mutex) ((struct mspin_node **)&((mutex)->spin_mlock)) | ||
| 122 | |||
| 123 | static noinline | ||
| 124 | void mspin_lock(struct mspin_node **lock, struct mspin_node *node) | ||
| 125 | { | ||
| 126 | struct mspin_node *prev; | ||
| 127 | |||
| 128 | /* Init node */ | ||
| 129 | node->locked = 0; | ||
| 130 | node->next = NULL; | ||
| 131 | |||
| 132 | prev = xchg(lock, node); | ||
| 133 | if (likely(prev == NULL)) { | ||
| 134 | /* Lock acquired */ | ||
| 135 | node->locked = 1; | ||
| 136 | return; | ||
| 137 | } | ||
| 138 | ACCESS_ONCE(prev->next) = node; | ||
| 139 | smp_wmb(); | ||
| 140 | /* Wait until the lock holder passes the lock down */ | ||
| 141 | while (!ACCESS_ONCE(node->locked)) | ||
| 142 | arch_mutex_cpu_relax(); | ||
| 143 | } | ||
| 144 | |||
| 145 | static void mspin_unlock(struct mspin_node **lock, struct mspin_node *node) | ||
| 146 | { | ||
| 147 | struct mspin_node *next = ACCESS_ONCE(node->next); | ||
| 148 | |||
| 149 | if (likely(!next)) { | ||
| 150 | /* | ||
| 151 | * Release the lock by setting it to NULL | ||
| 152 | */ | ||
| 153 | if (cmpxchg(lock, node, NULL) == node) | ||
| 154 | return; | ||
| 155 | /* Wait until the next pointer is set */ | ||
| 156 | while (!(next = ACCESS_ONCE(node->next))) | ||
| 157 | arch_mutex_cpu_relax(); | ||
| 158 | } | ||
| 159 | ACCESS_ONCE(next->locked) = 1; | ||
| 160 | smp_wmb(); | ||
| 161 | } | ||
| 162 | 123 | ||
| 163 | /* | 124 | /* |
| 164 | * Mutex spinning code migrated from kernel/sched/core.c | 125 | * Mutex spinning code migrated from kernel/sched/core.c |
| @@ -212,6 +173,9 @@ static inline int mutex_can_spin_on_owner(struct mutex *lock) | |||
| 212 | struct task_struct *owner; | 173 | struct task_struct *owner; |
| 213 | int retval = 1; | 174 | int retval = 1; |
| 214 | 175 | ||
| 176 | if (need_resched()) | ||
| 177 | return 0; | ||
| 178 | |||
| 215 | rcu_read_lock(); | 179 | rcu_read_lock(); |
| 216 | owner = ACCESS_ONCE(lock->owner); | 180 | owner = ACCESS_ONCE(lock->owner); |
| 217 | if (owner) | 181 | if (owner) |
| @@ -446,9 +410,11 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, | |||
| 446 | if (!mutex_can_spin_on_owner(lock)) | 410 | if (!mutex_can_spin_on_owner(lock)) |
| 447 | goto slowpath; | 411 | goto slowpath; |
| 448 | 412 | ||
| 413 | if (!osq_lock(&lock->osq)) | ||
| 414 | goto slowpath; | ||
| 415 | |||
| 449 | for (;;) { | 416 | for (;;) { |
| 450 | struct task_struct *owner; | 417 | struct task_struct *owner; |
| 451 | struct mspin_node node; | ||
| 452 | 418 | ||
| 453 | if (use_ww_ctx && ww_ctx->acquired > 0) { | 419 | if (use_ww_ctx && ww_ctx->acquired > 0) { |
| 454 | struct ww_mutex *ww; | 420 | struct ww_mutex *ww; |
| @@ -463,19 +429,16 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, | |||
| 463 | * performed the optimistic spinning cannot be done. | 429 | * performed the optimistic spinning cannot be done. |
| 464 | */ | 430 | */ |
| 465 | if (ACCESS_ONCE(ww->ctx)) | 431 | if (ACCESS_ONCE(ww->ctx)) |
| 466 | goto slowpath; | 432 | break; |
| 467 | } | 433 | } |
| 468 | 434 | ||
| 469 | /* | 435 | /* |
| 470 | * If there's an owner, wait for it to either | 436 | * If there's an owner, wait for it to either |
| 471 | * release the lock or go to sleep. | 437 | * release the lock or go to sleep. |
| 472 | */ | 438 | */ |
| 473 | mspin_lock(MLOCK(lock), &node); | ||
| 474 | owner = ACCESS_ONCE(lock->owner); | 439 | owner = ACCESS_ONCE(lock->owner); |
| 475 | if (owner && !mutex_spin_on_owner(lock, owner)) { | 440 | if (owner && !mutex_spin_on_owner(lock, owner)) |
| 476 | mspin_unlock(MLOCK(lock), &node); | 441 | break; |
| 477 | goto slowpath; | ||
| 478 | } | ||
| 479 | 442 | ||
| 480 | if ((atomic_read(&lock->count) == 1) && | 443 | if ((atomic_read(&lock->count) == 1) && |
| 481 | (atomic_cmpxchg(&lock->count, 1, 0) == 1)) { | 444 | (atomic_cmpxchg(&lock->count, 1, 0) == 1)) { |
| @@ -488,11 +451,10 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, | |||
| 488 | } | 451 | } |
| 489 | 452 | ||
| 490 | mutex_set_owner(lock); | 453 | mutex_set_owner(lock); |
| 491 | mspin_unlock(MLOCK(lock), &node); | 454 | osq_unlock(&lock->osq); |
| 492 | preempt_enable(); | 455 | preempt_enable(); |
| 493 | return 0; | 456 | return 0; |
| 494 | } | 457 | } |
| 495 | mspin_unlock(MLOCK(lock), &node); | ||
| 496 | 458 | ||
| 497 | /* | 459 | /* |
| 498 | * When there's no owner, we might have preempted between the | 460 | * When there's no owner, we might have preempted between the |
| @@ -501,7 +463,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, | |||
| 501 | * the owner complete. | 463 | * the owner complete. |
| 502 | */ | 464 | */ |
| 503 | if (!owner && (need_resched() || rt_task(task))) | 465 | if (!owner && (need_resched() || rt_task(task))) |
| 504 | goto slowpath; | 466 | break; |
| 505 | 467 | ||
| 506 | /* | 468 | /* |
| 507 | * The cpu_relax() call is a compiler barrier which forces | 469 | * The cpu_relax() call is a compiler barrier which forces |
| @@ -511,7 +473,15 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, | |||
| 511 | */ | 473 | */ |
| 512 | arch_mutex_cpu_relax(); | 474 | arch_mutex_cpu_relax(); |
| 513 | } | 475 | } |
| 476 | osq_unlock(&lock->osq); | ||
| 514 | slowpath: | 477 | slowpath: |
| 478 | /* | ||
| 479 | * If we fell out of the spin path because of need_resched(), | ||
| 480 | * reschedule now, before we try-lock the mutex. This avoids getting | ||
| 481 | * scheduled out right after we obtained the mutex. | ||
| 482 | */ | ||
| 483 | if (need_resched()) | ||
| 484 | schedule_preempt_disabled(); | ||
| 515 | #endif | 485 | #endif |
| 516 | spin_lock_mutex(&lock->wait_lock, flags); | 486 | spin_lock_mutex(&lock->wait_lock, flags); |
| 517 | 487 | ||
| @@ -717,10 +687,6 @@ __mutex_unlock_common_slowpath(atomic_t *lock_count, int nested) | |||
| 717 | struct mutex *lock = container_of(lock_count, struct mutex, count); | 687 | struct mutex *lock = container_of(lock_count, struct mutex, count); |
| 718 | unsigned long flags; | 688 | unsigned long flags; |
| 719 | 689 | ||
| 720 | spin_lock_mutex(&lock->wait_lock, flags); | ||
| 721 | mutex_release(&lock->dep_map, nested, _RET_IP_); | ||
| 722 | debug_mutex_unlock(lock); | ||
| 723 | |||
| 724 | /* | 690 | /* |
| 725 | * some architectures leave the lock unlocked in the fastpath failure | 691 | * some architectures leave the lock unlocked in the fastpath failure |
| 726 | * case, others need to leave it locked. In the latter case we have to | 692 | * case, others need to leave it locked. In the latter case we have to |
| @@ -729,6 +695,10 @@ __mutex_unlock_common_slowpath(atomic_t *lock_count, int nested) | |||
| 729 | if (__mutex_slowpath_needs_to_unlock()) | 695 | if (__mutex_slowpath_needs_to_unlock()) |
| 730 | atomic_set(&lock->count, 1); | 696 | atomic_set(&lock->count, 1); |
| 731 | 697 | ||
| 698 | spin_lock_mutex(&lock->wait_lock, flags); | ||
| 699 | mutex_release(&lock->dep_map, nested, _RET_IP_); | ||
| 700 | debug_mutex_unlock(lock); | ||
| 701 | |||
| 732 | if (!list_empty(&lock->wait_list)) { | 702 | if (!list_empty(&lock->wait_list)) { |
| 733 | /* get the first entry from the wait-list: */ | 703 | /* get the first entry from the wait-list: */ |
| 734 | struct mutex_waiter *waiter = | 704 | struct mutex_waiter *waiter = |
