author     Peter Zijlstra <peterz@infradead.org>    2017-01-11 08:17:48 -0500
committer  Ingo Molnar <mingo@kernel.org>           2017-01-14 05:14:38 -0500
commit     e274795ea7b7caa0fd74ef651594382a69e2a951 (patch)
tree       345b942f86040c9361e1a2b0bc45f58f7b48bb19 /kernel
parent     52b94129f274937e4c25dd17b76697664a3c43c9 (diff)
locking/mutex: Fix mutex handoff
While reviewing the ww_mutex patches, I noticed that it was still
possible to (incorrectly) succeed for (incorrect) code like:

	mutex_lock(&a);
	mutex_lock(&a);

This was possible if the second mutex_lock() would block (as expected)
but then receive a spurious wakeup. At that point it would find itself
at the front of the queue, request a handoff and instantly claim
ownership and continue, since owner would point to itself.

Avoid this scenario and simplify the code by introducing a third low
bit to signal handoff pickup. So once we request a handoff, unlock
clears the handoff bit and sets the pickup bit along with the new
owner.

This also removes the need for the .handoff argument to
__mutex_trylock(), since that becomes superfluous with PICKUP.

In order to guarantee enough low bits, ensure task_struct alignment is
at least L1_CACHE_BYTES (which seems a good idea regardless).

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Fixes: 9d659ae14b54 ("locking/mutex: Add lock handoff to avoid starvation")
Signed-off-by: Ingo Molnar <mingo@kernel.org>
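To make the owner-word protocol described above concrete, here is a minimal single-threaded C sketch (an editor's illustration, not part of the patch). It packs the three low flag bits into a fake, suitably aligned "task pointer" and shows why a second lock attempt by the current owner can no longer succeed unless a handoff was explicitly marked for pickup. The helper names, flag macros and task addresses are invented for the demo; the real code operates on lock->owner with atomic cmpxchg and memory barriers.

	#include <assert.h>

	#define FLAG_WAITERS	0x01UL	/* models MUTEX_FLAG_WAITERS */
	#define FLAG_HANDOFF	0x02UL	/* models MUTEX_FLAG_HANDOFF */
	#define FLAG_PICKUP	0x04UL	/* models MUTEX_FLAG_PICKUP  */
	#define FLAGS		0x07UL	/* models MUTEX_FLAGS        */

	static unsigned long owner;	/* models lock->owner: task pointer | flags */

	/* unlock side of a handoff: install @task as owner, mark it for pickup */
	static void handoff(unsigned long task)
	{
		owner = (owner & FLAG_WAITERS) | task | FLAG_PICKUP;
	}

	/* @curr may take the lock only if it is free or was handed off to @curr */
	static int trylock(unsigned long curr)
	{
		unsigned long task  = owner & ~FLAGS;
		unsigned long flags = owner & FLAGS;

		if (task && (task != curr || !(flags & FLAG_PICKUP)))
			return 0;	/* owned by someone else, or no pickup pending */

		flags &= ~(FLAG_PICKUP | FLAG_HANDOFF);
		owner = curr | flags;
		return 1;
	}

	int main(void)
	{
		unsigned long A = 0x1000, B = 0x2000;	/* fake, aligned "tasks" */

		assert(trylock(A));	/* lock is free: A acquires it            */
		assert(!trylock(A));	/* no PICKUP for A: the double lock fails */
		assert(!trylock(B));	/* owned by A: B must wait                */
		handoff(B);		/* A unlocks straight to B                */
		assert(!trylock(A));	/* the handoff is not meant for A         */
		assert(trylock(B));	/* B picks the lock up                    */
		return 0;
	}

Before this change the owner-equals-current case could succeed once a handoff had been requested, which is exactly the incorrect double lock the changelog describes; with PICKUP, only the unlock path can grant ownership back to a waiter.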
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/fork.c             6
-rw-r--r--  kernel/locking/mutex.c  108
2 files changed, 56 insertions, 58 deletions
diff --git a/kernel/fork.c b/kernel/fork.c
index 11c5c8ab827c..a90510d0bbf8 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -432,11 +432,13 @@ void __init fork_init(void)
 	int i;
 #ifndef CONFIG_ARCH_TASK_STRUCT_ALLOCATOR
 #ifndef ARCH_MIN_TASKALIGN
-#define ARCH_MIN_TASKALIGN	L1_CACHE_BYTES
+#define ARCH_MIN_TASKALIGN	0
 #endif
+	int align = max_t(int, L1_CACHE_BYTES, ARCH_MIN_TASKALIGN);
+
 	/* create a slab on which task_structs can be allocated */
 	task_struct_cachep = kmem_cache_create("task_struct",
 			arch_task_struct_size, ARCH_MIN_TASKALIGN,
+			arch_task_struct_size, align,
 			SLAB_PANIC|SLAB_NOTRACK|SLAB_ACCOUNT, NULL);
 #endif
 
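A quick sanity check of the alignment change above, as an editor's sketch with assumed constant values (L1_CACHE_BYTES is architecture dependent): with the new fallback ARCH_MIN_TASKALIGN of 0, taking the maximum of the two values is what guarantees the "at least L1_CACHE_BYTES" alignment the changelog asks for, and with it the three free low bits in the owner word.

	#include <stdio.h>

	#define L1_CACHE_BYTES		64	/* assumed value for the demo */
	#define ARCH_MIN_TASKALIGN	0	/* the new fallback from the hunk above */

	/* same shape as the kernel's max_t(): compare after casting to @type */
	#define max_t(type, a, b)	((type)(a) > (type)(b) ? (type)(a) : (type)(b))

	int main(void)
	{
		int align = max_t(int, L1_CACHE_BYTES, ARCH_MIN_TASKALIGN);

		/* prints 64: task_structs stay at least cache-line aligned */
		printf("task_struct slab alignment = %d\n", align);
		return 0;
	}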
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index 97d142486f93..24284497c425 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -50,16 +50,17 @@ EXPORT_SYMBOL(__mutex_init);
 /*
  * @owner: contains: 'struct task_struct *' to the current lock owner,
  * NULL means not owned. Since task_struct pointers are aligned at
- * ARCH_MIN_TASKALIGN (which is at least sizeof(void *)), we have low
- * bits to store extra state.
+ * at least L1_CACHE_BYTES, we have low bits to store extra state.
  *
  * Bit0 indicates a non-empty waiter list; unlock must issue a wakeup.
  * Bit1 indicates unlock needs to hand the lock to the top-waiter
+ * Bit2 indicates handoff has been done and we're waiting for pickup.
  */
 #define MUTEX_FLAG_WAITERS	0x01
 #define MUTEX_FLAG_HANDOFF	0x02
+#define MUTEX_FLAG_PICKUP	0x04
 
-#define MUTEX_FLAGS		0x03
+#define MUTEX_FLAGS		0x07
 
 static inline struct task_struct *__owner_task(unsigned long owner)
 {
@@ -72,38 +73,29 @@ static inline unsigned long __owner_flags(unsigned long owner)
 }
 
 /*
- * Actual trylock that will work on any unlocked state.
- *
- * When setting the owner field, we must preserve the low flag bits.
- *
- * Be careful with @handoff, only set that in a wait-loop (where you set
- * HANDOFF) to avoid recursive lock attempts.
+ * Trylock variant that returns the owning task on failure.
  */
-static inline bool __mutex_trylock(struct mutex *lock, const bool handoff)
+static inline struct task_struct *__mutex_trylock_or_owner(struct mutex *lock)
 {
 	unsigned long owner, curr = (unsigned long)current;
 
 	owner = atomic_long_read(&lock->owner);
 	for (;;) { /* must loop, can race against a flag */
 		unsigned long old, flags = __owner_flags(owner);
+		unsigned long task = owner & ~MUTEX_FLAGS;
+
+		if (task) {
+			if (likely(task != curr))
+				break;
+
+			if (likely(!(flags & MUTEX_FLAG_PICKUP)))
+				break;
 
-		if (__owner_task(owner)) {
-			if (handoff && unlikely(__owner_task(owner) == current)) {
-				/*
-				 * Provide ACQUIRE semantics for the lock-handoff.
-				 *
-				 * We cannot easily use load-acquire here, since
-				 * the actual load is a failed cmpxchg, which
-				 * doesn't imply any barriers.
-				 *
-				 * Also, this is a fairly unlikely scenario, and
-				 * this contains the cost.
-				 */
-				smp_mb(); /* ACQUIRE */
-				return true;
-			}
-
-			return false;
+			flags &= ~MUTEX_FLAG_PICKUP;
+		} else {
+#ifdef CONFIG_DEBUG_MUTEXES
+			DEBUG_LOCKS_WARN_ON(flags & MUTEX_FLAG_PICKUP);
+#endif
 		}
 
 		/*
@@ -111,15 +103,24 @@ static inline bool __mutex_trylock(struct mutex *lock, const bool handoff)
 		 * past the point where we acquire it. This would be possible
 		 * if we (accidentally) set the bit on an unlocked mutex.
 		 */
-		if (handoff)
-			flags &= ~MUTEX_FLAG_HANDOFF;
+		flags &= ~MUTEX_FLAG_HANDOFF;
 
 		old = atomic_long_cmpxchg_acquire(&lock->owner, owner, curr | flags);
 		if (old == owner)
-			return true;
+			return NULL;
 
 		owner = old;
 	}
+
+	return __owner_task(owner);
+}
+
+/*
+ * Actual trylock that will work on any unlocked state.
+ */
+static inline bool __mutex_trylock(struct mutex *lock)
+{
+	return !__mutex_trylock_or_owner(lock);
 }
 
 #ifndef CONFIG_DEBUG_LOCK_ALLOC
@@ -171,9 +172,9 @@ static inline bool __mutex_waiter_is_first(struct mutex *lock, struct mutex_wait
 
 /*
  * Give up ownership to a specific task, when @task = NULL, this is equivalent
- * to a regular unlock. Clears HANDOFF, preserves WAITERS. Provides RELEASE
- * semantics like a regular unlock, the __mutex_trylock() provides matching
- * ACQUIRE semantics for the handoff.
+ * to a regular unlock. Sets PICKUP on a handoff, clears HANDOFF, preserves
+ * WAITERS. Provides RELEASE semantics like a regular unlock, the
+ * __mutex_trylock() provides matching ACQUIRE semantics for the handoff.
  */
 static void __mutex_handoff(struct mutex *lock, struct task_struct *task)
 {
@@ -184,10 +185,13 @@ static void __mutex_handoff(struct mutex *lock, struct task_struct *task)
 
 #ifdef CONFIG_DEBUG_MUTEXES
 		DEBUG_LOCKS_WARN_ON(__owner_task(owner) != current);
+		DEBUG_LOCKS_WARN_ON(owner & MUTEX_FLAG_PICKUP);
 #endif
 
 		new = (owner & MUTEX_FLAG_WAITERS);
 		new |= (unsigned long)task;
+		if (task)
+			new |= MUTEX_FLAG_PICKUP;
 
 		old = atomic_long_cmpxchg_release(&lock->owner, owner, new);
 		if (old == owner)
@@ -435,8 +439,6 @@ static bool mutex_optimistic_spin(struct mutex *lock,
 				  struct ww_acquire_ctx *ww_ctx,
 				  const bool use_ww_ctx, const bool waiter)
 {
-	struct task_struct *task = current;
-
 	if (!waiter) {
 		/*
 		 * The purpose of the mutex_can_spin_on_owner() function is
@@ -476,24 +478,17 @@ static bool mutex_optimistic_spin(struct mutex *lock,
 				goto fail_unlock;
 		}
 
+		/* Try to acquire the mutex... */
+		owner = __mutex_trylock_or_owner(lock);
+		if (!owner)
+			break;
+
 		/*
-		 * If there's an owner, wait for it to either
+		 * There's an owner, wait for it to either
 		 * release the lock or go to sleep.
 		 */
-		owner = __mutex_owner(lock);
-		if (owner) {
-			if (waiter && owner == task) {
-				smp_mb(); /* ACQUIRE */
-				break;
-			}
-
-			if (!mutex_spin_on_owner(lock, owner))
-				goto fail_unlock;
-		}
-
-		/* Try to acquire the mutex if it is unlocked. */
-		if (__mutex_trylock(lock, waiter))
-			break;
+		if (!mutex_spin_on_owner(lock, owner))
+			goto fail_unlock;
 
 		/*
 		 * The cpu_relax() call is a compiler barrier which forces
@@ -637,7 +632,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
 	preempt_disable();
 	mutex_acquire_nest(&lock->dep_map, subclass, 0, nest_lock, ip);
 
-	if (__mutex_trylock(lock, false) ||
+	if (__mutex_trylock(lock) ||
 	    mutex_optimistic_spin(lock, ww_ctx, use_ww_ctx, false)) {
 		/* got the lock, yay! */
 		lock_acquired(&lock->dep_map, ip);
@@ -651,7 +646,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
 	/*
 	 * After waiting to acquire the wait_lock, try again.
 	 */
-	if (__mutex_trylock(lock, false))
+	if (__mutex_trylock(lock))
 		goto skip_wait;
 
 	debug_mutex_lock_common(lock, &waiter);
@@ -674,7 +669,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
 		 * before testing the error conditions to make sure we pick up
 		 * the handoff.
 		 */
-		if (__mutex_trylock(lock, first))
+		if (__mutex_trylock(lock))
 			goto acquired;
 
 		/*
@@ -707,8 +702,8 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
 		 * state back to RUNNING and fall through the next schedule(),
 		 * or we must see its unlock and acquire.
 		 */
-		if ((first && mutex_optimistic_spin(lock, ww_ctx, use_ww_ctx, true)) ||
-		    __mutex_trylock(lock, first))
+		if (__mutex_trylock(lock) ||
+		    (first && mutex_optimistic_spin(lock, ww_ctx, use_ww_ctx, true)))
 			break;
 
 		spin_lock_mutex(&lock->wait_lock, flags);
@@ -865,6 +860,7 @@ static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock, unsigne
 
 #ifdef CONFIG_DEBUG_MUTEXES
 		DEBUG_LOCKS_WARN_ON(__owner_task(owner) != current);
+		DEBUG_LOCKS_WARN_ON(owner & MUTEX_FLAG_PICKUP);
 #endif
 
 		if (owner & MUTEX_FLAG_HANDOFF)
@@ -1003,7 +999,7 @@ __ww_mutex_lock_interruptible_slowpath(struct ww_mutex *lock,
  */
 int __sched mutex_trylock(struct mutex *lock)
 {
-	bool locked = __mutex_trylock(lock, false);
+	bool locked = __mutex_trylock(lock);
 
 	if (locked)
 		mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_);
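Finally, a stripped-down model of the reworked optimistic-spin loop from the @@ -476,24 +478,17 @@ hunk above (an editor's sketch, not kernel code): a single trylock-or-owner style call both attempts the acquire and names the owner to spin on, replacing the separate __mutex_owner() check and __mutex_trylock() call. The helpers below are simplified single-threaded stubs so the example compiles and runs; only the loop shape mirrors the patch.

	#include <stdbool.h>
	#include <stdio.h>

	struct task { const char *name; };

	static struct task *lock_owner;		/* NULL means the lock is free */

	/* stub for __mutex_trylock_or_owner(): acquire if free, else report owner */
	static struct task *trylock_or_owner(struct task *curr)
	{
		if (!lock_owner) {
			lock_owner = curr;
			return NULL;
		}
		return lock_owner;
	}

	/* stub for mutex_spin_on_owner(): pretend the owner releases while we spin */
	static bool spin_on_owner(struct task *owner)
	{
		printf("spinning on owner %s\n", owner->name);
		lock_owner = NULL;
		return true;
	}

	static bool optimistic_spin(struct task *curr)
	{
		for (;;) {
			/* try to acquire the mutex... */
			struct task *owner = trylock_or_owner(curr);
			if (!owner)
				return true;	/* got the lock */

			/* there's an owner: wait for it to release the lock */
			if (!spin_on_owner(owner))
				return false;	/* owner went to sleep: give up */
		}
	}

	int main(void)
	{
		struct task a = { "A" }, b = { "B" };

		lock_owner = &a;		/* A currently holds the lock */
		if (optimistic_spin(&b))
			printf("%s acquired the lock\n", b.name);
		return 0;
	}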