author     Thomas Gleixner <tglx@linutronix.de>    2014-06-11 14:44:04 -0400
committer  Thomas Gleixner <tglx@linutronix.de>    2014-06-16 04:03:09 -0400
commit     27e35715df54cbc4f2d044f681802ae30479e7fb (patch)
tree       deacb08195cf3cc2ea7480f103991bd4b3e4aa6b /kernel
parent     82084984383babe728e6e3c9a8e5c46278091315 (diff)
rtmutex: Plug slow unlock race
When the rtmutex fast path is enabled the slow unlock function can
create the following situation:

    spin_lock(foo->m->wait_lock);
    foo->m->owner = NULL;
                            rt_mutex_lock(foo->m);   <-- fast path
                            free = atomic_dec_and_test(foo->refcnt);
                            rt_mutex_unlock(foo->m); <-- fast path
                            if (free)
                               kfree(foo);

    spin_unlock(foo->m->wait_lock); <--- Use after free.

Plug the race by changing the slow unlock to the following scheme:

    while (!rt_mutex_has_waiters(m)) {
            /* Clear the waiters bit in m->owner */
            clear_rt_mutex_waiters(m);
            owner = rt_mutex_owner(m);
            spin_unlock(m->wait_lock);
            if (cmpxchg(m->owner, owner, 0) == owner)
                    return;
            spin_lock(m->wait_lock);
    }

So in case of a new waiter incoming while the owner tries the slow
path unlock we have two situations:

    unlock(wait_lock);
                            lock(wait_lock);
    cmpxchg(p, owner, 0) == owner
                            mark_rt_mutex_waiters(lock);
                            acquire(lock);

Or:

    unlock(wait_lock);
                            lock(wait_lock);
                            mark_rt_mutex_waiters(lock);

    cmpxchg(p, owner, 0) != owner
                            enqueue_waiter();
                            unlock(wait_lock);
    lock(wait_lock);
    wakeup_next_waiter();
    unlock(wait_lock);
                            lock(wait_lock);
                            acquire(lock);

If the fast path is disabled, then the simple

    m->owner = NULL;
    unlock(m->wait_lock);

is sufficient as all access to m->owner is serialized via
m->wait_lock.

Also document and clarify the wakeup_next_waiter() function as
suggested by Oleg Nesterov.

Reported-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Steven Rostedt <rostedt@goodmis.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20140611183852.937945560@linutronix.de
Cc: stable@vger.kernel.org
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
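To make the scheme above concrete, the following is a minimal userspace sketch of the safe slow unlock, with C11 atomics and a pthread spinlock standing in for the kernel's cmpxchg() and raw spinlock. The toy_rtmutex type and the toy_* helpers are invented for illustration only; this is a sketch of the idea, not the kernel implementation.

    /*
     * Userspace model of the safe slow unlock described in the changelog.
     * All names (toy_rtmutex, toy_unlock_safe, ...) are invented;
     * C11 atomics and pthread_spinlock_t stand in for the kernel's
     * cmpxchg() and raw spinlock.
     */
    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdint.h>

    #define TOY_HAS_WAITERS 1UL     /* waiters flag in bit 0, like RT_MUTEX_HAS_WAITERS */

    struct toy_rtmutex {
            pthread_spinlock_t wait_lock;   /* stands in for lock->wait_lock */
            atomic_uintptr_t owner;         /* owner token, possibly ORed with TOY_HAS_WAITERS */
            int nr_waiters;                 /* protected by wait_lock */
    };

    /*
     * Clear the waiters bit, drop wait_lock, then try to release ownership
     * with a compare-and-swap.  If a new waiter slipped in after wait_lock
     * was dropped and set the waiters bit again, the CAS fails and the
     * caller must retake wait_lock and retry.
     */
    static bool toy_unlock_safe(struct toy_rtmutex *m, uintptr_t me)
    {
            uintptr_t bare = me & ~TOY_HAS_WAITERS;
            uintptr_t expected = bare;

            atomic_store(&m->owner, bare);  /* clear the waiters bit */
            pthread_spin_unlock(&m->wait_lock);
            return atomic_compare_exchange_strong(&m->owner, &expected,
                                                  (uintptr_t)0);
    }

    static void toy_unlock_slow(struct toy_rtmutex *m, uintptr_t me)
    {
            pthread_spin_lock(&m->wait_lock);
            while (m->nr_waiters == 0) {
                    if (toy_unlock_safe(m, me))     /* drops wait_lock */
                            return;
                    pthread_spin_lock(&m->wait_lock);
            }
            /* ... wake the top waiter and hand the lock over here ... */
            pthread_spin_unlock(&m->wait_lock);
    }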
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/locking/rtmutex.c | 115
1 file changed, 109 insertions(+), 6 deletions(-)
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index a8a83a22bb91..fc605941b9b8 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -83,6 +83,47 @@ static inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
 		owner = *p;
 	} while (cmpxchg(p, owner, owner | RT_MUTEX_HAS_WAITERS) != owner);
 }
+
+/*
+ * Safe fastpath aware unlock:
+ * 1) Clear the waiters bit
+ * 2) Drop lock->wait_lock
+ * 3) Try to unlock the lock with cmpxchg
+ */
+static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock)
+	__releases(lock->wait_lock)
+{
+	struct task_struct *owner = rt_mutex_owner(lock);
+
+	clear_rt_mutex_waiters(lock);
+	raw_spin_unlock(&lock->wait_lock);
+	/*
+	 * If a new waiter comes in between the unlock and the cmpxchg
+	 * we have two situations:
+	 *
+	 * unlock(wait_lock);
+	 *					lock(wait_lock);
+	 * cmpxchg(p, owner, 0) == owner
+	 *					mark_rt_mutex_waiters(lock);
+	 *					acquire(lock);
+	 * or:
+	 *
+	 * unlock(wait_lock);
+	 *					lock(wait_lock);
+	 *					mark_rt_mutex_waiters(lock);
+	 *
+	 * cmpxchg(p, owner, 0) != owner
+	 *					enqueue_waiter();
+	 *					unlock(wait_lock);
+	 * lock(wait_lock);
+	 * wake waiter();
+	 * unlock(wait_lock);
+	 *					lock(wait_lock);
+	 *					acquire(lock);
+	 */
+	return rt_mutex_cmpxchg(lock, owner, NULL);
+}
+
 #else
 # define rt_mutex_cmpxchg(l,c,n)	(0)
 static inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
@@ -90,6 +131,17 @@ static inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
 	lock->owner = (struct task_struct *)
 			((unsigned long)lock->owner | RT_MUTEX_HAS_WAITERS);
 }
+
+/*
+ * Simple slow path only version: lock->owner is protected by lock->wait_lock.
+ */
+static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock)
+	__releases(lock->wait_lock)
+{
+	lock->owner = NULL;
+	raw_spin_unlock(&lock->wait_lock);
+	return true;
+}
 #endif
 
 static inline int
@@ -650,7 +702,8 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
 /*
  * Wake up the next waiter on the lock.
  *
- * Remove the top waiter from the current tasks waiter list and wake it up.
+ * Remove the top waiter from the current tasks pi waiter list and
+ * wake it up.
  *
  * Called with lock->wait_lock held.
  */
@@ -671,10 +724,23 @@ static void wakeup_next_waiter(struct rt_mutex *lock)
 	 */
 	rt_mutex_dequeue_pi(current, waiter);
 
-	rt_mutex_set_owner(lock, NULL);
+	/*
+	 * As we are waking up the top waiter, and the waiter stays
+	 * queued on the lock until it gets the lock, this lock
+	 * obviously has waiters. Just set the bit here and this has
+	 * the added benefit of forcing all new tasks into the
+	 * slow path making sure no task of lower priority than
+	 * the top waiter can steal this lock.
+	 */
+	lock->owner = (void *) RT_MUTEX_HAS_WAITERS;
 
 	raw_spin_unlock_irqrestore(&current->pi_lock, flags);
 
+	/*
+	 * It's safe to dereference waiter as it cannot go away as
+	 * long as we hold lock->wait_lock. The waiter task needs to
+	 * acquire it in order to dequeue the waiter.
+	 */
 	wake_up_process(waiter->task);
 }
 
@@ -928,12 +994,49 @@ rt_mutex_slowunlock(struct rt_mutex *lock)
 
 	rt_mutex_deadlock_account_unlock(current);
 
-	if (!rt_mutex_has_waiters(lock)) {
-		lock->owner = NULL;
-		raw_spin_unlock(&lock->wait_lock);
-		return;
+	/*
+	 * We must be careful here if the fast path is enabled. If we
+	 * have no waiters queued we cannot set owner to NULL here
+	 * because of:
+	 *
+	 * foo->lock->owner = NULL;
+	 *			rtmutex_lock(foo->lock);   <- fast path
+	 * free = atomic_dec_and_test(foo->refcnt);
+	 *			rtmutex_unlock(foo->lock); <- fast path
+	 * if (free)
+	 *	kfree(foo);
+	 * raw_spin_unlock(foo->lock->wait_lock);
+	 *
+	 * So for the fastpath enabled kernel:
+	 *
+	 * Nothing can set the waiters bit as long as we hold
+	 * lock->wait_lock. So we do the following sequence:
+	 *
+	 *	owner = rt_mutex_owner(lock);
+	 *	clear_rt_mutex_waiters(lock);
+	 *	raw_spin_unlock(&lock->wait_lock);
+	 *	if (cmpxchg(&lock->owner, owner, 0) == owner)
+	 *		return;
+	 *	goto retry;
+	 *
+	 * The fastpath disabled variant is simple as all access to
+	 * lock->owner is serialized by lock->wait_lock:
+	 *
+	 *	lock->owner = NULL;
+	 *	raw_spin_unlock(&lock->wait_lock);
+	 */
+	while (!rt_mutex_has_waiters(lock)) {
+		/* Drops lock->wait_lock ! */
+		if (unlock_rt_mutex_safe(lock) == true)
+			return;
+		/* Relock the rtmutex and try again */
+		raw_spin_lock(&lock->wait_lock);
 	}
 
+	/*
+	 * The wakeup next waiter path does not suffer from the above
+	 * race. See the comments there.
+	 */
 	wakeup_next_waiter(lock);
 
 	raw_spin_unlock(&lock->wait_lock);
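
As a counterpart to the sketch after the changelog, the self-contained fragment below models the fast-path acquire and the mark-waiters step that the new unlock has to cope with: once the waiters bit is set, the compare-and-swap in the safe unlock no longer sees the bare owner token and the releasing task falls back into the slow path. Again, all names are invented and C11 atomics stand in for the kernel primitives.

    /*
     * Toy fast-path acquire and the shape of mark_rt_mutex_waiters().
     * Only the owner word matters for this fragment; the names are
     * invented and do not exist in the kernel.
     */
    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdint.h>

    #define TOY_HAS_WAITERS 1UL

    struct toy_rtmutex {
            atomic_uintptr_t owner;   /* owner token, possibly ORed with TOY_HAS_WAITERS */
    };

    /* Fast path: succeeds only if the lock is completely free, i.e. the
     * owner word is 0 and the waiters bit is clear. */
    static bool toy_trylock_fast(struct toy_rtmutex *m, uintptr_t me)
    {
            uintptr_t expected = 0;

            return atomic_compare_exchange_strong(&m->owner, &expected, me);
    }

    /* OR the waiters bit into the owner word with a CAS loop, keeping
     * whatever owner token is currently there.  After this, the CAS in
     * the safe unlock fails and the owner must take the slow path. */
    static void toy_mark_waiters(struct toy_rtmutex *m)
    {
            uintptr_t old = atomic_load(&m->owner);

            while (!atomic_compare_exchange_weak(&m->owner, &old,
                                                 old | TOY_HAS_WAITERS))
                    ;       /* 'old' was refreshed on failure, just retry */
    }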