author     Thomas Gleixner <tglx@linutronix.de>    2014-06-11 14:44:04 -0400
committer  Thomas Gleixner <tglx@linutronix.de>    2014-06-16 04:03:09 -0400
commit     27e35715df54cbc4f2d044f681802ae30479e7fb (patch)
tree       deacb08195cf3cc2ea7480f103991bd4b3e4aa6b /kernel
parent     82084984383babe728e6e3c9a8e5c46278091315 (diff)
rtmutex: Plug slow unlock race
When the rtmutex fast path is enabled the slow unlock function can
create the following situation:

    spin_lock(foo->m->wait_lock);
    foo->m->owner = NULL;
                            rt_mutex_lock(foo->m); <-- fast path
                            free = atomic_dec_and_test(foo->refcnt);
                            rt_mutex_unlock(foo->m); <-- fast path
                            if (free)
                                    kfree(foo);
    spin_unlock(foo->m->wait_lock); <--- Use after free.
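
The window exists because the fast path never takes wait_lock at all:
with debugging disabled, lock and unlock are a single cmpxchg on
m->owner. A rough sketch of that fast path, simplified from
kernel/locking/rtmutex.c (debug hooks and the real slow path plumbing
are elided, so the signatures here are not verbatim):

    /*
     * Simplified sketch only -- the real helpers take slow path
     * callbacks and extra arguments.  The point: once the slow unlock
     * above has published m->owner = NULL, another task can run both
     * of these to completion without ever touching m->wait_lock.
     */
    void rt_mutex_lock(struct rt_mutex *lock)
    {
            /* NULL -> current: uncontended acquire, no wait_lock */
            if (likely(rt_mutex_cmpxchg(lock, NULL, current)))
                    return;
            rt_mutex_slowlock(lock);        /* contended case */
    }

    void rt_mutex_unlock(struct rt_mutex *lock)
    {
            /* current -> NULL: no waiters bit set, no wait_lock */
            if (likely(rt_mutex_cmpxchg(lock, current, NULL)))
                    return;
            rt_mutex_slowunlock(lock);      /* contended case */
    }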
Plug the race by changing the slow unlock to the following scheme:

    while (!rt_mutex_has_waiters(m)) {
            /* Clear the waiters bit in m->owner */
            clear_rt_mutex_waiters(m);
            owner = rt_mutex_owner(m);
            spin_unlock(m->wait_lock);
            if (cmpxchg(m->owner, owner, 0) == owner)
                    return;
            spin_lock(m->wait_lock);
    }
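
In the patch below this scheme is split in two: the loop body becomes
unlock_rt_mutex_safe() and the retry loop sits in rt_mutex_slowunlock().
Condensed from the diff (not verbatim, locking annotations dropped):

    static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock)
    {
            struct task_struct *owner = rt_mutex_owner(lock);

            clear_rt_mutex_waiters(lock);           /* clear the waiters bit */
            raw_spin_unlock(&lock->wait_lock);      /* open the race window ... */
            /* ... and only succeed if nobody set the waiters bit meanwhile */
            return rt_mutex_cmpxchg(lock, owner, NULL);
    }

    /* in rt_mutex_slowunlock(), entered with lock->wait_lock held: */
    while (!rt_mutex_has_waiters(lock)) {
            if (unlock_rt_mutex_safe(lock))         /* drops lock->wait_lock */
                    return;
            raw_spin_lock(&lock->wait_lock);        /* relock and try again */
    }
    wakeup_next_waiter(lock);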
So in case of a new waiter incoming while the owner tries the slow
path unlock we have two situations:

    unlock(wait_lock);
                            lock(wait_lock);
    cmpxchg(p, owner, 0) == owner
                            mark_rt_mutex_waiters(lock);
                            acquire(lock);

Or:

    unlock(wait_lock);
                            lock(wait_lock);
                            mark_rt_mutex_waiters(lock);
    cmpxchg(p, owner, 0) != owner
                            enqueue_waiter();
                            unlock(wait_lock);
    lock(wait_lock);
    wakeup_next_waiter();
    unlock(wait_lock);
                            lock(wait_lock);
                            acquire(lock);
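
The second case is safe because mark_rt_mutex_waiters() ORs
RT_MUTEX_HAS_WAITERS into lock->owner itself, so the unlocker's cmpxchg
against the plain owner pointer can no longer succeed and the unlock
falls back into the slow path. The helper, lightly reconstructed from
the hunk context in the diff below:

    static inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
    {
            unsigned long owner, *p = (unsigned long *) &lock->owner;

            /* Sticky: once set, a bare owner-pointer cmpxchg must fail */
            do {
                    owner = *p;
            } while (cmpxchg(p, owner, owner | RT_MUTEX_HAS_WAITERS) != owner);
    }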
If the fast path is disabled, then the simple

    m->owner = NULL;
    unlock(m->wait_lock);

is sufficient as all access to m->owner is serialized via
m->wait_lock.
Also document and clarify the wakeup_next_waiter function as suggested
by Oleg Nesterov.
Reported-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Steven Rostedt <rostedt@goodmis.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20140611183852.937945560@linutronix.de
Cc: stable@vger.kernel.org
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Diffstat (limited to 'kernel')
-rw-r--r--   kernel/locking/rtmutex.c   115
1 file changed, 109 insertions(+), 6 deletions(-)
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index a8a83a22bb91..fc605941b9b8 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -83,6 +83,47 @@ static inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
 		owner = *p;
 	} while (cmpxchg(p, owner, owner | RT_MUTEX_HAS_WAITERS) != owner);
 }
+
+/*
+ * Safe fastpath aware unlock:
+ * 1) Clear the waiters bit
+ * 2) Drop lock->wait_lock
+ * 3) Try to unlock the lock with cmpxchg
+ */
+static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock)
+	__releases(lock->wait_lock)
+{
+	struct task_struct *owner = rt_mutex_owner(lock);
+
+	clear_rt_mutex_waiters(lock);
+	raw_spin_unlock(&lock->wait_lock);
+	/*
+	 * If a new waiter comes in between the unlock and the cmpxchg
+	 * we have two situations:
+	 *
+	 * unlock(wait_lock);
+	 *					lock(wait_lock);
+	 * cmpxchg(p, owner, 0) == owner
+	 *					mark_rt_mutex_waiters(lock);
+	 *					acquire(lock);
+	 * or:
+	 *
+	 * unlock(wait_lock);
+	 *					lock(wait_lock);
+	 *					mark_rt_mutex_waiters(lock);
+	 *
+	 * cmpxchg(p, owner, 0) != owner
+	 *					enqueue_waiter();
+	 *					unlock(wait_lock);
+	 * lock(wait_lock);
+	 * wake waiter();
+	 * unlock(wait_lock);
+	 *					lock(wait_lock);
+	 *					acquire(lock);
+	 */
+	return rt_mutex_cmpxchg(lock, owner, NULL);
+}
+
 #else
 # define rt_mutex_cmpxchg(l,c,n)	(0)
 static inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
@@ -90,6 +131,17 @@ static inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
 	lock->owner = (struct task_struct *)
 			((unsigned long)lock->owner | RT_MUTEX_HAS_WAITERS);
 }
+
+/*
+ * Simple slow path only version: lock->owner is protected by lock->wait_lock.
+ */
+static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock)
+	__releases(lock->wait_lock)
+{
+	lock->owner = NULL;
+	raw_spin_unlock(&lock->wait_lock);
+	return true;
+}
 #endif
 
 static inline int
@@ -650,7 +702,8 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
 /*
  * Wake up the next waiter on the lock.
  *
- * Remove the top waiter from the current tasks waiter list and wake it up.
+ * Remove the top waiter from the current tasks pi waiter list and
+ * wake it up.
  *
  * Called with lock->wait_lock held.
  */
@@ -671,10 +724,23 @@ static void wakeup_next_waiter(struct rt_mutex *lock)
 	 */
 	rt_mutex_dequeue_pi(current, waiter);
 
-	rt_mutex_set_owner(lock, NULL);
+	/*
+	 * As we are waking up the top waiter, and the waiter stays
+	 * queued on the lock until it gets the lock, this lock
+	 * obviously has waiters. Just set the bit here and this has
+	 * the added benefit of forcing all new tasks into the
+	 * slow path making sure no task of lower priority than
+	 * the top waiter can steal this lock.
+	 */
+	lock->owner = (void *) RT_MUTEX_HAS_WAITERS;
 
 	raw_spin_unlock_irqrestore(&current->pi_lock, flags);
 
+	/*
+	 * It's safe to dereference waiter as it cannot go away as
+	 * long as we hold lock->wait_lock. The waiter task needs to
+	 * acquire it in order to dequeue the waiter.
+	 */
 	wake_up_process(waiter->task);
 }
 
@@ -928,12 +994,49 @@ rt_mutex_slowunlock(struct rt_mutex *lock)
 
 	rt_mutex_deadlock_account_unlock(current);
 
-	if (!rt_mutex_has_waiters(lock)) {
-		lock->owner = NULL;
-		raw_spin_unlock(&lock->wait_lock);
-		return;
+	/*
+	 * We must be careful here if the fast path is enabled. If we
+	 * have no waiters queued we cannot set owner to NULL here
+	 * because of:
+	 *
+	 *	foo->lock->owner = NULL;
+	 *			rtmutex_lock(foo->lock);   <- fast path
+	 *			free = atomic_dec_and_test(foo->refcnt);
+	 *			rtmutex_unlock(foo->lock); <- fast path
+	 *			if (free)
+	 *				kfree(foo);
+	 *	raw_spin_unlock(foo->lock->wait_lock);
+	 *
+	 * So for the fastpath enabled kernel:
+	 *
+	 * Nothing can set the waiters bit as long as we hold
+	 * lock->wait_lock. So we do the following sequence:
+	 *
+	 *	owner = rt_mutex_owner(lock);
+	 *	clear_rt_mutex_waiters(lock);
+	 *	raw_spin_unlock(&lock->wait_lock);
+	 *	if (cmpxchg(&lock->owner, owner, 0) == owner)
+	 *		return;
+	 *	goto retry;
+	 *
+	 * The fastpath disabled variant is simple as all access to
+	 * lock->owner is serialized by lock->wait_lock:
+	 *
+	 *	lock->owner = NULL;
+	 *	raw_spin_unlock(&lock->wait_lock);
+	 */
+	while (!rt_mutex_has_waiters(lock)) {
+		/* Drops lock->wait_lock ! */
+		if (unlock_rt_mutex_safe(lock) == true)
+			return;
+		/* Relock the rtmutex and try again */
+		raw_spin_lock(&lock->wait_lock);
 	}
 
+	/*
+	 * The wakeup next waiter path does not suffer from the above
+	 * race. See the comments there.
+	 */
 	wakeup_next_waiter(lock);
 
 	raw_spin_unlock(&lock->wait_lock);