author    Thomas Gleixner <tglx@linutronix.de>  2014-06-05 05:16:12 -0400
committer Thomas Gleixner <tglx@linutronix.de>  2014-06-07 08:55:40 -0400
commit    82084984383babe728e6e3c9a8e5c46278091315
tree      19efe9a2f33af39e566575ef2224e839ccc407e6 /kernel
parent    3d5c9340d1949733eb37616abd15db36aef9a57c
rtmutex: Detect changes in the pi lock chain
When we walk the lock chain, we drop all locks after each step. So the
lock chain can change under us before we reacquire the locks. That's
harmless in principle, as we just follow the wrong lock path. But it
can lead to a false positive in the deadlock detection logic:

T0 holds L0
T0 blocks on L1 held by T1
T1 blocks on L2 held by T2
T2 blocks on L3 held by T3
T3 blocks on L4 held by T4

Now we walk the chain:

lock T1 -> lock L2 -> adjust L2 -> unlock T1 ->
lock T2 -> adjust T2 -> drop locks

T2 times out and blocks on L0

Now we continue:

lock T2 -> lock L0 -> deadlock detected, but it's not a deadlock at all.

Brad tried to work around that in the deadlock detection logic itself,
but the more I looked at it the less I liked it, because it's crystal
ball magic after the fact.

We can actually detect a chain change very simply:

lock T1 -> lock L2 -> adjust L2 -> unlock T1 ->
lock T2 -> adjust T2 -> next_lock = T2->pi_blocked_on->lock;
drop locks

T2 times out and blocks on L0

Now we continue:

lock T2 ->

    if (next_lock != T2->pi_blocked_on->lock)
        return;

So if we detect that T2 is now blocked on a different lock, we stop the
chain walk. That's also correct in the following scenario:

lock T1 -> lock L2 -> adjust L2 -> unlock T1 ->
lock T2 -> adjust T2 -> next_lock = T2->pi_blocked_on->lock;
drop locks

T3 times out and drops L3

T2 acquires L3 and blocks on L4 now

Now we continue:

lock T2 ->

    if (next_lock != T2->pi_blocked_on->lock)
        return;

We don't have to follow up the chain at that point, because T2 already
propagated our priority up to T4.

[ Folded a cleanup patch from peterz ]

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reported-by: Brad Mouring <bmouring@ni.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20140605152801.930031935@linutronix.de
Cc: stable@vger.kernel.org
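The idea is easy to demonstrate outside the kernel: remember which lock the
blocked task was waiting on before the locks are dropped, then compare after
they are reacquired and bail out if it changed. Below is a minimal standalone
sketch; the fake_task/fake_lock types and the walk_step() helper are made-up
stand-ins for illustration only, not the rt_mutex code from the patch:

/*
 * Standalone illustration of the "next_lock" chain-change check.
 * Build: cc -Wall sketch.c && ./a.out
 */
#include <stdio.h>

struct fake_lock;

struct fake_task {
        const char *name;
        struct fake_lock *blocked_on;   /* lock this task currently waits for */
};

struct fake_lock {
        const char *name;
        struct fake_task *owner;        /* task currently holding the lock */
};

/*
 * One step of a simplified chain walk: next_lock is the snapshot taken
 * before the locks were "dropped". If the owner is now blocked on a
 * different lock, the chain changed under us and we stop instead of
 * reporting a bogus deadlock. next_lock is only compared, never
 * dereferenced in that case.
 */
static int walk_step(struct fake_task *owner, struct fake_lock *next_lock)
{
        if (owner->blocked_on != next_lock) {
                printf("%s moved to a different lock, stop the chain walk\n",
                       owner->name);
                return 0;       /* chain changed, nothing more to do */
        }
        printf("%s still blocked on %s, continue walking\n",
               owner->name, next_lock->name);
        return 1;
}

int main(void)
{
        struct fake_lock L0 = { "L0", NULL }, L2 = { "L2", NULL };
        struct fake_task T2 = { "T2", &L2 };

        /* Snapshot taken while the locks were still held. */
        struct fake_lock *next_lock = T2.blocked_on;

        /* "Locks dropped": T2 times out on L2 and blocks on L0 instead. */
        T2.blocked_on = &L0;

        /* "Locks reacquired": the stale snapshot no longer matches. */
        walk_step(&T2, next_lock);
        return 0;
}

When the comparison fails the walker simply returns; as the commit message
notes, the task that moved has already propagated the priority further up
its new chain, so there is nothing left for this walk to do.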
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/locking/rtmutex.c | 95
 1 file changed, 71 insertions(+), 24 deletions(-)
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index eb7a46327798..a8a83a22bb91 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -260,27 +260,36 @@ static void rt_mutex_adjust_prio(struct task_struct *task)
  */
 int max_lock_depth = 1024;
 
+static inline struct rt_mutex *task_blocked_on_lock(struct task_struct *p)
+{
+        return p->pi_blocked_on ? p->pi_blocked_on->lock : NULL;
+}
+
 /*
  * Adjust the priority chain. Also used for deadlock detection.
  * Decreases task's usage by one - may thus free the task.
  *
- * @task:       the task owning the mutex (owner) for which a chain walk is probably
- *              needed
+ * @task:       the task owning the mutex (owner) for which a chain walk is
+ *              probably needed
  * @deadlock_detect: do we have to carry out deadlock detection?
  * @orig_lock:  the mutex (can be NULL if we are walking the chain to recheck
  *              things for a task that has just got its priority adjusted, and
  *              is waiting on a mutex)
+ * @next_lock:  the mutex on which the owner of @orig_lock was blocked before
+ *              we dropped its pi_lock. Is never dereferenced, only used for
+ *              comparison to detect lock chain changes.
  * @orig_waiter: rt_mutex_waiter struct for the task that has just donated
  *              its priority to the mutex owner (can be NULL in the case
  *              depicted above or if the top waiter is gone away and we are
  *              actually deboosting the owner)
  * @top_task:   the current top waiter
  *
  * Returns 0 or -EDEADLK.
  */
 static int rt_mutex_adjust_prio_chain(struct task_struct *task,
                                       int deadlock_detect,
                                       struct rt_mutex *orig_lock,
+                                      struct rt_mutex *next_lock,
                                       struct rt_mutex_waiter *orig_waiter,
                                       struct task_struct *top_task)
 {
@@ -339,6 +348,18 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
                 goto out_unlock_pi;
 
         /*
+         * We dropped all locks after taking a refcount on @task, so
+         * the task might have moved on in the lock chain or even left
+         * the chain completely and blocks now on an unrelated lock or
+         * on @orig_lock.
+         *
+         * We stored the lock on which @task was blocked in @next_lock,
+         * so we can detect the chain change.
+         */
+        if (next_lock != waiter->lock)
+                goto out_unlock_pi;
+
+        /*
          * Drop out, when the task has no waiters. Note,
          * top_waiter can be NULL, when we are in the deboosting
          * mode!
@@ -422,11 +443,26 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
                 __rt_mutex_adjust_prio(task);
         }
 
+        /*
+         * Check whether the task which owns the current lock is pi
+         * blocked itself. If yes we store a pointer to the lock for
+         * the lock chain change detection above. After we dropped
+         * task->pi_lock next_lock cannot be dereferenced anymore.
+         */
+        next_lock = task_blocked_on_lock(task);
+
         raw_spin_unlock_irqrestore(&task->pi_lock, flags);
 
         top_waiter = rt_mutex_top_waiter(lock);
         raw_spin_unlock(&lock->wait_lock);
 
+        /*
+         * We reached the end of the lock chain. Stop right here. No
+         * point to go back just to figure that out.
+         */
+        if (!next_lock)
+                goto out_put_task;
+
         if (!detect_deadlock && waiter != top_waiter)
                 goto out_put_task;
 
@@ -536,8 +572,9 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
 {
         struct task_struct *owner = rt_mutex_owner(lock);
         struct rt_mutex_waiter *top_waiter = waiter;
-        unsigned long flags;
+        struct rt_mutex *next_lock;
         int chain_walk = 0, res;
+        unsigned long flags;
 
         /*
          * Early deadlock detection. We really don't want the task to
@@ -569,20 +606,28 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
         if (!owner)
                 return 0;
 
+        raw_spin_lock_irqsave(&owner->pi_lock, flags);
         if (waiter == rt_mutex_top_waiter(lock)) {
-                raw_spin_lock_irqsave(&owner->pi_lock, flags);
                 rt_mutex_dequeue_pi(owner, top_waiter);
                 rt_mutex_enqueue_pi(owner, waiter);
 
                 __rt_mutex_adjust_prio(owner);
                 if (owner->pi_blocked_on)
                         chain_walk = 1;
-                raw_spin_unlock_irqrestore(&owner->pi_lock, flags);
-        }
-        else if (debug_rt_mutex_detect_deadlock(waiter, detect_deadlock))
+        } else if (debug_rt_mutex_detect_deadlock(waiter, detect_deadlock)) {
                 chain_walk = 1;
+        }
 
-        if (!chain_walk)
+        /* Store the lock on which owner is blocked or NULL */
+        next_lock = task_blocked_on_lock(owner);
+
+        raw_spin_unlock_irqrestore(&owner->pi_lock, flags);
+        /*
+         * Even if full deadlock detection is on, if the owner is not
+         * blocked itself, we can avoid finding this out in the chain
+         * walk.
+         */
+        if (!chain_walk || !next_lock)
                 return 0;
 
         /*
@@ -594,8 +639,8 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
 
         raw_spin_unlock(&lock->wait_lock);
 
-        res = rt_mutex_adjust_prio_chain(owner, detect_deadlock, lock, waiter,
-                                         task);
+        res = rt_mutex_adjust_prio_chain(owner, detect_deadlock, lock,
+                                         next_lock, waiter, task);
 
         raw_spin_lock(&lock->wait_lock);
 
@@ -644,8 +689,8 @@ static void remove_waiter(struct rt_mutex *lock,
 {
         int first = (waiter == rt_mutex_top_waiter(lock));
         struct task_struct *owner = rt_mutex_owner(lock);
+        struct rt_mutex *next_lock = NULL;
         unsigned long flags;
-        int chain_walk = 0;
 
         raw_spin_lock_irqsave(&current->pi_lock, flags);
         rt_mutex_dequeue(lock, waiter);
@@ -669,13 +714,13 @@ static void remove_waiter(struct rt_mutex *lock,
                 }
                 __rt_mutex_adjust_prio(owner);
 
-                if (owner->pi_blocked_on)
-                        chain_walk = 1;
+                /* Store the lock on which owner is blocked or NULL */
+                next_lock = task_blocked_on_lock(owner);
 
                 raw_spin_unlock_irqrestore(&owner->pi_lock, flags);
         }
 
-        if (!chain_walk)
+        if (!next_lock)
                 return;
 
         /* gets dropped in rt_mutex_adjust_prio_chain()! */
@@ -683,7 +728,7 @@ static void remove_waiter(struct rt_mutex *lock,
 
         raw_spin_unlock(&lock->wait_lock);
 
-        rt_mutex_adjust_prio_chain(owner, 0, lock, NULL, current);
+        rt_mutex_adjust_prio_chain(owner, 0, lock, next_lock, NULL, current);
 
         raw_spin_lock(&lock->wait_lock);
 }
@@ -696,6 +741,7 @@ static void remove_waiter(struct rt_mutex *lock,
 void rt_mutex_adjust_pi(struct task_struct *task)
 {
         struct rt_mutex_waiter *waiter;
+        struct rt_mutex *next_lock;
         unsigned long flags;
 
         raw_spin_lock_irqsave(&task->pi_lock, flags);
@@ -706,12 +752,13 @@ void rt_mutex_adjust_pi(struct task_struct *task)
                 raw_spin_unlock_irqrestore(&task->pi_lock, flags);
                 return;
         }
-
+        next_lock = waiter->lock;
         raw_spin_unlock_irqrestore(&task->pi_lock, flags);
 
         /* gets dropped in rt_mutex_adjust_prio_chain()! */
         get_task_struct(task);
-        rt_mutex_adjust_prio_chain(task, 0, NULL, NULL, task);
+
+        rt_mutex_adjust_prio_chain(task, 0, NULL, next_lock, NULL, task);
 }
 
 /**