author    Thomas Gleixner <tglx@linutronix.de>  2014-06-05 05:16:12 -0400
committer Thomas Gleixner <tglx@linutronix.de>  2014-06-07 08:55:40 -0400
commit    82084984383babe728e6e3c9a8e5c46278091315
tree      19efe9a2f33af39e566575ef2224e839ccc407e6 /kernel
parent    3d5c9340d1949733eb37616abd15db36aef9a57c
rtmutex: Detect changes in the pi lock chain
When we walk the lock chain, we drop all locks after each step. So the
lock chain can change under us before we reacquire the locks. That's
harmless in principle, as we just follow the wrong lock path. But it
can lead to a false positive in the deadlock detection logic:

T0 holds L0
T0 blocks on L1 held by T1
T1 blocks on L2 held by T2
T2 blocks on L3 held by T3
T3 blocks on L4 held by T4

Now we walk the chain:

lock T1 -> lock L2 -> adjust L2 -> unlock T1 ->
lock T2 -> adjust T2 -> drop locks

T2 times out and blocks on L0

Now we continue:

lock T2 -> lock L0 -> deadlock detected, but it's not a deadlock at all.

Brad tried to work around that in the deadlock detection logic itself,
but the more I looked at it the less I liked it, because it's crystal
ball magic after the fact.

We can actually detect a chain change very simply:

lock T1 -> lock L2 -> adjust L2 -> unlock T1 ->
lock T2 -> adjust T2 -> next_lock = T2->pi_blocked_on->lock;
drop locks

T2 times out and blocks on L0

Now we continue:

lock T2 ->

    if (next_lock != T2->pi_blocked_on->lock)
        return;

So if we detect that T2 is now blocked on a different lock, we stop the
chain walk. That's also correct in the following scenario:

lock T1 -> lock L2 -> adjust L2 -> unlock T1 ->
lock T2 -> adjust T2 -> next_lock = T2->pi_blocked_on->lock;
drop locks

T3 times out and drops L3

T2 acquires L3 and blocks on L4 now

Now we continue:

lock T2 ->

    if (next_lock != T2->pi_blocked_on->lock)
        return;

We don't have to follow up the chain at that point, because T2 already
propagated our priority up to T4.

[ Folded a cleanup patch from peterz ]

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reported-by: Brad Mouring <bmouring@ni.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20140605152801.930031935@linutronix.de
Cc: stable@vger.kernel.org
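The idea is easy to demonstrate outside the kernel: remember which lock the
blocked task was waiting on before the locks are dropped, then compare after
they are reacquired and bail out if it changed. Below is a minimal standalone
sketch; the fake_task/fake_lock types and the walk_step() helper are made-up
stand-ins for illustration only, not the rt_mutex code from the patch:

/*
 * Standalone illustration of the "next_lock" chain-change check.
 * Build: cc -Wall sketch.c && ./a.out
 */
#include <stdio.h>

struct fake_lock;

struct fake_task {
        const char *name;
        struct fake_lock *blocked_on;   /* lock this task currently waits for */
};

struct fake_lock {
        const char *name;
        struct fake_task *owner;        /* task currently holding the lock */
};

/*
 * One step of a simplified chain walk: next_lock is the snapshot taken
 * before the locks were "dropped". If the owner is now blocked on a
 * different lock, the chain changed under us and we stop instead of
 * reporting a bogus deadlock. next_lock is only compared, never
 * dereferenced in that case.
 */
static int walk_step(struct fake_task *owner, struct fake_lock *next_lock)
{
        if (owner->blocked_on != next_lock) {
                printf("%s moved to a different lock, stop the chain walk\n",
                       owner->name);
                return 0;       /* chain changed, nothing more to do */
        }
        printf("%s still blocked on %s, continue walking\n",
               owner->name, next_lock->name);
        return 1;
}

int main(void)
{
        struct fake_lock L0 = { "L0", NULL }, L2 = { "L2", NULL };
        struct fake_task T2 = { "T2", &L2 };

        /* Snapshot taken while the locks were still held. */
        struct fake_lock *next_lock = T2.blocked_on;

        /* "Locks dropped": T2 times out on L2 and blocks on L0 instead. */
        T2.blocked_on = &L0;

        /* "Locks reacquired": the stale snapshot no longer matches. */
        walk_step(&T2, next_lock);
        return 0;
}

When the comparison fails the walker simply returns; as the commit message
notes, the task that moved has already propagated the priority further up
its new chain, so there is nothing left for this walk to do.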
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/locking/rtmutex.c | 95
 1 file changed, 71 insertions(+), 24 deletions(-)
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index eb7a46327798..a8a83a22bb91 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -260,27 +260,36 @@ static void rt_mutex_adjust_prio(struct task_struct *task)
  */
 int max_lock_depth = 1024;
 
+static inline struct rt_mutex *task_blocked_on_lock(struct task_struct *p)
+{
+        return p->pi_blocked_on ? p->pi_blocked_on->lock : NULL;
+}
+
 /*
  * Adjust the priority chain. Also used for deadlock detection.
  * Decreases task's usage by one - may thus free the task.
  *
- * @task:       the task owning the mutex (owner) for which a chain walk is probably
- *              needed
+ * @task:       the task owning the mutex (owner) for which a chain walk is
+ *              probably needed
  * @deadlock_detect: do we have to carry out deadlock detection?
  * @orig_lock:  the mutex (can be NULL if we are walking the chain to recheck
  *              things for a task that has just got its priority adjusted, and
  *              is waiting on a mutex)
+ * @next_lock:  the mutex on which the owner of @orig_lock was blocked before
+ *              we dropped its pi_lock. Is never dereferenced, only used for
+ *              comparison to detect lock chain changes.
  * @orig_waiter: rt_mutex_waiter struct for the task that has just donated
  *              its priority to the mutex owner (can be NULL in the case
  *              depicted above or if the top waiter is gone away and we are
  *              actually deboosting the owner)
  * @top_task:   the current top waiter
  *
  * Returns 0 or -EDEADLK.
  */
 static int rt_mutex_adjust_prio_chain(struct task_struct *task,
                                       int deadlock_detect,
                                       struct rt_mutex *orig_lock,
+                                      struct rt_mutex *next_lock,
                                       struct rt_mutex_waiter *orig_waiter,
                                       struct task_struct *top_task)
 {
@@ -339,6 +348,18 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
                 goto out_unlock_pi;
 
         /*
+         * We dropped all locks after taking a refcount on @task, so
+         * the task might have moved on in the lock chain or even left
+         * the chain completely and blocks now on an unrelated lock or
+         * on @orig_lock.
+         *
+         * We stored the lock on which @task was blocked in @next_lock,
+         * so we can detect the chain change.
+         */
+        if (next_lock != waiter->lock)
+                goto out_unlock_pi;
+
+        /*
          * Drop out, when the task has no waiters. Note,
          * top_waiter can be NULL, when we are in the deboosting
          * mode!
@@ -422,11 +443,26 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
                 __rt_mutex_adjust_prio(task);
         }
 
+        /*
+         * Check whether the task which owns the current lock is pi
+         * blocked itself. If yes we store a pointer to the lock for
+         * the lock chain change detection above. After we dropped
+         * task->pi_lock next_lock cannot be dereferenced anymore.
+         */
+        next_lock = task_blocked_on_lock(task);
+
         raw_spin_unlock_irqrestore(&task->pi_lock, flags);
 
         top_waiter = rt_mutex_top_waiter(lock);
         raw_spin_unlock(&lock->wait_lock);
 
+        /*
+         * We reached the end of the lock chain. Stop right here. No
+         * point to go back just to figure that out.
+         */
+        if (!next_lock)
+                goto out_put_task;
+
         if (!detect_deadlock && waiter != top_waiter)
                 goto out_put_task;
 
@@ -536,8 +572,9 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
 {
         struct task_struct *owner = rt_mutex_owner(lock);
         struct rt_mutex_waiter *top_waiter = waiter;
-        unsigned long flags;
+        struct rt_mutex *next_lock;
         int chain_walk = 0, res;
+        unsigned long flags;
 
         /*
          * Early deadlock detection. We really don't want the task to
@@ -569,20 +606,28 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
         if (!owner)
                 return 0;
 
+        raw_spin_lock_irqsave(&owner->pi_lock, flags);
         if (waiter == rt_mutex_top_waiter(lock)) {
-                raw_spin_lock_irqsave(&owner->pi_lock, flags);
                 rt_mutex_dequeue_pi(owner, top_waiter);
                 rt_mutex_enqueue_pi(owner, waiter);
 
                 __rt_mutex_adjust_prio(owner);
                 if (owner->pi_blocked_on)
                         chain_walk = 1;
-                raw_spin_unlock_irqrestore(&owner->pi_lock, flags);
-        }
-        else if (debug_rt_mutex_detect_deadlock(waiter, detect_deadlock))
+        } else if (debug_rt_mutex_detect_deadlock(waiter, detect_deadlock)) {
                 chain_walk = 1;
+        }
 
-        if (!chain_walk)
+        /* Store the lock on which owner is blocked or NULL */
+        next_lock = task_blocked_on_lock(owner);
+
+        raw_spin_unlock_irqrestore(&owner->pi_lock, flags);
+        /*
+         * Even if full deadlock detection is on, if the owner is not
+         * blocked itself, we can avoid finding this out in the chain
+         * walk.
+         */
+        if (!chain_walk || !next_lock)
                 return 0;
 
         /*
@@ -594,8 +639,8 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
 
         raw_spin_unlock(&lock->wait_lock);
 
-        res = rt_mutex_adjust_prio_chain(owner, detect_deadlock, lock, waiter,
-                                         task);
+        res = rt_mutex_adjust_prio_chain(owner, detect_deadlock, lock,
+                                         next_lock, waiter, task);
 
         raw_spin_lock(&lock->wait_lock);
 
@@ -644,8 +689,8 @@ static void remove_waiter(struct rt_mutex *lock,
 {
         int first = (waiter == rt_mutex_top_waiter(lock));
         struct task_struct *owner = rt_mutex_owner(lock);
+        struct rt_mutex *next_lock = NULL;
         unsigned long flags;
-        int chain_walk = 0;
 
         raw_spin_lock_irqsave(&current->pi_lock, flags);
         rt_mutex_dequeue(lock, waiter);
@@ -669,13 +714,13 @@ static void remove_waiter(struct rt_mutex *lock,
                 }
                 __rt_mutex_adjust_prio(owner);
 
-                if (owner->pi_blocked_on)
-                        chain_walk = 1;
+                /* Store the lock on which owner is blocked or NULL */
+                next_lock = task_blocked_on_lock(owner);
 
                 raw_spin_unlock_irqrestore(&owner->pi_lock, flags);
         }
 
-        if (!chain_walk)
+        if (!next_lock)
                 return;
 
         /* gets dropped in rt_mutex_adjust_prio_chain()! */
@@ -683,7 +728,7 @@ static void remove_waiter(struct rt_mutex *lock,
 
         raw_spin_unlock(&lock->wait_lock);
 
-        rt_mutex_adjust_prio_chain(owner, 0, lock, NULL, current);
+        rt_mutex_adjust_prio_chain(owner, 0, lock, next_lock, NULL, current);
 
         raw_spin_lock(&lock->wait_lock);
 }
@@ -696,6 +741,7 @@ static void remove_waiter(struct rt_mutex *lock,
 void rt_mutex_adjust_pi(struct task_struct *task)
 {
         struct rt_mutex_waiter *waiter;
+        struct rt_mutex *next_lock;
         unsigned long flags;
 
         raw_spin_lock_irqsave(&task->pi_lock, flags);
@@ -706,12 +752,13 @@ void rt_mutex_adjust_pi(struct task_struct *task)
                 raw_spin_unlock_irqrestore(&task->pi_lock, flags);
                 return;
         }
-
+        next_lock = waiter->lock;
         raw_spin_unlock_irqrestore(&task->pi_lock, flags);
 
         /* gets dropped in rt_mutex_adjust_prio_chain()! */
         get_task_struct(task);
-        rt_mutex_adjust_prio_chain(task, 0, NULL, NULL, task);
+
+        rt_mutex_adjust_prio_chain(task, 0, NULL, next_lock, NULL, task);
 }
 
 /**