author		Peter Zijlstra <peterz@infradead.org>	2017-03-22 06:36:00 -0400
committer	Thomas Gleixner <tglx@linutronix.de>	2017-03-23 14:14:59 -0400
commit		56222b212e8edb1cf51f5dd73ff645809b082b40 (patch)
tree		162dda41c7fd9129019c28e86b873c42925acb17 /kernel/futex.c
parent		bebe5b514345f09be2c15e414d076b02ecb9cce8 (diff)
futex: Drop hb->lock before enqueueing on the rtmutex
When PREEMPT_RT_FULL does the spinlock -> rt_mutex substitution the PI
chain code will (falsely) report a deadlock and BUG.

The problem is that it holds hb->lock (now an rt_mutex) while doing
task_blocks_on_rt_mutex on the futex's pi_state::rtmutex. This, when
interleaved just right with futex_unlock_pi(), leads it to believe it
sees an AB-BA deadlock.

  Task1 (holds rt_mutex,        Task2 (does FUTEX_LOCK_PI)
         does FUTEX_UNLOCK_PI)

                                lock hb->lock
                                lock rt_mutex (as per start_proxy)
  lock hb->lock

Which is a trivial AB-BA.

It is not an actual deadlock, because Task2 won't be holding hb->lock by
the time it actually blocks on the rt_mutex, but the chainwalk code
doesn't know that and it would be a nightmare to handle this gracefully.

To avoid this problem, do the same as in futex_unlock_pi() and drop
hb->lock after acquiring wait_lock. This still fully serializes against
futex_unlock_pi(), since adding to the wait_list does the very same lock
dance, and removing it holds both locks.

Aside from solving the RT problem, this makes the lock and unlock
mechanism symmetric and reduces the hb->lock held time.

Reported-and-tested-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: juri.lelli@arm.com
Cc: xlpang@redhat.com
Cc: rostedt@goodmis.org
Cc: mathieu.desnoyers@efficios.com
Cc: jdesfossez@efficios.com
Cc: dvhart@infradead.org
Cc: bristot@redhat.com
Link: http://lkml.kernel.org/r/20170322104152.161341537@infradead.org
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
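The lock handoff at the heart of this patch can be modeled in a few lines
of userspace C. The sketch below is illustrative only, not kernel code:
pthread mutexes stand in for hb->lock and pi_state->pi_mutex.wait_lock,
an "enqueued" flag stands in for the rt_mutex wait list, and all names
(hb_lock, wait_lock, locker, unlocker) are invented for the example.
Because both sides take wait_lock while still holding hb->lock and only
then drop hb->lock, there is never a point where a side holds neither
lock, and no side ever acquires hb->lock while holding wait_lock; the
waker therefore always observes a consistent wait list and no AB-BA
ordering exists for a chainwalk to trip over.

/* Toy model of the hb->lock -> wait_lock handoff. Illustrative names;
 * pthread mutexes stand in for the kernel's spinlocks.
 * Build: cc -pthread handoff.c -o handoff */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t hb_lock   = PTHREAD_MUTEX_INITIALIZER; /* hb->lock        */
static pthread_mutex_t wait_lock = PTHREAD_MUTEX_INITIALIZER; /* pi_mutex wait_lock */
static int enqueued;                /* stands in for the rt_mutex wait list */

static void *locker(void *arg)      /* FUTEX_LOCK_PI side */
{
	pthread_mutex_lock(&hb_lock);
	pthread_mutex_lock(&wait_lock);   /* handoff: take wait_lock ...      */
	pthread_mutex_unlock(&hb_lock);   /* ... then drop hb->lock           */
	enqueued = 1;                     /* __rt_mutex_start_proxy_lock() analogue */
	pthread_mutex_unlock(&wait_lock);
	return NULL;
}

static void *unlocker(void *arg)    /* FUTEX_UNLOCK_PI side, same dance */
{
	pthread_mutex_lock(&hb_lock);
	pthread_mutex_lock(&wait_lock);
	pthread_mutex_unlock(&hb_lock);
	printf("unlocker sees enqueued=%d\n", enqueued); /* 0 or 1, never torn */
	pthread_mutex_unlock(&wait_lock);
	return NULL;
}

int main(void)
{
	pthread_t a, b;
	pthread_create(&a, NULL, locker, NULL);
	pthread_create(&b, NULL, unlocker, NULL);
	pthread_join(a, NULL);
	pthread_join(b, NULL);
	return 0;
}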
Diffstat (limited to 'kernel/futex.c')
-rw-r--r--	kernel/futex.c	30
1 file changed, 21 insertions(+), 9 deletions(-)
diff --git a/kernel/futex.c b/kernel/futex.c
index 4cdc603b00c3..628be42296eb 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -2654,20 +2654,33 @@ retry_private:
 		goto no_block;
 	}
 
+	rt_mutex_init_waiter(&rt_waiter);
+
 	/*
-	 * We must add ourselves to the rt_mutex waitlist while holding hb->lock
-	 * such that the hb and rt_mutex wait lists match.
+	 * On PREEMPT_RT_FULL, when hb->lock becomes an rt_mutex, we must not
+	 * hold it while doing rt_mutex_start_proxy(), because then it will
+	 * include hb->lock in the blocking chain, even though we'll not in
+	 * fact hold it while blocking. This will lead it to report -EDEADLK
+	 * and BUG when futex_unlock_pi() interleaves with this.
+	 *
+	 * Therefore acquire wait_lock while holding hb->lock, but drop the
+	 * latter before calling rt_mutex_start_proxy_lock(). This still fully
+	 * serializes against futex_unlock_pi() as that does the exact same
+	 * lock handoff sequence.
 	 */
-	rt_mutex_init_waiter(&rt_waiter);
-	ret = rt_mutex_start_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter, current);
+	raw_spin_lock_irq(&q.pi_state->pi_mutex.wait_lock);
+	spin_unlock(q.lock_ptr);
+	ret = __rt_mutex_start_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter, current);
+	raw_spin_unlock_irq(&q.pi_state->pi_mutex.wait_lock);
+
 	if (ret) {
 		if (ret == 1)
 			ret = 0;
 
+		spin_lock(q.lock_ptr);
 		goto no_block;
 	}
 
-	spin_unlock(q.lock_ptr);
 
 	if (unlikely(to))
 		hrtimer_start_expires(&to->timer, HRTIMER_MODE_ABS);
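The control flow this hunk establishes, in the same toy-model terms as
the sketch above (pthread mutexes for the kernel locks; try_enqueue() is
a hypothetical stand-in for __rt_mutex_start_proxy_lock(), not a kernel
function). Note why the hunk adds spin_lock(q.lock_ptr) before the goto:
the no_block exit path expects hb->lock to be held, so the early-out
must retake it.

#include <pthread.h>

static pthread_mutex_t hb_lock   = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t wait_lock = PTHREAD_MUTEX_INITIALIZER;

/* Hypothetical stand-in for __rt_mutex_start_proxy_lock(); nonzero
 * means "no need to block" (lock acquired, or a hard failure). */
static int try_enqueue(void) { return 0; }

static int lock_pi_enqueue(void)
{
	int ret;

	pthread_mutex_lock(&hb_lock);        /* spin_lock(q.lock_ptr)         */
	pthread_mutex_lock(&wait_lock);      /* raw_spin_lock_irq(wait_lock)  */
	pthread_mutex_unlock(&hb_lock);      /* drop hb->lock before blocking */
	ret = try_enqueue();
	pthread_mutex_unlock(&wait_lock);

	if (ret)                             /* early-out (no_block) path     */
		pthread_mutex_lock(&hb_lock);    /* expects hb->lock held again   */

	return ret;
}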
@@ -2680,6 +2693,9 @@ retry_private:
 	 * first acquire the hb->lock before removing the lock from the
 	 * rt_mutex waitqueue, such that we can keep the hb and rt_mutex
 	 * wait lists consistent.
+	 *
+	 * In particular; it is important that futex_unlock_pi() can not
+	 * observe this inconsistency.
 	 */
 	if (ret && !rt_mutex_cleanup_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter))
 		ret = 0;
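The comment lines added here concern the cleanup path: a waiter that
failed to acquire the rt_mutex must retake hb->lock before dequeueing
itself from the rt_mutex wait list, so that futex_unlock_pi(), which
also holds hb->lock while waking, can never observe the hb queue and
the rt_mutex wait list disagreeing. In the same illustrative terms as
the earlier sketches (names and dequeue logic invented for the example):

#include <pthread.h>

static pthread_mutex_t hb_lock   = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t wait_lock = PTHREAD_MUTEX_INITIALIZER;
static int on_wait_list;

/* Loose analogue of the rt_mutex_cleanup_proxy_lock() call site above:
 * hb->lock is reacquired first, then the wait list is fixed up. */
static void cleanup_after_failed_block(void)
{
	pthread_mutex_lock(&hb_lock);     /* spin_lock(q.lock_ptr) first    */
	pthread_mutex_lock(&wait_lock);   /* then the rt_mutex wait_lock    */
	on_wait_list = 0;                 /* dequeue; a concurrent unlocker */
	pthread_mutex_unlock(&wait_lock); /* can never see a half-removed   */
	pthread_mutex_unlock(&hb_lock);   /* waiter                         */
}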
@@ -2791,10 +2807,6 @@ retry:
 
 	get_pi_state(pi_state);
 	/*
-	 * Since modifying the wait_list is done while holding both
-	 * hb->lock and wait_lock, holding either is sufficient to
-	 * observe it.
-	 *
 	 * By taking wait_lock while still holding hb->lock, we ensure
 	 * there is no point where we hold neither; and therefore
 	 * wake_futex_pi() must observe a state consistent with what we