author		Peter Zijlstra <peterz@infradead.org>	2017-03-22 06:36:00 -0400
committer	Thomas Gleixner <tglx@linutronix.de>	2017-03-23 14:14:59 -0400
commit		56222b212e8edb1cf51f5dd73ff645809b082b40 (patch)
tree		162dda41c7fd9129019c28e86b873c42925acb17 /kernel
parent		bebe5b514345f09be2c15e414d076b02ecb9cce8 (diff)
futex: Drop hb->lock before enqueueing on the rtmutex
When PREEMPT_RT_FULL does the spinlock -> rt_mutex substitution, the PI
chain code will (falsely) report a deadlock and BUG.

The problem is that it holds hb->lock (now an rt_mutex) while doing
task_blocks_on_rt_mutex on the futex's pi_state::rtmutex. This, when
interleaved just right with futex_unlock_pi(), leads it to believe it
sees an AB-BA deadlock:

  Task1 (holds rt_mutex,          Task2 (does FUTEX_LOCK_PI)
         does FUTEX_UNLOCK_PI)

                                  lock hb->lock
                                  lock rt_mutex (as per start_proxy)
  lock hb->lock

which is a trivial AB-BA. It is not an actual deadlock, because Task2
won't be holding hb->lock by the time it actually blocks on the
rt_mutex, but the chainwalk code doesn't know that and it would be a
nightmare to handle this gracefully.

To avoid this problem, do the same as in futex_unlock_pi() and drop
hb->lock after acquiring wait_lock. This still fully serializes against
futex_unlock_pi(), since adding to the wait_list does the very same lock
dance, and removing it holds both locks.

Aside from solving the RT problem, this makes the lock and unlock
mechanism symmetric and reduces the hb->lock hold time.

Reported-and-tested-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: juri.lelli@arm.com
Cc: xlpang@redhat.com
Cc: rostedt@goodmis.org
Cc: mathieu.desnoyers@efficios.com
Cc: jdesfossez@efficios.com
Cc: dvhart@infradead.org
Cc: bristot@redhat.com
Link: http://lkml.kernel.org/r/20170322104152.161341537@infradead.org
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
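[Editor's note] To make the hand-off concrete, a minimal userspace sketch of
the idea follows. This is illustration only, not kernel code: the names
outer, inner, lock_side and unlock_side are invented here; outer stands in
for hb->lock and inner for pi_state->pi_mutex.wait_lock.

        #include <pthread.h>

        static pthread_mutex_t outer = PTHREAD_MUTEX_INITIALIZER; /* "hb->lock"   */
        static pthread_mutex_t inner = PTHREAD_MUTEX_INITIALIZER; /* "wait_lock"  */

        static void lock_side(void)             /* FUTEX_LOCK_PI analogue */
        {
                pthread_mutex_lock(&outer);
                /* ... look up / set up state under outer ... */
                pthread_mutex_lock(&inner);     /* take inner while outer is held */
                pthread_mutex_unlock(&outer);   /* drop outer BEFORE blocking     */
                /* ... enqueue and block under inner only: outer never enters
                 *     the blocking chain, so no false AB-BA can be reported ... */
                pthread_mutex_unlock(&inner);
        }

        static void unlock_side(void)           /* FUTEX_UNLOCK_PI analogue */
        {
                pthread_mutex_lock(&outer);
                pthread_mutex_lock(&inner);     /* same dance: no window exists   */
                pthread_mutex_unlock(&outer);   /* where neither lock is held     */
                /* ... wake the top waiter under inner ... */
                pthread_mutex_unlock(&inner);
        }

Because lock_side() never blocks while holding outer, the only ordering either
side ever exhibits is outer -> inner, and the hand-off guarantees one of the
two locks is held at every point, which is what serializes the two paths.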
Diffstat (limited to 'kernel')
-rw-r--r--	kernel/futex.c			30
-rw-r--r--	kernel/locking/rtmutex.c	49
-rw-r--r--	kernel/locking/rtmutex_common.h	 3
3 files changed, 52 insertions, 30 deletions
diff --git a/kernel/futex.c b/kernel/futex.c
index 4cdc603b00c3..628be42296eb 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -2654,20 +2654,33 @@ retry_private:
 		goto no_block;
 	}
 
+	rt_mutex_init_waiter(&rt_waiter);
+
 	/*
-	 * We must add ourselves to the rt_mutex waitlist while holding hb->lock
-	 * such that the hb and rt_mutex wait lists match.
+	 * On PREEMPT_RT_FULL, when hb->lock becomes an rt_mutex, we must not
+	 * hold it while doing rt_mutex_start_proxy(), because then it will
+	 * include hb->lock in the blocking chain, even though we'll not in
+	 * fact hold it while blocking. This will lead it to report -EDEADLK
+	 * and BUG when futex_unlock_pi() interleaves with this.
+	 *
+	 * Therefore acquire wait_lock while holding hb->lock, but drop the
+	 * latter before calling rt_mutex_start_proxy_lock(). This still fully
+	 * serializes against futex_unlock_pi() as that does the exact same
+	 * lock handoff sequence.
 	 */
-	rt_mutex_init_waiter(&rt_waiter);
-	ret = rt_mutex_start_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter, current);
+	raw_spin_lock_irq(&q.pi_state->pi_mutex.wait_lock);
+	spin_unlock(q.lock_ptr);
+	ret = __rt_mutex_start_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter, current);
+	raw_spin_unlock_irq(&q.pi_state->pi_mutex.wait_lock);
+
 	if (ret) {
 		if (ret == 1)
 			ret = 0;
 
+		spin_lock(q.lock_ptr);
 		goto no_block;
 	}
 
-	spin_unlock(q.lock_ptr);
 
 	if (unlikely(to))
 		hrtimer_start_expires(&to->timer, HRTIMER_MODE_ABS);
@@ -2680,6 +2693,9 @@ retry_private:
 	 * first acquire the hb->lock before removing the lock from the
 	 * rt_mutex waitqueue, such that we can keep the hb and rt_mutex
 	 * wait lists consistent.
+	 *
+	 * In particular, it is important that futex_unlock_pi() can not
+	 * observe this inconsistency.
 	 */
 	if (ret && !rt_mutex_cleanup_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter))
 		ret = 0;
@@ -2791,10 +2807,6 @@ retry:
 
 	get_pi_state(pi_state);
 	/*
-	 * Since modifying the wait_list is done while holding both
-	 * hb->lock and wait_lock, holding either is sufficient to
-	 * observe it.
-	 *
 	 * By taking wait_lock while still holding hb->lock, we ensure
 	 * there is no point where we hold neither; and therefore
 	 * wake_futex_pi() must observe a state consistent with what we
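[Editor's note] For reference, the "exact same lock handoff sequence" that the
new futex.c comment cites is the one futex_unlock_pi() performs right after
the context shown above. Quoted from memory for this kernel version, so treat
it as a sketch and check the tree for the exact lines:

	get_pi_state(pi_state);
	/*
	 * By taking wait_lock while still holding hb->lock, we ensure
	 * there is no point where we hold neither; and therefore
	 * wake_futex_pi() must observe a state consistent with what we
	 * observed.
	 */
	raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
	spin_unlock(&hb->lock);

	ret = wake_futex_pi(uaddr, uval, pi_state);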
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index 48418a1733b8..dd103124166b 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -1669,31 +1669,14 @@ void rt_mutex_proxy_unlock(struct rt_mutex *lock,
 	rt_mutex_set_owner(lock, NULL);
 }
 
-/**
- * rt_mutex_start_proxy_lock() - Start lock acquisition for another task
- * @lock:		the rt_mutex to take
- * @waiter:		the pre-initialized rt_mutex_waiter
- * @task:		the task to prepare
- *
- * Returns:
- *  0 - task blocked on lock
- *  1 - acquired the lock for task, caller should wake it up
- * <0 - error
- *
- * Special API call for FUTEX_REQUEUE_PI support.
- */
-int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
+int __rt_mutex_start_proxy_lock(struct rt_mutex *lock,
 			      struct rt_mutex_waiter *waiter,
 			      struct task_struct *task)
 {
 	int ret;
 
-	raw_spin_lock_irq(&lock->wait_lock);
-
-	if (try_to_take_rt_mutex(lock, task, NULL)) {
-		raw_spin_unlock_irq(&lock->wait_lock);
+	if (try_to_take_rt_mutex(lock, task, NULL))
 		return 1;
-	}
 
 	/* We enforce deadlock detection for futexes */
 	ret = task_blocks_on_rt_mutex(lock, waiter, task,
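[Editor's note] After this hunk, __rt_mutex_start_proxy_lock() silently relies
on the caller holding lock->wait_lock. The patch adds no annotation for that
precondition; a hypothetical way to make it explicit would be a lockdep
assertion at the top of the function (sketch only, not part of this commit):

	int __rt_mutex_start_proxy_lock(struct rt_mutex *lock,
					struct rt_mutex_waiter *waiter,
					struct task_struct *task)
	{
		lockdep_assert_held(&lock->wait_lock); /* caller must hold wait_lock */

		if (try_to_take_rt_mutex(lock, task, NULL))
			return 1;
		...
	}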
@@ -1712,14 +1695,38 @@ int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
 	if (unlikely(ret))
 		remove_waiter(lock, waiter);
 
-	raw_spin_unlock_irq(&lock->wait_lock);
-
 	debug_rt_mutex_print_deadlock(waiter);
 
 	return ret;
 }
 
 /**
+ * rt_mutex_start_proxy_lock() - Start lock acquisition for another task
+ * @lock:		the rt_mutex to take
+ * @waiter:		the pre-initialized rt_mutex_waiter
+ * @task:		the task to prepare
+ *
+ * Returns:
+ *  0 - task blocked on lock
+ *  1 - acquired the lock for task, caller should wake it up
+ * <0 - error
+ *
+ * Special API call for FUTEX_REQUEUE_PI support.
+ */
+int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
+			      struct rt_mutex_waiter *waiter,
+			      struct task_struct *task)
+{
+	int ret;
+
+	raw_spin_lock_irq(&lock->wait_lock);
+	ret = __rt_mutex_start_proxy_lock(lock, waiter, task);
+	raw_spin_unlock_irq(&lock->wait_lock);
+
+	return ret;
+}
+
+/**
  * rt_mutex_next_owner - return the next owner of the lock
  *
  * @lock:	the rt lock query
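[Editor's note] The split follows the common kernel convention: the
double-underscore variant assumes wait_lock is already held, while the plain
variant keeps the old self-locking behaviour for callers that do not need the
hand-off. A sketch of the two call sites after this patch (the futex_lock_pi()
lines come from the futex.c hunk above; the futex_requeue() call is quoted
from memory):

	/* futex_lock_pi(): manages wait_lock itself so it can drop
	 * hb->lock in between: */
	raw_spin_lock_irq(&q.pi_state->pi_mutex.wait_lock);
	spin_unlock(q.lock_ptr);
	ret = __rt_mutex_start_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter, current);
	raw_spin_unlock_irq(&q.pi_state->pi_mutex.wait_lock);

	/* futex_requeue() (FUTEX_REQUEUE_PI): keeps using the wrapper,
	 * which takes and drops wait_lock internally: */
	ret = rt_mutex_start_proxy_lock(&pi_state->pi_mutex,
					&this->rt_waiter, this->task);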
diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h
index 1e93e15a0e45..b1ccfea2effe 100644
--- a/kernel/locking/rtmutex_common.h
+++ b/kernel/locking/rtmutex_common.h
@@ -104,6 +104,9 @@ extern void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
 extern void rt_mutex_proxy_unlock(struct rt_mutex *lock,
 				  struct task_struct *proxy_owner);
 extern void rt_mutex_init_waiter(struct rt_mutex_waiter *waiter);
+extern int __rt_mutex_start_proxy_lock(struct rt_mutex *lock,
+				       struct rt_mutex_waiter *waiter,
+				       struct task_struct *task);
 extern int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
 				     struct rt_mutex_waiter *waiter,
 				     struct task_struct *task);