aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/locking/rtmutex.c
diff options
context:
space:
mode:
authorThomas Gleixner <tglx@linutronix.de>2019-01-29 17:15:12 -0500
committerThomas Gleixner <tglx@linutronix.de>2019-02-08 07:00:36 -0500
commit1a1fb985f2e2b85ec0d3dc2e519ee48389ec2434 (patch)
treedb20b498bbd047982a926c49bc419096cf268955 /kernel/locking/rtmutex.c
parent6f568ebe2afefdc33a6fb06ef20a94f8b96455f1 (diff)
futex: Handle early deadlock return correctly
commit 56222b212e8e ("futex: Drop hb->lock before enqueueing on the rtmutex") changed the locking rules in the futex code so that the hash bucket lock is not longer held while the waiter is enqueued into the rtmutex wait list. This made the lock and the unlock path symmetric, but unfortunately the possible early exit from __rt_mutex_proxy_start() due to a detected deadlock was not updated accordingly. That allows a concurrent unlocker to observe inconsitent state which triggers the warning in the unlock path. futex_lock_pi() futex_unlock_pi() lock(hb->lock) queue(hb_waiter) lock(hb->lock) lock(rtmutex->wait_lock) unlock(hb->lock) // acquired hb->lock hb_waiter = futex_top_waiter() lock(rtmutex->wait_lock) __rt_mutex_proxy_start() ---> fail remove(rtmutex_waiter); ---> returns -EDEADLOCK unlock(rtmutex->wait_lock) // acquired wait_lock wake_futex_pi() rt_mutex_next_owner() --> returns NULL --> WARN lock(hb->lock) unqueue(hb_waiter) The problem is caused by the remove(rtmutex_waiter) in the failure case of __rt_mutex_proxy_start() as this lets the unlocker observe a waiter in the hash bucket but no waiter on the rtmutex, i.e. inconsistent state. The original commit handles this correctly for the other early return cases (timeout, signal) by delaying the removal of the rtmutex waiter until the returning task reacquired the hash bucket lock. Treat the failure case of __rt_mutex_proxy_start() in the same way and let the existing cleanup code handle the eventual handover of the rtmutex gracefully. The regular rt_mutex_proxy_start() gains the rtmutex waiter removal for the failure case, so that the other callsites are still operating correctly. Add proper comments to the code so all these details are fully documented. Thanks to Peter for helping with the analysis and writing the really valuable code comments. Fixes: 56222b212e8e ("futex: Drop hb->lock before enqueueing on the rtmutex") Reported-by: Heiko Carstens <heiko.carstens@de.ibm.com> Co-developed-by: Peter Zijlstra <peterz@infradead.org> Signed-off-by: Peter Zijlstra <peterz@infradead.org> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Tested-by: Heiko Carstens <heiko.carstens@de.ibm.com> Cc: Martin Schwidefsky <schwidefsky@de.ibm.com> Cc: linux-s390@vger.kernel.org Cc: Stefan Liebler <stli@linux.ibm.com> Cc: Sebastian Sewior <bigeasy@linutronix.de> Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/alpine.DEB.2.21.1901292311410.1950@nanos.tec.linutronix.de
Diffstat (limited to 'kernel/locking/rtmutex.c')
-rw-r--r--kernel/locking/rtmutex.c37
1 files changed, 32 insertions, 5 deletions
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index 581edcc63c26..978d63a8261c 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -1726,12 +1726,33 @@ void rt_mutex_proxy_unlock(struct rt_mutex *lock,
1726 rt_mutex_set_owner(lock, NULL); 1726 rt_mutex_set_owner(lock, NULL);
1727} 1727}
1728 1728
1729/**
1730 * __rt_mutex_start_proxy_lock() - Start lock acquisition for another task
1731 * @lock: the rt_mutex to take
1732 * @waiter: the pre-initialized rt_mutex_waiter
1733 * @task: the task to prepare
1734 *
1735 * Starts the rt_mutex acquire; it enqueues the @waiter and does deadlock
1736 * detection. It does not wait, see rt_mutex_wait_proxy_lock() for that.
1737 *
1738 * NOTE: does _NOT_ remove the @waiter on failure; must either call
1739 * rt_mutex_wait_proxy_lock() or rt_mutex_cleanup_proxy_lock() after this.
1740 *
1741 * Returns:
1742 * 0 - task blocked on lock
1743 * 1 - acquired the lock for task, caller should wake it up
1744 * <0 - error
1745 *
1746 * Special API call for PI-futex support.
1747 */
1729int __rt_mutex_start_proxy_lock(struct rt_mutex *lock, 1748int __rt_mutex_start_proxy_lock(struct rt_mutex *lock,
1730 struct rt_mutex_waiter *waiter, 1749 struct rt_mutex_waiter *waiter,
1731 struct task_struct *task) 1750 struct task_struct *task)
1732{ 1751{
1733 int ret; 1752 int ret;
1734 1753
1754 lockdep_assert_held(&lock->wait_lock);
1755
1735 if (try_to_take_rt_mutex(lock, task, NULL)) 1756 if (try_to_take_rt_mutex(lock, task, NULL))
1736 return 1; 1757 return 1;
1737 1758
@@ -1749,9 +1770,6 @@ int __rt_mutex_start_proxy_lock(struct rt_mutex *lock,
1749 ret = 0; 1770 ret = 0;
1750 } 1771 }
1751 1772
1752 if (unlikely(ret))
1753 remove_waiter(lock, waiter);
1754
1755 debug_rt_mutex_print_deadlock(waiter); 1773 debug_rt_mutex_print_deadlock(waiter);
1756 1774
1757 return ret; 1775 return ret;
@@ -1763,12 +1781,18 @@ int __rt_mutex_start_proxy_lock(struct rt_mutex *lock,
1763 * @waiter: the pre-initialized rt_mutex_waiter 1781 * @waiter: the pre-initialized rt_mutex_waiter
1764 * @task: the task to prepare 1782 * @task: the task to prepare
1765 * 1783 *
1784 * Starts the rt_mutex acquire; it enqueues the @waiter and does deadlock
1785 * detection. It does not wait, see rt_mutex_wait_proxy_lock() for that.
1786 *
1787 * NOTE: unlike __rt_mutex_start_proxy_lock this _DOES_ remove the @waiter
1788 * on failure.
1789 *
1766 * Returns: 1790 * Returns:
1767 * 0 - task blocked on lock 1791 * 0 - task blocked on lock
1768 * 1 - acquired the lock for task, caller should wake it up 1792 * 1 - acquired the lock for task, caller should wake it up
1769 * <0 - error 1793 * <0 - error
1770 * 1794 *
1771 * Special API call for FUTEX_REQUEUE_PI support. 1795 * Special API call for PI-futex support.
1772 */ 1796 */
1773int rt_mutex_start_proxy_lock(struct rt_mutex *lock, 1797int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
1774 struct rt_mutex_waiter *waiter, 1798 struct rt_mutex_waiter *waiter,
@@ -1778,6 +1802,8 @@ int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
1778 1802
1779 raw_spin_lock_irq(&lock->wait_lock); 1803 raw_spin_lock_irq(&lock->wait_lock);
1780 ret = __rt_mutex_start_proxy_lock(lock, waiter, task); 1804 ret = __rt_mutex_start_proxy_lock(lock, waiter, task);
1805 if (unlikely(ret))
1806 remove_waiter(lock, waiter);
1781 raw_spin_unlock_irq(&lock->wait_lock); 1807 raw_spin_unlock_irq(&lock->wait_lock);
1782 1808
1783 return ret; 1809 return ret;
@@ -1845,7 +1871,8 @@ int rt_mutex_wait_proxy_lock(struct rt_mutex *lock,
1845 * @lock: the rt_mutex we were woken on 1871 * @lock: the rt_mutex we were woken on
1846 * @waiter: the pre-initialized rt_mutex_waiter 1872 * @waiter: the pre-initialized rt_mutex_waiter
1847 * 1873 *
1848 * Attempt to clean up after a failed rt_mutex_wait_proxy_lock(). 1874 * Attempt to clean up after a failed __rt_mutex_start_proxy_lock() or
1875 * rt_mutex_wait_proxy_lock().
1849 * 1876 *
1850 * Unless we acquired the lock; we're still enqueued on the wait-list and can 1877 * Unless we acquired the lock; we're still enqueued on the wait-list and can
1851 * in fact still be granted ownership until we're removed. Therefore we can 1878 * in fact still be granted ownership until we're removed. Therefore we can