diff options
author | Peter Zijlstra <peterz@infradead.org> | 2017-12-08 07:49:39 -0500 |
---|---|---|
committer | Thomas Gleixner <tglx@linutronix.de> | 2018-01-14 12:49:16 -0500 |
commit | c1e2f0eaf015fb7076d51a339011f2383e6dd389 (patch) | |
tree | c993bc9680bf3297d147c1bae503e6e47362bad7 /kernel/locking/rtmutex.c | |
parent | c92a9a461dff6140c539c61e457aa97df29517d6 (diff) |
futex: Avoid violating the 10th rule of futex
Julia reported futex state corruption in the following scenario:
waiter                                  waker                             stealer (prio > waiter)

futex(WAIT_REQUEUE_PI, uaddr, uaddr2,
  timeout=[N ms])
   futex_wait_requeue_pi()
      futex_wait_queue_me()
         freezable_schedule()
         <scheduled out>

                                        futex(LOCK_PI, uaddr2)
                                        futex(CMP_REQUEUE_PI, uaddr,
                                          uaddr2, 1, 0)
                                           /* requeues waiter to uaddr2 */
                                        futex(UNLOCK_PI, uaddr2)
                                           wake_futex_pi()
                                              cmp_futex_value_locked(uaddr2, waiter)
                                              wake_up_q()
           <woken by waker>
           <hrtimer_wakeup() fires,
            clears sleeper->task>

                                                                          futex(LOCK_PI, uaddr2)
                                                                             __rt_mutex_start_proxy_lock()
                                                                                try_to_take_rt_mutex() /* steals lock */
                                                                                   rt_mutex_set_owner(lock, stealer)
                                                                                <preempted>

         <scheduled in>
         rt_mutex_wait_proxy_lock()
            __rt_mutex_slowlock()
               try_to_take_rt_mutex() /* fails, lock held by stealer */
               if (timeout && !timeout->task)
                  return -ETIMEDOUT;
            fixup_owner()
               /* lock wasn't acquired, so,
                  fixup_pi_state_owner skipped */

   return -ETIMEDOUT;

/* At this point, we've returned -ETIMEDOUT to userspace, but the
 * futex word shows waiter to be the owner, and the pi_mutex has
 * stealer as the owner */

futex_lock(LOCK_PI, uaddr2)
  -> bails with EDEADLK, futex word says we're owner.
And suggested that what commit:
73d786bd043e ("futex: Rework inconsistent rt_mutex/futex_q state")
removes from fixup_owner() looks to be just what is needed. And indeed
it is -- I completely missed that requeue_pi could also result in this
case. So we need to restore that, except that subsequent patches, like
commit:
16ffa12d7425 ("futex: Pull rt_mutex_futex_unlock() out from under hb->lock")
changed all the locking rules. Even without that, the sequence:
- if (rt_mutex_futex_trylock(&q->pi_state->pi_mutex)) {
- locked = 1;
- goto out;
- }
- raw_spin_lock_irq(&q->pi_state->pi_mutex.wait_lock);
- owner = rt_mutex_owner(&q->pi_state->pi_mutex);
- if (!owner)
- owner = rt_mutex_next_owner(&q->pi_state->pi_mutex);
- raw_spin_unlock_irq(&q->pi_state->pi_mutex.wait_lock);
- ret = fixup_pi_state_owner(uaddr, q, owner);
already suggests there were races; otherwise we'd never have to look
at next_owner.
So instead of doing 3 consecutive wait_lock sections with who knows
what races, we do it all in a single section. Additionally, the usage
of pi_state->owner in fixup_owner() was only safe because only the
rt_mutex owner would modify it, which this additional case wrecks.
Luckily the values can only change away and not to the value we're
testing, this means we can do a speculative test and double check once
we have the wait_lock.
Fixes: 73d786bd043e ("futex: Rework inconsistent rt_mutex/futex_q state")
Reported-by: Julia Cartwright <julia@ni.com>
Reported-by: Gratian Crisan <gratian.crisan@ni.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Julia Cartwright <julia@ni.com>
Tested-by: Gratian Crisan <gratian.crisan@ni.com>
Cc: Darren Hart <dvhart@infradead.org>
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/20171208124939.7livp7no2ov65rrc@hirez.programming.kicks-ass.net
Diffstat (limited to 'kernel/locking/rtmutex.c')
-rw-r--r--	kernel/locking/rtmutex.c	26
1 file changed, 19 insertions(+), 7 deletions(-)
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index 6f3dba6e4e9e..65cc0cb984e6 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -1290,6 +1290,19 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
 	return ret;
 }
 
+static inline int __rt_mutex_slowtrylock(struct rt_mutex *lock)
+{
+	int ret = try_to_take_rt_mutex(lock, current, NULL);
+
+	/*
+	 * try_to_take_rt_mutex() sets the lock waiters bit
+	 * unconditionally. Clean this up.
+	 */
+	fixup_rt_mutex_waiters(lock);
+
+	return ret;
+}
+
 /*
  * Slow path try-lock function:
  */
@@ -1312,13 +1325,7 @@ static inline int rt_mutex_slowtrylock(struct rt_mutex *lock)
 	 */
 	raw_spin_lock_irqsave(&lock->wait_lock, flags);
 
-	ret = try_to_take_rt_mutex(lock, current, NULL);
-
-	/*
-	 * try_to_take_rt_mutex() sets the lock waiters bit
-	 * unconditionally. Clean this up.
-	 */
-	fixup_rt_mutex_waiters(lock);
+	ret = __rt_mutex_slowtrylock(lock);
 
 	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
 
@@ -1505,6 +1512,11 @@ int __sched rt_mutex_futex_trylock(struct rt_mutex *lock)
 	return rt_mutex_slowtrylock(lock);
 }
 
+int __sched __rt_mutex_futex_trylock(struct rt_mutex *lock)
+{
+	return __rt_mutex_slowtrylock(lock);
+}
+
 /**
  * rt_mutex_timed_lock - lock a rt_mutex interruptible
  * the timeout structure is provided