aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorThomas Gleixner <tglx@linutronix.de>2008-01-08 13:47:38 -0500
committerLinus Torvalds <torvalds@woody.linux-foundation.org>2008-01-08 19:21:39 -0500
commitcdf71a10c7b6432d9b48e292cca2c62a0b9fa6cf (patch)
tree9ddb693f20dbd2bf2dbb172b7f4ab1160238e63b
parentbf5e5834bffc62b50cd4a201804506eb11ef1af8 (diff)
futex: Prevent stale futex owner when interrupted/timeout
Roland Westrelin did a great analysis of a long standing thinko in the return path of futex_lock_pi. While we fixed the lock steal case long ago, which was easy to trigger, we never had a test case which exposed this problem and stupidly never thought about the reverse lock stealing scenario and the return to user space with a stale state. When a blocked tasks returns from rt_mutex_timed_locked without holding the rt_mutex (due to a signal or timeout) and at the same time the task holding the futex is releasing the futex and assigning the ownership of the futex to the returning task, then it might happen that a third task acquires the rt_mutex before the final rt_mutex_trylock() of the returning task happens under the futex hash bucket lock. The returning task returns to user space with ETIMEOUT or EINTR, but the user space futex value is assigned to this task. The task which acquired the rt_mutex fixes the user space futex value right after the hash bucket lock has been released by the returning task, but for a short period of time the user space value is wrong. Detailed description is available at: https://bugzilla.redhat.com/show_bug.cgi?id=400541 The fix for this is the same as we do when the rt_mutex was acquired by a higher priority task via lock stealing from the designated new owner. In that case we already fix the user space value and the internal pi_state up before we return. This mechanism can be used to fixup the above corner case as well. When the returning task, which failed to acquire the rt_mutex, notices that it is the designated owner of the futex, then it fixes up the stale user space value and the pi_state, before returning to user space. This happens with the futex hash bucket lock held, so the task which acquired the rt_mutex is guaranteed to be blocked on the hash bucket lock. We can access the rt_mutex owner, which gives us the pid of the new owner, safely here as the owner is not able to modify (release) it while waiting on the hash bucket lock. Rename the "curr" argument of fixup_pi_state_owner() to "newowner" to avoid confusion with current and add the check for the stale state into the failure path of rt_mutex_trylock() in the return path of unlock_futex_pi(). If the situation is detected use fixup_pi_state_owner() to assign everything to the owner of the rt_mutex. Pointed-out-and-tested-by: Roland Westrelin <roland.westrelin@sun.com> Signed-off-by: Ingo Molnar <mingo@elte.hu> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--kernel/futex.c51
1 files changed, 41 insertions, 10 deletions
diff --git a/kernel/futex.c b/kernel/futex.c
index 172a1aeeafdb..db9824de8bf0 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1097,15 +1097,15 @@ static void unqueue_me_pi(struct futex_q *q)
1097} 1097}
1098 1098
1099/* 1099/*
1100 * Fixup the pi_state owner with current. 1100 * Fixup the pi_state owner with the new owner.
1101 * 1101 *
1102 * Must be called with hash bucket lock held and mm->sem held for non 1102 * Must be called with hash bucket lock held and mm->sem held for non
1103 * private futexes. 1103 * private futexes.
1104 */ 1104 */
1105static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, 1105static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
1106 struct task_struct *curr) 1106 struct task_struct *newowner)
1107{ 1107{
1108 u32 newtid = task_pid_vnr(curr) | FUTEX_WAITERS; 1108 u32 newtid = task_pid_vnr(newowner) | FUTEX_WAITERS;
1109 struct futex_pi_state *pi_state = q->pi_state; 1109 struct futex_pi_state *pi_state = q->pi_state;
1110 u32 uval, curval, newval; 1110 u32 uval, curval, newval;
1111 int ret; 1111 int ret;
@@ -1119,12 +1119,12 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
1119 } else 1119 } else
1120 newtid |= FUTEX_OWNER_DIED; 1120 newtid |= FUTEX_OWNER_DIED;
1121 1121
1122 pi_state->owner = curr; 1122 pi_state->owner = newowner;
1123 1123
1124 spin_lock_irq(&curr->pi_lock); 1124 spin_lock_irq(&newowner->pi_lock);
1125 WARN_ON(!list_empty(&pi_state->list)); 1125 WARN_ON(!list_empty(&pi_state->list));
1126 list_add(&pi_state->list, &curr->pi_state_list); 1126 list_add(&pi_state->list, &newowner->pi_state_list);
1127 spin_unlock_irq(&curr->pi_lock); 1127 spin_unlock_irq(&newowner->pi_lock);
1128 1128
1129 /* 1129 /*
1130 * We own it, so we have to replace the pending owner 1130 * We own it, so we have to replace the pending owner
@@ -1508,9 +1508,40 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
1508 * when we were on the way back before we locked the 1508 * when we were on the way back before we locked the
1509 * hash bucket. 1509 * hash bucket.
1510 */ 1510 */
1511 if (q.pi_state->owner == curr && 1511 if (q.pi_state->owner == curr) {
1512 rt_mutex_trylock(&q.pi_state->pi_mutex)) { 1512 /*
1513 ret = 0; 1513 * Try to get the rt_mutex now. This might
1514 * fail as some other task acquired the
1515 * rt_mutex after we removed ourself from the
1516 * rt_mutex waiters list.
1517 */
1518 if (rt_mutex_trylock(&q.pi_state->pi_mutex))
1519 ret = 0;
1520 else {
1521 /*
1522 * pi_state is incorrect, some other
1523 * task did a lock steal and we
1524 * returned due to timeout or signal
1525 * without taking the rt_mutex. Too
1526 * late. We can access the
1527 * rt_mutex_owner without locking, as
1528 * the other task is now blocked on
1529 * the hash bucket lock. Fix the state
1530 * up.
1531 */
1532 struct task_struct *owner;
1533 int res;
1534
1535 owner = rt_mutex_owner(&q.pi_state->pi_mutex);
1536 res = fixup_pi_state_owner(uaddr, &q, owner);
1537
1538 WARN_ON(rt_mutex_owner(&q.pi_state->pi_mutex) !=
1539 owner);
1540
1541 /* propagate -EFAULT, if the fixup failed */
1542 if (res)
1543 ret = res;
1544 }
1514 } else { 1545 } else {
1515 /* 1546 /*
1516 * Paranoia check. If we did not take the lock 1547 * Paranoia check. If we did not take the lock