From fb75a4282d0d9a3c7c44d940582c2d226cf3acfb Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 19 Dec 2015 20:07:38 +0000 Subject: futex: Drop refcount if requeue_pi() acquired the rtmutex If the proxy lock in the requeue loop acquires the rtmutex for a waiter then it acquired also refcount on the pi_state related to the futex, but the waiter side does not drop the reference count. Add the missing free_pi_state() call. Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Darren Hart Cc: Davidlohr Bueso Cc: Bhuvanesh_Surachari@mentor.com Cc: Andy Lowe Link: http://lkml.kernel.org/r/20151219200607.178132067@linutronix.de Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org --- kernel/futex.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'kernel/futex.c') diff --git a/kernel/futex.c b/kernel/futex.c index 684d7549825a..24fbc7765828 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -2755,6 +2755,11 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, if (q.pi_state && (q.pi_state->owner != current)) { spin_lock(q.lock_ptr); ret = fixup_pi_state_owner(uaddr2, &q, current); + /* + * Drop the reference to the pi state which + * the requeue_pi() code acquired for us. + */ + free_pi_state(q.pi_state); spin_unlock(q.lock_ptr); } } else { -- cgit v1.2.2 From 29e9ee5d48c35d6cf8afe09bdf03f77125c9ac11 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 19 Dec 2015 20:07:39 +0000 Subject: futex: Rename free_pi_state() to put_pi_state() free_pi_state() is confusing as it is in fact only freeing/caching the pi state when the last reference is gone. Rename it to put_pi_state() which reflects better what it is doing. Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Darren Hart Cc: Davidlohr Bueso Cc: Bhuvanesh_Surachari@mentor.com Cc: Andy Lowe Link: http://lkml.kernel.org/r/20151219200607.259636467@linutronix.de Signed-off-by: Thomas Gleixner --- kernel/futex.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) (limited to 'kernel/futex.c') diff --git a/kernel/futex.c b/kernel/futex.c index 24fbc7765828..f1581ff47122 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -725,9 +725,12 @@ static struct futex_pi_state * alloc_pi_state(void) } /* + * Drops a reference to the pi_state object and frees or caches it + * when the last reference is gone. + * * Must be called with the hb lock held. */ -static void free_pi_state(struct futex_pi_state *pi_state) +static void put_pi_state(struct futex_pi_state *pi_state) { if (!pi_state) return; @@ -1729,7 +1732,7 @@ retry_private: case 0: break; case -EFAULT: - free_pi_state(pi_state); + put_pi_state(pi_state); pi_state = NULL; double_unlock_hb(hb1, hb2); hb_waiters_dec(hb2); @@ -1746,7 +1749,7 @@ retry_private: * exit to complete. * - The user space value changed. */ - free_pi_state(pi_state); + put_pi_state(pi_state); pi_state = NULL; double_unlock_hb(hb1, hb2); hb_waiters_dec(hb2); @@ -1815,7 +1818,7 @@ retry_private: } else if (ret) { /* -EDEADLK */ this->pi_state = NULL; - free_pi_state(pi_state); + put_pi_state(pi_state); goto out_unlock; } } @@ -1824,7 +1827,7 @@ retry_private: } out_unlock: - free_pi_state(pi_state); + put_pi_state(pi_state); double_unlock_hb(hb1, hb2); wake_up_q(&wake_q); hb_waiters_dec(hb2); @@ -1973,7 +1976,7 @@ static void unqueue_me_pi(struct futex_q *q) __unqueue_futex(q); BUG_ON(!q->pi_state); - free_pi_state(q->pi_state); + put_pi_state(q->pi_state); q->pi_state = NULL; spin_unlock(q->lock_ptr); @@ -2759,7 +2762,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, * Drop the reference to the pi state which * the requeue_pi() code acquired for us. */ - free_pi_state(q.pi_state); + put_pi_state(q.pi_state); spin_unlock(q.lock_ptr); } } else { -- cgit v1.2.2 From ecb38b78f698a51988ec456751b20440e54702fb Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 19 Dec 2015 20:07:39 +0000 Subject: futex: Document pi_state refcounting in requeue code Documentation of the pi_state refcounting in the requeue code is non existent. Add it. Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Darren Hart Cc: Davidlohr Bueso Cc: Bhuvanesh_Surachari@mentor.com Cc: Andy Lowe Link: http://lkml.kernel.org/r/20151219200607.335938312@linutronix.de Signed-off-by: Thomas Gleixner --- kernel/futex.c | 51 +++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 39 insertions(+), 12 deletions(-) (limited to 'kernel/futex.c') diff --git a/kernel/futex.c b/kernel/futex.c index f1581ff47122..20c468356b90 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -1709,27 +1709,31 @@ retry_private: * exist yet, look it up one more time to ensure we have a * reference to it. If the lock was taken, ret contains the * vpid of the top waiter task. + * If the lock was not taken, we have pi_state and an initial + * refcount on it. In case of an error we have nothing. */ if (ret > 0) { WARN_ON(pi_state); drop_count++; task_count++; /* - * If we acquired the lock, then the user - * space value of uaddr2 should be vpid. It - * cannot be changed by the top waiter as it - * is blocked on hb2 lock if it tries to do - * so. If something fiddled with it behind our - * back the pi state lookup might unearth - * it. So we rather use the known value than - * rereading and handing potential crap to - * lookup_pi_state. + * If we acquired the lock, then the user space value + * of uaddr2 should be vpid. It cannot be changed by + * the top waiter as it is blocked on hb2 lock if it + * tries to do so. If something fiddled with it behind + * our back the pi state lookup might unearth it. So + * we rather use the known value than rereading and + * handing potential crap to lookup_pi_state. + * + * If that call succeeds then we have pi_state and an + * initial refcount on it. */ ret = lookup_pi_state(ret, hb2, &key2, &pi_state); } switch (ret) { case 0: + /* We hold a reference on the pi state. */ break; case -EFAULT: put_pi_state(pi_state); @@ -1804,19 +1808,37 @@ retry_private: * of requeue_pi if we couldn't acquire the lock atomically. */ if (requeue_pi) { - /* Prepare the waiter to take the rt_mutex. */ + /* + * Prepare the waiter to take the rt_mutex. Take a + * refcount on the pi_state and store the pointer in + * the futex_q object of the waiter. + */ atomic_inc(&pi_state->refcount); this->pi_state = pi_state; ret = rt_mutex_start_proxy_lock(&pi_state->pi_mutex, this->rt_waiter, this->task); if (ret == 1) { - /* We got the lock. */ + /* + * We got the lock. We do neither drop the + * refcount on pi_state nor clear + * this->pi_state because the waiter needs the + * pi_state for cleaning up the user space + * value. It will drop the refcount after + * doing so. + */ requeue_pi_wake_futex(this, &key2, hb2); drop_count++; continue; } else if (ret) { - /* -EDEADLK */ + /* + * rt_mutex_start_proxy_lock() detected a + * potential deadlock when we tried to queue + * that waiter. Drop the pi_state reference + * which we took above and remove the pointer + * to the state from the waiters futex_q + * object. + */ this->pi_state = NULL; put_pi_state(pi_state); goto out_unlock; @@ -1827,6 +1849,11 @@ retry_private: } out_unlock: + /* + * We took an extra initial reference to the pi_state either + * in futex_proxy_trylock_atomic() or in lookup_pi_state(). We + * need to drop it here again. + */ put_pi_state(pi_state); double_unlock_hb(hb1, hb2); wake_up_q(&wake_q); -- cgit v1.2.2 From 4959f2de11ca532a120a337429e5576fd283700f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 19 Dec 2015 20:07:40 +0000 Subject: futex: Remove pointless put_pi_state calls in requeue() In the error handling cases we neither have pi_state nor a reference to it. Remove the pointless code. Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Darren Hart Cc: Davidlohr Bueso Cc: Bhuvanesh_Surachari@mentor.com Cc: Andy Lowe Link: http://lkml.kernel.org/r/20151219200607.432780944@linutronix.de Signed-off-by: Thomas Gleixner --- kernel/futex.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'kernel/futex.c') diff --git a/kernel/futex.c b/kernel/futex.c index 20c468356b90..dcec01856cf3 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -1735,9 +1735,9 @@ retry_private: case 0: /* We hold a reference on the pi state. */ break; + + /* If the above failed, then pi_state is NULL */ case -EFAULT: - put_pi_state(pi_state); - pi_state = NULL; double_unlock_hb(hb1, hb2); hb_waiters_dec(hb2); put_futex_key(&key2); @@ -1753,8 +1753,6 @@ retry_private: * exit to complete. * - The user space value changed. */ - put_pi_state(pi_state); - pi_state = NULL; double_unlock_hb(hb1, hb2); hb_waiters_dec(hb2); put_futex_key(&key2); -- cgit v1.2.2 From 885c2cb770b5ac2507c41bc9f91a5d1c98337bee Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 19 Dec 2015 20:07:41 +0000 Subject: futex: Cleanup the goto confusion in requeue_pi() out_unlock: does not only drop the locks, it also drops the refcount on the pi_state. Really intuitive. Move the label after the put_pi_state() call and use 'break' in the error handling path of the requeue loop. Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Darren Hart Cc: Davidlohr Bueso Cc: Bhuvanesh_Surachari@mentor.com Cc: Andy Lowe Link: http://lkml.kernel.org/r/20151219200607.526665141@linutronix.de Signed-off-by: Thomas Gleixner --- kernel/futex.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'kernel/futex.c') diff --git a/kernel/futex.c b/kernel/futex.c index dcec01856cf3..461d438f4816 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -1839,20 +1839,25 @@ retry_private: */ this->pi_state = NULL; put_pi_state(pi_state); - goto out_unlock; + /* + * We stop queueing more waiters and let user + * space deal with the mess. + */ + break; } } requeue_futex(this, hb1, hb2, &key2); drop_count++; } -out_unlock: /* * We took an extra initial reference to the pi_state either * in futex_proxy_trylock_atomic() or in lookup_pi_state(). We * need to drop it here again. */ put_pi_state(pi_state); + +out_unlock: double_unlock_hb(hb1, hb2); wake_up_q(&wake_q); hb_waiters_dec(hb2); -- cgit v1.2.2 From 337f13046ff03717a9e99675284a817527440a49 Mon Sep 17 00:00:00 2001 From: Darren Hart Date: Fri, 18 Dec 2015 13:36:37 -0800 Subject: futex: Allow FUTEX_CLOCK_REALTIME with FUTEX_WAIT op While reviewing Michael Kerrisk's recent futex manpage update, I noticed that we allow the FUTEX_CLOCK_REALTIME flag for FUTEX_WAIT_BITSET but not for FUTEX_WAIT. FUTEX_WAIT is treated as a simple version for FUTEX_WAIT_BITSET internally (with a bitmask of FUTEX_BITSET_MATCH_ANY). As such, I cannot come up with a reason for this exclusion for FUTEX_WAIT. This change does modify the behavior of the futex syscall, changing a call with FUTEX_WAIT | FUTEX_CLOCK_REALTIME from returning -ENOSYS, to be equivalent to FUTEX_WAIT_BITSET | FUTEX_CLOCK_REALTIME with a bitset of FUTEX_BITSET_MATCH_ANY. Reported-by: Michael Kerrisk Signed-off-by: Darren Hart Cc: Peter Zijlstra Cc: Davidlohr Bueso Link: http://lkml.kernel.org/r/9f3bdc116d79d23f5ee72ceb9a2a857f5ff8fa29.1450474525.git.dvhart@linux.intel.com Signed-off-by: Thomas Gleixner --- kernel/futex.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'kernel/futex.c') diff --git a/kernel/futex.c b/kernel/futex.c index 461d438f4816..8a310e240cda 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -3084,7 +3084,8 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, if (op & FUTEX_CLOCK_REALTIME) { flags |= FLAGS_CLOCKRT; - if (cmd != FUTEX_WAIT_BITSET && cmd != FUTEX_WAIT_REQUEUE_PI) + if (cmd != FUTEX_WAIT && cmd != FUTEX_WAIT_BITSET && \ + cmd != FUTEX_WAIT_REQUEUE_PI) return -ENOSYS; } -- cgit v1.2.2