aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2014-06-05 15:31:32 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2014-06-05 15:31:32 -0400
commit1c5aefb5b12a90e29866c960a57c1f8f75def617 (patch)
tree038ed51fcd95c37b5a1b1e22de1a5de06ac6640a
parent54539cd217d687d9acf385eab22ec02b3f7a86a0 (diff)
parent54a217887a7b658e2650c3feff22756ab80c7339 (diff)
Merge branch 'futex-fixes' (futex fixes from Thomas Gleixner)
Merge futex fixes from Thomas Gleixner: "So with more awake and less futex wreckaged brain, I went through my list of points again and came up with the following 4 patches. 1) Prevent pi requeueing on the same futex I kept Kees check for uaddr1 == uaddr2 as a early check for private futexes and added a key comparison to both futex_requeue and futex_wait_requeue_pi. Sebastian, sorry for the confusion yesterday night. I really misunderstood your question. You are right the check is pointless for shared futexes where the same physical address is mapped to two different virtual addresses. 2) Sanity check atomic acquisiton in futex_lock_pi_atomic That's basically what Darren suggested. I just simplified it to use futex_top_waiter() to find kernel internal state. If state is found return -EINVAL and do not bother to fix up the user space variable. It's corrupted already. 3) Ensure state consistency in futex_unlock_pi The code is silly versus the owner died bit. There is no point to preserve it on unlock when the user space thread owns the futex. What's worse is that it does not update the user space value when the owner died bit is set. So the kernel itself creates observable inconsistency. Another "optimization" is to retry an atomic unlock. That's pointless as in a sane environment user space would not call into that code if it could have unlocked it atomically. So we always check whether there is kernel state around and only if there is none, we do the unlock by setting the user space value to 0. 4) Sanitize lookup_pi_state lookup_pi_state is ambigous about TID == 0 in the user space value. This can be a valid state even if there is kernel state on this uaddr, but we miss a few corner case checks. I tried to come up with a smaller solution hacking the checks into the current cruft, but it turned out to be ugly as hell and I got more confused than I was before. So I rewrote the sanity checks along the state documentation with awful lots of commentry" * emailed patches from Thomas Gleixner <tglx@linutronix.de>: futex: Make lookup_pi_state more robust futex: Always cleanup owner tid in unlock_pi futex: Validate atomic acquisition in futex_lock_pi_atomic() futex-prevent-requeue-pi-on-same-futex.patch futex: Forbid uaddr == uaddr2 in futex_requeue(..., requeue_pi=1)
-rw-r--r--kernel/futex.c213
1 files changed, 160 insertions, 53 deletions
diff --git a/kernel/futex.c b/kernel/futex.c
index 81dbe773ce4c..de938d20df19 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -743,10 +743,58 @@ void exit_pi_state_list(struct task_struct *curr)
743 raw_spin_unlock_irq(&curr->pi_lock); 743 raw_spin_unlock_irq(&curr->pi_lock);
744} 744}
745 745
746/*
747 * We need to check the following states:
748 *
749 * Waiter | pi_state | pi->owner | uTID | uODIED | ?
750 *
751 * [1] NULL | --- | --- | 0 | 0/1 | Valid
752 * [2] NULL | --- | --- | >0 | 0/1 | Valid
753 *
754 * [3] Found | NULL | -- | Any | 0/1 | Invalid
755 *
756 * [4] Found | Found | NULL | 0 | 1 | Valid
757 * [5] Found | Found | NULL | >0 | 1 | Invalid
758 *
759 * [6] Found | Found | task | 0 | 1 | Valid
760 *
761 * [7] Found | Found | NULL | Any | 0 | Invalid
762 *
763 * [8] Found | Found | task | ==taskTID | 0/1 | Valid
764 * [9] Found | Found | task | 0 | 0 | Invalid
765 * [10] Found | Found | task | !=taskTID | 0/1 | Invalid
766 *
767 * [1] Indicates that the kernel can acquire the futex atomically. We
768 * came came here due to a stale FUTEX_WAITERS/FUTEX_OWNER_DIED bit.
769 *
770 * [2] Valid, if TID does not belong to a kernel thread. If no matching
771 * thread is found then it indicates that the owner TID has died.
772 *
773 * [3] Invalid. The waiter is queued on a non PI futex
774 *
775 * [4] Valid state after exit_robust_list(), which sets the user space
776 * value to FUTEX_WAITERS | FUTEX_OWNER_DIED.
777 *
778 * [5] The user space value got manipulated between exit_robust_list()
779 * and exit_pi_state_list()
780 *
781 * [6] Valid state after exit_pi_state_list() which sets the new owner in
782 * the pi_state but cannot access the user space value.
783 *
784 * [7] pi_state->owner can only be NULL when the OWNER_DIED bit is set.
785 *
786 * [8] Owner and user space value match
787 *
788 * [9] There is no transient state which sets the user space TID to 0
789 * except exit_robust_list(), but this is indicated by the
790 * FUTEX_OWNER_DIED bit. See [4]
791 *
792 * [10] There is no transient state which leaves owner and user space
793 * TID out of sync.
794 */
746static int 795static int
747lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, 796lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
748 union futex_key *key, struct futex_pi_state **ps, 797 union futex_key *key, struct futex_pi_state **ps)
749 struct task_struct *task)
750{ 798{
751 struct futex_pi_state *pi_state = NULL; 799 struct futex_pi_state *pi_state = NULL;
752 struct futex_q *this, *next; 800 struct futex_q *this, *next;
@@ -756,12 +804,13 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
756 plist_for_each_entry_safe(this, next, &hb->chain, list) { 804 plist_for_each_entry_safe(this, next, &hb->chain, list) {
757 if (match_futex(&this->key, key)) { 805 if (match_futex(&this->key, key)) {
758 /* 806 /*
759 * Another waiter already exists - bump up 807 * Sanity check the waiter before increasing
760 * the refcount and return its pi_state: 808 * the refcount and attaching to it.
761 */ 809 */
762 pi_state = this->pi_state; 810 pi_state = this->pi_state;
763 /* 811 /*
764 * Userspace might have messed up non-PI and PI futexes 812 * Userspace might have messed up non-PI and
813 * PI futexes [3]
765 */ 814 */
766 if (unlikely(!pi_state)) 815 if (unlikely(!pi_state))
767 return -EINVAL; 816 return -EINVAL;
@@ -769,44 +818,70 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
769 WARN_ON(!atomic_read(&pi_state->refcount)); 818 WARN_ON(!atomic_read(&pi_state->refcount));
770 819
771 /* 820 /*
772 * When pi_state->owner is NULL then the owner died 821 * Handle the owner died case:
773 * and another waiter is on the fly. pi_state->owner
774 * is fixed up by the task which acquires
775 * pi_state->rt_mutex.
776 *
777 * We do not check for pid == 0 which can happen when
778 * the owner died and robust_list_exit() cleared the
779 * TID.
780 */ 822 */
781 if (pid && pi_state->owner) { 823 if (uval & FUTEX_OWNER_DIED) {
782 /* 824 /*
783 * Bail out if user space manipulated the 825 * exit_pi_state_list sets owner to NULL and
784 * futex value. 826 * wakes the topmost waiter. The task which
827 * acquires the pi_state->rt_mutex will fixup
828 * owner.
785 */ 829 */
786 if (pid != task_pid_vnr(pi_state->owner)) 830 if (!pi_state->owner) {
831 /*
832 * No pi state owner, but the user
833 * space TID is not 0. Inconsistent
834 * state. [5]
835 */
836 if (pid)
837 return -EINVAL;
838 /*
839 * Take a ref on the state and
840 * return. [4]
841 */
842 goto out_state;
843 }
844
845 /*
846 * If TID is 0, then either the dying owner
847 * has not yet executed exit_pi_state_list()
848 * or some waiter acquired the rtmutex in the
849 * pi state, but did not yet fixup the TID in
850 * user space.
851 *
852 * Take a ref on the state and return. [6]
853 */
854 if (!pid)
855 goto out_state;
856 } else {
857 /*
858 * If the owner died bit is not set,
859 * then the pi_state must have an
860 * owner. [7]
861 */
862 if (!pi_state->owner)
787 return -EINVAL; 863 return -EINVAL;
788 } 864 }
789 865
790 /* 866 /*
791 * Protect against a corrupted uval. If uval 867 * Bail out if user space manipulated the
792 * is 0x80000000 then pid is 0 and the waiter 868 * futex value. If pi state exists then the
793 * bit is set. So the deadlock check in the 869 * owner TID must be the same as the user
794 * calling code has failed and we did not fall 870 * space TID. [9/10]
795 * into the check above due to !pid.
796 */ 871 */
797 if (task && pi_state->owner == task) 872 if (pid != task_pid_vnr(pi_state->owner))
798 return -EDEADLK; 873 return -EINVAL;
799 874
875 out_state:
800 atomic_inc(&pi_state->refcount); 876 atomic_inc(&pi_state->refcount);
801 *ps = pi_state; 877 *ps = pi_state;
802
803 return 0; 878 return 0;
804 } 879 }
805 } 880 }
806 881
807 /* 882 /*
808 * We are the first waiter - try to look up the real owner and attach 883 * We are the first waiter - try to look up the real owner and attach
809 * the new pi_state to it, but bail out when TID = 0 884 * the new pi_state to it, but bail out when TID = 0 [1]
810 */ 885 */
811 if (!pid) 886 if (!pid)
812 return -ESRCH; 887 return -ESRCH;
@@ -839,6 +914,9 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
839 return ret; 914 return ret;
840 } 915 }
841 916
917 /*
918 * No existing pi state. First waiter. [2]
919 */
842 pi_state = alloc_pi_state(); 920 pi_state = alloc_pi_state();
843 921
844 /* 922 /*
@@ -910,10 +988,18 @@ retry:
910 return -EDEADLK; 988 return -EDEADLK;
911 989
912 /* 990 /*
913 * Surprise - we got the lock. Just return to userspace: 991 * Surprise - we got the lock, but we do not trust user space at all.
914 */ 992 */
915 if (unlikely(!curval)) 993 if (unlikely(!curval)) {
916 return 1; 994 /*
995 * We verify whether there is kernel state for this
996 * futex. If not, we can safely assume, that the 0 ->
997 * TID transition is correct. If state exists, we do
998 * not bother to fixup the user space state as it was
999 * corrupted already.
1000 */
1001 return futex_top_waiter(hb, key) ? -EINVAL : 1;
1002 }
917 1003
918 uval = curval; 1004 uval = curval;
919 1005
@@ -951,7 +1037,7 @@ retry:
951 * We dont have the lock. Look up the PI state (or create it if 1037 * We dont have the lock. Look up the PI state (or create it if
952 * we are the first waiter): 1038 * we are the first waiter):
953 */ 1039 */
954 ret = lookup_pi_state(uval, hb, key, ps, task); 1040 ret = lookup_pi_state(uval, hb, key, ps);
955 1041
956 if (unlikely(ret)) { 1042 if (unlikely(ret)) {
957 switch (ret) { 1043 switch (ret) {
@@ -1044,6 +1130,7 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
1044 struct task_struct *new_owner; 1130 struct task_struct *new_owner;
1045 struct futex_pi_state *pi_state = this->pi_state; 1131 struct futex_pi_state *pi_state = this->pi_state;
1046 u32 uninitialized_var(curval), newval; 1132 u32 uninitialized_var(curval), newval;
1133 int ret = 0;
1047 1134
1048 if (!pi_state) 1135 if (!pi_state)
1049 return -EINVAL; 1136 return -EINVAL;
@@ -1067,23 +1154,19 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
1067 new_owner = this->task; 1154 new_owner = this->task;
1068 1155
1069 /* 1156 /*
1070 * We pass it to the next owner. (The WAITERS bit is always 1157 * We pass it to the next owner. The WAITERS bit is always
1071 * kept enabled while there is PI state around. We must also 1158 * kept enabled while there is PI state around. We cleanup the
1072 * preserve the owner died bit.) 1159 * owner died bit, because we are the owner.
1073 */ 1160 */
1074 if (!(uval & FUTEX_OWNER_DIED)) { 1161 newval = FUTEX_WAITERS | task_pid_vnr(new_owner);
1075 int ret = 0;
1076
1077 newval = FUTEX_WAITERS | task_pid_vnr(new_owner);
1078 1162
1079 if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval)) 1163 if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval))
1080 ret = -EFAULT; 1164 ret = -EFAULT;
1081 else if (curval != uval) 1165 else if (curval != uval)
1082 ret = -EINVAL; 1166 ret = -EINVAL;
1083 if (ret) { 1167 if (ret) {
1084 raw_spin_unlock(&pi_state->pi_mutex.wait_lock); 1168 raw_spin_unlock(&pi_state->pi_mutex.wait_lock);
1085 return ret; 1169 return ret;
1086 }
1087 } 1170 }
1088 1171
1089 raw_spin_lock_irq(&pi_state->owner->pi_lock); 1172 raw_spin_lock_irq(&pi_state->owner->pi_lock);
@@ -1442,6 +1525,13 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
1442 1525
1443 if (requeue_pi) { 1526 if (requeue_pi) {
1444 /* 1527 /*
1528 * Requeue PI only works on two distinct uaddrs. This
1529 * check is only valid for private futexes. See below.
1530 */
1531 if (uaddr1 == uaddr2)
1532 return -EINVAL;
1533
1534 /*
1445 * requeue_pi requires a pi_state, try to allocate it now 1535 * requeue_pi requires a pi_state, try to allocate it now
1446 * without any locks in case it fails. 1536 * without any locks in case it fails.
1447 */ 1537 */
@@ -1479,6 +1569,15 @@ retry:
1479 if (unlikely(ret != 0)) 1569 if (unlikely(ret != 0))
1480 goto out_put_key1; 1570 goto out_put_key1;
1481 1571
1572 /*
1573 * The check above which compares uaddrs is not sufficient for
1574 * shared futexes. We need to compare the keys:
1575 */
1576 if (requeue_pi && match_futex(&key1, &key2)) {
1577 ret = -EINVAL;
1578 goto out_put_keys;
1579 }
1580
1482 hb1 = hash_futex(&key1); 1581 hb1 = hash_futex(&key1);
1483 hb2 = hash_futex(&key2); 1582 hb2 = hash_futex(&key2);
1484 1583
@@ -1544,7 +1643,7 @@ retry_private:
1544 * rereading and handing potential crap to 1643 * rereading and handing potential crap to
1545 * lookup_pi_state. 1644 * lookup_pi_state.
1546 */ 1645 */
1547 ret = lookup_pi_state(ret, hb2, &key2, &pi_state, NULL); 1646 ret = lookup_pi_state(ret, hb2, &key2, &pi_state);
1548 } 1647 }
1549 1648
1550 switch (ret) { 1649 switch (ret) {
@@ -2327,9 +2426,10 @@ retry:
2327 /* 2426 /*
2328 * To avoid races, try to do the TID -> 0 atomic transition 2427 * To avoid races, try to do the TID -> 0 atomic transition
2329 * again. If it succeeds then we can return without waking 2428 * again. If it succeeds then we can return without waking
2330 * anyone else up: 2429 * anyone else up. We only try this if neither the waiters nor
2430 * the owner died bit are set.
2331 */ 2431 */
2332 if (!(uval & FUTEX_OWNER_DIED) && 2432 if (!(uval & ~FUTEX_TID_MASK) &&
2333 cmpxchg_futex_value_locked(&uval, uaddr, vpid, 0)) 2433 cmpxchg_futex_value_locked(&uval, uaddr, vpid, 0))
2334 goto pi_faulted; 2434 goto pi_faulted;
2335 /* 2435 /*
@@ -2359,11 +2459,9 @@ retry:
2359 /* 2459 /*
2360 * No waiters - kernel unlocks the futex: 2460 * No waiters - kernel unlocks the futex:
2361 */ 2461 */
2362 if (!(uval & FUTEX_OWNER_DIED)) { 2462 ret = unlock_futex_pi(uaddr, uval);
2363 ret = unlock_futex_pi(uaddr, uval); 2463 if (ret == -EFAULT)
2364 if (ret == -EFAULT) 2464 goto pi_faulted;
2365 goto pi_faulted;
2366 }
2367 2465
2368out_unlock: 2466out_unlock:
2369 spin_unlock(&hb->lock); 2467 spin_unlock(&hb->lock);
@@ -2525,6 +2623,15 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
2525 if (ret) 2623 if (ret)
2526 goto out_key2; 2624 goto out_key2;
2527 2625
2626 /*
2627 * The check above which compares uaddrs is not sufficient for
2628 * shared futexes. We need to compare the keys:
2629 */
2630 if (match_futex(&q.key, &key2)) {
2631 ret = -EINVAL;
2632 goto out_put_keys;
2633 }
2634
2528 /* Queue the futex_q, drop the hb lock, wait for wakeup. */ 2635 /* Queue the futex_q, drop the hb lock, wait for wakeup. */
2529 futex_wait_queue_me(hb, &q, to); 2636 futex_wait_queue_me(hb, &q, to);
2530 2637