aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/futex.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/futex.c')
-rw-r--r--kernel/futex.c402
1 files changed, 183 insertions, 219 deletions
diff --git a/kernel/futex.c b/kernel/futex.c
index b632b5f3f094..d3a9d946d0b7 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -792,94 +792,91 @@ void exit_pi_state_list(struct task_struct *curr)
792 * [10] There is no transient state which leaves owner and user space 792 * [10] There is no transient state which leaves owner and user space
793 * TID out of sync. 793 * TID out of sync.
794 */ 794 */
795static int 795
796lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, 796/*
797 union futex_key *key, struct futex_pi_state **ps) 797 * Validate that the existing waiter has a pi_state and sanity check
798 * the pi_state against the user space value. If correct, attach to
799 * it.
800 */
801static int attach_to_pi_state(u32 uval, struct futex_pi_state *pi_state,
802 struct futex_pi_state **ps)
798{ 803{
799 struct futex_pi_state *pi_state = NULL;
800 struct futex_q *this, *next;
801 struct task_struct *p;
802 pid_t pid = uval & FUTEX_TID_MASK; 804 pid_t pid = uval & FUTEX_TID_MASK;
803 805
804 plist_for_each_entry_safe(this, next, &hb->chain, list) { 806 /*
805 if (match_futex(&this->key, key)) { 807 * Userspace might have messed up non-PI and PI futexes [3]
806 /* 808 */
807 * Sanity check the waiter before increasing 809 if (unlikely(!pi_state))
808 * the refcount and attaching to it. 810 return -EINVAL;
809 */
810 pi_state = this->pi_state;
811 /*
812 * Userspace might have messed up non-PI and
813 * PI futexes [3]
814 */
815 if (unlikely(!pi_state))
816 return -EINVAL;
817 811
818 WARN_ON(!atomic_read(&pi_state->refcount)); 812 WARN_ON(!atomic_read(&pi_state->refcount));
819 813
814 /*
815 * Handle the owner died case:
816 */
817 if (uval & FUTEX_OWNER_DIED) {
818 /*
819 * exit_pi_state_list sets owner to NULL and wakes the
820 * topmost waiter. The task which acquires the
821 * pi_state->rt_mutex will fixup owner.
822 */
823 if (!pi_state->owner) {
820 /* 824 /*
821 * Handle the owner died case: 825 * No pi state owner, but the user space TID
826 * is not 0. Inconsistent state. [5]
822 */ 827 */
823 if (uval & FUTEX_OWNER_DIED) { 828 if (pid)
824 /* 829 return -EINVAL;
825 * exit_pi_state_list sets owner to NULL and
826 * wakes the topmost waiter. The task which
827 * acquires the pi_state->rt_mutex will fixup
828 * owner.
829 */
830 if (!pi_state->owner) {
831 /*
832 * No pi state owner, but the user
833 * space TID is not 0. Inconsistent
834 * state. [5]
835 */
836 if (pid)
837 return -EINVAL;
838 /*
839 * Take a ref on the state and
840 * return. [4]
841 */
842 goto out_state;
843 }
844
845 /*
846 * If TID is 0, then either the dying owner
847 * has not yet executed exit_pi_state_list()
848 * or some waiter acquired the rtmutex in the
849 * pi state, but did not yet fixup the TID in
850 * user space.
851 *
852 * Take a ref on the state and return. [6]
853 */
854 if (!pid)
855 goto out_state;
856 } else {
857 /*
858 * If the owner died bit is not set,
859 * then the pi_state must have an
860 * owner. [7]
861 */
862 if (!pi_state->owner)
863 return -EINVAL;
864 }
865
866 /* 830 /*
867 * Bail out if user space manipulated the 831 * Take a ref on the state and return success. [4]
868 * futex value. If pi state exists then the
869 * owner TID must be the same as the user
870 * space TID. [9/10]
871 */ 832 */
872 if (pid != task_pid_vnr(pi_state->owner)) 833 goto out_state;
873 return -EINVAL;
874
875 out_state:
876 atomic_inc(&pi_state->refcount);
877 *ps = pi_state;
878 return 0;
879 } 834 }
835
836 /*
837 * If TID is 0, then either the dying owner has not
838 * yet executed exit_pi_state_list() or some waiter
839 * acquired the rtmutex in the pi state, but did not
840 * yet fixup the TID in user space.
841 *
842 * Take a ref on the state and return success. [6]
843 */
844 if (!pid)
845 goto out_state;
846 } else {
847 /*
848 * If the owner died bit is not set, then the pi_state
849 * must have an owner. [7]
850 */
851 if (!pi_state->owner)
852 return -EINVAL;
880 } 853 }
881 854
882 /* 855 /*
856 * Bail out if user space manipulated the futex value. If pi
857 * state exists then the owner TID must be the same as the
858 * user space TID. [9/10]
859 */
860 if (pid != task_pid_vnr(pi_state->owner))
861 return -EINVAL;
862out_state:
863 atomic_inc(&pi_state->refcount);
864 *ps = pi_state;
865 return 0;
866}
867
868/*
869 * Lookup the task for the TID provided from user space and attach to
870 * it after doing proper sanity checks.
871 */
872static int attach_to_pi_owner(u32 uval, union futex_key *key,
873 struct futex_pi_state **ps)
874{
875 pid_t pid = uval & FUTEX_TID_MASK;
876 struct futex_pi_state *pi_state;
877 struct task_struct *p;
878
879 /*
883 * We are the first waiter - try to look up the real owner and attach 880 * We are the first waiter - try to look up the real owner and attach
884 * the new pi_state to it, but bail out when TID = 0 [1] 881 * the new pi_state to it, but bail out when TID = 0 [1]
885 */ 882 */
@@ -920,7 +917,7 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
920 pi_state = alloc_pi_state(); 917 pi_state = alloc_pi_state();
921 918
922 /* 919 /*
923 * Initialize the pi_mutex in locked state and make 'p' 920 * Initialize the pi_mutex in locked state and make @p
924 * the owner of it: 921 * the owner of it:
925 */ 922 */
926 rt_mutex_init_proxy_locked(&pi_state->pi_mutex, p); 923 rt_mutex_init_proxy_locked(&pi_state->pi_mutex, p);
@@ -940,6 +937,36 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
940 return 0; 937 return 0;
941} 938}
942 939
940static int lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
941 union futex_key *key, struct futex_pi_state **ps)
942{
943 struct futex_q *match = futex_top_waiter(hb, key);
944
945 /*
946 * If there is a waiter on that futex, validate it and
947 * attach to the pi_state when the validation succeeds.
948 */
949 if (match)
950 return attach_to_pi_state(uval, match->pi_state, ps);
951
952 /*
953 * We are the first waiter - try to look up the owner based on
954 * @uval and attach to it.
955 */
956 return attach_to_pi_owner(uval, key, ps);
957}
958
959static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval)
960{
961 u32 uninitialized_var(curval);
962
963 if (unlikely(cmpxchg_futex_value_locked(&curval, uaddr, uval, newval)))
964 return -EFAULT;
965
966 /* If user space value changed, let the caller retry */
967 return curval != uval ? -EAGAIN : 0;
968}
969
943/** 970/**
944 * futex_lock_pi_atomic() - Atomic work required to acquire a pi aware futex 971 * futex_lock_pi_atomic() - Atomic work required to acquire a pi aware futex
945 * @uaddr: the pi futex user address 972 * @uaddr: the pi futex user address
@@ -963,113 +990,69 @@ static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
963 struct futex_pi_state **ps, 990 struct futex_pi_state **ps,
964 struct task_struct *task, int set_waiters) 991 struct task_struct *task, int set_waiters)
965{ 992{
966 int lock_taken, ret, force_take = 0; 993 u32 uval, newval, vpid = task_pid_vnr(task);
967 u32 uval, newval, curval, vpid = task_pid_vnr(task); 994 struct futex_q *match;
968 995 int ret;
969retry:
970 ret = lock_taken = 0;
971 996
972 /* 997 /*
973 * To avoid races, we attempt to take the lock here again 998 * Read the user space value first so we can validate a few
974 * (by doing a 0 -> TID atomic cmpxchg), while holding all 999 * things before proceeding further.
975 * the locks. It will most likely not succeed.
976 */ 1000 */
977 newval = vpid; 1001 if (get_futex_value_locked(&uval, uaddr))
978 if (set_waiters)
979 newval |= FUTEX_WAITERS;
980
981 if (unlikely(cmpxchg_futex_value_locked(&curval, uaddr, 0, newval)))
982 return -EFAULT; 1002 return -EFAULT;
983 1003
984 /* 1004 /*
985 * Detect deadlocks. 1005 * Detect deadlocks.
986 */ 1006 */
987 if ((unlikely((curval & FUTEX_TID_MASK) == vpid))) 1007 if ((unlikely((uval & FUTEX_TID_MASK) == vpid)))
988 return -EDEADLK; 1008 return -EDEADLK;
989 1009
990 /* 1010 /*
991 * Surprise - we got the lock, but we do not trust user space at all. 1011 * Lookup existing state first. If it exists, try to attach to
992 */ 1012 * its pi_state.
993 if (unlikely(!curval)) {
994 /*
995 * We verify whether there is kernel state for this
996 * futex. If not, we can safely assume, that the 0 ->
997 * TID transition is correct. If state exists, we do
998 * not bother to fixup the user space state as it was
999 * corrupted already.
1000 */
1001 return futex_top_waiter(hb, key) ? -EINVAL : 1;
1002 }
1003
1004 uval = curval;
1005
1006 /*
1007 * Set the FUTEX_WAITERS flag, so the owner will know it has someone
1008 * to wake at the next unlock.
1009 */ 1013 */
1010 newval = curval | FUTEX_WAITERS; 1014 match = futex_top_waiter(hb, key);
1015 if (match)
1016 return attach_to_pi_state(uval, match->pi_state, ps);
1011 1017
1012 /* 1018 /*
1013 * Should we force take the futex? See below. 1019 * No waiter and user TID is 0. We are here because the
1020 * waiters or the owner died bit is set or called from
1021 * requeue_cmp_pi or for whatever reason something took the
1022 * syscall.
1014 */ 1023 */
1015 if (unlikely(force_take)) { 1024 if (!(uval & FUTEX_TID_MASK)) {
1016 /* 1025 /*
1017 * Keep the OWNER_DIED and the WAITERS bit and set the 1026 * We take over the futex. No other waiters and the user space
1018 * new TID value. 1027 * TID is 0. We preserve the owner died bit.
1019 */ 1028 */
1020 newval = (curval & ~FUTEX_TID_MASK) | vpid; 1029 newval = uval & FUTEX_OWNER_DIED;
1021 force_take = 0; 1030 newval |= vpid;
1022 lock_taken = 1;
1023 }
1024 1031
1025 if (unlikely(cmpxchg_futex_value_locked(&curval, uaddr, uval, newval))) 1032 /* The futex requeue_pi code can enforce the waiters bit */
1026 return -EFAULT; 1033 if (set_waiters)
1027 if (unlikely(curval != uval)) 1034 newval |= FUTEX_WAITERS;
1028 goto retry; 1035
1036 ret = lock_pi_update_atomic(uaddr, uval, newval);
1037 /* If the take over worked, return 1 */
1038 return ret < 0 ? ret : 1;
1039 }
1029 1040
1030 /* 1041 /*
1031 * We took the lock due to forced take over. 1042 * First waiter. Set the waiters bit before attaching ourself to
1043 * the owner. If owner tries to unlock, it will be forced into
1044 * the kernel and blocked on hb->lock.
1032 */ 1045 */
1033 if (unlikely(lock_taken)) 1046 newval = uval | FUTEX_WAITERS;
1034 return 1; 1047 ret = lock_pi_update_atomic(uaddr, uval, newval);
1035 1048 if (ret)
1049 return ret;
1036 /* 1050 /*
1037 * We dont have the lock. Look up the PI state (or create it if 1051 * If the update of the user space value succeeded, we try to
1038 * we are the first waiter): 1052 * attach to the owner. If that fails, no harm done, we only
1053 * set the FUTEX_WAITERS bit in the user space variable.
1039 */ 1054 */
1040 ret = lookup_pi_state(uval, hb, key, ps); 1055 return attach_to_pi_owner(uval, key, ps);
1041
1042 if (unlikely(ret)) {
1043 switch (ret) {
1044 case -ESRCH:
1045 /*
1046 * We failed to find an owner for this
1047 * futex. So we have no pi_state to block
1048 * on. This can happen in two cases:
1049 *
1050 * 1) The owner died
1051 * 2) A stale FUTEX_WAITERS bit
1052 *
1053 * Re-read the futex value.
1054 */
1055 if (get_futex_value_locked(&curval, uaddr))
1056 return -EFAULT;
1057
1058 /*
1059 * If the owner died or we have a stale
1060 * WAITERS bit the owner TID in the user space
1061 * futex is 0.
1062 */
1063 if (!(curval & FUTEX_TID_MASK)) {
1064 force_take = 1;
1065 goto retry;
1066 }
1067 default:
1068 break;
1069 }
1070 }
1071
1072 return ret;
1073} 1056}
1074 1057
1075/** 1058/**
@@ -1186,22 +1169,6 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
1186 return 0; 1169 return 0;
1187} 1170}
1188 1171
1189static int unlock_futex_pi(u32 __user *uaddr, u32 uval)
1190{
1191 u32 uninitialized_var(oldval);
1192
1193 /*
1194 * There is no waiter, so we unlock the futex. The owner died
1195 * bit has not to be preserved here. We are the owner:
1196 */
1197 if (cmpxchg_futex_value_locked(&oldval, uaddr, uval, 0))
1198 return -EFAULT;
1199 if (oldval != uval)
1200 return -EAGAIN;
1201
1202 return 0;
1203}
1204
1205/* 1172/*
1206 * Express the locking dependencies for lockdep: 1173 * Express the locking dependencies for lockdep:
1207 */ 1174 */
@@ -1659,7 +1626,12 @@ retry_private:
1659 goto retry; 1626 goto retry;
1660 goto out; 1627 goto out;
1661 case -EAGAIN: 1628 case -EAGAIN:
1662 /* The owner was exiting, try again. */ 1629 /*
1630 * Two reasons for this:
1631 * - Owner is exiting and we just wait for the
1632 * exit to complete.
1633 * - The user space value changed.
1634 */
1663 double_unlock_hb(hb1, hb2); 1635 double_unlock_hb(hb1, hb2);
1664 hb_waiters_dec(hb2); 1636 hb_waiters_dec(hb2);
1665 put_futex_key(&key2); 1637 put_futex_key(&key2);
@@ -1718,7 +1690,7 @@ retry_private:
1718 this->pi_state = pi_state; 1690 this->pi_state = pi_state;
1719 ret = rt_mutex_start_proxy_lock(&pi_state->pi_mutex, 1691 ret = rt_mutex_start_proxy_lock(&pi_state->pi_mutex,
1720 this->rt_waiter, 1692 this->rt_waiter,
1721 this->task, 1); 1693 this->task);
1722 if (ret == 1) { 1694 if (ret == 1) {
1723 /* We got the lock. */ 1695 /* We got the lock. */
1724 requeue_pi_wake_futex(this, &key2, hb2); 1696 requeue_pi_wake_futex(this, &key2, hb2);
@@ -2316,8 +2288,10 @@ retry_private:
2316 goto uaddr_faulted; 2288 goto uaddr_faulted;
2317 case -EAGAIN: 2289 case -EAGAIN:
2318 /* 2290 /*
2319 * Task is exiting and we just wait for the 2291 * Two reasons for this:
2320 * exit to complete. 2292 * - Task is exiting and we just wait for the
2293 * exit to complete.
2294 * - The user space value changed.
2321 */ 2295 */
2322 queue_unlock(hb); 2296 queue_unlock(hb);
2323 put_futex_key(&q.key); 2297 put_futex_key(&q.key);
@@ -2337,9 +2311,9 @@ retry_private:
2337 /* 2311 /*
2338 * Block on the PI mutex: 2312 * Block on the PI mutex:
2339 */ 2313 */
2340 if (!trylock) 2314 if (!trylock) {
2341 ret = rt_mutex_timed_lock(&q.pi_state->pi_mutex, to, 1); 2315 ret = rt_mutex_timed_futex_lock(&q.pi_state->pi_mutex, to);
2342 else { 2316 } else {
2343 ret = rt_mutex_trylock(&q.pi_state->pi_mutex); 2317 ret = rt_mutex_trylock(&q.pi_state->pi_mutex);
2344 /* Fixup the trylock return value: */ 2318 /* Fixup the trylock return value: */
2345 ret = ret ? 0 : -EWOULDBLOCK; 2319 ret = ret ? 0 : -EWOULDBLOCK;
@@ -2401,10 +2375,10 @@ uaddr_faulted:
2401 */ 2375 */
2402static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags) 2376static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags)
2403{ 2377{
2404 struct futex_hash_bucket *hb; 2378 u32 uninitialized_var(curval), uval, vpid = task_pid_vnr(current);
2405 struct futex_q *this, *next;
2406 union futex_key key = FUTEX_KEY_INIT; 2379 union futex_key key = FUTEX_KEY_INIT;
2407 u32 uval, vpid = task_pid_vnr(current); 2380 struct futex_hash_bucket *hb;
2381 struct futex_q *match;
2408 int ret; 2382 int ret;
2409 2383
2410retry: 2384retry:
@@ -2417,57 +2391,47 @@ retry:
2417 return -EPERM; 2391 return -EPERM;
2418 2392
2419 ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, VERIFY_WRITE); 2393 ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, VERIFY_WRITE);
2420 if (unlikely(ret != 0)) 2394 if (ret)
2421 goto out; 2395 return ret;
2422 2396
2423 hb = hash_futex(&key); 2397 hb = hash_futex(&key);
2424 spin_lock(&hb->lock); 2398 spin_lock(&hb->lock);
2425 2399
2426 /* 2400 /*
2427 * To avoid races, try to do the TID -> 0 atomic transition 2401 * Check waiters first. We do not trust user space values at
2428 * again. If it succeeds then we can return without waking 2402 * all and we at least want to know if user space fiddled
2429 * anyone else up. We only try this if neither the waiters nor 2403 * with the futex value instead of blindly unlocking.
2430 * the owner died bit are set.
2431 */
2432 if (!(uval & ~FUTEX_TID_MASK) &&
2433 cmpxchg_futex_value_locked(&uval, uaddr, vpid, 0))
2434 goto pi_faulted;
2435 /*
2436 * Rare case: we managed to release the lock atomically,
2437 * no need to wake anyone else up:
2438 */
2439 if (unlikely(uval == vpid))
2440 goto out_unlock;
2441
2442 /*
2443 * Ok, other tasks may need to be woken up - check waiters
2444 * and do the wakeup if necessary:
2445 */ 2404 */
2446 plist_for_each_entry_safe(this, next, &hb->chain, list) { 2405 match = futex_top_waiter(hb, &key);
2447 if (!match_futex (&this->key, &key)) 2406 if (match) {
2448 continue; 2407 ret = wake_futex_pi(uaddr, uval, match);
2449 ret = wake_futex_pi(uaddr, uval, this);
2450 /* 2408 /*
2451 * The atomic access to the futex value 2409 * The atomic access to the futex value generated a
2452 * generated a pagefault, so retry the 2410 * pagefault, so retry the user-access and the wakeup:
2453 * user-access and the wakeup:
2454 */ 2411 */
2455 if (ret == -EFAULT) 2412 if (ret == -EFAULT)
2456 goto pi_faulted; 2413 goto pi_faulted;
2457 goto out_unlock; 2414 goto out_unlock;
2458 } 2415 }
2416
2459 /* 2417 /*
2460 * No waiters - kernel unlocks the futex: 2418 * We have no kernel internal state, i.e. no waiters in the
2419 * kernel. Waiters which are about to queue themselves are stuck
2420 * on hb->lock. So we can safely ignore them. We do neither
2421 * preserve the WAITERS bit nor the OWNER_DIED one. We are the
2422 * owner.
2461 */ 2423 */
2462 ret = unlock_futex_pi(uaddr, uval); 2424 if (cmpxchg_futex_value_locked(&curval, uaddr, uval, 0))
2463 if (ret == -EFAULT)
2464 goto pi_faulted; 2425 goto pi_faulted;
2465 2426
2427 /*
2428 * If uval has changed, let user space handle it.
2429 */
2430 ret = (curval == uval) ? 0 : -EAGAIN;
2431
2466out_unlock: 2432out_unlock:
2467 spin_unlock(&hb->lock); 2433 spin_unlock(&hb->lock);
2468 put_futex_key(&key); 2434 put_futex_key(&key);
2469
2470out:
2471 return ret; 2435 return ret;
2472 2436
2473pi_faulted: 2437pi_faulted:
@@ -2669,7 +2633,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
2669 */ 2633 */
2670 WARN_ON(!q.pi_state); 2634 WARN_ON(!q.pi_state);
2671 pi_mutex = &q.pi_state->pi_mutex; 2635 pi_mutex = &q.pi_state->pi_mutex;
2672 ret = rt_mutex_finish_proxy_lock(pi_mutex, to, &rt_waiter, 1); 2636 ret = rt_mutex_finish_proxy_lock(pi_mutex, to, &rt_waiter);
2673 debug_rt_mutex_free_waiter(&rt_waiter); 2637 debug_rt_mutex_free_waiter(&rt_waiter);
2674 2638
2675 spin_lock(q.lock_ptr); 2639 spin_lock(q.lock_ptr);