diff options
author | Thomas Gleixner <tglx@linutronix.de> | 2014-06-11 16:45:41 -0400 |
---|---|---|
committer | Thomas Gleixner <tglx@linutronix.de> | 2014-06-21 16:26:24 -0400 |
commit | af54d6a1c3ad474bbc9893c9905022646be6092c (patch) | |
tree | 462e3a75dcc8858a272099bcf449c35f5ecea141 /kernel/futex.c | |
parent | 04e1b2e52b17195c9a1daa5935c55a4c8716095c (diff) |
futex: Simplify futex_lock_pi_atomic() and make it more robust
futex_lock_pi_atomic() is a maze of retry hoops and loops.
Reduce it to simple and understandable states:
The first step is to look up existing waiters (state) in the kernel.
If there is an existing waiter, validate it and attach to it.
If there is no existing waiter, check the user space value:
If the TID encoded in the user space value is 0, take over the futex
preserving the owner died bit.
If the TID encoded in the user space value is != 0, look up the owner
task, validate it and attach to it.
Reduces text size by 128 bytes on x86_64.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Davidlohr Bueso <davidlohr@hp.com>
Cc: Kees Cook <kees@outflux.net>
Cc: wad@chromium.org
Cc: Darren Hart <darren@dvhart.com>
Link: http://lkml.kernel.org/r/alpine.DEB.2.10.1406131137020.5170@nanos
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Diffstat (limited to 'kernel/futex.c')
-rw-r--r-- | kernel/futex.c | 148 |
1 files changed, 61 insertions, 87 deletions
diff --git a/kernel/futex.c b/kernel/futex.c index e65b68677d0b..d3a9d946d0b7 100644 --- a/kernel/futex.c +++ b/kernel/futex.c | |||
@@ -956,6 +956,17 @@ static int lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, | |||
956 | return attach_to_pi_owner(uval, key, ps); | 956 | return attach_to_pi_owner(uval, key, ps); |
957 | } | 957 | } |
958 | 958 | ||
959 | static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval) | ||
960 | { | ||
961 | u32 uninitialized_var(curval); | ||
962 | |||
963 | if (unlikely(cmpxchg_futex_value_locked(&curval, uaddr, uval, newval))) | ||
964 | return -EFAULT; | ||
965 | |||
966 | /* If user space value changed, let the caller retry */ | ||
967 | return curval != uval ? -EAGAIN : 0; | ||
968 | } | ||
969 | |||
959 | /** | 970 | /** |
960 | * futex_lock_pi_atomic() - Atomic work required to acquire a pi aware futex | 971 | * futex_lock_pi_atomic() - Atomic work required to acquire a pi aware futex |
961 | * @uaddr: the pi futex user address | 972 | * @uaddr: the pi futex user address |
@@ -979,113 +990,69 @@ static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb, | |||
979 | struct futex_pi_state **ps, | 990 | struct futex_pi_state **ps, |
980 | struct task_struct *task, int set_waiters) | 991 | struct task_struct *task, int set_waiters) |
981 | { | 992 | { |
982 | int lock_taken, ret, force_take = 0; | 993 | u32 uval, newval, vpid = task_pid_vnr(task); |
983 | u32 uval, newval, curval, vpid = task_pid_vnr(task); | 994 | struct futex_q *match; |
984 | 995 | int ret; | |
985 | retry: | ||
986 | ret = lock_taken = 0; | ||
987 | 996 | ||
988 | /* | 997 | /* |
989 | * To avoid races, we attempt to take the lock here again | 998 | * Read the user space value first so we can validate a few |
990 | * (by doing a 0 -> TID atomic cmpxchg), while holding all | 999 | * things before proceeding further. |
991 | * the locks. It will most likely not succeed. | ||
992 | */ | 1000 | */ |
993 | newval = vpid; | 1001 | if (get_futex_value_locked(&uval, uaddr)) |
994 | if (set_waiters) | ||
995 | newval |= FUTEX_WAITERS; | ||
996 | |||
997 | if (unlikely(cmpxchg_futex_value_locked(&curval, uaddr, 0, newval))) | ||
998 | return -EFAULT; | 1002 | return -EFAULT; |
999 | 1003 | ||
1000 | /* | 1004 | /* |
1001 | * Detect deadlocks. | 1005 | * Detect deadlocks. |
1002 | */ | 1006 | */ |
1003 | if ((unlikely((curval & FUTEX_TID_MASK) == vpid))) | 1007 | if ((unlikely((uval & FUTEX_TID_MASK) == vpid))) |
1004 | return -EDEADLK; | 1008 | return -EDEADLK; |
1005 | 1009 | ||
1006 | /* | 1010 | /* |
1007 | * Surprise - we got the lock, but we do not trust user space at all. | 1011 | * Lookup existing state first. If it exists, try to attach to |
1012 | * its pi_state. | ||
1008 | */ | 1013 | */ |
1009 | if (unlikely(!curval)) { | 1014 | match = futex_top_waiter(hb, key); |
1010 | /* | 1015 | if (match) |
1011 | * We verify whether there is kernel state for this | 1016 | return attach_to_pi_state(uval, match->pi_state, ps); |
1012 | * futex. If not, we can safely assume, that the 0 -> | ||
1013 | * TID transition is correct. If state exists, we do | ||
1014 | * not bother to fixup the user space state as it was | ||
1015 | * corrupted already. | ||
1016 | */ | ||
1017 | return futex_top_waiter(hb, key) ? -EINVAL : 1; | ||
1018 | } | ||
1019 | |||
1020 | uval = curval; | ||
1021 | |||
1022 | /* | ||
1023 | * Set the FUTEX_WAITERS flag, so the owner will know it has someone | ||
1024 | * to wake at the next unlock. | ||
1025 | */ | ||
1026 | newval = curval | FUTEX_WAITERS; | ||
1027 | 1017 | ||
1028 | /* | 1018 | /* |
1029 | * Should we force take the futex? See below. | 1019 | * No waiter and user TID is 0. We are here because the |
1020 | * waiters or the owner died bit is set or called from | ||
1021 | * requeue_cmp_pi or for whatever reason something took the | ||
1022 | * syscall. | ||
1030 | */ | 1023 | */ |
1031 | if (unlikely(force_take)) { | 1024 | if (!(uval & FUTEX_TID_MASK)) { |
1032 | /* | 1025 | /* |
1033 | * Keep the OWNER_DIED and the WAITERS bit and set the | 1026 | * We take over the futex. No other waiters and the user space |
1034 | * new TID value. | 1027 | * TID is 0. We preserve the owner died bit. |
1035 | */ | 1028 | */ |
1036 | newval = (curval & ~FUTEX_TID_MASK) | vpid; | 1029 | newval = uval & FUTEX_OWNER_DIED; |
1037 | force_take = 0; | 1030 | newval |= vpid; |
1038 | lock_taken = 1; | ||
1039 | } | ||
1040 | 1031 | ||
1041 | if (unlikely(cmpxchg_futex_value_locked(&curval, uaddr, uval, newval))) | 1032 | /* The futex requeue_pi code can enforce the waiters bit */ |
1042 | return -EFAULT; | 1033 | if (set_waiters) |
1043 | if (unlikely(curval != uval)) | 1034 | newval |= FUTEX_WAITERS; |
1044 | goto retry; | 1035 | |
1036 | ret = lock_pi_update_atomic(uaddr, uval, newval); | ||
1037 | /* If the take over worked, return 1 */ | ||
1038 | return ret < 0 ? ret : 1; | ||
1039 | } | ||
1045 | 1040 | ||
1046 | /* | 1041 | /* |
1047 | * We took the lock due to forced take over. | 1042 | * First waiter. Set the waiters bit before attaching ourself to |
1043 | * the owner. If owner tries to unlock, it will be forced into | ||
1044 | * the kernel and blocked on hb->lock. | ||
1048 | */ | 1045 | */ |
1049 | if (unlikely(lock_taken)) | 1046 | newval = uval | FUTEX_WAITERS; |
1050 | return 1; | 1047 | ret = lock_pi_update_atomic(uaddr, uval, newval); |
1051 | 1048 | if (ret) | |
1049 | return ret; | ||
1052 | /* | 1050 | /* |
1053 | * We dont have the lock. Look up the PI state (or create it if | 1051 | * If the update of the user space value succeeded, we try to |
1054 | * we are the first waiter): | 1052 | * attach to the owner. If that fails, no harm done, we only |
1053 | * set the FUTEX_WAITERS bit in the user space variable. | ||
1055 | */ | 1054 | */ |
1056 | ret = lookup_pi_state(uval, hb, key, ps); | 1055 | return attach_to_pi_owner(uval, key, ps); |
1057 | |||
1058 | if (unlikely(ret)) { | ||
1059 | switch (ret) { | ||
1060 | case -ESRCH: | ||
1061 | /* | ||
1062 | * We failed to find an owner for this | ||
1063 | * futex. So we have no pi_state to block | ||
1064 | * on. This can happen in two cases: | ||
1065 | * | ||
1066 | * 1) The owner died | ||
1067 | * 2) A stale FUTEX_WAITERS bit | ||
1068 | * | ||
1069 | * Re-read the futex value. | ||
1070 | */ | ||
1071 | if (get_futex_value_locked(&curval, uaddr)) | ||
1072 | return -EFAULT; | ||
1073 | |||
1074 | /* | ||
1075 | * If the owner died or we have a stale | ||
1076 | * WAITERS bit the owner TID in the user space | ||
1077 | * futex is 0. | ||
1078 | */ | ||
1079 | if (!(curval & FUTEX_TID_MASK)) { | ||
1080 | force_take = 1; | ||
1081 | goto retry; | ||
1082 | } | ||
1083 | default: | ||
1084 | break; | ||
1085 | } | ||
1086 | } | ||
1087 | |||
1088 | return ret; | ||
1089 | } | 1056 | } |
1090 | 1057 | ||
1091 | /** | 1058 | /** |
@@ -1659,7 +1626,12 @@ retry_private: | |||
1659 | goto retry; | 1626 | goto retry; |
1660 | goto out; | 1627 | goto out; |
1661 | case -EAGAIN: | 1628 | case -EAGAIN: |
1662 | /* The owner was exiting, try again. */ | 1629 | /* |
1630 | * Two reasons for this: | ||
1631 | * - Owner is exiting and we just wait for the | ||
1632 | * exit to complete. | ||
1633 | * - The user space value changed. | ||
1634 | */ | ||
1663 | double_unlock_hb(hb1, hb2); | 1635 | double_unlock_hb(hb1, hb2); |
1664 | hb_waiters_dec(hb2); | 1636 | hb_waiters_dec(hb2); |
1665 | put_futex_key(&key2); | 1637 | put_futex_key(&key2); |
@@ -2316,8 +2288,10 @@ retry_private: | |||
2316 | goto uaddr_faulted; | 2288 | goto uaddr_faulted; |
2317 | case -EAGAIN: | 2289 | case -EAGAIN: |
2318 | /* | 2290 | /* |
2319 | * Task is exiting and we just wait for the | 2291 | * Two reasons for this: |
2320 | * exit to complete. | 2292 | * - Task is exiting and we just wait for the |
2293 | * exit to complete. | ||
2294 | * - The user space value changed. | ||
2321 | */ | 2295 | */ |
2322 | queue_unlock(hb); | 2296 | queue_unlock(hb); |
2323 | put_futex_key(&q.key); | 2297 | put_futex_key(&q.key); |