diff options
-rw-r--r-- | include/linux/futex.h | 8 | ||||
-rw-r--r-- | include/linux/thread_info.h | 3 | ||||
-rw-r--r-- | kernel/futex.c | 519 |
3 files changed, 510 insertions, 20 deletions
diff --git a/include/linux/futex.h b/include/linux/futex.h index 3bf5bb5a34f9..b05519ca9e57 100644 --- a/include/linux/futex.h +++ b/include/linux/futex.h | |||
@@ -23,6 +23,9 @@ union ktime; | |||
23 | #define FUTEX_TRYLOCK_PI 8 | 23 | #define FUTEX_TRYLOCK_PI 8 |
24 | #define FUTEX_WAIT_BITSET 9 | 24 | #define FUTEX_WAIT_BITSET 9 |
25 | #define FUTEX_WAKE_BITSET 10 | 25 | #define FUTEX_WAKE_BITSET 10 |
26 | #define FUTEX_WAIT_REQUEUE_PI 11 | ||
27 | #define FUTEX_REQUEUE_PI 12 | ||
28 | #define FUTEX_CMP_REQUEUE_PI 13 | ||
26 | 29 | ||
27 | #define FUTEX_PRIVATE_FLAG 128 | 30 | #define FUTEX_PRIVATE_FLAG 128 |
28 | #define FUTEX_CLOCK_REALTIME 256 | 31 | #define FUTEX_CLOCK_REALTIME 256 |
@@ -38,6 +41,11 @@ union ktime; | |||
38 | #define FUTEX_TRYLOCK_PI_PRIVATE (FUTEX_TRYLOCK_PI | FUTEX_PRIVATE_FLAG) | 41 | #define FUTEX_TRYLOCK_PI_PRIVATE (FUTEX_TRYLOCK_PI | FUTEX_PRIVATE_FLAG) |
39 | #define FUTEX_WAIT_BITSET_PRIVATE (FUTEX_WAIT_BITS | FUTEX_PRIVATE_FLAG) | 42 | #define FUTEX_WAIT_BITSET_PRIVATE (FUTEX_WAIT_BITS | FUTEX_PRIVATE_FLAG) |
40 | #define FUTEX_WAKE_BITSET_PRIVATE (FUTEX_WAKE_BITS | FUTEX_PRIVATE_FLAG) | 43 | #define FUTEX_WAKE_BITSET_PRIVATE (FUTEX_WAKE_BITS | FUTEX_PRIVATE_FLAG) |
44 | #define FUTEX_WAIT_REQUEUE_PI_PRIVATE (FUTEX_WAIT_REQUEUE_PI | \ | ||
45 | FUTEX_PRIVATE_FLAG) | ||
46 | #define FUTEX_REQUEUE_PI_PRIVATE (FUTEX_REQUEUE_PI | FUTEX_PRIVATE_FLAG) | ||
47 | #define FUTEX_CMP_REQUEUE_PI_PRIVATE (FUTEX_CMP_REQUEUE_PI | \ | ||
48 | FUTEX_PRIVATE_FLAG) | ||
41 | 49 | ||
42 | /* | 50 | /* |
43 | * Support for robust futexes: the kernel cleans up held futexes at | 51 | * Support for robust futexes: the kernel cleans up held futexes at |
diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index e6b820f8b56b..a8cc4e13434c 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h | |||
@@ -21,13 +21,14 @@ struct restart_block { | |||
21 | struct { | 21 | struct { |
22 | unsigned long arg0, arg1, arg2, arg3; | 22 | unsigned long arg0, arg1, arg2, arg3; |
23 | }; | 23 | }; |
24 | /* For futex_wait */ | 24 | /* For futex_wait and futex_wait_requeue_pi */ |
25 | struct { | 25 | struct { |
26 | u32 *uaddr; | 26 | u32 *uaddr; |
27 | u32 val; | 27 | u32 val; |
28 | u32 flags; | 28 | u32 flags; |
29 | u32 bitset; | 29 | u32 bitset; |
30 | u64 time; | 30 | u64 time; |
31 | u32 *uaddr2; | ||
31 | } futex; | 32 | } futex; |
32 | /* For nanosleep */ | 33 | /* For nanosleep */ |
33 | struct { | 34 | struct { |
diff --git a/kernel/futex.c b/kernel/futex.c index dbe857aa4381..185c981d89e3 100644 --- a/kernel/futex.c +++ b/kernel/futex.c | |||
@@ -19,6 +19,10 @@ | |||
19 | * PRIVATE futexes by Eric Dumazet | 19 | * PRIVATE futexes by Eric Dumazet |
20 | * Copyright (C) 2007 Eric Dumazet <dada1@cosmosbay.com> | 20 | * Copyright (C) 2007 Eric Dumazet <dada1@cosmosbay.com> |
21 | * | 21 | * |
22 | * Requeue-PI support by Darren Hart <dvhltc@us.ibm.com> | ||
23 | * Copyright (C) IBM Corporation, 2009 | ||
24 | * Thanks to Thomas Gleixner for conceptual design and careful reviews. | ||
25 | * | ||
22 | * Thanks to Ben LaHaise for yelling "hashed waitqueues" loudly | 26 | * Thanks to Ben LaHaise for yelling "hashed waitqueues" loudly |
23 | * enough at me, Linus for the original (flawed) idea, Matthew | 27 | * enough at me, Linus for the original (flawed) idea, Matthew |
24 | * Kirkwood for proof-of-concept implementation. | 28 | * Kirkwood for proof-of-concept implementation. |
@@ -109,6 +113,9 @@ struct futex_q { | |||
109 | struct futex_pi_state *pi_state; | 113 | struct futex_pi_state *pi_state; |
110 | struct task_struct *task; | 114 | struct task_struct *task; |
111 | 115 | ||
116 | /* rt_waiter storage for requeue_pi: */ | ||
117 | struct rt_mutex_waiter *rt_waiter; | ||
118 | |||
112 | /* Bitset for the optional bitmasked wakeup */ | 119 | /* Bitset for the optional bitmasked wakeup */ |
113 | u32 bitset; | 120 | u32 bitset; |
114 | }; | 121 | }; |
@@ -827,7 +834,7 @@ static int futex_wake(u32 __user *uaddr, int fshared, int nr_wake, u32 bitset) | |||
827 | 834 | ||
828 | plist_for_each_entry_safe(this, next, head, list) { | 835 | plist_for_each_entry_safe(this, next, head, list) { |
829 | if (match_futex (&this->key, &key)) { | 836 | if (match_futex (&this->key, &key)) { |
830 | if (this->pi_state) { | 837 | if (this->pi_state || this->rt_waiter) { |
831 | ret = -EINVAL; | 838 | ret = -EINVAL; |
832 | break; | 839 | break; |
833 | } | 840 | } |
@@ -968,20 +975,138 @@ void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1, | |||
968 | q->key = *key2; | 975 | q->key = *key2; |
969 | } | 976 | } |
970 | 977 | ||
971 | /* | 978 | /** |
972 | * Requeue all waiters hashed on one physical page to another | 979 | * requeue_pi_wake_futex() - Wake a task that acquired the lock during requeue |
973 | * physical page. | 980 | * @q: the futex_q |
981 | * @key: the key of the requeue target futex | ||
982 | * | ||
983 | * During futex_requeue, with requeue_pi=1, it is possible to acquire the | ||
984 | * target futex if it is uncontended or via a lock steal. Set the futex_q key | ||
985 | * to the requeue target futex so the waiter can detect the wakeup on the right | ||
986 | * futex, but remove it from the hb and NULL the rt_waiter so it can detect | ||
987 | * atomic lock acquisition. Must be called with the q->lock_ptr held. | ||
988 | */ | ||
989 | static inline | ||
990 | void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key) | ||
991 | { | ||
992 | drop_futex_key_refs(&q->key); | ||
993 | get_futex_key_refs(key); | ||
994 | q->key = *key; | ||
995 | |||
996 | WARN_ON(plist_node_empty(&q->list)); | ||
997 | plist_del(&q->list, &q->list.plist); | ||
998 | |||
999 | WARN_ON(!q->rt_waiter); | ||
1000 | q->rt_waiter = NULL; | ||
1001 | |||
1002 | wake_up(&q->waiter); | ||
1003 | } | ||
1004 | |||
1005 | /** | ||
1006 | * futex_proxy_trylock_atomic() - Attempt an atomic lock for the top waiter | ||
1007 | * @pifutex: the user address of the to futex | ||
1008 | * @hb1: the from futex hash bucket, must be locked by the caller | ||
1009 | * @hb2: the to futex hash bucket, must be locked by the caller | ||
1010 | * @key1: the from futex key | ||
1011 | * @key2: the to futex key | ||
1012 | * | ||
1013 | * Try and get the lock on behalf of the top waiter if we can do it atomically. | ||
1014 | * Wake the top waiter if we succeed. hb1 and hb2 must be held by the caller. | ||
1015 | * | ||
1016 | * Returns: | ||
1017 | * 0 - failed to acquire the lock atomically | ||
1018 | * 1 - acquired the lock | ||
1019 | * <0 - error | ||
1020 | */ | ||
1021 | static int futex_proxy_trylock_atomic(u32 __user *pifutex, | ||
1022 | struct futex_hash_bucket *hb1, | ||
1023 | struct futex_hash_bucket *hb2, | ||
1024 | union futex_key *key1, union futex_key *key2, | ||
1025 | struct futex_pi_state **ps) | ||
1026 | { | ||
1027 | struct futex_q *top_waiter; | ||
1028 | u32 curval; | ||
1029 | int ret; | ||
1030 | |||
1031 | if (get_futex_value_locked(&curval, pifutex)) | ||
1032 | return -EFAULT; | ||
1033 | |||
1034 | top_waiter = futex_top_waiter(hb1, key1); | ||
1035 | |||
1036 | /* There are no waiters, nothing for us to do. */ | ||
1037 | if (!top_waiter) | ||
1038 | return 0; | ||
1039 | |||
1040 | /* | ||
1041 | * Either take the lock for top_waiter or set the FUTEX_WAITERS bit. | ||
1042 | * The pi_state is returned in ps in contended cases. | ||
1043 | */ | ||
1044 | ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task); | ||
1045 | if (ret == 1) | ||
1046 | requeue_pi_wake_futex(top_waiter, key2); | ||
1047 | |||
1048 | return ret; | ||
1049 | } | ||
1050 | |||
1051 | /** | ||
1052 | * futex_requeue() - Requeue waiters from uaddr1 to uaddr2 | ||
1053 | * @uaddr1: source futex user address | ||
1054 | * @uaddr2: target futex user address | ||
1055 | * @nr_wake: number of waiters to wake (must be 1 for requeue_pi) | ||
1056 | * @nr_requeue: number of waiters to requeue (0-INT_MAX) | ||
1057 | * @requeue_pi: if we are attempting to requeue from a non-pi futex to a | ||
1058 | * pi futex (pi to pi requeue is not supported) | ||
1059 | * | ||
1060 | * Requeue waiters on uaddr1 to uaddr2. In the requeue_pi case, try to acquire | ||
1061 | * uaddr2 atomically on behalf of the top waiter. | ||
1062 | * | ||
1063 | * Returns: | ||
1064 | * >=0 - on success, the number of tasks requeued or woken | ||
1065 | * <0 - on error | ||
974 | */ | 1066 | */ |
975 | static int futex_requeue(u32 __user *uaddr1, int fshared, u32 __user *uaddr2, | 1067 | static int futex_requeue(u32 __user *uaddr1, int fshared, u32 __user *uaddr2, |
976 | int nr_wake, int nr_requeue, u32 *cmpval) | 1068 | int nr_wake, int nr_requeue, u32 *cmpval, |
1069 | int requeue_pi) | ||
977 | { | 1070 | { |
978 | union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT; | 1071 | union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT; |
1072 | int drop_count = 0, task_count = 0, ret; | ||
1073 | struct futex_pi_state *pi_state = NULL; | ||
979 | struct futex_hash_bucket *hb1, *hb2; | 1074 | struct futex_hash_bucket *hb1, *hb2; |
980 | struct plist_head *head1; | 1075 | struct plist_head *head1; |
981 | struct futex_q *this, *next; | 1076 | struct futex_q *this, *next; |
982 | int ret, drop_count = 0; | 1077 | u32 curval2; |
1078 | |||
1079 | if (requeue_pi) { | ||
1080 | /* | ||
1081 | * requeue_pi requires a pi_state, try to allocate it now | ||
1082 | * without any locks in case it fails. | ||
1083 | */ | ||
1084 | if (refill_pi_state_cache()) | ||
1085 | return -ENOMEM; | ||
1086 | /* | ||
1087 | * requeue_pi must wake as many tasks as it can, up to nr_wake | ||
1088 | * + nr_requeue, since it acquires the rt_mutex prior to | ||
1089 | * returning to userspace, so as to not leave the rt_mutex with | ||
1090 | * waiters and no owner. However, second and third wake-ups | ||
1091 | * cannot be predicted as they involve race conditions with the | ||
1092 | * first wake and a fault while looking up the pi_state. Both | ||
1093 | * pthread_cond_signal() and pthread_cond_broadcast() should | ||
1094 | * use nr_wake=1. | ||
1095 | */ | ||
1096 | if (nr_wake != 1) | ||
1097 | return -EINVAL; | ||
1098 | } | ||
983 | 1099 | ||
984 | retry: | 1100 | retry: |
1101 | if (pi_state != NULL) { | ||
1102 | /* | ||
1103 | * We will have to lookup the pi_state again, so free this one | ||
1104 | * to keep the accounting correct. | ||
1105 | */ | ||
1106 | free_pi_state(pi_state); | ||
1107 | pi_state = NULL; | ||
1108 | } | ||
1109 | |||
985 | ret = get_futex_key(uaddr1, fshared, &key1); | 1110 | ret = get_futex_key(uaddr1, fshared, &key1); |
986 | if (unlikely(ret != 0)) | 1111 | if (unlikely(ret != 0)) |
987 | goto out; | 1112 | goto out; |
@@ -1020,19 +1145,94 @@ retry_private: | |||
1020 | } | 1145 | } |
1021 | } | 1146 | } |
1022 | 1147 | ||
1148 | if (requeue_pi && (task_count - nr_wake < nr_requeue)) { | ||
1149 | /* Attempt to acquire uaddr2 and wake the top_waiter. */ | ||
1150 | ret = futex_proxy_trylock_atomic(uaddr2, hb1, hb2, &key1, | ||
1151 | &key2, &pi_state); | ||
1152 | |||
1153 | /* | ||
1154 | * At this point the top_waiter has either taken uaddr2 or is | ||
1155 | * waiting on it. If the former, then the pi_state will not | ||
1156 | * exist yet, look it up one more time to ensure we have a | ||
1157 | * reference to it. | ||
1158 | */ | ||
1159 | if (ret == 1) { | ||
1160 | WARN_ON(pi_state); | ||
1161 | task_count++; | ||
1162 | ret = get_futex_value_locked(&curval2, uaddr2); | ||
1163 | if (!ret) | ||
1164 | ret = lookup_pi_state(curval2, hb2, &key2, | ||
1165 | &pi_state); | ||
1166 | } | ||
1167 | |||
1168 | switch (ret) { | ||
1169 | case 0: | ||
1170 | break; | ||
1171 | case -EFAULT: | ||
1172 | double_unlock_hb(hb1, hb2); | ||
1173 | put_futex_key(fshared, &key2); | ||
1174 | put_futex_key(fshared, &key1); | ||
1175 | ret = get_user(curval2, uaddr2); | ||
1176 | if (!ret) | ||
1177 | goto retry; | ||
1178 | goto out; | ||
1179 | case -EAGAIN: | ||
1180 | /* The owner was exiting, try again. */ | ||
1181 | double_unlock_hb(hb1, hb2); | ||
1182 | put_futex_key(fshared, &key2); | ||
1183 | put_futex_key(fshared, &key1); | ||
1184 | cond_resched(); | ||
1185 | goto retry; | ||
1186 | default: | ||
1187 | goto out_unlock; | ||
1188 | } | ||
1189 | } | ||
1190 | |||
1023 | head1 = &hb1->chain; | 1191 | head1 = &hb1->chain; |
1024 | plist_for_each_entry_safe(this, next, head1, list) { | 1192 | plist_for_each_entry_safe(this, next, head1, list) { |
1025 | if (!match_futex (&this->key, &key1)) | 1193 | if (task_count - nr_wake >= nr_requeue) |
1194 | break; | ||
1195 | |||
1196 | if (!match_futex(&this->key, &key1)) | ||
1026 | continue; | 1197 | continue; |
1027 | if (++ret <= nr_wake) { | 1198 | |
1199 | WARN_ON(!requeue_pi && this->rt_waiter); | ||
1200 | WARN_ON(requeue_pi && !this->rt_waiter); | ||
1201 | |||
1202 | /* | ||
1203 | * Wake nr_wake waiters. For requeue_pi, if we acquired the | ||
1204 | * lock, we already woke the top_waiter. If not, it will be | ||
1205 | * woken by futex_unlock_pi(). | ||
1206 | */ | ||
1207 | if (++task_count <= nr_wake && !requeue_pi) { | ||
1028 | wake_futex(this); | 1208 | wake_futex(this); |
1029 | } else { | 1209 | continue; |
1030 | requeue_futex(this, hb1, hb2, &key2); | 1210 | } |
1031 | drop_count++; | ||
1032 | 1211 | ||
1033 | if (ret - nr_wake >= nr_requeue) | 1212 | /* |
1034 | break; | 1213 | * Requeue nr_requeue waiters and possibly one more in the case |
1214 | * of requeue_pi if we couldn't acquire the lock atomically. | ||
1215 | */ | ||
1216 | if (requeue_pi) { | ||
1217 | /* Prepare the waiter to take the rt_mutex. */ | ||
1218 | atomic_inc(&pi_state->refcount); | ||
1219 | this->pi_state = pi_state; | ||
1220 | ret = rt_mutex_start_proxy_lock(&pi_state->pi_mutex, | ||
1221 | this->rt_waiter, | ||
1222 | this->task, 1); | ||
1223 | if (ret == 1) { | ||
1224 | /* We got the lock. */ | ||
1225 | requeue_pi_wake_futex(this, &key2); | ||
1226 | continue; | ||
1227 | } else if (ret) { | ||
1228 | /* -EDEADLK */ | ||
1229 | this->pi_state = NULL; | ||
1230 | free_pi_state(pi_state); | ||
1231 | goto out_unlock; | ||
1232 | } | ||
1035 | } | 1233 | } |
1234 | requeue_futex(this, hb1, hb2, &key2); | ||
1235 | drop_count++; | ||
1036 | } | 1236 | } |
1037 | 1237 | ||
1038 | out_unlock: | 1238 | out_unlock: |
@@ -1047,7 +1247,9 @@ out_put_keys: | |||
1047 | out_put_key1: | 1247 | out_put_key1: |
1048 | put_futex_key(fshared, &key1); | 1248 | put_futex_key(fshared, &key1); |
1049 | out: | 1249 | out: |
1050 | return ret; | 1250 | if (pi_state != NULL) |
1251 | free_pi_state(pi_state); | ||
1252 | return ret ? ret : task_count; | ||
1051 | } | 1253 | } |
1052 | 1254 | ||
1053 | /* The key must be already stored in q->key. */ | 1255 | /* The key must be already stored in q->key. */ |
@@ -1270,6 +1472,7 @@ handle_fault: | |||
1270 | #define FLAGS_HAS_TIMEOUT 0x04 | 1472 | #define FLAGS_HAS_TIMEOUT 0x04 |
1271 | 1473 | ||
1272 | static long futex_wait_restart(struct restart_block *restart); | 1474 | static long futex_wait_restart(struct restart_block *restart); |
1475 | static long futex_lock_pi_restart(struct restart_block *restart); | ||
1273 | 1476 | ||
1274 | /** | 1477 | /** |
1275 | * fixup_owner() - Post lock pi_state and corner case management | 1478 | * fixup_owner() - Post lock pi_state and corner case management |
@@ -1489,6 +1692,7 @@ static int futex_wait(u32 __user *uaddr, int fshared, | |||
1489 | 1692 | ||
1490 | q.pi_state = NULL; | 1693 | q.pi_state = NULL; |
1491 | q.bitset = bitset; | 1694 | q.bitset = bitset; |
1695 | q.rt_waiter = NULL; | ||
1492 | 1696 | ||
1493 | if (abs_time) { | 1697 | if (abs_time) { |
1494 | to = &timeout; | 1698 | to = &timeout; |
@@ -1596,6 +1800,7 @@ static int futex_lock_pi(u32 __user *uaddr, int fshared, | |||
1596 | } | 1800 | } |
1597 | 1801 | ||
1598 | q.pi_state = NULL; | 1802 | q.pi_state = NULL; |
1803 | q.rt_waiter = NULL; | ||
1599 | retry: | 1804 | retry: |
1600 | q.key = FUTEX_KEY_INIT; | 1805 | q.key = FUTEX_KEY_INIT; |
1601 | ret = get_futex_key(uaddr, fshared, &q.key); | 1806 | ret = get_futex_key(uaddr, fshared, &q.key); |
@@ -1701,6 +1906,20 @@ uaddr_faulted: | |||
1701 | goto retry; | 1906 | goto retry; |
1702 | } | 1907 | } |
1703 | 1908 | ||
1909 | static long futex_lock_pi_restart(struct restart_block *restart) | ||
1910 | { | ||
1911 | u32 __user *uaddr = (u32 __user *)restart->futex.uaddr; | ||
1912 | ktime_t t, *tp = NULL; | ||
1913 | int fshared = restart->futex.flags & FLAGS_SHARED; | ||
1914 | |||
1915 | if (restart->futex.flags & FLAGS_HAS_TIMEOUT) { | ||
1916 | t.tv64 = restart->futex.time; | ||
1917 | tp = &t; | ||
1918 | } | ||
1919 | restart->fn = do_no_restart_syscall; | ||
1920 | |||
1921 | return (long)futex_lock_pi(uaddr, fshared, restart->futex.val, tp, 0); | ||
1922 | } | ||
1704 | 1923 | ||
1705 | /* | 1924 | /* |
1706 | * Userspace attempted a TID -> 0 atomic transition, and failed. | 1925 | * Userspace attempted a TID -> 0 atomic transition, and failed. |
@@ -1803,6 +2022,253 @@ pi_faulted: | |||
1803 | return ret; | 2022 | return ret; |
1804 | } | 2023 | } |
1805 | 2024 | ||
2025 | /** | ||
2026 | * handle_early_requeue_pi_wakeup() - Detect early wakeup on the initial futex | ||
2027 | * @hb: the hash_bucket futex_q was originally enqueued on | ||
2028 | * @q: the futex_q woken while waiting to be requeued | ||
2029 | * @key2: the futex_key of the requeue target futex | ||
2030 | * @timeout: the timeout associated with the wait (NULL if none) | ||
2031 | * | ||
2032 | * Detect if the task was woken on the initial futex as opposed to the requeue | ||
2033 | * target futex. If so, determine if it was a timeout or a signal that caused | ||
2034 | * the wakeup and return the appropriate error code to the caller. Must be | ||
2035 | * called with the hb lock held. | ||
2036 | * | ||
2037 | * Returns | ||
2038 | * 0 - no early wakeup detected | ||
2039 | * <0 - -ETIMEDOUT or -ERESTARTSYS (FIXME: or ERESTARTNOINTR?) | ||
2040 | */ | ||
2041 | static inline | ||
2042 | int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb, | ||
2043 | struct futex_q *q, union futex_key *key2, | ||
2044 | struct hrtimer_sleeper *timeout) | ||
2045 | { | ||
2046 | int ret = 0; | ||
2047 | |||
2048 | /* | ||
2049 | * With the hb lock held, we avoid races while we process the wakeup. | ||
2050 | * We only need to hold hb (and not hb2) to ensure atomicity as the | ||
2051 | * wakeup code can't change q.key from uaddr to uaddr2 if we hold hb. | ||
2052 | * It can't be requeued from uaddr2 to something else since we don't | ||
2053 | * support a PI aware source futex for requeue. | ||
2054 | */ | ||
2055 | if (!match_futex(&q->key, key2)) { | ||
2056 | WARN_ON(q->lock_ptr && (&hb->lock != q->lock_ptr)); | ||
2057 | /* | ||
2058 | * We were woken prior to requeue by a timeout or a signal. | ||
2059 | * Unqueue the futex_q and determine which it was. | ||
2060 | */ | ||
2061 | plist_del(&q->list, &q->list.plist); | ||
2062 | drop_futex_key_refs(&q->key); | ||
2063 | |||
2064 | if (timeout && !timeout->task) | ||
2065 | ret = -ETIMEDOUT; | ||
2066 | else { | ||
2067 | /* | ||
2068 | * We expect signal_pending(current), but another | ||
2069 | * thread may have handled it for us already. | ||
2070 | */ | ||
2071 | /* FIXME: ERESTARTSYS or ERESTARTNOINTR? Do we care if | ||
2072 | * the user specified SA_RESTART or not? */ | ||
2073 | ret = -ERESTARTSYS; | ||
2074 | } | ||
2075 | } | ||
2076 | return ret; | ||
2077 | } | ||
2078 | |||
2079 | /** | ||
2080 | * futex_wait_requeue_pi() - Wait on uaddr and take uaddr2 | ||
2081 | * @uaddr: the futex we initially wait on (non-pi) | ||
2082 | * @fshared: whether the futexes are shared (1) or not (0). They must be | ||
2083 | * the same type, no requeueing from private to shared, etc. | ||
2084 | * @val: the expected value of uaddr | ||
2085 | * @abs_time: absolute timeout | ||
2086 | * @bitset: 32 bit wakeup bitset set by userspace, defaults to all. | ||
2087 | * @clockrt: whether to use CLOCK_REALTIME (1) or CLOCK_MONOTONIC (0) | ||
2088 | * @uaddr2: the pi futex we will take prior to returning to user-space | ||
2089 | * | ||
2090 | * The caller will wait on uaddr and will be requeued by futex_requeue() to | ||
2091 | * uaddr2 which must be PI aware. Normal wakeup will wake on uaddr2 and | ||
2092 | * complete the acquisition of the rt_mutex prior to returning to userspace. | ||
2093 | * This ensures the rt_mutex maintains an owner when it has waiters; without | ||
2094 | * one, the pi logic wouldn't know which task to boost/deboost, if there was a | ||
2095 | * need to. | ||
2096 | * | ||
2097 | * We call schedule in futex_wait_queue_me() when we enqueue and return there | ||
2098 | * via the following: | ||
2099 | * 1) wakeup on uaddr2 after an atomic lock acquisition by futex_requeue() | ||
2100 | * 2) wakeup on uaddr2 after a requeue and subsequent unlock | ||
2101 | * 3) signal (before or after requeue) | ||
2102 | * 4) timeout (before or after requeue) | ||
2103 | * | ||
2104 | * If 3, we setup a restart_block with futex_wait_requeue_pi() as the function. | ||
2105 | * | ||
2106 | * If 2, we may then block on trying to take the rt_mutex and return via: | ||
2107 | * 5) successful lock | ||
2108 | * 6) signal | ||
2109 | * 7) timeout | ||
2110 | * 8) other lock acquisition failure | ||
2111 | * | ||
2112 | * If 6, we setup a restart_block with futex_lock_pi() as the function. | ||
2113 | * | ||
2114 | * If 4 or 7, we cleanup and return with -ETIMEDOUT. | ||
2115 | * | ||
2116 | * Returns: | ||
2117 | * 0 - On success | ||
2118 | * <0 - On error | ||
2119 | */ | ||
2120 | static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared, | ||
2121 | u32 val, ktime_t *abs_time, u32 bitset, | ||
2122 | int clockrt, u32 __user *uaddr2) | ||
2123 | { | ||
2124 | struct hrtimer_sleeper timeout, *to = NULL; | ||
2125 | struct rt_mutex_waiter rt_waiter; | ||
2126 | struct rt_mutex *pi_mutex = NULL; | ||
2127 | DECLARE_WAITQUEUE(wait, current); | ||
2128 | struct restart_block *restart; | ||
2129 | struct futex_hash_bucket *hb; | ||
2130 | union futex_key key2; | ||
2131 | struct futex_q q; | ||
2132 | int res, ret; | ||
2133 | u32 uval; | ||
2134 | |||
2135 | if (!bitset) | ||
2136 | return -EINVAL; | ||
2137 | |||
2138 | if (abs_time) { | ||
2139 | to = &timeout; | ||
2140 | hrtimer_init_on_stack(&to->timer, clockrt ? CLOCK_REALTIME : | ||
2141 | CLOCK_MONOTONIC, HRTIMER_MODE_ABS); | ||
2142 | hrtimer_init_sleeper(to, current); | ||
2143 | hrtimer_set_expires_range_ns(&to->timer, *abs_time, | ||
2144 | current->timer_slack_ns); | ||
2145 | } | ||
2146 | |||
2147 | /* | ||
2148 | * The waiter is allocated on our stack, manipulated by the requeue | ||
2149 | * code while we sleep on uaddr. | ||
2150 | */ | ||
2151 | debug_rt_mutex_init_waiter(&rt_waiter); | ||
2152 | rt_waiter.task = NULL; | ||
2153 | |||
2154 | q.pi_state = NULL; | ||
2155 | q.bitset = bitset; | ||
2156 | q.rt_waiter = &rt_waiter; | ||
2157 | |||
2158 | key2 = FUTEX_KEY_INIT; | ||
2159 | ret = get_futex_key(uaddr2, fshared, &key2); | ||
2160 | if (unlikely(ret != 0)) | ||
2161 | goto out; | ||
2162 | |||
2163 | /* Prepare to wait on uaddr. */ | ||
2164 | ret = futex_wait_setup(uaddr, val, fshared, &q, &hb); | ||
2165 | if (ret) { | ||
2166 | put_futex_key(fshared, &key2); | ||
2167 | goto out; | ||
2168 | } | ||
2169 | |||
2170 | /* Queue the futex_q, drop the hb lock, wait for wakeup. */ | ||
2171 | futex_wait_queue_me(hb, &q, to, &wait); | ||
2172 | |||
2173 | spin_lock(&hb->lock); | ||
2174 | ret = handle_early_requeue_pi_wakeup(hb, &q, &key2, to); | ||
2175 | spin_unlock(&hb->lock); | ||
2176 | if (ret) | ||
2177 | goto out_put_keys; | ||
2178 | |||
2179 | /* | ||
2180 | * In order for us to be here, we know our q.key == key2, and since | ||
2181 | * we took the hb->lock above, we also know that futex_requeue() has | ||
2182 | * completed and we no longer have to concern ourselves with a wakeup | ||
2183 | * race with the atomic proxy lock acquisition by the requeue code. | ||
2184 | */ | ||
2185 | |||
2186 | /* Check if the requeue code acquired the second futex for us. */ | ||
2187 | if (!q.rt_waiter) { | ||
2188 | /* | ||
2189 | * Got the lock. We might not be the anticipated owner if we | ||
2190 | * did a lock-steal - fix up the PI-state in that case. | ||
2191 | */ | ||
2192 | if (q.pi_state && (q.pi_state->owner != current)) { | ||
2193 | spin_lock(q.lock_ptr); | ||
2194 | ret = fixup_pi_state_owner(uaddr2, &q, current, | ||
2195 | fshared); | ||
2196 | spin_unlock(q.lock_ptr); | ||
2197 | } | ||
2198 | } else { | ||
2199 | /* | ||
2200 | * We have been woken up by futex_unlock_pi(), a timeout, or a | ||
2201 | * signal. futex_unlock_pi() will not destroy the lock_ptr nor | ||
2202 | * the pi_state. | ||
2203 | */ | ||
2204 | WARN_ON(!&q.pi_state); | ||
2205 | pi_mutex = &q.pi_state->pi_mutex; | ||
2206 | ret = rt_mutex_finish_proxy_lock(pi_mutex, to, &rt_waiter, 1); | ||
2207 | debug_rt_mutex_free_waiter(&rt_waiter); | ||
2208 | |||
2209 | spin_lock(q.lock_ptr); | ||
2210 | /* | ||
2211 | * Fixup the pi_state owner and possibly acquire the lock if we | ||
2212 | * haven't already. | ||
2213 | */ | ||
2214 | res = fixup_owner(uaddr2, fshared, &q, !ret); | ||
2215 | /* | ||
2216 | * If fixup_owner() returned an error, propagate that. If it | ||
2217 | * acquired the lock, clear our -ETIMEDOUT or -EINTR. | ||
2218 | */ | ||
2219 | if (res) | ||
2220 | ret = (res < 0) ? res : 0; | ||
2221 | |||
2222 | /* Unqueue and drop the lock. */ | ||
2223 | unqueue_me_pi(&q); | ||
2224 | } | ||
2225 | |||
2226 | /* | ||
2227 | * If fixup_pi_state_owner() faulted and was unable to handle the | ||
2228 | * fault, unlock the rt_mutex and return the fault to userspace. | ||
2229 | */ | ||
2230 | if (ret == -EFAULT) { | ||
2231 | if (rt_mutex_owner(pi_mutex) == current) | ||
2232 | rt_mutex_unlock(pi_mutex); | ||
2233 | } else if (ret == -EINTR) { | ||
2234 | ret = -EFAULT; | ||
2235 | if (get_user(uval, uaddr2)) | ||
2236 | goto out_put_keys; | ||
2237 | |||
2238 | /* | ||
2239 | * We've already been requeued, so restart by calling | ||
2240 | * futex_lock_pi() directly, rather than returning to this | ||
2241 | * function. | ||
2242 | */ | ||
2243 | ret = -ERESTART_RESTARTBLOCK; | ||
2244 | restart = &current_thread_info()->restart_block; | ||
2245 | restart->fn = futex_lock_pi_restart; | ||
2246 | restart->futex.uaddr = (u32 *)uaddr2; | ||
2247 | restart->futex.val = uval; | ||
2248 | restart->futex.flags = 0; | ||
2249 | if (abs_time) { | ||
2250 | restart->futex.flags |= FLAGS_HAS_TIMEOUT; | ||
2251 | restart->futex.time = abs_time->tv64; | ||
2252 | } | ||
2253 | |||
2254 | if (fshared) | ||
2255 | restart->futex.flags |= FLAGS_SHARED; | ||
2256 | if (clockrt) | ||
2257 | restart->futex.flags |= FLAGS_CLOCKRT; | ||
2258 | } | ||
2259 | |||
2260 | out_put_keys: | ||
2261 | put_futex_key(fshared, &q.key); | ||
2262 | put_futex_key(fshared, &key2); | ||
2263 | |||
2264 | out: | ||
2265 | if (to) { | ||
2266 | hrtimer_cancel(&to->timer); | ||
2267 | destroy_hrtimer_on_stack(&to->timer); | ||
2268 | } | ||
2269 | return ret; | ||
2270 | } | ||
2271 | |||
1806 | /* | 2272 | /* |
1807 | * Support for robust futexes: the kernel cleans up held futexes at | 2273 | * Support for robust futexes: the kernel cleans up held futexes at |
1808 | * thread exit time. | 2274 | * thread exit time. |
@@ -2025,7 +2491,7 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, | |||
2025 | fshared = 1; | 2491 | fshared = 1; |
2026 | 2492 | ||
2027 | clockrt = op & FUTEX_CLOCK_REALTIME; | 2493 | clockrt = op & FUTEX_CLOCK_REALTIME; |
2028 | if (clockrt && cmd != FUTEX_WAIT_BITSET) | 2494 | if (clockrt && cmd != FUTEX_WAIT_BITSET && cmd != FUTEX_WAIT_REQUEUE_PI) |
2029 | return -ENOSYS; | 2495 | return -ENOSYS; |
2030 | 2496 | ||
2031 | switch (cmd) { | 2497 | switch (cmd) { |
@@ -2040,10 +2506,11 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, | |||
2040 | ret = futex_wake(uaddr, fshared, val, val3); | 2506 | ret = futex_wake(uaddr, fshared, val, val3); |
2041 | break; | 2507 | break; |
2042 | case FUTEX_REQUEUE: | 2508 | case FUTEX_REQUEUE: |
2043 | ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, NULL); | 2509 | ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, NULL, 0); |
2044 | break; | 2510 | break; |
2045 | case FUTEX_CMP_REQUEUE: | 2511 | case FUTEX_CMP_REQUEUE: |
2046 | ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, &val3); | 2512 | ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, &val3, |
2513 | 0); | ||
2047 | break; | 2514 | break; |
2048 | case FUTEX_WAKE_OP: | 2515 | case FUTEX_WAKE_OP: |
2049 | ret = futex_wake_op(uaddr, fshared, uaddr2, val, val2, val3); | 2516 | ret = futex_wake_op(uaddr, fshared, uaddr2, val, val2, val3); |
@@ -2060,6 +2527,18 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, | |||
2060 | if (futex_cmpxchg_enabled) | 2527 | if (futex_cmpxchg_enabled) |
2061 | ret = futex_lock_pi(uaddr, fshared, 0, timeout, 1); | 2528 | ret = futex_lock_pi(uaddr, fshared, 0, timeout, 1); |
2062 | break; | 2529 | break; |
2530 | case FUTEX_WAIT_REQUEUE_PI: | ||
2531 | val3 = FUTEX_BITSET_MATCH_ANY; | ||
2532 | ret = futex_wait_requeue_pi(uaddr, fshared, val, timeout, val3, | ||
2533 | clockrt, uaddr2); | ||
2534 | break; | ||
2535 | case FUTEX_REQUEUE_PI: | ||
2536 | ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, NULL, 1); | ||
2537 | break; | ||
2538 | case FUTEX_CMP_REQUEUE_PI: | ||
2539 | ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, &val3, | ||
2540 | 1); | ||
2541 | break; | ||
2063 | default: | 2542 | default: |
2064 | ret = -ENOSYS; | 2543 | ret = -ENOSYS; |
2065 | } | 2544 | } |
@@ -2077,7 +2556,8 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val, | |||
2077 | int cmd = op & FUTEX_CMD_MASK; | 2556 | int cmd = op & FUTEX_CMD_MASK; |
2078 | 2557 | ||
2079 | if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI || | 2558 | if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI || |
2080 | cmd == FUTEX_WAIT_BITSET)) { | 2559 | cmd == FUTEX_WAIT_BITSET || |
2560 | cmd == FUTEX_WAIT_REQUEUE_PI)) { | ||
2081 | if (copy_from_user(&ts, utime, sizeof(ts)) != 0) | 2561 | if (copy_from_user(&ts, utime, sizeof(ts)) != 0) |
2082 | return -EFAULT; | 2562 | return -EFAULT; |
2083 | if (!timespec_valid(&ts)) | 2563 | if (!timespec_valid(&ts)) |
@@ -2089,10 +2569,11 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val, | |||
2089 | tp = &t; | 2569 | tp = &t; |
2090 | } | 2570 | } |
2091 | /* | 2571 | /* |
2092 | * requeue parameter in 'utime' if cmd == FUTEX_REQUEUE. | 2572 | * requeue parameter in 'utime' if cmd == FUTEX_*_REQUEUE_*. |
2093 | * number of waiters to wake in 'utime' if cmd == FUTEX_WAKE_OP. | 2573 | * number of waiters to wake in 'utime' if cmd == FUTEX_WAKE_OP. |
2094 | */ | 2574 | */ |
2095 | if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE || | 2575 | if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE || |
2576 | cmd == FUTEX_REQUEUE_PI || cmd == FUTEX_CMP_REQUEUE_PI || | ||
2096 | cmd == FUTEX_WAKE_OP) | 2577 | cmd == FUTEX_WAKE_OP) |
2097 | val2 = (u32) (unsigned long) utime; | 2578 | val2 = (u32) (unsigned long) utime; |
2098 | 2579 | ||