diff options
Diffstat (limited to 'kernel/futex.c')
-rw-r--r-- | kernel/futex.c | 160 |
1 files changed, 92 insertions, 68 deletions
diff --git a/kernel/futex.c b/kernel/futex.c index 248dd119a86e..fb65e822fc41 100644 --- a/kernel/futex.c +++ b/kernel/futex.c | |||
@@ -89,36 +89,36 @@ struct futex_pi_state { | |||
89 | union futex_key key; | 89 | union futex_key key; |
90 | }; | 90 | }; |
91 | 91 | ||
92 | /* | 92 | /** |
93 | * We use this hashed waitqueue instead of a normal wait_queue_t, so | 93 | * struct futex_q - The hashed futex queue entry, one per waiting task |
94 | * @task: the task waiting on the futex | ||
95 | * @lock_ptr: the hash bucket lock | ||
96 | * @key: the key the futex is hashed on | ||
97 | * @pi_state: optional priority inheritance state | ||
98 | * @rt_waiter: rt_waiter storage for use with requeue_pi | ||
99 | * @requeue_pi_key: the requeue_pi target futex key | ||
100 | * @bitset: bitset for the optional bitmasked wakeup | ||
101 | * | ||
102 | * We use this hashed waitqueue, instead of a normal wait_queue_t, so | ||
94 | * we can wake only the relevant ones (hashed queues may be shared). | 103 | * we can wake only the relevant ones (hashed queues may be shared). |
95 | * | 104 | * |
96 | * A futex_q has a woken state, just like tasks have TASK_RUNNING. | 105 | * A futex_q has a woken state, just like tasks have TASK_RUNNING. |
97 | * It is considered woken when plist_node_empty(&q->list) || q->lock_ptr == 0. | 106 | * It is considered woken when plist_node_empty(&q->list) || q->lock_ptr == 0. |
98 | * The order of wakeup is always to make the first condition true, then | 107 | * The order of wakeup is always to make the first condition true, then |
99 | * wake up q->waiter, then make the second condition true. | 108 | * the second. |
109 | * | ||
110 | * PI futexes are typically woken before they are removed from the hash list via | ||
111 | * the rt_mutex code. See unqueue_me_pi(). | ||
100 | */ | 112 | */ |
101 | struct futex_q { | 113 | struct futex_q { |
102 | struct plist_node list; | 114 | struct plist_node list; |
103 | /* Waiter reference */ | ||
104 | struct task_struct *task; | ||
105 | 115 | ||
106 | /* Which hash list lock to use: */ | 116 | struct task_struct *task; |
107 | spinlock_t *lock_ptr; | 117 | spinlock_t *lock_ptr; |
108 | |||
109 | /* Key which the futex is hashed on: */ | ||
110 | union futex_key key; | 118 | union futex_key key; |
111 | |||
112 | /* Optional priority inheritance state: */ | ||
113 | struct futex_pi_state *pi_state; | 119 | struct futex_pi_state *pi_state; |
114 | |||
115 | /* rt_waiter storage for requeue_pi: */ | ||
116 | struct rt_mutex_waiter *rt_waiter; | 120 | struct rt_mutex_waiter *rt_waiter; |
117 | |||
118 | /* The expected requeue pi target futex key: */ | ||
119 | union futex_key *requeue_pi_key; | 121 | union futex_key *requeue_pi_key; |
120 | |||
121 | /* Bitset for the optional bitmasked wakeup */ | ||
122 | u32 bitset; | 122 | u32 bitset; |
123 | }; | 123 | }; |
124 | 124 | ||
@@ -150,7 +150,8 @@ static struct futex_hash_bucket *hash_futex(union futex_key *key) | |||
150 | */ | 150 | */ |
151 | static inline int match_futex(union futex_key *key1, union futex_key *key2) | 151 | static inline int match_futex(union futex_key *key1, union futex_key *key2) |
152 | { | 152 | { |
153 | return (key1->both.word == key2->both.word | 153 | return (key1 && key2 |
154 | && key1->both.word == key2->both.word | ||
154 | && key1->both.ptr == key2->both.ptr | 155 | && key1->both.ptr == key2->both.ptr |
155 | && key1->both.offset == key2->both.offset); | 156 | && key1->both.offset == key2->both.offset); |
156 | } | 157 | } |
@@ -198,11 +199,12 @@ static void drop_futex_key_refs(union futex_key *key) | |||
198 | } | 199 | } |
199 | 200 | ||
200 | /** | 201 | /** |
201 | * get_futex_key - Get parameters which are the keys for a futex. | 202 | * get_futex_key() - Get parameters which are the keys for a futex |
202 | * @uaddr: virtual address of the futex | 203 | * @uaddr: virtual address of the futex |
203 | * @fshared: 0 for a PROCESS_PRIVATE futex, 1 for PROCESS_SHARED | 204 | * @fshared: 0 for a PROCESS_PRIVATE futex, 1 for PROCESS_SHARED |
204 | * @key: address where result is stored. | 205 | * @key: address where result is stored. |
205 | * @rw: mapping needs to be read/write (values: VERIFY_READ, VERIFY_WRITE) | 206 | * @rw: mapping needs to be read/write (values: VERIFY_READ, |
207 | * VERIFY_WRITE) | ||
206 | * | 208 | * |
207 | * Returns a negative error code or 0 | 209 | * Returns a negative error code or 0 |
208 | * The key words are stored in *key on success. | 210 | * The key words are stored in *key on success. |
@@ -288,8 +290,8 @@ void put_futex_key(int fshared, union futex_key *key) | |||
288 | drop_futex_key_refs(key); | 290 | drop_futex_key_refs(key); |
289 | } | 291 | } |
290 | 292 | ||
291 | /* | 293 | /** |
292 | * fault_in_user_writeable - fault in user address and verify RW access | 294 | * fault_in_user_writeable() - Fault in user address and verify RW access |
293 | * @uaddr: pointer to faulting user space address | 295 | * @uaddr: pointer to faulting user space address |
294 | * | 296 | * |
295 | * Slow path to fixup the fault we just took in the atomic write | 297 | * Slow path to fixup the fault we just took in the atomic write |
@@ -309,8 +311,8 @@ static int fault_in_user_writeable(u32 __user *uaddr) | |||
309 | 311 | ||
310 | /** | 312 | /** |
311 | * futex_top_waiter() - Return the highest priority waiter on a futex | 313 | * futex_top_waiter() - Return the highest priority waiter on a futex |
312 | * @hb: the hash bucket the futex_q's reside in | 314 | * @hb: the hash bucket the futex_q's reside in |
313 | * @key: the futex key (to distinguish it from other futex futex_q's) | 315 | * @key: the futex key (to distinguish it from other futex futex_q's) |
314 | * | 316 | * |
315 | * Must be called with the hb lock held. | 317 | * Must be called with the hb lock held. |
316 | */ | 318 | */ |
@@ -588,7 +590,7 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, | |||
588 | } | 590 | } |
589 | 591 | ||
590 | /** | 592 | /** |
591 | * futex_lock_pi_atomic() - atomic work required to acquire a pi aware futex | 593 | * futex_lock_pi_atomic() - Atomic work required to acquire a pi aware futex |
592 | * @uaddr: the pi futex user address | 594 | * @uaddr: the pi futex user address |
593 | * @hb: the pi futex hash bucket | 595 | * @hb: the pi futex hash bucket |
594 | * @key: the futex key associated with uaddr and hb | 596 | * @key: the futex key associated with uaddr and hb |
@@ -915,8 +917,8 @@ retry: | |||
915 | hb1 = hash_futex(&key1); | 917 | hb1 = hash_futex(&key1); |
916 | hb2 = hash_futex(&key2); | 918 | hb2 = hash_futex(&key2); |
917 | 919 | ||
918 | double_lock_hb(hb1, hb2); | ||
919 | retry_private: | 920 | retry_private: |
921 | double_lock_hb(hb1, hb2); | ||
920 | op_ret = futex_atomic_op_inuser(op, uaddr2); | 922 | op_ret = futex_atomic_op_inuser(op, uaddr2); |
921 | if (unlikely(op_ret < 0)) { | 923 | if (unlikely(op_ret < 0)) { |
922 | 924 | ||
@@ -1011,9 +1013,9 @@ void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1, | |||
1011 | 1013 | ||
1012 | /** | 1014 | /** |
1013 | * requeue_pi_wake_futex() - Wake a task that acquired the lock during requeue | 1015 | * requeue_pi_wake_futex() - Wake a task that acquired the lock during requeue |
1014 | * q: the futex_q | 1016 | * @q: the futex_q |
1015 | * key: the key of the requeue target futex | 1017 | * @key: the key of the requeue target futex |
1016 | * hb: the hash_bucket of the requeue target futex | 1018 | * @hb: the hash_bucket of the requeue target futex |
1017 | * | 1019 | * |
1018 | * During futex_requeue, with requeue_pi=1, it is possible to acquire the | 1020 | * During futex_requeue, with requeue_pi=1, it is possible to acquire the |
1019 | * target futex if it is uncontended or via a lock steal. Set the futex_q key | 1021 | * target futex if it is uncontended or via a lock steal. Set the futex_q key |
@@ -1027,7 +1029,6 @@ static inline | |||
1027 | void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key, | 1029 | void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key, |
1028 | struct futex_hash_bucket *hb) | 1030 | struct futex_hash_bucket *hb) |
1029 | { | 1031 | { |
1030 | drop_futex_key_refs(&q->key); | ||
1031 | get_futex_key_refs(key); | 1032 | get_futex_key_refs(key); |
1032 | q->key = *key; | 1033 | q->key = *key; |
1033 | 1034 | ||
@@ -1225,6 +1226,7 @@ retry_private: | |||
1225 | */ | 1226 | */ |
1226 | if (ret == 1) { | 1227 | if (ret == 1) { |
1227 | WARN_ON(pi_state); | 1228 | WARN_ON(pi_state); |
1229 | drop_count++; | ||
1228 | task_count++; | 1230 | task_count++; |
1229 | ret = get_futex_value_locked(&curval2, uaddr2); | 1231 | ret = get_futex_value_locked(&curval2, uaddr2); |
1230 | if (!ret) | 1232 | if (!ret) |
@@ -1303,6 +1305,7 @@ retry_private: | |||
1303 | if (ret == 1) { | 1305 | if (ret == 1) { |
1304 | /* We got the lock. */ | 1306 | /* We got the lock. */ |
1305 | requeue_pi_wake_futex(this, &key2, hb2); | 1307 | requeue_pi_wake_futex(this, &key2, hb2); |
1308 | drop_count++; | ||
1306 | continue; | 1309 | continue; |
1307 | } else if (ret) { | 1310 | } else if (ret) { |
1308 | /* -EDEADLK */ | 1311 | /* -EDEADLK */ |
@@ -1350,6 +1353,25 @@ static inline struct futex_hash_bucket *queue_lock(struct futex_q *q) | |||
1350 | return hb; | 1353 | return hb; |
1351 | } | 1354 | } |
1352 | 1355 | ||
1356 | static inline void | ||
1357 | queue_unlock(struct futex_q *q, struct futex_hash_bucket *hb) | ||
1358 | { | ||
1359 | spin_unlock(&hb->lock); | ||
1360 | drop_futex_key_refs(&q->key); | ||
1361 | } | ||
1362 | |||
1363 | /** | ||
1364 | * queue_me() - Enqueue the futex_q on the futex_hash_bucket | ||
1365 | * @q: The futex_q to enqueue | ||
1366 | * @hb: The destination hash bucket | ||
1367 | * | ||
1368 | * The hb->lock must be held by the caller, and is released here. A call to | ||
1369 | * queue_me() is typically paired with exactly one call to unqueue_me(). The | ||
1370 | * exceptions involve the PI related operations, which may use unqueue_me_pi() | ||
1371 | * or nothing if the unqueue is done as part of the wake process and the unqueue | ||
1372 | * state is implicit in the state of woken task (see futex_wait_requeue_pi() for | ||
1373 | * an example). | ||
1374 | */ | ||
1353 | static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb) | 1375 | static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb) |
1354 | { | 1376 | { |
1355 | int prio; | 1377 | int prio; |
@@ -1373,19 +1395,17 @@ static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb) | |||
1373 | spin_unlock(&hb->lock); | 1395 | spin_unlock(&hb->lock); |
1374 | } | 1396 | } |
1375 | 1397 | ||
1376 | static inline void | 1398 | /** |
1377 | queue_unlock(struct futex_q *q, struct futex_hash_bucket *hb) | 1399 | * unqueue_me() - Remove the futex_q from its futex_hash_bucket |
1378 | { | 1400 | * @q: The futex_q to unqueue |
1379 | spin_unlock(&hb->lock); | 1401 | * |
1380 | drop_futex_key_refs(&q->key); | 1402 | * The q->lock_ptr must not be held by the caller. A call to unqueue_me() must |
1381 | } | 1403 | * be paired with exactly one earlier call to queue_me(). |
1382 | 1404 | * | |
1383 | /* | 1405 | * Returns: |
1384 | * queue_me and unqueue_me must be called as a pair, each | 1406 | * 1 - if the futex_q was still queued (and we removed unqueued it) |
1385 | * exactly once. They are called with the hashed spinlock held. | 1407 | * 0 - if the futex_q was already removed by the waking thread |
1386 | */ | 1408 | */ |
1387 | |||
1388 | /* Return 1 if we were still queued (ie. 0 means we were woken) */ | ||
1389 | static int unqueue_me(struct futex_q *q) | 1409 | static int unqueue_me(struct futex_q *q) |
1390 | { | 1410 | { |
1391 | spinlock_t *lock_ptr; | 1411 | spinlock_t *lock_ptr; |
@@ -1638,17 +1658,14 @@ out: | |||
1638 | static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q, | 1658 | static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q, |
1639 | struct hrtimer_sleeper *timeout) | 1659 | struct hrtimer_sleeper *timeout) |
1640 | { | 1660 | { |
1641 | queue_me(q, hb); | ||
1642 | |||
1643 | /* | 1661 | /* |
1644 | * There might have been scheduling since the queue_me(), as we | 1662 | * The task state is guaranteed to be set before another task can |
1645 | * cannot hold a spinlock across the get_user() in case it | 1663 | * wake it. set_current_state() is implemented using set_mb() and |
1646 | * faults, and we cannot just set TASK_INTERRUPTIBLE state when | 1664 | * queue_me() calls spin_unlock() upon completion, both serializing |
1647 | * queueing ourselves into the futex hash. This code thus has to | 1665 | * access to the hash list and forcing another memory barrier. |
1648 | * rely on the futex_wake() code removing us from hash when it | ||
1649 | * wakes us up. | ||
1650 | */ | 1666 | */ |
1651 | set_current_state(TASK_INTERRUPTIBLE); | 1667 | set_current_state(TASK_INTERRUPTIBLE); |
1668 | queue_me(q, hb); | ||
1652 | 1669 | ||
1653 | /* Arm the timer */ | 1670 | /* Arm the timer */ |
1654 | if (timeout) { | 1671 | if (timeout) { |
@@ -1658,8 +1675,8 @@ static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q, | |||
1658 | } | 1675 | } |
1659 | 1676 | ||
1660 | /* | 1677 | /* |
1661 | * !plist_node_empty() is safe here without any lock. | 1678 | * If we have been removed from the hash list, then another task |
1662 | * q.lock_ptr != 0 is not safe, because of ordering against wakeup. | 1679 | * has tried to wake us, and we can skip the call to schedule(). |
1663 | */ | 1680 | */ |
1664 | if (likely(!plist_node_empty(&q->list))) { | 1681 | if (likely(!plist_node_empty(&q->list))) { |
1665 | /* | 1682 | /* |
@@ -1776,6 +1793,7 @@ static int futex_wait(u32 __user *uaddr, int fshared, | |||
1776 | current->timer_slack_ns); | 1793 | current->timer_slack_ns); |
1777 | } | 1794 | } |
1778 | 1795 | ||
1796 | retry: | ||
1779 | /* Prepare to wait on uaddr. */ | 1797 | /* Prepare to wait on uaddr. */ |
1780 | ret = futex_wait_setup(uaddr, val, fshared, &q, &hb); | 1798 | ret = futex_wait_setup(uaddr, val, fshared, &q, &hb); |
1781 | if (ret) | 1799 | if (ret) |
@@ -1793,9 +1811,14 @@ static int futex_wait(u32 __user *uaddr, int fshared, | |||
1793 | goto out_put_key; | 1811 | goto out_put_key; |
1794 | 1812 | ||
1795 | /* | 1813 | /* |
1796 | * We expect signal_pending(current), but another thread may | 1814 | * We expect signal_pending(current), but we might be the |
1797 | * have handled it for us already. | 1815 | * victim of a spurious wakeup as well. |
1798 | */ | 1816 | */ |
1817 | if (!signal_pending(current)) { | ||
1818 | put_futex_key(fshared, &q.key); | ||
1819 | goto retry; | ||
1820 | } | ||
1821 | |||
1799 | ret = -ERESTARTSYS; | 1822 | ret = -ERESTARTSYS; |
1800 | if (!abs_time) | 1823 | if (!abs_time) |
1801 | goto out_put_key; | 1824 | goto out_put_key; |
@@ -2102,11 +2125,12 @@ int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb, | |||
2102 | * Unqueue the futex_q and determine which it was. | 2125 | * Unqueue the futex_q and determine which it was. |
2103 | */ | 2126 | */ |
2104 | plist_del(&q->list, &q->list.plist); | 2127 | plist_del(&q->list, &q->list.plist); |
2105 | drop_futex_key_refs(&q->key); | ||
2106 | 2128 | ||
2129 | /* Handle spurious wakeups gracefully */ | ||
2130 | ret = -EWOULDBLOCK; | ||
2107 | if (timeout && !timeout->task) | 2131 | if (timeout && !timeout->task) |
2108 | ret = -ETIMEDOUT; | 2132 | ret = -ETIMEDOUT; |
2109 | else | 2133 | else if (signal_pending(current)) |
2110 | ret = -ERESTARTNOINTR; | 2134 | ret = -ERESTARTNOINTR; |
2111 | } | 2135 | } |
2112 | return ret; | 2136 | return ret; |
@@ -2114,12 +2138,12 @@ int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb, | |||
2114 | 2138 | ||
2115 | /** | 2139 | /** |
2116 | * futex_wait_requeue_pi() - Wait on uaddr and take uaddr2 | 2140 | * futex_wait_requeue_pi() - Wait on uaddr and take uaddr2 |
2117 | * @uaddr: the futex we initialyl wait on (non-pi) | 2141 | * @uaddr: the futex we initially wait on (non-pi) |
2118 | * @fshared: whether the futexes are shared (1) or not (0). They must be | 2142 | * @fshared: whether the futexes are shared (1) or not (0). They must be |
2119 | * the same type, no requeueing from private to shared, etc. | 2143 | * the same type, no requeueing from private to shared, etc. |
2120 | * @val: the expected value of uaddr | 2144 | * @val: the expected value of uaddr |
2121 | * @abs_time: absolute timeout | 2145 | * @abs_time: absolute timeout |
2122 | * @bitset: 32 bit wakeup bitset set by userspace, defaults to all. | 2146 | * @bitset: 32 bit wakeup bitset set by userspace, defaults to all |
2123 | * @clockrt: whether to use CLOCK_REALTIME (1) or CLOCK_MONOTONIC (0) | 2147 | * @clockrt: whether to use CLOCK_REALTIME (1) or CLOCK_MONOTONIC (0) |
2124 | * @uaddr2: the pi futex we will take prior to returning to user-space | 2148 | * @uaddr2: the pi futex we will take prior to returning to user-space |
2125 | * | 2149 | * |
@@ -2246,7 +2270,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared, | |||
2246 | res = fixup_owner(uaddr2, fshared, &q, !ret); | 2270 | res = fixup_owner(uaddr2, fshared, &q, !ret); |
2247 | /* | 2271 | /* |
2248 | * If fixup_owner() returned an error, propagate that. If it | 2272 | * If fixup_owner() returned an error, propagate that. If it |
2249 | * acquired the lock, clear our -ETIMEDOUT or -EINTR. | 2273 | * acquired the lock, clear -ETIMEDOUT or -EINTR. |
2250 | */ | 2274 | */ |
2251 | if (res) | 2275 | if (res) |
2252 | ret = (res < 0) ? res : 0; | 2276 | ret = (res < 0) ? res : 0; |
@@ -2302,9 +2326,9 @@ out: | |||
2302 | */ | 2326 | */ |
2303 | 2327 | ||
2304 | /** | 2328 | /** |
2305 | * sys_set_robust_list - set the robust-futex list head of a task | 2329 | * sys_set_robust_list() - Set the robust-futex list head of a task |
2306 | * @head: pointer to the list-head | 2330 | * @head: pointer to the list-head |
2307 | * @len: length of the list-head, as userspace expects | 2331 | * @len: length of the list-head, as userspace expects |
2308 | */ | 2332 | */ |
2309 | SYSCALL_DEFINE2(set_robust_list, struct robust_list_head __user *, head, | 2333 | SYSCALL_DEFINE2(set_robust_list, struct robust_list_head __user *, head, |
2310 | size_t, len) | 2334 | size_t, len) |
@@ -2323,10 +2347,10 @@ SYSCALL_DEFINE2(set_robust_list, struct robust_list_head __user *, head, | |||
2323 | } | 2347 | } |
2324 | 2348 | ||
2325 | /** | 2349 | /** |
2326 | * sys_get_robust_list - get the robust-futex list head of a task | 2350 | * sys_get_robust_list() - Get the robust-futex list head of a task |
2327 | * @pid: pid of the process [zero for current task] | 2351 | * @pid: pid of the process [zero for current task] |
2328 | * @head_ptr: pointer to a list-head pointer, the kernel fills it in | 2352 | * @head_ptr: pointer to a list-head pointer, the kernel fills it in |
2329 | * @len_ptr: pointer to a length field, the kernel fills in the header size | 2353 | * @len_ptr: pointer to a length field, the kernel fills in the header size |
2330 | */ | 2354 | */ |
2331 | SYSCALL_DEFINE3(get_robust_list, int, pid, | 2355 | SYSCALL_DEFINE3(get_robust_list, int, pid, |
2332 | struct robust_list_head __user * __user *, head_ptr, | 2356 | struct robust_list_head __user * __user *, head_ptr, |