diff options
Diffstat (limited to 'kernel/futex.c')
| -rw-r--r-- | kernel/futex.c | 160 |
1 files changed, 92 insertions, 68 deletions
diff --git a/kernel/futex.c b/kernel/futex.c index 248dd119a86e..fb65e822fc41 100644 --- a/kernel/futex.c +++ b/kernel/futex.c | |||
| @@ -89,36 +89,36 @@ struct futex_pi_state { | |||
| 89 | union futex_key key; | 89 | union futex_key key; |
| 90 | }; | 90 | }; |
| 91 | 91 | ||
| 92 | /* | 92 | /** |
| 93 | * We use this hashed waitqueue instead of a normal wait_queue_t, so | 93 | * struct futex_q - The hashed futex queue entry, one per waiting task |
| 94 | * @task: the task waiting on the futex | ||
| 95 | * @lock_ptr: the hash bucket lock | ||
| 96 | * @key: the key the futex is hashed on | ||
| 97 | * @pi_state: optional priority inheritance state | ||
| 98 | * @rt_waiter: rt_waiter storage for use with requeue_pi | ||
| 99 | * @requeue_pi_key: the requeue_pi target futex key | ||
| 100 | * @bitset: bitset for the optional bitmasked wakeup | ||
| 101 | * | ||
| 102 | * We use this hashed waitqueue, instead of a normal wait_queue_t, so | ||
| 94 | * we can wake only the relevant ones (hashed queues may be shared). | 103 | * we can wake only the relevant ones (hashed queues may be shared). |
| 95 | * | 104 | * |
| 96 | * A futex_q has a woken state, just like tasks have TASK_RUNNING. | 105 | * A futex_q has a woken state, just like tasks have TASK_RUNNING. |
| 97 | * It is considered woken when plist_node_empty(&q->list) || q->lock_ptr == 0. | 106 | * It is considered woken when plist_node_empty(&q->list) || q->lock_ptr == 0. |
| 98 | * The order of wakeup is always to make the first condition true, then | 107 | * The order of wakeup is always to make the first condition true, then |
| 99 | * wake up q->waiter, then make the second condition true. | 108 | * the second. |
| 109 | * | ||
| 110 | * PI futexes are typically woken before they are removed from the hash list via | ||
| 111 | * the rt_mutex code. See unqueue_me_pi(). | ||
| 100 | */ | 112 | */ |
| 101 | struct futex_q { | 113 | struct futex_q { |
| 102 | struct plist_node list; | 114 | struct plist_node list; |
| 103 | /* Waiter reference */ | ||
| 104 | struct task_struct *task; | ||
| 105 | 115 | ||
| 106 | /* Which hash list lock to use: */ | 116 | struct task_struct *task; |
| 107 | spinlock_t *lock_ptr; | 117 | spinlock_t *lock_ptr; |
| 108 | |||
| 109 | /* Key which the futex is hashed on: */ | ||
| 110 | union futex_key key; | 118 | union futex_key key; |
| 111 | |||
| 112 | /* Optional priority inheritance state: */ | ||
| 113 | struct futex_pi_state *pi_state; | 119 | struct futex_pi_state *pi_state; |
| 114 | |||
| 115 | /* rt_waiter storage for requeue_pi: */ | ||
| 116 | struct rt_mutex_waiter *rt_waiter; | 120 | struct rt_mutex_waiter *rt_waiter; |
| 117 | |||
| 118 | /* The expected requeue pi target futex key: */ | ||
| 119 | union futex_key *requeue_pi_key; | 121 | union futex_key *requeue_pi_key; |
| 120 | |||
| 121 | /* Bitset for the optional bitmasked wakeup */ | ||
| 122 | u32 bitset; | 122 | u32 bitset; |
| 123 | }; | 123 | }; |
| 124 | 124 | ||
| @@ -150,7 +150,8 @@ static struct futex_hash_bucket *hash_futex(union futex_key *key) | |||
| 150 | */ | 150 | */ |
| 151 | static inline int match_futex(union futex_key *key1, union futex_key *key2) | 151 | static inline int match_futex(union futex_key *key1, union futex_key *key2) |
| 152 | { | 152 | { |
| 153 | return (key1->both.word == key2->both.word | 153 | return (key1 && key2 |
| 154 | && key1->both.word == key2->both.word | ||
| 154 | && key1->both.ptr == key2->both.ptr | 155 | && key1->both.ptr == key2->both.ptr |
| 155 | && key1->both.offset == key2->both.offset); | 156 | && key1->both.offset == key2->both.offset); |
| 156 | } | 157 | } |
| @@ -198,11 +199,12 @@ static void drop_futex_key_refs(union futex_key *key) | |||
| 198 | } | 199 | } |
| 199 | 200 | ||
| 200 | /** | 201 | /** |
| 201 | * get_futex_key - Get parameters which are the keys for a futex. | 202 | * get_futex_key() - Get parameters which are the keys for a futex |
| 202 | * @uaddr: virtual address of the futex | 203 | * @uaddr: virtual address of the futex |
| 203 | * @fshared: 0 for a PROCESS_PRIVATE futex, 1 for PROCESS_SHARED | 204 | * @fshared: 0 for a PROCESS_PRIVATE futex, 1 for PROCESS_SHARED |
| 204 | * @key: address where result is stored. | 205 | * @key: address where result is stored. |
| 205 | * @rw: mapping needs to be read/write (values: VERIFY_READ, VERIFY_WRITE) | 206 | * @rw: mapping needs to be read/write (values: VERIFY_READ, |
| 207 | * VERIFY_WRITE) | ||
| 206 | * | 208 | * |
| 207 | * Returns a negative error code or 0 | 209 | * Returns a negative error code or 0 |
| 208 | * The key words are stored in *key on success. | 210 | * The key words are stored in *key on success. |
| @@ -288,8 +290,8 @@ void put_futex_key(int fshared, union futex_key *key) | |||
| 288 | drop_futex_key_refs(key); | 290 | drop_futex_key_refs(key); |
| 289 | } | 291 | } |
| 290 | 292 | ||
| 291 | /* | 293 | /** |
| 292 | * fault_in_user_writeable - fault in user address and verify RW access | 294 | * fault_in_user_writeable() - Fault in user address and verify RW access |
| 293 | * @uaddr: pointer to faulting user space address | 295 | * @uaddr: pointer to faulting user space address |
| 294 | * | 296 | * |
| 295 | * Slow path to fixup the fault we just took in the atomic write | 297 | * Slow path to fixup the fault we just took in the atomic write |
| @@ -309,8 +311,8 @@ static int fault_in_user_writeable(u32 __user *uaddr) | |||
| 309 | 311 | ||
| 310 | /** | 312 | /** |
| 311 | * futex_top_waiter() - Return the highest priority waiter on a futex | 313 | * futex_top_waiter() - Return the highest priority waiter on a futex |
| 312 | * @hb: the hash bucket the futex_q's reside in | 314 | * @hb: the hash bucket the futex_q's reside in |
| 313 | * @key: the futex key (to distinguish it from other futex futex_q's) | 315 | * @key: the futex key (to distinguish it from other futex futex_q's) |
| 314 | * | 316 | * |
| 315 | * Must be called with the hb lock held. | 317 | * Must be called with the hb lock held. |
| 316 | */ | 318 | */ |
| @@ -588,7 +590,7 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, | |||
| 588 | } | 590 | } |
| 589 | 591 | ||
| 590 | /** | 592 | /** |
| 591 | * futex_lock_pi_atomic() - atomic work required to acquire a pi aware futex | 593 | * futex_lock_pi_atomic() - Atomic work required to acquire a pi aware futex |
| 592 | * @uaddr: the pi futex user address | 594 | * @uaddr: the pi futex user address |
| 593 | * @hb: the pi futex hash bucket | 595 | * @hb: the pi futex hash bucket |
| 594 | * @key: the futex key associated with uaddr and hb | 596 | * @key: the futex key associated with uaddr and hb |
| @@ -915,8 +917,8 @@ retry: | |||
| 915 | hb1 = hash_futex(&key1); | 917 | hb1 = hash_futex(&key1); |
| 916 | hb2 = hash_futex(&key2); | 918 | hb2 = hash_futex(&key2); |
| 917 | 919 | ||
| 918 | double_lock_hb(hb1, hb2); | ||
| 919 | retry_private: | 920 | retry_private: |
| 921 | double_lock_hb(hb1, hb2); | ||
| 920 | op_ret = futex_atomic_op_inuser(op, uaddr2); | 922 | op_ret = futex_atomic_op_inuser(op, uaddr2); |
| 921 | if (unlikely(op_ret < 0)) { | 923 | if (unlikely(op_ret < 0)) { |
| 922 | 924 | ||
| @@ -1011,9 +1013,9 @@ void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1, | |||
| 1011 | 1013 | ||
| 1012 | /** | 1014 | /** |
| 1013 | * requeue_pi_wake_futex() - Wake a task that acquired the lock during requeue | 1015 | * requeue_pi_wake_futex() - Wake a task that acquired the lock during requeue |
| 1014 | * q: the futex_q | 1016 | * @q: the futex_q |
| 1015 | * key: the key of the requeue target futex | 1017 | * @key: the key of the requeue target futex |
| 1016 | * hb: the hash_bucket of the requeue target futex | 1018 | * @hb: the hash_bucket of the requeue target futex |
| 1017 | * | 1019 | * |
| 1018 | * During futex_requeue, with requeue_pi=1, it is possible to acquire the | 1020 | * During futex_requeue, with requeue_pi=1, it is possible to acquire the |
| 1019 | * target futex if it is uncontended or via a lock steal. Set the futex_q key | 1021 | * target futex if it is uncontended or via a lock steal. Set the futex_q key |
| @@ -1027,7 +1029,6 @@ static inline | |||
| 1027 | void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key, | 1029 | void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key, |
| 1028 | struct futex_hash_bucket *hb) | 1030 | struct futex_hash_bucket *hb) |
| 1029 | { | 1031 | { |
| 1030 | drop_futex_key_refs(&q->key); | ||
| 1031 | get_futex_key_refs(key); | 1032 | get_futex_key_refs(key); |
| 1032 | q->key = *key; | 1033 | q->key = *key; |
| 1033 | 1034 | ||
| @@ -1225,6 +1226,7 @@ retry_private: | |||
| 1225 | */ | 1226 | */ |
| 1226 | if (ret == 1) { | 1227 | if (ret == 1) { |
| 1227 | WARN_ON(pi_state); | 1228 | WARN_ON(pi_state); |
| 1229 | drop_count++; | ||
| 1228 | task_count++; | 1230 | task_count++; |
| 1229 | ret = get_futex_value_locked(&curval2, uaddr2); | 1231 | ret = get_futex_value_locked(&curval2, uaddr2); |
| 1230 | if (!ret) | 1232 | if (!ret) |
| @@ -1303,6 +1305,7 @@ retry_private: | |||
| 1303 | if (ret == 1) { | 1305 | if (ret == 1) { |
| 1304 | /* We got the lock. */ | 1306 | /* We got the lock. */ |
| 1305 | requeue_pi_wake_futex(this, &key2, hb2); | 1307 | requeue_pi_wake_futex(this, &key2, hb2); |
| 1308 | drop_count++; | ||
| 1306 | continue; | 1309 | continue; |
| 1307 | } else if (ret) { | 1310 | } else if (ret) { |
| 1308 | /* -EDEADLK */ | 1311 | /* -EDEADLK */ |
| @@ -1350,6 +1353,25 @@ static inline struct futex_hash_bucket *queue_lock(struct futex_q *q) | |||
| 1350 | return hb; | 1353 | return hb; |
| 1351 | } | 1354 | } |
| 1352 | 1355 | ||
| 1356 | static inline void | ||
| 1357 | queue_unlock(struct futex_q *q, struct futex_hash_bucket *hb) | ||
| 1358 | { | ||
| 1359 | spin_unlock(&hb->lock); | ||
| 1360 | drop_futex_key_refs(&q->key); | ||
| 1361 | } | ||
| 1362 | |||
| 1363 | /** | ||
| 1364 | * queue_me() - Enqueue the futex_q on the futex_hash_bucket | ||
| 1365 | * @q: The futex_q to enqueue | ||
| 1366 | * @hb: The destination hash bucket | ||
| 1367 | * | ||
| 1368 | * The hb->lock must be held by the caller, and is released here. A call to | ||
| 1369 | * queue_me() is typically paired with exactly one call to unqueue_me(). The | ||
| 1370 | * exceptions involve the PI related operations, which may use unqueue_me_pi() | ||
| 1371 | * or nothing if the unqueue is done as part of the wake process and the unqueue | ||
| 1372 | * state is implicit in the state of woken task (see futex_wait_requeue_pi() for | ||
| 1373 | * an example). | ||
| 1374 | */ | ||
| 1353 | static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb) | 1375 | static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb) |
| 1354 | { | 1376 | { |
| 1355 | int prio; | 1377 | int prio; |
| @@ -1373,19 +1395,17 @@ static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb) | |||
| 1373 | spin_unlock(&hb->lock); | 1395 | spin_unlock(&hb->lock); |
| 1374 | } | 1396 | } |
| 1375 | 1397 | ||
| 1376 | static inline void | 1398 | /** |
| 1377 | queue_unlock(struct futex_q *q, struct futex_hash_bucket *hb) | 1399 | * unqueue_me() - Remove the futex_q from its futex_hash_bucket |
| 1378 | { | 1400 | * @q: The futex_q to unqueue |
| 1379 | spin_unlock(&hb->lock); | 1401 | * |
| 1380 | drop_futex_key_refs(&q->key); | 1402 | * The q->lock_ptr must not be held by the caller. A call to unqueue_me() must |
| 1381 | } | 1403 | * be paired with exactly one earlier call to queue_me(). |
| 1382 | 1404 | * | |
| 1383 | /* | 1405 | * Returns: |
| 1384 | * queue_me and unqueue_me must be called as a pair, each | 1406 | * 1 - if the futex_q was still queued (and we unqueued it) |
| 1385 | * exactly once. They are called with the hashed spinlock held. | 1407 | * 0 - if the futex_q was already removed by the waking thread |
| 1386 | */ | 1408 | */ |
| 1387 | |||
| 1388 | /* Return 1 if we were still queued (ie. 0 means we were woken) */ | ||
| 1389 | static int unqueue_me(struct futex_q *q) | 1409 | static int unqueue_me(struct futex_q *q) |
| 1390 | { | 1410 | { |
| 1391 | spinlock_t *lock_ptr; | 1411 | spinlock_t *lock_ptr; |
| @@ -1638,17 +1658,14 @@ out: | |||
| 1638 | static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q, | 1658 | static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q, |
| 1639 | struct hrtimer_sleeper *timeout) | 1659 | struct hrtimer_sleeper *timeout) |
| 1640 | { | 1660 | { |
| 1641 | queue_me(q, hb); | ||
| 1642 | |||
| 1643 | /* | 1661 | /* |
| 1644 | * There might have been scheduling since the queue_me(), as we | 1662 | * The task state is guaranteed to be set before another task can |
| 1645 | * cannot hold a spinlock across the get_user() in case it | 1663 | * wake it. set_current_state() is implemented using set_mb() and |
| 1646 | * faults, and we cannot just set TASK_INTERRUPTIBLE state when | 1664 | * queue_me() calls spin_unlock() upon completion, both serializing |
| 1647 | * queueing ourselves into the futex hash. This code thus has to | 1665 | * access to the hash list and forcing another memory barrier. |
| 1648 | * rely on the futex_wake() code removing us from hash when it | ||
| 1649 | * wakes us up. | ||
| 1650 | */ | 1666 | */ |
| 1651 | set_current_state(TASK_INTERRUPTIBLE); | 1667 | set_current_state(TASK_INTERRUPTIBLE); |
| 1668 | queue_me(q, hb); | ||
| 1652 | 1669 | ||
| 1653 | /* Arm the timer */ | 1670 | /* Arm the timer */ |
| 1654 | if (timeout) { | 1671 | if (timeout) { |
| @@ -1658,8 +1675,8 @@ static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q, | |||
| 1658 | } | 1675 | } |
| 1659 | 1676 | ||
| 1660 | /* | 1677 | /* |
| 1661 | * !plist_node_empty() is safe here without any lock. | 1678 | * If we have been removed from the hash list, then another task |
| 1662 | * q.lock_ptr != 0 is not safe, because of ordering against wakeup. | 1679 | * has tried to wake us, and we can skip the call to schedule(). |
| 1663 | */ | 1680 | */ |
| 1664 | if (likely(!plist_node_empty(&q->list))) { | 1681 | if (likely(!plist_node_empty(&q->list))) { |
| 1665 | /* | 1682 | /* |
| @@ -1776,6 +1793,7 @@ static int futex_wait(u32 __user *uaddr, int fshared, | |||
| 1776 | current->timer_slack_ns); | 1793 | current->timer_slack_ns); |
| 1777 | } | 1794 | } |
| 1778 | 1795 | ||
| 1796 | retry: | ||
| 1779 | /* Prepare to wait on uaddr. */ | 1797 | /* Prepare to wait on uaddr. */ |
| 1780 | ret = futex_wait_setup(uaddr, val, fshared, &q, &hb); | 1798 | ret = futex_wait_setup(uaddr, val, fshared, &q, &hb); |
| 1781 | if (ret) | 1799 | if (ret) |
| @@ -1793,9 +1811,14 @@ static int futex_wait(u32 __user *uaddr, int fshared, | |||
| 1793 | goto out_put_key; | 1811 | goto out_put_key; |
| 1794 | 1812 | ||
| 1795 | /* | 1813 | /* |
| 1796 | * We expect signal_pending(current), but another thread may | 1814 | * We expect signal_pending(current), but we might be the |
| 1797 | * have handled it for us already. | 1815 | * victim of a spurious wakeup as well. |
| 1798 | */ | 1816 | */ |
| 1817 | if (!signal_pending(current)) { | ||
| 1818 | put_futex_key(fshared, &q.key); | ||
| 1819 | goto retry; | ||
| 1820 | } | ||
| 1821 | |||
| 1799 | ret = -ERESTARTSYS; | 1822 | ret = -ERESTARTSYS; |
| 1800 | if (!abs_time) | 1823 | if (!abs_time) |
| 1801 | goto out_put_key; | 1824 | goto out_put_key; |
| @@ -2102,11 +2125,12 @@ int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb, | |||
| 2102 | * Unqueue the futex_q and determine which it was. | 2125 | * Unqueue the futex_q and determine which it was. |
| 2103 | */ | 2126 | */ |
| 2104 | plist_del(&q->list, &q->list.plist); | 2127 | plist_del(&q->list, &q->list.plist); |
| 2105 | drop_futex_key_refs(&q->key); | ||
| 2106 | 2128 | ||
| 2129 | /* Handle spurious wakeups gracefully */ | ||
| 2130 | ret = -EWOULDBLOCK; | ||
| 2107 | if (timeout && !timeout->task) | 2131 | if (timeout && !timeout->task) |
| 2108 | ret = -ETIMEDOUT; | 2132 | ret = -ETIMEDOUT; |
| 2109 | else | 2133 | else if (signal_pending(current)) |
| 2110 | ret = -ERESTARTNOINTR; | 2134 | ret = -ERESTARTNOINTR; |
| 2111 | } | 2135 | } |
| 2112 | return ret; | 2136 | return ret; |
| @@ -2114,12 +2138,12 @@ int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb, | |||
| 2114 | 2138 | ||
| 2115 | /** | 2139 | /** |
| 2116 | * futex_wait_requeue_pi() - Wait on uaddr and take uaddr2 | 2140 | * futex_wait_requeue_pi() - Wait on uaddr and take uaddr2 |
| 2117 | * @uaddr: the futex we initialyl wait on (non-pi) | 2141 | * @uaddr: the futex we initially wait on (non-pi) |
| 2118 | * @fshared: whether the futexes are shared (1) or not (0). They must be | 2142 | * @fshared: whether the futexes are shared (1) or not (0). They must be |
| 2119 | * the same type, no requeueing from private to shared, etc. | 2143 | * the same type, no requeueing from private to shared, etc. |
| 2120 | * @val: the expected value of uaddr | 2144 | * @val: the expected value of uaddr |
| 2121 | * @abs_time: absolute timeout | 2145 | * @abs_time: absolute timeout |
| 2122 | * @bitset: 32 bit wakeup bitset set by userspace, defaults to all. | 2146 | * @bitset: 32 bit wakeup bitset set by userspace, defaults to all |
| 2123 | * @clockrt: whether to use CLOCK_REALTIME (1) or CLOCK_MONOTONIC (0) | 2147 | * @clockrt: whether to use CLOCK_REALTIME (1) or CLOCK_MONOTONIC (0) |
| 2124 | * @uaddr2: the pi futex we will take prior to returning to user-space | 2148 | * @uaddr2: the pi futex we will take prior to returning to user-space |
| 2125 | * | 2149 | * |
| @@ -2246,7 +2270,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared, | |||
| 2246 | res = fixup_owner(uaddr2, fshared, &q, !ret); | 2270 | res = fixup_owner(uaddr2, fshared, &q, !ret); |
| 2247 | /* | 2271 | /* |
| 2248 | * If fixup_owner() returned an error, propagate that. If it | 2272 | * If fixup_owner() returned an error, propagate that. If it |
| 2249 | * acquired the lock, clear our -ETIMEDOUT or -EINTR. | 2273 | * acquired the lock, clear -ETIMEDOUT or -EINTR. |
| 2250 | */ | 2274 | */ |
| 2251 | if (res) | 2275 | if (res) |
| 2252 | ret = (res < 0) ? res : 0; | 2276 | ret = (res < 0) ? res : 0; |
| @@ -2302,9 +2326,9 @@ out: | |||
| 2302 | */ | 2326 | */ |
| 2303 | 2327 | ||
| 2304 | /** | 2328 | /** |
| 2305 | * sys_set_robust_list - set the robust-futex list head of a task | 2329 | * sys_set_robust_list() - Set the robust-futex list head of a task |
| 2306 | * @head: pointer to the list-head | 2330 | * @head: pointer to the list-head |
| 2307 | * @len: length of the list-head, as userspace expects | 2331 | * @len: length of the list-head, as userspace expects |
| 2308 | */ | 2332 | */ |
| 2309 | SYSCALL_DEFINE2(set_robust_list, struct robust_list_head __user *, head, | 2333 | SYSCALL_DEFINE2(set_robust_list, struct robust_list_head __user *, head, |
| 2310 | size_t, len) | 2334 | size_t, len) |
| @@ -2323,10 +2347,10 @@ SYSCALL_DEFINE2(set_robust_list, struct robust_list_head __user *, head, | |||
| 2323 | } | 2347 | } |
| 2324 | 2348 | ||
| 2325 | /** | 2349 | /** |
| 2326 | * sys_get_robust_list - get the robust-futex list head of a task | 2350 | * sys_get_robust_list() - Get the robust-futex list head of a task |
| 2327 | * @pid: pid of the process [zero for current task] | 2351 | * @pid: pid of the process [zero for current task] |
| 2328 | * @head_ptr: pointer to a list-head pointer, the kernel fills it in | 2352 | * @head_ptr: pointer to a list-head pointer, the kernel fills it in |
| 2329 | * @len_ptr: pointer to a length field, the kernel fills in the header size | 2353 | * @len_ptr: pointer to a length field, the kernel fills in the header size |
| 2330 | */ | 2354 | */ |
| 2331 | SYSCALL_DEFINE3(get_robust_list, int, pid, | 2355 | SYSCALL_DEFINE3(get_robust_list, int, pid, |
| 2332 | struct robust_list_head __user * __user *, head_ptr, | 2356 | struct robust_list_head __user * __user *, head_ptr, |
